Fix tests on windows (#159)

[etc/vim.git] / black.py
diff --git a/black.py b/black.py

index 7c487f6e938c1b8113674b10bc91877da0546aa3..c77166a2a6c454486c564aa8ad0a3065e3d0edde 100644 (file)
--- a/black.py
+++ b/black.py
@@ -1,6 +1,7 @@
  #!/usr/bin/env python3
  
  import asyncio
+import pickle
  from asyncio.base_events import BaseEventLoop
  from concurrent.futures import Executor, ProcessPoolExecutor
  from enum import Enum
@@ -32,6 +33,7 @@ from typing import (
      Union,
  )
  
+from appdirs import user_cache_dir
  from attr import dataclass, Factory
  import click
  
@@ -54,6 +56,10 @@ Priority = int
  Index = int
  LN = Union[Leaf, Node]
  SplitFunc = Callable[["Line", bool], Iterator["Line"]]
+Timestamp = float
+FileSize = int
+CacheInfo = Tuple[Timestamp, FileSize]
+Cache = Dict[Path, CacheInfo]
  out = partial(click.secho, bold=True, err=True)
  err = partial(click.secho, fg="red", err=True)
  
@@ -104,6 +110,12 @@ class WriteBack(Enum):
      DIFF = 2
  
  
+class Changed(Enum):
+    NO = 0
+    CACHED = 1
+    YES = 2
+
+
  @click.command()
  @click.option(
      "-l",
@@ -172,12 +184,8 @@ def main(
              sources.append(Path("-"))
          else:
              err(f"invalid path: {s}")
-    if check and diff:
-        exc = click.ClickException("Options --check and --diff are mutually exclusive")
-        exc.exit_code = 2
-        raise exc
  
-    if check:
+    if check and not diff:
          write_back = WriteBack.NO
      elif diff:
          write_back = WriteBack.DIFF
@@ -185,22 +193,12 @@ def main(
          write_back = WriteBack.YES
      if len(sources) == 0:
          ctx.exit(0)
+        return
+
      elif len(sources) == 1:
-        p = sources[0]
-        report = Report(check=check, quiet=quiet)
-        try:
-            if not p.is_file() and str(p) == "-":
-                changed = format_stdin_to_stdout(
-                    line_length=line_length, fast=fast, write_back=write_back
-                )
-            else:
-                changed = format_file_in_place(
-                    p, line_length=line_length, fast=fast, write_back=write_back
-                )
-            report.done(p, changed)
-        except Exception as exc:
-            report.failed(p, str(exc))
-        ctx.exit(report.return_code)
+        return_code = reformat_one(
+            sources[0], line_length, fast, quiet, write_back, check
+        )
      else:
          loop = asyncio.get_event_loop()
          executor = ProcessPoolExecutor(max_workers=os.cpu_count())
@@ -208,12 +206,55 @@ def main(
          try:
              return_code = loop.run_until_complete(
                  schedule_formatting(
-                    sources, line_length, write_back, fast, quiet, loop, executor
+                    sources, line_length, write_back, fast, quiet, loop, executor, check
                  )
              )
          finally:
              shutdown(loop)
-            ctx.exit(return_code)
+    ctx.exit(return_code)
+
+
+def reformat_one(
+    src: Path,
+    line_length: int,
+    fast: bool,
+    quiet: bool,
+    write_back: WriteBack,
+    check: bool,
+) -> int:
+    """Reformat a single file under `src` without spawning child processes.
+
+    `check` and `quiet` configure the :class:`Report` whose return code is returned.
+    `line_length`, `write_back`, and `fast` are passed to the formatting function.
+    """
+    report = Report(check=check, quiet=quiet)
+    try:
+        changed = Changed.NO
+        if not src.is_file() and str(src) == "-":
+            if format_stdin_to_stdout(
+                line_length=line_length, fast=fast, write_back=write_back
+            ):
+                changed = Changed.YES
+        else:
+            cache: Cache = {}
+            if write_back != WriteBack.DIFF:
+                cache = read_cache()
+                src = src.resolve()
+                if src in cache and cache[src] == get_cache_info(src):
+                    changed = Changed.CACHED
+            if changed is not Changed.CACHED and format_file_in_place(
+                src, line_length=line_length, fast=fast, write_back=write_back
+            ):
+                changed = Changed.YES
+            # Only cache files known to be formatted on disk (not "would reformat").
+            written_ok = write_back is WriteBack.YES and changed is not Changed.CACHED
+            checked_ok = write_back is WriteBack.NO and changed is Changed.NO
+            if written_ok or checked_ok:
+                write_cache(cache, [src])
+        report.done(src, changed)
+    except Exception as exc:
+        report.failed(src, str(exc))
+    return report.return_code
  
  
  async def schedule_formatting(
@@ -224,6 +265,7 @@ async def schedule_formatting(
      quiet: bool,
      loop: BaseEventLoop,
      executor: Executor,
+    check: bool,
  ) -> int:
      """Run formatting of `sources` in parallel using the provided `executor`.
  
@@ -232,41 +274,59 @@ async def schedule_formatting(
      `line_length`, `write_back`, and `fast` options are passed to
      :func:`format_file_in_place`.
      """
-    lock = None
-    if write_back == WriteBack.DIFF:
-        # For diff output, we need locks to ensure we don't interleave output
-        # from different processes.
-        manager = Manager()
-        lock = manager.Lock()
-    tasks = {
-        src: loop.run_in_executor(
-            executor, format_file_in_place, src, line_length, fast, write_back, lock
-        )
-        for src in sources
-    }
-    _task_values = list(tasks.values())
-    loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
-    loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
-    await asyncio.wait(tasks.values())
+    report = Report(check=check, quiet=quiet)
+    cache: Cache = {}
+    if write_back != WriteBack.DIFF:
+        cache = read_cache()
+        sources, cached = filter_cached(cache, sources)
+        for src in cached:
+            report.done(src, Changed.CACHED)
      cancelled = []
-    report = Report(check=write_back is WriteBack.NO, quiet=quiet)
-    for src, task in tasks.items():
-        if not task.done():
-            report.failed(src, "timed out, cancelling")
-            task.cancel()
-            cancelled.append(task)
-        elif task.cancelled():
-            cancelled.append(task)
-        elif task.exception():
-            report.failed(src, str(task.exception()))
-        else:
-            report.done(src, task.result())
+    formatted = []
+    if sources:
+        lock = None
+        if write_back == WriteBack.DIFF:
+            # For diff output, we need locks to ensure we don't interleave output
+            # from different processes.
+            manager = Manager()
+            lock = manager.Lock()
+        tasks = {
+            src: loop.run_in_executor(
+                executor, format_file_in_place, src, line_length, fast, write_back, lock
+            )
+            for src in sources
+        }
+        _task_values = list(tasks.values())
+        try:
+            loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
+            loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
+        except NotImplementedError:
+            # There are no good alternatives for these on Windows
+            pass
+        await asyncio.wait(_task_values)
+        for src, task in tasks.items():
+            if not task.done():
+                report.failed(src, "timed out, cancelling")
+                task.cancel()
+                cancelled.append(task)
+            elif task.cancelled():
+                cancelled.append(task)
+            elif task.exception():
+                report.failed(src, str(task.exception()))
+            else:
+                formatted.append(src)
+                report.done(src, Changed.YES if task.result() else Changed.NO)
+
      if cancelled:
          await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
      elif not quiet:
          out("All done! ✨ 🍰 ✨")
      if not quiet:
          click.echo(str(report))
+
+    if write_back != WriteBack.DIFF and formatted:
+        write_cache(cache, formatted)
+
      return report.return_code
  
  
@@ -282,6 +342,7 @@ def format_file_in_place(
      If `write_back` is True, write reformatted code back to stdout.
      `line_length` and `fast` options are passed to :func:`format_file_contents`.
      """
+
      with tokenize.open(src) as src_buffer:
          src_contents = src_buffer.read()
      try:
@@ -552,6 +613,8 @@ class BracketTracker:
      bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
      delimiters: Dict[LeafID, Priority] = Factory(dict)
      previous: Optional[Leaf] = None
+    _for_loop_variable: bool = False
+    _lambda_arguments: bool = False
  
      def mark(self, leaf: Leaf) -> None:
          """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
@@ -571,6 +634,8 @@ class BracketTracker:
          if leaf.type == token.COMMENT:
              return
  
+        self.maybe_decrement_after_for_loop_variable(leaf)
+        self.maybe_decrement_after_lambda_arguments(leaf)
          if leaf.type in CLOSING_BRACKETS:
              self.depth -= 1
              opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
@@ -588,6 +653,8 @@ class BracketTracker:
              self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
              self.depth += 1
          self.previous = leaf
+        self.maybe_increment_lambda_arguments(leaf)
+        self.maybe_increment_for_loop_variable(leaf)
  
      def any_open_brackets(self) -> bool:
          """Return True if there is an yet unmatched open bracket on the line."""
@@ -596,11 +663,55 @@ class BracketTracker:
      def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
          """Return the highest priority of a delimiter found on the line.
  
-        Values are consistent with what `is_delimiter()` returns.
+        Values are consistent with what `is_split_*_delimiter()` return.
          Raises ValueError on no delimiters.
          """
          return max(v for k, v in self.delimiters.items() if k not in exclude)
  
+    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
+        """In a for loop, or comprehension, the variables are often unpacks.
+
+        To avoid splitting on the comma in this situation, increase the depth of
+        tokens between `for` and `in`.
+        """
+        if leaf.type == token.NAME and leaf.value == "for":
+            self.depth += 1
+            self._for_loop_variable = True
+            return True
+
+        return False
+
+    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
+        """See `maybe_increment_for_loop_variable` above for explanation."""
+        if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
+            self.depth -= 1
+            self._for_loop_variable = False
+            return True
+
+        return False
+
+    def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
+        """In a lambda expression, there might be more than one argument.
+
+        To avoid splitting on the comma in this situation, increase the depth of
+        tokens between `lambda` and `:`.
+        """
+        if leaf.type == token.NAME and leaf.value == "lambda":
+            self.depth += 1
+            self._lambda_arguments = True
+            return True
+
+        return False
+
+    def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
+        """See `maybe_increment_lambda_arguments` above for explanation."""
+        if self._lambda_arguments and leaf.type == token.COLON:
+            self.depth -= 1
+            self._lambda_arguments = False
+            return True
+
+        return False
+
  
  @dataclass
  class Line:
@@ -611,8 +722,6 @@ class Line:
      comments: List[Tuple[Index, Leaf]] = Factory(list)
      bracket_tracker: BracketTracker = Factory(BracketTracker)
      inside_brackets: bool = False
-    has_for: bool = False
-    _for_loop_variable: bool = False
  
      def append(self, leaf: Leaf, preformatted: bool = False) -> None:
          """Add a new `leaf` to the end of the line.
@@ -633,10 +742,8 @@ class Line:
              # imports, for which we only preserve newlines.
              leaf.prefix += whitespace(leaf)
          if self.inside_brackets or not preformatted:
-            self.maybe_decrement_after_for_loop_variable(leaf)
              self.bracket_tracker.mark(leaf)
              self.maybe_remove_trailing_comma(leaf)
-            self.maybe_increment_for_loop_variable(leaf)
  
          if not self.append_comment(leaf):
              self.leaves.append(leaf)
@@ -783,29 +890,6 @@ class Line:
  
          return False
  
-    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
-        """In a for loop, or comprehension, the variables are often unpacks.
-
-        To avoid splitting on the comma in this situation, increase the depth of
-        tokens between `for` and `in`.
-        """
-        if leaf.type == token.NAME and leaf.value == "for":
-            self.has_for = True
-            self.bracket_tracker.depth += 1
-            self._for_loop_variable = True
-            return True
-
-        return False
-
-    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
-        """See `maybe_increment_for_loop_variable` above for explanation."""
-        if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
-            self.bracket_tracker.depth -= 1
-            self._for_loop_variable = False
-            return True
-
-        return False
-
      def append_comment(self, comment: Leaf) -> bool:
          """Add an inline or standalone comment to the line."""
          if (
@@ -1299,17 +1383,11 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
  
      if p.type in {syms.parameters, syms.arglist}:
          # untyped function signatures or calls
-        if t == token.RPAR:
-            return NO
-
          if not prev or prev.type != token.COMMA:
              return NO
  
      elif p.type == syms.varargslist:
          # lambdas
-        if t == token.RPAR:
-            return NO
-
          if prev and prev.type != token.COMMA:
              return NO
  
@@ -1364,7 +1442,7 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
              if not prevp or prevp.type == token.LPAR:
                  return NO
  
-        elif prev.type in {token.EQUAL, token.STAR, token.DOUBLESTAR}:
+        elif prev.type in {token.EQUAL} | STARS:
              return NO
  
      elif p.type == syms.decorator:
@@ -1403,21 +1481,9 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
              # dots, but not the first one.
              return NO
  
-    elif (
-        p.type == syms.listmaker
-        or p.type == syms.testlist_gexp
-        or p.type == syms.subscriptlist
-    ):
-        # list interior, including unpacking
-        if not prev:
-            return NO
-
      elif p.type == syms.dictsetmaker:
-        # dict and set interior, including unpacking
-        if not prev:
-            return NO
-
-        if prev.type == token.DOUBLESTAR:
+        # dict unpacking
+        if prev and prev.type == token.DOUBLESTAR:
              return NO
  
      elif p.type in {syms.factor, syms.star_expr}:
@@ -1543,17 +1609,6 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
      return 0
  
  
-def is_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
-    """Return the priority of the `leaf` delimiter. Return 0 if not delimiter.
-
-    Higher numbers are higher priority.
-    """
-    return max(
-        is_split_before_delimiter(leaf, previous),
-        is_split_after_delimiter(leaf, previous),
-    )
-
-
  def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
      """Clean the prefix of the `leaf` and generate comments from it, if any.
  
@@ -2112,6 +2167,11 @@ def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
  
  
  def max_delimiter_priority_in_atom(node: LN) -> int:
+    """Return maximum delimiter priority inside `node`.
+
+    This is specific to atoms with contents contained in a pair of parentheses.
+    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
+    """
      if node.type != syms.atom:
          return 0
  
@@ -2165,7 +2225,7 @@ def is_python36(node: Node) -> bool:
              and n.children[-1].type == token.COMMA
          ):
              for ch in n.children:
-                if ch.type == token.STAR or ch.type == token.DOUBLESTAR:
+                if ch.type in STARS:
                      return True
  
      return False
@@ -2201,16 +2261,20 @@ class Report:
      same_count: int = 0
      failure_count: int = 0
  
-    def done(self, src: Path, changed: bool) -> None:
+    def done(self, src: Path, changed: Changed) -> None:
          """Increment the counter for successful reformatting. Write out a message."""
-        if changed:
+        if changed is Changed.YES:
              reformatted = "would reformat" if self.check else "reformatted"
              if not self.quiet:
                  out(f"{reformatted} {src}")
              self.change_count += 1
          else:
              if not self.quiet:
-                out(f"{src} already well formatted, good job.", bold=False)
+                if changed is Changed.NO:
+                    msg = f"{src} already well formatted, good job."
+                else:
+                    msg = f"{src} wasn't modified on disk since last run."
+                out(msg, bold=False)
              self.same_count += 1
  
      def failed(self, src: Path, message: str) -> None:
@@ -2409,5 +2473,62 @@ def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
      return regex.sub(replacement, regex.sub(replacement, original))
  
  
+CACHE_DIR = Path(user_cache_dir("black", version=__version__))
+CACHE_FILE = CACHE_DIR / "cache.pickle"
+
+
+def read_cache() -> Cache:
+    """Return the cache stored on disk, or an empty one if missing or malformed.
+
+    Empty/truncated files count as malformed; a later write_cache call fixes them.
+    """
+    if not CACHE_FILE.exists():
+        return {}
+
+    with CACHE_FILE.open("rb") as fobj:
+        try:
+            cache: Cache = pickle.load(fobj)
+        except (pickle.UnpicklingError, EOFError, ValueError, IndexError):
+            return {}
+
+    return cache
+
+
+def get_cache_info(path: Path) -> CacheInfo:
+    """Return the information used to check if a file is already formatted or not."""
+    stat = path.stat()
+    return stat.st_mtime, stat.st_size
+
+
+def filter_cached(
+    cache: Cache, sources: Iterable[Path]
+) -> Tuple[List[Path], List[Path]]:
+    """Split a list of paths into two.
+
+    The first list contains paths of files that were modified on disk or are not in
+    the cache. The other list contains paths to non-modified files.
+    """
+    todo, done = [], []
+    for src in sources:
+        src = src.resolve()
+        if cache.get(src) != get_cache_info(src):
+            todo.append(src)
+        else:
+            done.append(src)
+    return todo, done
+
+
+def write_cache(cache: Cache, sources: List[Path]) -> None:
+    """Update the cache file with fresh info for `sources`; ignore I/O errors."""
+    try:
+        # exist_ok avoids losing the write when a concurrent run creates the dir.
+        CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
+        with CACHE_FILE.open("wb") as fobj:
+            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
+    except OSError:
+        pass
+
+
  if __name__ == "__main__":
      main()