]> git.madduck.net Git - etc/vim.git/blobdiff - black.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add AppVeyor for Windows builds
[etc/vim.git] / black.py
index 7c487f6e938c1b8113674b10bc91877da0546aa3..58f7976aa60bc72b0ceb72b5a160a6107bf6637b 100644 (file)
--- a/black.py
+++ b/black.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import asyncio
 #!/usr/bin/env python3
 
 import asyncio
+import pickle
 from asyncio.base_events import BaseEventLoop
 from concurrent.futures import Executor, ProcessPoolExecutor
 from enum import Enum
 from asyncio.base_events import BaseEventLoop
 from concurrent.futures import Executor, ProcessPoolExecutor
 from enum import Enum
@@ -32,6 +33,7 @@ from typing import (
     Union,
 )
 
     Union,
 )
 
+from appdirs import user_cache_dir
 from attr import dataclass, Factory
 import click
 
 from attr import dataclass, Factory
 import click
 
@@ -54,6 +56,10 @@ Priority = int
 Index = int
 LN = Union[Leaf, Node]
 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
 Index = int
 LN = Union[Leaf, Node]
 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
+Timestamp = float
+FileSize = int
+CacheInfo = Tuple[Timestamp, FileSize]
+Cache = Dict[Path, CacheInfo]
 out = partial(click.secho, bold=True, err=True)
 err = partial(click.secho, fg="red", err=True)
 
 out = partial(click.secho, bold=True, err=True)
 err = partial(click.secho, fg="red", err=True)
 
@@ -104,6 +110,12 @@ class WriteBack(Enum):
     DIFF = 2
 
 
     DIFF = 2
 
 
+class Changed(Enum):
+    NO = 0
+    CACHED = 1
+    YES = 2
+
+
 @click.command()
 @click.option(
     "-l",
 @click.command()
 @click.option(
     "-l",
@@ -185,22 +197,10 @@ def main(
         write_back = WriteBack.YES
     if len(sources) == 0:
         ctx.exit(0)
         write_back = WriteBack.YES
     if len(sources) == 0:
         ctx.exit(0)
+        return
+
     elif len(sources) == 1:
     elif len(sources) == 1:
-        p = sources[0]
-        report = Report(check=check, quiet=quiet)
-        try:
-            if not p.is_file() and str(p) == "-":
-                changed = format_stdin_to_stdout(
-                    line_length=line_length, fast=fast, write_back=write_back
-                )
-            else:
-                changed = format_file_in_place(
-                    p, line_length=line_length, fast=fast, write_back=write_back
-                )
-            report.done(p, changed)
-        except Exception as exc:
-            report.failed(p, str(exc))
-        ctx.exit(report.return_code)
+        return_code = reformat_one(sources[0], line_length, fast, quiet, write_back)
     else:
         loop = asyncio.get_event_loop()
         executor = ProcessPoolExecutor(max_workers=os.cpu_count())
     else:
         loop = asyncio.get_event_loop()
         executor = ProcessPoolExecutor(max_workers=os.cpu_count())
@@ -213,7 +213,45 @@ def main(
             )
         finally:
             shutdown(loop)
             )
         finally:
             shutdown(loop)
-            ctx.exit(return_code)
+    ctx.exit(return_code)
+
+
+def reformat_one(
+    src: Path, line_length: int, fast: bool, quiet: bool, write_back: WriteBack
+) -> int:
+    """Reformat a single file under `src` without spawning child processes.
+
+    If `quiet` is True, non-error messages are not output. `line_length`,
+    `write_back`, and `fast` options are passed to :func:`format_file_in_place`.
+    """
+    report = Report(check=write_back is WriteBack.NO, quiet=quiet)
+    try:
+        changed = Changed.NO
+        if not src.is_file() and str(src) == "-":
+            if format_stdin_to_stdout(
+                line_length=line_length, fast=fast, write_back=write_back
+            ):
+                changed = Changed.YES
+        else:
+            cache: Cache = {}
+            if write_back != WriteBack.DIFF:
+                cache = read_cache()
+                src = src.resolve()
+                if src in cache and cache[src] == get_cache_info(src):
+                    changed = Changed.CACHED
+            if (
+                changed is not Changed.CACHED
+                and format_file_in_place(
+                    src, line_length=line_length, fast=fast, write_back=write_back
+                )
+            ):
+                changed = Changed.YES
+            if write_back != WriteBack.DIFF and changed is not Changed.NO:
+                write_cache(cache, [src])
+        report.done(src, changed)
+    except Exception as exc:
+        report.failed(src, str(exc))
+    return report.return_code
 
 
 async def schedule_formatting(
 
 
 async def schedule_formatting(
@@ -232,41 +270,59 @@ async def schedule_formatting(
     `line_length`, `write_back`, and `fast` options are passed to
     :func:`format_file_in_place`.
     """
     `line_length`, `write_back`, and `fast` options are passed to
     :func:`format_file_in_place`.
     """
-    lock = None
-    if write_back == WriteBack.DIFF:
-        # For diff output, we need locks to ensure we don't interleave output
-        # from different processes.
-        manager = Manager()
-        lock = manager.Lock()
-    tasks = {
-        src: loop.run_in_executor(
-            executor, format_file_in_place, src, line_length, fast, write_back, lock
-        )
-        for src in sources
-    }
-    _task_values = list(tasks.values())
-    loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
-    loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
-    await asyncio.wait(tasks.values())
-    cancelled = []
     report = Report(check=write_back is WriteBack.NO, quiet=quiet)
     report = Report(check=write_back is WriteBack.NO, quiet=quiet)
-    for src, task in tasks.items():
-        if not task.done():
-            report.failed(src, "timed out, cancelling")
-            task.cancel()
-            cancelled.append(task)
-        elif task.cancelled():
-            cancelled.append(task)
-        elif task.exception():
-            report.failed(src, str(task.exception()))
-        else:
-            report.done(src, task.result())
+    cache: Cache = {}
+    if write_back != WriteBack.DIFF:
+        cache = read_cache()
+        sources, cached = filter_cached(cache, sources)
+        for src in cached:
+            report.done(src, Changed.CACHED)
+    cancelled = []
+    formatted = []
+    if sources:
+        lock = None
+        if write_back == WriteBack.DIFF:
+            # For diff output, we need locks to ensure we don't interleave output
+            # from different processes.
+            manager = Manager()
+            lock = manager.Lock()
+        tasks = {
+            src: loop.run_in_executor(
+                executor, format_file_in_place, src, line_length, fast, write_back, lock
+            )
+            for src in sources
+        }
+        _task_values = list(tasks.values())
+        try:
+            loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
+            loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
+        except NotImplementedError:
+            # There are no good alternatives for these on Windows
+            pass
+        await asyncio.wait(_task_values)
+        for src, task in tasks.items():
+            if not task.done():
+                report.failed(src, "timed out, cancelling")
+                task.cancel()
+                cancelled.append(task)
+            elif task.cancelled():
+                cancelled.append(task)
+            elif task.exception():
+                report.failed(src, str(task.exception()))
+            else:
+                formatted.append(src)
+                report.done(src, Changed.YES if task.result() else Changed.NO)
+
     if cancelled:
         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
     elif not quiet:
         out("All done! ✨ 🍰 ✨")
     if not quiet:
         click.echo(str(report))
     if cancelled:
         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
     elif not quiet:
         out("All done! ✨ 🍰 ✨")
     if not quiet:
         click.echo(str(report))
+
+    if write_back != WriteBack.DIFF and formatted:
+        write_cache(cache, formatted)
+
     return report.return_code
 
 
     return report.return_code
 
 
@@ -282,6 +338,7 @@ def format_file_in_place(
     If `write_back` is True, write reformatted code back to stdout.
     `line_length` and `fast` options are passed to :func:`format_file_contents`.
     """
     If `write_back` is True, write reformatted code back to stdout.
     `line_length` and `fast` options are passed to :func:`format_file_contents`.
     """
+
     with tokenize.open(src) as src_buffer:
         src_contents = src_buffer.read()
     try:
     with tokenize.open(src) as src_buffer:
         src_contents = src_buffer.read()
     try:
@@ -552,6 +609,8 @@ class BracketTracker:
     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
     delimiters: Dict[LeafID, Priority] = Factory(dict)
     previous: Optional[Leaf] = None
     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
     delimiters: Dict[LeafID, Priority] = Factory(dict)
     previous: Optional[Leaf] = None
+    _for_loop_variable: bool = False
+    _lambda_arguments: bool = False
 
     def mark(self, leaf: Leaf) -> None:
         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
 
     def mark(self, leaf: Leaf) -> None:
         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
@@ -571,6 +630,8 @@ class BracketTracker:
         if leaf.type == token.COMMENT:
             return
 
         if leaf.type == token.COMMENT:
             return
 
+        self.maybe_decrement_after_for_loop_variable(leaf)
+        self.maybe_decrement_after_lambda_arguments(leaf)
         if leaf.type in CLOSING_BRACKETS:
             self.depth -= 1
             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
         if leaf.type in CLOSING_BRACKETS:
             self.depth -= 1
             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
@@ -588,6 +649,8 @@ class BracketTracker:
             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
             self.depth += 1
         self.previous = leaf
             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
             self.depth += 1
         self.previous = leaf
+        self.maybe_increment_lambda_arguments(leaf)
+        self.maybe_increment_for_loop_variable(leaf)
 
     def any_open_brackets(self) -> bool:
         """Return True if there is an yet unmatched open bracket on the line."""
 
     def any_open_brackets(self) -> bool:
         """Return True if there is an yet unmatched open bracket on the line."""
@@ -596,11 +659,55 @@ class BracketTracker:
     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
         """Return the highest priority of a delimiter found on the line.
 
     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
         """Return the highest priority of a delimiter found on the line.
 
-        Values are consistent with what `is_delimiter()` returns.
+        Values are consistent with what `is_split_*_delimiter()` return.
         Raises ValueError on no delimiters.
         """
         return max(v for k, v in self.delimiters.items() if k not in exclude)
 
         Raises ValueError on no delimiters.
         """
         return max(v for k, v in self.delimiters.items() if k not in exclude)
 
+    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
+        """In a for loop, or comprehension, the variables are often unpacks.
+
+        To avoid splitting on the comma in this situation, increase the depth of
+        tokens between `for` and `in`.
+        """
+        if leaf.type == token.NAME and leaf.value == "for":
+            self.depth += 1
+            self._for_loop_variable = True
+            return True
+
+        return False
+
+    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
+        """See `maybe_increment_for_loop_variable` above for explanation."""
+        if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
+            self.depth -= 1
+            self._for_loop_variable = False
+            return True
+
+        return False
+
+    def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
+        """In a lambda expression, there might be more than one argument.
+
+        To avoid splitting on the comma in this situation, increase the depth of
+        tokens between `lambda` and `:`.
+        """
+        if leaf.type == token.NAME and leaf.value == "lambda":
+            self.depth += 1
+            self._lambda_arguments = True
+            return True
+
+        return False
+
+    def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
+        """See `maybe_increment_lambda_arguments` above for explanation."""
+        if self._lambda_arguments and leaf.type == token.COLON:
+            self.depth -= 1
+            self._lambda_arguments = False
+            return True
+
+        return False
+
 
 @dataclass
 class Line:
 
 @dataclass
 class Line:
@@ -611,8 +718,6 @@ class Line:
     comments: List[Tuple[Index, Leaf]] = Factory(list)
     bracket_tracker: BracketTracker = Factory(BracketTracker)
     inside_brackets: bool = False
     comments: List[Tuple[Index, Leaf]] = Factory(list)
     bracket_tracker: BracketTracker = Factory(BracketTracker)
     inside_brackets: bool = False
-    has_for: bool = False
-    _for_loop_variable: bool = False
 
     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
         """Add a new `leaf` to the end of the line.
 
     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
         """Add a new `leaf` to the end of the line.
@@ -633,10 +738,8 @@ class Line:
             # imports, for which we only preserve newlines.
             leaf.prefix += whitespace(leaf)
         if self.inside_brackets or not preformatted:
             # imports, for which we only preserve newlines.
             leaf.prefix += whitespace(leaf)
         if self.inside_brackets or not preformatted:
-            self.maybe_decrement_after_for_loop_variable(leaf)
             self.bracket_tracker.mark(leaf)
             self.maybe_remove_trailing_comma(leaf)
             self.bracket_tracker.mark(leaf)
             self.maybe_remove_trailing_comma(leaf)
-            self.maybe_increment_for_loop_variable(leaf)
 
         if not self.append_comment(leaf):
             self.leaves.append(leaf)
 
         if not self.append_comment(leaf):
             self.leaves.append(leaf)
@@ -783,29 +886,6 @@ class Line:
 
         return False
 
 
         return False
 
-    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
-        """In a for loop, or comprehension, the variables are often unpacks.
-
-        To avoid splitting on the comma in this situation, increase the depth of
-        tokens between `for` and `in`.
-        """
-        if leaf.type == token.NAME and leaf.value == "for":
-            self.has_for = True
-            self.bracket_tracker.depth += 1
-            self._for_loop_variable = True
-            return True
-
-        return False
-
-    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
-        """See `maybe_increment_for_loop_variable` above for explanation."""
-        if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
-            self.bracket_tracker.depth -= 1
-            self._for_loop_variable = False
-            return True
-
-        return False
-
     def append_comment(self, comment: Leaf) -> bool:
         """Add an inline or standalone comment to the line."""
         if (
     def append_comment(self, comment: Leaf) -> bool:
         """Add an inline or standalone comment to the line."""
         if (
@@ -1299,17 +1379,11 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
 
     if p.type in {syms.parameters, syms.arglist}:
         # untyped function signatures or calls
 
     if p.type in {syms.parameters, syms.arglist}:
         # untyped function signatures or calls
-        if t == token.RPAR:
-            return NO
-
         if not prev or prev.type != token.COMMA:
             return NO
 
     elif p.type == syms.varargslist:
         # lambdas
         if not prev or prev.type != token.COMMA:
             return NO
 
     elif p.type == syms.varargslist:
         # lambdas
-        if t == token.RPAR:
-            return NO
-
         if prev and prev.type != token.COMMA:
             return NO
 
         if prev and prev.type != token.COMMA:
             return NO
 
@@ -1364,7 +1438,7 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
             if not prevp or prevp.type == token.LPAR:
                 return NO
 
             if not prevp or prevp.type == token.LPAR:
                 return NO
 
-        elif prev.type in {token.EQUAL, token.STAR, token.DOUBLESTAR}:
+        elif prev.type in {token.EQUAL} | STARS:
             return NO
 
     elif p.type == syms.decorator:
             return NO
 
     elif p.type == syms.decorator:
@@ -1403,21 +1477,9 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
             # dots, but not the first one.
             return NO
 
             # dots, but not the first one.
             return NO
 
-    elif (
-        p.type == syms.listmaker
-        or p.type == syms.testlist_gexp
-        or p.type == syms.subscriptlist
-    ):
-        # list interior, including unpacking
-        if not prev:
-            return NO
-
     elif p.type == syms.dictsetmaker:
     elif p.type == syms.dictsetmaker:
-        # dict and set interior, including unpacking
-        if not prev:
-            return NO
-
-        if prev.type == token.DOUBLESTAR:
+        # dict unpacking
+        if prev and prev.type == token.DOUBLESTAR:
             return NO
 
     elif p.type in {syms.factor, syms.star_expr}:
             return NO
 
     elif p.type in {syms.factor, syms.star_expr}:
@@ -1543,17 +1605,6 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
     return 0
 
 
     return 0
 
 
-def is_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
-    """Return the priority of the `leaf` delimiter. Return 0 if not delimiter.
-
-    Higher numbers are higher priority.
-    """
-    return max(
-        is_split_before_delimiter(leaf, previous),
-        is_split_after_delimiter(leaf, previous),
-    )
-
-
 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
     """Clean the prefix of the `leaf` and generate comments from it, if any.
 
 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
     """Clean the prefix of the `leaf` and generate comments from it, if any.
 
@@ -2112,6 +2163,11 @@ def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
 
 
 def max_delimiter_priority_in_atom(node: LN) -> int:
 
 
 def max_delimiter_priority_in_atom(node: LN) -> int:
+    """Return maximum delimiter priority inside `node`.
+
+    This is specific to atoms with contents contained in a pair of parentheses.
+    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
+    """
     if node.type != syms.atom:
         return 0
 
     if node.type != syms.atom:
         return 0
 
@@ -2165,7 +2221,7 @@ def is_python36(node: Node) -> bool:
             and n.children[-1].type == token.COMMA
         ):
             for ch in n.children:
             and n.children[-1].type == token.COMMA
         ):
             for ch in n.children:
-                if ch.type == token.STAR or ch.type == token.DOUBLESTAR:
+                if ch.type in STARS:
                     return True
 
     return False
                     return True
 
     return False
@@ -2201,16 +2257,20 @@ class Report:
     same_count: int = 0
     failure_count: int = 0
 
     same_count: int = 0
     failure_count: int = 0
 
-    def done(self, src: Path, changed: bool) -> None:
+    def done(self, src: Path, changed: Changed) -> None:
         """Increment the counter for successful reformatting. Write out a message."""
         """Increment the counter for successful reformatting. Write out a message."""
-        if changed:
+        if changed is Changed.YES:
             reformatted = "would reformat" if self.check else "reformatted"
             if not self.quiet:
                 out(f"{reformatted} {src}")
             self.change_count += 1
         else:
             if not self.quiet:
             reformatted = "would reformat" if self.check else "reformatted"
             if not self.quiet:
                 out(f"{reformatted} {src}")
             self.change_count += 1
         else:
             if not self.quiet:
-                out(f"{src} already well formatted, good job.", bold=False)
+                if changed is Changed.NO:
+                    msg = f"{src} already well formatted, good job."
+                else:
+                    msg = f"{src} wasn't modified on disk since last run."
+                out(msg, bold=False)
             self.same_count += 1
 
     def failed(self, src: Path, message: str) -> None:
             self.same_count += 1
 
     def failed(self, src: Path, message: str) -> None:
@@ -2409,5 +2469,62 @@ def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
     return regex.sub(replacement, regex.sub(replacement, original))
 
 
     return regex.sub(replacement, regex.sub(replacement, original))
 
 
+CACHE_DIR = Path(user_cache_dir("black", version=__version__))
+CACHE_FILE = CACHE_DIR / "cache.pickle"
+
+
+def read_cache() -> Cache:
+    """Read the cache if it exists and is well formed.
+
+    If it is not well formed, the call to write_cache later should resolve the issue.
+    """
+    if not CACHE_FILE.exists():
+        return {}
+
+    with CACHE_FILE.open("rb") as fobj:
+        try:
+            cache: Cache = pickle.load(fobj)
+        except pickle.UnpicklingError:
+            return {}
+
+    return cache
+
+
+def get_cache_info(path: Path) -> CacheInfo:
+    """Return the information used to check if a file is already formatted or not."""
+    stat = path.stat()
+    return stat.st_mtime, stat.st_size
+
+
+def filter_cached(
+    cache: Cache, sources: Iterable[Path]
+) -> Tuple[List[Path], List[Path]]:
+    """Split a list of paths into two.
+
+    The first list contains paths of files that modified on disk or are not in the
+    cache. The other list contains paths to non-modified files.
+    """
+    todo, done = [], []
+    for src in sources:
+        src = src.resolve()
+        if cache.get(src) != get_cache_info(src):
+            todo.append(src)
+        else:
+            done.append(src)
+    return todo, done
+
+
+def write_cache(cache: Cache, sources: List[Path]) -> None:
+    """Update the cache file."""
+    try:
+        if not CACHE_DIR.exists():
+            CACHE_DIR.mkdir(parents=True)
+        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
+        with CACHE_FILE.open("wb") as fobj:
+            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
+    except OSError:
+        pass
+
+
 if __name__ == "__main__":
     main()
 if __name__ == "__main__":
     main()