X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/cf8e998f46e89da39fa64029cc4550e2862f7ec2..c86fb362329a2b980ed94200e0652f9c74d56bbd:/black.py?ds=inline diff --git a/black.py b/black.py index 9581d63..da645a1 100644 --- a/black.py +++ b/black.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import asyncio +import pickle from asyncio.base_events import BaseEventLoop from concurrent.futures import Executor, ProcessPoolExecutor from enum import Enum @@ -24,6 +25,7 @@ from typing import ( Iterator, List, Optional, + Pattern, Set, Tuple, Type, @@ -31,6 +33,7 @@ from typing import ( Union, ) +from appdirs import user_cache_dir from attr import dataclass, Factory import click @@ -40,7 +43,7 @@ from blib2to3 import pygram, pytree from blib2to3.pgen2 import driver, token from blib2to3.pgen2.parse import ParseError -__version__ = "18.4a0" +__version__ = "18.4a2" DEFAULT_LINE_LENGTH = 88 # types syms = pygram.python_symbols @@ -53,6 +56,10 @@ Priority = int Index = int LN = Union[Leaf, Node] SplitFunc = Callable[["Line", bool], Iterator["Line"]] +Timestamp = float +FileSize = int +CacheInfo = Tuple[Timestamp, FileSize] +Cache = Dict[Path, CacheInfo] out = partial(click.secho, bold=True, err=True) err = partial(click.secho, fg="red", err=True) @@ -103,6 +110,12 @@ class WriteBack(Enum): DIFF = 2 +class Changed(Enum): + NO = 0 + CACHED = 1 + YES = 2 + + @click.command() @click.option( "-l", @@ -171,59 +184,82 @@ def main( sources.append(Path("-")) else: err(f"invalid path: {s}") - if check and diff: - exc = click.ClickException("Options --check and --diff are mutually exclusive") - exc.exit_code = 2 - raise exc - if check: + if check and not diff: write_back = WriteBack.NO elif diff: write_back = WriteBack.DIFF else: write_back = WriteBack.YES + report = Report(check=check, quiet=quiet) if len(sources) == 0: ctx.exit(0) + return + elif len(sources) == 1: - p = sources[0] - report = Report(check=check, quiet=quiet) - try: - if not p.is_file() and str(p) == "-": - changed = format_stdin_to_stdout( - line_length=line_length, fast=fast, write_back=write_back - ) - else: - changed = format_file_in_place( - p, line_length=line_length, fast=fast, write_back=write_back - ) - report.done(p, changed) - except Exception as exc: - report.failed(p, str(exc)) - ctx.exit(report.return_code) + reformat_one(sources[0], line_length, fast, write_back, report) else: loop = asyncio.get_event_loop() executor = ProcessPoolExecutor(max_workers=os.cpu_count()) - return_code = 1 try: - return_code = loop.run_until_complete( + loop.run_until_complete( schedule_formatting( - sources, line_length, write_back, fast, quiet, loop, executor + sources, line_length, fast, write_back, report, loop, executor ) ) finally: shutdown(loop) - ctx.exit(return_code) + if not quiet: + out("All done! ✨ 🍰 ✨") + click.echo(str(report)) + ctx.exit(report.return_code) + + +def reformat_one( + src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report" +) -> None: + """Reformat a single file under `src` without spawning child processes. + + If `quiet` is True, non-error messages are not output. `line_length`, + `write_back`, and `fast` options are passed to :func:`format_file_in_place`. + """ + try: + changed = Changed.NO + if not src.is_file() and str(src) == "-": + if format_stdin_to_stdout( + line_length=line_length, fast=fast, write_back=write_back + ): + changed = Changed.YES + else: + cache: Cache = {} + if write_back != WriteBack.DIFF: + cache = read_cache() + src = src.resolve() + if src in cache and cache[src] == get_cache_info(src): + changed = Changed.CACHED + if ( + changed is not Changed.CACHED + and format_file_in_place( + src, line_length=line_length, fast=fast, write_back=write_back + ) + ): + changed = Changed.YES + if write_back != WriteBack.DIFF and changed is not Changed.NO: + write_cache(cache, [src]) + report.done(src, changed) + except Exception as exc: + report.failed(src, str(exc)) async def schedule_formatting( sources: List[Path], line_length: int, - write_back: WriteBack, fast: bool, - quiet: bool, + write_back: WriteBack, + report: "Report", loop: BaseEventLoop, executor: Executor, -) -> int: +) -> None: """Run formatting of `sources` in parallel using the provided `executor`. (Use ProcessPoolExecutors for actual parallelism.) @@ -231,42 +267,52 @@ async def schedule_formatting( `line_length`, `write_back`, and `fast` options are passed to :func:`format_file_in_place`. """ - lock = None - if write_back == WriteBack.DIFF: - # For diff output, we need locks to ensure we don't interleave output - # from different processes. - manager = Manager() - lock = manager.Lock() - tasks = { - src: loop.run_in_executor( - executor, format_file_in_place, src, line_length, fast, write_back, lock - ) - for src in sources - } - _task_values = list(tasks.values()) - loop.add_signal_handler(signal.SIGINT, cancel, _task_values) - loop.add_signal_handler(signal.SIGTERM, cancel, _task_values) - await asyncio.wait(tasks.values()) + cache: Cache = {} + if write_back != WriteBack.DIFF: + cache = read_cache() + sources, cached = filter_cached(cache, sources) + for src in cached: + report.done(src, Changed.CACHED) cancelled = [] - report = Report(check=write_back is WriteBack.NO, quiet=quiet) - for src, task in tasks.items(): - if not task.done(): - report.failed(src, "timed out, cancelling") - task.cancel() - cancelled.append(task) - elif task.cancelled(): - cancelled.append(task) - elif task.exception(): - report.failed(src, str(task.exception())) - else: - report.done(src, task.result()) + formatted = [] + if sources: + lock = None + if write_back == WriteBack.DIFF: + # For diff output, we need locks to ensure we don't interleave output + # from different processes. + manager = Manager() + lock = manager.Lock() + tasks = { + src: loop.run_in_executor( + executor, format_file_in_place, src, line_length, fast, write_back, lock + ) + for src in sources + } + _task_values = list(tasks.values()) + try: + loop.add_signal_handler(signal.SIGINT, cancel, _task_values) + loop.add_signal_handler(signal.SIGTERM, cancel, _task_values) + except NotImplementedError: + # There are no good alternatives for these on Windows + pass + await asyncio.wait(_task_values) + for src, task in tasks.items(): + if not task.done(): + report.failed(src, "timed out, cancelling") + task.cancel() + cancelled.append(task) + elif task.cancelled(): + cancelled.append(task) + elif task.exception(): + report.failed(src, str(task.exception())) + else: + formatted.append(src) + report.done(src, Changed.YES if task.result() else Changed.NO) + if cancelled: await asyncio.gather(*cancelled, loop=loop, return_exceptions=True) - elif not quiet: - out("All done! ✨ 🍰 ✨") - if not quiet: - click.echo(str(report)) - return report.return_code + if write_back != WriteBack.DIFF and formatted: + write_cache(cache, formatted) def format_file_in_place( @@ -281,6 +327,7 @@ def format_file_in_place( If `write_back` is True, write reformatted code back to stdout. `line_length` and `fast` options are passed to :func:`format_file_contents`. """ + with tokenize.open(src) as src_buffer: src_contents = src_buffer.read() try: @@ -294,8 +341,8 @@ def format_file_in_place( with open(src, "w", encoding=src_buffer.encoding) as f: f.write(dst_contents) elif write_back == write_back.DIFF: - src_name = f"{src.name} (original)" - dst_name = f"{src.name} (formatted)" + src_name = f"{src} (original)" + dst_name = f"{src} (formatted)" diff_contents = diff(src_contents, dst_contents, src_name, dst_name) if lock: lock.acquire() @@ -381,7 +428,6 @@ def format_str(src_contents: str, line_length: int) -> FileContent: GRAMMARS = [ pygram.python_grammar_no_print_statement_no_exec_statement, pygram.python_grammar_no_print_statement, - pygram.python_grammar_no_exec_statement, pygram.python_grammar, ] @@ -521,7 +567,20 @@ MATH_OPERATORS = { token.DOUBLESTAR, token.DOUBLESLASH, } -VARARGS = {token.STAR, token.DOUBLESTAR} +STARS = {token.STAR, token.DOUBLESTAR} +VARARGS_PARENTS = { + syms.arglist, + syms.argument, # double star in arglist + syms.trailer, # single argument to call + syms.typedargslist, + syms.varargslist, # lambdas +} +UNPACKING_PARENTS = { + syms.atom, # single element of a list or set literal + syms.dictsetmaker, + syms.listmaker, + syms.testlist_gexp, +} COMPREHENSION_PRIORITY = 20 COMMA_PRIORITY = 10 LOGIC_PRIORITY = 5 @@ -538,6 +597,8 @@ class BracketTracker: bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict) delimiters: Dict[LeafID, Priority] = Factory(dict) previous: Optional[Leaf] = None + _for_loop_variable: bool = False + _lambda_arguments: bool = False def mark(self, leaf: Leaf) -> None: """Mark `leaf` with bracket-related metadata. Keep track of delimiters. @@ -557,6 +618,8 @@ class BracketTracker: if leaf.type == token.COMMENT: return + self.maybe_decrement_after_for_loop_variable(leaf) + self.maybe_decrement_after_lambda_arguments(leaf) if leaf.type in CLOSING_BRACKETS: self.depth -= 1 opening_bracket = self.bracket_match.pop((self.depth, leaf.type)) @@ -574,6 +637,8 @@ class BracketTracker: self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf self.depth += 1 self.previous = leaf + self.maybe_increment_lambda_arguments(leaf) + self.maybe_increment_for_loop_variable(leaf) def any_open_brackets(self) -> bool: """Return True if there is an yet unmatched open bracket on the line.""" @@ -582,11 +647,55 @@ class BracketTracker: def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int: """Return the highest priority of a delimiter found on the line. - Values are consistent with what `is_delimiter()` returns. + Values are consistent with what `is_split_*_delimiter()` return. Raises ValueError on no delimiters. """ return max(v for k, v in self.delimiters.items() if k not in exclude) + def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool: + """In a for loop, or comprehension, the variables are often unpacks. + + To avoid splitting on the comma in this situation, increase the depth of + tokens between `for` and `in`. + """ + if leaf.type == token.NAME and leaf.value == "for": + self.depth += 1 + self._for_loop_variable = True + return True + + return False + + def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool: + """See `maybe_increment_for_loop_variable` above for explanation.""" + if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in": + self.depth -= 1 + self._for_loop_variable = False + return True + + return False + + def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool: + """In a lambda expression, there might be more than one argument. + + To avoid splitting on the comma in this situation, increase the depth of + tokens between `lambda` and `:`. + """ + if leaf.type == token.NAME and leaf.value == "lambda": + self.depth += 1 + self._lambda_arguments = True + return True + + return False + + def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool: + """See `maybe_increment_lambda_arguments` above for explanation.""" + if self._lambda_arguments and leaf.type == token.COLON: + self.depth -= 1 + self._lambda_arguments = False + return True + + return False + @dataclass class Line: @@ -597,8 +706,6 @@ class Line: comments: List[Tuple[Index, Leaf]] = Factory(list) bracket_tracker: BracketTracker = Factory(BracketTracker) inside_brackets: bool = False - has_for: bool = False - _for_loop_variable: bool = False def append(self, leaf: Leaf, preformatted: bool = False) -> None: """Add a new `leaf` to the end of the line. @@ -619,10 +726,8 @@ class Line: # imports, for which we only preserve newlines. leaf.prefix += whitespace(leaf) if self.inside_brackets or not preformatted: - self.maybe_decrement_after_for_loop_variable(leaf) self.bracket_tracker.mark(leaf) self.maybe_remove_trailing_comma(leaf) - self.maybe_increment_for_loop_variable(leaf) if not self.append_comment(leaf): self.leaves.append(leaf) @@ -769,29 +874,6 @@ class Line: return False - def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool: - """In a for loop, or comprehension, the variables are often unpacks. - - To avoid splitting on the comma in this situation, increase the depth of - tokens between `for` and `in`. - """ - if leaf.type == token.NAME and leaf.value == "for": - self.has_for = True - self.bracket_tracker.depth += 1 - self._for_loop_variable = True - return True - - return False - - def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool: - """See `maybe_increment_for_loop_variable` above for explanation.""" - if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in": - self.bracket_tracker.depth -= 1 - self._for_loop_variable = False - return True - - return False - def append_comment(self, comment: Leaf) -> bool: """Add an inline or standalone comment to the line.""" if ( @@ -1254,18 +1336,8 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 # that, too. return prevp.prefix - elif prevp.type == token.DOUBLESTAR: - if ( - prevp.parent - and prevp.parent.type in { - syms.arglist, - syms.argument, - syms.dictsetmaker, - syms.parameters, - syms.typedargslist, - syms.varargslist, - } - ): + elif prevp.type in STARS: + if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS): return NO elif prevp.type == token.COLON: @@ -1274,7 +1346,7 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 elif ( prevp.parent - and prevp.parent.type in {syms.factor, syms.star_expr} + and prevp.parent.type == syms.factor and prevp.type in MATH_OPERATORS ): return NO @@ -1295,17 +1367,11 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 if p.type in {syms.parameters, syms.arglist}: # untyped function signatures or calls - if t == token.RPAR: - return NO - if not prev or prev.type != token.COMMA: return NO elif p.type == syms.varargslist: # lambdas - if t == token.RPAR: - return NO - if prev and prev.type != token.COMMA: return NO @@ -1360,7 +1426,7 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 if not prevp or prevp.type == token.LPAR: return NO - elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR: + elif prev.type in {token.EQUAL} | STARS: return NO elif p.type == syms.decorator: @@ -1399,21 +1465,9 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 # dots, but not the first one. return NO - elif ( - p.type == syms.listmaker - or p.type == syms.testlist_gexp - or p.type == syms.subscriptlist - ): - # list interior, including unpacking - if not prev: - return NO - elif p.type == syms.dictsetmaker: - # dict and set interior, including unpacking - if not prev: - return NO - - if prev.type == token.DOUBLESTAR: + # dict unpacking + if prev and prev.type == token.DOUBLESTAR: return NO elif p.type in {syms.factor, syms.star_expr}: @@ -1495,11 +1549,7 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int: Higher numbers are higher priority. """ - if ( - leaf.type in VARARGS - and leaf.parent - and leaf.parent.type in {syms.argument, syms.typedargslist} - ): + if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS): # * and ** might also be MATH_OPERATORS but in this case they are not. # Don't treat them as a delimiter. return 0 @@ -1543,17 +1593,6 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int: return 0 -def is_delimiter(leaf: Leaf, previous: Leaf = None) -> int: - """Return the priority of the `leaf` delimiter. Return 0 if not delimiter. - - Higher numbers are higher priority. - """ - return max( - is_split_before_delimiter(leaf, previous), - is_split_after_delimiter(leaf, previous), - ) - - def generate_comments(leaf: Leaf) -> Iterator[Leaf]: """Clean the prefix of the `leaf` and generate comments from it, if any. @@ -1877,8 +1916,7 @@ def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]: lowest_depth = min(lowest_depth, leaf.bracket_depth) if ( leaf.bracket_depth == lowest_depth - and leaf.type == token.STAR - or leaf.type == token.DOUBLESTAR + and is_vararg(leaf, within=VARARGS_PARENTS) ): trailing_comma_safe = trailing_comma_safe and py36 leaf_priority = delimiters.get(id(leaf)) @@ -1984,9 +2022,10 @@ def normalize_string_quotes(leaf: Leaf) -> None: return # There's an internal error prefix = leaf.value[:first_quote_pos] - body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}") - escaped_orig_quote = re.compile(rf"\\(\\\\)*{orig_quote}") + escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}") + escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}") + body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] if "r" in prefix.casefold(): if unescaped_new_quote.search(body): # There's at least one unescaped new_quote in this raw string @@ -1996,11 +2035,14 @@ def normalize_string_quotes(leaf: Leaf) -> None: # Do not introduce or remove backslashes in raw strings new_body = body else: - new_body = escaped_orig_quote.sub(rf"\1{orig_quote}", body) - new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body) - # Add escapes again for consecutive occurences of new_quote (sub - # doesn't match overlapping substrings). - new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body) + # remove unnecessary quotes + new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body) + if body != new_body: + # Consider the string without unnecessary quotes as the original + body = new_body + leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}" + new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body) + new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body) if new_quote == '"""' and new_body[-1] == '"': # edge case: new_body = new_body[:-1] + '\\"' @@ -2085,7 +2127,35 @@ def is_one_tuple(node: LN) -> bool: ) +def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool: + """Return True if `leaf` is a star or double star in a vararg or kwarg. + + If `within` includes VARARGS_PARENTS, this applies to function signatures. + If `within` includes COLLECTION_LIBERALS_PARENTS, it applies to right + hand-side extended iterable unpacking (PEP 3132) and additional unpacking + generalizations (PEP 448). + """ + if leaf.type not in STARS or not leaf.parent: + return False + + p = leaf.parent + if p.type == syms.star_expr: + # Star expressions are also used as assignment targets in extended + # iterable unpacking (PEP 3132). See what its parent is instead. + if not p.parent: + return False + + p = p.parent + + return p.type in within + + def max_delimiter_priority_in_atom(node: LN) -> int: + """Return maximum delimiter priority inside `node`. + + This is specific to atoms with contents contained in a pair of parentheses. + If `node` isn't an atom or there are no enclosing parentheses, returns 0. + """ if node.type != syms.atom: return 0 @@ -2139,7 +2209,7 @@ def is_python36(node: Node) -> bool: and n.children[-1].type == token.COMMA ): for ch in n.children: - if ch.type == token.STAR or ch.type == token.DOUBLESTAR: + if ch.type in STARS: return True return False @@ -2175,16 +2245,20 @@ class Report: same_count: int = 0 failure_count: int = 0 - def done(self, src: Path, changed: bool) -> None: + def done(self, src: Path, changed: Changed) -> None: """Increment the counter for successful reformatting. Write out a message.""" - if changed: + if changed is Changed.YES: reformatted = "would reformat" if self.check else "reformatted" if not self.quiet: out(f"{reformatted} {src}") self.change_count += 1 else: if not self.quiet: - out(f"{src} already well formatted, good job.", bold=False) + if changed is Changed.NO: + msg = f"{src} already well formatted, good job." + else: + msg = f"{src} wasn't modified on disk since last run." + out(msg, bold=False) self.same_count += 1 def failed(self, src: Path, message: str) -> None: @@ -2325,7 +2399,7 @@ def dump_to_file(*output: str) -> str: import tempfile with tempfile.NamedTemporaryFile( - mode="w", prefix="blk_", suffix=".log", delete=False + mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8" ) as f: for lines in output: f.write(lines) @@ -2374,5 +2448,71 @@ def shutdown(loop: BaseEventLoop) -> None: loop.close() +def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str: + """Replace `regex` with `replacement` twice on `original`. + + This is used by string normalization to perform replaces on + overlapping matches. + """ + return regex.sub(replacement, regex.sub(replacement, original)) + + +CACHE_DIR = Path(user_cache_dir("black", version=__version__)) +CACHE_FILE = CACHE_DIR / "cache.pickle" + + +def read_cache() -> Cache: + """Read the cache if it exists and is well formed. + + If it is not well formed, the call to write_cache later should resolve the issue. + """ + if not CACHE_FILE.exists(): + return {} + + with CACHE_FILE.open("rb") as fobj: + try: + cache: Cache = pickle.load(fobj) + except pickle.UnpicklingError: + return {} + + return cache + + +def get_cache_info(path: Path) -> CacheInfo: + """Return the information used to check if a file is already formatted or not.""" + stat = path.stat() + return stat.st_mtime, stat.st_size + + +def filter_cached( + cache: Cache, sources: Iterable[Path] +) -> Tuple[List[Path], List[Path]]: + """Split a list of paths into two. + + The first list contains paths of files that modified on disk or are not in the + cache. The other list contains paths to non-modified files. + """ + todo, done = [], [] + for src in sources: + src = src.resolve() + if cache.get(src) != get_cache_info(src): + todo.append(src) + else: + done.append(src) + return todo, done + + +def write_cache(cache: Cache, sources: List[Path]) -> None: + """Update the cache file.""" + try: + if not CACHE_DIR.exists(): + CACHE_DIR.mkdir(parents=True) + new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}} + with CACHE_FILE.open("wb") as fobj: + pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL) + except OSError: + pass + + if __name__ == "__main__": main()