X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/82198030eef6f99ad47470d82c34d9a6e1a4e6df..0967dfcbeba8aceaacd468b279cc23089d697878:/black.py diff --git a/black.py b/black.py index da645a1..d2d23c8 100644 --- a/black.py +++ b/black.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - import asyncio import pickle from asyncio.base_events import BaseEventLoop @@ -43,8 +41,9 @@ from blib2to3 import pygram, pytree from blib2to3.pgen2 import driver, token from blib2to3.pgen2.parse import ParseError -__version__ = "18.4a2" +__version__ = "18.4a6" DEFAULT_LINE_LENGTH = 88 + # types syms = pygram.python_symbols FileContent = str @@ -88,11 +87,11 @@ class FormatError(Exception): self.consumed = consumed def trim_prefix(self, leaf: Leaf) -> None: - leaf.prefix = leaf.prefix[self.consumed:] + leaf.prefix = leaf.prefix[self.consumed :] def leaf_from_consumed(self, leaf: Leaf) -> Leaf: """Returns a new Leaf from the consumed part of the prefix.""" - unformatted_prefix = leaf.prefix[:self.consumed] + unformatted_prefix = leaf.prefix[: self.consumed] return Leaf(token.NEWLINE, unformatted_prefix) @@ -193,6 +192,7 @@ def main( write_back = WriteBack.YES report = Report(check=check, quiet=quiet) if len(sources) == 0: + out("No paths given. Nothing to do 😴") ctx.exit(0) return @@ -233,7 +233,7 @@ def reformat_one( else: cache: Cache = {} if write_back != WriteBack.DIFF: - cache = read_cache() + cache = read_cache(line_length) src = src.resolve() if src in cache and cache[src] == get_cache_info(src): changed = Changed.CACHED @@ -244,8 +244,8 @@ def reformat_one( ) ): changed = Changed.YES - if write_back != WriteBack.DIFF and changed is not Changed.NO: - write_cache(cache, [src]) + if write_back == WriteBack.YES and changed is not Changed.NO: + write_cache(cache, [src], line_length) report.done(src, changed) except Exception as exc: report.failed(src, str(exc)) @@ -269,7 +269,7 @@ async def schedule_formatting( """ cache: Cache = {} if write_back != WriteBack.DIFF: - cache = read_cache() + cache = read_cache(line_length) sources, cached = filter_cached(cache, sources) for src in cached: report.done(src, Changed.CACHED) @@ -311,8 +311,8 @@ async def schedule_formatting( if cancelled: await asyncio.gather(*cancelled, loop=loop, return_exceptions=True) - if write_back != WriteBack.DIFF and formatted: - write_cache(cache, formatted) + if write_back == WriteBack.YES and formatted: + write_cache(cache, formatted, line_length) def format_file_in_place( @@ -581,8 +581,26 @@ UNPACKING_PARENTS = { syms.listmaker, syms.testlist_gexp, } +TEST_DESCENDANTS = { + syms.test, + syms.lambdef, + syms.or_test, + syms.and_test, + syms.not_test, + syms.comparison, + syms.star_expr, + syms.expr, + syms.xor_expr, + syms.and_expr, + syms.shift_expr, + syms.arith_expr, + syms.trailer, + syms.term, + syms.power, +} COMPREHENSION_PRIORITY = 20 COMMA_PRIORITY = 10 +TERNARY_PRIORITY = 7 LOGIC_PRIORITY = 5 STRING_PRIORITY = 4 COMPARATOR_PRIORITY = 3 @@ -696,6 +714,10 @@ class BracketTracker: return False + def get_open_lsqb(self) -> Optional[Leaf]: + """Return the most recent opening square bracket (if any).""" + return self.bracket_match.get((self.depth - 1, token.RSQB)) + @dataclass class Line: @@ -721,14 +743,17 @@ class Line: if not has_value: return + if token.COLON == leaf.type and self.is_class_paren_empty: + del self.leaves[-2:] if self.leaves and not preformatted: # Note: at this point leaf.prefix should be empty except for # imports, for which we only preserve newlines. - leaf.prefix += whitespace(leaf) + leaf.prefix += whitespace( + leaf, complex_subscript=self.is_complex_subscript(leaf) + ) if self.inside_brackets or not preformatted: self.bracket_tracker.mark(leaf) self.maybe_remove_trailing_comma(leaf) - if not self.append_comment(leaf): self.leaves.append(leaf) @@ -816,6 +841,22 @@ class Line: and self.leaves[0].value == "yield" ) + @property + def is_class_paren_empty(self) -> bool: + """Is this a class with no base classes but using parentheses? + + Those are unnecessary and should be removed. + """ + return ( + bool(self) + and len(self.leaves) == 4 + and self.is_class + and self.leaves[2].type == token.LPAR + and self.leaves[2].value == "(" + and self.leaves[3].type == token.RPAR + and self.leaves[3].value == ")" + ) + def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool: """If so, needs to be split before emitting.""" for leaf in self.leaves: @@ -844,9 +885,14 @@ class Line: self.remove_trailing_comma() return True - # For parens let's check if it's safe to remove the comma. If the - # trailing one is the only one, we might mistakenly change a tuple - # into a different type by removing the comma. + # For parens let's check if it's safe to remove the comma. + # Imports are always safe. + if self.is_import: + self.remove_trailing_comma() + return True + + # Otheriwsse, if the trailing one is the only one, we might mistakenly + # change a tuple into a different type by removing the comma. depth = closing.bracket_depth + 1 commas = 0 opening = closing.opening_bracket @@ -857,7 +903,7 @@ class Line: else: return False - for leaf in self.leaves[_opening_index + 1:]: + for leaf in self.leaves[_opening_index + 1 :]: if leaf is closing: break @@ -918,6 +964,24 @@ class Line: self.comments[i] = (comma_index - 1, comment) self.leaves.pop() + def is_complex_subscript(self, leaf: Leaf) -> bool: + """Return True iff `leaf` is part of a slice with non-trivial exprs.""" + open_lsqb = ( + leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb() + ) + if open_lsqb is None: + return False + + subscript_start = open_lsqb.next_sibling + if ( + isinstance(subscript_start, Node) + and subscript_start.type == syms.subscriptlist + ): + subscript_start = child_towards(subscript_start, leaf) + return subscript_start is not None and any( + n.type in TEST_DESCENDANTS for n in subscript_start.pre_order() + ) + def __str__(self) -> str: """Render the line.""" if not self: @@ -1040,8 +1104,14 @@ class EmptyLineTracker: # Don't insert empty lines before the first line in the file. return 0, 0 - if self.previous_line and self.previous_line.is_decorator: - # Don't insert empty lines between decorators. + if self.previous_line.is_decorator: + return 0, 0 + + if ( + self.previous_line.is_comment + and self.previous_line.depth == current_line.depth + and before == 0 + ): return 0, 0 newlines = 2 @@ -1049,9 +1119,6 @@ class EmptyLineTracker: newlines -= 1 return newlines, 0 - if current_line.is_flow_control: - return before, 1 - if ( self.previous_line and self.previous_line.is_import @@ -1060,13 +1127,6 @@ class EmptyLineTracker: ): return (before or 1), 0 - if ( - self.previous_line - and self.previous_line.is_yield - and (not current_line.is_yield or depth != self.previous_line.depth) - ): - return (before or 1), 0 - return before, 0 @@ -1158,7 +1218,16 @@ class LineGenerator(Visitor[Line]): def visit_DEDENT(self, node: Node) -> Iterator[Line]: """Decrease indentation level, maybe yield a line.""" - # DEDENT has no value. Additionally, in blib2to3 it never holds comments. + # The current line might still wait for trailing comments. At DEDENT time + # there won't be any (they would be prefixes on the preceding NEWLINE). + # Emit the line then. + yield from self.line() + + # While DEDENT has no value, its prefix may contain standalone comments + # that belong to the current indentation level. Get 'em. + yield from self.visit_default(node) + + # Finally, emit the dedent. yield from self.line(-1) def visit_stmt( @@ -1296,8 +1365,12 @@ BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT} -def whitespace(leaf: Leaf) -> str: # noqa C901 - """Return whitespace prefix if needed for the given `leaf`.""" +def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901 + """Return whitespace prefix if needed for the given `leaf`. + + `complex_subscript` signals whether the given leaf is part of a subscription + which has non-trivial arguments, like arithmetic expressions or function calls. + """ NO = "" SPACE = " " DOUBLESPACE = " " @@ -1311,7 +1384,10 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 return DOUBLESPACE assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}" - if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}: + if ( + t == token.COLON + and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop} + ): return NO prev = leaf.prev_sibling @@ -1321,7 +1397,13 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 return NO if t == token.COLON: - return SPACE if prevp.type == token.COMMA else NO + if prevp.type == token.COLON: + return NO + + elif prevp.type != token.COMMA and not complex_subscript: + return NO + + return SPACE if prevp.type == token.EQUAL: if prevp.parent: @@ -1342,7 +1424,7 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 elif prevp.type == token.COLON: if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}: - return NO + return SPACE if complex_subscript else NO elif ( prevp.parent @@ -1448,7 +1530,7 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 if prev and prev.type == token.LPAR: return NO - elif p.type == syms.subscript: + elif p.type in {syms.subscript, syms.sliceop}: # indexing if not prev: assert p.parent is not None, "subscripts are always parented" @@ -1457,7 +1539,7 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 return NO - else: + elif not complex_subscript: return NO elif p.type == syms.atom: @@ -1527,6 +1609,14 @@ def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]: return None +def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]: + """Return the child of `ancestor` that contains `descendant`.""" + node: Optional[LN] = descendant + while node and node.parent != ancestor: + node = node.parent + return node + + def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int: """Return the priority of the `leaf` delimiter, given a line break after it. @@ -1587,6 +1677,14 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int: ): return COMPREHENSION_PRIORITY + if ( + leaf.type == token.NAME + and leaf.value in {"if", "else"} + and leaf.parent + and leaf.parent.type == syms.test + ): + return TERNARY_PRIORITY + if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent: return LOGIC_PRIORITY @@ -1698,6 +1796,8 @@ def split_line( split_funcs: List[SplitFunc] if line.is_def: split_funcs = [left_hand_split] + elif line.is_import: + split_funcs = [explode_split] elif line.inside_brackets: split_funcs = [delimiter_split, standalone_comment_split, right_hand_split] else: @@ -1730,7 +1830,8 @@ def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: """Split line into many lines, starting with the first matching bracket pair. Note: this usually looks weird, only use this for function definitions. - Prefer RHS otherwise. + Prefer RHS otherwise. This is why this function is not symmetrical with + :func:`right_hand_split` which also handles optional parentheses. """ head = Line(depth=line.depth) body = Line(depth=line.depth + 1, inside_brackets=True) @@ -1770,7 +1871,10 @@ def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: def right_hand_split( line: Line, py36: bool = False, omit: Collection[LeafID] = () ) -> Iterator[Line]: - """Split line into many lines, starting with the last matching bracket pair.""" + """Split line into many lines, starting with the last matching bracket pair. + + If the split was by optional parentheses, attempt splitting without them, too. + """ head = Line(depth=line.depth) body = Line(depth=line.depth + 1, inside_brackets=True) tail = Line(depth=line.depth) @@ -1809,20 +1913,25 @@ def right_hand_split( bracket_split_succeeded_or_raise(head, body, tail) assert opening_bracket and closing_bracket if ( + # the opening bracket is an optional paren opening_bracket.type == token.LPAR and not opening_bracket.value + # the closing bracket is an optional paren and closing_bracket.type == token.RPAR and not closing_bracket.value + # there are no delimiters or standalone comments in the body + and not body.bracket_tracker.delimiters + and not line.contains_standalone_comments(0) + # and it's not an import (optional parens are the only thing we can split + # on in this case; attempting a split without them is a waste of time) + and not line.is_import ): - # These parens were optional. If there aren't any delimiters or standalone - # comments in the body, they were unnecessary and another split without - # them should be attempted. - if not ( - body.bracket_tracker.delimiters or line.contains_standalone_comments(0) - ): - omit = {id(closing_bracket), *omit} + omit = {id(closing_bracket), *omit} + try: yield from right_hand_split(line, py36=py36, omit=omit) return + except CannotSplit: + pass ensure_visible(opening_bracket) ensure_visible(closing_bracket) @@ -1964,6 +2073,26 @@ def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]: yield current_line +def explode_split( + line: Line, py36: bool = False, omit: Collection[LeafID] = () +) -> Iterator[Line]: + """Split by rightmost bracket and immediately split contents by a delimiter.""" + new_lines = list(right_hand_split(line, py36, omit)) + if len(new_lines) != 3: + yield from new_lines + return + + yield new_lines[0] + + try: + yield from delimiter_split(new_lines[1], py36) + + except CannotSplit: + yield new_lines[1] + + yield new_lines[2] + + def is_import(leaf: Leaf) -> bool: """Return True if the given leaf starts an import statement.""" p = leaf.parent @@ -2025,7 +2154,7 @@ def normalize_string_quotes(leaf: Leaf) -> None: unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}") escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}") escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}") - body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] + body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)] if "r" in prefix.casefold(): if unescaped_new_quote.search(body): # There's at least one unescaped new_quote in this raw string @@ -2067,17 +2196,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: for child in list(node.children): if check_lpar: if child.type == syms.atom: - if not ( - is_empty_tuple(child) - or is_one_tuple(child) - or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY - ): - first = child.children[0] - last = child.children[-1] - if first.type == token.LPAR and last.type == token.RPAR: - # make parentheses invisible - first.value = "" # type: ignore - last.value = "" # type: ignore + maybe_make_parens_invisible_in_atom(child) elif is_one_tuple(child): # wrap child in visible parentheses lpar = Leaf(token.LPAR, "(") @@ -2094,6 +2213,29 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: check_lpar = isinstance(child, Leaf) and child.value in parens_after +def maybe_make_parens_invisible_in_atom(node: LN) -> bool: + """If it's safe, make the parens in the atom `node` invisible, recusively.""" + if ( + node.type != syms.atom + or is_empty_tuple(node) + or is_one_tuple(node) + or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY + ): + return False + + first = node.children[0] + last = node.children[-1] + if first.type == token.LPAR and last.type == token.RPAR: + # make parentheses invisible + first.value = "" # type: ignore + last.value = "" # type: ignore + if len(node.children) > 1: + maybe_make_parens_invisible_in_atom(node.children[1]) + return True + + return False + + def is_empty_tuple(node: LN) -> bool: """Return True if `node` holds an empty tuple.""" return ( @@ -2458,18 +2600,22 @@ def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str: CACHE_DIR = Path(user_cache_dir("black", version=__version__)) -CACHE_FILE = CACHE_DIR / "cache.pickle" -def read_cache() -> Cache: +def get_cache_file(line_length: int) -> Path: + return CACHE_DIR / f"cache.{line_length}.pickle" + + +def read_cache(line_length: int) -> Cache: """Read the cache if it exists and is well formed. If it is not well formed, the call to write_cache later should resolve the issue. """ - if not CACHE_FILE.exists(): + cache_file = get_cache_file(line_length) + if not cache_file.exists(): return {} - with CACHE_FILE.open("rb") as fobj: + with cache_file.open("rb") as fobj: try: cache: Cache = pickle.load(fobj) except pickle.UnpicklingError: @@ -2502,13 +2648,14 @@ def filter_cached( return todo, done -def write_cache(cache: Cache, sources: List[Path]) -> None: +def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None: """Update the cache file.""" + cache_file = get_cache_file(line_length) try: if not CACHE_DIR.exists(): CACHE_DIR.mkdir(parents=True) new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}} - with CACHE_FILE.open("wb") as fobj: + with cache_file.open("wb") as fobj: pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL) except OSError: pass