X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/96e68f034197d6060aad876216a0e40e0eac0987..86e1c3650741ce9641a14a89db6162f30786f093:/black.py diff --git a/black.py b/black.py index f389176..8d55164 100644 --- a/black.py +++ b/black.py @@ -30,6 +30,7 @@ from typing import ( Type, TypeVar, Union, + cast, ) from appdirs import user_cache_dir @@ -43,11 +44,12 @@ from blib2to3.pgen2 import driver, token from blib2to3.pgen2.parse import ParseError -__version__ = "18.4a6" +__version__ = "18.5b0" DEFAULT_LINE_LENGTH = 88 +CACHE_DIR = Path(user_cache_dir("black", version=__version__)) + # types -syms = pygram.python_symbols FileContent = str Encoding = str Depth = int @@ -64,6 +66,9 @@ Cache = Dict[Path, CacheInfo] out = partial(click.secho, bold=True, err=True) err = partial(click.secho, fg="red", err=True) +pygram.initialize(CACHE_DIR) +syms = pygram.python_symbols + class NothingChanged(UserWarning): """Raised by :func:`format_file` when reformatted code is the same as source.""" @@ -626,21 +631,22 @@ LOGIC_PRIORITY = 14 STRING_PRIORITY = 12 COMPARATOR_PRIORITY = 10 MATH_PRIORITIES = { - token.VBAR: 8, - token.CIRCUMFLEX: 7, - token.AMPER: 6, - token.LEFTSHIFT: 5, - token.RIGHTSHIFT: 5, - token.PLUS: 4, - token.MINUS: 4, - token.STAR: 3, - token.SLASH: 3, - token.DOUBLESLASH: 3, - token.PERCENT: 3, - token.AT: 3, - token.TILDE: 2, - token.DOUBLESTAR: 1, + token.VBAR: 9, + token.CIRCUMFLEX: 8, + token.AMPER: 7, + token.LEFTSHIFT: 6, + token.RIGHTSHIFT: 6, + token.PLUS: 5, + token.MINUS: 5, + token.STAR: 4, + token.SLASH: 4, + token.DOUBLESLASH: 4, + token.PERCENT: 4, + token.AT: 4, + token.TILDE: 3, + token.DOUBLESTAR: 2, } +DOT_PRIORITY = 1 @dataclass @@ -706,6 +712,17 @@ class BracketTracker: """ return max(v for k, v in self.delimiters.items() if k not in exclude) + def delimiter_count_with_priority(self, priority: int = 0) -> int: + """Return the number of delimiters with the given `priority`. + + If no `priority` is passed, defaults to max priority on the line. + """ + if not self.delimiters: + return 0 + + priority = priority or self.max_delimiter_priority() + return sum(1 for p in self.delimiters.values() if p == priority) + def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool: """In a for loop, or comprehension, the variables are often unpacks. @@ -764,6 +781,7 @@ class Line: comments: List[Tuple[Index, Leaf]] = Factory(list) bracket_tracker: BracketTracker = Factory(BracketTracker) inside_brackets: bool = False + should_explode: bool = False def append(self, leaf: Leaf, preformatted: bool = False) -> None: """Add a new `leaf` to the end of the line. @@ -837,10 +855,9 @@ class Line: @property def is_stub_class(self) -> bool: """Is this line a class definition with a body consisting only of "..."?""" - return ( - self.is_class - and self.leaves[-3:] == [Leaf(token.DOT, ".") for _ in range(3)] - ) + return self.is_class and self.leaves[-3:] == [ + Leaf(token.DOT, ".") for _ in range(3) + ] @property def is_def(self) -> bool: @@ -1304,7 +1321,7 @@ class LineGenerator(Visitor[Line]): The relevant Python language `keywords` for a given statement will be NAME leaves within it. This methods puts those on a separate line. - `parens` holds a set of string leaf values immeditely after which + `parens` holds a set of string leaf values immediately after which invisible parens should be put. """ normalize_invisible_parens(node, parens_after=parens) @@ -1461,10 +1478,11 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901 return DOUBLESPACE assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}" - if ( - t == token.COLON - and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop} - ): + if t == token.COLON and p.type not in { + syms.subscript, + syms.subscriptlist, + syms.sliceop, + }: return NO prev = leaf.prev_sibling @@ -1485,7 +1503,10 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901 if prevp.type == token.EQUAL: if prevp.parent: if prevp.parent.type in { - syms.arglist, syms.argument, syms.parameters, syms.varargslist + syms.arglist, + syms.argument, + syms.parameters, + syms.varargslist, }: return NO @@ -1638,10 +1659,10 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901 prevp_parent = prevp.parent assert prevp_parent is not None - if ( - prevp.type == token.COLON - and prevp_parent.type in {syms.subscript, syms.sliceop} - ): + if prevp.type == token.COLON and prevp_parent.type in { + syms.subscript, + syms.sliceop, + }: return NO elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument: @@ -1721,6 +1742,14 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int: # Don't treat them as a delimiter. return 0 + if ( + leaf.type == token.DOT + and leaf.parent + and leaf.parent.type not in {syms.import_from, syms.dotted_name} + and (previous is None or previous.type in CLOSING_BRACKETS) + ): + return DOT_PRIORITY + if ( leaf.type in MATH_OPERATORS and leaf.parent @@ -1738,31 +1767,54 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int: ): return STRING_PRIORITY + if leaf.type != token.NAME: + return 0 + if ( - leaf.type == token.NAME - and leaf.value == "for" + leaf.value == "for" and leaf.parent and leaf.parent.type in {syms.comp_for, syms.old_comp_for} ): return COMPREHENSION_PRIORITY if ( - leaf.type == token.NAME - and leaf.value == "if" + leaf.value == "if" and leaf.parent and leaf.parent.type in {syms.comp_if, syms.old_comp_if} ): return COMPREHENSION_PRIORITY + if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test: + return TERNARY_PRIORITY + + if leaf.value == "is": + return COMPARATOR_PRIORITY + if ( - leaf.type == token.NAME - and leaf.value in {"if", "else"} + leaf.value == "in" and leaf.parent - and leaf.parent.type == syms.test + and leaf.parent.type in {syms.comp_op, syms.comparison} + and not ( + previous is not None + and previous.type == token.NAME + and previous.value == "not" + ) ): - return TERNARY_PRIORITY + return COMPARATOR_PRIORITY - if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent: + if ( + leaf.value == "not" + and leaf.parent + and leaf.parent.type == syms.comp_op + and not ( + previous is not None + and previous.type == token.NAME + and previous.value == "is" + ) + ): + return COMPARATOR_PRIORITY + + if leaf.value in LOGIC_OPERATORS and leaf.parent: return LOGIC_PRIORITY return 0 @@ -1862,25 +1914,27 @@ def split_line( return line_str = str(line).strip("\n") - if is_line_short_enough(line, line_length=line_length, line_str=line_str): + if not line.should_explode and is_line_short_enough( + line, line_length=line_length, line_str=line_str + ): yield line return split_funcs: List[SplitFunc] if line.is_def: split_funcs = [left_hand_split] - elif line.is_import: - split_funcs = [explode_split] else: def rhs(line: Line, py36: bool = False) -> Iterator[Line]: for omit in generate_trailers_to_omit(line, line_length): - lines = list(right_hand_split(line, py36, omit=omit)) + lines = list(right_hand_split(line, line_length, py36, omit=omit)) if is_line_short_enough(lines[0], line_length=line_length): yield from lines return # All splits failed, best effort split with no omits. + # This mostly happens to multiline strings that are by definition + # reported as not fitting a single line. yield from right_hand_split(line, py36) if line.inside_brackets: @@ -1954,7 +2008,7 @@ def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: def right_hand_split( - line: Line, py36: bool = False, omit: Collection[LeafID] = () + line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = () ) -> Iterator[Line]: """Split line into many lines, starting with the last matching bracket pair. @@ -2016,23 +2070,16 @@ def right_hand_split( and not line.is_import ): omit = {id(closing_bracket), *omit} - delimiter_count = len(body.bracket_tracker.delimiters) - if ( - delimiter_count == 0 - or delimiter_count == 1 - and ( - body.leaves[0].type in OPENING_BRACKETS - or body.leaves[-1].type in CLOSING_BRACKETS - ) - ): + if can_omit_invisible_parens(body, line_length): try: - yield from right_hand_split(line, py36=py36, omit=omit) + yield from right_hand_split(line, line_length, py36=py36, omit=omit) return except CannotSplit: pass ensure_visible(opening_bracket) ensure_visible(closing_bracket) + body.should_explode = should_explode(body, opening_bracket) for result in (head, body, tail): if result: yield result @@ -2091,14 +2138,16 @@ def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]: except IndexError: raise CannotSplit("Line empty") - delimiters = line.bracket_tracker.delimiters + bt = line.bracket_tracker try: - delimiter_priority = line.bracket_tracker.max_delimiter_priority( - exclude={id(last_leaf)} - ) + delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)}) except ValueError: raise CannotSplit("No delimiters found") + if delimiter_priority == DOT_PRIORITY: + if bt.delimiter_count_with_priority(delimiter_priority) == 1: + raise CannotSplit("Splitting a single attribute from its owner looks wrong") + current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets) lowest_depth = sys.maxsize trailing_comma_safe = True @@ -2121,12 +2170,11 @@ def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]: yield from append_to_line(comment_after) lowest_depth = min(lowest_depth, leaf.bracket_depth) - if ( - leaf.bracket_depth == lowest_depth - and is_vararg(leaf, within=VARARGS_PARENTS) + if leaf.bracket_depth == lowest_depth and is_vararg( + leaf, within=VARARGS_PARENTS ): trailing_comma_safe = trailing_comma_safe and py36 - leaf_priority = delimiters.get(id(leaf)) + leaf_priority = bt.delimiters.get(id(leaf)) if leaf_priority == delimiter_priority: yield current_line @@ -2171,26 +2219,6 @@ def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]: yield current_line -def explode_split( - line: Line, py36: bool = False, omit: Collection[LeafID] = () -) -> Iterator[Line]: - """Split by rightmost bracket and immediately split contents by a delimiter.""" - new_lines = list(right_hand_split(line, py36, omit)) - if len(new_lines) != 3: - yield from new_lines - return - - yield new_lines[0] - - try: - yield from delimiter_split(new_lines[1], py36) - - except CannotSplit: - yield new_lines[1] - - yield new_lines[2] - - def is_import(leaf: Leaf) -> bool: """Return True if the given leaf starts an import statement.""" p = leaf.parent @@ -2320,7 +2348,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: rpar = Leaf(token.RPAR, ")") index = child.remove() or 0 node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) - else: + elif not (isinstance(child, Leaf) and is_multiline_string(child)): # wrap child in invisible parentheses lpar = Leaf(token.LPAR, "") rpar = Leaf(token.RPAR, "") @@ -2431,6 +2459,12 @@ def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool: return p.type in within +def is_multiline_string(leaf: Leaf) -> bool: + """Return True if `leaf` is a multiline string that actually spans many lines.""" + value = leaf.value.lstrip("furbFURB") + return value[:3] in {'"""', "'''"} and "\n" in value + + def is_stub_suite(node: Node) -> bool: """Return True if `node` is a suite with a stub body.""" if ( @@ -2500,6 +2534,25 @@ def ensure_visible(leaf: Leaf) -> None: leaf.value = ")" +def should_explode(line: Line, opening_bracket: Leaf) -> bool: + """Should `line` immediately be split with `delimiter_split()` after RHS?""" + if not ( + opening_bracket.parent + and opening_bracket.parent.type in {syms.atom, syms.import_from} + and opening_bracket.value in "[{(" + ): + return False + + try: + last_leaf = line.leaves[-1] + exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set() + max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude) + except (IndexError, ValueError): + return False + + return max_priority == COMMA_PRIORITY + + def is_python36(node: Node) -> bool: """Return True if the current file is using Python 3.6+ features. @@ -2548,21 +2601,13 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf closing_bracket = None optional_brackets: Set[LeafID] = set() inner_brackets: Set[LeafID] = set() - for index, leaf in enumerate_reversed(line.leaves): - length += len(leaf.prefix) + len(leaf.value) + for index, leaf, leaf_length in enumerate_with_length(line, reversed=True): + length += leaf_length if length > line_length: break - comment: Optional[Leaf] - for comment in line.comments_after(leaf, index): - if "\n" in comment.prefix: - break # Oops, standalone comment! - - length += len(comment.value) - else: - comment = None - if comment is not None: - break # There was a standalone comment, we can't continue. + if leaf.type == STANDALONE_COMMENT: + break optional_brackets.discard(id(leaf)) if opening_bracket: @@ -2628,7 +2673,15 @@ def get_future_imports(node: Node) -> Set[str]: PYTHON_EXTENSIONS = {".py", ".pyi"} BLACKLISTED_DIRECTORIES = { - "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv" + "build", + "buck-out", + "dist", + "_build", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", } @@ -2876,6 +2929,32 @@ def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]: index -= 1 +def enumerate_with_length( + line: Line, reversed: bool = False +) -> Iterator[Tuple[Index, Leaf, int]]: + """Return an enumeration of leaves with their length. + + Stops prematurely on multiline strings and standalone comments. + """ + op = cast( + Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]], + enumerate_reversed if reversed else enumerate, + ) + for index, leaf in op(line.leaves): + length = len(leaf.prefix) + len(leaf.value) + if "\n" in leaf.value: + return # Multiline strings, we can't continue. + + comment: Optional[Leaf] + for comment in line.comments_after(leaf, index): + if "\n" in comment.prefix: + return # Oops, standalone comment! + + length += len(comment.value) + + yield index, leaf, length + + def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool: """Return True if `line` is no longer than `line_length`. @@ -2890,7 +2969,93 @@ def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> ) -CACHE_DIR = Path(user_cache_dir("black", version=__version__)) +def can_omit_invisible_parens(line: Line, line_length: int) -> bool: + """Does `line` have a shape safe to reformat without optional parens around it? + + Returns True for only a subset of potentially nice looking formattings but + the point is to not return false positives that end up producing lines that + are too long. + """ + bt = line.bracket_tracker + if not bt.delimiters: + # Without delimiters the optional parentheses are useless. + return True + + max_priority = bt.max_delimiter_priority() + if bt.delimiter_count_with_priority(max_priority) > 1: + # With more than one delimiter of a kind the optional parentheses read better. + return False + + if max_priority == DOT_PRIORITY: + # A single stranded method call doesn't require optional parentheses. + return True + + assert len(line.leaves) >= 2, "Stranded delimiter" + + first = line.leaves[0] + second = line.leaves[1] + penultimate = line.leaves[-2] + last = line.leaves[-1] + + # With a single delimiter, omit if the expression starts or ends with + # a bracket. + if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS: + remainder = False + length = 4 * line.depth + for _index, leaf, leaf_length in enumerate_with_length(line): + if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first: + remainder = True + if remainder: + length += leaf_length + if length > line_length: + break + + if leaf.type in OPENING_BRACKETS: + # There are brackets we can further split on. + remainder = False + + else: + # checked the entire string and line length wasn't exceeded + if len(line.leaves) == _index + 1: + return True + + # Note: we are not returning False here because a line might have *both* + # a leading opening bracket and a trailing closing bracket. If the + # opening bracket doesn't match our rule, maybe the closing will. + + if ( + last.type == token.RPAR + or last.type == token.RBRACE + or ( + # don't use indexing for omitting optional parentheses; + # it looks weird + last.type == token.RSQB + and last.parent + and last.parent.type != syms.trailer + ) + ): + if penultimate.type in OPENING_BRACKETS: + # Empty brackets don't help. + return False + + if is_multiline_string(first): + # Additional wrapping of a multiline string in this situation is + # unnecessary. + return True + + length = 4 * line.depth + seen_other_brackets = False + for _index, leaf, leaf_length in enumerate_with_length(line): + length += leaf_length + if leaf is last.opening_bracket: + if seen_other_brackets or length <= line_length: + return True + + elif leaf.type in OPENING_BRACKETS: + # There are brackets we can further split on. + seen_other_brackets = True + + return False def get_cache_file(line_length: int) -> Path: