X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/2e0bb0fa9f9fffedecaa1f088f313f6c2d6df965..5192ed484bdbe507a8dd03dc31f93e4efec95b19:/black.py diff --git a/black.py b/black.py index 9b144ed..7c487f6 100644 --- a/black.py +++ b/black.py @@ -17,12 +17,14 @@ import sys from typing import ( Any, Callable, + Collection, Dict, Generic, Iterable, Iterator, List, Optional, + Pattern, Set, Tuple, Type, @@ -39,7 +41,7 @@ from blib2to3 import pygram, pytree from blib2to3.pgen2 import driver, token from blib2to3.pgen2.parse import ParseError -__version__ = "18.4a0" +__version__ = "18.4a2" DEFAULT_LINE_LENGTH = 88 # types syms = pygram.python_symbols @@ -315,12 +317,12 @@ def format_stdin_to_stdout( `line_length` and `fast` arguments are passed to :func:`format_file_contents`. """ src = sys.stdin.read() + dst = src try: dst = format_file_contents(src, line_length=line_length, fast=fast) return True except NothingChanged: - dst = src return False finally: @@ -520,7 +522,20 @@ MATH_OPERATORS = { token.DOUBLESTAR, token.DOUBLESLASH, } -VARARGS = {token.STAR, token.DOUBLESTAR} +STARS = {token.STAR, token.DOUBLESTAR} +VARARGS_PARENTS = { + syms.arglist, + syms.argument, # double star in arglist + syms.trailer, # single argument to call + syms.typedargslist, + syms.varargslist, # lambdas +} +UNPACKING_PARENTS = { + syms.atom, # single element of a list or set literal + syms.dictsetmaker, + syms.listmaker, + syms.testlist_gexp, +} COMPREHENSION_PRIORITY = 20 COMMA_PRIORITY = 10 LOGIC_PRIORITY = 5 @@ -582,6 +597,7 @@ class BracketTracker: """Return the highest priority of a delimiter found on the line. Values are consistent with what `is_delimiter()` returns. + Raises ValueError on no delimiters. """ return max(v for k, v in self.delimiters.items() if k not in exclude) @@ -608,7 +624,7 @@ class Line: Inline comments are put aside. """ - has_value = leaf.value.strip() + has_value = leaf.type in BRACKETS or bool(leaf.value.strip()) if not has_value: return @@ -709,12 +725,12 @@ class Line: and self.leaves[0].value == "yield" ) - @property - def contains_standalone_comments(self) -> bool: + def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool: """If so, needs to be split before emitting.""" for leaf in self.leaves: if leaf.type == STANDALONE_COMMENT: - return True + if leaf.bracket_depth <= depth_limit: + return True return False @@ -1077,15 +1093,22 @@ class LineGenerator(Visitor[Line]): # DEDENT has no value. Additionally, in blib2to3 it never holds comments. yield from self.line(-1) - def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]: + def visit_stmt( + self, node: Node, keywords: Set[str], parens: Set[str] + ) -> Iterator[Line]: """Visit a statement. This implementation is shared for `if`, `while`, `for`, `try`, `except`, - `def`, `with`, and `class`. + `def`, `with`, `class`, and `assert`. - The relevant Python language `keywords` for a given statement will be NAME - leaves within it. This methods puts those on a separate line. + The relevant Python language `keywords` for a given statement will be + NAME leaves within it. This methods puts those on a separate line. + + `parens` holds pairs of nodes where invisible parentheses should be put. + Keys hold nodes after which opening parentheses should be put, values + hold nodes before which closing parentheses should be put. """ + normalize_invisible_parens(node, parens_after=parens) for child in node.children: if child.type == token.NAME and child.value in keywords: # type: ignore yield from self.line() @@ -1125,6 +1148,32 @@ class LineGenerator(Visitor[Line]): yield from self.line() yield from self.visit(child) + def visit_import_from(self, node: Node) -> Iterator[Line]: + """Visit import_from and maybe put invisible parentheses. + + This is separate from `visit_stmt` because import statements don't + support arbitrary atoms and thus handling of parentheses is custom. + """ + check_lpar = False + for index, child in enumerate(node.children): + if check_lpar: + if child.type == token.LPAR: + # make parentheses invisible + child.value = "" # type: ignore + node.children[-1].value = "" # type: ignore + else: + # insert invisible parentheses + node.insert_child(index, Leaf(token.LPAR, "")) + node.append_child(Leaf(token.RPAR, "")) + break + + check_lpar = ( + child.type == token.NAME and child.value == "import" # type: ignore + ) + + for child in node.children: + yield from self.visit(child) + def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]: """Remove a semicolon and put the other statement on a separate line.""" yield from self.line() @@ -1155,18 +1204,23 @@ class LineGenerator(Visitor[Line]): def __attrs_post_init__(self) -> None: """You are in a twisty little maze of passages.""" v = self.visit_stmt - self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}) - self.visit_while_stmt = partial(v, keywords={"while", "else"}) - self.visit_for_stmt = partial(v, keywords={"for", "else"}) - self.visit_try_stmt = partial(v, keywords={"try", "except", "else", "finally"}) - self.visit_except_clause = partial(v, keywords={"except"}) - self.visit_funcdef = partial(v, keywords={"def"}) - self.visit_with_stmt = partial(v, keywords={"with"}) - self.visit_classdef = partial(v, keywords={"class"}) + Ø: Set[str] = set() + self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","}) + self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"}) + self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"}) + self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"}) + self.visit_try_stmt = partial( + v, keywords={"try", "except", "else", "finally"}, parens=Ø + ) + self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø) + self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø) + self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø) + self.visit_classdef = partial(v, keywords={"class"}, parens=Ø) self.visit_async_funcdef = self.visit_async_stmt self.visit_decorated = self.visit_decorators +IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist} BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE} OPENING_BRACKETS = set(BRACKET.keys()) CLOSING_BRACKETS = set(BRACKET.values()) @@ -1214,15 +1268,8 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 # that, too. return prevp.prefix - elif prevp.type == token.DOUBLESTAR: - if prevp.parent and prevp.parent.type in { - syms.arglist, - syms.argument, - syms.dictsetmaker, - syms.parameters, - syms.typedargslist, - syms.varargslist, - }: + elif prevp.type in STARS: + if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS): return NO elif prevp.type == token.COLON: @@ -1231,7 +1278,7 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 elif ( prevp.parent - and prevp.parent.type in {syms.factor, syms.star_expr} + and prevp.parent.type == syms.factor and prevp.type in MATH_OPERATORS ): return NO @@ -1317,7 +1364,7 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 if not prevp or prevp.type == token.LPAR: return NO - elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR: + elif prev.type in {token.EQUAL, token.STAR, token.DOUBLESTAR}: return NO elif p.type == syms.decorator: @@ -1382,9 +1429,10 @@ def whitespace(leaf: Leaf) -> str: # noqa C901 prevp_parent = prevp.parent assert prevp_parent is not None - if prevp.type == token.COLON and prevp_parent.type in { - syms.subscript, syms.sliceop - }: + if ( + prevp.type == token.COLON + and prevp_parent.type in {syms.subscript, syms.sliceop} + ): return NO elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument: @@ -1451,11 +1499,7 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int: Higher numbers are higher priority. """ - if ( - leaf.type in VARARGS - and leaf.parent - and leaf.parent.type in {syms.argument, syms.typedargslist} - ): + if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS): # * and ** might also be MATH_OPERATORS but in this case they are not. # Don't treat them as a delimiter. return 0 @@ -1607,7 +1651,7 @@ def split_line( if ( len(line_str) <= line_length and "\n" not in line_str # multiline strings - and not line.contains_standalone_comments + and not line.contains_standalone_comments() ): yield line return @@ -1673,9 +1717,7 @@ def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: if body_leaves: normalize_prefix(body_leaves[0], inside_brackets=True) # Build the new lines. - for result, leaves in ( - (head, head_leaves), (body, body_leaves), (tail, tail_leaves) - ): + for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves): for leaf in leaves: result.append(leaf, preformatted=True) for comment_after in line.comments_after(leaf): @@ -1686,7 +1728,9 @@ def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: yield result -def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: +def right_hand_split( + line: Line, py36: bool = False, omit: Collection[LeafID] = () +) -> Iterator[Line]: """Split line into many lines, starting with the last matching bracket pair.""" head = Line(depth=line.depth) body = Line(depth=line.depth + 1, inside_brackets=True) @@ -1696,14 +1740,16 @@ def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: head_leaves: List[Leaf] = [] current_leaves = tail_leaves opening_bracket = None + closing_bracket = None for leaf in reversed(line.leaves): if current_leaves is body_leaves: if leaf is opening_bracket: current_leaves = head_leaves if body_leaves else tail_leaves current_leaves.append(leaf) if current_leaves is tail_leaves: - if leaf.type in CLOSING_BRACKETS: + if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit: opening_bracket = leaf.opening_bracket + closing_bracket = leaf current_leaves = body_leaves tail_leaves.reverse() body_leaves.reverse() @@ -1711,15 +1757,36 @@ def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]: # Since body is a new indent level, remove spurious leading whitespace. if body_leaves: normalize_prefix(body_leaves[0], inside_brackets=True) + elif not head_leaves: + # No `head` and no `body` means the split failed. `tail` has all content. + raise CannotSplit("No brackets found") + # Build the new lines. - for result, leaves in ( - (head, head_leaves), (body, body_leaves), (tail, tail_leaves) - ): + for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves): for leaf in leaves: result.append(leaf, preformatted=True) for comment_after in line.comments_after(leaf): result.append(comment_after, preformatted=True) bracket_split_succeeded_or_raise(head, body, tail) + assert opening_bracket and closing_bracket + if ( + opening_bracket.type == token.LPAR + and not opening_bracket.value + and closing_bracket.type == token.RPAR + and not closing_bracket.value + ): + # These parens were optional. If there aren't any delimiters or standalone + # comments in the body, they were unnecessary and another split without + # them should be attempted. + if not ( + body.bracket_tracker.delimiters or line.contains_standalone_comments(0) + ): + omit = {id(closing_bracket), *omit} + yield from right_hand_split(line, py36=py36, omit=omit) + return + + ensure_visible(opening_bracket) + ensure_visible(closing_bracket) for result in (head, body, tail): if result: yield result @@ -1810,8 +1877,7 @@ def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]: lowest_depth = min(lowest_depth, leaf.bracket_depth) if ( leaf.bracket_depth == lowest_depth - and leaf.type == token.STAR - or leaf.type == token.DOUBLESTAR + and is_vararg(leaf, within=VARARGS_PARENTS) ): trailing_comma_safe = trailing_comma_safe and py36 leaf_priority = delimiters.get(id(leaf)) @@ -1833,12 +1899,7 @@ def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]: @dont_increase_indentation def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]: """Split standalone comments from the rest of the line.""" - for leaf in line.leaves: - if leaf.type == STANDALONE_COMMENT: - if leaf.bracket_depth == 0: - break - - else: + if not line.contains_standalone_comments(0): raise CannotSplit("Line does not have any standalone comments") current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets) @@ -1922,9 +1983,10 @@ def normalize_string_quotes(leaf: Leaf) -> None: return # There's an internal error prefix = leaf.value[:first_quote_pos] + unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}") + escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}") + escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}") body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] - unescaped_new_quote = re.compile(r"(([^\\]|^)(\\\\)*)" + new_quote) - escaped_orig_quote = re.compile(r"\\(\\\\)*" + orig_quote) if "r" in prefix.casefold(): if unescaped_new_quote.search(body): # There's at least one unescaped new_quote in this raw string @@ -1934,8 +1996,14 @@ def normalize_string_quotes(leaf: Leaf) -> None: # Do not introduce or remove backslashes in raw strings new_body = body else: - new_body = escaped_orig_quote.sub(f"\\1{orig_quote}", body) - new_body = unescaped_new_quote.sub(f"\\1\\\\{new_quote}", new_body) + # remove unnecessary quotes + new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body) + if body != new_body: + # Consider the string without unnecessary quotes as the original + body = new_body + leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}" + new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body) + new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body) if new_quote == '"""' and new_body[-1] == '"': # edge case: new_body = new_body[:-1] + '\\"' @@ -1950,6 +2018,134 @@ def normalize_string_quotes(leaf: Leaf) -> None: leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}" +def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None: + """Make existing optional parentheses invisible or create new ones. + + Standardizes on visible parentheses for single-element tuples, and keeps + existing visible parentheses for other tuples and generator expressions. + """ + check_lpar = False + for child in list(node.children): + if check_lpar: + if child.type == syms.atom: + if not ( + is_empty_tuple(child) + or is_one_tuple(child) + or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY + ): + first = child.children[0] + last = child.children[-1] + if first.type == token.LPAR and last.type == token.RPAR: + # make parentheses invisible + first.value = "" # type: ignore + last.value = "" # type: ignore + elif is_one_tuple(child): + # wrap child in visible parentheses + lpar = Leaf(token.LPAR, "(") + rpar = Leaf(token.RPAR, ")") + index = child.remove() or 0 + node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + else: + # wrap child in invisible parentheses + lpar = Leaf(token.LPAR, "") + rpar = Leaf(token.RPAR, "") + index = child.remove() or 0 + node.insert_child(index, Node(syms.atom, [lpar, child, rpar])) + + check_lpar = isinstance(child, Leaf) and child.value in parens_after + + +def is_empty_tuple(node: LN) -> bool: + """Return True if `node` holds an empty tuple.""" + return ( + node.type == syms.atom + and len(node.children) == 2 + and node.children[0].type == token.LPAR + and node.children[1].type == token.RPAR + ) + + +def is_one_tuple(node: LN) -> bool: + """Return True if `node` holds a tuple with one element, with or without parens.""" + if node.type == syms.atom: + if len(node.children) != 3: + return False + + lpar, gexp, rpar = node.children + if not ( + lpar.type == token.LPAR + and gexp.type == syms.testlist_gexp + and rpar.type == token.RPAR + ): + return False + + return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA + + return ( + node.type in IMPLICIT_TUPLE + and len(node.children) == 2 + and node.children[1].type == token.COMMA + ) + + +def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool: + """Return True if `leaf` is a star or double star in a vararg or kwarg. + + If `within` includes VARARGS_PARENTS, this applies to function signatures. + If `within` includes COLLECTION_LIBERALS_PARENTS, it applies to right + hand-side extended iterable unpacking (PEP 3132) and additional unpacking + generalizations (PEP 448). + """ + if leaf.type not in STARS or not leaf.parent: + return False + + p = leaf.parent + if p.type == syms.star_expr: + # Star expressions are also used as assignment targets in extended + # iterable unpacking (PEP 3132). See what its parent is instead. + if not p.parent: + return False + + p = p.parent + + return p.type in within + + +def max_delimiter_priority_in_atom(node: LN) -> int: + if node.type != syms.atom: + return 0 + + first = node.children[0] + last = node.children[-1] + if not (first.type == token.LPAR and last.type == token.RPAR): + return 0 + + bt = BracketTracker() + for c in node.children[1:-1]: + if isinstance(c, Leaf): + bt.mark(c) + else: + for leaf in c.leaves(): + bt.mark(leaf) + try: + return bt.max_delimiter_priority() + + except ValueError: + return 0 + + +def ensure_visible(leaf: Leaf) -> None: + """Make sure parentheses are visible. + + They could be invisible as part of some statements (see + :func:`normalize_invible_parens` and :func:`visit_import_from`). + """ + if leaf.type == token.LPAR: + leaf.value = "(" + elif leaf.type == token.RPAR: + leaf.value = ")" + + def is_python36(node: Node) -> bool: """Return True if the current file is using Python 3.6+ features. @@ -2155,7 +2351,7 @@ def dump_to_file(*output: str) -> str: import tempfile with tempfile.NamedTemporaryFile( - mode="w", prefix="blk_", suffix=".log", delete=False + mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8" ) as f: for lines in output: f.write(lines) @@ -2204,5 +2400,14 @@ def shutdown(loop: BaseEventLoop) -> None: loop.close() +def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str: + """Replace `regex` with `replacement` twice on `original`. + + This is used by string normalization to perform replaces on + overlapping matches. + """ + return regex.sub(replacement, regex.sub(replacement, original)) + + if __name__ == "__main__": main()