]> git.madduck.net Git - etc/vim.git/blobdiff - black.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Delimit multiline expressions according to math operator priority
[etc/vim.git] / black.py
index 5458198b17560621545fdcb0d15a4c32fcdb6522..36380e6ae847b2314b4ce0e8d2c816071cdc418b 100644 (file)
--- a/black.py
+++ b/black.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
-
 import asyncio
 import pickle
 from asyncio.base_events import BaseEventLoop
 import asyncio
 import pickle
 from asyncio.base_events import BaseEventLoop
@@ -43,7 +41,7 @@ from blib2to3 import pygram, pytree
 from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.parse import ParseError
 
 from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.parse import ParseError
 
-__version__ = "18.4a3"
+__version__ = "18.4a6"
 DEFAULT_LINE_LENGTH = 88
 
 # types
 DEFAULT_LINE_LENGTH = 88
 
 # types
@@ -89,11 +87,11 @@ class FormatError(Exception):
         self.consumed = consumed
 
     def trim_prefix(self, leaf: Leaf) -> None:
         self.consumed = consumed
 
     def trim_prefix(self, leaf: Leaf) -> None:
-        leaf.prefix = leaf.prefix[self.consumed:]
+        leaf.prefix = leaf.prefix[self.consumed :]
 
     def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
         """Returns a new Leaf from the consumed part of the prefix."""
 
     def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
         """Returns a new Leaf from the consumed part of the prefix."""
-        unformatted_prefix = leaf.prefix[:self.consumed]
+        unformatted_prefix = leaf.prefix[: self.consumed]
         return Leaf(token.NEWLINE, unformatted_prefix)
 
 
         return Leaf(token.NEWLINE, unformatted_prefix)
 
 
@@ -194,6 +192,7 @@ def main(
         write_back = WriteBack.YES
     report = Report(check=check, quiet=quiet)
     if len(sources) == 0:
         write_back = WriteBack.YES
     report = Report(check=check, quiet=quiet)
     if len(sources) == 0:
+        out("No paths given. Nothing to do 😴")
         ctx.exit(0)
         return
 
         ctx.exit(0)
         return
 
@@ -245,7 +244,7 @@ def reformat_one(
                 )
             ):
                 changed = Changed.YES
                 )
             ):
                 changed = Changed.YES
-            if write_back != WriteBack.DIFF and changed is not Changed.NO:
+            if write_back == WriteBack.YES and changed is not Changed.NO:
                 write_cache(cache, [src], line_length)
         report.done(src, changed)
     except Exception as exc:
                 write_cache(cache, [src], line_length)
         report.done(src, changed)
     except Exception as exc:
@@ -312,7 +311,7 @@ async def schedule_formatting(
 
     if cancelled:
         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 
     if cancelled:
         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
-    if write_back != WriteBack.DIFF and formatted:
+    if write_back == WriteBack.YES and formatted:
         write_cache(cache, formatted, line_length)
 
 
         write_cache(cache, formatted, line_length)
 
 
@@ -554,19 +553,20 @@ COMPARATORS = {
     token.GREATEREQUAL,
 }
 MATH_OPERATORS = {
     token.GREATEREQUAL,
 }
 MATH_OPERATORS = {
+    token.VBAR,
+    token.CIRCUMFLEX,
+    token.AMPER,
+    token.LEFTSHIFT,
+    token.RIGHTSHIFT,
     token.PLUS,
     token.MINUS,
     token.STAR,
     token.SLASH,
     token.PLUS,
     token.MINUS,
     token.STAR,
     token.SLASH,
-    token.VBAR,
-    token.AMPER,
+    token.DOUBLESLASH,
     token.PERCENT,
     token.PERCENT,
-    token.CIRCUMFLEX,
+    token.AT,
     token.TILDE,
     token.TILDE,
-    token.LEFTSHIFT,
-    token.RIGHTSHIFT,
     token.DOUBLESTAR,
     token.DOUBLESTAR,
-    token.DOUBLESLASH,
 }
 STARS = {token.STAR, token.DOUBLESTAR}
 VARARGS_PARENTS = {
 }
 STARS = {token.STAR, token.DOUBLESTAR}
 VARARGS_PARENTS = {
@@ -582,13 +582,45 @@ UNPACKING_PARENTS = {
     syms.listmaker,
     syms.testlist_gexp,
 }
     syms.listmaker,
     syms.testlist_gexp,
 }
+TEST_DESCENDANTS = {
+    syms.test,
+    syms.lambdef,
+    syms.or_test,
+    syms.and_test,
+    syms.not_test,
+    syms.comparison,
+    syms.star_expr,
+    syms.expr,
+    syms.xor_expr,
+    syms.and_expr,
+    syms.shift_expr,
+    syms.arith_expr,
+    syms.trailer,
+    syms.term,
+    syms.power,
+}
 COMPREHENSION_PRIORITY = 20
 COMPREHENSION_PRIORITY = 20
-COMMA_PRIORITY = 10
-TERNARY_PRIORITY = 7
-LOGIC_PRIORITY = 5
-STRING_PRIORITY = 4
-COMPARATOR_PRIORITY = 3
-MATH_PRIORITY = 1
+COMMA_PRIORITY = 18
+TERNARY_PRIORITY = 16
+LOGIC_PRIORITY = 14
+STRING_PRIORITY = 12
+COMPARATOR_PRIORITY = 10
+MATH_PRIORITIES = {
+    token.VBAR: 8,
+    token.CIRCUMFLEX: 7,
+    token.AMPER: 6,
+    token.LEFTSHIFT: 5,
+    token.RIGHTSHIFT: 5,
+    token.PLUS: 4,
+    token.MINUS: 4,
+    token.STAR: 3,
+    token.SLASH: 3,
+    token.DOUBLESLASH: 3,
+    token.PERCENT: 3,
+    token.AT: 3,
+    token.TILDE: 2,
+    token.DOUBLESTAR: 1,
+}
 
 
 @dataclass
 
 
 @dataclass
@@ -698,6 +730,10 @@ class BracketTracker:
 
         return False
 
 
         return False
 
+    def get_open_lsqb(self) -> Optional[Leaf]:
+        """Return the most recent opening square bracket (if any)."""
+        return self.bracket_match.get((self.depth - 1, token.RSQB))
+
 
 @dataclass
 class Line:
 
 @dataclass
 class Line:
@@ -723,14 +759,17 @@ class Line:
         if not has_value:
             return
 
         if not has_value:
             return
 
+        if token.COLON == leaf.type and self.is_class_paren_empty:
+            del self.leaves[-2:]
         if self.leaves and not preformatted:
             # Note: at this point leaf.prefix should be empty except for
             # imports, for which we only preserve newlines.
         if self.leaves and not preformatted:
             # Note: at this point leaf.prefix should be empty except for
             # imports, for which we only preserve newlines.
-            leaf.prefix += whitespace(leaf)
+            leaf.prefix += whitespace(
+                leaf, complex_subscript=self.is_complex_subscript(leaf)
+            )
         if self.inside_brackets or not preformatted:
             self.bracket_tracker.mark(leaf)
             self.maybe_remove_trailing_comma(leaf)
         if self.inside_brackets or not preformatted:
             self.bracket_tracker.mark(leaf)
             self.maybe_remove_trailing_comma(leaf)
-
         if not self.append_comment(leaf):
             self.leaves.append(leaf)
 
         if not self.append_comment(leaf):
             self.leaves.append(leaf)
 
@@ -818,6 +857,22 @@ class Line:
             and self.leaves[0].value == "yield"
         )
 
             and self.leaves[0].value == "yield"
         )
 
+    @property
+    def is_class_paren_empty(self) -> bool:
+        """Is this a class with no base classes but using parentheses?
+
+        Those are unnecessary and should be removed.
+        """
+        return (
+            bool(self)
+            and len(self.leaves) == 4
+            and self.is_class
+            and self.leaves[2].type == token.LPAR
+            and self.leaves[2].value == "("
+            and self.leaves[3].type == token.RPAR
+            and self.leaves[3].value == ")"
+        )
+
     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
         """If so, needs to be split before emitting."""
         for leaf in self.leaves:
     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
         """If so, needs to be split before emitting."""
         for leaf in self.leaves:
@@ -846,9 +901,14 @@ class Line:
                 self.remove_trailing_comma()
                 return True
 
                 self.remove_trailing_comma()
                 return True
 
-        # For parens let's check if it's safe to remove the comma.  If the
-        # trailing one is the only one, we might mistakenly change a tuple
-        # into a different type by removing the comma.
+        # For parens let's check if it's safe to remove the comma.
+        # Imports are always safe.
+        if self.is_import:
+            self.remove_trailing_comma()
+            return True
+
+        # Otheriwsse, if the trailing one is the only one, we might mistakenly
+        # change a tuple into a different type by removing the comma.
         depth = closing.bracket_depth + 1
         commas = 0
         opening = closing.opening_bracket
         depth = closing.bracket_depth + 1
         commas = 0
         opening = closing.opening_bracket
@@ -859,7 +919,7 @@ class Line:
         else:
             return False
 
         else:
             return False
 
-        for leaf in self.leaves[_opening_index + 1:]:
+        for leaf in self.leaves[_opening_index + 1 :]:
             if leaf is closing:
                 break
 
             if leaf is closing:
                 break
 
@@ -920,6 +980,24 @@ class Line:
                 self.comments[i] = (comma_index - 1, comment)
         self.leaves.pop()
 
                 self.comments[i] = (comma_index - 1, comment)
         self.leaves.pop()
 
+    def is_complex_subscript(self, leaf: Leaf) -> bool:
+        """Return True iff `leaf` is part of a slice with non-trivial exprs."""
+        open_lsqb = (
+            leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
+        )
+        if open_lsqb is None:
+            return False
+
+        subscript_start = open_lsqb.next_sibling
+        if (
+            isinstance(subscript_start, Node)
+            and subscript_start.type == syms.subscriptlist
+        ):
+            subscript_start = child_towards(subscript_start, leaf)
+        return subscript_start is not None and any(
+            n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
+        )
+
     def __str__(self) -> str:
         """Render the line."""
         if not self:
     def __str__(self) -> str:
         """Render the line."""
         if not self:
@@ -1303,8 +1381,12 @@ BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
 
 
 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
 
 
-def whitespace(leaf: Leaf) -> str:  # noqa C901
-    """Return whitespace prefix if needed for the given `leaf`."""
+def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
+    """Return whitespace prefix if needed for the given `leaf`.
+
+    `complex_subscript` signals whether the given leaf is part of a subscription
+    which has non-trivial arguments, like arithmetic expressions or function calls.
+    """
     NO = ""
     SPACE = " "
     DOUBLESPACE = "  "
     NO = ""
     SPACE = " "
     DOUBLESPACE = "  "
@@ -1318,7 +1400,10 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
         return DOUBLESPACE
 
     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
         return DOUBLESPACE
 
     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
-    if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}:
+    if (
+        t == token.COLON
+        and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
+    ):
         return NO
 
     prev = leaf.prev_sibling
         return NO
 
     prev = leaf.prev_sibling
@@ -1328,7 +1413,13 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
             return NO
 
         if t == token.COLON:
             return NO
 
         if t == token.COLON:
-            return SPACE if prevp.type == token.COMMA else NO
+            if prevp.type == token.COLON:
+                return NO
+
+            elif prevp.type != token.COMMA and not complex_subscript:
+                return NO
+
+            return SPACE
 
         if prevp.type == token.EQUAL:
             if prevp.parent:
 
         if prevp.type == token.EQUAL:
             if prevp.parent:
@@ -1349,7 +1440,7 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
 
         elif prevp.type == token.COLON:
             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
 
         elif prevp.type == token.COLON:
             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
-                return NO
+                return SPACE if complex_subscript else NO
 
         elif (
             prevp.parent
 
         elif (
             prevp.parent
@@ -1455,7 +1546,7 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
         if prev and prev.type == token.LPAR:
             return NO
 
         if prev and prev.type == token.LPAR:
             return NO
 
-    elif p.type == syms.subscript:
+    elif p.type in {syms.subscript, syms.sliceop}:
         # indexing
         if not prev:
             assert p.parent is not None, "subscripts are always parented"
         # indexing
         if not prev:
             assert p.parent is not None, "subscripts are always parented"
@@ -1464,7 +1555,7 @@ def whitespace(leaf: Leaf) -> str:  # noqa C901
 
             return NO
 
 
             return NO
 
-        else:
+        elif not complex_subscript:
             return NO
 
     elif p.type == syms.atom:
             return NO
 
     elif p.type == syms.atom:
@@ -1534,6 +1625,14 @@ def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
     return None
 
 
     return None
 
 
+def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
+    """Return the child of `ancestor` that contains `descendant`."""
+    node: Optional[LN] = descendant
+    while node and node.parent != ancestor:
+        node = node.parent
+    return node
+
+
 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
     """Return the priority of the `leaf` delimiter, given a line break after it.
 
 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
     """Return the priority of the `leaf` delimiter, given a line break after it.
 
@@ -1566,7 +1665,7 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
         and leaf.parent
         and leaf.parent.type not in {syms.factor, syms.star_expr}
     ):
         and leaf.parent
         and leaf.parent.type not in {syms.factor, syms.star_expr}
     ):
-        return MATH_PRIORITY
+        return MATH_PRIORITIES[leaf.type]
 
     if leaf.type in COMPARATORS:
         return COMPARATOR_PRIORITY
 
     if leaf.type in COMPARATORS:
         return COMPARATOR_PRIORITY
@@ -1747,7 +1846,8 @@ def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
     """Split line into many lines, starting with the first matching bracket pair.
 
     Note: this usually looks weird, only use this for function definitions.
     """Split line into many lines, starting with the first matching bracket pair.
 
     Note: this usually looks weird, only use this for function definitions.
-    Prefer RHS otherwise.
+    Prefer RHS otherwise.  This is why this function is not symmetrical with
+    :func:`right_hand_split` which also handles optional parentheses.
     """
     head = Line(depth=line.depth)
     body = Line(depth=line.depth + 1, inside_brackets=True)
     """
     head = Line(depth=line.depth)
     body = Line(depth=line.depth + 1, inside_brackets=True)
@@ -1787,7 +1887,10 @@ def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
 def right_hand_split(
     line: Line, py36: bool = False, omit: Collection[LeafID] = ()
 ) -> Iterator[Line]:
 def right_hand_split(
     line: Line, py36: bool = False, omit: Collection[LeafID] = ()
 ) -> Iterator[Line]:
-    """Split line into many lines, starting with the last matching bracket pair."""
+    """Split line into many lines, starting with the last matching bracket pair.
+
+    If the split was by optional parentheses, attempt splitting without them, too.
+    """
     head = Line(depth=line.depth)
     body = Line(depth=line.depth + 1, inside_brackets=True)
     tail = Line(depth=line.depth)
     head = Line(depth=line.depth)
     body = Line(depth=line.depth + 1, inside_brackets=True)
     tail = Line(depth=line.depth)
@@ -1826,20 +1929,25 @@ def right_hand_split(
     bracket_split_succeeded_or_raise(head, body, tail)
     assert opening_bracket and closing_bracket
     if (
     bracket_split_succeeded_or_raise(head, body, tail)
     assert opening_bracket and closing_bracket
     if (
+        # the opening bracket is an optional paren
         opening_bracket.type == token.LPAR
         and not opening_bracket.value
         opening_bracket.type == token.LPAR
         and not opening_bracket.value
+        # the closing bracket is an optional paren
         and closing_bracket.type == token.RPAR
         and not closing_bracket.value
         and closing_bracket.type == token.RPAR
         and not closing_bracket.value
+        # there are no delimiters or standalone comments in the body
+        and not body.bracket_tracker.delimiters
+        and not line.contains_standalone_comments(0)
+        # and it's not an import (optional parens are the only thing we can split
+        # on in this case; attempting a split without them is a waste of time)
+        and not line.is_import
     ):
     ):
-        # These parens were optional. If there aren't any delimiters or standalone
-        # comments in the body, they were unnecessary and another split without
-        # them should be attempted.
-        if not (
-            body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
-        ):
-            omit = {id(closing_bracket), *omit}
+        omit = {id(closing_bracket), *omit}
+        try:
             yield from right_hand_split(line, py36=py36, omit=omit)
             return
             yield from right_hand_split(line, py36=py36, omit=omit)
             return
+        except CannotSplit:
+            pass
 
     ensure_visible(opening_bracket)
     ensure_visible(closing_bracket)
 
     ensure_visible(opening_bracket)
     ensure_visible(closing_bracket)
@@ -1994,6 +2102,7 @@ def explode_split(
 
     try:
         yield from delimiter_split(new_lines[1], py36)
 
     try:
         yield from delimiter_split(new_lines[1], py36)
+
     except CannotSplit:
         yield new_lines[1]
 
     except CannotSplit:
         yield new_lines[1]
 
@@ -2061,7 +2170,7 @@ def normalize_string_quotes(leaf: Leaf) -> None:
     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
     escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
     escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
     escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
     escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
-    body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
+    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
     if "r" in prefix.casefold():
         if unescaped_new_quote.search(body):
             # There's at least one unescaped new_quote in this raw string
     if "r" in prefix.casefold():
         if unescaped_new_quote.search(body):
             # There's at least one unescaped new_quote in this raw string
@@ -2103,17 +2212,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
     for child in list(node.children):
         if check_lpar:
             if child.type == syms.atom:
     for child in list(node.children):
         if check_lpar:
             if child.type == syms.atom:
-                if not (
-                    is_empty_tuple(child)
-                    or is_one_tuple(child)
-                    or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
-                ):
-                    first = child.children[0]
-                    last = child.children[-1]
-                    if first.type == token.LPAR and last.type == token.RPAR:
-                        # make parentheses invisible
-                        first.value = ""  # type: ignore
-                        last.value = ""  # type: ignore
+                maybe_make_parens_invisible_in_atom(child)
             elif is_one_tuple(child):
                 # wrap child in visible parentheses
                 lpar = Leaf(token.LPAR, "(")
             elif is_one_tuple(child):
                 # wrap child in visible parentheses
                 lpar = Leaf(token.LPAR, "(")
@@ -2130,6 +2229,29 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
         check_lpar = isinstance(child, Leaf) and child.value in parens_after
 
 
         check_lpar = isinstance(child, Leaf) and child.value in parens_after
 
 
+def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
+    """If it's safe, make the parens in the atom `node` invisible, recusively."""
+    if (
+        node.type != syms.atom
+        or is_empty_tuple(node)
+        or is_one_tuple(node)
+        or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
+    ):
+        return False
+
+    first = node.children[0]
+    last = node.children[-1]
+    if first.type == token.LPAR and last.type == token.RPAR:
+        # make parentheses invisible
+        first.value = ""  # type: ignore
+        last.value = ""  # type: ignore
+        if len(node.children) > 1:
+            maybe_make_parens_invisible_in_atom(node.children[1])
+        return True
+
+    return False
+
+
 def is_empty_tuple(node: LN) -> bool:
     """Return True if `node` holds an empty tuple."""
     return (
 def is_empty_tuple(node: LN) -> bool:
     """Return True if `node` holds an empty tuple."""
     return (
@@ -2231,7 +2353,7 @@ def is_python36(node: Node) -> bool:
 
     Currently looking for:
     - f-strings; and
 
     Currently looking for:
     - f-strings; and
-    - trailing commas after * or ** in function signatures.
+    - trailing commas after * or ** in function signatures and calls.
     """
     for n in node.pre_order():
         if n.type == token.STRING:
     """
     for n in node.pre_order():
         if n.type == token.STRING:
@@ -2240,7 +2362,7 @@ def is_python36(node: Node) -> bool:
                 return True
 
         elif (
                 return True
 
         elif (
-            n.type == syms.typedargslist
+            n.type in {syms.typedargslist, syms.arglist}
             and n.children
             and n.children[-1].type == token.COMMA
         ):
             and n.children
             and n.children[-1].type == token.COMMA
         ):
@@ -2248,6 +2370,11 @@ def is_python36(node: Node) -> bool:
                 if ch.type in STARS:
                     return True
 
                 if ch.type in STARS:
                     return True
 
+                if ch.type == syms.argument:
+                    for argch in ch.children:
+                        if argch.type in STARS:
+                            return True
+
     return False
 
 
     return False