git.madduck.net Git - etc/vim.git/blobdiff - src/black/__init__.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Stop running Primer on macOS as it's flaky on GitHub Actions
[etc/vim.git] / src / black / __init__.py
index 2250943665ad21e0cc18ca0efa8edd7288a9329f..200e31fd45873846fe60f45e1dba1f87bc5db8e2 100644 (file)
@@ -195,6 +195,7 @@ class Feature(Enum):
     ASYNC_KEYWORDS = 7
     ASSIGNMENT_EXPRESSIONS = 8
     POS_ONLY_ARGUMENTS = 9
+    FORCE_OPTIONAL_PARENTHESES = 50
 
 
 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
@@ -1284,6 +1285,7 @@ class BracketTracker:
     previous: Optional[Leaf] = None
     _for_loop_depths: List[int] = field(default_factory=list)
     _lambda_argument_depths: List[int] = field(default_factory=list)
+    invisible: List[Leaf] = field(default_factory=list)
 
     def mark(self, leaf: Leaf) -> None:
         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
@@ -1309,6 +1311,8 @@ class BracketTracker:
             self.depth -= 1
             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
             leaf.opening_bracket = opening_bracket
+            if not leaf.value:
+                self.invisible.append(leaf)
         leaf.bracket_depth = self.depth
         if self.depth == 0:
             delim = is_split_before_delimiter(leaf, self.previous)
@@ -1321,6 +1325,8 @@ class BracketTracker:
         if leaf.type in OPENING_BRACKETS:
             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
             self.depth += 1
+            if not leaf.value:
+                self.invisible.append(leaf)
         self.previous = leaf
         self.maybe_increment_lambda_arguments(leaf)
         self.maybe_increment_for_loop_variable(leaf)
@@ -1442,7 +1448,8 @@ class Line:
             )
         if self.inside_brackets or not preformatted:
             self.bracket_tracker.mark(leaf)
-            self.maybe_remove_trailing_comma(leaf)
+            if self.maybe_should_explode(leaf):
+                self.should_explode = True
         if not self.append_comment(leaf):
             self.leaves.append(leaf)
 
@@ -1618,59 +1625,25 @@ class Line:
     def contains_multiline_strings(self) -> bool:
         return any(is_multiline_string(leaf) for leaf in self.leaves)
 
-    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
-        """Remove trailing comma if there is one and it's safe."""
+    def maybe_should_explode(self, closing: Leaf) -> bool:
+        """Return True if this line should explode (always be split), that is when:
+        - there's a trailing comma here; and
+        - it's not a one-tuple.
+        """
         if not (
-            self.leaves
+            closing.type in CLOSING_BRACKETS
+            and self.leaves
             and self.leaves[-1].type == token.COMMA
-            and closing.type in CLOSING_BRACKETS
         ):
             return False
 
-        if closing.type == token.RBRACE:
-            self.remove_trailing_comma()
+        if closing.type in {token.RBRACE, token.RSQB}:
             return True
 
-        if closing.type == token.RSQB:
-            comma = self.leaves[-1]
-            if comma.parent and comma.parent.type == syms.listmaker:
-                self.remove_trailing_comma()
-                return True
-
-        # For parens let's check if it's safe to remove the comma.
-        # Imports are always safe.
         if self.is_import:
-            self.remove_trailing_comma()
             return True
 
-        # Otherwise, if the trailing one is the only one, we might mistakenly
-        # change a tuple into a different type by removing the comma.
-        depth = closing.bracket_depth + 1
-        commas = 0
-        opening = closing.opening_bracket
-        for _opening_index, leaf in enumerate(self.leaves):
-            if leaf is opening:
-                break
-
-        else:
-            return False
-
-        for leaf in self.leaves[_opening_index + 1 :]:
-            if leaf is closing:
-                break
-
-            bracket_depth = leaf.bracket_depth
-            if bracket_depth == depth and leaf.type == token.COMMA:
-                commas += 1
-                if leaf.parent and leaf.parent.type in {
-                    syms.arglist,
-                    syms.typedargslist,
-                }:
-                    commas += 1
-                    break
-
-        if commas > 1:
-            self.remove_trailing_comma()
+        if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves):
             return True
 
         return False
@@ -2063,13 +2036,20 @@ class LineGenerator(Visitor[Line]):
         yield from self.visit_default(node)
 
     def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
-        # Check if it's a docstring
-        if prev_siblings_are(
-            leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
-        ) and is_multiline_string(leaf):
-            prefix = "    " * self.current_line.depth
-            docstring = fix_docstring(leaf.value[3:-3], prefix)
-            leaf.value = leaf.value[0:3] + docstring + leaf.value[-3:]
+        if is_docstring(leaf) and "\\\n" not in leaf.value:
+            # We're ignoring docstrings with backslash newline escapes because changing
+            # indentation of those changes the AST representation of the code.
+            prefix = get_string_prefix(leaf.value)
+            lead_len = len(prefix) + 3
+            tail_len = -3
+            indent = " " * 4 * self.current_line.depth
+            docstring = fix_docstring(leaf.value[lead_len:tail_len], indent)
+            if docstring:
+                if leaf.value[lead_len - 1] == docstring[0]:
+                    docstring = " " + docstring
+                if leaf.value[tail_len + 1] == docstring[-1]:
+                    docstring = docstring + " "
+            leaf.value = leaf.value[0:lead_len] + docstring + leaf.value[tail_len:]
             normalize_string_quotes(leaf)
 
         yield from self.visit_default(leaf)
@@ -2647,7 +2627,7 @@ def transform_line(
             is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
             or line.contains_unsplittable_type_ignore()
         )
-        and not (line.contains_standalone_comments() and line.inside_brackets)
+        and not (line.inside_brackets and line.contains_standalone_comments())
     ):
         # Only apply basic string preprocessing, since lines shouldn't be split here.
         if mode.experimental_string_processing:
@@ -2659,20 +2639,31 @@ def transform_line(
     else:
 
         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
+            """Wraps calls to `right_hand_split`.
+
+            The calls increasingly `omit` right-hand trailers (bracket pairs with
+            content), meaning the trailers get glued together to split on another
+            bracket pair instead.
+            """
             for omit in generate_trailers_to_omit(line, mode.line_length):
                 lines = list(
                     right_hand_split(line, mode.line_length, features, omit=omit)
                 )
+                # Note: this check is only able to figure out if the first line of the
+                # *current* transformation fits in the line length.  This is true only
+                # for simple cases.  All others require running more transforms via
+                # `transform_line()`.  This check doesn't know if those would succeed.
                 if is_line_short_enough(lines[0], line_length=mode.line_length):
                     yield from lines
                     return
 
             # All splits failed, best effort split with no omits.
             # This mostly happens to multiline strings that are by definition
-            # reported as not fitting a single line.
-            # line_length=1 here was historically a bug that somehow became a feature.
-            # See #762 and #781 for the full story.
-            yield from right_hand_split(line, line_length=1, features=features)
+            # reported as not fitting a single line, as well as lines that contain
+            # trailing commas (those have to be exploded).
+            yield from right_hand_split(
+                line, line_length=mode.line_length, features=features
+            )
 
         if mode.experimental_string_processing:
             if line.inside_brackets:
@@ -2703,17 +2694,8 @@ def transform_line(
         # We are accumulating lines in `result` because we might want to abort
         # mission and return the original line in the end, or attempt a different
         # split altogether.
-        result: List[Line] = []
         try:
-            for transformed_line in transform(line, features):
-                if str(transformed_line).strip("\n") == line_str:
-                    raise CannotTransform(
-                        "Line transformer returned an unchanged result"
-                    )
-
-                result.extend(
-                    transform_line(transformed_line, mode=mode, features=features)
-                )
+            result = run_transformer(line, transform, mode, features, line_str=line_str)
         except CannotTransform:
             continue
         else:
@@ -2754,6 +2736,7 @@ class StringTransformer(ABC):
 
     line_length: int
     normalize_strings: bool
+    __name__ = "StringTransformer"
 
     @abstractmethod
     def do_match(self, line: Line) -> TMatchResult:
@@ -3000,7 +2983,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
             )
 
         new_line = line.clone()
-        new_line.comments = line.comments
+        new_line.comments = line.comments.copy()
         append_leaves(new_line, line, LL)
 
         new_string_leaf = new_line.leaves[string_idx]
@@ -3328,7 +3311,6 @@ class StringParenStripper(StringTransformer):
 
         new_line = line.clone()
         new_line.comments = line.comments.copy()
-
         append_leaves(new_line, line, LL[: string_idx - 1])
 
         string_leaf = Leaf(token.STRING, LL[string_idx].value)
@@ -4772,8 +4754,7 @@ def right_hand_split(
     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
     bracket_split_succeeded_or_raise(head, body, tail)
     if (
-        # the body shouldn't be exploded
-        not body.should_explode
+        Feature.FORCE_OPTIONAL_PARENTHESES not in features
         # the opening bracket is an optional paren
         and opening_bracket.type == token.LPAR
         and not opening_bracket.value
@@ -4786,7 +4767,7 @@ def right_hand_split(
         # there are no standalone comments in the body
         and not body.contains_standalone_comments(0)
         # and we can actually remove the parens
-        and can_omit_invisible_parens(body, line_length)
+        and can_omit_invisible_parens(body, line_length, omit_on_explode=omit)
     ):
         omit = {id(closing_bracket), *omit}
         try:
@@ -4872,7 +4853,8 @@ def bracket_split_build_line(
                         continue
 
                     if leaves[i].type != token.COMMA:
-                        leaves.insert(i + 1, Leaf(token.COMMA, ","))
+                        new_comma = Leaf(token.COMMA, ",")
+                        leaves.insert(i + 1, new_comma)
                     break
 
     # Populate the line
@@ -4880,8 +4862,8 @@ def bracket_split_build_line(
         result.append(leaf, preformatted=True)
         for comment_after in original.comments_after(leaf):
             result.append(comment_after, preformatted=True)
-    if is_body:
-        result.should_explode = should_explode(result, opening_bracket)
+    if is_body and should_split_body_explode(result, opening_bracket):
+        result.should_explode = True
     return result
 
 
@@ -4966,7 +4948,8 @@ def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[
             and current_line.leaves[-1].type != token.COMMA
             and current_line.leaves[-1].type != STANDALONE_COMMENT
         ):
-            current_line.append(Leaf(token.COMMA, ","))
+            new_comma = Leaf(token.COMMA, ",")
+            current_line.append(new_comma)
         yield current_line
 
 
@@ -5588,24 +5571,63 @@ def ensure_visible(leaf: Leaf) -> None:
         leaf.value = ")"
 
 
-def should_explode(line: Line, opening_bracket: Leaf) -> bool:
-    """Should `line` immediately be split with `delimiter_split()` after RHS?"""
+def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
+    """Should `line` be immediately split with `delimiter_split()` after RHS?"""
 
-    if not (
-        opening_bracket.parent
-        and opening_bracket.parent.type in {syms.atom, syms.import_from}
-        and opening_bracket.value in "[{("
-    ):
+    if not (opening_bracket.parent and opening_bracket.value in "[{("):
         return False
 
+    # We're essentially checking if the body is delimited by commas and there's more
+    # than one of them (we're excluding the trailing comma and if the delimiter priority
+    # is still commas, that means there's more).
+    exclude = set()
+    trailing_comma = False
     try:
         last_leaf = line.leaves[-1]
-        exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
+        if last_leaf.type == token.COMMA:
+            trailing_comma = True
+            exclude.add(id(last_leaf))
         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
     except (IndexError, ValueError):
         return False
 
-    return max_priority == COMMA_PRIORITY
+    return max_priority == COMMA_PRIORITY and (
+        trailing_comma
+        # always explode imports
+        or opening_bracket.parent.type in {syms.atom, syms.import_from}
+    )
+
+
+def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
+    """Return True if content between `opening` and `closing` looks like a one-tuple."""
+    if opening.type != token.LPAR and closing.type != token.RPAR:
+        return False
+
+    depth = closing.bracket_depth + 1
+    for _opening_index, leaf in enumerate(leaves):
+        if leaf is opening:
+            break
+
+    else:
+        raise LookupError("Opening paren not found in `leaves`")
+
+    commas = 0
+    _opening_index += 1
+    for leaf in leaves[_opening_index:]:
+        if leaf is closing:
+            break
+
+        bracket_depth = leaf.bracket_depth
+        if bracket_depth == depth and leaf.type == token.COMMA:
+            commas += 1
+            if leaf.parent and leaf.parent.type in {
+                syms.arglist,
+                syms.typedargslist,
+            }:
+                commas += 1
+                break
+
+    return commas < 2
 
 
 def get_features_used(node: Node) -> Set[Feature]:
@@ -5672,11 +5694,13 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
     a preceding closing bracket fits in one line.
 
     Yielded sets are cumulative (contain results of previous yields, too).  First
-    set is empty.
+    set is empty, unless the line should explode, in which case bracket pairs until
+    the one that needs to explode are omitted.
     """
 
     omit: Set[LeafID] = set()
-    yield omit
+    if not line.should_explode:
+        yield omit
 
     length = 4 * line.depth
     opening_bracket: Optional[Leaf] = None
@@ -5695,9 +5719,23 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
             if leaf is opening_bracket:
                 opening_bracket = None
             elif leaf.type in CLOSING_BRACKETS:
+                prev = line.leaves[index - 1] if index > 0 else None
+                if (
+                    line.should_explode
+                    and prev
+                    and prev.type == token.COMMA
+                    and not is_one_tuple_between(
+                        leaf.opening_bracket, leaf, line.leaves
+                    )
+                ):
+                    # Never omit bracket pairs with trailing commas.
+                    # We need to explode on those.
+                    break
+
                 inner_brackets.add(id(leaf))
         elif leaf.type in CLOSING_BRACKETS:
-            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
+            prev = line.leaves[index - 1] if index > 0 else None
+            if prev and prev.type in OPENING_BRACKETS:
                 # Empty brackets would fail a split so treat them as "inner"
                 # brackets (e.g. only add them to the `omit` set if another
                 # pair of brackets was good enough.
@@ -5710,6 +5748,16 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
                 inner_brackets.clear()
                 yield omit
 
+            if (
+                line.should_explode
+                and prev
+                and prev.type == token.COMMA
+                and not is_one_tuple_between(leaf.opening_bracket, leaf, line.leaves)
+            ):
+                # Never omit bracket pairs with trailing commas.
+                # We need to explode on those.
+                break
+
             if leaf.value:
                 opening_bracket = leaf.opening_bracket
                 closing_bracket = leaf
@@ -6115,6 +6163,7 @@ def assert_stable(src: str, dst: str, mode: Mode) -> None:
     newdst = format_str(dst, mode=mode)
     if dst != newdst:
         log = dump_to_file(
+            str(mode),
             diff(src, dst, "source", "first pass"),
             diff(dst, newdst, "first pass", "second pass"),
         )
@@ -6291,7 +6340,11 @@ def can_be_split(line: Line) -> bool:
     return True
 
 
-def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
+def can_omit_invisible_parens(
+    line: Line,
+    line_length: int,
+    omit_on_explode: Collection[LeafID] = (),
+) -> bool:
     """Does `line` have a shape safe to reformat without optional parens around it?
 
     Returns True for only a subset of potentially nice looking formattings but
@@ -6314,37 +6367,27 @@ def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
 
     assert len(line.leaves) >= 2, "Stranded delimiter"
 
-    first = line.leaves[0]
-    second = line.leaves[1]
-    penultimate = line.leaves[-2]
-    last = line.leaves[-1]
-
     # With a single delimiter, omit if the expression starts or ends with
     # a bracket.
+    first = line.leaves[0]
+    second = line.leaves[1]
     if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
-        remainder = False
-        length = 4 * line.depth
-        for _index, leaf, leaf_length in enumerate_with_length(line):
-            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
-                remainder = True
-            if remainder:
-                length += leaf_length
-                if length > line_length:
-                    break
-
-                if leaf.type in OPENING_BRACKETS:
-                    # There are brackets we can further split on.
-                    remainder = False
-
-        else:
-            # checked the entire string and line length wasn't exceeded
-            if len(line.leaves) == _index + 1:
-                return True
+        if _can_omit_opening_paren(line, first=first, line_length=line_length):
+            return True
 
         # Note: we are not returning False here because a line might have *both*
         # a leading opening bracket and a trailing closing bracket.  If the
         # opening bracket doesn't match our rule, maybe the closing will.
 
+    penultimate = line.leaves[-2]
+    last = line.leaves[-1]
+    if line.should_explode:
+        try:
+            penultimate, last = last_two_except(line.leaves, omit=omit_on_explode)
+        except LookupError:
+            # Turns out we'd omit everything.  We cannot skip the optional parentheses.
+            return False
+
     if (
         last.type == token.RPAR
         or last.type == token.RBRACE
@@ -6365,21 +6408,120 @@ def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
             # unnecessary.
             return True
 
-        length = 4 * line.depth
-        seen_other_brackets = False
-        for _index, leaf, leaf_length in enumerate_with_length(line):
+        if line.should_explode and penultimate.type == token.COMMA:
+            # The rightmost non-omitted bracket pair is the one we want to explode on.
+            return True
+
+        if _can_omit_closing_paren(line, last=last, line_length=line_length):
+            return True
+
+    return False
+
+
+def _can_omit_opening_paren(line: Line, *, first: Leaf, line_length: int) -> bool:
+    """See `can_omit_invisible_parens`."""
+    remainder = False
+    length = 4 * line.depth
+    _index = -1
+    for _index, leaf, leaf_length in enumerate_with_length(line):
+        if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
+            remainder = True
+        if remainder:
             length += leaf_length
-            if leaf is last.opening_bracket:
-                if seen_other_brackets or length <= line_length:
-                    return True
+            if length > line_length:
+                break
 
-            elif leaf.type in OPENING_BRACKETS:
+            if leaf.type in OPENING_BRACKETS:
                 # There are brackets we can further split on.
-                seen_other_brackets = True
+                remainder = False
+
+    else:
+        # checked the entire string and line length wasn't exceeded
+        if len(line.leaves) == _index + 1:
+            return True
+
+    return False
+
+
+def _can_omit_closing_paren(line: Line, *, last: Leaf, line_length: int) -> bool:
+    """See `can_omit_invisible_parens`."""
+    length = 4 * line.depth
+    seen_other_brackets = False
+    for _index, leaf, leaf_length in enumerate_with_length(line):
+        length += leaf_length
+        if leaf is last.opening_bracket:
+            if seen_other_brackets or length <= line_length:
+                return True
+
+        elif leaf.type in OPENING_BRACKETS:
+            # There are brackets we can further split on.
+            seen_other_brackets = True
 
     return False
 
 
+def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]:
+    """Return (penultimate, last) leaves skipping brackets in `omit` and contents."""
+    stop_after = None
+    last = None
+    for leaf in reversed(leaves):
+        if stop_after:
+            if leaf is stop_after:
+                stop_after = None
+            continue
+
+        if last:
+            return leaf, last
+
+        if id(leaf) in omit:
+            stop_after = leaf.opening_bracket
+        else:
+            last = leaf
+    else:
+        raise LookupError("Last two leaves were also skipped")
+
+
+def run_transformer(
+    line: Line,
+    transform: Transformer,
+    mode: Mode,
+    features: Collection[Feature],
+    *,
+    line_str: str = "",
+) -> List[Line]:
+    if not line_str:
+        line_str = line_to_string(line)
+    result: List[Line] = []
+    for transformed_line in transform(line, features):
+        if str(transformed_line).strip("\n") == line_str:
+            raise CannotTransform("Line transformer returned an unchanged result")
+
+        result.extend(transform_line(transformed_line, mode=mode, features=features))
+
+    if not (
+        transform.__name__ == "rhs"
+        and line.bracket_tracker.invisible
+        and not any(bracket.value for bracket in line.bracket_tracker.invisible)
+        and not line.contains_multiline_strings()
+        and not result[0].contains_uncollapsable_type_comments()
+        and not result[0].contains_unsplittable_type_ignore()
+        and not is_line_short_enough(result[0], line_length=mode.line_length)
+    ):
+        return result
+
+    line_copy = line.clone()
+    append_leaves(line_copy, line, line.leaves)
+    features_fop = set(features) | {Feature.FORCE_OPTIONAL_PARENTHESES}
+    second_opinion = run_transformer(
+        line_copy, transform, mode, features_fop, line_str=line_str
+    )
+    if all(
+        is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
+    ):
+        result = second_opinion
+    return result
+
+
 def get_cache_file(mode: Mode) -> Path:
     return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"
 
@@ -6465,6 +6607,26 @@ def patched_main() -> None:
     main()
 
 
+def is_docstring(leaf: Leaf) -> bool:
+    if not is_multiline_string(leaf):
+        # For the purposes of docstring re-indentation, we don't need to do anything
+        # with single-line docstrings.
+        return False
+
+    if prev_siblings_are(
+        leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
+    ):
+        return True
+
+    # Multiline docstring on the same line as the `def`.
+    if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
+        # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
+        # grammar. We're safe to return True without further checks.
+        return True
+
+    return False
+
+
 def fix_docstring(docstring: str, prefix: str) -> str:
     # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
     if not docstring:
@@ -6488,7 +6650,6 @@ def fix_docstring(docstring: str, prefix: str) -> str:
                 trimmed.append(prefix + stripped_line)
             else:
                 trimmed.append("")
-    # Return a single string:
     return "\n".join(trimmed)