git.madduck.net Git - etc/vim.git/blobdiff - src/black/lines.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Fix merging implicit multiline strings that have inline comments (#3956)
[etc/vim.git] / src / black / lines.py
index ec6ef5d952290ad153d4db27b6a99ff0c709ebe8..6acc95e7a7e004dbf2e35a571bb08def6f1100fe 100644 (file)
@@ -1,4 +1,5 @@
 import itertools
+import math
 import sys
 from dataclasses import dataclass, field
 from typing import (
@@ -10,11 +11,12 @@ from typing import (
     Sequence,
     Tuple,
     TypeVar,
+    Union,
     cast,
 )
 
-from black.brackets import DOT_PRIORITY, BracketTracker
-from black.mode import Mode
+from black.brackets import COMMA_PRIORITY, DOT_PRIORITY, BracketTracker
+from black.mode import Mode, Preview
 from black.nodes import (
     BRACKETS,
     CLOSING_BRACKETS,
@@ -26,10 +28,13 @@ from black.nodes import (
     is_multiline_string,
     is_one_sequence_between,
     is_type_comment,
+    is_type_ignore_comment,
+    is_with_or_async_with_stmt,
     replace_child,
     syms,
     whitespace,
 )
+from black.strings import str_width
 from blib2to3.pgen2 import token
 from blib2to3.pytree import Leaf, Node
 
@@ -37,13 +42,14 @@ from blib2to3.pytree import Leaf, Node
 T = TypeVar("T")
 Index = int
 LeafID = int
+LN = Union[Leaf, Node]
 
 
 @dataclass
 class Line:
     """Holds leaves and comments. Can be printed with `str(line)`."""
 
-    mode: Mode
+    mode: Mode = field(repr=False)
     depth: int = 0
     leaves: List[Leaf] = field(default_factory=list)
     # keys ordered like `leaves`
@@ -75,7 +81,9 @@ class Line:
             # Note: at this point leaf.prefix should be empty except for
             # imports, for which we only preserve newlines.
             leaf.prefix += whitespace(
-                leaf, complex_subscript=self.is_complex_subscript(leaf)
+                leaf,
+                complex_subscript=self.is_complex_subscript(leaf),
+                mode=self.mode,
             )
         if self.inside_brackets or not preformatted or track_bracket:
             self.bracket_tracker.mark(leaf)
@@ -119,6 +127,11 @@ class Line:
         """Is this an import line?"""
         return bool(self) and is_import(self.leaves[0])
 
+    @property
+    def is_with_or_async_with_stmt(self) -> bool:
+        """Is this a with_stmt line?"""
+        return bool(self) and is_with_or_async_with_stmt(self.leaves[0])
+
     @property
     def is_class(self) -> bool:
         """Is this line a class definition?"""
@@ -154,6 +167,13 @@ class Line:
             and second_leaf.value == "def"
         )
 
+    @property
+    def is_stub_def(self) -> bool:
+        """Is this line a function definition with a body consisting only of "..."?"""
+        return self.is_def and self.leaves[-4:] == [Leaf(token.COLON, ":")] + [
+            Leaf(token.DOT, ".") for _ in range(3)
+        ]
+
     @property
     def is_class_paren_empty(self) -> bool:
         """Is this a class with no base classes but using parentheses?
@@ -173,11 +193,16 @@ class Line:
     @property
     def is_triple_quoted_string(self) -> bool:
         """Is the line a triple quoted string?"""
-        return (
-            bool(self)
-            and self.leaves[0].type == token.STRING
-            and self.leaves[0].value.startswith(('"""', "'''"))
-        )
+        if not self or self.leaves[0].type != token.STRING:
+            return False
+        value = self.leaves[0].value
+        if value.startswith(('"""', "'''")):
+            return True
+        if Preview.accept_raw_docstrings in self.mode and value.startswith(
+            ("r'''", 'r"""', "R'''", 'R"""')
+        ):
+            return True
+        return False
 
     @property
     def opens_block(self) -> bool:
@@ -186,6 +211,26 @@ class Line:
             return False
         return self.leaves[-1].type == token.COLON
 
+    def is_fmt_pass_converted(
+        self, *, first_leaf_matches: Optional[Callable[[Leaf], bool]] = None
+    ) -> bool:
+        """Is this line converted from fmt off/skip code?
+
+        If first_leaf_matches is not None, it only returns True if the first
+        leaf of converted code matches.
+        """
+        if len(self.leaves) != 1:
+            return False
+        leaf = self.leaves[0]
+        if (
+            leaf.type != STANDALONE_COMMENT
+            or leaf.fmt_pass_converted_first_leaf is None
+        ):
+            return False
+        return first_leaf_matches is None or first_leaf_matches(
+            leaf.fmt_pass_converted_first_leaf
+        )
+
     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
         """If so, needs to be split before emitting."""
         for leaf in self.leaves:
@@ -194,6 +239,21 @@ class Line:
 
         return False
 
+    def contains_implicit_multiline_string_with_comments(self) -> bool:
+        """Check if we have an implicit multiline string with comments on the line"""
+        for leaf_type, leaf_group_iterator in itertools.groupby(
+            self.leaves, lambda leaf: leaf.type
+        ):
+            if leaf_type != token.STRING:
+                continue
+            leaf_list = list(leaf_group_iterator)
+            if len(leaf_list) == 1:
+                continue
+            for leaf in leaf_list:
+                if self.comments_after(leaf):
+                    return True
+        return False
+
     def contains_uncollapsable_type_comments(self) -> bool:
         ignored_ids = set()
         try:
@@ -221,7 +281,7 @@ class Line:
             for comment in comments:
                 if is_type_comment(comment):
                     if comment_seen or (
-                        not is_type_comment(comment, " ignore")
+                        not is_type_ignore_comment(comment)
                         and leaf_id not in ignored_ids
                     ):
                         return True
@@ -258,7 +318,7 @@ class Line:
             # line.
             for node in self.leaves[-2:]:
                 for comment in self.comments.get(id(node), []):
-                    if is_type_comment(comment, " ignore"):
+                    if is_type_ignore_comment(comment):
                         return True
 
         return False
@@ -446,6 +506,17 @@ class Line:
         return bool(self.leaves or self.comments)
 
 
+@dataclass
+class RHSResult:
+    """Intermediate split result from a right hand split."""
+
+    head: Line
+    body: Line
+    tail: Line
+    opening_bracket: Leaf
+    closing_bracket: Leaf
+
+
 @dataclass
 class LinesBlock:
     """Class that holds information about a block of formatted lines.
@@ -481,7 +552,7 @@ class EmptyLineTracker:
     mode: Mode
     previous_line: Optional[Line] = None
     previous_block: Optional[LinesBlock] = None
-    previous_defs: List[int] = field(default_factory=list)
+    previous_defs: List[Line] = field(default_factory=list)
     semantic_leading_comment: Optional[LinesBlock] = None
 
     def maybe_empty_lines(self, current_line: Line) -> LinesBlock:
@@ -499,6 +570,15 @@ class EmptyLineTracker:
             if self.previous_line is None
             else before - previous_after
         )
+        if (
+            Preview.module_docstring_newlines in current_line.mode
+            and self.previous_block
+            and self.previous_block.previous_block is None
+            and len(self.previous_block.original_line.leaves) == 1
+            and self.previous_block.original_line.is_triple_quoted_string
+        ):
+            before = 1
+
         block = LinesBlock(
             mode=self.mode,
             previous_block=self.previous_block,
@@ -536,13 +616,26 @@ class EmptyLineTracker:
             first_leaf.prefix = ""
         else:
             before = 0
+
+        user_had_newline = bool(before)
         depth = current_line.depth
-        while self.previous_defs and self.previous_defs[-1] >= depth:
+
+        previous_def = None
+        while self.previous_defs and self.previous_defs[-1].depth >= depth:
+            previous_def = self.previous_defs.pop()
+
+        if previous_def is not None:
+            assert self.previous_line is not None
             if self.mode.is_pyi:
-                assert self.previous_line is not None
                 if depth and not current_line.is_def and self.previous_line.is_def:
                     # Empty lines between attributes and methods should be preserved.
-                    before = min(1, before)
+                    before = 1 if user_had_newline else 0
+                elif (
+                    Preview.blank_line_after_nested_stub_class in self.mode
+                    and previous_def.is_class
+                    and not previous_def.is_stub_class
+                ):
+                    before = 1
                 elif depth:
                     before = 0
                 else:
@@ -552,7 +645,7 @@ class EmptyLineTracker:
                     before = 1
                 elif (
                     not depth
-                    and self.previous_defs[-1]
+                    and previous_def.depth
                     and current_line.leaves[-1].type == token.COLON
                     and (
                         current_line.leaves[0].value
@@ -569,14 +662,17 @@ class EmptyLineTracker:
                     before = 1
                 else:
                     before = 2
-            self.previous_defs.pop()
+
         if current_line.is_decorator or current_line.is_def or current_line.is_class:
-            return self._maybe_empty_lines_for_class_or_def(current_line, before)
+            return self._maybe_empty_lines_for_class_or_def(
+                current_line, before, user_had_newline
+            )
 
         if (
             self.previous_line
             and self.previous_line.is_import
             and not current_line.is_import
+            and not current_line.is_fmt_pass_converted(first_leaf_matches=is_import)
             and depth == self.previous_line.depth
         ):
             return (before or 1), 0
@@ -586,17 +682,19 @@ class EmptyLineTracker:
             and self.previous_line.is_class
             and current_line.is_triple_quoted_string
         ):
+            if Preview.no_blank_line_before_class_docstring in current_line.mode:
+                return 0, 1
             return before, 1
 
         if self.previous_line and self.previous_line.opens_block:
             return 0, 0
         return before, 0
 
-    def _maybe_empty_lines_for_class_or_def(
-        self, current_line: Line, before: int
+    def _maybe_empty_lines_for_class_or_def(  # noqa: C901
+        self, current_line: Line, before: int, user_had_newline: bool
     ) -> Tuple[int, int]:
         if not current_line.is_decorator:
-            self.previous_defs.append(current_line.depth)
+            self.previous_defs.append(current_line)
         if self.previous_line is None:
             # Don't insert empty lines before the first line in the file.
             return 0, 0
@@ -642,6 +740,17 @@ class EmptyLineTracker:
                     newlines = 0
                 else:
                     newlines = 1
+            # Remove case `self.previous_line.depth > current_line.depth` below when
+            # this becomes stable.
+            #
+            # Don't inspect the previous line if it's part of the body of the previous
+            # statement in the same level, we always want a blank line if there's
+            # something with a body preceding.
+            elif (
+                Preview.blank_line_between_nested_and_def_stub_file in current_line.mode
+                and self.previous_line.depth > current_line.depth
+            ):
+                newlines = 1
             elif (
                 current_line.is_def or current_line.is_decorator
             ) and not self.previous_line.is_def:
@@ -659,6 +768,14 @@ class EmptyLineTracker:
                 newlines = 0
         else:
             newlines = 1 if current_line.depth else 2
+            # If a user has left no space after a dummy implementation, don't insert
+            # new lines. This is useful for instance for @overload or Protocols.
+            if (
+                Preview.dummy_implementations in self.mode
+                and self.previous_line.is_stub_def
+                and not user_had_newline
+            ):
+                newlines = 0
         if comment_to_add_newlines is not None:
             previous_block = comment_to_add_newlines.previous_block
             if previous_block is not None:
@@ -701,18 +818,95 @@ def append_leaves(
             new_line.append(comment_leaf, preformatted=True)
 
 
-def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
-    """Return True if `line` is no longer than `line_length`.
-
+def is_line_short_enough(  # noqa: C901
+    line: Line, *, mode: Mode, line_str: str = ""
+) -> bool:
+    """For non-multiline strings, return True if `line` is no longer than `line_length`.
+    For multiline strings, looks at the context around `line` to determine
+    if it should be inlined or split up.
     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
     """
     if not line_str:
         line_str = line_to_string(line)
-    return (
-        len(line_str) <= line_length
-        and "\n" not in line_str  # multiline strings
-        and not line.contains_standalone_comments()
-    )
+
+    width = str_width if mode.preview else len
+
+    if Preview.multiline_string_handling not in mode:
+        return (
+            width(line_str) <= mode.line_length
+            and "\n" not in line_str  # multiline strings
+            and not line.contains_standalone_comments()
+        )
+
+    if line.contains_standalone_comments():
+        return False
+    if "\n" not in line_str:
+        # No multiline strings (MLS) present
+        return width(line_str) <= mode.line_length
+
+    first, *_, last = line_str.split("\n")
+    if width(first) > mode.line_length or width(last) > mode.line_length:
+        return False
+
+    # Traverse the AST to examine the context of the multiline string (MLS),
+    # tracking aspects such as depth and comma existence,
+    # to determine whether to split the MLS or keep it together.
+    # Depth (which is based on the existing bracket_depth concept)
+    # is needed to determine nesting level of the MLS.
+    # Includes special case for trailing commas.
+    commas: List[int] = []  # tracks number of commas per depth level
+    multiline_string: Optional[Leaf] = None
+    # store the leaves that contain parts of the MLS
+    multiline_string_contexts: List[LN] = []
+
+    max_level_to_update: Union[int, float] = math.inf  # track the depth of the MLS
+    for i, leaf in enumerate(line.leaves):
+        if max_level_to_update == math.inf:
+            had_comma: Optional[int] = None
+            if leaf.bracket_depth + 1 > len(commas):
+                commas.append(0)
+            elif leaf.bracket_depth + 1 < len(commas):
+                had_comma = commas.pop()
+            if (
+                had_comma is not None
+                and multiline_string is not None
+                and multiline_string.bracket_depth == leaf.bracket_depth + 1
+            ):
+                # Have left the level with the MLS, stop tracking commas
+                max_level_to_update = leaf.bracket_depth
+                if had_comma > 0:
+                    # MLS was in parens with at least one comma - force split
+                    return False
+
+        if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
+            # Ignore non-nested trailing comma
+            # directly after MLS/MLS-containing expression
+            ignore_ctxs: List[Optional[LN]] = [None]
+            ignore_ctxs += multiline_string_contexts
+            if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
+                commas[leaf.bracket_depth] += 1
+        if max_level_to_update != math.inf:
+            max_level_to_update = min(max_level_to_update, leaf.bracket_depth)
+
+        if is_multiline_string(leaf):
+            if len(multiline_string_contexts) > 0:
+                # >1 multiline string cannot fit on a single line - force split
+                return False
+            multiline_string = leaf
+            ctx: LN = leaf
+            # fetch the leaf components of the MLS in the AST
+            while str(ctx) in line_str:
+                multiline_string_contexts.append(ctx)
+                if ctx.parent is None:
+                    break
+                ctx = ctx.parent
+
+    # May not have a triple-quoted multiline string at all,
+    # in case of a regular string with embedded newlines and line continuations
+    if len(multiline_string_contexts) == 0:
+        return True
+
+    return all(val == 0 for val in commas)
 
 
 def can_be_split(line: Line) -> bool:
@@ -752,25 +946,42 @@ def can_be_split(line: Line) -> bool:
 
 
 def can_omit_invisible_parens(
-    line: Line,
+    rhs: RHSResult,
     line_length: int,
 ) -> bool:
-    """Does `line` have a shape safe to reformat without optional parens around it?
+    """Does `rhs.body` have a shape safe to reformat without optional parens around it?
 
     Returns True for only a subset of potentially nice looking formattings but
     the point is to not return false positives that end up producing lines that
     are too long.
     """
+    line = rhs.body
     bt = line.bracket_tracker
     if not bt.delimiters:
         # Without delimiters the optional parentheses are useless.
         return True
 
     max_priority = bt.max_delimiter_priority()
-    if bt.delimiter_count_with_priority(max_priority) > 1:
+    delimiter_count = bt.delimiter_count_with_priority(max_priority)
+    if delimiter_count > 1:
         # With more than one delimiter of a kind the optional parentheses read better.
         return False
 
+    if delimiter_count == 1:
+        if (
+            Preview.wrap_multiple_context_managers_in_parens in line.mode
+            and max_priority == COMMA_PRIORITY
+            and rhs.head.is_with_or_async_with_stmt
+        ):
+            # For two context manager with statements, the optional parentheses read
+            # better. In this case, `rhs.body` is the context managers part of
+            # the with statement. `rhs.head` is the `with (` part on the previous
+            # line.
+            return False
+        # Otherwise it may also read better, but we don't do it today and requires
+        # careful considerations for all possible cases. See
+        # https://github.com/psf/black/issues/2156.
+
     if max_priority == DOT_PRIORITY:
         # A single stranded method call doesn't require optional parentheses.
         return True