Fix parser bug where "type" was misinterpreted as a keyword inside a match (#3950)

[etc/vim.git] / src / black / lines.py
diff --git a/src/black/lines.py b/src/black/lines.py

index 08281bcf370a1a987fcffa9602d54a7fe3d71593..48fde88820853deec0e5b6405a8b4bf116bf9fe4 100644 (file)
--- a/src/black/lines.py
+++ b/src/black/lines.py
@@ -1,4 +1,5 @@
  import itertools
+import math
  import sys
  from dataclasses import dataclass, field
  from typing import (
@@ -10,10 +11,11 @@ from typing import (
      Sequence,
      Tuple,
      TypeVar,
+    Union,
      cast,
  )
  
-from black.brackets import DOT_PRIORITY, BracketTracker
+from black.brackets import COMMA_PRIORITY, DOT_PRIORITY, BracketTracker
  from black.mode import Mode, Preview
  from black.nodes import (
      BRACKETS,
@@ -26,10 +28,13 @@ from black.nodes import (
      is_multiline_string,
      is_one_sequence_between,
      is_type_comment,
+    is_type_ignore_comment,
+    is_with_or_async_with_stmt,
      replace_child,
      syms,
      whitespace,
  )
+from black.strings import str_width
  from blib2to3.pgen2 import token
  from blib2to3.pytree import Leaf, Node
  
@@ -37,13 +42,14 @@ from blib2to3.pytree import Leaf, Node
  T = TypeVar("T")
  Index = int
  LeafID = int
+LN = Union[Leaf, Node]
  
  
  @dataclass
  class Line:
      """Holds leaves and comments. Can be printed with `str(line)`."""
  
-    mode: Mode
+    mode: Mode = field(repr=False)
      depth: int = 0
      leaves: List[Leaf] = field(default_factory=list)
      # keys ordered like `leaves`
@@ -75,7 +81,9 @@ class Line:
              # Note: at this point leaf.prefix should be empty except for
              # imports, for which we only preserve newlines.
              leaf.prefix += whitespace(
-                leaf, complex_subscript=self.is_complex_subscript(leaf)
+                leaf,
+                complex_subscript=self.is_complex_subscript(leaf),
+                mode=self.mode,
              )
          if self.inside_brackets or not preformatted or track_bracket:
              self.bracket_tracker.mark(leaf)
@@ -119,6 +127,11 @@ class Line:
          """Is this an import line?"""
          return bool(self) and is_import(self.leaves[0])
  
+    @property
+    def is_with_or_async_with_stmt(self) -> bool:
+        """Is this a `with` or `async with` statement line?"""
+        return bool(self) and is_with_or_async_with_stmt(self.leaves[0])
+
      @property
      def is_class(self) -> bool:
          """Is this line a class definition?"""
@@ -154,6 +167,13 @@ class Line:
              and second_leaf.value == "def"
          )
  
+    @property
+    def is_stub_def(self) -> bool:
+        """Is this line a function definition with a body consisting only of "..."?"""
+        return self.is_def and self.leaves[-4:] == [Leaf(token.COLON, ":")] + [
+            Leaf(token.DOT, ".") for _ in range(3)
+        ]
+
      @property
      def is_class_paren_empty(self) -> bool:
          """Is this a class with no base classes but using parentheses?
@@ -173,11 +193,16 @@ class Line:
      @property
      def is_triple_quoted_string(self) -> bool:
          """Is the line a triple quoted string?"""
-        return (
-            bool(self)
-            and self.leaves[0].type == token.STRING
-            and self.leaves[0].value.startswith(('"""', "'''"))
-        )
+        if not self or self.leaves[0].type != token.STRING:
+            return False
+        value = self.leaves[0].value
+        if value.startswith(('"""', "'''")):
+            return True
+        if Preview.accept_raw_docstrings in self.mode and value.startswith(
+            ("r'''", 'r"""', "R'''", 'R"""')
+        ):
+            return True
+        return False
  
      @property
      def opens_block(self) -> bool:
@@ -186,6 +211,26 @@ class Line:
              return False
          return self.leaves[-1].type == token.COLON
  
+    def is_fmt_pass_converted(
+        self, *, first_leaf_matches: Optional[Callable[[Leaf], bool]] = None
+    ) -> bool:
+        """Is this line converted from fmt off/skip code?
+
+        If first_leaf_matches is not None, it only returns True if the first
+        leaf of converted code matches.
+        """
+        if len(self.leaves) != 1:
+            return False
+        leaf = self.leaves[0]
+        if (
+            leaf.type != STANDALONE_COMMENT
+            or leaf.fmt_pass_converted_first_leaf is None
+        ):
+            return False
+        return first_leaf_matches is None or first_leaf_matches(
+            leaf.fmt_pass_converted_first_leaf
+        )
+
      def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
          """If so, needs to be split before emitting."""
          for leaf in self.leaves:
@@ -221,7 +266,7 @@ class Line:
              for comment in comments:
                  if is_type_comment(comment):
                      if comment_seen or (
-                        not is_type_comment(comment, " ignore")
+                        not is_type_ignore_comment(comment)
                          and leaf_id not in ignored_ids
                      ):
                          return True
@@ -258,7 +303,7 @@ class Line:
              # line.
              for node in self.leaves[-2:]:
                  for comment in self.comments.get(id(node), []):
-                    if is_type_comment(comment, " ignore"):
+                    if is_type_ignore_comment(comment):
                          return True
  
          return False
@@ -275,8 +320,7 @@ class Line:
          - it's not a single-element subscript
          Additionally, if ensure_removable:
          - it's not from square bracket indexing
-        (specifically, single-element square bracket indexing with
-        Preview.skip_magic_trailing_comma_in_subscript)
+        (specifically, single-element square bracket indexing)
          """
          if not (
              closing.type in CLOSING_BRACKETS
@@ -290,8 +334,7 @@ class Line:
  
          if closing.type == token.RSQB:
              if (
-                Preview.one_element_subscript in self.mode
-                and closing.parent
+                closing.parent
                  and closing.parent.type == syms.trailer
                  and closing.opening_bracket
                  and is_one_sequence_between(
@@ -309,18 +352,16 @@ class Line:
              comma = self.leaves[-1]
              if comma.parent is None:
                  return False
-            if Preview.skip_magic_trailing_comma_in_subscript in self.mode:
-                return (
-                    comma.parent.type != syms.subscriptlist
-                    or closing.opening_bracket is None
-                    or not is_one_sequence_between(
-                        closing.opening_bracket,
-                        closing,
-                        self.leaves,
-                        brackets=(token.LSQB, token.RSQB),
-                    )
+            return (
+                comma.parent.type != syms.subscriptlist
+                or closing.opening_bracket is None
+                or not is_one_sequence_between(
+                    closing.opening_bracket,
+                    closing,
+                    self.leaves,
+                    brackets=(token.LSQB, token.RSQB),
                  )
-            return comma.parent.type == syms.listmaker
+            )
  
          if self.is_import:
              return True
@@ -450,6 +491,17 @@ class Line:
          return bool(self.leaves or self.comments)
  
  
+@dataclass
+class RHSResult:
+    """Intermediate split result from a right hand split."""
+
+    head: Line
+    body: Line
+    tail: Line
+    opening_bracket: Leaf
+    closing_bracket: Leaf
+
+
  @dataclass
  class LinesBlock:
      """Class that holds information about a block of formatted lines.
@@ -485,7 +537,7 @@ class EmptyLineTracker:
      mode: Mode
      previous_line: Optional[Line] = None
      previous_block: Optional[LinesBlock] = None
-    previous_defs: List[int] = field(default_factory=list)
+    previous_defs: List[Line] = field(default_factory=list)
      semantic_leading_comment: Optional[LinesBlock] = None
  
      def maybe_empty_lines(self, current_line: Line) -> LinesBlock:
@@ -503,6 +555,15 @@ class EmptyLineTracker:
              if self.previous_line is None
              else before - previous_after
          )
+        if (
+            Preview.module_docstring_newlines in current_line.mode
+            and self.previous_block
+            and self.previous_block.previous_block is None
+            and len(self.previous_block.original_line.leaves) == 1
+            and self.previous_block.original_line.is_triple_quoted_string
+        ):
+            before = 1
+
          block = LinesBlock(
              mode=self.mode,
              previous_block=self.previous_block,
@@ -520,7 +581,8 @@ class EmptyLineTracker:
                  and (self.semantic_leading_comment is None or before)
              ):
                  self.semantic_leading_comment = block
-        elif not current_line.is_decorator:
+        # `or before` means this decorator already has an empty line before
+        elif not current_line.is_decorator or before:
              self.semantic_leading_comment = None
  
          self.previous_line = current_line
@@ -539,13 +601,26 @@ class EmptyLineTracker:
              first_leaf.prefix = ""
          else:
              before = 0
+
+        user_had_newline = bool(before)
          depth = current_line.depth
-        while self.previous_defs and self.previous_defs[-1] >= depth:
+
+        previous_def = None
+        while self.previous_defs and self.previous_defs[-1].depth >= depth:
+            previous_def = self.previous_defs.pop()
+
+        if previous_def is not None:
+            assert self.previous_line is not None
              if self.mode.is_pyi:
-                assert self.previous_line is not None
                  if depth and not current_line.is_def and self.previous_line.is_def:
                      # Empty lines between attributes and methods should be preserved.
-                    before = min(1, before)
+                    before = 1 if user_had_newline else 0
+                elif (
+                    Preview.blank_line_after_nested_stub_class in self.mode
+                    and previous_def.is_class
+                    and not previous_def.is_stub_class
+                ):
+                    before = 1
                  elif depth:
                      before = 0
                  else:
@@ -555,7 +630,7 @@ class EmptyLineTracker:
                      before = 1
                  elif (
                      not depth
-                    and self.previous_defs[-1]
+                    and previous_def.depth
                      and current_line.leaves[-1].type == token.COLON
                      and (
                          current_line.leaves[0].value
@@ -572,14 +647,17 @@ class EmptyLineTracker:
                      before = 1
                  else:
                      before = 2
-            self.previous_defs.pop()
+
          if current_line.is_decorator or current_line.is_def or current_line.is_class:
-            return self._maybe_empty_lines_for_class_or_def(current_line, before)
+            return self._maybe_empty_lines_for_class_or_def(
+                current_line, before, user_had_newline
+            )
  
          if (
              self.previous_line
              and self.previous_line.is_import
              and not current_line.is_import
+            and not current_line.is_fmt_pass_converted(first_leaf_matches=is_import)
              and depth == self.previous_line.depth
          ):
              return (before or 1), 0
@@ -589,21 +667,19 @@ class EmptyLineTracker:
              and self.previous_line.is_class
              and current_line.is_triple_quoted_string
          ):
+            if Preview.no_blank_line_before_class_docstring in current_line.mode:
+                return 0, 1
              return before, 1
  
-        if (
-            Preview.remove_block_trailing_newline in current_line.mode
-            and self.previous_line
-            and self.previous_line.opens_block
-        ):
+        if self.previous_line and self.previous_line.opens_block:
              return 0, 0
          return before, 0
  
-    def _maybe_empty_lines_for_class_or_def(
-        self, current_line: Line, before: int
+    def _maybe_empty_lines_for_class_or_def(  # noqa: C901
+        self, current_line: Line, before: int, user_had_newline: bool
      ) -> Tuple[int, int]:
          if not current_line.is_decorator:
-            self.previous_defs.append(current_line.depth)
+            self.previous_defs.append(current_line)
          if self.previous_line is None:
              # Don't insert empty lines before the first line in the file.
              return 0, 0
@@ -628,9 +704,7 @@ class EmptyLineTracker:
          ):
              slc = self.semantic_leading_comment
              if (
-                Preview.empty_lines_before_class_or_def_with_leading_comments
-                in current_line.mode
-                and slc is not None
+                slc is not None
                  and slc.previous_block is not None
                  and not slc.previous_block.original_line.is_class
                  and not slc.previous_block.original_line.opens_block
@@ -651,6 +725,17 @@ class EmptyLineTracker:
                      newlines = 0
                  else:
                      newlines = 1
+            # Remove case `self.previous_line.depth > current_line.depth` below when
+            # this becomes stable.
+            #
+            # Don't inspect the previous line if it's part of the body of the previous
+            # statement at the same level; we always want a blank line if there's
+            # something with a body preceding.
+            elif (
+                Preview.blank_line_between_nested_and_def_stub_file in current_line.mode
+                and self.previous_line.depth > current_line.depth
+            ):
+                newlines = 1
              elif (
                  current_line.is_def or current_line.is_decorator
              ) and not self.previous_line.is_def:
@@ -668,6 +753,14 @@ class EmptyLineTracker:
                  newlines = 0
          else:
              newlines = 1 if current_line.depth else 2
+            # If a user has left no space after a dummy implementation, don't insert
+            # new lines. This is useful for instance for @overload or Protocols.
+            if (
+                Preview.dummy_implementations in self.mode
+                and self.previous_line.is_stub_def
+                and not user_had_newline
+            ):
+                newlines = 0
          if comment_to_add_newlines is not None:
              previous_block = comment_to_add_newlines.previous_block
              if previous_block is not None:
@@ -710,18 +803,95 @@ def append_leaves(
              new_line.append(comment_leaf, preformatted=True)
  
  
-def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
-    """Return True if `line` is no longer than `line_length`.
-
+def is_line_short_enough(  # noqa: C901
+    line: Line, *, mode: Mode, line_str: str = ""
+) -> bool:
+    """For non-multiline strings, return True if `line` fits in `mode.line_length`.
+    For multiline strings, look at the context around `line` to determine
+    if it should be inlined or split up.
+
      """
      if not line_str:
          line_str = line_to_string(line)
-    return (
-        len(line_str) <= line_length
-        and "\n" not in line_str  # multiline strings
-        and not line.contains_standalone_comments()
-    )
+
+    width = str_width if mode.preview else len
+
+    if Preview.multiline_string_handling not in mode:
+        return (
+            width(line_str) <= mode.line_length
+            and "\n" not in line_str  # multiline strings
+            and not line.contains_standalone_comments()
+        )
+
+    if line.contains_standalone_comments():
+        return False
+    if "\n" not in line_str:
+        # No multiline strings (MLS) present
+        return width(line_str) <= mode.line_length
+
+    first, *_, last = line_str.split("\n")
+    if width(first) > mode.line_length or width(last) > mode.line_length:
+        return False
+
+    # Traverse the AST to examine the context of the multiline string (MLS),
+    # tracking aspects such as depth and comma existence,
+    # to determine whether to split the MLS or keep it together.
+    # Depth (which is based on the existing bracket_depth concept)
+    # is needed to determine nesting level of the MLS.
+    # Includes special case for trailing commas.
+    commas: List[int] = []  # tracks number of commas per depth level
+    multiline_string: Optional[Leaf] = None
+    # store the leaves that contain parts of the MLS
+    multiline_string_contexts: List[LN] = []
+
+    max_level_to_update: Union[int, float] = math.inf  # track the depth of the MLS
+    for i, leaf in enumerate(line.leaves):
+        if max_level_to_update == math.inf:
+            had_comma: Optional[int] = None
+            if leaf.bracket_depth + 1 > len(commas):
+                commas.append(0)
+            elif leaf.bracket_depth + 1 < len(commas):
+                had_comma = commas.pop()
+            if (
+                had_comma is not None
+                and multiline_string is not None
+                and multiline_string.bracket_depth == leaf.bracket_depth + 1
+            ):
+                # Have left the level with the MLS, stop tracking commas
+                max_level_to_update = leaf.bracket_depth
+                if had_comma > 0:
+                    # MLS was in parens with at least one comma - force split
+                    return False
+
+        if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
+            # Ignore non-nested trailing comma
+            # directly after MLS/MLS-containing expression
+            ignore_ctxs: List[Optional[LN]] = [None]
+            ignore_ctxs += multiline_string_contexts
+            if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
+                commas[leaf.bracket_depth] += 1
+        if max_level_to_update != math.inf:
+            max_level_to_update = min(max_level_to_update, leaf.bracket_depth)
+
+        if is_multiline_string(leaf):
+            if len(multiline_string_contexts) > 0:
+                # >1 multiline string cannot fit on a single line - force split
+                return False
+            multiline_string = leaf
+            ctx: LN = leaf
+            # fetch the leaf components of the MLS in the AST
+            while str(ctx) in line_str:
+                multiline_string_contexts.append(ctx)
+                if ctx.parent is None:
+                    break
+                ctx = ctx.parent
+
+    # May not have a triple-quoted multiline string at all,
+    # in case of a regular string with embedded newlines and line continuations
+    if len(multiline_string_contexts) == 0:
+        return True
+
+    return all(val == 0 for val in commas)
  
  
  def can_be_split(line: Line) -> bool:
@@ -761,25 +931,42 @@ def can_be_split(line: Line) -> bool:
  
  
  def can_omit_invisible_parens(
-    line: Line,
+    rhs: RHSResult,
      line_length: int,
  ) -> bool:
-    """Does `line` have a shape safe to reformat without optional parens around it?
+    """Does `rhs.body` have a shape safe to reformat without optional parens around it?
  
      Returns True for only a subset of potentially nice looking formattings but
      the point is to not return false positives that end up producing lines that
      are too long.
      """
+    line = rhs.body
      bt = line.bracket_tracker
      if not bt.delimiters:
          # Without delimiters the optional parentheses are useless.
          return True
  
      max_priority = bt.max_delimiter_priority()
-    if bt.delimiter_count_with_priority(max_priority) > 1:
+    delimiter_count = bt.delimiter_count_with_priority(max_priority)
+    if delimiter_count > 1:
          # With more than one delimiter of a kind the optional parentheses read better.
          return False
  
+    if delimiter_count == 1:
+        if (
+            Preview.wrap_multiple_context_managers_in_parens in line.mode
+            and max_priority == COMMA_PRIORITY
+            and rhs.head.is_with_or_async_with_stmt
+        ):
+            # For a with statement with two context managers, the optional
+            # parentheses read better. In this case, `rhs.body` is the context
+            # managers part of the with statement. `rhs.head` is the `with (` part
+            # on the previous line.
+            return False
+        # Otherwise it may also read better, but we don't do it today and it requires
+        # careful consideration of all possible cases. See
+        # https://github.com/psf/black/issues/2156.
+
      if max_priority == DOT_PRIORITY:
          # A single stranded method call doesn't require optional parentheses.
          return True