Remove blib2to3 grammar cache logging (#3193)

[etc/vim.git] / src / black / trans.py
diff --git a/src/black/trans.py b/src/black/trans.py

index a4d1e6fbc7990f3813dfcc469a906af519252eb7..dc9c5520d5bdfcaf7e56bd8c93fae537e779f0ed 100644 (file)
--- a/src/black/trans.py
+++ b/src/black/trans.py
@@ -1,10 +1,11 @@
  """
  String transformers that can split and merge strings.
  """
+import re
+import sys
  from abc import ABC, abstractmethod
  from collections import defaultdict
  from dataclasses import dataclass
-import regex as re  # We need recursive patterns here (?R)
  from typing import (
      Any,
      Callable,
@@ -21,29 +22,38 @@ from typing import (
      TypeVar,
      Union,
  )
-import sys
  
  if sys.version_info < (3, 8):
-    from typing_extensions import Final
+    from typing_extensions import Final, Literal
  else:
-    from typing import Final
+    from typing import Literal, Final
  
  from mypy_extensions import trait
  
-from black.rusty import Result, Ok, Err
-
-from black.mode import Feature
-from black.nodes import syms, replace_child, parent_type
-from black.nodes import is_empty_par, is_empty_lpar, is_empty_rpar
-from black.nodes import OPENING_BRACKETS, CLOSING_BRACKETS, STANDALONE_COMMENT
-from black.lines import Line, append_leaves
  from black.brackets import BracketMatchError
  from black.comments import contains_pragma_comment
-from black.strings import has_triple_quotes, get_string_prefix, assert_is_leaf_string
-from black.strings import normalize_string_quotes
-
-from blib2to3.pytree import Leaf, Node
+from black.lines import Line, append_leaves
+from black.mode import Feature
+from black.nodes import (
+    CLOSING_BRACKETS,
+    OPENING_BRACKETS,
+    STANDALONE_COMMENT,
+    is_empty_lpar,
+    is_empty_par,
+    is_empty_rpar,
+    parent_type,
+    replace_child,
+    syms,
+)
+from black.rusty import Err, Ok, Result
+from black.strings import (
+    assert_is_leaf_string,
+    get_string_prefix,
+    has_triple_quotes,
+    normalize_string_quotes,
+)
  from blib2to3.pgen2 import token
+from blib2to3.pytree import Leaf, Node
  
  
  class CannotTransform(Exception):
@@ -71,6 +81,84 @@ def TErr(err_msg: str) -> Err[CannotTransform]:
      return Err(cant_transform)
  
  
+def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]:
+    """Yield a clone of `line` where `**` hugs its operands if both are simple."""
+
+    # Fast path: raise early when there is no "**", avoiding needless Leaf clones.
+    for leaf in line.leaves:
+        if leaf.type == token.DOUBLESTAR:
+            break
+    else:
+        raise CannotTransform("No doublestar token was found in the line.")
+
+    def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
+        # Brackets and parentheses indicate calls, subscripts, etc. ...
+        # basically stuff that doesn't count as "simple". Only a NAME lookup
+        # or dotted lookup (eg. NAME.NAME) is OK.
+        if step == -1:
+            disallowed = {token.RPAR, token.RSQB}
+        else:
+            disallowed = {token.LPAR, token.LSQB}
+
+        while 0 <= index < len(line.leaves):
+            current = line.leaves[index]
+            if current.type in disallowed:
+                return False
+            if current.type not in {token.NAME, token.DOT} or current.value == "for":
+                # If the current token isn't disallowed, we'll assume this is simple as
+                # only the disallowed tokens are semantically attached to this lookup
+                # expression we're checking. Also, stop early if we hit the 'for' bit
+                # of a comprehension.
+                return True
+
+            index += step
+
+        return True
+
+    def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
+        # An operand is considered "simple" if it's a NAME, a numeric CONSTANT, or a
+        # simple lookup (see above), with or without a preceding unary operator.
+        start = line.leaves[index]
+        if start.type in {token.NAME, token.NUMBER}:
+            return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
+
+        if start.type in {token.PLUS, token.MINUS, token.TILDE}:
+            if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
+                # step is always one as bases with a preceding unary op will be checked
+                # for simplicity starting from the next token (so it'll hit the check
+                # above).
+                return is_simple_lookup(index + 1, step=1)
+
+        return False
+
+    new_line = line.clone()
+    should_hug = False
+    for idx, leaf in enumerate(line.leaves):
+        new_leaf = leaf.clone()
+        if should_hug:  # carried over: the previous leaf was a "**" to hug
+            new_leaf.prefix = ""
+            should_hug = False
+
+        should_hug = (
+            (0 < idx < len(line.leaves) - 1)
+            and leaf.type == token.DOUBLESTAR
+            and is_simple_operand(idx - 1, kind="base")
+            and line.leaves[idx - 1].value != "lambda"  # e.g. "lambda **kwargs"
+            and is_simple_operand(idx + 1, kind="exponent")
+        )
+        if should_hug:  # this leaf is itself a "**" that should hug its base
+            new_leaf.prefix = ""
+
+        # We have to be careful to make a new line properly:
+        # - bracket related metadata must be maintained (handled by Line.append)
+        # - comments need to be copied over, updating the leaf IDs they're attached to
+        new_line.append(new_leaf, preformatted=True)
+        for comment_leaf in line.comments_after(leaf):
+            new_line.append(comment_leaf, preformatted=True)
+
+    yield new_line
+
+
  class StringTransformer(ABC):
      """
      An implementation of the Transformer protocol that relies on its
@@ -283,7 +371,7 @@ class StringMerger(StringTransformer, CustomSplitMapMixin):
  
          is_valid_index = is_valid_index_factory(LL)
  
-        for (i, leaf) in enumerate(LL):
+        for i, leaf in enumerate(LL):
              if (
                  leaf.type == token.STRING
                  and is_valid_index(i + 1)
@@ -453,7 +541,7 @@ class StringMerger(StringTransformer, CustomSplitMapMixin):
              # with 'f'...
              if "f" in prefix and "f" not in next_prefix:
                  # Then we must escape any braces contained in this substring.
-                SS = re.subf(r"(\{|\})", "{1}{1}", SS)
+                SS = re.sub(r"(\{|\})", r"\1\1", SS)
  
              NSS = make_naked(SS, next_prefix)
  
@@ -488,7 +576,7 @@ class StringMerger(StringTransformer, CustomSplitMapMixin):
  
          # Build the final line ('new_line') that this method will later return.
          new_line = line.clone()
-        for (i, leaf) in enumerate(LL):
+        for i, leaf in enumerate(LL):
              if i == string_idx:
                  new_line.append(string_leaf)
  
@@ -609,7 +697,7 @@ class StringParenStripper(StringTransformer):
  
          is_valid_index = is_valid_index_factory(LL)
  
-        for (idx, leaf) in enumerate(LL):
+        for idx, leaf in enumerate(LL):
              # Should be a string...
              if leaf.type != token.STRING:
                  continue
@@ -942,6 +1030,57 @@ class BaseStringSplitter(StringTransformer):
          return max_string_length
  
  
+def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
+    """
+    Yields (start, end) index pairs of the outermost `{...}` expressions in `s`.
+    Spans are half-open ranges (left inclusive, right exclusive).
+    Assumes the input string is a valid f-string, but will not crash if the input
+    string is invalid.
+    """
+    stack: List[int] = []  # indices of "{" characters that are still unclosed
+    i = 0
+    while i < len(s):
+        if s[i] == "{":
+            # if we're in a string part of the f-string, ignore escaped curly braces
+            if not stack and i + 1 < len(s) and s[i + 1] == "{":
+                i += 2
+                continue
+            stack.append(i)
+            i += 1
+            continue
+
+        if s[i] == "}":
+            if not stack:  # "}" outside any expression (e.g. an escaped "}}")
+                i += 1
+                continue
+            j = stack.pop()
+            # only the outermost pair yields a span; nested braces fall inside it
+            if not stack:
+                yield (j, i + 1)
+            i += 1
+            continue
+
+        # if we're in an expression part of the f-string, fast forward through strings
+        # NOTE: backslashes are illegal in f-string expressions before 3.12 (PEP 701)
+        if stack:
+            delim = None
+            if s[i : i + 3] in ("'''", '"""'):
+                delim = s[i : i + 3]
+            elif s[i] in ("'", '"'):
+                delim = s[i]
+            if delim:
+                i += len(delim)
+                while i < len(s) and s[i : i + len(delim)] != delim:
+                    i += 1
+                i += len(delim)  # step past the closing delimiter, if one was found
+                continue
+        i += 1
+
+
+def fstring_contains_expr(s: str) -> bool:
+    return any(iter_fexpr_spans(s))  # each span is a 2-tuple, hence always truthy
+
+
  class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
      """
      StringTransformer that splits "atom" strings (i.e. strings which exist on
@@ -981,17 +1120,6 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
      """
  
      MIN_SUBSTR_SIZE: Final = 6
-    # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
-    RE_FEXPR: Final = r"""
-    (?<!\{) (?:\{\{)* \{ (?!\{)
-        (?:
-            [^\{\}]
-            | \{\{
-            | \}\}
-            | (?R)
-        )+
-    \}
-    """
  
      def do_splitter_match(self, line: Line) -> TMatchResult:
          LL = line.leaves
@@ -1058,8 +1186,8 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
          # contain any f-expressions, but ONLY if the original f-string
          # contains at least one f-expression. Otherwise, we will alter the AST
          # of the program.
-        drop_pointless_f_prefix = ("f" in prefix) and re.search(
-            self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
+        drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
+            LL[string_idx].value
          )
  
          first_string_line = True
@@ -1299,9 +1427,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
          """
          if "f" not in get_string_prefix(string).lower():
              return
-
-        for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
-            yield match.span()
+        yield from iter_fexpr_spans(string)
  
      def _get_illegal_split_indices(self, string: str) -> Set[Index]:
          illegal_indices: Set[Index] = set()
@@ -1417,7 +1543,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
          """
          assert_is_leaf_string(string)
  
-        if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
+        if "f" in prefix and not fstring_contains_expr(string):
              new_prefix = prefix.replace("f", "")
  
              temp = string[len(prefix) :]
@@ -1593,7 +1719,7 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
          if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
              is_valid_index = is_valid_index_factory(LL)
  
-            for (i, leaf) in enumerate(LL):
+            for i, leaf in enumerate(LL):
                  # We MUST find a comma...
                  if leaf.type == token.COMMA:
                      idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
@@ -1631,7 +1757,7 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
          ):
              is_valid_index = is_valid_index_factory(LL)
  
-            for (i, leaf) in enumerate(LL):
+            for i, leaf in enumerate(LL):
                  # We MUST find either an '=' or '+=' symbol...
                  if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                      idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
@@ -1674,7 +1800,7 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
          if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
              is_valid_index = is_valid_index_factory(LL)
  
-            for (i, leaf) in enumerate(LL):
+            for i, leaf in enumerate(LL):
                  # We MUST find a colon...
                  if leaf.type == token.COLON:
                      idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1