]> git.madduck.net Git - etc/vim.git/blobdiff - src/black/trans.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

primer: Hypothesis now requires Python>=3.8 (GH-2602)
[etc/vim.git] / src / black / trans.py
index 80e88a2d2fb78d834ed281256b8b3dd97511a16a..023dcd3618a5c0edb6b6f5e7e91e09bb6d3add30 100644 (file)
@@ -15,6 +15,7 @@ from typing import (
     List,
     Optional,
     Sequence,
+    Set,
     Tuple,
     TypeVar,
     Union,
@@ -411,7 +412,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
             and is_valid_index(next_str_idx)
             and LL[next_str_idx].type == token.STRING
         ):
-            prefix = get_string_prefix(LL[next_str_idx].value)
+            prefix = get_string_prefix(LL[next_str_idx].value).lower()
             next_str_idx += 1
 
         # The next loop merges the string group. The final string will be
@@ -431,7 +432,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
             num_of_strings += 1
 
             SS = LL[next_str_idx].value
-            next_prefix = get_string_prefix(SS)
+            next_prefix = get_string_prefix(SS).lower()
 
             # If this is an f-string group but this substring is not prefixed
             # with 'f'...
@@ -541,7 +542,7 @@ class StringMerger(CustomSplitMapMixin, StringTransformer):
                 return TErr("StringMerger does NOT merge multiline strings.")
 
             num_of_strings += 1
-            prefix = get_string_prefix(leaf.value)
+            prefix = get_string_prefix(leaf.value).lower()
             if "r" in prefix:
                 return TErr("StringMerger does NOT merge raw strings.")
 
@@ -738,6 +739,18 @@ class BaseStringSplitter(StringTransformer):
         * The target string is not a multiline (i.e. triple-quote) string.
     """
 
+    STRING_OPERATORS = [
+        token.EQEQUAL,
+        token.GREATER,
+        token.GREATEREQUAL,
+        token.LESS,
+        token.LESSEQUAL,
+        token.NOTEQUAL,
+        token.PERCENT,
+        token.PLUS,
+        token.STAR,
+    ]
+
     @abstractmethod
     def do_splitter_match(self, line: Line) -> TMatchResult:
         """
@@ -847,15 +860,15 @@ class BaseStringSplitter(StringTransformer):
                 p_idx -= 1
 
             P = LL[p_idx]
-            if P.type == token.PLUS:
-                # WMA4 a space and a '+' character (e.g. `+ STRING`).
-                offset += 2
+            if P.type in self.STRING_OPERATORS:
+                # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
+                offset += len(str(P)) + 1
 
             if P.type == token.COMMA:
                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
                 offset += 3
 
-            if P.type in [token.COLON, token.EQUAL, token.NAME]:
+            if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
                 # This conditional branch is meant to handle dictionary keys,
                 # variable assignments, 'return STRING' statement lines, and
                 # 'else STRING' ternary expression lines.
@@ -952,16 +965,6 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
         CustomSplit objects and add them to the custom split map.
     """
 
-    STRING_OPERATORS = [
-        token.PLUS,
-        token.STAR,
-        token.EQEQUAL,
-        token.NOTEQUAL,
-        token.LESS,
-        token.LESSEQUAL,
-        token.GREATER,
-        token.GREATEREQUAL,
-    ]
     MIN_SUBSTR_SIZE = 6
     # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
     RE_FEXPR = r"""
@@ -971,8 +974,8 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
             | \{\{
             | \}\}
             | (?R)
-        )+?
-    (?<!\}) \} (?:\}\})* (?!\})
+        )+
+    \}
     """
 
     def do_splitter_match(self, line: Line) -> TMatchResult:
@@ -1034,7 +1037,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
         is_valid_index = is_valid_index_factory(LL)
         insert_str_child = insert_str_child_factory(LL[string_idx])
 
-        prefix = get_string_prefix(LL[string_idx].value)
+        prefix = get_string_prefix(LL[string_idx].value).lower()
 
         # We MAY choose to drop the 'f' prefix from substrings that don't
         # contain any f-expressions, but ONLY if the original f-string
@@ -1149,21 +1152,32 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
 
             # --- Construct `next_value`
             next_value = rest_value[:break_idx] + QUOTE
+
+            # HACK: The following 'if' statement is a hack to fix the custom
+            # breakpoint index in the case of either: (a) substrings that were
+            # f-strings but will have the 'f' prefix removed OR (b) substrings
+            # that were not f-strings but will now become f-strings because of
+            # redundant use of the 'f' prefix (i.e. none of the substrings
+            # contain f-expressions but one or more of them had the 'f' prefix
+            # anyway; in which case, we will prepend 'f' to _all_ substrings).
+            #
+            # There is probably a better way to accomplish what is being done
+            # here...
+            #
+            # If this substring is an f-string, we _could_ remove the 'f'
+            # prefix, and the current custom split did NOT originally use a
+            # prefix...
             if (
-                # Are we allowed to try to drop a pointless 'f' prefix?
-                drop_pointless_f_prefix
-                # If we are, will we be successful?
-                and next_value != self._normalize_f_string(next_value, prefix)
+                next_value != self._normalize_f_string(next_value, prefix)
+                and use_custom_breakpoints
+                and not csplit.has_prefix
             ):
-                # If the current custom split did NOT originally use a prefix,
-                # then `csplit.break_idx` will be off by one after removing
+                # Then `csplit.break_idx` will be off by one after removing
                 # the 'f' prefix.
-                break_idx = (
-                    break_idx + 1
-                    if use_custom_breakpoints and not csplit.has_prefix
-                    else break_idx
-                )
+                break_idx += 1
                 next_value = rest_value[:break_idx] + QUOTE
+
+            if drop_pointless_f_prefix:
                 next_value = self._normalize_f_string(next_value, prefix)
 
             # --- Construct `next_leaf`
@@ -1230,6 +1244,61 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
             last_line.comments = line.comments.copy()
             yield Ok(last_line)
 
+    def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
+        """
+        Yields:
+            All ranges of @string which, if @string were to be split there,
+            would result in the splitting of an \\N{...} expression (which is NOT
+            allowed).
+        """
+        # True - the previous backslash was unescaped
+        # False - the previous backslash was escaped *or* there was no backslash
+        previous_was_unescaped_backslash = False
+        it = iter(enumerate(string))
+        for idx, c in it:
+            if c == "\\":
+                previous_was_unescaped_backslash = not previous_was_unescaped_backslash
+                continue
+            if not previous_was_unescaped_backslash or c != "N":
+                previous_was_unescaped_backslash = False
+                continue
+            previous_was_unescaped_backslash = False
+
+            begin = idx - 1  # the position of backslash before \N{...}
+            for idx, c in it:
+                if c == "}":
+                    end = idx
+                    break
+            else:
+                # malformed nameescape expression?
+                # should have been detected by AST parsing earlier...
+                raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
+            yield begin, end
+
+    def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
+        """
+        Yields:
+            All ranges of @string which, if @string were to be split there,
+            would result in the splitting of an f-expression (which is NOT
+            allowed).
+        """
+        if "f" not in get_string_prefix(string).lower():
+            return
+
+        for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
+            yield match.span()
+
+    def _get_illegal_split_indices(self, string: str) -> Set[Index]:
+        illegal_indices: Set[Index] = set()
+        iterators = [
+            self._iter_fexpr_slices(string),
+            self._iter_nameescape_slices(string),
+        ]
+        for it in iterators:
+            for begin, end in it:
+                illegal_indices.update(range(begin, end + 1))
+        return illegal_indices
+
     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
         """
         This method contains the algorithm that StringSplitter uses to
@@ -1259,40 +1328,15 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
         assert is_valid_index(max_break_idx)
         assert_is_leaf_string(string)
 
-        _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
+        _illegal_split_indices = self._get_illegal_split_indices(string)
 
-        def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
-            """
-            Yields:
-                All ranges of @string which, if @string were to be split there,
-                would result in the splitting of an f-expression (which is NOT
-                allowed).
-            """
-            nonlocal _fexpr_slices
-
-            if _fexpr_slices is None:
-                _fexpr_slices = []
-                for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
-                    _fexpr_slices.append(match.span())
-
-            yield from _fexpr_slices
-
-        is_fstring = "f" in get_string_prefix(string)
-
-        def breaks_fstring_expression(i: Index) -> bool:
+        def breaks_unsplittable_expression(i: Index) -> bool:
             """
             Returns:
                 True iff returning @i would result in the splitting of an
-                f-expression (which is NOT allowed).
+                unsplittable expression (which is NOT allowed).
             """
-            if not is_fstring:
-                return False
-
-            for (start, end) in fexpr_slices():
-                if start <= i < end:
-                    return True
-
-            return False
+            return i in _illegal_split_indices
 
         def passes_all_checks(i: Index) -> bool:
             """
@@ -1316,7 +1360,7 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
                 is_space
                 and is_not_escaped
                 and is_big_enough
-                and not breaks_fstring_expression(i)
+                and not breaks_unsplittable_expression(i)
             )
 
         # First, we check all indices BELOW @max_break_idx.