]> git.madduck.net Git - etc/vim.git/commitdiff

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Fix determination of f-string expression spans (#2654)
authorShantanu <12621235+hauntsaninja@users.noreply.github.com>
Wed, 1 Dec 2021 17:52:24 +0000 (09:52 -0800)
committerGitHub <noreply@github.com>
Wed, 1 Dec 2021 17:52:24 +0000 (09:52 -0800)
Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
CHANGES.md
src/black/trans.py
tests/test_trans.py [new file with mode: 0644]

index c0cf60af98aefdafe7eed9cd9eff15ee0a7996f9..5b648225ad7d679c7caac9caedd98627efcfe45a 100644 (file)
@@ -15,6 +15,7 @@
   times, like `match re.match()` (#2661)
 - Fix assignment to environment variables in Jupyter Notebooks (#2642)
 - Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653)
+- Fix determination of f-string expression spans (#2654)
 - Fix parser error location on invalid syntax in a `match` statement (#2649)
 
 ## 21.11b1
index a4d1e6fbc7990f3813dfcc469a906af519252eb7..6aca3a8733f1f2f74624e6ad7aaba3e9e70313c6 100644 (file)
@@ -942,6 +942,57 @@ class BaseStringSplitter(StringTransformer):
         return max_string_length
 
 
+def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
+    """
+    Yields spans corresponding to expressions in a given f-string.
+    Spans are half-open ranges (left inclusive, right exclusive).
+    Assumes the input string is a valid f-string, but will not crash if the input
+    string is invalid.
+    """
+    stack: List[int] = []  # our curly paren stack
+    i = 0
+    while i < len(s):
+        if s[i] == "{":
+            # if we're in a string part of the f-string, ignore escaped curly braces
+            if not stack and i + 1 < len(s) and s[i + 1] == "{":
+                i += 2
+                continue
+            stack.append(i)
+            i += 1
+            continue
+
+        if s[i] == "}":
+            if not stack:
+                i += 1
+                continue
+            j = stack.pop()
+            # we've made it back out of the expression! yield the span
+            if not stack:
+                yield (j, i + 1)
+            i += 1
+            continue
+
+        # if we're in an expression part of the f-string, fast forward through strings
+        # note that backslashes are not legal in the expression portion of f-strings
+        if stack:
+            delim = None
+            if s[i : i + 3] in ("'''", '"""'):
+                delim = s[i : i + 3]
+            elif s[i] in ("'", '"'):
+                delim = s[i]
+            if delim:
+                i += len(delim)
+                while i < len(s) and s[i : i + len(delim)] != delim:
+                    i += 1
+                i += len(delim)
+                continue
+        i += 1
+
+
+def fstring_contains_expr(s: str) -> bool:
+    return any(iter_fexpr_spans(s))
+
+
 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
     """
     StringTransformer that splits "atom" strings (i.e. strings which exist on
@@ -981,17 +1032,6 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
     """
 
     MIN_SUBSTR_SIZE: Final = 6
-    # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
-    RE_FEXPR: Final = r"""
-    (?<!\{) (?:\{\{)* \{ (?!\{)
-        (?:
-            [^\{\}]
-            | \{\{
-            | \}\}
-            | (?R)
-        )+
-    \}
-    """
 
     def do_splitter_match(self, line: Line) -> TMatchResult:
         LL = line.leaves
@@ -1058,8 +1098,8 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
         # contain any f-expressions, but ONLY if the original f-string
         # contains at least one f-expression. Otherwise, we will alter the AST
         # of the program.
-        drop_pointless_f_prefix = ("f" in prefix) and re.search(
-            self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
+        drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
+            LL[string_idx].value
         )
 
         first_string_line = True
@@ -1299,9 +1339,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
         """
         if "f" not in get_string_prefix(string).lower():
             return
-
-        for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
-            yield match.span()
+        yield from iter_fexpr_spans(string)
 
     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
         illegal_indices: Set[Index] = set()
@@ -1417,7 +1455,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
         """
         assert_is_leaf_string(string)
 
-        if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
+        if "f" in prefix and not fstring_contains_expr(string):
             new_prefix = prefix.replace("f", "")
 
             temp = string[len(prefix) :]
diff --git a/tests/test_trans.py b/tests/test_trans.py
new file mode 100644 (file)
index 0000000..a1666a9
--- /dev/null
@@ -0,0 +1,50 @@
+from typing import List, Tuple
+from black.trans import iter_fexpr_spans
+
+
+def test_fexpr_spans() -> None:
+    def check(
+        string: str, expected_spans: List[Tuple[int, int]], expected_slices: List[str]
+    ) -> None:
+        spans = list(iter_fexpr_spans(string))
+
+        # Checking slices isn't strictly necessary, but it's easier to verify at
+        # a glance than only spans
+        assert len(spans) == len(expected_slices)
+        for (i, j), slice in zip(spans, expected_slices):
+            assert len(string[i:j]) == j - i
+            assert string[i:j] == slice
+
+        assert spans == expected_spans
+
+    # Most of these test cases omit the leading 'f' and leading / closing quotes
+    # for convenience
+    # Some additional property-based tests can be found in
+    # https://github.com/psf/black/pull/2654#issuecomment-981411748
+    check("""{var}""", [(0, 5)], ["{var}"])
+    check("""f'{var}'""", [(2, 7)], ["{var}"])
+    check("""f'{1 + f() + 2 + "asdf"}'""", [(2, 24)], ["""{1 + f() + 2 + "asdf"}"""])
+    check("""text {var} text""", [(5, 10)], ["{var}"])
+    check("""text {{ {var} }} text""", [(8, 13)], ["{var}"])
+    check("""{a} {b} {c}""", [(0, 3), (4, 7), (8, 11)], ["{a}", "{b}", "{c}"])
+    check("""f'{a} {b} {c}'""", [(2, 5), (6, 9), (10, 13)], ["{a}", "{b}", "{c}"])
+    check("""{ {} }""", [(0, 6)], ["{ {} }"])
+    check("""{ {{}} }""", [(0, 8)], ["{ {{}} }"])
+    check("""{ {{{}}} }""", [(0, 10)], ["{ {{{}}} }"])
+    check("""{{ {{{}}} }}""", [(5, 7)], ["{}"])
+    check("""{{ {{{var}}} }}""", [(5, 10)], ["{var}"])
+    check("""{f"{0}"}""", [(0, 8)], ["""{f"{0}"}"""])
+    check("""{"'"}""", [(0, 5)], ["""{"'"}"""])
+    check("""{"{"}""", [(0, 5)], ["""{"{"}"""])
+    check("""{"}"}""", [(0, 5)], ["""{"}"}"""])
+    check("""{"{{"}""", [(0, 6)], ["""{"{{"}"""])
+    check("""{''' '''}""", [(0, 9)], ["""{''' '''}"""])
+    check("""{'''{'''}""", [(0, 9)], ["""{'''{'''}"""])
+    check("""{''' {'{ '''}""", [(0, 13)], ["""{''' {'{ '''}"""])
+    check(
+        '''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'y\\'\'\'\'''',
+        [(5, 33)],
+        ['''{f"""*{f"+{f'.{x}.'}+"}*"""}'''],
+    )
+    check(r"""{}{""", [(0, 2)], ["{}"])
+    check("""f"{'{'''''''''}\"""", [(2, 15)], ["{'{'''''''''}"])