From f1813e31b6deed0901c8a7fb1f102b9af53de351 Mon Sep 17 00:00:00 2001
From: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Date: Wed, 1 Dec 2021 09:52:24 -0800
Subject: [PATCH] Fix determination of f-string expression spans (#2654)

Co-authored-by: Jelle Zijlstra
---
 CHANGES.md          |  1 +
 src/black/trans.py  | 72 ++++++++++++++++++++++++++++++++++-----------
 tests/test_trans.py | 50 +++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 17 deletions(-)
 create mode 100644 tests/test_trans.py

diff --git a/CHANGES.md b/CHANGES.md
index c0cf60a..5b64822 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -15,6 +15,7 @@
   times, like `match re.match()` (#2661)
 - Fix assignment to environment variables in Jupyter Notebooks (#2642)
 - Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653)
+- Fix determination of f-string expression spans (#2654)
 - Fix parser error location on invalid syntax in a `match` statement (#2649)
 
 ## 21.11b1
diff --git a/src/black/trans.py b/src/black/trans.py
index a4d1e6f..6aca3a8 100644
--- a/src/black/trans.py
+++ b/src/black/trans.py
@@ -942,6 +942,57 @@ class BaseStringSplitter(StringTransformer):
         return max_string_length
 
 
+def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
+    """
+    Yields spans corresponding to expressions in a given f-string.
+    Spans are half-open ranges (left inclusive, right exclusive).
+    Assumes the input string is a valid f-string, but will not crash if the input
+    string is invalid.
+    """
+    stack: List[int] = []  # our curly paren stack
+    i = 0
+    while i < len(s):
+        if s[i] == "{":
+            # if we're in a string part of the f-string, ignore escaped curly braces
+            if not stack and i + 1 < len(s) and s[i + 1] == "{":
+                i += 2
+                continue
+            stack.append(i)
+            i += 1
+            continue
+
+        if s[i] == "}":
+            if not stack:
+                i += 1
+                continue
+            j = stack.pop()
+            # we've made it back out of the expression! yield the span
+            if not stack:
+                yield (j, i + 1)
+            i += 1
+            continue
+
+        # if we're in an expression part of the f-string, fast forward through strings
+        # note that backslashes are not legal in the expression portion of f-strings
+        if stack:
+            delim = None
+            if s[i : i + 3] in ("'''", '"""'):
+                delim = s[i : i + 3]
+            elif s[i] in ("'", '"'):
+                delim = s[i]
+            if delim:
+                i += len(delim)
+                while i < len(s) and s[i : i + len(delim)] != delim:
+                    i += 1
+                i += len(delim)
+                continue
+        i += 1
+
+
+def fstring_contains_expr(s: str) -> bool:
+    return any(iter_fexpr_spans(s))
+
+
 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
     """
     StringTransformer that splits "atom" strings (i.e. strings which exist on
@@ -981,17 +1032,6 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
     """
 
     MIN_SUBSTR_SIZE: Final = 6
-    # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
-    RE_FEXPR: Final = r"""
-    (?<!\{) (?:\{\{)* \{ (?!\{)
-        (?:
-            [^\{\}]
-            | \{\{
-            | \}\}
-            | (?R)
-        )+
-    \}
-    """
 
     def do_splitter_match(self, line: Line) -> TMatchResult:
         LL = line.leaves
@@ -1058,8 +1098,8 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
         # contain any f-expressions, but ONLY if the original f-string
         # contains at least one f-expression. Otherwise, we will alter the AST
         # of the program.
-        drop_pointless_f_prefix = ("f" in prefix) and re.search(
-            self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
+        drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
+            LL[string_idx].value
         )
 
         first_string_line = True
@@ -1299,9 +1339,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
         """
         if "f" not in get_string_prefix(string).lower():
             return
-
-        for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
-            yield match.span()
+        yield from iter_fexpr_spans(string)
 
     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
         illegal_indices: Set[Index] = set()
@@ -1417,7 +1455,7 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
         """
         assert_is_leaf_string(string)
 
-        if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
+        if "f" in prefix and not fstring_contains_expr(string):
             new_prefix = prefix.replace("f", "")
 
             temp = string[len(prefix) :]
diff --git a/tests/test_trans.py b/tests/test_trans.py
new file mode 100644
index 0000000..a1666a9
--- /dev/null
+++ b/tests/test_trans.py
@@ -0,0 +1,50 @@
+from typing import List, Tuple
+from black.trans import iter_fexpr_spans
+
+
+def test_fexpr_spans() -> None:
+    def check(
+        string: str, expected_spans: List[Tuple[int, int]], expected_slices: List[str]
+    ) -> None:
+        spans = list(iter_fexpr_spans(string))
+
+        # Checking slices isn't strictly necessary, but it's easier to verify at
+        # a glance than only spans
+        assert len(spans) == len(expected_slices)
+        for (i, j), slice in zip(spans, expected_slices):
+            assert len(string[i:j]) == j - i
+            assert string[i:j] == slice
+
+        assert spans == expected_spans
+
+    # Most of these test cases omit the leading 'f' and leading / closing quotes
+    # for convenience
+    # Some additional property-based tests can be found in
+    # https://github.com/psf/black/pull/2654#issuecomment-981411748
+    check("""{var}""", [(0, 5)], ["{var}"])
+    check("""f'{var}'""", [(2, 7)], ["{var}"])
+    check("""f'{1 + f() + 2 + "asdf"}'""", [(2, 24)], ["""{1 + f() + 2 + "asdf"}"""])
+    check("""text {var} text""", [(5, 10)], ["{var}"])
+    check("""text {{ {var} }} text""", [(8, 13)], ["{var}"])
+    check("""{a} {b} {c}""", [(0, 3), (4, 7), (8, 11)], ["{a}", "{b}", "{c}"])
+    check("""f'{a} {b} {c}'""", [(2, 5), (6, 9), (10, 13)], ["{a}", "{b}", "{c}"])
+    check("""{ {} }""", [(0, 6)], ["{ {} }"])
+    check("""{ {{}} }""", [(0, 8)], ["{ {{}} }"])
+    check("""{ {{{}}} }""", [(0, 10)], ["{ {{{}}} }"])
+    check("""{{ {{{}}} }}""", [(5, 7)], ["{}"])
+    check("""{{ {{{var}}} }}""", [(5, 10)], ["{var}"])
+    check("""{f"{0}"}""", [(0, 8)], ["""{f"{0}"}"""])
+    check("""{"'"}""", [(0, 5)], ["""{"'"}"""])
+    check("""{"{"}""", [(0, 5)], ["""{"{"}"""])
+    check("""{"}"}""", [(0, 5)], ["""{"}"}"""])
+    check("""{"{{"}""", [(0, 6)], ["""{"{{"}"""])
+    check("""{''' '''}""", [(0, 9)], ["""{''' '''}"""])
+    check("""{'''{'''}""", [(0, 9)], ["""{'''{'''}"""])
+    check("""{''' {'{ '''}""", [(0, 13)], ["""{''' {'{ '''}"""])
+    check(
+        '''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'y\\'\'\'\'''',
+        [(5, 33)],
+        ['''{f"""*{f"+{f'.{x}.'}+"}*"""}'''],
+    )
+    check(r"""{}{""", [(0, 2)], ["{}"])
+    check("""f"{'{'''''''''}\"""", [(2, 15)], ["{'{'''''''''}"])
-- 
2.39.5
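
For reference, here is a minimal usage sketch (not part of the patch itself) showing how the helpers added above behave. It assumes a Black checkout with this patch applied, so that iter_fexpr_spans and fstring_contains_expr are importable from black.trans; the inputs and expected outputs mirror the cases pinned down in tests/test_trans.py.

    # Illustrative usage of the helpers added by this patch (assumes the patch is applied).
    from black.trans import fstring_contains_expr, iter_fexpr_spans

    s = "f'{a} {b} {c}'"

    # Spans are half-open (start inclusive, end exclusive) indices into the raw
    # string, matching the expectations in tests/test_trans.py.
    print(list(iter_fexpr_spans(s)))                 # [(2, 5), (6, 9), (10, 13)]
    print([s[i:j] for i, j in iter_fexpr_spans(s)])  # ['{a}', '{b}', '{c}']

    # fstring_contains_expr() replaces the old re.search(RE_FEXPR, ...) check;
    # doubled braces are treated as escapes, not expressions.
    print(fstring_contains_expr("f'{var}'"))                             # True
    print(fstring_contains_expr("f'no placeholders, just {{braces}}'"))  # False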