return max_string_length
def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yields spans corresponding to expressions in a given f-string.

    Spans are half-open ranges (left inclusive, right exclusive).
    Assumes the input string is a valid f-string, but will not crash if the
    input string is invalid.
    """
    stack: List[int] = []  # indices of currently-open (unescaped) "{" braces
    i = 0
    while i < len(s):
        if s[i] == "{":
            # if we're in a string part of the f-string, ignore escaped curly braces
            if not stack and i + 1 < len(s) and s[i + 1] == "{":
                i += 2
                continue
            stack.append(i)
            i += 1
            continue

        if s[i] == "}":
            if not stack:
                # stray "}" (or the second half of an escaped "}}") — skip it
                i += 1
                continue
            j = stack.pop()
            # we've made it back out of the expression! yield the span
            if not stack:
                yield (j, i + 1)
            i += 1
            continue

        # if we're in an expression part of the f-string, fast forward through
        # strings; note that backslashes are not legal in the expression portion
        # of f-strings
        if stack:
            delim = None
            if s[i : i + 3] in ("'''", '"""'):
                delim = s[i : i + 3]
            elif s[i] in ("'", '"'):
                delim = s[i]
            if delim:
                i += len(delim)
                # scan to the matching closing delimiter (or end of string)
                while i < len(s) and s[i : i + len(delim)] != delim:
                    i += 1
                i += len(delim)
                continue
        i += 1
+
def fstring_contains_expr(s: str) -> bool:
    """Return True if *s* (an f-string body) contains at least one f-expression."""
    # Each yielded span is a non-empty tuple, hence truthy; any() short-circuits
    # on the first expression found.
    return any(iter_fexpr_spans(s))
+
+
class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
"""
StringTransformer that splits "atom" strings (i.e. strings which exist on
"""
MIN_SUBSTR_SIZE: Final = 6
- # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
- RE_FEXPR: Final = r"""
- (?<!\{) (?:\{\{)* \{ (?!\{)
- (?:
- [^\{\}]
- | \{\{
- | \}\}
- | (?R)
- )+
- \}
- """
def do_splitter_match(self, line: Line) -> TMatchResult:
LL = line.leaves
# contain any f-expressions, but ONLY if the original f-string
# contains at least one f-expression. Otherwise, we will alter the AST
# of the program.
- drop_pointless_f_prefix = ("f" in prefix) and re.search(
- self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
+ drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
+ LL[string_idx].value
)
first_string_line = True
"""
if "f" not in get_string_prefix(string).lower():
return
-
- for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
- yield match.span()
+ yield from iter_fexpr_spans(string)
def _get_illegal_split_indices(self, string: str) -> Set[Index]:
illegal_indices: Set[Index] = set()
"""
assert_is_leaf_string(string)
- if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
+ if "f" in prefix and not fstring_contains_expr(string):
new_prefix = prefix.replace("f", "")
temp = string[len(prefix) :]
--- /dev/null
+from typing import List, Tuple
+from black.trans import iter_fexpr_spans
+
+
def test_fexpr_spans() -> None:
    """Verify iter_fexpr_spans() against hand-computed spans and slices."""

    def check(
        string: str, expected_spans: List[Tuple[int, int]], expected_slices: List[str]
    ) -> None:
        spans = list(iter_fexpr_spans(string))

        # Checking slices isn't strictly necessary, but it's easier to verify at
        # a glance than only spans
        assert len(spans) == len(expected_slices)
        # NOTE: renamed loop variable from `slice` to `snippet` so it no longer
        # shadows the `slice` builtin.
        for (i, j), snippet in zip(spans, expected_slices):
            assert len(string[i:j]) == j - i
            assert string[i:j] == snippet

        assert spans == expected_spans

    # Most of these test cases omit the leading 'f' and leading / closing quotes
    # for convenience
    # Some additional property-based tests can be found in
    # https://github.com/psf/black/pull/2654#issuecomment-981411748
    check("""{var}""", [(0, 5)], ["{var}"])
    check("""f'{var}'""", [(2, 7)], ["{var}"])
    check("""f'{1 + f() + 2 + "asdf"}'""", [(2, 24)], ["""{1 + f() + 2 + "asdf"}"""])
    check("""text {var} text""", [(5, 10)], ["{var}"])
    check("""text {{ {var} }} text""", [(8, 13)], ["{var}"])
    check("""{a} {b} {c}""", [(0, 3), (4, 7), (8, 11)], ["{a}", "{b}", "{c}"])
    check("""f'{a} {b} {c}'""", [(2, 5), (6, 9), (10, 13)], ["{a}", "{b}", "{c}"])
    check("""{ {} }""", [(0, 6)], ["{ {} }"])
    check("""{ {{}} }""", [(0, 8)], ["{ {{}} }"])
    check("""{ {{{}}} }""", [(0, 10)], ["{ {{{}}} }"])
    check("""{{ {{{}}} }}""", [(5, 7)], ["{}"])
    check("""{{ {{{var}}} }}""", [(5, 10)], ["{var}"])
    check("""{f"{0}"}""", [(0, 8)], ["""{f"{0}"}"""])
    check("""{"'"}""", [(0, 5)], ["""{"'"}"""])
    check("""{"{"}""", [(0, 5)], ["""{"{"}"""])
    check("""{"}"}""", [(0, 5)], ["""{"}"}"""])
    check("""{"{{"}""", [(0, 6)], ["""{"{{"}"""])
    check("""{''' '''}""", [(0, 9)], ["""{''' '''}"""])
    check("""{'''{'''}""", [(0, 9)], ["""{'''{'''}"""])
    check("""{''' {'{ '''}""", [(0, 13)], ["""{''' {'{ '''}"""])
    check(
        '''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'y\\'\'\'\'''',
        [(5, 33)],
        ['''{f"""*{f"+{f'.{x}.'}+"}*"""}'''],
    )
    check(r"""{}{""", [(0, 2)], ["{}"])
    check("""f"{'{'''''''''}\"""", [(2, 15)], ["{'{'''''''''}"])