"""
String transformers that can split and merge strings.
"""
+import re
+import sys
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
-import regex as re # We need recursive patterns here (?R)
from typing import (
    Any,
    Callable,
    Collection,
    Iterator,
    List,
    Set,
    Tuple,
    TypeVar,
    Union,
)
-import sys
if sys.version_info < (3, 8):
- from typing_extensions import Final
+ from typing_extensions import Final, Literal
else:
- from typing import Final
+    from typing import Final, Literal
from mypy_extensions import trait
-from black.rusty import Result, Ok, Err
-
-from black.mode import Feature
-from black.nodes import syms, replace_child, parent_type
-from black.nodes import is_empty_par, is_empty_lpar, is_empty_rpar
-from black.nodes import OPENING_BRACKETS, CLOSING_BRACKETS, STANDALONE_COMMENT
-from black.lines import Line, append_leaves
from black.brackets import BracketMatchError
from black.comments import contains_pragma_comment
-from black.strings import has_triple_quotes, get_string_prefix, assert_is_leaf_string
-from black.strings import normalize_string_quotes
-
-from blib2to3.pytree import Leaf, Node
+from black.lines import Line, append_leaves
+from black.mode import Feature
+from black.nodes import (
+ CLOSING_BRACKETS,
+ OPENING_BRACKETS,
+ STANDALONE_COMMENT,
+ is_empty_lpar,
+ is_empty_par,
+ is_empty_rpar,
+ parent_type,
+ replace_child,
+ syms,
+)
+from black.rusty import Err, Ok, Result
+from black.strings import (
+ assert_is_leaf_string,
+ get_string_prefix,
+ has_triple_quotes,
+ normalize_string_quotes,
+)
from blib2to3.pgen2 import token
+from blib2to3.pytree import Leaf, Node
class CannotTransform(Exception):
return Err(cant_transform)
+def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]:
+ """A transformer which normalizes spacing around power operators."""
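+
+    # For example, `a ** b` becomes `a**b` when both operands are "simple"
+    # (see the helpers below); `f(x) ** 2` is left untouched because a call
+    # is not a simple operand.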
+
+ # Performance optimization to avoid unnecessary Leaf clones and other ops.
+ for leaf in line.leaves:
+ if leaf.type == token.DOUBLESTAR:
+ break
+ else:
+ raise CannotTransform("No doublestar token was found in the line.")
+
+ def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
+ # Brackets and parentheses indicate calls, subscripts, etc. ...
+ # basically stuff that doesn't count as "simple". Only a NAME lookup
+        # or dotted lookup (e.g. NAME.NAME) is OK.
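+        # For example, walking left from `c` in `(a + b).c ** 2` hits `)`,
+        # so the lookup is not simple and the power operator is not hugged.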
+ if step == -1:
+ disallowed = {token.RPAR, token.RSQB}
+ else:
+ disallowed = {token.LPAR, token.LSQB}
+
+ while 0 <= index < len(line.leaves):
+ current = line.leaves[index]
+ if current.type in disallowed:
+ return False
+ if current.type not in {token.NAME, token.DOT} or current.value == "for":
+                # If the current token isn't disallowed, we'll assume this is simple, as
+ # only the disallowed tokens are semantically attached to this lookup
+ # expression we're checking. Also, stop early if we hit the 'for' bit
+ # of a comprehension.
+ return True
+
+ index += step
+
+ return True
+
+ def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
+        # An operand is considered "simple" if it's a NAME, a numeric CONSTANT, or a
+        # simple lookup (see above), with or without a preceding unary operator.
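+        # e.g. `x`, `3.14`, `a.b.c`, and `~x` are simple; `f(x)` and `a[0]`
+        # are not.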
+ start = line.leaves[index]
+ if start.type in {token.NAME, token.NUMBER}:
+ return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
+
+ if start.type in {token.PLUS, token.MINUS, token.TILDE}:
+ if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
+ # step is always one as bases with a preceding unary op will be checked
+ # for simplicity starting from the next token (so it'll hit the check
+ # above).
+ return is_simple_lookup(index + 1, step=1)
+
+ return False
+
+ new_line = line.clone()
+ should_hug = False
+ for idx, leaf in enumerate(line.leaves):
+ new_leaf = leaf.clone()
+ if should_hug:
+ new_leaf.prefix = ""
+ should_hug = False
+
+ should_hug = (
+ (0 < idx < len(line.leaves) - 1)
+ and leaf.type == token.DOUBLESTAR
+ and is_simple_operand(idx - 1, kind="base")
+ and line.leaves[idx - 1].value != "lambda"
+ and is_simple_operand(idx + 1, kind="exponent")
+ )
+ if should_hug:
+ new_leaf.prefix = ""
+
+ # We have to be careful to make a new line properly:
+ # - bracket related metadata must be maintained (handled by Line.append)
+        # - comments need to be copied over, updating the leaf IDs they're attached to
+ new_line.append(new_leaf, preformatted=True)
+ for comment_leaf in line.comments_after(leaf):
+ new_line.append(comment_leaf, preformatted=True)
+
+ yield new_line
+
+
class StringTransformer(ABC):
"""
An implementation of the Transformer protocol that relies on its
is_valid_index = is_valid_index_factory(LL)
- for (i, leaf) in enumerate(LL):
+ for i, leaf in enumerate(LL):
if (
leaf.type == token.STRING
and is_valid_index(i + 1)
# with 'f'...
if "f" in prefix and "f" not in next_prefix:
# Then we must escape any braces contained in this substring.
- SS = re.subf(r"(\{|\})", "{1}{1}", SS)
+ SS = re.sub(r"(\{|\})", r"\1\1", SS)
NSS = make_naked(SS, next_prefix)
# Build the final line ('new_line') that this method will later return.
new_line = line.clone()
- for (i, leaf) in enumerate(LL):
+ for i, leaf in enumerate(LL):
if i == string_idx:
new_line.append(string_leaf)
is_valid_index = is_valid_index_factory(LL)
- for (idx, leaf) in enumerate(LL):
+ for idx, leaf in enumerate(LL):
# Should be a string...
if leaf.type != token.STRING:
continue
return max_string_length
+def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
+ """
+ Yields spans corresponding to expressions in a given f-string.
+ Spans are half-open ranges (left inclusive, right exclusive).
+ Assumes the input string is a valid f-string, but will not crash if the input
+ string is invalid.
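+
+    For example, given 'f"{x} and {y}"' this yields (2, 5) and (10, 13), the
+    spans of "{x}" and "{y}" including the braces.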
+ """
+    stack: List[int] = []  # our curly brace stack
+ i = 0
+ while i < len(s):
+ if s[i] == "{":
+ # if we're in a string part of the f-string, ignore escaped curly braces
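+            # (e.g. in `f"{{x}}"` the doubled braces are literal, not an expression)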
+ if not stack and i + 1 < len(s) and s[i + 1] == "{":
+ i += 2
+ continue
+ stack.append(i)
+ i += 1
+ continue
+
+ if s[i] == "}":
+ if not stack:
+ i += 1
+ continue
+ j = stack.pop()
+ # we've made it back out of the expression! yield the span
+ if not stack:
+ yield (j, i + 1)
+ i += 1
+ continue
+
+ # if we're in an expression part of the f-string, fast forward through strings
+ # note that backslashes are not legal in the expression portion of f-strings
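+        # (e.g. in `f"{x + '}'}"` the quoted `}` must not close the expression)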
+ if stack:
+ delim = None
+ if s[i : i + 3] in ("'''", '"""'):
+ delim = s[i : i + 3]
+ elif s[i] in ("'", '"'):
+ delim = s[i]
+ if delim:
+ i += len(delim)
+ while i < len(s) and s[i : i + len(delim)] != delim:
+ i += 1
+ i += len(delim)
+ continue
+ i += 1
+
+
+def fstring_contains_expr(s: str) -> bool:
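+    """Return True if `s` contains at least one f-expression span."""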
+ return any(iter_fexpr_spans(s))
+
+
class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
"""
StringTransformer that splits "atom" strings (i.e. strings which exist on
"""
MIN_SUBSTR_SIZE: Final = 6
- # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
- RE_FEXPR: Final = r"""
- (?<!\{) (?:\{\{)* \{ (?!\{)
- (?:
- [^\{\}]
- | \{\{
- | \}\}
- | (?R)
- )+
- \}
- """
def do_splitter_match(self, line: Line) -> TMatchResult:
LL = line.leaves
# contain any f-expressions, but ONLY if the original f-string
# contains at least one f-expression. Otherwise, we will alter the AST
# of the program.
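+        # (For example, `f"{{a}}"` evaluates to "{a}"; without the `f` prefix it
+        # would be the literal "{{a}}".)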
- drop_pointless_f_prefix = ("f" in prefix) and re.search(
- self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
+ drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
+ LL[string_idx].value
)
first_string_line = True
"""
if "f" not in get_string_prefix(string).lower():
return
-
- for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
- yield match.span()
+ yield from iter_fexpr_spans(string)
def _get_illegal_split_indices(self, string: str) -> Set[Index]:
illegal_indices: Set[Index] = set()
"""
assert_is_leaf_string(string)
- if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
+ if "f" in prefix and not fstring_contains_expr(string):
new_prefix = prefix.replace("f", "")
temp = string[len(prefix) :]
if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
is_valid_index = is_valid_index_factory(LL)
- for (i, leaf) in enumerate(LL):
+ for i, leaf in enumerate(LL):
# We MUST find a comma...
if leaf.type == token.COMMA:
idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
):
is_valid_index = is_valid_index_factory(LL)
- for (i, leaf) in enumerate(LL):
+ for i, leaf in enumerate(LL):
# We MUST find either an '=' or '+=' symbol...
if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
is_valid_index = is_valid_index_factory(LL)
- for (i, leaf) in enumerate(LL):
+ for i, leaf in enumerate(LL):
# We MUST find a colon...
if leaf.type == token.COLON:
idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1