from typing import (
Any,
Callable,
+ ClassVar,
Collection,
Dict,
Iterable,
List,
Optional,
Sequence,
+ Set,
Tuple,
TypeVar,
Union,
)
+import sys
+
+if sys.version_info < (3, 8):
+ from typing_extensions import Final
+else:
+ from typing import Final
+
+from mypy_extensions import trait
from black.rusty import Result, Ok, Err
return Err(cant_transform)
-@dataclass # type: ignore
class StringTransformer(ABC):
"""
An implementation of the Transformer protocol that relies on its
as much as possible.
"""
- line_length: int
- normalize_strings: bool
- __name__ = "StringTransformer"
+ __name__: Final = "StringTransformer"
+
+ # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
+ # `abc.ABC`.
+ def __init__(self, line_length: int, normalize_strings: bool) -> None:
+ self.line_length = line_length
+ self.normalize_strings = normalize_strings
@abstractmethod
def do_match(self, line: Line) -> TMatchResult:
break_idx: int
+@trait
class CustomSplitMapMixin:
"""
This mixin class is used to map merged strings to a sequence of
the resultant substrings go over the configured max line length.
"""
- _Key = Tuple[StringID, str]
- _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)
+ _Key: ClassVar = Tuple[StringID, str]
+ _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
+ tuple
+ )
@staticmethod
def _get_key(string: str) -> "CustomSplitMapMixin._Key":
return key in self._CUSTOM_SPLIT_MAP
-class StringMerger(CustomSplitMapMixin, StringTransformer):
+class StringMerger(StringTransformer, CustomSplitMapMixin):
"""StringTransformer that merges strings together.
Requirements:
and is_valid_index(next_str_idx)
and LL[next_str_idx].type == token.STRING
):
- prefix = get_string_prefix(LL[next_str_idx].value)
+ prefix = get_string_prefix(LL[next_str_idx].value).lower()
next_str_idx += 1
# The next loop merges the string group. The final string will be
num_of_strings += 1
SS = LL[next_str_idx].value
- next_prefix = get_string_prefix(SS)
+ next_prefix = get_string_prefix(SS).lower()
# If this is an f-string group but this substring is not prefixed
# with 'f'...
return TErr("StringMerger does NOT merge multiline strings.")
num_of_strings += 1
- prefix = get_string_prefix(leaf.value)
+ prefix = get_string_prefix(leaf.value).lower()
if "r" in prefix:
return TErr("StringMerger does NOT merge raw strings.")
* The target string is not a multiline (i.e. triple-quote) string.
"""
- STRING_OPERATORS = [
+ STRING_OPERATORS: Final = [
token.EQEQUAL,
token.GREATER,
token.GREATEREQUAL,
return max_string_length
-class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
+class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
"""
StringTransformer that splits "atom" strings (i.e. strings which exist on
lines by themselves).
CustomSplit objects and add them to the custom split map.
"""
- MIN_SUBSTR_SIZE = 6
+ MIN_SUBSTR_SIZE: Final = 6
# Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
- RE_FEXPR = r"""
+ RE_FEXPR: Final = r"""
(?<!\{) (?:\{\{)* \{ (?!\{)
(?:
[^\{\}]
is_valid_index = is_valid_index_factory(LL)
insert_str_child = insert_str_child_factory(LL[string_idx])
- prefix = get_string_prefix(LL[string_idx].value)
+ prefix = get_string_prefix(LL[string_idx].value).lower()
# We MAY choose to drop the 'f' prefix from substrings that don't
# contain any f-expressions, but ONLY if the original f-string
# --- Construct `next_value`
next_value = rest_value[:break_idx] + QUOTE
+
+ # HACK: The following 'if' statement is a hack to fix the custom
+ # breakpoint index in the case of either: (a) substrings that were
+ # f-strings but will have the 'f' prefix removed OR (b) substrings
+ # that were not f-strings but will now become f-strings because of
+ # redundant use of the 'f' prefix (i.e. none of the substrings
+ # contain f-expressions but one or more of them had the 'f' prefix
+ # anyway; in which case, we will prepend 'f' to _all_ substrings).
+ #
+ # There is probably a better way to accomplish what is being done
+ # here...
+ #
+ # If this substring is an f-string, we _could_ remove the 'f'
+ # prefix, and the current custom split did NOT originally use a
+ # prefix...
if (
- # Are we allowed to try to drop a pointless 'f' prefix?
- drop_pointless_f_prefix
- # If we are, will we be successful?
- and next_value != self._normalize_f_string(next_value, prefix)
+ next_value != self._normalize_f_string(next_value, prefix)
+ and use_custom_breakpoints
+ and not csplit.has_prefix
):
- # If the current custom split did NOT originally use a prefix,
- # then `csplit.break_idx` will be off by one after removing
+ # Then `csplit.break_idx` will be off by one after removing
# the 'f' prefix.
- break_idx = (
- break_idx + 1
- if use_custom_breakpoints and not csplit.has_prefix
- else break_idx
- )
+ break_idx += 1
next_value = rest_value[:break_idx] + QUOTE
+
+ if drop_pointless_f_prefix:
next_value = self._normalize_f_string(next_value, prefix)
# --- Construct `next_leaf`
last_line.comments = line.comments.copy()
yield Ok(last_line)
+ def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
+ """
+ Yields:
+ All ranges of @string which, if @string were to be split there,
+ would result in the splitting of an \\N{...} expression (which is NOT
+ allowed).
+ """
+ # True - the previous backslash was unescaped
+ # False - the previous backslash was escaped *or* there was no backslash
+ previous_was_unescaped_backslash = False
+ it = iter(enumerate(string))
+ for idx, c in it:
+ if c == "\\":
+ previous_was_unescaped_backslash = not previous_was_unescaped_backslash
+ continue
+ if not previous_was_unescaped_backslash or c != "N":
+ previous_was_unescaped_backslash = False
+ continue
+ previous_was_unescaped_backslash = False
+
+ begin = idx - 1 # the position of backslash before \N{...}
+ for idx, c in it:
+ if c == "}":
+ end = idx
+ break
+ else:
+ # malformed nameescape expression?
+ # should have been detected by AST parsing earlier...
+ raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
+ yield begin, end
+
+ def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
+ """
+ Yields:
+ All ranges of @string which, if @string were to be split there,
+ would result in the splitting of an f-expression (which is NOT
+ allowed).
+ """
+ if "f" not in get_string_prefix(string).lower():
+ return
+
+ for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
+ yield match.span()
+
+ def _get_illegal_split_indices(self, string: str) -> Set[Index]:
+ illegal_indices: Set[Index] = set()
+ iterators = [
+ self._iter_fexpr_slices(string),
+ self._iter_nameescape_slices(string),
+ ]
+ for it in iterators:
+ for begin, end in it:
+ illegal_indices.update(range(begin, end + 1))
+ return illegal_indices
+
def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
"""
This method contains the algorithm that StringSplitter uses to
assert is_valid_index(max_break_idx)
assert_is_leaf_string(string)
- _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
+ _illegal_split_indices = self._get_illegal_split_indices(string)
- def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
- """
- Yields:
- All ranges of @string which, if @string were to be split there,
- would result in the splitting of an f-expression (which is NOT
- allowed).
- """
- nonlocal _fexpr_slices
-
- if _fexpr_slices is None:
- _fexpr_slices = []
- for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
- _fexpr_slices.append(match.span())
-
- yield from _fexpr_slices
-
- is_fstring = "f" in get_string_prefix(string)
-
- def breaks_fstring_expression(i: Index) -> bool:
+ def breaks_unsplittable_expression(i: Index) -> bool:
"""
Returns:
True iff returning @i would result in the splitting of an
- f-expression (which is NOT allowed).
+ unsplittable expression (which is NOT allowed).
"""
- if not is_fstring:
- return False
-
- for (start, end) in fexpr_slices():
- if start <= i < end:
- return True
-
- return False
+ return i in _illegal_split_indices
def passes_all_checks(i: Index) -> bool:
"""
is_space
and is_not_escaped
and is_big_enough
- and not breaks_fstring_expression(i)
+ and not breaks_unsplittable_expression(i)
)
# First, we check all indices BELOW @max_break_idx.
return string_op_leaves
-class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
+class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
"""
StringTransformer that splits non-"atom" strings (i.e. strings that do not
exist on lines by themselves).
```
"""
- DEFAULT_TOKEN = -1
+ DEFAULT_TOKEN: Final = 20210605
# String Parser States
- START = 1
- DOT = 2
- NAME = 3
- PERCENT = 4
- SINGLE_FMT_ARG = 5
- LPAR = 6
- RPAR = 7
- DONE = 8
+ START: Final = 1
+ DOT: Final = 2
+ NAME: Final = 3
+ PERCENT: Final = 4
+ SINGLE_FMT_ARG: Final = 5
+ LPAR: Final = 6
+ RPAR: Final = 7
+ DONE: Final = 8
# Lookup Table for Next State
- _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
+ _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
# A string trailer may start with '.' OR '%'.
(START, token.DOT): DOT,
(START, token.PERCENT): PERCENT,