src/black/trans.py

   1 """
   2 String transformers that can split and merge strings.
   3 """
   4 from abc import ABC, abstractmethod
   5 from collections import defaultdict
   6 from dataclasses import dataclass
   7 import re
   8 from typing import (
   9     Any,
  10     Callable,
  11     ClassVar,
  12     Collection,
  13     Dict,
  14     Iterable,
  15     Iterator,
  16     List,
  17     Optional,
  18     Sequence,
  19     Set,
  20     Tuple,
  21     TypeVar,
  22     Union,
  23 )
  24 import sys
  25
  26 if sys.version_info < (3, 8):
  27     from typing_extensions import Literal, Final
  28 else:
  29     from typing import Literal, Final
  30
  31 from mypy_extensions import trait
  32
  33 from black.rusty import Result, Ok, Err
  34
  35 from black.mode import Feature
  36 from black.nodes import syms, replace_child, parent_type
  37 from black.nodes import is_empty_par, is_empty_lpar, is_empty_rpar
  38 from black.nodes import OPENING_BRACKETS, CLOSING_BRACKETS, STANDALONE_COMMENT
  39 from black.lines import Line, append_leaves
  40 from black.brackets import BracketMatchError
  41 from black.comments import contains_pragma_comment
  42 from black.strings import has_triple_quotes, get_string_prefix, assert_is_leaf_string
  43 from black.strings import normalize_string_quotes
  44
  45 from blib2to3.pytree import Leaf, Node
  46 from blib2to3.pgen2 import token
  47
  48
  49 class CannotTransform(Exception):
  50     """Base class for errors raised by Transformers."""
  51
  52
  53 # types
  54 T = TypeVar("T")
  55 LN = Union[Leaf, Node]
  56 Transformer = Callable[[Line, Collection[Feature]], Iterator[Line]]
  57 Index = int
  58 NodeType = int
  59 ParserState = int
  60 StringID = int
  61 TResult = Result[T, CannotTransform]  # (T)ransform Result
  62 TMatchResult = TResult[Index]
  63
  64
  65 def TErr(err_msg: str) -> Err[CannotTransform]:
  66     """(T)ransform Err
  67
  68     Convenience function used when working with the TResult type.
  69     """
  70     cant_transform = CannotTransform(err_msg)
  71     return Err(cant_transform)
  72
  73
  74 def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]:
  75     """A transformer which normalizes spacing around power operators."""
  76
  77     # Performance optimization to avoid unnecessary Leaf clones and other ops.
  78     for leaf in line.leaves:
  79         if leaf.type == token.DOUBLESTAR:
  80             break
  81     else:
  82         raise CannotTransform("No doublestar token was found in the line.")
  83
  84     def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
  85         # Brackets and parentheses indicate calls, subscripts, etc. ...
  86         # basically stuff that doesn't count as "simple". Only a NAME lookup
  87         # or dotted lookup (eg. NAME.NAME) is OK.
  88         if step == -1:
  89             disallowed = {token.RPAR, token.RSQB}
  90         else:
  91             disallowed = {token.LPAR, token.LSQB}
  92
  93         while 0 <= index < len(line.leaves):
  94             current = line.leaves[index]
  95             if current.type in disallowed:
  96                 return False
  97             if current.type not in {token.NAME, token.DOT} or current.value == "for":
  98                 # If the current token isn't disallowed, we'll assume this is simple as
  99                 # only the disallowed tokens are semantically attached to this lookup
 100                 # expression we're checking. Also, stop early if we hit the 'for' bit
 101                 # of a comprehension.
 102                 return True
 103
 104             index += step
 105
 106         return True
 107
 108     def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
 109         # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple
 110         # lookup (see above), with or without a preceding unary operator.
 111         start = line.leaves[index]
 112         if start.type in {token.NAME, token.NUMBER}:
 113             return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
 114
 115         if start.type in {token.PLUS, token.MINUS, token.TILDE}:
 116             if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
 117                 # step is always one as bases with a preceding unary op will be checked
 118                 # for simplicity starting from the next token (so it'll hit the check
 119                 # above).
 120                 return is_simple_lookup(index + 1, step=1)
 121
 122         return False
 123
 124     leaves: List[Leaf] = []
 125     should_hug = False
 126     for idx, leaf in enumerate(line.leaves):
 127         new_leaf = leaf.clone()
 128         if should_hug:
 129             new_leaf.prefix = ""
 130             should_hug = False
 131
 132         should_hug = (
 133             (0 < idx < len(line.leaves) - 1)
 134             and leaf.type == token.DOUBLESTAR
 135             and is_simple_operand(idx - 1, kind="base")
 136             and line.leaves[idx - 1].value != "lambda"
 137             and is_simple_operand(idx + 1, kind="exponent")
 138         )
 139         if should_hug:
 140             new_leaf.prefix = ""
 141
 142         leaves.append(new_leaf)
 143
 144     yield Line(
 145         mode=line.mode,
 146         depth=line.depth,
 147         leaves=leaves,
 148         comments=line.comments,
 149         bracket_tracker=line.bracket_tracker,
 150         inside_brackets=line.inside_brackets,
 151         should_split_rhs=line.should_split_rhs,
 152         magic_trailing_comma=line.magic_trailing_comma,
 153     )
 154
 155
 156 class StringTransformer(ABC):
 157     """
 158     An implementation of the Transformer protocol that relies on its
 159     subclasses overriding the template methods `do_match(...)` and
 160     `do_transform(...)`.
 161
 162     This Transformer works exclusively on strings (for example, by merging
 163     or splitting them).
 164
 165     The following sections can be found among the docstrings of each concrete
 166     StringTransformer subclass.
 167
 168     Requirements:
 169         Which requirements must be met of the given Line for this
 170         StringTransformer to be applied?
 171
 172     Transformations:
 173         If the given Line meets all of the above requirements, which string
 174         transformations can you expect to be applied to it by this
 175         StringTransformer?
 176
 177     Collaborations:
 178         What contractual agreements does this StringTransformer have with other
 179         StringTransfomers? Such collaborations should be eliminated/minimized
 180         as much as possible.
 181     """
 182
 183     __name__: Final = "StringTransformer"
 184
 185     # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
 186     # `abc.ABC`.
 187     def __init__(self, line_length: int, normalize_strings: bool) -> None:
 188         self.line_length = line_length
 189         self.normalize_strings = normalize_strings
 190
 191     @abstractmethod
 192     def do_match(self, line: Line) -> TMatchResult:
 193         """
 194         Returns:
 195             * Ok(string_idx) such that `line.leaves[string_idx]` is our target
 196             string, if a match was able to be made.
 197                 OR
 198             * Err(CannotTransform), if a match was not able to be made.
 199         """
 200
 201     @abstractmethod
 202     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
 203         """
 204         Yields:
 205             * Ok(new_line) where new_line is the new transformed line.
 206                 OR
 207             * Err(CannotTransform) if the transformation failed for some reason. The
 208             `do_match(...)` template method should usually be used to reject
 209             the form of the given Line, but in some cases it is difficult to
 210             know whether or not a Line meets the StringTransformer's
 211             requirements until the transformation is already midway.
 212
 213         Side Effects:
 214             This method should NOT mutate @line directly, but it MAY mutate the
 215             Line's underlying Node structure. (WARNING: If the underlying Node
 216             structure IS altered, then this method should NOT be allowed to
 217             yield an CannotTransform after that point.)
 218         """
 219
 220     def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
 221         """
 222         StringTransformer instances have a call signature that mirrors that of
 223         the Transformer type.
 224
 225         Raises:
 226             CannotTransform(...) if the concrete StringTransformer class is unable
 227             to transform @line.
 228         """
 229         # Optimization to avoid calling `self.do_match(...)` when the line does
 230         # not contain any string.
 231         if not any(leaf.type == token.STRING for leaf in line.leaves):
 232             raise CannotTransform("There are no strings in this line.")
 233
 234         match_result = self.do_match(line)
 235
 236         if isinstance(match_result, Err):
 237             cant_transform = match_result.err()
 238             raise CannotTransform(
 239                 f"The string transformer {self.__class__.__name__} does not recognize"
 240                 " this line as one that it can transform."
 241             ) from cant_transform
 242
 243         string_idx = match_result.ok()
 244
 245         for line_result in self.do_transform(line, string_idx):
 246             if isinstance(line_result, Err):
 247                 cant_transform = line_result.err()
 248                 raise CannotTransform(
 249                     "StringTransformer failed while attempting to transform string."
 250                 ) from cant_transform
 251             line = line_result.ok()
 252             yield line
 253
 254
 255 @dataclass
 256 class CustomSplit:
 257     """A custom (i.e. manual) string split.
 258
 259     A single CustomSplit instance represents a single substring.
 260
 261     Examples:
 262         Consider the following string:
 263         ```
 264         "Hi there friend."
 265         " This is a custom"
 266         f" string {split}."
 267         ```
 268
 269         This string will correspond to the following three CustomSplit instances:
 270         ```
 271         CustomSplit(False, 16)
 272         CustomSplit(False, 17)
 273         CustomSplit(True, 16)
 274         ```
 275     """
 276
 277     has_prefix: bool
 278     break_idx: int
 279
 280
 281 @trait
 282 class CustomSplitMapMixin:
 283     """
 284     This mixin class is used to map merged strings to a sequence of
 285     CustomSplits, which will then be used to re-split the strings iff none of
 286     the resultant substrings go over the configured max line length.
 287     """
 288
 289     _Key: ClassVar = Tuple[StringID, str]
 290     _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
 291         tuple
 292     )
 293
 294     @staticmethod
 295     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
 296         """
 297         Returns:
 298             A unique identifier that is used internally to map @string to a
 299             group of custom splits.
 300         """
 301         return (id(string), string)
 302
 303     def add_custom_splits(
 304         self, string: str, custom_splits: Iterable[CustomSplit]
 305     ) -> None:
 306         """Custom Split Map Setter Method
 307
 308         Side Effects:
 309             Adds a mapping from @string to the custom splits @custom_splits.
 310         """
 311         key = self._get_key(string)
 312         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
 313
 314     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
 315         """Custom Split Map Getter Method
 316
 317         Returns:
 318             * A list of the custom splits that are mapped to @string, if any
 319             exist.
 320                 OR
 321             * [], otherwise.
 322
 323         Side Effects:
 324             Deletes the mapping between @string and its associated custom
 325             splits (which are returned to the caller).
 326         """
 327         key = self._get_key(string)
 328
 329         custom_splits = self._CUSTOM_SPLIT_MAP[key]
 330         del self._CUSTOM_SPLIT_MAP[key]
 331
 332         return list(custom_splits)
 333
 334     def has_custom_splits(self, string: str) -> bool:
 335         """
 336         Returns:
 337             True iff @string is associated with a set of custom splits.
 338         """
 339         key = self._get_key(string)
 340         return key in self._CUSTOM_SPLIT_MAP
 341
 342
 343 class StringMerger(StringTransformer, CustomSplitMapMixin):
 344     """StringTransformer that merges strings together.
 345
 346     Requirements:
 347         (A) The line contains adjacent strings such that ALL of the validation checks
 348         listed in StringMerger.__validate_msg(...)'s docstring pass.
 349             OR
 350         (B) The line contains a string which uses line continuation backslashes.
 351
 352     Transformations:
 353         Depending on which of the two requirements above where met, either:
 354
 355         (A) The string group associated with the target string is merged.
 356             OR
 357         (B) All line-continuation backslashes are removed from the target string.
 358
 359     Collaborations:
 360         StringMerger provides custom split information to StringSplitter.
 361     """
 362
 363     def do_match(self, line: Line) -> TMatchResult:
 364         LL = line.leaves
 365
 366         is_valid_index = is_valid_index_factory(LL)
 367
 368         for (i, leaf) in enumerate(LL):
 369             if (
 370                 leaf.type == token.STRING
 371                 and is_valid_index(i + 1)
 372                 and LL[i + 1].type == token.STRING
 373             ):
 374                 return Ok(i)
 375
 376             if leaf.type == token.STRING and "\\\n" in leaf.value:
 377                 return Ok(i)
 378
 379         return TErr("This line has no strings that need merging.")
 380
 381     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
 382         new_line = line
 383         rblc_result = self._remove_backslash_line_continuation_chars(
 384             new_line, string_idx
 385         )
 386         if isinstance(rblc_result, Ok):
 387             new_line = rblc_result.ok()
 388
 389         msg_result = self._merge_string_group(new_line, string_idx)
 390         if isinstance(msg_result, Ok):
 391             new_line = msg_result.ok()
 392
 393         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
 394             msg_cant_transform = msg_result.err()
 395             rblc_cant_transform = rblc_result.err()
 396             cant_transform = CannotTransform(
 397                 "StringMerger failed to merge any strings in this line."
 398             )
 399
 400             # Chain the errors together using `__cause__`.
 401             msg_cant_transform.__cause__ = rblc_cant_transform
 402             cant_transform.__cause__ = msg_cant_transform
 403
 404             yield Err(cant_transform)
 405         else:
 406             yield Ok(new_line)
 407
 408     @staticmethod
 409     def _remove_backslash_line_continuation_chars(
 410         line: Line, string_idx: int
 411     ) -> TResult[Line]:
 412         """
 413         Merge strings that were split across multiple lines using
 414         line-continuation backslashes.
 415
 416         Returns:
 417             Ok(new_line), if @line contains backslash line-continuation
 418             characters.
 419                 OR
 420             Err(CannotTransform), otherwise.
 421         """
 422         LL = line.leaves
 423
 424         string_leaf = LL[string_idx]
 425         if not (
 426             string_leaf.type == token.STRING
 427             and "\\\n" in string_leaf.value
 428             and not has_triple_quotes(string_leaf.value)
 429         ):
 430             return TErr(
 431                 f"String leaf {string_leaf} does not contain any backslash line"
 432                 " continuation characters."
 433             )
 434
 435         new_line = line.clone()
 436         new_line.comments = line.comments.copy()
 437         append_leaves(new_line, line, LL)
 438
 439         new_string_leaf = new_line.leaves[string_idx]
 440         new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
 441
 442         return Ok(new_line)
 443
 444     def _merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
 445         """
 446         Merges string group (i.e. set of adjacent strings) where the first
 447         string in the group is `line.leaves[string_idx]`.
 448
 449         Returns:
 450             Ok(new_line), if ALL of the validation checks found in
 451             __validate_msg(...) pass.
 452                 OR
 453             Err(CannotTransform), otherwise.
 454         """
 455         LL = line.leaves
 456
 457         is_valid_index = is_valid_index_factory(LL)
 458
 459         vresult = self._validate_msg(line, string_idx)
 460         if isinstance(vresult, Err):
 461             return vresult
 462
 463         # If the string group is wrapped inside an Atom node, we must make sure
 464         # to later replace that Atom with our new (merged) string leaf.
 465         atom_node = LL[string_idx].parent
 466
 467         # We will place BREAK_MARK in between every two substrings that we
 468         # merge. We will then later go through our final result and use the
 469         # various instances of BREAK_MARK we find to add the right values to
 470         # the custom split map.
 471         BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"
 472
 473         QUOTE = LL[string_idx].value[-1]
 474
 475         def make_naked(string: str, string_prefix: str) -> str:
 476             """Strip @string (i.e. make it a "naked" string)
 477
 478             Pre-conditions:
 479                 * assert_is_leaf_string(@string)
 480
 481             Returns:
 482                 A string that is identical to @string except that
 483                 @string_prefix has been stripped, the surrounding QUOTE
 484                 characters have been removed, and any remaining QUOTE
 485                 characters have been escaped.
 486             """
 487             assert_is_leaf_string(string)
 488
 489             RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
 490             naked_string = string[len(string_prefix) + 1 : -1]
 491             naked_string = re.sub(
 492                 "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
 493             )
 494             return naked_string
 495
 496         # Holds the CustomSplit objects that will later be added to the custom
 497         # split map.
 498         custom_splits = []
 499
 500         # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
 501         prefix_tracker = []
 502
 503         # Sets the 'prefix' variable. This is the prefix that the final merged
 504         # string will have.
 505         next_str_idx = string_idx
 506         prefix = ""
 507         while (
 508             not prefix
 509             and is_valid_index(next_str_idx)
 510             and LL[next_str_idx].type == token.STRING
 511         ):
 512             prefix = get_string_prefix(LL[next_str_idx].value).lower()
 513             next_str_idx += 1
 514
 515         # The next loop merges the string group. The final string will be
 516         # contained in 'S'.
 517         #
 518         # The following convenience variables are used:
 519         #
 520         #   S: string
 521         #   NS: naked string
 522         #   SS: next string
 523         #   NSS: naked next string
 524         S = ""
 525         NS = ""
 526         num_of_strings = 0
 527         next_str_idx = string_idx
 528         while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
 529             num_of_strings += 1
 530
 531             SS = LL[next_str_idx].value
 532             next_prefix = get_string_prefix(SS).lower()
 533
 534             # If this is an f-string group but this substring is not prefixed
 535             # with 'f'...
 536             if "f" in prefix and "f" not in next_prefix:
 537                 # Then we must escape any braces contained in this substring.
 538                 SS = re.sub(r"(\{|\})", r"\1\1", SS)
 539
 540             NSS = make_naked(SS, next_prefix)
 541
 542             has_prefix = bool(next_prefix)
 543             prefix_tracker.append(has_prefix)
 544
 545             S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
 546             NS = make_naked(S, prefix)
 547
 548             next_str_idx += 1
 549
 550         S_leaf = Leaf(token.STRING, S)
 551         if self.normalize_strings:
 552             S_leaf.value = normalize_string_quotes(S_leaf.value)
 553
 554         # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
 555         temp_string = S_leaf.value[len(prefix) + 1 : -1]
 556         for has_prefix in prefix_tracker:
 557             mark_idx = temp_string.find(BREAK_MARK)
 558             assert (
 559                 mark_idx >= 0
 560             ), "Logic error while filling the custom string breakpoint cache."
 561
 562             temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
 563             breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
 564             custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))
 565
 566         string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))
 567
 568         if atom_node is not None:
 569             replace_child(atom_node, string_leaf)
 570
 571         # Build the final line ('new_line') that this method will later return.
 572         new_line = line.clone()
 573         for (i, leaf) in enumerate(LL):
 574             if i == string_idx:
 575                 new_line.append(string_leaf)
 576
 577             if string_idx <= i < string_idx + num_of_strings:
 578                 for comment_leaf in line.comments_after(LL[i]):
 579                     new_line.append(comment_leaf, preformatted=True)
 580                 continue
 581
 582             append_leaves(new_line, line, [leaf])
 583
 584         self.add_custom_splits(string_leaf.value, custom_splits)
 585         return Ok(new_line)
 586
 587     @staticmethod
 588     def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
 589         """Validate (M)erge (S)tring (G)roup
 590
 591         Transform-time string validation logic for __merge_string_group(...).
 592
 593         Returns:
 594             * Ok(None), if ALL validation checks (listed below) pass.
 595                 OR
 596             * Err(CannotTransform), if any of the following are true:
 597                 - The target string group does not contain ANY stand-alone comments.
 598                 - The target string is not in a string group (i.e. it has no
 599                   adjacent strings).
 600                 - The string group has more than one inline comment.
 601                 - The string group has an inline comment that appears to be a pragma.
 602                 - The set of all string prefixes in the string group is of
 603                   length greater than one and is not equal to {"", "f"}.
 604                 - The string group consists of raw strings.
 605         """
 606         # We first check for "inner" stand-alone comments (i.e. stand-alone
 607         # comments that have a string leaf before them AND after them).
 608         for inc in [1, -1]:
 609             i = string_idx
 610             found_sa_comment = False
 611             is_valid_index = is_valid_index_factory(line.leaves)
 612             while is_valid_index(i) and line.leaves[i].type in [
 613                 token.STRING,
 614                 STANDALONE_COMMENT,
 615             ]:
 616                 if line.leaves[i].type == STANDALONE_COMMENT:
 617                     found_sa_comment = True
 618                 elif found_sa_comment:
 619                     return TErr(
 620                         "StringMerger does NOT merge string groups which contain "
 621                         "stand-alone comments."
 622                     )
 623
 624                 i += inc
 625
 626         num_of_inline_string_comments = 0
 627         set_of_prefixes = set()
 628         num_of_strings = 0
 629         for leaf in line.leaves[string_idx:]:
 630             if leaf.type != token.STRING:
 631                 # If the string group is trailed by a comma, we count the
 632                 # comments trailing the comma to be one of the string group's
 633                 # comments.
 634                 if leaf.type == token.COMMA and id(leaf) in line.comments:
 635                     num_of_inline_string_comments += 1
 636                 break
 637
 638             if has_triple_quotes(leaf.value):
 639                 return TErr("StringMerger does NOT merge multiline strings.")
 640
 641             num_of_strings += 1
 642             prefix = get_string_prefix(leaf.value).lower()
 643             if "r" in prefix:
 644                 return TErr("StringMerger does NOT merge raw strings.")
 645
 646             set_of_prefixes.add(prefix)
 647
 648             if id(leaf) in line.comments:
 649                 num_of_inline_string_comments += 1
 650                 if contains_pragma_comment(line.comments[id(leaf)]):
 651                     return TErr("Cannot merge strings which have pragma comments.")
 652
 653         if num_of_strings < 2:
 654             return TErr(
 655                 f"Not enough strings to merge (num_of_strings={num_of_strings})."
 656             )
 657
 658         if num_of_inline_string_comments > 1:
 659             return TErr(
 660                 f"Too many inline string comments ({num_of_inline_string_comments})."
 661             )
 662
 663         if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
 664             return TErr(f"Too many different prefixes ({set_of_prefixes}).")
 665
 666         return Ok(None)
 667
 668
 669 class StringParenStripper(StringTransformer):
 670     """StringTransformer that strips surrounding parentheses from strings.
 671
 672     Requirements:
 673         The line contains a string which is surrounded by parentheses and:
 674             - The target string is NOT the only argument to a function call.
 675             - The target string is NOT a "pointless" string.
 676             - If the target string contains a PERCENT, the brackets are not
 677               preceded or followed by an operator with higher precedence than
 678               PERCENT.
 679
 680     Transformations:
 681         The parentheses mentioned in the 'Requirements' section are stripped.
 682
 683     Collaborations:
 684         StringParenStripper has its own inherent usefulness, but it is also
 685         relied on to clean up the parentheses created by StringParenWrapper (in
 686         the event that they are no longer needed).
 687     """
 688
 689     def do_match(self, line: Line) -> TMatchResult:
 690         LL = line.leaves
 691
 692         is_valid_index = is_valid_index_factory(LL)
 693
 694         for (idx, leaf) in enumerate(LL):
 695             # Should be a string...
 696             if leaf.type != token.STRING:
 697                 continue
 698
 699             # If this is a "pointless" string...
 700             if (
 701                 leaf.parent
 702                 and leaf.parent.parent
 703                 and leaf.parent.parent.type == syms.simple_stmt
 704             ):
 705                 continue
 706
 707             # Should be preceded by a non-empty LPAR...
 708             if (
 709                 not is_valid_index(idx - 1)
 710                 or LL[idx - 1].type != token.LPAR
 711                 or is_empty_lpar(LL[idx - 1])
 712             ):
 713                 continue
 714
 715             # That LPAR should NOT be preceded by a function name or a closing
 716             # bracket (which could be a function which returns a function or a
 717             # list/dictionary that contains a function)...
 718             if is_valid_index(idx - 2) and (
 719                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
 720             ):
 721                 continue
 722
 723             string_idx = idx
 724
 725             # Skip the string trailer, if one exists.
 726             string_parser = StringParser()
 727             next_idx = string_parser.parse(LL, string_idx)
 728
 729             # if the leaves in the parsed string include a PERCENT, we need to
 730             # make sure the initial LPAR is NOT preceded by an operator with
 731             # higher or equal precedence to PERCENT
 732             if is_valid_index(idx - 2):
 733                 # mypy can't quite follow unless we name this
 734                 before_lpar = LL[idx - 2]
 735                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
 736                     (
 737                         before_lpar.type
 738                         in {
 739                             token.STAR,
 740                             token.AT,
 741                             token.SLASH,
 742                             token.DOUBLESLASH,
 743                             token.PERCENT,
 744                             token.TILDE,
 745                             token.DOUBLESTAR,
 746                             token.AWAIT,
 747                             token.LSQB,
 748                             token.LPAR,
 749                         }
 750                     )
 751                     or (
 752                         # only unary PLUS/MINUS
 753                         before_lpar.parent
 754                         and before_lpar.parent.type == syms.factor
 755                         and (before_lpar.type in {token.PLUS, token.MINUS})
 756                     )
 757                 ):
 758                     continue
 759
 760             # Should be followed by a non-empty RPAR...
 761             if (
 762                 is_valid_index(next_idx)
 763                 and LL[next_idx].type == token.RPAR
 764                 and not is_empty_rpar(LL[next_idx])
 765             ):
 766                 # That RPAR should NOT be followed by anything with higher
 767                 # precedence than PERCENT
 768                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
 769                     token.DOUBLESTAR,
 770                     token.LSQB,
 771                     token.LPAR,
 772                     token.DOT,
 773                 }:
 774                     continue
 775
 776                 return Ok(string_idx)
 777
 778         return TErr("This line has no strings wrapped in parens.")
 779
 780     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
 781         LL = line.leaves
 782
 783         string_parser = StringParser()
 784         rpar_idx = string_parser.parse(LL, string_idx)
 785
 786         for leaf in (LL[string_idx - 1], LL[rpar_idx]):
 787             if line.comments_after(leaf):
 788                 yield TErr(
 789                     "Will not strip parentheses which have comments attached to them."
 790                 )
 791                 return
 792
 793         new_line = line.clone()
 794         new_line.comments = line.comments.copy()
 795         try:
 796             append_leaves(new_line, line, LL[: string_idx - 1])
 797         except BracketMatchError:
 798             # HACK: I believe there is currently a bug somewhere in
 799             # right_hand_split() that is causing brackets to not be tracked
 800             # properly by a shared BracketTracker.
 801             append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True)
 802
 803         string_leaf = Leaf(token.STRING, LL[string_idx].value)
 804         LL[string_idx - 1].remove()
 805         replace_child(LL[string_idx], string_leaf)
 806         new_line.append(string_leaf)
 807
 808         append_leaves(
 809             new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
 810         )
 811
 812         LL[rpar_idx].remove()
 813
 814         yield Ok(new_line)
 815
 816
 817 class BaseStringSplitter(StringTransformer):
 818     """
 819     Abstract class for StringTransformers which transform a Line's strings by splitting
 820     them or placing them on their own lines where necessary to avoid going over
 821     the configured line length.
 822
 823     Requirements:
 824         * The target string value is responsible for the line going over the
 825         line length limit. It follows that after all of black's other line
 826         split methods have been exhausted, this line (or one of the resulting
 827         lines after all line splits are performed) would still be over the
 828         line_length limit unless we split this string.
 829             AND
 830         * The target string is NOT a "pointless" string (i.e. a string that has
 831         no parent or siblings).
 832             AND
 833         * The target string is not followed by an inline comment that appears
 834         to be a pragma.
 835             AND
 836         * The target string is not a multiline (i.e. triple-quote) string.
 837     """
 838
 839     STRING_OPERATORS: Final = [
 840         token.EQEQUAL,
 841         token.GREATER,
 842         token.GREATEREQUAL,
 843         token.LESS,
 844         token.LESSEQUAL,
 845         token.NOTEQUAL,
 846         token.PERCENT,
 847         token.PLUS,
 848         token.STAR,
 849     ]
 850
 851     @abstractmethod
 852     def do_splitter_match(self, line: Line) -> TMatchResult:
 853         """
 854         BaseStringSplitter asks its clients to override this method instead of
 855         `StringTransformer.do_match(...)`.
 856
 857         Follows the same protocol as `StringTransformer.do_match(...)`.
 858
 859         Refer to `help(StringTransformer.do_match)` for more information.
 860         """
 861
 862     def do_match(self, line: Line) -> TMatchResult:
 863         match_result = self.do_splitter_match(line)
 864         if isinstance(match_result, Err):
 865             return match_result
 866
 867         string_idx = match_result.ok()
 868         vresult = self._validate(line, string_idx)
 869         if isinstance(vresult, Err):
 870             return vresult
 871
 872         return match_result
 873
 874     def _validate(self, line: Line, string_idx: int) -> TResult[None]:
 875         """
 876         Checks that @line meets all of the requirements listed in this classes'
 877         docstring. Refer to `help(BaseStringSplitter)` for a detailed
 878         description of those requirements.
 879
 880         Returns:
 881             * Ok(None), if ALL of the requirements are met.
 882                 OR
 883             * Err(CannotTransform), if ANY of the requirements are NOT met.
 884         """
 885         LL = line.leaves
 886
 887         string_leaf = LL[string_idx]
 888
 889         max_string_length = self._get_max_string_length(line, string_idx)
 890         if len(string_leaf.value) <= max_string_length:
 891             return TErr(
 892                 "The string itself is not what is causing this line to be too long."
 893             )
 894
 895         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
 896             token.STRING,
 897             token.NEWLINE,
 898         ]:
 899             return TErr(
 900                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
 901                 " no parent)."
 902             )
 903
 904         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
 905             line.comments[id(line.leaves[string_idx])]
 906         ):
 907             return TErr(
 908                 "Line appears to end with an inline pragma comment. Splitting the line"
 909                 " could modify the pragma's behavior."
 910             )
 911
 912         if has_triple_quotes(string_leaf.value):
 913             return TErr("We cannot split multiline strings.")
 914
 915         return Ok(None)
 916
 917     def _get_max_string_length(self, line: Line, string_idx: int) -> int:
 918         """
 919         Calculates the max string length used when attempting to determine
 920         whether or not the target string is responsible for causing the line to
 921         go over the line length limit.
 922
 923         WARNING: This method is tightly coupled to both StringSplitter and
 924         (especially) StringParenWrapper. There is probably a better way to
 925         accomplish what is being done here.
 926
 927         Returns:
 928             max_string_length: such that `line.leaves[string_idx].value >
 929             max_string_length` implies that the target string IS responsible
 930             for causing this line to exceed the line length limit.
 931         """
 932         LL = line.leaves
 933
 934         is_valid_index = is_valid_index_factory(LL)
 935
 936         # We use the shorthand "WMA4" in comments to abbreviate "We must
 937         # account for". When giving examples, we use STRING to mean some/any
 938         # valid string.
 939         #
 940         # Finally, we use the following convenience variables:
 941         #
 942         #   P:  The leaf that is before the target string leaf.
 943         #   N:  The leaf that is after the target string leaf.
 944         #   NN: The leaf that is after N.
 945
 946         # WMA4 the whitespace at the beginning of the line.
 947         offset = line.depth * 4
 948
 949         if is_valid_index(string_idx - 1):
 950             p_idx = string_idx - 1
 951             if (
 952                 LL[string_idx - 1].type == token.LPAR
 953                 and LL[string_idx - 1].value == ""
 954                 and string_idx >= 2
 955             ):
 956                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
 957                 p_idx -= 1
 958
 959             P = LL[p_idx]
 960             if P.type in self.STRING_OPERATORS:
 961                 # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
 962                 offset += len(str(P)) + 1
 963
 964             if P.type == token.COMMA:
 965                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
 966                 offset += 3
 967
 968             if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
 969                 # This conditional branch is meant to handle dictionary keys,
 970                 # variable assignments, 'return STRING' statement lines, and
 971                 # 'else STRING' ternary expression lines.
 972
 973                 # WMA4 a single space.
 974                 offset += 1
 975
 976                 # WMA4 the lengths of any leaves that came before that space,
 977                 # but after any closing bracket before that space.
 978                 for leaf in reversed(LL[: p_idx + 1]):
 979                     offset += len(str(leaf))
 980                     if leaf.type in CLOSING_BRACKETS:
 981                         break
 982
 983         if is_valid_index(string_idx + 1):
 984             N = LL[string_idx + 1]
 985             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
 986                 # If the next leaf is an empty RPAR placeholder, we should skip it.
 987                 N = LL[string_idx + 2]
 988
 989             if N.type == token.COMMA:
 990                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
 991                 offset += 1
 992
 993             if is_valid_index(string_idx + 2):
 994                 NN = LL[string_idx + 2]
 995
 996                 if N.type == token.DOT and NN.type == token.NAME:
 997                     # This conditional branch is meant to handle method calls invoked
 998                     # off of a string literal up to and including the LPAR character.
 999
1000                     # WMA4 the '.' character.
1001                     offset += 1
1002
1003                     if (
1004                         is_valid_index(string_idx + 3)
1005                         and LL[string_idx + 3].type == token.LPAR
1006                     ):
1007                         # WMA4 the left parenthesis character.
1008                         offset += 1
1009
1010                     # WMA4 the length of the method's name.
1011                     offset += len(NN.value)
1012
1013         has_comments = False
1014         for comment_leaf in line.comments_after(LL[string_idx]):
1015             if not has_comments:
1016                 has_comments = True
1017                 # WMA4 two spaces before the '#' character.
1018                 offset += 2
1019
1020             # WMA4 the length of the inline comment.
1021             offset += len(comment_leaf.value)
1022
1023         max_string_length = self.line_length - offset
1024         return max_string_length
1025
1026
1027 def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
1028     """
1029     Yields spans corresponding to expressions in a given f-string.
1030     Spans are half-open ranges (left inclusive, right exclusive).
1031     Assumes the input string is a valid f-string, but will not crash if the input
1032     string is invalid.
1033     """
1034     stack: List[int] = []  # our curly paren stack
1035     i = 0
1036     while i < len(s):
1037         if s[i] == "{":
1038             # if we're in a string part of the f-string, ignore escaped curly braces
1039             if not stack and i + 1 < len(s) and s[i + 1] == "{":
1040                 i += 2
1041                 continue
1042             stack.append(i)
1043             i += 1
1044             continue
1045
1046         if s[i] == "}":
1047             if not stack:
1048                 i += 1
1049                 continue
1050             j = stack.pop()
1051             # we've made it back out of the expression! yield the span
1052             if not stack:
1053                 yield (j, i + 1)
1054             i += 1
1055             continue
1056
1057         # if we're in an expression part of the f-string, fast forward through strings
1058         # note that backslashes are not legal in the expression portion of f-strings
1059         if stack:
1060             delim = None
1061             if s[i : i + 3] in ("'''", '"""'):
1062                 delim = s[i : i + 3]
1063             elif s[i] in ("'", '"'):
1064                 delim = s[i]
1065             if delim:
1066                 i += len(delim)
1067                 while i < len(s) and s[i : i + len(delim)] != delim:
1068                     i += 1
1069                 i += len(delim)
1070                 continue
1071         i += 1
1072
1073
1074 def fstring_contains_expr(s: str) -> bool:
1075     return any(iter_fexpr_spans(s))
1076
1077
1078 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
1079     """
1080     StringTransformer that splits "atom" strings (i.e. strings which exist on
1081     lines by themselves).
1082
1083     Requirements:
1084         * The line consists ONLY of a single string (possibly prefixed by a
1085         string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
1086         a trailing comma.
1087             AND
1088         * All of the requirements listed in BaseStringSplitter's docstring.
1089
1090     Transformations:
1091         The string mentioned in the 'Requirements' section is split into as
1092         many substrings as necessary to adhere to the configured line length.
1093
1094         In the final set of substrings, no substring should be smaller than
1095         MIN_SUBSTR_SIZE characters.
1096
1097         The string will ONLY be split on spaces (i.e. each new substring should
1098         start with a space). Note that the string will NOT be split on a space
1099         which is escaped with a backslash.
1100
1101         If the string is an f-string, it will NOT be split in the middle of an
1102         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
1103         else bar()} is an f-expression).
1104
1105         If the string that is being split has an associated set of custom split
1106         records and those custom splits will NOT result in any line going over
1107         the configured line length, those custom splits are used. Otherwise the
1108         string is split as late as possible (from left-to-right) while still
1109         adhering to the transformation rules listed above.
1110
1111     Collaborations:
1112         StringSplitter relies on StringMerger to construct the appropriate
1113         CustomSplit objects and add them to the custom split map.
1114     """
1115
1116     MIN_SUBSTR_SIZE: Final = 6
1117
1118     def do_splitter_match(self, line: Line) -> TMatchResult:
1119         LL = line.leaves
1120
1121         is_valid_index = is_valid_index_factory(LL)
1122
1123         idx = 0
1124
1125         # The first two leaves MAY be the 'not in' keywords...
1126         if (
1127             is_valid_index(idx)
1128             and is_valid_index(idx + 1)
1129             and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
1130             and str(LL[idx]) + str(LL[idx + 1]) == "not in"
1131         ):
1132             idx += 2
1133         # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
1134         elif is_valid_index(idx) and (
1135             LL[idx].type in self.STRING_OPERATORS
1136             or LL[idx].type == token.NAME
1137             and str(LL[idx]) == "in"
1138         ):
1139             idx += 1
1140
1141         # The next/first leaf MAY be an empty LPAR...
1142         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
1143             idx += 1
1144
1145         # The next/first leaf MUST be a string...
1146         if not is_valid_index(idx) or LL[idx].type != token.STRING:
1147             return TErr("Line does not start with a string.")
1148
1149         string_idx = idx
1150
1151         # Skip the string trailer, if one exists.
1152         string_parser = StringParser()
1153         idx = string_parser.parse(LL, string_idx)
1154
1155         # That string MAY be followed by an empty RPAR...
1156         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
1157             idx += 1
1158
1159         # That string / empty RPAR leaf MAY be followed by a comma...
1160         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1161             idx += 1
1162
1163         # But no more leaves are allowed...
1164         if is_valid_index(idx):
1165             return TErr("This line does not end with a string.")
1166
1167         return Ok(string_idx)
1168
1169     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
1170         LL = line.leaves
1171
1172         QUOTE = LL[string_idx].value[-1]
1173
1174         is_valid_index = is_valid_index_factory(LL)
1175         insert_str_child = insert_str_child_factory(LL[string_idx])
1176
1177         prefix = get_string_prefix(LL[string_idx].value).lower()
1178
1179         # We MAY choose to drop the 'f' prefix from substrings that don't
1180         # contain any f-expressions, but ONLY if the original f-string
1181         # contains at least one f-expression. Otherwise, we will alter the AST
1182         # of the program.
1183         drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
1184             LL[string_idx].value
1185         )
1186
1187         first_string_line = True
1188
1189         string_op_leaves = self._get_string_operator_leaves(LL)
1190         string_op_leaves_length = (
1191             sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
1192             if string_op_leaves
1193             else 0
1194         )
1195
1196         def maybe_append_string_operators(new_line: Line) -> None:
1197             """
1198             Side Effects:
1199                 If @line starts with a string operator and this is the first
1200                 line we are constructing, this function appends the string
1201                 operator to @new_line and replaces the old string operator leaf
1202                 in the node structure. Otherwise this function does nothing.
1203             """
1204             maybe_prefix_leaves = string_op_leaves if first_string_line else []
1205             for i, prefix_leaf in enumerate(maybe_prefix_leaves):
1206                 replace_child(LL[i], prefix_leaf)
1207                 new_line.append(prefix_leaf)
1208
1209         ends_with_comma = (
1210             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
1211         )
1212
1213         def max_last_string() -> int:
1214             """
1215             Returns:
1216                 The max allowed length of the string value used for the last
1217                 line we will construct.
1218             """
1219             result = self.line_length
1220             result -= line.depth * 4
1221             result -= 1 if ends_with_comma else 0
1222             result -= string_op_leaves_length
1223             return result
1224
1225         # --- Calculate Max Break Index (for string value)
1226         # We start with the line length limit
1227         max_break_idx = self.line_length
1228         # The last index of a string of length N is N-1.
1229         max_break_idx -= 1
1230         # Leading whitespace is not present in the string value (e.g. Leaf.value).
1231         max_break_idx -= line.depth * 4
1232         if max_break_idx < 0:
1233             yield TErr(
1234                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
1235                 f" {line.depth}"
1236             )
1237             return
1238
1239         # Check if StringMerger registered any custom splits.
1240         custom_splits = self.pop_custom_splits(LL[string_idx].value)
1241         # We use them ONLY if none of them would produce lines that exceed the
1242         # line limit.
1243         use_custom_breakpoints = bool(
1244             custom_splits
1245             and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
1246         )
1247
1248         # Temporary storage for the remaining chunk of the string line that
1249         # can't fit onto the line currently being constructed.
1250         rest_value = LL[string_idx].value
1251
1252         def more_splits_should_be_made() -> bool:
1253             """
1254             Returns:
1255                 True iff `rest_value` (the remaining string value from the last
1256                 split), should be split again.
1257             """
1258             if use_custom_breakpoints:
1259                 return len(custom_splits) > 1
1260             else:
1261                 return len(rest_value) > max_last_string()
1262
1263         string_line_results: List[Ok[Line]] = []
1264         while more_splits_should_be_made():
1265             if use_custom_breakpoints:
1266                 # Custom User Split (manual)
1267                 csplit = custom_splits.pop(0)
1268                 break_idx = csplit.break_idx
1269             else:
1270                 # Algorithmic Split (automatic)
1271                 max_bidx = max_break_idx - string_op_leaves_length
1272                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
1273                 if maybe_break_idx is None:
1274                     # If we are unable to algorithmically determine a good split
1275                     # and this string has custom splits registered to it, we
1276                     # fall back to using them--which means we have to start
1277                     # over from the beginning.
1278                     if custom_splits:
1279                         rest_value = LL[string_idx].value
1280                         string_line_results = []
1281                         first_string_line = True
1282                         use_custom_breakpoints = True
1283                         continue
1284
1285                     # Otherwise, we stop splitting here.
1286                     break
1287
1288                 break_idx = maybe_break_idx
1289
1290             # --- Construct `next_value`
1291             next_value = rest_value[:break_idx] + QUOTE
1292
1293             # HACK: The following 'if' statement is a hack to fix the custom
1294             # breakpoint index in the case of either: (a) substrings that were
1295             # f-strings but will have the 'f' prefix removed OR (b) substrings
1296             # that were not f-strings but will now become f-strings because of
1297             # redundant use of the 'f' prefix (i.e. none of the substrings
1298             # contain f-expressions but one or more of them had the 'f' prefix
1299             # anyway; in which case, we will prepend 'f' to _all_ substrings).
1300             #
1301             # There is probably a better way to accomplish what is being done
1302             # here...
1303             #
1304             # If this substring is an f-string, we _could_ remove the 'f'
1305             # prefix, and the current custom split did NOT originally use a
1306             # prefix...
1307             if (
1308                 next_value != self._normalize_f_string(next_value, prefix)
1309                 and use_custom_breakpoints
1310                 and not csplit.has_prefix
1311             ):
1312                 # Then `csplit.break_idx` will be off by one after removing
1313                 # the 'f' prefix.
1314                 break_idx += 1
1315                 next_value = rest_value[:break_idx] + QUOTE
1316
1317             if drop_pointless_f_prefix:
1318                 next_value = self._normalize_f_string(next_value, prefix)
1319
1320             # --- Construct `next_leaf`
1321             next_leaf = Leaf(token.STRING, next_value)
1322             insert_str_child(next_leaf)
1323             self._maybe_normalize_string_quotes(next_leaf)
1324
1325             # --- Construct `next_line`
1326             next_line = line.clone()
1327             maybe_append_string_operators(next_line)
1328             next_line.append(next_leaf)
1329             string_line_results.append(Ok(next_line))
1330
1331             rest_value = prefix + QUOTE + rest_value[break_idx:]
1332             first_string_line = False
1333
1334         yield from string_line_results
1335
1336         if drop_pointless_f_prefix:
1337             rest_value = self._normalize_f_string(rest_value, prefix)
1338
1339         rest_leaf = Leaf(token.STRING, rest_value)
1340         insert_str_child(rest_leaf)
1341
1342         # NOTE: I could not find a test case that verifies that the following
1343         # line is actually necessary, but it seems to be. Otherwise we risk
1344         # not normalizing the last substring, right?
1345         self._maybe_normalize_string_quotes(rest_leaf)
1346
1347         last_line = line.clone()
1348         maybe_append_string_operators(last_line)
1349
1350         # If there are any leaves to the right of the target string...
1351         if is_valid_index(string_idx + 1):
1352             # We use `temp_value` here to determine how long the last line
1353             # would be if we were to append all the leaves to the right of the
1354             # target string to the last string line.
1355             temp_value = rest_value
1356             for leaf in LL[string_idx + 1 :]:
1357                 temp_value += str(leaf)
1358                 if leaf.type == token.LPAR:
1359                     break
1360
1361             # Try to fit them all on the same line with the last substring...
1362             if (
1363                 len(temp_value) <= max_last_string()
1364                 or LL[string_idx + 1].type == token.COMMA
1365             ):
1366                 last_line.append(rest_leaf)
1367                 append_leaves(last_line, line, LL[string_idx + 1 :])
1368                 yield Ok(last_line)
1369             # Otherwise, place the last substring on one line and everything
1370             # else on a line below that...
1371             else:
1372                 last_line.append(rest_leaf)
1373                 yield Ok(last_line)
1374
1375                 non_string_line = line.clone()
1376                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
1377                 yield Ok(non_string_line)
1378         # Else the target string was the last leaf...
1379         else:
1380             last_line.append(rest_leaf)
1381             last_line.comments = line.comments.copy()
1382             yield Ok(last_line)
1383
1384     def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1385         """
1386         Yields:
1387             All ranges of @string which, if @string were to be split there,
1388             would result in the splitting of an \\N{...} expression (which is NOT
1389             allowed).
1390         """
1391         # True - the previous backslash was unescaped
1392         # False - the previous backslash was escaped *or* there was no backslash
1393         previous_was_unescaped_backslash = False
1394         it = iter(enumerate(string))
1395         for idx, c in it:
1396             if c == "\\":
1397                 previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1398                 continue
1399             if not previous_was_unescaped_backslash or c != "N":
1400                 previous_was_unescaped_backslash = False
1401                 continue
1402             previous_was_unescaped_backslash = False
1403
1404             begin = idx - 1  # the position of backslash before \N{...}
1405             for idx, c in it:
1406                 if c == "}":
1407                     end = idx
1408                     break
1409             else:
1410                 # malformed nameescape expression?
1411                 # should have been detected by AST parsing earlier...
1412                 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1413             yield begin, end
1414
1415     def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1416         """
1417         Yields:
1418             All ranges of @string which, if @string were to be split there,
1419             would result in the splitting of an f-expression (which is NOT
1420             allowed).
1421         """
1422         if "f" not in get_string_prefix(string).lower():
1423             return
1424         yield from iter_fexpr_spans(string)
1425
1426     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
1427         illegal_indices: Set[Index] = set()
1428         iterators = [
1429             self._iter_fexpr_slices(string),
1430             self._iter_nameescape_slices(string),
1431         ]
1432         for it in iterators:
1433             for begin, end in it:
1434                 illegal_indices.update(range(begin, end + 1))
1435         return illegal_indices
1436
1437     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
1438         """
1439         This method contains the algorithm that StringSplitter uses to
1440         determine which character to split each string at.
1441
1442         Args:
1443             @string: The substring that we are attempting to split.
1444             @max_break_idx: The ideal break index. We will return this value if it
1445             meets all the necessary conditions. In the likely event that it
1446             doesn't we will try to find the closest index BELOW @max_break_idx
1447             that does. If that fails, we will expand our search by also
1448             considering all valid indices ABOVE @max_break_idx.
1449
1450         Pre-Conditions:
1451             * assert_is_leaf_string(@string)
1452             * 0 <= @max_break_idx < len(@string)
1453
1454         Returns:
1455             break_idx, if an index is able to be found that meets all of the
1456             conditions listed in the 'Transformations' section of this classes'
1457             docstring.
1458                 OR
1459             None, otherwise.
1460         """
1461         is_valid_index = is_valid_index_factory(string)
1462
1463         assert is_valid_index(max_break_idx)
1464         assert_is_leaf_string(string)
1465
1466         _illegal_split_indices = self._get_illegal_split_indices(string)
1467
1468         def breaks_unsplittable_expression(i: Index) -> bool:
1469             """
1470             Returns:
1471                 True iff returning @i would result in the splitting of an
1472                 unsplittable expression (which is NOT allowed).
1473             """
1474             return i in _illegal_split_indices
1475
1476         def passes_all_checks(i: Index) -> bool:
1477             """
1478             Returns:
1479                 True iff ALL of the conditions listed in the 'Transformations'
1480                 section of this classes' docstring would be be met by returning @i.
1481             """
1482             is_space = string[i] == " "
1483
1484             is_not_escaped = True
1485             j = i - 1
1486             while is_valid_index(j) and string[j] == "\\":
1487                 is_not_escaped = not is_not_escaped
1488                 j -= 1
1489
1490             is_big_enough = (
1491                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
1492                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
1493             )
1494             return (
1495                 is_space
1496                 and is_not_escaped
1497                 and is_big_enough
1498                 and not breaks_unsplittable_expression(i)
1499             )
1500
1501         # First, we check all indices BELOW @max_break_idx.
1502         break_idx = max_break_idx
1503         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
1504             break_idx -= 1
1505
1506         if not passes_all_checks(break_idx):
1507             # If that fails, we check all indices ABOVE @max_break_idx.
1508             #
1509             # If we are able to find a valid index here, the next line is going
1510             # to be longer than the specified line length, but it's probably
1511             # better than doing nothing at all.
1512             break_idx = max_break_idx + 1
1513             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
1514                 break_idx += 1
1515
1516             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
1517                 return None
1518
1519         return break_idx
1520
1521     def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
1522         if self.normalize_strings:
1523             leaf.value = normalize_string_quotes(leaf.value)
1524
1525     def _normalize_f_string(self, string: str, prefix: str) -> str:
1526         """
1527         Pre-Conditions:
1528             * assert_is_leaf_string(@string)
1529
1530         Returns:
1531             * If @string is an f-string that contains no f-expressions, we
1532             return a string identical to @string except that the 'f' prefix
1533             has been stripped and all double braces (i.e. '{{' or '}}') have
1534             been normalized (i.e. turned into '{' or '}').
1535                 OR
1536             * Otherwise, we return @string.
1537         """
1538         assert_is_leaf_string(string)
1539
1540         if "f" in prefix and not fstring_contains_expr(string):
1541             new_prefix = prefix.replace("f", "")
1542
1543             temp = string[len(prefix) :]
1544             temp = re.sub(r"\{\{", "{", temp)
1545             temp = re.sub(r"\}\}", "}", temp)
1546             new_string = temp
1547
1548             return f"{new_prefix}{new_string}"
1549         else:
1550             return string
1551
1552     def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
1553         LL = list(leaves)
1554
1555         string_op_leaves = []
1556         i = 0
1557         while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
1558             prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
1559             string_op_leaves.append(prefix_leaf)
1560             i += 1
1561         return string_op_leaves
1562
1563
1564 class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
1565     """
1566     StringTransformer that splits non-"atom" strings (i.e. strings that do not
1567     exist on lines by themselves).
1568
1569     Requirements:
1570         All of the requirements listed in BaseStringSplitter's docstring in
1571         addition to the requirements listed below:
1572
1573         * The line is a return/yield statement, which returns/yields a string.
1574             OR
1575         * The line is part of a ternary expression (e.g. `x = y if cond else
1576         z`) such that the line starts with `else <string>`, where <string> is
1577         some string.
1578             OR
1579         * The line is an assert statement, which ends with a string.
1580             OR
1581         * The line is an assignment statement (e.g. `x = <string>` or `x +=
1582         <string>`) such that the variable is being assigned the value of some
1583         string.
1584             OR
1585         * The line is a dictionary key assignment where some valid key is being
1586         assigned the value of some string.
1587
1588     Transformations:
1589         The chosen string is wrapped in parentheses and then split at the LPAR.
1590
1591         We then have one line which ends with an LPAR and another line that
1592         starts with the chosen string. The latter line is then split again at
1593         the RPAR. This results in the RPAR (and possibly a trailing comma)
1594         being placed on its own line.
1595
1596         NOTE: If any leaves exist to the right of the chosen string (except
1597         for a trailing comma, which would be placed after the RPAR), those
1598         leaves are placed inside the parentheses.  In effect, the chosen
1599         string is not necessarily being "wrapped" by parentheses. We can,
1600         however, count on the LPAR being placed directly before the chosen
1601         string.
1602
1603         In other words, StringParenWrapper creates "atom" strings. These
1604         can then be split again by StringSplitter, if necessary.
1605
1606     Collaborations:
1607         In the event that a string line split by StringParenWrapper is
1608         changed such that it no longer needs to be given its own line,
1609         StringParenWrapper relies on StringParenStripper to clean up the
1610         parentheses it created.
1611     """
1612
1613     def do_splitter_match(self, line: Line) -> TMatchResult:
1614         LL = line.leaves
1615
1616         if line.leaves[-1].type in OPENING_BRACKETS:
1617             return TErr(
1618                 "Cannot wrap parens around a line that ends in an opening bracket."
1619             )
1620
1621         string_idx = (
1622             self._return_match(LL)
1623             or self._else_match(LL)
1624             or self._assert_match(LL)
1625             or self._assign_match(LL)
1626             or self._dict_match(LL)
1627         )
1628
1629         if string_idx is not None:
1630             string_value = line.leaves[string_idx].value
1631             # If the string has no spaces...
1632             if " " not in string_value:
1633                 # And will still violate the line length limit when split...
1634                 max_string_length = self.line_length - ((line.depth + 1) * 4)
1635                 if len(string_value) > max_string_length:
1636                     # And has no associated custom splits...
1637                     if not self.has_custom_splits(string_value):
1638                         # Then we should NOT put this string on its own line.
1639                         return TErr(
1640                             "We do not wrap long strings in parentheses when the"
1641                             " resultant line would still be over the specified line"
1642                             " length and can't be split further by StringSplitter."
1643                         )
1644             return Ok(string_idx)
1645
1646         return TErr("This line does not contain any non-atomic strings.")
1647
1648     @staticmethod
1649     def _return_match(LL: List[Leaf]) -> Optional[int]:
1650         """
1651         Returns:
1652             string_idx such that @LL[string_idx] is equal to our target (i.e.
1653             matched) string, if this line matches the return/yield statement
1654             requirements listed in the 'Requirements' section of this classes'
1655             docstring.
1656                 OR
1657             None, otherwise.
1658         """
1659         # If this line is apart of a return/yield statement and the first leaf
1660         # contains either the "return" or "yield" keywords...
1661         if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
1662             0
1663         ].value in ["return", "yield"]:
1664             is_valid_index = is_valid_index_factory(LL)
1665
1666             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
1667             # The next visible leaf MUST contain a string...
1668             if is_valid_index(idx) and LL[idx].type == token.STRING:
1669                 return idx
1670
1671         return None
1672
1673     @staticmethod
1674     def _else_match(LL: List[Leaf]) -> Optional[int]:
1675         """
1676         Returns:
1677             string_idx such that @LL[string_idx] is equal to our target (i.e.
1678             matched) string, if this line matches the ternary expression
1679             requirements listed in the 'Requirements' section of this classes'
1680             docstring.
1681                 OR
1682             None, otherwise.
1683         """
1684         # If this line is apart of a ternary expression and the first leaf
1685         # contains the "else" keyword...
1686         if (
1687             parent_type(LL[0]) == syms.test
1688             and LL[0].type == token.NAME
1689             and LL[0].value == "else"
1690         ):
1691             is_valid_index = is_valid_index_factory(LL)
1692
1693             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
1694             # The next visible leaf MUST contain a string...
1695             if is_valid_index(idx) and LL[idx].type == token.STRING:
1696                 return idx
1697
1698         return None
1699
1700     @staticmethod
1701     def _assert_match(LL: List[Leaf]) -> Optional[int]:
1702         """
1703         Returns:
1704             string_idx such that @LL[string_idx] is equal to our target (i.e.
1705             matched) string, if this line matches the assert statement
1706             requirements listed in the 'Requirements' section of this classes'
1707             docstring.
1708                 OR
1709             None, otherwise.
1710         """
1711         # If this line is apart of an assert statement and the first leaf
1712         # contains the "assert" keyword...
1713         if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
1714             is_valid_index = is_valid_index_factory(LL)
1715
1716             for (i, leaf) in enumerate(LL):
1717                 # We MUST find a comma...
1718                 if leaf.type == token.COMMA:
1719                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
1720
1721                     # That comma MUST be followed by a string...
1722                     if is_valid_index(idx) and LL[idx].type == token.STRING:
1723                         string_idx = idx
1724
1725                         # Skip the string trailer, if one exists.
1726                         string_parser = StringParser()
1727                         idx = string_parser.parse(LL, string_idx)
1728
1729                         # But no more leaves are allowed...
1730                         if not is_valid_index(idx):
1731                             return string_idx
1732
1733         return None
1734
1735     @staticmethod
1736     def _assign_match(LL: List[Leaf]) -> Optional[int]:
1737         """
1738         Returns:
1739             string_idx such that @LL[string_idx] is equal to our target (i.e.
1740             matched) string, if this line matches the assignment statement
1741             requirements listed in the 'Requirements' section of this classes'
1742             docstring.
1743                 OR
1744             None, otherwise.
1745         """
1746         # If this line is apart of an expression statement or is a function
1747         # argument AND the first leaf contains a variable name...
1748         if (
1749             parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
1750             and LL[0].type == token.NAME
1751         ):
1752             is_valid_index = is_valid_index_factory(LL)
1753
1754             for (i, leaf) in enumerate(LL):
1755                 # We MUST find either an '=' or '+=' symbol...
1756                 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
1757                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
1758
1759                     # That symbol MUST be followed by a string...
1760                     if is_valid_index(idx) and LL[idx].type == token.STRING:
1761                         string_idx = idx
1762
1763                         # Skip the string trailer, if one exists.
1764                         string_parser = StringParser()
1765                         idx = string_parser.parse(LL, string_idx)
1766
1767                         # The next leaf MAY be a comma iff this line is apart
1768                         # of a function argument...
1769                         if (
1770                             parent_type(LL[0]) == syms.argument
1771                             and is_valid_index(idx)
1772                             and LL[idx].type == token.COMMA
1773                         ):
1774                             idx += 1
1775
1776                         # But no more leaves are allowed...
1777                         if not is_valid_index(idx):
1778                             return string_idx
1779
1780         return None
1781
1782     @staticmethod
1783     def _dict_match(LL: List[Leaf]) -> Optional[int]:
1784         """
1785         Returns:
1786             string_idx such that @LL[string_idx] is equal to our target (i.e.
1787             matched) string, if this line matches the dictionary key assignment
1788             statement requirements listed in the 'Requirements' section of this
1789             classes' docstring.
1790                 OR
1791             None, otherwise.
1792         """
1793         # If this line is apart of a dictionary key assignment...
1794         if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
1795             is_valid_index = is_valid_index_factory(LL)
1796
1797             for (i, leaf) in enumerate(LL):
1798                 # We MUST find a colon...
1799                 if leaf.type == token.COLON:
1800                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
1801
1802                     # That colon MUST be followed by a string...
1803                     if is_valid_index(idx) and LL[idx].type == token.STRING:
1804                         string_idx = idx
1805
1806                         # Skip the string trailer, if one exists.
1807                         string_parser = StringParser()
1808                         idx = string_parser.parse(LL, string_idx)
1809
1810                         # That string MAY be followed by a comma...
1811                         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1812                             idx += 1
1813
1814                         # But no more leaves are allowed...
1815                         if not is_valid_index(idx):
1816                             return string_idx
1817
1818         return None
1819
1820     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
1821         LL = line.leaves
1822
1823         is_valid_index = is_valid_index_factory(LL)
1824         insert_str_child = insert_str_child_factory(LL[string_idx])
1825
1826         comma_idx = -1
1827         ends_with_comma = False
1828         if LL[comma_idx].type == token.COMMA:
1829             ends_with_comma = True
1830
1831         leaves_to_steal_comments_from = [LL[string_idx]]
1832         if ends_with_comma:
1833             leaves_to_steal_comments_from.append(LL[comma_idx])
1834
1835         # --- First Line
1836         first_line = line.clone()
1837         left_leaves = LL[:string_idx]
1838
1839         # We have to remember to account for (possibly invisible) LPAR and RPAR
1840         # leaves that already wrapped the target string. If these leaves do
1841         # exist, we will replace them with our own LPAR and RPAR leaves.
1842         old_parens_exist = False
1843         if left_leaves and left_leaves[-1].type == token.LPAR:
1844             old_parens_exist = True
1845             leaves_to_steal_comments_from.append(left_leaves[-1])
1846             left_leaves.pop()
1847
1848         append_leaves(first_line, line, left_leaves)
1849
1850         lpar_leaf = Leaf(token.LPAR, "(")
1851         if old_parens_exist:
1852             replace_child(LL[string_idx - 1], lpar_leaf)
1853         else:
1854             insert_str_child(lpar_leaf)
1855         first_line.append(lpar_leaf)
1856
1857         # We throw inline comments that were originally to the right of the
1858         # target string to the top line. They will now be shown to the right of
1859         # the LPAR.
1860         for leaf in leaves_to_steal_comments_from:
1861             for comment_leaf in line.comments_after(leaf):
1862                 first_line.append(comment_leaf, preformatted=True)
1863
1864         yield Ok(first_line)
1865
1866         # --- Middle (String) Line
1867         # We only need to yield one (possibly too long) string line, since the
1868         # `StringSplitter` will break it down further if necessary.
1869         string_value = LL[string_idx].value
1870         string_line = Line(
1871             mode=line.mode,
1872             depth=line.depth + 1,
1873             inside_brackets=True,
1874             should_split_rhs=line.should_split_rhs,
1875             magic_trailing_comma=line.magic_trailing_comma,
1876         )
1877         string_leaf = Leaf(token.STRING, string_value)
1878         insert_str_child(string_leaf)
1879         string_line.append(string_leaf)
1880
1881         old_rpar_leaf = None
1882         if is_valid_index(string_idx + 1):
1883             right_leaves = LL[string_idx + 1 :]
1884             if ends_with_comma:
1885                 right_leaves.pop()
1886
1887             if old_parens_exist:
1888                 assert right_leaves and right_leaves[-1].type == token.RPAR, (
1889                     "Apparently, old parentheses do NOT exist?!"
1890                     f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
1891                 )
1892                 old_rpar_leaf = right_leaves.pop()
1893
1894             append_leaves(string_line, line, right_leaves)
1895
1896         yield Ok(string_line)
1897
1898         # --- Last Line
1899         last_line = line.clone()
1900         last_line.bracket_tracker = first_line.bracket_tracker
1901
1902         new_rpar_leaf = Leaf(token.RPAR, ")")
1903         if old_rpar_leaf is not None:
1904             replace_child(old_rpar_leaf, new_rpar_leaf)
1905         else:
1906             insert_str_child(new_rpar_leaf)
1907         last_line.append(new_rpar_leaf)
1908
1909         # If the target string ended with a comma, we place this comma to the
1910         # right of the RPAR on the last line.
1911         if ends_with_comma:
1912             comma_leaf = Leaf(token.COMMA, ",")
1913             replace_child(LL[comma_idx], comma_leaf)
1914             last_line.append(comma_leaf)
1915
1916         yield Ok(last_line)
1917
1918
1919 class StringParser:
1920     """
1921     A state machine that aids in parsing a string's "trailer", which can be
1922     either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
1923     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
1924     varY)`).
1925
1926     NOTE: A new StringParser object MUST be instantiated for each string
1927     trailer we need to parse.
1928
1929     Examples:
1930         We shall assume that `line` equals the `Line` object that corresponds
1931         to the following line of python code:
1932         ```
1933         x = "Some {}.".format("String") + some_other_string
1934         ```
1935
1936         Furthermore, we will assume that `string_idx` is some index such that:
1937         ```
1938         assert line.leaves[string_idx].value == "Some {}."
1939         ```
1940
1941         The following code snippet then holds:
1942         ```
1943         string_parser = StringParser()
1944         idx = string_parser.parse(line.leaves, string_idx)
1945         assert line.leaves[idx].type == token.PLUS
1946         ```
1947     """
1948
1949     DEFAULT_TOKEN: Final = 20210605
1950
1951     # String Parser States
1952     START: Final = 1
1953     DOT: Final = 2
1954     NAME: Final = 3
1955     PERCENT: Final = 4
1956     SINGLE_FMT_ARG: Final = 5
1957     LPAR: Final = 6
1958     RPAR: Final = 7
1959     DONE: Final = 8
1960
1961     # Lookup Table for Next State
1962     _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
1963         # A string trailer may start with '.' OR '%'.
1964         (START, token.DOT): DOT,
1965         (START, token.PERCENT): PERCENT,
1966         (START, DEFAULT_TOKEN): DONE,
1967         # A '.' MUST be followed by an attribute or method name.
1968         (DOT, token.NAME): NAME,
1969         # A method name MUST be followed by an '(', whereas an attribute name
1970         # is the last symbol in the string trailer.
1971         (NAME, token.LPAR): LPAR,
1972         (NAME, DEFAULT_TOKEN): DONE,
1973         # A '%' symbol can be followed by an '(' or a single argument (e.g. a
1974         # string or variable name).
1975         (PERCENT, token.LPAR): LPAR,
1976         (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
1977         # If a '%' symbol is followed by a single argument, that argument is
1978         # the last leaf in the string trailer.
1979         (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
1980         # If present, a ')' symbol is the last symbol in a string trailer.
1981         # (NOTE: LPARS and nested RPARS are not included in this lookup table,
1982         # since they are treated as a special case by the parsing logic in this
1983         # classes' implementation.)
1984         (RPAR, DEFAULT_TOKEN): DONE,
1985     }
1986
1987     def __init__(self) -> None:
1988         self._state = self.START
1989         self._unmatched_lpars = 0
1990
1991     def parse(self, leaves: List[Leaf], string_idx: int) -> int:
1992         """
1993         Pre-conditions:
1994             * @leaves[@string_idx].type == token.STRING
1995
1996         Returns:
1997             The index directly after the last leaf which is apart of the string
1998             trailer, if a "trailer" exists.
1999                 OR
2000             @string_idx + 1, if no string "trailer" exists.
2001         """
2002         assert leaves[string_idx].type == token.STRING
2003
2004         idx = string_idx + 1
2005         while idx < len(leaves) and self._next_state(leaves[idx]):
2006             idx += 1
2007         return idx
2008
2009     def _next_state(self, leaf: Leaf) -> bool:
2010         """
2011         Pre-conditions:
2012             * On the first call to this function, @leaf MUST be the leaf that
2013             was directly after the string leaf in question (e.g. if our target
2014             string is `line.leaves[i]` then the first call to this method must
2015             be `line.leaves[i + 1]`).
2016             * On the next call to this function, the leaf parameter passed in
2017             MUST be the leaf directly following @leaf.
2018
2019         Returns:
2020             True iff @leaf is apart of the string's trailer.
2021         """
2022         # We ignore empty LPAR or RPAR leaves.
2023         if is_empty_par(leaf):
2024             return True
2025
2026         next_token = leaf.type
2027         if next_token == token.LPAR:
2028             self._unmatched_lpars += 1
2029
2030         current_state = self._state
2031
2032         # The LPAR parser state is a special case. We will return True until we
2033         # find the matching RPAR token.
2034         if current_state == self.LPAR:
2035             if next_token == token.RPAR:
2036                 self._unmatched_lpars -= 1
2037                 if self._unmatched_lpars == 0:
2038                     self._state = self.RPAR
2039         # Otherwise, we use a lookup table to determine the next state.
2040         else:
2041             # If the lookup table matches the current state to the next
2042             # token, we use the lookup table.
2043             if (current_state, next_token) in self._goto:
2044                 self._state = self._goto[current_state, next_token]
2045             else:
2046                 # Otherwise, we check if a the current state was assigned a
2047                 # default.
2048                 if (current_state, self.DEFAULT_TOKEN) in self._goto:
2049                     self._state = self._goto[current_state, self.DEFAULT_TOKEN]
2050                 # If no default has been assigned, then this parser has a logic
2051                 # error.
2052                 else:
2053                     raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
2054
2055             if self._state == self.DONE:
2056                 return False
2057
2058         return True
2059
2060
2061 def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
2062     """
2063     Factory for a convenience function that is used to orphan @string_leaf
2064     and then insert multiple new leaves into the same part of the node
2065     structure that @string_leaf had originally occupied.
2066
2067     Examples:
2068         Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
2069         string_leaf.parent`. Assume the node `N` has the following
2070         original structure:
2071
2072         Node(
2073             expr_stmt, [
2074                 Leaf(NAME, 'x'),
2075                 Leaf(EQUAL, '='),
2076                 Leaf(STRING, '"foo"'),
2077             ]
2078         )
2079
2080         We then run the code snippet shown below.
2081         ```
2082         insert_str_child = insert_str_child_factory(string_leaf)
2083
2084         lpar = Leaf(token.LPAR, '(')
2085         insert_str_child(lpar)
2086
2087         bar = Leaf(token.STRING, '"bar"')
2088         insert_str_child(bar)
2089
2090         rpar = Leaf(token.RPAR, ')')
2091         insert_str_child(rpar)
2092         ```
2093
2094         After which point, it follows that `string_leaf.parent is None` and
2095         the node `N` now has the following structure:
2096
2097         Node(
2098             expr_stmt, [
2099                 Leaf(NAME, 'x'),
2100                 Leaf(EQUAL, '='),
2101                 Leaf(LPAR, '('),
2102                 Leaf(STRING, '"bar"'),
2103                 Leaf(RPAR, ')'),
2104             ]
2105         )
2106     """
2107     string_parent = string_leaf.parent
2108     string_child_idx = string_leaf.remove()
2109
2110     def insert_str_child(child: LN) -> None:
2111         nonlocal string_child_idx
2112
2113         assert string_parent is not None
2114         assert string_child_idx is not None
2115
2116         string_parent.insert_child(string_child_idx, child)
2117         string_child_idx += 1
2118
2119     return insert_str_child
2120
2121
2122 def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
2123     """
2124     Examples:
2125         ```
2126         my_list = [1, 2, 3]
2127
2128         is_valid_index = is_valid_index_factory(my_list)
2129
2130         assert is_valid_index(0)
2131         assert is_valid_index(2)
2132
2133         assert not is_valid_index(3)
2134         assert not is_valid_index(-1)
2135         ```
2136     """
2137
2138     def is_valid_index(idx: int) -> bool:
2139         """
2140         Returns:
2141             True iff @idx is positive AND seq[@idx] does NOT raise an
2142             IndexError.
2143         """
2144         return 0 <= idx < len(seq)
2145
2146     return is_valid_index