src/black/trans.py

   1 """
   2 String transformers that can split and merge strings.
   3 """
   4 import re
   5 import sys
   6 from abc import ABC, abstractmethod
   7 from collections import defaultdict
   8 from dataclasses import dataclass
   9 from typing import (
  10     Any,
  11     Callable,
  12     ClassVar,
  13     Collection,
  14     Dict,
  15     Iterable,
  16     Iterator,
  17     List,
  18     Optional,
  19     Sequence,
  20     Set,
  21     Tuple,
  22     TypeVar,
  23     Union,
  24 )
  25
  26 if sys.version_info < (3, 8):
  27     from typing_extensions import Final, Literal
  28 else:
  29     from typing import Literal, Final
  30
  31 from mypy_extensions import trait
  32
  33 from black.brackets import BracketMatchError
  34 from black.comments import contains_pragma_comment
  35 from black.lines import Line, append_leaves
  36 from black.mode import Feature
  37 from black.nodes import (
  38     CLOSING_BRACKETS,
  39     OPENING_BRACKETS,
  40     STANDALONE_COMMENT,
  41     is_empty_lpar,
  42     is_empty_par,
  43     is_empty_rpar,
  44     parent_type,
  45     replace_child,
  46     syms,
  47 )
  48 from black.rusty import Err, Ok, Result
  49 from black.strings import (
  50     assert_is_leaf_string,
  51     get_string_prefix,
  52     has_triple_quotes,
  53     normalize_string_quotes,
  54 )
  55 from blib2to3.pgen2 import token
  56 from blib2to3.pytree import Leaf, Node
  57
  58
class CannotTransform(Exception):
    """Base class for errors raised by Transformers.

    Within this module it is raised directly by `hug_power_op(...)` and by
    `StringTransformer.__call__(...)`, and it is the error payload that
    `TErr(...)` wraps when building the `Err` side of a `TResult`.
    """
  61
  62
# types
T = TypeVar("T")
LN = Union[Leaf, Node]  # either kind of blib2to3 tree element
# Call signature of a line transformer: receives a Line and a collection of
# Feature values, and returns an iterator over the resulting Lines.
Transformer = Callable[[Line, Collection[Feature]], Iterator[Line]]
Index = int  # position inside `line.leaves` (see StringTransformer.do_match)
NodeType = int  # NOTE(review): presumably a blib2to3 node/token type code -- confirm
ParserState = int  # NOTE(review): presumably a StringParser state -- confirm
StringID = int  # value of `id(string)` (see CustomSplitMapMixin._get_key)
TResult = Result[T, CannotTransform]  # (T)ransform Result
TMatchResult = TResult[Index]  # what StringTransformer.do_match(...) returns
  73
  74
  75 def TErr(err_msg: str) -> Err[CannotTransform]:
  76     """(T)ransform Err
  77
  78     Convenience function used when working with the TResult type.
  79     """
  80     cant_transform = CannotTransform(err_msg)
  81     return Err(cant_transform)
  82
  83
  84 def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]:
  85     """A transformer which normalizes spacing around power operators."""
  86
  87     # Performance optimization to avoid unnecessary Leaf clones and other ops.
  88     for leaf in line.leaves:
  89         if leaf.type == token.DOUBLESTAR:
  90             break
  91     else:
  92         raise CannotTransform("No doublestar token was found in the line.")
  93
  94     def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
  95         # Brackets and parentheses indicate calls, subscripts, etc. ...
  96         # basically stuff that doesn't count as "simple". Only a NAME lookup
  97         # or dotted lookup (eg. NAME.NAME) is OK.
  98         if step == -1:
  99             disallowed = {token.RPAR, token.RSQB}
 100         else:
 101             disallowed = {token.LPAR, token.LSQB}
 102
 103         while 0 <= index < len(line.leaves):
 104             current = line.leaves[index]
 105             if current.type in disallowed:
 106                 return False
 107             if current.type not in {token.NAME, token.DOT} or current.value == "for":
 108                 # If the current token isn't disallowed, we'll assume this is simple as
 109                 # only the disallowed tokens are semantically attached to this lookup
 110                 # expression we're checking. Also, stop early if we hit the 'for' bit
 111                 # of a comprehension.
 112                 return True
 113
 114             index += step
 115
 116         return True
 117
 118     def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
 119         # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple
 120         # lookup (see above), with or without a preceding unary operator.
 121         start = line.leaves[index]
 122         if start.type in {token.NAME, token.NUMBER}:
 123             return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
 124
 125         if start.type in {token.PLUS, token.MINUS, token.TILDE}:
 126             if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
 127                 # step is always one as bases with a preceding unary op will be checked
 128                 # for simplicity starting from the next token (so it'll hit the check
 129                 # above).
 130                 return is_simple_lookup(index + 1, step=1)
 131
 132         return False
 133
 134     new_line = line.clone()
 135     should_hug = False
 136     for idx, leaf in enumerate(line.leaves):
 137         new_leaf = leaf.clone()
 138         if should_hug:
 139             new_leaf.prefix = ""
 140             should_hug = False
 141
 142         should_hug = (
 143             (0 < idx < len(line.leaves) - 1)
 144             and leaf.type == token.DOUBLESTAR
 145             and is_simple_operand(idx - 1, kind="base")
 146             and line.leaves[idx - 1].value != "lambda"
 147             and is_simple_operand(idx + 1, kind="exponent")
 148         )
 149         if should_hug:
 150             new_leaf.prefix = ""
 151
 152         # We have to be careful to make a new line properly:
 153         # - bracket related metadata must be maintained (handled by Line.append)
 154         # - comments need to copied over, updating the leaf IDs they're attached to
 155         new_line.append(new_leaf, preformatted=True)
 156         for comment_leaf in line.comments_after(leaf):
 157             new_line.append(comment_leaf, preformatted=True)
 158
 159     yield new_line
 160
 161
 162 class StringTransformer(ABC):
 163     """
 164     An implementation of the Transformer protocol that relies on its
 165     subclasses overriding the template methods `do_match(...)` and
 166     `do_transform(...)`.
 167
 168     This Transformer works exclusively on strings (for example, by merging
 169     or splitting them).
 170
 171     The following sections can be found among the docstrings of each concrete
 172     StringTransformer subclass.
 173
 174     Requirements:
 175         Which requirements must be met of the given Line for this
 176         StringTransformer to be applied?
 177
 178     Transformations:
 179         If the given Line meets all of the above requirements, which string
 180         transformations can you expect to be applied to it by this
 181         StringTransformer?
 182
 183     Collaborations:
 184         What contractual agreements does this StringTransformer have with other
 185         StringTransfomers? Such collaborations should be eliminated/minimized
 186         as much as possible.
 187     """
 188
 189     __name__: Final = "StringTransformer"
 190
 191     # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
 192     # `abc.ABC`.
 193     def __init__(self, line_length: int, normalize_strings: bool) -> None:
 194         self.line_length = line_length
 195         self.normalize_strings = normalize_strings
 196
 197     @abstractmethod
 198     def do_match(self, line: Line) -> TMatchResult:
 199         """
 200         Returns:
 201             * Ok(string_idx) such that `line.leaves[string_idx]` is our target
 202             string, if a match was able to be made.
 203                 OR
 204             * Err(CannotTransform), if a match was not able to be made.
 205         """
 206
 207     @abstractmethod
 208     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
 209         """
 210         Yields:
 211             * Ok(new_line) where new_line is the new transformed line.
 212                 OR
 213             * Err(CannotTransform) if the transformation failed for some reason. The
 214             `do_match(...)` template method should usually be used to reject
 215             the form of the given Line, but in some cases it is difficult to
 216             know whether or not a Line meets the StringTransformer's
 217             requirements until the transformation is already midway.
 218
 219         Side Effects:
 220             This method should NOT mutate @line directly, but it MAY mutate the
 221             Line's underlying Node structure. (WARNING: If the underlying Node
 222             structure IS altered, then this method should NOT be allowed to
 223             yield an CannotTransform after that point.)
 224         """
 225
 226     def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
 227         """
 228         StringTransformer instances have a call signature that mirrors that of
 229         the Transformer type.
 230
 231         Raises:
 232             CannotTransform(...) if the concrete StringTransformer class is unable
 233             to transform @line.
 234         """
 235         # Optimization to avoid calling `self.do_match(...)` when the line does
 236         # not contain any string.
 237         if not any(leaf.type == token.STRING for leaf in line.leaves):
 238             raise CannotTransform("There are no strings in this line.")
 239
 240         match_result = self.do_match(line)
 241
 242         if isinstance(match_result, Err):
 243             cant_transform = match_result.err()
 244             raise CannotTransform(
 245                 f"The string transformer {self.__class__.__name__} does not recognize"
 246                 " this line as one that it can transform."
 247             ) from cant_transform
 248
 249         string_idx = match_result.ok()
 250
 251         for line_result in self.do_transform(line, string_idx):
 252             if isinstance(line_result, Err):
 253                 cant_transform = line_result.err()
 254                 raise CannotTransform(
 255                     "StringTransformer failed while attempting to transform string."
 256                 ) from cant_transform
 257             line = line_result.ok()
 258             yield line
 259
 260
@dataclass
class CustomSplit:
    """A custom (i.e. manual) string split.

    A single CustomSplit instance represents a single substring.

    Instances are created by StringMerger._merge_string_group(...), one per
    substring that was merged, and stored through
    CustomSplitMapMixin.add_custom_splits(...).

    Examples:
        Consider the following string:
        ```
        "Hi there friend."
        " This is a custom"
        f" string {split}."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```
    """

    # True iff the original substring carried a (non-empty) string prefix
    # before it was merged.
    has_prefix: bool
    # Where this substring ends. _merge_string_group(...) derives it from the
    # position of the substring's break marker, measured from the end of the
    # previous substring, adding the merged prefix length when `has_prefix` is
    # set. NOTE(review): the consumer of this index is not visible in this part
    # of the file -- confirm the exact convention there.
    break_idx: int
 285
 286
 287 @trait
 288 class CustomSplitMapMixin:
 289     """
 290     This mixin class is used to map merged strings to a sequence of
 291     CustomSplits, which will then be used to re-split the strings iff none of
 292     the resultant substrings go over the configured max line length.
 293     """
 294
 295     _Key: ClassVar = Tuple[StringID, str]
 296     _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
 297         tuple
 298     )
 299
 300     @staticmethod
 301     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
 302         """
 303         Returns:
 304             A unique identifier that is used internally to map @string to a
 305             group of custom splits.
 306         """
 307         return (id(string), string)
 308
 309     def add_custom_splits(
 310         self, string: str, custom_splits: Iterable[CustomSplit]
 311     ) -> None:
 312         """Custom Split Map Setter Method
 313
 314         Side Effects:
 315             Adds a mapping from @string to the custom splits @custom_splits.
 316         """
 317         key = self._get_key(string)
 318         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
 319
 320     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
 321         """Custom Split Map Getter Method
 322
 323         Returns:
 324             * A list of the custom splits that are mapped to @string, if any
 325             exist.
 326                 OR
 327             * [], otherwise.
 328
 329         Side Effects:
 330             Deletes the mapping between @string and its associated custom
 331             splits (which are returned to the caller).
 332         """
 333         key = self._get_key(string)
 334
 335         custom_splits = self._CUSTOM_SPLIT_MAP[key]
 336         del self._CUSTOM_SPLIT_MAP[key]
 337
 338         return list(custom_splits)
 339
 340     def has_custom_splits(self, string: str) -> bool:
 341         """
 342         Returns:
 343             True iff @string is associated with a set of custom splits.
 344         """
 345         key = self._get_key(string)
 346         return key in self._CUSTOM_SPLIT_MAP
 347
 348
 349 class StringMerger(StringTransformer, CustomSplitMapMixin):
 350     """StringTransformer that merges strings together.
 351
 352     Requirements:
        (A) The line contains adjacent strings such that ALL of the validation checks
        listed in StringMerger._validate_msg(...)'s docstring pass.
 355             OR
 356         (B) The line contains a string which uses line continuation backslashes.
 357
 358     Transformations:
        Depending on which of the two requirements above were met, either:
 360
 361         (A) The string group associated with the target string is merged.
 362             OR
 363         (B) All line-continuation backslashes are removed from the target string.
 364
 365     Collaborations:
 366         StringMerger provides custom split information to StringSplitter.
 367     """
 368
 369     def do_match(self, line: Line) -> TMatchResult:
 370         LL = line.leaves
 371
 372         is_valid_index = is_valid_index_factory(LL)
 373
 374         for i, leaf in enumerate(LL):
 375             if (
 376                 leaf.type == token.STRING
 377                 and is_valid_index(i + 1)
 378                 and LL[i + 1].type == token.STRING
 379             ):
 380                 return Ok(i)
 381
 382             if leaf.type == token.STRING and "\\\n" in leaf.value:
 383                 return Ok(i)
 384
 385         return TErr("This line has no strings that need merging.")
 386
 387     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
 388         new_line = line
 389         rblc_result = self._remove_backslash_line_continuation_chars(
 390             new_line, string_idx
 391         )
 392         if isinstance(rblc_result, Ok):
 393             new_line = rblc_result.ok()
 394
 395         msg_result = self._merge_string_group(new_line, string_idx)
 396         if isinstance(msg_result, Ok):
 397             new_line = msg_result.ok()
 398
 399         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
 400             msg_cant_transform = msg_result.err()
 401             rblc_cant_transform = rblc_result.err()
 402             cant_transform = CannotTransform(
 403                 "StringMerger failed to merge any strings in this line."
 404             )
 405
 406             # Chain the errors together using `__cause__`.
 407             msg_cant_transform.__cause__ = rblc_cant_transform
 408             cant_transform.__cause__ = msg_cant_transform
 409
 410             yield Err(cant_transform)
 411         else:
 412             yield Ok(new_line)
 413
 414     @staticmethod
 415     def _remove_backslash_line_continuation_chars(
 416         line: Line, string_idx: int
 417     ) -> TResult[Line]:
 418         """
 419         Merge strings that were split across multiple lines using
 420         line-continuation backslashes.
 421
 422         Returns:
 423             Ok(new_line), if @line contains backslash line-continuation
 424             characters.
 425                 OR
 426             Err(CannotTransform), otherwise.
 427         """
 428         LL = line.leaves
 429
 430         string_leaf = LL[string_idx]
 431         if not (
 432             string_leaf.type == token.STRING
 433             and "\\\n" in string_leaf.value
 434             and not has_triple_quotes(string_leaf.value)
 435         ):
 436             return TErr(
 437                 f"String leaf {string_leaf} does not contain any backslash line"
 438                 " continuation characters."
 439             )
 440
 441         new_line = line.clone()
 442         new_line.comments = line.comments.copy()
 443         append_leaves(new_line, line, LL)
 444
 445         new_string_leaf = new_line.leaves[string_idx]
 446         new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
 447
 448         return Ok(new_line)
 449
    def _merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
        """
        Merges string group (i.e. set of adjacent strings) where the first
        string in the group is `line.leaves[string_idx]`.

        Returns:
            Ok(new_line), if ALL of the validation checks found in
            _validate_msg(...) pass.
                OR
            Err(CannotTransform), otherwise.

        Side Effects:
            * If the first string of the group has a parent node, that node is
            replaced in the tree by the merged string leaf.
            * The custom splits describing the original substrings are
            registered for the merged string via add_custom_splits(...).
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # Bail out before touching anything if the group cannot be merged.
        vresult = self._validate_msg(line, string_idx)
        if isinstance(vresult, Err):
            return vresult

        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The closing quote character of the first string; the merged string
        # is built with this quote character.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)

            # Matches a (possibly empty) run of an even number of backslashes
            # that is not itself preceded by a backslash. A QUOTE following
            # such a run is not escaped yet, so one backslash is inserted
            # right before it.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have: the (lower-cased) prefix of the first string in
        # the group that has a non-empty one, or "" if none does.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            # Re-assemble the merged string so far, with a BREAK_MARK recording
            # where this substring ends, then strip it again so that it can
            # serve as the left-hand part in the next iteration.
            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        # 'temp_string' is the merged string's contents; after each marker is
        # located, everything up to and including it is cut off, so every
        # 'mark_idx' is measured from the end of the previous substring.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        # The leaf that actually goes into the line has the markers removed.
        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            replace_child(atom_node, string_leaf)

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        for i, leaf in enumerate(LL):
            if i == string_idx:
                new_line.append(string_leaf)

            # The original strings of the group are dropped; only the comments
            # attached to them are carried over.
            if string_idx <= i < string_idx + num_of_strings:
                for comment_leaf in line.comments_after(LL[i]):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            append_leaves(new_line, line, [leaf])

        self.add_custom_splits(string_leaf.value, custom_splits)
        return Ok(new_line)
 592
 593     @staticmethod
 594     def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
 595         """Validate (M)erge (S)tring (G)roup
 596
 597         Transform-time string validation logic for __merge_string_group(...).
 598
 599         Returns:
 600             * Ok(None), if ALL validation checks (listed below) pass.
 601                 OR
 602             * Err(CannotTransform), if any of the following are true:
 603                 - The target string group does not contain ANY stand-alone comments.
 604                 - The target string is not in a string group (i.e. it has no
 605                   adjacent strings).
 606                 - The string group has more than one inline comment.
 607                 - The string group has an inline comment that appears to be a pragma.
 608                 - The set of all string prefixes in the string group is of
 609                   length greater than one and is not equal to {"", "f"}.
 610                 - The string group consists of raw strings.
 611         """
 612         # We first check for "inner" stand-alone comments (i.e. stand-alone
 613         # comments that have a string leaf before them AND after them).
 614         for inc in [1, -1]:
 615             i = string_idx
 616             found_sa_comment = False
 617             is_valid_index = is_valid_index_factory(line.leaves)
 618             while is_valid_index(i) and line.leaves[i].type in [
 619                 token.STRING,
 620                 STANDALONE_COMMENT,
 621             ]:
 622                 if line.leaves[i].type == STANDALONE_COMMENT:
 623                     found_sa_comment = True
 624                 elif found_sa_comment:
 625                     return TErr(
 626                         "StringMerger does NOT merge string groups which contain "
 627                         "stand-alone comments."
 628                     )
 629
 630                 i += inc
 631
 632         num_of_inline_string_comments = 0
 633         set_of_prefixes = set()
 634         num_of_strings = 0
 635         for leaf in line.leaves[string_idx:]:
 636             if leaf.type != token.STRING:
 637                 # If the string group is trailed by a comma, we count the
 638                 # comments trailing the comma to be one of the string group's
 639                 # comments.
 640                 if leaf.type == token.COMMA and id(leaf) in line.comments:
 641                     num_of_inline_string_comments += 1
 642                 break
 643
 644             if has_triple_quotes(leaf.value):
 645                 return TErr("StringMerger does NOT merge multiline strings.")
 646
 647             num_of_strings += 1
 648             prefix = get_string_prefix(leaf.value).lower()
 649             if "r" in prefix:
 650                 return TErr("StringMerger does NOT merge raw strings.")
 651
 652             set_of_prefixes.add(prefix)
 653
 654             if id(leaf) in line.comments:
 655                 num_of_inline_string_comments += 1
 656                 if contains_pragma_comment(line.comments[id(leaf)]):
 657                     return TErr("Cannot merge strings which have pragma comments.")
 658
 659         if num_of_strings < 2:
 660             return TErr(
 661                 f"Not enough strings to merge (num_of_strings={num_of_strings})."
 662             )
 663
 664         if num_of_inline_string_comments > 1:
 665             return TErr(
 666                 f"Too many inline string comments ({num_of_inline_string_comments})."
 667             )
 668
 669         if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
 670             return TErr(f"Too many different prefixes ({set_of_prefixes}).")
 671
 672         return Ok(None)
 673
 674
 675 class StringParenStripper(StringTransformer):
 676     """StringTransformer that strips surrounding parentheses from strings.
 677
 678     Requirements:
 679         The line contains a string which is surrounded by parentheses and:
 680             - The target string is NOT the only argument to a function call.
 681             - The target string is NOT a "pointless" string.
 682             - If the target string contains a PERCENT, the brackets are not
 683               preceded or followed by an operator with higher precedence than
 684               PERCENT.
 685
 686     Transformations:
 687         The parentheses mentioned in the 'Requirements' section are stripped.
 688
 689     Collaborations:
 690         StringParenStripper has its own inherent usefulness, but it is also
 691         relied on to clean up the parentheses created by StringParenWrapper (in
 692         the event that they are no longer needed).
 693     """
 694
 695     def do_match(self, line: Line) -> TMatchResult:
 696         LL = line.leaves
 697
 698         is_valid_index = is_valid_index_factory(LL)
 699
 700         for idx, leaf in enumerate(LL):
 701             # Should be a string...
 702             if leaf.type != token.STRING:
 703                 continue
 704
 705             # If this is a "pointless" string...
 706             if (
 707                 leaf.parent
 708                 and leaf.parent.parent
 709                 and leaf.parent.parent.type == syms.simple_stmt
 710             ):
 711                 continue
 712
 713             # Should be preceded by a non-empty LPAR...
 714             if (
 715                 not is_valid_index(idx - 1)
 716                 or LL[idx - 1].type != token.LPAR
 717                 or is_empty_lpar(LL[idx - 1])
 718             ):
 719                 continue
 720
 721             # That LPAR should NOT be preceded by a function name or a closing
 722             # bracket (which could be a function which returns a function or a
 723             # list/dictionary that contains a function)...
 724             if is_valid_index(idx - 2) and (
 725                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
 726             ):
 727                 continue
 728
 729             string_idx = idx
 730
 731             # Skip the string trailer, if one exists.
 732             string_parser = StringParser()
 733             next_idx = string_parser.parse(LL, string_idx)
 734
 735             # if the leaves in the parsed string include a PERCENT, we need to
 736             # make sure the initial LPAR is NOT preceded by an operator with
 737             # higher or equal precedence to PERCENT
 738             if is_valid_index(idx - 2):
 739                 # mypy can't quite follow unless we name this
 740                 before_lpar = LL[idx - 2]
 741                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
 742                     (
 743                         before_lpar.type
 744                         in {
 745                             token.STAR,
 746                             token.AT,
 747                             token.SLASH,
 748                             token.DOUBLESLASH,
 749                             token.PERCENT,
 750                             token.TILDE,
 751                             token.DOUBLESTAR,
 752                             token.AWAIT,
 753                             token.LSQB,
 754                             token.LPAR,
 755                         }
 756                     )
 757                     or (
 758                         # only unary PLUS/MINUS
 759                         before_lpar.parent
 760                         and before_lpar.parent.type == syms.factor
 761                         and (before_lpar.type in {token.PLUS, token.MINUS})
 762                     )
 763                 ):
 764                     continue
 765
 766             # Should be followed by a non-empty RPAR...
 767             if (
 768                 is_valid_index(next_idx)
 769                 and LL[next_idx].type == token.RPAR
 770                 and not is_empty_rpar(LL[next_idx])
 771             ):
 772                 # That RPAR should NOT be followed by anything with higher
 773                 # precedence than PERCENT
 774                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
 775                     token.DOUBLESTAR,
 776                     token.LSQB,
 777                     token.LPAR,
 778                     token.DOT,
 779                 }:
 780                     continue
 781
 782                 return Ok(string_idx)
 783
 784         return TErr("This line has no strings wrapped in parens.")
 785
 786     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
 787         LL = line.leaves
 788
 789         string_parser = StringParser()
 790         rpar_idx = string_parser.parse(LL, string_idx)
 791
 792         for leaf in (LL[string_idx - 1], LL[rpar_idx]):
 793             if line.comments_after(leaf):
 794                 yield TErr(
 795                     "Will not strip parentheses which have comments attached to them."
 796                 )
 797                 return
 798
 799         new_line = line.clone()
 800         new_line.comments = line.comments.copy()
 801         try:
 802             append_leaves(new_line, line, LL[: string_idx - 1])
 803         except BracketMatchError:
 804             # HACK: I believe there is currently a bug somewhere in
 805             # right_hand_split() that is causing brackets to not be tracked
 806             # properly by a shared BracketTracker.
 807             append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True)
 808
 809         string_leaf = Leaf(token.STRING, LL[string_idx].value)
 810         LL[string_idx - 1].remove()
 811         replace_child(LL[string_idx], string_leaf)
 812         new_line.append(string_leaf)
 813
 814         append_leaves(
 815             new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
 816         )
 817
 818         LL[rpar_idx].remove()
 819
 820         yield Ok(new_line)
 821
 822
 823 class BaseStringSplitter(StringTransformer):
 824     """
 825     Abstract class for StringTransformers which transform a Line's strings by splitting
 826     them or placing them on their own lines where necessary to avoid going over
 827     the configured line length.
 828
 829     Requirements:
 830         * The target string value is responsible for the line going over the
 831         line length limit. It follows that after all of black's other line
 832         split methods have been exhausted, this line (or one of the resulting
 833         lines after all line splits are performed) would still be over the
 834         line_length limit unless we split this string.
 835             AND
 836         * The target string is NOT a "pointless" string (i.e. a string that has
 837         no parent or siblings).
 838             AND
 839         * The target string is not followed by an inline comment that appears
 840         to be a pragma.
 841             AND
 842         * The target string is not a multiline (i.e. triple-quote) string.
 843     """
 844
 845     STRING_OPERATORS: Final = [
 846         token.EQEQUAL,
 847         token.GREATER,
 848         token.GREATEREQUAL,
 849         token.LESS,
 850         token.LESSEQUAL,
 851         token.NOTEQUAL,
 852         token.PERCENT,
 853         token.PLUS,
 854         token.STAR,
 855     ]
 856
    @abstractmethod
    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        BaseStringSplitter asks its clients to override this method instead of
        `StringTransformer.do_match(...)`.

        Follows the same protocol as `StringTransformer.do_match(...)`.

        `BaseStringSplitter.do_match(...)` calls this method first. An Err
        result is returned to the caller unchanged; an Ok result is only
        returned after the string index it carries has also passed
        `BaseStringSplitter._validate(...)`.

        Refer to `help(StringTransformer.do_match)` for more information.
        """
 867
 868     def do_match(self, line: Line) -> TMatchResult:
 869         match_result = self.do_splitter_match(line)
 870         if isinstance(match_result, Err):
 871             return match_result
 872
 873         string_idx = match_result.ok()
 874         vresult = self._validate(line, string_idx)
 875         if isinstance(vresult, Err):
 876             return vresult
 877
 878         return match_result
 879
 880     def _validate(self, line: Line, string_idx: int) -> TResult[None]:
 881         """
 882         Checks that @line meets all of the requirements listed in this classes'
 883         docstring. Refer to `help(BaseStringSplitter)` for a detailed
 884         description of those requirements.
 885
 886         Returns:
 887             * Ok(None), if ALL of the requirements are met.
 888                 OR
 889             * Err(CannotTransform), if ANY of the requirements are NOT met.
 890         """
 891         LL = line.leaves
 892
 893         string_leaf = LL[string_idx]
 894
 895         max_string_length = self._get_max_string_length(line, string_idx)
 896         if len(string_leaf.value) <= max_string_length:
 897             return TErr(
 898                 "The string itself is not what is causing this line to be too long."
 899             )
 900
 901         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
 902             token.STRING,
 903             token.NEWLINE,
 904         ]:
 905             return TErr(
 906                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
 907                 " no parent)."
 908             )
 909
 910         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
 911             line.comments[id(line.leaves[string_idx])]
 912         ):
 913             return TErr(
 914                 "Line appears to end with an inline pragma comment. Splitting the line"
 915                 " could modify the pragma's behavior."
 916             )
 917
 918         if has_triple_quotes(string_leaf.value):
 919             return TErr("We cannot split multiline strings.")
 920
 921         return Ok(None)
 922
 923     def _get_max_string_length(self, line: Line, string_idx: int) -> int:
 924         """
 925         Calculates the max string length used when attempting to determine
 926         whether or not the target string is responsible for causing the line to
 927         go over the line length limit.
 928
 929         WARNING: This method is tightly coupled to both StringSplitter and
 930         (especially) StringParenWrapper. There is probably a better way to
 931         accomplish what is being done here.
 932
 933         Returns:
 934             max_string_length: such that `line.leaves[string_idx].value >
 935             max_string_length` implies that the target string IS responsible
 936             for causing this line to exceed the line length limit.
 937         """
 938         LL = line.leaves
 939
 940         is_valid_index = is_valid_index_factory(LL)
 941
 942         # We use the shorthand "WMA4" in comments to abbreviate "We must
 943         # account for". When giving examples, we use STRING to mean some/any
 944         # valid string.
 945         #
 946         # Finally, we use the following convenience variables:
 947         #
 948         #   P:  The leaf that is before the target string leaf.
 949         #   N:  The leaf that is after the target string leaf.
 950         #   NN: The leaf that is after N.
 951
 952         # WMA4 the whitespace at the beginning of the line.
 953         offset = line.depth * 4
 954
 955         if is_valid_index(string_idx - 1):
 956             p_idx = string_idx - 1
 957             if (
 958                 LL[string_idx - 1].type == token.LPAR
 959                 and LL[string_idx - 1].value == ""
 960                 and string_idx >= 2
 961             ):
 962                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
 963                 p_idx -= 1
 964
 965             P = LL[p_idx]
 966             if P.type in self.STRING_OPERATORS:
 967                 # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
 968                 offset += len(str(P)) + 1
 969
 970             if P.type == token.COMMA:
 971                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
 972                 offset += 3
 973
 974             if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
 975                 # This conditional branch is meant to handle dictionary keys,
 976                 # variable assignments, 'return STRING' statement lines, and
 977                 # 'else STRING' ternary expression lines.
 978
 979                 # WMA4 a single space.
 980                 offset += 1
 981
 982                 # WMA4 the lengths of any leaves that came before that space,
 983                 # but after any closing bracket before that space.
 984                 for leaf in reversed(LL[: p_idx + 1]):
 985                     offset += len(str(leaf))
 986                     if leaf.type in CLOSING_BRACKETS:
 987                         break
 988
 989         if is_valid_index(string_idx + 1):
 990             N = LL[string_idx + 1]
 991             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
 992                 # If the next leaf is an empty RPAR placeholder, we should skip it.
 993                 N = LL[string_idx + 2]
 994
 995             if N.type == token.COMMA:
 996                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
 997                 offset += 1
 998
 999             if is_valid_index(string_idx + 2):
1000                 NN = LL[string_idx + 2]
1001
1002                 if N.type == token.DOT and NN.type == token.NAME:
1003                     # This conditional branch is meant to handle method calls invoked
1004                     # off of a string literal up to and including the LPAR character.
1005
1006                     # WMA4 the '.' character.
1007                     offset += 1
1008
1009                     if (
1010                         is_valid_index(string_idx + 3)
1011                         and LL[string_idx + 3].type == token.LPAR
1012                     ):
1013                         # WMA4 the left parenthesis character.
1014                         offset += 1
1015
1016                     # WMA4 the length of the method's name.
1017                     offset += len(NN.value)
1018
1019         has_comments = False
1020         for comment_leaf in line.comments_after(LL[string_idx]):
1021             if not has_comments:
1022                 has_comments = True
1023                 # WMA4 two spaces before the '#' character.
1024                 offset += 2
1025
1026             # WMA4 the length of the inline comment.
1027             offset += len(comment_leaf.value)
1028
1029         max_string_length = self.line_length - offset
1030         return max_string_length
1031
1032
def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yields spans corresponding to expressions in a given f-string.
    Spans are half-open ranges (left inclusive, right exclusive).
    Only outermost expressions are reported: braces nested inside an
    expression (e.g. a format spec such as `{x:>{width}}`) are part of the
    surrounding span.
    Assumes the input string is a valid f-string, but will not crash if the input
    string is invalid.
    """
    open_braces: List[int] = []  # positions of the "{" that are still open
    end = len(s)
    pos = 0
    while pos < end:
        char = s[pos]

        if char == "{":
            # "{{" in the literal part of the f-string is an escaped brace.
            if not open_braces and s[pos + 1 : pos + 2] == "{":
                pos += 2
            else:
                open_braces.append(pos)
                pos += 1

        elif char == "}":
            # A "}" without a matching "{" is simply ignored.
            if open_braces:
                start = open_braces.pop()
                # Back at the top level: the outermost expression is complete.
                if not open_braces:
                    yield start, pos + 1
            pos += 1

        elif open_braces and char in "'\"":
            # Inside an expression we jump over string literals, so braces
            # within them are not counted. Note that backslashes are not legal
            # in the expression portion of f-strings.
            triple = s[pos : pos + 3]
            quote = triple if triple in ("'''", '"""') else char
            closing = s.find(quote, pos + len(quote))
            if closing == -1:
                # Unterminated string: skip to the end of the input.
                closing = end
            pos = closing + len(quote)

        else:
            pos += 1
1078
1079
def fstring_contains_expr(s: str) -> bool:
    """Returns True iff `iter_fexpr_spans(s)` yields at least one span."""
    return next(iter_fexpr_spans(s), None) is not None
1082
1083
1084 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
1085     """
1086     StringTransformer that splits "atom" strings (i.e. strings which exist on
1087     lines by themselves).
1088
1089     Requirements:
1090         * The line consists ONLY of a single string (possibly prefixed by a
1091         string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
1092         a trailing comma.
1093             AND
1094         * All of the requirements listed in BaseStringSplitter's docstring.
1095
1096     Transformations:
1097         The string mentioned in the 'Requirements' section is split into as
1098         many substrings as necessary to adhere to the configured line length.
1099
1100         In the final set of substrings, no substring should be smaller than
1101         MIN_SUBSTR_SIZE characters.
1102
1103         The string will ONLY be split on spaces (i.e. each new substring should
1104         start with a space). Note that the string will NOT be split on a space
1105         which is escaped with a backslash.
1106
1107         If the string is an f-string, it will NOT be split in the middle of an
1108         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
1109         else bar()} is an f-expression).
1110
1111         If the string that is being split has an associated set of custom split
1112         records and those custom splits will NOT result in any line going over
1113         the configured line length, those custom splits are used. Otherwise the
1114         string is split as late as possible (from left-to-right) while still
1115         adhering to the transformation rules listed above.
1116
1117     Collaborations:
1118         StringSplitter relies on StringMerger to construct the appropriate
1119         CustomSplit objects and add them to the custom split map.
1120     """
1121
1122     MIN_SUBSTR_SIZE: Final = 6
1123
1124     def do_splitter_match(self, line: Line) -> TMatchResult:
1125         LL = line.leaves
1126
1127         is_valid_index = is_valid_index_factory(LL)
1128
1129         idx = 0
1130
1131         # The first two leaves MAY be the 'not in' keywords...
1132         if (
1133             is_valid_index(idx)
1134             and is_valid_index(idx + 1)
1135             and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
1136             and str(LL[idx]) + str(LL[idx + 1]) == "not in"
1137         ):
1138             idx += 2
1139         # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
1140         elif is_valid_index(idx) and (
1141             LL[idx].type in self.STRING_OPERATORS
1142             or LL[idx].type == token.NAME
1143             and str(LL[idx]) == "in"
1144         ):
1145             idx += 1
1146
1147         # The next/first leaf MAY be an empty LPAR...
1148         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
1149             idx += 1
1150
1151         # The next/first leaf MUST be a string...
1152         if not is_valid_index(idx) or LL[idx].type != token.STRING:
1153             return TErr("Line does not start with a string.")
1154
1155         string_idx = idx
1156
1157         # Skip the string trailer, if one exists.
1158         string_parser = StringParser()
1159         idx = string_parser.parse(LL, string_idx)
1160
1161         # That string MAY be followed by an empty RPAR...
1162         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
1163             idx += 1
1164
1165         # That string / empty RPAR leaf MAY be followed by a comma...
1166         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1167             idx += 1
1168
1169         # But no more leaves are allowed...
1170         if is_valid_index(idx):
1171             return TErr("This line does not end with a string.")
1172
1173         return Ok(string_idx)
1174
    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Splits the string at @string_idx into substrings and yields one new
        line per substring.

        Break points come either from the custom splits registered for the
        string (see `pop_custom_splits`) or from `_get_break_idx(...)`. If
        `_get_break_idx(...)` cannot find a break point and custom splits are
        registered, the splitting starts over using the custom splits.

        Args:
            line: The line to transform.
            string_idx: Index (into `line.leaves`) of the string to split.

        Yields:
            * Err(CannotTransform), if @line is indented so deeply that no
              room is left for the string value.
                OR
            * Ok(new_line) for every substring, in order. The leading string
              operator leaves (if any) are only appended to the first line.
              The line holding the last substring also receives the leaves to
              the right of the target string, unless they do not fit, in
              which case they are yielded on one additional line.
        """
        LL = line.leaves

        # The closing quote character of the target string; it is used to
        # terminate / re-open every substring.
        QUOTE = LL[string_idx].value[-1]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        prefix = get_string_prefix(LL[string_idx].value).lower()

        # We MAY choose to drop the 'f' prefix from substrings that don't
        # contain any f-expressions, but ONLY if the original f-string
        # contains at least one f-expression. Otherwise, we will alter the AST
        # of the program.
        drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
            LL[string_idx].value
        )

        # Read by maybe_append_string_operators() below; reset to False once
        # the first substring line has been constructed.
        first_string_line = True

        string_op_leaves = self._get_string_operator_leaves(LL)
        # The combined length of the operator leaves plus one (presumably the
        # space that separates them from the string), or 0 without operators.
        string_op_leaves_length = (
            sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
            if string_op_leaves
            else 0
        )

        def maybe_append_string_operators(new_line: Line) -> None:
            """
            Side Effects:
                If @line starts with a string operator and this is the first
                line we are constructing, this function appends the string
                operator to @new_line and replaces the old string operator leaf
                in the node structure. Otherwise this function does nothing.
            """
            maybe_prefix_leaves = string_op_leaves if first_string_line else []
            for i, prefix_leaf in enumerate(maybe_prefix_leaves):
                replace_child(LL[i], prefix_leaf)
                new_line.append(prefix_leaf)

        ends_with_comma = (
            is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
        )

        def max_last_string() -> int:
            """
            Returns:
                The max allowed length of the string value used for the last
                line we will construct.
            """
            result = self.line_length
            result -= line.depth * 4
            result -= 1 if ends_with_comma else 0
            result -= string_op_leaves_length
            return result

        # --- Calculate Max Break Index (for string value)
        # We start with the line length limit
        max_break_idx = self.line_length
        # The last index of a string of length N is N-1.
        max_break_idx -= 1
        # Leading whitespace is not present in the string value (e.g. Leaf.value).
        max_break_idx -= line.depth * 4
        if max_break_idx < 0:
            yield TErr(
                f"Unable to split {LL[string_idx].value} at such high of a line depth:"
                f" {line.depth}"
            )
            return

        # Check if StringMerger registered any custom splits.
        custom_splits = self.pop_custom_splits(LL[string_idx].value)
        # We use them ONLY if none of them would produce lines that exceed the
        # line limit.
        use_custom_breakpoints = bool(
            custom_splits
            and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
        )

        # Temporary storage for the remaining chunk of the string line that
        # can't fit onto the line currently being constructed.
        rest_value = LL[string_idx].value

        def more_splits_should_be_made() -> bool:
            """
            Returns:
                True iff `rest_value` (the remaining string value from the last
                split), should be split again.
            """
            if use_custom_breakpoints:
                return len(custom_splits) > 1
            else:
                return len(rest_value) > max_last_string()

        # The lines are collected first and only yielded after the loop,
        # because the loop may discard them and start over (see below).
        string_line_results: List[Ok[Line]] = []
        while more_splits_should_be_made():
            if use_custom_breakpoints:
                # Custom User Split (manual)
                csplit = custom_splits.pop(0)
                break_idx = csplit.break_idx
            else:
                # Algorithmic Split (automatic)
                max_bidx = max_break_idx - string_op_leaves_length
                maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
                if maybe_break_idx is None:
                    # If we are unable to algorithmically determine a good split
                    # and this string has custom splits registered to it, we
                    # fall back to using them--which means we have to start
                    # over from the beginning.
                    if custom_splits:
                        rest_value = LL[string_idx].value
                        string_line_results = []
                        first_string_line = True
                        use_custom_breakpoints = True
                        continue

                    # Otherwise, we stop splitting here.
                    break

                break_idx = maybe_break_idx

            # --- Construct `next_value`
            next_value = rest_value[:break_idx] + QUOTE

            # HACK: The following 'if' statement is a hack to fix the custom
            # breakpoint index in the case of either: (a) substrings that were
            # f-strings but will have the 'f' prefix removed OR (b) substrings
            # that were not f-strings but will now become f-strings because of
            # redundant use of the 'f' prefix (i.e. none of the substrings
            # contain f-expressions but one or more of them had the 'f' prefix
            # anyway; in which case, we will prepend 'f' to _all_ substrings).
            #
            # There is probably a better way to accomplish what is being done
            # here...
            #
            # If this substring is an f-string, we _could_ remove the 'f'
            # prefix, and the current custom split did NOT originally use a
            # prefix...
            if (
                next_value != self._normalize_f_string(next_value, prefix)
                and use_custom_breakpoints
                and not csplit.has_prefix
            ):
                # Then `csplit.break_idx` will be off by one after removing
                # the 'f' prefix.
                break_idx += 1
                next_value = rest_value[:break_idx] + QUOTE

            if drop_pointless_f_prefix:
                next_value = self._normalize_f_string(next_value, prefix)

            # --- Construct `next_leaf`
            next_leaf = Leaf(token.STRING, next_value)
            insert_str_child(next_leaf)
            self._maybe_normalize_string_quotes(next_leaf)

            # --- Construct `next_line`
            next_line = line.clone()
            maybe_append_string_operators(next_line)
            next_line.append(next_leaf)
            string_line_results.append(Ok(next_line))

            # The remainder becomes a complete string literal again: the
            # original prefix and an opening quote are put in front of it.
            rest_value = prefix + QUOTE + rest_value[break_idx:]
            first_string_line = False

        yield from string_line_results

        if drop_pointless_f_prefix:
            rest_value = self._normalize_f_string(rest_value, prefix)

        rest_leaf = Leaf(token.STRING, rest_value)
        insert_str_child(rest_leaf)

        # NOTE: I could not find a test case that verifies that the following
        # line is actually necessary, but it seems to be. Otherwise we risk
        # not normalizing the last substring, right?
        self._maybe_normalize_string_quotes(rest_leaf)

        last_line = line.clone()
        maybe_append_string_operators(last_line)

        # If there are any leaves to the right of the target string...
        if is_valid_index(string_idx + 1):
            # We use `temp_value` here to determine how long the last line
            # would be if we were to append all the leaves to the right of the
            # target string to the last string line.
            temp_value = rest_value
            for leaf in LL[string_idx + 1 :]:
                temp_value += str(leaf)
                if leaf.type == token.LPAR:
                    break

            # Try to fit them all on the same line with the last substring...
            if (
                len(temp_value) <= max_last_string()
                or LL[string_idx + 1].type == token.COMMA
            ):
                last_line.append(rest_leaf)
                append_leaves(last_line, line, LL[string_idx + 1 :])
                yield Ok(last_line)
            # Otherwise, place the last substring on one line and everything
            # else on a line below that...
            else:
                last_line.append(rest_leaf)
                yield Ok(last_line)

                non_string_line = line.clone()
                append_leaves(non_string_line, line, LL[string_idx + 1 :])
                yield Ok(non_string_line)
        # Else the target string was the last leaf...
        else:
            last_line.append(rest_leaf)
            last_line.comments = line.comments.copy()
            yield Ok(last_line)
1389
1390     def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1391         """
1392         Yields:
1393             All ranges of @string which, if @string were to be split there,
1394             would result in the splitting of an \\N{...} expression (which is NOT
1395             allowed).
1396         """
1397         # True - the previous backslash was unescaped
1398         # False - the previous backslash was escaped *or* there was no backslash
1399         previous_was_unescaped_backslash = False
1400         it = iter(enumerate(string))
1401         for idx, c in it:
1402             if c == "\\":
1403                 previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1404                 continue
1405             if not previous_was_unescaped_backslash or c != "N":
1406                 previous_was_unescaped_backslash = False
1407                 continue
1408             previous_was_unescaped_backslash = False
1409
1410             begin = idx - 1  # the position of backslash before \N{...}
1411             for idx, c in it:
1412                 if c == "}":
1413                     end = idx
1414                     break
1415             else:
1416                 # malformed nameescape expression?
1417                 # should have been detected by AST parsing earlier...
1418                 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1419             yield begin, end
1420
1421     def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1422         """
1423         Yields:
1424             All ranges of @string which, if @string were to be split there,
1425             would result in the splitting of an f-expression (which is NOT
1426             allowed).
1427         """
1428         if "f" not in get_string_prefix(string).lower():
1429             return
1430         yield from iter_fexpr_spans(string)
1431
1432     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
1433         illegal_indices: Set[Index] = set()
1434         iterators = [
1435             self._iter_fexpr_slices(string),
1436             self._iter_nameescape_slices(string),
1437         ]
1438         for it in iterators:
1439             for begin, end in it:
1440                 illegal_indices.update(range(begin, end + 1))
1441         return illegal_indices
1442
1443     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
1444         """
1445         This method contains the algorithm that StringSplitter uses to
1446         determine which character to split each string at.
1447
1448         Args:
1449             @string: The substring that we are attempting to split.
1450             @max_break_idx: The ideal break index. We will return this value if it
1451             meets all the necessary conditions. In the likely event that it
1452             doesn't we will try to find the closest index BELOW @max_break_idx
1453             that does. If that fails, we will expand our search by also
1454             considering all valid indices ABOVE @max_break_idx.
1455
1456         Pre-Conditions:
1457             * assert_is_leaf_string(@string)
1458             * 0 <= @max_break_idx < len(@string)
1459
1460         Returns:
1461             break_idx, if an index is able to be found that meets all of the
1462             conditions listed in the 'Transformations' section of this classes'
1463             docstring.
1464                 OR
1465             None, otherwise.
1466         """
1467         is_valid_index = is_valid_index_factory(string)
1468
1469         assert is_valid_index(max_break_idx)
1470         assert_is_leaf_string(string)
1471
1472         _illegal_split_indices = self._get_illegal_split_indices(string)
1473
1474         def breaks_unsplittable_expression(i: Index) -> bool:
1475             """
1476             Returns:
1477                 True iff returning @i would result in the splitting of an
1478                 unsplittable expression (which is NOT allowed).
1479             """
1480             return i in _illegal_split_indices
1481
1482         def passes_all_checks(i: Index) -> bool:
1483             """
1484             Returns:
1485                 True iff ALL of the conditions listed in the 'Transformations'
1486                 section of this classes' docstring would be be met by returning @i.
1487             """
1488             is_space = string[i] == " "
1489
1490             is_not_escaped = True
1491             j = i - 1
1492             while is_valid_index(j) and string[j] == "\\":
1493                 is_not_escaped = not is_not_escaped
1494                 j -= 1
1495
1496             is_big_enough = (
1497                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
1498                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
1499             )
1500             return (
1501                 is_space
1502                 and is_not_escaped
1503                 and is_big_enough
1504                 and not breaks_unsplittable_expression(i)
1505             )
1506
1507         # First, we check all indices BELOW @max_break_idx.
1508         break_idx = max_break_idx
1509         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
1510             break_idx -= 1
1511
1512         if not passes_all_checks(break_idx):
1513             # If that fails, we check all indices ABOVE @max_break_idx.
1514             #
1515             # If we are able to find a valid index here, the next line is going
1516             # to be longer than the specified line length, but it's probably
1517             # better than doing nothing at all.
1518             break_idx = max_break_idx + 1
1519             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
1520                 break_idx += 1
1521
1522             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
1523                 return None
1524
1525         return break_idx
1526
1527     def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
1528         if self.normalize_strings:
1529             leaf.value = normalize_string_quotes(leaf.value)
1530
1531     def _normalize_f_string(self, string: str, prefix: str) -> str:
1532         """
1533         Pre-Conditions:
1534             * assert_is_leaf_string(@string)
1535
1536         Returns:
1537             * If @string is an f-string that contains no f-expressions, we
1538             return a string identical to @string except that the 'f' prefix
1539             has been stripped and all double braces (i.e. '{{' or '}}') have
1540             been normalized (i.e. turned into '{' or '}').
1541                 OR
1542             * Otherwise, we return @string.
1543         """
1544         assert_is_leaf_string(string)
1545
1546         if "f" in prefix and not fstring_contains_expr(string):
1547             new_prefix = prefix.replace("f", "")
1548
1549             temp = string[len(prefix) :]
1550             temp = re.sub(r"\{\{", "{", temp)
1551             temp = re.sub(r"\}\}", "}", temp)
1552             new_string = temp
1553
1554             return f"{new_prefix}{new_string}"
1555         else:
1556             return string
1557
1558     def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
1559         LL = list(leaves)
1560
1561         string_op_leaves = []
1562         i = 0
1563         while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
1564             prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
1565             string_op_leaves.append(prefix_leaf)
1566             i += 1
1567         return string_op_leaves
1568
1569
class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that splits non-"atom" strings (i.e. strings that do not
    exist on lines by themselves).

    Requirements:
        All of the requirements listed in BaseStringSplitter's docstring in
        addition to the requirements listed below:

        * The line is a return/yield statement, which returns/yields a string.
            OR
        * The line is part of a ternary expression (e.g. `x = y if cond else
        z`) such that the line starts with `else <string>`, where <string> is
        some string.
            OR
        * The line is an assert statement, which ends with a string.
            OR
        * The line is an assignment statement (e.g. `x = <string>` or `x +=
        <string>`) such that the variable is being assigned the value of some
        string.
            OR
        * The line is a dictionary key assignment where some valid key is being
        assigned the value of some string.

    Transformations:
        The chosen string is wrapped in parentheses and then split at the LPAR.

        We then have one line which ends with an LPAR and another line that
        starts with the chosen string. The latter line is then split again at
        the RPAR. This results in the RPAR (and possibly a trailing comma)
        being placed on its own line.

        NOTE: If any leaves exist to the right of the chosen string (except
        for a trailing comma, which would be placed after the RPAR), those
        leaves are placed inside the parentheses.  In effect, the chosen
        string is not necessarily being "wrapped" by parentheses. We can,
        however, count on the LPAR being placed directly before the chosen
        string.

        In other words, StringParenWrapper creates "atom" strings. These
        can then be split again by StringSplitter, if necessary.

    Collaborations:
        In the event that a string line split by StringParenWrapper is
        changed such that it no longer needs to be given its own line,
        StringParenWrapper relies on StringParenStripper to clean up the
        parentheses it created.
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        Decide whether @line contains a string that should be wrapped.

        Returns:
            Ok(string_idx) such that @line.leaves[string_idx] is the string
            matched by one of the `_*_match` helpers of this class.
                OR
            Err(CannotTransform), if the line ends with an opening bracket,
            if none of the helpers match, or if the matched string contains
            no spaces, would still be too long on its own line, and has no
            custom splits registered.
        """
        LL = line.leaves

        if LL[-1].type in OPENING_BRACKETS:
            return TErr(
                "Cannot wrap parens around a line that ends in an opening bracket."
            )

        # NOTE: Chaining with `or` is safe here because every matcher returns
        # either None or an index >= 1 (never the falsy index 0).
        string_idx = (
            self._return_match(LL)
            or self._else_match(LL)
            or self._assert_match(LL)
            or self._assign_match(LL)
            or self._dict_match(LL)
        )

        if string_idx is None:
            return TErr("This line does not contain any non-atomic strings.")

        string_value = LL[string_idx].value
        # The string would live one depth level below @line (see the string
        # line built in `do_transform`); 4 is presumably the indent width.
        max_string_length = self.line_length - ((line.depth + 1) * 4)

        # If the string has no spaces, AND will still violate the line length
        # limit when split, AND has no associated custom splits...
        if (
            " " not in string_value
            and len(string_value) > max_string_length
            and not self.has_custom_splits(string_value)
        ):
            # Then we should NOT put this string on its own line.
            return TErr(
                "We do not wrap long strings in parentheses when the"
                " resultant line would still be over the specified line"
                " length and can't be split further by StringSplitter."
            )

        return Ok(string_idx)

    @staticmethod
    def _return_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the return/yield statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a return/yield statement and the first
        # leaf contains either the "return" or "yield" keywords...
        if (
            parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr]
            and LL[0].value in ["return", "yield"]
        ):
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty parenthesis leaf, if one follows the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _else_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the ternary expression
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a ternary expression and the first leaf
        # contains the "else" keyword...
        if (
            parent_type(LL[0]) == syms.test
            and LL[0].type == token.NAME
            and LL[0].value == "else"
        ):
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty parenthesis leaf, if one follows the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _assert_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assert statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an assert statement and the first leaf
        # contains the "assert" keyword...
        if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a comma...
                if leaf.type == token.COMMA:
                    # Skip over an empty parenthesis leaf, if one follows the
                    # comma. The index check keeps a comma that is the LAST
                    # leaf from raising an IndexError.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That comma MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _assign_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assignment statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an expression statement, a function
        # argument, or a `power` node AND the first leaf is a NAME...
        if (
            parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
            and LL[0].type == token.NAME
        ):
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find either an '=' or '+=' symbol...
                if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                    # Skip over an empty parenthesis leaf, if one follows the
                    # symbol. The index check keeps a symbol that is the LAST
                    # leaf from raising an IndexError.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That symbol MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # The next leaf MAY be a comma iff this line is a part
                        # of a function argument...
                        if (
                            parent_type(LL[0]) == syms.argument
                            and is_valid_index(idx)
                            and LL[idx].type == token.COMMA
                        ):
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _dict_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the dictionary key assignment
            statement requirements listed in the 'Requirements' section of this
            class's docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a dictionary key assignment (i.e. the
        # first leaf's parent or grandparent is a `dictsetmaker` node)...
        if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a colon...
                if leaf.type == token.COLON:
                    # Skip over an empty parenthesis leaf, if one follows the
                    # colon. The index check keeps a colon that is the LAST
                    # leaf from raising an IndexError.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That colon MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # That string MAY be followed by a comma...
                        if is_valid_index(idx) and LL[idx].type == token.COMMA:
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Wrap the string at @line.leaves[@string_idx] in parentheses.

        Yields (each wrapped in Ok), in order:
            1. A clone of @line holding every leaf to the left of the string,
            followed by a new LPAR and any comments stolen from the string,
            the trailing comma, and the old LPAR (where those exist).
            2. A new line, one depth level deeper, holding a copy of the
            string followed by every leaf that was to its right (minus a
            trailing comma and a pre-existing RPAR).
            3. A clone of @line holding a new RPAR and, if @line ended with a
            comma, a new comma.

        NOTE: The order of the statements below matters, since the new leaves
        are spliced into the node tree as they are created.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)
        # NOTE: Creating this helper orphans LL[string_idx] right away; every
        # call to it inserts a leaf where the string used to be.
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = -1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            # `left_leaves` is a slice (i.e. a copy), so the old LPAR is still
            # reachable through LL even though it was popped above.
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                # The trailing comma belongs on the last line, not this one.
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        # Share the first line's bracket tracker so that the RPAR appended
        # below is matched against the LPAR appended to the first line.
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
1923
1924
class StringParser:
    """
    A small state machine used to find where a string's "trailer" ends. A
    trailer is either absent, an old-style formatting sequence (e.g. `% varX`
    or `% (varX, varY)`), or a method-call / attribute access (e.g.
    `.format(varX, varY)`).

    NOTE: Parser state is kept on the instance, so a fresh StringParser object
    MUST be created for every string trailer that needs to be parsed.

    Examples:
        Suppose `line` is the `Line` object that corresponds to the following
        line of python code:
        ```
        x = "Some {}.".format("String") + some_other_string
        ```

        Suppose also that `string_idx` is an index such that:
        ```
        assert line.leaves[string_idx].value == "Some {}."
        ```

        Then the following holds:
        ```
        string_parser = StringParser()
        idx = string_parser.parse(line.leaves, string_idx)
        assert line.leaves[idx].type == token.PLUS
        ```
    """

    # Wildcard used as the token half of a lookup key; it stands for "any
    # token without an explicit entry for the current state".
    DEFAULT_TOKEN: Final = 20210605

    # String Parser States
    START: Final = 1
    DOT: Final = 2
    NAME: Final = 3
    PERCENT: Final = 4
    SINGLE_FMT_ARG: Final = 5
    LPAR: Final = 6
    RPAR: Final = 7
    DONE: Final = 8

    # Lookup Table for Next State
    _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
        # A string trailer may start with '.' OR '%'.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # A '.' MUST be followed by an attribute or method name.
        (DOT, token.NAME): NAME,
        # A method name MUST be followed by an '(', whereas an attribute name
        # is the last symbol in the string trailer.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # A '%' symbol can be followed by an '(' or a single argument (e.g. a
        # string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # If a '%' symbol is followed by a single argument, that argument is
        # the last leaf in the string trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # If present, a ')' symbol is the last symbol in a string trailer.
        # (NOTE: LPARS and nested RPARS are not included in this lookup table,
        # since they are treated as a special case by the parsing logic in this
        # class's implementation.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        self._state = self.START
        # Number of '(' leaves seen so far that have not been closed yet.
        self._unmatched_lpars = 0

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index directly after the last leaf which is a part of the
            string trailer, if a "trailer" exists.
                OR
            @string_idx + 1, if no string "trailer" exists.
        """
        assert leaves[string_idx].type == token.STRING

        idx = string_idx + 1
        # Feed the state machine one leaf at a time, stopping at the first
        # leaf that is NOT a part of the trailer (or when we run out).
        for leaf in leaves[idx:]:
            if not self._next_state(leaf):
                break
            idx += 1

        return idx

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Advance the state machine by one leaf.

        Pre-conditions:
            * On the first call to this function, @leaf MUST be the leaf that
            was directly after the string leaf in question (e.g. if our target
            string is `line.leaves[i]` then the first call to this method must
            be `line.leaves[i + 1]`).
            * On the next call to this function, the leaf parameter passed in
            MUST be the leaf directly following @leaf.

        Returns:
            True iff @leaf is a part of the string's trailer.

        Raises:
            RuntimeError: if the lookup table has neither an explicit nor a
            default entry for the current state.
        """
        # Empty LPAR / RPAR leaves never affect the parser's state.
        if is_empty_par(leaf):
            return True

        leaf_type = leaf.type
        if leaf_type == token.LPAR:
            self._unmatched_lpars += 1

        # The LPAR parser state is a special case: everything is a part of
        # the trailer until the matching RPAR token has been consumed.
        if self._state == self.LPAR:
            if leaf_type == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
            return True

        # Every other state is driven by the lookup table. An entry for this
        # exact token wins; failing that, we use the state's default entry.
        key = (self._state, leaf_type)
        if key not in self._goto:
            key = (self._state, self.DEFAULT_TOKEN)
            if key not in self._goto:
                # No default either, so this parser has a logic error.
                raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        self._state = self._goto[key]

        # Reaching DONE means that @leaf lies just beyond the trailer.
        return self._state != self.DONE
2065
2066
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Orphan @string_leaf and return a function that inserts new children into
    the spot that @string_leaf used to occupy under its parent.

    Each call to the returned function inserts its argument directly after
    the child inserted by the previous call, so children end up in the tree
    in the same order in which they were inserted.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`, where the node `N` starts out as:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        Now run the following snippet:
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        Afterwards `string_leaf.parent is None` and the node `N` has become:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # The parent has to be captured BEFORE the leaf is removed from the tree.
    parent_node = string_leaf.parent
    # Child position that the next inserted leaf will take.
    next_slot = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal next_slot

        assert parent_node is not None
        assert next_slot is not None

        parent_node.insert_child(next_slot, child)
        # Advance, so that the next child lands right after this one.
        next_slot = next_slot + 1

    return insert_str_child
2126
2127
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a predicate that reports whether an index can be used on @seq.

    The length of @seq is looked up on every call to the predicate, so the
    predicate stays accurate even if @seq changes size later on.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        # Negative indices are rejected on purpose, even though Python itself
        # would accept them.
        return idx >= 0 and idx < len(seq)

    return is_valid_index