src/black/trans.py

   1 """
   2 String transformers that can split and merge strings.
   3 """
   4 import re
   5 from abc import ABC, abstractmethod
   6 from collections import defaultdict
   7 from dataclasses import dataclass
   8 from typing import (
   9     Any,
  10     Callable,
  11     ClassVar,
  12     Collection,
  13     Dict,
  14     Final,
  15     Iterable,
  16     Iterator,
  17     List,
  18     Literal,
  19     Optional,
  20     Sequence,
  21     Set,
  22     Tuple,
  23     TypeVar,
  24     Union,
  25 )
  26
  27 from mypy_extensions import trait
  28
  29 from black.comments import contains_pragma_comment
  30 from black.lines import Line, append_leaves
  31 from black.mode import Feature, Mode
  32 from black.nodes import (
  33     CLOSING_BRACKETS,
  34     OPENING_BRACKETS,
  35     STANDALONE_COMMENT,
  36     is_empty_lpar,
  37     is_empty_par,
  38     is_empty_rpar,
  39     is_part_of_annotation,
  40     parent_type,
  41     replace_child,
  42     syms,
  43 )
  44 from black.rusty import Err, Ok, Result
  45 from black.strings import (
  46     assert_is_leaf_string,
  47     count_chars_in_width,
  48     get_string_prefix,
  49     has_triple_quotes,
  50     normalize_string_quotes,
  51     str_width,
  52 )
  53 from blib2to3.pgen2 import token
  54 from blib2to3.pytree import Leaf, Node
  55
  56
class CannotTransform(Exception):
    """Base class for errors raised by Transformers.

    Transformers in this module raise it (see `hug_power_op` and
    `StringTransformer.__call__`) to report that a line cannot be transformed.
    `TErr` wraps an instance in `Err` for code that works with `TResult`.
    """
  59
  60
# types
T = TypeVar("T")
LN = Union[Leaf, Node]  # either kind of blib2to3 tree element
# Call signature shared by the line transformers defined in this module.
Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]]
Index = int  # a position in `line.leaves`
NodeType = int
ParserState = int
StringID = int  # an `id(...)` value; see CustomSplitMapMixin._get_key
TResult = Result[T, CannotTransform]  # (T)ransform Result
TMatchResult = TResult[List[Index]]  # what StringTransformer.do_match returns

SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"])  # East Asian stops
  73
  74
  75 def TErr(err_msg: str) -> Err[CannotTransform]:
  76     """(T)ransform Err
  77
  78     Convenience function used when working with the TResult type.
  79     """
  80     cant_transform = CannotTransform(err_msg)
  81     return Err(cant_transform)
  82
  83
  84 def hug_power_op(
  85     line: Line, features: Collection[Feature], mode: Mode
  86 ) -> Iterator[Line]:
  87     """A transformer which normalizes spacing around power operators."""
  88
  89     # Performance optimization to avoid unnecessary Leaf clones and other ops.
  90     for leaf in line.leaves:
  91         if leaf.type == token.DOUBLESTAR:
  92             break
  93     else:
  94         raise CannotTransform("No doublestar token was found in the line.")
  95
  96     def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
  97         # Brackets and parentheses indicate calls, subscripts, etc. ...
  98         # basically stuff that doesn't count as "simple". Only a NAME lookup
  99         # or dotted lookup (eg. NAME.NAME) is OK.
 100         if step == -1:
 101             disallowed = {token.RPAR, token.RSQB}
 102         else:
 103             disallowed = {token.LPAR, token.LSQB}
 104
 105         while 0 <= index < len(line.leaves):
 106             current = line.leaves[index]
 107             if current.type in disallowed:
 108                 return False
 109             if current.type not in {token.NAME, token.DOT} or current.value == "for":
 110                 # If the current token isn't disallowed, we'll assume this is simple as
 111                 # only the disallowed tokens are semantically attached to this lookup
 112                 # expression we're checking. Also, stop early if we hit the 'for' bit
 113                 # of a comprehension.
 114                 return True
 115
 116             index += step
 117
 118         return True
 119
 120     def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
 121         # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple
 122         # lookup (see above), with or without a preceding unary operator.
 123         start = line.leaves[index]
 124         if start.type in {token.NAME, token.NUMBER}:
 125             return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
 126
 127         if start.type in {token.PLUS, token.MINUS, token.TILDE}:
 128             if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
 129                 # step is always one as bases with a preceding unary op will be checked
 130                 # for simplicity starting from the next token (so it'll hit the check
 131                 # above).
 132                 return is_simple_lookup(index + 1, step=1)
 133
 134         return False
 135
 136     new_line = line.clone()
 137     should_hug = False
 138     for idx, leaf in enumerate(line.leaves):
 139         new_leaf = leaf.clone()
 140         if should_hug:
 141             new_leaf.prefix = ""
 142             should_hug = False
 143
 144         should_hug = (
 145             (0 < idx < len(line.leaves) - 1)
 146             and leaf.type == token.DOUBLESTAR
 147             and is_simple_operand(idx - 1, kind="base")
 148             and line.leaves[idx - 1].value != "lambda"
 149             and is_simple_operand(idx + 1, kind="exponent")
 150         )
 151         if should_hug:
 152             new_leaf.prefix = ""
 153
 154         # We have to be careful to make a new line properly:
 155         # - bracket related metadata must be maintained (handled by Line.append)
 156         # - comments need to copied over, updating the leaf IDs they're attached to
 157         new_line.append(new_leaf, preformatted=True)
 158         for comment_leaf in line.comments_after(leaf):
 159             new_line.append(comment_leaf, preformatted=True)
 160
 161     yield new_line
 162
 163
 164 class StringTransformer(ABC):
 165     """
 166     An implementation of the Transformer protocol that relies on its
 167     subclasses overriding the template methods `do_match(...)` and
 168     `do_transform(...)`.
 169
 170     This Transformer works exclusively on strings (for example, by merging
 171     or splitting them).
 172
 173     The following sections can be found among the docstrings of each concrete
 174     StringTransformer subclass.
 175
 176     Requirements:
 177         Which requirements must be met of the given Line for this
 178         StringTransformer to be applied?
 179
 180     Transformations:
 181         If the given Line meets all of the above requirements, which string
 182         transformations can you expect to be applied to it by this
 183         StringTransformer?
 184
 185     Collaborations:
 186         What contractual agreements does this StringTransformer have with other
 187         StringTransfomers? Such collaborations should be eliminated/minimized
 188         as much as possible.
 189     """
 190
 191     __name__: Final = "StringTransformer"
 192
 193     # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
 194     # `abc.ABC`.
 195     def __init__(self, line_length: int, normalize_strings: bool) -> None:
 196         self.line_length = line_length
 197         self.normalize_strings = normalize_strings
 198
 199     @abstractmethod
 200     def do_match(self, line: Line) -> TMatchResult:
 201         """
 202         Returns:
 203             * Ok(string_indices) such that for each index, `line.leaves[index]`
 204               is our target string if a match was able to be made. For
 205               transformers that don't result in more lines (e.g. StringMerger,
 206               StringParenStripper), multiple matches and transforms are done at
 207               once to reduce the complexity.
 208               OR
 209             * Err(CannotTransform), if no match could be made.
 210         """
 211
 212     @abstractmethod
 213     def do_transform(
 214         self, line: Line, string_indices: List[int]
 215     ) -> Iterator[TResult[Line]]:
 216         """
 217         Yields:
 218             * Ok(new_line) where new_line is the new transformed line.
 219               OR
 220             * Err(CannotTransform) if the transformation failed for some reason. The
 221               `do_match(...)` template method should usually be used to reject
 222               the form of the given Line, but in some cases it is difficult to
 223               know whether or not a Line meets the StringTransformer's
 224               requirements until the transformation is already midway.
 225
 226         Side Effects:
 227             This method should NOT mutate @line directly, but it MAY mutate the
 228             Line's underlying Node structure. (WARNING: If the underlying Node
 229             structure IS altered, then this method should NOT be allowed to
 230             yield an CannotTransform after that point.)
 231         """
 232
 233     def __call__(
 234         self, line: Line, _features: Collection[Feature], _mode: Mode
 235     ) -> Iterator[Line]:
 236         """
 237         StringTransformer instances have a call signature that mirrors that of
 238         the Transformer type.
 239
 240         Raises:
 241             CannotTransform(...) if the concrete StringTransformer class is unable
 242             to transform @line.
 243         """
 244         # Optimization to avoid calling `self.do_match(...)` when the line does
 245         # not contain any string.
 246         if not any(leaf.type == token.STRING for leaf in line.leaves):
 247             raise CannotTransform("There are no strings in this line.")
 248
 249         match_result = self.do_match(line)
 250
 251         if isinstance(match_result, Err):
 252             cant_transform = match_result.err()
 253             raise CannotTransform(
 254                 f"The string transformer {self.__class__.__name__} does not recognize"
 255                 " this line as one that it can transform."
 256             ) from cant_transform
 257
 258         string_indices = match_result.ok()
 259
 260         for line_result in self.do_transform(line, string_indices):
 261             if isinstance(line_result, Err):
 262                 cant_transform = line_result.err()
 263                 raise CannotTransform(
 264                     "StringTransformer failed while attempting to transform string."
 265                 ) from cant_transform
 266             line = line_result.ok()
 267             yield line
 268
 269
@dataclass
class CustomSplit:
    """A custom (i.e. manual) string split.

    A single CustomSplit instance represents a single substring.

    Examples:
        Consider the following string:
        ```
        "Hi there friend."
        " This is a custom"
        f" string {split}."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```
    """

    # Whether the original substring carried a string prefix of its own
    # (StringMerger records `bool(prefix)` of each substring it merges).
    has_prefix: bool
    # Offset of the break point belonging to this substring.
    # NOTE(review): StringMerger computes this as the substring's length plus
    # one, plus the merged prefix length when `has_prefix` is set; the example
    # indices in the docstring above look like plain substring lengths, so
    # confirm which convention is intended.
    break_idx: int
 294
 295
 296 @trait
 297 class CustomSplitMapMixin:
 298     """
 299     This mixin class is used to map merged strings to a sequence of
 300     CustomSplits, which will then be used to re-split the strings iff none of
 301     the resultant substrings go over the configured max line length.
 302     """
 303
 304     _Key: ClassVar = Tuple[StringID, str]
 305     _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
 306         tuple
 307     )
 308
 309     @staticmethod
 310     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
 311         """
 312         Returns:
 313             A unique identifier that is used internally to map @string to a
 314             group of custom splits.
 315         """
 316         return (id(string), string)
 317
 318     def add_custom_splits(
 319         self, string: str, custom_splits: Iterable[CustomSplit]
 320     ) -> None:
 321         """Custom Split Map Setter Method
 322
 323         Side Effects:
 324             Adds a mapping from @string to the custom splits @custom_splits.
 325         """
 326         key = self._get_key(string)
 327         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
 328
 329     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
 330         """Custom Split Map Getter Method
 331
 332         Returns:
 333             * A list of the custom splits that are mapped to @string, if any
 334               exist.
 335               OR
 336             * [], otherwise.
 337
 338         Side Effects:
 339             Deletes the mapping between @string and its associated custom
 340             splits (which are returned to the caller).
 341         """
 342         key = self._get_key(string)
 343
 344         custom_splits = self._CUSTOM_SPLIT_MAP[key]
 345         del self._CUSTOM_SPLIT_MAP[key]
 346
 347         return list(custom_splits)
 348
 349     def has_custom_splits(self, string: str) -> bool:
 350         """
 351         Returns:
 352             True iff @string is associated with a set of custom splits.
 353         """
 354         key = self._get_key(string)
 355         return key in self._CUSTOM_SPLIT_MAP
 356
 357
 358 class StringMerger(StringTransformer, CustomSplitMapMixin):
 359     """StringTransformer that merges strings together.
 360
 361     Requirements:
 362         (A) The line contains adjacent strings such that ALL of the validation checks
 363         listed in StringMerger._validate_msg(...)'s docstring pass.
 364         OR
 365         (B) The line contains a string which uses line continuation backslashes.
 366
 367     Transformations:
 368         Depending on which of the two requirements above where met, either:
 369
 370         (A) The string group associated with the target string is merged.
 371         OR
 372         (B) All line-continuation backslashes are removed from the target string.
 373
 374     Collaborations:
 375         StringMerger provides custom split information to StringSplitter.
 376     """
 377
 378     def do_match(self, line: Line) -> TMatchResult:
 379         LL = line.leaves
 380
 381         is_valid_index = is_valid_index_factory(LL)
 382
 383         string_indices = []
 384         idx = 0
 385         while is_valid_index(idx):
 386             leaf = LL[idx]
 387             if (
 388                 leaf.type == token.STRING
 389                 and is_valid_index(idx + 1)
 390                 and LL[idx + 1].type == token.STRING
 391             ):
 392                 if not is_part_of_annotation(leaf):
 393                     string_indices.append(idx)
 394
 395                 # Advance to the next non-STRING leaf.
 396                 idx += 2
 397                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 398                     idx += 1
 399
 400             elif leaf.type == token.STRING and "\\\n" in leaf.value:
 401                 string_indices.append(idx)
 402                 # Advance to the next non-STRING leaf.
 403                 idx += 1
 404                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 405                     idx += 1
 406
 407             else:
 408                 idx += 1
 409
 410         if string_indices:
 411             return Ok(string_indices)
 412         else:
 413             return TErr("This line has no strings that need merging.")
 414
 415     def do_transform(
 416         self, line: Line, string_indices: List[int]
 417     ) -> Iterator[TResult[Line]]:
 418         new_line = line
 419
 420         rblc_result = self._remove_backslash_line_continuation_chars(
 421             new_line, string_indices
 422         )
 423         if isinstance(rblc_result, Ok):
 424             new_line = rblc_result.ok()
 425
 426         msg_result = self._merge_string_group(new_line, string_indices)
 427         if isinstance(msg_result, Ok):
 428             new_line = msg_result.ok()
 429
 430         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
 431             msg_cant_transform = msg_result.err()
 432             rblc_cant_transform = rblc_result.err()
 433             cant_transform = CannotTransform(
 434                 "StringMerger failed to merge any strings in this line."
 435             )
 436
 437             # Chain the errors together using `__cause__`.
 438             msg_cant_transform.__cause__ = rblc_cant_transform
 439             cant_transform.__cause__ = msg_cant_transform
 440
 441             yield Err(cant_transform)
 442         else:
 443             yield Ok(new_line)
 444
 445     @staticmethod
 446     def _remove_backslash_line_continuation_chars(
 447         line: Line, string_indices: List[int]
 448     ) -> TResult[Line]:
 449         """
 450         Merge strings that were split across multiple lines using
 451         line-continuation backslashes.
 452
 453         Returns:
 454             Ok(new_line), if @line contains backslash line-continuation
 455             characters.
 456                 OR
 457             Err(CannotTransform), otherwise.
 458         """
 459         LL = line.leaves
 460
 461         indices_to_transform = []
 462         for string_idx in string_indices:
 463             string_leaf = LL[string_idx]
 464             if (
 465                 string_leaf.type == token.STRING
 466                 and "\\\n" in string_leaf.value
 467                 and not has_triple_quotes(string_leaf.value)
 468             ):
 469                 indices_to_transform.append(string_idx)
 470
 471         if not indices_to_transform:
 472             return TErr(
 473                 "Found no string leaves that contain backslash line continuation"
 474                 " characters."
 475             )
 476
 477         new_line = line.clone()
 478         new_line.comments = line.comments.copy()
 479         append_leaves(new_line, line, LL)
 480
 481         for string_idx in indices_to_transform:
 482             new_string_leaf = new_line.leaves[string_idx]
 483             new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
 484
 485         return Ok(new_line)
 486
    def _merge_string_group(
        self, line: Line, string_indices: List[int]
    ) -> TResult[Line]:
        """
        Merges string groups (i.e. set of adjacent strings).

        Each index from `string_indices` designates one string group's first
        leaf in `line.leaves`. Groups that fail `_validate_msg(...)` are
        skipped and copied into the new line unchanged.

        Returns:
            Ok(new_line), if at least one string group passes ALL of the
            validation checks found in _validate_msg(...).
                OR
            Err(CannotTransform), if no string group could be merged.

        Side Effects:
            Every merged group goes through `_merge_one_string_group(...)`,
            which edits the underlying node tree and registers custom splits.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
        merged_string_idx_dict: Dict[int, Tuple[int, Leaf]] = {}
        for string_idx in string_indices:
            vresult = self._validate_msg(line, string_idx)
            if isinstance(vresult, Err):
                # This group is not mergeable; leave it as it is.
                continue
            merged_string_idx_dict[string_idx] = self._merge_one_string_group(
                LL, string_idx, is_valid_index
            )

        if not merged_string_idx_dict:
            return TErr("No string group is merged")

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        # Start index and size of the most recently merged group. The -1
        # sentinels make the range check below false until a group is seen.
        previous_merged_string_idx = -1
        previous_merged_num_of_strings = -1
        for i, leaf in enumerate(LL):
            if i in merged_string_idx_dict:
                # First leaf of a merged group: emit the merged leaf instead.
                previous_merged_string_idx = i
                previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
                new_line.append(string_leaf)

            if (
                previous_merged_string_idx
                <= i
                < previous_merged_string_idx + previous_merged_num_of_strings
            ):
                # Leaf belongs to a merged group: keep its comments only, the
                # leaf itself has been replaced by the merged string.
                for comment_leaf in line.comments_after(LL[i]):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            # Any other leaf is carried over as is.
            append_leaves(new_line, line, [leaf])

        return Ok(new_line)
 541
    def _merge_one_string_group(
        self, LL: List[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> Tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        Args:
            LL: the leaves of the line being transformed.
            string_idx: index in @LL of the first string of the group.
            is_valid_index: predicate telling whether an index is usable
                with @LL.

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.

        Side Effects:
            * When the first string has a parent node, the old string leaves
              (or that whole parent) are replaced by the merged leaf.
            * The custom splits of the merged string are registered through
              `add_custom_splits(...)`.
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The merged string reuses the closing quote character of the first
        # string of the group.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                string = _toggle_fexpr_quotes(string, QUOTE)
                # After quotes toggling, quotes in expressions won't be escaped
                # because quotes can't be reused in f-strings. So we can simply
                # let the escaping logic below run without knowing f-string
                # expressions.

            # Matches an even-length (possibly empty) run of backslashes, so
            # only QUOTE characters that are not already escaped get escaped.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have: the first non-empty prefix found in the group.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            # Append this substring followed by a BREAK_MARK, then strip the
            # result again so that it can be extended on the next iteration.
            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Take a note on the index of the non-STRING leaf.
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            # Drop everything up to and including this BREAK_MARK so that the
            # next lookup measures the following substring only.
            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        # The leaf that ends up in the line carries no BREAK_MARKs.
        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf
 684
    @staticmethod
    def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for _merge_string_group(...).

        Args:
            line: the line that contains the string group.
            string_idx: index in `line.leaves` of the group's first string.

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
                OR
            * Err(CannotTransform), if any of the following are true:
                - The target string group contains a stand-alone comment that
                  has a string leaf before it AND after it.
                - The string group contains a triple-quoted string.
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group has more than one inline comment (a comment
                  attached to a comma that directly trails the group counts as
                  one of them).
                - The string group has an inline comment that appears to be a pragma.
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.
                - The string group contains a raw string.

        Note:
            Stringified type annotations are not checked here; within this
            class, do_match(...) leaves out string groups whose first leaf
            satisfies is_part_of_annotation(...). We don't want to process
            stringified type annotations since pyright doesn't support them
            spanning multiple string values. (NOTE: mypy, pytype, pyre do
            support them, so we can change if pyright also gains support in
            the future. See https://github.com/microsoft/pyright/issues/4359.)
        """
        # We first check for "inner" stand-alone comments (i.e. stand-alone
        # comments that have a string leaf before them AND after them).
        # The group is scanned in both directions, starting at string_idx.
        for inc in [1, -1]:
            i = string_idx
            found_sa_comment = False
            is_valid_index = is_valid_index_factory(line.leaves)
            while is_valid_index(i) and line.leaves[i].type in [
                token.STRING,
                STANDALONE_COMMENT,
            ]:
                if line.leaves[i].type == STANDALONE_COMMENT:
                    found_sa_comment = True
                elif found_sa_comment:
                    # A string shows up past a stand-alone comment.
                    return TErr(
                        "StringMerger does NOT merge string groups which contain "
                        "stand-alone comments."
                    )

                i += inc

        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value).lower()
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            # `line.comments` is keyed by the id() of the leaf a comment is
            # attached to.
            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
 770
 771
 772 class StringParenStripper(StringTransformer):
 773     """StringTransformer that strips surrounding parentheses from strings.
 774
 775     Requirements:
 776         The line contains a string which is surrounded by parentheses and:
 777             - The target string is NOT the only argument to a function call.
 778             - The target string is NOT a "pointless" string.
 779             - If the target string contains a PERCENT, the brackets are not
 780               preceded or followed by an operator with higher precedence than
 781               PERCENT.
 782
 783     Transformations:
 784         The parentheses mentioned in the 'Requirements' section are stripped.
 785
 786     Collaborations:
 787         StringParenStripper has its own inherent usefulness, but it is also
 788         relied on to clean up the parentheses created by StringParenWrapper (in
 789         the event that they are no longer needed).
 790     """
 791
 792     def do_match(self, line: Line) -> TMatchResult:
 793         LL = line.leaves
 794
 795         is_valid_index = is_valid_index_factory(LL)
 796
 797         string_indices = []
 798
 799         idx = -1
 800         while True:
 801             idx += 1
 802             if idx >= len(LL):
 803                 break
 804             leaf = LL[idx]
 805
 806             # Should be a string...
 807             if leaf.type != token.STRING:
 808                 continue
 809
 810             # If this is a "pointless" string...
 811             if (
 812                 leaf.parent
 813                 and leaf.parent.parent
 814                 and leaf.parent.parent.type == syms.simple_stmt
 815             ):
 816                 continue
 817
 818             # Should be preceded by a non-empty LPAR...
 819             if (
 820                 not is_valid_index(idx - 1)
 821                 or LL[idx - 1].type != token.LPAR
 822                 or is_empty_lpar(LL[idx - 1])
 823             ):
 824                 continue
 825
 826             # That LPAR should NOT be preceded by a function name or a closing
 827             # bracket (which could be a function which returns a function or a
 828             # list/dictionary that contains a function)...
 829             if is_valid_index(idx - 2) and (
 830                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
 831             ):
 832                 continue
 833
 834             string_idx = idx
 835
 836             # Skip the string trailer, if one exists.
 837             string_parser = StringParser()
 838             next_idx = string_parser.parse(LL, string_idx)
 839
 840             # if the leaves in the parsed string include a PERCENT, we need to
 841             # make sure the initial LPAR is NOT preceded by an operator with
 842             # higher or equal precedence to PERCENT
 843             if is_valid_index(idx - 2):
 844                 # mypy can't quite follow unless we name this
 845                 before_lpar = LL[idx - 2]
 846                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
 847                     (
 848                         before_lpar.type
 849                         in {
 850                             token.STAR,
 851                             token.AT,
 852                             token.SLASH,
 853                             token.DOUBLESLASH,
 854                             token.PERCENT,
 855                             token.TILDE,
 856                             token.DOUBLESTAR,
 857                             token.AWAIT,
 858                             token.LSQB,
 859                             token.LPAR,
 860                         }
 861                     )
 862                     or (
 863                         # only unary PLUS/MINUS
 864                         before_lpar.parent
 865                         and before_lpar.parent.type == syms.factor
 866                         and (before_lpar.type in {token.PLUS, token.MINUS})
 867                     )
 868                 ):
 869                     continue
 870
 871             # Should be followed by a non-empty RPAR...
 872             if (
 873                 is_valid_index(next_idx)
 874                 and LL[next_idx].type == token.RPAR
 875                 and not is_empty_rpar(LL[next_idx])
 876             ):
 877                 # That RPAR should NOT be followed by anything with higher
 878                 # precedence than PERCENT
 879                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
 880                     token.DOUBLESTAR,
 881                     token.LSQB,
 882                     token.LPAR,
 883                     token.DOT,
 884                 }:
 885                     continue
 886
 887                 string_indices.append(string_idx)
 888                 idx = string_idx
 889                 while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
 890                     idx += 1
 891
 892         if string_indices:
 893             return Ok(string_indices)
 894         return TErr("This line has no strings wrapped in parens.")
 895
 896     def do_transform(
 897         self, line: Line, string_indices: List[int]
 898     ) -> Iterator[TResult[Line]]:
 899         LL = line.leaves
 900
 901         string_and_rpar_indices: List[int] = []
 902         for string_idx in string_indices:
 903             string_parser = StringParser()
 904             rpar_idx = string_parser.parse(LL, string_idx)
 905
 906             should_transform = True
 907             for leaf in (LL[string_idx - 1], LL[rpar_idx]):
 908                 if line.comments_after(leaf):
 909                     # Should not strip parentheses which have comments attached
 910                     # to them.
 911                     should_transform = False
 912                     break
 913             if should_transform:
 914                 string_and_rpar_indices.extend((string_idx, rpar_idx))
 915
 916         if string_and_rpar_indices:
 917             yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
 918         else:
 919             yield Err(
 920                 CannotTransform("All string groups have comments attached to them.")
 921             )
 922
 923     def _transform_to_new_line(
 924         self, line: Line, string_and_rpar_indices: List[int]
 925     ) -> Line:
 926         LL = line.leaves
 927
 928         new_line = line.clone()
 929         new_line.comments = line.comments.copy()
 930
 931         previous_idx = -1
 932         # We need to sort the indices, since string_idx and its matching
 933         # rpar_idx may not come in order, e.g. in
 934         # `("outer" % ("inner".join(items)))`, the "inner" string's
 935         # string_idx is smaller than "outer" string's rpar_idx.
 936         for idx in sorted(string_and_rpar_indices):
 937             leaf = LL[idx]
 938             lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
 939             append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
 940             if leaf.type == token.STRING:
 941                 string_leaf = Leaf(token.STRING, LL[idx].value)
 942                 LL[lpar_or_rpar_idx].remove()  # Remove lpar.
 943                 replace_child(LL[idx], string_leaf)
 944                 new_line.append(string_leaf)
 945             else:
 946                 LL[lpar_or_rpar_idx].remove()  # This is a rpar.
 947
 948             previous_idx = idx
 949
 950         # Append the leaves after the last idx:
 951         append_leaves(new_line, line, LL[idx + 1 :])
 952
 953         return new_line
 954
 955
 956 class BaseStringSplitter(StringTransformer):
 957     """
 958     Abstract class for StringTransformers which transform a Line's strings by splitting
 959     them or placing them on their own lines where necessary to avoid going over
 960     the configured line length.
 961
 962     Requirements:
 963         * The target string value is responsible for the line going over the
 964           line length limit. It follows that after all of black's other line
 965           split methods have been exhausted, this line (or one of the resulting
 966           lines after all line splits are performed) would still be over the
 967           line_length limit unless we split this string.
 968           AND
 969
 970         * The target string is NOT a "pointless" string (i.e. a string that has
 971           no parent or siblings).
 972           AND
 973
 974         * The target string is not followed by an inline comment that appears
 975           to be a pragma.
 976           AND
 977
 978         * The target string is not a multiline (i.e. triple-quote) string.
 979     """
 980
 981     STRING_OPERATORS: Final = [
 982         token.EQEQUAL,
 983         token.GREATER,
 984         token.GREATEREQUAL,
 985         token.LESS,
 986         token.LESSEQUAL,
 987         token.NOTEQUAL,
 988         token.PERCENT,
 989         token.PLUS,
 990         token.STAR,
 991     ]
 992
 993     @abstractmethod
 994     def do_splitter_match(self, line: Line) -> TMatchResult:
 995         """
 996         BaseStringSplitter asks its clients to override this method instead of
 997         `StringTransformer.do_match(...)`.
 998
 999         Follows the same protocol as `StringTransformer.do_match(...)`.
1000
1001         Refer to `help(StringTransformer.do_match)` for more information.
1002         """
1003
1004     def do_match(self, line: Line) -> TMatchResult:
1005         match_result = self.do_splitter_match(line)
1006         if isinstance(match_result, Err):
1007             return match_result
1008
1009         string_indices = match_result.ok()
1010         assert len(string_indices) == 1, (
1011             f"{self.__class__.__name__} should only find one match at a time, found"
1012             f" {len(string_indices)}"
1013         )
1014         string_idx = string_indices[0]
1015         vresult = self._validate(line, string_idx)
1016         if isinstance(vresult, Err):
1017             return vresult
1018
1019         return match_result
1020
1021     def _validate(self, line: Line, string_idx: int) -> TResult[None]:
1022         """
1023         Checks that @line meets all of the requirements listed in this classes'
1024         docstring. Refer to `help(BaseStringSplitter)` for a detailed
1025         description of those requirements.
1026
1027         Returns:
1028             * Ok(None), if ALL of the requirements are met.
1029               OR
1030             * Err(CannotTransform), if ANY of the requirements are NOT met.
1031         """
1032         LL = line.leaves
1033
1034         string_leaf = LL[string_idx]
1035
1036         max_string_length = self._get_max_string_length(line, string_idx)
1037         if len(string_leaf.value) <= max_string_length:
1038             return TErr(
1039                 "The string itself is not what is causing this line to be too long."
1040             )
1041
1042         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
1043             token.STRING,
1044             token.NEWLINE,
1045         ]:
1046             return TErr(
1047                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
1048                 " no parent)."
1049             )
1050
1051         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
1052             line.comments[id(line.leaves[string_idx])]
1053         ):
1054             return TErr(
1055                 "Line appears to end with an inline pragma comment. Splitting the line"
1056                 " could modify the pragma's behavior."
1057             )
1058
1059         if has_triple_quotes(string_leaf.value):
1060             return TErr("We cannot split multiline strings.")
1061
1062         return Ok(None)
1063
1064     def _get_max_string_length(self, line: Line, string_idx: int) -> int:
1065         """
1066         Calculates the max string length used when attempting to determine
1067         whether or not the target string is responsible for causing the line to
1068         go over the line length limit.
1069
1070         WARNING: This method is tightly coupled to both StringSplitter and
1071         (especially) StringParenWrapper. There is probably a better way to
1072         accomplish what is being done here.
1073
1074         Returns:
1075             max_string_length: such that `line.leaves[string_idx].value >
1076             max_string_length` implies that the target string IS responsible
1077             for causing this line to exceed the line length limit.
1078         """
1079         LL = line.leaves
1080
1081         is_valid_index = is_valid_index_factory(LL)
1082
1083         # We use the shorthand "WMA4" in comments to abbreviate "We must
1084         # account for". When giving examples, we use STRING to mean some/any
1085         # valid string.
1086         #
1087         # Finally, we use the following convenience variables:
1088         #
1089         #   P:  The leaf that is before the target string leaf.
1090         #   N:  The leaf that is after the target string leaf.
1091         #   NN: The leaf that is after N.
1092
1093         # WMA4 the whitespace at the beginning of the line.
1094         offset = line.depth * 4
1095
1096         if is_valid_index(string_idx - 1):
1097             p_idx = string_idx - 1
1098             if (
1099                 LL[string_idx - 1].type == token.LPAR
1100                 and LL[string_idx - 1].value == ""
1101                 and string_idx >= 2
1102             ):
1103                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
1104                 p_idx -= 1
1105
1106             P = LL[p_idx]
1107             if P.type in self.STRING_OPERATORS:
1108                 # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
1109                 offset += len(str(P)) + 1
1110
1111             if P.type == token.COMMA:
1112                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
1113                 offset += 3
1114
1115             if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
1116                 # This conditional branch is meant to handle dictionary keys,
1117                 # variable assignments, 'return STRING' statement lines, and
1118                 # 'else STRING' ternary expression lines.
1119
1120                 # WMA4 a single space.
1121                 offset += 1
1122
1123                 # WMA4 the lengths of any leaves that came before that space,
1124                 # but after any closing bracket before that space.
1125                 for leaf in reversed(LL[: p_idx + 1]):
1126                     offset += len(str(leaf))
1127                     if leaf.type in CLOSING_BRACKETS:
1128                         break
1129
1130         if is_valid_index(string_idx + 1):
1131             N = LL[string_idx + 1]
1132             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
1133                 # If the next leaf is an empty RPAR placeholder, we should skip it.
1134                 N = LL[string_idx + 2]
1135
1136             if N.type == token.COMMA:
1137                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
1138                 offset += 1
1139
1140             if is_valid_index(string_idx + 2):
1141                 NN = LL[string_idx + 2]
1142
1143                 if N.type == token.DOT and NN.type == token.NAME:
1144                     # This conditional branch is meant to handle method calls invoked
1145                     # off of a string literal up to and including the LPAR character.
1146
1147                     # WMA4 the '.' character.
1148                     offset += 1
1149
1150                     if (
1151                         is_valid_index(string_idx + 3)
1152                         and LL[string_idx + 3].type == token.LPAR
1153                     ):
1154                         # WMA4 the left parenthesis character.
1155                         offset += 1
1156
1157                     # WMA4 the length of the method's name.
1158                     offset += len(NN.value)
1159
1160         has_comments = False
1161         for comment_leaf in line.comments_after(LL[string_idx]):
1162             if not has_comments:
1163                 has_comments = True
1164                 # WMA4 two spaces before the '#' character.
1165                 offset += 2
1166
1167             # WMA4 the length of the inline comment.
1168             offset += len(comment_leaf.value)
1169
1170         max_string_length = count_chars_in_width(str(line), self.line_length - offset)
1171         return max_string_length
1172
1173     @staticmethod
1174     def _prefer_paren_wrap_match(LL: List[Leaf]) -> Optional[int]:
1175         """
1176         Returns:
1177             string_idx such that @LL[string_idx] is equal to our target (i.e.
1178             matched) string, if this line matches the "prefer paren wrap" statement
1179             requirements listed in the 'Requirements' section of the StringParenWrapper
1180             class's docstring.
1181                 OR
1182             None, otherwise.
1183         """
1184         # The line must start with a string.
1185         if LL[0].type != token.STRING:
1186             return None
1187
1188         matching_nodes = [
1189             syms.listmaker,
1190             syms.dictsetmaker,
1191             syms.testlist_gexp,
1192         ]
1193         # If the string is an immediate child of a list/set/tuple literal...
1194         if (
1195             parent_type(LL[0]) in matching_nodes
1196             or parent_type(LL[0].parent) in matching_nodes
1197         ):
1198             # And the string is surrounded by commas (or is the first/last child)...
1199             prev_sibling = LL[0].prev_sibling
1200             next_sibling = LL[0].next_sibling
1201             if (
1202                 not prev_sibling
1203                 and not next_sibling
1204                 and parent_type(LL[0]) == syms.atom
1205             ):
1206                 # If it's an atom string, we need to check the parent atom's siblings.
1207                 parent = LL[0].parent
1208                 assert parent is not None  # For type checkers.
1209                 prev_sibling = parent.prev_sibling
1210                 next_sibling = parent.next_sibling
1211             if (not prev_sibling or prev_sibling.type == token.COMMA) and (
1212                 not next_sibling or next_sibling.type == token.COMMA
1213             ):
1214                 return 0
1215
1216         return None
1217
1218
def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yields the (start, end) span of every top-level `{...}` expression found in
    the f-string source @s.

    Spans are half-open ranges (left inclusive, right exclusive); nested
    braces are folded into the span of their outermost expression.
    Assumes the input string is a valid f-string, but will not crash if the input
    string is invalid.
    """
    open_braces: List[int] = []  # positions of the currently unclosed "{"
    pos = 0
    end = len(s)
    while pos < end:
        char = s[pos]

        if char == "{":
            if not open_braces and s[pos + 1 : pos + 2] == "{":
                # An escaped curly brace ("{{") in the string part of the
                # f-string: step over both characters.
                pos += 2
            else:
                open_braces.append(pos)
                pos += 1

        elif char == "}":
            if open_braces:
                start = open_braces.pop()
                if not open_braces:
                    # Back out of the outermost expression: report its span.
                    yield (start, pos + 1)
            pos += 1

        elif open_braces and char in ("'", '"'):
            # Inside an expression, fast forward through string literals so
            # that braces within them are ignored. Note that backslashes are
            # not legal in the expression portion of f-strings.
            triple = s[pos : pos + 3]
            quote = triple if triple in ("'''", '"""') else char
            closing = s.find(quote, pos + len(quote))
            # An unterminated literal swallows the rest of the input.
            pos = end if closing == -1 else closing + len(quote)

        else:
            pos += 1
1264
1265
def fstring_contains_expr(s: str) -> bool:
    """Returns True iff `iter_fexpr_spans(s)` yields at least one span."""
    for _span in iter_fexpr_spans(s):
        # The first span is proof enough; there is no need to scan further.
        return True
    return False
1268
1269
def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Replaces every `old_quote` found inside the f-string expressions of
    @fstring with the other quote character; text outside of the expressions
    is left untouched.

    f-string expressions can't contain backslashes, so we need to toggle the
    quotes if the f-string itself will end up using the same quote. We can
    simply toggle without escaping because quotes can't be reused in f-string
    expressions; they would fail to parse.

    NOTE: If PEP 701 is accepted, above statement will no longer be true.
    Though if quotes can be reused, we can simply reuse them without updates or
    escaping, once Black figures out how to parse the new grammar.
    """
    new_quote = '"' if old_quote != '"' else "'"

    pieces: List[str] = []
    cursor = 0  # end of the last expression span that has been processed
    for start, end in iter_fexpr_spans(fstring):
        literal_text = fstring[cursor:start]
        expression = fstring[start:end]
        pieces += [literal_text, expression.replace(old_quote, new_quote)]
        cursor = end

    # Whatever follows the last expression is copied verbatim.
    pieces.append(fstring[cursor:])
    return "".join(pieces)
1292
1293
1294 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
1295     """
1296     StringTransformer that splits "atom" strings (i.e. strings which exist on
1297     lines by themselves).
1298
1299     Requirements:
1300         * The line consists ONLY of a single string (possibly prefixed by a
1301           string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
1302           a trailing comma.
1303           AND
1304         * All of the requirements listed in BaseStringSplitter's docstring.
1305
1306     Transformations:
1307         The string mentioned in the 'Requirements' section is split into as
1308         many substrings as necessary to adhere to the configured line length.
1309
1310         In the final set of substrings, no substring should be smaller than
1311         MIN_SUBSTR_SIZE characters.
1312
1313         The string will ONLY be split on spaces (i.e. each new substring should
1314         start with a space). Note that the string will NOT be split on a space
1315         which is escaped with a backslash.
1316
1317         If the string is an f-string, it will NOT be split in the middle of an
1318         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
1319         else bar()} is an f-expression).
1320
1321         If the string that is being split has an associated set of custom split
1322         records and those custom splits will NOT result in any line going over
1323         the configured line length, those custom splits are used. Otherwise the
1324         string is split as late as possible (from left-to-right) while still
1325         adhering to the transformation rules listed above.
1326
1327     Collaborations:
1328         StringSplitter relies on StringMerger to construct the appropriate
1329         CustomSplit objects and add them to the custom split map.
1330     """
1331
1332     MIN_SUBSTR_SIZE: Final = 6
1333
1334     def do_splitter_match(self, line: Line) -> TMatchResult:
1335         LL = line.leaves
1336
1337         if self._prefer_paren_wrap_match(LL) is not None:
1338             return TErr("Line needs to be wrapped in parens first.")
1339
1340         is_valid_index = is_valid_index_factory(LL)
1341
1342         idx = 0
1343
1344         # The first two leaves MAY be the 'not in' keywords...
1345         if (
1346             is_valid_index(idx)
1347             and is_valid_index(idx + 1)
1348             and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
1349             and str(LL[idx]) + str(LL[idx + 1]) == "not in"
1350         ):
1351             idx += 2
1352         # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
1353         elif is_valid_index(idx) and (
1354             LL[idx].type in self.STRING_OPERATORS
1355             or LL[idx].type == token.NAME
1356             and str(LL[idx]) == "in"
1357         ):
1358             idx += 1
1359
1360         # The next/first leaf MAY be an empty LPAR...
1361         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
1362             idx += 1
1363
1364         # The next/first leaf MUST be a string...
1365         if not is_valid_index(idx) or LL[idx].type != token.STRING:
1366             return TErr("Line does not start with a string.")
1367
1368         string_idx = idx
1369
1370         # Skip the string trailer, if one exists.
1371         string_parser = StringParser()
1372         idx = string_parser.parse(LL, string_idx)
1373
1374         # That string MAY be followed by an empty RPAR...
1375         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
1376             idx += 1
1377
1378         # That string / empty RPAR leaf MAY be followed by a comma...
1379         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1380             idx += 1
1381
1382         # But no more leaves are allowed...
1383         if is_valid_index(idx):
1384             return TErr("This line does not end with a string.")
1385
1386         return Ok([string_idx])
1387
1388     def do_transform(
1389         self, line: Line, string_indices: List[int]
1390     ) -> Iterator[TResult[Line]]:
1391         LL = line.leaves
1392         assert len(string_indices) == 1, (
1393             f"{self.__class__.__name__} should only find one match at a time, found"
1394             f" {len(string_indices)}"
1395         )
1396         string_idx = string_indices[0]
1397
1398         QUOTE = LL[string_idx].value[-1]
1399
1400         is_valid_index = is_valid_index_factory(LL)
1401         insert_str_child = insert_str_child_factory(LL[string_idx])
1402
1403         prefix = get_string_prefix(LL[string_idx].value).lower()
1404
1405         # We MAY choose to drop the 'f' prefix from substrings that don't
1406         # contain any f-expressions, but ONLY if the original f-string
1407         # contains at least one f-expression. Otherwise, we will alter the AST
1408         # of the program.
1409         drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
1410             LL[string_idx].value
1411         )
1412
1413         first_string_line = True
1414
1415         string_op_leaves = self._get_string_operator_leaves(LL)
1416         string_op_leaves_length = (
1417             sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1
1418             if string_op_leaves
1419             else 0
1420         )
1421
1422         def maybe_append_string_operators(new_line: Line) -> None:
1423             """
1424             Side Effects:
1425                 If @line starts with a string operator and this is the first
1426                 line we are constructing, this function appends the string
1427                 operator to @new_line and replaces the old string operator leaf
1428                 in the node structure. Otherwise this function does nothing.
1429             """
1430             maybe_prefix_leaves = string_op_leaves if first_string_line else []
1431             for i, prefix_leaf in enumerate(maybe_prefix_leaves):
1432                 replace_child(LL[i], prefix_leaf)
1433                 new_line.append(prefix_leaf)
1434
1435         ends_with_comma = (
1436             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
1437         )
1438
1439         def max_last_string_column() -> int:
1440             """
1441             Returns:
1442                 The max allowed width of the string value used for the last
1443                 line we will construct.  Note that this value means the width
1444                 rather than the number of characters (e.g., many East Asian
1445                 characters expand to two columns).
1446             """
1447             result = self.line_length
1448             result -= line.depth * 4
1449             result -= 1 if ends_with_comma else 0
1450             result -= string_op_leaves_length
1451             return result
1452
1453         # --- Calculate Max Break Width (for string value)
1454         # We start with the line length limit
1455         max_break_width = self.line_length
1456         # The last index of a string of length N is N-1.
1457         max_break_width -= 1
1458         # Leading whitespace is not present in the string value (e.g. Leaf.value).
1459         max_break_width -= line.depth * 4
1460         if max_break_width < 0:
1461             yield TErr(
1462                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
1463                 f" {line.depth}"
1464             )
1465             return
1466
1467         # Check if StringMerger registered any custom splits.
1468         custom_splits = self.pop_custom_splits(LL[string_idx].value)
1469         # We use them ONLY if none of them would produce lines that exceed the
1470         # line limit.
1471         use_custom_breakpoints = bool(
1472             custom_splits
1473             and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
1474         )
1475
1476         # Temporary storage for the remaining chunk of the string line that
1477         # can't fit onto the line currently being constructed.
1478         rest_value = LL[string_idx].value
1479
1480         def more_splits_should_be_made() -> bool:
1481             """
1482             Returns:
1483                 True iff `rest_value` (the remaining string value from the last
1484                 split), should be split again.
1485             """
1486             if use_custom_breakpoints:
1487                 return len(custom_splits) > 1
1488             else:
1489                 return str_width(rest_value) > max_last_string_column()
1490
1491         string_line_results: List[Ok[Line]] = []
1492         while more_splits_should_be_made():
1493             if use_custom_breakpoints:
1494                 # Custom User Split (manual)
1495                 csplit = custom_splits.pop(0)
1496                 break_idx = csplit.break_idx
1497             else:
1498                 # Algorithmic Split (automatic)
1499                 max_bidx = (
1500                     count_chars_in_width(rest_value, max_break_width)
1501                     - string_op_leaves_length
1502                 )
1503                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
1504                 if maybe_break_idx is None:
1505                     # If we are unable to algorithmically determine a good split
1506                     # and this string has custom splits registered to it, we
1507                     # fall back to using them--which means we have to start
1508                     # over from the beginning.
1509                     if custom_splits:
1510                         rest_value = LL[string_idx].value
1511                         string_line_results = []
1512                         first_string_line = True
1513                         use_custom_breakpoints = True
1514                         continue
1515
1516                     # Otherwise, we stop splitting here.
1517                     break
1518
1519                 break_idx = maybe_break_idx
1520
1521             # --- Construct `next_value`
1522             next_value = rest_value[:break_idx] + QUOTE
1523
1524             # HACK: The following 'if' statement is a hack to fix the custom
1525             # breakpoint index in the case of either: (a) substrings that were
1526             # f-strings but will have the 'f' prefix removed OR (b) substrings
1527             # that were not f-strings but will now become f-strings because of
1528             # redundant use of the 'f' prefix (i.e. none of the substrings
1529             # contain f-expressions but one or more of them had the 'f' prefix
1530             # anyway; in which case, we will prepend 'f' to _all_ substrings).
1531             #
1532             # There is probably a better way to accomplish what is being done
1533             # here...
1534             #
1535             # If this substring is an f-string, we _could_ remove the 'f'
1536             # prefix, and the current custom split did NOT originally use a
1537             # prefix...
1538             if (
1539                 use_custom_breakpoints
1540                 and not csplit.has_prefix
1541                 and (
1542                     # `next_value == prefix + QUOTE` happens when the custom
1543                     # split is an empty string.
1544                     next_value == prefix + QUOTE
1545                     or next_value != self._normalize_f_string(next_value, prefix)
1546                 )
1547             ):
1548                 # Then `csplit.break_idx` will be off by one after removing
1549                 # the 'f' prefix.
1550                 break_idx += 1
1551                 next_value = rest_value[:break_idx] + QUOTE
1552
1553             if drop_pointless_f_prefix:
1554                 next_value = self._normalize_f_string(next_value, prefix)
1555
1556             # --- Construct `next_leaf`
1557             next_leaf = Leaf(token.STRING, next_value)
1558             insert_str_child(next_leaf)
1559             self._maybe_normalize_string_quotes(next_leaf)
1560
1561             # --- Construct `next_line`
1562             next_line = line.clone()
1563             maybe_append_string_operators(next_line)
1564             next_line.append(next_leaf)
1565             string_line_results.append(Ok(next_line))
1566
1567             rest_value = prefix + QUOTE + rest_value[break_idx:]
1568             first_string_line = False
1569
1570         yield from string_line_results
1571
1572         if drop_pointless_f_prefix:
1573             rest_value = self._normalize_f_string(rest_value, prefix)
1574
1575         rest_leaf = Leaf(token.STRING, rest_value)
1576         insert_str_child(rest_leaf)
1577
1578         # NOTE: I could not find a test case that verifies that the following
1579         # line is actually necessary, but it seems to be. Otherwise we risk
1580         # not normalizing the last substring, right?
1581         self._maybe_normalize_string_quotes(rest_leaf)
1582
1583         last_line = line.clone()
1584         maybe_append_string_operators(last_line)
1585
1586         # If there are any leaves to the right of the target string...
1587         if is_valid_index(string_idx + 1):
1588             # We use `temp_value` here to determine how long the last line
1589             # would be if we were to append all the leaves to the right of the
1590             # target string to the last string line.
1591             temp_value = rest_value
1592             for leaf in LL[string_idx + 1 :]:
1593                 temp_value += str(leaf)
1594                 if leaf.type == token.LPAR:
1595                     break
1596
1597             # Try to fit them all on the same line with the last substring...
1598             if (
1599                 str_width(temp_value) <= max_last_string_column()
1600                 or LL[string_idx + 1].type == token.COMMA
1601             ):
1602                 last_line.append(rest_leaf)
1603                 append_leaves(last_line, line, LL[string_idx + 1 :])
1604                 yield Ok(last_line)
1605             # Otherwise, place the last substring on one line and everything
1606             # else on a line below that...
1607             else:
1608                 last_line.append(rest_leaf)
1609                 yield Ok(last_line)
1610
1611                 non_string_line = line.clone()
1612                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
1613                 yield Ok(non_string_line)
1614         # Else the target string was the last leaf...
1615         else:
1616             last_line.append(rest_leaf)
1617             last_line.comments = line.comments.copy()
1618             yield Ok(last_line)
1619
1620     def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1621         """
1622         Yields:
1623             All ranges of @string which, if @string were to be split there,
1624             would result in the splitting of an \\N{...} expression (which is NOT
1625             allowed).
1626         """
1627         # True - the previous backslash was unescaped
1628         # False - the previous backslash was escaped *or* there was no backslash
1629         previous_was_unescaped_backslash = False
1630         it = iter(enumerate(string))
1631         for idx, c in it:
1632             if c == "\\":
1633                 previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1634                 continue
1635             if not previous_was_unescaped_backslash or c != "N":
1636                 previous_was_unescaped_backslash = False
1637                 continue
1638             previous_was_unescaped_backslash = False
1639
1640             begin = idx - 1  # the position of backslash before \N{...}
1641             for idx, c in it:
1642                 if c == "}":
1643                     end = idx
1644                     break
1645             else:
1646                 # malformed nameescape expression?
1647                 # should have been detected by AST parsing earlier...
1648                 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1649             yield begin, end
1650
1651     def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1652         """
1653         Yields:
1654             All ranges of @string which, if @string were to be split there,
1655             would result in the splitting of an f-expression (which is NOT
1656             allowed).
1657         """
1658         if "f" not in get_string_prefix(string).lower():
1659             return
1660         yield from iter_fexpr_spans(string)
1661
1662     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
1663         illegal_indices: Set[Index] = set()
1664         iterators = [
1665             self._iter_fexpr_slices(string),
1666             self._iter_nameescape_slices(string),
1667         ]
1668         for it in iterators:
1669             for begin, end in it:
1670                 illegal_indices.update(range(begin, end + 1))
1671         return illegal_indices
1672
1673     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
1674         """
1675         This method contains the algorithm that StringSplitter uses to
1676         determine which character to split each string at.
1677
1678         Args:
1679             @string: The substring that we are attempting to split.
1680             @max_break_idx: The ideal break index. We will return this value if it
1681             meets all the necessary conditions. In the likely event that it
1682             doesn't we will try to find the closest index BELOW @max_break_idx
1683             that does. If that fails, we will expand our search by also
1684             considering all valid indices ABOVE @max_break_idx.
1685
1686         Pre-Conditions:
1687             * assert_is_leaf_string(@string)
1688             * 0 <= @max_break_idx < len(@string)
1689
1690         Returns:
1691             break_idx, if an index is able to be found that meets all of the
1692             conditions listed in the 'Transformations' section of this classes'
1693             docstring.
1694                 OR
1695             None, otherwise.
1696         """
1697         is_valid_index = is_valid_index_factory(string)
1698
1699         assert is_valid_index(max_break_idx)
1700         assert_is_leaf_string(string)
1701
1702         _illegal_split_indices = self._get_illegal_split_indices(string)
1703
1704         def breaks_unsplittable_expression(i: Index) -> bool:
1705             """
1706             Returns:
1707                 True iff returning @i would result in the splitting of an
1708                 unsplittable expression (which is NOT allowed).
1709             """
1710             return i in _illegal_split_indices
1711
1712         def passes_all_checks(i: Index) -> bool:
1713             """
1714             Returns:
1715                 True iff ALL of the conditions listed in the 'Transformations'
1716                 section of this classes' docstring would be be met by returning @i.
1717             """
1718             is_space = string[i] == " "
1719             is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS
1720
1721             is_not_escaped = True
1722             j = i - 1
1723             while is_valid_index(j) and string[j] == "\\":
1724                 is_not_escaped = not is_not_escaped
1725                 j -= 1
1726
1727             is_big_enough = (
1728                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
1729                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
1730             )
1731             return (
1732                 (is_space or is_split_safe)
1733                 and is_not_escaped
1734                 and is_big_enough
1735                 and not breaks_unsplittable_expression(i)
1736             )
1737
1738         # First, we check all indices BELOW @max_break_idx.
1739         break_idx = max_break_idx
1740         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
1741             break_idx -= 1
1742
1743         if not passes_all_checks(break_idx):
1744             # If that fails, we check all indices ABOVE @max_break_idx.
1745             #
1746             # If we are able to find a valid index here, the next line is going
1747             # to be longer than the specified line length, but it's probably
1748             # better than doing nothing at all.
1749             break_idx = max_break_idx + 1
1750             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
1751                 break_idx += 1
1752
1753             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
1754                 return None
1755
1756         return break_idx
1757
1758     def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
1759         if self.normalize_strings:
1760             leaf.value = normalize_string_quotes(leaf.value)
1761
1762     def _normalize_f_string(self, string: str, prefix: str) -> str:
1763         """
1764         Pre-Conditions:
1765             * assert_is_leaf_string(@string)
1766
1767         Returns:
1768             * If @string is an f-string that contains no f-expressions, we
1769             return a string identical to @string except that the 'f' prefix
1770             has been stripped and all double braces (i.e. '{{' or '}}') have
1771             been normalized (i.e. turned into '{' or '}').
1772                 OR
1773             * Otherwise, we return @string.
1774         """
1775         assert_is_leaf_string(string)
1776
1777         if "f" in prefix and not fstring_contains_expr(string):
1778             new_prefix = prefix.replace("f", "")
1779
1780             temp = string[len(prefix) :]
1781             temp = re.sub(r"\{\{", "{", temp)
1782             temp = re.sub(r"\}\}", "}", temp)
1783             new_string = temp
1784
1785             return f"{new_prefix}{new_string}"
1786         else:
1787             return string
1788
1789     def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
1790         LL = list(leaves)
1791
1792         string_op_leaves = []
1793         i = 0
1794         while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
1795             prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
1796             string_op_leaves.append(prefix_leaf)
1797             i += 1
1798         return string_op_leaves
1799
1800
class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that wraps strings in parens and then splits at the LPAR.

    Requirements:
        All of the requirements listed in BaseStringSplitter's docstring in
        addition to the requirements listed below:

        * The line is a return/yield statement, which returns/yields a string.
          OR
        * The line is part of a ternary expression (e.g. `x = y if cond else
          z`) such that the line starts with `else <string>`, where <string> is
          some string.
          OR
        * The line is an assert statement, which ends with a string.
          OR
        * The line is an assignment statement (e.g. `x = <string>` or `x +=
          <string>`) such that the variable is being assigned the value of some
          string.
          OR
        * The line is a dictionary key assignment where some valid key is being
          assigned the value of some string.
          OR
        * The line is a lambda expression and the value is a string.
          OR
        * The line starts with an "atom" string that prefers to be wrapped in
          parens. It prefers to be wrapped when it is an immediate child of
          a list/set/tuple literal, AND the string is surrounded by commas (or is
          the first/last child).

    Transformations:
        The chosen string is wrapped in parentheses and then split at the LPAR.

        We then have one line which ends with an LPAR and another line that
        starts with the chosen string. The latter line is then split again at
        the RPAR. This results in the RPAR (and possibly a trailing comma)
        being placed on its own line.

        NOTE: If any leaves exist to the right of the chosen string (except
        for a trailing comma, which would be placed after the RPAR), those
        leaves are placed inside the parentheses.  In effect, the chosen
        string is not necessarily being "wrapped" by parentheses. We can,
        however, count on the LPAR being placed directly before the chosen
        string.

        In other words, StringParenWrapper creates "atom" strings. These
        can then be split again by StringSplitter, if necessary.

    Collaborations:
        In the event that a string line split by StringParenWrapper is
        changed such that it no longer needs to be given its own line,
        StringParenWrapper relies on StringParenStripper to clean up the
        parentheses it created.

        For "atom" strings that prefer to be wrapped in parens, it requires
        StringSplitter to hold the split until the string is wrapped in parens.
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        Returns:
            Ok([string_idx]), where @line.leaves[string_idx] is the string
            that should be wrapped in parentheses, if @line satisfies one of
            the requirements listed in this class's docstring.
                OR
            A TErr describing why @line can not be wrapped, otherwise.
        """
        LL = line.leaves

        if line.leaves[-1].type in OPENING_BRACKETS:
            return TErr(
                "Cannot wrap parens around a line that ends in an opening bracket."
            )

        # NOTE: An index of 0 is falsy, so only the LAST operand of this `or`
        # chain is able to deliver it. Every matcher defined in this class
        # returns an index >= 1 (the string always follows at least one
        # leaf), so the ordering below must keep any matcher that may return
        # 0 in the final position.
        string_idx = (
            self._return_match(LL)
            or self._else_match(LL)
            or self._assert_match(LL)
            or self._assign_match(LL)
            or self._dict_or_lambda_match(LL)
            or self._prefer_paren_wrap_match(LL)
        )

        if string_idx is not None:
            string_value = line.leaves[string_idx].value
            # If the string has neither spaces nor East Asian stops...
            if not any(
                char == " " or char in SPLIT_SAFE_CHARS for char in string_value
            ):
                # And will still violate the line length limit when split...
                max_string_width = self.line_length - ((line.depth + 1) * 4)
                if str_width(string_value) > max_string_width:
                    # And has no associated custom splits...
                    if not self.has_custom_splits(string_value):
                        # Then we should NOT put this string on its own line.
                        return TErr(
                            "We do not wrap long strings in parentheses when the"
                            " resultant line would still be over the specified line"
                            " length and can't be split further by StringSplitter."
                        )
            return Ok([string_idx])

        return TErr("This line does not contain any non-atomic strings.")

    @staticmethod
    def _return_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the return/yield statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a return/yield statement and the first leaf
        # contains either the "return" or "yield" keywords...
        if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
            0
        ].value in ["return", "yield"]:
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty (invisible) paren directly after the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _else_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the ternary expression
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a ternary expression and the first leaf
        # contains the "else" keyword...
        if (
            parent_type(LL[0]) == syms.test
            and LL[0].type == token.NAME
            and LL[0].value == "else"
        ):
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty (invisible) paren directly after the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _assert_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assert statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an assert statement and the first leaf
        # contains the "assert" keyword...
        if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a comma...
                if leaf.type == token.COMMA:
                    # Skip over an empty paren directly after the comma. The
                    # bounds check keeps a comma that is the final leaf from
                    # raising IndexError; such a comma simply can not match.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That comma MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _assign_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assignment statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an expression statement or is a function
        # argument AND the first leaf contains a variable name...
        if (
            parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
            and LL[0].type == token.NAME
        ):
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find either an '=' or '+=' symbol...
                if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                    # Skip over an empty paren directly after the symbol. The
                    # bounds check keeps a symbol that is the final leaf from
                    # raising IndexError; such a symbol simply can not match.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That symbol MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # The next leaf MAY be a comma iff this line is a part
                        # of a function argument...
                        if (
                            parent_type(LL[0]) == syms.argument
                            and is_valid_index(idx)
                            and LL[idx].type == token.COMMA
                        ):
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _dict_or_lambda_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the dictionary key assignment
            statement or lambda expression requirements listed in the
            'Requirements' section of this class's docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a dictionary key assignment or lambda expression...
        parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)]
        if syms.dictsetmaker in parent_types or syms.lambdef in parent_types:
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a colon, it can either be dict's or lambda's colon...
                if leaf.type == token.COLON and i < len(LL) - 1:
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That colon MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # That string MAY be followed by a comma...
                        if is_valid_index(idx) and LL[idx].type == token.COMMA:
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    def do_transform(
        self, line: Line, string_indices: List[int]
    ) -> Iterator[TResult[Line]]:
        """
        Wrap the string at @string_indices[0] in parentheses.

        Yields:
            Three Ok(Line) results, in order:
            * the leaves to the left of the string followed by a new LPAR
              (plus any inline comments taken from the string, the trailing
              comma, and a replaced old LPAR);
            * the string itself, one level deeper, followed by the leaves
              that were to its right (minus a trailing comma and a replaced
              old RPAR);
            * a new RPAR, followed by the trailing comma if there was one.
        """
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = -1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
                #        "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
                    if (
                        index > 0
                        and index < len(left_leaves) - 1
                        and left_leaves[index - 1].type == token.COLON
                        and left_leaves[index + 1].value == "lambda"
                    ):
                        right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
2193
2194
2195 class StringParser:
2196     """
2197     A state machine that aids in parsing a string's "trailer", which can be
2198     either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
2199     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
2200     varY)`).
2201
2202     NOTE: A new StringParser object MUST be instantiated for each string
2203     trailer we need to parse.
2204
2205     Examples:
2206         We shall assume that `line` equals the `Line` object that corresponds
2207         to the following line of python code:
2208         ```
2209         x = "Some {}.".format("String") + some_other_string
2210         ```
2211
2212         Furthermore, we will assume that `string_idx` is some index such that:
2213         ```
2214         assert line.leaves[string_idx].value == "Some {}."
2215         ```
2216
2217         The following code snippet then holds:
2218         ```
2219         string_parser = StringParser()
2220         idx = string_parser.parse(line.leaves, string_idx)
2221         assert line.leaves[idx].type == token.PLUS
2222         ```
2223     """
2224
    # Placeholder "token type" used in the `_goto` keys below wherever no
    # specific `token.*` constant is named.
    # NOTE(review): presumably this is the fallback looked up when the actual
    # leaf type has no entry of its own -- confirm against `_next_state`.
    DEFAULT_TOKEN: Final = 20210605

    # String Parser States
    START: Final = 1
    DOT: Final = 2
    NAME: Final = 3
    PERCENT: Final = 4
    SINGLE_FMT_ARG: Final = 5
    LPAR: Final = 6
    RPAR: Final = 7
    DONE: Final = 8

    # Lookup Table for Next State
    # Maps (current state, token type) -> next state.
    _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
        # A string trailer may start with '.' OR '%'.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # A '.' MUST be followed by an attribute or method name.
        (DOT, token.NAME): NAME,
        # A method name MUST be followed by an '(', whereas an attribute name
        # is the last symbol in the string trailer.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # A '%' symbol can be followed by an '(' or a single argument (e.g. a
        # string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # If a '%' symbol is followed by a single argument, that argument is
        # the last leaf in the string trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # If present, a ')' symbol is the last symbol in a string trailer.
        # (NOTE: LPARS and nested RPARS are not included in this lookup table,
        # since they are treated as a special case by the parsing logic in this
        # class's implementation.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }
2262
2263     def __init__(self) -> None:
2264         self._state = self.START
2265         self._unmatched_lpars = 0
2266
2267     def parse(self, leaves: List[Leaf], string_idx: int) -> int:
2268         """
2269         Pre-conditions:
2270             * @leaves[@string_idx].type == token.STRING
2271
2272         Returns:
2273             The index directly after the last leaf which is apart of the string
2274             trailer, if a "trailer" exists.
2275             OR
2276             @string_idx + 1, if no string "trailer" exists.
2277         """
2278         assert leaves[string_idx].type == token.STRING
2279
2280         idx = string_idx + 1
2281         while idx < len(leaves) and self._next_state(leaves[idx]):
2282             idx += 1
2283         return idx
2284
2285     def _next_state(self, leaf: Leaf) -> bool:
2286         """
2287         Pre-conditions:
2288             * On the first call to this function, @leaf MUST be the leaf that
2289               was directly after the string leaf in question (e.g. if our target
2290               string is `line.leaves[i]` then the first call to this method must
2291               be `line.leaves[i + 1]`).
2292             * On the next call to this function, the leaf parameter passed in
2293               MUST be the leaf directly following @leaf.
2294
2295         Returns:
2296             True iff @leaf is apart of the string's trailer.
2297         """
2298         # We ignore empty LPAR or RPAR leaves.
2299         if is_empty_par(leaf):
2300             return True
2301
2302         next_token = leaf.type
2303         if next_token == token.LPAR:
2304             self._unmatched_lpars += 1
2305
2306         current_state = self._state
2307
2308         # The LPAR parser state is a special case. We will return True until we
2309         # find the matching RPAR token.
2310         if current_state == self.LPAR:
2311             if next_token == token.RPAR:
2312                 self._unmatched_lpars -= 1
2313                 if self._unmatched_lpars == 0:
2314                     self._state = self.RPAR
2315         # Otherwise, we use a lookup table to determine the next state.
2316         else:
2317             # If the lookup table matches the current state to the next
2318             # token, we use the lookup table.
2319             if (current_state, next_token) in self._goto:
2320                 self._state = self._goto[current_state, next_token]
2321             else:
2322                 # Otherwise, we check if a the current state was assigned a
2323                 # default.
2324                 if (current_state, self.DEFAULT_TOKEN) in self._goto:
2325                     self._state = self._goto[current_state, self.DEFAULT_TOKEN]
2326                 # If no default has been assigned, then this parser has a logic
2327                 # error.
2328                 else:
2329                     raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
2330
2331             if self._state == self.DONE:
2332                 return False
2333
2334         return True
2335
2336
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Detach @string_leaf from its parent and return a helper that inserts
    replacement children, one per call, at the position in the parent that
    @string_leaf used to occupy. Successive calls insert left-to-right.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`, where the node `N` initially looks like this:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        Now run the following snippet.
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        Afterwards `string_leaf.parent is None`, and the node `N` has become:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Capture the parent BEFORE removing the leaf; the example above shows
    # that the leaf has no parent once it has been removed.
    parent = string_leaf.parent
    # `remove()` hands back the leaf's former child index, which becomes the
    # first insertion position.
    insert_at = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal insert_at

        assert parent is not None
        assert insert_at is not None

        parent.insert_child(insert_at, child)
        # Advance so that the next child lands directly after this one.
        insert_at += 1

    return insert_str_child
2396
2397
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a predicate that reports whether an index is a valid, non-negative
    position in @seq. The length of @seq is read on every call of the
    predicate, not when the factory is called.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        # Negative indices are rejected outright, even though Python itself
        # would accept them as "count from the end".
        if idx < 0:
            return False
        return idx < len(seq)

    return is_valid_index