src/black/trans.py

   1 """
   2 String transformers that can split and merge strings.
   3 """
   4 import re
   5 import sys
   6 from abc import ABC, abstractmethod
   7 from collections import defaultdict
   8 from dataclasses import dataclass
   9 from typing import (
  10     Any,
  11     Callable,
  12     ClassVar,
  13     Collection,
  14     Dict,
  15     Iterable,
  16     Iterator,
  17     List,
  18     Optional,
  19     Sequence,
  20     Set,
  21     Tuple,
  22     TypeVar,
  23     Union,
  24 )
  25
  26 if sys.version_info < (3, 8):
  27     from typing_extensions import Final, Literal
  28 else:
  29     from typing import Literal, Final
  30
  31 from mypy_extensions import trait
  32
  33 from black.comments import contains_pragma_comment
  34 from black.lines import Line, append_leaves
  35 from black.mode import Feature, Mode
  36 from black.nodes import (
  37     CLOSING_BRACKETS,
  38     OPENING_BRACKETS,
  39     STANDALONE_COMMENT,
  40     is_empty_lpar,
  41     is_empty_par,
  42     is_empty_rpar,
  43     is_part_of_annotation,
  44     parent_type,
  45     replace_child,
  46     syms,
  47 )
  48 from black.rusty import Err, Ok, Result
  49 from black.strings import (
  50     assert_is_leaf_string,
  51     count_chars_in_width,
  52     get_string_prefix,
  53     has_triple_quotes,
  54     normalize_string_quotes,
  55     str_width,
  56 )
  57 from blib2to3.pgen2 import token
  58 from blib2to3.pytree import Leaf, Node
  59
  60
  61 class CannotTransform(Exception):
  62     """Base class for errors raised by Transformers."""
  63
  64
# types
T = TypeVar("T")  # payload type of a successful TResult
LN = Union[Leaf, Node]  # either a Leaf or a Node
# Call signature of a line transformer: it receives a Line, a collection of
# Features and a Mode, and returns an iterator of Lines.
Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]]
# The following are plain `int` aliases; the name only documents what the
# integer stands for at the use site.
Index = int
NodeType = int
ParserState = int
StringID = int
TResult = Result[T, CannotTransform]  # (T)ransform Result
# Result of a match attempt: on success, a list of indices into `line.leaves`.
TMatchResult = TResult[List[Index]]

SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"])  # East Asian stops
  77
  78
  79 def TErr(err_msg: str) -> Err[CannotTransform]:
  80     """(T)ransform Err
  81
  82     Convenience function used when working with the TResult type.
  83     """
  84     cant_transform = CannotTransform(err_msg)
  85     return Err(cant_transform)
  86
  87
  88 def hug_power_op(
  89     line: Line, features: Collection[Feature], mode: Mode
  90 ) -> Iterator[Line]:
  91     """A transformer which normalizes spacing around power operators."""
  92
  93     # Performance optimization to avoid unnecessary Leaf clones and other ops.
  94     for leaf in line.leaves:
  95         if leaf.type == token.DOUBLESTAR:
  96             break
  97     else:
  98         raise CannotTransform("No doublestar token was found in the line.")
  99
 100     def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
 101         # Brackets and parentheses indicate calls, subscripts, etc. ...
 102         # basically stuff that doesn't count as "simple". Only a NAME lookup
 103         # or dotted lookup (eg. NAME.NAME) is OK.
 104         if step == -1:
 105             disallowed = {token.RPAR, token.RSQB}
 106         else:
 107             disallowed = {token.LPAR, token.LSQB}
 108
 109         while 0 <= index < len(line.leaves):
 110             current = line.leaves[index]
 111             if current.type in disallowed:
 112                 return False
 113             if current.type not in {token.NAME, token.DOT} or current.value == "for":
 114                 # If the current token isn't disallowed, we'll assume this is simple as
 115                 # only the disallowed tokens are semantically attached to this lookup
 116                 # expression we're checking. Also, stop early if we hit the 'for' bit
 117                 # of a comprehension.
 118                 return True
 119
 120             index += step
 121
 122         return True
 123
 124     def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
 125         # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple
 126         # lookup (see above), with or without a preceding unary operator.
 127         start = line.leaves[index]
 128         if start.type in {token.NAME, token.NUMBER}:
 129             return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
 130
 131         if start.type in {token.PLUS, token.MINUS, token.TILDE}:
 132             if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
 133                 # step is always one as bases with a preceding unary op will be checked
 134                 # for simplicity starting from the next token (so it'll hit the check
 135                 # above).
 136                 return is_simple_lookup(index + 1, step=1)
 137
 138         return False
 139
 140     new_line = line.clone()
 141     should_hug = False
 142     for idx, leaf in enumerate(line.leaves):
 143         new_leaf = leaf.clone()
 144         if should_hug:
 145             new_leaf.prefix = ""
 146             should_hug = False
 147
 148         should_hug = (
 149             (0 < idx < len(line.leaves) - 1)
 150             and leaf.type == token.DOUBLESTAR
 151             and is_simple_operand(idx - 1, kind="base")
 152             and line.leaves[idx - 1].value != "lambda"
 153             and is_simple_operand(idx + 1, kind="exponent")
 154         )
 155         if should_hug:
 156             new_leaf.prefix = ""
 157
 158         # We have to be careful to make a new line properly:
 159         # - bracket related metadata must be maintained (handled by Line.append)
 160         # - comments need to copied over, updating the leaf IDs they're attached to
 161         new_line.append(new_leaf, preformatted=True)
 162         for comment_leaf in line.comments_after(leaf):
 163             new_line.append(comment_leaf, preformatted=True)
 164
 165     yield new_line
 166
 167
 168 class StringTransformer(ABC):
 169     """
 170     An implementation of the Transformer protocol that relies on its
 171     subclasses overriding the template methods `do_match(...)` and
 172     `do_transform(...)`.
 173
 174     This Transformer works exclusively on strings (for example, by merging
 175     or splitting them).
 176
 177     The following sections can be found among the docstrings of each concrete
 178     StringTransformer subclass.
 179
 180     Requirements:
 181         Which requirements must be met of the given Line for this
 182         StringTransformer to be applied?
 183
 184     Transformations:
 185         If the given Line meets all of the above requirements, which string
 186         transformations can you expect to be applied to it by this
 187         StringTransformer?
 188
 189     Collaborations:
 190         What contractual agreements does this StringTransformer have with other
 191         StringTransfomers? Such collaborations should be eliminated/minimized
 192         as much as possible.
 193     """
 194
 195     __name__: Final = "StringTransformer"
 196
 197     # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
 198     # `abc.ABC`.
 199     def __init__(self, line_length: int, normalize_strings: bool) -> None:
 200         self.line_length = line_length
 201         self.normalize_strings = normalize_strings
 202
 203     @abstractmethod
 204     def do_match(self, line: Line) -> TMatchResult:
 205         """
 206         Returns:
 207             * Ok(string_indices) such that for each index, `line.leaves[index]`
 208             is our target string if a match was able to be made. For
 209             transformers that don't result in more lines (e.g. StringMerger,
 210             StringParenStripper), multiple matches and transforms are done at
 211             once to reduce the complexity.
 212                 OR
 213             * Err(CannotTransform), if no match could be made.
 214         """
 215
 216     @abstractmethod
 217     def do_transform(
 218         self, line: Line, string_indices: List[int]
 219     ) -> Iterator[TResult[Line]]:
 220         """
 221         Yields:
 222             * Ok(new_line) where new_line is the new transformed line.
 223                 OR
 224             * Err(CannotTransform) if the transformation failed for some reason. The
 225             `do_match(...)` template method should usually be used to reject
 226             the form of the given Line, but in some cases it is difficult to
 227             know whether or not a Line meets the StringTransformer's
 228             requirements until the transformation is already midway.
 229
 230         Side Effects:
 231             This method should NOT mutate @line directly, but it MAY mutate the
 232             Line's underlying Node structure. (WARNING: If the underlying Node
 233             structure IS altered, then this method should NOT be allowed to
 234             yield an CannotTransform after that point.)
 235         """
 236
 237     def __call__(
 238         self, line: Line, _features: Collection[Feature], _mode: Mode
 239     ) -> Iterator[Line]:
 240         """
 241         StringTransformer instances have a call signature that mirrors that of
 242         the Transformer type.
 243
 244         Raises:
 245             CannotTransform(...) if the concrete StringTransformer class is unable
 246             to transform @line.
 247         """
 248         # Optimization to avoid calling `self.do_match(...)` when the line does
 249         # not contain any string.
 250         if not any(leaf.type == token.STRING for leaf in line.leaves):
 251             raise CannotTransform("There are no strings in this line.")
 252
 253         match_result = self.do_match(line)
 254
 255         if isinstance(match_result, Err):
 256             cant_transform = match_result.err()
 257             raise CannotTransform(
 258                 f"The string transformer {self.__class__.__name__} does not recognize"
 259                 " this line as one that it can transform."
 260             ) from cant_transform
 261
 262         string_indices = match_result.ok()
 263
 264         for line_result in self.do_transform(line, string_indices):
 265             if isinstance(line_result, Err):
 266                 cant_transform = line_result.err()
 267                 raise CannotTransform(
 268                     "StringTransformer failed while attempting to transform string."
 269                 ) from cant_transform
 270             line = line_result.ok()
 271             yield line
 272
 273
 274 @dataclass
 275 class CustomSplit:
 276     """A custom (i.e. manual) string split.
 277
 278     A single CustomSplit instance represents a single substring.
 279
 280     Examples:
 281         Consider the following string:
 282         ```
 283         "Hi there friend."
 284         " This is a custom"
 285         f" string {split}."
 286         ```
 287
 288         This string will correspond to the following three CustomSplit instances:
 289         ```
 290         CustomSplit(False, 16)
 291         CustomSplit(False, 17)
 292         CustomSplit(True, 16)
 293         ```
 294     """
 295
 296     has_prefix: bool
 297     break_idx: int
 298
 299
 300 @trait
 301 class CustomSplitMapMixin:
 302     """
 303     This mixin class is used to map merged strings to a sequence of
 304     CustomSplits, which will then be used to re-split the strings iff none of
 305     the resultant substrings go over the configured max line length.
 306     """
 307
 308     _Key: ClassVar = Tuple[StringID, str]
 309     _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
 310         tuple
 311     )
 312
 313     @staticmethod
 314     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
 315         """
 316         Returns:
 317             A unique identifier that is used internally to map @string to a
 318             group of custom splits.
 319         """
 320         return (id(string), string)
 321
 322     def add_custom_splits(
 323         self, string: str, custom_splits: Iterable[CustomSplit]
 324     ) -> None:
 325         """Custom Split Map Setter Method
 326
 327         Side Effects:
 328             Adds a mapping from @string to the custom splits @custom_splits.
 329         """
 330         key = self._get_key(string)
 331         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
 332
 333     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
 334         """Custom Split Map Getter Method
 335
 336         Returns:
 337             * A list of the custom splits that are mapped to @string, if any
 338             exist.
 339                 OR
 340             * [], otherwise.
 341
 342         Side Effects:
 343             Deletes the mapping between @string and its associated custom
 344             splits (which are returned to the caller).
 345         """
 346         key = self._get_key(string)
 347
 348         custom_splits = self._CUSTOM_SPLIT_MAP[key]
 349         del self._CUSTOM_SPLIT_MAP[key]
 350
 351         return list(custom_splits)
 352
 353     def has_custom_splits(self, string: str) -> bool:
 354         """
 355         Returns:
 356             True iff @string is associated with a set of custom splits.
 357         """
 358         key = self._get_key(string)
 359         return key in self._CUSTOM_SPLIT_MAP
 360
 361
 362 class StringMerger(StringTransformer, CustomSplitMapMixin):
 363     """StringTransformer that merges strings together.
 364
 365     Requirements:
 366         (A) The line contains adjacent strings such that ALL of the validation checks
 367         listed in StringMerger._validate_msg(...)'s docstring pass.
 368             OR
 369         (B) The line contains a string which uses line continuation backslashes.
 370
 371     Transformations:
        Depending on which of the two requirements above were met, either:
 373
 374         (A) The string group associated with the target string is merged.
 375             OR
 376         (B) All line-continuation backslashes are removed from the target string.
 377
 378     Collaborations:
 379         StringMerger provides custom split information to StringSplitter.
 380     """
 381
 382     def do_match(self, line: Line) -> TMatchResult:
 383         LL = line.leaves
 384
 385         is_valid_index = is_valid_index_factory(LL)
 386
 387         string_indices = []
 388         idx = 0
 389         while is_valid_index(idx):
 390             leaf = LL[idx]
 391             if (
 392                 leaf.type == token.STRING
 393                 and is_valid_index(idx + 1)
 394                 and LL[idx + 1].type == token.STRING
 395             ):
 396                 if not is_part_of_annotation(leaf):
 397                     string_indices.append(idx)
 398
 399                 # Advance to the next non-STRING leaf.
 400                 idx += 2
 401                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 402                     idx += 1
 403
 404             elif leaf.type == token.STRING and "\\\n" in leaf.value:
 405                 string_indices.append(idx)
 406                 # Advance to the next non-STRING leaf.
 407                 idx += 1
 408                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 409                     idx += 1
 410
 411             else:
 412                 idx += 1
 413
 414         if string_indices:
 415             return Ok(string_indices)
 416         else:
 417             return TErr("This line has no strings that need merging.")
 418
 419     def do_transform(
 420         self, line: Line, string_indices: List[int]
 421     ) -> Iterator[TResult[Line]]:
 422         new_line = line
 423
 424         rblc_result = self._remove_backslash_line_continuation_chars(
 425             new_line, string_indices
 426         )
 427         if isinstance(rblc_result, Ok):
 428             new_line = rblc_result.ok()
 429
 430         msg_result = self._merge_string_group(new_line, string_indices)
 431         if isinstance(msg_result, Ok):
 432             new_line = msg_result.ok()
 433
 434         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
 435             msg_cant_transform = msg_result.err()
 436             rblc_cant_transform = rblc_result.err()
 437             cant_transform = CannotTransform(
 438                 "StringMerger failed to merge any strings in this line."
 439             )
 440
 441             # Chain the errors together using `__cause__`.
 442             msg_cant_transform.__cause__ = rblc_cant_transform
 443             cant_transform.__cause__ = msg_cant_transform
 444
 445             yield Err(cant_transform)
 446         else:
 447             yield Ok(new_line)
 448
 449     @staticmethod
 450     def _remove_backslash_line_continuation_chars(
 451         line: Line, string_indices: List[int]
 452     ) -> TResult[Line]:
 453         """
 454         Merge strings that were split across multiple lines using
 455         line-continuation backslashes.
 456
 457         Returns:
 458             Ok(new_line), if @line contains backslash line-continuation
 459             characters.
 460                 OR
 461             Err(CannotTransform), otherwise.
 462         """
 463         LL = line.leaves
 464
 465         indices_to_transform = []
 466         for string_idx in string_indices:
 467             string_leaf = LL[string_idx]
 468             if (
 469                 string_leaf.type == token.STRING
 470                 and "\\\n" in string_leaf.value
 471                 and not has_triple_quotes(string_leaf.value)
 472             ):
 473                 indices_to_transform.append(string_idx)
 474
 475         if not indices_to_transform:
 476             return TErr(
 477                 "Found no string leaves that contain backslash line continuation"
 478                 " characters."
 479             )
 480
 481         new_line = line.clone()
 482         new_line.comments = line.comments.copy()
 483         append_leaves(new_line, line, LL)
 484
 485         for string_idx in indices_to_transform:
 486             new_string_leaf = new_line.leaves[string_idx]
 487             new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
 488
 489         return Ok(new_line)
 490
 491     def _merge_string_group(
 492         self, line: Line, string_indices: List[int]
 493     ) -> TResult[Line]:
 494         """
 495         Merges string groups (i.e. set of adjacent strings).
 496
 497         Each index from `string_indices` designates one string group's first
 498         leaf in `line.leaves`.
 499
 500         Returns:
 501             Ok(new_line), if ALL of the validation checks found in
 502             _validate_msg(...) pass.
 503                 OR
 504             Err(CannotTransform), otherwise.
 505         """
 506         LL = line.leaves
 507
 508         is_valid_index = is_valid_index_factory(LL)
 509
 510         # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
 511         merged_string_idx_dict: Dict[int, Tuple[int, Leaf]] = {}
 512         for string_idx in string_indices:
 513             vresult = self._validate_msg(line, string_idx)
 514             if isinstance(vresult, Err):
 515                 continue
 516             merged_string_idx_dict[string_idx] = self._merge_one_string_group(
 517                 LL, string_idx, is_valid_index
 518             )
 519
 520         if not merged_string_idx_dict:
 521             return TErr("No string group is merged")
 522
 523         # Build the final line ('new_line') that this method will later return.
 524         new_line = line.clone()
 525         previous_merged_string_idx = -1
 526         previous_merged_num_of_strings = -1
 527         for i, leaf in enumerate(LL):
 528             if i in merged_string_idx_dict:
 529                 previous_merged_string_idx = i
 530                 previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
 531                 new_line.append(string_leaf)
 532
 533             if (
 534                 previous_merged_string_idx
 535                 <= i
 536                 < previous_merged_string_idx + previous_merged_num_of_strings
 537             ):
 538                 for comment_leaf in line.comments_after(LL[i]):
 539                     new_line.append(comment_leaf, preformatted=True)
 540                 continue
 541
 542             append_leaves(new_line, line, [leaf])
 543
 544         return Ok(new_line)
 545
    def _merge_one_string_group(
        self, LL: List[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> Tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        Args:
            LL: the leaves of the line being transformed.
            string_idx: index in @LL of the first STRING leaf of the group.
                The group extends over every consecutive STRING leaf that
                follows it.
            is_valid_index: predicate telling whether an index is usable
                with @LL.

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.

        Side Effects:
            * The Node structure is altered: the old STRING leaves (or the
            Atom node wrapping them) are replaced by the merged leaf.
            * The custom splits of the merged string are recorded through
            `self.add_custom_splits(...)`.
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The closing quote character of the group's first string. Every
        # substring is re-quoted with it while merging.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                # NOTE(review): _toggle_fexpr_quotes is defined outside this
                # block; presumably it rewrites quotes inside f-string
                # expressions so they differ from QUOTE -- confirm there.
                string = _toggle_fexpr_quotes(string, QUOTE)
                # After quotes toggling, quotes in expressions won't be escaped
                # because quotes can't be reused in f-strings. So we can simply
                # let the escaping logic below run without knowing f-string
                # expressions.

            # Matches an even-length (possibly empty) run of backslashes that
            # is not itself preceded by a backslash. A QUOTE right after such
            # a run is not escaped yet, so a backslash is inserted before it;
            # a QUOTE that is already escaped is left alone.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            # Drop the prefix, the opening quote and the closing quote.
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have: the first non-empty (lowercased) prefix found while
        # scanning the group from its first string, or "" if there is none.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            # Rebuild the merged string so far, with a BREAK_MARK recording
            # where this substring ended, then strip it again so that it can
            # serve as the head of the next iteration's string.
            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Take a note on the index of the non-STRING leaf.
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        # 'temp_string' is the merged string without its prefix and surrounding
        # quotes; it is consumed one BREAK_MARK at a time.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            # Offset of the mark within the remaining text, plus room for the
            # prefix (when the substring had one) and one more character.
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        # The leaf that is handed back no longer contains any BREAK_MARK.
        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                # NOTE(review): remove() appears to return the position the
                # leaf had among its parent's children, or None -- confirm.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf
 688
 689     @staticmethod
 690     def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
 691         """Validate (M)erge (S)tring (G)roup
 692
 693         Transform-time string validation logic for _merge_string_group(...).
 694
 695         Returns:
 696             * Ok(None), if ALL validation checks (listed below) pass.
 697                 OR
 698             * Err(CannotTransform), if any of the following are true:
 699                 - The target string group does not contain ANY stand-alone comments.
 700                 - The target string is not in a string group (i.e. it has no
 701                   adjacent strings).
 702                 - The string group has more than one inline comment.
 703                 - The string group has an inline comment that appears to be a pragma.
 704                 - The set of all string prefixes in the string group is of
 705                   length greater than one and is not equal to {"", "f"}.
 706                 - The string group consists of raw strings.
 707                 - The string group is stringified type annotations. We don't want to
 708                   process stringified type annotations since pyright doesn't support
 709                   them spanning multiple string values. (NOTE: mypy, pytype, pyre do
 710                   support them, so we can change if pyright also gains support in the
 711                   future. See https://github.com/microsoft/pyright/issues/4359.)
 712         """
 713         # We first check for "inner" stand-alone comments (i.e. stand-alone
 714         # comments that have a string leaf before them AND after them).
 715         for inc in [1, -1]:
 716             i = string_idx
 717             found_sa_comment = False
 718             is_valid_index = is_valid_index_factory(line.leaves)
 719             while is_valid_index(i) and line.leaves[i].type in [
 720                 token.STRING,
 721                 STANDALONE_COMMENT,
 722             ]:
 723                 if line.leaves[i].type == STANDALONE_COMMENT:
 724                     found_sa_comment = True
 725                 elif found_sa_comment:
 726                     return TErr(
 727                         "StringMerger does NOT merge string groups which contain "
 728                         "stand-alone comments."
 729                     )
 730
 731                 i += inc
 732
 733         num_of_inline_string_comments = 0
 734         set_of_prefixes = set()
 735         num_of_strings = 0
 736         for leaf in line.leaves[string_idx:]:
 737             if leaf.type != token.STRING:
 738                 # If the string group is trailed by a comma, we count the
 739                 # comments trailing the comma to be one of the string group's
 740                 # comments.
 741                 if leaf.type == token.COMMA and id(leaf) in line.comments:
 742                     num_of_inline_string_comments += 1
 743                 break
 744
 745             if has_triple_quotes(leaf.value):
 746                 return TErr("StringMerger does NOT merge multiline strings.")
 747
 748             num_of_strings += 1
 749             prefix = get_string_prefix(leaf.value).lower()
 750             if "r" in prefix:
 751                 return TErr("StringMerger does NOT merge raw strings.")
 752
 753             set_of_prefixes.add(prefix)
 754
 755             if id(leaf) in line.comments:
 756                 num_of_inline_string_comments += 1
 757                 if contains_pragma_comment(line.comments[id(leaf)]):
 758                     return TErr("Cannot merge strings which have pragma comments.")
 759
 760         if num_of_strings < 2:
 761             return TErr(
 762                 f"Not enough strings to merge (num_of_strings={num_of_strings})."
 763             )
 764
 765         if num_of_inline_string_comments > 1:
 766             return TErr(
 767                 f"Too many inline string comments ({num_of_inline_string_comments})."
 768             )
 769
 770         if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
 771             return TErr(f"Too many different prefixes ({set_of_prefixes}).")
 772
 773         return Ok(None)
 774
 775
 776 class StringParenStripper(StringTransformer):
 777     """StringTransformer that strips surrounding parentheses from strings.
 778
 779     Requirements:
 780         The line contains a string which is surrounded by parentheses and:
 781             - The target string is NOT the only argument to a function call.
 782             - The target string is NOT a "pointless" string.
 783             - If the target string contains a PERCENT, the brackets are not
 784               preceded or followed by an operator with higher precedence than
 785               PERCENT.
 786
 787     Transformations:
 788         The parentheses mentioned in the 'Requirements' section are stripped.
 789
 790     Collaborations:
 791         StringParenStripper has its own inherent usefulness, but it is also
 792         relied on to clean up the parentheses created by StringParenWrapper (in
 793         the event that they are no longer needed).
 794     """
 795
 796     def do_match(self, line: Line) -> TMatchResult:
 797         LL = line.leaves
 798
 799         is_valid_index = is_valid_index_factory(LL)
 800
 801         string_indices = []
 802
 803         idx = -1
 804         while True:
 805             idx += 1
 806             if idx >= len(LL):
 807                 break
 808             leaf = LL[idx]
 809
 810             # Should be a string...
 811             if leaf.type != token.STRING:
 812                 continue
 813
 814             # If this is a "pointless" string...
 815             if (
 816                 leaf.parent
 817                 and leaf.parent.parent
 818                 and leaf.parent.parent.type == syms.simple_stmt
 819             ):
 820                 continue
 821
 822             # Should be preceded by a non-empty LPAR...
 823             if (
 824                 not is_valid_index(idx - 1)
 825                 or LL[idx - 1].type != token.LPAR
 826                 or is_empty_lpar(LL[idx - 1])
 827             ):
 828                 continue
 829
 830             # That LPAR should NOT be preceded by a function name or a closing
 831             # bracket (which could be a function which returns a function or a
 832             # list/dictionary that contains a function)...
 833             if is_valid_index(idx - 2) and (
 834                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
 835             ):
 836                 continue
 837
 838             string_idx = idx
 839
 840             # Skip the string trailer, if one exists.
 841             string_parser = StringParser()
 842             next_idx = string_parser.parse(LL, string_idx)
 843
 844             # if the leaves in the parsed string include a PERCENT, we need to
 845             # make sure the initial LPAR is NOT preceded by an operator with
 846             # higher or equal precedence to PERCENT
 847             if is_valid_index(idx - 2):
 848                 # mypy can't quite follow unless we name this
 849                 before_lpar = LL[idx - 2]
 850                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
 851                     (
 852                         before_lpar.type
 853                         in {
 854                             token.STAR,
 855                             token.AT,
 856                             token.SLASH,
 857                             token.DOUBLESLASH,
 858                             token.PERCENT,
 859                             token.TILDE,
 860                             token.DOUBLESTAR,
 861                             token.AWAIT,
 862                             token.LSQB,
 863                             token.LPAR,
 864                         }
 865                     )
 866                     or (
 867                         # only unary PLUS/MINUS
 868                         before_lpar.parent
 869                         and before_lpar.parent.type == syms.factor
 870                         and (before_lpar.type in {token.PLUS, token.MINUS})
 871                     )
 872                 ):
 873                     continue
 874
 875             # Should be followed by a non-empty RPAR...
 876             if (
 877                 is_valid_index(next_idx)
 878                 and LL[next_idx].type == token.RPAR
 879                 and not is_empty_rpar(LL[next_idx])
 880             ):
 881                 # That RPAR should NOT be followed by anything with higher
 882                 # precedence than PERCENT
 883                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
 884                     token.DOUBLESTAR,
 885                     token.LSQB,
 886                     token.LPAR,
 887                     token.DOT,
 888                 }:
 889                     continue
 890
 891                 string_indices.append(string_idx)
 892                 idx = string_idx
 893                 while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
 894                     idx += 1
 895
 896         if string_indices:
 897             return Ok(string_indices)
 898         return TErr("This line has no strings wrapped in parens.")
 899
 900     def do_transform(
 901         self, line: Line, string_indices: List[int]
 902     ) -> Iterator[TResult[Line]]:
 903         LL = line.leaves
 904
 905         string_and_rpar_indices: List[int] = []
 906         for string_idx in string_indices:
 907             string_parser = StringParser()
 908             rpar_idx = string_parser.parse(LL, string_idx)
 909
 910             should_transform = True
 911             for leaf in (LL[string_idx - 1], LL[rpar_idx]):
 912                 if line.comments_after(leaf):
 913                     # Should not strip parentheses which have comments attached
 914                     # to them.
 915                     should_transform = False
 916                     break
 917             if should_transform:
 918                 string_and_rpar_indices.extend((string_idx, rpar_idx))
 919
 920         if string_and_rpar_indices:
 921             yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
 922         else:
 923             yield Err(
 924                 CannotTransform("All string groups have comments attached to them.")
 925             )
 926
 927     def _transform_to_new_line(
 928         self, line: Line, string_and_rpar_indices: List[int]
 929     ) -> Line:
 930         LL = line.leaves
 931
 932         new_line = line.clone()
 933         new_line.comments = line.comments.copy()
 934
 935         previous_idx = -1
 936         # We need to sort the indices, since string_idx and its matching
 937         # rpar_idx may not come in order, e.g. in
 938         # `("outer" % ("inner".join(items)))`, the "inner" string's
 939         # string_idx is smaller than "outer" string's rpar_idx.
 940         for idx in sorted(string_and_rpar_indices):
 941             leaf = LL[idx]
 942             lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
 943             append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
 944             if leaf.type == token.STRING:
 945                 string_leaf = Leaf(token.STRING, LL[idx].value)
 946                 LL[lpar_or_rpar_idx].remove()  # Remove lpar.
 947                 replace_child(LL[idx], string_leaf)
 948                 new_line.append(string_leaf)
 949             else:
 950                 LL[lpar_or_rpar_idx].remove()  # This is a rpar.
 951
 952             previous_idx = idx
 953
 954         # Append the leaves after the last idx:
 955         append_leaves(new_line, line, LL[idx + 1 :])
 956
 957         return new_line
 958
 959
 960 class BaseStringSplitter(StringTransformer):
 961     """
 962     Abstract class for StringTransformers which transform a Line's strings by splitting
 963     them or placing them on their own lines where necessary to avoid going over
 964     the configured line length.
 965
 966     Requirements:
 967         * The target string value is responsible for the line going over the
 968         line length limit. It follows that after all of black's other line
 969         split methods have been exhausted, this line (or one of the resulting
 970         lines after all line splits are performed) would still be over the
 971         line_length limit unless we split this string.
 972             AND
 973         * The target string is NOT a "pointless" string (i.e. a string that has
 974         no parent or siblings).
 975             AND
 976         * The target string is not followed by an inline comment that appears
 977         to be a pragma.
 978             AND
 979         * The target string is not a multiline (i.e. triple-quote) string.
 980     """
 981
 982     STRING_OPERATORS: Final = [
 983         token.EQEQUAL,
 984         token.GREATER,
 985         token.GREATEREQUAL,
 986         token.LESS,
 987         token.LESSEQUAL,
 988         token.NOTEQUAL,
 989         token.PERCENT,
 990         token.PLUS,
 991         token.STAR,
 992     ]
 993
 994     @abstractmethod
 995     def do_splitter_match(self, line: Line) -> TMatchResult:
 996         """
 997         BaseStringSplitter asks its clients to override this method instead of
 998         `StringTransformer.do_match(...)`.
 999
1000         Follows the same protocol as `StringTransformer.do_match(...)`.
1001
1002         Refer to `help(StringTransformer.do_match)` for more information.
1003         """
1004
1005     def do_match(self, line: Line) -> TMatchResult:
1006         match_result = self.do_splitter_match(line)
1007         if isinstance(match_result, Err):
1008             return match_result
1009
1010         string_indices = match_result.ok()
1011         assert len(string_indices) == 1, (
1012             f"{self.__class__.__name__} should only find one match at a time, found"
1013             f" {len(string_indices)}"
1014         )
1015         string_idx = string_indices[0]
1016         vresult = self._validate(line, string_idx)
1017         if isinstance(vresult, Err):
1018             return vresult
1019
1020         return match_result
1021
1022     def _validate(self, line: Line, string_idx: int) -> TResult[None]:
1023         """
1024         Checks that @line meets all of the requirements listed in this classes'
1025         docstring. Refer to `help(BaseStringSplitter)` for a detailed
1026         description of those requirements.
1027
1028         Returns:
1029             * Ok(None), if ALL of the requirements are met.
1030                 OR
1031             * Err(CannotTransform), if ANY of the requirements are NOT met.
1032         """
1033         LL = line.leaves
1034
1035         string_leaf = LL[string_idx]
1036
1037         max_string_length = self._get_max_string_length(line, string_idx)
1038         if len(string_leaf.value) <= max_string_length:
1039             return TErr(
1040                 "The string itself is not what is causing this line to be too long."
1041             )
1042
1043         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
1044             token.STRING,
1045             token.NEWLINE,
1046         ]:
1047             return TErr(
1048                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
1049                 " no parent)."
1050             )
1051
1052         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
1053             line.comments[id(line.leaves[string_idx])]
1054         ):
1055             return TErr(
1056                 "Line appears to end with an inline pragma comment. Splitting the line"
1057                 " could modify the pragma's behavior."
1058             )
1059
1060         if has_triple_quotes(string_leaf.value):
1061             return TErr("We cannot split multiline strings.")
1062
1063         return Ok(None)
1064
1065     def _get_max_string_length(self, line: Line, string_idx: int) -> int:
1066         """
1067         Calculates the max string length used when attempting to determine
1068         whether or not the target string is responsible for causing the line to
1069         go over the line length limit.
1070
1071         WARNING: This method is tightly coupled to both StringSplitter and
1072         (especially) StringParenWrapper. There is probably a better way to
1073         accomplish what is being done here.
1074
1075         Returns:
1076             max_string_length: such that `line.leaves[string_idx].value >
1077             max_string_length` implies that the target string IS responsible
1078             for causing this line to exceed the line length limit.
1079         """
1080         LL = line.leaves
1081
1082         is_valid_index = is_valid_index_factory(LL)
1083
1084         # We use the shorthand "WMA4" in comments to abbreviate "We must
1085         # account for". When giving examples, we use STRING to mean some/any
1086         # valid string.
1087         #
1088         # Finally, we use the following convenience variables:
1089         #
1090         #   P:  The leaf that is before the target string leaf.
1091         #   N:  The leaf that is after the target string leaf.
1092         #   NN: The leaf that is after N.
1093
1094         # WMA4 the whitespace at the beginning of the line.
1095         offset = line.depth * 4
1096
1097         if is_valid_index(string_idx - 1):
1098             p_idx = string_idx - 1
1099             if (
1100                 LL[string_idx - 1].type == token.LPAR
1101                 and LL[string_idx - 1].value == ""
1102                 and string_idx >= 2
1103             ):
1104                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
1105                 p_idx -= 1
1106
1107             P = LL[p_idx]
1108             if P.type in self.STRING_OPERATORS:
1109                 # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
1110                 offset += len(str(P)) + 1
1111
1112             if P.type == token.COMMA:
1113                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
1114                 offset += 3
1115
1116             if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
1117                 # This conditional branch is meant to handle dictionary keys,
1118                 # variable assignments, 'return STRING' statement lines, and
1119                 # 'else STRING' ternary expression lines.
1120
1121                 # WMA4 a single space.
1122                 offset += 1
1123
1124                 # WMA4 the lengths of any leaves that came before that space,
1125                 # but after any closing bracket before that space.
1126                 for leaf in reversed(LL[: p_idx + 1]):
1127                     offset += len(str(leaf))
1128                     if leaf.type in CLOSING_BRACKETS:
1129                         break
1130
1131         if is_valid_index(string_idx + 1):
1132             N = LL[string_idx + 1]
1133             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
1134                 # If the next leaf is an empty RPAR placeholder, we should skip it.
1135                 N = LL[string_idx + 2]
1136
1137             if N.type == token.COMMA:
1138                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
1139                 offset += 1
1140
1141             if is_valid_index(string_idx + 2):
1142                 NN = LL[string_idx + 2]
1143
1144                 if N.type == token.DOT and NN.type == token.NAME:
1145                     # This conditional branch is meant to handle method calls invoked
1146                     # off of a string literal up to and including the LPAR character.
1147
1148                     # WMA4 the '.' character.
1149                     offset += 1
1150
1151                     if (
1152                         is_valid_index(string_idx + 3)
1153                         and LL[string_idx + 3].type == token.LPAR
1154                     ):
1155                         # WMA4 the left parenthesis character.
1156                         offset += 1
1157
1158                     # WMA4 the length of the method's name.
1159                     offset += len(NN.value)
1160
1161         has_comments = False
1162         for comment_leaf in line.comments_after(LL[string_idx]):
1163             if not has_comments:
1164                 has_comments = True
1165                 # WMA4 two spaces before the '#' character.
1166                 offset += 2
1167
1168             # WMA4 the length of the inline comment.
1169             offset += len(comment_leaf.value)
1170
1171         max_string_length = count_chars_in_width(str(line), self.line_length - offset)
1172         return max_string_length
1173
1174     @staticmethod
1175     def _prefer_paren_wrap_match(LL: List[Leaf]) -> Optional[int]:
1176         """
1177         Returns:
1178             string_idx such that @LL[string_idx] is equal to our target (i.e.
1179             matched) string, if this line matches the "prefer paren wrap" statement
1180             requirements listed in the 'Requirements' section of the StringParenWrapper
1181             class's docstring.
1182                 OR
1183             None, otherwise.
1184         """
1185         # The line must start with a string.
1186         if LL[0].type != token.STRING:
1187             return None
1188
1189         # If the string is surrounded by commas (or is the first/last child)...
1190         prev_sibling = LL[0].prev_sibling
1191         next_sibling = LL[0].next_sibling
1192         if not prev_sibling and not next_sibling and parent_type(LL[0]) == syms.atom:
1193             # If it's an atom string, we need to check the parent atom's siblings.
1194             parent = LL[0].parent
1195             assert parent is not None  # For type checkers.
1196             prev_sibling = parent.prev_sibling
1197             next_sibling = parent.next_sibling
1198         if (not prev_sibling or prev_sibling.type == token.COMMA) and (
1199             not next_sibling or next_sibling.type == token.COMMA
1200         ):
1201             return 0
1202
1203         return None
1204
1205
def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yields the span of every top-level expression found in the f-string `s`.
    Each span is a half-open range (start inclusive, end exclusive) that
    includes the curly braces delimiting the expression.
    The input is expected to be a valid f-string; invalid input does not raise,
    it merely produces whatever spans can be paired up.
    """
    open_braces: List[int] = []  # positions of the not-yet-closed "{"
    length = len(s)
    pos = 0
    while pos < length:
        char = s[pos]

        if char == "{":
            if not open_braces and s[pos + 1 : pos + 2] == "{":
                # "{{" in the literal part of the f-string is an escaped
                # brace, not the start of an expression.
                pos += 2
            else:
                open_braces.append(pos)
                pos += 1
            continue

        if char == "}":
            if open_braces:
                start = open_braces.pop()
                if not open_braces:
                    # Back at the top level: the expression is complete.
                    yield start, pos + 1
            pos += 1
            continue

        if open_braces:
            # Inside an expression, jump over string literals in one go so
            # that braces within them are not counted. (Backslashes are not
            # legal in the expression portion of f-strings.)
            quote = None
            triple = s[pos : pos + 3]
            if triple in ("'''", '"""'):
                quote = triple
            elif char in ("'", '"'):
                quote = char

            if quote is not None:
                closing = s.find(quote, pos + len(quote))
                # An unterminated literal swallows the rest of the input.
                pos = length if closing == -1 else closing + len(quote)
                continue

        pos += 1
1251
1252
def fstring_contains_expr(s: str) -> bool:
    """Returns True iff `iter_fexpr_spans` finds at least one expression in `s`."""
    for _span in iter_fexpr_spans(s):
        # One expression is enough; no need to scan the rest of the string.
        return True
    return False
1255
1256
def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Replaces every `old_quote` that occurs inside an f-string expression with
    the other kind of quote; text outside of expressions is left untouched.

    Backslashes are not allowed in f-string expressions, so the quotes must be
    toggled whenever the f-string itself is going to use that same quote.
    Toggling without any escaping is enough, because quotes can't be reused in
    f-string expressions. They will fail to parse.

    NOTE: If PEP 701 is accepted, above statement will no longer be true.
    Though if quotes can be reused, we can simply reuse them without updates or
    escaping, once Black figures out how to parse the new grammar.
    """
    if old_quote == '"':
        new_quote = "'"
    else:
        new_quote = '"'

    pieces: List[str] = []
    cursor = 0
    for expr_start, expr_end in iter_fexpr_spans(fstring):
        literal_text = fstring[cursor:expr_start]
        expression = fstring[expr_start:expr_end]
        pieces += [literal_text, expression.replace(old_quote, new_quote)]
        cursor = expr_end

    # Whatever follows the last expression is copied over as-is.
    pieces.append(fstring[cursor:])
    return "".join(pieces)
1279
1280
1281 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
1282     """
1283     StringTransformer that splits "atom" strings (i.e. strings which exist on
1284     lines by themselves).
1285
1286     Requirements:
1287         * The line consists ONLY of a single string (possibly prefixed by a
1288         string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
1289         a trailing comma.
1290             AND
1291         * All of the requirements listed in BaseStringSplitter's docstring.
1292
1293     Transformations:
1294         The string mentioned in the 'Requirements' section is split into as
1295         many substrings as necessary to adhere to the configured line length.
1296
1297         In the final set of substrings, no substring should be smaller than
1298         MIN_SUBSTR_SIZE characters.
1299
1300         The string will ONLY be split on spaces (i.e. each new substring should
1301         start with a space). Note that the string will NOT be split on a space
1302         which is escaped with a backslash.
1303
1304         If the string is an f-string, it will NOT be split in the middle of an
1305         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
1306         else bar()} is an f-expression).
1307
1308         If the string that is being split has an associated set of custom split
1309         records and those custom splits will NOT result in any line going over
1310         the configured line length, those custom splits are used. Otherwise the
1311         string is split as late as possible (from left-to-right) while still
1312         adhering to the transformation rules listed above.
1313
1314     Collaborations:
1315         StringSplitter relies on StringMerger to construct the appropriate
1316         CustomSplit objects and add them to the custom split map.
1317     """
1318
1319     MIN_SUBSTR_SIZE: Final = 6
1320
1321     def do_splitter_match(self, line: Line) -> TMatchResult:
1322         LL = line.leaves
1323
1324         if self._prefer_paren_wrap_match(LL) is not None:
1325             return TErr("Line needs to be wrapped in parens first.")
1326
1327         is_valid_index = is_valid_index_factory(LL)
1328
1329         idx = 0
1330
1331         # The first two leaves MAY be the 'not in' keywords...
1332         if (
1333             is_valid_index(idx)
1334             and is_valid_index(idx + 1)
1335             and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
1336             and str(LL[idx]) + str(LL[idx + 1]) == "not in"
1337         ):
1338             idx += 2
1339         # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
1340         elif is_valid_index(idx) and (
1341             LL[idx].type in self.STRING_OPERATORS
1342             or LL[idx].type == token.NAME
1343             and str(LL[idx]) == "in"
1344         ):
1345             idx += 1
1346
1347         # The next/first leaf MAY be an empty LPAR...
1348         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
1349             idx += 1
1350
1351         # The next/first leaf MUST be a string...
1352         if not is_valid_index(idx) or LL[idx].type != token.STRING:
1353             return TErr("Line does not start with a string.")
1354
1355         string_idx = idx
1356
1357         # Skip the string trailer, if one exists.
1358         string_parser = StringParser()
1359         idx = string_parser.parse(LL, string_idx)
1360
1361         # That string MAY be followed by an empty RPAR...
1362         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
1363             idx += 1
1364
1365         # That string / empty RPAR leaf MAY be followed by a comma...
1366         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1367             idx += 1
1368
1369         # But no more leaves are allowed...
1370         if is_valid_index(idx):
1371             return TErr("This line does not end with a string.")
1372
1373         return Ok([string_idx])
1374
1375     def do_transform(
1376         self, line: Line, string_indices: List[int]
1377     ) -> Iterator[TResult[Line]]:
1378         LL = line.leaves
1379         assert len(string_indices) == 1, (
1380             f"{self.__class__.__name__} should only find one match at a time, found"
1381             f" {len(string_indices)}"
1382         )
1383         string_idx = string_indices[0]
1384
1385         QUOTE = LL[string_idx].value[-1]
1386
1387         is_valid_index = is_valid_index_factory(LL)
1388         insert_str_child = insert_str_child_factory(LL[string_idx])
1389
1390         prefix = get_string_prefix(LL[string_idx].value).lower()
1391
1392         # We MAY choose to drop the 'f' prefix from substrings that don't
1393         # contain any f-expressions, but ONLY if the original f-string
1394         # contains at least one f-expression. Otherwise, we will alter the AST
1395         # of the program.
1396         drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
1397             LL[string_idx].value
1398         )
1399
1400         first_string_line = True
1401
1402         string_op_leaves = self._get_string_operator_leaves(LL)
1403         string_op_leaves_length = (
1404             sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1
1405             if string_op_leaves
1406             else 0
1407         )
1408
1409         def maybe_append_string_operators(new_line: Line) -> None:
1410             """
1411             Side Effects:
1412                 If @line starts with a string operator and this is the first
1413                 line we are constructing, this function appends the string
1414                 operator to @new_line and replaces the old string operator leaf
1415                 in the node structure. Otherwise this function does nothing.
1416             """
1417             maybe_prefix_leaves = string_op_leaves if first_string_line else []
1418             for i, prefix_leaf in enumerate(maybe_prefix_leaves):
1419                 replace_child(LL[i], prefix_leaf)
1420                 new_line.append(prefix_leaf)
1421
1422         ends_with_comma = (
1423             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
1424         )
1425
1426         def max_last_string_column() -> int:
1427             """
1428             Returns:
1429                 The max allowed width of the string value used for the last
1430                 line we will construct.  Note that this value means the width
1431                 rather than the number of characters (e.g., many East Asian
1432                 characters expand to two columns).
1433             """
1434             result = self.line_length
1435             result -= line.depth * 4
1436             result -= 1 if ends_with_comma else 0
1437             result -= string_op_leaves_length
1438             return result
1439
1440         # --- Calculate Max Break Width (for string value)
1441         # We start with the line length limit
1442         max_break_width = self.line_length
1443         # The last index of a string of length N is N-1.
1444         max_break_width -= 1
1445         # Leading whitespace is not present in the string value (e.g. Leaf.value).
1446         max_break_width -= line.depth * 4
1447         if max_break_width < 0:
1448             yield TErr(
1449                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
1450                 f" {line.depth}"
1451             )
1452             return
1453
1454         # Check if StringMerger registered any custom splits.
1455         custom_splits = self.pop_custom_splits(LL[string_idx].value)
1456         # We use them ONLY if none of them would produce lines that exceed the
1457         # line limit.
1458         use_custom_breakpoints = bool(
1459             custom_splits
1460             and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
1461         )
1462
1463         # Temporary storage for the remaining chunk of the string line that
1464         # can't fit onto the line currently being constructed.
1465         rest_value = LL[string_idx].value
1466
1467         def more_splits_should_be_made() -> bool:
1468             """
1469             Returns:
1470                 True iff `rest_value` (the remaining string value from the last
1471                 split), should be split again.
1472             """
1473             if use_custom_breakpoints:
1474                 return len(custom_splits) > 1
1475             else:
1476                 return str_width(rest_value) > max_last_string_column()
1477
1478         string_line_results: List[Ok[Line]] = []
1479         while more_splits_should_be_made():
1480             if use_custom_breakpoints:
1481                 # Custom User Split (manual)
1482                 csplit = custom_splits.pop(0)
1483                 break_idx = csplit.break_idx
1484             else:
1485                 # Algorithmic Split (automatic)
1486                 max_bidx = (
1487                     count_chars_in_width(rest_value, max_break_width)
1488                     - string_op_leaves_length
1489                 )
1490                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
1491                 if maybe_break_idx is None:
1492                     # If we are unable to algorithmically determine a good split
1493                     # and this string has custom splits registered to it, we
1494                     # fall back to using them--which means we have to start
1495                     # over from the beginning.
1496                     if custom_splits:
1497                         rest_value = LL[string_idx].value
1498                         string_line_results = []
1499                         first_string_line = True
1500                         use_custom_breakpoints = True
1501                         continue
1502
1503                     # Otherwise, we stop splitting here.
1504                     break
1505
1506                 break_idx = maybe_break_idx
1507
1508             # --- Construct `next_value`
1509             next_value = rest_value[:break_idx] + QUOTE
1510
1511             # HACK: The following 'if' statement is a hack to fix the custom
1512             # breakpoint index in the case of either: (a) substrings that were
1513             # f-strings but will have the 'f' prefix removed OR (b) substrings
1514             # that were not f-strings but will now become f-strings because of
1515             # redundant use of the 'f' prefix (i.e. none of the substrings
1516             # contain f-expressions but one or more of them had the 'f' prefix
1517             # anyway; in which case, we will prepend 'f' to _all_ substrings).
1518             #
1519             # There is probably a better way to accomplish what is being done
1520             # here...
1521             #
1522             # If this substring is an f-string, we _could_ remove the 'f'
1523             # prefix, and the current custom split did NOT originally use a
1524             # prefix...
1525             if (
1526                 use_custom_breakpoints
1527                 and not csplit.has_prefix
1528                 and (
1529                     # `next_value == prefix + QUOTE` happens when the custom
1530                     # split is an empty string.
1531                     next_value == prefix + QUOTE
1532                     or next_value != self._normalize_f_string(next_value, prefix)
1533                 )
1534             ):
1535                 # Then `csplit.break_idx` will be off by one after removing
1536                 # the 'f' prefix.
1537                 break_idx += 1
1538                 next_value = rest_value[:break_idx] + QUOTE
1539
1540             if drop_pointless_f_prefix:
1541                 next_value = self._normalize_f_string(next_value, prefix)
1542
1543             # --- Construct `next_leaf`
1544             next_leaf = Leaf(token.STRING, next_value)
1545             insert_str_child(next_leaf)
1546             self._maybe_normalize_string_quotes(next_leaf)
1547
1548             # --- Construct `next_line`
1549             next_line = line.clone()
1550             maybe_append_string_operators(next_line)
1551             next_line.append(next_leaf)
1552             string_line_results.append(Ok(next_line))
1553
1554             rest_value = prefix + QUOTE + rest_value[break_idx:]
1555             first_string_line = False
1556
1557         yield from string_line_results
1558
1559         if drop_pointless_f_prefix:
1560             rest_value = self._normalize_f_string(rest_value, prefix)
1561
1562         rest_leaf = Leaf(token.STRING, rest_value)
1563         insert_str_child(rest_leaf)
1564
1565         # NOTE: I could not find a test case that verifies that the following
1566         # line is actually necessary, but it seems to be. Otherwise we risk
1567         # not normalizing the last substring, right?
1568         self._maybe_normalize_string_quotes(rest_leaf)
1569
1570         last_line = line.clone()
1571         maybe_append_string_operators(last_line)
1572
1573         # If there are any leaves to the right of the target string...
1574         if is_valid_index(string_idx + 1):
1575             # We use `temp_value` here to determine how long the last line
1576             # would be if we were to append all the leaves to the right of the
1577             # target string to the last string line.
1578             temp_value = rest_value
1579             for leaf in LL[string_idx + 1 :]:
1580                 temp_value += str(leaf)
1581                 if leaf.type == token.LPAR:
1582                     break
1583
1584             # Try to fit them all on the same line with the last substring...
1585             if (
1586                 str_width(temp_value) <= max_last_string_column()
1587                 or LL[string_idx + 1].type == token.COMMA
1588             ):
1589                 last_line.append(rest_leaf)
1590                 append_leaves(last_line, line, LL[string_idx + 1 :])
1591                 yield Ok(last_line)
1592             # Otherwise, place the last substring on one line and everything
1593             # else on a line below that...
1594             else:
1595                 last_line.append(rest_leaf)
1596                 yield Ok(last_line)
1597
1598                 non_string_line = line.clone()
1599                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
1600                 yield Ok(non_string_line)
1601         # Else the target string was the last leaf...
1602         else:
1603             last_line.append(rest_leaf)
1604             last_line.comments = line.comments.copy()
1605             yield Ok(last_line)
1606
1607     def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1608         """
1609         Yields:
1610             All ranges of @string which, if @string were to be split there,
1611             would result in the splitting of an \\N{...} expression (which is NOT
1612             allowed).
1613         """
1614         # True - the previous backslash was unescaped
1615         # False - the previous backslash was escaped *or* there was no backslash
1616         previous_was_unescaped_backslash = False
1617         it = iter(enumerate(string))
1618         for idx, c in it:
1619             if c == "\\":
1620                 previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1621                 continue
1622             if not previous_was_unescaped_backslash or c != "N":
1623                 previous_was_unescaped_backslash = False
1624                 continue
1625             previous_was_unescaped_backslash = False
1626
1627             begin = idx - 1  # the position of backslash before \N{...}
1628             for idx, c in it:
1629                 if c == "}":
1630                     end = idx
1631                     break
1632             else:
1633                 # malformed nameescape expression?
1634                 # should have been detected by AST parsing earlier...
1635                 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1636             yield begin, end
1637
1638     def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1639         """
1640         Yields:
1641             All ranges of @string which, if @string were to be split there,
1642             would result in the splitting of an f-expression (which is NOT
1643             allowed).
1644         """
1645         if "f" not in get_string_prefix(string).lower():
1646             return
1647         yield from iter_fexpr_spans(string)
1648
1649     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
1650         illegal_indices: Set[Index] = set()
1651         iterators = [
1652             self._iter_fexpr_slices(string),
1653             self._iter_nameescape_slices(string),
1654         ]
1655         for it in iterators:
1656             for begin, end in it:
1657                 illegal_indices.update(range(begin, end + 1))
1658         return illegal_indices
1659
1660     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
1661         """
1662         This method contains the algorithm that StringSplitter uses to
1663         determine which character to split each string at.
1664
1665         Args:
1666             @string: The substring that we are attempting to split.
1667             @max_break_idx: The ideal break index. We will return this value if it
1668             meets all the necessary conditions. In the likely event that it
1669             doesn't we will try to find the closest index BELOW @max_break_idx
1670             that does. If that fails, we will expand our search by also
1671             considering all valid indices ABOVE @max_break_idx.
1672
1673         Pre-Conditions:
1674             * assert_is_leaf_string(@string)
1675             * 0 <= @max_break_idx < len(@string)
1676
1677         Returns:
1678             break_idx, if an index is able to be found that meets all of the
1679             conditions listed in the 'Transformations' section of this classes'
1680             docstring.
1681                 OR
1682             None, otherwise.
1683         """
1684         is_valid_index = is_valid_index_factory(string)
1685
1686         assert is_valid_index(max_break_idx)
1687         assert_is_leaf_string(string)
1688
1689         _illegal_split_indices = self._get_illegal_split_indices(string)
1690
1691         def breaks_unsplittable_expression(i: Index) -> bool:
1692             """
1693             Returns:
1694                 True iff returning @i would result in the splitting of an
1695                 unsplittable expression (which is NOT allowed).
1696             """
1697             return i in _illegal_split_indices
1698
1699         def passes_all_checks(i: Index) -> bool:
1700             """
1701             Returns:
1702                 True iff ALL of the conditions listed in the 'Transformations'
1703                 section of this classes' docstring would be be met by returning @i.
1704             """
1705             is_space = string[i] == " "
1706             is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS
1707
1708             is_not_escaped = True
1709             j = i - 1
1710             while is_valid_index(j) and string[j] == "\\":
1711                 is_not_escaped = not is_not_escaped
1712                 j -= 1
1713
1714             is_big_enough = (
1715                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
1716                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
1717             )
1718             return (
1719                 (is_space or is_split_safe)
1720                 and is_not_escaped
1721                 and is_big_enough
1722                 and not breaks_unsplittable_expression(i)
1723             )
1724
1725         # First, we check all indices BELOW @max_break_idx.
1726         break_idx = max_break_idx
1727         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
1728             break_idx -= 1
1729
1730         if not passes_all_checks(break_idx):
1731             # If that fails, we check all indices ABOVE @max_break_idx.
1732             #
1733             # If we are able to find a valid index here, the next line is going
1734             # to be longer than the specified line length, but it's probably
1735             # better than doing nothing at all.
1736             break_idx = max_break_idx + 1
1737             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
1738                 break_idx += 1
1739
1740             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
1741                 return None
1742
1743         return break_idx
1744
1745     def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
1746         if self.normalize_strings:
1747             leaf.value = normalize_string_quotes(leaf.value)
1748
1749     def _normalize_f_string(self, string: str, prefix: str) -> str:
1750         """
1751         Pre-Conditions:
1752             * assert_is_leaf_string(@string)
1753
1754         Returns:
1755             * If @string is an f-string that contains no f-expressions, we
1756             return a string identical to @string except that the 'f' prefix
1757             has been stripped and all double braces (i.e. '{{' or '}}') have
1758             been normalized (i.e. turned into '{' or '}').
1759                 OR
1760             * Otherwise, we return @string.
1761         """
1762         assert_is_leaf_string(string)
1763
1764         if "f" in prefix and not fstring_contains_expr(string):
1765             new_prefix = prefix.replace("f", "")
1766
1767             temp = string[len(prefix) :]
1768             temp = re.sub(r"\{\{", "{", temp)
1769             temp = re.sub(r"\}\}", "}", temp)
1770             new_string = temp
1771
1772             return f"{new_prefix}{new_string}"
1773         else:
1774             return string
1775
1776     def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
1777         LL = list(leaves)
1778
1779         string_op_leaves = []
1780         i = 0
1781         while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
1782             prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
1783             string_op_leaves.append(prefix_leaf)
1784             i += 1
1785         return string_op_leaves
1786
1787
class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that wraps strings in parens and then splits at the LPAR.

    Requirements:
        All of the requirements listed in BaseStringSplitter's docstring in
        addition to the requirements listed below:

        * The line is a return/yield statement, which returns/yields a string.
            OR
        * The line is part of a ternary expression (e.g. `x = y if cond else
        z`) such that the line starts with `else <string>`, where <string> is
        some string.
            OR
        * The line is an assert statement, which ends with a string.
            OR
        * The line is an assignment statement (e.g. `x = <string>` or `x +=
        <string>`) such that the variable is being assigned the value of some
        string.
            OR
        * The line is a dictionary key assignment where some valid key is being
        assigned the value of some string.
            OR
        * The line is a lambda expression and the value is a string.
            OR
        * The line starts with an "atom" string that prefers to be wrapped in
        parens. It's preferred to be wrapped when the string is surrounded by
        commas (or is the first/last child).

    Transformations:
        The chosen string is wrapped in parentheses and then split at the LPAR.

        We then have one line which ends with an LPAR and another line that
        starts with the chosen string. The latter line is then split again at
        the RPAR. This results in the RPAR (and possibly a trailing comma)
        being placed on its own line.

        NOTE: If any leaves exist to the right of the chosen string (except
        for a trailing comma, which would be placed after the RPAR), those
        leaves are placed inside the parentheses.  In effect, the chosen
        string is not necessarily being "wrapped" by parentheses. We can,
        however, count on the LPAR being placed directly before the chosen
        string.

        In other words, StringParenWrapper creates "atom" strings. These
        can then be split again by StringSplitter, if necessary.

    Collaborations:
        In the event that a string line split by StringParenWrapper is
        changed such that it no longer needs to be given its own line,
        StringParenWrapper relies on StringParenStripper to clean up the
        parentheses it created.

        For "atom" strings that prefer to be wrapped in parens, it requires
        StringSplitter to hold the split until the string is wrapped in parens.
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        Look for the string in @line that should be wrapped in parentheses.

        Returns:
            Ok([string_idx]), if one of the `_*_match` helpers finds a target
            string at @line.leaves[string_idx].
                OR
            TErr(...), if the line ends in an opening bracket, if no helper
            matches, or if the matched string contains no space / split-safe
            character, is wider than the indented line would allow and has no
            custom splits registered.
        """
        LL = line.leaves

        if line.leaves[-1].type in OPENING_BRACKETS:
            return TErr(
                "Cannot wrap parens around a line that ends in an opening bracket."
            )

        # NOTE(review): `or` skips falsy results, so a matcher that may return
        # index 0 only works as the LAST operand -- presumably the reason
        # `_prefer_paren_wrap_match` comes last; confirm before reordering.
        string_idx = (
            self._return_match(LL)
            or self._else_match(LL)
            or self._assert_match(LL)
            or self._assign_match(LL)
            or self._dict_or_lambda_match(LL)
            or self._prefer_paren_wrap_match(LL)
        )

        if string_idx is not None:
            string_value = line.leaves[string_idx].value
            # If the string has neither spaces nor East Asian stops...
            if not any(
                char == " " or char in SPLIT_SAFE_CHARS for char in string_value
            ):
                # And will still violate the line length limit when split...
                max_string_width = self.line_length - ((line.depth + 1) * 4)
                if str_width(string_value) > max_string_width:
                    # And has no associated custom splits...
                    if not self.has_custom_splits(string_value):
                        # Then we should NOT put this string on its own line.
                        return TErr(
                            "We do not wrap long strings in parentheses when the"
                            " resultant line would still be over the specified line"
                            " length and can't be split further by StringSplitter."
                        )
            return Ok([string_idx])

        return TErr("This line does not contain any non-atomic strings.")

    @staticmethod
    def _return_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the return/yield statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a return/yield statement and the first leaf
        # contains either the "return" or "yield" keywords...
        if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
            0
        ].value in ["return", "yield"]:
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty paren leaf directly after the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _else_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the ternary expression
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a ternary expression and the first leaf
        # contains the "else" keyword...
        if (
            parent_type(LL[0]) == syms.test
            and LL[0].type == token.NAME
            and LL[0].value == "else"
        ):
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty paren leaf directly after the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _assert_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assert statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an assert statement and the first leaf
        # contains the "assert" keyword...
        if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a comma...
                if leaf.type == token.COMMA:
                    # Skip over an empty paren leaf directly after the comma.
                    # The bounds check keeps a comma that is the LAST leaf
                    # from raising IndexError; it simply fails to match.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That comma MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _assign_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assignment statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an expression statement or is a function
        # argument AND the first leaf contains a variable name...
        if (
            parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
            and LL[0].type == token.NAME
        ):
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find either an '=' or '+=' symbol...
                if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                    # Skip over an empty paren leaf directly after the symbol.
                    # The bounds check keeps a symbol that is the LAST leaf
                    # from raising IndexError; it simply fails to match.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That symbol MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # The next leaf MAY be a comma iff this line is a part
                        # of a function argument...
                        if (
                            parent_type(LL[0]) == syms.argument
                            and is_valid_index(idx)
                            and LL[idx].type == token.COMMA
                        ):
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _dict_or_lambda_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the dictionary key assignment
            statement or lambda expression requirements listed in the
            'Requirements' section of this class's docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a dictionary key assignment or lambda expression...
        parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)]
        if syms.dictsetmaker in parent_types or syms.lambdef in parent_types:
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a colon, it can either be dict's or lambda's colon...
                # (a colon that is the last leaf has nothing after it to match)
                if leaf.type == token.COLON and i < len(LL) - 1:
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That colon MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # That string MAY be followed by a comma...
                        if is_valid_index(idx) and LL[idx].type == token.COMMA:
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    def do_transform(
        self, line: Line, string_indices: List[int]
    ) -> Iterator[TResult[Line]]:
        """
        Wrap the matched string of @line in parentheses.

        Args:
            @line: The line containing the target string.
            @string_indices: Exactly one index, such that
            @line.leaves[string_indices[0]] is the target string.

        Yields:
            Three Ok(Line) results, in order:
                1. the leaves left of the string followed by a new LPAR (plus
                   any inline comments taken from the string, a trailing
                   comma, or a replaced LPAR),
                2. a line one level deeper holding a copy of the string and
                   the leaves that followed it,
                3. a line holding the new RPAR and, if the original line ended
                   with one, the trailing comma.
        """
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = -1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                # The trailing comma goes on the last line, after the RPAR.
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
                #        "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
                    if (
                        index > 0
                        and index < len(left_leaves) - 1
                        and left_leaves[index - 1].type == token.COLON
                        and left_leaves[index + 1].value == "lambda"
                    ):
                        right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
2179
2180
2181 class StringParser:
2182     """
2183     A state machine that aids in parsing a string's "trailer", which can be
2184     either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
2185     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
2186     varY)`).
2187
2188     NOTE: A new StringParser object MUST be instantiated for each string
2189     trailer we need to parse.
2190
2191     Examples:
2192         We shall assume that `line` equals the `Line` object that corresponds
2193         to the following line of python code:
2194         ```
2195         x = "Some {}.".format("String") + some_other_string
2196         ```
2197
2198         Furthermore, we will assume that `string_idx` is some index such that:
2199         ```
2200         assert line.leaves[string_idx].value == "Some {}."
2201         ```
2202
2203         The following code snippet then holds:
2204         ```
2205         string_parser = StringParser()
2206         idx = string_parser.parse(line.leaves, string_idx)
2207         assert line.leaves[idx].type == token.PLUS
2208         ```
2209     """
2210
    # Sentinel used in place of a real token type when building `_goto` keys.
    # `_next_state` falls back to the (state, DEFAULT_TOKEN) entry whenever
    # there is no entry for (state, <actual token type>).
    DEFAULT_TOKEN: Final = 20210605

    # String Parser States
    START: Final = 1  # Initial state (set by `__init__`); nothing consumed yet.
    DOT: Final = 2  # A '.' was just consumed.
    NAME: Final = 3  # The name following a '.' was just consumed.
    PERCENT: Final = 4  # A '%' was just consumed.
    SINGLE_FMT_ARG: Final = 5  # The single (unparenthesized) '%' argument.
    LPAR: Final = 6  # Inside parentheses; left only via the matching ')'.
    RPAR: Final = 7  # The ')' matching the trailer's '(' was just consumed.
    DONE: Final = 8  # The current leaf is NOT part of the string trailer.

    # Lookup Table for Next State
    #
    # Maps (current state, type of the next leaf) to the next state. The LPAR
    # state has no entries because `_next_state` handles it separately, and
    # DOT has no DEFAULT_TOKEN entry, so a '.' followed by anything other than
    # a name makes `_next_state` raise a RuntimeError.
    _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
        # A string trailer may start with '.' OR '%'. Any other leaf means
        # that the string has no trailer at all.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # A '.' MUST be followed by an attribute or method name.
        (DOT, token.NAME): NAME,
        # A method name MUST be followed by an '(', whereas an attribute name
        # is the last symbol in the string trailer.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # A '%' symbol can be followed by an '(' or a single argument (e.g. a
        # string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # If a '%' symbol is followed by a single argument, that argument is
        # the last leaf in the string trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # If present, a ')' symbol is the last symbol in a string trailer.
        # (NOTE: LPARs and nested RPARs are not included in this lookup table,
        # since they are treated as a special case by the parsing logic in
        # this class's implementation.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }
2248
2249     def __init__(self) -> None:
2250         self._state = self.START
2251         self._unmatched_lpars = 0
2252
2253     def parse(self, leaves: List[Leaf], string_idx: int) -> int:
2254         """
2255         Pre-conditions:
2256             * @leaves[@string_idx].type == token.STRING
2257
2258         Returns:
2259             The index directly after the last leaf which is apart of the string
2260             trailer, if a "trailer" exists.
2261                 OR
2262             @string_idx + 1, if no string "trailer" exists.
2263         """
2264         assert leaves[string_idx].type == token.STRING
2265
2266         idx = string_idx + 1
2267         while idx < len(leaves) and self._next_state(leaves[idx]):
2268             idx += 1
2269         return idx
2270
2271     def _next_state(self, leaf: Leaf) -> bool:
2272         """
2273         Pre-conditions:
2274             * On the first call to this function, @leaf MUST be the leaf that
2275             was directly after the string leaf in question (e.g. if our target
2276             string is `line.leaves[i]` then the first call to this method must
2277             be `line.leaves[i + 1]`).
2278             * On the next call to this function, the leaf parameter passed in
2279             MUST be the leaf directly following @leaf.
2280
2281         Returns:
2282             True iff @leaf is apart of the string's trailer.
2283         """
2284         # We ignore empty LPAR or RPAR leaves.
2285         if is_empty_par(leaf):
2286             return True
2287
2288         next_token = leaf.type
2289         if next_token == token.LPAR:
2290             self._unmatched_lpars += 1
2291
2292         current_state = self._state
2293
2294         # The LPAR parser state is a special case. We will return True until we
2295         # find the matching RPAR token.
2296         if current_state == self.LPAR:
2297             if next_token == token.RPAR:
2298                 self._unmatched_lpars -= 1
2299                 if self._unmatched_lpars == 0:
2300                     self._state = self.RPAR
2301         # Otherwise, we use a lookup table to determine the next state.
2302         else:
2303             # If the lookup table matches the current state to the next
2304             # token, we use the lookup table.
2305             if (current_state, next_token) in self._goto:
2306                 self._state = self._goto[current_state, next_token]
2307             else:
2308                 # Otherwise, we check if a the current state was assigned a
2309                 # default.
2310                 if (current_state, self.DEFAULT_TOKEN) in self._goto:
2311                     self._state = self._goto[current_state, self.DEFAULT_TOKEN]
2312                 # If no default has been assigned, then this parser has a logic
2313                 # error.
2314                 else:
2315                     raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
2316
2317             if self._state == self.DONE:
2318                 return False
2319
2320         return True
2321
2322
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Orphan @string_leaf right away and return a function that inserts new
    children, one per call, into the part of the node structure that
    @string_leaf used to occupy.

    Consecutive calls to the returned function place their arguments next to
    each other, in call order.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`, where `N` initially looks like this:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        Running the following snippet:
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        insert_str_child(Leaf(token.LPAR, '('))
        insert_str_child(Leaf(token.STRING, '"bar"'))
        insert_str_child(Leaf(token.RPAR, ')'))
        ```

        leaves `string_leaf.parent is None` and turns `N` into:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # The parent must be captured BEFORE the leaf is removed from the tree.
    parent = string_leaf.parent
    next_slot = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal next_slot

        # Both of these fail when @string_leaf had no parent to begin with.
        assert parent is not None
        assert next_slot is not None

        parent.insert_child(next_slot, child)
        # Advance only after a successful insertion, so that the following
        # child lands directly after this one.
        next_slot = next_slot + 1

    return insert_str_child
2382
2383
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a bounds-checking predicate for @seq.

    The length of @seq is read on every call of the returned function, not
    once when the factory runs.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND smaller than len(seq), i.e.
            seq[@idx] does NOT raise an IndexError. Negative indices are
            rejected.
        """
        if idx < 0:
            return False
        return idx < len(seq)

    return is_valid_index