src/black/trans.py

   1 """
   2 String transformers that can split and merge strings.
   3 """
   4 import re
   5 import sys
   6 from abc import ABC, abstractmethod
   7 from collections import defaultdict
   8 from dataclasses import dataclass
   9 from typing import (
  10     Any,
  11     Callable,
  12     ClassVar,
  13     Collection,
  14     Dict,
  15     Iterable,
  16     Iterator,
  17     List,
  18     Optional,
  19     Sequence,
  20     Set,
  21     Tuple,
  22     TypeVar,
  23     Union,
  24 )
  25
  26 if sys.version_info < (3, 8):
  27     from typing_extensions import Final, Literal
  28 else:
  29     from typing import Literal, Final
  30
  31 from mypy_extensions import trait
  32
  33 from black.comments import contains_pragma_comment
  34 from black.lines import Line, append_leaves
  35 from black.mode import Feature
  36 from black.nodes import (
  37     CLOSING_BRACKETS,
  38     OPENING_BRACKETS,
  39     STANDALONE_COMMENT,
  40     is_empty_lpar,
  41     is_empty_par,
  42     is_empty_rpar,
  43     is_part_of_annotation,
  44     parent_type,
  45     replace_child,
  46     syms,
  47 )
  48 from black.rusty import Err, Ok, Result
  49 from black.strings import (
  50     assert_is_leaf_string,
  51     get_string_prefix,
  52     has_triple_quotes,
  53     normalize_string_quotes,
  54 )
  55 from blib2to3.pgen2 import token
  56 from blib2to3.pytree import Leaf, Node
  57
  58
  59 class CannotTransform(Exception):
  60     """Base class for errors raised by Transformers."""
  61
  62
# types
# Generic placeholder for the success payload of a TResult.
T = TypeVar("T")
# Either kind of syntax-tree element: a Leaf or a Node.
LN = Union[Leaf, Node]
# Call signature shared by line transformers: takes a Line and a collection
# of Features, and yields Lines.
Transformer = Callable[[Line, Collection[Feature]], Iterator[Line]]
# Position of a leaf within `Line.leaves`.
Index = int
# NOTE(review): presumably a syntax-tree node/token type code; not used in
# the part of the file reviewed here -- confirm against its users.
NodeType = int
# NOTE(review): presumably a state of a hand-written parser defined further
# down in this module -- confirm against its users.
ParserState = int
# The value returned by the builtin `id()` for a string object (see
# `CustomSplitMapMixin._get_key`).
StringID = int
TResult = Result[T, CannotTransform]  # (T)ransform Result
# What `StringTransformer.do_match` returns: on success, the indices (into
# `Line.leaves`) of the matched string leaves.
TMatchResult = TResult[List[Index]]
  73
  74
  75 def TErr(err_msg: str) -> Err[CannotTransform]:
  76     """(T)ransform Err
  77
  78     Convenience function used when working with the TResult type.
  79     """
  80     cant_transform = CannotTransform(err_msg)
  81     return Err(cant_transform)
  82
  83
  84 def hug_power_op(line: Line, features: Collection[Feature]) -> Iterator[Line]:
  85     """A transformer which normalizes spacing around power operators."""
  86
  87     # Performance optimization to avoid unnecessary Leaf clones and other ops.
  88     for leaf in line.leaves:
  89         if leaf.type == token.DOUBLESTAR:
  90             break
  91     else:
  92         raise CannotTransform("No doublestar token was found in the line.")
  93
  94     def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
  95         # Brackets and parentheses indicate calls, subscripts, etc. ...
  96         # basically stuff that doesn't count as "simple". Only a NAME lookup
  97         # or dotted lookup (eg. NAME.NAME) is OK.
  98         if step == -1:
  99             disallowed = {token.RPAR, token.RSQB}
 100         else:
 101             disallowed = {token.LPAR, token.LSQB}
 102
 103         while 0 <= index < len(line.leaves):
 104             current = line.leaves[index]
 105             if current.type in disallowed:
 106                 return False
 107             if current.type not in {token.NAME, token.DOT} or current.value == "for":
 108                 # If the current token isn't disallowed, we'll assume this is simple as
 109                 # only the disallowed tokens are semantically attached to this lookup
 110                 # expression we're checking. Also, stop early if we hit the 'for' bit
 111                 # of a comprehension.
 112                 return True
 113
 114             index += step
 115
 116         return True
 117
 118     def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
 119         # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple
 120         # lookup (see above), with or without a preceding unary operator.
 121         start = line.leaves[index]
 122         if start.type in {token.NAME, token.NUMBER}:
 123             return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
 124
 125         if start.type in {token.PLUS, token.MINUS, token.TILDE}:
 126             if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
 127                 # step is always one as bases with a preceding unary op will be checked
 128                 # for simplicity starting from the next token (so it'll hit the check
 129                 # above).
 130                 return is_simple_lookup(index + 1, step=1)
 131
 132         return False
 133
 134     new_line = line.clone()
 135     should_hug = False
 136     for idx, leaf in enumerate(line.leaves):
 137         new_leaf = leaf.clone()
 138         if should_hug:
 139             new_leaf.prefix = ""
 140             should_hug = False
 141
 142         should_hug = (
 143             (0 < idx < len(line.leaves) - 1)
 144             and leaf.type == token.DOUBLESTAR
 145             and is_simple_operand(idx - 1, kind="base")
 146             and line.leaves[idx - 1].value != "lambda"
 147             and is_simple_operand(idx + 1, kind="exponent")
 148         )
 149         if should_hug:
 150             new_leaf.prefix = ""
 151
 152         # We have to be careful to make a new line properly:
 153         # - bracket related metadata must be maintained (handled by Line.append)
 154         # - comments need to copied over, updating the leaf IDs they're attached to
 155         new_line.append(new_leaf, preformatted=True)
 156         for comment_leaf in line.comments_after(leaf):
 157             new_line.append(comment_leaf, preformatted=True)
 158
 159     yield new_line
 160
 161
 162 class StringTransformer(ABC):
 163     """
 164     An implementation of the Transformer protocol that relies on its
 165     subclasses overriding the template methods `do_match(...)` and
 166     `do_transform(...)`.
 167
 168     This Transformer works exclusively on strings (for example, by merging
 169     or splitting them).
 170
 171     The following sections can be found among the docstrings of each concrete
 172     StringTransformer subclass.
 173
 174     Requirements:
 175         Which requirements must be met of the given Line for this
 176         StringTransformer to be applied?
 177
 178     Transformations:
 179         If the given Line meets all of the above requirements, which string
 180         transformations can you expect to be applied to it by this
 181         StringTransformer?
 182
 183     Collaborations:
 184         What contractual agreements does this StringTransformer have with other
 185         StringTransfomers? Such collaborations should be eliminated/minimized
 186         as much as possible.
 187     """
 188
 189     __name__: Final = "StringTransformer"
 190
 191     # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
 192     # `abc.ABC`.
 193     def __init__(self, line_length: int, normalize_strings: bool) -> None:
 194         self.line_length = line_length
 195         self.normalize_strings = normalize_strings
 196
 197     @abstractmethod
 198     def do_match(self, line: Line) -> TMatchResult:
 199         """
 200         Returns:
 201             * Ok(string_indices) such that for each index, `line.leaves[index]`
 202             is our target string if a match was able to be made. For
 203             transformers that don't result in more lines (e.g. StringMerger,
 204             StringParenStripper), multiple matches and transforms are done at
 205             once to reduce the complexity.
 206                 OR
 207             * Err(CannotTransform), if no match could be made.
 208         """
 209
 210     @abstractmethod
 211     def do_transform(
 212         self, line: Line, string_indices: List[int]
 213     ) -> Iterator[TResult[Line]]:
 214         """
 215         Yields:
 216             * Ok(new_line) where new_line is the new transformed line.
 217                 OR
 218             * Err(CannotTransform) if the transformation failed for some reason. The
 219             `do_match(...)` template method should usually be used to reject
 220             the form of the given Line, but in some cases it is difficult to
 221             know whether or not a Line meets the StringTransformer's
 222             requirements until the transformation is already midway.
 223
 224         Side Effects:
 225             This method should NOT mutate @line directly, but it MAY mutate the
 226             Line's underlying Node structure. (WARNING: If the underlying Node
 227             structure IS altered, then this method should NOT be allowed to
 228             yield an CannotTransform after that point.)
 229         """
 230
 231     def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
 232         """
 233         StringTransformer instances have a call signature that mirrors that of
 234         the Transformer type.
 235
 236         Raises:
 237             CannotTransform(...) if the concrete StringTransformer class is unable
 238             to transform @line.
 239         """
 240         # Optimization to avoid calling `self.do_match(...)` when the line does
 241         # not contain any string.
 242         if not any(leaf.type == token.STRING for leaf in line.leaves):
 243             raise CannotTransform("There are no strings in this line.")
 244
 245         match_result = self.do_match(line)
 246
 247         if isinstance(match_result, Err):
 248             cant_transform = match_result.err()
 249             raise CannotTransform(
 250                 f"The string transformer {self.__class__.__name__} does not recognize"
 251                 " this line as one that it can transform."
 252             ) from cant_transform
 253
 254         string_indices = match_result.ok()
 255
 256         for line_result in self.do_transform(line, string_indices):
 257             if isinstance(line_result, Err):
 258                 cant_transform = line_result.err()
 259                 raise CannotTransform(
 260                     "StringTransformer failed while attempting to transform string."
 261                 ) from cant_transform
 262             line = line_result.ok()
 263             yield line
 264
 265
 266 @dataclass
 267 class CustomSplit:
 268     """A custom (i.e. manual) string split.
 269
 270     A single CustomSplit instance represents a single substring.
 271
 272     Examples:
 273         Consider the following string:
 274         ```
 275         "Hi there friend."
 276         " This is a custom"
 277         f" string {split}."
 278         ```
 279
 280         This string will correspond to the following three CustomSplit instances:
 281         ```
 282         CustomSplit(False, 16)
 283         CustomSplit(False, 17)
 284         CustomSplit(True, 16)
 285         ```
 286     """
 287
 288     has_prefix: bool
 289     break_idx: int
 290
 291
 292 @trait
 293 class CustomSplitMapMixin:
 294     """
 295     This mixin class is used to map merged strings to a sequence of
 296     CustomSplits, which will then be used to re-split the strings iff none of
 297     the resultant substrings go over the configured max line length.
 298     """
 299
 300     _Key: ClassVar = Tuple[StringID, str]
 301     _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
 302         tuple
 303     )
 304
 305     @staticmethod
 306     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
 307         """
 308         Returns:
 309             A unique identifier that is used internally to map @string to a
 310             group of custom splits.
 311         """
 312         return (id(string), string)
 313
 314     def add_custom_splits(
 315         self, string: str, custom_splits: Iterable[CustomSplit]
 316     ) -> None:
 317         """Custom Split Map Setter Method
 318
 319         Side Effects:
 320             Adds a mapping from @string to the custom splits @custom_splits.
 321         """
 322         key = self._get_key(string)
 323         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
 324
 325     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
 326         """Custom Split Map Getter Method
 327
 328         Returns:
 329             * A list of the custom splits that are mapped to @string, if any
 330             exist.
 331                 OR
 332             * [], otherwise.
 333
 334         Side Effects:
 335             Deletes the mapping between @string and its associated custom
 336             splits (which are returned to the caller).
 337         """
 338         key = self._get_key(string)
 339
 340         custom_splits = self._CUSTOM_SPLIT_MAP[key]
 341         del self._CUSTOM_SPLIT_MAP[key]
 342
 343         return list(custom_splits)
 344
 345     def has_custom_splits(self, string: str) -> bool:
 346         """
 347         Returns:
 348             True iff @string is associated with a set of custom splits.
 349         """
 350         key = self._get_key(string)
 351         return key in self._CUSTOM_SPLIT_MAP
 352
 353
 354 class StringMerger(StringTransformer, CustomSplitMapMixin):
 355     """StringTransformer that merges strings together.
 356
 357     Requirements:
 358         (A) The line contains adjacent strings such that ALL of the validation checks
 359         listed in StringMerger._validate_msg(...)'s docstring pass.
 360             OR
 361         (B) The line contains a string which uses line continuation backslashes.
 362
 363     Transformations:
  364         Depending on which of the two requirements above were met, either:
 365
 366         (A) The string group associated with the target string is merged.
 367             OR
 368         (B) All line-continuation backslashes are removed from the target string.
 369
 370     Collaborations:
 371         StringMerger provides custom split information to StringSplitter.
 372     """
 373
 374     def do_match(self, line: Line) -> TMatchResult:
 375         LL = line.leaves
 376
 377         is_valid_index = is_valid_index_factory(LL)
 378
 379         string_indices = []
 380         idx = 0
 381         while is_valid_index(idx):
 382             leaf = LL[idx]
 383             if (
 384                 leaf.type == token.STRING
 385                 and is_valid_index(idx + 1)
 386                 and LL[idx + 1].type == token.STRING
 387             ):
 388                 if not is_part_of_annotation(leaf):
 389                     string_indices.append(idx)
 390
 391                 # Advance to the next non-STRING leaf.
 392                 idx += 2
 393                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 394                     idx += 1
 395
 396             elif leaf.type == token.STRING and "\\\n" in leaf.value:
 397                 string_indices.append(idx)
 398                 # Advance to the next non-STRING leaf.
 399                 idx += 1
 400                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 401                     idx += 1
 402
 403             else:
 404                 idx += 1
 405
 406         if string_indices:
 407             return Ok(string_indices)
 408         else:
 409             return TErr("This line has no strings that need merging.")
 410
 411     def do_transform(
 412         self, line: Line, string_indices: List[int]
 413     ) -> Iterator[TResult[Line]]:
 414         new_line = line
 415
 416         rblc_result = self._remove_backslash_line_continuation_chars(
 417             new_line, string_indices
 418         )
 419         if isinstance(rblc_result, Ok):
 420             new_line = rblc_result.ok()
 421
 422         msg_result = self._merge_string_group(new_line, string_indices)
 423         if isinstance(msg_result, Ok):
 424             new_line = msg_result.ok()
 425
 426         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
 427             msg_cant_transform = msg_result.err()
 428             rblc_cant_transform = rblc_result.err()
 429             cant_transform = CannotTransform(
 430                 "StringMerger failed to merge any strings in this line."
 431             )
 432
 433             # Chain the errors together using `__cause__`.
 434             msg_cant_transform.__cause__ = rblc_cant_transform
 435             cant_transform.__cause__ = msg_cant_transform
 436
 437             yield Err(cant_transform)
 438         else:
 439             yield Ok(new_line)
 440
 441     @staticmethod
 442     def _remove_backslash_line_continuation_chars(
 443         line: Line, string_indices: List[int]
 444     ) -> TResult[Line]:
 445         """
 446         Merge strings that were split across multiple lines using
 447         line-continuation backslashes.
 448
 449         Returns:
 450             Ok(new_line), if @line contains backslash line-continuation
 451             characters.
 452                 OR
 453             Err(CannotTransform), otherwise.
 454         """
 455         LL = line.leaves
 456
 457         indices_to_transform = []
 458         for string_idx in string_indices:
 459             string_leaf = LL[string_idx]
 460             if (
 461                 string_leaf.type == token.STRING
 462                 and "\\\n" in string_leaf.value
 463                 and not has_triple_quotes(string_leaf.value)
 464             ):
 465                 indices_to_transform.append(string_idx)
 466
 467         if not indices_to_transform:
 468             return TErr(
 469                 "Found no string leaves that contain backslash line continuation"
 470                 " characters."
 471             )
 472
 473         new_line = line.clone()
 474         new_line.comments = line.comments.copy()
 475         append_leaves(new_line, line, LL)
 476
 477         for string_idx in indices_to_transform:
 478             new_string_leaf = new_line.leaves[string_idx]
 479             new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
 480
 481         return Ok(new_line)
 482
 483     def _merge_string_group(
 484         self, line: Line, string_indices: List[int]
 485     ) -> TResult[Line]:
 486         """
 487         Merges string groups (i.e. set of adjacent strings).
 488
 489         Each index from `string_indices` designates one string group's first
 490         leaf in `line.leaves`.
 491
 492         Returns:
 493             Ok(new_line), if ALL of the validation checks found in
 494             _validate_msg(...) pass.
 495                 OR
 496             Err(CannotTransform), otherwise.
 497         """
 498         LL = line.leaves
 499
 500         is_valid_index = is_valid_index_factory(LL)
 501
 502         # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
 503         merged_string_idx_dict: Dict[int, Tuple[int, Leaf]] = {}
 504         for string_idx in string_indices:
 505             vresult = self._validate_msg(line, string_idx)
 506             if isinstance(vresult, Err):
 507                 continue
 508             merged_string_idx_dict[string_idx] = self._merge_one_string_group(
 509                 LL, string_idx, is_valid_index
 510             )
 511
 512         if not merged_string_idx_dict:
 513             return TErr("No string group is merged")
 514
 515         # Build the final line ('new_line') that this method will later return.
 516         new_line = line.clone()
 517         previous_merged_string_idx = -1
 518         previous_merged_num_of_strings = -1
 519         for i, leaf in enumerate(LL):
 520             if i in merged_string_idx_dict:
 521                 previous_merged_string_idx = i
 522                 previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
 523                 new_line.append(string_leaf)
 524
 525             if (
 526                 previous_merged_string_idx
 527                 <= i
 528                 < previous_merged_string_idx + previous_merged_num_of_strings
 529             ):
 530                 for comment_leaf in line.comments_after(LL[i]):
 531                     new_line.append(comment_leaf, preformatted=True)
 532                 continue
 533
 534             append_leaves(new_line, line, [leaf])
 535
 536         return Ok(new_line)
 537
    def _merge_one_string_group(
        self, LL: List[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> Tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        The group consists of `LL[string_idx]` and every STRING leaf that
        directly follows it in @LL.

        Args:
            LL: The leaves of the line being transformed.
            string_idx: Index into @LL of the group's first string.
            is_valid_index: Bounds-check predicate for indices into @LL (the
                caller builds it with `is_valid_index_factory(LL)`).

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.

        Side Effects:
            * If the group's first leaf has a parent, the underlying Node
              structure is modified so that the merged leaf takes the place
              of the original strings (or of the whole parent).
            * The custom splits describing the original substrings are
              registered for the merged string via `add_custom_splits(...)`.
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The merged string is built with the quote character that closes the
        # group's first string.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                string = _toggle_fexpr_quotes(string, QUOTE)
                # After quotes toggling, quotes in expressions won't be escaped
                # because quotes can't be reused in f-strings. So we can simply
                # let the escaping logic below run without knowing f-string
                # expressions.

            # Matches a (possibly empty) run of an even number of backslashes
            # that is not itself preceded by a backslash. A QUOTE right after
            # such a run is not escaped yet.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            # Drop the prefix plus the opening quote, and the closing quote.
            naked_string = string[len(string_prefix) + 1 : -1]
            # Put a backslash in front of every QUOTE that is not escaped yet.
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have. It is the (lowercased) prefix of the first string
        # in the group that has a non-empty one, or "" if none does.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            # Append this substring, followed by a BREAK_MARK (so a mark also
            # trails the last substring), to what has been merged so far.
            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Take a note on the index of the non-STRING leaf.
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        # 'temp_string' is the not-yet-consumed tail of the merged string's
        # contents; each iteration consumes one substring plus its BREAK_MARK.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            # NOTE(review): 'mark_idx' is the length of this substring's
            # contents; the extra terms presumably account for the substring's
            # prefix and opening quote -- confirm against the consumer of the
            # custom splits.
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        # The leaf that is actually returned no longer contains any BREAK_MARK.
        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf
 680
    @staticmethod
    def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for _merge_string_group(...).

        The string group under test starts at `line.leaves[string_idx]` and
        extends over every STRING leaf that directly follows it.

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
                OR
            * Err(CannotTransform), if any of the following are true:
                - The target string group contains an "inner" stand-alone
                  comment (i.e. one that has a string leaf before it AND
                  after it).
                - The string group contains a multiline (triple-quoted) string.
                - The string group contains a raw string.
                - The string group has an inline comment that appears to be a pragma.
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group has more than one inline comment (a comment
                  attached to a comma directly trailing the group counts too).
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.

        NOTE: Stringified type annotations are not merged either, but they are
        filtered out by StringMerger.do_match(...), not by this method. We
        don't want to process stringified type annotations since pyright
        doesn't support them spanning multiple string values. (mypy, pytype,
        pyre do support them, so we can change if pyright also gains support
        in the future. See https://github.com/microsoft/pyright/issues/4359.)
        """
        # We first check for "inner" stand-alone comments (i.e. stand-alone
        # comments that have a string leaf before them AND after them).
        #
        # Starting at the target string we walk in both directions over the
        # unbroken run of strings and stand-alone comments; meeting a string
        # after a stand-alone comment means that comment sits between strings.
        for inc in [1, -1]:
            i = string_idx
            found_sa_comment = False
            is_valid_index = is_valid_index_factory(line.leaves)
            while is_valid_index(i) and line.leaves[i].type in [
                token.STRING,
                STANDALONE_COMMENT,
            ]:
                if line.leaves[i].type == STANDALONE_COMMENT:
                    found_sa_comment = True
                elif found_sa_comment:
                    return TErr(
                        "StringMerger does NOT merge string groups which contain "
                        "stand-alone comments."
                    )

                i += inc

        # Next, collect statistics about the group's strings, bailing out
        # early on the per-string checks.
        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                # The first non-STRING leaf ends the group.
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value).lower()
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            # `line.comments` is keyed by the id() of the leaf the comments
            # are attached to.
            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        # Finally, the checks that need the statistics of the whole group.
        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
 766
 767
 768 class StringParenStripper(StringTransformer):
 769     """StringTransformer that strips surrounding parentheses from strings.
 770
 771     Requirements:
 772         The line contains a string which is surrounded by parentheses and:
 773             - The target string is NOT the only argument to a function call.
 774             - The target string is NOT a "pointless" string.
 775             - If the target string contains a PERCENT, the brackets are not
 776               preceded or followed by an operator with higher precedence than
 777               PERCENT.
 778
 779     Transformations:
 780         The parentheses mentioned in the 'Requirements' section are stripped.
 781
 782     Collaborations:
 783         StringParenStripper has its own inherent usefulness, but it is also
 784         relied on to clean up the parentheses created by StringParenWrapper (in
 785         the event that they are no longer needed).
 786     """
 787
 788     def do_match(self, line: Line) -> TMatchResult:
 789         LL = line.leaves
 790
 791         is_valid_index = is_valid_index_factory(LL)
 792
 793         string_indices = []
 794
 795         idx = -1
 796         while True:
 797             idx += 1
 798             if idx >= len(LL):
 799                 break
 800             leaf = LL[idx]
 801
 802             # Should be a string...
 803             if leaf.type != token.STRING:
 804                 continue
 805
 806             # If this is a "pointless" string...
 807             if (
 808                 leaf.parent
 809                 and leaf.parent.parent
 810                 and leaf.parent.parent.type == syms.simple_stmt
 811             ):
 812                 continue
 813
 814             # Should be preceded by a non-empty LPAR...
 815             if (
 816                 not is_valid_index(idx - 1)
 817                 or LL[idx - 1].type != token.LPAR
 818                 or is_empty_lpar(LL[idx - 1])
 819             ):
 820                 continue
 821
 822             # That LPAR should NOT be preceded by a function name or a closing
 823             # bracket (which could be a function which returns a function or a
 824             # list/dictionary that contains a function)...
 825             if is_valid_index(idx - 2) and (
 826                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
 827             ):
 828                 continue
 829
 830             string_idx = idx
 831
 832             # Skip the string trailer, if one exists.
 833             string_parser = StringParser()
 834             next_idx = string_parser.parse(LL, string_idx)
 835
 836             # if the leaves in the parsed string include a PERCENT, we need to
 837             # make sure the initial LPAR is NOT preceded by an operator with
 838             # higher or equal precedence to PERCENT
 839             if is_valid_index(idx - 2):
 840                 # mypy can't quite follow unless we name this
 841                 before_lpar = LL[idx - 2]
 842                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
 843                     (
 844                         before_lpar.type
 845                         in {
 846                             token.STAR,
 847                             token.AT,
 848                             token.SLASH,
 849                             token.DOUBLESLASH,
 850                             token.PERCENT,
 851                             token.TILDE,
 852                             token.DOUBLESTAR,
 853                             token.AWAIT,
 854                             token.LSQB,
 855                             token.LPAR,
 856                         }
 857                     )
 858                     or (
 859                         # only unary PLUS/MINUS
 860                         before_lpar.parent
 861                         and before_lpar.parent.type == syms.factor
 862                         and (before_lpar.type in {token.PLUS, token.MINUS})
 863                     )
 864                 ):
 865                     continue
 866
 867             # Should be followed by a non-empty RPAR...
 868             if (
 869                 is_valid_index(next_idx)
 870                 and LL[next_idx].type == token.RPAR
 871                 and not is_empty_rpar(LL[next_idx])
 872             ):
 873                 # That RPAR should NOT be followed by anything with higher
 874                 # precedence than PERCENT
 875                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
 876                     token.DOUBLESTAR,
 877                     token.LSQB,
 878                     token.LPAR,
 879                     token.DOT,
 880                 }:
 881                     continue
 882
 883                 string_indices.append(string_idx)
 884                 idx = string_idx
 885                 while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
 886                     idx += 1
 887
 888         if string_indices:
 889             return Ok(string_indices)
 890         return TErr("This line has no strings wrapped in parens.")
 891
 892     def do_transform(
 893         self, line: Line, string_indices: List[int]
 894     ) -> Iterator[TResult[Line]]:
 895         LL = line.leaves
 896
 897         string_and_rpar_indices: List[int] = []
 898         for string_idx in string_indices:
 899             string_parser = StringParser()
 900             rpar_idx = string_parser.parse(LL, string_idx)
 901
 902             should_transform = True
 903             for leaf in (LL[string_idx - 1], LL[rpar_idx]):
 904                 if line.comments_after(leaf):
 905                     # Should not strip parentheses which have comments attached
 906                     # to them.
 907                     should_transform = False
 908                     break
 909             if should_transform:
 910                 string_and_rpar_indices.extend((string_idx, rpar_idx))
 911
 912         if string_and_rpar_indices:
 913             yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
 914         else:
 915             yield Err(
 916                 CannotTransform("All string groups have comments attached to them.")
 917             )
 918
 919     def _transform_to_new_line(
 920         self, line: Line, string_and_rpar_indices: List[int]
 921     ) -> Line:
 922         LL = line.leaves
 923
 924         new_line = line.clone()
 925         new_line.comments = line.comments.copy()
 926
 927         previous_idx = -1
 928         # We need to sort the indices, since string_idx and its matching
 929         # rpar_idx may not come in order, e.g. in
 930         # `("outer" % ("inner".join(items)))`, the "inner" string's
 931         # string_idx is smaller than "outer" string's rpar_idx.
 932         for idx in sorted(string_and_rpar_indices):
 933             leaf = LL[idx]
 934             lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
 935             append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
 936             if leaf.type == token.STRING:
 937                 string_leaf = Leaf(token.STRING, LL[idx].value)
 938                 LL[lpar_or_rpar_idx].remove()  # Remove lpar.
 939                 replace_child(LL[idx], string_leaf)
 940                 new_line.append(string_leaf)
 941             else:
 942                 LL[lpar_or_rpar_idx].remove()  # This is a rpar.
 943
 944             previous_idx = idx
 945
 946         # Append the leaves after the last idx:
 947         append_leaves(new_line, line, LL[idx + 1 :])
 948
 949         return new_line
 950
 951
 952 class BaseStringSplitter(StringTransformer):
 953     """
 954     Abstract class for StringTransformers which transform a Line's strings by splitting
 955     them or placing them on their own lines where necessary to avoid going over
 956     the configured line length.
 957
 958     Requirements:
 959         * The target string value is responsible for the line going over the
 960         line length limit. It follows that after all of black's other line
 961         split methods have been exhausted, this line (or one of the resulting
 962         lines after all line splits are performed) would still be over the
 963         line_length limit unless we split this string.
 964             AND
 965         * The target string is NOT a "pointless" string (i.e. a string that has
 966         no parent or siblings).
 967             AND
 968         * The target string is not followed by an inline comment that appears
 969         to be a pragma.
 970             AND
 971         * The target string is not a multiline (i.e. triple-quote) string.
 972     """
 973
 974     STRING_OPERATORS: Final = [
 975         token.EQEQUAL,
 976         token.GREATER,
 977         token.GREATEREQUAL,
 978         token.LESS,
 979         token.LESSEQUAL,
 980         token.NOTEQUAL,
 981         token.PERCENT,
 982         token.PLUS,
 983         token.STAR,
 984     ]
 985
 986     @abstractmethod
 987     def do_splitter_match(self, line: Line) -> TMatchResult:
 988         """
 989         BaseStringSplitter asks its clients to override this method instead of
 990         `StringTransformer.do_match(...)`.
 991
 992         Follows the same protocol as `StringTransformer.do_match(...)`.
 993
 994         Refer to `help(StringTransformer.do_match)` for more information.
 995         """
 996
 997     def do_match(self, line: Line) -> TMatchResult:
 998         match_result = self.do_splitter_match(line)
 999         if isinstance(match_result, Err):
1000             return match_result
1001
1002         string_indices = match_result.ok()
1003         assert len(string_indices) == 1, (
1004             f"{self.__class__.__name__} should only find one match at a time, found"
1005             f" {len(string_indices)}"
1006         )
1007         string_idx = string_indices[0]
1008         vresult = self._validate(line, string_idx)
1009         if isinstance(vresult, Err):
1010             return vresult
1011
1012         return match_result
1013
1014     def _validate(self, line: Line, string_idx: int) -> TResult[None]:
1015         """
1016         Checks that @line meets all of the requirements listed in this classes'
1017         docstring. Refer to `help(BaseStringSplitter)` for a detailed
1018         description of those requirements.
1019
1020         Returns:
1021             * Ok(None), if ALL of the requirements are met.
1022                 OR
1023             * Err(CannotTransform), if ANY of the requirements are NOT met.
1024         """
1025         LL = line.leaves
1026
1027         string_leaf = LL[string_idx]
1028
1029         max_string_length = self._get_max_string_length(line, string_idx)
1030         if len(string_leaf.value) <= max_string_length:
1031             return TErr(
1032                 "The string itself is not what is causing this line to be too long."
1033             )
1034
1035         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
1036             token.STRING,
1037             token.NEWLINE,
1038         ]:
1039             return TErr(
1040                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
1041                 " no parent)."
1042             )
1043
1044         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
1045             line.comments[id(line.leaves[string_idx])]
1046         ):
1047             return TErr(
1048                 "Line appears to end with an inline pragma comment. Splitting the line"
1049                 " could modify the pragma's behavior."
1050             )
1051
1052         if has_triple_quotes(string_leaf.value):
1053             return TErr("We cannot split multiline strings.")
1054
1055         return Ok(None)
1056
1057     def _get_max_string_length(self, line: Line, string_idx: int) -> int:
1058         """
1059         Calculates the max string length used when attempting to determine
1060         whether or not the target string is responsible for causing the line to
1061         go over the line length limit.
1062
1063         WARNING: This method is tightly coupled to both StringSplitter and
1064         (especially) StringParenWrapper. There is probably a better way to
1065         accomplish what is being done here.
1066
1067         Returns:
1068             max_string_length: such that `line.leaves[string_idx].value >
1069             max_string_length` implies that the target string IS responsible
1070             for causing this line to exceed the line length limit.
1071         """
1072         LL = line.leaves
1073
1074         is_valid_index = is_valid_index_factory(LL)
1075
1076         # We use the shorthand "WMA4" in comments to abbreviate "We must
1077         # account for". When giving examples, we use STRING to mean some/any
1078         # valid string.
1079         #
1080         # Finally, we use the following convenience variables:
1081         #
1082         #   P:  The leaf that is before the target string leaf.
1083         #   N:  The leaf that is after the target string leaf.
1084         #   NN: The leaf that is after N.
1085
1086         # WMA4 the whitespace at the beginning of the line.
1087         offset = line.depth * 4
1088
1089         if is_valid_index(string_idx - 1):
1090             p_idx = string_idx - 1
1091             if (
1092                 LL[string_idx - 1].type == token.LPAR
1093                 and LL[string_idx - 1].value == ""
1094                 and string_idx >= 2
1095             ):
1096                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
1097                 p_idx -= 1
1098
1099             P = LL[p_idx]
1100             if P.type in self.STRING_OPERATORS:
1101                 # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
1102                 offset += len(str(P)) + 1
1103
1104             if P.type == token.COMMA:
1105                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
1106                 offset += 3
1107
1108             if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
1109                 # This conditional branch is meant to handle dictionary keys,
1110                 # variable assignments, 'return STRING' statement lines, and
1111                 # 'else STRING' ternary expression lines.
1112
1113                 # WMA4 a single space.
1114                 offset += 1
1115
1116                 # WMA4 the lengths of any leaves that came before that space,
1117                 # but after any closing bracket before that space.
1118                 for leaf in reversed(LL[: p_idx + 1]):
1119                     offset += len(str(leaf))
1120                     if leaf.type in CLOSING_BRACKETS:
1121                         break
1122
1123         if is_valid_index(string_idx + 1):
1124             N = LL[string_idx + 1]
1125             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
1126                 # If the next leaf is an empty RPAR placeholder, we should skip it.
1127                 N = LL[string_idx + 2]
1128
1129             if N.type == token.COMMA:
1130                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
1131                 offset += 1
1132
1133             if is_valid_index(string_idx + 2):
1134                 NN = LL[string_idx + 2]
1135
1136                 if N.type == token.DOT and NN.type == token.NAME:
1137                     # This conditional branch is meant to handle method calls invoked
1138                     # off of a string literal up to and including the LPAR character.
1139
1140                     # WMA4 the '.' character.
1141                     offset += 1
1142
1143                     if (
1144                         is_valid_index(string_idx + 3)
1145                         and LL[string_idx + 3].type == token.LPAR
1146                     ):
1147                         # WMA4 the left parenthesis character.
1148                         offset += 1
1149
1150                     # WMA4 the length of the method's name.
1151                     offset += len(NN.value)
1152
1153         has_comments = False
1154         for comment_leaf in line.comments_after(LL[string_idx]):
1155             if not has_comments:
1156                 has_comments = True
1157                 # WMA4 two spaces before the '#' character.
1158                 offset += 2
1159
1160             # WMA4 the length of the inline comment.
1161             offset += len(comment_leaf.value)
1162
1163         max_string_length = self.line_length - offset
1164         return max_string_length
1165
1166     @staticmethod
1167     def _prefer_paren_wrap_match(LL: List[Leaf]) -> Optional[int]:
1168         """
1169         Returns:
1170             string_idx such that @LL[string_idx] is equal to our target (i.e.
1171             matched) string, if this line matches the "prefer paren wrap" statement
1172             requirements listed in the 'Requirements' section of the StringParenWrapper
1173             class's docstring.
1174                 OR
1175             None, otherwise.
1176         """
1177         # The line must start with a string.
1178         if LL[0].type != token.STRING:
1179             return None
1180
1181         # If the string is surrounded by commas (or is the first/last child)...
1182         prev_sibling = LL[0].prev_sibling
1183         next_sibling = LL[0].next_sibling
1184         if not prev_sibling and not next_sibling and parent_type(LL[0]) == syms.atom:
1185             # If it's an atom string, we need to check the parent atom's siblings.
1186             parent = LL[0].parent
1187             assert parent is not None  # For type checkers.
1188             prev_sibling = parent.prev_sibling
1189             next_sibling = parent.next_sibling
1190         if (not prev_sibling or prev_sibling.type == token.COMMA) and (
1191             not next_sibling or next_sibling.type == token.COMMA
1192         ):
1193             return 0
1194
1195         return None
1196
1197
def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yields the (start, end) span of every top-level replacement field found in
    the f-string source text @s.

    Spans are half-open ranges: `start` is the index of the opening curly brace
    and `end` is one past the matching closing curly brace. Fields nested inside
    another field are not reported separately.

    The input is assumed to be a valid f-string, but invalid input will not
    make this function crash.
    """
    open_braces: List[int] = []  # indices of the '{' that are still unclosed
    length = len(s)
    pos = 0
    while pos < length:
        char = s[pos]

        if char == "{":
            if not open_braces and s[pos + 1 : pos + 2] == "{":
                # '{{' in the literal part of the f-string is an escaped brace.
                pos += 2
            else:
                open_braces.append(pos)
                pos += 1
        elif char == "}":
            if open_braces:
                start = open_braces.pop()
                if not open_braces:
                    # The outermost field has just been closed.
                    yield (start, pos + 1)
            pos += 1
        elif open_braces and char in ("'", '"'):
            # Inside an expression: jump over the whole string literal so that
            # braces within it are not counted. Backslashes are not legal in
            # the expression portion of f-strings, so no escape handling is
            # needed here.
            quote = char * 3 if s[pos : pos + 3] == char * 3 else char
            closing = s.find(quote, pos + len(quote))
            pos = length if closing == -1 else closing + len(quote)
        else:
            pos += 1
1243
1244
def fstring_contains_expr(s: str) -> bool:
    """Returns True iff `iter_fexpr_spans` finds at least one span in @s."""
    for _span in iter_fexpr_spans(s):
        return True
    return False
1247
1248
def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Swaps every `old_quote` that occurs inside an f-string expression for the
    other quote character; text outside of expressions is left untouched.

    f-string expressions can't contain backslashes, so the quotes have to be
    toggled whenever the f-string itself is going to end up using the same
    quote. Toggling without escaping is safe because quotes can't be reused
    inside f-string expressions (such code fails to parse).

    NOTE: If PEP 701 is accepted, the statement above will no longer hold.
    However, if quotes can be reused, they can simply be kept as they are,
    without updates or escaping, once Black figures out how to parse the new
    grammar.
    """
    new_quote = '"' if old_quote != '"' else "'"

    pieces = []
    cursor = 0
    for begin, finish in iter_fexpr_spans(fstring):
        literal_text = fstring[cursor:begin]
        expression = fstring[begin:finish]
        pieces += [literal_text, expression.replace(old_quote, new_quote)]
        cursor = finish

    # Whatever follows the last expression is literal text as well.
    return "".join(pieces) + fstring[cursor:]
1271
1272
1273 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
1274     """
1275     StringTransformer that splits "atom" strings (i.e. strings which exist on
1276     lines by themselves).
1277
1278     Requirements:
1279         * The line consists ONLY of a single string (possibly prefixed by a
1280         string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
1281         a trailing comma.
1282             AND
1283         * All of the requirements listed in BaseStringSplitter's docstring.
1284
1285     Transformations:
1286         The string mentioned in the 'Requirements' section is split into as
1287         many substrings as necessary to adhere to the configured line length.
1288
1289         In the final set of substrings, no substring should be smaller than
1290         MIN_SUBSTR_SIZE characters.
1291
1292         The string will ONLY be split on spaces (i.e. each new substring should
1293         start with a space). Note that the string will NOT be split on a space
1294         which is escaped with a backslash.
1295
1296         If the string is an f-string, it will NOT be split in the middle of an
1297         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
1298         else bar()} is an f-expression).
1299
1300         If the string that is being split has an associated set of custom split
1301         records and those custom splits will NOT result in any line going over
1302         the configured line length, those custom splits are used. Otherwise the
1303         string is split as late as possible (from left-to-right) while still
1304         adhering to the transformation rules listed above.
1305
1306     Collaborations:
1307         StringSplitter relies on StringMerger to construct the appropriate
1308         CustomSplit objects and add them to the custom split map.
1309     """
1310
1311     MIN_SUBSTR_SIZE: Final = 6
1312
1313     def do_splitter_match(self, line: Line) -> TMatchResult:
1314         LL = line.leaves
1315
1316         if self._prefer_paren_wrap_match(LL) is not None:
1317             return TErr("Line needs to be wrapped in parens first.")
1318
1319         is_valid_index = is_valid_index_factory(LL)
1320
1321         idx = 0
1322
1323         # The first two leaves MAY be the 'not in' keywords...
1324         if (
1325             is_valid_index(idx)
1326             and is_valid_index(idx + 1)
1327             and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
1328             and str(LL[idx]) + str(LL[idx + 1]) == "not in"
1329         ):
1330             idx += 2
1331         # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
1332         elif is_valid_index(idx) and (
1333             LL[idx].type in self.STRING_OPERATORS
1334             or LL[idx].type == token.NAME
1335             and str(LL[idx]) == "in"
1336         ):
1337             idx += 1
1338
1339         # The next/first leaf MAY be an empty LPAR...
1340         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
1341             idx += 1
1342
1343         # The next/first leaf MUST be a string...
1344         if not is_valid_index(idx) or LL[idx].type != token.STRING:
1345             return TErr("Line does not start with a string.")
1346
1347         string_idx = idx
1348
1349         # Skip the string trailer, if one exists.
1350         string_parser = StringParser()
1351         idx = string_parser.parse(LL, string_idx)
1352
1353         # That string MAY be followed by an empty RPAR...
1354         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
1355             idx += 1
1356
1357         # That string / empty RPAR leaf MAY be followed by a comma...
1358         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1359             idx += 1
1360
1361         # But no more leaves are allowed...
1362         if is_valid_index(idx):
1363             return TErr("This line does not end with a string.")
1364
1365         return Ok([string_idx])
1366
1367     def do_transform(
1368         self, line: Line, string_indices: List[int]
1369     ) -> Iterator[TResult[Line]]:
1370         LL = line.leaves
1371         assert len(string_indices) == 1, (
1372             f"{self.__class__.__name__} should only find one match at a time, found"
1373             f" {len(string_indices)}"
1374         )
1375         string_idx = string_indices[0]
1376
1377         QUOTE = LL[string_idx].value[-1]
1378
1379         is_valid_index = is_valid_index_factory(LL)
1380         insert_str_child = insert_str_child_factory(LL[string_idx])
1381
1382         prefix = get_string_prefix(LL[string_idx].value).lower()
1383
1384         # We MAY choose to drop the 'f' prefix from substrings that don't
1385         # contain any f-expressions, but ONLY if the original f-string
1386         # contains at least one f-expression. Otherwise, we will alter the AST
1387         # of the program.
1388         drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
1389             LL[string_idx].value
1390         )
1391
1392         first_string_line = True
1393
1394         string_op_leaves = self._get_string_operator_leaves(LL)
1395         string_op_leaves_length = (
1396             sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1
1397             if string_op_leaves
1398             else 0
1399         )
1400
1401         def maybe_append_string_operators(new_line: Line) -> None:
1402             """
1403             Side Effects:
1404                 If @line starts with a string operator and this is the first
1405                 line we are constructing, this function appends the string
1406                 operator to @new_line and replaces the old string operator leaf
1407                 in the node structure. Otherwise this function does nothing.
1408             """
1409             maybe_prefix_leaves = string_op_leaves if first_string_line else []
1410             for i, prefix_leaf in enumerate(maybe_prefix_leaves):
1411                 replace_child(LL[i], prefix_leaf)
1412                 new_line.append(prefix_leaf)
1413
1414         ends_with_comma = (
1415             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
1416         )
1417
1418         def max_last_string() -> int:
1419             """
1420             Returns:
1421                 The max allowed length of the string value used for the last
1422                 line we will construct.
1423             """
1424             result = self.line_length
1425             result -= line.depth * 4
1426             result -= 1 if ends_with_comma else 0
1427             result -= string_op_leaves_length
1428             return result
1429
1430         # --- Calculate Max Break Index (for string value)
1431         # We start with the line length limit
1432         max_break_idx = self.line_length
1433         # The last index of a string of length N is N-1.
1434         max_break_idx -= 1
1435         # Leading whitespace is not present in the string value (e.g. Leaf.value).
1436         max_break_idx -= line.depth * 4
1437         if max_break_idx < 0:
1438             yield TErr(
1439                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
1440                 f" {line.depth}"
1441             )
1442             return
1443
1444         # Check if StringMerger registered any custom splits.
1445         custom_splits = self.pop_custom_splits(LL[string_idx].value)
1446         # We use them ONLY if none of them would produce lines that exceed the
1447         # line limit.
1448         use_custom_breakpoints = bool(
1449             custom_splits
1450             and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
1451         )
1452
1453         # Temporary storage for the remaining chunk of the string line that
1454         # can't fit onto the line currently being constructed.
1455         rest_value = LL[string_idx].value
1456
1457         def more_splits_should_be_made() -> bool:
1458             """
1459             Returns:
1460                 True iff `rest_value` (the remaining string value from the last
1461                 split), should be split again.
1462             """
1463             if use_custom_breakpoints:
1464                 return len(custom_splits) > 1
1465             else:
1466                 return len(rest_value) > max_last_string()
1467
1468         string_line_results: List[Ok[Line]] = []
1469         while more_splits_should_be_made():
1470             if use_custom_breakpoints:
1471                 # Custom User Split (manual)
1472                 csplit = custom_splits.pop(0)
1473                 break_idx = csplit.break_idx
1474             else:
1475                 # Algorithmic Split (automatic)
1476                 max_bidx = max_break_idx - string_op_leaves_length
1477                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
1478                 if maybe_break_idx is None:
1479                     # If we are unable to algorithmically determine a good split
1480                     # and this string has custom splits registered to it, we
1481                     # fall back to using them--which means we have to start
1482                     # over from the beginning.
1483                     if custom_splits:
1484                         rest_value = LL[string_idx].value
1485                         string_line_results = []
1486                         first_string_line = True
1487                         use_custom_breakpoints = True
1488                         continue
1489
1490                     # Otherwise, we stop splitting here.
1491                     break
1492
1493                 break_idx = maybe_break_idx
1494
1495             # --- Construct `next_value`
1496             next_value = rest_value[:break_idx] + QUOTE
1497
1498             # HACK: The following 'if' statement is a hack to fix the custom
1499             # breakpoint index in the case of either: (a) substrings that were
1500             # f-strings but will have the 'f' prefix removed OR (b) substrings
1501             # that were not f-strings but will now become f-strings because of
1502             # redundant use of the 'f' prefix (i.e. none of the substrings
1503             # contain f-expressions but one or more of them had the 'f' prefix
1504             # anyway; in which case, we will prepend 'f' to _all_ substrings).
1505             #
1506             # There is probably a better way to accomplish what is being done
1507             # here...
1508             #
1509             # If this substring is an f-string, we _could_ remove the 'f'
1510             # prefix, and the current custom split did NOT originally use a
1511             # prefix...
1512             if (
1513                 use_custom_breakpoints
1514                 and not csplit.has_prefix
1515                 and (
1516                     # `next_value == prefix + QUOTE` happens when the custom
1517                     # split is an empty string.
1518                     next_value == prefix + QUOTE
1519                     or next_value != self._normalize_f_string(next_value, prefix)
1520                 )
1521             ):
1522                 # Then `csplit.break_idx` will be off by one after removing
1523                 # the 'f' prefix.
1524                 break_idx += 1
1525                 next_value = rest_value[:break_idx] + QUOTE
1526
1527             if drop_pointless_f_prefix:
1528                 next_value = self._normalize_f_string(next_value, prefix)
1529
1530             # --- Construct `next_leaf`
1531             next_leaf = Leaf(token.STRING, next_value)
1532             insert_str_child(next_leaf)
1533             self._maybe_normalize_string_quotes(next_leaf)
1534
1535             # --- Construct `next_line`
1536             next_line = line.clone()
1537             maybe_append_string_operators(next_line)
1538             next_line.append(next_leaf)
1539             string_line_results.append(Ok(next_line))
1540
1541             rest_value = prefix + QUOTE + rest_value[break_idx:]
1542             first_string_line = False
1543
1544         yield from string_line_results
1545
1546         if drop_pointless_f_prefix:
1547             rest_value = self._normalize_f_string(rest_value, prefix)
1548
1549         rest_leaf = Leaf(token.STRING, rest_value)
1550         insert_str_child(rest_leaf)
1551
1552         # NOTE: I could not find a test case that verifies that the following
1553         # line is actually necessary, but it seems to be. Otherwise we risk
1554         # not normalizing the last substring, right?
1555         self._maybe_normalize_string_quotes(rest_leaf)
1556
1557         last_line = line.clone()
1558         maybe_append_string_operators(last_line)
1559
1560         # If there are any leaves to the right of the target string...
1561         if is_valid_index(string_idx + 1):
1562             # We use `temp_value` here to determine how long the last line
1563             # would be if we were to append all the leaves to the right of the
1564             # target string to the last string line.
1565             temp_value = rest_value
1566             for leaf in LL[string_idx + 1 :]:
1567                 temp_value += str(leaf)
1568                 if leaf.type == token.LPAR:
1569                     break
1570
1571             # Try to fit them all on the same line with the last substring...
1572             if (
1573                 len(temp_value) <= max_last_string()
1574                 or LL[string_idx + 1].type == token.COMMA
1575             ):
1576                 last_line.append(rest_leaf)
1577                 append_leaves(last_line, line, LL[string_idx + 1 :])
1578                 yield Ok(last_line)
1579             # Otherwise, place the last substring on one line and everything
1580             # else on a line below that...
1581             else:
1582                 last_line.append(rest_leaf)
1583                 yield Ok(last_line)
1584
1585                 non_string_line = line.clone()
1586                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
1587                 yield Ok(non_string_line)
1588         # Else the target string was the last leaf...
1589         else:
1590             last_line.append(rest_leaf)
1591             last_line.comments = line.comments.copy()
1592             yield Ok(last_line)
1593
1594     def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1595         """
1596         Yields:
1597             All ranges of @string which, if @string were to be split there,
1598             would result in the splitting of an \\N{...} expression (which is NOT
1599             allowed).
1600         """
1601         # True - the previous backslash was unescaped
1602         # False - the previous backslash was escaped *or* there was no backslash
1603         previous_was_unescaped_backslash = False
1604         it = iter(enumerate(string))
1605         for idx, c in it:
1606             if c == "\\":
1607                 previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1608                 continue
1609             if not previous_was_unescaped_backslash or c != "N":
1610                 previous_was_unescaped_backslash = False
1611                 continue
1612             previous_was_unescaped_backslash = False
1613
1614             begin = idx - 1  # the position of backslash before \N{...}
1615             for idx, c in it:
1616                 if c == "}":
1617                     end = idx
1618                     break
1619             else:
1620                 # malformed nameescape expression?
1621                 # should have been detected by AST parsing earlier...
1622                 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1623             yield begin, end
1624
1625     def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1626         """
1627         Yields:
1628             All ranges of @string which, if @string were to be split there,
1629             would result in the splitting of an f-expression (which is NOT
1630             allowed).
1631         """
1632         if "f" not in get_string_prefix(string).lower():
1633             return
1634         yield from iter_fexpr_spans(string)
1635
1636     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
1637         illegal_indices: Set[Index] = set()
1638         iterators = [
1639             self._iter_fexpr_slices(string),
1640             self._iter_nameescape_slices(string),
1641         ]
1642         for it in iterators:
1643             for begin, end in it:
1644                 illegal_indices.update(range(begin, end + 1))
1645         return illegal_indices
1646
1647     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
1648         """
1649         This method contains the algorithm that StringSplitter uses to
1650         determine which character to split each string at.
1651
1652         Args:
1653             @string: The substring that we are attempting to split.
1654             @max_break_idx: The ideal break index. We will return this value if it
1655             meets all the necessary conditions. In the likely event that it
1656             doesn't we will try to find the closest index BELOW @max_break_idx
1657             that does. If that fails, we will expand our search by also
1658             considering all valid indices ABOVE @max_break_idx.
1659
1660         Pre-Conditions:
1661             * assert_is_leaf_string(@string)
1662             * 0 <= @max_break_idx < len(@string)
1663
1664         Returns:
1665             break_idx, if an index is able to be found that meets all of the
1666             conditions listed in the 'Transformations' section of this classes'
1667             docstring.
1668                 OR
1669             None, otherwise.
1670         """
1671         is_valid_index = is_valid_index_factory(string)
1672
1673         assert is_valid_index(max_break_idx)
1674         assert_is_leaf_string(string)
1675
1676         _illegal_split_indices = self._get_illegal_split_indices(string)
1677
1678         def breaks_unsplittable_expression(i: Index) -> bool:
1679             """
1680             Returns:
1681                 True iff returning @i would result in the splitting of an
1682                 unsplittable expression (which is NOT allowed).
1683             """
1684             return i in _illegal_split_indices
1685
1686         def passes_all_checks(i: Index) -> bool:
1687             """
1688             Returns:
1689                 True iff ALL of the conditions listed in the 'Transformations'
1690                 section of this classes' docstring would be be met by returning @i.
1691             """
1692             is_space = string[i] == " "
1693
1694             is_not_escaped = True
1695             j = i - 1
1696             while is_valid_index(j) and string[j] == "\\":
1697                 is_not_escaped = not is_not_escaped
1698                 j -= 1
1699
1700             is_big_enough = (
1701                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
1702                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
1703             )
1704             return (
1705                 is_space
1706                 and is_not_escaped
1707                 and is_big_enough
1708                 and not breaks_unsplittable_expression(i)
1709             )
1710
1711         # First, we check all indices BELOW @max_break_idx.
1712         break_idx = max_break_idx
1713         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
1714             break_idx -= 1
1715
1716         if not passes_all_checks(break_idx):
1717             # If that fails, we check all indices ABOVE @max_break_idx.
1718             #
1719             # If we are able to find a valid index here, the next line is going
1720             # to be longer than the specified line length, but it's probably
1721             # better than doing nothing at all.
1722             break_idx = max_break_idx + 1
1723             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
1724                 break_idx += 1
1725
1726             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
1727                 return None
1728
1729         return break_idx
1730
1731     def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
1732         if self.normalize_strings:
1733             leaf.value = normalize_string_quotes(leaf.value)
1734
1735     def _normalize_f_string(self, string: str, prefix: str) -> str:
1736         """
1737         Pre-Conditions:
1738             * assert_is_leaf_string(@string)
1739
1740         Returns:
1741             * If @string is an f-string that contains no f-expressions, we
1742             return a string identical to @string except that the 'f' prefix
1743             has been stripped and all double braces (i.e. '{{' or '}}') have
1744             been normalized (i.e. turned into '{' or '}').
1745                 OR
1746             * Otherwise, we return @string.
1747         """
1748         assert_is_leaf_string(string)
1749
1750         if "f" in prefix and not fstring_contains_expr(string):
1751             new_prefix = prefix.replace("f", "")
1752
1753             temp = string[len(prefix) :]
1754             temp = re.sub(r"\{\{", "{", temp)
1755             temp = re.sub(r"\}\}", "}", temp)
1756             new_string = temp
1757
1758             return f"{new_prefix}{new_string}"
1759         else:
1760             return string
1761
1762     def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
1763         LL = list(leaves)
1764
1765         string_op_leaves = []
1766         i = 0
1767         while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
1768             prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
1769             string_op_leaves.append(prefix_leaf)
1770             i += 1
1771         return string_op_leaves
1772
1773
class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that wraps strings in parens and then splits at the LPAR.

    Requirements:
        All of the requirements listed in BaseStringSplitter's docstring in
        addition to the requirements listed below:

        * The line is a return/yield statement, which returns/yields a string.
            OR
        * The line is part of a ternary expression (e.g. `x = y if cond else
        z`) such that the line starts with `else <string>`, where <string> is
        some string.
            OR
        * The line is an assert statement, which ends with a string.
            OR
        * The line is an assignment statement (e.g. `x = <string>` or `x +=
        <string>`) such that the variable is being assigned the value of some
        string.
            OR
        * The line is a dictionary key assignment where some valid key is being
        assigned the value of some string.
            OR
        * The line is a lambda expression and the value is a string.
            OR
        * The line starts with an "atom" string that prefers to be wrapped in
        parens. It's preferred to be wrapped when the string is surrounded by
        commas (or is the first/last child).

    Transformations:
        The chosen string is wrapped in parentheses and then split at the LPAR.

        We then have one line which ends with an LPAR and another line that
        starts with the chosen string. The latter line is then split again at
        the RPAR. This results in the RPAR (and possibly a trailing comma)
        being placed on its own line.

        NOTE: If any leaves exist to the right of the chosen string (except
        for a trailing comma, which would be placed after the RPAR), those
        leaves are placed inside the parentheses.  In effect, the chosen
        string is not necessarily being "wrapped" by parentheses. We can,
        however, count on the LPAR being placed directly before the chosen
        string.

        In other words, StringParenWrapper creates "atom" strings. These
        can then be split again by StringSplitter, if necessary.

    Collaborations:
        In the event that a string line split by StringParenWrapper is
        changed such that it no longer needs to be given its own line,
        StringParenWrapper relies on StringParenStripper to clean up the
        parentheses it created.

        For "atom" strings that prefer to be wrapped in parens, it requires
        StringSplitter to hold the split until the string is wrapped in parens.
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        Look for the single string on @line that should be wrapped in
        parentheses.

        Returns:
            Ok([string_idx]), if one of the matchers finds a target string
            and wrapping it is worthwhile.
                OR
            Err(CannotTransform), if the line ends in an opening bracket, if
            no matcher finds a target string, or if the target string has no
            spaces, has no custom splits, and would still be too long on its
            own line.
        """
        LL = line.leaves

        if line.leaves[-1].type in OPENING_BRACKETS:
            return TErr(
                "Cannot wrap parens around a line that ends in an opening bracket."
            )

        # NOTE: Chaining the matchers with `or` is only safe because every
        # matcher defined in this class returns an index >= 1 (or None), so a
        # valid match is never mistaken for "no match". The final operand's
        # value is used as-is, whatever it is.
        string_idx = (
            self._return_match(LL)
            or self._else_match(LL)
            or self._assert_match(LL)
            or self._assign_match(LL)
            or self._dict_or_lambda_match(LL)
            or self._prefer_paren_wrap_match(LL)
        )

        if string_idx is not None:
            string_value = line.leaves[string_idx].value
            # If the string has no spaces...
            if " " not in string_value:
                # And will still violate the line length limit when split
                # (the string line is placed one depth level deeper, at four
                # columns per level)...
                max_string_length = self.line_length - ((line.depth + 1) * 4)
                if len(string_value) > max_string_length:
                    # And has no associated custom splits...
                    if not self.has_custom_splits(string_value):
                        # Then we should NOT put this string on its own line.
                        return TErr(
                            "We do not wrap long strings in parentheses when the"
                            " resultant line would still be over the specified line"
                            " length and can't be split further by StringSplitter."
                        )
            return Ok([string_idx])

        return TErr("This line does not contain any non-atomic strings.")

    @staticmethod
    def _return_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the return/yield statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a return/yield statement and the first leaf
        # contains either the "return" or "yield" keywords...
        if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
            0
        ].value in ["return", "yield"]:
            is_valid_index = is_valid_index_factory(LL)

            # Step over an empty parenthesis leaf that directly follows the
            # keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _else_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the ternary expression
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a ternary expression and the first leaf
        # contains the "else" keyword...
        if (
            parent_type(LL[0]) == syms.test
            and LL[0].type == token.NAME
            and LL[0].value == "else"
        ):
            is_valid_index = is_valid_index_factory(LL)

            # Step over an empty parenthesis leaf that directly follows the
            # keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _assert_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assert statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an assert statement and the first leaf
        # contains the "assert" keyword...
        if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a comma...
                if leaf.type == token.COMMA:
                    # Step over an empty parenthesis leaf, if there is one.
                    # The bounds check keeps a comma in the final position
                    # from indexing past the end of @LL.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That comma MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _assign_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assignment statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an expression statement or is a function
        # argument AND the first leaf contains a variable name...
        if (
            parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
            and LL[0].type == token.NAME
        ):
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find either an '=' or '+=' symbol...
                if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                    # Step over an empty parenthesis leaf, if there is one.
                    # The bounds check keeps a symbol in the final position
                    # from indexing past the end of @LL.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That symbol MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # The next leaf MAY be a comma iff this line is a part
                        # of a function argument...
                        if (
                            parent_type(LL[0]) == syms.argument
                            and is_valid_index(idx)
                            and LL[idx].type == token.COMMA
                        ):
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _dict_or_lambda_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the dictionary key assignment
            statement or lambda expression requirements listed in the
            'Requirements' section of this class's docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a dictionary key assignment or lambda expression...
        parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)]
        if syms.dictsetmaker in parent_types or syms.lambdef in parent_types:
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a colon, it can either be dict's or lambda's colon...
                # (A colon in the final position is skipped, since nothing
                # can follow it.)
                if leaf.type == token.COLON and i < len(LL) - 1:
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That colon MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # That string MAY be followed by a comma...
                        if is_valid_index(idx) and LL[idx].type == token.COMMA:
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    def do_transform(
        self, line: Line, string_indices: List[int]
    ) -> Iterator[TResult[Line]]:
        """
        Wrap the matched string in parentheses and split @line around it.

        Args:
            @line: The line that contains the matched string.
            @string_indices: Must hold exactly one index into @line.leaves:
            the index of the matched string.

        Yields:
            Three Ok(Line) results, in order:
            * the leaves to the left of the string followed by a new LPAR
            (plus the comments taken from the string, from an old LPAR, and
            from a trailing comma),
            * the string followed by the remaining leaves to its right, on a
            line that is one depth level deeper, and
            * a new RPAR, followed by the trailing comma if @line ended with
            one.
        """
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = -1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                # The trailing comma is re-created after the new RPAR on the
                # last line instead.
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
                #        "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
                    if (
                        index > 0
                        and index < len(left_leaves) - 1
                        and left_leaves[index - 1].type == token.COLON
                        and left_leaves[index + 1].value == "lambda"
                    ):
                        right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
2163
2164
2165 class StringParser:
2166     """
2167     A state machine that aids in parsing a string's "trailer", which can be
2168     either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
2169     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
2170     varY)`).
2171
2172     NOTE: A new StringParser object MUST be instantiated for each string
2173     trailer we need to parse.
2174
2175     Examples:
2176         We shall assume that `line` equals the `Line` object that corresponds
2177         to the following line of python code:
2178         ```
2179         x = "Some {}.".format("String") + some_other_string
2180         ```
2181
2182         Furthermore, we will assume that `string_idx` is some index such that:
2183         ```
2184         assert line.leaves[string_idx].value == "Some {}."
2185         ```
2186
2187         The following code snippet then holds:
2188         ```
2189         string_parser = StringParser()
2190         idx = string_parser.parse(line.leaves, string_idx)
2191         assert line.leaves[idx].type == token.PLUS
2192         ```
2193     """
2194
2195     DEFAULT_TOKEN: Final = 20210605
2196
2197     # String Parser States
2198     START: Final = 1
2199     DOT: Final = 2
2200     NAME: Final = 3
2201     PERCENT: Final = 4
2202     SINGLE_FMT_ARG: Final = 5
2203     LPAR: Final = 6
2204     RPAR: Final = 7
2205     DONE: Final = 8
2206
2207     # Lookup Table for Next State
2208     _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
2209         # A string trailer may start with '.' OR '%'.
2210         (START, token.DOT): DOT,
2211         (START, token.PERCENT): PERCENT,
2212         (START, DEFAULT_TOKEN): DONE,
2213         # A '.' MUST be followed by an attribute or method name.
2214         (DOT, token.NAME): NAME,
2215         # A method name MUST be followed by an '(', whereas an attribute name
2216         # is the last symbol in the string trailer.
2217         (NAME, token.LPAR): LPAR,
2218         (NAME, DEFAULT_TOKEN): DONE,
2219         # A '%' symbol can be followed by an '(' or a single argument (e.g. a
2220         # string or variable name).
2221         (PERCENT, token.LPAR): LPAR,
2222         (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
2223         # If a '%' symbol is followed by a single argument, that argument is
2224         # the last leaf in the string trailer.
2225         (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
2226         # If present, a ')' symbol is the last symbol in a string trailer.
2227         # (NOTE: LPARS and nested RPARS are not included in this lookup table,
2228         # since they are treated as a special case by the parsing logic in this
2229         # classes' implementation.)
2230         (RPAR, DEFAULT_TOKEN): DONE,
2231     }
2232
2233     def __init__(self) -> None:
2234         self._state = self.START
2235         self._unmatched_lpars = 0
2236
2237     def parse(self, leaves: List[Leaf], string_idx: int) -> int:
2238         """
2239         Pre-conditions:
2240             * @leaves[@string_idx].type == token.STRING
2241
2242         Returns:
2243             The index directly after the last leaf which is apart of the string
2244             trailer, if a "trailer" exists.
2245                 OR
2246             @string_idx + 1, if no string "trailer" exists.
2247         """
2248         assert leaves[string_idx].type == token.STRING
2249
2250         idx = string_idx + 1
2251         while idx < len(leaves) and self._next_state(leaves[idx]):
2252             idx += 1
2253         return idx
2254
2255     def _next_state(self, leaf: Leaf) -> bool:
2256         """
2257         Pre-conditions:
2258             * On the first call to this function, @leaf MUST be the leaf that
2259             was directly after the string leaf in question (e.g. if our target
2260             string is `line.leaves[i]` then the first call to this method must
2261             be `line.leaves[i + 1]`).
2262             * On the next call to this function, the leaf parameter passed in
2263             MUST be the leaf directly following @leaf.
2264
2265         Returns:
2266             True iff @leaf is apart of the string's trailer.
2267         """
2268         # We ignore empty LPAR or RPAR leaves.
2269         if is_empty_par(leaf):
2270             return True
2271
2272         next_token = leaf.type
2273         if next_token == token.LPAR:
2274             self._unmatched_lpars += 1
2275
2276         current_state = self._state
2277
2278         # The LPAR parser state is a special case. We will return True until we
2279         # find the matching RPAR token.
2280         if current_state == self.LPAR:
2281             if next_token == token.RPAR:
2282                 self._unmatched_lpars -= 1
2283                 if self._unmatched_lpars == 0:
2284                     self._state = self.RPAR
2285         # Otherwise, we use a lookup table to determine the next state.
2286         else:
2287             # If the lookup table matches the current state to the next
2288             # token, we use the lookup table.
2289             if (current_state, next_token) in self._goto:
2290                 self._state = self._goto[current_state, next_token]
2291             else:
2292                 # Otherwise, we check if a the current state was assigned a
2293                 # default.
2294                 if (current_state, self.DEFAULT_TOKEN) in self._goto:
2295                     self._state = self._goto[current_state, self.DEFAULT_TOKEN]
2296                 # If no default has been assigned, then this parser has a logic
2297                 # error.
2298                 else:
2299                     raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
2300
2301             if self._state == self.DONE:
2302                 return False
2303
2304         return True
2305
2306
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Detach @string_leaf from its parent and return a function that inserts
    replacement children, one per call, into the spot @string_leaf used to
    occupy. Successive calls insert left-to-right.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`, where `N` initially looks like this:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        Running
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        insert_str_child(Leaf(token.LPAR, '('))
        insert_str_child(Leaf(token.STRING, '"bar"'))
        insert_str_child(Leaf(token.RPAR, ')'))
        ```
        leaves `string_leaf.parent is None` and turns `N` into:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Grab the parent first: the leaf is orphaned by `remove()` below.
    parent = string_leaf.parent
    # The value returned by `remove()` is used as the first insertion index.
    next_idx = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal next_idx

        # These are checked on each insertion rather than when the factory
        # runs, so building the function never fails by itself.
        assert parent is not None
        assert next_idx is not None

        parent.insert_child(next_idx, child)
        # Step past the child just inserted so the next one lands after it.
        next_idx += 1

    return insert_str_child
2366
2367
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a bounds-checking predicate tied to @seq.

    The length of @seq is read each time the predicate is called, so the
    answer reflects the sequence's size at call time.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        # Negative values are rejected outright, even though Python itself
        # would accept them as from-the-end indices.
        if idx < 0:
            return False
        return idx < len(seq)

    return is_valid_index