src/black/trans.py

   1 """
   2 String transformers that can split and merge strings.
   3 """
   4 import re
   5 from abc import ABC, abstractmethod
   6 from collections import defaultdict
   7 from dataclasses import dataclass
   8 from typing import (
   9     Any,
  10     Callable,
  11     ClassVar,
  12     Collection,
  13     Dict,
  14     Final,
  15     Iterable,
  16     Iterator,
  17     List,
  18     Literal,
  19     Optional,
  20     Sequence,
  21     Set,
  22     Tuple,
  23     TypeVar,
  24     Union,
  25 )
  26
  27 from mypy_extensions import trait
  28
  29 from black.comments import contains_pragma_comment
  30 from black.lines import Line, append_leaves
  31 from black.mode import Feature, Mode
  32 from black.nodes import (
  33     CLOSING_BRACKETS,
  34     OPENING_BRACKETS,
  35     STANDALONE_COMMENT,
  36     is_empty_lpar,
  37     is_empty_par,
  38     is_empty_rpar,
  39     is_part_of_annotation,
  40     parent_type,
  41     replace_child,
  42     syms,
  43 )
  44 from black.rusty import Err, Ok, Result
  45 from black.strings import (
  46     assert_is_leaf_string,
  47     count_chars_in_width,
  48     get_string_prefix,
  49     has_triple_quotes,
  50     normalize_string_quotes,
  51     str_width,
  52 )
  53 from blib2to3.pgen2 import token
  54 from blib2to3.pytree import Leaf, Node
  55
  56
  57 class CannotTransform(Exception):
  58     """Base class for errors raised by Transformers."""
  59
  60
# types
T = TypeVar("T")
# Either kind of blib2to3 tree element: a Leaf or a Node.
LN = Union[Leaf, Node]
# Call signature shared by all transformers: given (line, features, mode), produce
# an iterator over the resulting Line objects.
Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]]
# Position of a leaf inside `Line.leaves` (see StringTransformer.do_match).
Index = int
# NOTE(review): presumably the integer type code of a Leaf/Node -- confirm at the
# use sites, which are outside this part of the file.
NodeType = int
# NOTE(review): presumably a state constant of the string parser defined later in
# this file -- confirm.
ParserState = int
# Result of `id(...)` applied to a string (see CustomSplitMapMixin._get_key).
StringID = int
TResult = Result[T, CannotTransform]  # (T)ransform Result
# Result of a match attempt: on success, the indices of the matched string leaves.
TMatchResult = TResult[List[Index]]

# U+3001 (ideographic comma), U+3002 (ideographic full stop) and U+FF0C (fullwidth
# comma).
SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"])  # East Asian stops
  73
  74
  75 def TErr(err_msg: str) -> Err[CannotTransform]:
  76     """(T)ransform Err
  77
  78     Convenience function used when working with the TResult type.
  79     """
  80     cant_transform = CannotTransform(err_msg)
  81     return Err(cant_transform)
  82
  83
  84 def hug_power_op(
  85     line: Line, features: Collection[Feature], mode: Mode
  86 ) -> Iterator[Line]:
  87     """A transformer which normalizes spacing around power operators."""
  88
  89     # Performance optimization to avoid unnecessary Leaf clones and other ops.
  90     for leaf in line.leaves:
  91         if leaf.type == token.DOUBLESTAR:
  92             break
  93     else:
  94         raise CannotTransform("No doublestar token was found in the line.")
  95
  96     def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
  97         # Brackets and parentheses indicate calls, subscripts, etc. ...
  98         # basically stuff that doesn't count as "simple". Only a NAME lookup
  99         # or dotted lookup (eg. NAME.NAME) is OK.
 100         if step == -1:
 101             disallowed = {token.RPAR, token.RSQB}
 102         else:
 103             disallowed = {token.LPAR, token.LSQB}
 104
 105         while 0 <= index < len(line.leaves):
 106             current = line.leaves[index]
 107             if current.type in disallowed:
 108                 return False
 109             if current.type not in {token.NAME, token.DOT} or current.value == "for":
 110                 # If the current token isn't disallowed, we'll assume this is simple as
 111                 # only the disallowed tokens are semantically attached to this lookup
 112                 # expression we're checking. Also, stop early if we hit the 'for' bit
 113                 # of a comprehension.
 114                 return True
 115
 116             index += step
 117
 118         return True
 119
 120     def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
 121         # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple
 122         # lookup (see above), with or without a preceding unary operator.
 123         start = line.leaves[index]
 124         if start.type in {token.NAME, token.NUMBER}:
 125             return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
 126
 127         if start.type in {token.PLUS, token.MINUS, token.TILDE}:
 128             if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
 129                 # step is always one as bases with a preceding unary op will be checked
 130                 # for simplicity starting from the next token (so it'll hit the check
 131                 # above).
 132                 return is_simple_lookup(index + 1, step=1)
 133
 134         return False
 135
 136     new_line = line.clone()
 137     should_hug = False
 138     for idx, leaf in enumerate(line.leaves):
 139         new_leaf = leaf.clone()
 140         if should_hug:
 141             new_leaf.prefix = ""
 142             should_hug = False
 143
 144         should_hug = (
 145             (0 < idx < len(line.leaves) - 1)
 146             and leaf.type == token.DOUBLESTAR
 147             and is_simple_operand(idx - 1, kind="base")
 148             and line.leaves[idx - 1].value != "lambda"
 149             and is_simple_operand(idx + 1, kind="exponent")
 150         )
 151         if should_hug:
 152             new_leaf.prefix = ""
 153
 154         # We have to be careful to make a new line properly:
 155         # - bracket related metadata must be maintained (handled by Line.append)
 156         # - comments need to copied over, updating the leaf IDs they're attached to
 157         new_line.append(new_leaf, preformatted=True)
 158         for comment_leaf in line.comments_after(leaf):
 159             new_line.append(comment_leaf, preformatted=True)
 160
 161     yield new_line
 162
 163
 164 class StringTransformer(ABC):
 165     """
 166     An implementation of the Transformer protocol that relies on its
 167     subclasses overriding the template methods `do_match(...)` and
 168     `do_transform(...)`.
 169
 170     This Transformer works exclusively on strings (for example, by merging
 171     or splitting them).
 172
 173     The following sections can be found among the docstrings of each concrete
 174     StringTransformer subclass.
 175
 176     Requirements:
 177         Which requirements must be met of the given Line for this
 178         StringTransformer to be applied?
 179
 180     Transformations:
 181         If the given Line meets all of the above requirements, which string
 182         transformations can you expect to be applied to it by this
 183         StringTransformer?
 184
 185     Collaborations:
 186         What contractual agreements does this StringTransformer have with other
 187         StringTransfomers? Such collaborations should be eliminated/minimized
 188         as much as possible.
 189     """
 190
 191     __name__: Final = "StringTransformer"
 192
 193     # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
 194     # `abc.ABC`.
 195     def __init__(self, line_length: int, normalize_strings: bool) -> None:
 196         self.line_length = line_length
 197         self.normalize_strings = normalize_strings
 198
 199     @abstractmethod
 200     def do_match(self, line: Line) -> TMatchResult:
 201         """
 202         Returns:
 203             * Ok(string_indices) such that for each index, `line.leaves[index]`
 204               is our target string if a match was able to be made. For
 205               transformers that don't result in more lines (e.g. StringMerger,
 206               StringParenStripper), multiple matches and transforms are done at
 207               once to reduce the complexity.
 208               OR
 209             * Err(CannotTransform), if no match could be made.
 210         """
 211
 212     @abstractmethod
 213     def do_transform(
 214         self, line: Line, string_indices: List[int]
 215     ) -> Iterator[TResult[Line]]:
 216         """
 217         Yields:
 218             * Ok(new_line) where new_line is the new transformed line.
 219               OR
 220             * Err(CannotTransform) if the transformation failed for some reason. The
 221               `do_match(...)` template method should usually be used to reject
 222               the form of the given Line, but in some cases it is difficult to
 223               know whether or not a Line meets the StringTransformer's
 224               requirements until the transformation is already midway.
 225
 226         Side Effects:
 227             This method should NOT mutate @line directly, but it MAY mutate the
 228             Line's underlying Node structure. (WARNING: If the underlying Node
 229             structure IS altered, then this method should NOT be allowed to
 230             yield an CannotTransform after that point.)
 231         """
 232
 233     def __call__(
 234         self, line: Line, _features: Collection[Feature], _mode: Mode
 235     ) -> Iterator[Line]:
 236         """
 237         StringTransformer instances have a call signature that mirrors that of
 238         the Transformer type.
 239
 240         Raises:
 241             CannotTransform(...) if the concrete StringTransformer class is unable
 242             to transform @line.
 243         """
 244         # Optimization to avoid calling `self.do_match(...)` when the line does
 245         # not contain any string.
 246         if not any(leaf.type == token.STRING for leaf in line.leaves):
 247             raise CannotTransform("There are no strings in this line.")
 248
 249         match_result = self.do_match(line)
 250
 251         if isinstance(match_result, Err):
 252             cant_transform = match_result.err()
 253             raise CannotTransform(
 254                 f"The string transformer {self.__class__.__name__} does not recognize"
 255                 " this line as one that it can transform."
 256             ) from cant_transform
 257
 258         string_indices = match_result.ok()
 259
 260         for line_result in self.do_transform(line, string_indices):
 261             if isinstance(line_result, Err):
 262                 cant_transform = line_result.err()
 263                 raise CannotTransform(
 264                     "StringTransformer failed while attempting to transform string."
 265                 ) from cant_transform
 266             line = line_result.ok()
 267             yield line
 268
 269
 270 @dataclass
 271 class CustomSplit:
 272     """A custom (i.e. manual) string split.
 273
 274     A single CustomSplit instance represents a single substring.
 275
 276     Examples:
 277         Consider the following string:
 278         ```
 279         "Hi there friend."
 280         " This is a custom"
 281         f" string {split}."
 282         ```
 283
 284         This string will correspond to the following three CustomSplit instances:
 285         ```
 286         CustomSplit(False, 16)
 287         CustomSplit(False, 17)
 288         CustomSplit(True, 16)
 289         ```
 290     """
 291
 292     has_prefix: bool
 293     break_idx: int
 294
 295
 296 @trait
 297 class CustomSplitMapMixin:
 298     """
 299     This mixin class is used to map merged strings to a sequence of
 300     CustomSplits, which will then be used to re-split the strings iff none of
 301     the resultant substrings go over the configured max line length.
 302     """
 303
 304     _Key: ClassVar = Tuple[StringID, str]
 305     _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
 306         tuple
 307     )
 308
 309     @staticmethod
 310     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
 311         """
 312         Returns:
 313             A unique identifier that is used internally to map @string to a
 314             group of custom splits.
 315         """
 316         return (id(string), string)
 317
 318     def add_custom_splits(
 319         self, string: str, custom_splits: Iterable[CustomSplit]
 320     ) -> None:
 321         """Custom Split Map Setter Method
 322
 323         Side Effects:
 324             Adds a mapping from @string to the custom splits @custom_splits.
 325         """
 326         key = self._get_key(string)
 327         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
 328
 329     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
 330         """Custom Split Map Getter Method
 331
 332         Returns:
 333             * A list of the custom splits that are mapped to @string, if any
 334               exist.
 335               OR
 336             * [], otherwise.
 337
 338         Side Effects:
 339             Deletes the mapping between @string and its associated custom
 340             splits (which are returned to the caller).
 341         """
 342         key = self._get_key(string)
 343
 344         custom_splits = self._CUSTOM_SPLIT_MAP[key]
 345         del self._CUSTOM_SPLIT_MAP[key]
 346
 347         return list(custom_splits)
 348
 349     def has_custom_splits(self, string: str) -> bool:
 350         """
 351         Returns:
 352             True iff @string is associated with a set of custom splits.
 353         """
 354         key = self._get_key(string)
 355         return key in self._CUSTOM_SPLIT_MAP
 356
 357
class StringMerger(StringTransformer, CustomSplitMapMixin):
    """StringTransformer that merges strings together.

    Requirements:
        (A) The line contains adjacent strings such that ALL of the validation checks
        listed in StringMerger._validate_msg(...)'s docstring pass.
        OR
        (B) The line contains a string which uses line continuation backslashes.

    Transformations:
        Depending on which of the two requirements above were met, either:

        (A) The string group associated with the target string is merged.
        OR
        (B) All line-continuation backslashes are removed from the target string.

    Collaborations:
        StringMerger provides custom split information to StringSplitter.
    """

    def do_match(self, line: Line) -> TMatchResult:
        """
        Scans @line for merge candidates. A candidate is either the first
        leaf of a string group (two or more adjacent STRING leaves) that is
        not part of an annotation, or a STRING leaf that is not followed by
        another STRING leaf and whose value contains a backslash line
        continuation.

        Returns:
            * Ok(string_indices), the indices of all candidates, if there is
              at least one.
              OR
            * Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        string_indices = []
        idx = 0
        while is_valid_index(idx):
            leaf = LL[idx]
            if (
                leaf.type == token.STRING
                and is_valid_index(idx + 1)
                and LL[idx + 1].type == token.STRING
            ):
                # Start of a string group. Groups that are part of an annotation
                # are skipped over without being recorded.
                if not is_part_of_annotation(leaf):
                    string_indices.append(idx)

                # Advance to the next non-STRING leaf.
                idx += 2
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            elif leaf.type == token.STRING and "\\\n" in leaf.value:
                # A lone string that contains a backslash line continuation.
                string_indices.append(idx)
                # Advance to the next non-STRING leaf.
                idx += 1
                while is_valid_index(idx) and LL[idx].type == token.STRING:
                    idx += 1

            else:
                idx += 1

        if string_indices:
            return Ok(string_indices)
        else:
            return TErr("This line has no strings that need merging.")

    def do_transform(
        self, line: Line, string_indices: List[int]
    ) -> Iterator[TResult[Line]]:
        """
        Attempts both kinds of merge on @line: first the removal of backslash
        line continuations, then the merging of string groups. The second
        step runs on the output of the first one when that one succeeded, and
        on @line otherwise; @string_indices is used as-is for both steps.

        Yields:
            * Ok(new_line), if at least one of the two steps succeeded.
              OR
            * Err(CannotTransform), if both failed. The failures of the two
              steps are chained onto it through `__cause__`.
        """
        new_line = line

        rblc_result = self._remove_backslash_line_continuation_chars(
            new_line, string_indices
        )
        if isinstance(rblc_result, Ok):
            new_line = rblc_result.ok()

        msg_result = self._merge_string_group(new_line, string_indices)
        if isinstance(msg_result, Ok):
            new_line = msg_result.ok()

        if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
            msg_cant_transform = msg_result.err()
            rblc_cant_transform = rblc_result.err()
            cant_transform = CannotTransform(
                "StringMerger failed to merge any strings in this line."
            )

            # Chain the errors together using `__cause__`.
            msg_cant_transform.__cause__ = rblc_cant_transform
            cant_transform.__cause__ = msg_cant_transform

            yield Err(cant_transform)
        else:
            yield Ok(new_line)

    @staticmethod
    def _remove_backslash_line_continuation_chars(
        line: Line, string_indices: List[int]
    ) -> TResult[Line]:
        """
        Merge strings that were split across multiple lines using
        line-continuation backslashes.

        Only the leaves designated by @string_indices are considered, and
        triple-quoted strings are left alone.

        Returns:
            Ok(new_line), if @line contains backslash line-continuation
            characters.
                OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        # Keep only the indices whose string actually needs the treatment.
        indices_to_transform = []
        for string_idx in string_indices:
            string_leaf = LL[string_idx]
            if (
                string_leaf.type == token.STRING
                and "\\\n" in string_leaf.value
                and not has_triple_quotes(string_leaf.value)
            ):
                indices_to_transform.append(string_idx)

        if not indices_to_transform:
            return TErr(
                "Found no string leaves that contain backslash line continuation"
                " characters."
            )

        # Work on a new line so that @line itself is not mutated; only the leaves
        # of the new line are edited below.
        new_line = line.clone()
        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL)

        for string_idx in indices_to_transform:
            new_string_leaf = new_line.leaves[string_idx]
            new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")

        return Ok(new_line)

    def _merge_string_group(
        self, line: Line, string_indices: List[int]
    ) -> TResult[Line]:
        """
        Merges string groups (i.e. set of adjacent strings).

        Each index from `string_indices` designates one string group's first
        leaf in `line.leaves`. Groups that fail validation are left untouched
        rather than failing the whole call.

        Returns:
            Ok(new_line), if ALL of the validation checks found in
            _validate_msg(...) pass for at least one string group.
                OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
        merged_string_idx_dict: Dict[int, Tuple[int, Leaf]] = {}
        for string_idx in string_indices:
            vresult = self._validate_msg(line, string_idx)
            if isinstance(vresult, Err):
                continue
            merged_string_idx_dict[string_idx] = self._merge_one_string_group(
                LL, string_idx, is_valid_index
            )

        if not merged_string_idx_dict:
            return TErr("No string group is merged")

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        # Start index and size of the most recently merged group; the initial -1
        # values make the range check below fail until a group has been seen.
        previous_merged_string_idx = -1
        previous_merged_num_of_strings = -1
        for i, leaf in enumerate(LL):
            if i in merged_string_idx_dict:
                # First leaf of a merged group: emit the merged string in its place.
                previous_merged_string_idx = i
                previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
                new_line.append(string_leaf)

            if (
                previous_merged_string_idx
                <= i
                < previous_merged_string_idx + previous_merged_num_of_strings
            ):
                # This leaf is one of the original strings of a merged group: drop
                # the leaf itself but carry its comments over to the new line.
                for comment_leaf in line.comments_after(LL[i]):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            # Any other leaf is copied over unchanged.
            append_leaves(new_line, line, [leaf])

        return Ok(new_line)

    def _merge_one_string_group(
        self, LL: List[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> Tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.

        Side Effects:
            * When the first string has a parent node, the node structure is
              altered so that it holds the merged string leaf instead of the
              original string leaves.
            * The custom splits of the merged string are recorded through
              add_custom_splits(...).
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The last character of the first string, i.e. its closing quote
        # character. The merged string is built with this quote character.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                string = _toggle_fexpr_quotes(string, QUOTE)
                # After quotes toggling, quotes in expressions won't be escaped
                # because quotes can't be reused in f-strings. So we can simply
                # let the escaping logic below run without knowing f-string
                # expressions.

            # Matches an even number (possibly zero) of consecutive backslashes;
            # a QUOTE preceded by such a run is not escaped yet.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have. It is the (lowercased) prefix of the first string in
        # the group that has a non-empty one, or "" if none does.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            # Append this substring, followed by a BREAK_MARK, to what has been
            # merged so far, then re-derive the naked form for the next round.
            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Take a note on the index of the non-STRING leaf.
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        # 'temp_string' is the merged content without prefix and quotes; each
        # round consumes it up to and including the next BREAK_MARK.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            # NOTE(review): the trailing `+ 1` presumably accounts for the opening
            # quote, which 'temp_string' does not include -- confirm against the
            # consumer of the custom splits.
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        # The leaf that is actually returned has all BREAK_MARKs removed.
        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf

    @staticmethod
    def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for _merge_string_group(...).

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
                OR
            * Err(CannotTransform), if any of the following are true:
                - The target string group contains stand-alone comments (i.e.
                  a stand-alone comment sits between two of its strings).
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group contains a triple-quoted (multiline) string.
                - The string group has more than one inline comment.
                - The string group has an inline comment that appears to be a pragma.
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.
                - The string group contains a raw string.

        NOTE: String groups that are stringified type annotations are not
        merged either, since pyright doesn't support them spanning multiple
        string values. That case is filtered out earlier, by do_match(...),
        and is not checked by this method. (mypy, pytype, pyre do support
        them, so we can change if pyright also gains support in the future.
        See https://github.com/microsoft/pyright/issues/4359.)
        """
        # We first check for "inner" stand-alone comments (i.e. stand-alone
        # comments that have a string leaf before them AND after them). The
        # scan runs once in each direction, starting at the target string.
        for inc in [1, -1]:
            i = string_idx
            found_sa_comment = False
            is_valid_index = is_valid_index_factory(line.leaves)
            while is_valid_index(i) and line.leaves[i].type in [
                token.STRING,
                STANDALONE_COMMENT,
            ]:
                if line.leaves[i].type == STANDALONE_COMMENT:
                    found_sa_comment = True
                elif found_sa_comment:
                    # A string was reached after having passed a stand-alone
                    # comment, so that comment is an "inner" one.
                    return TErr(
                        "StringMerger does NOT merge string groups which contain "
                        "stand-alone comments."
                    )

                i += inc

        # Then walk the group from the target string onwards, gathering what the
        # remaining checks need.
        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value).lower()
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
 770
 771
 772 class StringParenStripper(StringTransformer):
 773     """StringTransformer that strips surrounding parentheses from strings.
 774
 775     Requirements:
 776         The line contains a string which is surrounded by parentheses and:
 777             - The target string is NOT the only argument to a function call.
 778             - The target string is NOT a "pointless" string.
 779             - If the target string contains a PERCENT, the brackets are not
 780               preceded or followed by an operator with higher precedence than
 781               PERCENT.
 782
 783     Transformations:
 784         The parentheses mentioned in the 'Requirements' section are stripped.
 785
 786     Collaborations:
 787         StringParenStripper has its own inherent usefulness, but it is also
 788         relied on to clean up the parentheses created by StringParenWrapper (in
 789         the event that they are no longer needed).
 790     """
 791
 792     def do_match(self, line: Line) -> TMatchResult:
 793         LL = line.leaves
 794
 795         is_valid_index = is_valid_index_factory(LL)
 796
 797         string_indices = []
 798
 799         idx = -1
 800         while True:
 801             idx += 1
 802             if idx >= len(LL):
 803                 break
 804             leaf = LL[idx]
 805
 806             # Should be a string...
 807             if leaf.type != token.STRING:
 808                 continue
 809
 810             # If this is a "pointless" string...
 811             if (
 812                 leaf.parent
 813                 and leaf.parent.parent
 814                 and leaf.parent.parent.type == syms.simple_stmt
 815             ):
 816                 continue
 817
 818             # Should be preceded by a non-empty LPAR...
 819             if (
 820                 not is_valid_index(idx - 1)
 821                 or LL[idx - 1].type != token.LPAR
 822                 or is_empty_lpar(LL[idx - 1])
 823             ):
 824                 continue
 825
 826             # That LPAR should NOT be preceded by a function name or a closing
 827             # bracket (which could be a function which returns a function or a
 828             # list/dictionary that contains a function)...
 829             if is_valid_index(idx - 2) and (
 830                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
 831             ):
 832                 continue
 833
 834             string_idx = idx
 835
 836             # Skip the string trailer, if one exists.
 837             string_parser = StringParser()
 838             next_idx = string_parser.parse(LL, string_idx)
 839
 840             # if the leaves in the parsed string include a PERCENT, we need to
 841             # make sure the initial LPAR is NOT preceded by an operator with
 842             # higher or equal precedence to PERCENT
 843             if is_valid_index(idx - 2):
 844                 # mypy can't quite follow unless we name this
 845                 before_lpar = LL[idx - 2]
 846                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
 847                     (
 848                         before_lpar.type
 849                         in {
 850                             token.STAR,
 851                             token.AT,
 852                             token.SLASH,
 853                             token.DOUBLESLASH,
 854                             token.PERCENT,
 855                             token.TILDE,
 856                             token.DOUBLESTAR,
 857                             token.AWAIT,
 858                             token.LSQB,
 859                             token.LPAR,
 860                         }
 861                     )
 862                     or (
 863                         # only unary PLUS/MINUS
 864                         before_lpar.parent
 865                         and before_lpar.parent.type == syms.factor
 866                         and (before_lpar.type in {token.PLUS, token.MINUS})
 867                     )
 868                 ):
 869                     continue
 870
 871             # Should be followed by a non-empty RPAR...
 872             if (
 873                 is_valid_index(next_idx)
 874                 and LL[next_idx].type == token.RPAR
 875                 and not is_empty_rpar(LL[next_idx])
 876             ):
 877                 # That RPAR should NOT be followed by anything with higher
 878                 # precedence than PERCENT
 879                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
 880                     token.DOUBLESTAR,
 881                     token.LSQB,
 882                     token.LPAR,
 883                     token.DOT,
 884                 }:
 885                     continue
 886
 887                 string_indices.append(string_idx)
 888                 idx = string_idx
 889                 while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
 890                     idx += 1
 891
 892         if string_indices:
 893             return Ok(string_indices)
 894         return TErr("This line has no strings wrapped in parens.")
 895
 896     def do_transform(
 897         self, line: Line, string_indices: List[int]
 898     ) -> Iterator[TResult[Line]]:
 899         LL = line.leaves
 900
 901         string_and_rpar_indices: List[int] = []
 902         for string_idx in string_indices:
 903             string_parser = StringParser()
 904             rpar_idx = string_parser.parse(LL, string_idx)
 905
 906             should_transform = True
 907             for leaf in (LL[string_idx - 1], LL[rpar_idx]):
 908                 if line.comments_after(leaf):
 909                     # Should not strip parentheses which have comments attached
 910                     # to them.
 911                     should_transform = False
 912                     break
 913             if should_transform:
 914                 string_and_rpar_indices.extend((string_idx, rpar_idx))
 915
 916         if string_and_rpar_indices:
 917             yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
 918         else:
 919             yield Err(
 920                 CannotTransform("All string groups have comments attached to them.")
 921             )
 922
 923     def _transform_to_new_line(
 924         self, line: Line, string_and_rpar_indices: List[int]
 925     ) -> Line:
 926         LL = line.leaves
 927
 928         new_line = line.clone()
 929         new_line.comments = line.comments.copy()
 930
 931         previous_idx = -1
 932         # We need to sort the indices, since string_idx and its matching
 933         # rpar_idx may not come in order, e.g. in
 934         # `("outer" % ("inner".join(items)))`, the "inner" string's
 935         # string_idx is smaller than "outer" string's rpar_idx.
 936         for idx in sorted(string_and_rpar_indices):
 937             leaf = LL[idx]
 938             lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
 939             append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
 940             if leaf.type == token.STRING:
 941                 string_leaf = Leaf(token.STRING, LL[idx].value)
 942                 LL[lpar_or_rpar_idx].remove()  # Remove lpar.
 943                 replace_child(LL[idx], string_leaf)
 944                 new_line.append(string_leaf)
 945                 # replace comments
 946                 old_comments = new_line.comments.pop(id(LL[idx]), [])
 947                 new_line.comments.setdefault(id(string_leaf), []).extend(old_comments)
 948             else:
 949                 LL[lpar_or_rpar_idx].remove()  # This is a rpar.
 950
 951             previous_idx = idx
 952
 953         # Append the leaves after the last idx:
 954         append_leaves(new_line, line, LL[idx + 1 :])
 955
 956         return new_line
 957
 958
class BaseStringSplitter(StringTransformer):
    """
    Abstract class for StringTransformers which transform a Line's strings by splitting
    them or placing them on their own lines where necessary to avoid going over
    the configured line length.

    Subclasses implement `do_splitter_match(...)`; this class wraps it in
    `do_match(...)` and additionally enforces the requirements below.

    Requirements:
        * The target string value is responsible for the line going over the
          line length limit. It follows that after all of black's other line
          split methods have been exhausted, this line (or one of the resulting
          lines after all line splits are performed) would still be over the
          line_length limit unless we split this string.
          AND

        * The target string is NOT a "pointless" string (i.e. a string that has
          no parent or siblings).
          AND

        * The target string is not followed by an inline comment that appears
          to be a pragma.
          AND

        * The target string is not a multiline (i.e. triple-quote) string.
    """

    # Token types which, when they directly precede the target string, are
    # counted as part of the line's width by `_get_max_string_length`.
    STRING_OPERATORS: Final = [
        token.EQEQUAL,
        token.GREATER,
        token.GREATEREQUAL,
        token.LESS,
        token.LESSEQUAL,
        token.NOTEQUAL,
        token.PERCENT,
        token.PLUS,
        token.STAR,
    ]

    @abstractmethod
    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        BaseStringSplitter asks its clients to override this method instead of
        `StringTransformer.do_match(...)`.

        Follows the same protocol as `StringTransformer.do_match(...)`, with
        one extra constraint enforced by `BaseStringSplitter.do_match(...)`: a
        successful result must contain exactly one string index.

        Refer to `help(StringTransformer.do_match)` for more information.
        """

    def do_match(self, line: Line) -> TMatchResult:
        """
        Runs the subclass' `do_splitter_match(...)` and then validates the
        single matched string against this class's requirements.

        Returns:
            * The subclass' match result, if it is Ok and validation passes.
              OR
            * The Err produced by either the subclass' match or `_validate`.
        """
        match_result = self.do_splitter_match(line)
        if isinstance(match_result, Err):
            return match_result

        string_indices = match_result.ok()
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]
        vresult = self._validate(line, string_idx)
        if isinstance(vresult, Err):
            return vresult

        return match_result

    def _validate(self, line: Line, string_idx: int) -> TResult[None]:
        """
        Checks that @line meets all of the requirements listed in this class's
        docstring. Refer to `help(BaseStringSplitter)` for a detailed
        description of those requirements.

        Returns:
            * Ok(None), if ALL of the requirements are met.
              OR
            * Err(CannotTransform), if ANY of the requirements are NOT met.
        """
        LL = line.leaves

        string_leaf = LL[string_idx]

        # Requirement: the string itself must be what makes the line too long.
        max_string_length = self._get_max_string_length(line, string_idx)
        if len(string_leaf.value) <= max_string_length:
            return TErr(
                "The string itself is not what is causing this line to be too long."
            )

        # Requirement: not a "pointless" string, i.e. the leaf has a parent and
        # that parent is more than just `STRING NEWLINE`.
        if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
            token.STRING,
            token.NEWLINE,
        ]:
            return TErr(
                f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
                " no parent)."
            )

        # Requirement: no pragma comment attached to the string leaf.
        if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
            line.comments[id(line.leaves[string_idx])]
        ):
            return TErr(
                "Line appears to end with an inline pragma comment. Splitting the line"
                " could modify the pragma's behavior."
            )

        # Requirement: not a triple-quoted string.
        if has_triple_quotes(string_leaf.value):
            return TErr("We cannot split multiline strings.")

        return Ok(None)

    def _get_max_string_length(self, line: Line, string_idx: int) -> int:
        """
        Calculates the max string length used when attempting to determine
        whether or not the target string is responsible for causing the line to
        go over the line length limit.

        The method accumulates an `offset` for everything on the line that is
        not the string itself (indentation, a preceding operator, surrounding
        punctuation, a trailing method name, inline comments) and subtracts it
        from the configured line length.

        WARNING: This method is tightly coupled to both StringSplitter and
        (especially) StringParenWrapper. There is probably a better way to
        accomplish what is being done here.

        Returns:
            max_string_length: such that `len(line.leaves[string_idx].value) >
            max_string_length` implies that the target string IS responsible
            for causing this line to exceed the line length limit.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # We use the shorthand "WMA4" in comments to abbreviate "We must
        # account for". When giving examples, we use STRING to mean some/any
        # valid string.
        #
        # Finally, we use the following convenience variables:
        #
        #   P:  The leaf that is before the target string leaf.
        #   N:  The leaf that is after the target string leaf.
        #   NN: The leaf that is after N.

        # WMA4 the whitespace at the beginning of the line.
        offset = line.depth * 4

        if is_valid_index(string_idx - 1):
            p_idx = string_idx - 1
            if (
                LL[string_idx - 1].type == token.LPAR
                and LL[string_idx - 1].value == ""
                and string_idx >= 2
            ):
                # If the previous leaf is an empty LPAR placeholder, we should skip it.
                p_idx -= 1

            P = LL[p_idx]
            if P.type in self.STRING_OPERATORS:
                # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
                offset += len(str(P)) + 1

            if P.type == token.COMMA:
                # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
                offset += 3

            if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
                # This conditional branch is meant to handle dictionary keys,
                # variable assignments, 'return STRING' statement lines, and
                # 'else STRING' ternary expression lines.

                # WMA4 a single space.
                offset += 1

                # WMA4 the lengths of any leaves that came before that space,
                # but after any closing bracket before that space. (The closing
                # bracket itself is counted before the loop stops.)
                for leaf in reversed(LL[: p_idx + 1]):
                    offset += len(str(leaf))
                    if leaf.type in CLOSING_BRACKETS:
                        break

        if is_valid_index(string_idx + 1):
            N = LL[string_idx + 1]
            if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
                # If the next leaf is an empty RPAR placeholder, we should skip it.
                N = LL[string_idx + 2]

            if N.type == token.COMMA:
                # WMA4 a single comma at the end of the string (e.g `STRING,`).
                offset += 1

            if is_valid_index(string_idx + 2):
                # NOTE(review): NN is always LL[string_idx + 2], so when N was
                # advanced past an empty RPAR above, N and NN are the same leaf
                # and the DOT/NAME branch below cannot be taken -- confirm this
                # is intended.
                NN = LL[string_idx + 2]

                if N.type == token.DOT and NN.type == token.NAME:
                    # This conditional branch is meant to handle method calls invoked
                    # off of a string literal up to and including the LPAR character.

                    # WMA4 the '.' character.
                    offset += 1

                    if (
                        is_valid_index(string_idx + 3)
                        and LL[string_idx + 3].type == token.LPAR
                    ):
                        # WMA4 the left parenthesis character.
                        offset += 1

                    # WMA4 the length of the method's name.
                    offset += len(NN.value)

        has_comments = False
        for comment_leaf in line.comments_after(LL[string_idx]):
            if not has_comments:
                has_comments = True
                # WMA4 two spaces before the '#' character (counted once, no
                # matter how many comments follow the string).
                offset += 2

            # WMA4 the length of the inline comment.
            offset += len(comment_leaf.value)

        # The remaining budget (line length minus offset) is converted by
        # `count_chars_in_width` using the rendered line text; presumably this
        # maps a display width to a character count -- see black.strings.
        max_string_length = count_chars_in_width(str(line), self.line_length - offset)
        return max_string_length

    @staticmethod
    def _prefer_paren_wrap_match(LL: List[Leaf]) -> Optional[int]:
        """
        Checks whether a line that starts with a string should be wrapped in
        parentheses before it is split.

        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the "prefer paren wrap" statement
            requirements listed in the 'Requirements' section of the StringParenWrapper
            class's docstring. As implemented here this is always index 0.
                OR
            None, otherwise.
        """
        # The line must start with a string.
        if LL[0].type != token.STRING:
            return None

        matching_nodes = [
            syms.listmaker,
            syms.dictsetmaker,
            syms.testlist_gexp,
        ]
        # If the string is an immediate child of a list/set/tuple literal...
        # (the second test also accepts a string whose parent node is such a
        # child)
        if (
            parent_type(LL[0]) in matching_nodes
            or parent_type(LL[0].parent) in matching_nodes
        ):
            # And the string is surrounded by commas (or is the first/last child)...
            prev_sibling = LL[0].prev_sibling
            next_sibling = LL[0].next_sibling
            if (
                not prev_sibling
                and not next_sibling
                and parent_type(LL[0]) == syms.atom
            ):
                # If it's an atom string, we need to check the parent atom's siblings.
                parent = LL[0].parent
                assert parent is not None  # For type checkers.
                prev_sibling = parent.prev_sibling
                next_sibling = parent.next_sibling
            if (not prev_sibling or prev_sibling.type == token.COMMA) and (
                not next_sibling or next_sibling.type == token.COMMA
            ):
                return 0

        return None
1220
1221
def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yield the (start, end) span of every top-level expression in an f-string.

    Each span is half-open: `start` is the index of the opening curly brace and
    `end` is one past the matching closing brace. Nested braces (e.g. format
    specs) are part of the enclosing span and are not reported separately.
    The input is expected to be a valid f-string; malformed input does not
    raise, it simply produces fewer (or no) spans.
    """
    open_braces: List[int] = []  # indices of the currently unmatched "{"
    length = len(s)
    pos = 0
    while pos < length:
        ch = s[pos]

        if ch == "{":
            # Outside of any expression, "{{" is an escaped literal brace.
            if not open_braces and s[pos + 1 : pos + 2] == "{":
                pos += 2
            else:
                open_braces.append(pos)
                pos += 1
            continue

        if ch == "}":
            if open_braces:
                start = open_braces.pop()
                # Only report once the outermost brace has been closed.
                if not open_braces:
                    yield (start, pos + 1)
            pos += 1
            continue

        # Inside an expression, string literals are skipped wholesale so that
        # braces within them are not miscounted. Backslashes are not legal in
        # the expression part of an f-string, so no escape handling is needed.
        if open_braces:
            quote = None
            triple = s[pos : pos + 3]
            if triple in ("'''", '"""'):
                quote = triple
            elif ch in ("'", '"'):
                quote = ch
            if quote is not None:
                pos += len(quote)
                while pos < length and not s.startswith(quote, pos):
                    pos += 1
                pos += len(quote)
                continue

        pos += 1
1267
1268
def fstring_contains_expr(s: str) -> bool:
    """Return True iff `iter_fexpr_spans` finds at least one expression in @s."""
    return next(iter_fexpr_spans(s), None) is not None
1271
1272
def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Swap every `old_quote` found inside the f-string's expressions for the
    other quote character, leaving the literal parts of the string untouched.

    Backslashes are not allowed inside f-string expressions, so when the
    f-string itself is about to use `old_quote` as its delimiter, the quotes
    inside the expressions have to be switched instead of escaped. A plain
    replacement is sufficient because an expression cannot reuse the
    enclosing quote; such code would fail to parse.

    NOTE: If PEP 701 is accepted, above statement will no longer be true.
    Though if quotes can be reused, we can simply reuse them without updates or
    escaping, once Black figures out how to parse the new grammar.
    """
    replacement = '"' if old_quote != '"' else "'"
    pieces: List[str] = []
    cursor = 0
    for span_start, span_end in iter_fexpr_spans(fstring):
        literal_part = fstring[cursor:span_start]
        expr_part = fstring[span_start:span_end]
        pieces += [literal_part, expr_part.replace(old_quote, replacement)]
        cursor = span_end
    # Whatever follows the last expression is literal text.
    pieces.append(fstring[cursor:])
    return "".join(pieces)
1295
1296
1297 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
1298     """
1299     StringTransformer that splits "atom" strings (i.e. strings which exist on
1300     lines by themselves).
1301
1302     Requirements:
1303         * The line consists ONLY of a single string (possibly prefixed by a
1304           string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
1305           a trailing comma.
1306           AND
1307         * All of the requirements listed in BaseStringSplitter's docstring.
1308
1309     Transformations:
1310         The string mentioned in the 'Requirements' section is split into as
1311         many substrings as necessary to adhere to the configured line length.
1312
1313         In the final set of substrings, no substring should be smaller than
1314         MIN_SUBSTR_SIZE characters.
1315
1316         The string will ONLY be split on spaces (i.e. each new substring should
1317         start with a space). Note that the string will NOT be split on a space
1318         which is escaped with a backslash.
1319
1320         If the string is an f-string, it will NOT be split in the middle of an
1321         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
1322         else bar()} is an f-expression).
1323
1324         If the string that is being split has an associated set of custom split
1325         records and those custom splits will NOT result in any line going over
1326         the configured line length, those custom splits are used. Otherwise the
1327         string is split as late as possible (from left-to-right) while still
1328         adhering to the transformation rules listed above.
1329
1330     Collaborations:
1331         StringSplitter relies on StringMerger to construct the appropriate
1332         CustomSplit objects and add them to the custom split map.
1333     """
1334
1335     MIN_SUBSTR_SIZE: Final = 6
1336
1337     def do_splitter_match(self, line: Line) -> TMatchResult:
1338         LL = line.leaves
1339
1340         if self._prefer_paren_wrap_match(LL) is not None:
1341             return TErr("Line needs to be wrapped in parens first.")
1342
1343         is_valid_index = is_valid_index_factory(LL)
1344
1345         idx = 0
1346
1347         # The first two leaves MAY be the 'not in' keywords...
1348         if (
1349             is_valid_index(idx)
1350             and is_valid_index(idx + 1)
1351             and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
1352             and str(LL[idx]) + str(LL[idx + 1]) == "not in"
1353         ):
1354             idx += 2
1355         # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
1356         elif is_valid_index(idx) and (
1357             LL[idx].type in self.STRING_OPERATORS
1358             or LL[idx].type == token.NAME
1359             and str(LL[idx]) == "in"
1360         ):
1361             idx += 1
1362
1363         # The next/first leaf MAY be an empty LPAR...
1364         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
1365             idx += 1
1366
1367         # The next/first leaf MUST be a string...
1368         if not is_valid_index(idx) or LL[idx].type != token.STRING:
1369             return TErr("Line does not start with a string.")
1370
1371         string_idx = idx
1372
1373         # Skip the string trailer, if one exists.
1374         string_parser = StringParser()
1375         idx = string_parser.parse(LL, string_idx)
1376
1377         # That string MAY be followed by an empty RPAR...
1378         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
1379             idx += 1
1380
1381         # That string / empty RPAR leaf MAY be followed by a comma...
1382         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1383             idx += 1
1384
1385         # But no more leaves are allowed...
1386         if is_valid_index(idx):
1387             return TErr("This line does not end with a string.")
1388
1389         return Ok([string_idx])
1390
1391     def do_transform(
1392         self, line: Line, string_indices: List[int]
1393     ) -> Iterator[TResult[Line]]:
1394         LL = line.leaves
1395         assert len(string_indices) == 1, (
1396             f"{self.__class__.__name__} should only find one match at a time, found"
1397             f" {len(string_indices)}"
1398         )
1399         string_idx = string_indices[0]
1400
1401         QUOTE = LL[string_idx].value[-1]
1402
1403         is_valid_index = is_valid_index_factory(LL)
1404         insert_str_child = insert_str_child_factory(LL[string_idx])
1405
1406         prefix = get_string_prefix(LL[string_idx].value).lower()
1407
1408         # We MAY choose to drop the 'f' prefix from substrings that don't
1409         # contain any f-expressions, but ONLY if the original f-string
1410         # contains at least one f-expression. Otherwise, we will alter the AST
1411         # of the program.
1412         drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
1413             LL[string_idx].value
1414         )
1415
1416         first_string_line = True
1417
1418         string_op_leaves = self._get_string_operator_leaves(LL)
1419         string_op_leaves_length = (
1420             sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1
1421             if string_op_leaves
1422             else 0
1423         )
1424
1425         def maybe_append_string_operators(new_line: Line) -> None:
1426             """
1427             Side Effects:
1428                 If @line starts with a string operator and this is the first
1429                 line we are constructing, this function appends the string
1430                 operator to @new_line and replaces the old string operator leaf
1431                 in the node structure. Otherwise this function does nothing.
1432             """
1433             maybe_prefix_leaves = string_op_leaves if first_string_line else []
1434             for i, prefix_leaf in enumerate(maybe_prefix_leaves):
1435                 replace_child(LL[i], prefix_leaf)
1436                 new_line.append(prefix_leaf)
1437
1438         ends_with_comma = (
1439             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
1440         )
1441
1442         def max_last_string_column() -> int:
1443             """
1444             Returns:
1445                 The max allowed width of the string value used for the last
1446                 line we will construct.  Note that this value means the width
1447                 rather than the number of characters (e.g., many East Asian
1448                 characters expand to two columns).
1449             """
1450             result = self.line_length
1451             result -= line.depth * 4
1452             result -= 1 if ends_with_comma else 0
1453             result -= string_op_leaves_length
1454             return result
1455
1456         # --- Calculate Max Break Width (for string value)
1457         # We start with the line length limit
1458         max_break_width = self.line_length
1459         # The last index of a string of length N is N-1.
1460         max_break_width -= 1
1461         # Leading whitespace is not present in the string value (e.g. Leaf.value).
1462         max_break_width -= line.depth * 4
1463         if max_break_width < 0:
1464             yield TErr(
1465                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
1466                 f" {line.depth}"
1467             )
1468             return
1469
1470         # Check if StringMerger registered any custom splits.
1471         custom_splits = self.pop_custom_splits(LL[string_idx].value)
1472         # We use them ONLY if none of them would produce lines that exceed the
1473         # line limit.
1474         use_custom_breakpoints = bool(
1475             custom_splits
1476             and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
1477         )
1478
1479         # Temporary storage for the remaining chunk of the string line that
1480         # can't fit onto the line currently being constructed.
1481         rest_value = LL[string_idx].value
1482
1483         def more_splits_should_be_made() -> bool:
1484             """
1485             Returns:
1486                 True iff `rest_value` (the remaining string value from the last
1487                 split), should be split again.
1488             """
1489             if use_custom_breakpoints:
1490                 return len(custom_splits) > 1
1491             else:
1492                 return str_width(rest_value) > max_last_string_column()
1493
1494         string_line_results: List[Ok[Line]] = []
1495         while more_splits_should_be_made():
1496             if use_custom_breakpoints:
1497                 # Custom User Split (manual)
1498                 csplit = custom_splits.pop(0)
1499                 break_idx = csplit.break_idx
1500             else:
1501                 # Algorithmic Split (automatic)
1502                 max_bidx = (
1503                     count_chars_in_width(rest_value, max_break_width)
1504                     - string_op_leaves_length
1505                 )
1506                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
1507                 if maybe_break_idx is None:
1508                     # If we are unable to algorithmically determine a good split
1509                     # and this string has custom splits registered to it, we
1510                     # fall back to using them--which means we have to start
1511                     # over from the beginning.
1512                     if custom_splits:
1513                         rest_value = LL[string_idx].value
1514                         string_line_results = []
1515                         first_string_line = True
1516                         use_custom_breakpoints = True
1517                         continue
1518
1519                     # Otherwise, we stop splitting here.
1520                     break
1521
1522                 break_idx = maybe_break_idx
1523
1524             # --- Construct `next_value`
1525             next_value = rest_value[:break_idx] + QUOTE
1526
1527             # HACK: The following 'if' statement is a hack to fix the custom
1528             # breakpoint index in the case of either: (a) substrings that were
1529             # f-strings but will have the 'f' prefix removed OR (b) substrings
1530             # that were not f-strings but will now become f-strings because of
1531             # redundant use of the 'f' prefix (i.e. none of the substrings
1532             # contain f-expressions but one or more of them had the 'f' prefix
1533             # anyway; in which case, we will prepend 'f' to _all_ substrings).
1534             #
1535             # There is probably a better way to accomplish what is being done
1536             # here...
1537             #
1538             # If this substring is an f-string, we _could_ remove the 'f'
1539             # prefix, and the current custom split did NOT originally use a
1540             # prefix...
1541             if (
1542                 use_custom_breakpoints
1543                 and not csplit.has_prefix
1544                 and (
1545                     # `next_value == prefix + QUOTE` happens when the custom
1546                     # split is an empty string.
1547                     next_value == prefix + QUOTE
1548                     or next_value != self._normalize_f_string(next_value, prefix)
1549                 )
1550             ):
1551                 # Then `csplit.break_idx` will be off by one after removing
1552                 # the 'f' prefix.
1553                 break_idx += 1
1554                 next_value = rest_value[:break_idx] + QUOTE
1555
1556             if drop_pointless_f_prefix:
1557                 next_value = self._normalize_f_string(next_value, prefix)
1558
1559             # --- Construct `next_leaf`
1560             next_leaf = Leaf(token.STRING, next_value)
1561             insert_str_child(next_leaf)
1562             self._maybe_normalize_string_quotes(next_leaf)
1563
1564             # --- Construct `next_line`
1565             next_line = line.clone()
1566             maybe_append_string_operators(next_line)
1567             next_line.append(next_leaf)
1568             string_line_results.append(Ok(next_line))
1569
1570             rest_value = prefix + QUOTE + rest_value[break_idx:]
1571             first_string_line = False
1572
1573         yield from string_line_results
1574
1575         if drop_pointless_f_prefix:
1576             rest_value = self._normalize_f_string(rest_value, prefix)
1577
1578         rest_leaf = Leaf(token.STRING, rest_value)
1579         insert_str_child(rest_leaf)
1580
1581         # NOTE: I could not find a test case that verifies that the following
1582         # line is actually necessary, but it seems to be. Otherwise we risk
1583         # not normalizing the last substring, right?
1584         self._maybe_normalize_string_quotes(rest_leaf)
1585
1586         last_line = line.clone()
1587         maybe_append_string_operators(last_line)
1588
1589         # If there are any leaves to the right of the target string...
1590         if is_valid_index(string_idx + 1):
1591             # We use `temp_value` here to determine how long the last line
1592             # would be if we were to append all the leaves to the right of the
1593             # target string to the last string line.
1594             temp_value = rest_value
1595             for leaf in LL[string_idx + 1 :]:
1596                 temp_value += str(leaf)
1597                 if leaf.type == token.LPAR:
1598                     break
1599
1600             # Try to fit them all on the same line with the last substring...
1601             if (
1602                 str_width(temp_value) <= max_last_string_column()
1603                 or LL[string_idx + 1].type == token.COMMA
1604             ):
1605                 last_line.append(rest_leaf)
1606                 append_leaves(last_line, line, LL[string_idx + 1 :])
1607                 yield Ok(last_line)
1608             # Otherwise, place the last substring on one line and everything
1609             # else on a line below that...
1610             else:
1611                 last_line.append(rest_leaf)
1612                 yield Ok(last_line)
1613
1614                 non_string_line = line.clone()
1615                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
1616                 yield Ok(non_string_line)
1617         # Else the target string was the last leaf...
1618         else:
1619             last_line.append(rest_leaf)
1620             last_line.comments = line.comments.copy()
1621             yield Ok(last_line)
1622
1623     def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1624         """
1625         Yields:
1626             All ranges of @string which, if @string were to be split there,
1627             would result in the splitting of an \\N{...} expression (which is NOT
1628             allowed).
1629         """
1630         # True - the previous backslash was unescaped
1631         # False - the previous backslash was escaped *or* there was no backslash
1632         previous_was_unescaped_backslash = False
1633         it = iter(enumerate(string))
1634         for idx, c in it:
1635             if c == "\\":
1636                 previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1637                 continue
1638             if not previous_was_unescaped_backslash or c != "N":
1639                 previous_was_unescaped_backslash = False
1640                 continue
1641             previous_was_unescaped_backslash = False
1642
1643             begin = idx - 1  # the position of backslash before \N{...}
1644             for idx, c in it:
1645                 if c == "}":
1646                     end = idx
1647                     break
1648             else:
1649                 # malformed nameescape expression?
1650                 # should have been detected by AST parsing earlier...
1651                 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1652             yield begin, end
1653
1654     def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1655         """
1656         Yields:
1657             All ranges of @string which, if @string were to be split there,
1658             would result in the splitting of an f-expression (which is NOT
1659             allowed).
1660         """
1661         if "f" not in get_string_prefix(string).lower():
1662             return
1663         yield from iter_fexpr_spans(string)
1664
1665     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
1666         illegal_indices: Set[Index] = set()
1667         iterators = [
1668             self._iter_fexpr_slices(string),
1669             self._iter_nameescape_slices(string),
1670         ]
1671         for it in iterators:
1672             for begin, end in it:
1673                 illegal_indices.update(range(begin, end + 1))
1674         return illegal_indices
1675
1676     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
1677         """
1678         This method contains the algorithm that StringSplitter uses to
1679         determine which character to split each string at.
1680
1681         Args:
1682             @string: The substring that we are attempting to split.
1683             @max_break_idx: The ideal break index. We will return this value if it
1684             meets all the necessary conditions. In the likely event that it
1685             doesn't we will try to find the closest index BELOW @max_break_idx
1686             that does. If that fails, we will expand our search by also
1687             considering all valid indices ABOVE @max_break_idx.
1688
1689         Pre-Conditions:
1690             * assert_is_leaf_string(@string)
1691             * 0 <= @max_break_idx < len(@string)
1692
1693         Returns:
1694             break_idx, if an index is able to be found that meets all of the
1695             conditions listed in the 'Transformations' section of this classes'
1696             docstring.
1697                 OR
1698             None, otherwise.
1699         """
1700         is_valid_index = is_valid_index_factory(string)
1701
1702         assert is_valid_index(max_break_idx)
1703         assert_is_leaf_string(string)
1704
1705         _illegal_split_indices = self._get_illegal_split_indices(string)
1706
1707         def breaks_unsplittable_expression(i: Index) -> bool:
1708             """
1709             Returns:
1710                 True iff returning @i would result in the splitting of an
1711                 unsplittable expression (which is NOT allowed).
1712             """
1713             return i in _illegal_split_indices
1714
1715         def passes_all_checks(i: Index) -> bool:
1716             """
1717             Returns:
1718                 True iff ALL of the conditions listed in the 'Transformations'
1719                 section of this classes' docstring would be be met by returning @i.
1720             """
1721             is_space = string[i] == " "
1722             is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS
1723
1724             is_not_escaped = True
1725             j = i - 1
1726             while is_valid_index(j) and string[j] == "\\":
1727                 is_not_escaped = not is_not_escaped
1728                 j -= 1
1729
1730             is_big_enough = (
1731                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
1732                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
1733             )
1734             return (
1735                 (is_space or is_split_safe)
1736                 and is_not_escaped
1737                 and is_big_enough
1738                 and not breaks_unsplittable_expression(i)
1739             )
1740
1741         # First, we check all indices BELOW @max_break_idx.
1742         break_idx = max_break_idx
1743         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
1744             break_idx -= 1
1745
1746         if not passes_all_checks(break_idx):
1747             # If that fails, we check all indices ABOVE @max_break_idx.
1748             #
1749             # If we are able to find a valid index here, the next line is going
1750             # to be longer than the specified line length, but it's probably
1751             # better than doing nothing at all.
1752             break_idx = max_break_idx + 1
1753             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
1754                 break_idx += 1
1755
1756             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
1757                 return None
1758
1759         return break_idx
1760
1761     def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
1762         if self.normalize_strings:
1763             leaf.value = normalize_string_quotes(leaf.value)
1764
1765     def _normalize_f_string(self, string: str, prefix: str) -> str:
1766         """
1767         Pre-Conditions:
1768             * assert_is_leaf_string(@string)
1769
1770         Returns:
1771             * If @string is an f-string that contains no f-expressions, we
1772             return a string identical to @string except that the 'f' prefix
1773             has been stripped and all double braces (i.e. '{{' or '}}') have
1774             been normalized (i.e. turned into '{' or '}').
1775                 OR
1776             * Otherwise, we return @string.
1777         """
1778         assert_is_leaf_string(string)
1779
1780         if "f" in prefix and not fstring_contains_expr(string):
1781             new_prefix = prefix.replace("f", "")
1782
1783             temp = string[len(prefix) :]
1784             temp = re.sub(r"\{\{", "{", temp)
1785             temp = re.sub(r"\}\}", "}", temp)
1786             new_string = temp
1787
1788             return f"{new_prefix}{new_string}"
1789         else:
1790             return string
1791
1792     def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
1793         LL = list(leaves)
1794
1795         string_op_leaves = []
1796         i = 0
1797         while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
1798             prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
1799             string_op_leaves.append(prefix_leaf)
1800             i += 1
1801         return string_op_leaves
1802
1803
class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that wraps strings in parens and then splits at the LPAR.

    Requirements:
        All of the requirements listed in BaseStringSplitter's docstring in
        addition to the requirements listed below:

        * The line is a return/yield statement, which returns/yields a string.
          OR
        * The line is part of a ternary expression (e.g. `x = y if cond else
          z`) such that the line starts with `else <string>`, where <string> is
          some string.
          OR
        * The line is an assert statement, which ends with a string.
          OR
        * The line is an assignment statement (e.g. `x = <string>` or `x +=
          <string>`) such that the variable is being assigned the value of some
          string.
          OR
        * The line is a dictionary key assignment where some valid key is being
          assigned the value of some string.
          OR
        * The line is a lambda expression and the value is a string.
          OR
        * The line starts with an "atom" string that prefers to be wrapped in
          parens. It's preferred to be wrapped when it is an immediate child of
          a list/set/tuple literal, AND the string is surrounded by commas (or is
          the first/last child).

    Transformations:
        The chosen string is wrapped in parentheses and then split at the LPAR.

        We then have one line which ends with an LPAR and another line that
        starts with the chosen string. The latter line is then split again at
        the RPAR. This results in the RPAR (and possibly a trailing comma)
        being placed on its own line.

        NOTE: If any leaves exist to the right of the chosen string (except
        for a trailing comma, which would be placed after the RPAR), those
        leaves are placed inside the parentheses.  In effect, the chosen
        string is not necessarily being "wrapped" by parentheses. We can,
        however, count on the LPAR being placed directly before the chosen
        string.

        In other words, StringParenWrapper creates "atom" strings. These
        can then be split again by StringSplitter, if necessary.

    Collaborations:
        In the event that a string line split by StringParenWrapper is
        changed such that it no longer needs to be given its own line,
        StringParenWrapper relies on StringParenStripper to clean up the
        parentheses it created.

        For "atom" strings that prefer to be wrapped in parens, it requires
        StringSplitter to hold the split until the string is wrapped in parens.
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        Looks for the one string on @line that should be wrapped in parens.

        Returns:
            Ok([string_idx]), if one of this class's matchers finds a target
            string on @line and wrapping it is worthwhile.
                OR
            TErr(...), if @line ends in an opening bracket, if no matcher
            accepts @line, or if the matched string has no split points, is
            still too wide for the next indentation level, and has no custom
            splits registered.
        """
        LL = line.leaves

        if line.leaves[-1].type in OPENING_BRACKETS:
            return TErr(
                "Cannot wrap parens around a line that ends in an opening bracket."
            )

        # The matchers are tried in priority order and the first one that
        # returns an index wins. The result is compared against None (rather
        # than chained with `or`) because 0 is a perfectly valid string index.
        matchers = (
            self._return_match,
            self._else_match,
            self._assert_match,
            self._assign_match,
            self._dict_or_lambda_match,
            self._prefer_paren_wrap_match,
        )
        string_idx: Optional[int] = None
        for matcher in matchers:
            string_idx = matcher(LL)
            if string_idx is not None:
                break

        if string_idx is not None:
            string_value = line.leaves[string_idx].value
            # If the string has neither spaces nor East Asian stops...
            if not any(
                char == " " or char in SPLIT_SAFE_CHARS for char in string_value
            ):
                # And will still violate the line length limit when split...
                max_string_width = self.line_length - ((line.depth + 1) * 4)
                if str_width(string_value) > max_string_width:
                    # And has no associated custom splits...
                    if not self.has_custom_splits(string_value):
                        # Then we should NOT put this string on its own line.
                        return TErr(
                            "We do not wrap long strings in parentheses when the"
                            " resultant line would still be over the specified line"
                            " length and can't be split further by StringSplitter."
                        )
            return Ok([string_idx])

        return TErr("This line does not contain any non-atomic strings.")

    @staticmethod
    def _return_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the return/yield statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a return/yield statement and the first leaf
        # contains either the "return" or "yield" keywords...
        if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
            0
        ].value in ["return", "yield"]:
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty paren directly after the keyword, if any.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _else_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the ternary expression
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a ternary expression and the first leaf
        # contains the "else" keyword...
        if (
            parent_type(LL[0]) == syms.test
            and LL[0].type == token.NAME
            and LL[0].value == "else"
        ):
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty paren directly after the keyword, if any.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _assert_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assert statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an assert statement and the first leaf
        # contains the "assert" keyword...
        if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a comma...
                if leaf.type == token.COMMA:
                    # Skip over an empty paren directly after the comma. The
                    # bounds check keeps a comma in last position from
                    # raising an IndexError.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That comma MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _assign_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assignment statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an expression statement or is a function
        # argument AND the first leaf contains a variable name...
        if (
            parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
            and LL[0].type == token.NAME
        ):
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find either an '=' or '+=' symbol...
                if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                    # Skip over an empty paren directly after the symbol. The
                    # bounds check keeps a symbol in last position from
                    # raising an IndexError.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That symbol MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # The next leaf MAY be a comma iff this line is a part
                        # of a function argument...
                        if (
                            parent_type(LL[0]) == syms.argument
                            and is_valid_index(idx)
                            and LL[idx].type == token.COMMA
                        ):
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _dict_or_lambda_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the dictionary key assignment
            statement or lambda expression requirements listed in the
            'Requirements' section of this class's docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a dictionary key assignment or lambda expression...
        parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)]
        if syms.dictsetmaker in parent_types or syms.lambdef in parent_types:
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a colon, it can either be dict's or lambda's colon...
                if leaf.type == token.COLON and i < len(LL) - 1:
                    # Skip over an empty paren directly after the colon.
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That colon MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # That string MAY be followed by a comma...
                        if is_valid_index(idx) and LL[idx].type == token.COMMA:
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    def do_transform(
        self, line: Line, string_indices: List[int]
    ) -> Iterator[TResult[Line]]:
        """
        Splits @line into three lines around the string at @string_indices[0].

        Args:
            @line: The line that contains the target string.
            @string_indices: Exactly one index into @line.leaves, pointing at
            the target string.

        Yields:
            Three Ok(Line) results, in order:
            * everything left of the target string followed by a new LPAR
              (plus the comments attached to the string, to a trailing comma
              and to a replaced LPAR),
            * the target string, one level deeper, followed by the leaves
              that stay inside the parens,
            * a new RPAR, followed by the trailing comma if @line ended with
              one.
        """
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        # A trailing comma, if present, is always the very last leaf.
        comma_idx = -1
        ends_with_comma = LL[comma_idx].type == token.COMMA

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                # The comma goes on the last line, after the RPAR.
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
                #        "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
                    if (
                        index > 0
                        and index < len(left_leaves) - 1
                        and left_leaves[index - 1].type == token.COLON
                        and left_leaves[index + 1].value == "lambda"
                    ):
                        right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
2196
2197
class StringParser:
    """
    State machine used to find where a string's "trailer" ends.

    A trailer is whatever is syntactically glued to the string leaf: nothing
    at all, an old-style formatting sequence (e.g. `% varX` or
    `% (varX, varY)`), or an attribute access / method call (e.g.
    `.format(varX, varY)`).

    NOTE: The parser keeps state between calls, so a fresh StringParser MUST
    be created for every string trailer that needs to be parsed.

    Examples:
        Suppose `line` is the `Line` object for this line of python code:
        ```
        x = "Some {}.".format("String") + some_other_string
        ```

        and that `string_idx` is an index such that:
        ```
        assert line.leaves[string_idx].value == "Some {}."
        ```

        Then the following holds:
        ```
        string_parser = StringParser()
        idx = string_parser.parse(line.leaves, string_idx)
        assert line.leaves[idx].type == token.PLUS
        ```
    """

    # Pseudo token type used as the "anything else" key in the lookup table.
    DEFAULT_TOKEN: Final = 20210605

    # String Parser States
    START: Final = 1
    DOT: Final = 2
    NAME: Final = 3
    PERCENT: Final = 4
    SINGLE_FMT_ARG: Final = 5
    LPAR: Final = 6
    RPAR: Final = 7
    DONE: Final = 8

    # Lookup Table for Next State, keyed by (current state, token type).
    _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
        # From START, a trailer begins with '.' or '%'; anything else means
        # there is no trailer.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # The only transition out of DOT is a NAME (attribute / method name).
        (DOT, token.NAME): NAME,
        # A NAME followed by '(' is a method call; otherwise the name was an
        # attribute and the trailer is over.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # '%' is followed either by '(' or by a single argument (e.g. a
        # string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # A single '%' argument is the final leaf of the trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # The ')' that closes the trailer's parentheses is its final leaf.
        # (NOTE: There are no entries for the LPAR state here: leaves seen
        # while inside parentheses are handled separately in _next_state.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        self._state = self.START
        # Number of non-empty '(' leaves seen so far that are still open.
        self._unmatched_lpars = 0

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index directly after the last leaf that is a part of the
            string trailer, if a "trailer" exists.
            OR
            @string_idx + 1, if no string "trailer" exists.
        """
        assert leaves[string_idx].type == token.STRING

        leaf_count = len(leaves)
        idx = string_idx + 1
        while idx < leaf_count:
            # Stop at the first leaf that does not belong to the trailer.
            if not self._next_state(leaves[idx]):
                break
            idx += 1
        return idx

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Feeds one leaf to the state machine.

        Pre-conditions:
            * The first call MUST receive the leaf directly after the string
              leaf in question (e.g. if our target string is `line.leaves[i]`
              then the first call must be given `line.leaves[i + 1]`).
            * Every later call MUST receive the leaf directly following the
              one passed to the previous call.

        Returns:
            True iff @leaf is a part of the string's trailer.

        Raises:
            RuntimeError: if the lookup table has neither a specific nor a
            default transition for the current state.
        """
        # Empty LPAR / RPAR leaves never affect the state; they are simply
        # counted as belonging to the trailer.
        if is_empty_par(leaf):
            return True

        token_type = leaf.type
        # Count every opening paren *before* looking at the state, so that the
        # '(' which moves us into the LPAR state is itself counted.
        if token_type == token.LPAR:
            self._unmatched_lpars += 1

        state = self._state

        # Inside parentheses everything belongs to the trailer; we only watch
        # for the RPAR that balances the outermost LPAR.
        if state == self.LPAR:
            if token_type == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
            return True

        # Everywhere else the lookup table decides: prefer the entry for this
        # exact token and fall back to the state's default entry.
        target = self._goto.get((state, token_type))
        if target is None:
            target = self._goto.get((state, self.DEFAULT_TOKEN))
        if target is None:
            # Neither a specific nor a default transition exists, which means
            # this parser has a logic error.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        self._state = target
        # The leaf that takes us to DONE is the first one past the trailer.
        return target != self.DONE
2338
2339
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Detaches @string_leaf from its parent and returns a helper that inserts
    replacement children, one per call, into the position that @string_leaf
    used to occupy.

    Each call to the returned helper places its argument directly after the
    child inserted by the previous call, so children end up in call order.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`. Assume the node `N` starts out as:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        Now run the following snippet.
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        Afterwards `string_leaf.parent is None` and the node `N` looks like:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Remember the parent first: removing the leaf is what orphans it.
    former_parent = string_leaf.parent
    # Slot the next inserted child should occupy; starts where the string was.
    next_slot = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal next_slot

        # These are checked on every call rather than in the factory, so a
        # parentless @string_leaf only fails once an insertion is attempted.
        assert former_parent is not None
        assert next_slot is not None

        former_parent.insert_child(next_slot, child)
        # Advance so that the following child lands right after this one.
        next_slot += 1

    return insert_str_child
2399
2400
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Builds a predicate that tells whether an index can be used to subscript
    @seq without going out of bounds. Negative indices are always rejected.

    The length of @seq is looked up on every call of the predicate, not when
    the factory runs.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        # Negative indices would wrap around, so they are never "valid" here.
        if idx < 0:
            return False
        return idx < len(seq)

    return is_valid_index