src/black/trans.py

   1 """
   2 String transformers that can split and merge strings.
   3 """
   4
   5 import re
   6 from abc import ABC, abstractmethod
   7 from collections import defaultdict
   8 from dataclasses import dataclass
   9 from typing import (
  10     Any,
  11     Callable,
  12     ClassVar,
  13     Collection,
  14     Dict,
  15     Final,
  16     Iterable,
  17     Iterator,
  18     List,
  19     Literal,
  20     Optional,
  21     Sequence,
  22     Set,
  23     Tuple,
  24     TypeVar,
  25     Union,
  26 )
  27
  28 from mypy_extensions import trait
  29
  30 from black.comments import contains_pragma_comment
  31 from black.lines import Line, append_leaves
  32 from black.mode import Feature, Mode
  33 from black.nodes import (
  34     CLOSING_BRACKETS,
  35     OPENING_BRACKETS,
  36     STANDALONE_COMMENT,
  37     is_empty_lpar,
  38     is_empty_par,
  39     is_empty_rpar,
  40     is_part_of_annotation,
  41     parent_type,
  42     replace_child,
  43     syms,
  44 )
  45 from black.rusty import Err, Ok, Result
  46 from black.strings import (
  47     assert_is_leaf_string,
  48     count_chars_in_width,
  49     get_string_prefix,
  50     has_triple_quotes,
  51     normalize_string_quotes,
  52     str_width,
  53 )
  54 from blib2to3.pgen2 import token
  55 from blib2to3.pytree import Leaf, Node
  56
  57
  58 class CannotTransform(Exception):
  59     """Base class for errors raised by Transformers."""
  60
  61
# types
T = TypeVar("T")  # payload type carried by a successful TResult
LN = Union[Leaf, Node]  # either kind of blib2to3 tree element
# Call signature shared by line transformers: given a Line, the target
# features and the mode, the callable yields Line objects.
Transformer = Callable[[Line, Collection[Feature], Mode], Iterator[Line]]
Index = int  # a position used to index into `Line.leaves`
NodeType = int  # NOTE(review): presumably a blib2to3 node/token type code -- confirm
ParserState = int  # NOTE(review): presumably a StringParser state value -- confirm
StringID = int  # an `id(string)` value; see CustomSplitMapMixin._get_key
TResult = Result[T, CannotTransform]  # (T)ransform Result
TMatchResult = TResult[List[Index]]  # return type of StringTransformer.do_match

# U+3001 IDEOGRAPHIC COMMA, U+3002 IDEOGRAPHIC FULL STOP, U+FF0C FULLWIDTH COMMA.
# NOTE(review): consumed outside the code visible here -- presumably marks
# characters after which a string may be split; confirm against the splitter.
SPLIT_SAFE_CHARS = frozenset(["\u3001", "\u3002", "\uff0c"])  # East Asian stops
  74
  75
  76 def TErr(err_msg: str) -> Err[CannotTransform]:
  77     """(T)ransform Err
  78
  79     Convenience function used when working with the TResult type.
  80     """
  81     cant_transform = CannotTransform(err_msg)
  82     return Err(cant_transform)
  83
  84
  85 def hug_power_op(
  86     line: Line, features: Collection[Feature], mode: Mode
  87 ) -> Iterator[Line]:
  88     """A transformer which normalizes spacing around power operators."""
  89
  90     # Performance optimization to avoid unnecessary Leaf clones and other ops.
  91     for leaf in line.leaves:
  92         if leaf.type == token.DOUBLESTAR:
  93             break
  94     else:
  95         raise CannotTransform("No doublestar token was found in the line.")
  96
  97     def is_simple_lookup(index: int, step: Literal[1, -1]) -> bool:
  98         # Brackets and parentheses indicate calls, subscripts, etc. ...
  99         # basically stuff that doesn't count as "simple". Only a NAME lookup
 100         # or dotted lookup (eg. NAME.NAME) is OK.
 101         if step == -1:
 102             disallowed = {token.RPAR, token.RSQB}
 103         else:
 104             disallowed = {token.LPAR, token.LSQB}
 105
 106         while 0 <= index < len(line.leaves):
 107             current = line.leaves[index]
 108             if current.type in disallowed:
 109                 return False
 110             if current.type not in {token.NAME, token.DOT} or current.value == "for":
 111                 # If the current token isn't disallowed, we'll assume this is simple as
 112                 # only the disallowed tokens are semantically attached to this lookup
 113                 # expression we're checking. Also, stop early if we hit the 'for' bit
 114                 # of a comprehension.
 115                 return True
 116
 117             index += step
 118
 119         return True
 120
 121     def is_simple_operand(index: int, kind: Literal["base", "exponent"]) -> bool:
 122         # An operand is considered "simple" if's a NAME, a numeric CONSTANT, a simple
 123         # lookup (see above), with or without a preceding unary operator.
 124         start = line.leaves[index]
 125         if start.type in {token.NAME, token.NUMBER}:
 126             return is_simple_lookup(index, step=(1 if kind == "exponent" else -1))
 127
 128         if start.type in {token.PLUS, token.MINUS, token.TILDE}:
 129             if line.leaves[index + 1].type in {token.NAME, token.NUMBER}:
 130                 # step is always one as bases with a preceding unary op will be checked
 131                 # for simplicity starting from the next token (so it'll hit the check
 132                 # above).
 133                 return is_simple_lookup(index + 1, step=1)
 134
 135         return False
 136
 137     new_line = line.clone()
 138     should_hug = False
 139     for idx, leaf in enumerate(line.leaves):
 140         new_leaf = leaf.clone()
 141         if should_hug:
 142             new_leaf.prefix = ""
 143             should_hug = False
 144
 145         should_hug = (
 146             (0 < idx < len(line.leaves) - 1)
 147             and leaf.type == token.DOUBLESTAR
 148             and is_simple_operand(idx - 1, kind="base")
 149             and line.leaves[idx - 1].value != "lambda"
 150             and is_simple_operand(idx + 1, kind="exponent")
 151         )
 152         if should_hug:
 153             new_leaf.prefix = ""
 154
 155         # We have to be careful to make a new line properly:
 156         # - bracket related metadata must be maintained (handled by Line.append)
 157         # - comments need to copied over, updating the leaf IDs they're attached to
 158         new_line.append(new_leaf, preformatted=True)
 159         for comment_leaf in line.comments_after(leaf):
 160             new_line.append(comment_leaf, preformatted=True)
 161
 162     yield new_line
 163
 164
 165 class StringTransformer(ABC):
 166     """
 167     An implementation of the Transformer protocol that relies on its
 168     subclasses overriding the template methods `do_match(...)` and
 169     `do_transform(...)`.
 170
 171     This Transformer works exclusively on strings (for example, by merging
 172     or splitting them).
 173
 174     The following sections can be found among the docstrings of each concrete
 175     StringTransformer subclass.
 176
 177     Requirements:
 178         Which requirements must be met of the given Line for this
 179         StringTransformer to be applied?
 180
 181     Transformations:
 182         If the given Line meets all of the above requirements, which string
 183         transformations can you expect to be applied to it by this
 184         StringTransformer?
 185
 186     Collaborations:
 187         What contractual agreements does this StringTransformer have with other
 188         StringTransfomers? Such collaborations should be eliminated/minimized
 189         as much as possible.
 190     """
 191
 192     __name__: Final = "StringTransformer"
 193
 194     # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with
 195     # `abc.ABC`.
 196     def __init__(self, line_length: int, normalize_strings: bool) -> None:
 197         self.line_length = line_length
 198         self.normalize_strings = normalize_strings
 199
 200     @abstractmethod
 201     def do_match(self, line: Line) -> TMatchResult:
 202         """
 203         Returns:
 204             * Ok(string_indices) such that for each index, `line.leaves[index]`
 205               is our target string if a match was able to be made. For
 206               transformers that don't result in more lines (e.g. StringMerger,
 207               StringParenStripper), multiple matches and transforms are done at
 208               once to reduce the complexity.
 209               OR
 210             * Err(CannotTransform), if no match could be made.
 211         """
 212
 213     @abstractmethod
 214     def do_transform(
 215         self, line: Line, string_indices: List[int]
 216     ) -> Iterator[TResult[Line]]:
 217         """
 218         Yields:
 219             * Ok(new_line) where new_line is the new transformed line.
 220               OR
 221             * Err(CannotTransform) if the transformation failed for some reason. The
 222               `do_match(...)` template method should usually be used to reject
 223               the form of the given Line, but in some cases it is difficult to
 224               know whether or not a Line meets the StringTransformer's
 225               requirements until the transformation is already midway.
 226
 227         Side Effects:
 228             This method should NOT mutate @line directly, but it MAY mutate the
 229             Line's underlying Node structure. (WARNING: If the underlying Node
 230             structure IS altered, then this method should NOT be allowed to
 231             yield an CannotTransform after that point.)
 232         """
 233
 234     def __call__(
 235         self, line: Line, _features: Collection[Feature], _mode: Mode
 236     ) -> Iterator[Line]:
 237         """
 238         StringTransformer instances have a call signature that mirrors that of
 239         the Transformer type.
 240
 241         Raises:
 242             CannotTransform(...) if the concrete StringTransformer class is unable
 243             to transform @line.
 244         """
 245         # Optimization to avoid calling `self.do_match(...)` when the line does
 246         # not contain any string.
 247         if not any(leaf.type == token.STRING for leaf in line.leaves):
 248             raise CannotTransform("There are no strings in this line.")
 249
 250         match_result = self.do_match(line)
 251
 252         if isinstance(match_result, Err):
 253             cant_transform = match_result.err()
 254             raise CannotTransform(
 255                 f"The string transformer {self.__class__.__name__} does not recognize"
 256                 " this line as one that it can transform."
 257             ) from cant_transform
 258
 259         string_indices = match_result.ok()
 260
 261         for line_result in self.do_transform(line, string_indices):
 262             if isinstance(line_result, Err):
 263                 cant_transform = line_result.err()
 264                 raise CannotTransform(
 265                     "StringTransformer failed while attempting to transform string."
 266                 ) from cant_transform
 267             line = line_result.ok()
 268             yield line
 269
 270
 271 @dataclass
 272 class CustomSplit:
 273     """A custom (i.e. manual) string split.
 274
 275     A single CustomSplit instance represents a single substring.
 276
 277     Examples:
 278         Consider the following string:
 279         ```
 280         "Hi there friend."
 281         " This is a custom"
 282         f" string {split}."
 283         ```
 284
 285         This string will correspond to the following three CustomSplit instances:
 286         ```
 287         CustomSplit(False, 16)
 288         CustomSplit(False, 17)
 289         CustomSplit(True, 16)
 290         ```
 291     """
 292
 293     has_prefix: bool
 294     break_idx: int
 295
 296
 297 @trait
 298 class CustomSplitMapMixin:
 299     """
 300     This mixin class is used to map merged strings to a sequence of
 301     CustomSplits, which will then be used to re-split the strings iff none of
 302     the resultant substrings go over the configured max line length.
 303     """
 304
 305     _Key: ClassVar = Tuple[StringID, str]
 306     _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict(
 307         tuple
 308     )
 309
 310     @staticmethod
 311     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
 312         """
 313         Returns:
 314             A unique identifier that is used internally to map @string to a
 315             group of custom splits.
 316         """
 317         return (id(string), string)
 318
 319     def add_custom_splits(
 320         self, string: str, custom_splits: Iterable[CustomSplit]
 321     ) -> None:
 322         """Custom Split Map Setter Method
 323
 324         Side Effects:
 325             Adds a mapping from @string to the custom splits @custom_splits.
 326         """
 327         key = self._get_key(string)
 328         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
 329
 330     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
 331         """Custom Split Map Getter Method
 332
 333         Returns:
 334             * A list of the custom splits that are mapped to @string, if any
 335               exist.
 336               OR
 337             * [], otherwise.
 338
 339         Side Effects:
 340             Deletes the mapping between @string and its associated custom
 341             splits (which are returned to the caller).
 342         """
 343         key = self._get_key(string)
 344
 345         custom_splits = self._CUSTOM_SPLIT_MAP[key]
 346         del self._CUSTOM_SPLIT_MAP[key]
 347
 348         return list(custom_splits)
 349
 350     def has_custom_splits(self, string: str) -> bool:
 351         """
 352         Returns:
 353             True iff @string is associated with a set of custom splits.
 354         """
 355         key = self._get_key(string)
 356         return key in self._CUSTOM_SPLIT_MAP
 357
 358
 359 class StringMerger(StringTransformer, CustomSplitMapMixin):
 360     """StringTransformer that merges strings together.
 361
 362     Requirements:
 363         (A) The line contains adjacent strings such that ALL of the validation checks
 364         listed in StringMerger._validate_msg(...)'s docstring pass.
 365         OR
 366         (B) The line contains a string which uses line continuation backslashes.
 367
 368     Transformations:
        Depending on which of the two requirements above were met, either:
 370
 371         (A) The string group associated with the target string is merged.
 372         OR
 373         (B) All line-continuation backslashes are removed from the target string.
 374
 375     Collaborations:
 376         StringMerger provides custom split information to StringSplitter.
 377     """
 378
 379     def do_match(self, line: Line) -> TMatchResult:
 380         LL = line.leaves
 381
 382         is_valid_index = is_valid_index_factory(LL)
 383
 384         string_indices = []
 385         idx = 0
 386         while is_valid_index(idx):
 387             leaf = LL[idx]
 388             if (
 389                 leaf.type == token.STRING
 390                 and is_valid_index(idx + 1)
 391                 and LL[idx + 1].type == token.STRING
 392             ):
 393                 if not is_part_of_annotation(leaf):
 394                     string_indices.append(idx)
 395
 396                 # Advance to the next non-STRING leaf.
 397                 idx += 2
 398                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 399                     idx += 1
 400
 401             elif leaf.type == token.STRING and "\\\n" in leaf.value:
 402                 string_indices.append(idx)
 403                 # Advance to the next non-STRING leaf.
 404                 idx += 1
 405                 while is_valid_index(idx) and LL[idx].type == token.STRING:
 406                     idx += 1
 407
 408             else:
 409                 idx += 1
 410
 411         if string_indices:
 412             return Ok(string_indices)
 413         else:
 414             return TErr("This line has no strings that need merging.")
 415
 416     def do_transform(
 417         self, line: Line, string_indices: List[int]
 418     ) -> Iterator[TResult[Line]]:
 419         new_line = line
 420
 421         rblc_result = self._remove_backslash_line_continuation_chars(
 422             new_line, string_indices
 423         )
 424         if isinstance(rblc_result, Ok):
 425             new_line = rblc_result.ok()
 426
 427         msg_result = self._merge_string_group(new_line, string_indices)
 428         if isinstance(msg_result, Ok):
 429             new_line = msg_result.ok()
 430
 431         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
 432             msg_cant_transform = msg_result.err()
 433             rblc_cant_transform = rblc_result.err()
 434             cant_transform = CannotTransform(
 435                 "StringMerger failed to merge any strings in this line."
 436             )
 437
 438             # Chain the errors together using `__cause__`.
 439             msg_cant_transform.__cause__ = rblc_cant_transform
 440             cant_transform.__cause__ = msg_cant_transform
 441
 442             yield Err(cant_transform)
 443         else:
 444             yield Ok(new_line)
 445
 446     @staticmethod
 447     def _remove_backslash_line_continuation_chars(
 448         line: Line, string_indices: List[int]
 449     ) -> TResult[Line]:
 450         """
 451         Merge strings that were split across multiple lines using
 452         line-continuation backslashes.
 453
 454         Returns:
 455             Ok(new_line), if @line contains backslash line-continuation
 456             characters.
 457                 OR
 458             Err(CannotTransform), otherwise.
 459         """
 460         LL = line.leaves
 461
 462         indices_to_transform = []
 463         for string_idx in string_indices:
 464             string_leaf = LL[string_idx]
 465             if (
 466                 string_leaf.type == token.STRING
 467                 and "\\\n" in string_leaf.value
 468                 and not has_triple_quotes(string_leaf.value)
 469             ):
 470                 indices_to_transform.append(string_idx)
 471
 472         if not indices_to_transform:
 473             return TErr(
 474                 "Found no string leaves that contain backslash line continuation"
 475                 " characters."
 476             )
 477
 478         new_line = line.clone()
 479         new_line.comments = line.comments.copy()
 480         append_leaves(new_line, line, LL)
 481
 482         for string_idx in indices_to_transform:
 483             new_string_leaf = new_line.leaves[string_idx]
 484             new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
 485
 486         return Ok(new_line)
 487
 488     def _merge_string_group(
 489         self, line: Line, string_indices: List[int]
 490     ) -> TResult[Line]:
 491         """
 492         Merges string groups (i.e. set of adjacent strings).
 493
 494         Each index from `string_indices` designates one string group's first
 495         leaf in `line.leaves`.
 496
 497         Returns:
 498             Ok(new_line), if ALL of the validation checks found in
 499             _validate_msg(...) pass.
 500                 OR
 501             Err(CannotTransform), otherwise.
 502         """
 503         LL = line.leaves
 504
 505         is_valid_index = is_valid_index_factory(LL)
 506
 507         # A dict of {string_idx: tuple[num_of_strings, string_leaf]}.
 508         merged_string_idx_dict: Dict[int, Tuple[int, Leaf]] = {}
 509         for string_idx in string_indices:
 510             vresult = self._validate_msg(line, string_idx)
 511             if isinstance(vresult, Err):
 512                 continue
 513             merged_string_idx_dict[string_idx] = self._merge_one_string_group(
 514                 LL, string_idx, is_valid_index
 515             )
 516
 517         if not merged_string_idx_dict:
 518             return TErr("No string group is merged")
 519
 520         # Build the final line ('new_line') that this method will later return.
 521         new_line = line.clone()
 522         previous_merged_string_idx = -1
 523         previous_merged_num_of_strings = -1
 524         for i, leaf in enumerate(LL):
 525             if i in merged_string_idx_dict:
 526                 previous_merged_string_idx = i
 527                 previous_merged_num_of_strings, string_leaf = merged_string_idx_dict[i]
 528                 new_line.append(string_leaf)
 529
 530             if (
 531                 previous_merged_string_idx
 532                 <= i
 533                 < previous_merged_string_idx + previous_merged_num_of_strings
 534             ):
 535                 for comment_leaf in line.comments_after(LL[i]):
 536                     new_line.append(comment_leaf, preformatted=True)
 537                 continue
 538
 539             append_leaves(new_line, line, [leaf])
 540
 541         return Ok(new_line)
 542
    def _merge_one_string_group(
        self, LL: List[Leaf], string_idx: int, is_valid_index: Callable[[int], bool]
    ) -> Tuple[int, Leaf]:
        """
        Merges one string group where the first string in the group is
        `LL[string_idx]`.

        The group is the run of consecutive STRING leaves that starts at
        `LL[string_idx]`. @is_valid_index is used to bounds-check indices
        into @LL.

        Returns:
            A tuple of `(num_of_strings, leaf)` where `num_of_strings` is the
            number of strings merged and `leaf` is the newly merged string
            to be replaced in the new line.

        Side Effects:
            * When `LL[string_idx]` has a parent node, the node tree is
              modified so that the merged leaf takes the place of the old
              string leaves (or of the whole parent node).
            * The custom splits describing the original substrings are
              registered for the merged string via `add_custom_splits(...)`.
        """
        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The closing quote character of the first string; the merged string
        # is built with this quote character.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)
            if "f" in string_prefix:
                string = _toggle_fexpr_quotes(string, QUOTE)
                # After quotes toggling, quotes in expressions won't be escaped
                # because quotes can't be reused in f-strings. So we can simply
                # let the escaping logic below run without knowing f-string
                # expressions.

            # Matches a (possibly empty) run of an even number of backslashes
            # that is not itself preceded by a backslash. A QUOTE that follows
            # such a run is not escaped yet, so a backslash is inserted in
            # front of it.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            # Drop the prefix, the opening quote and the closing quote.
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have: the first non-empty (lower-cased) prefix found
        # among the strings of the group, or "" if none of them has one.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value).lower()
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS).lower()

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring
                # (each brace is doubled).
                SS = re.sub(r"(\{|\})", r"\1\1", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            # Rebuild the merged string from everything accumulated so far plus
            # this substring, followed by a BREAK_MARK that records where the
            # substring ends.
            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        # Take a note on the index of the first leaf after the string group
        # (i.e. the non-STRING leaf, or the end of LL).
        non_string_idx = next_str_idx

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            S_leaf.value = normalize_string_quotes(S_leaf.value)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        # 'temp_string' is the naked merged string (markers still included); it
        # is consumed from the left, one marker per original substring.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            # mark_idx is the number of characters that preceded this marker in
            # what was left of temp_string.
            # NOTE(review): the extra 1 presumably accounts for the opening
            # quote, and len(prefix) for the prefix the substring gets when it
            # is split off again -- confirm against the consumer of break_idx.
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        # The leaf that is actually returned has all of the markers removed.
        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            # If not all children of the atom node are merged (this can happen
            # when there is a standalone comment in the middle) ...
            if non_string_idx - string_idx < len(atom_node.children):
                # We need to replace the old STRING leaves with the new string leaf.
                # The merged leaf is inserted at the position reported by
                # removing the first string leaf.
                first_child_idx = LL[string_idx].remove()
                for idx in range(string_idx + 1, non_string_idx):
                    LL[idx].remove()
                if first_child_idx is not None:
                    atom_node.insert_child(first_child_idx, string_leaf)
            else:
                # Else replace the atom node with the new string leaf.
                replace_child(atom_node, string_leaf)

        # Register the splits under the merged string's final value.
        self.add_custom_splits(string_leaf.value, custom_splits)
        return num_of_strings, string_leaf
 685
 686     @staticmethod
 687     def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
 688         """Validate (M)erge (S)tring (G)roup
 689
 690         Transform-time string validation logic for _merge_string_group(...).
 691
 692         Returns:
 693             * Ok(None), if ALL validation checks (listed below) pass.
 694                 OR
 695             * Err(CannotTransform), if any of the following are true:
 696                 - The target string group does not contain ANY stand-alone comments.
 697                 - The target string is not in a string group (i.e. it has no
 698                   adjacent strings).
 699                 - The string group has more than one inline comment.
 700                 - The string group has an inline comment that appears to be a pragma.
 701                 - The set of all string prefixes in the string group is of
 702                   length greater than one and is not equal to {"", "f"}.
 703                 - The string group consists of raw strings.
 704                 - The string group is stringified type annotations. We don't want to
 705                   process stringified type annotations since pyright doesn't support
 706                   them spanning multiple string values. (NOTE: mypy, pytype, pyre do
 707                   support them, so we can change if pyright also gains support in the
 708                   future. See https://github.com/microsoft/pyright/issues/4359.)
 709         """
 710         # We first check for "inner" stand-alone comments (i.e. stand-alone
 711         # comments that have a string leaf before them AND after them).
 712         for inc in [1, -1]:
 713             i = string_idx
 714             found_sa_comment = False
 715             is_valid_index = is_valid_index_factory(line.leaves)
 716             while is_valid_index(i) and line.leaves[i].type in [
 717                 token.STRING,
 718                 STANDALONE_COMMENT,
 719             ]:
 720                 if line.leaves[i].type == STANDALONE_COMMENT:
 721                     found_sa_comment = True
 722                 elif found_sa_comment:
 723                     return TErr(
 724                         "StringMerger does NOT merge string groups which contain "
 725                         "stand-alone comments."
 726                     )
 727
 728                 i += inc
 729
 730         num_of_inline_string_comments = 0
 731         set_of_prefixes = set()
 732         num_of_strings = 0
 733         for leaf in line.leaves[string_idx:]:
 734             if leaf.type != token.STRING:
 735                 # If the string group is trailed by a comma, we count the
 736                 # comments trailing the comma to be one of the string group's
 737                 # comments.
 738                 if leaf.type == token.COMMA and id(leaf) in line.comments:
 739                     num_of_inline_string_comments += 1
 740                 break
 741
 742             if has_triple_quotes(leaf.value):
 743                 return TErr("StringMerger does NOT merge multiline strings.")
 744
 745             num_of_strings += 1
 746             prefix = get_string_prefix(leaf.value).lower()
 747             if "r" in prefix:
 748                 return TErr("StringMerger does NOT merge raw strings.")
 749
 750             set_of_prefixes.add(prefix)
 751
 752             if id(leaf) in line.comments:
 753                 num_of_inline_string_comments += 1
 754                 if contains_pragma_comment(line.comments[id(leaf)]):
 755                     return TErr("Cannot merge strings which have pragma comments.")
 756
 757         if num_of_strings < 2:
 758             return TErr(
 759                 f"Not enough strings to merge (num_of_strings={num_of_strings})."
 760             )
 761
 762         if num_of_inline_string_comments > 1:
 763             return TErr(
 764                 f"Too many inline string comments ({num_of_inline_string_comments})."
 765             )
 766
 767         if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
 768             return TErr(f"Too many different prefixes ({set_of_prefixes}).")
 769
 770         return Ok(None)
 771
 772
 773 class StringParenStripper(StringTransformer):
 774     """StringTransformer that strips surrounding parentheses from strings.
 775
 776     Requirements:
 777         The line contains a string which is surrounded by parentheses and:
 778             - The target string is NOT the only argument to a function call.
 779             - The target string is NOT a "pointless" string.
 780             - If the target string contains a PERCENT, the brackets are not
 781               preceded or followed by an operator with higher precedence than
 782               PERCENT.
 783
 784     Transformations:
 785         The parentheses mentioned in the 'Requirements' section are stripped.
 786
 787     Collaborations:
 788         StringParenStripper has its own inherent usefulness, but it is also
 789         relied on to clean up the parentheses created by StringParenWrapper (in
 790         the event that they are no longer needed).
 791     """
 792
 793     def do_match(self, line: Line) -> TMatchResult:
 794         LL = line.leaves
 795
 796         is_valid_index = is_valid_index_factory(LL)
 797
 798         string_indices = []
 799
 800         idx = -1
 801         while True:
 802             idx += 1
 803             if idx >= len(LL):
 804                 break
 805             leaf = LL[idx]
 806
 807             # Should be a string...
 808             if leaf.type != token.STRING:
 809                 continue
 810
 811             # If this is a "pointless" string...
 812             if (
 813                 leaf.parent
 814                 and leaf.parent.parent
 815                 and leaf.parent.parent.type == syms.simple_stmt
 816             ):
 817                 continue
 818
 819             # Should be preceded by a non-empty LPAR...
 820             if (
 821                 not is_valid_index(idx - 1)
 822                 or LL[idx - 1].type != token.LPAR
 823                 or is_empty_lpar(LL[idx - 1])
 824             ):
 825                 continue
 826
 827             # That LPAR should NOT be preceded by a function name or a closing
 828             # bracket (which could be a function which returns a function or a
 829             # list/dictionary that contains a function)...
 830             if is_valid_index(idx - 2) and (
 831                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
 832             ):
 833                 continue
 834
 835             string_idx = idx
 836
 837             # Skip the string trailer, if one exists.
 838             string_parser = StringParser()
 839             next_idx = string_parser.parse(LL, string_idx)
 840
 841             # if the leaves in the parsed string include a PERCENT, we need to
 842             # make sure the initial LPAR is NOT preceded by an operator with
 843             # higher or equal precedence to PERCENT
 844             if is_valid_index(idx - 2):
 845                 # mypy can't quite follow unless we name this
 846                 before_lpar = LL[idx - 2]
 847                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
 848                     (
 849                         before_lpar.type
 850                         in {
 851                             token.STAR,
 852                             token.AT,
 853                             token.SLASH,
 854                             token.DOUBLESLASH,
 855                             token.PERCENT,
 856                             token.TILDE,
 857                             token.DOUBLESTAR,
 858                             token.AWAIT,
 859                             token.LSQB,
 860                             token.LPAR,
 861                         }
 862                     )
 863                     or (
 864                         # only unary PLUS/MINUS
 865                         before_lpar.parent
 866                         and before_lpar.parent.type == syms.factor
 867                         and (before_lpar.type in {token.PLUS, token.MINUS})
 868                     )
 869                 ):
 870                     continue
 871
 872             # Should be followed by a non-empty RPAR...
 873             if (
 874                 is_valid_index(next_idx)
 875                 and LL[next_idx].type == token.RPAR
 876                 and not is_empty_rpar(LL[next_idx])
 877             ):
 878                 # That RPAR should NOT be followed by anything with higher
 879                 # precedence than PERCENT
 880                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
 881                     token.DOUBLESTAR,
 882                     token.LSQB,
 883                     token.LPAR,
 884                     token.DOT,
 885                 }:
 886                     continue
 887
 888                 string_indices.append(string_idx)
 889                 idx = string_idx
 890                 while idx < len(LL) - 1 and LL[idx + 1].type == token.STRING:
 891                     idx += 1
 892
 893         if string_indices:
 894             return Ok(string_indices)
 895         return TErr("This line has no strings wrapped in parens.")
 896
 897     def do_transform(
 898         self, line: Line, string_indices: List[int]
 899     ) -> Iterator[TResult[Line]]:
 900         LL = line.leaves
 901
 902         string_and_rpar_indices: List[int] = []
 903         for string_idx in string_indices:
 904             string_parser = StringParser()
 905             rpar_idx = string_parser.parse(LL, string_idx)
 906
 907             should_transform = True
 908             for leaf in (LL[string_idx - 1], LL[rpar_idx]):
 909                 if line.comments_after(leaf):
 910                     # Should not strip parentheses which have comments attached
 911                     # to them.
 912                     should_transform = False
 913                     break
 914             if should_transform:
 915                 string_and_rpar_indices.extend((string_idx, rpar_idx))
 916
 917         if string_and_rpar_indices:
 918             yield Ok(self._transform_to_new_line(line, string_and_rpar_indices))
 919         else:
 920             yield Err(
 921                 CannotTransform("All string groups have comments attached to them.")
 922             )
 923
 924     def _transform_to_new_line(
 925         self, line: Line, string_and_rpar_indices: List[int]
 926     ) -> Line:
 927         LL = line.leaves
 928
 929         new_line = line.clone()
 930         new_line.comments = line.comments.copy()
 931
 932         previous_idx = -1
 933         # We need to sort the indices, since string_idx and its matching
 934         # rpar_idx may not come in order, e.g. in
 935         # `("outer" % ("inner".join(items)))`, the "inner" string's
 936         # string_idx is smaller than "outer" string's rpar_idx.
 937         for idx in sorted(string_and_rpar_indices):
 938             leaf = LL[idx]
 939             lpar_or_rpar_idx = idx - 1 if leaf.type == token.STRING else idx
 940             append_leaves(new_line, line, LL[previous_idx + 1 : lpar_or_rpar_idx])
 941             if leaf.type == token.STRING:
 942                 string_leaf = Leaf(token.STRING, LL[idx].value)
 943                 LL[lpar_or_rpar_idx].remove()  # Remove lpar.
 944                 replace_child(LL[idx], string_leaf)
 945                 new_line.append(string_leaf)
 946                 # replace comments
 947                 old_comments = new_line.comments.pop(id(LL[idx]), [])
 948                 new_line.comments.setdefault(id(string_leaf), []).extend(old_comments)
 949             else:
 950                 LL[lpar_or_rpar_idx].remove()  # This is a rpar.
 951
 952             previous_idx = idx
 953
 954         # Append the leaves after the last idx:
 955         append_leaves(new_line, line, LL[idx + 1 :])
 956
 957         return new_line
 958
 959
 960 class BaseStringSplitter(StringTransformer):
 961     """
 962     Abstract class for StringTransformers which transform a Line's strings by splitting
 963     them or placing them on their own lines where necessary to avoid going over
 964     the configured line length.
 965
 966     Requirements:
 967         * The target string value is responsible for the line going over the
 968           line length limit. It follows that after all of black's other line
 969           split methods have been exhausted, this line (or one of the resulting
 970           lines after all line splits are performed) would still be over the
 971           line_length limit unless we split this string.
 972           AND
 973
 974         * The target string is NOT a "pointless" string (i.e. a string that has
 975           no parent or siblings).
 976           AND
 977
 978         * The target string is not followed by an inline comment that appears
 979           to be a pragma.
 980           AND
 981
 982         * The target string is not a multiline (i.e. triple-quote) string.
 983     """
 984
 985     STRING_OPERATORS: Final = [
 986         token.EQEQUAL,
 987         token.GREATER,
 988         token.GREATEREQUAL,
 989         token.LESS,
 990         token.LESSEQUAL,
 991         token.NOTEQUAL,
 992         token.PERCENT,
 993         token.PLUS,
 994         token.STAR,
 995     ]
 996
 997     @abstractmethod
 998     def do_splitter_match(self, line: Line) -> TMatchResult:
 999         """
1000         BaseStringSplitter asks its clients to override this method instead of
1001         `StringTransformer.do_match(...)`.
1002
1003         Follows the same protocol as `StringTransformer.do_match(...)`.
1004
1005         Refer to `help(StringTransformer.do_match)` for more information.
1006         """
1007
1008     def do_match(self, line: Line) -> TMatchResult:
1009         match_result = self.do_splitter_match(line)
1010         if isinstance(match_result, Err):
1011             return match_result
1012
1013         string_indices = match_result.ok()
1014         assert len(string_indices) == 1, (
1015             f"{self.__class__.__name__} should only find one match at a time, found"
1016             f" {len(string_indices)}"
1017         )
1018         string_idx = string_indices[0]
1019         vresult = self._validate(line, string_idx)
1020         if isinstance(vresult, Err):
1021             return vresult
1022
1023         return match_result
1024
1025     def _validate(self, line: Line, string_idx: int) -> TResult[None]:
1026         """
1027         Checks that @line meets all of the requirements listed in this classes'
1028         docstring. Refer to `help(BaseStringSplitter)` for a detailed
1029         description of those requirements.
1030
1031         Returns:
1032             * Ok(None), if ALL of the requirements are met.
1033               OR
1034             * Err(CannotTransform), if ANY of the requirements are NOT met.
1035         """
1036         LL = line.leaves
1037
1038         string_leaf = LL[string_idx]
1039
1040         max_string_length = self._get_max_string_length(line, string_idx)
1041         if len(string_leaf.value) <= max_string_length:
1042             return TErr(
1043                 "The string itself is not what is causing this line to be too long."
1044             )
1045
1046         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
1047             token.STRING,
1048             token.NEWLINE,
1049         ]:
1050             return TErr(
1051                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
1052                 " no parent)."
1053             )
1054
1055         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
1056             line.comments[id(line.leaves[string_idx])]
1057         ):
1058             return TErr(
1059                 "Line appears to end with an inline pragma comment. Splitting the line"
1060                 " could modify the pragma's behavior."
1061             )
1062
1063         if has_triple_quotes(string_leaf.value):
1064             return TErr("We cannot split multiline strings.")
1065
1066         return Ok(None)
1067
1068     def _get_max_string_length(self, line: Line, string_idx: int) -> int:
1069         """
1070         Calculates the max string length used when attempting to determine
1071         whether or not the target string is responsible for causing the line to
1072         go over the line length limit.
1073
1074         WARNING: This method is tightly coupled to both StringSplitter and
1075         (especially) StringParenWrapper. There is probably a better way to
1076         accomplish what is being done here.
1077
1078         Returns:
1079             max_string_length: such that `line.leaves[string_idx].value >
1080             max_string_length` implies that the target string IS responsible
1081             for causing this line to exceed the line length limit.
1082         """
1083         LL = line.leaves
1084
1085         is_valid_index = is_valid_index_factory(LL)
1086
1087         # We use the shorthand "WMA4" in comments to abbreviate "We must
1088         # account for". When giving examples, we use STRING to mean some/any
1089         # valid string.
1090         #
1091         # Finally, we use the following convenience variables:
1092         #
1093         #   P:  The leaf that is before the target string leaf.
1094         #   N:  The leaf that is after the target string leaf.
1095         #   NN: The leaf that is after N.
1096
1097         # WMA4 the whitespace at the beginning of the line.
1098         offset = line.depth * 4
1099
1100         if is_valid_index(string_idx - 1):
1101             p_idx = string_idx - 1
1102             if (
1103                 LL[string_idx - 1].type == token.LPAR
1104                 and LL[string_idx - 1].value == ""
1105                 and string_idx >= 2
1106             ):
1107                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
1108                 p_idx -= 1
1109
1110             P = LL[p_idx]
1111             if P.type in self.STRING_OPERATORS:
1112                 # WMA4 a space and a string operator (e.g. `+ STRING` or `== STRING`).
1113                 offset += len(str(P)) + 1
1114
1115             if P.type == token.COMMA:
1116                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
1117                 offset += 3
1118
1119             if P.type in [token.COLON, token.EQUAL, token.PLUSEQUAL, token.NAME]:
1120                 # This conditional branch is meant to handle dictionary keys,
1121                 # variable assignments, 'return STRING' statement lines, and
1122                 # 'else STRING' ternary expression lines.
1123
1124                 # WMA4 a single space.
1125                 offset += 1
1126
1127                 # WMA4 the lengths of any leaves that came before that space,
1128                 # but after any closing bracket before that space.
1129                 for leaf in reversed(LL[: p_idx + 1]):
1130                     offset += len(str(leaf))
1131                     if leaf.type in CLOSING_BRACKETS:
1132                         break
1133
1134         if is_valid_index(string_idx + 1):
1135             N = LL[string_idx + 1]
1136             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
1137                 # If the next leaf is an empty RPAR placeholder, we should skip it.
1138                 N = LL[string_idx + 2]
1139
1140             if N.type == token.COMMA:
1141                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
1142                 offset += 1
1143
1144             if is_valid_index(string_idx + 2):
1145                 NN = LL[string_idx + 2]
1146
1147                 if N.type == token.DOT and NN.type == token.NAME:
1148                     # This conditional branch is meant to handle method calls invoked
1149                     # off of a string literal up to and including the LPAR character.
1150
1151                     # WMA4 the '.' character.
1152                     offset += 1
1153
1154                     if (
1155                         is_valid_index(string_idx + 3)
1156                         and LL[string_idx + 3].type == token.LPAR
1157                     ):
1158                         # WMA4 the left parenthesis character.
1159                         offset += 1
1160
1161                     # WMA4 the length of the method's name.
1162                     offset += len(NN.value)
1163
1164         has_comments = False
1165         for comment_leaf in line.comments_after(LL[string_idx]):
1166             if not has_comments:
1167                 has_comments = True
1168                 # WMA4 two spaces before the '#' character.
1169                 offset += 2
1170
1171             # WMA4 the length of the inline comment.
1172             offset += len(comment_leaf.value)
1173
1174         max_string_length = count_chars_in_width(str(line), self.line_length - offset)
1175         return max_string_length
1176
1177     @staticmethod
1178     def _prefer_paren_wrap_match(LL: List[Leaf]) -> Optional[int]:
1179         """
1180         Returns:
1181             string_idx such that @LL[string_idx] is equal to our target (i.e.
1182             matched) string, if this line matches the "prefer paren wrap" statement
1183             requirements listed in the 'Requirements' section of the StringParenWrapper
1184             class's docstring.
1185                 OR
1186             None, otherwise.
1187         """
1188         # The line must start with a string.
1189         if LL[0].type != token.STRING:
1190             return None
1191
1192         matching_nodes = [
1193             syms.listmaker,
1194             syms.dictsetmaker,
1195             syms.testlist_gexp,
1196         ]
1197         # If the string is an immediate child of a list/set/tuple literal...
1198         if (
1199             parent_type(LL[0]) in matching_nodes
1200             or parent_type(LL[0].parent) in matching_nodes
1201         ):
1202             # And the string is surrounded by commas (or is the first/last child)...
1203             prev_sibling = LL[0].prev_sibling
1204             next_sibling = LL[0].next_sibling
1205             if (
1206                 not prev_sibling
1207                 and not next_sibling
1208                 and parent_type(LL[0]) == syms.atom
1209             ):
1210                 # If it's an atom string, we need to check the parent atom's siblings.
1211                 parent = LL[0].parent
1212                 assert parent is not None  # For type checkers.
1213                 prev_sibling = parent.prev_sibling
1214                 next_sibling = parent.next_sibling
1215             if (not prev_sibling or prev_sibling.type == token.COMMA) and (
1216                 not next_sibling or next_sibling.type == token.COMMA
1217             ):
1218                 return 0
1219
1220         return None
1221
1222
def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yields the span of every top-level `{...}` expression in the f-string `s`.

    Each span is a half-open `(start, end)` range: `start` is the index of the
    opening brace and `end` is one past the matching closing brace. Braces
    nested inside an expression do not get spans of their own.

    The input is assumed to be a valid f-string, but invalid input will not
    make this function crash.
    """
    open_braces: List[int] = []  # indices of the currently unclosed "{"
    pos = 0
    while pos < len(s):
        char = s[pos]

        if char == "{":
            if not open_braces and s[pos + 1 : pos + 2] == "{":
                # "{{" in the literal part of the f-string is an escaped
                # brace, not the start of an expression.
                pos += 2
            else:
                open_braces.append(pos)
                pos += 1

        elif char == "}":
            if open_braces:
                start = open_braces.pop()
                if not open_braces:
                    # Back at the top level: the expression is complete.
                    yield (start, pos + 1)
            pos += 1

        elif open_braces and char in ("'", '"'):
            # Inside an expression, fast forward through string literals so
            # braces within them are ignored. Note that backslashes are not
            # legal in the expression portion of f-strings, so no escape
            # handling is needed.
            triple = s[pos : pos + 3]
            delim = triple if triple in ("'''", '"""') else char
            closing = s.find(delim, pos + len(delim))
            if closing == -1:
                # Unterminated string: nothing else can be matched.
                return
            pos = closing + len(delim)

        else:
            pos += 1
1268
1269
def fstring_contains_expr(s: str) -> bool:
    """Return True iff `iter_fexpr_spans` finds at least one span in `s`."""
    for _span in iter_fexpr_spans(s):
        return True
    return False
1272
1273
def _toggle_fexpr_quotes(fstring: str, old_quote: str) -> str:
    """
    Swaps every `old_quote` found inside the expressions of `fstring` for the
    other quote character; the literal parts of the f-string are untouched.

    f-string expressions can't contain backslashes, so the quotes have to be
    toggled if the f-string itself will end up using the same quote. A plain
    swap without escaping is enough because quotes can't be reused in f-string
    expressions: such code would fail to parse.

    NOTE: If PEP 701 is accepted, above statement will no longer be true.
    Though if quotes can be reused, we can simply reuse them without updates or
    escaping, once Black figures out how to parse the new grammar.
    """
    if old_quote == '"':
        new_quote = "'"
    else:
        new_quote = '"'

    pieces = []
    cursor = 0
    for start, end in iter_fexpr_spans(fstring):
        literal_part = fstring[cursor:start]
        expression_part = fstring[start:end].replace(old_quote, new_quote)
        pieces.extend((literal_part, expression_part))
        cursor = end

    # Whatever follows the last expression is copied verbatim.
    pieces.append(fstring[cursor:])
    return "".join(pieces)
1296
1297
1298 class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
1299     """
1300     StringTransformer that splits "atom" strings (i.e. strings which exist on
1301     lines by themselves).
1302
1303     Requirements:
1304         * The line consists ONLY of a single string (possibly prefixed by a
1305           string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
1306           a trailing comma.
1307           AND
1308         * All of the requirements listed in BaseStringSplitter's docstring.
1309
1310     Transformations:
1311         The string mentioned in the 'Requirements' section is split into as
1312         many substrings as necessary to adhere to the configured line length.
1313
1314         In the final set of substrings, no substring should be smaller than
1315         MIN_SUBSTR_SIZE characters.
1316
1317         The string will ONLY be split on spaces (i.e. each new substring should
1318         start with a space). Note that the string will NOT be split on a space
1319         which is escaped with a backslash.
1320
1321         If the string is an f-string, it will NOT be split in the middle of an
1322         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
1323         else bar()} is an f-expression).
1324
1325         If the string that is being split has an associated set of custom split
1326         records and those custom splits will NOT result in any line going over
1327         the configured line length, those custom splits are used. Otherwise the
1328         string is split as late as possible (from left-to-right) while still
1329         adhering to the transformation rules listed above.
1330
1331     Collaborations:
1332         StringSplitter relies on StringMerger to construct the appropriate
1333         CustomSplit objects and add them to the custom split map.
1334     """
1335
1336     MIN_SUBSTR_SIZE: Final = 6
1337
1338     def do_splitter_match(self, line: Line) -> TMatchResult:
1339         LL = line.leaves
1340
1341         if self._prefer_paren_wrap_match(LL) is not None:
1342             return TErr("Line needs to be wrapped in parens first.")
1343
1344         is_valid_index = is_valid_index_factory(LL)
1345
1346         idx = 0
1347
1348         # The first two leaves MAY be the 'not in' keywords...
1349         if (
1350             is_valid_index(idx)
1351             and is_valid_index(idx + 1)
1352             and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
1353             and str(LL[idx]) + str(LL[idx + 1]) == "not in"
1354         ):
1355             idx += 2
1356         # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
1357         elif is_valid_index(idx) and (
1358             LL[idx].type in self.STRING_OPERATORS
1359             or LL[idx].type == token.NAME
1360             and str(LL[idx]) == "in"
1361         ):
1362             idx += 1
1363
1364         # The next/first leaf MAY be an empty LPAR...
1365         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
1366             idx += 1
1367
1368         # The next/first leaf MUST be a string...
1369         if not is_valid_index(idx) or LL[idx].type != token.STRING:
1370             return TErr("Line does not start with a string.")
1371
1372         string_idx = idx
1373
1374         # Skip the string trailer, if one exists.
1375         string_parser = StringParser()
1376         idx = string_parser.parse(LL, string_idx)
1377
1378         # That string MAY be followed by an empty RPAR...
1379         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
1380             idx += 1
1381
1382         # That string / empty RPAR leaf MAY be followed by a comma...
1383         if is_valid_index(idx) and LL[idx].type == token.COMMA:
1384             idx += 1
1385
1386         # But no more leaves are allowed...
1387         if is_valid_index(idx):
1388             return TErr("This line does not end with a string.")
1389
1390         return Ok([string_idx])
1391
1392     def do_transform(
1393         self, line: Line, string_indices: List[int]
1394     ) -> Iterator[TResult[Line]]:
1395         LL = line.leaves
1396         assert len(string_indices) == 1, (
1397             f"{self.__class__.__name__} should only find one match at a time, found"
1398             f" {len(string_indices)}"
1399         )
1400         string_idx = string_indices[0]
1401
1402         QUOTE = LL[string_idx].value[-1]
1403
1404         is_valid_index = is_valid_index_factory(LL)
1405         insert_str_child = insert_str_child_factory(LL[string_idx])
1406
1407         prefix = get_string_prefix(LL[string_idx].value).lower()
1408
1409         # We MAY choose to drop the 'f' prefix from substrings that don't
1410         # contain any f-expressions, but ONLY if the original f-string
1411         # contains at least one f-expression. Otherwise, we will alter the AST
1412         # of the program.
1413         drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
1414             LL[string_idx].value
1415         )
1416
1417         first_string_line = True
1418
1419         string_op_leaves = self._get_string_operator_leaves(LL)
1420         string_op_leaves_length = (
1421             sum(len(str(prefix_leaf)) for prefix_leaf in string_op_leaves) + 1
1422             if string_op_leaves
1423             else 0
1424         )
1425
1426         def maybe_append_string_operators(new_line: Line) -> None:
1427             """
1428             Side Effects:
1429                 If @line starts with a string operator and this is the first
1430                 line we are constructing, this function appends the string
1431                 operator to @new_line and replaces the old string operator leaf
1432                 in the node structure. Otherwise this function does nothing.
1433             """
1434             maybe_prefix_leaves = string_op_leaves if first_string_line else []
1435             for i, prefix_leaf in enumerate(maybe_prefix_leaves):
1436                 replace_child(LL[i], prefix_leaf)
1437                 new_line.append(prefix_leaf)
1438
1439         ends_with_comma = (
1440             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
1441         )
1442
1443         def max_last_string_column() -> int:
1444             """
1445             Returns:
1446                 The max allowed width of the string value used for the last
1447                 line we will construct.  Note that this value means the width
1448                 rather than the number of characters (e.g., many East Asian
1449                 characters expand to two columns).
1450             """
1451             result = self.line_length
1452             result -= line.depth * 4
1453             result -= 1 if ends_with_comma else 0
1454             result -= string_op_leaves_length
1455             return result
1456
1457         # --- Calculate Max Break Width (for string value)
1458         # We start with the line length limit
1459         max_break_width = self.line_length
1460         # The last index of a string of length N is N-1.
1461         max_break_width -= 1
1462         # Leading whitespace is not present in the string value (e.g. Leaf.value).
1463         max_break_width -= line.depth * 4
1464         if max_break_width < 0:
1465             yield TErr(
1466                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
1467                 f" {line.depth}"
1468             )
1469             return
1470
1471         # Check if StringMerger registered any custom splits.
1472         custom_splits = self.pop_custom_splits(LL[string_idx].value)
1473         # We use them ONLY if none of them would produce lines that exceed the
1474         # line limit.
1475         use_custom_breakpoints = bool(
1476             custom_splits
1477             and all(csplit.break_idx <= max_break_width for csplit in custom_splits)
1478         )
1479
1480         # Temporary storage for the remaining chunk of the string line that
1481         # can't fit onto the line currently being constructed.
1482         rest_value = LL[string_idx].value
1483
1484         def more_splits_should_be_made() -> bool:
1485             """
1486             Returns:
1487                 True iff `rest_value` (the remaining string value from the last
1488                 split), should be split again.
1489             """
1490             if use_custom_breakpoints:
1491                 return len(custom_splits) > 1
1492             else:
1493                 return str_width(rest_value) > max_last_string_column()
1494
1495         string_line_results: List[Ok[Line]] = []
1496         while more_splits_should_be_made():
1497             if use_custom_breakpoints:
1498                 # Custom User Split (manual)
1499                 csplit = custom_splits.pop(0)
1500                 break_idx = csplit.break_idx
1501             else:
1502                 # Algorithmic Split (automatic)
1503                 max_bidx = (
1504                     count_chars_in_width(rest_value, max_break_width)
1505                     - string_op_leaves_length
1506                 )
1507                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
1508                 if maybe_break_idx is None:
1509                     # If we are unable to algorithmically determine a good split
1510                     # and this string has custom splits registered to it, we
1511                     # fall back to using them--which means we have to start
1512                     # over from the beginning.
1513                     if custom_splits:
1514                         rest_value = LL[string_idx].value
1515                         string_line_results = []
1516                         first_string_line = True
1517                         use_custom_breakpoints = True
1518                         continue
1519
1520                     # Otherwise, we stop splitting here.
1521                     break
1522
1523                 break_idx = maybe_break_idx
1524
1525             # --- Construct `next_value`
1526             next_value = rest_value[:break_idx] + QUOTE
1527
1528             # HACK: The following 'if' statement is a hack to fix the custom
1529             # breakpoint index in the case of either: (a) substrings that were
1530             # f-strings but will have the 'f' prefix removed OR (b) substrings
1531             # that were not f-strings but will now become f-strings because of
1532             # redundant use of the 'f' prefix (i.e. none of the substrings
1533             # contain f-expressions but one or more of them had the 'f' prefix
1534             # anyway; in which case, we will prepend 'f' to _all_ substrings).
1535             #
1536             # There is probably a better way to accomplish what is being done
1537             # here...
1538             #
1539             # If this substring is an f-string, we _could_ remove the 'f'
1540             # prefix, and the current custom split did NOT originally use a
1541             # prefix...
1542             if (
1543                 use_custom_breakpoints
1544                 and not csplit.has_prefix
1545                 and (
1546                     # `next_value == prefix + QUOTE` happens when the custom
1547                     # split is an empty string.
1548                     next_value == prefix + QUOTE
1549                     or next_value != self._normalize_f_string(next_value, prefix)
1550                 )
1551             ):
1552                 # Then `csplit.break_idx` will be off by one after removing
1553                 # the 'f' prefix.
1554                 break_idx += 1
1555                 next_value = rest_value[:break_idx] + QUOTE
1556
1557             if drop_pointless_f_prefix:
1558                 next_value = self._normalize_f_string(next_value, prefix)
1559
1560             # --- Construct `next_leaf`
1561             next_leaf = Leaf(token.STRING, next_value)
1562             insert_str_child(next_leaf)
1563             self._maybe_normalize_string_quotes(next_leaf)
1564
1565             # --- Construct `next_line`
1566             next_line = line.clone()
1567             maybe_append_string_operators(next_line)
1568             next_line.append(next_leaf)
1569             string_line_results.append(Ok(next_line))
1570
1571             rest_value = prefix + QUOTE + rest_value[break_idx:]
1572             first_string_line = False
1573
1574         yield from string_line_results
1575
1576         if drop_pointless_f_prefix:
1577             rest_value = self._normalize_f_string(rest_value, prefix)
1578
1579         rest_leaf = Leaf(token.STRING, rest_value)
1580         insert_str_child(rest_leaf)
1581
1582         # NOTE: I could not find a test case that verifies that the following
1583         # line is actually necessary, but it seems to be. Otherwise we risk
1584         # not normalizing the last substring, right?
1585         self._maybe_normalize_string_quotes(rest_leaf)
1586
1587         last_line = line.clone()
1588         maybe_append_string_operators(last_line)
1589
1590         # If there are any leaves to the right of the target string...
1591         if is_valid_index(string_idx + 1):
1592             # We use `temp_value` here to determine how long the last line
1593             # would be if we were to append all the leaves to the right of the
1594             # target string to the last string line.
1595             temp_value = rest_value
1596             for leaf in LL[string_idx + 1 :]:
1597                 temp_value += str(leaf)
1598                 if leaf.type == token.LPAR:
1599                     break
1600
1601             # Try to fit them all on the same line with the last substring...
1602             if (
1603                 str_width(temp_value) <= max_last_string_column()
1604                 or LL[string_idx + 1].type == token.COMMA
1605             ):
1606                 last_line.append(rest_leaf)
1607                 append_leaves(last_line, line, LL[string_idx + 1 :])
1608                 yield Ok(last_line)
1609             # Otherwise, place the last substring on one line and everything
1610             # else on a line below that...
1611             else:
1612                 last_line.append(rest_leaf)
1613                 yield Ok(last_line)
1614
1615                 non_string_line = line.clone()
1616                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
1617                 yield Ok(non_string_line)
1618         # Else the target string was the last leaf...
1619         else:
1620             last_line.append(rest_leaf)
1621             last_line.comments = line.comments.copy()
1622             yield Ok(last_line)
1623
1624     def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1625         """
1626         Yields:
1627             All ranges of @string which, if @string were to be split there,
1628             would result in the splitting of an \\N{...} expression (which is NOT
1629             allowed).
1630         """
1631         # True - the previous backslash was unescaped
1632         # False - the previous backslash was escaped *or* there was no backslash
1633         previous_was_unescaped_backslash = False
1634         it = iter(enumerate(string))
1635         for idx, c in it:
1636             if c == "\\":
1637                 previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1638                 continue
1639             if not previous_was_unescaped_backslash or c != "N":
1640                 previous_was_unescaped_backslash = False
1641                 continue
1642             previous_was_unescaped_backslash = False
1643
1644             begin = idx - 1  # the position of backslash before \N{...}
1645             for idx, c in it:
1646                 if c == "}":
1647                     end = idx
1648                     break
1649             else:
1650                 # malformed nameescape expression?
1651                 # should have been detected by AST parsing earlier...
1652                 raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1653             yield begin, end
1654
1655     def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1656         """
1657         Yields:
1658             All ranges of @string which, if @string were to be split there,
1659             would result in the splitting of an f-expression (which is NOT
1660             allowed).
1661         """
1662         if "f" not in get_string_prefix(string).lower():
1663             return
1664         yield from iter_fexpr_spans(string)
1665
1666     def _get_illegal_split_indices(self, string: str) -> Set[Index]:
1667         illegal_indices: Set[Index] = set()
1668         iterators = [
1669             self._iter_fexpr_slices(string),
1670             self._iter_nameescape_slices(string),
1671         ]
1672         for it in iterators:
1673             for begin, end in it:
1674                 illegal_indices.update(range(begin, end + 1))
1675         return illegal_indices
1676
1677     def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
1678         """
1679         This method contains the algorithm that StringSplitter uses to
1680         determine which character to split each string at.
1681
1682         Args:
1683             @string: The substring that we are attempting to split.
1684             @max_break_idx: The ideal break index. We will return this value if it
1685             meets all the necessary conditions. In the likely event that it
1686             doesn't we will try to find the closest index BELOW @max_break_idx
1687             that does. If that fails, we will expand our search by also
1688             considering all valid indices ABOVE @max_break_idx.
1689
1690         Pre-Conditions:
1691             * assert_is_leaf_string(@string)
1692             * 0 <= @max_break_idx < len(@string)
1693
1694         Returns:
1695             break_idx, if an index is able to be found that meets all of the
1696             conditions listed in the 'Transformations' section of this classes'
1697             docstring.
1698                 OR
1699             None, otherwise.
1700         """
1701         is_valid_index = is_valid_index_factory(string)
1702
1703         assert is_valid_index(max_break_idx)
1704         assert_is_leaf_string(string)
1705
1706         _illegal_split_indices = self._get_illegal_split_indices(string)
1707
1708         def breaks_unsplittable_expression(i: Index) -> bool:
1709             """
1710             Returns:
1711                 True iff returning @i would result in the splitting of an
1712                 unsplittable expression (which is NOT allowed).
1713             """
1714             return i in _illegal_split_indices
1715
1716         def passes_all_checks(i: Index) -> bool:
1717             """
1718             Returns:
1719                 True iff ALL of the conditions listed in the 'Transformations'
1720                 section of this classes' docstring would be be met by returning @i.
1721             """
1722             is_space = string[i] == " "
1723             is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS
1724
1725             is_not_escaped = True
1726             j = i - 1
1727             while is_valid_index(j) and string[j] == "\\":
1728                 is_not_escaped = not is_not_escaped
1729                 j -= 1
1730
1731             is_big_enough = (
1732                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
1733                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
1734             )
1735             return (
1736                 (is_space or is_split_safe)
1737                 and is_not_escaped
1738                 and is_big_enough
1739                 and not breaks_unsplittable_expression(i)
1740             )
1741
1742         # First, we check all indices BELOW @max_break_idx.
1743         break_idx = max_break_idx
1744         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
1745             break_idx -= 1
1746
1747         if not passes_all_checks(break_idx):
1748             # If that fails, we check all indices ABOVE @max_break_idx.
1749             #
1750             # If we are able to find a valid index here, the next line is going
1751             # to be longer than the specified line length, but it's probably
1752             # better than doing nothing at all.
1753             break_idx = max_break_idx + 1
1754             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
1755                 break_idx += 1
1756
1757             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
1758                 return None
1759
1760         return break_idx
1761
1762     def _maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
1763         if self.normalize_strings:
1764             leaf.value = normalize_string_quotes(leaf.value)
1765
1766     def _normalize_f_string(self, string: str, prefix: str) -> str:
1767         """
1768         Pre-Conditions:
1769             * assert_is_leaf_string(@string)
1770
1771         Returns:
1772             * If @string is an f-string that contains no f-expressions, we
1773             return a string identical to @string except that the 'f' prefix
1774             has been stripped and all double braces (i.e. '{{' or '}}') have
1775             been normalized (i.e. turned into '{' or '}').
1776                 OR
1777             * Otherwise, we return @string.
1778         """
1779         assert_is_leaf_string(string)
1780
1781         if "f" in prefix and not fstring_contains_expr(string):
1782             new_prefix = prefix.replace("f", "")
1783
1784             temp = string[len(prefix) :]
1785             temp = re.sub(r"\{\{", "{", temp)
1786             temp = re.sub(r"\}\}", "}", temp)
1787             new_string = temp
1788
1789             return f"{new_prefix}{new_string}"
1790         else:
1791             return string
1792
1793     def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
1794         LL = list(leaves)
1795
1796         string_op_leaves = []
1797         i = 0
1798         while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
1799             prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
1800             string_op_leaves.append(prefix_leaf)
1801             i += 1
1802         return string_op_leaves
1803
1804
class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
    """
    StringTransformer that wraps strings in parens and then splits at the LPAR.

    Requirements:
        All of the requirements listed in BaseStringSplitter's docstring in
        addition to the requirements listed below:

        * The line is a return/yield statement, which returns/yields a string.
          OR
        * The line is part of a ternary expression (e.g. `x = y if cond else
          z`) such that the line starts with `else <string>`, where <string> is
          some string.
          OR
        * The line is an assert statement, which ends with a string.
          OR
        * The line is an assignment statement (e.g. `x = <string>` or `x +=
          <string>`) such that the variable is being assigned the value of some
          string.
          OR
        * The line is a dictionary key assignment where some valid key is being
          assigned the value of some string.
          OR
        * The line is a lambda expression and the value is a string.
          OR
        * The line starts with an "atom" string that prefers to be wrapped in
          parens. It's preferred to be wrapped when it is an immediate child of
          a list/set/tuple literal, AND the string is surrounded by commas (or is
          the first/last child).

    Transformations:
        The chosen string is wrapped in parentheses and then split at the LPAR.

        We then have one line which ends with an LPAR and another line that
        starts with the chosen string. The latter line is then split again at
        the RPAR. This results in the RPAR (and possibly a trailing comma)
        being placed on its own line.

        NOTE: If any leaves exist to the right of the chosen string (except
        for a trailing comma, which would be placed after the RPAR), those
        leaves are placed inside the parentheses.  In effect, the chosen
        string is not necessarily being "wrapped" by parentheses. We can,
        however, count on the LPAR being placed directly before the chosen
        string.

        In other words, StringParenWrapper creates "atom" strings. These
        can then be split again by StringSplitter, if necessary.

    Collaborations:
        In the event that a string line split by StringParenWrapper is
        changed such that it no longer needs to be given its own line,
        StringParenWrapper relies on StringParenStripper to clean up the
        parentheses it created.

        For "atom" strings that prefer to be wrapped in parens, it requires
        StringSplitter to hold the split until the string is wrapped in parens.
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        Find the single string on @line that should be wrapped in parentheses.

        Returns:
            Ok([string_idx]), if one of the statement patterns listed in the
            'Requirements' section of this class's docstring matches.
                OR
            Err(CannotTransform), if the line ends in an opening bracket, if
            no pattern matches, or if the matched string has no space / East
            Asian stop to split on, would still be too wide on its own line,
            and has no custom splits registered.
        """
        LL = line.leaves

        if LL[-1].type in OPENING_BRACKETS:
            return TErr(
                "Cannot wrap parens around a line that ends in an opening bracket."
            )

        # NOTE(review): `_prefer_paren_wrap_match` is not defined in this
        # class; presumably it is inherited from a base class.
        matchers = (
            self._return_match,
            self._else_match,
            self._assert_match,
            self._assign_match,
            self._dict_or_lambda_match,
            self._prefer_paren_wrap_match,
        )

        # Take the first matcher that reports an index. The result is compared
        # against None (not tested for truthiness) because index 0 is falsy
        # yet may be a legitimate match.
        string_idx: Optional[int] = None
        for matcher in matchers:
            string_idx = matcher(LL)
            if string_idx is not None:
                break

        if string_idx is None:
            return TErr("This line does not contain any non-atomic strings.")

        string_value = LL[string_idx].value
        # If the string has neither spaces nor East Asian stops...
        has_split_point = any(
            char == " " or char in SPLIT_SAFE_CHARS for char in string_value
        )
        if not has_split_point:
            # And will still violate the line length limit when split (the
            # string line sits one level deeper; 4 columns per level)...
            max_string_width = self.line_length - ((line.depth + 1) * 4)
            if str_width(
                string_value
            ) > max_string_width and not self.has_custom_splits(string_value):
                # And has no associated custom splits, then we should NOT put
                # this string on its own line.
                return TErr(
                    "We do not wrap long strings in parentheses when the"
                    " resultant line would still be over the specified line"
                    " length and can't be split further by StringSplitter."
                )

        return Ok([string_idx])

    @staticmethod
    def _return_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the return/yield statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a return/yield statement and the first
        # leaf contains either the "return" or "yield" keywords...
        if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
            0
        ].value in ["return", "yield"]:
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty (invisible) paren right after the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _else_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the ternary expression
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a ternary expression and the first leaf
        # contains the "else" keyword...
        if (
            parent_type(LL[0]) == syms.test
            and LL[0].type == token.NAME
            and LL[0].value == "else"
        ):
            is_valid_index = is_valid_index_factory(LL)

            # Skip over an empty (invisible) paren right after the keyword.
            idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
            # The next visible leaf MUST contain a string...
            if is_valid_index(idx) and LL[idx].type == token.STRING:
                return idx

        return None

    @staticmethod
    def _assert_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assert statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an assert statement and the first leaf
        # contains the "assert" keyword...
        if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a comma...
                if leaf.type == token.COMMA:
                    # Skip over an empty (invisible) paren after the comma.
                    # The bounds check keeps a comma that is the last leaf
                    # from indexing past the end of @LL.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That comma MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _assign_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the assignment statement
            requirements listed in the 'Requirements' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of an expression statement or is a function
        # argument AND the first leaf contains a variable name...
        if (
            parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
            and LL[0].type == token.NAME
        ):
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find either an '=' or '+=' symbol...
                if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
                    # Skip over an empty (invisible) paren after the symbol.
                    # The bounds check keeps a symbol that is the last leaf
                    # from indexing past the end of @LL.
                    idx = (
                        i + 2
                        if is_valid_index(i + 1) and is_empty_par(LL[i + 1])
                        else i + 1
                    )

                    # That symbol MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # The next leaf MAY be a comma iff this line is a part
                        # of a function argument...
                        if (
                            parent_type(LL[0]) == syms.argument
                            and is_valid_index(idx)
                            and LL[idx].type == token.COMMA
                        ):
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    @staticmethod
    def _dict_or_lambda_match(LL: List[Leaf]) -> Optional[int]:
        """
        Returns:
            string_idx such that @LL[string_idx] is equal to our target (i.e.
            matched) string, if this line matches the dictionary key assignment
            statement or lambda expression requirements listed in the
            'Requirements' section of this class's docstring.
                OR
            None, otherwise.
        """
        # If this line is a part of a dictionary key assignment or lambda expression...
        parent_types = [parent_type(LL[0]), parent_type(LL[0].parent)]
        if syms.dictsetmaker in parent_types or syms.lambdef in parent_types:
            is_valid_index = is_valid_index_factory(LL)

            for i, leaf in enumerate(LL):
                # We MUST find a colon, it can either be dict's or lambda's colon...
                # (A colon that is the last leaf has nothing after it to match.)
                if leaf.type == token.COLON and i < len(LL) - 1:
                    idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1

                    # That colon MUST be followed by a string...
                    if is_valid_index(idx) and LL[idx].type == token.STRING:
                        string_idx = idx

                        # Skip the string trailer, if one exists.
                        string_parser = StringParser()
                        idx = string_parser.parse(LL, string_idx)

                        # That string MAY be followed by a comma...
                        if is_valid_index(idx) and LL[idx].type == token.COMMA:
                            idx += 1

                        # But no more leaves are allowed...
                        if not is_valid_index(idx):
                            return string_idx

        return None

    def do_transform(
        self, line: Line, string_indices: List[int]
    ) -> Iterator[TResult[Line]]:
        """
        Wrap the matched string of @line in parentheses.

        Args:
            @line: The line containing the string to wrap.
            @string_indices: Exactly one index into @line.leaves, pointing at
            the matched string.

        Yields:
            Three Ok(Line) results, in order:
            * the leaves left of the string followed by a new LPAR, plus the
              comments that belonged to the string, a trailing comma, or a
              replaced old LPAR;
            * the string (and the leaves that followed it) on a line one
              level deeper;
            * a new RPAR, followed by the trailing comma if @line had one.
        """
        LL = line.leaves
        assert len(string_indices) == 1, (
            f"{self.__class__.__name__} should only find one match at a time, found"
            f" {len(string_indices)}"
        )
        string_idx = string_indices[0]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        comma_idx = -1
        ends_with_comma = LL[comma_idx].type == token.COMMA

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            mode=line.mode,
            depth=line.depth + 1,
            inside_brackets=True,
            should_split_rhs=line.should_split_rhs,
            magic_trailing_comma=line.magic_trailing_comma,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                # The trailing comma goes after the RPAR on the last line.
                right_leaves.pop()

            if old_parens_exist:
                assert right_leaves and right_leaves[-1].type == token.RPAR, (
                    "Apparently, old parentheses do NOT exist?!"
                    f" (left_leaves={left_leaves}, right_leaves={right_leaves})"
                )
                old_rpar_leaf = right_leaves.pop()
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
                #        "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
                    if (
                        index > 0
                        and index < len(left_leaves) - 1
                        and left_leaves[index - 1].type == token.COLON
                        and left_leaves[index + 1].value == "lambda"
                    ):
                        right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
2197
2198
class StringParser:
    """
    A small state machine used to find where a string's "trailer" ends.

    A trailer is whatever is attached directly to a string leaf: nothing at
    all, an old-style formatting operand (e.g. `% varX` or `% (varX, varY)`),
    or an attribute access / method call (e.g. `.format(varX, varY)`).

    NOTE: The parser's state (current state and the count of unmatched LPARs)
    is only initialised in `__init__`, so a new StringParser object MUST be
    instantiated for each string trailer we need to parse.

    Examples:
        Suppose `line` is the `Line` object that corresponds to the following
        line of python code:
        ```
        x = "Some {}.".format("String") + some_other_string
        ```

        and that `string_idx` is some index such that:
        ```
        assert line.leaves[string_idx].value == "Some {}."
        ```

        Then the following code snippet holds:
        ```
        string_parser = StringParser()
        idx = string_parser.parse(line.leaves, string_idx)
        assert line.leaves[idx].type == token.PLUS
        ```
    """

    # Wildcard used as the "token" half of a `_goto` key; it stands for "any
    # token that has no explicit entry for this state".
    DEFAULT_TOKEN: Final = 20210605

    # String Parser States
    START: Final = 1
    DOT: Final = 2
    NAME: Final = 3
    PERCENT: Final = 4
    SINGLE_FMT_ARG: Final = 5
    LPAR: Final = 6
    RPAR: Final = 7
    DONE: Final = 8

    # Transition table: (current state, token type) -> next state.
    _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = {
        # From START, a trailer begins with either '.' or '%'; anything else
        # means there is no trailer.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # After '.', only an attribute / method name is accepted (there is
        # deliberately no default entry for DOT).
        (DOT, token.NAME): NAME,
        # A name followed by '(' is a method call; a name followed by anything
        # else is an attribute and ends the trailer.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # After '%', we either enter a parenthesized argument list or consume
        # one single argument (e.g. a string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # A single '%' argument is the last leaf of the trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # The ')' that closes the trailer's parentheses is its last leaf.
        # (NOTE: There are no entries keyed on the LPAR state; that state, and
        # the nested parentheses seen while in it, are handled as a special
        # case directly in `_next_state`.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        self._unmatched_lpars = 0
        self._state = self.START

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index directly after the last leaf which is a part of the
            string trailer, if a "trailer" exists.
            OR
            @string_idx + 1, if no string "trailer" exists.
        """
        assert leaves[string_idx].type == token.STRING

        # Feed the leaves after the string to the state machine one at a
        # time; the first leaf it rejects is the first one past the trailer.
        for idx in range(string_idx + 1, len(leaves)):
            if not self._next_state(leaves[idx]):
                return idx

        # Every remaining leaf was accepted (or there were none).
        return len(leaves)

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Advance the state machine by one leaf.

        Pre-conditions:
            * On the first call to this function, @leaf MUST be the leaf that
              was directly after the string leaf in question (e.g. if our
              target string is `line.leaves[i]` then the first call to this
              method must be `line.leaves[i + 1]`).
            * On each later call, the leaf passed in MUST be the leaf directly
              following the @leaf of the previous call.

        Returns:
            True iff @leaf is a part of the string's trailer.

        Raises:
            RuntimeError: If the current state has neither an entry for
                @leaf's token type nor a DEFAULT_TOKEN entry in the lookup
                table.
        """
        # Empty LPAR / RPAR leaves are skipped without touching the state.
        if is_empty_par(leaf):
            return True

        leaf_type = leaf.type
        # Every LPAR is counted, whichever state we are in.
        if leaf_type == token.LPAR:
            self._unmatched_lpars += 1

        state = self._state

        # Special case: while in the LPAR state every leaf belongs to the
        # trailer. We only leave this state (for RPAR) once the RPAR that
        # balances all counted LPARs has been seen.
        if state == self.LPAR:
            if leaf_type == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
            return True

        # All other states go through the lookup table: prefer the entry for
        # this exact token, then fall back to the state's default entry.
        following = self._goto.get((state, leaf_type))
        if following is None:
            following = self._goto.get((state, self.DEFAULT_TOKEN))
        if following is None:
            # No exact entry and no default: the table does not cover this
            # situation, which means the parser itself has a logic error.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        self._state = following
        # Reaching DONE means @leaf is the first leaf outside of the trailer.
        return following != self.DONE
2339
2340
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Detach @string_leaf from its parent and return a function that inserts
    new children, one per call, into the spot @string_leaf used to occupy.

    Each call to the returned function places its argument directly after
    the child inserted by the previous call, so the new children end up in
    call order.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`, where the node `N` originally looks like this:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        Now run the following snippet:
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        Afterwards `string_leaf.parent is None` and the node `N` has become:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Capture the parent BEFORE removing the leaf; the value returned by
    # `remove()` is used below as the position for the first insertion.
    parent = string_leaf.parent
    next_slot = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal next_slot

        # These are checked on every call (not when the factory runs), so a
        # missing parent / position only fails once an insertion is attempted.
        assert parent is not None
        assert next_slot is not None

        parent.insert_child(next_slot, child)
        # Advance only after a successful insertion so that the next child
        # lands immediately after this one.
        next_slot = next_slot + 1

    return insert_str_child
2400
2401
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a predicate that tells whether an index is in bounds for @seq.

    The returned function looks at `len(seq)` each time it is called.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND smaller than the length of
            seq (i.e. seq[@idx] does NOT raise an IndexError). Negative
            indices are rejected.
        """
        if idx < 0:
            return False
        return idx < len(seq)

    return is_valid_index