src/black/__init__.py

   1 import ast
   2 import asyncio
   3 from abc import ABC, abstractmethod
   4 from collections import defaultdict
   5 from concurrent.futures import Executor, ThreadPoolExecutor, ProcessPoolExecutor
   6 from contextlib import contextmanager
   7 from datetime import datetime
   8 from enum import Enum
   9 from functools import lru_cache, partial, wraps
  10 import io
  11 import itertools
  12 import logging
  13 from multiprocessing import Manager, freeze_support
  14 import os
  15 from pathlib import Path
  16 import pickle
  17 import regex as re
  18 import signal
  19 import sys
  20 import tempfile
  21 import tokenize
  22 import traceback
  23 from typing import (
  24     Any,
  25     Callable,
  26     Collection,
  27     Dict,
  28     Generator,
  29     Generic,
  30     Iterable,
  31     Iterator,
  32     List,
  33     Optional,
  34     Pattern,
  35     Sequence,
  36     Set,
  37     Sized,
  38     Tuple,
  39     Type,
  40     TypeVar,
  41     Union,
  42     cast,
  43     TYPE_CHECKING,
  44 )
  45 from typing_extensions import Final
  46 from mypy_extensions import mypyc_attr
  47
  48 from appdirs import user_cache_dir
  49 from dataclasses import dataclass, field, replace
  50 import click
  51 import toml
  52 from typed_ast import ast3, ast27
  53 from pathspec import PathSpec
  54
  55 # lib2to3 fork
  56 from blib2to3.pytree import Node, Leaf, type_repr
  57 from blib2to3 import pygram, pytree
  58 from blib2to3.pgen2 import driver, token
  59 from blib2to3.pgen2.grammar import Grammar
  60 from blib2to3.pgen2.parse import ParseError
  61
  62 from _black_version import version as __version__
  63
  64 if TYPE_CHECKING:
  65     import colorama  # noqa: F401
  66
  67 DEFAULT_LINE_LENGTH = 88
  68 DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
  69 DEFAULT_INCLUDES = r"\.pyi?$"
  70 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
  71
  72 STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.
  73
  74
  75 # types
  76 FileContent = str
  77 Encoding = str
  78 NewLine = str
  79 Depth = int
  80 NodeType = int
  81 ParserState = int
  82 LeafID = int
  83 StringID = int
  84 Priority = int
  85 Index = int
  86 LN = Union[Leaf, Node]
  87 Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
  88 Timestamp = float
  89 FileSize = int
  90 CacheInfo = Tuple[Timestamp, FileSize]
  91 Cache = Dict[Path, CacheInfo]
  92 out = partial(click.secho, bold=True, err=True)
  93 err = partial(click.secho, fg="red", err=True)
  94
  95 pygram.initialize(CACHE_DIR)
  96 syms = pygram.python_symbols
  97
  98
  99 class NothingChanged(UserWarning):
 100     """Raised when reformatted code is the same as source."""
 101
 102
 103 class CannotTransform(Exception):
 104     """Base class for errors raised by Transformers."""
 105
 106
 107 class CannotSplit(CannotTransform):
 108     """A readable split that fits the allotted line length is impossible."""
 109
 110
 111 class InvalidInput(ValueError):
 112     """Raised when input source code fails all parse attempts."""
 113
 114
 115 class BracketMatchError(KeyError):
 116     """Raised when an opening bracket is unable to be matched to a closing bracket."""
 117
 118
 119 T = TypeVar("T")
 120 E = TypeVar("E", bound=Exception)
 121
 122
 123 class Ok(Generic[T]):
 124     def __init__(self, value: T) -> None:
 125         self._value = value
 126
 127     def ok(self) -> T:
 128         return self._value
 129
 130
 131 class Err(Generic[E]):
 132     def __init__(self, e: E) -> None:
 133         self._e = e
 134
 135     def err(self) -> E:
 136         return self._e
 137
 138
 139 # The 'Result' return type is used to implement an error-handling model heavily
 140 # influenced by that used by the Rust programming language
 141 # (see https://doc.rust-lang.org/book/ch09-00-error-handling.html).
 142 Result = Union[Ok[T], Err[E]]
 143 TResult = Result[T, CannotTransform]  # (T)ransform Result
 144 TMatchResult = TResult[Index]
 145
 146
 147 class WriteBack(Enum):
 148     NO = 0
 149     YES = 1
 150     DIFF = 2
 151     CHECK = 3
 152     COLOR_DIFF = 4
 153
 154     @classmethod
 155     def from_configuration(
 156         cls, *, check: bool, diff: bool, color: bool = False
 157     ) -> "WriteBack":
 158         if check and not diff:
 159             return cls.CHECK
 160
 161         if diff and color:
 162             return cls.COLOR_DIFF
 163
 164         return cls.DIFF if diff else cls.YES
 165
 166
 167 class Changed(Enum):
 168     NO = 0
 169     CACHED = 1
 170     YES = 2
 171
 172
 173 class TargetVersion(Enum):
 174     PY27 = 2
 175     PY33 = 3
 176     PY34 = 4
 177     PY35 = 5
 178     PY36 = 6
 179     PY37 = 7
 180     PY38 = 8
 181     PY39 = 9
 182
 183     def is_python2(self) -> bool:
 184         return self is TargetVersion.PY27
 185
 186
 187 class Feature(Enum):
 188     # All string literals are unicode
 189     UNICODE_LITERALS = 1
 190     F_STRINGS = 2
 191     NUMERIC_UNDERSCORES = 3
 192     TRAILING_COMMA_IN_CALL = 4
 193     TRAILING_COMMA_IN_DEF = 5
 194     # The following two feature-flags are mutually exclusive, and exactly one should be
 195     # set for every version of python.
 196     ASYNC_IDENTIFIERS = 6
 197     ASYNC_KEYWORDS = 7
 198     ASSIGNMENT_EXPRESSIONS = 8
 199     POS_ONLY_ARGUMENTS = 9
 200     RELAXED_DECORATORS = 10
 201     FORCE_OPTIONAL_PARENTHESES = 50
 202
 203
 204 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
 205     TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
 206     TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 207     TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 208     TargetVersion.PY35: {
 209         Feature.UNICODE_LITERALS,
 210         Feature.TRAILING_COMMA_IN_CALL,
 211         Feature.ASYNC_IDENTIFIERS,
 212     },
 213     TargetVersion.PY36: {
 214         Feature.UNICODE_LITERALS,
 215         Feature.F_STRINGS,
 216         Feature.NUMERIC_UNDERSCORES,
 217         Feature.TRAILING_COMMA_IN_CALL,
 218         Feature.TRAILING_COMMA_IN_DEF,
 219         Feature.ASYNC_IDENTIFIERS,
 220     },
 221     TargetVersion.PY37: {
 222         Feature.UNICODE_LITERALS,
 223         Feature.F_STRINGS,
 224         Feature.NUMERIC_UNDERSCORES,
 225         Feature.TRAILING_COMMA_IN_CALL,
 226         Feature.TRAILING_COMMA_IN_DEF,
 227         Feature.ASYNC_KEYWORDS,
 228     },
 229     TargetVersion.PY38: {
 230         Feature.UNICODE_LITERALS,
 231         Feature.F_STRINGS,
 232         Feature.NUMERIC_UNDERSCORES,
 233         Feature.TRAILING_COMMA_IN_CALL,
 234         Feature.TRAILING_COMMA_IN_DEF,
 235         Feature.ASYNC_KEYWORDS,
 236         Feature.ASSIGNMENT_EXPRESSIONS,
 237         Feature.POS_ONLY_ARGUMENTS,
 238     },
 239     TargetVersion.PY39: {
 240         Feature.UNICODE_LITERALS,
 241         Feature.F_STRINGS,
 242         Feature.NUMERIC_UNDERSCORES,
 243         Feature.TRAILING_COMMA_IN_CALL,
 244         Feature.TRAILING_COMMA_IN_DEF,
 245         Feature.ASYNC_KEYWORDS,
 246         Feature.ASSIGNMENT_EXPRESSIONS,
 247         Feature.RELAXED_DECORATORS,
 248         Feature.POS_ONLY_ARGUMENTS,
 249     },
 250 }
 251
 252
 253 @dataclass
 254 class Mode:
 255     target_versions: Set[TargetVersion] = field(default_factory=set)
 256     line_length: int = DEFAULT_LINE_LENGTH
 257     string_normalization: bool = True
 258     experimental_string_processing: bool = False
 259     is_pyi: bool = False
 260
 261     def get_cache_key(self) -> str:
 262         if self.target_versions:
 263             version_str = ",".join(
 264                 str(version.value)
 265                 for version in sorted(self.target_versions, key=lambda v: v.value)
 266             )
 267         else:
 268             version_str = "-"
 269         parts = [
 270             version_str,
 271             str(self.line_length),
 272             str(int(self.string_normalization)),
 273             str(int(self.is_pyi)),
 274         ]
 275         return ".".join(parts)
 276
 277
 278 # Legacy name, left for integrations.
 279 FileMode = Mode
 280
 281
 282 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 283     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 284
 285
 286 def find_pyproject_toml(path_search_start: Iterable[str]) -> Optional[str]:
 287     """Find the absolute filepath to a pyproject.toml if it exists"""
 288     path_project_root = find_project_root(path_search_start)
 289     path_pyproject_toml = path_project_root / "pyproject.toml"
 290     return str(path_pyproject_toml) if path_pyproject_toml.is_file() else None
 291
 292
 293 def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
 294     """Parse a pyproject toml file, pulling out relevant parts for Black
 295
 296     If parsing fails, will raise a toml.TomlDecodeError
 297     """
 298     pyproject_toml = toml.load(path_config)
 299     config = pyproject_toml.get("tool", {}).get("black", {})
 300     return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 301
 302
 303 def read_pyproject_toml(
 304     ctx: click.Context, param: click.Parameter, value: Optional[str]
 305 ) -> Optional[str]:
 306     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 307
 308     Returns the path to a successfully found and read configuration file, None
 309     otherwise.
 310     """
 311     if not value:
 312         value = find_pyproject_toml(ctx.params.get("src", ()))
 313         if value is None:
 314             return None
 315
 316     try:
 317         config = parse_pyproject_toml(value)
 318     except (toml.TomlDecodeError, OSError) as e:
 319         raise click.FileError(
 320             filename=value, hint=f"Error reading configuration file: {e}"
 321         )
 322
 323     if not config:
 324         return None
 325     else:
 326         # Sanitize the values to be Click friendly. For more information please see:
 327         # https://github.com/psf/black/issues/1458
 328         # https://github.com/pallets/click/issues/1567
 329         config = {
 330             k: str(v) if not isinstance(v, (list, dict)) else v
 331             for k, v in config.items()
 332         }
 333
 334     target_version = config.get("target_version")
 335     if target_version is not None and not isinstance(target_version, list):
 336         raise click.BadOptionUsage(
 337             "target-version", "Config key target-version must be a list"
 338         )
 339
 340     default_map: Dict[str, Any] = {}
 341     if ctx.default_map:
 342         default_map.update(ctx.default_map)
 343     default_map.update(config)
 344
 345     ctx.default_map = default_map
 346     return value
 347
 348
 349 def target_version_option_callback(
 350     c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
 351 ) -> List[TargetVersion]:
 352     """Compute the target versions from a --target-version flag.
 353
 354     This is its own function because mypy couldn't infer the type correctly
 355     when it was a lambda, causing mypyc trouble.
 356     """
 357     return [TargetVersion[val.upper()] for val in v]
 358
 359
 360 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 361 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
 362 @click.option(
 363     "-l",
 364     "--line-length",
 365     type=int,
 366     default=DEFAULT_LINE_LENGTH,
 367     help="How many characters per line to allow.",
 368     show_default=True,
 369 )
 370 @click.option(
 371     "-t",
 372     "--target-version",
 373     type=click.Choice([v.name.lower() for v in TargetVersion]),
 374     callback=target_version_option_callback,
 375     multiple=True,
 376     help=(
 377         "Python versions that should be supported by Black's output. [default: per-file"
 378         " auto-detection]"
 379     ),
 380 )
 381 @click.option(
 382     "--pyi",
 383     is_flag=True,
 384     help=(
 385         "Format all input files like typing stubs regardless of file extension (useful"
 386         " when piping source on standard input)."
 387     ),
 388 )
 389 @click.option(
 390     "-S",
 391     "--skip-string-normalization",
 392     is_flag=True,
 393     help="Don't normalize string quotes or prefixes.",
 394 )
 395 @click.option(
 396     "--experimental-string-processing",
 397     is_flag=True,
 398     hidden=True,
 399     help=(
 400         "Experimental option that performs more normalization on string literals."
 401         " Currently disabled because it leads to some crashes."
 402     ),
 403 )
 404 @click.option(
 405     "--check",
 406     is_flag=True,
 407     help=(
 408         "Don't write the files back, just return the status.  Return code 0 means"
 409         " nothing would change.  Return code 1 means some files would be reformatted."
 410         " Return code 123 means there was an internal error."
 411     ),
 412 )
 413 @click.option(
 414     "--diff",
 415     is_flag=True,
 416     help="Don't write the files back, just output a diff for each file on stdout.",
 417 )
 418 @click.option(
 419     "--color/--no-color",
 420     is_flag=True,
 421     help="Show colored diff. Only applies when `--diff` is given.",
 422 )
 423 @click.option(
 424     "--fast/--safe",
 425     is_flag=True,
 426     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 427 )
 428 @click.option(
 429     "--include",
 430     type=str,
 431     default=DEFAULT_INCLUDES,
 432     help=(
 433         "A regular expression that matches files and directories that should be"
 434         " included on recursive searches.  An empty value means all files are included"
 435         " regardless of the name.  Use forward slashes for directories on all platforms"
 436         " (Windows, too).  Exclusions are calculated first, inclusions later."
 437     ),
 438     show_default=True,
 439 )
 440 @click.option(
 441     "--exclude",
 442     type=str,
 443     default=DEFAULT_EXCLUDES,
 444     help=(
 445         "A regular expression that matches files and directories that should be"
 446         " excluded on recursive searches.  An empty value means no paths are excluded."
 447         " Use forward slashes for directories on all platforms (Windows, too). "
 448         " Exclusions are calculated first, inclusions later."
 449     ),
 450     show_default=True,
 451 )
 452 @click.option(
 453     "--force-exclude",
 454     type=str,
 455     help=(
 456         "Like --exclude, but files and directories matching this regex will be "
 457         "excluded even when they are passed explicitly as arguments."
 458     ),
 459 )
 460 @click.option(
 461     "-q",
 462     "--quiet",
 463     is_flag=True,
 464     help=(
 465         "Don't emit non-error messages to stderr. Errors are still emitted; silence"
 466         " those with 2>/dev/null."
 467     ),
 468 )
 469 @click.option(
 470     "-v",
 471     "--verbose",
 472     is_flag=True,
 473     help=(
 474         "Also emit messages to stderr about files that were not changed or were ignored"
 475         " due to --exclude=."
 476     ),
 477 )
 478 @click.version_option(version=__version__)
 479 @click.argument(
 480     "src",
 481     nargs=-1,
 482     type=click.Path(
 483         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 484     ),
 485     is_eager=True,
 486 )
 487 @click.option(
 488     "--config",
 489     type=click.Path(
 490         exists=True,
 491         file_okay=True,
 492         dir_okay=False,
 493         readable=True,
 494         allow_dash=False,
 495         path_type=str,
 496     ),
 497     is_eager=True,
 498     callback=read_pyproject_toml,
 499     help="Read configuration from FILE path.",
 500 )
 501 @click.pass_context
 502 def main(
 503     ctx: click.Context,
 504     code: Optional[str],
 505     line_length: int,
 506     target_version: List[TargetVersion],
 507     check: bool,
 508     diff: bool,
 509     color: bool,
 510     fast: bool,
 511     pyi: bool,
 512     skip_string_normalization: bool,
 513     experimental_string_processing: bool,
 514     quiet: bool,
 515     verbose: bool,
 516     include: str,
 517     exclude: str,
 518     force_exclude: Optional[str],
 519     src: Tuple[str, ...],
 520     config: Optional[str],
 521 ) -> None:
 522     """The uncompromising code formatter."""
 523     write_back = WriteBack.from_configuration(check=check, diff=diff, color=color)
 524     if target_version:
 525         versions = set(target_version)
 526     else:
 527         # We'll autodetect later.
 528         versions = set()
 529     mode = Mode(
 530         target_versions=versions,
 531         line_length=line_length,
 532         is_pyi=pyi,
 533         string_normalization=not skip_string_normalization,
 534         experimental_string_processing=experimental_string_processing,
 535     )
 536     if config and verbose:
 537         out(f"Using configuration from {config}.", bold=False, fg="blue")
 538     if code is not None:
 539         print(format_str(code, mode=mode))
 540         ctx.exit(0)
 541     report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose)
 542     sources = get_sources(
 543         ctx=ctx,
 544         src=src,
 545         quiet=quiet,
 546         verbose=verbose,
 547         include=include,
 548         exclude=exclude,
 549         force_exclude=force_exclude,
 550         report=report,
 551     )
 552
 553     path_empty(
 554         sources,
 555         "No Python files are present to be formatted. Nothing to do 😴",
 556         quiet,
 557         verbose,
 558         ctx,
 559     )
 560
 561     if len(sources) == 1:
 562         reformat_one(
 563             src=sources.pop(),
 564             fast=fast,
 565             write_back=write_back,
 566             mode=mode,
 567             report=report,
 568         )
 569     else:
 570         reformat_many(
 571             sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
 572         )
 573
 574     if verbose or not quiet:
 575         out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
 576         click.secho(str(report), err=True)
 577     ctx.exit(report.return_code)
 578
 579
 580 def get_sources(
 581     *,
 582     ctx: click.Context,
 583     src: Tuple[str, ...],
 584     quiet: bool,
 585     verbose: bool,
 586     include: str,
 587     exclude: str,
 588     force_exclude: Optional[str],
 589     report: "Report",
 590 ) -> Set[Path]:
 591     """Compute the set of files to be formatted."""
 592     try:
 593         include_regex = re_compile_maybe_verbose(include)
 594     except re.error:
 595         err(f"Invalid regular expression for include given: {include!r}")
 596         ctx.exit(2)
 597     try:
 598         exclude_regex = re_compile_maybe_verbose(exclude)
 599     except re.error:
 600         err(f"Invalid regular expression for exclude given: {exclude!r}")
 601         ctx.exit(2)
 602     try:
 603         force_exclude_regex = (
 604             re_compile_maybe_verbose(force_exclude) if force_exclude else None
 605         )
 606     except re.error:
 607         err(f"Invalid regular expression for force_exclude given: {force_exclude!r}")
 608         ctx.exit(2)
 609
 610     root = find_project_root(src)
 611     sources: Set[Path] = set()
 612     path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
 613     gitignore = get_gitignore(root)
 614
 615     for s in src:
 616         p = Path(s)
 617         if p.is_dir():
 618             sources.update(
 619                 gen_python_files(
 620                     p.iterdir(),
 621                     root,
 622                     include_regex,
 623                     exclude_regex,
 624                     force_exclude_regex,
 625                     report,
 626                     gitignore,
 627                 )
 628             )
 629         elif s == "-":
 630             sources.add(p)
 631         elif p.is_file():
 632             normalized_path = normalize_path_maybe_ignore(p, root, report)
 633             if normalized_path is None:
 634                 continue
 635
 636             normalized_path = "/" + normalized_path
 637             # Hard-exclude any files that matches the `--force-exclude` regex.
 638             if force_exclude_regex:
 639                 force_exclude_match = force_exclude_regex.search(normalized_path)
 640             else:
 641                 force_exclude_match = None
 642             if force_exclude_match and force_exclude_match.group(0):
 643                 report.path_ignored(p, "matches the --force-exclude regular expression")
 644                 continue
 645
 646             sources.add(p)
 647         else:
 648             err(f"invalid path: {s}")
 649     return sources
 650
 651
 652 def path_empty(
 653     src: Sized, msg: str, quiet: bool, verbose: bool, ctx: click.Context
 654 ) -> None:
 655     """
 656     Exit if there is no `src` provided for formatting
 657     """
 658     if not src and (verbose or not quiet):
 659         out(msg)
 660         ctx.exit(0)
 661
 662
 663 def reformat_one(
 664     src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 665 ) -> None:
 666     """Reformat a single file under `src` without spawning child processes.
 667
 668     `fast`, `write_back`, and `mode` options are passed to
 669     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 670     """
 671     try:
 672         changed = Changed.NO
 673         if not src.is_file() and str(src) == "-":
 674             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 675                 changed = Changed.YES
 676         else:
 677             cache: Cache = {}
 678             if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 679                 cache = read_cache(mode)
 680                 res_src = src.resolve()
 681                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 682                     changed = Changed.CACHED
 683             if changed is not Changed.CACHED and format_file_in_place(
 684                 src, fast=fast, write_back=write_back, mode=mode
 685             ):
 686                 changed = Changed.YES
 687             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 688                 write_back is WriteBack.CHECK and changed is Changed.NO
 689             ):
 690                 write_cache(cache, [src], mode)
 691         report.done(src, changed)
 692     except Exception as exc:
 693         if report.verbose:
 694             traceback.print_exc()
 695         report.failed(src, str(exc))
 696
 697
 698 def reformat_many(
 699     sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 700 ) -> None:
 701     """Reformat multiple files using a ProcessPoolExecutor."""
 702     executor: Executor
 703     loop = asyncio.get_event_loop()
 704     worker_count = os.cpu_count()
 705     if sys.platform == "win32":
 706         # Work around https://bugs.python.org/issue26903
 707         worker_count = min(worker_count, 61)
 708     try:
 709         executor = ProcessPoolExecutor(max_workers=worker_count)
 710     except (ImportError, OSError):
 711         # we arrive here if the underlying system does not support multi-processing
 712         # like in AWS Lambda or Termux, in which case we gracefully fallback to
 713         # a ThreadPollExecutor with just a single worker (more workers would not do us
 714         # any good due to the Global Interpreter Lock)
 715         executor = ThreadPoolExecutor(max_workers=1)
 716
 717     try:
 718         loop.run_until_complete(
 719             schedule_formatting(
 720                 sources=sources,
 721                 fast=fast,
 722                 write_back=write_back,
 723                 mode=mode,
 724                 report=report,
 725                 loop=loop,
 726                 executor=executor,
 727             )
 728         )
 729     finally:
 730         shutdown(loop)
 731         if executor is not None:
 732             executor.shutdown()
 733
 734
 735 async def schedule_formatting(
 736     sources: Set[Path],
 737     fast: bool,
 738     write_back: WriteBack,
 739     mode: Mode,
 740     report: "Report",
 741     loop: asyncio.AbstractEventLoop,
 742     executor: Executor,
 743 ) -> None:
 744     """Run formatting of `sources` in parallel using the provided `executor`.
 745
 746     (Use ProcessPoolExecutors for actual parallelism.)
 747
 748     `write_back`, `fast`, and `mode` options are passed to
 749     :func:`format_file_in_place`.
 750     """
 751     cache: Cache = {}
 752     if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 753         cache = read_cache(mode)
 754         sources, cached = filter_cached(cache, sources)
 755         for src in sorted(cached):
 756             report.done(src, Changed.CACHED)
 757     if not sources:
 758         return
 759
 760     cancelled = []
 761     sources_to_cache = []
 762     lock = None
 763     if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 764         # For diff output, we need locks to ensure we don't interleave output
 765         # from different processes.
 766         manager = Manager()
 767         lock = manager.Lock()
 768     tasks = {
 769         asyncio.ensure_future(
 770             loop.run_in_executor(
 771                 executor, format_file_in_place, src, fast, mode, write_back, lock
 772             )
 773         ): src
 774         for src in sorted(sources)
 775     }
 776     pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
 777     try:
 778         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 779         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 780     except NotImplementedError:
 781         # There are no good alternatives for these on Windows.
 782         pass
 783     while pending:
 784         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 785         for task in done:
 786             src = tasks.pop(task)
 787             if task.cancelled():
 788                 cancelled.append(task)
 789             elif task.exception():
 790                 report.failed(src, str(task.exception()))
 791             else:
 792                 changed = Changed.YES if task.result() else Changed.NO
 793                 # If the file was written back or was successfully checked as
 794                 # well-formatted, store this information in the cache.
 795                 if write_back is WriteBack.YES or (
 796                     write_back is WriteBack.CHECK and changed is Changed.NO
 797                 ):
 798                     sources_to_cache.append(src)
 799                 report.done(src, changed)
 800     if cancelled:
 801         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 802     if sources_to_cache:
 803         write_cache(cache, sources_to_cache, mode)
 804
 805
 806 def format_file_in_place(
 807     src: Path,
 808     fast: bool,
 809     mode: Mode,
 810     write_back: WriteBack = WriteBack.NO,
 811     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 812 ) -> bool:
 813     """Format file under `src` path. Return True if changed.
 814
 815     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 816     code to the file.
 817     `mode` and `fast` options are passed to :func:`format_file_contents`.
 818     """
 819     if src.suffix == ".pyi":
 820         mode = replace(mode, is_pyi=True)
 821
 822     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 823     with open(src, "rb") as buf:
 824         src_contents, encoding, newline = decode_bytes(buf.read())
 825     try:
 826         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 827     except NothingChanged:
 828         return False
 829
 830     if write_back == WriteBack.YES:
 831         with open(src, "w", encoding=encoding, newline=newline) as f:
 832             f.write(dst_contents)
 833     elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 834         now = datetime.utcnow()
 835         src_name = f"{src}\t{then} +0000"
 836         dst_name = f"{src}\t{now} +0000"
 837         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 838
 839         if write_back == write_back.COLOR_DIFF:
 840             diff_contents = color_diff(diff_contents)
 841
 842         with lock or nullcontext():
 843             f = io.TextIOWrapper(
 844                 sys.stdout.buffer,
 845                 encoding=encoding,
 846                 newline=newline,
 847                 write_through=True,
 848             )
 849             f = wrap_stream_for_windows(f)
 850             f.write(diff_contents)
 851             f.detach()
 852
 853     return True
 854
 855
 856 def color_diff(contents: str) -> str:
 857     """Inject the ANSI color codes to the diff."""
 858     lines = contents.split("\n")
 859     for i, line in enumerate(lines):
 860         if line.startswith("+++") or line.startswith("---"):
 861             line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
 862         elif line.startswith("@@"):
 863             line = "\033[36m" + line + "\033[0m"  # cyan, reset
 864         elif line.startswith("+"):
 865             line = "\033[32m" + line + "\033[0m"  # green, reset
 866         elif line.startswith("-"):
 867             line = "\033[31m" + line + "\033[0m"  # red, reset
 868         lines[i] = line
 869     return "\n".join(lines)
 870
 871
 872 def wrap_stream_for_windows(
 873     f: io.TextIOWrapper,
 874 ) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32"]:
 875     """
 876     Wrap stream with colorama's wrap_stream so colors are shown on Windows.
 877
 878     If `colorama` is unavailable, the original stream is returned unmodified.
 879     Otherwise, the `wrap_stream()` function determines whether the stream needs
 880     to be wrapped for a Windows environment and will accordingly either return
 881     an `AnsiToWin32` wrapper or the original stream.
 882     """
 883     try:
 884         from colorama.initialise import wrap_stream
 885     except ImportError:
 886         return f
 887     else:
 888         # Set `strip=False` to avoid needing to modify test_express_diff_with_color.
 889         return wrap_stream(f, convert=None, strip=False, autoreset=False, wrap=True)
 890
 891
 892 def format_stdin_to_stdout(
 893     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode
 894 ) -> bool:
 895     """Format file on stdin. Return True if changed.
 896
 897     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 898     write a diff to stdout. The `mode` argument is passed to
 899     :func:`format_file_contents`.
 900     """
 901     then = datetime.utcnow()
 902     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 903     dst = src
 904     try:
 905         dst = format_file_contents(src, fast=fast, mode=mode)
 906         return True
 907
 908     except NothingChanged:
 909         return False
 910
 911     finally:
 912         f = io.TextIOWrapper(
 913             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 914         )
 915         if write_back == WriteBack.YES:
 916             f.write(dst)
 917         elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 918             now = datetime.utcnow()
 919             src_name = f"STDIN\t{then} +0000"
 920             dst_name = f"STDOUT\t{now} +0000"
 921             d = diff(src, dst, src_name, dst_name)
 922             if write_back == WriteBack.COLOR_DIFF:
 923                 d = color_diff(d)
 924                 f = wrap_stream_for_windows(f)
 925             f.write(d)
 926         f.detach()
 927
 928
 929 def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
 930     """Reformat contents of a file and return new contents.
 931
 932     If `fast` is False, additionally confirm that the reformatted code is
 933     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 934     `mode` is passed to :func:`format_str`.
 935     """
 936     if not src_contents.strip():
 937         raise NothingChanged
 938
 939     dst_contents = format_str(src_contents, mode=mode)
 940     if src_contents == dst_contents:
 941         raise NothingChanged
 942
 943     if not fast:
 944         assert_equivalent(src_contents, dst_contents)
 945         assert_stable(src_contents, dst_contents, mode=mode)
 946     return dst_contents
 947
 948
 949 def format_str(src_contents: str, *, mode: Mode) -> FileContent:
 950     """Reformat a string and return new contents.
 951
 952     `mode` determines formatting options, such as how many characters per line are
 953     allowed.  Example:
 954
 955     >>> import black
 956     >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode()))
 957     def f(arg: str = "") -> None:
 958         ...
 959
 960     A more complex example:
 961
 962     >>> print(
 963     ...   black.format_str(
 964     ...     "def f(arg:str='')->None: hey",
 965     ...     mode=black.Mode(
 966     ...       target_versions={black.TargetVersion.PY36},
 967     ...       line_length=10,
 968     ...       string_normalization=False,
 969     ...       is_pyi=False,
 970     ...     ),
 971     ...   ),
 972     ... )
 973     def f(
 974         arg: str = '',
 975     ) -> None:
 976         hey
 977
 978     """
 979     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 980     dst_contents = []
 981     future_imports = get_future_imports(src_node)
 982     if mode.target_versions:
 983         versions = mode.target_versions
 984     else:
 985         versions = detect_target_versions(src_node)
 986     normalize_fmt_off(src_node)
 987     lines = LineGenerator(
 988         remove_u_prefix="unicode_literals" in future_imports
 989         or supports_feature(versions, Feature.UNICODE_LITERALS),
 990         is_pyi=mode.is_pyi,
 991         normalize_strings=mode.string_normalization,
 992     )
 993     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 994     empty_line = Line()
 995     after = 0
 996     split_line_features = {
 997         feature
 998         for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
 999         if supports_feature(versions, feature)
1000     }
1001     for current_line in lines.visit(src_node):
1002         dst_contents.append(str(empty_line) * after)
1003         before, after = elt.maybe_empty_lines(current_line)
1004         dst_contents.append(str(empty_line) * before)
1005         for line in transform_line(
1006             current_line, mode=mode, features=split_line_features
1007         ):
1008             dst_contents.append(str(line))
1009     return "".join(dst_contents)
1010
1011
1012 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
1013     """Return a tuple of (decoded_contents, encoding, newline).
1014
1015     `newline` is either CRLF or LF but `decoded_contents` is decoded with
1016     universal newlines (i.e. only contains LF).
1017     """
1018     srcbuf = io.BytesIO(src)
1019     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
1020     if not lines:
1021         return "", encoding, "\n"
1022
1023     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
1024     srcbuf.seek(0)
1025     with io.TextIOWrapper(srcbuf, encoding) as tiow:
1026         return tiow.read(), encoding, newline
1027
1028
1029 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
1030     if not target_versions:
1031         # No target_version specified, so try all grammars.
1032         return [
1033             # Python 3.7+
1034             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
1035             # Python 3.0-3.6
1036             pygram.python_grammar_no_print_statement_no_exec_statement,
1037             # Python 2.7 with future print_function import
1038             pygram.python_grammar_no_print_statement,
1039             # Python 2.7
1040             pygram.python_grammar,
1041         ]
1042
1043     if all(version.is_python2() for version in target_versions):
1044         # Python 2-only code, so try Python 2 grammars.
1045         return [
1046             # Python 2.7 with future print_function import
1047             pygram.python_grammar_no_print_statement,
1048             # Python 2.7
1049             pygram.python_grammar,
1050         ]
1051
1052     # Python 3-compatible code, so only try Python 3 grammar.
1053     grammars = []
1054     # If we have to parse both, try to parse async as a keyword first
1055     if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
1056         # Python 3.7+
1057         grammars.append(
1058             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
1059         )
1060     if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
1061         # Python 3.0-3.6
1062         grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
1063     # At least one of the above branches must have been taken, because every Python
1064     # version has exactly one of the two 'ASYNC_*' flags
1065     return grammars
1066
1067
1068 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
1069     """Given a string with source, return the lib2to3 Node."""
1070     if not src_txt.endswith("\n"):
1071         src_txt += "\n"
1072
1073     for grammar in get_grammars(set(target_versions)):
1074         drv = driver.Driver(grammar, pytree.convert)
1075         try:
1076             result = drv.parse_string(src_txt, True)
1077             break
1078
1079         except ParseError as pe:
1080             lineno, column = pe.context[1]
1081             lines = src_txt.splitlines()
1082             try:
1083                 faulty_line = lines[lineno - 1]
1084             except IndexError:
1085                 faulty_line = "<line number missing in source>"
1086             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
1087     else:
1088         raise exc from None
1089
1090     if isinstance(result, Leaf):
1091         result = Node(syms.file_input, [result])
1092     return result
1093
1094
1095 def lib2to3_unparse(node: Node) -> str:
1096     """Given a lib2to3 node, return its string representation."""
1097     code = str(node)
1098     return code
1099
1100
1101 class Visitor(Generic[T]):
1102     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
1103
1104     def visit(self, node: LN) -> Iterator[T]:
1105         """Main method to visit `node` and its children.
1106
1107         It tries to find a `visit_*()` method for the given `node.type`, like
1108         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
1109         If no dedicated `visit_*()` method is found, chooses `visit_default()`
1110         instead.
1111
1112         Then yields objects of type `T` from the selected visitor.
1113         """
1114         if node.type < 256:
1115             name = token.tok_name[node.type]
1116         else:
1117             name = str(type_repr(node.type))
1118         # We explicitly branch on whether a visitor exists (instead of
1119         # using self.visit_default as the default arg to getattr) in order
1120         # to save needing to create a bound method object and so mypyc can
1121         # generate a native call to visit_default.
1122         visitf = getattr(self, f"visit_{name}", None)
1123         if visitf:
1124             yield from visitf(node)
1125         else:
1126             yield from self.visit_default(node)
1127
1128     def visit_default(self, node: LN) -> Iterator[T]:
1129         """Default `visit_*()` implementation. Recurses to children of `node`."""
1130         if isinstance(node, Node):
1131             for child in node.children:
1132                 yield from self.visit(child)
1133
1134
1135 @dataclass
1136 class DebugVisitor(Visitor[T]):
1137     tree_depth: int = 0
1138
1139     def visit_default(self, node: LN) -> Iterator[T]:
1140         indent = " " * (2 * self.tree_depth)
1141         if isinstance(node, Node):
1142             _type = type_repr(node.type)
1143             out(f"{indent}{_type}", fg="yellow")
1144             self.tree_depth += 1
1145             for child in node.children:
1146                 yield from self.visit(child)
1147
1148             self.tree_depth -= 1
1149             out(f"{indent}/{_type}", fg="yellow", bold=False)
1150         else:
1151             _type = token.tok_name.get(node.type, str(node.type))
1152             out(f"{indent}{_type}", fg="blue", nl=False)
1153             if node.prefix:
1154                 # We don't have to handle prefixes for `Node` objects since
1155                 # that delegates to the first child anyway.
1156                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
1157             out(f" {node.value!r}", fg="blue", bold=False)
1158
1159     @classmethod
1160     def show(cls, code: Union[str, Leaf, Node]) -> None:
1161         """Pretty-print the lib2to3 AST of a given string of `code`.
1162
1163         Convenience method for debugging.
1164         """
1165         v: DebugVisitor[None] = DebugVisitor()
1166         if isinstance(code, str):
1167             code = lib2to3_parse(code)
1168         list(v.visit(code))
1169
1170
1171 WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
1172 STATEMENT: Final = {
1173     syms.if_stmt,
1174     syms.while_stmt,
1175     syms.for_stmt,
1176     syms.try_stmt,
1177     syms.except_clause,
1178     syms.with_stmt,
1179     syms.funcdef,
1180     syms.classdef,
1181 }
1182 STANDALONE_COMMENT: Final = 153
1183 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
1184 LOGIC_OPERATORS: Final = {"and", "or"}
1185 COMPARATORS: Final = {
1186     token.LESS,
1187     token.GREATER,
1188     token.EQEQUAL,
1189     token.NOTEQUAL,
1190     token.LESSEQUAL,
1191     token.GREATEREQUAL,
1192 }
1193 MATH_OPERATORS: Final = {
1194     token.VBAR,
1195     token.CIRCUMFLEX,
1196     token.AMPER,
1197     token.LEFTSHIFT,
1198     token.RIGHTSHIFT,
1199     token.PLUS,
1200     token.MINUS,
1201     token.STAR,
1202     token.SLASH,
1203     token.DOUBLESLASH,
1204     token.PERCENT,
1205     token.AT,
1206     token.TILDE,
1207     token.DOUBLESTAR,
1208 }
1209 STARS: Final = {token.STAR, token.DOUBLESTAR}
1210 VARARGS_SPECIALS: Final = STARS | {token.SLASH}
1211 VARARGS_PARENTS: Final = {
1212     syms.arglist,
1213     syms.argument,  # double star in arglist
1214     syms.trailer,  # single argument to call
1215     syms.typedargslist,
1216     syms.varargslist,  # lambdas
1217 }
1218 UNPACKING_PARENTS: Final = {
1219     syms.atom,  # single element of a list or set literal
1220     syms.dictsetmaker,
1221     syms.listmaker,
1222     syms.testlist_gexp,
1223     syms.testlist_star_expr,
1224 }
1225 TEST_DESCENDANTS: Final = {
1226     syms.test,
1227     syms.lambdef,
1228     syms.or_test,
1229     syms.and_test,
1230     syms.not_test,
1231     syms.comparison,
1232     syms.star_expr,
1233     syms.expr,
1234     syms.xor_expr,
1235     syms.and_expr,
1236     syms.shift_expr,
1237     syms.arith_expr,
1238     syms.trailer,
1239     syms.term,
1240     syms.power,
1241 }
1242 ASSIGNMENTS: Final = {
1243     "=",
1244     "+=",
1245     "-=",
1246     "*=",
1247     "@=",
1248     "/=",
1249     "%=",
1250     "&=",
1251     "|=",
1252     "^=",
1253     "<<=",
1254     ">>=",
1255     "**=",
1256     "//=",
1257 }
1258 COMPREHENSION_PRIORITY: Final = 20
1259 COMMA_PRIORITY: Final = 18
1260 TERNARY_PRIORITY: Final = 16
1261 LOGIC_PRIORITY: Final = 14
1262 STRING_PRIORITY: Final = 12
1263 COMPARATOR_PRIORITY: Final = 10
1264 MATH_PRIORITIES: Final = {
1265     token.VBAR: 9,
1266     token.CIRCUMFLEX: 8,
1267     token.AMPER: 7,
1268     token.LEFTSHIFT: 6,
1269     token.RIGHTSHIFT: 6,
1270     token.PLUS: 5,
1271     token.MINUS: 5,
1272     token.STAR: 4,
1273     token.SLASH: 4,
1274     token.DOUBLESLASH: 4,
1275     token.PERCENT: 4,
1276     token.AT: 4,
1277     token.TILDE: 3,
1278     token.DOUBLESTAR: 2,
1279 }
1280 DOT_PRIORITY: Final = 1
1281
1282
1283 @dataclass
1284 class BracketTracker:
1285     """Keeps track of brackets on a line."""
1286
1287     depth: int = 0
1288     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict)
1289     delimiters: Dict[LeafID, Priority] = field(default_factory=dict)
1290     previous: Optional[Leaf] = None
1291     _for_loop_depths: List[int] = field(default_factory=list)
1292     _lambda_argument_depths: List[int] = field(default_factory=list)
1293     invisible: List[Leaf] = field(default_factory=list)
1294
1295     def mark(self, leaf: Leaf) -> None:
1296         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
1297
1298         All leaves receive an int `bracket_depth` field that stores how deep
1299         within brackets a given leaf is. 0 means there are no enclosing brackets
1300         that started on this line.
1301
1302         If a leaf is itself a closing bracket, it receives an `opening_bracket`
1303         field that it forms a pair with. This is a one-directional link to
1304         avoid reference cycles.
1305
1306         If a leaf is a delimiter (a token on which Black can split the line if
1307         needed) and it's on depth 0, its `id()` is stored in the tracker's
1308         `delimiters` field.
1309         """
1310         if leaf.type == token.COMMENT:
1311             return
1312
1313         self.maybe_decrement_after_for_loop_variable(leaf)
1314         self.maybe_decrement_after_lambda_arguments(leaf)
1315         if leaf.type in CLOSING_BRACKETS:
1316             self.depth -= 1
1317             try:
1318                 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
1319             except KeyError as e:
1320                 raise BracketMatchError(
1321                     "Unable to match a closing bracket to the following opening"
1322                     f" bracket: {leaf}"
1323                 ) from e
1324             leaf.opening_bracket = opening_bracket
1325             if not leaf.value:
1326                 self.invisible.append(leaf)
1327         leaf.bracket_depth = self.depth
1328         if self.depth == 0:
1329             delim = is_split_before_delimiter(leaf, self.previous)
1330             if delim and self.previous is not None:
1331                 self.delimiters[id(self.previous)] = delim
1332             else:
1333                 delim = is_split_after_delimiter(leaf, self.previous)
1334                 if delim:
1335                     self.delimiters[id(leaf)] = delim
1336         if leaf.type in OPENING_BRACKETS:
1337             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
1338             self.depth += 1
1339             if not leaf.value:
1340                 self.invisible.append(leaf)
1341         self.previous = leaf
1342         self.maybe_increment_lambda_arguments(leaf)
1343         self.maybe_increment_for_loop_variable(leaf)
1344
1345     def any_open_brackets(self) -> bool:
1346         """Return True if there is an yet unmatched open bracket on the line."""
1347         return bool(self.bracket_match)
1348
1349     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
1350         """Return the highest priority of a delimiter found on the line.
1351
1352         Values are consistent with what `is_split_*_delimiter()` return.
1353         Raises ValueError on no delimiters.
1354         """
1355         return max(v for k, v in self.delimiters.items() if k not in exclude)
1356
1357     def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
1358         """Return the number of delimiters with the given `priority`.
1359
1360         If no `priority` is passed, defaults to max priority on the line.
1361         """
1362         if not self.delimiters:
1363             return 0
1364
1365         priority = priority or self.max_delimiter_priority()
1366         return sum(1 for p in self.delimiters.values() if p == priority)
1367
1368     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1369         """In a for loop, or comprehension, the variables are often unpacks.
1370
1371         To avoid splitting on the comma in this situation, increase the depth of
1372         tokens between `for` and `in`.
1373         """
1374         if leaf.type == token.NAME and leaf.value == "for":
1375             self.depth += 1
1376             self._for_loop_depths.append(self.depth)
1377             return True
1378
1379         return False
1380
1381     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
1382         """See `maybe_increment_for_loop_variable` above for explanation."""
1383         if (
1384             self._for_loop_depths
1385             and self._for_loop_depths[-1] == self.depth
1386             and leaf.type == token.NAME
1387             and leaf.value == "in"
1388         ):
1389             self.depth -= 1
1390             self._for_loop_depths.pop()
1391             return True
1392
1393         return False
1394
1395     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
1396         """In a lambda expression, there might be more than one argument.
1397
1398         To avoid splitting on the comma in this situation, increase the depth of
1399         tokens between `lambda` and `:`.
1400         """
1401         if leaf.type == token.NAME and leaf.value == "lambda":
1402             self.depth += 1
1403             self._lambda_argument_depths.append(self.depth)
1404             return True
1405
1406         return False
1407
1408     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1409         """See `maybe_increment_lambda_arguments` above for explanation."""
1410         if (
1411             self._lambda_argument_depths
1412             and self._lambda_argument_depths[-1] == self.depth
1413             and leaf.type == token.COLON
1414         ):
1415             self.depth -= 1
1416             self._lambda_argument_depths.pop()
1417             return True
1418
1419         return False
1420
1421     def get_open_lsqb(self) -> Optional[Leaf]:
1422         """Return the most recent opening square bracket (if any)."""
1423         return self.bracket_match.get((self.depth - 1, token.RSQB))
1424
1425
1426 @dataclass
1427 class Line:
1428     """Holds leaves and comments. Can be printed with `str(line)`."""
1429
1430     depth: int = 0
1431     leaves: List[Leaf] = field(default_factory=list)
1432     # keys ordered like `leaves`
1433     comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict)
1434     bracket_tracker: BracketTracker = field(default_factory=BracketTracker)
1435     inside_brackets: bool = False
1436     should_explode: bool = False
1437
1438     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1439         """Add a new `leaf` to the end of the line.
1440
1441         Unless `preformatted` is True, the `leaf` will receive a new consistent
1442         whitespace prefix and metadata applied by :class:`BracketTracker`.
1443         Trailing commas are maybe removed, unpacked for loop variables are
1444         demoted from being delimiters.
1445
1446         Inline comments are put aside.
1447         """
1448         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1449         if not has_value:
1450             return
1451
1452         if token.COLON == leaf.type and self.is_class_paren_empty:
1453             del self.leaves[-2:]
1454         if self.leaves and not preformatted:
1455             # Note: at this point leaf.prefix should be empty except for
1456             # imports, for which we only preserve newlines.
1457             leaf.prefix += whitespace(
1458                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1459             )
1460         if self.inside_brackets or not preformatted:
1461             self.bracket_tracker.mark(leaf)
1462             if self.maybe_should_explode(leaf):
1463                 self.should_explode = True
1464         if not self.append_comment(leaf):
1465             self.leaves.append(leaf)
1466
1467     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1468         """Like :func:`append()` but disallow invalid standalone comment structure.
1469
1470         Raises ValueError when any `leaf` is appended after a standalone comment
1471         or when a standalone comment is not the first leaf on the line.
1472         """
1473         if self.bracket_tracker.depth == 0:
1474             if self.is_comment:
1475                 raise ValueError("cannot append to standalone comments")
1476
1477             if self.leaves and leaf.type == STANDALONE_COMMENT:
1478                 raise ValueError(
1479                     "cannot append standalone comments to a populated line"
1480                 )
1481
1482         self.append(leaf, preformatted=preformatted)
1483
1484     @property
1485     def is_comment(self) -> bool:
1486         """Is this line a standalone comment?"""
1487         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1488
1489     @property
1490     def is_decorator(self) -> bool:
1491         """Is this line a decorator?"""
1492         return bool(self) and self.leaves[0].type == token.AT
1493
1494     @property
1495     def is_import(self) -> bool:
1496         """Is this an import line?"""
1497         return bool(self) and is_import(self.leaves[0])
1498
1499     @property
1500     def is_class(self) -> bool:
1501         """Is this line a class definition?"""
1502         return (
1503             bool(self)
1504             and self.leaves[0].type == token.NAME
1505             and self.leaves[0].value == "class"
1506         )
1507
1508     @property
1509     def is_stub_class(self) -> bool:
1510         """Is this line a class definition with a body consisting only of "..."?"""
1511         return self.is_class and self.leaves[-3:] == [
1512             Leaf(token.DOT, ".") for _ in range(3)
1513         ]
1514
1515     @property
1516     def is_def(self) -> bool:
1517         """Is this a function definition? (Also returns True for async defs.)"""
1518         try:
1519             first_leaf = self.leaves[0]
1520         except IndexError:
1521             return False
1522
1523         try:
1524             second_leaf: Optional[Leaf] = self.leaves[1]
1525         except IndexError:
1526             second_leaf = None
1527         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1528             first_leaf.type == token.ASYNC
1529             and second_leaf is not None
1530             and second_leaf.type == token.NAME
1531             and second_leaf.value == "def"
1532         )
1533
1534     @property
1535     def is_class_paren_empty(self) -> bool:
1536         """Is this a class with no base classes but using parentheses?
1537
1538         Those are unnecessary and should be removed.
1539         """
1540         return (
1541             bool(self)
1542             and len(self.leaves) == 4
1543             and self.is_class
1544             and self.leaves[2].type == token.LPAR
1545             and self.leaves[2].value == "("
1546             and self.leaves[3].type == token.RPAR
1547             and self.leaves[3].value == ")"
1548         )
1549
1550     @property
1551     def is_triple_quoted_string(self) -> bool:
1552         """Is the line a triple quoted string?"""
1553         return (
1554             bool(self)
1555             and self.leaves[0].type == token.STRING
1556             and self.leaves[0].value.startswith(('"""', "'''"))
1557         )
1558
1559     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1560         """If so, needs to be split before emitting."""
1561         for leaf in self.leaves:
1562             if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit:
1563                 return True
1564
1565         return False
1566
1567     def contains_uncollapsable_type_comments(self) -> bool:
1568         ignored_ids = set()
1569         try:
1570             last_leaf = self.leaves[-1]
1571             ignored_ids.add(id(last_leaf))
1572             if last_leaf.type == token.COMMA or (
1573                 last_leaf.type == token.RPAR and not last_leaf.value
1574             ):
1575                 # When trailing commas or optional parens are inserted by Black for
1576                 # consistency, comments after the previous last element are not moved
1577                 # (they don't have to, rendering will still be correct).  So we ignore
1578                 # trailing commas and invisible.
1579                 last_leaf = self.leaves[-2]
1580                 ignored_ids.add(id(last_leaf))
1581         except IndexError:
1582             return False
1583
1584         # A type comment is uncollapsable if it is attached to a leaf
1585         # that isn't at the end of the line (since that could cause it
1586         # to get associated to a different argument) or if there are
1587         # comments before it (since that could cause it to get hidden
1588         # behind a comment.
1589         comment_seen = False
1590         for leaf_id, comments in self.comments.items():
1591             for comment in comments:
1592                 if is_type_comment(comment):
1593                     if comment_seen or (
1594                         not is_type_comment(comment, " ignore")
1595                         and leaf_id not in ignored_ids
1596                     ):
1597                         return True
1598
1599                 comment_seen = True
1600
1601         return False
1602
1603     def contains_unsplittable_type_ignore(self) -> bool:
1604         if not self.leaves:
1605             return False
1606
1607         # If a 'type: ignore' is attached to the end of a line, we
1608         # can't split the line, because we can't know which of the
1609         # subexpressions the ignore was meant to apply to.
1610         #
1611         # We only want this to apply to actual physical lines from the
1612         # original source, though: we don't want the presence of a
1613         # 'type: ignore' at the end of a multiline expression to
1614         # justify pushing it all onto one line. Thus we
1615         # (unfortunately) need to check the actual source lines and
1616         # only report an unsplittable 'type: ignore' if this line was
1617         # one line in the original code.
1618
1619         # Grab the first and last line numbers, skipping generated leaves
1620         first_line = next((leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0)
1621         last_line = next(
1622             (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0
1623         )
1624
1625         if first_line == last_line:
1626             # We look at the last two leaves since a comma or an
1627             # invisible paren could have been added at the end of the
1628             # line.
1629             for node in self.leaves[-2:]:
1630                 for comment in self.comments.get(id(node), []):
1631                     if is_type_comment(comment, " ignore"):
1632                         return True
1633
1634         return False
1635
1636     def contains_multiline_strings(self) -> bool:
1637         return any(is_multiline_string(leaf) for leaf in self.leaves)
1638
1639     def maybe_should_explode(self, closing: Leaf) -> bool:
1640         """Return True if this line should explode (always be split), that is when:
1641         - there's a trailing comma here; and
1642         - it's not a one-tuple.
1643         """
1644         if not (
1645             closing.type in CLOSING_BRACKETS
1646             and self.leaves
1647             and self.leaves[-1].type == token.COMMA
1648         ):
1649             return False
1650
1651         if closing.type in {token.RBRACE, token.RSQB}:
1652             return True
1653
1654         if self.is_import:
1655             return True
1656
1657         if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves):
1658             return True
1659
1660         return False
1661
1662     def append_comment(self, comment: Leaf) -> bool:
1663         """Add an inline or standalone comment to the line."""
1664         if (
1665             comment.type == STANDALONE_COMMENT
1666             and self.bracket_tracker.any_open_brackets()
1667         ):
1668             comment.prefix = ""
1669             return False
1670
1671         if comment.type != token.COMMENT:
1672             return False
1673
1674         if not self.leaves:
1675             comment.type = STANDALONE_COMMENT
1676             comment.prefix = ""
1677             return False
1678
1679         last_leaf = self.leaves[-1]
1680         if (
1681             last_leaf.type == token.RPAR
1682             and not last_leaf.value
1683             and last_leaf.parent
1684             and len(list(last_leaf.parent.leaves())) <= 3
1685             and not is_type_comment(comment)
1686         ):
1687             # Comments on an optional parens wrapping a single leaf should belong to
1688             # the wrapped node except if it's a type comment. Pinning the comment like
1689             # this avoids unstable formatting caused by comment migration.
1690             if len(self.leaves) < 2:
1691                 comment.type = STANDALONE_COMMENT
1692                 comment.prefix = ""
1693                 return False
1694
1695             last_leaf = self.leaves[-2]
1696         self.comments.setdefault(id(last_leaf), []).append(comment)
1697         return True
1698
1699     def comments_after(self, leaf: Leaf) -> List[Leaf]:
1700         """Generate comments that should appear directly after `leaf`."""
1701         return self.comments.get(id(leaf), [])
1702
1703     def remove_trailing_comma(self) -> None:
1704         """Remove the trailing comma and moves the comments attached to it."""
1705         trailing_comma = self.leaves.pop()
1706         trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
1707         self.comments.setdefault(id(self.leaves[-1]), []).extend(
1708             trailing_comma_comments
1709         )
1710
1711     def is_complex_subscript(self, leaf: Leaf) -> bool:
1712         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1713         open_lsqb = self.bracket_tracker.get_open_lsqb()
1714         if open_lsqb is None:
1715             return False
1716
1717         subscript_start = open_lsqb.next_sibling
1718
1719         if isinstance(subscript_start, Node):
1720             if subscript_start.type == syms.listmaker:
1721                 return False
1722
1723             if subscript_start.type == syms.subscriptlist:
1724                 subscript_start = child_towards(subscript_start, leaf)
1725         return subscript_start is not None and any(
1726             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1727         )
1728
1729     def clone(self) -> "Line":
1730         return Line(
1731             depth=self.depth,
1732             inside_brackets=self.inside_brackets,
1733             should_explode=self.should_explode,
1734         )
1735
1736     def __str__(self) -> str:
1737         """Render the line."""
1738         if not self:
1739             return "\n"
1740
1741         indent = "    " * self.depth
1742         leaves = iter(self.leaves)
1743         first = next(leaves)
1744         res = f"{first.prefix}{indent}{first.value}"
1745         for leaf in leaves:
1746             res += str(leaf)
1747         for comment in itertools.chain.from_iterable(self.comments.values()):
1748             res += str(comment)
1749
1750         return res + "\n"
1751
1752     def __bool__(self) -> bool:
1753         """Return True if the line has leaves or comments."""
1754         return bool(self.leaves or self.comments)
1755
1756
1757 @dataclass
1758 class EmptyLineTracker:
1759     """Provides a stateful method that returns the number of potential extra
1760     empty lines needed before and after the currently processed line.
1761
1762     Note: this tracker works on lines that haven't been split yet.  It assumes
1763     the prefix of the first leaf consists of optional newlines.  Those newlines
1764     are consumed by `maybe_empty_lines()` and included in the computation.
1765     """
1766
1767     is_pyi: bool = False
1768     previous_line: Optional[Line] = None
1769     previous_after: int = 0
1770     previous_defs: List[int] = field(default_factory=list)
1771
1772     def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1773         """Return the number of extra empty lines before and after the `current_line`.
1774
1775         This is for separating `def`, `async def` and `class` with extra empty
1776         lines (two on module-level).
1777         """
1778         before, after = self._maybe_empty_lines(current_line)
1779         before = (
1780             # Black should not insert empty lines at the beginning
1781             # of the file
1782             0
1783             if self.previous_line is None
1784             else before - self.previous_after
1785         )
1786         self.previous_after = after
1787         self.previous_line = current_line
1788         return before, after
1789
1790     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1791         max_allowed = 1
1792         if current_line.depth == 0:
1793             max_allowed = 1 if self.is_pyi else 2
1794         if current_line.leaves:
1795             # Consume the first leaf's extra newlines.
1796             first_leaf = current_line.leaves[0]
1797             before = first_leaf.prefix.count("\n")
1798             before = min(before, max_allowed)
1799             first_leaf.prefix = ""
1800         else:
1801             before = 0
1802         depth = current_line.depth
1803         while self.previous_defs and self.previous_defs[-1] >= depth:
1804             self.previous_defs.pop()
1805             if self.is_pyi:
1806                 before = 0 if depth else 1
1807             else:
1808                 before = 1 if depth else 2
1809         if current_line.is_decorator or current_line.is_def or current_line.is_class:
1810             return self._maybe_empty_lines_for_class_or_def(current_line, before)
1811
1812         if (
1813             self.previous_line
1814             and self.previous_line.is_import
1815             and not current_line.is_import
1816             and depth == self.previous_line.depth
1817         ):
1818             return (before or 1), 0
1819
1820         if (
1821             self.previous_line
1822             and self.previous_line.is_class
1823             and current_line.is_triple_quoted_string
1824         ):
1825             return before, 1
1826
1827         return before, 0
1828
1829     def _maybe_empty_lines_for_class_or_def(
1830         self, current_line: Line, before: int
1831     ) -> Tuple[int, int]:
1832         if not current_line.is_decorator:
1833             self.previous_defs.append(current_line.depth)
1834         if self.previous_line is None:
1835             # Don't insert empty lines before the first line in the file.
1836             return 0, 0
1837
1838         if self.previous_line.is_decorator:
1839             if self.is_pyi and current_line.is_stub_class:
1840                 # Insert an empty line after a decorated stub class
1841                 return 0, 1
1842
1843             return 0, 0
1844
1845         if self.previous_line.depth < current_line.depth and (
1846             self.previous_line.is_class or self.previous_line.is_def
1847         ):
1848             return 0, 0
1849
1850         if (
1851             self.previous_line.is_comment
1852             and self.previous_line.depth == current_line.depth
1853             and before == 0
1854         ):
1855             return 0, 0
1856
1857         if self.is_pyi:
1858             if self.previous_line.depth > current_line.depth:
1859                 newlines = 1
1860             elif current_line.is_class or self.previous_line.is_class:
1861                 if current_line.is_stub_class and self.previous_line.is_stub_class:
1862                     # No blank line between classes with an empty body
1863                     newlines = 0
1864                 else:
1865                     newlines = 1
1866             elif (
1867                 current_line.is_def or current_line.is_decorator
1868             ) and not self.previous_line.is_def:
1869                 # Blank line between a block of functions (maybe with preceding
1870                 # decorators) and a block of non-functions
1871                 newlines = 1
1872             else:
1873                 newlines = 0
1874         else:
1875             newlines = 2
1876         if current_line.depth and newlines:
1877             newlines -= 1
1878         return newlines, 0
1879
1880
1881 @dataclass
1882 class LineGenerator(Visitor[Line]):
1883     """Generates reformatted Line objects.  Empty lines are not emitted.
1884
1885     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1886     in ways that will no longer stringify to valid Python code on the tree.
1887     """
1888
1889     is_pyi: bool = False
1890     normalize_strings: bool = True
1891     current_line: Line = field(default_factory=Line)
1892     remove_u_prefix: bool = False
1893
1894     def line(self, indent: int = 0) -> Iterator[Line]:
1895         """Generate a line.
1896
1897         If the line is empty, only emit if it makes sense.
1898         If the line is too long, split it first and then generate.
1899
1900         If any lines were generated, set up a new current_line.
1901         """
1902         if not self.current_line:
1903             self.current_line.depth += indent
1904             return  # Line is empty, don't emit. Creating a new one unnecessary.
1905
1906         complete_line = self.current_line
1907         self.current_line = Line(depth=complete_line.depth + indent)
1908         yield complete_line
1909
1910     def visit_default(self, node: LN) -> Iterator[Line]:
1911         """Default `visit_*()` implementation. Recurses to children of `node`."""
1912         if isinstance(node, Leaf):
1913             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1914             for comment in generate_comments(node):
1915                 if any_open_brackets:
1916                     # any comment within brackets is subject to splitting
1917                     self.current_line.append(comment)
1918                 elif comment.type == token.COMMENT:
1919                     # regular trailing comment
1920                     self.current_line.append(comment)
1921                     yield from self.line()
1922
1923                 else:
1924                     # regular standalone comment
1925                     yield from self.line()
1926
1927                     self.current_line.append(comment)
1928                     yield from self.line()
1929
1930             normalize_prefix(node, inside_brackets=any_open_brackets)
1931             if self.normalize_strings and node.type == token.STRING:
1932                 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1933                 normalize_string_quotes(node)
1934             if node.type == token.NUMBER:
1935                 normalize_numeric_literal(node)
1936             if node.type not in WHITESPACE:
1937                 self.current_line.append(node)
1938         yield from super().visit_default(node)
1939
1940     def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
1941         """Increase indentation level, maybe yield a line."""
1942         # In blib2to3 INDENT never holds comments.
1943         yield from self.line(+1)
1944         yield from self.visit_default(node)
1945
1946     def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
1947         """Decrease indentation level, maybe yield a line."""
1948         # The current line might still wait for trailing comments.  At DEDENT time
1949         # there won't be any (they would be prefixes on the preceding NEWLINE).
1950         # Emit the line then.
1951         yield from self.line()
1952
1953         # While DEDENT has no value, its prefix may contain standalone comments
1954         # that belong to the current indentation level.  Get 'em.
1955         yield from self.visit_default(node)
1956
1957         # Finally, emit the dedent.
1958         yield from self.line(-1)
1959
1960     def visit_stmt(
1961         self, node: Node, keywords: Set[str], parens: Set[str]
1962     ) -> Iterator[Line]:
1963         """Visit a statement.
1964
1965         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1966         `def`, `with`, `class`, `assert` and assignments.
1967
1968         The relevant Python language `keywords` for a given statement will be
1969         NAME leaves within it. This methods puts those on a separate line.
1970
1971         `parens` holds a set of string leaf values immediately after which
1972         invisible parens should be put.
1973         """
1974         normalize_invisible_parens(node, parens_after=parens)
1975         for child in node.children:
1976             if child.type == token.NAME and child.value in keywords:  # type: ignore
1977                 yield from self.line()
1978
1979             yield from self.visit(child)
1980
1981     def visit_suite(self, node: Node) -> Iterator[Line]:
1982         """Visit a suite."""
1983         if self.is_pyi and is_stub_suite(node):
1984             yield from self.visit(node.children[2])
1985         else:
1986             yield from self.visit_default(node)
1987
1988     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1989         """Visit a statement without nested statements."""
1990         is_suite_like = node.parent and node.parent.type in STATEMENT
1991         if is_suite_like:
1992             if self.is_pyi and is_stub_body(node):
1993                 yield from self.visit_default(node)
1994             else:
1995                 yield from self.line(+1)
1996                 yield from self.visit_default(node)
1997                 yield from self.line(-1)
1998
1999         else:
2000             if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
2001                 yield from self.line()
2002             yield from self.visit_default(node)
2003
2004     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
2005         """Visit `async def`, `async for`, `async with`."""
2006         yield from self.line()
2007
2008         children = iter(node.children)
2009         for child in children:
2010             yield from self.visit(child)
2011
2012             if child.type == token.ASYNC:
2013                 break
2014
2015         internal_stmt = next(children)
2016         for child in internal_stmt.children:
2017             yield from self.visit(child)
2018
2019     def visit_decorators(self, node: Node) -> Iterator[Line]:
2020         """Visit decorators."""
2021         for child in node.children:
2022             yield from self.line()
2023             yield from self.visit(child)
2024
2025     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
2026         """Remove a semicolon and put the other statement on a separate line."""
2027         yield from self.line()
2028
2029     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
2030         """End of file. Process outstanding comments and end with a newline."""
2031         yield from self.visit_default(leaf)
2032         yield from self.line()
2033
2034     def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
2035         if not self.current_line.bracket_tracker.any_open_brackets():
2036             yield from self.line()
2037         yield from self.visit_default(leaf)
2038
2039     def visit_factor(self, node: Node) -> Iterator[Line]:
2040         """Force parentheses between a unary op and a binary power:
2041
2042         -2 ** 8 -> -(2 ** 8)
2043         """
2044         _operator, operand = node.children
2045         if (
2046             operand.type == syms.power
2047             and len(operand.children) == 3
2048             and operand.children[1].type == token.DOUBLESTAR
2049         ):
2050             lpar = Leaf(token.LPAR, "(")
2051             rpar = Leaf(token.RPAR, ")")
2052             index = operand.remove() or 0
2053             node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
2054         yield from self.visit_default(node)
2055
2056     def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
2057         if is_docstring(leaf) and "\\\n" not in leaf.value:
2058             # We're ignoring docstrings with backslash newline escapes because changing
2059             # indentation of those changes the AST representation of the code.
2060             prefix = get_string_prefix(leaf.value)
2061             lead_len = len(prefix) + 3
2062             tail_len = -3
2063             indent = " " * 4 * self.current_line.depth
2064             docstring = fix_docstring(leaf.value[lead_len:tail_len], indent)
2065             if docstring:
2066                 if leaf.value[lead_len - 1] == docstring[0]:
2067                     docstring = " " + docstring
2068                 if leaf.value[tail_len + 1] == docstring[-1]:
2069                     docstring = docstring + " "
2070             leaf.value = leaf.value[0:lead_len] + docstring + leaf.value[tail_len:]
2071
2072         yield from self.visit_default(leaf)
2073
2074     def __post_init__(self) -> None:
2075         """You are in a twisty little maze of passages."""
2076         v = self.visit_stmt
2077         Ø: Set[str] = set()
2078         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
2079         self.visit_if_stmt = partial(
2080             v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
2081         )
2082         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
2083         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
2084         self.visit_try_stmt = partial(
2085             v, keywords={"try", "except", "else", "finally"}, parens=Ø
2086         )
2087         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
2088         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
2089         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
2090         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
2091         self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
2092         self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
2093         self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
2094         self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
2095         self.visit_async_funcdef = self.visit_async_stmt
2096         self.visit_decorated = self.visit_decorators
2097
2098
2099 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
2100 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
2101 OPENING_BRACKETS = set(BRACKET.keys())
2102 CLOSING_BRACKETS = set(BRACKET.values())
2103 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
2104 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
2105
2106
2107 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
2108     """Return whitespace prefix if needed for the given `leaf`.
2109
2110     `complex_subscript` signals whether the given leaf is part of a subscription
2111     which has non-trivial arguments, like arithmetic expressions or function calls.
2112     """
2113     NO = ""
2114     SPACE = " "
2115     DOUBLESPACE = "  "
2116     t = leaf.type
2117     p = leaf.parent
2118     v = leaf.value
2119     if t in ALWAYS_NO_SPACE:
2120         return NO
2121
2122     if t == token.COMMENT:
2123         return DOUBLESPACE
2124
2125     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
2126     if t == token.COLON and p.type not in {
2127         syms.subscript,
2128         syms.subscriptlist,
2129         syms.sliceop,
2130     }:
2131         return NO
2132
2133     prev = leaf.prev_sibling
2134     if not prev:
2135         prevp = preceding_leaf(p)
2136         if not prevp or prevp.type in OPENING_BRACKETS:
2137             return NO
2138
2139         if t == token.COLON:
2140             if prevp.type == token.COLON:
2141                 return NO
2142
2143             elif prevp.type != token.COMMA and not complex_subscript:
2144                 return NO
2145
2146             return SPACE
2147
2148         if prevp.type == token.EQUAL:
2149             if prevp.parent:
2150                 if prevp.parent.type in {
2151                     syms.arglist,
2152                     syms.argument,
2153                     syms.parameters,
2154                     syms.varargslist,
2155                 }:
2156                     return NO
2157
2158                 elif prevp.parent.type == syms.typedargslist:
2159                     # A bit hacky: if the equal sign has whitespace, it means we
2160                     # previously found it's a typed argument.  So, we're using
2161                     # that, too.
2162                     return prevp.prefix
2163
2164         elif prevp.type in VARARGS_SPECIALS:
2165             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2166                 return NO
2167
2168         elif prevp.type == token.COLON:
2169             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
2170                 return SPACE if complex_subscript else NO
2171
2172         elif (
2173             prevp.parent
2174             and prevp.parent.type == syms.factor
2175             and prevp.type in MATH_OPERATORS
2176         ):
2177             return NO
2178
2179         elif (
2180             prevp.type == token.RIGHTSHIFT
2181             and prevp.parent
2182             and prevp.parent.type == syms.shift_expr
2183             and prevp.prev_sibling
2184             and prevp.prev_sibling.type == token.NAME
2185             and prevp.prev_sibling.value == "print"  # type: ignore
2186         ):
2187             # Python 2 print chevron
2188             return NO
2189         elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator:
2190             # no space in decorators
2191             return NO
2192
2193     elif prev.type in OPENING_BRACKETS:
2194         return NO
2195
2196     if p.type in {syms.parameters, syms.arglist}:
2197         # untyped function signatures or calls
2198         if not prev or prev.type != token.COMMA:
2199             return NO
2200
2201     elif p.type == syms.varargslist:
2202         # lambdas
2203         if prev and prev.type != token.COMMA:
2204             return NO
2205
2206     elif p.type == syms.typedargslist:
2207         # typed function signatures
2208         if not prev:
2209             return NO
2210
2211         if t == token.EQUAL:
2212             if prev.type != syms.tname:
2213                 return NO
2214
2215         elif prev.type == token.EQUAL:
2216             # A bit hacky: if the equal sign has whitespace, it means we
2217             # previously found it's a typed argument.  So, we're using that, too.
2218             return prev.prefix
2219
2220         elif prev.type != token.COMMA:
2221             return NO
2222
2223     elif p.type == syms.tname:
2224         # type names
2225         if not prev:
2226             prevp = preceding_leaf(p)
2227             if not prevp or prevp.type != token.COMMA:
2228                 return NO
2229
2230     elif p.type == syms.trailer:
2231         # attributes and calls
2232         if t == token.LPAR or t == token.RPAR:
2233             return NO
2234
2235         if not prev:
2236             if t == token.DOT:
2237                 prevp = preceding_leaf(p)
2238                 if not prevp or prevp.type != token.NUMBER:
2239                     return NO
2240
2241             elif t == token.LSQB:
2242                 return NO
2243
2244         elif prev.type != token.COMMA:
2245             return NO
2246
2247     elif p.type == syms.argument:
2248         # single argument
2249         if t == token.EQUAL:
2250             return NO
2251
2252         if not prev:
2253             prevp = preceding_leaf(p)
2254             if not prevp or prevp.type == token.LPAR:
2255                 return NO
2256
2257         elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
2258             return NO
2259
2260     elif p.type == syms.decorator:
2261         # decorators
2262         return NO
2263
2264     elif p.type == syms.dotted_name:
2265         if prev:
2266             return NO
2267
2268         prevp = preceding_leaf(p)
2269         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
2270             return NO
2271
2272     elif p.type == syms.classdef:
2273         if t == token.LPAR:
2274             return NO
2275
2276         if prev and prev.type == token.LPAR:
2277             return NO
2278
2279     elif p.type in {syms.subscript, syms.sliceop}:
2280         # indexing
2281         if not prev:
2282             assert p.parent is not None, "subscripts are always parented"
2283             if p.parent.type == syms.subscriptlist:
2284                 return SPACE
2285
2286             return NO
2287
2288         elif not complex_subscript:
2289             return NO
2290
2291     elif p.type == syms.atom:
2292         if prev and t == token.DOT:
2293             # dots, but not the first one.
2294             return NO
2295
2296     elif p.type == syms.dictsetmaker:
2297         # dict unpacking
2298         if prev and prev.type == token.DOUBLESTAR:
2299             return NO
2300
2301     elif p.type in {syms.factor, syms.star_expr}:
2302         # unary ops
2303         if not prev:
2304             prevp = preceding_leaf(p)
2305             if not prevp or prevp.type in OPENING_BRACKETS:
2306                 return NO
2307
2308             prevp_parent = prevp.parent
2309             assert prevp_parent is not None
2310             if prevp.type == token.COLON and prevp_parent.type in {
2311                 syms.subscript,
2312                 syms.sliceop,
2313             }:
2314                 return NO
2315
2316             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
2317                 return NO
2318
2319         elif t in {token.NAME, token.NUMBER, token.STRING}:
2320             return NO
2321
2322     elif p.type == syms.import_from:
2323         if t == token.DOT:
2324             if prev and prev.type == token.DOT:
2325                 return NO
2326
2327         elif t == token.NAME:
2328             if v == "import":
2329                 return SPACE
2330
2331             if prev and prev.type == token.DOT:
2332                 return NO
2333
2334     elif p.type == syms.sliceop:
2335         return NO
2336
2337     return SPACE
2338
2339
2340 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
2341     """Return the first leaf that precedes `node`, if any."""
2342     while node:
2343         res = node.prev_sibling
2344         if res:
2345             if isinstance(res, Leaf):
2346                 return res
2347
2348             try:
2349                 return list(res.leaves())[-1]
2350
2351             except IndexError:
2352                 return None
2353
2354         node = node.parent
2355     return None
2356
2357
2358 def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
2359     """Return if the `node` and its previous siblings match types against the provided
2360     list of tokens; the provided `node`has its type matched against the last element in
2361     the list.  `None` can be used as the first element to declare that the start of the
2362     list is anchored at the start of its parent's children."""
2363     if not tokens:
2364         return True
2365     if tokens[-1] is None:
2366         return node is None
2367     if not node:
2368         return False
2369     if node.type != tokens[-1]:
2370         return False
2371     return prev_siblings_are(node.prev_sibling, tokens[:-1])
2372
2373
2374 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
2375     """Return the child of `ancestor` that contains `descendant`."""
2376     node: Optional[LN] = descendant
2377     while node and node.parent != ancestor:
2378         node = node.parent
2379     return node
2380
2381
2382 def container_of(leaf: Leaf) -> LN:
2383     """Return `leaf` or one of its ancestors that is the topmost container of it.
2384
2385     By "container" we mean a node where `leaf` is the very first child.
2386     """
2387     same_prefix = leaf.prefix
2388     container: LN = leaf
2389     while container:
2390         parent = container.parent
2391         if parent is None:
2392             break
2393
2394         if parent.children[0].prefix != same_prefix:
2395             break
2396
2397         if parent.type == syms.file_input:
2398             break
2399
2400         if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
2401             break
2402
2403         container = parent
2404     return container
2405
2406
2407 def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2408     """Return the priority of the `leaf` delimiter, given a line break after it.
2409
2410     The delimiter priorities returned here are from those delimiters that would
2411     cause a line break after themselves.
2412
2413     Higher numbers are higher priority.
2414     """
2415     if leaf.type == token.COMMA:
2416         return COMMA_PRIORITY
2417
2418     return 0
2419
2420
2421 def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2422     """Return the priority of the `leaf` delimiter, given a line break before it.
2423
2424     The delimiter priorities returned here are from those delimiters that would
2425     cause a line break before themselves.
2426
2427     Higher numbers are higher priority.
2428     """
2429     if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2430         # * and ** might also be MATH_OPERATORS but in this case they are not.
2431         # Don't treat them as a delimiter.
2432         return 0
2433
2434     if (
2435         leaf.type == token.DOT
2436         and leaf.parent
2437         and leaf.parent.type not in {syms.import_from, syms.dotted_name}
2438         and (previous is None or previous.type in CLOSING_BRACKETS)
2439     ):
2440         return DOT_PRIORITY
2441
2442     if (
2443         leaf.type in MATH_OPERATORS
2444         and leaf.parent
2445         and leaf.parent.type not in {syms.factor, syms.star_expr}
2446     ):
2447         return MATH_PRIORITIES[leaf.type]
2448
2449     if leaf.type in COMPARATORS:
2450         return COMPARATOR_PRIORITY
2451
2452     if (
2453         leaf.type == token.STRING
2454         and previous is not None
2455         and previous.type == token.STRING
2456     ):
2457         return STRING_PRIORITY
2458
2459     if leaf.type not in {token.NAME, token.ASYNC}:
2460         return 0
2461
2462     if (
2463         leaf.value == "for"
2464         and leaf.parent
2465         and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
2466         or leaf.type == token.ASYNC
2467     ):
2468         if (
2469             not isinstance(leaf.prev_sibling, Leaf)
2470             or leaf.prev_sibling.value != "async"
2471         ):
2472             return COMPREHENSION_PRIORITY
2473
2474     if (
2475         leaf.value == "if"
2476         and leaf.parent
2477         and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
2478     ):
2479         return COMPREHENSION_PRIORITY
2480
2481     if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
2482         return TERNARY_PRIORITY
2483
2484     if leaf.value == "is":
2485         return COMPARATOR_PRIORITY
2486
2487     if (
2488         leaf.value == "in"
2489         and leaf.parent
2490         and leaf.parent.type in {syms.comp_op, syms.comparison}
2491         and not (
2492             previous is not None
2493             and previous.type == token.NAME
2494             and previous.value == "not"
2495         )
2496     ):
2497         return COMPARATOR_PRIORITY
2498
2499     if (
2500         leaf.value == "not"
2501         and leaf.parent
2502         and leaf.parent.type == syms.comp_op
2503         and not (
2504             previous is not None
2505             and previous.type == token.NAME
2506             and previous.value == "is"
2507         )
2508     ):
2509         return COMPARATOR_PRIORITY
2510
2511     if leaf.value in LOGIC_OPERATORS and leaf.parent:
2512         return LOGIC_PRIORITY
2513
2514     return 0
2515
2516
2517 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2518 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
2519
2520
2521 def generate_comments(leaf: LN) -> Iterator[Leaf]:
2522     """Clean the prefix of the `leaf` and generate comments from it, if any.
2523
2524     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
2525     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
2526     move because it does away with modifying the grammar to include all the
2527     possible places in which comments can be placed.
2528
2529     The sad consequence for us though is that comments don't "belong" anywhere.
2530     This is why this function generates simple parentless Leaf objects for
2531     comments.  We simply don't know what the correct parent should be.
2532
2533     No matter though, we can live without this.  We really only need to
2534     differentiate between inline and standalone comments.  The latter don't
2535     share the line with any code.
2536
2537     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
2538     are emitted with a fake STANDALONE_COMMENT token identifier.
2539     """
2540     for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
2541         yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
2542
2543
2544 @dataclass
2545 class ProtoComment:
2546     """Describes a piece of syntax that is a comment.
2547
2548     It's not a :class:`blib2to3.pytree.Leaf` so that:
2549
2550     * it can be cached (`Leaf` objects should not be reused more than once as
2551       they store their lineno, column, prefix, and parent information);
2552     * `newlines` and `consumed` fields are kept separate from the `value`. This
2553       simplifies handling of special marker comments like ``# fmt: off/on``.
2554     """
2555
2556     type: int  # token.COMMENT or STANDALONE_COMMENT
2557     value: str  # content of the comment
2558     newlines: int  # how many newlines before the comment
2559     consumed: int  # how many characters of the original leaf's prefix did we consume
2560
2561
2562 @lru_cache(maxsize=4096)
2563 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2564     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2565     result: List[ProtoComment] = []
2566     if not prefix or "#" not in prefix:
2567         return result
2568
2569     consumed = 0
2570     nlines = 0
2571     ignored_lines = 0
2572     for index, line in enumerate(prefix.split("\n")):
2573         consumed += len(line) + 1  # adding the length of the split '\n'
2574         line = line.lstrip()
2575         if not line:
2576             nlines += 1
2577         if not line.startswith("#"):
2578             # Escaped newlines outside of a comment are not really newlines at
2579             # all. We treat a single-line comment following an escaped newline
2580             # as a simple trailing comment.
2581             if line.endswith("\\"):
2582                 ignored_lines += 1
2583             continue
2584
2585         if index == ignored_lines and not is_endmarker:
2586             comment_type = token.COMMENT  # simple trailing comment
2587         else:
2588             comment_type = STANDALONE_COMMENT
2589         comment = make_comment(line)
2590         result.append(
2591             ProtoComment(
2592                 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2593             )
2594         )
2595         nlines = 0
2596     return result
2597
2598
2599 def make_comment(content: str) -> str:
2600     """Return a consistently formatted comment from the given `content` string.
2601
2602     All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
2603     space between the hash sign and the content.
2604
2605     If `content` didn't start with a hash sign, one is provided.
2606     """
2607     content = content.rstrip()
2608     if not content:
2609         return "#"
2610
2611     if content[0] == "#":
2612         content = content[1:]
2613     if content and content[0] not in " !:#'%":
2614         content = " " + content
2615     return "#" + content
2616
2617
2618 def transform_line(
2619     line: Line, mode: Mode, features: Collection[Feature] = ()
2620 ) -> Iterator[Line]:
2621     """Transform a `line`, potentially splitting it into many lines.
2622
2623     They should fit in the allotted `line_length` but might not be able to.
2624
2625     `features` are syntactical features that may be used in the output.
2626     """
2627     if line.is_comment:
2628         yield line
2629         return
2630
2631     line_str = line_to_string(line)
2632
2633     def init_st(ST: Type[StringTransformer]) -> StringTransformer:
2634         """Initialize StringTransformer"""
2635         return ST(mode.line_length, mode.string_normalization)
2636
2637     string_merge = init_st(StringMerger)
2638     string_paren_strip = init_st(StringParenStripper)
2639     string_split = init_st(StringSplitter)
2640     string_paren_wrap = init_st(StringParenWrapper)
2641
2642     transformers: List[Transformer]
2643     if (
2644         not line.contains_uncollapsable_type_comments()
2645         and not line.should_explode
2646         and (
2647             is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
2648             or line.contains_unsplittable_type_ignore()
2649         )
2650         and not (line.inside_brackets and line.contains_standalone_comments())
2651     ):
2652         # Only apply basic string preprocessing, since lines shouldn't be split here.
2653         if mode.experimental_string_processing:
2654             transformers = [string_merge, string_paren_strip]
2655         else:
2656             transformers = []
2657     elif line.is_def:
2658         transformers = [left_hand_split]
2659     else:
2660
2661         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
2662             """Wraps calls to `right_hand_split`.
2663
2664             The calls increasingly `omit` right-hand trailers (bracket pairs with
2665             content), meaning the trailers get glued together to split on another
2666             bracket pair instead.
2667             """
2668             for omit in generate_trailers_to_omit(line, mode.line_length):
2669                 lines = list(
2670                     right_hand_split(line, mode.line_length, features, omit=omit)
2671                 )
2672                 # Note: this check is only able to figure out if the first line of the
2673                 # *current* transformation fits in the line length.  This is true only
2674                 # for simple cases.  All others require running more transforms via
2675                 # `transform_line()`.  This check doesn't know if those would succeed.
2676                 if is_line_short_enough(lines[0], line_length=mode.line_length):
2677                     yield from lines
2678                     return
2679
2680             # All splits failed, best effort split with no omits.
2681             # This mostly happens to multiline strings that are by definition
2682             # reported as not fitting a single line, as well as lines that contain
2683             # trailing commas (those have to be exploded).
2684             yield from right_hand_split(
2685                 line, line_length=mode.line_length, features=features
2686             )
2687
2688         if mode.experimental_string_processing:
2689             if line.inside_brackets:
2690                 transformers = [
2691                     string_merge,
2692                     string_paren_strip,
2693                     string_split,
2694                     delimiter_split,
2695                     standalone_comment_split,
2696                     string_paren_wrap,
2697                     rhs,
2698                 ]
2699             else:
2700                 transformers = [
2701                     string_merge,
2702                     string_paren_strip,
2703                     string_split,
2704                     string_paren_wrap,
2705                     rhs,
2706                 ]
2707         else:
2708             if line.inside_brackets:
2709                 transformers = [delimiter_split, standalone_comment_split, rhs]
2710             else:
2711                 transformers = [rhs]
2712
2713     for transform in transformers:
2714         # We are accumulating lines in `result` because we might want to abort
2715         # mission and return the original line in the end, or attempt a different
2716         # split altogether.
2717         try:
2718             result = run_transformer(line, transform, mode, features, line_str=line_str)
2719         except CannotTransform:
2720             continue
2721         else:
2722             yield from result
2723             break
2724
2725     else:
2726         yield line
2727
2728
2729 @dataclass  # type: ignore
2730 class StringTransformer(ABC):
2731     """
2732     An implementation of the Transformer protocol that relies on its
2733     subclasses overriding the template methods `do_match(...)` and
2734     `do_transform(...)`.
2735
2736     This Transformer works exclusively on strings (for example, by merging
2737     or splitting them).
2738
2739     The following sections can be found among the docstrings of each concrete
2740     StringTransformer subclass.
2741
2742     Requirements:
2743         Which requirements must be met of the given Line for this
2744         StringTransformer to be applied?
2745
2746     Transformations:
2747         If the given Line meets all of the above requirements, which string
2748         transformations can you expect to be applied to it by this
2749         StringTransformer?
2750
2751     Collaborations:
2752         What contractual agreements does this StringTransformer have with other
2753         StringTransfomers? Such collaborations should be eliminated/minimized
2754         as much as possible.
2755     """
2756
2757     line_length: int
2758     normalize_strings: bool
2759     __name__ = "StringTransformer"
2760
2761     @abstractmethod
2762     def do_match(self, line: Line) -> TMatchResult:
2763         """
2764         Returns:
2765             * Ok(string_idx) such that `line.leaves[string_idx]` is our target
2766             string, if a match was able to be made.
2767                 OR
2768             * Err(CannotTransform), if a match was not able to be made.
2769         """
2770
2771     @abstractmethod
2772     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2773         """
2774         Yields:
2775             * Ok(new_line) where new_line is the new transformed line.
2776                 OR
2777             * Err(CannotTransform) if the transformation failed for some reason. The
2778             `do_match(...)` template method should usually be used to reject
2779             the form of the given Line, but in some cases it is difficult to
2780             know whether or not a Line meets the StringTransformer's
2781             requirements until the transformation is already midway.
2782
2783         Side Effects:
2784             This method should NOT mutate @line directly, but it MAY mutate the
2785             Line's underlying Node structure. (WARNING: If the underlying Node
2786             structure IS altered, then this method should NOT be allowed to
2787             yield an CannotTransform after that point.)
2788         """
2789
2790     def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
2791         """
2792         StringTransformer instances have a call signature that mirrors that of
2793         the Transformer type.
2794
2795         Raises:
2796             CannotTransform(...) if the concrete StringTransformer class is unable
2797             to transform @line.
2798         """
2799         # Optimization to avoid calling `self.do_match(...)` when the line does
2800         # not contain any string.
2801         if not any(leaf.type == token.STRING for leaf in line.leaves):
2802             raise CannotTransform("There are no strings in this line.")
2803
2804         match_result = self.do_match(line)
2805
2806         if isinstance(match_result, Err):
2807             cant_transform = match_result.err()
2808             raise CannotTransform(
2809                 f"The string transformer {self.__class__.__name__} does not recognize"
2810                 " this line as one that it can transform."
2811             ) from cant_transform
2812
2813         string_idx = match_result.ok()
2814
2815         for line_result in self.do_transform(line, string_idx):
2816             if isinstance(line_result, Err):
2817                 cant_transform = line_result.err()
2818                 raise CannotTransform(
2819                     "StringTransformer failed while attempting to transform string."
2820                 ) from cant_transform
2821             line = line_result.ok()
2822             yield line
2823
2824
2825 @dataclass
2826 class CustomSplit:
2827     """A custom (i.e. manual) string split.
2828
2829     A single CustomSplit instance represents a single substring.
2830
2831     Examples:
2832         Consider the following string:
2833         ```
2834         "Hi there friend."
2835         " This is a custom"
2836         f" string {split}."
2837         ```
2838
2839         This string will correspond to the following three CustomSplit instances:
2840         ```
2841         CustomSplit(False, 16)
2842         CustomSplit(False, 17)
2843         CustomSplit(True, 16)
2844         ```
2845     """
2846
2847     has_prefix: bool
2848     break_idx: int
2849
2850
2851 class CustomSplitMapMixin:
2852     """
2853     This mixin class is used to map merged strings to a sequence of
2854     CustomSplits, which will then be used to re-split the strings iff none of
2855     the resultant substrings go over the configured max line length.
2856     """
2857
2858     _Key = Tuple[StringID, str]
2859     _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)
2860
2861     @staticmethod
2862     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
2863         """
2864         Returns:
2865             A unique identifier that is used internally to map @string to a
2866             group of custom splits.
2867         """
2868         return (id(string), string)
2869
2870     def add_custom_splits(
2871         self, string: str, custom_splits: Iterable[CustomSplit]
2872     ) -> None:
2873         """Custom Split Map Setter Method
2874
2875         Side Effects:
2876             Adds a mapping from @string to the custom splits @custom_splits.
2877         """
2878         key = self._get_key(string)
2879         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
2880
2881     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
2882         """Custom Split Map Getter Method
2883
2884         Returns:
2885             * A list of the custom splits that are mapped to @string, if any
2886             exist.
2887                 OR
2888             * [], otherwise.
2889
2890         Side Effects:
2891             Deletes the mapping between @string and its associated custom
2892             splits (which are returned to the caller).
2893         """
2894         key = self._get_key(string)
2895
2896         custom_splits = self._CUSTOM_SPLIT_MAP[key]
2897         del self._CUSTOM_SPLIT_MAP[key]
2898
2899         return list(custom_splits)
2900
2901     def has_custom_splits(self, string: str) -> bool:
2902         """
2903         Returns:
2904             True iff @string is associated with a set of custom splits.
2905         """
2906         key = self._get_key(string)
2907         return key in self._CUSTOM_SPLIT_MAP
2908
2909
2910 class StringMerger(CustomSplitMapMixin, StringTransformer):
2911     """StringTransformer that merges strings together.
2912
2913     Requirements:
2914         (A) The line contains adjacent strings such that ALL of the validation checks
2915         listed in StringMerger.__validate_msg(...)'s docstring pass.
2916             OR
2917         (B) The line contains a string which uses line continuation backslashes.
2918
2919     Transformations:
2920         Depending on which of the two requirements above where met, either:
2921
2922         (A) The string group associated with the target string is merged.
2923             OR
2924         (B) All line-continuation backslashes are removed from the target string.
2925
2926     Collaborations:
2927         StringMerger provides custom split information to StringSplitter.
2928     """
2929
2930     def do_match(self, line: Line) -> TMatchResult:
2931         LL = line.leaves
2932
2933         is_valid_index = is_valid_index_factory(LL)
2934
2935         for (i, leaf) in enumerate(LL):
2936             if (
2937                 leaf.type == token.STRING
2938                 and is_valid_index(i + 1)
2939                 and LL[i + 1].type == token.STRING
2940             ):
2941                 return Ok(i)
2942
2943             if leaf.type == token.STRING and "\\\n" in leaf.value:
2944                 return Ok(i)
2945
2946         return TErr("This line has no strings that need merging.")
2947
2948     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2949         new_line = line
2950         rblc_result = self.__remove_backslash_line_continuation_chars(
2951             new_line, string_idx
2952         )
2953         if isinstance(rblc_result, Ok):
2954             new_line = rblc_result.ok()
2955
2956         msg_result = self.__merge_string_group(new_line, string_idx)
2957         if isinstance(msg_result, Ok):
2958             new_line = msg_result.ok()
2959
2960         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
2961             msg_cant_transform = msg_result.err()
2962             rblc_cant_transform = rblc_result.err()
2963             cant_transform = CannotTransform(
2964                 "StringMerger failed to merge any strings in this line."
2965             )
2966
2967             # Chain the errors together using `__cause__`.
2968             msg_cant_transform.__cause__ = rblc_cant_transform
2969             cant_transform.__cause__ = msg_cant_transform
2970
2971             yield Err(cant_transform)
2972         else:
2973             yield Ok(new_line)
2974
2975     @staticmethod
2976     def __remove_backslash_line_continuation_chars(
2977         line: Line, string_idx: int
2978     ) -> TResult[Line]:
2979         """
2980         Merge strings that were split across multiple lines using
2981         line-continuation backslashes.
2982
2983         Returns:
2984             Ok(new_line), if @line contains backslash line-continuation
2985             characters.
2986                 OR
2987             Err(CannotTransform), otherwise.
2988         """
2989         LL = line.leaves
2990
2991         string_leaf = LL[string_idx]
2992         if not (
2993             string_leaf.type == token.STRING
2994             and "\\\n" in string_leaf.value
2995             and not has_triple_quotes(string_leaf.value)
2996         ):
2997             return TErr(
2998                 f"String leaf {string_leaf} does not contain any backslash line"
2999                 " continuation characters."
3000             )
3001
3002         new_line = line.clone()
3003         new_line.comments = line.comments.copy()
3004         append_leaves(new_line, line, LL)
3005
3006         new_string_leaf = new_line.leaves[string_idx]
3007         new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
3008
3009         return Ok(new_line)
3010
3011     def __merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
3012         """
3013         Merges string group (i.e. set of adjacent strings) where the first
3014         string in the group is `line.leaves[string_idx]`.
3015
3016         Returns:
3017             Ok(new_line), if ALL of the validation checks found in
3018             __validate_msg(...) pass.
3019                 OR
3020             Err(CannotTransform), otherwise.
3021         """
3022         LL = line.leaves
3023
3024         is_valid_index = is_valid_index_factory(LL)
3025
3026         vresult = self.__validate_msg(line, string_idx)
3027         if isinstance(vresult, Err):
3028             return vresult
3029
3030         # If the string group is wrapped inside an Atom node, we must make sure
3031         # to later replace that Atom with our new (merged) string leaf.
3032         atom_node = LL[string_idx].parent
3033
3034         # We will place BREAK_MARK in between every two substrings that we
3035         # merge. We will then later go through our final result and use the
3036         # various instances of BREAK_MARK we find to add the right values to
3037         # the custom split map.
3038         BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"
3039
3040         QUOTE = LL[string_idx].value[-1]
3041
3042         def make_naked(string: str, string_prefix: str) -> str:
3043             """Strip @string (i.e. make it a "naked" string)
3044
3045             Pre-conditions:
3046                 * assert_is_leaf_string(@string)
3047
3048             Returns:
3049                 A string that is identical to @string except that
3050                 @string_prefix has been stripped, the surrounding QUOTE
3051                 characters have been removed, and any remaining QUOTE
3052                 characters have been escaped.
3053             """
3054             assert_is_leaf_string(string)
3055
3056             RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
3057             naked_string = string[len(string_prefix) + 1 : -1]
3058             naked_string = re.sub(
3059                 "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
3060             )
3061             return naked_string
3062
3063         # Holds the CustomSplit objects that will later be added to the custom
3064         # split map.
3065         custom_splits = []
3066
3067         # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
3068         prefix_tracker = []
3069
3070         # Sets the 'prefix' variable. This is the prefix that the final merged
3071         # string will have.
3072         next_str_idx = string_idx
3073         prefix = ""
3074         while (
3075             not prefix
3076             and is_valid_index(next_str_idx)
3077             and LL[next_str_idx].type == token.STRING
3078         ):
3079             prefix = get_string_prefix(LL[next_str_idx].value)
3080             next_str_idx += 1
3081
3082         # The next loop merges the string group. The final string will be
3083         # contained in 'S'.
3084         #
3085         # The following convenience variables are used:
3086         #
3087         #   S: string
3088         #   NS: naked string
3089         #   SS: next string
3090         #   NSS: naked next string
3091         S = ""
3092         NS = ""
3093         num_of_strings = 0
3094         next_str_idx = string_idx
3095         while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
3096             num_of_strings += 1
3097
3098             SS = LL[next_str_idx].value
3099             next_prefix = get_string_prefix(SS)
3100
3101             # If this is an f-string group but this substring is not prefixed
3102             # with 'f'...
3103             if "f" in prefix and "f" not in next_prefix:
3104                 # Then we must escape any braces contained in this substring.
3105                 SS = re.subf(r"(\{|\})", "{1}{1}", SS)
3106
3107             NSS = make_naked(SS, next_prefix)
3108
3109             has_prefix = bool(next_prefix)
3110             prefix_tracker.append(has_prefix)
3111
3112             S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
3113             NS = make_naked(S, prefix)
3114
3115             next_str_idx += 1
3116
3117         S_leaf = Leaf(token.STRING, S)
3118         if self.normalize_strings:
3119             normalize_string_quotes(S_leaf)
3120
3121         # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
3122         temp_string = S_leaf.value[len(prefix) + 1 : -1]
3123         for has_prefix in prefix_tracker:
3124             mark_idx = temp_string.find(BREAK_MARK)
3125             assert (
3126                 mark_idx >= 0
3127             ), "Logic error while filling the custom string breakpoint cache."
3128
3129             temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
3130             breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
3131             custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))
3132
3133         string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))
3134
3135         if atom_node is not None:
3136             replace_child(atom_node, string_leaf)
3137
3138         # Build the final line ('new_line') that this method will later return.
3139         new_line = line.clone()
3140         for (i, leaf) in enumerate(LL):
3141             if i == string_idx:
3142                 new_line.append(string_leaf)
3143
3144             if string_idx <= i < string_idx + num_of_strings:
3145                 for comment_leaf in line.comments_after(LL[i]):
3146                     new_line.append(comment_leaf, preformatted=True)
3147                 continue
3148
3149             append_leaves(new_line, line, [leaf])
3150
3151         self.add_custom_splits(string_leaf.value, custom_splits)
3152         return Ok(new_line)
3153
3154     @staticmethod
3155     def __validate_msg(line: Line, string_idx: int) -> TResult[None]:
3156         """Validate (M)erge (S)tring (G)roup
3157
3158         Transform-time string validation logic for __merge_string_group(...).
3159
3160         Returns:
3161             * Ok(None), if ALL validation checks (listed below) pass.
3162                 OR
3163             * Err(CannotTransform), if any of the following are true:
3164                 - The target string group does not contain ANY stand-alone comments.
3165                 - The target string is not in a string group (i.e. it has no
3166                   adjacent strings).
3167                 - The string group has more than one inline comment.
3168                 - The string group has an inline comment that appears to be a pragma.
3169                 - The set of all string prefixes in the string group is of
3170                   length greater than one and is not equal to {"", "f"}.
3171                 - The string group consists of raw strings.
3172         """
3173         # We first check for "inner" stand-alone comments (i.e. stand-alone
3174         # comments that have a string leaf before them AND after them).
3175         for inc in [1, -1]:
3176             i = string_idx
3177             found_sa_comment = False
3178             is_valid_index = is_valid_index_factory(line.leaves)
3179             while is_valid_index(i) and line.leaves[i].type in [
3180                 token.STRING,
3181                 STANDALONE_COMMENT,
3182             ]:
3183                 if line.leaves[i].type == STANDALONE_COMMENT:
3184                     found_sa_comment = True
3185                 elif found_sa_comment:
3186                     return TErr(
3187                         "StringMerger does NOT merge string groups which contain "
3188                         "stand-alone comments."
3189                     )
3190
3191                 i += inc
3192
3193         num_of_inline_string_comments = 0
3194         set_of_prefixes = set()
3195         num_of_strings = 0
3196         for leaf in line.leaves[string_idx:]:
3197             if leaf.type != token.STRING:
3198                 # If the string group is trailed by a comma, we count the
3199                 # comments trailing the comma to be one of the string group's
3200                 # comments.
3201                 if leaf.type == token.COMMA and id(leaf) in line.comments:
3202                     num_of_inline_string_comments += 1
3203                 break
3204
3205             if has_triple_quotes(leaf.value):
3206                 return TErr("StringMerger does NOT merge multiline strings.")
3207
3208             num_of_strings += 1
3209             prefix = get_string_prefix(leaf.value)
3210             if "r" in prefix:
3211                 return TErr("StringMerger does NOT merge raw strings.")
3212
3213             set_of_prefixes.add(prefix)
3214
3215             if id(leaf) in line.comments:
3216                 num_of_inline_string_comments += 1
3217                 if contains_pragma_comment(line.comments[id(leaf)]):
3218                     return TErr("Cannot merge strings which have pragma comments.")
3219
3220         if num_of_strings < 2:
3221             return TErr(
3222                 f"Not enough strings to merge (num_of_strings={num_of_strings})."
3223             )
3224
3225         if num_of_inline_string_comments > 1:
3226             return TErr(
3227                 f"Too many inline string comments ({num_of_inline_string_comments})."
3228             )
3229
3230         if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
3231             return TErr(f"Too many different prefixes ({set_of_prefixes}).")
3232
3233         return Ok(None)
3234
3235
3236 class StringParenStripper(StringTransformer):
3237     """StringTransformer that strips surrounding parentheses from strings.
3238
3239     Requirements:
3240         The line contains a string which is surrounded by parentheses and:
3241             - The target string is NOT the only argument to a function call).
3242             - If the target string contains a PERCENT, the brackets are not
3243               preceeded or followed by an operator with higher precedence than
3244               PERCENT.
3245
3246     Transformations:
3247         The parentheses mentioned in the 'Requirements' section are stripped.
3248
3249     Collaborations:
3250         StringParenStripper has its own inherent usefulness, but it is also
3251         relied on to clean up the parentheses created by StringParenWrapper (in
3252         the event that they are no longer needed).
3253     """
3254
3255     def do_match(self, line: Line) -> TMatchResult:
3256         LL = line.leaves
3257
3258         is_valid_index = is_valid_index_factory(LL)
3259
3260         for (idx, leaf) in enumerate(LL):
3261             # Should be a string...
3262             if leaf.type != token.STRING:
3263                 continue
3264
3265             # Should be preceded by a non-empty LPAR...
3266             if (
3267                 not is_valid_index(idx - 1)
3268                 or LL[idx - 1].type != token.LPAR
3269                 or is_empty_lpar(LL[idx - 1])
3270             ):
3271                 continue
3272
3273             # That LPAR should NOT be preceded by a function name or a closing
3274             # bracket (which could be a function which returns a function or a
3275             # list/dictionary that contains a function)...
3276             if is_valid_index(idx - 2) and (
3277                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
3278             ):
3279                 continue
3280
3281             string_idx = idx
3282
3283             # Skip the string trailer, if one exists.
3284             string_parser = StringParser()
3285             next_idx = string_parser.parse(LL, string_idx)
3286
3287             # if the leaves in the parsed string include a PERCENT, we need to
3288             # make sure the initial LPAR is NOT preceded by an operator with
3289             # higher or equal precedence to PERCENT
3290             if is_valid_index(idx - 2):
3291                 # mypy can't quite follow unless we name this
3292                 before_lpar = LL[idx - 2]
3293                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
3294                     (
3295                         before_lpar.type
3296                         in {
3297                             token.STAR,
3298                             token.AT,
3299                             token.SLASH,
3300                             token.DOUBLESLASH,
3301                             token.PERCENT,
3302                             token.TILDE,
3303                             token.DOUBLESTAR,
3304                             token.AWAIT,
3305                             token.LSQB,
3306                             token.LPAR,
3307                         }
3308                     )
3309                     or (
3310                         # only unary PLUS/MINUS
3311                         before_lpar.parent
3312                         and before_lpar.parent.type == syms.factor
3313                         and (before_lpar.type in {token.PLUS, token.MINUS})
3314                     )
3315                 ):
3316                     continue
3317
3318             # Should be followed by a non-empty RPAR...
3319             if (
3320                 is_valid_index(next_idx)
3321                 and LL[next_idx].type == token.RPAR
3322                 and not is_empty_rpar(LL[next_idx])
3323             ):
3324                 # That RPAR should NOT be followed by anything with higher
3325                 # precedence than PERCENT
3326                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
3327                     token.DOUBLESTAR,
3328                     token.LSQB,
3329                     token.LPAR,
3330                     token.DOT,
3331                 }:
3332                     continue
3333
3334                 return Ok(string_idx)
3335
3336         return TErr("This line has no strings wrapped in parens.")
3337
3338     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3339         LL = line.leaves
3340
3341         string_parser = StringParser()
3342         rpar_idx = string_parser.parse(LL, string_idx)
3343
3344         for leaf in (LL[string_idx - 1], LL[rpar_idx]):
3345             if line.comments_after(leaf):
3346                 yield TErr(
3347                     "Will not strip parentheses which have comments attached to them."
3348                 )
3349                 return
3350
3351         new_line = line.clone()
3352         new_line.comments = line.comments.copy()
3353         try:
3354             append_leaves(new_line, line, LL[: string_idx - 1])
3355         except BracketMatchError:
3356             # HACK: I believe there is currently a bug somewhere in
3357             # right_hand_split() that is causing brackets to not be tracked
3358             # properly by a shared BracketTracker.
3359             append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True)
3360
3361         string_leaf = Leaf(token.STRING, LL[string_idx].value)
3362         LL[string_idx - 1].remove()
3363         replace_child(LL[string_idx], string_leaf)
3364         new_line.append(string_leaf)
3365
3366         append_leaves(
3367             new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
3368         )
3369
3370         LL[rpar_idx].remove()
3371
3372         yield Ok(new_line)
3373
3374
3375 class BaseStringSplitter(StringTransformer):
3376     """
3377     Abstract class for StringTransformers which transform a Line's strings by splitting
3378     them or placing them on their own lines where necessary to avoid going over
3379     the configured line length.
3380
3381     Requirements:
3382         * The target string value is responsible for the line going over the
3383         line length limit. It follows that after all of black's other line
3384         split methods have been exhausted, this line (or one of the resulting
3385         lines after all line splits are performed) would still be over the
3386         line_length limit unless we split this string.
3387             AND
3388         * The target string is NOT a "pointless" string (i.e. a string that has
3389         no parent or siblings).
3390             AND
3391         * The target string is not followed by an inline comment that appears
3392         to be a pragma.
3393             AND
3394         * The target string is not a multiline (i.e. triple-quote) string.
3395     """
3396
3397     @abstractmethod
3398     def do_splitter_match(self, line: Line) -> TMatchResult:
3399         """
3400         BaseStringSplitter asks its clients to override this method instead of
3401         `StringTransformer.do_match(...)`.
3402
3403         Follows the same protocol as `StringTransformer.do_match(...)`.
3404
3405         Refer to `help(StringTransformer.do_match)` for more information.
3406         """
3407
3408     def do_match(self, line: Line) -> TMatchResult:
3409         match_result = self.do_splitter_match(line)
3410         if isinstance(match_result, Err):
3411             return match_result
3412
3413         string_idx = match_result.ok()
3414         vresult = self.__validate(line, string_idx)
3415         if isinstance(vresult, Err):
3416             return vresult
3417
3418         return match_result
3419
3420     def __validate(self, line: Line, string_idx: int) -> TResult[None]:
3421         """
3422         Checks that @line meets all of the requirements listed in this classes'
3423         docstring. Refer to `help(BaseStringSplitter)` for a detailed
3424         description of those requirements.
3425
3426         Returns:
3427             * Ok(None), if ALL of the requirements are met.
3428                 OR
3429             * Err(CannotTransform), if ANY of the requirements are NOT met.
3430         """
3431         LL = line.leaves
3432
3433         string_leaf = LL[string_idx]
3434
3435         max_string_length = self.__get_max_string_length(line, string_idx)
3436         if len(string_leaf.value) <= max_string_length:
3437             return TErr(
3438                 "The string itself is not what is causing this line to be too long."
3439             )
3440
3441         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
3442             token.STRING,
3443             token.NEWLINE,
3444         ]:
3445             return TErr(
3446                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
3447                 " no parent)."
3448             )
3449
3450         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
3451             line.comments[id(line.leaves[string_idx])]
3452         ):
3453             return TErr(
3454                 "Line appears to end with an inline pragma comment. Splitting the line"
3455                 " could modify the pragma's behavior."
3456             )
3457
3458         if has_triple_quotes(string_leaf.value):
3459             return TErr("We cannot split multiline strings.")
3460
3461         return Ok(None)
3462
3463     def __get_max_string_length(self, line: Line, string_idx: int) -> int:
3464         """
3465         Calculates the max string length used when attempting to determine
3466         whether or not the target string is responsible for causing the line to
3467         go over the line length limit.
3468
3469         WARNING: This method is tightly coupled to both StringSplitter and
3470         (especially) StringParenWrapper. There is probably a better way to
3471         accomplish what is being done here.
3472
3473         Returns:
3474             max_string_length: such that `line.leaves[string_idx].value >
3475             max_string_length` implies that the target string IS responsible
3476             for causing this line to exceed the line length limit.
3477         """
3478         LL = line.leaves
3479
3480         is_valid_index = is_valid_index_factory(LL)
3481
3482         # We use the shorthand "WMA4" in comments to abbreviate "We must
3483         # account for". When giving examples, we use STRING to mean some/any
3484         # valid string.
3485         #
3486         # Finally, we use the following convenience variables:
3487         #
3488         #   P:  The leaf that is before the target string leaf.
3489         #   N:  The leaf that is after the target string leaf.
3490         #   NN: The leaf that is after N.
3491
3492         # WMA4 the whitespace at the beginning of the line.
3493         offset = line.depth * 4
3494
3495         if is_valid_index(string_idx - 1):
3496             p_idx = string_idx - 1
3497             if (
3498                 LL[string_idx - 1].type == token.LPAR
3499                 and LL[string_idx - 1].value == ""
3500                 and string_idx >= 2
3501             ):
3502                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
3503                 p_idx -= 1
3504
3505             P = LL[p_idx]
3506             if P.type == token.PLUS:
3507                 # WMA4 a space and a '+' character (e.g. `+ STRING`).
3508                 offset += 2
3509
3510             if P.type == token.COMMA:
3511                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
3512                 offset += 3
3513
3514             if P.type in [token.COLON, token.EQUAL, token.NAME]:
3515                 # This conditional branch is meant to handle dictionary keys,
3516                 # variable assignments, 'return STRING' statement lines, and
3517                 # 'else STRING' ternary expression lines.
3518
3519                 # WMA4 a single space.
3520                 offset += 1
3521
3522                 # WMA4 the lengths of any leaves that came before that space,
3523                 # but after any closing bracket before that space.
3524                 for leaf in reversed(LL[: p_idx + 1]):
3525                     offset += len(str(leaf))
3526                     if leaf.type in CLOSING_BRACKETS:
3527                         break
3528
3529         if is_valid_index(string_idx + 1):
3530             N = LL[string_idx + 1]
3531             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
3532                 # If the next leaf is an empty RPAR placeholder, we should skip it.
3533                 N = LL[string_idx + 2]
3534
3535             if N.type == token.COMMA:
3536                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
3537                 offset += 1
3538
3539             if is_valid_index(string_idx + 2):
3540                 NN = LL[string_idx + 2]
3541
3542                 if N.type == token.DOT and NN.type == token.NAME:
3543                     # This conditional branch is meant to handle method calls invoked
3544                     # off of a string literal up to and including the LPAR character.
3545
3546                     # WMA4 the '.' character.
3547                     offset += 1
3548
3549                     if (
3550                         is_valid_index(string_idx + 3)
3551                         and LL[string_idx + 3].type == token.LPAR
3552                     ):
3553                         # WMA4 the left parenthesis character.
3554                         offset += 1
3555
3556                     # WMA4 the length of the method's name.
3557                     offset += len(NN.value)
3558
3559         has_comments = False
3560         for comment_leaf in line.comments_after(LL[string_idx]):
3561             if not has_comments:
3562                 has_comments = True
3563                 # WMA4 two spaces before the '#' character.
3564                 offset += 2
3565
3566             # WMA4 the length of the inline comment.
3567             offset += len(comment_leaf.value)
3568
3569         max_string_length = self.line_length - offset
3570         return max_string_length
3571
3572
3573 class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
3574     """
3575     StringTransformer that splits "atom" strings (i.e. strings which exist on
3576     lines by themselves).
3577
3578     Requirements:
3579         * The line consists ONLY of a single string (with the exception of a
3580         '+' symbol which MAY exist at the start of the line), MAYBE a string
3581         trailer, and MAYBE a trailing comma.
3582             AND
3583         * All of the requirements listed in BaseStringSplitter's docstring.
3584
3585     Transformations:
3586         The string mentioned in the 'Requirements' section is split into as
3587         many substrings as necessary to adhere to the configured line length.
3588
3589         In the final set of substrings, no substring should be smaller than
3590         MIN_SUBSTR_SIZE characters.
3591
3592         The string will ONLY be split on spaces (i.e. each new substring should
3593         start with a space). Note that the string will NOT be split on a space
3594         which is escaped with a backslash.
3595
3596         If the string is an f-string, it will NOT be split in the middle of an
3597         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
3598         else bar()} is an f-expression).
3599
3600         If the string that is being split has an associated set of custom split
3601         records and those custom splits will NOT result in any line going over
3602         the configured line length, those custom splits are used. Otherwise the
3603         string is split as late as possible (from left-to-right) while still
3604         adhering to the transformation rules listed above.
3605
3606     Collaborations:
3607         StringSplitter relies on StringMerger to construct the appropriate
3608         CustomSplit objects and add them to the custom split map.
3609     """
3610
3611     MIN_SUBSTR_SIZE = 6
3612     # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
3613     RE_FEXPR = r"""
3614     (?<!\{) (?:\{\{)* \{ (?!\{)
3615         (?:
3616             [^\{\}]
3617             | \{\{
3618             | \}\}
3619             | (?R)
3620         )+?
3621     (?<!\}) \} (?:\}\})* (?!\})
3622     """
3623
3624     def do_splitter_match(self, line: Line) -> TMatchResult:
3625         LL = line.leaves
3626
3627         is_valid_index = is_valid_index_factory(LL)
3628
3629         idx = 0
3630
3631         # The first leaf MAY be a '+' symbol...
3632         if is_valid_index(idx) and LL[idx].type == token.PLUS:
3633             idx += 1
3634
3635         # The next/first leaf MAY be an empty LPAR...
3636         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
3637             idx += 1
3638
3639         # The next/first leaf MUST be a string...
3640         if not is_valid_index(idx) or LL[idx].type != token.STRING:
3641             return TErr("Line does not start with a string.")
3642
3643         string_idx = idx
3644
3645         # Skip the string trailer, if one exists.
3646         string_parser = StringParser()
3647         idx = string_parser.parse(LL, string_idx)
3648
3649         # That string MAY be followed by an empty RPAR...
3650         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
3651             idx += 1
3652
3653         # That string / empty RPAR leaf MAY be followed by a comma...
3654         if is_valid_index(idx) and LL[idx].type == token.COMMA:
3655             idx += 1
3656
3657         # But no more leaves are allowed...
3658         if is_valid_index(idx):
3659             return TErr("This line does not end with a string.")
3660
3661         return Ok(string_idx)
3662
3663     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3664         LL = line.leaves
3665
3666         QUOTE = LL[string_idx].value[-1]
3667
3668         is_valid_index = is_valid_index_factory(LL)
3669         insert_str_child = insert_str_child_factory(LL[string_idx])
3670
3671         prefix = get_string_prefix(LL[string_idx].value)
3672
3673         # We MAY choose to drop the 'f' prefix from substrings that don't
3674         # contain any f-expressions, but ONLY if the original f-string
3675         # contains at least one f-expression. Otherwise, we will alter the AST
3676         # of the program.
3677         drop_pointless_f_prefix = ("f" in prefix) and re.search(
3678             self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
3679         )
3680
3681         first_string_line = True
3682         starts_with_plus = LL[0].type == token.PLUS
3683
3684         def line_needs_plus() -> bool:
3685             return first_string_line and starts_with_plus
3686
3687         def maybe_append_plus(new_line: Line) -> None:
3688             """
3689             Side Effects:
3690                 If @line starts with a plus and this is the first line we are
3691                 constructing, this function appends a PLUS leaf to @new_line
3692                 and replaces the old PLUS leaf in the node structure. Otherwise
3693                 this function does nothing.
3694             """
3695             if line_needs_plus():
3696                 plus_leaf = Leaf(token.PLUS, "+")
3697                 replace_child(LL[0], plus_leaf)
3698                 new_line.append(plus_leaf)
3699
3700         ends_with_comma = (
3701             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
3702         )
3703
3704         def max_last_string() -> int:
3705             """
3706             Returns:
3707                 The max allowed length of the string value used for the last
3708                 line we will construct.
3709             """
3710             result = self.line_length
3711             result -= line.depth * 4
3712             result -= 1 if ends_with_comma else 0
3713             result -= 2 if line_needs_plus() else 0
3714             return result
3715
3716         # --- Calculate Max Break Index (for string value)
3717         # We start with the line length limit
3718         max_break_idx = self.line_length
3719         # The last index of a string of length N is N-1.
3720         max_break_idx -= 1
3721         # Leading whitespace is not present in the string value (e.g. Leaf.value).
3722         max_break_idx -= line.depth * 4
3723         if max_break_idx < 0:
3724             yield TErr(
3725                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
3726                 f" {line.depth}"
3727             )
3728             return
3729
3730         # Check if StringMerger registered any custom splits.
3731         custom_splits = self.pop_custom_splits(LL[string_idx].value)
3732         # We use them ONLY if none of them would produce lines that exceed the
3733         # line limit.
3734         use_custom_breakpoints = bool(
3735             custom_splits
3736             and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
3737         )
3738
3739         # Temporary storage for the remaining chunk of the string line that
3740         # can't fit onto the line currently being constructed.
3741         rest_value = LL[string_idx].value
3742
3743         def more_splits_should_be_made() -> bool:
3744             """
3745             Returns:
3746                 True iff `rest_value` (the remaining string value from the last
3747                 split), should be split again.
3748             """
3749             if use_custom_breakpoints:
3750                 return len(custom_splits) > 1
3751             else:
3752                 return len(rest_value) > max_last_string()
3753
3754         string_line_results: List[Ok[Line]] = []
3755         while more_splits_should_be_made():
3756             if use_custom_breakpoints:
3757                 # Custom User Split (manual)
3758                 csplit = custom_splits.pop(0)
3759                 break_idx = csplit.break_idx
3760             else:
3761                 # Algorithmic Split (automatic)
3762                 max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
3763                 maybe_break_idx = self.__get_break_idx(rest_value, max_bidx)
3764                 if maybe_break_idx is None:
3765                     # If we are unable to algorithmically determine a good split
3766                     # and this string has custom splits registered to it, we
3767                     # fall back to using them--which means we have to start
3768                     # over from the beginning.
3769                     if custom_splits:
3770                         rest_value = LL[string_idx].value
3771                         string_line_results = []
3772                         first_string_line = True
3773                         use_custom_breakpoints = True
3774                         continue
3775
3776                     # Otherwise, we stop splitting here.
3777                     break
3778
3779                 break_idx = maybe_break_idx
3780
3781             # --- Construct `next_value`
3782             next_value = rest_value[:break_idx] + QUOTE
3783             if (
3784                 # Are we allowed to try to drop a pointless 'f' prefix?
3785                 drop_pointless_f_prefix
3786                 # If we are, will we be successful?
3787                 and next_value != self.__normalize_f_string(next_value, prefix)
3788             ):
3789                 # If the current custom split did NOT originally use a prefix,
3790                 # then `csplit.break_idx` will be off by one after removing
3791                 # the 'f' prefix.
3792                 break_idx = (
3793                     break_idx + 1
3794                     if use_custom_breakpoints and not csplit.has_prefix
3795                     else break_idx
3796                 )
3797                 next_value = rest_value[:break_idx] + QUOTE
3798                 next_value = self.__normalize_f_string(next_value, prefix)
3799
3800             # --- Construct `next_leaf`
3801             next_leaf = Leaf(token.STRING, next_value)
3802             insert_str_child(next_leaf)
3803             self.__maybe_normalize_string_quotes(next_leaf)
3804
3805             # --- Construct `next_line`
3806             next_line = line.clone()
3807             maybe_append_plus(next_line)
3808             next_line.append(next_leaf)
3809             string_line_results.append(Ok(next_line))
3810
3811             rest_value = prefix + QUOTE + rest_value[break_idx:]
3812             first_string_line = False
3813
3814         yield from string_line_results
3815
3816         if drop_pointless_f_prefix:
3817             rest_value = self.__normalize_f_string(rest_value, prefix)
3818
3819         rest_leaf = Leaf(token.STRING, rest_value)
3820         insert_str_child(rest_leaf)
3821
3822         # NOTE: I could not find a test case that verifies that the following
3823         # line is actually necessary, but it seems to be. Otherwise we risk
3824         # not normalizing the last substring, right?
3825         self.__maybe_normalize_string_quotes(rest_leaf)
3826
3827         last_line = line.clone()
3828         maybe_append_plus(last_line)
3829
3830         # If there are any leaves to the right of the target string...
3831         if is_valid_index(string_idx + 1):
3832             # We use `temp_value` here to determine how long the last line
3833             # would be if we were to append all the leaves to the right of the
3834             # target string to the last string line.
3835             temp_value = rest_value
3836             for leaf in LL[string_idx + 1 :]:
3837                 temp_value += str(leaf)
3838                 if leaf.type == token.LPAR:
3839                     break
3840
3841             # Try to fit them all on the same line with the last substring...
3842             if (
3843                 len(temp_value) <= max_last_string()
3844                 or LL[string_idx + 1].type == token.COMMA
3845             ):
3846                 last_line.append(rest_leaf)
3847                 append_leaves(last_line, line, LL[string_idx + 1 :])
3848                 yield Ok(last_line)
3849             # Otherwise, place the last substring on one line and everything
3850             # else on a line below that...
3851             else:
3852                 last_line.append(rest_leaf)
3853                 yield Ok(last_line)
3854
3855                 non_string_line = line.clone()
3856                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
3857                 yield Ok(non_string_line)
3858         # Else the target string was the last leaf...
3859         else:
3860             last_line.append(rest_leaf)
3861             last_line.comments = line.comments.copy()
3862             yield Ok(last_line)
3863
3864     def __get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
3865         """
3866         This method contains the algorithm that StringSplitter uses to
3867         determine which character to split each string at.
3868
3869         Args:
3870             @string: The substring that we are attempting to split.
3871             @max_break_idx: The ideal break index. We will return this value if it
3872             meets all the necessary conditions. In the likely event that it
3873             doesn't we will try to find the closest index BELOW @max_break_idx
3874             that does. If that fails, we will expand our search by also
3875             considering all valid indices ABOVE @max_break_idx.
3876
3877         Pre-Conditions:
3878             * assert_is_leaf_string(@string)
3879             * 0 <= @max_break_idx < len(@string)
3880
3881         Returns:
3882             break_idx, if an index is able to be found that meets all of the
3883             conditions listed in the 'Transformations' section of this classes'
3884             docstring.
3885                 OR
3886             None, otherwise.
3887         """
3888         is_valid_index = is_valid_index_factory(string)
3889
3890         assert is_valid_index(max_break_idx)
3891         assert_is_leaf_string(string)
3892
3893         _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
3894
3895         def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
3896             """
3897             Yields:
3898                 All ranges of @string which, if @string were to be split there,
3899                 would result in the splitting of an f-expression (which is NOT
3900                 allowed).
3901             """
3902             nonlocal _fexpr_slices
3903
3904             if _fexpr_slices is None:
3905                 _fexpr_slices = []
3906                 for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
3907                     _fexpr_slices.append(match.span())
3908
3909             yield from _fexpr_slices
3910
3911         is_fstring = "f" in get_string_prefix(string)
3912
3913         def breaks_fstring_expression(i: Index) -> bool:
3914             """
3915             Returns:
3916                 True iff returning @i would result in the splitting of an
3917                 f-expression (which is NOT allowed).
3918             """
3919             if not is_fstring:
3920                 return False
3921
3922             for (start, end) in fexpr_slices():
3923                 if start <= i < end:
3924                     return True
3925
3926             return False
3927
3928         def passes_all_checks(i: Index) -> bool:
3929             """
3930             Returns:
3931                 True iff ALL of the conditions listed in the 'Transformations'
3932                 section of this classes' docstring would be be met by returning @i.
3933             """
3934             is_space = string[i] == " "
3935
3936             is_not_escaped = True
3937             j = i - 1
3938             while is_valid_index(j) and string[j] == "\\":
3939                 is_not_escaped = not is_not_escaped
3940                 j -= 1
3941
3942             is_big_enough = (
3943                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
3944                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
3945             )
3946             return (
3947                 is_space
3948                 and is_not_escaped
3949                 and is_big_enough
3950                 and not breaks_fstring_expression(i)
3951             )
3952
3953         # First, we check all indices BELOW @max_break_idx.
3954         break_idx = max_break_idx
3955         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
3956             break_idx -= 1
3957
3958         if not passes_all_checks(break_idx):
3959             # If that fails, we check all indices ABOVE @max_break_idx.
3960             #
3961             # If we are able to find a valid index here, the next line is going
3962             # to be longer than the specified line length, but it's probably
3963             # better than doing nothing at all.
3964             break_idx = max_break_idx + 1
3965             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
3966                 break_idx += 1
3967
3968             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
3969                 return None
3970
3971         return break_idx
3972
3973     def __maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
3974         if self.normalize_strings:
3975             normalize_string_quotes(leaf)
3976
3977     def __normalize_f_string(self, string: str, prefix: str) -> str:
3978         """
3979         Pre-Conditions:
3980             * assert_is_leaf_string(@string)
3981
3982         Returns:
3983             * If @string is an f-string that contains no f-expressions, we
3984             return a string identical to @string except that the 'f' prefix
3985             has been stripped and all double braces (i.e. '{{' or '}}') have
3986             been normalized (i.e. turned into '{' or '}').
3987                 OR
3988             * Otherwise, we return @string.
3989         """
3990         assert_is_leaf_string(string)
3991
3992         if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
3993             new_prefix = prefix.replace("f", "")
3994
3995             temp = string[len(prefix) :]
3996             temp = re.sub(r"\{\{", "{", temp)
3997             temp = re.sub(r"\}\}", "}", temp)
3998             new_string = temp
3999
4000             return f"{new_prefix}{new_string}"
4001         else:
4002             return string
4003
4004
4005 class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
4006     """
4007     StringTransformer that splits non-"atom" strings (i.e. strings that do not
4008     exist on lines by themselves).
4009
4010     Requirements:
4011         All of the requirements listed in BaseStringSplitter's docstring in
4012         addition to the requirements listed below:
4013
4014         * The line is a return/yield statement, which returns/yields a string.
4015             OR
4016         * The line is part of a ternary expression (e.g. `x = y if cond else
4017         z`) such that the line starts with `else <string>`, where <string> is
4018         some string.
4019             OR
4020         * The line is an assert statement, which ends with a string.
4021             OR
4022         * The line is an assignment statement (e.g. `x = <string>` or `x +=
4023         <string>`) such that the variable is being assigned the value of some
4024         string.
4025             OR
4026         * The line is a dictionary key assignment where some valid key is being
4027         assigned the value of some string.
4028
4029     Transformations:
4030         The chosen string is wrapped in parentheses and then split at the LPAR.
4031
4032         We then have one line which ends with an LPAR and another line that
4033         starts with the chosen string. The latter line is then split again at
4034         the RPAR. This results in the RPAR (and possibly a trailing comma)
4035         being placed on its own line.
4036
4037         NOTE: If any leaves exist to the right of the chosen string (except
4038         for a trailing comma, which would be placed after the RPAR), those
4039         leaves are placed inside the parentheses.  In effect, the chosen
4040         string is not necessarily being "wrapped" by parentheses. We can,
4041         however, count on the LPAR being placed directly before the chosen
4042         string.
4043
4044         In other words, StringParenWrapper creates "atom" strings. These
4045         can then be split again by StringSplitter, if necessary.
4046
4047     Collaborations:
4048         In the event that a string line split by StringParenWrapper is
4049         changed such that it no longer needs to be given its own line,
4050         StringParenWrapper relies on StringParenStripper to clean up the
4051         parentheses it created.
4052     """
4053
4054     def do_splitter_match(self, line: Line) -> TMatchResult:
4055         LL = line.leaves
4056
4057         string_idx = (
4058             self._return_match(LL)
4059             or self._else_match(LL)
4060             or self._assert_match(LL)
4061             or self._assign_match(LL)
4062             or self._dict_match(LL)
4063         )
4064
4065         if string_idx is not None:
4066             string_value = line.leaves[string_idx].value
4067             # If the string has no spaces...
4068             if " " not in string_value:
4069                 # And will still violate the line length limit when split...
4070                 max_string_length = self.line_length - ((line.depth + 1) * 4)
4071                 if len(string_value) > max_string_length:
4072                     # And has no associated custom splits...
4073                     if not self.has_custom_splits(string_value):
4074                         # Then we should NOT put this string on its own line.
4075                         return TErr(
4076                             "We do not wrap long strings in parentheses when the"
4077                             " resultant line would still be over the specified line"
4078                             " length and can't be split further by StringSplitter."
4079                         )
4080             return Ok(string_idx)
4081
4082         return TErr("This line does not contain any non-atomic strings.")
4083
4084     @staticmethod
4085     def _return_match(LL: List[Leaf]) -> Optional[int]:
4086         """
4087         Returns:
4088             string_idx such that @LL[string_idx] is equal to our target (i.e.
4089             matched) string, if this line matches the return/yield statement
4090             requirements listed in the 'Requirements' section of this classes'
4091             docstring.
4092                 OR
4093             None, otherwise.
4094         """
4095         # If this line is apart of a return/yield statement and the first leaf
4096         # contains either the "return" or "yield" keywords...
4097         if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
4098             0
4099         ].value in ["return", "yield"]:
4100             is_valid_index = is_valid_index_factory(LL)
4101
4102             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4103             # The next visible leaf MUST contain a string...
4104             if is_valid_index(idx) and LL[idx].type == token.STRING:
4105                 return idx
4106
4107         return None
4108
4109     @staticmethod
4110     def _else_match(LL: List[Leaf]) -> Optional[int]:
4111         """
4112         Returns:
4113             string_idx such that @LL[string_idx] is equal to our target (i.e.
4114             matched) string, if this line matches the ternary expression
4115             requirements listed in the 'Requirements' section of this classes'
4116             docstring.
4117                 OR
4118             None, otherwise.
4119         """
4120         # If this line is apart of a ternary expression and the first leaf
4121         # contains the "else" keyword...
4122         if (
4123             parent_type(LL[0]) == syms.test
4124             and LL[0].type == token.NAME
4125             and LL[0].value == "else"
4126         ):
4127             is_valid_index = is_valid_index_factory(LL)
4128
4129             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4130             # The next visible leaf MUST contain a string...
4131             if is_valid_index(idx) and LL[idx].type == token.STRING:
4132                 return idx
4133
4134         return None
4135
4136     @staticmethod
4137     def _assert_match(LL: List[Leaf]) -> Optional[int]:
4138         """
4139         Returns:
4140             string_idx such that @LL[string_idx] is equal to our target (i.e.
4141             matched) string, if this line matches the assert statement
4142             requirements listed in the 'Requirements' section of this classes'
4143             docstring.
4144                 OR
4145             None, otherwise.
4146         """
4147         # If this line is apart of an assert statement and the first leaf
4148         # contains the "assert" keyword...
4149         if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
4150             is_valid_index = is_valid_index_factory(LL)
4151
4152             for (i, leaf) in enumerate(LL):
4153                 # We MUST find a comma...
4154                 if leaf.type == token.COMMA:
4155                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4156
4157                     # That comma MUST be followed by a string...
4158                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4159                         string_idx = idx
4160
4161                         # Skip the string trailer, if one exists.
4162                         string_parser = StringParser()
4163                         idx = string_parser.parse(LL, string_idx)
4164
4165                         # But no more leaves are allowed...
4166                         if not is_valid_index(idx):
4167                             return string_idx
4168
4169         return None
4170
4171     @staticmethod
4172     def _assign_match(LL: List[Leaf]) -> Optional[int]:
4173         """
4174         Returns:
4175             string_idx such that @LL[string_idx] is equal to our target (i.e.
4176             matched) string, if this line matches the assignment statement
4177             requirements listed in the 'Requirements' section of this classes'
4178             docstring.
4179                 OR
4180             None, otherwise.
4181         """
4182         # If this line is apart of an expression statement or is a function
4183         # argument AND the first leaf contains a variable name...
4184         if (
4185             parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
4186             and LL[0].type == token.NAME
4187         ):
4188             is_valid_index = is_valid_index_factory(LL)
4189
4190             for (i, leaf) in enumerate(LL):
4191                 # We MUST find either an '=' or '+=' symbol...
4192                 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
4193                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4194
4195                     # That symbol MUST be followed by a string...
4196                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4197                         string_idx = idx
4198
4199                         # Skip the string trailer, if one exists.
4200                         string_parser = StringParser()
4201                         idx = string_parser.parse(LL, string_idx)
4202
4203                         # The next leaf MAY be a comma iff this line is apart
4204                         # of a function argument...
4205                         if (
4206                             parent_type(LL[0]) == syms.argument
4207                             and is_valid_index(idx)
4208                             and LL[idx].type == token.COMMA
4209                         ):
4210                             idx += 1
4211
4212                         # But no more leaves are allowed...
4213                         if not is_valid_index(idx):
4214                             return string_idx
4215
4216         return None
4217
4218     @staticmethod
4219     def _dict_match(LL: List[Leaf]) -> Optional[int]:
4220         """
4221         Returns:
4222             string_idx such that @LL[string_idx] is equal to our target (i.e.
4223             matched) string, if this line matches the dictionary key assignment
4224             statement requirements listed in the 'Requirements' section of this
4225             classes' docstring.
4226                 OR
4227             None, otherwise.
4228         """
4229         # If this line is apart of a dictionary key assignment...
4230         if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
4231             is_valid_index = is_valid_index_factory(LL)
4232
4233             for (i, leaf) in enumerate(LL):
4234                 # We MUST find a colon...
4235                 if leaf.type == token.COLON:
4236                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4237
4238                     # That colon MUST be followed by a string...
4239                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4240                         string_idx = idx
4241
4242                         # Skip the string trailer, if one exists.
4243                         string_parser = StringParser()
4244                         idx = string_parser.parse(LL, string_idx)
4245
4246                         # That string MAY be followed by a comma...
4247                         if is_valid_index(idx) and LL[idx].type == token.COMMA:
4248                             idx += 1
4249
4250                         # But no more leaves are allowed...
4251                         if not is_valid_index(idx):
4252                             return string_idx
4253
4254         return None
4255
4256     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
4257         LL = line.leaves
4258
4259         is_valid_index = is_valid_index_factory(LL)
4260         insert_str_child = insert_str_child_factory(LL[string_idx])
4261
4262         comma_idx = -1
4263         ends_with_comma = False
4264         if LL[comma_idx].type == token.COMMA:
4265             ends_with_comma = True
4266
4267         leaves_to_steal_comments_from = [LL[string_idx]]
4268         if ends_with_comma:
4269             leaves_to_steal_comments_from.append(LL[comma_idx])
4270
4271         # --- First Line
4272         first_line = line.clone()
4273         left_leaves = LL[:string_idx]
4274
4275         # We have to remember to account for (possibly invisible) LPAR and RPAR
4276         # leaves that already wrapped the target string. If these leaves do
4277         # exist, we will replace them with our own LPAR and RPAR leaves.
4278         old_parens_exist = False
4279         if left_leaves and left_leaves[-1].type == token.LPAR:
4280             old_parens_exist = True
4281             leaves_to_steal_comments_from.append(left_leaves[-1])
4282             left_leaves.pop()
4283
4284         append_leaves(first_line, line, left_leaves)
4285
4286         lpar_leaf = Leaf(token.LPAR, "(")
4287         if old_parens_exist:
4288             replace_child(LL[string_idx - 1], lpar_leaf)
4289         else:
4290             insert_str_child(lpar_leaf)
4291         first_line.append(lpar_leaf)
4292
4293         # We throw inline comments that were originally to the right of the
4294         # target string to the top line. They will now be shown to the right of
4295         # the LPAR.
4296         for leaf in leaves_to_steal_comments_from:
4297             for comment_leaf in line.comments_after(leaf):
4298                 first_line.append(comment_leaf, preformatted=True)
4299
4300         yield Ok(first_line)
4301
4302         # --- Middle (String) Line
4303         # We only need to yield one (possibly too long) string line, since the
4304         # `StringSplitter` will break it down further if necessary.
4305         string_value = LL[string_idx].value
4306         string_line = Line(
4307             depth=line.depth + 1,
4308             inside_brackets=True,
4309             should_explode=line.should_explode,
4310         )
4311         string_leaf = Leaf(token.STRING, string_value)
4312         insert_str_child(string_leaf)
4313         string_line.append(string_leaf)
4314
4315         old_rpar_leaf = None
4316         if is_valid_index(string_idx + 1):
4317             right_leaves = LL[string_idx + 1 :]
4318             if ends_with_comma:
4319                 right_leaves.pop()
4320
4321             if old_parens_exist:
4322                 assert (
4323                     right_leaves and right_leaves[-1].type == token.RPAR
4324                 ), "Apparently, old parentheses do NOT exist?!"
4325                 old_rpar_leaf = right_leaves.pop()
4326
4327             append_leaves(string_line, line, right_leaves)
4328
4329         yield Ok(string_line)
4330
4331         # --- Last Line
4332         last_line = line.clone()
4333         last_line.bracket_tracker = first_line.bracket_tracker
4334
4335         new_rpar_leaf = Leaf(token.RPAR, ")")
4336         if old_rpar_leaf is not None:
4337             replace_child(old_rpar_leaf, new_rpar_leaf)
4338         else:
4339             insert_str_child(new_rpar_leaf)
4340         last_line.append(new_rpar_leaf)
4341
4342         # If the target string ended with a comma, we place this comma to the
4343         # right of the RPAR on the last line.
4344         if ends_with_comma:
4345             comma_leaf = Leaf(token.COMMA, ",")
4346             replace_child(LL[comma_idx], comma_leaf)
4347             last_line.append(comma_leaf)
4348
4349         yield Ok(last_line)
4350
4351
4352 class StringParser:
4353     """
4354     A state machine that aids in parsing a string's "trailer", which can be
4355     either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
4356     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
4357     varY)`).
4358
4359     NOTE: A new StringParser object MUST be instantiated for each string
4360     trailer we need to parse.
4361
4362     Examples:
4363         We shall assume that `line` equals the `Line` object that corresponds
4364         to the following line of python code:
4365         ```
4366         x = "Some {}.".format("String") + some_other_string
4367         ```
4368
4369         Furthermore, we will assume that `string_idx` is some index such that:
4370         ```
4371         assert line.leaves[string_idx].value == "Some {}."
4372         ```
4373
4374         The following code snippet then holds:
4375         ```
4376         string_parser = StringParser()
4377         idx = string_parser.parse(line.leaves, string_idx)
4378         assert line.leaves[idx].type == token.PLUS
4379         ```
4380     """
4381
4382     DEFAULT_TOKEN = -1
4383
4384     # String Parser States
4385     START = 1
4386     DOT = 2
4387     NAME = 3
4388     PERCENT = 4
4389     SINGLE_FMT_ARG = 5
4390     LPAR = 6
4391     RPAR = 7
4392     DONE = 8
4393
4394     # Lookup Table for Next State
4395     _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
4396         # A string trailer may start with '.' OR '%'.
4397         (START, token.DOT): DOT,
4398         (START, token.PERCENT): PERCENT,
4399         (START, DEFAULT_TOKEN): DONE,
4400         # A '.' MUST be followed by an attribute or method name.
4401         (DOT, token.NAME): NAME,
4402         # A method name MUST be followed by an '(', whereas an attribute name
4403         # is the last symbol in the string trailer.
4404         (NAME, token.LPAR): LPAR,
4405         (NAME, DEFAULT_TOKEN): DONE,
4406         # A '%' symbol can be followed by an '(' or a single argument (e.g. a
4407         # string or variable name).
4408         (PERCENT, token.LPAR): LPAR,
4409         (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
4410         # If a '%' symbol is followed by a single argument, that argument is
4411         # the last leaf in the string trailer.
4412         (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
4413         # If present, a ')' symbol is the last symbol in a string trailer.
4414         # (NOTE: LPARS and nested RPARS are not included in this lookup table,
4415         # since they are treated as a special case by the parsing logic in this
4416         # classes' implementation.)
4417         (RPAR, DEFAULT_TOKEN): DONE,
4418     }
4419
4420     def __init__(self) -> None:
4421         self._state = self.START
4422         self._unmatched_lpars = 0
4423
4424     def parse(self, leaves: List[Leaf], string_idx: int) -> int:
4425         """
4426         Pre-conditions:
4427             * @leaves[@string_idx].type == token.STRING
4428
4429         Returns:
4430             The index directly after the last leaf which is apart of the string
4431             trailer, if a "trailer" exists.
4432                 OR
4433             @string_idx + 1, if no string "trailer" exists.
4434         """
4435         assert leaves[string_idx].type == token.STRING
4436
4437         idx = string_idx + 1
4438         while idx < len(leaves) and self._next_state(leaves[idx]):
4439             idx += 1
4440         return idx
4441
4442     def _next_state(self, leaf: Leaf) -> bool:
4443         """
4444         Pre-conditions:
4445             * On the first call to this function, @leaf MUST be the leaf that
4446             was directly after the string leaf in question (e.g. if our target
4447             string is `line.leaves[i]` then the first call to this method must
4448             be `line.leaves[i + 1]`).
4449             * On the next call to this function, the leaf parameter passed in
4450             MUST be the leaf directly following @leaf.
4451
4452         Returns:
4453             True iff @leaf is apart of the string's trailer.
4454         """
4455         # We ignore empty LPAR or RPAR leaves.
4456         if is_empty_par(leaf):
4457             return True
4458
4459         next_token = leaf.type
4460         if next_token == token.LPAR:
4461             self._unmatched_lpars += 1
4462
4463         current_state = self._state
4464
4465         # The LPAR parser state is a special case. We will return True until we
4466         # find the matching RPAR token.
4467         if current_state == self.LPAR:
4468             if next_token == token.RPAR:
4469                 self._unmatched_lpars -= 1
4470                 if self._unmatched_lpars == 0:
4471                     self._state = self.RPAR
4472         # Otherwise, we use a lookup table to determine the next state.
4473         else:
4474             # If the lookup table matches the current state to the next
4475             # token, we use the lookup table.
4476             if (current_state, next_token) in self._goto:
4477                 self._state = self._goto[current_state, next_token]
4478             else:
4479                 # Otherwise, we check if a the current state was assigned a
4480                 # default.
4481                 if (current_state, self.DEFAULT_TOKEN) in self._goto:
4482                     self._state = self._goto[current_state, self.DEFAULT_TOKEN]
4483                 # If no default has been assigned, then this parser has a logic
4484                 # error.
4485                 else:
4486                     raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
4487
4488             if self._state == self.DONE:
4489                 return False
4490
4491         return True
4492
4493
4494 def TErr(err_msg: str) -> Err[CannotTransform]:
4495     """(T)ransform Err
4496
4497     Convenience function used when working with the TResult type.
4498     """
4499     cant_transform = CannotTransform(err_msg)
4500     return Err(cant_transform)
4501
4502
4503 def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
4504     """
4505     Returns:
4506         True iff one of the comments in @comment_list is a pragma used by one
4507         of the more common static analysis tools for python (e.g. mypy, flake8,
4508         pylint).
4509     """
4510     for comment in comment_list:
4511         if comment.value.startswith(("# type:", "# noqa", "# pylint:")):
4512             return True
4513
4514     return False
4515
4516
4517 def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
4518     """
4519     Factory for a convenience function that is used to orphan @string_leaf
4520     and then insert multiple new leaves into the same part of the node
4521     structure that @string_leaf had originally occupied.
4522
4523     Examples:
4524         Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
4525         string_leaf.parent`. Assume the node `N` has the following
4526         original structure:
4527
4528         Node(
4529             expr_stmt, [
4530                 Leaf(NAME, 'x'),
4531                 Leaf(EQUAL, '='),
4532                 Leaf(STRING, '"foo"'),
4533             ]
4534         )
4535
4536         We then run the code snippet shown below.
4537         ```
4538         insert_str_child = insert_str_child_factory(string_leaf)
4539
4540         lpar = Leaf(token.LPAR, '(')
4541         insert_str_child(lpar)
4542
4543         bar = Leaf(token.STRING, '"bar"')
4544         insert_str_child(bar)
4545
4546         rpar = Leaf(token.RPAR, ')')
4547         insert_str_child(rpar)
4548         ```
4549
4550         After which point, it follows that `string_leaf.parent is None` and
4551         the node `N` now has the following structure:
4552
4553         Node(
4554             expr_stmt, [
4555                 Leaf(NAME, 'x'),
4556                 Leaf(EQUAL, '='),
4557                 Leaf(LPAR, '('),
4558                 Leaf(STRING, '"bar"'),
4559                 Leaf(RPAR, ')'),
4560             ]
4561         )
4562     """
4563     string_parent = string_leaf.parent
4564     string_child_idx = string_leaf.remove()
4565
4566     def insert_str_child(child: LN) -> None:
4567         nonlocal string_child_idx
4568
4569         assert string_parent is not None
4570         assert string_child_idx is not None
4571
4572         string_parent.insert_child(string_child_idx, child)
4573         string_child_idx += 1
4574
4575     return insert_str_child
4576
4577
4578 def has_triple_quotes(string: str) -> bool:
4579     """
4580     Returns:
4581         True iff @string starts with three quotation characters.
4582     """
4583     raw_string = string.lstrip(STRING_PREFIX_CHARS)
4584     return raw_string[:3] in {'"""', "'''"}
4585
4586
4587 def parent_type(node: Optional[LN]) -> Optional[NodeType]:
4588     """
4589     Returns:
4590         @node.parent.type, if @node is not None and has a parent.
4591             OR
4592         None, otherwise.
4593     """
4594     if node is None or node.parent is None:
4595         return None
4596
4597     return node.parent.type
4598
4599
4600 def is_empty_par(leaf: Leaf) -> bool:
4601     return is_empty_lpar(leaf) or is_empty_rpar(leaf)
4602
4603
4604 def is_empty_lpar(leaf: Leaf) -> bool:
4605     return leaf.type == token.LPAR and leaf.value == ""
4606
4607
4608 def is_empty_rpar(leaf: Leaf) -> bool:
4609     return leaf.type == token.RPAR and leaf.value == ""
4610
4611
4612 def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
4613     """
4614     Examples:
4615         ```
4616         my_list = [1, 2, 3]
4617
4618         is_valid_index = is_valid_index_factory(my_list)
4619
4620         assert is_valid_index(0)
4621         assert is_valid_index(2)
4622
4623         assert not is_valid_index(3)
4624         assert not is_valid_index(-1)
4625         ```
4626     """
4627
4628     def is_valid_index(idx: int) -> bool:
4629         """
4630         Returns:
4631             True iff @idx is positive AND seq[@idx] does NOT raise an
4632             IndexError.
4633         """
4634         return 0 <= idx < len(seq)
4635
4636     return is_valid_index
4637
4638
4639 def line_to_string(line: Line) -> str:
4640     """Returns the string representation of @line.
4641
4642     WARNING: This is known to be computationally expensive.
4643     """
4644     return str(line).strip("\n")
4645
4646
4647 def append_leaves(
4648     new_line: Line, old_line: Line, leaves: List[Leaf], preformatted: bool = False
4649 ) -> None:
4650     """
4651     Append leaves (taken from @old_line) to @new_line, making sure to fix the
4652     underlying Node structure where appropriate.
4653
4654     All of the leaves in @leaves are duplicated. The duplicates are then
4655     appended to @new_line and used to replace their originals in the underlying
4656     Node structure. Any comments attached to the old leaves are reattached to
4657     the new leaves.
4658
4659     Pre-conditions:
4660         set(@leaves) is a subset of set(@old_line.leaves).
4661     """
4662     for old_leaf in leaves:
4663         new_leaf = Leaf(old_leaf.type, old_leaf.value)
4664         replace_child(old_leaf, new_leaf)
4665         new_line.append(new_leaf, preformatted=preformatted)
4666
4667         for comment_leaf in old_line.comments_after(old_leaf):
4668             new_line.append(comment_leaf, preformatted=True)
4669
4670
4671 def replace_child(old_child: LN, new_child: LN) -> None:
4672     """
4673     Side Effects:
4674         * If @old_child.parent is set, replace @old_child with @new_child in
4675         @old_child's underlying Node structure.
4676             OR
4677         * Otherwise, this function does nothing.
4678     """
4679     parent = old_child.parent
4680     if not parent:
4681         return
4682
4683     child_idx = old_child.remove()
4684     if child_idx is not None:
4685         parent.insert_child(child_idx, new_child)
4686
4687
4688 def get_string_prefix(string: str) -> str:
4689     """
4690     Pre-conditions:
4691         * assert_is_leaf_string(@string)
4692
4693     Returns:
4694         @string's prefix (e.g. '', 'r', 'f', or 'rf').
4695     """
4696     assert_is_leaf_string(string)
4697
4698     prefix = ""
4699     prefix_idx = 0
4700     while string[prefix_idx] in STRING_PREFIX_CHARS:
4701         prefix += string[prefix_idx].lower()
4702         prefix_idx += 1
4703
4704     return prefix
4705
4706
4707 def assert_is_leaf_string(string: str) -> None:
4708     """
4709     Checks the pre-condition that @string has the format that you would expect
4710     of `leaf.value` where `leaf` is some Leaf such that `leaf.type ==
4711     token.STRING`. A more precise description of the pre-conditions that are
4712     checked are listed below.
4713
4714     Pre-conditions:
4715         * @string starts with either ', ", <prefix>', or <prefix>" where
4716         `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
4717         * @string ends with a quote character (' or ").
4718
4719     Raises:
4720         AssertionError(...) if the pre-conditions listed above are not
4721         satisfied.
4722     """
4723     dquote_idx = string.find('"')
4724     squote_idx = string.find("'")
4725     if -1 in [dquote_idx, squote_idx]:
4726         quote_idx = max(dquote_idx, squote_idx)
4727     else:
4728         quote_idx = min(squote_idx, dquote_idx)
4729
4730     assert (
4731         0 <= quote_idx < len(string) - 1
4732     ), f"{string!r} is missing a starting quote character (' or \")."
4733     assert string[-1] in (
4734         "'",
4735         '"',
4736     ), f"{string!r} is missing an ending quote character (' or \")."
4737     assert set(string[:quote_idx]).issubset(
4738         set(STRING_PREFIX_CHARS)
4739     ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
4740
4741
4742 def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
4743     """Split line into many lines, starting with the first matching bracket pair.
4744
4745     Note: this usually looks weird, only use this for function definitions.
4746     Prefer RHS otherwise.  This is why this function is not symmetrical with
4747     :func:`right_hand_split` which also handles optional parentheses.
4748     """
4749     tail_leaves: List[Leaf] = []
4750     body_leaves: List[Leaf] = []
4751     head_leaves: List[Leaf] = []
4752     current_leaves = head_leaves
4753     matching_bracket: Optional[Leaf] = None
4754     for leaf in line.leaves:
4755         if (
4756             current_leaves is body_leaves
4757             and leaf.type in CLOSING_BRACKETS
4758             and leaf.opening_bracket is matching_bracket
4759         ):
4760             current_leaves = tail_leaves if body_leaves else head_leaves
4761         current_leaves.append(leaf)
4762         if current_leaves is head_leaves:
4763             if leaf.type in OPENING_BRACKETS:
4764                 matching_bracket = leaf
4765                 current_leaves = body_leaves
4766     if not matching_bracket:
4767         raise CannotSplit("No brackets found")
4768
4769     head = bracket_split_build_line(head_leaves, line, matching_bracket)
4770     body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
4771     tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
4772     bracket_split_succeeded_or_raise(head, body, tail)
4773     for result in (head, body, tail):
4774         if result:
4775             yield result
4776
4777
4778 def right_hand_split(
4779     line: Line,
4780     line_length: int,
4781     features: Collection[Feature] = (),
4782     omit: Collection[LeafID] = (),
4783 ) -> Iterator[Line]:
4784     """Split line into many lines, starting with the last matching bracket pair.
4785
4786     If the split was by optional parentheses, attempt splitting without them, too.
4787     `omit` is a collection of closing bracket IDs that shouldn't be considered for
4788     this split.
4789
4790     Note: running this function modifies `bracket_depth` on the leaves of `line`.
4791     """
4792     tail_leaves: List[Leaf] = []
4793     body_leaves: List[Leaf] = []
4794     head_leaves: List[Leaf] = []
4795     current_leaves = tail_leaves
4796     opening_bracket: Optional[Leaf] = None
4797     closing_bracket: Optional[Leaf] = None
4798     for leaf in reversed(line.leaves):
4799         if current_leaves is body_leaves:
4800             if leaf is opening_bracket:
4801                 current_leaves = head_leaves if body_leaves else tail_leaves
4802         current_leaves.append(leaf)
4803         if current_leaves is tail_leaves:
4804             if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
4805                 opening_bracket = leaf.opening_bracket
4806                 closing_bracket = leaf
4807                 current_leaves = body_leaves
4808     if not (opening_bracket and closing_bracket and head_leaves):
4809         # If there is no opening or closing_bracket that means the split failed and
4810         # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
4811         # the matching `opening_bracket` wasn't available on `line` anymore.
4812         raise CannotSplit("No brackets found")
4813
4814     tail_leaves.reverse()
4815     body_leaves.reverse()
4816     head_leaves.reverse()
4817     head = bracket_split_build_line(head_leaves, line, opening_bracket)
4818     body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
4819     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
4820     bracket_split_succeeded_or_raise(head, body, tail)
4821     if (
4822         Feature.FORCE_OPTIONAL_PARENTHESES not in features
4823         # the opening bracket is an optional paren
4824         and opening_bracket.type == token.LPAR
4825         and not opening_bracket.value
4826         # the closing bracket is an optional paren
4827         and closing_bracket.type == token.RPAR
4828         and not closing_bracket.value
4829         # it's not an import (optional parens are the only thing we can split on
4830         # in this case; attempting a split without them is a waste of time)
4831         and not line.is_import
4832         # there are no standalone comments in the body
4833         and not body.contains_standalone_comments(0)
4834         # and we can actually remove the parens
4835         and can_omit_invisible_parens(body, line_length, omit_on_explode=omit)
4836     ):
4837         omit = {id(closing_bracket), *omit}
4838         try:
4839             yield from right_hand_split(line, line_length, features=features, omit=omit)
4840             return
4841
4842         except CannotSplit:
4843             if not (
4844                 can_be_split(body)
4845                 or is_line_short_enough(body, line_length=line_length)
4846             ):
4847                 raise CannotSplit(
4848                     "Splitting failed, body is still too long and can't be split."
4849                 )
4850
4851             elif head.contains_multiline_strings() or tail.contains_multiline_strings():
4852                 raise CannotSplit(
4853                     "The current optional pair of parentheses is bound to fail to"
4854                     " satisfy the splitting algorithm because the head or the tail"
4855                     " contains multiline strings which by definition never fit one"
4856                     " line."
4857                 )
4858
4859     ensure_visible(opening_bracket)
4860     ensure_visible(closing_bracket)
4861     for result in (head, body, tail):
4862         if result:
4863             yield result
4864
4865
4866 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
4867     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
4868
4869     Do nothing otherwise.
4870
4871     A left- or right-hand split is based on a pair of brackets. Content before
4872     (and including) the opening bracket is left on one line, content inside the
4873     brackets is put on a separate line, and finally content starting with and
4874     following the closing bracket is put on a separate line.
4875
4876     Those are called `head`, `body`, and `tail`, respectively. If the split
4877     produced the same line (all content in `head`) or ended up with an empty `body`
4878     and the `tail` is just the closing bracket, then it's considered failed.
4879     """
4880     tail_len = len(str(tail).strip())
4881     if not body:
4882         if tail_len == 0:
4883             raise CannotSplit("Splitting brackets produced the same line")
4884
4885         elif tail_len < 3:
4886             raise CannotSplit(
4887                 f"Splitting brackets on an empty body to save {tail_len} characters is"
4888                 " not worth it"
4889             )
4890
4891
4892 def bracket_split_build_line(
4893     leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
4894 ) -> Line:
4895     """Return a new line with given `leaves` and respective comments from `original`.
4896
4897     If `is_body` is True, the result line is one-indented inside brackets and as such
4898     has its first leaf's prefix normalized and a trailing comma added when expected.
4899     """
4900     result = Line(depth=original.depth)
4901     if is_body:
4902         result.inside_brackets = True
4903         result.depth += 1
4904         if leaves:
4905             # Since body is a new indent level, remove spurious leading whitespace.
4906             normalize_prefix(leaves[0], inside_brackets=True)
4907             # Ensure a trailing comma for imports and standalone function arguments, but
4908             # be careful not to add one after any comments or within type annotations.
4909             no_commas = (
4910                 original.is_def
4911                 and opening_bracket.value == "("
4912                 and not any(leaf.type == token.COMMA for leaf in leaves)
4913             )
4914
4915             if original.is_import or no_commas:
4916                 for i in range(len(leaves) - 1, -1, -1):
4917                     if leaves[i].type == STANDALONE_COMMENT:
4918                         continue
4919
4920                     if leaves[i].type != token.COMMA:
4921                         new_comma = Leaf(token.COMMA, ",")
4922                         leaves.insert(i + 1, new_comma)
4923                     break
4924
4925     # Populate the line
4926     for leaf in leaves:
4927         result.append(leaf, preformatted=True)
4928         for comment_after in original.comments_after(leaf):
4929             result.append(comment_after, preformatted=True)
4930     if is_body and should_split_body_explode(result, opening_bracket):
4931         result.should_explode = True
4932     return result
4933
4934
4935 def dont_increase_indentation(split_func: Transformer) -> Transformer:
4936     """Normalize prefix of the first leaf in every line returned by `split_func`.
4937
4938     This is a decorator over relevant split functions.
4939     """
4940
4941     @wraps(split_func)
4942     def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4943         for line in split_func(line, features):
4944             normalize_prefix(line.leaves[0], inside_brackets=True)
4945             yield line
4946
4947     return split_wrapper
4948
4949
4950 @dont_increase_indentation
4951 def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4952     """Split according to delimiters of the highest priority.
4953
4954     If the appropriate Features are given, the split will add trailing commas
4955     also in function signatures and calls that contain `*` and `**`.
4956     """
4957     try:
4958         last_leaf = line.leaves[-1]
4959     except IndexError:
4960         raise CannotSplit("Line empty")
4961
4962     bt = line.bracket_tracker
4963     try:
4964         delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
4965     except ValueError:
4966         raise CannotSplit("No delimiters found")
4967
4968     if delimiter_priority == DOT_PRIORITY:
4969         if bt.delimiter_count_with_priority(delimiter_priority) == 1:
4970             raise CannotSplit("Splitting a single attribute from its owner looks wrong")
4971
4972     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4973     lowest_depth = sys.maxsize
4974     trailing_comma_safe = True
4975
4976     def append_to_line(leaf: Leaf) -> Iterator[Line]:
4977         """Append `leaf` to current line or to new line if appending impossible."""
4978         nonlocal current_line
4979         try:
4980             current_line.append_safe(leaf, preformatted=True)
4981         except ValueError:
4982             yield current_line
4983
4984             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4985             current_line.append(leaf)
4986
4987     for leaf in line.leaves:
4988         yield from append_to_line(leaf)
4989
4990         for comment_after in line.comments_after(leaf):
4991             yield from append_to_line(comment_after)
4992
4993         lowest_depth = min(lowest_depth, leaf.bracket_depth)
4994         if leaf.bracket_depth == lowest_depth:
4995             if is_vararg(leaf, within={syms.typedargslist}):
4996                 trailing_comma_safe = (
4997                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
4998                 )
4999             elif is_vararg(leaf, within={syms.arglist, syms.argument}):
5000                 trailing_comma_safe = (
5001                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
5002                 )
5003
5004         leaf_priority = bt.delimiters.get(id(leaf))
5005         if leaf_priority == delimiter_priority:
5006             yield current_line
5007
5008             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5009     if current_line:
5010         if (
5011             trailing_comma_safe
5012             and delimiter_priority == COMMA_PRIORITY
5013             and current_line.leaves[-1].type != token.COMMA
5014             and current_line.leaves[-1].type != STANDALONE_COMMENT
5015         ):
5016             new_comma = Leaf(token.COMMA, ",")
5017             current_line.append(new_comma)
5018         yield current_line
5019
5020
5021 @dont_increase_indentation
5022 def standalone_comment_split(
5023     line: Line, features: Collection[Feature] = ()
5024 ) -> Iterator[Line]:
5025     """Split standalone comments from the rest of the line."""
5026     if not line.contains_standalone_comments(0):
5027         raise CannotSplit("Line does not have any standalone comments")
5028
5029     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5030
5031     def append_to_line(leaf: Leaf) -> Iterator[Line]:
5032         """Append `leaf` to current line or to new line if appending impossible."""
5033         nonlocal current_line
5034         try:
5035             current_line.append_safe(leaf, preformatted=True)
5036         except ValueError:
5037             yield current_line
5038
5039             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5040             current_line.append(leaf)
5041
5042     for leaf in line.leaves:
5043         yield from append_to_line(leaf)
5044
5045         for comment_after in line.comments_after(leaf):
5046             yield from append_to_line(comment_after)
5047
5048     if current_line:
5049         yield current_line
5050
5051
5052 def is_import(leaf: Leaf) -> bool:
5053     """Return True if the given leaf starts an import statement."""
5054     p = leaf.parent
5055     t = leaf.type
5056     v = leaf.value
5057     return bool(
5058         t == token.NAME
5059         and (
5060             (v == "import" and p and p.type == syms.import_name)
5061             or (v == "from" and p and p.type == syms.import_from)
5062         )
5063     )
5064
5065
5066 def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
5067     """Return True if the given leaf is a special comment.
5068     Only returns true for type comments for now."""
5069     t = leaf.type
5070     v = leaf.value
5071     return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix)
5072
5073
5074 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
5075     """Leave existing extra newlines if not `inside_brackets`. Remove everything
5076     else.
5077
5078     Note: don't use backslashes for formatting or you'll lose your voting rights.
5079     """
5080     if not inside_brackets:
5081         spl = leaf.prefix.split("#")
5082         if "\\" not in spl[0]:
5083             nl_count = spl[-1].count("\n")
5084             if len(spl) > 1:
5085                 nl_count -= 1
5086             leaf.prefix = "\n" * nl_count
5087             return
5088
5089     leaf.prefix = ""
5090
5091
5092 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
5093     """Make all string prefixes lowercase.
5094
5095     If remove_u_prefix is given, also removes any u prefix from the string.
5096
5097     Note: Mutates its argument.
5098     """
5099     match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", leaf.value, re.DOTALL)
5100     assert match is not None, f"failed to match string {leaf.value!r}"
5101     orig_prefix = match.group(1)
5102     new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u")
5103     if remove_u_prefix:
5104         new_prefix = new_prefix.replace("u", "")
5105     leaf.value = f"{new_prefix}{match.group(2)}"
5106
5107
5108 def normalize_string_quotes(leaf: Leaf) -> None:
5109     """Prefer double quotes but only if it doesn't cause more escaping.
5110
5111     Adds or removes backslashes as appropriate. Doesn't parse and fix
5112     strings nested in f-strings (yet).
5113
5114     Note: Mutates its argument.
5115     """
5116     value = leaf.value.lstrip(STRING_PREFIX_CHARS)
5117     if value[:3] == '"""':
5118         return
5119
5120     elif value[:3] == "'''":
5121         orig_quote = "'''"
5122         new_quote = '"""'
5123     elif value[0] == '"':
5124         orig_quote = '"'
5125         new_quote = "'"
5126     else:
5127         orig_quote = "'"
5128         new_quote = '"'
5129     first_quote_pos = leaf.value.find(orig_quote)
5130     if first_quote_pos == -1:
5131         return  # There's an internal error
5132
5133     prefix = leaf.value[:first_quote_pos]
5134     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
5135     escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
5136     escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
5137     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
5138     if "r" in prefix.casefold():
5139         if unescaped_new_quote.search(body):
5140             # There's at least one unescaped new_quote in this raw string
5141             # so converting is impossible
5142             return
5143
5144         # Do not introduce or remove backslashes in raw strings
5145         new_body = body
5146     else:
5147         # remove unnecessary escapes
5148         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
5149         if body != new_body:
5150             # Consider the string without unnecessary escapes as the original
5151             body = new_body
5152             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
5153         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
5154         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
5155     if "f" in prefix.casefold():
5156         matches = re.findall(
5157             r"""
5158             (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
5159                 ([^{].*?)  # contents of the brackets except if begins with {{
5160             \}(?:[^}]|$)  # A } followed by end of the string or a non-}
5161             """,
5162             new_body,
5163             re.VERBOSE,
5164         )
5165         for m in matches:
5166             if "\\" in str(m):
5167                 # Do not introduce backslashes in interpolated expressions
5168                 return
5169
5170     if new_quote == '"""' and new_body[-1:] == '"':
5171         # edge case:
5172         new_body = new_body[:-1] + '\\"'
5173     orig_escape_count = body.count("\\")
5174     new_escape_count = new_body.count("\\")
5175     if new_escape_count > orig_escape_count:
5176         return  # Do not introduce more escaping
5177
5178     if new_escape_count == orig_escape_count and orig_quote == '"':
5179         return  # Prefer double quotes
5180
5181     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
5182
5183
5184 def normalize_numeric_literal(leaf: Leaf) -> None:
5185     """Normalizes numeric (float, int, and complex) literals.
5186
5187     All letters used in the representation are normalized to lowercase (except
5188     in Python 2 long literals).
5189     """
5190     text = leaf.value.lower()
5191     if text.startswith(("0o", "0b")):
5192         # Leave octal and binary literals alone.
5193         pass
5194     elif text.startswith("0x"):
5195         text = format_hex(text)
5196     elif "e" in text:
5197         text = format_scientific_notation(text)
5198     elif text.endswith(("j", "l")):
5199         text = format_long_or_complex_number(text)
5200     else:
5201         text = format_float_or_int_string(text)
5202     leaf.value = text
5203
5204
5205 def format_hex(text: str) -> str:
5206     """
5207     Formats a hexadecimal string like "0x12b3"
5208
5209     Uses lowercase because of similarity between "B" and "8", which
5210     can cause security issues.
5211     see: https://github.com/psf/black/issues/1692
5212     """
5213
5214     before, after = text[:2], text[2:]
5215     return f"{before}{after.lower()}"
5216
5217
5218 def format_scientific_notation(text: str) -> str:
5219     """Formats a numeric string utilizing scentific notation"""
5220     before, after = text.split("e")
5221     sign = ""
5222     if after.startswith("-"):
5223         after = after[1:]
5224         sign = "-"
5225     elif after.startswith("+"):
5226         after = after[1:]
5227     before = format_float_or_int_string(before)
5228     return f"{before}e{sign}{after}"
5229
5230
5231 def format_long_or_complex_number(text: str) -> str:
5232     """Formats a long or complex string like `10L` or `10j`"""
5233     number = text[:-1]
5234     suffix = text[-1]
5235     # Capitalize in "2L" because "l" looks too similar to "1".
5236     if suffix == "l":
5237         suffix = "L"
5238     return f"{format_float_or_int_string(number)}{suffix}"
5239
5240
5241 def format_float_or_int_string(text: str) -> str:
5242     """Formats a float string like "1.0"."""
5243     if "." not in text:
5244         return text
5245
5246     before, after = text.split(".")
5247     return f"{before or 0}.{after or 0}"
5248
5249
5250 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
5251     """Make existing optional parentheses invisible or create new ones.
5252
5253     `parens_after` is a set of string leaf values immediately after which parens
5254     should be put.
5255
5256     Standardizes on visible parentheses for single-element tuples, and keeps
5257     existing visible parentheses for other tuples and generator expressions.
5258     """
5259     for pc in list_comments(node.prefix, is_endmarker=False):
5260         if pc.value in FMT_OFF:
5261             # This `node` has a prefix with `# fmt: off`, don't mess with parens.
5262             return
5263     check_lpar = False
5264     for index, child in enumerate(list(node.children)):
5265         # Fixes a bug where invisible parens are not properly stripped from
5266         # assignment statements that contain type annotations.
5267         if isinstance(child, Node) and child.type == syms.annassign:
5268             normalize_invisible_parens(child, parens_after=parens_after)
5269
5270         # Add parentheses around long tuple unpacking in assignments.
5271         if (
5272             index == 0
5273             and isinstance(child, Node)
5274             and child.type == syms.testlist_star_expr
5275         ):
5276             check_lpar = True
5277
5278         if check_lpar:
5279             if is_walrus_assignment(child):
5280                 pass
5281
5282             elif child.type == syms.atom:
5283                 if maybe_make_parens_invisible_in_atom(child, parent=node):
5284                     wrap_in_parentheses(node, child, visible=False)
5285             elif is_one_tuple(child):
5286                 wrap_in_parentheses(node, child, visible=True)
5287             elif node.type == syms.import_from:
5288                 # "import from" nodes store parentheses directly as part of
5289                 # the statement
5290                 if child.type == token.LPAR:
5291                     # make parentheses invisible
5292                     child.value = ""  # type: ignore
5293                     node.children[-1].value = ""  # type: ignore
5294                 elif child.type != token.STAR:
5295                     # insert invisible parentheses
5296                     node.insert_child(index, Leaf(token.LPAR, ""))
5297                     node.append_child(Leaf(token.RPAR, ""))
5298                 break
5299
5300             elif not (isinstance(child, Leaf) and is_multiline_string(child)):
5301                 wrap_in_parentheses(node, child, visible=False)
5302
5303         check_lpar = isinstance(child, Leaf) and child.value in parens_after
5304
5305
5306 def normalize_fmt_off(node: Node) -> None:
5307     """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
5308     try_again = True
5309     while try_again:
5310         try_again = convert_one_fmt_off_pair(node)
5311
5312
5313 def convert_one_fmt_off_pair(node: Node) -> bool:
5314     """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
5315
5316     Returns True if a pair was converted.
5317     """
5318     for leaf in node.leaves():
5319         previous_consumed = 0
5320         for comment in list_comments(leaf.prefix, is_endmarker=False):
5321             if comment.value in FMT_OFF:
5322                 # We only want standalone comments. If there's no previous leaf or
5323                 # the previous leaf is indentation, it's a standalone comment in
5324                 # disguise.
5325                 if comment.type != STANDALONE_COMMENT:
5326                     prev = preceding_leaf(leaf)
5327                     if prev and prev.type not in WHITESPACE:
5328                         continue
5329
5330                 ignored_nodes = list(generate_ignored_nodes(leaf))
5331                 if not ignored_nodes:
5332                     continue
5333
5334                 first = ignored_nodes[0]  # Can be a container node with the `leaf`.
5335                 parent = first.parent
5336                 prefix = first.prefix
5337                 first.prefix = prefix[comment.consumed :]
5338                 hidden_value = (
5339                     comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
5340                 )
5341                 if hidden_value.endswith("\n"):
5342                     # That happens when one of the `ignored_nodes` ended with a NEWLINE
5343                     # leaf (possibly followed by a DEDENT).
5344                     hidden_value = hidden_value[:-1]
5345                 first_idx: Optional[int] = None
5346                 for ignored in ignored_nodes:
5347                     index = ignored.remove()
5348                     if first_idx is None:
5349                         first_idx = index
5350                 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
5351                 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
5352                 parent.insert_child(
5353                     first_idx,
5354                     Leaf(
5355                         STANDALONE_COMMENT,
5356                         hidden_value,
5357                         prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
5358                     ),
5359                 )
5360                 return True
5361
5362             previous_consumed = comment.consumed
5363
5364     return False
5365
5366
5367 def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
5368     """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
5369
5370     Stops at the end of the block.
5371     """
5372     container: Optional[LN] = container_of(leaf)
5373     while container is not None and container.type != token.ENDMARKER:
5374         if is_fmt_on(container):
5375             return
5376
5377         # fix for fmt: on in children
5378         if contains_fmt_on_at_column(container, leaf.column):
5379             for child in container.children:
5380                 if contains_fmt_on_at_column(child, leaf.column):
5381                     return
5382                 yield child
5383         else:
5384             yield container
5385             container = container.next_sibling
5386
5387
5388 def is_fmt_on(container: LN) -> bool:
5389     """Determine whether formatting is switched on within a container.
5390     Determined by whether the last `# fmt:` comment is `on` or `off`.
5391     """
5392     fmt_on = False
5393     for comment in list_comments(container.prefix, is_endmarker=False):
5394         if comment.value in FMT_ON:
5395             fmt_on = True
5396         elif comment.value in FMT_OFF:
5397             fmt_on = False
5398     return fmt_on
5399
5400
5401 def contains_fmt_on_at_column(container: LN, column: int) -> bool:
5402     """Determine if children at a given column have formatting switched on."""
5403     for child in container.children:
5404         if (
5405             isinstance(child, Node)
5406             and first_leaf_column(child) == column
5407             or isinstance(child, Leaf)
5408             and child.column == column
5409         ):
5410             if is_fmt_on(child):
5411                 return True
5412
5413     return False
5414
5415
5416 def first_leaf_column(node: Node) -> Optional[int]:
5417     """Returns the column of the first leaf child of a node."""
5418     for child in node.children:
5419         if isinstance(child, Leaf):
5420             return child.column
5421     return None
5422
5423
5424 def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
5425     """If it's safe, make the parens in the atom `node` invisible, recursively.
5426     Additionally, remove repeated, adjacent invisible parens from the atom `node`
5427     as they are redundant.
5428
5429     Returns whether the node should itself be wrapped in invisible parentheses.
5430
5431     """
5432     if (
5433         node.type != syms.atom
5434         or is_empty_tuple(node)
5435         or is_one_tuple(node)
5436         or (is_yield(node) and parent.type != syms.expr_stmt)
5437         or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
5438     ):
5439         return False
5440
5441     first = node.children[0]
5442     last = node.children[-1]
5443     if first.type == token.LPAR and last.type == token.RPAR:
5444         middle = node.children[1]
5445         # make parentheses invisible
5446         first.value = ""  # type: ignore
5447         last.value = ""  # type: ignore
5448         maybe_make_parens_invisible_in_atom(middle, parent=parent)
5449
5450         if is_atom_with_invisible_parens(middle):
5451             # Strip the invisible parens from `middle` by replacing
5452             # it with the child in-between the invisible parens
5453             middle.replace(middle.children[1])
5454
5455         return False
5456
5457     return True
5458
5459
5460 def is_atom_with_invisible_parens(node: LN) -> bool:
5461     """Given a `LN`, determines whether it's an atom `node` with invisible
5462     parens. Useful in dedupe-ing and normalizing parens.
5463     """
5464     if isinstance(node, Leaf) or node.type != syms.atom:
5465         return False
5466
5467     first, last = node.children[0], node.children[-1]
5468     return (
5469         isinstance(first, Leaf)
5470         and first.type == token.LPAR
5471         and first.value == ""
5472         and isinstance(last, Leaf)
5473         and last.type == token.RPAR
5474         and last.value == ""
5475     )
5476
5477
5478 def is_empty_tuple(node: LN) -> bool:
5479     """Return True if `node` holds an empty tuple."""
5480     return (
5481         node.type == syms.atom
5482         and len(node.children) == 2
5483         and node.children[0].type == token.LPAR
5484         and node.children[1].type == token.RPAR
5485     )
5486
5487
5488 def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
5489     """Returns `wrapped` if `node` is of the shape ( wrapped ).
5490
5491     Parenthesis can be optional. Returns None otherwise"""
5492     if len(node.children) != 3:
5493         return None
5494
5495     lpar, wrapped, rpar = node.children
5496     if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
5497         return None
5498
5499     return wrapped
5500
5501
5502 def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
5503     """Wrap `child` in parentheses.
5504
5505     This replaces `child` with an atom holding the parentheses and the old
5506     child.  That requires moving the prefix.
5507
5508     If `visible` is False, the leaves will be valueless (and thus invisible).
5509     """
5510     lpar = Leaf(token.LPAR, "(" if visible else "")
5511     rpar = Leaf(token.RPAR, ")" if visible else "")
5512     prefix = child.prefix
5513     child.prefix = ""
5514     index = child.remove() or 0
5515     new_child = Node(syms.atom, [lpar, child, rpar])
5516     new_child.prefix = prefix
5517     parent.insert_child(index, new_child)
5518
5519
5520 def is_one_tuple(node: LN) -> bool:
5521     """Return True if `node` holds a tuple with one element, with or without parens."""
5522     if node.type == syms.atom:
5523         gexp = unwrap_singleton_parenthesis(node)
5524         if gexp is None or gexp.type != syms.testlist_gexp:
5525             return False
5526
5527         return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
5528
5529     return (
5530         node.type in IMPLICIT_TUPLE
5531         and len(node.children) == 2
5532         and node.children[1].type == token.COMMA
5533     )
5534
5535
5536 def is_walrus_assignment(node: LN) -> bool:
5537     """Return True iff `node` is of the shape ( test := test )"""
5538     inner = unwrap_singleton_parenthesis(node)
5539     return inner is not None and inner.type == syms.namedexpr_test
5540
5541
5542 def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool:
5543     """Return True iff `node` is a trailer valid in a simple decorator"""
5544     return node.type == syms.trailer and (
5545         (
5546             len(node.children) == 2
5547             and node.children[0].type == token.DOT
5548             and node.children[1].type == token.NAME
5549         )
5550         # last trailer can be arguments
5551         or (
5552             last
5553             and len(node.children) == 3
5554             and node.children[0].type == token.LPAR
5555             # and node.children[1].type == syms.argument
5556             and node.children[2].type == token.RPAR
5557         )
5558     )
5559
5560
5561 def is_simple_decorator_expression(node: LN) -> bool:
5562     """Return True iff `node` could be a 'dotted name' decorator
5563
5564     This function takes the node of the 'namedexpr_test' of the new decorator
5565     grammar and test if it would be valid under the old decorator grammar.
5566
5567     The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
5568     The new grammar is : decorator: @ namedexpr_test NEWLINE
5569     """
5570     if node.type == token.NAME:
5571         return True
5572     if node.type == syms.power:
5573         if node.children:
5574             return (
5575                 node.children[0].type == token.NAME
5576                 and all(map(is_simple_decorator_trailer, node.children[1:-1]))
5577                 and (
5578                     len(node.children) < 2
5579                     or is_simple_decorator_trailer(node.children[-1], last=True)
5580                 )
5581             )
5582     return False
5583
5584
5585 def is_yield(node: LN) -> bool:
5586     """Return True if `node` holds a `yield` or `yield from` expression."""
5587     if node.type == syms.yield_expr:
5588         return True
5589
5590     if node.type == token.NAME and node.value == "yield":  # type: ignore
5591         return True
5592
5593     if node.type != syms.atom:
5594         return False
5595
5596     if len(node.children) != 3:
5597         return False
5598
5599     lpar, expr, rpar = node.children
5600     if lpar.type == token.LPAR and rpar.type == token.RPAR:
5601         return is_yield(expr)
5602
5603     return False
5604
5605
5606 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
5607     """Return True if `leaf` is a star or double star in a vararg or kwarg.
5608
5609     If `within` includes VARARGS_PARENTS, this applies to function signatures.
5610     If `within` includes UNPACKING_PARENTS, it applies to right hand-side
5611     extended iterable unpacking (PEP 3132) and additional unpacking
5612     generalizations (PEP 448).
5613     """
5614     if leaf.type not in VARARGS_SPECIALS or not leaf.parent:
5615         return False
5616
5617     p = leaf.parent
5618     if p.type == syms.star_expr:
5619         # Star expressions are also used as assignment targets in extended
5620         # iterable unpacking (PEP 3132).  See what its parent is instead.
5621         if not p.parent:
5622             return False
5623
5624         p = p.parent
5625
5626     return p.type in within
5627
5628
5629 def is_multiline_string(leaf: Leaf) -> bool:
5630     """Return True if `leaf` is a multiline string that actually spans many lines."""
5631     return has_triple_quotes(leaf.value) and "\n" in leaf.value
5632
5633
5634 def is_stub_suite(node: Node) -> bool:
5635     """Return True if `node` is a suite with a stub body."""
5636     if (
5637         len(node.children) != 4
5638         or node.children[0].type != token.NEWLINE
5639         or node.children[1].type != token.INDENT
5640         or node.children[3].type != token.DEDENT
5641     ):
5642         return False
5643
5644     return is_stub_body(node.children[2])
5645
5646
5647 def is_stub_body(node: LN) -> bool:
5648     """Return True if `node` is a simple statement containing an ellipsis."""
5649     if not isinstance(node, Node) or node.type != syms.simple_stmt:
5650         return False
5651
5652     if len(node.children) != 2:
5653         return False
5654
5655     child = node.children[0]
5656     return (
5657         child.type == syms.atom
5658         and len(child.children) == 3
5659         and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
5660     )
5661
5662
5663 def max_delimiter_priority_in_atom(node: LN) -> Priority:
5664     """Return maximum delimiter priority inside `node`.
5665
5666     This is specific to atoms with contents contained in a pair of parentheses.
5667     If `node` isn't an atom or there are no enclosing parentheses, returns 0.
5668     """
5669     if node.type != syms.atom:
5670         return 0
5671
5672     first = node.children[0]
5673     last = node.children[-1]
5674     if not (first.type == token.LPAR and last.type == token.RPAR):
5675         return 0
5676
5677     bt = BracketTracker()
5678     for c in node.children[1:-1]:
5679         if isinstance(c, Leaf):
5680             bt.mark(c)
5681         else:
5682             for leaf in c.leaves():
5683                 bt.mark(leaf)
5684     try:
5685         return bt.max_delimiter_priority()
5686
5687     except ValueError:
5688         return 0
5689
5690
5691 def ensure_visible(leaf: Leaf) -> None:
5692     """Make sure parentheses are visible.
5693
5694     They could be invisible as part of some statements (see
5695     :func:`normalize_invisible_parens` and :func:`visit_import_from`).
5696     """
5697     if leaf.type == token.LPAR:
5698         leaf.value = "("
5699     elif leaf.type == token.RPAR:
5700         leaf.value = ")"
5701
5702
5703 def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
5704     """Should `line` be immediately split with `delimiter_split()` after RHS?"""
5705
5706     if not (opening_bracket.parent and opening_bracket.value in "[{("):
5707         return False
5708
5709     # We're essentially checking if the body is delimited by commas and there's more
5710     # than one of them (we're excluding the trailing comma and if the delimiter priority
5711     # is still commas, that means there's more).
5712     exclude = set()
5713     trailing_comma = False
5714     try:
5715         last_leaf = line.leaves[-1]
5716         if last_leaf.type == token.COMMA:
5717             trailing_comma = True
5718             exclude.add(id(last_leaf))
5719         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
5720     except (IndexError, ValueError):
5721         return False
5722
5723     return max_priority == COMMA_PRIORITY and (
5724         trailing_comma
5725         # always explode imports
5726         or opening_bracket.parent.type in {syms.atom, syms.import_from}
5727     )
5728
5729
5730 def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
5731     """Return True if content between `opening` and `closing` looks like a one-tuple."""
5732     if opening.type != token.LPAR and closing.type != token.RPAR:
5733         return False
5734
5735     depth = closing.bracket_depth + 1
5736     for _opening_index, leaf in enumerate(leaves):
5737         if leaf is opening:
5738             break
5739
5740     else:
5741         raise LookupError("Opening paren not found in `leaves`")
5742
5743     commas = 0
5744     _opening_index += 1
5745     for leaf in leaves[_opening_index:]:
5746         if leaf is closing:
5747             break
5748
5749         bracket_depth = leaf.bracket_depth
5750         if bracket_depth == depth and leaf.type == token.COMMA:
5751             commas += 1
5752             if leaf.parent and leaf.parent.type in {
5753                 syms.arglist,
5754                 syms.typedargslist,
5755             }:
5756                 commas += 1
5757                 break
5758
5759     return commas < 2
5760
5761
5762 def get_features_used(node: Node) -> Set[Feature]:
5763     """Return a set of (relatively) new Python features used in this file.
5764
5765     Currently looking for:
5766     - f-strings;
5767     - underscores in numeric literals;
5768     - trailing commas after * or ** in function signatures and calls;
5769     - positional only arguments in function signatures and lambdas;
5770     - assignment expression;
5771     - relaxed decorator syntax;
5772     """
5773     features: Set[Feature] = set()
5774     for n in node.pre_order():
5775         if n.type == token.STRING:
5776             value_head = n.value[:2]  # type: ignore
5777             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
5778                 features.add(Feature.F_STRINGS)
5779
5780         elif n.type == token.NUMBER:
5781             if "_" in n.value:  # type: ignore
5782                 features.add(Feature.NUMERIC_UNDERSCORES)
5783
5784         elif n.type == token.SLASH:
5785             if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
5786                 features.add(Feature.POS_ONLY_ARGUMENTS)
5787
5788         elif n.type == token.COLONEQUAL:
5789             features.add(Feature.ASSIGNMENT_EXPRESSIONS)
5790
5791         elif n.type == syms.decorator:
5792             if len(n.children) > 1 and not is_simple_decorator_expression(
5793                 n.children[1]
5794             ):
5795                 features.add(Feature.RELAXED_DECORATORS)
5796
5797         elif (
5798             n.type in {syms.typedargslist, syms.arglist}
5799             and n.children
5800             and n.children[-1].type == token.COMMA
5801         ):
5802             if n.type == syms.typedargslist:
5803                 feature = Feature.TRAILING_COMMA_IN_DEF
5804             else:
5805                 feature = Feature.TRAILING_COMMA_IN_CALL
5806
5807             for ch in n.children:
5808                 if ch.type in STARS:
5809                     features.add(feature)
5810
5811                 if ch.type == syms.argument:
5812                     for argch in ch.children:
5813                         if argch.type in STARS:
5814                             features.add(feature)
5815
5816     return features
5817
5818
5819 def detect_target_versions(node: Node) -> Set[TargetVersion]:
5820     """Detect the version to target based on the nodes used."""
5821     features = get_features_used(node)
5822     return {
5823         version for version in TargetVersion if features <= VERSION_TO_FEATURES[version]
5824     }
5825
5826
5827 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
5828     """Generate sets of closing bracket IDs that should be omitted in a RHS.
5829
5830     Brackets can be omitted if the entire trailer up to and including
5831     a preceding closing bracket fits in one line.
5832
5833     Yielded sets are cumulative (contain results of previous yields, too).  First
5834     set is empty, unless the line should explode, in which case bracket pairs until
5835     the one that needs to explode are omitted.
5836     """
5837
5838     omit: Set[LeafID] = set()
5839     if not line.should_explode:
5840         yield omit
5841
5842     length = 4 * line.depth
5843     opening_bracket: Optional[Leaf] = None
5844     closing_bracket: Optional[Leaf] = None
5845     inner_brackets: Set[LeafID] = set()
5846     for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
5847         length += leaf_length
5848         if length > line_length:
5849             break
5850
5851         has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
5852         if leaf.type == STANDALONE_COMMENT or has_inline_comment:
5853             break
5854
5855         if opening_bracket:
5856             if leaf is opening_bracket:
5857                 opening_bracket = None
5858             elif leaf.type in CLOSING_BRACKETS:
5859                 prev = line.leaves[index - 1] if index > 0 else None
5860                 if (
5861                     line.should_explode
5862                     and prev
5863                     and prev.type == token.COMMA
5864                     and not is_one_tuple_between(
5865                         leaf.opening_bracket, leaf, line.leaves
5866                     )
5867                 ):
5868                     # Never omit bracket pairs with trailing commas.
5869                     # We need to explode on those.
5870                     break
5871
5872                 inner_brackets.add(id(leaf))
5873         elif leaf.type in CLOSING_BRACKETS:
5874             prev = line.leaves[index - 1] if index > 0 else None
5875             if prev and prev.type in OPENING_BRACKETS:
5876                 # Empty brackets would fail a split so treat them as "inner"
5877                 # brackets (e.g. only add them to the `omit` set if another
5878                 # pair of brackets was good enough.
5879                 inner_brackets.add(id(leaf))
5880                 continue
5881
5882             if closing_bracket:
5883                 omit.add(id(closing_bracket))
5884                 omit.update(inner_brackets)
5885                 inner_brackets.clear()
5886                 yield omit
5887
5888             if (
5889                 line.should_explode
5890                 and prev
5891                 and prev.type == token.COMMA
5892                 and not is_one_tuple_between(leaf.opening_bracket, leaf, line.leaves)
5893             ):
5894                 # Never omit bracket pairs with trailing commas.
5895                 # We need to explode on those.
5896                 break
5897
5898             if leaf.value:
5899                 opening_bracket = leaf.opening_bracket
5900                 closing_bracket = leaf
5901
5902
5903 def get_future_imports(node: Node) -> Set[str]:
5904     """Return a set of __future__ imports in the file."""
5905     imports: Set[str] = set()
5906
5907     def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
5908         for child in children:
5909             if isinstance(child, Leaf):
5910                 if child.type == token.NAME:
5911                     yield child.value
5912
5913             elif child.type == syms.import_as_name:
5914                 orig_name = child.children[0]
5915                 assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
5916                 assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
5917                 yield orig_name.value
5918
5919             elif child.type == syms.import_as_names:
5920                 yield from get_imports_from_children(child.children)
5921
5922             else:
5923                 raise AssertionError("Invalid syntax parsing imports")
5924
5925     for child in node.children:
5926         if child.type != syms.simple_stmt:
5927             break
5928
5929         first_child = child.children[0]
5930         if isinstance(first_child, Leaf):
5931             # Continue looking if we see a docstring; otherwise stop.
5932             if (
5933                 len(child.children) == 2
5934                 and first_child.type == token.STRING
5935                 and child.children[1].type == token.NEWLINE
5936             ):
5937                 continue
5938
5939             break
5940
5941         elif first_child.type == syms.import_from:
5942             module_name = first_child.children[1]
5943             if not isinstance(module_name, Leaf) or module_name.value != "__future__":
5944                 break
5945
5946             imports |= set(get_imports_from_children(first_child.children[3:]))
5947         else:
5948             break
5949
5950     return imports
5951
5952
5953 @lru_cache()
5954 def get_gitignore(root: Path) -> PathSpec:
5955     """ Return a PathSpec matching gitignore content if present."""
5956     gitignore = root / ".gitignore"
5957     lines: List[str] = []
5958     if gitignore.is_file():
5959         with gitignore.open() as gf:
5960             lines = gf.readlines()
5961     return PathSpec.from_lines("gitwildmatch", lines)
5962
5963
5964 def normalize_path_maybe_ignore(
5965     path: Path, root: Path, report: "Report"
5966 ) -> Optional[str]:
5967     """Normalize `path`. May return `None` if `path` was ignored.
5968
5969     `report` is where "path ignored" output goes.
5970     """
5971     try:
5972         abspath = path if path.is_absolute() else Path.cwd() / path
5973         normalized_path = abspath.resolve().relative_to(root).as_posix()
5974     except OSError as e:
5975         report.path_ignored(path, f"cannot be read because {e}")
5976         return None
5977
5978     except ValueError:
5979         if path.is_symlink():
5980             report.path_ignored(path, f"is a symbolic link that points outside {root}")
5981             return None
5982
5983         raise
5984
5985     return normalized_path
5986
5987
5988 def gen_python_files(
5989     paths: Iterable[Path],
5990     root: Path,
5991     include: Optional[Pattern[str]],
5992     exclude: Pattern[str],
5993     force_exclude: Optional[Pattern[str]],
5994     report: "Report",
5995     gitignore: PathSpec,
5996 ) -> Iterator[Path]:
5997     """Generate all files under `path` whose paths are not excluded by the
5998     `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex.
5999
6000     Symbolic links pointing outside of the `root` directory are ignored.
6001
6002     `report` is where output about exclusions goes.
6003     """
6004     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
6005     for child in paths:
6006         normalized_path = normalize_path_maybe_ignore(child, root, report)
6007         if normalized_path is None:
6008             continue
6009
6010         # First ignore files matching .gitignore
6011         if gitignore.match_file(normalized_path):
6012             report.path_ignored(child, "matches the .gitignore file content")
6013             continue
6014
6015         # Then ignore with `--exclude` and `--force-exclude` options.
6016         normalized_path = "/" + normalized_path
6017         if child.is_dir():
6018             normalized_path += "/"
6019
6020         exclude_match = exclude.search(normalized_path) if exclude else None
6021         if exclude_match and exclude_match.group(0):
6022             report.path_ignored(child, "matches the --exclude regular expression")
6023             continue
6024
6025         force_exclude_match = (
6026             force_exclude.search(normalized_path) if force_exclude else None
6027         )
6028         if force_exclude_match and force_exclude_match.group(0):
6029             report.path_ignored(child, "matches the --force-exclude regular expression")
6030             continue
6031
6032         if child.is_dir():
6033             yield from gen_python_files(
6034                 child.iterdir(),
6035                 root,
6036                 include,
6037                 exclude,
6038                 force_exclude,
6039                 report,
6040                 gitignore,
6041             )
6042
6043         elif child.is_file():
6044             include_match = include.search(normalized_path) if include else True
6045             if include_match:
6046                 yield child
6047
6048
6049 @lru_cache()
6050 def find_project_root(srcs: Iterable[str]) -> Path:
6051     """Return a directory containing .git, .hg, or pyproject.toml.
6052
6053     That directory will be a common parent of all files and directories
6054     passed in `srcs`.
6055
6056     If no directory in the tree contains a marker that would specify it's the
6057     project root, the root of the file system is returned.
6058     """
6059     if not srcs:
6060         return Path("/").resolve()
6061
6062     path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
6063
6064     # A list of lists of parents for each 'src'. 'src' is included as a
6065     # "parent" of itself if it is a directory
6066     src_parents = [
6067         list(path.parents) + ([path] if path.is_dir() else []) for path in path_srcs
6068     ]
6069
6070     common_base = max(
6071         set.intersection(*(set(parents) for parents in src_parents)),
6072         key=lambda path: path.parts,
6073     )
6074
6075     for directory in (common_base, *common_base.parents):
6076         if (directory / ".git").exists():
6077             return directory
6078
6079         if (directory / ".hg").is_dir():
6080             return directory
6081
6082         if (directory / "pyproject.toml").is_file():
6083             return directory
6084
6085     return directory
6086
6087
6088 @dataclass
6089 class Report:
6090     """Provides a reformatting counter. Can be rendered with `str(report)`."""
6091
6092     check: bool = False
6093     diff: bool = False
6094     quiet: bool = False
6095     verbose: bool = False
6096     change_count: int = 0
6097     same_count: int = 0
6098     failure_count: int = 0
6099
6100     def done(self, src: Path, changed: Changed) -> None:
6101         """Increment the counter for successful reformatting. Write out a message."""
6102         if changed is Changed.YES:
6103             reformatted = "would reformat" if self.check or self.diff else "reformatted"
6104             if self.verbose or not self.quiet:
6105                 out(f"{reformatted} {src}")
6106             self.change_count += 1
6107         else:
6108             if self.verbose:
6109                 if changed is Changed.NO:
6110                     msg = f"{src} already well formatted, good job."
6111                 else:
6112                     msg = f"{src} wasn't modified on disk since last run."
6113                 out(msg, bold=False)
6114             self.same_count += 1
6115
6116     def failed(self, src: Path, message: str) -> None:
6117         """Increment the counter for failed reformatting. Write out a message."""
6118         err(f"error: cannot format {src}: {message}")
6119         self.failure_count += 1
6120
6121     def path_ignored(self, path: Path, message: str) -> None:
6122         if self.verbose:
6123             out(f"{path} ignored: {message}", bold=False)
6124
6125     @property
6126     def return_code(self) -> int:
6127         """Return the exit code that the app should use.
6128
6129         This considers the current state of changed files and failures:
6130         - if there were any failures, return 123;
6131         - if any files were changed and --check is being used, return 1;
6132         - otherwise return 0.
6133         """
6134         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
6135         # 126 we have special return codes reserved by the shell.
6136         if self.failure_count:
6137             return 123
6138
6139         elif self.change_count and self.check:
6140             return 1
6141
6142         return 0
6143
6144     def __str__(self) -> str:
6145         """Render a color report of the current state.
6146
6147         Use `click.unstyle` to remove colors.
6148         """
6149         if self.check or self.diff:
6150             reformatted = "would be reformatted"
6151             unchanged = "would be left unchanged"
6152             failed = "would fail to reformat"
6153         else:
6154             reformatted = "reformatted"
6155             unchanged = "left unchanged"
6156             failed = "failed to reformat"
6157         report = []
6158         if self.change_count:
6159             s = "s" if self.change_count > 1 else ""
6160             report.append(
6161                 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
6162             )
6163         if self.same_count:
6164             s = "s" if self.same_count > 1 else ""
6165             report.append(f"{self.same_count} file{s} {unchanged}")
6166         if self.failure_count:
6167             s = "s" if self.failure_count > 1 else ""
6168             report.append(
6169                 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
6170             )
6171         return ", ".join(report) + "."
6172
6173
6174 def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
6175     filename = "<unknown>"
6176     if sys.version_info >= (3, 8):
6177         # TODO: support Python 4+ ;)
6178         for minor_version in range(sys.version_info[1], 4, -1):
6179             try:
6180                 return ast.parse(src, filename, feature_version=(3, minor_version))
6181             except SyntaxError:
6182                 continue
6183     else:
6184         for feature_version in (7, 6):
6185             try:
6186                 return ast3.parse(src, filename, feature_version=feature_version)
6187             except SyntaxError:
6188                 continue
6189
6190     return ast27.parse(src)
6191
6192
6193 def _fixup_ast_constants(
6194     node: Union[ast.AST, ast3.AST, ast27.AST]
6195 ) -> Union[ast.AST, ast3.AST, ast27.AST]:
6196     """Map ast nodes deprecated in 3.8 to Constant."""
6197     if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
6198         return ast.Constant(value=node.s)
6199
6200     if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
6201         return ast.Constant(value=node.n)
6202
6203     if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
6204         return ast.Constant(value=node.value)
6205
6206     return node
6207
6208
6209 def _stringify_ast(
6210     node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
6211 ) -> Iterator[str]:
6212     """Simple visitor generating strings to compare ASTs by content."""
6213
6214     node = _fixup_ast_constants(node)
6215
6216     yield f"{'  ' * depth}{node.__class__.__name__}("
6217
6218     for field in sorted(node._fields):  # noqa: F402
6219         # TypeIgnore has only one field 'lineno' which breaks this comparison
6220         type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
6221         if sys.version_info >= (3, 8):
6222             type_ignore_classes += (ast.TypeIgnore,)
6223         if isinstance(node, type_ignore_classes):
6224             break
6225
6226         try:
6227             value = getattr(node, field)
6228         except AttributeError:
6229             continue
6230
6231         yield f"{'  ' * (depth+1)}{field}="
6232
6233         if isinstance(value, list):
6234             for item in value:
6235                 # Ignore nested tuples within del statements, because we may insert
6236                 # parentheses and they change the AST.
6237                 if (
6238                     field == "targets"
6239                     and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
6240                     and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
6241                 ):
6242                     for item in item.elts:
6243                         yield from _stringify_ast(item, depth + 2)
6244
6245                 elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
6246                     yield from _stringify_ast(item, depth + 2)
6247
6248         elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
6249             yield from _stringify_ast(value, depth + 2)
6250
6251         else:
6252             # Constant strings may be indented across newlines, if they are
6253             # docstrings; fold spaces after newlines when comparing. Similarly,
6254             # trailing and leading space may be removed.
6255             if (
6256                 isinstance(node, ast.Constant)
6257                 and field == "value"
6258                 and isinstance(value, str)
6259             ):
6260                 normalized = re.sub(r" *\n[ \t]*", "\n", value).strip()
6261             else:
6262                 normalized = value
6263             yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"
6264
6265     yield f"{'  ' * depth})  # /{node.__class__.__name__}"
6266
6267
6268 def assert_equivalent(src: str, dst: str) -> None:
6269     """Raise AssertionError if `src` and `dst` aren't equivalent."""
6270     try:
6271         src_ast = parse_ast(src)
6272     except Exception as exc:
6273         raise AssertionError(
6274             "cannot use --safe with this file; failed to parse source file.  AST"
6275             f" error message: {exc}"
6276         )
6277
6278     try:
6279         dst_ast = parse_ast(dst)
6280     except Exception as exc:
6281         log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
6282         raise AssertionError(
6283             f"INTERNAL ERROR: Black produced invalid code: {exc}. Please report a bug"
6284             " on https://github.com/psf/black/issues.  This invalid output might be"
6285             f" helpful: {log}"
6286         ) from None
6287
6288     src_ast_str = "\n".join(_stringify_ast(src_ast))
6289     dst_ast_str = "\n".join(_stringify_ast(dst_ast))
6290     if src_ast_str != dst_ast_str:
6291         log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
6292         raise AssertionError(
6293             "INTERNAL ERROR: Black produced code that is not equivalent to the"
6294             " source.  Please report a bug on https://github.com/psf/black/issues. "
6295             f" This diff might be helpful: {log}"
6296         ) from None
6297
6298
6299 def assert_stable(src: str, dst: str, mode: Mode) -> None:
6300     """Raise AssertionError if `dst` reformats differently the second time."""
6301     newdst = format_str(dst, mode=mode)
6302     if dst != newdst:
6303         log = dump_to_file(
6304             str(mode),
6305             diff(src, dst, "source", "first pass"),
6306             diff(dst, newdst, "first pass", "second pass"),
6307         )
6308         raise AssertionError(
6309             "INTERNAL ERROR: Black produced different code on the second pass of the"
6310             " formatter.  Please report a bug on https://github.com/psf/black/issues."
6311             f"  This diff might be helpful: {log}"
6312         ) from None
6313
6314
6315 @mypyc_attr(patchable=True)
6316 def dump_to_file(*output: str) -> str:
6317     """Dump `output` to a temporary file. Return path to the file."""
6318     with tempfile.NamedTemporaryFile(
6319         mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
6320     ) as f:
6321         for lines in output:
6322             f.write(lines)
6323             if lines and lines[-1] != "\n":
6324                 f.write("\n")
6325     return f.name
6326
6327
6328 @contextmanager
6329 def nullcontext() -> Iterator[None]:
6330     """Return an empty context manager.
6331
6332     To be used like `nullcontext` in Python 3.7.
6333     """
6334     yield
6335
6336
6337 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
6338     """Return a unified diff string between strings `a` and `b`."""
6339     import difflib
6340
6341     a_lines = [line + "\n" for line in a.splitlines()]
6342     b_lines = [line + "\n" for line in b.splitlines()]
6343     return "".join(
6344         difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
6345     )
6346
6347
6348 def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
6349     """asyncio signal handler that cancels all `tasks` and reports to stderr."""
6350     err("Aborted!")
6351     for task in tasks:
6352         task.cancel()
6353
6354
6355 def shutdown(loop: asyncio.AbstractEventLoop) -> None:
6356     """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
6357     try:
6358         if sys.version_info[:2] >= (3, 7):
6359             all_tasks = asyncio.all_tasks
6360         else:
6361             all_tasks = asyncio.Task.all_tasks
6362         # This part is borrowed from asyncio/runners.py in Python 3.7b2.
6363         to_cancel = [task for task in all_tasks(loop) if not task.done()]
6364         if not to_cancel:
6365             return
6366
6367         for task in to_cancel:
6368             task.cancel()
6369         loop.run_until_complete(
6370             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
6371         )
6372     finally:
6373         # `concurrent.futures.Future` objects cannot be cancelled once they
6374         # are already running. There might be some when the `shutdown()` happened.
6375         # Silence their logger's spew about the event loop being closed.
6376         cf_logger = logging.getLogger("concurrent.futures")
6377         cf_logger.setLevel(logging.CRITICAL)
6378         loop.close()
6379
6380
6381 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
6382     """Replace `regex` with `replacement` twice on `original`.
6383
6384     This is used by string normalization to perform replaces on
6385     overlapping matches.
6386     """
6387     return regex.sub(replacement, regex.sub(replacement, original))
6388
6389
6390 def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
6391     """Compile a regular expression string in `regex`.
6392
6393     If it contains newlines, use verbose mode.
6394     """
6395     if "\n" in regex:
6396         regex = "(?x)" + regex
6397     compiled: Pattern[str] = re.compile(regex)
6398     return compiled
6399
6400
6401 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
6402     """Like `reversed(enumerate(sequence))` if that were possible."""
6403     index = len(sequence) - 1
6404     for element in reversed(sequence):
6405         yield (index, element)
6406         index -= 1
6407
6408
6409 def enumerate_with_length(
6410     line: Line, reversed: bool = False
6411 ) -> Iterator[Tuple[Index, Leaf, int]]:
6412     """Return an enumeration of leaves with their length.
6413
6414     Stops prematurely on multiline strings and standalone comments.
6415     """
6416     op = cast(
6417         Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
6418         enumerate_reversed if reversed else enumerate,
6419     )
6420     for index, leaf in op(line.leaves):
6421         length = len(leaf.prefix) + len(leaf.value)
6422         if "\n" in leaf.value:
6423             return  # Multiline strings, we can't continue.
6424
6425         for comment in line.comments_after(leaf):
6426             length += len(comment.value)
6427
6428         yield index, leaf, length
6429
6430
6431 def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
6432     """Return True if `line` is no longer than `line_length`.
6433
6434     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
6435     """
6436     if not line_str:
6437         line_str = line_to_string(line)
6438     return (
6439         len(line_str) <= line_length
6440         and "\n" not in line_str  # multiline strings
6441         and not line.contains_standalone_comments()
6442     )
6443
6444
6445 def can_be_split(line: Line) -> bool:
6446     """Return False if the line cannot be split *for sure*.
6447
6448     This is not an exhaustive search but a cheap heuristic that we can use to
6449     avoid some unfortunate formattings (mostly around wrapping unsplittable code
6450     in unnecessary parentheses).
6451     """
6452     leaves = line.leaves
6453     if len(leaves) < 2:
6454         return False
6455
6456     if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
6457         call_count = 0
6458         dot_count = 0
6459         next = leaves[-1]
6460         for leaf in leaves[-2::-1]:
6461             if leaf.type in OPENING_BRACKETS:
6462                 if next.type not in CLOSING_BRACKETS:
6463                     return False
6464
6465                 call_count += 1
6466             elif leaf.type == token.DOT:
6467                 dot_count += 1
6468             elif leaf.type == token.NAME:
6469                 if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
6470                     return False
6471
6472             elif leaf.type not in CLOSING_BRACKETS:
6473                 return False
6474
6475             if dot_count > 1 and call_count > 1:
6476                 return False
6477
6478     return True
6479
6480
6481 def can_omit_invisible_parens(
6482     line: Line,
6483     line_length: int,
6484     omit_on_explode: Collection[LeafID] = (),
6485 ) -> bool:
6486     """Does `line` have a shape safe to reformat without optional parens around it?
6487
6488     Returns True for only a subset of potentially nice looking formattings but
6489     the point is to not return false positives that end up producing lines that
6490     are too long.
6491     """
6492     bt = line.bracket_tracker
6493     if not bt.delimiters:
6494         # Without delimiters the optional parentheses are useless.
6495         return True
6496
6497     max_priority = bt.max_delimiter_priority()
6498     if bt.delimiter_count_with_priority(max_priority) > 1:
6499         # With more than one delimiter of a kind the optional parentheses read better.
6500         return False
6501
6502     if max_priority == DOT_PRIORITY:
6503         # A single stranded method call doesn't require optional parentheses.
6504         return True
6505
6506     assert len(line.leaves) >= 2, "Stranded delimiter"
6507
6508     # With a single delimiter, omit if the expression starts or ends with
6509     # a bracket.
6510     first = line.leaves[0]
6511     second = line.leaves[1]
6512     if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
6513         if _can_omit_opening_paren(line, first=first, line_length=line_length):
6514             return True
6515
6516         # Note: we are not returning False here because a line might have *both*
6517         # a leading opening bracket and a trailing closing bracket.  If the
6518         # opening bracket doesn't match our rule, maybe the closing will.
6519
6520     penultimate = line.leaves[-2]
6521     last = line.leaves[-1]
6522     if line.should_explode:
6523         try:
6524             penultimate, last = last_two_except(line.leaves, omit=omit_on_explode)
6525         except LookupError:
6526             # Turns out we'd omit everything.  We cannot skip the optional parentheses.
6527             return False
6528
6529     if (
6530         last.type == token.RPAR
6531         or last.type == token.RBRACE
6532         or (
6533             # don't use indexing for omitting optional parentheses;
6534             # it looks weird
6535             last.type == token.RSQB
6536             and last.parent
6537             and last.parent.type != syms.trailer
6538         )
6539     ):
6540         if penultimate.type in OPENING_BRACKETS:
6541             # Empty brackets don't help.
6542             return False
6543
6544         if is_multiline_string(first):
6545             # Additional wrapping of a multiline string in this situation is
6546             # unnecessary.
6547             return True
6548
6549         if line.should_explode and penultimate.type == token.COMMA:
6550             # The rightmost non-omitted bracket pair is the one we want to explode on.
6551             return True
6552
6553         if _can_omit_closing_paren(line, last=last, line_length=line_length):
6554             return True
6555
6556     return False
6557
6558
6559 def _can_omit_opening_paren(line: Line, *, first: Leaf, line_length: int) -> bool:
6560     """See `can_omit_invisible_parens`."""
6561     remainder = False
6562     length = 4 * line.depth
6563     _index = -1
6564     for _index, leaf, leaf_length in enumerate_with_length(line):
6565         if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
6566             remainder = True
6567         if remainder:
6568             length += leaf_length
6569             if length > line_length:
6570                 break
6571
6572             if leaf.type in OPENING_BRACKETS:
6573                 # There are brackets we can further split on.
6574                 remainder = False
6575
6576     else:
6577         # checked the entire string and line length wasn't exceeded
6578         if len(line.leaves) == _index + 1:
6579             return True
6580
6581     return False
6582
6583
6584 def _can_omit_closing_paren(line: Line, *, last: Leaf, line_length: int) -> bool:
6585     """See `can_omit_invisible_parens`."""
6586     length = 4 * line.depth
6587     seen_other_brackets = False
6588     for _index, leaf, leaf_length in enumerate_with_length(line):
6589         length += leaf_length
6590         if leaf is last.opening_bracket:
6591             if seen_other_brackets or length <= line_length:
6592                 return True
6593
6594         elif leaf.type in OPENING_BRACKETS:
6595             # There are brackets we can further split on.
6596             seen_other_brackets = True
6597
6598     return False
6599
6600
6601 def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]:
6602     """Return (penultimate, last) leaves skipping brackets in `omit` and contents."""
6603     stop_after = None
6604     last = None
6605     for leaf in reversed(leaves):
6606         if stop_after:
6607             if leaf is stop_after:
6608                 stop_after = None
6609             continue
6610
6611         if last:
6612             return leaf, last
6613
6614         if id(leaf) in omit:
6615             stop_after = leaf.opening_bracket
6616         else:
6617             last = leaf
6618     else:
6619         raise LookupError("Last two leaves were also skipped")
6620
6621
6622 def run_transformer(
6623     line: Line,
6624     transform: Transformer,
6625     mode: Mode,
6626     features: Collection[Feature],
6627     *,
6628     line_str: str = "",
6629 ) -> List[Line]:
6630     if not line_str:
6631         line_str = line_to_string(line)
6632     result: List[Line] = []
6633     for transformed_line in transform(line, features):
6634         if str(transformed_line).strip("\n") == line_str:
6635             raise CannotTransform("Line transformer returned an unchanged result")
6636
6637         result.extend(transform_line(transformed_line, mode=mode, features=features))
6638
6639     if not (
6640         transform.__name__ == "rhs"
6641         and line.bracket_tracker.invisible
6642         and not any(bracket.value for bracket in line.bracket_tracker.invisible)
6643         and not line.contains_multiline_strings()
6644         and not result[0].contains_uncollapsable_type_comments()
6645         and not result[0].contains_unsplittable_type_ignore()
6646         and not is_line_short_enough(result[0], line_length=mode.line_length)
6647     ):
6648         return result
6649
6650     line_copy = line.clone()
6651     append_leaves(line_copy, line, line.leaves)
6652     features_fop = set(features) | {Feature.FORCE_OPTIONAL_PARENTHESES}
6653     second_opinion = run_transformer(
6654         line_copy, transform, mode, features_fop, line_str=line_str
6655     )
6656     if all(
6657         is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
6658     ):
6659         result = second_opinion
6660     return result
6661
6662
6663 def get_cache_file(mode: Mode) -> Path:
6664     return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"
6665
6666
6667 def read_cache(mode: Mode) -> Cache:
6668     """Read the cache if it exists and is well formed.
6669
6670     If it is not well formed, the call to write_cache later should resolve the issue.
6671     """
6672     cache_file = get_cache_file(mode)
6673     if not cache_file.exists():
6674         return {}
6675
6676     with cache_file.open("rb") as fobj:
6677         try:
6678             cache: Cache = pickle.load(fobj)
6679         except (pickle.UnpicklingError, ValueError):
6680             return {}
6681
6682     return cache
6683
6684
6685 def get_cache_info(path: Path) -> CacheInfo:
6686     """Return the information used to check if a file is already formatted or not."""
6687     stat = path.stat()
6688     return stat.st_mtime, stat.st_size
6689
6690
6691 def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
6692     """Split an iterable of paths in `sources` into two sets.
6693
6694     The first contains paths of files that modified on disk or are not in the
6695     cache. The other contains paths to non-modified files.
6696     """
6697     todo, done = set(), set()
6698     for src in sources:
6699         src = src.resolve()
6700         if cache.get(src) != get_cache_info(src):
6701             todo.add(src)
6702         else:
6703             done.add(src)
6704     return todo, done
6705
6706
6707 def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
6708     """Update the cache file."""
6709     cache_file = get_cache_file(mode)
6710     try:
6711         CACHE_DIR.mkdir(parents=True, exist_ok=True)
6712         new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
6713         with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
6714             pickle.dump(new_cache, f, protocol=4)
6715         os.replace(f.name, cache_file)
6716     except OSError:
6717         pass
6718
6719
6720 def patch_click() -> None:
6721     """Make Click not crash.
6722
6723     On certain misconfigured environments, Python 3 selects the ASCII encoding as the
6724     default which restricts paths that it can access during the lifetime of the
6725     application.  Click refuses to work in this scenario by raising a RuntimeError.
6726
6727     In case of Black the likelihood that non-ASCII characters are going to be used in
6728     file paths is minimal since it's Python source code.  Moreover, this crash was
6729     spurious on Python 3.7 thanks to PEP 538 and PEP 540.
6730     """
6731     try:
6732         from click import core
6733         from click import _unicodefun  # type: ignore
6734     except ModuleNotFoundError:
6735         return
6736
6737     for module in (core, _unicodefun):
6738         if hasattr(module, "_verify_python3_env"):
6739             module._verify_python3_env = lambda: None
6740
6741
6742 def patched_main() -> None:
6743     freeze_support()
6744     patch_click()
6745     main()
6746
6747
6748 def is_docstring(leaf: Leaf) -> bool:
6749     if not is_multiline_string(leaf):
6750         # For the purposes of docstring re-indentation, we don't need to do anything
6751         # with single-line docstrings.
6752         return False
6753
6754     if prev_siblings_are(
6755         leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
6756     ):
6757         return True
6758
6759     # Multiline docstring on the same line as the `def`.
6760     if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
6761         # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
6762         # grammar. We're safe to return True without further checks.
6763         return True
6764
6765     return False
6766
6767
6768 def lines_with_leading_tabs_expanded(s: str) -> List[str]:
6769     """
6770     Splits string into lines and expands only leading tabs (following the normal
6771     Python rules)
6772     """
6773     lines = []
6774     for line in s.splitlines():
6775         # Find the index of the first non-whitespace character after a string of
6776         # whitespace that includes at least one tab
6777         match = re.match(r"\s*\t+\s*(\S)", line)
6778         if match:
6779             first_non_whitespace_idx = match.start(1)
6780
6781             lines.append(
6782                 line[:first_non_whitespace_idx].expandtabs()
6783                 + line[first_non_whitespace_idx:]
6784             )
6785         else:
6786             lines.append(line)
6787     return lines
6788
6789
6790 def fix_docstring(docstring: str, prefix: str) -> str:
6791     # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
6792     if not docstring:
6793         return ""
6794     lines = lines_with_leading_tabs_expanded(docstring)
6795     # Determine minimum indentation (first line doesn't count):
6796     indent = sys.maxsize
6797     for line in lines[1:]:
6798         stripped = line.lstrip()
6799         if stripped:
6800             indent = min(indent, len(line) - len(stripped))
6801     # Remove indentation (first line is special):
6802     trimmed = [lines[0].strip()]
6803     if indent < sys.maxsize:
6804         last_line_idx = len(lines) - 2
6805         for i, line in enumerate(lines[1:]):
6806             stripped_line = line[indent:].rstrip()
6807             if stripped_line or i == last_line_idx:
6808                 trimmed.append(prefix + stripped_line)
6809             else:
6810                 trimmed.append("")
6811     return "\n".join(trimmed)
6812
6813
6814 if __name__ == "__main__":
6815     patched_main()