src/black/__init__.py

   1 import ast
   2 import asyncio
   3 from abc import ABC, abstractmethod
   4 from collections import defaultdict
   5 from concurrent.futures import Executor, ThreadPoolExecutor, ProcessPoolExecutor
   6 from contextlib import contextmanager
   7 from datetime import datetime
   8 from enum import Enum
   9 from functools import lru_cache, partial, wraps
  10 import io
  11 import itertools
  12 import logging
  13 from multiprocessing import Manager, freeze_support
  14 import os
  15 from pathlib import Path
  16 import pickle
  17 import regex as re
  18 import signal
  19 import sys
  20 import tempfile
  21 import tokenize
  22 import traceback
  23 from typing import (
  24     Any,
  25     Callable,
  26     Collection,
  27     Dict,
  28     Generator,
  29     Generic,
  30     Iterable,
  31     Iterator,
  32     List,
  33     Optional,
  34     Pattern,
  35     Sequence,
  36     Set,
  37     Sized,
  38     Tuple,
  39     Type,
  40     TypeVar,
  41     Union,
  42     cast,
  43     TYPE_CHECKING,
  44 )
  45 from typing_extensions import Final
  46 from mypy_extensions import mypyc_attr
  47
  48 from appdirs import user_cache_dir
  49 from dataclasses import dataclass, field, replace
  50 import click
  51 import toml
  52 from typed_ast import ast3, ast27
  53 from pathspec import PathSpec
  54
  55 # lib2to3 fork
  56 from blib2to3.pytree import Node, Leaf, type_repr
  57 from blib2to3 import pygram, pytree
  58 from blib2to3.pgen2 import driver, token
  59 from blib2to3.pgen2.grammar import Grammar
  60 from blib2to3.pgen2.parse import ParseError
  61
  62 from _black_version import version as __version__
  63
  64 if TYPE_CHECKING:
  65     import colorama  # noqa: F401
  66
  67 DEFAULT_LINE_LENGTH = 88
  68 DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
  69 DEFAULT_INCLUDES = r"\.pyi?$"
  70 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
  71
  72 STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.
  73
  74
  75 # types
  76 FileContent = str
  77 Encoding = str
  78 NewLine = str
  79 Depth = int
  80 NodeType = int
  81 ParserState = int
  82 LeafID = int
  83 StringID = int
  84 Priority = int
  85 Index = int
  86 LN = Union[Leaf, Node]
  87 Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
  88 Timestamp = float
  89 FileSize = int
  90 CacheInfo = Tuple[Timestamp, FileSize]
  91 Cache = Dict[Path, CacheInfo]
  92 out = partial(click.secho, bold=True, err=True)
  93 err = partial(click.secho, fg="red", err=True)
  94
  95 pygram.initialize(CACHE_DIR)
  96 syms = pygram.python_symbols
  97
  98
  99 class NothingChanged(UserWarning):
 100     """Raised when reformatted code is the same as source."""
 101
 102
 103 class CannotTransform(Exception):
 104     """Base class for errors raised by Transformers."""
 105
 106
 107 class CannotSplit(CannotTransform):
 108     """A readable split that fits the allotted line length is impossible."""
 109
 110
 111 class InvalidInput(ValueError):
 112     """Raised when input source code fails all parse attempts."""
 113
 114
 115 class BracketMatchError(KeyError):
 116     """Raised when an opening bracket is unable to be matched to a closing bracket."""
 117
 118
 119 T = TypeVar("T")
 120 E = TypeVar("E", bound=Exception)
 121
 122
 123 class Ok(Generic[T]):
 124     def __init__(self, value: T) -> None:
 125         self._value = value
 126
 127     def ok(self) -> T:
 128         return self._value
 129
 130
 131 class Err(Generic[E]):
 132     def __init__(self, e: E) -> None:
 133         self._e = e
 134
 135     def err(self) -> E:
 136         return self._e
 137
 138
 139 # The 'Result' return type is used to implement an error-handling model heavily
 140 # influenced by that used by the Rust programming language
 141 # (see https://doc.rust-lang.org/book/ch09-00-error-handling.html).
 142 Result = Union[Ok[T], Err[E]]
 143 TResult = Result[T, CannotTransform]  # (T)ransform Result
 144 TMatchResult = TResult[Index]
 145
 146
 147 class WriteBack(Enum):
 148     NO = 0
 149     YES = 1
 150     DIFF = 2
 151     CHECK = 3
 152     COLOR_DIFF = 4
 153
 154     @classmethod
 155     def from_configuration(
 156         cls, *, check: bool, diff: bool, color: bool = False
 157     ) -> "WriteBack":
 158         if check and not diff:
 159             return cls.CHECK
 160
 161         if diff and color:
 162             return cls.COLOR_DIFF
 163
 164         return cls.DIFF if diff else cls.YES
 165
 166
 167 class Changed(Enum):
 168     NO = 0
 169     CACHED = 1
 170     YES = 2
 171
 172
 173 class TargetVersion(Enum):
 174     PY27 = 2
 175     PY33 = 3
 176     PY34 = 4
 177     PY35 = 5
 178     PY36 = 6
 179     PY37 = 7
 180     PY38 = 8
 181     PY39 = 9
 182
 183     def is_python2(self) -> bool:
 184         return self is TargetVersion.PY27
 185
 186
 187 class Feature(Enum):
 188     # All string literals are unicode
 189     UNICODE_LITERALS = 1
 190     F_STRINGS = 2
 191     NUMERIC_UNDERSCORES = 3
 192     TRAILING_COMMA_IN_CALL = 4
 193     TRAILING_COMMA_IN_DEF = 5
 194     # The following two feature-flags are mutually exclusive, and exactly one should be
 195     # set for every version of python.
 196     ASYNC_IDENTIFIERS = 6
 197     ASYNC_KEYWORDS = 7
 198     ASSIGNMENT_EXPRESSIONS = 8
 199     POS_ONLY_ARGUMENTS = 9
 200     RELAXED_DECORATORS = 10
 201     FORCE_OPTIONAL_PARENTHESES = 50
 202
 203
 204 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
 205     TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
 206     TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 207     TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 208     TargetVersion.PY35: {
 209         Feature.UNICODE_LITERALS,
 210         Feature.TRAILING_COMMA_IN_CALL,
 211         Feature.ASYNC_IDENTIFIERS,
 212     },
 213     TargetVersion.PY36: {
 214         Feature.UNICODE_LITERALS,
 215         Feature.F_STRINGS,
 216         Feature.NUMERIC_UNDERSCORES,
 217         Feature.TRAILING_COMMA_IN_CALL,
 218         Feature.TRAILING_COMMA_IN_DEF,
 219         Feature.ASYNC_IDENTIFIERS,
 220     },
 221     TargetVersion.PY37: {
 222         Feature.UNICODE_LITERALS,
 223         Feature.F_STRINGS,
 224         Feature.NUMERIC_UNDERSCORES,
 225         Feature.TRAILING_COMMA_IN_CALL,
 226         Feature.TRAILING_COMMA_IN_DEF,
 227         Feature.ASYNC_KEYWORDS,
 228     },
 229     TargetVersion.PY38: {
 230         Feature.UNICODE_LITERALS,
 231         Feature.F_STRINGS,
 232         Feature.NUMERIC_UNDERSCORES,
 233         Feature.TRAILING_COMMA_IN_CALL,
 234         Feature.TRAILING_COMMA_IN_DEF,
 235         Feature.ASYNC_KEYWORDS,
 236         Feature.ASSIGNMENT_EXPRESSIONS,
 237         Feature.POS_ONLY_ARGUMENTS,
 238     },
 239     TargetVersion.PY39: {
 240         Feature.UNICODE_LITERALS,
 241         Feature.F_STRINGS,
 242         Feature.NUMERIC_UNDERSCORES,
 243         Feature.TRAILING_COMMA_IN_CALL,
 244         Feature.TRAILING_COMMA_IN_DEF,
 245         Feature.ASYNC_KEYWORDS,
 246         Feature.ASSIGNMENT_EXPRESSIONS,
 247         Feature.RELAXED_DECORATORS,
 248         Feature.POS_ONLY_ARGUMENTS,
 249     },
 250 }
 251
 252
 253 @dataclass
 254 class Mode:
 255     target_versions: Set[TargetVersion] = field(default_factory=set)
 256     line_length: int = DEFAULT_LINE_LENGTH
 257     string_normalization: bool = True
 258     experimental_string_processing: bool = False
 259     is_pyi: bool = False
 260
 261     def get_cache_key(self) -> str:
 262         if self.target_versions:
 263             version_str = ",".join(
 264                 str(version.value)
 265                 for version in sorted(self.target_versions, key=lambda v: v.value)
 266             )
 267         else:
 268             version_str = "-"
 269         parts = [
 270             version_str,
 271             str(self.line_length),
 272             str(int(self.string_normalization)),
 273             str(int(self.is_pyi)),
 274         ]
 275         return ".".join(parts)
 276
 277
 278 # Legacy name, left for integrations.
 279 FileMode = Mode
 280
 281
 282 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 283     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 284
 285
 286 def find_pyproject_toml(path_search_start: Iterable[str]) -> Optional[str]:
 287     """Find the absolute filepath to a pyproject.toml if it exists"""
 288     path_project_root = find_project_root(path_search_start)
 289     path_pyproject_toml = path_project_root / "pyproject.toml"
 290     return str(path_pyproject_toml) if path_pyproject_toml.is_file() else None
 291
 292
 293 def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
 294     """Parse a pyproject toml file, pulling out relevant parts for Black
 295
 296     If parsing fails, will raise a toml.TomlDecodeError
 297     """
 298     pyproject_toml = toml.load(path_config)
 299     config = pyproject_toml.get("tool", {}).get("black", {})
 300     return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 301
 302
 303 def read_pyproject_toml(
 304     ctx: click.Context, param: click.Parameter, value: Optional[str]
 305 ) -> Optional[str]:
 306     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 307
 308     Returns the path to a successfully found and read configuration file, None
 309     otherwise.
 310     """
 311     if not value:
 312         value = find_pyproject_toml(ctx.params.get("src", ()))
 313         if value is None:
 314             return None
 315
 316     try:
 317         config = parse_pyproject_toml(value)
 318     except (toml.TomlDecodeError, OSError) as e:
 319         raise click.FileError(
 320             filename=value, hint=f"Error reading configuration file: {e}"
 321         )
 322
 323     if not config:
 324         return None
 325     else:
 326         # Sanitize the values to be Click friendly. For more information please see:
 327         # https://github.com/psf/black/issues/1458
 328         # https://github.com/pallets/click/issues/1567
 329         config = {
 330             k: str(v) if not isinstance(v, (list, dict)) else v
 331             for k, v in config.items()
 332         }
 333
 334     target_version = config.get("target_version")
 335     if target_version is not None and not isinstance(target_version, list):
 336         raise click.BadOptionUsage(
 337             "target-version", "Config key target-version must be a list"
 338         )
 339
 340     default_map: Dict[str, Any] = {}
 341     if ctx.default_map:
 342         default_map.update(ctx.default_map)
 343     default_map.update(config)
 344
 345     ctx.default_map = default_map
 346     return value
 347
 348
 349 def target_version_option_callback(
 350     c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
 351 ) -> List[TargetVersion]:
 352     """Compute the target versions from a --target-version flag.
 353
 354     This is its own function because mypy couldn't infer the type correctly
 355     when it was a lambda, causing mypyc trouble.
 356     """
 357     return [TargetVersion[val.upper()] for val in v]
 358
 359
 360 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 361 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
 362 @click.option(
 363     "-l",
 364     "--line-length",
 365     type=int,
 366     default=DEFAULT_LINE_LENGTH,
 367     help="How many characters per line to allow.",
 368     show_default=True,
 369 )
 370 @click.option(
 371     "-t",
 372     "--target-version",
 373     type=click.Choice([v.name.lower() for v in TargetVersion]),
 374     callback=target_version_option_callback,
 375     multiple=True,
 376     help=(
 377         "Python versions that should be supported by Black's output. [default: per-file"
 378         " auto-detection]"
 379     ),
 380 )
 381 @click.option(
 382     "--pyi",
 383     is_flag=True,
 384     help=(
 385         "Format all input files like typing stubs regardless of file extension (useful"
 386         " when piping source on standard input)."
 387     ),
 388 )
 389 @click.option(
 390     "-S",
 391     "--skip-string-normalization",
 392     is_flag=True,
 393     help="Don't normalize string quotes or prefixes.",
 394 )
 395 @click.option(
 396     "--experimental-string-processing",
 397     is_flag=True,
 398     hidden=True,
 399     help=(
 400         "Experimental option that performs more normalization on string literals."
 401         " Currently disabled because it leads to some crashes."
 402     ),
 403 )
 404 @click.option(
 405     "--check",
 406     is_flag=True,
 407     help=(
 408         "Don't write the files back, just return the status.  Return code 0 means"
 409         " nothing would change.  Return code 1 means some files would be reformatted."
 410         " Return code 123 means there was an internal error."
 411     ),
 412 )
 413 @click.option(
 414     "--diff",
 415     is_flag=True,
 416     help="Don't write the files back, just output a diff for each file on stdout.",
 417 )
 418 @click.option(
 419     "--color/--no-color",
 420     is_flag=True,
 421     help="Show colored diff. Only applies when `--diff` is given.",
 422 )
 423 @click.option(
 424     "--fast/--safe",
 425     is_flag=True,
 426     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 427 )
 428 @click.option(
 429     "--include",
 430     type=str,
 431     default=DEFAULT_INCLUDES,
 432     help=(
 433         "A regular expression that matches files and directories that should be"
 434         " included on recursive searches.  An empty value means all files are included"
 435         " regardless of the name.  Use forward slashes for directories on all platforms"
 436         " (Windows, too).  Exclusions are calculated first, inclusions later."
 437     ),
 438     show_default=True,
 439 )
 440 @click.option(
 441     "--exclude",
 442     type=str,
 443     default=DEFAULT_EXCLUDES,
 444     help=(
 445         "A regular expression that matches files and directories that should be"
 446         " excluded on recursive searches.  An empty value means no paths are excluded."
 447         " Use forward slashes for directories on all platforms (Windows, too). "
 448         " Exclusions are calculated first, inclusions later."
 449     ),
 450     show_default=True,
 451 )
 452 @click.option(
 453     "--force-exclude",
 454     type=str,
 455     help=(
 456         "Like --exclude, but files and directories matching this regex will be "
 457         "excluded even when they are passed explicitly as arguments."
 458     ),
 459 )
 460 @click.option(
 461     "-q",
 462     "--quiet",
 463     is_flag=True,
 464     help=(
 465         "Don't emit non-error messages to stderr. Errors are still emitted; silence"
 466         " those with 2>/dev/null."
 467     ),
 468 )
 469 @click.option(
 470     "-v",
 471     "--verbose",
 472     is_flag=True,
 473     help=(
 474         "Also emit messages to stderr about files that were not changed or were ignored"
 475         " due to --exclude=."
 476     ),
 477 )
 478 @click.version_option(version=__version__)
 479 @click.argument(
 480     "src",
 481     nargs=-1,
 482     type=click.Path(
 483         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 484     ),
 485     is_eager=True,
 486 )
 487 @click.option(
 488     "--config",
 489     type=click.Path(
 490         exists=True,
 491         file_okay=True,
 492         dir_okay=False,
 493         readable=True,
 494         allow_dash=False,
 495         path_type=str,
 496     ),
 497     is_eager=True,
 498     callback=read_pyproject_toml,
 499     help="Read configuration from FILE path.",
 500 )
 501 @click.pass_context
 502 def main(
 503     ctx: click.Context,
 504     code: Optional[str],
 505     line_length: int,
 506     target_version: List[TargetVersion],
 507     check: bool,
 508     diff: bool,
 509     color: bool,
 510     fast: bool,
 511     pyi: bool,
 512     skip_string_normalization: bool,
 513     experimental_string_processing: bool,
 514     quiet: bool,
 515     verbose: bool,
 516     include: str,
 517     exclude: str,
 518     force_exclude: Optional[str],
 519     src: Tuple[str, ...],
 520     config: Optional[str],
 521 ) -> None:
 522     """The uncompromising code formatter."""
 523     write_back = WriteBack.from_configuration(check=check, diff=diff, color=color)
 524     if target_version:
 525         versions = set(target_version)
 526     else:
 527         # We'll autodetect later.
 528         versions = set()
 529     mode = Mode(
 530         target_versions=versions,
 531         line_length=line_length,
 532         is_pyi=pyi,
 533         string_normalization=not skip_string_normalization,
 534         experimental_string_processing=experimental_string_processing,
 535     )
 536     if config and verbose:
 537         out(f"Using configuration from {config}.", bold=False, fg="blue")
 538     if code is not None:
 539         print(format_str(code, mode=mode))
 540         ctx.exit(0)
 541     report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose)
 542     sources = get_sources(
 543         ctx=ctx,
 544         src=src,
 545         quiet=quiet,
 546         verbose=verbose,
 547         include=include,
 548         exclude=exclude,
 549         force_exclude=force_exclude,
 550         report=report,
 551     )
 552
 553     path_empty(
 554         sources,
 555         "No Python files are present to be formatted. Nothing to do 😴",
 556         quiet,
 557         verbose,
 558         ctx,
 559     )
 560
 561     if len(sources) == 1:
 562         reformat_one(
 563             src=sources.pop(),
 564             fast=fast,
 565             write_back=write_back,
 566             mode=mode,
 567             report=report,
 568         )
 569     else:
 570         reformat_many(
 571             sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
 572         )
 573
 574     if verbose or not quiet:
 575         out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
 576         click.secho(str(report), err=True)
 577     ctx.exit(report.return_code)
 578
 579
 580 def get_sources(
 581     *,
 582     ctx: click.Context,
 583     src: Tuple[str, ...],
 584     quiet: bool,
 585     verbose: bool,
 586     include: str,
 587     exclude: str,
 588     force_exclude: Optional[str],
 589     report: "Report",
 590 ) -> Set[Path]:
 591     """Compute the set of files to be formatted."""
 592     try:
 593         include_regex = re_compile_maybe_verbose(include)
 594     except re.error:
 595         err(f"Invalid regular expression for include given: {include!r}")
 596         ctx.exit(2)
 597     try:
 598         exclude_regex = re_compile_maybe_verbose(exclude)
 599     except re.error:
 600         err(f"Invalid regular expression for exclude given: {exclude!r}")
 601         ctx.exit(2)
 602     try:
 603         force_exclude_regex = (
 604             re_compile_maybe_verbose(force_exclude) if force_exclude else None
 605         )
 606     except re.error:
 607         err(f"Invalid regular expression for force_exclude given: {force_exclude!r}")
 608         ctx.exit(2)
 609
 610     root = find_project_root(src)
 611     sources: Set[Path] = set()
 612     path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
 613     gitignore = get_gitignore(root)
 614
 615     for s in src:
 616         p = Path(s)
 617         if p.is_dir():
 618             sources.update(
 619                 gen_python_files(
 620                     p.iterdir(),
 621                     root,
 622                     include_regex,
 623                     exclude_regex,
 624                     force_exclude_regex,
 625                     report,
 626                     gitignore,
 627                 )
 628             )
 629         elif s == "-":
 630             sources.add(p)
 631         elif p.is_file():
 632             normalized_path = normalize_path_maybe_ignore(p, root, report)
 633             if normalized_path is None:
 634                 continue
 635
 636             normalized_path = "/" + normalized_path
 637             # Hard-exclude any files that matches the `--force-exclude` regex.
 638             if force_exclude_regex:
 639                 force_exclude_match = force_exclude_regex.search(normalized_path)
 640             else:
 641                 force_exclude_match = None
 642             if force_exclude_match and force_exclude_match.group(0):
 643                 report.path_ignored(p, "matches the --force-exclude regular expression")
 644                 continue
 645
 646             sources.add(p)
 647         else:
 648             err(f"invalid path: {s}")
 649     return sources
 650
 651
 652 def path_empty(
 653     src: Sized, msg: str, quiet: bool, verbose: bool, ctx: click.Context
 654 ) -> None:
 655     """
 656     Exit if there is no `src` provided for formatting
 657     """
 658     if not src and (verbose or not quiet):
 659         out(msg)
 660         ctx.exit(0)
 661
 662
 663 def reformat_one(
 664     src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 665 ) -> None:
 666     """Reformat a single file under `src` without spawning child processes.
 667
 668     `fast`, `write_back`, and `mode` options are passed to
 669     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 670     """
 671     try:
 672         changed = Changed.NO
 673         if not src.is_file() and str(src) == "-":
 674             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 675                 changed = Changed.YES
 676         else:
 677             cache: Cache = {}
 678             if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 679                 cache = read_cache(mode)
 680                 res_src = src.resolve()
 681                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 682                     changed = Changed.CACHED
 683             if changed is not Changed.CACHED and format_file_in_place(
 684                 src, fast=fast, write_back=write_back, mode=mode
 685             ):
 686                 changed = Changed.YES
 687             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 688                 write_back is WriteBack.CHECK and changed is Changed.NO
 689             ):
 690                 write_cache(cache, [src], mode)
 691         report.done(src, changed)
 692     except Exception as exc:
 693         if report.verbose:
 694             traceback.print_exc()
 695         report.failed(src, str(exc))
 696
 697
 698 def reformat_many(
 699     sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 700 ) -> None:
 701     """Reformat multiple files using a ProcessPoolExecutor."""
 702     executor: Executor
 703     loop = asyncio.get_event_loop()
 704     worker_count = os.cpu_count()
 705     if sys.platform == "win32":
 706         # Work around https://bugs.python.org/issue26903
 707         worker_count = min(worker_count, 61)
 708     try:
 709         executor = ProcessPoolExecutor(max_workers=worker_count)
 710     except (ImportError, OSError):
 711         # we arrive here if the underlying system does not support multi-processing
 712         # like in AWS Lambda or Termux, in which case we gracefully fallback to
 713         # a ThreadPollExecutor with just a single worker (more workers would not do us
 714         # any good due to the Global Interpreter Lock)
 715         executor = ThreadPoolExecutor(max_workers=1)
 716
 717     try:
 718         loop.run_until_complete(
 719             schedule_formatting(
 720                 sources=sources,
 721                 fast=fast,
 722                 write_back=write_back,
 723                 mode=mode,
 724                 report=report,
 725                 loop=loop,
 726                 executor=executor,
 727             )
 728         )
 729     finally:
 730         shutdown(loop)
 731         if executor is not None:
 732             executor.shutdown()
 733
 734
 735 async def schedule_formatting(
 736     sources: Set[Path],
 737     fast: bool,
 738     write_back: WriteBack,
 739     mode: Mode,
 740     report: "Report",
 741     loop: asyncio.AbstractEventLoop,
 742     executor: Executor,
 743 ) -> None:
 744     """Run formatting of `sources` in parallel using the provided `executor`.
 745
 746     (Use ProcessPoolExecutors for actual parallelism.)
 747
 748     `write_back`, `fast`, and `mode` options are passed to
 749     :func:`format_file_in_place`.
 750     """
 751     cache: Cache = {}
 752     if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 753         cache = read_cache(mode)
 754         sources, cached = filter_cached(cache, sources)
 755         for src in sorted(cached):
 756             report.done(src, Changed.CACHED)
 757     if not sources:
 758         return
 759
 760     cancelled = []
 761     sources_to_cache = []
 762     lock = None
 763     if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 764         # For diff output, we need locks to ensure we don't interleave output
 765         # from different processes.
 766         manager = Manager()
 767         lock = manager.Lock()
 768     tasks = {
 769         asyncio.ensure_future(
 770             loop.run_in_executor(
 771                 executor, format_file_in_place, src, fast, mode, write_back, lock
 772             )
 773         ): src
 774         for src in sorted(sources)
 775     }
 776     pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
 777     try:
 778         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 779         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 780     except NotImplementedError:
 781         # There are no good alternatives for these on Windows.
 782         pass
 783     while pending:
 784         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 785         for task in done:
 786             src = tasks.pop(task)
 787             if task.cancelled():
 788                 cancelled.append(task)
 789             elif task.exception():
 790                 report.failed(src, str(task.exception()))
 791             else:
 792                 changed = Changed.YES if task.result() else Changed.NO
 793                 # If the file was written back or was successfully checked as
 794                 # well-formatted, store this information in the cache.
 795                 if write_back is WriteBack.YES or (
 796                     write_back is WriteBack.CHECK and changed is Changed.NO
 797                 ):
 798                     sources_to_cache.append(src)
 799                 report.done(src, changed)
 800     if cancelled:
 801         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 802     if sources_to_cache:
 803         write_cache(cache, sources_to_cache, mode)
 804
 805
 806 def format_file_in_place(
 807     src: Path,
 808     fast: bool,
 809     mode: Mode,
 810     write_back: WriteBack = WriteBack.NO,
 811     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 812 ) -> bool:
 813     """Format file under `src` path. Return True if changed.
 814
 815     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 816     code to the file.
 817     `mode` and `fast` options are passed to :func:`format_file_contents`.
 818     """
 819     if src.suffix == ".pyi":
 820         mode = replace(mode, is_pyi=True)
 821
 822     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 823     with open(src, "rb") as buf:
 824         src_contents, encoding, newline = decode_bytes(buf.read())
 825     try:
 826         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 827     except NothingChanged:
 828         return False
 829
 830     if write_back == WriteBack.YES:
 831         with open(src, "w", encoding=encoding, newline=newline) as f:
 832             f.write(dst_contents)
 833     elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 834         now = datetime.utcnow()
 835         src_name = f"{src}\t{then} +0000"
 836         dst_name = f"{src}\t{now} +0000"
 837         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 838
 839         if write_back == write_back.COLOR_DIFF:
 840             diff_contents = color_diff(diff_contents)
 841
 842         with lock or nullcontext():
 843             f = io.TextIOWrapper(
 844                 sys.stdout.buffer,
 845                 encoding=encoding,
 846                 newline=newline,
 847                 write_through=True,
 848             )
 849             f = wrap_stream_for_windows(f)
 850             f.write(diff_contents)
 851             f.detach()
 852
 853     return True
 854
 855
 856 def color_diff(contents: str) -> str:
 857     """Inject the ANSI color codes to the diff."""
 858     lines = contents.split("\n")
 859     for i, line in enumerate(lines):
 860         if line.startswith("+++") or line.startswith("---"):
 861             line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
 862         elif line.startswith("@@"):
 863             line = "\033[36m" + line + "\033[0m"  # cyan, reset
 864         elif line.startswith("+"):
 865             line = "\033[32m" + line + "\033[0m"  # green, reset
 866         elif line.startswith("-"):
 867             line = "\033[31m" + line + "\033[0m"  # red, reset
 868         lines[i] = line
 869     return "\n".join(lines)
 870
 871
 872 def wrap_stream_for_windows(
 873     f: io.TextIOWrapper,
 874 ) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32"]:
 875     """
 876     Wrap stream with colorama's wrap_stream so colors are shown on Windows.
 877
 878     If `colorama` is unavailable, the original stream is returned unmodified.
 879     Otherwise, the `wrap_stream()` function determines whether the stream needs
 880     to be wrapped for a Windows environment and will accordingly either return
 881     an `AnsiToWin32` wrapper or the original stream.
 882     """
 883     try:
 884         from colorama.initialise import wrap_stream
 885     except ImportError:
 886         return f
 887     else:
 888         # Set `strip=False` to avoid needing to modify test_express_diff_with_color.
 889         return wrap_stream(f, convert=None, strip=False, autoreset=False, wrap=True)
 890
 891
 892 def format_stdin_to_stdout(
 893     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode
 894 ) -> bool:
 895     """Format file on stdin. Return True if changed.
 896
 897     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 898     write a diff to stdout. The `mode` argument is passed to
 899     :func:`format_file_contents`.
 900     """
 901     then = datetime.utcnow()
 902     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 903     dst = src
 904     try:
 905         dst = format_file_contents(src, fast=fast, mode=mode)
 906         return True
 907
 908     except NothingChanged:
 909         return False
 910
 911     finally:
 912         f = io.TextIOWrapper(
 913             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 914         )
 915         if write_back == WriteBack.YES:
 916             f.write(dst)
 917         elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 918             now = datetime.utcnow()
 919             src_name = f"STDIN\t{then} +0000"
 920             dst_name = f"STDOUT\t{now} +0000"
 921             d = diff(src, dst, src_name, dst_name)
 922             if write_back == WriteBack.COLOR_DIFF:
 923                 d = color_diff(d)
 924                 f = wrap_stream_for_windows(f)
 925             f.write(d)
 926         f.detach()
 927
 928
 929 def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
 930     """Reformat contents of a file and return new contents.
 931
 932     If `fast` is False, additionally confirm that the reformatted code is
 933     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 934     `mode` is passed to :func:`format_str`.
 935     """
 936     if not src_contents.strip():
 937         raise NothingChanged
 938
 939     dst_contents = format_str(src_contents, mode=mode)
 940     if src_contents == dst_contents:
 941         raise NothingChanged
 942
 943     if not fast:
 944         assert_equivalent(src_contents, dst_contents)
 945         assert_stable(src_contents, dst_contents, mode=mode)
 946     return dst_contents
 947
 948
 949 def format_str(src_contents: str, *, mode: Mode) -> FileContent:
 950     """Reformat a string and return new contents.
 951
 952     `mode` determines formatting options, such as how many characters per line are
 953     allowed.  Example:
 954
 955     >>> import black
 956     >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode()))
 957     def f(arg: str = "") -> None:
 958         ...
 959
 960     A more complex example:
 961
 962     >>> print(
 963     ...   black.format_str(
 964     ...     "def f(arg:str='')->None: hey",
 965     ...     mode=black.Mode(
 966     ...       target_versions={black.TargetVersion.PY36},
 967     ...       line_length=10,
 968     ...       string_normalization=False,
 969     ...       is_pyi=False,
 970     ...     ),
 971     ...   ),
 972     ... )
 973     def f(
 974         arg: str = '',
 975     ) -> None:
 976         hey
 977
 978     """
 979     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 980     dst_contents = []
 981     future_imports = get_future_imports(src_node)
 982     if mode.target_versions:
 983         versions = mode.target_versions
 984     else:
 985         versions = detect_target_versions(src_node)
 986     normalize_fmt_off(src_node)
 987     lines = LineGenerator(
 988         remove_u_prefix="unicode_literals" in future_imports
 989         or supports_feature(versions, Feature.UNICODE_LITERALS),
 990         is_pyi=mode.is_pyi,
 991         normalize_strings=mode.string_normalization,
 992     )
 993     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 994     empty_line = Line()
 995     after = 0
 996     split_line_features = {
 997         feature
 998         for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
 999         if supports_feature(versions, feature)
1000     }
1001     for current_line in lines.visit(src_node):
1002         dst_contents.append(str(empty_line) * after)
1003         before, after = elt.maybe_empty_lines(current_line)
1004         dst_contents.append(str(empty_line) * before)
1005         for line in transform_line(
1006             current_line, mode=mode, features=split_line_features
1007         ):
1008             dst_contents.append(str(line))
1009     return "".join(dst_contents)
1010
1011
1012 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
1013     """Return a tuple of (decoded_contents, encoding, newline).
1014
1015     `newline` is either CRLF or LF but `decoded_contents` is decoded with
1016     universal newlines (i.e. only contains LF).
1017     """
1018     srcbuf = io.BytesIO(src)
1019     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
1020     if not lines:
1021         return "", encoding, "\n"
1022
1023     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
1024     srcbuf.seek(0)
1025     with io.TextIOWrapper(srcbuf, encoding) as tiow:
1026         return tiow.read(), encoding, newline
1027
1028
1029 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
1030     if not target_versions:
1031         # No target_version specified, so try all grammars.
1032         return [
1033             # Python 3.7+
1034             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
1035             # Python 3.0-3.6
1036             pygram.python_grammar_no_print_statement_no_exec_statement,
1037             # Python 2.7 with future print_function import
1038             pygram.python_grammar_no_print_statement,
1039             # Python 2.7
1040             pygram.python_grammar,
1041         ]
1042
1043     if all(version.is_python2() for version in target_versions):
1044         # Python 2-only code, so try Python 2 grammars.
1045         return [
1046             # Python 2.7 with future print_function import
1047             pygram.python_grammar_no_print_statement,
1048             # Python 2.7
1049             pygram.python_grammar,
1050         ]
1051
1052     # Python 3-compatible code, so only try Python 3 grammar.
1053     grammars = []
1054     # If we have to parse both, try to parse async as a keyword first
1055     if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
1056         # Python 3.7+
1057         grammars.append(
1058             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
1059         )
1060     if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
1061         # Python 3.0-3.6
1062         grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
1063     # At least one of the above branches must have been taken, because every Python
1064     # version has exactly one of the two 'ASYNC_*' flags
1065     return grammars
1066
1067
1068 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
1069     """Given a string with source, return the lib2to3 Node."""
1070     if not src_txt.endswith("\n"):
1071         src_txt += "\n"
1072
1073     for grammar in get_grammars(set(target_versions)):
1074         drv = driver.Driver(grammar, pytree.convert)
1075         try:
1076             result = drv.parse_string(src_txt, True)
1077             break
1078
1079         except ParseError as pe:
1080             lineno, column = pe.context[1]
1081             lines = src_txt.splitlines()
1082             try:
1083                 faulty_line = lines[lineno - 1]
1084             except IndexError:
1085                 faulty_line = "<line number missing in source>"
1086             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
1087     else:
1088         raise exc from None
1089
1090     if isinstance(result, Leaf):
1091         result = Node(syms.file_input, [result])
1092     return result
1093
1094
1095 def lib2to3_unparse(node: Node) -> str:
1096     """Given a lib2to3 node, return its string representation."""
1097     code = str(node)
1098     return code
1099
1100
1101 class Visitor(Generic[T]):
1102     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
1103
1104     def visit(self, node: LN) -> Iterator[T]:
1105         """Main method to visit `node` and its children.
1106
1107         It tries to find a `visit_*()` method for the given `node.type`, like
1108         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
1109         If no dedicated `visit_*()` method is found, chooses `visit_default()`
1110         instead.
1111
1112         Then yields objects of type `T` from the selected visitor.
1113         """
1114         if node.type < 256:
1115             name = token.tok_name[node.type]
1116         else:
1117             name = str(type_repr(node.type))
1118         # We explicitly branch on whether a visitor exists (instead of
1119         # using self.visit_default as the default arg to getattr) in order
1120         # to save needing to create a bound method object and so mypyc can
1121         # generate a native call to visit_default.
1122         visitf = getattr(self, f"visit_{name}", None)
1123         if visitf:
1124             yield from visitf(node)
1125         else:
1126             yield from self.visit_default(node)
1127
1128     def visit_default(self, node: LN) -> Iterator[T]:
1129         """Default `visit_*()` implementation. Recurses to children of `node`."""
1130         if isinstance(node, Node):
1131             for child in node.children:
1132                 yield from self.visit(child)
1133
1134
1135 @dataclass
1136 class DebugVisitor(Visitor[T]):
1137     tree_depth: int = 0
1138
1139     def visit_default(self, node: LN) -> Iterator[T]:
1140         indent = " " * (2 * self.tree_depth)
1141         if isinstance(node, Node):
1142             _type = type_repr(node.type)
1143             out(f"{indent}{_type}", fg="yellow")
1144             self.tree_depth += 1
1145             for child in node.children:
1146                 yield from self.visit(child)
1147
1148             self.tree_depth -= 1
1149             out(f"{indent}/{_type}", fg="yellow", bold=False)
1150         else:
1151             _type = token.tok_name.get(node.type, str(node.type))
1152             out(f"{indent}{_type}", fg="blue", nl=False)
1153             if node.prefix:
1154                 # We don't have to handle prefixes for `Node` objects since
1155                 # that delegates to the first child anyway.
1156                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
1157             out(f" {node.value!r}", fg="blue", bold=False)
1158
1159     @classmethod
1160     def show(cls, code: Union[str, Leaf, Node]) -> None:
1161         """Pretty-print the lib2to3 AST of a given string of `code`.
1162
1163         Convenience method for debugging.
1164         """
1165         v: DebugVisitor[None] = DebugVisitor()
1166         if isinstance(code, str):
1167             code = lib2to3_parse(code)
1168         list(v.visit(code))
1169
1170
1171 WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
1172 STATEMENT: Final = {
1173     syms.if_stmt,
1174     syms.while_stmt,
1175     syms.for_stmt,
1176     syms.try_stmt,
1177     syms.except_clause,
1178     syms.with_stmt,
1179     syms.funcdef,
1180     syms.classdef,
1181 }
1182 STANDALONE_COMMENT: Final = 153
1183 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
1184 LOGIC_OPERATORS: Final = {"and", "or"}
1185 COMPARATORS: Final = {
1186     token.LESS,
1187     token.GREATER,
1188     token.EQEQUAL,
1189     token.NOTEQUAL,
1190     token.LESSEQUAL,
1191     token.GREATEREQUAL,
1192 }
1193 MATH_OPERATORS: Final = {
1194     token.VBAR,
1195     token.CIRCUMFLEX,
1196     token.AMPER,
1197     token.LEFTSHIFT,
1198     token.RIGHTSHIFT,
1199     token.PLUS,
1200     token.MINUS,
1201     token.STAR,
1202     token.SLASH,
1203     token.DOUBLESLASH,
1204     token.PERCENT,
1205     token.AT,
1206     token.TILDE,
1207     token.DOUBLESTAR,
1208 }
1209 STARS: Final = {token.STAR, token.DOUBLESTAR}
1210 VARARGS_SPECIALS: Final = STARS | {token.SLASH}
1211 VARARGS_PARENTS: Final = {
1212     syms.arglist,
1213     syms.argument,  # double star in arglist
1214     syms.trailer,  # single argument to call
1215     syms.typedargslist,
1216     syms.varargslist,  # lambdas
1217 }
1218 UNPACKING_PARENTS: Final = {
1219     syms.atom,  # single element of a list or set literal
1220     syms.dictsetmaker,
1221     syms.listmaker,
1222     syms.testlist_gexp,
1223     syms.testlist_star_expr,
1224 }
1225 TEST_DESCENDANTS: Final = {
1226     syms.test,
1227     syms.lambdef,
1228     syms.or_test,
1229     syms.and_test,
1230     syms.not_test,
1231     syms.comparison,
1232     syms.star_expr,
1233     syms.expr,
1234     syms.xor_expr,
1235     syms.and_expr,
1236     syms.shift_expr,
1237     syms.arith_expr,
1238     syms.trailer,
1239     syms.term,
1240     syms.power,
1241 }
1242 ASSIGNMENTS: Final = {
1243     "=",
1244     "+=",
1245     "-=",
1246     "*=",
1247     "@=",
1248     "/=",
1249     "%=",
1250     "&=",
1251     "|=",
1252     "^=",
1253     "<<=",
1254     ">>=",
1255     "**=",
1256     "//=",
1257 }
1258 COMPREHENSION_PRIORITY: Final = 20
1259 COMMA_PRIORITY: Final = 18
1260 TERNARY_PRIORITY: Final = 16
1261 LOGIC_PRIORITY: Final = 14
1262 STRING_PRIORITY: Final = 12
1263 COMPARATOR_PRIORITY: Final = 10
1264 MATH_PRIORITIES: Final = {
1265     token.VBAR: 9,
1266     token.CIRCUMFLEX: 8,
1267     token.AMPER: 7,
1268     token.LEFTSHIFT: 6,
1269     token.RIGHTSHIFT: 6,
1270     token.PLUS: 5,
1271     token.MINUS: 5,
1272     token.STAR: 4,
1273     token.SLASH: 4,
1274     token.DOUBLESLASH: 4,
1275     token.PERCENT: 4,
1276     token.AT: 4,
1277     token.TILDE: 3,
1278     token.DOUBLESTAR: 2,
1279 }
1280 DOT_PRIORITY: Final = 1
1281
1282
1283 @dataclass
1284 class BracketTracker:
1285     """Keeps track of brackets on a line."""
1286
1287     depth: int = 0
1288     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict)
1289     delimiters: Dict[LeafID, Priority] = field(default_factory=dict)
1290     previous: Optional[Leaf] = None
1291     _for_loop_depths: List[int] = field(default_factory=list)
1292     _lambda_argument_depths: List[int] = field(default_factory=list)
1293     invisible: List[Leaf] = field(default_factory=list)
1294
1295     def mark(self, leaf: Leaf) -> None:
1296         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
1297
1298         All leaves receive an int `bracket_depth` field that stores how deep
1299         within brackets a given leaf is. 0 means there are no enclosing brackets
1300         that started on this line.
1301
1302         If a leaf is itself a closing bracket, it receives an `opening_bracket`
1303         field that it forms a pair with. This is a one-directional link to
1304         avoid reference cycles.
1305
1306         If a leaf is a delimiter (a token on which Black can split the line if
1307         needed) and it's on depth 0, its `id()` is stored in the tracker's
1308         `delimiters` field.
1309         """
1310         if leaf.type == token.COMMENT:
1311             return
1312
1313         self.maybe_decrement_after_for_loop_variable(leaf)
1314         self.maybe_decrement_after_lambda_arguments(leaf)
1315         if leaf.type in CLOSING_BRACKETS:
1316             self.depth -= 1
1317             try:
1318                 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
1319             except KeyError as e:
1320                 raise BracketMatchError(
1321                     "Unable to match a closing bracket to the following opening"
1322                     f" bracket: {leaf}"
1323                 ) from e
1324             leaf.opening_bracket = opening_bracket
1325             if not leaf.value:
1326                 self.invisible.append(leaf)
1327         leaf.bracket_depth = self.depth
1328         if self.depth == 0:
1329             delim = is_split_before_delimiter(leaf, self.previous)
1330             if delim and self.previous is not None:
1331                 self.delimiters[id(self.previous)] = delim
1332             else:
1333                 delim = is_split_after_delimiter(leaf, self.previous)
1334                 if delim:
1335                     self.delimiters[id(leaf)] = delim
1336         if leaf.type in OPENING_BRACKETS:
1337             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
1338             self.depth += 1
1339             if not leaf.value:
1340                 self.invisible.append(leaf)
1341         self.previous = leaf
1342         self.maybe_increment_lambda_arguments(leaf)
1343         self.maybe_increment_for_loop_variable(leaf)
1344
1345     def any_open_brackets(self) -> bool:
1346         """Return True if there is an yet unmatched open bracket on the line."""
1347         return bool(self.bracket_match)
1348
1349     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
1350         """Return the highest priority of a delimiter found on the line.
1351
1352         Values are consistent with what `is_split_*_delimiter()` return.
1353         Raises ValueError on no delimiters.
1354         """
1355         return max(v for k, v in self.delimiters.items() if k not in exclude)
1356
1357     def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
1358         """Return the number of delimiters with the given `priority`.
1359
1360         If no `priority` is passed, defaults to max priority on the line.
1361         """
1362         if not self.delimiters:
1363             return 0
1364
1365         priority = priority or self.max_delimiter_priority()
1366         return sum(1 for p in self.delimiters.values() if p == priority)
1367
1368     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1369         """In a for loop, or comprehension, the variables are often unpacks.
1370
1371         To avoid splitting on the comma in this situation, increase the depth of
1372         tokens between `for` and `in`.
1373         """
1374         if leaf.type == token.NAME and leaf.value == "for":
1375             self.depth += 1
1376             self._for_loop_depths.append(self.depth)
1377             return True
1378
1379         return False
1380
1381     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
1382         """See `maybe_increment_for_loop_variable` above for explanation."""
1383         if (
1384             self._for_loop_depths
1385             and self._for_loop_depths[-1] == self.depth
1386             and leaf.type == token.NAME
1387             and leaf.value == "in"
1388         ):
1389             self.depth -= 1
1390             self._for_loop_depths.pop()
1391             return True
1392
1393         return False
1394
1395     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
1396         """In a lambda expression, there might be more than one argument.
1397
1398         To avoid splitting on the comma in this situation, increase the depth of
1399         tokens between `lambda` and `:`.
1400         """
1401         if leaf.type == token.NAME and leaf.value == "lambda":
1402             self.depth += 1
1403             self._lambda_argument_depths.append(self.depth)
1404             return True
1405
1406         return False
1407
1408     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1409         """See `maybe_increment_lambda_arguments` above for explanation."""
1410         if (
1411             self._lambda_argument_depths
1412             and self._lambda_argument_depths[-1] == self.depth
1413             and leaf.type == token.COLON
1414         ):
1415             self.depth -= 1
1416             self._lambda_argument_depths.pop()
1417             return True
1418
1419         return False
1420
1421     def get_open_lsqb(self) -> Optional[Leaf]:
1422         """Return the most recent opening square bracket (if any)."""
1423         return self.bracket_match.get((self.depth - 1, token.RSQB))
1424
1425
1426 @dataclass
1427 class Line:
1428     """Holds leaves and comments. Can be printed with `str(line)`."""
1429
1430     depth: int = 0
1431     leaves: List[Leaf] = field(default_factory=list)
1432     # keys ordered like `leaves`
1433     comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict)
1434     bracket_tracker: BracketTracker = field(default_factory=BracketTracker)
1435     inside_brackets: bool = False
1436     should_explode: bool = False
1437
1438     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1439         """Add a new `leaf` to the end of the line.
1440
1441         Unless `preformatted` is True, the `leaf` will receive a new consistent
1442         whitespace prefix and metadata applied by :class:`BracketTracker`.
1443         Trailing commas are maybe removed, unpacked for loop variables are
1444         demoted from being delimiters.
1445
1446         Inline comments are put aside.
1447         """
1448         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1449         if not has_value:
1450             return
1451
1452         if token.COLON == leaf.type and self.is_class_paren_empty:
1453             del self.leaves[-2:]
1454         if self.leaves and not preformatted:
1455             # Note: at this point leaf.prefix should be empty except for
1456             # imports, for which we only preserve newlines.
1457             leaf.prefix += whitespace(
1458                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1459             )
1460         if self.inside_brackets or not preformatted:
1461             self.bracket_tracker.mark(leaf)
1462             if self.maybe_should_explode(leaf):
1463                 self.should_explode = True
1464         if not self.append_comment(leaf):
1465             self.leaves.append(leaf)
1466
1467     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1468         """Like :func:`append()` but disallow invalid standalone comment structure.
1469
1470         Raises ValueError when any `leaf` is appended after a standalone comment
1471         or when a standalone comment is not the first leaf on the line.
1472         """
1473         if self.bracket_tracker.depth == 0:
1474             if self.is_comment:
1475                 raise ValueError("cannot append to standalone comments")
1476
1477             if self.leaves and leaf.type == STANDALONE_COMMENT:
1478                 raise ValueError(
1479                     "cannot append standalone comments to a populated line"
1480                 )
1481
1482         self.append(leaf, preformatted=preformatted)
1483
1484     @property
1485     def is_comment(self) -> bool:
1486         """Is this line a standalone comment?"""
1487         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1488
1489     @property
1490     def is_decorator(self) -> bool:
1491         """Is this line a decorator?"""
1492         return bool(self) and self.leaves[0].type == token.AT
1493
1494     @property
1495     def is_import(self) -> bool:
1496         """Is this an import line?"""
1497         return bool(self) and is_import(self.leaves[0])
1498
1499     @property
1500     def is_class(self) -> bool:
1501         """Is this line a class definition?"""
1502         return (
1503             bool(self)
1504             and self.leaves[0].type == token.NAME
1505             and self.leaves[0].value == "class"
1506         )
1507
1508     @property
1509     def is_stub_class(self) -> bool:
1510         """Is this line a class definition with a body consisting only of "..."?"""
1511         return self.is_class and self.leaves[-3:] == [
1512             Leaf(token.DOT, ".") for _ in range(3)
1513         ]
1514
1515     @property
1516     def is_def(self) -> bool:
1517         """Is this a function definition? (Also returns True for async defs.)"""
1518         try:
1519             first_leaf = self.leaves[0]
1520         except IndexError:
1521             return False
1522
1523         try:
1524             second_leaf: Optional[Leaf] = self.leaves[1]
1525         except IndexError:
1526             second_leaf = None
1527         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1528             first_leaf.type == token.ASYNC
1529             and second_leaf is not None
1530             and second_leaf.type == token.NAME
1531             and second_leaf.value == "def"
1532         )
1533
1534     @property
1535     def is_class_paren_empty(self) -> bool:
1536         """Is this a class with no base classes but using parentheses?
1537
1538         Those are unnecessary and should be removed.
1539         """
1540         return (
1541             bool(self)
1542             and len(self.leaves) == 4
1543             and self.is_class
1544             and self.leaves[2].type == token.LPAR
1545             and self.leaves[2].value == "("
1546             and self.leaves[3].type == token.RPAR
1547             and self.leaves[3].value == ")"
1548         )
1549
1550     @property
1551     def is_triple_quoted_string(self) -> bool:
1552         """Is the line a triple quoted string?"""
1553         return (
1554             bool(self)
1555             and self.leaves[0].type == token.STRING
1556             and self.leaves[0].value.startswith(('"""', "'''"))
1557         )
1558
1559     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1560         """If so, needs to be split before emitting."""
1561         for leaf in self.leaves:
1562             if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit:
1563                 return True
1564
1565         return False
1566
1567     def contains_uncollapsable_type_comments(self) -> bool:
1568         ignored_ids = set()
1569         try:
1570             last_leaf = self.leaves[-1]
1571             ignored_ids.add(id(last_leaf))
1572             if last_leaf.type == token.COMMA or (
1573                 last_leaf.type == token.RPAR and not last_leaf.value
1574             ):
1575                 # When trailing commas or optional parens are inserted by Black for
1576                 # consistency, comments after the previous last element are not moved
1577                 # (they don't have to, rendering will still be correct).  So we ignore
1578                 # trailing commas and invisible.
1579                 last_leaf = self.leaves[-2]
1580                 ignored_ids.add(id(last_leaf))
1581         except IndexError:
1582             return False
1583
1584         # A type comment is uncollapsable if it is attached to a leaf
1585         # that isn't at the end of the line (since that could cause it
1586         # to get associated to a different argument) or if there are
1587         # comments before it (since that could cause it to get hidden
1588         # behind a comment.
1589         comment_seen = False
1590         for leaf_id, comments in self.comments.items():
1591             for comment in comments:
1592                 if is_type_comment(comment):
1593                     if comment_seen or (
1594                         not is_type_comment(comment, " ignore")
1595                         and leaf_id not in ignored_ids
1596                     ):
1597                         return True
1598
1599                 comment_seen = True
1600
1601         return False
1602
1603     def contains_unsplittable_type_ignore(self) -> bool:
1604         if not self.leaves:
1605             return False
1606
1607         # If a 'type: ignore' is attached to the end of a line, we
1608         # can't split the line, because we can't know which of the
1609         # subexpressions the ignore was meant to apply to.
1610         #
1611         # We only want this to apply to actual physical lines from the
1612         # original source, though: we don't want the presence of a
1613         # 'type: ignore' at the end of a multiline expression to
1614         # justify pushing it all onto one line. Thus we
1615         # (unfortunately) need to check the actual source lines and
1616         # only report an unsplittable 'type: ignore' if this line was
1617         # one line in the original code.
1618
1619         # Grab the first and last line numbers, skipping generated leaves
1620         first_line = next((leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0)
1621         last_line = next(
1622             (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0
1623         )
1624
1625         if first_line == last_line:
1626             # We look at the last two leaves since a comma or an
1627             # invisible paren could have been added at the end of the
1628             # line.
1629             for node in self.leaves[-2:]:
1630                 for comment in self.comments.get(id(node), []):
1631                     if is_type_comment(comment, " ignore"):
1632                         return True
1633
1634         return False
1635
1636     def contains_multiline_strings(self) -> bool:
1637         return any(is_multiline_string(leaf) for leaf in self.leaves)
1638
1639     def maybe_should_explode(self, closing: Leaf) -> bool:
1640         """Return True if this line should explode (always be split), that is when:
1641         - there's a trailing comma here; and
1642         - it's not a one-tuple.
1643         """
1644         if not (
1645             closing.type in CLOSING_BRACKETS
1646             and self.leaves
1647             and self.leaves[-1].type == token.COMMA
1648         ):
1649             return False
1650
1651         if closing.type in {token.RBRACE, token.RSQB}:
1652             return True
1653
1654         if self.is_import:
1655             return True
1656
1657         if not is_one_tuple_between(closing.opening_bracket, closing, self.leaves):
1658             return True
1659
1660         return False
1661
1662     def append_comment(self, comment: Leaf) -> bool:
1663         """Add an inline or standalone comment to the line."""
1664         if (
1665             comment.type == STANDALONE_COMMENT
1666             and self.bracket_tracker.any_open_brackets()
1667         ):
1668             comment.prefix = ""
1669             return False
1670
1671         if comment.type != token.COMMENT:
1672             return False
1673
1674         if not self.leaves:
1675             comment.type = STANDALONE_COMMENT
1676             comment.prefix = ""
1677             return False
1678
1679         last_leaf = self.leaves[-1]
1680         if (
1681             last_leaf.type == token.RPAR
1682             and not last_leaf.value
1683             and last_leaf.parent
1684             and len(list(last_leaf.parent.leaves())) <= 3
1685             and not is_type_comment(comment)
1686         ):
1687             # Comments on an optional parens wrapping a single leaf should belong to
1688             # the wrapped node except if it's a type comment. Pinning the comment like
1689             # this avoids unstable formatting caused by comment migration.
1690             if len(self.leaves) < 2:
1691                 comment.type = STANDALONE_COMMENT
1692                 comment.prefix = ""
1693                 return False
1694
1695             last_leaf = self.leaves[-2]
1696         self.comments.setdefault(id(last_leaf), []).append(comment)
1697         return True
1698
1699     def comments_after(self, leaf: Leaf) -> List[Leaf]:
1700         """Generate comments that should appear directly after `leaf`."""
1701         return self.comments.get(id(leaf), [])
1702
1703     def remove_trailing_comma(self) -> None:
1704         """Remove the trailing comma and moves the comments attached to it."""
1705         trailing_comma = self.leaves.pop()
1706         trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
1707         self.comments.setdefault(id(self.leaves[-1]), []).extend(
1708             trailing_comma_comments
1709         )
1710
1711     def is_complex_subscript(self, leaf: Leaf) -> bool:
1712         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1713         open_lsqb = self.bracket_tracker.get_open_lsqb()
1714         if open_lsqb is None:
1715             return False
1716
1717         subscript_start = open_lsqb.next_sibling
1718
1719         if isinstance(subscript_start, Node):
1720             if subscript_start.type == syms.listmaker:
1721                 return False
1722
1723             if subscript_start.type == syms.subscriptlist:
1724                 subscript_start = child_towards(subscript_start, leaf)
1725         return subscript_start is not None and any(
1726             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1727         )
1728
1729     def clone(self) -> "Line":
1730         return Line(
1731             depth=self.depth,
1732             inside_brackets=self.inside_brackets,
1733             should_explode=self.should_explode,
1734         )
1735
1736     def __str__(self) -> str:
1737         """Render the line."""
1738         if not self:
1739             return "\n"
1740
1741         indent = "    " * self.depth
1742         leaves = iter(self.leaves)
1743         first = next(leaves)
1744         res = f"{first.prefix}{indent}{first.value}"
1745         for leaf in leaves:
1746             res += str(leaf)
1747         for comment in itertools.chain.from_iterable(self.comments.values()):
1748             res += str(comment)
1749
1750         return res + "\n"
1751
1752     def __bool__(self) -> bool:
1753         """Return True if the line has leaves or comments."""
1754         return bool(self.leaves or self.comments)
1755
1756
1757 @dataclass
1758 class EmptyLineTracker:
1759     """Provides a stateful method that returns the number of potential extra
1760     empty lines needed before and after the currently processed line.
1761
1762     Note: this tracker works on lines that haven't been split yet.  It assumes
1763     the prefix of the first leaf consists of optional newlines.  Those newlines
1764     are consumed by `maybe_empty_lines()` and included in the computation.
1765     """
1766
1767     is_pyi: bool = False
1768     previous_line: Optional[Line] = None
1769     previous_after: int = 0
1770     previous_defs: List[int] = field(default_factory=list)
1771
1772     def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1773         """Return the number of extra empty lines before and after the `current_line`.
1774
1775         This is for separating `def`, `async def` and `class` with extra empty
1776         lines (two on module-level).
1777         """
1778         before, after = self._maybe_empty_lines(current_line)
1779         before = (
1780             # Black should not insert empty lines at the beginning
1781             # of the file
1782             0
1783             if self.previous_line is None
1784             else before - self.previous_after
1785         )
1786         self.previous_after = after
1787         self.previous_line = current_line
1788         return before, after
1789
1790     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1791         max_allowed = 1
1792         if current_line.depth == 0:
1793             max_allowed = 1 if self.is_pyi else 2
1794         if current_line.leaves:
1795             # Consume the first leaf's extra newlines.
1796             first_leaf = current_line.leaves[0]
1797             before = first_leaf.prefix.count("\n")
1798             before = min(before, max_allowed)
1799             first_leaf.prefix = ""
1800         else:
1801             before = 0
1802         depth = current_line.depth
1803         while self.previous_defs and self.previous_defs[-1] >= depth:
1804             self.previous_defs.pop()
1805             if self.is_pyi:
1806                 before = 0 if depth else 1
1807             else:
1808                 before = 1 if depth else 2
1809         if current_line.is_decorator or current_line.is_def or current_line.is_class:
1810             return self._maybe_empty_lines_for_class_or_def(current_line, before)
1811
1812         if (
1813             self.previous_line
1814             and self.previous_line.is_import
1815             and not current_line.is_import
1816             and depth == self.previous_line.depth
1817         ):
1818             return (before or 1), 0
1819
1820         if (
1821             self.previous_line
1822             and self.previous_line.is_class
1823             and current_line.is_triple_quoted_string
1824         ):
1825             return before, 1
1826
1827         return before, 0
1828
1829     def _maybe_empty_lines_for_class_or_def(
1830         self, current_line: Line, before: int
1831     ) -> Tuple[int, int]:
1832         if not current_line.is_decorator:
1833             self.previous_defs.append(current_line.depth)
1834         if self.previous_line is None:
1835             # Don't insert empty lines before the first line in the file.
1836             return 0, 0
1837
1838         if self.previous_line.is_decorator:
1839             if self.is_pyi and current_line.is_stub_class:
1840                 # Insert an empty line after a decorated stub class
1841                 return 0, 1
1842
1843             return 0, 0
1844
1845         if self.previous_line.depth < current_line.depth and (
1846             self.previous_line.is_class or self.previous_line.is_def
1847         ):
1848             return 0, 0
1849
1850         if (
1851             self.previous_line.is_comment
1852             and self.previous_line.depth == current_line.depth
1853             and before == 0
1854         ):
1855             return 0, 0
1856
1857         if self.is_pyi:
1858             if self.previous_line.depth > current_line.depth:
1859                 newlines = 1
1860             elif current_line.is_class or self.previous_line.is_class:
1861                 if current_line.is_stub_class and self.previous_line.is_stub_class:
1862                     # No blank line between classes with an empty body
1863                     newlines = 0
1864                 else:
1865                     newlines = 1
1866             elif (
1867                 current_line.is_def or current_line.is_decorator
1868             ) and not self.previous_line.is_def:
1869                 # Blank line between a block of functions (maybe with preceding
1870                 # decorators) and a block of non-functions
1871                 newlines = 1
1872             else:
1873                 newlines = 0
1874         else:
1875             newlines = 2
1876         if current_line.depth and newlines:
1877             newlines -= 1
1878         return newlines, 0
1879
1880
1881 @dataclass
1882 class LineGenerator(Visitor[Line]):
1883     """Generates reformatted Line objects.  Empty lines are not emitted.
1884
1885     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1886     in ways that will no longer stringify to valid Python code on the tree.
1887     """
1888
1889     is_pyi: bool = False
1890     normalize_strings: bool = True
1891     current_line: Line = field(default_factory=Line)
1892     remove_u_prefix: bool = False
1893
1894     def line(self, indent: int = 0) -> Iterator[Line]:
1895         """Generate a line.
1896
1897         If the line is empty, only emit if it makes sense.
1898         If the line is too long, split it first and then generate.
1899
1900         If any lines were generated, set up a new current_line.
1901         """
1902         if not self.current_line:
1903             self.current_line.depth += indent
1904             return  # Line is empty, don't emit. Creating a new one unnecessary.
1905
1906         complete_line = self.current_line
1907         self.current_line = Line(depth=complete_line.depth + indent)
1908         yield complete_line
1909
1910     def visit_default(self, node: LN) -> Iterator[Line]:
1911         """Default `visit_*()` implementation. Recurses to children of `node`."""
1912         if isinstance(node, Leaf):
1913             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1914             for comment in generate_comments(node):
1915                 if any_open_brackets:
1916                     # any comment within brackets is subject to splitting
1917                     self.current_line.append(comment)
1918                 elif comment.type == token.COMMENT:
1919                     # regular trailing comment
1920                     self.current_line.append(comment)
1921                     yield from self.line()
1922
1923                 else:
1924                     # regular standalone comment
1925                     yield from self.line()
1926
1927                     self.current_line.append(comment)
1928                     yield from self.line()
1929
1930             normalize_prefix(node, inside_brackets=any_open_brackets)
1931             if self.normalize_strings and node.type == token.STRING:
1932                 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1933                 normalize_string_quotes(node)
1934             if node.type == token.NUMBER:
1935                 normalize_numeric_literal(node)
1936             if node.type not in WHITESPACE:
1937                 self.current_line.append(node)
1938         yield from super().visit_default(node)
1939
1940     def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
1941         """Increase indentation level, maybe yield a line."""
1942         # In blib2to3 INDENT never holds comments.
1943         yield from self.line(+1)
1944         yield from self.visit_default(node)
1945
1946     def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
1947         """Decrease indentation level, maybe yield a line."""
1948         # The current line might still wait for trailing comments.  At DEDENT time
1949         # there won't be any (they would be prefixes on the preceding NEWLINE).
1950         # Emit the line then.
1951         yield from self.line()
1952
1953         # While DEDENT has no value, its prefix may contain standalone comments
1954         # that belong to the current indentation level.  Get 'em.
1955         yield from self.visit_default(node)
1956
1957         # Finally, emit the dedent.
1958         yield from self.line(-1)
1959
1960     def visit_stmt(
1961         self, node: Node, keywords: Set[str], parens: Set[str]
1962     ) -> Iterator[Line]:
1963         """Visit a statement.
1964
1965         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1966         `def`, `with`, `class`, `assert` and assignments.
1967
1968         The relevant Python language `keywords` for a given statement will be
1969         NAME leaves within it. This methods puts those on a separate line.
1970
1971         `parens` holds a set of string leaf values immediately after which
1972         invisible parens should be put.
1973         """
1974         normalize_invisible_parens(node, parens_after=parens)
1975         for child in node.children:
1976             if child.type == token.NAME and child.value in keywords:  # type: ignore
1977                 yield from self.line()
1978
1979             yield from self.visit(child)
1980
1981     def visit_suite(self, node: Node) -> Iterator[Line]:
1982         """Visit a suite."""
1983         if self.is_pyi and is_stub_suite(node):
1984             yield from self.visit(node.children[2])
1985         else:
1986             yield from self.visit_default(node)
1987
1988     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1989         """Visit a statement without nested statements."""
1990         is_suite_like = node.parent and node.parent.type in STATEMENT
1991         if is_suite_like:
1992             if self.is_pyi and is_stub_body(node):
1993                 yield from self.visit_default(node)
1994             else:
1995                 yield from self.line(+1)
1996                 yield from self.visit_default(node)
1997                 yield from self.line(-1)
1998
1999         else:
2000             if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
2001                 yield from self.line()
2002             yield from self.visit_default(node)
2003
2004     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
2005         """Visit `async def`, `async for`, `async with`."""
2006         yield from self.line()
2007
2008         children = iter(node.children)
2009         for child in children:
2010             yield from self.visit(child)
2011
2012             if child.type == token.ASYNC:
2013                 break
2014
2015         internal_stmt = next(children)
2016         for child in internal_stmt.children:
2017             yield from self.visit(child)
2018
2019     def visit_decorators(self, node: Node) -> Iterator[Line]:
2020         """Visit decorators."""
2021         for child in node.children:
2022             yield from self.line()
2023             yield from self.visit(child)
2024
2025     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
2026         """Remove a semicolon and put the other statement on a separate line."""
2027         yield from self.line()
2028
2029     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
2030         """End of file. Process outstanding comments and end with a newline."""
2031         yield from self.visit_default(leaf)
2032         yield from self.line()
2033
2034     def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
2035         if not self.current_line.bracket_tracker.any_open_brackets():
2036             yield from self.line()
2037         yield from self.visit_default(leaf)
2038
2039     def visit_factor(self, node: Node) -> Iterator[Line]:
2040         """Force parentheses between a unary op and a binary power:
2041
2042         -2 ** 8 -> -(2 ** 8)
2043         """
2044         _operator, operand = node.children
2045         if (
2046             operand.type == syms.power
2047             and len(operand.children) == 3
2048             and operand.children[1].type == token.DOUBLESTAR
2049         ):
2050             lpar = Leaf(token.LPAR, "(")
2051             rpar = Leaf(token.RPAR, ")")
2052             index = operand.remove() or 0
2053             node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
2054         yield from self.visit_default(node)
2055
2056     def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
2057         if is_docstring(leaf) and "\\\n" not in leaf.value:
2058             # We're ignoring docstrings with backslash newline escapes because changing
2059             # indentation of those changes the AST representation of the code.
2060             prefix = get_string_prefix(leaf.value)
2061             lead_len = len(prefix) + 3
2062             tail_len = -3
2063             indent = " " * 4 * self.current_line.depth
2064             docstring = fix_docstring(leaf.value[lead_len:tail_len], indent)
2065             if docstring:
2066                 if leaf.value[lead_len - 1] == docstring[0]:
2067                     docstring = " " + docstring
2068                 if leaf.value[tail_len + 1] == docstring[-1]:
2069                     docstring = docstring + " "
2070             leaf.value = leaf.value[0:lead_len] + docstring + leaf.value[tail_len:]
2071
2072         yield from self.visit_default(leaf)
2073
2074     def __post_init__(self) -> None:
2075         """You are in a twisty little maze of passages."""
2076         v = self.visit_stmt
2077         Ø: Set[str] = set()
2078         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
2079         self.visit_if_stmt = partial(
2080             v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
2081         )
2082         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
2083         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
2084         self.visit_try_stmt = partial(
2085             v, keywords={"try", "except", "else", "finally"}, parens=Ø
2086         )
2087         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
2088         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
2089         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
2090         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
2091         self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
2092         self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
2093         self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
2094         self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
2095         self.visit_async_funcdef = self.visit_async_stmt
2096         self.visit_decorated = self.visit_decorators
2097
2098
2099 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
2100 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
2101 OPENING_BRACKETS = set(BRACKET.keys())
2102 CLOSING_BRACKETS = set(BRACKET.values())
2103 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
2104 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
2105
2106
2107 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
2108     """Return whitespace prefix if needed for the given `leaf`.
2109
2110     `complex_subscript` signals whether the given leaf is part of a subscription
2111     which has non-trivial arguments, like arithmetic expressions or function calls.
2112     """
2113     NO = ""
2114     SPACE = " "
2115     DOUBLESPACE = "  "
2116     t = leaf.type
2117     p = leaf.parent
2118     v = leaf.value
2119     if t in ALWAYS_NO_SPACE:
2120         return NO
2121
2122     if t == token.COMMENT:
2123         return DOUBLESPACE
2124
2125     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
2126     if t == token.COLON and p.type not in {
2127         syms.subscript,
2128         syms.subscriptlist,
2129         syms.sliceop,
2130     }:
2131         return NO
2132
2133     prev = leaf.prev_sibling
2134     if not prev:
2135         prevp = preceding_leaf(p)
2136         if not prevp or prevp.type in OPENING_BRACKETS:
2137             return NO
2138
2139         if t == token.COLON:
2140             if prevp.type == token.COLON:
2141                 return NO
2142
2143             elif prevp.type != token.COMMA and not complex_subscript:
2144                 return NO
2145
2146             return SPACE
2147
2148         if prevp.type == token.EQUAL:
2149             if prevp.parent:
2150                 if prevp.parent.type in {
2151                     syms.arglist,
2152                     syms.argument,
2153                     syms.parameters,
2154                     syms.varargslist,
2155                 }:
2156                     return NO
2157
2158                 elif prevp.parent.type == syms.typedargslist:
2159                     # A bit hacky: if the equal sign has whitespace, it means we
2160                     # previously found it's a typed argument.  So, we're using
2161                     # that, too.
2162                     return prevp.prefix
2163
2164         elif prevp.type in VARARGS_SPECIALS:
2165             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2166                 return NO
2167
2168         elif prevp.type == token.COLON:
2169             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
2170                 return SPACE if complex_subscript else NO
2171
2172         elif (
2173             prevp.parent
2174             and prevp.parent.type == syms.factor
2175             and prevp.type in MATH_OPERATORS
2176         ):
2177             return NO
2178
2179         elif (
2180             prevp.type == token.RIGHTSHIFT
2181             and prevp.parent
2182             and prevp.parent.type == syms.shift_expr
2183             and prevp.prev_sibling
2184             and prevp.prev_sibling.type == token.NAME
2185             and prevp.prev_sibling.value == "print"  # type: ignore
2186         ):
2187             # Python 2 print chevron
2188             return NO
2189         elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator:
2190             # no space in decorators
2191             return NO
2192
2193     elif prev.type in OPENING_BRACKETS:
2194         return NO
2195
2196     if p.type in {syms.parameters, syms.arglist}:
2197         # untyped function signatures or calls
2198         if not prev or prev.type != token.COMMA:
2199             return NO
2200
2201     elif p.type == syms.varargslist:
2202         # lambdas
2203         if prev and prev.type != token.COMMA:
2204             return NO
2205
2206     elif p.type == syms.typedargslist:
2207         # typed function signatures
2208         if not prev:
2209             return NO
2210
2211         if t == token.EQUAL:
2212             if prev.type != syms.tname:
2213                 return NO
2214
2215         elif prev.type == token.EQUAL:
2216             # A bit hacky: if the equal sign has whitespace, it means we
2217             # previously found it's a typed argument.  So, we're using that, too.
2218             return prev.prefix
2219
2220         elif prev.type != token.COMMA:
2221             return NO
2222
2223     elif p.type == syms.tname:
2224         # type names
2225         if not prev:
2226             prevp = preceding_leaf(p)
2227             if not prevp or prevp.type != token.COMMA:
2228                 return NO
2229
2230     elif p.type == syms.trailer:
2231         # attributes and calls
2232         if t == token.LPAR or t == token.RPAR:
2233             return NO
2234
2235         if not prev:
2236             if t == token.DOT:
2237                 prevp = preceding_leaf(p)
2238                 if not prevp or prevp.type != token.NUMBER:
2239                     return NO
2240
2241             elif t == token.LSQB:
2242                 return NO
2243
2244         elif prev.type != token.COMMA:
2245             return NO
2246
2247     elif p.type == syms.argument:
2248         # single argument
2249         if t == token.EQUAL:
2250             return NO
2251
2252         if not prev:
2253             prevp = preceding_leaf(p)
2254             if not prevp or prevp.type == token.LPAR:
2255                 return NO
2256
2257         elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
2258             return NO
2259
2260     elif p.type == syms.decorator:
2261         # decorators
2262         return NO
2263
2264     elif p.type == syms.dotted_name:
2265         if prev:
2266             return NO
2267
2268         prevp = preceding_leaf(p)
2269         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
2270             return NO
2271
2272     elif p.type == syms.classdef:
2273         if t == token.LPAR:
2274             return NO
2275
2276         if prev and prev.type == token.LPAR:
2277             return NO
2278
2279     elif p.type in {syms.subscript, syms.sliceop}:
2280         # indexing
2281         if not prev:
2282             assert p.parent is not None, "subscripts are always parented"
2283             if p.parent.type == syms.subscriptlist:
2284                 return SPACE
2285
2286             return NO
2287
2288         elif not complex_subscript:
2289             return NO
2290
2291     elif p.type == syms.atom:
2292         if prev and t == token.DOT:
2293             # dots, but not the first one.
2294             return NO
2295
2296     elif p.type == syms.dictsetmaker:
2297         # dict unpacking
2298         if prev and prev.type == token.DOUBLESTAR:
2299             return NO
2300
2301     elif p.type in {syms.factor, syms.star_expr}:
2302         # unary ops
2303         if not prev:
2304             prevp = preceding_leaf(p)
2305             if not prevp or prevp.type in OPENING_BRACKETS:
2306                 return NO
2307
2308             prevp_parent = prevp.parent
2309             assert prevp_parent is not None
2310             if prevp.type == token.COLON and prevp_parent.type in {
2311                 syms.subscript,
2312                 syms.sliceop,
2313             }:
2314                 return NO
2315
2316             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
2317                 return NO
2318
2319         elif t in {token.NAME, token.NUMBER, token.STRING}:
2320             return NO
2321
2322     elif p.type == syms.import_from:
2323         if t == token.DOT:
2324             if prev and prev.type == token.DOT:
2325                 return NO
2326
2327         elif t == token.NAME:
2328             if v == "import":
2329                 return SPACE
2330
2331             if prev and prev.type == token.DOT:
2332                 return NO
2333
2334     elif p.type == syms.sliceop:
2335         return NO
2336
2337     return SPACE
2338
2339
2340 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
2341     """Return the first leaf that precedes `node`, if any."""
2342     while node:
2343         res = node.prev_sibling
2344         if res:
2345             if isinstance(res, Leaf):
2346                 return res
2347
2348             try:
2349                 return list(res.leaves())[-1]
2350
2351             except IndexError:
2352                 return None
2353
2354         node = node.parent
2355     return None
2356
2357
2358 def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
2359     """Return if the `node` and its previous siblings match types against the provided
2360     list of tokens; the provided `node`has its type matched against the last element in
2361     the list.  `None` can be used as the first element to declare that the start of the
2362     list is anchored at the start of its parent's children."""
2363     if not tokens:
2364         return True
2365     if tokens[-1] is None:
2366         return node is None
2367     if not node:
2368         return False
2369     if node.type != tokens[-1]:
2370         return False
2371     return prev_siblings_are(node.prev_sibling, tokens[:-1])
2372
2373
2374 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
2375     """Return the child of `ancestor` that contains `descendant`."""
2376     node: Optional[LN] = descendant
2377     while node and node.parent != ancestor:
2378         node = node.parent
2379     return node
2380
2381
2382 def container_of(leaf: Leaf) -> LN:
2383     """Return `leaf` or one of its ancestors that is the topmost container of it.
2384
2385     By "container" we mean a node where `leaf` is the very first child.
2386     """
2387     same_prefix = leaf.prefix
2388     container: LN = leaf
2389     while container:
2390         parent = container.parent
2391         if parent is None:
2392             break
2393
2394         if parent.children[0].prefix != same_prefix:
2395             break
2396
2397         if parent.type == syms.file_input:
2398             break
2399
2400         if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
2401             break
2402
2403         container = parent
2404     return container
2405
2406
2407 def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2408     """Return the priority of the `leaf` delimiter, given a line break after it.
2409
2410     The delimiter priorities returned here are from those delimiters that would
2411     cause a line break after themselves.
2412
2413     Higher numbers are higher priority.
2414     """
2415     if leaf.type == token.COMMA:
2416         return COMMA_PRIORITY
2417
2418     return 0
2419
2420
2421 def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2422     """Return the priority of the `leaf` delimiter, given a line break before it.
2423
2424     The delimiter priorities returned here are from those delimiters that would
2425     cause a line break before themselves.
2426
2427     Higher numbers are higher priority.
2428     """
2429     if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2430         # * and ** might also be MATH_OPERATORS but in this case they are not.
2431         # Don't treat them as a delimiter.
2432         return 0
2433
2434     if (
2435         leaf.type == token.DOT
2436         and leaf.parent
2437         and leaf.parent.type not in {syms.import_from, syms.dotted_name}
2438         and (previous is None or previous.type in CLOSING_BRACKETS)
2439     ):
2440         return DOT_PRIORITY
2441
2442     if (
2443         leaf.type in MATH_OPERATORS
2444         and leaf.parent
2445         and leaf.parent.type not in {syms.factor, syms.star_expr}
2446     ):
2447         return MATH_PRIORITIES[leaf.type]
2448
2449     if leaf.type in COMPARATORS:
2450         return COMPARATOR_PRIORITY
2451
2452     if (
2453         leaf.type == token.STRING
2454         and previous is not None
2455         and previous.type == token.STRING
2456     ):
2457         return STRING_PRIORITY
2458
2459     if leaf.type not in {token.NAME, token.ASYNC}:
2460         return 0
2461
2462     if (
2463         leaf.value == "for"
2464         and leaf.parent
2465         and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
2466         or leaf.type == token.ASYNC
2467     ):
2468         if (
2469             not isinstance(leaf.prev_sibling, Leaf)
2470             or leaf.prev_sibling.value != "async"
2471         ):
2472             return COMPREHENSION_PRIORITY
2473
2474     if (
2475         leaf.value == "if"
2476         and leaf.parent
2477         and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
2478     ):
2479         return COMPREHENSION_PRIORITY
2480
2481     if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
2482         return TERNARY_PRIORITY
2483
2484     if leaf.value == "is":
2485         return COMPARATOR_PRIORITY
2486
2487     if (
2488         leaf.value == "in"
2489         and leaf.parent
2490         and leaf.parent.type in {syms.comp_op, syms.comparison}
2491         and not (
2492             previous is not None
2493             and previous.type == token.NAME
2494             and previous.value == "not"
2495         )
2496     ):
2497         return COMPARATOR_PRIORITY
2498
2499     if (
2500         leaf.value == "not"
2501         and leaf.parent
2502         and leaf.parent.type == syms.comp_op
2503         and not (
2504             previous is not None
2505             and previous.type == token.NAME
2506             and previous.value == "is"
2507         )
2508     ):
2509         return COMPARATOR_PRIORITY
2510
2511     if leaf.value in LOGIC_OPERATORS and leaf.parent:
2512         return LOGIC_PRIORITY
2513
2514     return 0
2515
2516
2517 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2518 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
2519
2520
2521 def generate_comments(leaf: LN) -> Iterator[Leaf]:
2522     """Clean the prefix of the `leaf` and generate comments from it, if any.
2523
2524     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
2525     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
2526     move because it does away with modifying the grammar to include all the
2527     possible places in which comments can be placed.
2528
2529     The sad consequence for us though is that comments don't "belong" anywhere.
2530     This is why this function generates simple parentless Leaf objects for
2531     comments.  We simply don't know what the correct parent should be.
2532
2533     No matter though, we can live without this.  We really only need to
2534     differentiate between inline and standalone comments.  The latter don't
2535     share the line with any code.
2536
2537     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
2538     are emitted with a fake STANDALONE_COMMENT token identifier.
2539     """
2540     for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
2541         yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
2542
2543
2544 @dataclass
2545 class ProtoComment:
2546     """Describes a piece of syntax that is a comment.
2547
2548     It's not a :class:`blib2to3.pytree.Leaf` so that:
2549
2550     * it can be cached (`Leaf` objects should not be reused more than once as
2551       they store their lineno, column, prefix, and parent information);
2552     * `newlines` and `consumed` fields are kept separate from the `value`. This
2553       simplifies handling of special marker comments like ``# fmt: off/on``.
2554     """
2555
2556     type: int  # token.COMMENT or STANDALONE_COMMENT
2557     value: str  # content of the comment
2558     newlines: int  # how many newlines before the comment
2559     consumed: int  # how many characters of the original leaf's prefix did we consume
2560
2561
2562 @lru_cache(maxsize=4096)
2563 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2564     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2565     result: List[ProtoComment] = []
2566     if not prefix or "#" not in prefix:
2567         return result
2568
2569     consumed = 0
2570     nlines = 0
2571     ignored_lines = 0
2572     for index, line in enumerate(prefix.split("\n")):
2573         consumed += len(line) + 1  # adding the length of the split '\n'
2574         line = line.lstrip()
2575         if not line:
2576             nlines += 1
2577         if not line.startswith("#"):
2578             # Escaped newlines outside of a comment are not really newlines at
2579             # all. We treat a single-line comment following an escaped newline
2580             # as a simple trailing comment.
2581             if line.endswith("\\"):
2582                 ignored_lines += 1
2583             continue
2584
2585         if index == ignored_lines and not is_endmarker:
2586             comment_type = token.COMMENT  # simple trailing comment
2587         else:
2588             comment_type = STANDALONE_COMMENT
2589         comment = make_comment(line)
2590         result.append(
2591             ProtoComment(
2592                 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2593             )
2594         )
2595         nlines = 0
2596     return result
2597
2598
2599 def make_comment(content: str) -> str:
2600     """Return a consistently formatted comment from the given `content` string.
2601
2602     All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
2603     space between the hash sign and the content.
2604
2605     If `content` didn't start with a hash sign, one is provided.
2606     """
2607     content = content.rstrip()
2608     if not content:
2609         return "#"
2610
2611     if content[0] == "#":
2612         content = content[1:]
2613     if content and content[0] not in " !:#'%":
2614         content = " " + content
2615     return "#" + content
2616
2617
2618 def transform_line(
2619     line: Line, mode: Mode, features: Collection[Feature] = ()
2620 ) -> Iterator[Line]:
2621     """Transform a `line`, potentially splitting it into many lines.
2622
2623     They should fit in the allotted `line_length` but might not be able to.
2624
2625     `features` are syntactical features that may be used in the output.
2626     """
2627     if line.is_comment:
2628         yield line
2629         return
2630
2631     line_str = line_to_string(line)
2632
2633     def init_st(ST: Type[StringTransformer]) -> StringTransformer:
2634         """Initialize StringTransformer"""
2635         return ST(mode.line_length, mode.string_normalization)
2636
2637     string_merge = init_st(StringMerger)
2638     string_paren_strip = init_st(StringParenStripper)
2639     string_split = init_st(StringSplitter)
2640     string_paren_wrap = init_st(StringParenWrapper)
2641
2642     transformers: List[Transformer]
2643     if (
2644         not line.contains_uncollapsable_type_comments()
2645         and not line.should_explode
2646         and (
2647             is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
2648             or line.contains_unsplittable_type_ignore()
2649         )
2650         and not (line.inside_brackets and line.contains_standalone_comments())
2651     ):
2652         # Only apply basic string preprocessing, since lines shouldn't be split here.
2653         if mode.experimental_string_processing:
2654             transformers = [string_merge, string_paren_strip]
2655         else:
2656             transformers = []
2657     elif line.is_def:
2658         transformers = [left_hand_split]
2659     else:
2660
2661         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
2662             """Wraps calls to `right_hand_split`.
2663
2664             The calls increasingly `omit` right-hand trailers (bracket pairs with
2665             content), meaning the trailers get glued together to split on another
2666             bracket pair instead.
2667             """
2668             for omit in generate_trailers_to_omit(line, mode.line_length):
2669                 lines = list(
2670                     right_hand_split(line, mode.line_length, features, omit=omit)
2671                 )
2672                 # Note: this check is only able to figure out if the first line of the
2673                 # *current* transformation fits in the line length.  This is true only
2674                 # for simple cases.  All others require running more transforms via
2675                 # `transform_line()`.  This check doesn't know if those would succeed.
2676                 if is_line_short_enough(lines[0], line_length=mode.line_length):
2677                     yield from lines
2678                     return
2679
2680             # All splits failed, best effort split with no omits.
2681             # This mostly happens to multiline strings that are by definition
2682             # reported as not fitting a single line, as well as lines that contain
2683             # trailing commas (those have to be exploded).
2684             yield from right_hand_split(
2685                 line, line_length=mode.line_length, features=features
2686             )
2687
2688         if mode.experimental_string_processing:
2689             if line.inside_brackets:
2690                 transformers = [
2691                     string_merge,
2692                     string_paren_strip,
2693                     string_split,
2694                     delimiter_split,
2695                     standalone_comment_split,
2696                     string_paren_wrap,
2697                     rhs,
2698                 ]
2699             else:
2700                 transformers = [
2701                     string_merge,
2702                     string_paren_strip,
2703                     string_split,
2704                     string_paren_wrap,
2705                     rhs,
2706                 ]
2707         else:
2708             if line.inside_brackets:
2709                 transformers = [delimiter_split, standalone_comment_split, rhs]
2710             else:
2711                 transformers = [rhs]
2712
2713     for transform in transformers:
2714         # We are accumulating lines in `result` because we might want to abort
2715         # mission and return the original line in the end, or attempt a different
2716         # split altogether.
2717         try:
2718             result = run_transformer(line, transform, mode, features, line_str=line_str)
2719         except CannotTransform:
2720             continue
2721         else:
2722             yield from result
2723             break
2724
2725     else:
2726         yield line
2727
2728
2729 @dataclass  # type: ignore
2730 class StringTransformer(ABC):
2731     """
2732     An implementation of the Transformer protocol that relies on its
2733     subclasses overriding the template methods `do_match(...)` and
2734     `do_transform(...)`.
2735
2736     This Transformer works exclusively on strings (for example, by merging
2737     or splitting them).
2738
2739     The following sections can be found among the docstrings of each concrete
2740     StringTransformer subclass.
2741
2742     Requirements:
2743         Which requirements must be met of the given Line for this
2744         StringTransformer to be applied?
2745
2746     Transformations:
2747         If the given Line meets all of the above requirements, which string
2748         transformations can you expect to be applied to it by this
2749         StringTransformer?
2750
2751     Collaborations:
2752         What contractual agreements does this StringTransformer have with other
2753         StringTransfomers? Such collaborations should be eliminated/minimized
2754         as much as possible.
2755     """
2756
2757     line_length: int
2758     normalize_strings: bool
2759     __name__ = "StringTransformer"
2760
2761     @abstractmethod
2762     def do_match(self, line: Line) -> TMatchResult:
2763         """
2764         Returns:
2765             * Ok(string_idx) such that `line.leaves[string_idx]` is our target
2766             string, if a match was able to be made.
2767                 OR
2768             * Err(CannotTransform), if a match was not able to be made.
2769         """
2770
2771     @abstractmethod
2772     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2773         """
2774         Yields:
2775             * Ok(new_line) where new_line is the new transformed line.
2776                 OR
2777             * Err(CannotTransform) if the transformation failed for some reason. The
2778             `do_match(...)` template method should usually be used to reject
2779             the form of the given Line, but in some cases it is difficult to
2780             know whether or not a Line meets the StringTransformer's
2781             requirements until the transformation is already midway.
2782
2783         Side Effects:
2784             This method should NOT mutate @line directly, but it MAY mutate the
2785             Line's underlying Node structure. (WARNING: If the underlying Node
2786             structure IS altered, then this method should NOT be allowed to
2787             yield an CannotTransform after that point.)
2788         """
2789
2790     def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
2791         """
2792         StringTransformer instances have a call signature that mirrors that of
2793         the Transformer type.
2794
2795         Raises:
2796             CannotTransform(...) if the concrete StringTransformer class is unable
2797             to transform @line.
2798         """
2799         # Optimization to avoid calling `self.do_match(...)` when the line does
2800         # not contain any string.
2801         if not any(leaf.type == token.STRING for leaf in line.leaves):
2802             raise CannotTransform("There are no strings in this line.")
2803
2804         match_result = self.do_match(line)
2805
2806         if isinstance(match_result, Err):
2807             cant_transform = match_result.err()
2808             raise CannotTransform(
2809                 f"The string transformer {self.__class__.__name__} does not recognize"
2810                 " this line as one that it can transform."
2811             ) from cant_transform
2812
2813         string_idx = match_result.ok()
2814
2815         for line_result in self.do_transform(line, string_idx):
2816             if isinstance(line_result, Err):
2817                 cant_transform = line_result.err()
2818                 raise CannotTransform(
2819                     "StringTransformer failed while attempting to transform string."
2820                 ) from cant_transform
2821             line = line_result.ok()
2822             yield line
2823
2824
2825 @dataclass
2826 class CustomSplit:
2827     """A custom (i.e. manual) string split.
2828
2829     A single CustomSplit instance represents a single substring.
2830
2831     Examples:
2832         Consider the following string:
2833         ```
2834         "Hi there friend."
2835         " This is a custom"
2836         f" string {split}."
2837         ```
2838
2839         This string will correspond to the following three CustomSplit instances:
2840         ```
2841         CustomSplit(False, 16)
2842         CustomSplit(False, 17)
2843         CustomSplit(True, 16)
2844         ```
2845     """
2846
2847     has_prefix: bool
2848     break_idx: int
2849
2850
2851 class CustomSplitMapMixin:
2852     """
2853     This mixin class is used to map merged strings to a sequence of
2854     CustomSplits, which will then be used to re-split the strings iff none of
2855     the resultant substrings go over the configured max line length.
2856     """
2857
2858     _Key = Tuple[StringID, str]
2859     _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)
2860
2861     @staticmethod
2862     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
2863         """
2864         Returns:
2865             A unique identifier that is used internally to map @string to a
2866             group of custom splits.
2867         """
2868         return (id(string), string)
2869
2870     def add_custom_splits(
2871         self, string: str, custom_splits: Iterable[CustomSplit]
2872     ) -> None:
2873         """Custom Split Map Setter Method
2874
2875         Side Effects:
2876             Adds a mapping from @string to the custom splits @custom_splits.
2877         """
2878         key = self._get_key(string)
2879         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
2880
2881     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
2882         """Custom Split Map Getter Method
2883
2884         Returns:
2885             * A list of the custom splits that are mapped to @string, if any
2886             exist.
2887                 OR
2888             * [], otherwise.
2889
2890         Side Effects:
2891             Deletes the mapping between @string and its associated custom
2892             splits (which are returned to the caller).
2893         """
2894         key = self._get_key(string)
2895
2896         custom_splits = self._CUSTOM_SPLIT_MAP[key]
2897         del self._CUSTOM_SPLIT_MAP[key]
2898
2899         return list(custom_splits)
2900
2901     def has_custom_splits(self, string: str) -> bool:
2902         """
2903         Returns:
2904             True iff @string is associated with a set of custom splits.
2905         """
2906         key = self._get_key(string)
2907         return key in self._CUSTOM_SPLIT_MAP
2908
2909
2910 class StringMerger(CustomSplitMapMixin, StringTransformer):
2911     """StringTransformer that merges strings together.
2912
2913     Requirements:
2914         (A) The line contains adjacent strings such that ALL of the validation checks
2915         listed in StringMerger.__validate_msg(...)'s docstring pass.
2916             OR
2917         (B) The line contains a string which uses line continuation backslashes.
2918
2919     Transformations:
2920         Depending on which of the two requirements above where met, either:
2921
2922         (A) The string group associated with the target string is merged.
2923             OR
2924         (B) All line-continuation backslashes are removed from the target string.
2925
2926     Collaborations:
2927         StringMerger provides custom split information to StringSplitter.
2928     """
2929
2930     def do_match(self, line: Line) -> TMatchResult:
2931         LL = line.leaves
2932
2933         is_valid_index = is_valid_index_factory(LL)
2934
2935         for (i, leaf) in enumerate(LL):
2936             if (
2937                 leaf.type == token.STRING
2938                 and is_valid_index(i + 1)
2939                 and LL[i + 1].type == token.STRING
2940             ):
2941                 return Ok(i)
2942
2943             if leaf.type == token.STRING and "\\\n" in leaf.value:
2944                 return Ok(i)
2945
2946         return TErr("This line has no strings that need merging.")
2947
2948     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2949         new_line = line
2950         rblc_result = self.__remove_backslash_line_continuation_chars(
2951             new_line, string_idx
2952         )
2953         if isinstance(rblc_result, Ok):
2954             new_line = rblc_result.ok()
2955
2956         msg_result = self.__merge_string_group(new_line, string_idx)
2957         if isinstance(msg_result, Ok):
2958             new_line = msg_result.ok()
2959
2960         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
2961             msg_cant_transform = msg_result.err()
2962             rblc_cant_transform = rblc_result.err()
2963             cant_transform = CannotTransform(
2964                 "StringMerger failed to merge any strings in this line."
2965             )
2966
2967             # Chain the errors together using `__cause__`.
2968             msg_cant_transform.__cause__ = rblc_cant_transform
2969             cant_transform.__cause__ = msg_cant_transform
2970
2971             yield Err(cant_transform)
2972         else:
2973             yield Ok(new_line)
2974
2975     @staticmethod
2976     def __remove_backslash_line_continuation_chars(
2977         line: Line, string_idx: int
2978     ) -> TResult[Line]:
2979         """
2980         Merge strings that were split across multiple lines using
2981         line-continuation backslashes.
2982
2983         Returns:
2984             Ok(new_line), if @line contains backslash line-continuation
2985             characters.
2986                 OR
2987             Err(CannotTransform), otherwise.
2988         """
2989         LL = line.leaves
2990
2991         string_leaf = LL[string_idx]
2992         if not (
2993             string_leaf.type == token.STRING
2994             and "\\\n" in string_leaf.value
2995             and not has_triple_quotes(string_leaf.value)
2996         ):
2997             return TErr(
2998                 f"String leaf {string_leaf} does not contain any backslash line"
2999                 " continuation characters."
3000             )
3001
3002         new_line = line.clone()
3003         new_line.comments = line.comments.copy()
3004         append_leaves(new_line, line, LL)
3005
3006         new_string_leaf = new_line.leaves[string_idx]
3007         new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
3008
3009         return Ok(new_line)
3010
3011     def __merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
3012         """
3013         Merges string group (i.e. set of adjacent strings) where the first
3014         string in the group is `line.leaves[string_idx]`.
3015
3016         Returns:
3017             Ok(new_line), if ALL of the validation checks found in
3018             __validate_msg(...) pass.
3019                 OR
3020             Err(CannotTransform), otherwise.
3021         """
3022         LL = line.leaves
3023
3024         is_valid_index = is_valid_index_factory(LL)
3025
3026         vresult = self.__validate_msg(line, string_idx)
3027         if isinstance(vresult, Err):
3028             return vresult
3029
3030         # If the string group is wrapped inside an Atom node, we must make sure
3031         # to later replace that Atom with our new (merged) string leaf.
3032         atom_node = LL[string_idx].parent
3033
3034         # We will place BREAK_MARK in between every two substrings that we
3035         # merge. We will then later go through our final result and use the
3036         # various instances of BREAK_MARK we find to add the right values to
3037         # the custom split map.
3038         BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"
3039
3040         QUOTE = LL[string_idx].value[-1]
3041
3042         def make_naked(string: str, string_prefix: str) -> str:
3043             """Strip @string (i.e. make it a "naked" string)
3044
3045             Pre-conditions:
3046                 * assert_is_leaf_string(@string)
3047
3048             Returns:
3049                 A string that is identical to @string except that
3050                 @string_prefix has been stripped, the surrounding QUOTE
3051                 characters have been removed, and any remaining QUOTE
3052                 characters have been escaped.
3053             """
3054             assert_is_leaf_string(string)
3055
3056             RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
3057             naked_string = string[len(string_prefix) + 1 : -1]
3058             naked_string = re.sub(
3059                 "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
3060             )
3061             return naked_string
3062
3063         # Holds the CustomSplit objects that will later be added to the custom
3064         # split map.
3065         custom_splits = []
3066
3067         # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
3068         prefix_tracker = []
3069
3070         # Sets the 'prefix' variable. This is the prefix that the final merged
3071         # string will have.
3072         next_str_idx = string_idx
3073         prefix = ""
3074         while (
3075             not prefix
3076             and is_valid_index(next_str_idx)
3077             and LL[next_str_idx].type == token.STRING
3078         ):
3079             prefix = get_string_prefix(LL[next_str_idx].value)
3080             next_str_idx += 1
3081
3082         # The next loop merges the string group. The final string will be
3083         # contained in 'S'.
3084         #
3085         # The following convenience variables are used:
3086         #
3087         #   S: string
3088         #   NS: naked string
3089         #   SS: next string
3090         #   NSS: naked next string
3091         S = ""
3092         NS = ""
3093         num_of_strings = 0
3094         next_str_idx = string_idx
3095         while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
3096             num_of_strings += 1
3097
3098             SS = LL[next_str_idx].value
3099             next_prefix = get_string_prefix(SS)
3100
3101             # If this is an f-string group but this substring is not prefixed
3102             # with 'f'...
3103             if "f" in prefix and "f" not in next_prefix:
3104                 # Then we must escape any braces contained in this substring.
3105                 SS = re.subf(r"(\{|\})", "{1}{1}", SS)
3106
3107             NSS = make_naked(SS, next_prefix)
3108
3109             has_prefix = bool(next_prefix)
3110             prefix_tracker.append(has_prefix)
3111
3112             S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
3113             NS = make_naked(S, prefix)
3114
3115             next_str_idx += 1
3116
3117         S_leaf = Leaf(token.STRING, S)
3118         if self.normalize_strings:
3119             normalize_string_quotes(S_leaf)
3120
3121         # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
3122         temp_string = S_leaf.value[len(prefix) + 1 : -1]
3123         for has_prefix in prefix_tracker:
3124             mark_idx = temp_string.find(BREAK_MARK)
3125             assert (
3126                 mark_idx >= 0
3127             ), "Logic error while filling the custom string breakpoint cache."
3128
3129             temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
3130             breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
3131             custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))
3132
3133         string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))
3134
3135         if atom_node is not None:
3136             replace_child(atom_node, string_leaf)
3137
3138         # Build the final line ('new_line') that this method will later return.
3139         new_line = line.clone()
3140         for (i, leaf) in enumerate(LL):
3141             if i == string_idx:
3142                 new_line.append(string_leaf)
3143
3144             if string_idx <= i < string_idx + num_of_strings:
3145                 for comment_leaf in line.comments_after(LL[i]):
3146                     new_line.append(comment_leaf, preformatted=True)
3147                 continue
3148
3149             append_leaves(new_line, line, [leaf])
3150
3151         self.add_custom_splits(string_leaf.value, custom_splits)
3152         return Ok(new_line)
3153
3154     @staticmethod
3155     def __validate_msg(line: Line, string_idx: int) -> TResult[None]:
3156         """Validate (M)erge (S)tring (G)roup
3157
3158         Transform-time string validation logic for __merge_string_group(...).
3159
3160         Returns:
3161             * Ok(None), if ALL validation checks (listed below) pass.
3162                 OR
3163             * Err(CannotTransform), if any of the following are true:
3164                 - The target string group does not contain ANY stand-alone comments.
3165                 - The target string is not in a string group (i.e. it has no
3166                   adjacent strings).
3167                 - The string group has more than one inline comment.
3168                 - The string group has an inline comment that appears to be a pragma.
3169                 - The set of all string prefixes in the string group is of
3170                   length greater than one and is not equal to {"", "f"}.
3171                 - The string group consists of raw strings.
3172         """
3173         # We first check for "inner" stand-alone comments (i.e. stand-alone
3174         # comments that have a string leaf before them AND after them).
3175         for inc in [1, -1]:
3176             i = string_idx
3177             found_sa_comment = False
3178             is_valid_index = is_valid_index_factory(line.leaves)
3179             while is_valid_index(i) and line.leaves[i].type in [
3180                 token.STRING,
3181                 STANDALONE_COMMENT,
3182             ]:
3183                 if line.leaves[i].type == STANDALONE_COMMENT:
3184                     found_sa_comment = True
3185                 elif found_sa_comment:
3186                     return TErr(
3187                         "StringMerger does NOT merge string groups which contain "
3188                         "stand-alone comments."
3189                     )
3190
3191                 i += inc
3192
3193         num_of_inline_string_comments = 0
3194         set_of_prefixes = set()
3195         num_of_strings = 0
3196         for leaf in line.leaves[string_idx:]:
3197             if leaf.type != token.STRING:
3198                 # If the string group is trailed by a comma, we count the
3199                 # comments trailing the comma to be one of the string group's
3200                 # comments.
3201                 if leaf.type == token.COMMA and id(leaf) in line.comments:
3202                     num_of_inline_string_comments += 1
3203                 break
3204
3205             if has_triple_quotes(leaf.value):
3206                 return TErr("StringMerger does NOT merge multiline strings.")
3207
3208             num_of_strings += 1
3209             prefix = get_string_prefix(leaf.value)
3210             if "r" in prefix:
3211                 return TErr("StringMerger does NOT merge raw strings.")
3212
3213             set_of_prefixes.add(prefix)
3214
3215             if id(leaf) in line.comments:
3216                 num_of_inline_string_comments += 1
3217                 if contains_pragma_comment(line.comments[id(leaf)]):
3218                     return TErr("Cannot merge strings which have pragma comments.")
3219
3220         if num_of_strings < 2:
3221             return TErr(
3222                 f"Not enough strings to merge (num_of_strings={num_of_strings})."
3223             )
3224
3225         if num_of_inline_string_comments > 1:
3226             return TErr(
3227                 f"Too many inline string comments ({num_of_inline_string_comments})."
3228             )
3229
3230         if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
3231             return TErr(f"Too many different prefixes ({set_of_prefixes}).")
3232
3233         return Ok(None)
3234
3235
3236 class StringParenStripper(StringTransformer):
3237     """StringTransformer that strips surrounding parentheses from strings.
3238
3239     Requirements:
3240         The line contains a string which is surrounded by parentheses and:
3241             - The target string is NOT the only argument to a function call).
3242             - If the target string contains a PERCENT, the brackets are not
3243               preceeded or followed by an operator with higher precedence than
3244               PERCENT.
3245
3246     Transformations:
3247         The parentheses mentioned in the 'Requirements' section are stripped.
3248
3249     Collaborations:
3250         StringParenStripper has its own inherent usefulness, but it is also
3251         relied on to clean up the parentheses created by StringParenWrapper (in
3252         the event that they are no longer needed).
3253     """
3254
3255     def do_match(self, line: Line) -> TMatchResult:
3256         LL = line.leaves
3257
3258         is_valid_index = is_valid_index_factory(LL)
3259
3260         for (idx, leaf) in enumerate(LL):
3261             # Should be a string...
3262             if leaf.type != token.STRING:
3263                 continue
3264
3265             # Should be preceded by a non-empty LPAR...
3266             if (
3267                 not is_valid_index(idx - 1)
3268                 or LL[idx - 1].type != token.LPAR
3269                 or is_empty_lpar(LL[idx - 1])
3270             ):
3271                 continue
3272
3273             # That LPAR should NOT be preceded by a function name or a closing
3274             # bracket (which could be a function which returns a function or a
3275             # list/dictionary that contains a function)...
3276             if is_valid_index(idx - 2) and (
3277                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
3278             ):
3279                 continue
3280
3281             string_idx = idx
3282
3283             # Skip the string trailer, if one exists.
3284             string_parser = StringParser()
3285             next_idx = string_parser.parse(LL, string_idx)
3286
3287             # if the leaves in the parsed string include a PERCENT, we need to
3288             # make sure the initial LPAR is NOT preceded by an operator with
3289             # higher or equal precedence to PERCENT
3290             if is_valid_index(idx - 2):
3291                 # mypy can't quite follow unless we name this
3292                 before_lpar = LL[idx - 2]
3293                 if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
3294                     (
3295                         before_lpar.type
3296                         in {
3297                             token.STAR,
3298                             token.AT,
3299                             token.SLASH,
3300                             token.DOUBLESLASH,
3301                             token.PERCENT,
3302                             token.TILDE,
3303                             token.DOUBLESTAR,
3304                             token.AWAIT,
3305                             token.LSQB,
3306                             token.LPAR,
3307                         }
3308                     )
3309                     or (
3310                         # only unary PLUS/MINUS
3311                         before_lpar.parent
3312                         and before_lpar.parent.type == syms.factor
3313                         and (before_lpar.type in {token.PLUS, token.MINUS})
3314                     )
3315                 ):
3316                     continue
3317
3318             # Should be followed by a non-empty RPAR...
3319             if (
3320                 is_valid_index(next_idx)
3321                 and LL[next_idx].type == token.RPAR
3322                 and not is_empty_rpar(LL[next_idx])
3323             ):
3324                 # That RPAR should NOT be followed by anything with higher
3325                 # precedence than PERCENT
3326                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
3327                     token.DOUBLESTAR,
3328                     token.LSQB,
3329                     token.LPAR,
3330                     token.DOT,
3331                 }:
3332                     continue
3333
3334                 return Ok(string_idx)
3335
3336         return TErr("This line has no strings wrapped in parens.")
3337
3338     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3339         LL = line.leaves
3340
3341         string_parser = StringParser()
3342         rpar_idx = string_parser.parse(LL, string_idx)
3343
3344         for leaf in (LL[string_idx - 1], LL[rpar_idx]):
3345             if line.comments_after(leaf):
3346                 yield TErr(
3347                     "Will not strip parentheses which have comments attached to them."
3348                 )
3349                 return
3350
3351         new_line = line.clone()
3352         new_line.comments = line.comments.copy()
3353         try:
3354             append_leaves(new_line, line, LL[: string_idx - 1])
3355         except BracketMatchError:
3356             # HACK: I believe there is currently a bug somewhere in
3357             # right_hand_split() that is causing brackets to not be tracked
3358             # properly by a shared BracketTracker.
3359             append_leaves(new_line, line, LL[: string_idx - 1], preformatted=True)
3360
3361         string_leaf = Leaf(token.STRING, LL[string_idx].value)
3362         LL[string_idx - 1].remove()
3363         replace_child(LL[string_idx], string_leaf)
3364         new_line.append(string_leaf)
3365
3366         append_leaves(
3367             new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
3368         )
3369
3370         LL[rpar_idx].remove()
3371
3372         yield Ok(new_line)
3373
3374
3375 class BaseStringSplitter(StringTransformer):
3376     """
3377     Abstract class for StringTransformers which transform a Line's strings by splitting
3378     them or placing them on their own lines where necessary to avoid going over
3379     the configured line length.
3380
3381     Requirements:
3382         * The target string value is responsible for the line going over the
3383         line length limit. It follows that after all of black's other line
3384         split methods have been exhausted, this line (or one of the resulting
3385         lines after all line splits are performed) would still be over the
3386         line_length limit unless we split this string.
3387             AND
3388         * The target string is NOT a "pointless" string (i.e. a string that has
3389         no parent or siblings).
3390             AND
3391         * The target string is not followed by an inline comment that appears
3392         to be a pragma.
3393             AND
3394         * The target string is not a multiline (i.e. triple-quote) string.
3395     """
3396
3397     @abstractmethod
3398     def do_splitter_match(self, line: Line) -> TMatchResult:
3399         """
3400         BaseStringSplitter asks its clients to override this method instead of
3401         `StringTransformer.do_match(...)`.
3402
3403         Follows the same protocol as `StringTransformer.do_match(...)`.
3404
3405         Refer to `help(StringTransformer.do_match)` for more information.
3406         """
3407
3408     def do_match(self, line: Line) -> TMatchResult:
3409         match_result = self.do_splitter_match(line)
3410         if isinstance(match_result, Err):
3411             return match_result
3412
3413         string_idx = match_result.ok()
3414         vresult = self.__validate(line, string_idx)
3415         if isinstance(vresult, Err):
3416             return vresult
3417
3418         return match_result
3419
3420     def __validate(self, line: Line, string_idx: int) -> TResult[None]:
3421         """
3422         Checks that @line meets all of the requirements listed in this classes'
3423         docstring. Refer to `help(BaseStringSplitter)` for a detailed
3424         description of those requirements.
3425
3426         Returns:
3427             * Ok(None), if ALL of the requirements are met.
3428                 OR
3429             * Err(CannotTransform), if ANY of the requirements are NOT met.
3430         """
3431         LL = line.leaves
3432
3433         string_leaf = LL[string_idx]
3434
3435         max_string_length = self.__get_max_string_length(line, string_idx)
3436         if len(string_leaf.value) <= max_string_length:
3437             return TErr(
3438                 "The string itself is not what is causing this line to be too long."
3439             )
3440
3441         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
3442             token.STRING,
3443             token.NEWLINE,
3444         ]:
3445             return TErr(
3446                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
3447                 " no parent)."
3448             )
3449
3450         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
3451             line.comments[id(line.leaves[string_idx])]
3452         ):
3453             return TErr(
3454                 "Line appears to end with an inline pragma comment. Splitting the line"
3455                 " could modify the pragma's behavior."
3456             )
3457
3458         if has_triple_quotes(string_leaf.value):
3459             return TErr("We cannot split multiline strings.")
3460
3461         return Ok(None)
3462
3463     def __get_max_string_length(self, line: Line, string_idx: int) -> int:
3464         """
3465         Calculates the max string length used when attempting to determine
3466         whether or not the target string is responsible for causing the line to
3467         go over the line length limit.
3468
3469         WARNING: This method is tightly coupled to both StringSplitter and
3470         (especially) StringParenWrapper. There is probably a better way to
3471         accomplish what is being done here.
3472
3473         Returns:
3474             max_string_length: such that `line.leaves[string_idx].value >
3475             max_string_length` implies that the target string IS responsible
3476             for causing this line to exceed the line length limit.
3477         """
3478         LL = line.leaves
3479
3480         is_valid_index = is_valid_index_factory(LL)
3481
3482         # We use the shorthand "WMA4" in comments to abbreviate "We must
3483         # account for". When giving examples, we use STRING to mean some/any
3484         # valid string.
3485         #
3486         # Finally, we use the following convenience variables:
3487         #
3488         #   P:  The leaf that is before the target string leaf.
3489         #   N:  The leaf that is after the target string leaf.
3490         #   NN: The leaf that is after N.
3491
3492         # WMA4 the whitespace at the beginning of the line.
3493         offset = line.depth * 4
3494
3495         if is_valid_index(string_idx - 1):
3496             p_idx = string_idx - 1
3497             if (
3498                 LL[string_idx - 1].type == token.LPAR
3499                 and LL[string_idx - 1].value == ""
3500                 and string_idx >= 2
3501             ):
3502                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
3503                 p_idx -= 1
3504
3505             P = LL[p_idx]
3506             if P.type == token.PLUS:
3507                 # WMA4 a space and a '+' character (e.g. `+ STRING`).
3508                 offset += 2
3509
3510             if P.type == token.COMMA:
3511                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
3512                 offset += 3
3513
3514             if P.type in [token.COLON, token.EQUAL, token.NAME]:
3515                 # This conditional branch is meant to handle dictionary keys,
3516                 # variable assignments, 'return STRING' statement lines, and
3517                 # 'else STRING' ternary expression lines.
3518
3519                 # WMA4 a single space.
3520                 offset += 1
3521
3522                 # WMA4 the lengths of any leaves that came before that space,
3523                 # but after any closing bracket before that space.
3524                 for leaf in reversed(LL[: p_idx + 1]):
3525                     offset += len(str(leaf))
3526                     if leaf.type in CLOSING_BRACKETS:
3527                         break
3528
3529         if is_valid_index(string_idx + 1):
3530             N = LL[string_idx + 1]
3531             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
3532                 # If the next leaf is an empty RPAR placeholder, we should skip it.
3533                 N = LL[string_idx + 2]
3534
3535             if N.type == token.COMMA:
3536                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
3537                 offset += 1
3538
3539             if is_valid_index(string_idx + 2):
3540                 NN = LL[string_idx + 2]
3541
3542                 if N.type == token.DOT and NN.type == token.NAME:
3543                     # This conditional branch is meant to handle method calls invoked
3544                     # off of a string literal up to and including the LPAR character.
3545
3546                     # WMA4 the '.' character.
3547                     offset += 1
3548
3549                     if (
3550                         is_valid_index(string_idx + 3)
3551                         and LL[string_idx + 3].type == token.LPAR
3552                     ):
3553                         # WMA4 the left parenthesis character.
3554                         offset += 1
3555
3556                     # WMA4 the length of the method's name.
3557                     offset += len(NN.value)
3558
3559         has_comments = False
3560         for comment_leaf in line.comments_after(LL[string_idx]):
3561             if not has_comments:
3562                 has_comments = True
3563                 # WMA4 two spaces before the '#' character.
3564                 offset += 2
3565
3566             # WMA4 the length of the inline comment.
3567             offset += len(comment_leaf.value)
3568
3569         max_string_length = self.line_length - offset
3570         return max_string_length
3571
3572
3573 class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
3574     """
3575     StringTransformer that splits "atom" strings (i.e. strings which exist on
3576     lines by themselves).
3577
3578     Requirements:
3579         * The line consists ONLY of a single string (with the exception of a
3580         '+' symbol which MAY exist at the start of the line), MAYBE a string
3581         trailer, and MAYBE a trailing comma.
3582             AND
3583         * All of the requirements listed in BaseStringSplitter's docstring.
3584
3585     Transformations:
3586         The string mentioned in the 'Requirements' section is split into as
3587         many substrings as necessary to adhere to the configured line length.
3588
3589         In the final set of substrings, no substring should be smaller than
3590         MIN_SUBSTR_SIZE characters.
3591
3592         The string will ONLY be split on spaces (i.e. each new substring should
3593         start with a space). Note that the string will NOT be split on a space
3594         which is escaped with a backslash.
3595
3596         If the string is an f-string, it will NOT be split in the middle of an
3597         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
3598         else bar()} is an f-expression).
3599
3600         If the string that is being split has an associated set of custom split
3601         records and those custom splits will NOT result in any line going over
3602         the configured line length, those custom splits are used. Otherwise the
3603         string is split as late as possible (from left-to-right) while still
3604         adhering to the transformation rules listed above.
3605
3606     Collaborations:
3607         StringSplitter relies on StringMerger to construct the appropriate
3608         CustomSplit objects and add them to the custom split map.
3609     """
3610
3611     MIN_SUBSTR_SIZE = 6
3612     # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
3613     RE_FEXPR = r"""
3614     (?<!\{)\{
3615         (?:
3616             [^\{\}]
3617             | \{\{
3618             | \}\}
3619         )+?
3620     (?<!\})(?:\}\})*\}(?!\})
3621     """
3622
3623     def do_splitter_match(self, line: Line) -> TMatchResult:
3624         LL = line.leaves
3625
3626         is_valid_index = is_valid_index_factory(LL)
3627
3628         idx = 0
3629
3630         # The first leaf MAY be a '+' symbol...
3631         if is_valid_index(idx) and LL[idx].type == token.PLUS:
3632             idx += 1
3633
3634         # The next/first leaf MAY be an empty LPAR...
3635         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
3636             idx += 1
3637
3638         # The next/first leaf MUST be a string...
3639         if not is_valid_index(idx) or LL[idx].type != token.STRING:
3640             return TErr("Line does not start with a string.")
3641
3642         string_idx = idx
3643
3644         # Skip the string trailer, if one exists.
3645         string_parser = StringParser()
3646         idx = string_parser.parse(LL, string_idx)
3647
3648         # That string MAY be followed by an empty RPAR...
3649         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
3650             idx += 1
3651
3652         # That string / empty RPAR leaf MAY be followed by a comma...
3653         if is_valid_index(idx) and LL[idx].type == token.COMMA:
3654             idx += 1
3655
3656         # But no more leaves are allowed...
3657         if is_valid_index(idx):
3658             return TErr("This line does not end with a string.")
3659
3660         return Ok(string_idx)
3661
3662     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3663         LL = line.leaves
3664
3665         QUOTE = LL[string_idx].value[-1]
3666
3667         is_valid_index = is_valid_index_factory(LL)
3668         insert_str_child = insert_str_child_factory(LL[string_idx])
3669
3670         prefix = get_string_prefix(LL[string_idx].value)
3671
3672         # We MAY choose to drop the 'f' prefix from substrings that don't
3673         # contain any f-expressions, but ONLY if the original f-string
3674         # contains at least one f-expression. Otherwise, we will alter the AST
3675         # of the program.
3676         drop_pointless_f_prefix = ("f" in prefix) and re.search(
3677             self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
3678         )
3679
3680         first_string_line = True
3681         starts_with_plus = LL[0].type == token.PLUS
3682
3683         def line_needs_plus() -> bool:
3684             return first_string_line and starts_with_plus
3685
3686         def maybe_append_plus(new_line: Line) -> None:
3687             """
3688             Side Effects:
3689                 If @line starts with a plus and this is the first line we are
3690                 constructing, this function appends a PLUS leaf to @new_line
3691                 and replaces the old PLUS leaf in the node structure. Otherwise
3692                 this function does nothing.
3693             """
3694             if line_needs_plus():
3695                 plus_leaf = Leaf(token.PLUS, "+")
3696                 replace_child(LL[0], plus_leaf)
3697                 new_line.append(plus_leaf)
3698
3699         ends_with_comma = (
3700             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
3701         )
3702
3703         def max_last_string() -> int:
3704             """
3705             Returns:
3706                 The max allowed length of the string value used for the last
3707                 line we will construct.
3708             """
3709             result = self.line_length
3710             result -= line.depth * 4
3711             result -= 1 if ends_with_comma else 0
3712             result -= 2 if line_needs_plus() else 0
3713             return result
3714
3715         # --- Calculate Max Break Index (for string value)
3716         # We start with the line length limit
3717         max_break_idx = self.line_length
3718         # The last index of a string of length N is N-1.
3719         max_break_idx -= 1
3720         # Leading whitespace is not present in the string value (e.g. Leaf.value).
3721         max_break_idx -= line.depth * 4
3722         if max_break_idx < 0:
3723             yield TErr(
3724                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
3725                 f" {line.depth}"
3726             )
3727             return
3728
3729         # Check if StringMerger registered any custom splits.
3730         custom_splits = self.pop_custom_splits(LL[string_idx].value)
3731         # We use them ONLY if none of them would produce lines that exceed the
3732         # line limit.
3733         use_custom_breakpoints = bool(
3734             custom_splits
3735             and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
3736         )
3737
3738         # Temporary storage for the remaining chunk of the string line that
3739         # can't fit onto the line currently being constructed.
3740         rest_value = LL[string_idx].value
3741
3742         def more_splits_should_be_made() -> bool:
3743             """
3744             Returns:
3745                 True iff `rest_value` (the remaining string value from the last
3746                 split), should be split again.
3747             """
3748             if use_custom_breakpoints:
3749                 return len(custom_splits) > 1
3750             else:
3751                 return len(rest_value) > max_last_string()
3752
3753         string_line_results: List[Ok[Line]] = []
3754         while more_splits_should_be_made():
3755             if use_custom_breakpoints:
3756                 # Custom User Split (manual)
3757                 csplit = custom_splits.pop(0)
3758                 break_idx = csplit.break_idx
3759             else:
3760                 # Algorithmic Split (automatic)
3761                 max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
3762                 maybe_break_idx = self.__get_break_idx(rest_value, max_bidx)
3763                 if maybe_break_idx is None:
3764                     # If we are unable to algorithmically determine a good split
3765                     # and this string has custom splits registered to it, we
3766                     # fall back to using them--which means we have to start
3767                     # over from the beginning.
3768                     if custom_splits:
3769                         rest_value = LL[string_idx].value
3770                         string_line_results = []
3771                         first_string_line = True
3772                         use_custom_breakpoints = True
3773                         continue
3774
3775                     # Otherwise, we stop splitting here.
3776                     break
3777
3778                 break_idx = maybe_break_idx
3779
3780             # --- Construct `next_value`
3781             next_value = rest_value[:break_idx] + QUOTE
3782             if (
3783                 # Are we allowed to try to drop a pointless 'f' prefix?
3784                 drop_pointless_f_prefix
3785                 # If we are, will we be successful?
3786                 and next_value != self.__normalize_f_string(next_value, prefix)
3787             ):
3788                 # If the current custom split did NOT originally use a prefix,
3789                 # then `csplit.break_idx` will be off by one after removing
3790                 # the 'f' prefix.
3791                 break_idx = (
3792                     break_idx + 1
3793                     if use_custom_breakpoints and not csplit.has_prefix
3794                     else break_idx
3795                 )
3796                 next_value = rest_value[:break_idx] + QUOTE
3797                 next_value = self.__normalize_f_string(next_value, prefix)
3798
3799             # --- Construct `next_leaf`
3800             next_leaf = Leaf(token.STRING, next_value)
3801             insert_str_child(next_leaf)
3802             self.__maybe_normalize_string_quotes(next_leaf)
3803
3804             # --- Construct `next_line`
3805             next_line = line.clone()
3806             maybe_append_plus(next_line)
3807             next_line.append(next_leaf)
3808             string_line_results.append(Ok(next_line))
3809
3810             rest_value = prefix + QUOTE + rest_value[break_idx:]
3811             first_string_line = False
3812
3813         yield from string_line_results
3814
3815         if drop_pointless_f_prefix:
3816             rest_value = self.__normalize_f_string(rest_value, prefix)
3817
3818         rest_leaf = Leaf(token.STRING, rest_value)
3819         insert_str_child(rest_leaf)
3820
3821         # NOTE: I could not find a test case that verifies that the following
3822         # line is actually necessary, but it seems to be. Otherwise we risk
3823         # not normalizing the last substring, right?
3824         self.__maybe_normalize_string_quotes(rest_leaf)
3825
3826         last_line = line.clone()
3827         maybe_append_plus(last_line)
3828
3829         # If there are any leaves to the right of the target string...
3830         if is_valid_index(string_idx + 1):
3831             # We use `temp_value` here to determine how long the last line
3832             # would be if we were to append all the leaves to the right of the
3833             # target string to the last string line.
3834             temp_value = rest_value
3835             for leaf in LL[string_idx + 1 :]:
3836                 temp_value += str(leaf)
3837                 if leaf.type == token.LPAR:
3838                     break
3839
3840             # Try to fit them all on the same line with the last substring...
3841             if (
3842                 len(temp_value) <= max_last_string()
3843                 or LL[string_idx + 1].type == token.COMMA
3844             ):
3845                 last_line.append(rest_leaf)
3846                 append_leaves(last_line, line, LL[string_idx + 1 :])
3847                 yield Ok(last_line)
3848             # Otherwise, place the last substring on one line and everything
3849             # else on a line below that...
3850             else:
3851                 last_line.append(rest_leaf)
3852                 yield Ok(last_line)
3853
3854                 non_string_line = line.clone()
3855                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
3856                 yield Ok(non_string_line)
3857         # Else the target string was the last leaf...
3858         else:
3859             last_line.append(rest_leaf)
3860             last_line.comments = line.comments.copy()
3861             yield Ok(last_line)
3862
3863     def __get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
3864         """
3865         This method contains the algorithm that StringSplitter uses to
3866         determine which character to split each string at.
3867
3868         Args:
3869             @string: The substring that we are attempting to split.
3870             @max_break_idx: The ideal break index. We will return this value if it
3871             meets all the necessary conditions. In the likely event that it
3872             doesn't we will try to find the closest index BELOW @max_break_idx
3873             that does. If that fails, we will expand our search by also
3874             considering all valid indices ABOVE @max_break_idx.
3875
3876         Pre-Conditions:
3877             * assert_is_leaf_string(@string)
3878             * 0 <= @max_break_idx < len(@string)
3879
3880         Returns:
3881             break_idx, if an index is able to be found that meets all of the
3882             conditions listed in the 'Transformations' section of this classes'
3883             docstring.
3884                 OR
3885             None, otherwise.
3886         """
3887         is_valid_index = is_valid_index_factory(string)
3888
3889         assert is_valid_index(max_break_idx)
3890         assert_is_leaf_string(string)
3891
3892         _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
3893
3894         def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
3895             """
3896             Yields:
3897                 All ranges of @string which, if @string were to be split there,
3898                 would result in the splitting of an f-expression (which is NOT
3899                 allowed).
3900             """
3901             nonlocal _fexpr_slices
3902
3903             if _fexpr_slices is None:
3904                 _fexpr_slices = []
3905                 for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
3906                     _fexpr_slices.append(match.span())
3907
3908             yield from _fexpr_slices
3909
3910         is_fstring = "f" in get_string_prefix(string)
3911
3912         def breaks_fstring_expression(i: Index) -> bool:
3913             """
3914             Returns:
3915                 True iff returning @i would result in the splitting of an
3916                 f-expression (which is NOT allowed).
3917             """
3918             if not is_fstring:
3919                 return False
3920
3921             for (start, end) in fexpr_slices():
3922                 if start <= i < end:
3923                     return True
3924
3925             return False
3926
3927         def passes_all_checks(i: Index) -> bool:
3928             """
3929             Returns:
3930                 True iff ALL of the conditions listed in the 'Transformations'
3931                 section of this classes' docstring would be be met by returning @i.
3932             """
3933             is_space = string[i] == " "
3934
3935             is_not_escaped = True
3936             j = i - 1
3937             while is_valid_index(j) and string[j] == "\\":
3938                 is_not_escaped = not is_not_escaped
3939                 j -= 1
3940
3941             is_big_enough = (
3942                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
3943                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
3944             )
3945             return (
3946                 is_space
3947                 and is_not_escaped
3948                 and is_big_enough
3949                 and not breaks_fstring_expression(i)
3950             )
3951
3952         # First, we check all indices BELOW @max_break_idx.
3953         break_idx = max_break_idx
3954         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
3955             break_idx -= 1
3956
3957         if not passes_all_checks(break_idx):
3958             # If that fails, we check all indices ABOVE @max_break_idx.
3959             #
3960             # If we are able to find a valid index here, the next line is going
3961             # to be longer than the specified line length, but it's probably
3962             # better than doing nothing at all.
3963             break_idx = max_break_idx + 1
3964             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
3965                 break_idx += 1
3966
3967             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
3968                 return None
3969
3970         return break_idx
3971
3972     def __maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
3973         if self.normalize_strings:
3974             normalize_string_quotes(leaf)
3975
3976     def __normalize_f_string(self, string: str, prefix: str) -> str:
3977         """
3978         Pre-Conditions:
3979             * assert_is_leaf_string(@string)
3980
3981         Returns:
3982             * If @string is an f-string that contains no f-expressions, we
3983             return a string identical to @string except that the 'f' prefix
3984             has been stripped and all double braces (i.e. '{{' or '}}') have
3985             been normalized (i.e. turned into '{' or '}').
3986                 OR
3987             * Otherwise, we return @string.
3988         """
3989         assert_is_leaf_string(string)
3990
3991         if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
3992             new_prefix = prefix.replace("f", "")
3993
3994             temp = string[len(prefix) :]
3995             temp = re.sub(r"\{\{", "{", temp)
3996             temp = re.sub(r"\}\}", "}", temp)
3997             new_string = temp
3998
3999             return f"{new_prefix}{new_string}"
4000         else:
4001             return string
4002
4003
4004 class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
4005     """
4006     StringTransformer that splits non-"atom" strings (i.e. strings that do not
4007     exist on lines by themselves).
4008
4009     Requirements:
4010         All of the requirements listed in BaseStringSplitter's docstring in
4011         addition to the requirements listed below:
4012
4013         * The line is a return/yield statement, which returns/yields a string.
4014             OR
4015         * The line is part of a ternary expression (e.g. `x = y if cond else
4016         z`) such that the line starts with `else <string>`, where <string> is
4017         some string.
4018             OR
4019         * The line is an assert statement, which ends with a string.
4020             OR
4021         * The line is an assignment statement (e.g. `x = <string>` or `x +=
4022         <string>`) such that the variable is being assigned the value of some
4023         string.
4024             OR
4025         * The line is a dictionary key assignment where some valid key is being
4026         assigned the value of some string.
4027
4028     Transformations:
4029         The chosen string is wrapped in parentheses and then split at the LPAR.
4030
4031         We then have one line which ends with an LPAR and another line that
4032         starts with the chosen string. The latter line is then split again at
4033         the RPAR. This results in the RPAR (and possibly a trailing comma)
4034         being placed on its own line.
4035
4036         NOTE: If any leaves exist to the right of the chosen string (except
4037         for a trailing comma, which would be placed after the RPAR), those
4038         leaves are placed inside the parentheses.  In effect, the chosen
4039         string is not necessarily being "wrapped" by parentheses. We can,
4040         however, count on the LPAR being placed directly before the chosen
4041         string.
4042
4043         In other words, StringParenWrapper creates "atom" strings. These
4044         can then be split again by StringSplitter, if necessary.
4045
4046     Collaborations:
4047         In the event that a string line split by StringParenWrapper is
4048         changed such that it no longer needs to be given its own line,
4049         StringParenWrapper relies on StringParenStripper to clean up the
4050         parentheses it created.
4051     """
4052
4053     def do_splitter_match(self, line: Line) -> TMatchResult:
4054         LL = line.leaves
4055
4056         string_idx = (
4057             self._return_match(LL)
4058             or self._else_match(LL)
4059             or self._assert_match(LL)
4060             or self._assign_match(LL)
4061             or self._dict_match(LL)
4062         )
4063
4064         if string_idx is not None:
4065             string_value = line.leaves[string_idx].value
4066             # If the string has no spaces...
4067             if " " not in string_value:
4068                 # And will still violate the line length limit when split...
4069                 max_string_length = self.line_length - ((line.depth + 1) * 4)
4070                 if len(string_value) > max_string_length:
4071                     # And has no associated custom splits...
4072                     if not self.has_custom_splits(string_value):
4073                         # Then we should NOT put this string on its own line.
4074                         return TErr(
4075                             "We do not wrap long strings in parentheses when the"
4076                             " resultant line would still be over the specified line"
4077                             " length and can't be split further by StringSplitter."
4078                         )
4079             return Ok(string_idx)
4080
4081         return TErr("This line does not contain any non-atomic strings.")
4082
4083     @staticmethod
4084     def _return_match(LL: List[Leaf]) -> Optional[int]:
4085         """
4086         Returns:
4087             string_idx such that @LL[string_idx] is equal to our target (i.e.
4088             matched) string, if this line matches the return/yield statement
4089             requirements listed in the 'Requirements' section of this classes'
4090             docstring.
4091                 OR
4092             None, otherwise.
4093         """
4094         # If this line is apart of a return/yield statement and the first leaf
4095         # contains either the "return" or "yield" keywords...
4096         if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
4097             0
4098         ].value in ["return", "yield"]:
4099             is_valid_index = is_valid_index_factory(LL)
4100
4101             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4102             # The next visible leaf MUST contain a string...
4103             if is_valid_index(idx) and LL[idx].type == token.STRING:
4104                 return idx
4105
4106         return None
4107
4108     @staticmethod
4109     def _else_match(LL: List[Leaf]) -> Optional[int]:
4110         """
4111         Returns:
4112             string_idx such that @LL[string_idx] is equal to our target (i.e.
4113             matched) string, if this line matches the ternary expression
4114             requirements listed in the 'Requirements' section of this classes'
4115             docstring.
4116                 OR
4117             None, otherwise.
4118         """
4119         # If this line is apart of a ternary expression and the first leaf
4120         # contains the "else" keyword...
4121         if (
4122             parent_type(LL[0]) == syms.test
4123             and LL[0].type == token.NAME
4124             and LL[0].value == "else"
4125         ):
4126             is_valid_index = is_valid_index_factory(LL)
4127
4128             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4129             # The next visible leaf MUST contain a string...
4130             if is_valid_index(idx) and LL[idx].type == token.STRING:
4131                 return idx
4132
4133         return None
4134
4135     @staticmethod
4136     def _assert_match(LL: List[Leaf]) -> Optional[int]:
4137         """
4138         Returns:
4139             string_idx such that @LL[string_idx] is equal to our target (i.e.
4140             matched) string, if this line matches the assert statement
4141             requirements listed in the 'Requirements' section of this classes'
4142             docstring.
4143                 OR
4144             None, otherwise.
4145         """
4146         # If this line is apart of an assert statement and the first leaf
4147         # contains the "assert" keyword...
4148         if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
4149             is_valid_index = is_valid_index_factory(LL)
4150
4151             for (i, leaf) in enumerate(LL):
4152                 # We MUST find a comma...
4153                 if leaf.type == token.COMMA:
4154                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4155
4156                     # That comma MUST be followed by a string...
4157                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4158                         string_idx = idx
4159
4160                         # Skip the string trailer, if one exists.
4161                         string_parser = StringParser()
4162                         idx = string_parser.parse(LL, string_idx)
4163
4164                         # But no more leaves are allowed...
4165                         if not is_valid_index(idx):
4166                             return string_idx
4167
4168         return None
4169
4170     @staticmethod
4171     def _assign_match(LL: List[Leaf]) -> Optional[int]:
4172         """
4173         Returns:
4174             string_idx such that @LL[string_idx] is equal to our target (i.e.
4175             matched) string, if this line matches the assignment statement
4176             requirements listed in the 'Requirements' section of this classes'
4177             docstring.
4178                 OR
4179             None, otherwise.
4180         """
4181         # If this line is apart of an expression statement or is a function
4182         # argument AND the first leaf contains a variable name...
4183         if (
4184             parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
4185             and LL[0].type == token.NAME
4186         ):
4187             is_valid_index = is_valid_index_factory(LL)
4188
4189             for (i, leaf) in enumerate(LL):
4190                 # We MUST find either an '=' or '+=' symbol...
4191                 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
4192                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4193
4194                     # That symbol MUST be followed by a string...
4195                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4196                         string_idx = idx
4197
4198                         # Skip the string trailer, if one exists.
4199                         string_parser = StringParser()
4200                         idx = string_parser.parse(LL, string_idx)
4201
4202                         # The next leaf MAY be a comma iff this line is apart
4203                         # of a function argument...
4204                         if (
4205                             parent_type(LL[0]) == syms.argument
4206                             and is_valid_index(idx)
4207                             and LL[idx].type == token.COMMA
4208                         ):
4209                             idx += 1
4210
4211                         # But no more leaves are allowed...
4212                         if not is_valid_index(idx):
4213                             return string_idx
4214
4215         return None
4216
4217     @staticmethod
4218     def _dict_match(LL: List[Leaf]) -> Optional[int]:
4219         """
4220         Returns:
4221             string_idx such that @LL[string_idx] is equal to our target (i.e.
4222             matched) string, if this line matches the dictionary key assignment
4223             statement requirements listed in the 'Requirements' section of this
4224             classes' docstring.
4225                 OR
4226             None, otherwise.
4227         """
4228         # If this line is apart of a dictionary key assignment...
4229         if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
4230             is_valid_index = is_valid_index_factory(LL)
4231
4232             for (i, leaf) in enumerate(LL):
4233                 # We MUST find a colon...
4234                 if leaf.type == token.COLON:
4235                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4236
4237                     # That colon MUST be followed by a string...
4238                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4239                         string_idx = idx
4240
4241                         # Skip the string trailer, if one exists.
4242                         string_parser = StringParser()
4243                         idx = string_parser.parse(LL, string_idx)
4244
4245                         # That string MAY be followed by a comma...
4246                         if is_valid_index(idx) and LL[idx].type == token.COMMA:
4247                             idx += 1
4248
4249                         # But no more leaves are allowed...
4250                         if not is_valid_index(idx):
4251                             return string_idx
4252
4253         return None
4254
4255     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
4256         LL = line.leaves
4257
4258         is_valid_index = is_valid_index_factory(LL)
4259         insert_str_child = insert_str_child_factory(LL[string_idx])
4260
4261         comma_idx = -1
4262         ends_with_comma = False
4263         if LL[comma_idx].type == token.COMMA:
4264             ends_with_comma = True
4265
4266         leaves_to_steal_comments_from = [LL[string_idx]]
4267         if ends_with_comma:
4268             leaves_to_steal_comments_from.append(LL[comma_idx])
4269
4270         # --- First Line
4271         first_line = line.clone()
4272         left_leaves = LL[:string_idx]
4273
4274         # We have to remember to account for (possibly invisible) LPAR and RPAR
4275         # leaves that already wrapped the target string. If these leaves do
4276         # exist, we will replace them with our own LPAR and RPAR leaves.
4277         old_parens_exist = False
4278         if left_leaves and left_leaves[-1].type == token.LPAR:
4279             old_parens_exist = True
4280             leaves_to_steal_comments_from.append(left_leaves[-1])
4281             left_leaves.pop()
4282
4283         append_leaves(first_line, line, left_leaves)
4284
4285         lpar_leaf = Leaf(token.LPAR, "(")
4286         if old_parens_exist:
4287             replace_child(LL[string_idx - 1], lpar_leaf)
4288         else:
4289             insert_str_child(lpar_leaf)
4290         first_line.append(lpar_leaf)
4291
4292         # We throw inline comments that were originally to the right of the
4293         # target string to the top line. They will now be shown to the right of
4294         # the LPAR.
4295         for leaf in leaves_to_steal_comments_from:
4296             for comment_leaf in line.comments_after(leaf):
4297                 first_line.append(comment_leaf, preformatted=True)
4298
4299         yield Ok(first_line)
4300
4301         # --- Middle (String) Line
4302         # We only need to yield one (possibly too long) string line, since the
4303         # `StringSplitter` will break it down further if necessary.
4304         string_value = LL[string_idx].value
4305         string_line = Line(
4306             depth=line.depth + 1,
4307             inside_brackets=True,
4308             should_explode=line.should_explode,
4309         )
4310         string_leaf = Leaf(token.STRING, string_value)
4311         insert_str_child(string_leaf)
4312         string_line.append(string_leaf)
4313
4314         old_rpar_leaf = None
4315         if is_valid_index(string_idx + 1):
4316             right_leaves = LL[string_idx + 1 :]
4317             if ends_with_comma:
4318                 right_leaves.pop()
4319
4320             if old_parens_exist:
4321                 assert (
4322                     right_leaves and right_leaves[-1].type == token.RPAR
4323                 ), "Apparently, old parentheses do NOT exist?!"
4324                 old_rpar_leaf = right_leaves.pop()
4325
4326             append_leaves(string_line, line, right_leaves)
4327
4328         yield Ok(string_line)
4329
4330         # --- Last Line
4331         last_line = line.clone()
4332         last_line.bracket_tracker = first_line.bracket_tracker
4333
4334         new_rpar_leaf = Leaf(token.RPAR, ")")
4335         if old_rpar_leaf is not None:
4336             replace_child(old_rpar_leaf, new_rpar_leaf)
4337         else:
4338             insert_str_child(new_rpar_leaf)
4339         last_line.append(new_rpar_leaf)
4340
4341         # If the target string ended with a comma, we place this comma to the
4342         # right of the RPAR on the last line.
4343         if ends_with_comma:
4344             comma_leaf = Leaf(token.COMMA, ",")
4345             replace_child(LL[comma_idx], comma_leaf)
4346             last_line.append(comma_leaf)
4347
4348         yield Ok(last_line)
4349
4350
4351 class StringParser:
4352     """
4353     A state machine that aids in parsing a string's "trailer", which can be
4354     either non-existent, an old-style formatting sequence (e.g. `% varX` or `%
4355     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
4356     varY)`).
4357
4358     NOTE: A new StringParser object MUST be instantiated for each string
4359     trailer we need to parse.
4360
4361     Examples:
4362         We shall assume that `line` equals the `Line` object that corresponds
4363         to the following line of python code:
4364         ```
4365         x = "Some {}.".format("String") + some_other_string
4366         ```
4367
4368         Furthermore, we will assume that `string_idx` is some index such that:
4369         ```
4370         assert line.leaves[string_idx].value == "Some {}."
4371         ```
4372
4373         The following code snippet then holds:
4374         ```
4375         string_parser = StringParser()
4376         idx = string_parser.parse(line.leaves, string_idx)
4377         assert line.leaves[idx].type == token.PLUS
4378         ```
4379     """
4380
4381     DEFAULT_TOKEN = -1
4382
4383     # String Parser States
4384     START = 1
4385     DOT = 2
4386     NAME = 3
4387     PERCENT = 4
4388     SINGLE_FMT_ARG = 5
4389     LPAR = 6
4390     RPAR = 7
4391     DONE = 8
4392
4393     # Lookup Table for Next State
4394     _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
4395         # A string trailer may start with '.' OR '%'.
4396         (START, token.DOT): DOT,
4397         (START, token.PERCENT): PERCENT,
4398         (START, DEFAULT_TOKEN): DONE,
4399         # A '.' MUST be followed by an attribute or method name.
4400         (DOT, token.NAME): NAME,
4401         # A method name MUST be followed by an '(', whereas an attribute name
4402         # is the last symbol in the string trailer.
4403         (NAME, token.LPAR): LPAR,
4404         (NAME, DEFAULT_TOKEN): DONE,
4405         # A '%' symbol can be followed by an '(' or a single argument (e.g. a
4406         # string or variable name).
4407         (PERCENT, token.LPAR): LPAR,
4408         (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
4409         # If a '%' symbol is followed by a single argument, that argument is
4410         # the last leaf in the string trailer.
4411         (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
4412         # If present, a ')' symbol is the last symbol in a string trailer.
4413         # (NOTE: LPARS and nested RPARS are not included in this lookup table,
4414         # since they are treated as a special case by the parsing logic in this
4415         # classes' implementation.)
4416         (RPAR, DEFAULT_TOKEN): DONE,
4417     }
4418
4419     def __init__(self) -> None:
4420         self._state = self.START
4421         self._unmatched_lpars = 0
4422
4423     def parse(self, leaves: List[Leaf], string_idx: int) -> int:
4424         """
4425         Pre-conditions:
4426             * @leaves[@string_idx].type == token.STRING
4427
4428         Returns:
4429             The index directly after the last leaf which is apart of the string
4430             trailer, if a "trailer" exists.
4431                 OR
4432             @string_idx + 1, if no string "trailer" exists.
4433         """
4434         assert leaves[string_idx].type == token.STRING
4435
4436         idx = string_idx + 1
4437         while idx < len(leaves) and self._next_state(leaves[idx]):
4438             idx += 1
4439         return idx
4440
4441     def _next_state(self, leaf: Leaf) -> bool:
4442         """
4443         Pre-conditions:
4444             * On the first call to this function, @leaf MUST be the leaf that
4445             was directly after the string leaf in question (e.g. if our target
4446             string is `line.leaves[i]` then the first call to this method must
4447             be `line.leaves[i + 1]`).
4448             * On the next call to this function, the leaf parameter passed in
4449             MUST be the leaf directly following @leaf.
4450
4451         Returns:
4452             True iff @leaf is apart of the string's trailer.
4453         """
4454         # We ignore empty LPAR or RPAR leaves.
4455         if is_empty_par(leaf):
4456             return True
4457
4458         next_token = leaf.type
4459         if next_token == token.LPAR:
4460             self._unmatched_lpars += 1
4461
4462         current_state = self._state
4463
4464         # The LPAR parser state is a special case. We will return True until we
4465         # find the matching RPAR token.
4466         if current_state == self.LPAR:
4467             if next_token == token.RPAR:
4468                 self._unmatched_lpars -= 1
4469                 if self._unmatched_lpars == 0:
4470                     self._state = self.RPAR
4471         # Otherwise, we use a lookup table to determine the next state.
4472         else:
4473             # If the lookup table matches the current state to the next
4474             # token, we use the lookup table.
4475             if (current_state, next_token) in self._goto:
4476                 self._state = self._goto[current_state, next_token]
4477             else:
4478                 # Otherwise, we check if a the current state was assigned a
4479                 # default.
4480                 if (current_state, self.DEFAULT_TOKEN) in self._goto:
4481                     self._state = self._goto[current_state, self.DEFAULT_TOKEN]
4482                 # If no default has been assigned, then this parser has a logic
4483                 # error.
4484                 else:
4485                     raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
4486
4487             if self._state == self.DONE:
4488                 return False
4489
4490         return True
4491
4492
4493 def TErr(err_msg: str) -> Err[CannotTransform]:
4494     """(T)ransform Err
4495
4496     Convenience function used when working with the TResult type.
4497     """
4498     cant_transform = CannotTransform(err_msg)
4499     return Err(cant_transform)
4500
4501
4502 def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
4503     """
4504     Returns:
4505         True iff one of the comments in @comment_list is a pragma used by one
4506         of the more common static analysis tools for python (e.g. mypy, flake8,
4507         pylint).
4508     """
4509     for comment in comment_list:
4510         if comment.value.startswith(("# type:", "# noqa", "# pylint:")):
4511             return True
4512
4513     return False
4514
4515
4516 def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
4517     """
4518     Factory for a convenience function that is used to orphan @string_leaf
4519     and then insert multiple new leaves into the same part of the node
4520     structure that @string_leaf had originally occupied.
4521
4522     Examples:
4523         Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
4524         string_leaf.parent`. Assume the node `N` has the following
4525         original structure:
4526
4527         Node(
4528             expr_stmt, [
4529                 Leaf(NAME, 'x'),
4530                 Leaf(EQUAL, '='),
4531                 Leaf(STRING, '"foo"'),
4532             ]
4533         )
4534
4535         We then run the code snippet shown below.
4536         ```
4537         insert_str_child = insert_str_child_factory(string_leaf)
4538
4539         lpar = Leaf(token.LPAR, '(')
4540         insert_str_child(lpar)
4541
4542         bar = Leaf(token.STRING, '"bar"')
4543         insert_str_child(bar)
4544
4545         rpar = Leaf(token.RPAR, ')')
4546         insert_str_child(rpar)
4547         ```
4548
4549         After which point, it follows that `string_leaf.parent is None` and
4550         the node `N` now has the following structure:
4551
4552         Node(
4553             expr_stmt, [
4554                 Leaf(NAME, 'x'),
4555                 Leaf(EQUAL, '='),
4556                 Leaf(LPAR, '('),
4557                 Leaf(STRING, '"bar"'),
4558                 Leaf(RPAR, ')'),
4559             ]
4560         )
4561     """
4562     string_parent = string_leaf.parent
4563     string_child_idx = string_leaf.remove()
4564
4565     def insert_str_child(child: LN) -> None:
4566         nonlocal string_child_idx
4567
4568         assert string_parent is not None
4569         assert string_child_idx is not None
4570
4571         string_parent.insert_child(string_child_idx, child)
4572         string_child_idx += 1
4573
4574     return insert_str_child
4575
4576
4577 def has_triple_quotes(string: str) -> bool:
4578     """
4579     Returns:
4580         True iff @string starts with three quotation characters.
4581     """
4582     raw_string = string.lstrip(STRING_PREFIX_CHARS)
4583     return raw_string[:3] in {'"""', "'''"}
4584
4585
4586 def parent_type(node: Optional[LN]) -> Optional[NodeType]:
4587     """
4588     Returns:
4589         @node.parent.type, if @node is not None and has a parent.
4590             OR
4591         None, otherwise.
4592     """
4593     if node is None or node.parent is None:
4594         return None
4595
4596     return node.parent.type
4597
4598
4599 def is_empty_par(leaf: Leaf) -> bool:
4600     return is_empty_lpar(leaf) or is_empty_rpar(leaf)
4601
4602
4603 def is_empty_lpar(leaf: Leaf) -> bool:
4604     return leaf.type == token.LPAR and leaf.value == ""
4605
4606
4607 def is_empty_rpar(leaf: Leaf) -> bool:
4608     return leaf.type == token.RPAR and leaf.value == ""
4609
4610
4611 def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
4612     """
4613     Examples:
4614         ```
4615         my_list = [1, 2, 3]
4616
4617         is_valid_index = is_valid_index_factory(my_list)
4618
4619         assert is_valid_index(0)
4620         assert is_valid_index(2)
4621
4622         assert not is_valid_index(3)
4623         assert not is_valid_index(-1)
4624         ```
4625     """
4626
4627     def is_valid_index(idx: int) -> bool:
4628         """
4629         Returns:
4630             True iff @idx is positive AND seq[@idx] does NOT raise an
4631             IndexError.
4632         """
4633         return 0 <= idx < len(seq)
4634
4635     return is_valid_index
4636
4637
4638 def line_to_string(line: Line) -> str:
4639     """Returns the string representation of @line.
4640
4641     WARNING: This is known to be computationally expensive.
4642     """
4643     return str(line).strip("\n")
4644
4645
4646 def append_leaves(
4647     new_line: Line, old_line: Line, leaves: List[Leaf], preformatted: bool = False
4648 ) -> None:
4649     """
4650     Append leaves (taken from @old_line) to @new_line, making sure to fix the
4651     underlying Node structure where appropriate.
4652
4653     All of the leaves in @leaves are duplicated. The duplicates are then
4654     appended to @new_line and used to replace their originals in the underlying
4655     Node structure. Any comments attached to the old leaves are reattached to
4656     the new leaves.
4657
4658     Pre-conditions:
4659         set(@leaves) is a subset of set(@old_line.leaves).
4660     """
4661     for old_leaf in leaves:
4662         new_leaf = Leaf(old_leaf.type, old_leaf.value)
4663         replace_child(old_leaf, new_leaf)
4664         new_line.append(new_leaf, preformatted=preformatted)
4665
4666         for comment_leaf in old_line.comments_after(old_leaf):
4667             new_line.append(comment_leaf, preformatted=True)
4668
4669
4670 def replace_child(old_child: LN, new_child: LN) -> None:
4671     """
4672     Side Effects:
4673         * If @old_child.parent is set, replace @old_child with @new_child in
4674         @old_child's underlying Node structure.
4675             OR
4676         * Otherwise, this function does nothing.
4677     """
4678     parent = old_child.parent
4679     if not parent:
4680         return
4681
4682     child_idx = old_child.remove()
4683     if child_idx is not None:
4684         parent.insert_child(child_idx, new_child)
4685
4686
4687 def get_string_prefix(string: str) -> str:
4688     """
4689     Pre-conditions:
4690         * assert_is_leaf_string(@string)
4691
4692     Returns:
4693         @string's prefix (e.g. '', 'r', 'f', or 'rf').
4694     """
4695     assert_is_leaf_string(string)
4696
4697     prefix = ""
4698     prefix_idx = 0
4699     while string[prefix_idx] in STRING_PREFIX_CHARS:
4700         prefix += string[prefix_idx].lower()
4701         prefix_idx += 1
4702
4703     return prefix
4704
4705
4706 def assert_is_leaf_string(string: str) -> None:
4707     """
4708     Checks the pre-condition that @string has the format that you would expect
4709     of `leaf.value` where `leaf` is some Leaf such that `leaf.type ==
4710     token.STRING`. A more precise description of the pre-conditions that are
4711     checked are listed below.
4712
4713     Pre-conditions:
4714         * @string starts with either ', ", <prefix>', or <prefix>" where
4715         `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
4716         * @string ends with a quote character (' or ").
4717
4718     Raises:
4719         AssertionError(...) if the pre-conditions listed above are not
4720         satisfied.
4721     """
4722     dquote_idx = string.find('"')
4723     squote_idx = string.find("'")
4724     if -1 in [dquote_idx, squote_idx]:
4725         quote_idx = max(dquote_idx, squote_idx)
4726     else:
4727         quote_idx = min(squote_idx, dquote_idx)
4728
4729     assert (
4730         0 <= quote_idx < len(string) - 1
4731     ), f"{string!r} is missing a starting quote character (' or \")."
4732     assert string[-1] in (
4733         "'",
4734         '"',
4735     ), f"{string!r} is missing an ending quote character (' or \")."
4736     assert set(string[:quote_idx]).issubset(
4737         set(STRING_PREFIX_CHARS)
4738     ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
4739
4740
4741 def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
4742     """Split line into many lines, starting with the first matching bracket pair.
4743
4744     Note: this usually looks weird, only use this for function definitions.
4745     Prefer RHS otherwise.  This is why this function is not symmetrical with
4746     :func:`right_hand_split` which also handles optional parentheses.
4747     """
4748     tail_leaves: List[Leaf] = []
4749     body_leaves: List[Leaf] = []
4750     head_leaves: List[Leaf] = []
4751     current_leaves = head_leaves
4752     matching_bracket: Optional[Leaf] = None
4753     for leaf in line.leaves:
4754         if (
4755             current_leaves is body_leaves
4756             and leaf.type in CLOSING_BRACKETS
4757             and leaf.opening_bracket is matching_bracket
4758         ):
4759             current_leaves = tail_leaves if body_leaves else head_leaves
4760         current_leaves.append(leaf)
4761         if current_leaves is head_leaves:
4762             if leaf.type in OPENING_BRACKETS:
4763                 matching_bracket = leaf
4764                 current_leaves = body_leaves
4765     if not matching_bracket:
4766         raise CannotSplit("No brackets found")
4767
4768     head = bracket_split_build_line(head_leaves, line, matching_bracket)
4769     body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
4770     tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
4771     bracket_split_succeeded_or_raise(head, body, tail)
4772     for result in (head, body, tail):
4773         if result:
4774             yield result
4775
4776
4777 def right_hand_split(
4778     line: Line,
4779     line_length: int,
4780     features: Collection[Feature] = (),
4781     omit: Collection[LeafID] = (),
4782 ) -> Iterator[Line]:
4783     """Split line into many lines, starting with the last matching bracket pair.
4784
4785     If the split was by optional parentheses, attempt splitting without them, too.
4786     `omit` is a collection of closing bracket IDs that shouldn't be considered for
4787     this split.
4788
4789     Note: running this function modifies `bracket_depth` on the leaves of `line`.
4790     """
4791     tail_leaves: List[Leaf] = []
4792     body_leaves: List[Leaf] = []
4793     head_leaves: List[Leaf] = []
4794     current_leaves = tail_leaves
4795     opening_bracket: Optional[Leaf] = None
4796     closing_bracket: Optional[Leaf] = None
4797     for leaf in reversed(line.leaves):
4798         if current_leaves is body_leaves:
4799             if leaf is opening_bracket:
4800                 current_leaves = head_leaves if body_leaves else tail_leaves
4801         current_leaves.append(leaf)
4802         if current_leaves is tail_leaves:
4803             if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
4804                 opening_bracket = leaf.opening_bracket
4805                 closing_bracket = leaf
4806                 current_leaves = body_leaves
4807     if not (opening_bracket and closing_bracket and head_leaves):
4808         # If there is no opening or closing_bracket that means the split failed and
4809         # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
4810         # the matching `opening_bracket` wasn't available on `line` anymore.
4811         raise CannotSplit("No brackets found")
4812
4813     tail_leaves.reverse()
4814     body_leaves.reverse()
4815     head_leaves.reverse()
4816     head = bracket_split_build_line(head_leaves, line, opening_bracket)
4817     body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
4818     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
4819     bracket_split_succeeded_or_raise(head, body, tail)
4820     if (
4821         Feature.FORCE_OPTIONAL_PARENTHESES not in features
4822         # the opening bracket is an optional paren
4823         and opening_bracket.type == token.LPAR
4824         and not opening_bracket.value
4825         # the closing bracket is an optional paren
4826         and closing_bracket.type == token.RPAR
4827         and not closing_bracket.value
4828         # it's not an import (optional parens are the only thing we can split on
4829         # in this case; attempting a split without them is a waste of time)
4830         and not line.is_import
4831         # there are no standalone comments in the body
4832         and not body.contains_standalone_comments(0)
4833         # and we can actually remove the parens
4834         and can_omit_invisible_parens(body, line_length, omit_on_explode=omit)
4835     ):
4836         omit = {id(closing_bracket), *omit}
4837         try:
4838             yield from right_hand_split(line, line_length, features=features, omit=omit)
4839             return
4840
4841         except CannotSplit:
4842             if not (
4843                 can_be_split(body)
4844                 or is_line_short_enough(body, line_length=line_length)
4845             ):
4846                 raise CannotSplit(
4847                     "Splitting failed, body is still too long and can't be split."
4848                 )
4849
4850             elif head.contains_multiline_strings() or tail.contains_multiline_strings():
4851                 raise CannotSplit(
4852                     "The current optional pair of parentheses is bound to fail to"
4853                     " satisfy the splitting algorithm because the head or the tail"
4854                     " contains multiline strings which by definition never fit one"
4855                     " line."
4856                 )
4857
4858     ensure_visible(opening_bracket)
4859     ensure_visible(closing_bracket)
4860     for result in (head, body, tail):
4861         if result:
4862             yield result
4863
4864
4865 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
4866     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
4867
4868     Do nothing otherwise.
4869
4870     A left- or right-hand split is based on a pair of brackets. Content before
4871     (and including) the opening bracket is left on one line, content inside the
4872     brackets is put on a separate line, and finally content starting with and
4873     following the closing bracket is put on a separate line.
4874
4875     Those are called `head`, `body`, and `tail`, respectively. If the split
4876     produced the same line (all content in `head`) or ended up with an empty `body`
4877     and the `tail` is just the closing bracket, then it's considered failed.
4878     """
4879     tail_len = len(str(tail).strip())
4880     if not body:
4881         if tail_len == 0:
4882             raise CannotSplit("Splitting brackets produced the same line")
4883
4884         elif tail_len < 3:
4885             raise CannotSplit(
4886                 f"Splitting brackets on an empty body to save {tail_len} characters is"
4887                 " not worth it"
4888             )
4889
4890
4891 def bracket_split_build_line(
4892     leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
4893 ) -> Line:
4894     """Return a new line with given `leaves` and respective comments from `original`.
4895
4896     If `is_body` is True, the result line is one-indented inside brackets and as such
4897     has its first leaf's prefix normalized and a trailing comma added when expected.
4898     """
4899     result = Line(depth=original.depth)
4900     if is_body:
4901         result.inside_brackets = True
4902         result.depth += 1
4903         if leaves:
4904             # Since body is a new indent level, remove spurious leading whitespace.
4905             normalize_prefix(leaves[0], inside_brackets=True)
4906             # Ensure a trailing comma for imports and standalone function arguments, but
4907             # be careful not to add one after any comments or within type annotations.
4908             no_commas = (
4909                 original.is_def
4910                 and opening_bracket.value == "("
4911                 and not any(leaf.type == token.COMMA for leaf in leaves)
4912             )
4913
4914             if original.is_import or no_commas:
4915                 for i in range(len(leaves) - 1, -1, -1):
4916                     if leaves[i].type == STANDALONE_COMMENT:
4917                         continue
4918
4919                     if leaves[i].type != token.COMMA:
4920                         new_comma = Leaf(token.COMMA, ",")
4921                         leaves.insert(i + 1, new_comma)
4922                     break
4923
4924     # Populate the line
4925     for leaf in leaves:
4926         result.append(leaf, preformatted=True)
4927         for comment_after in original.comments_after(leaf):
4928             result.append(comment_after, preformatted=True)
4929     if is_body and should_split_body_explode(result, opening_bracket):
4930         result.should_explode = True
4931     return result
4932
4933
4934 def dont_increase_indentation(split_func: Transformer) -> Transformer:
4935     """Normalize prefix of the first leaf in every line returned by `split_func`.
4936
4937     This is a decorator over relevant split functions.
4938     """
4939
4940     @wraps(split_func)
4941     def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4942         for line in split_func(line, features):
4943             normalize_prefix(line.leaves[0], inside_brackets=True)
4944             yield line
4945
4946     return split_wrapper
4947
4948
4949 @dont_increase_indentation
4950 def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4951     """Split according to delimiters of the highest priority.
4952
4953     If the appropriate Features are given, the split will add trailing commas
4954     also in function signatures and calls that contain `*` and `**`.
4955     """
4956     try:
4957         last_leaf = line.leaves[-1]
4958     except IndexError:
4959         raise CannotSplit("Line empty")
4960
4961     bt = line.bracket_tracker
4962     try:
4963         delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
4964     except ValueError:
4965         raise CannotSplit("No delimiters found")
4966
4967     if delimiter_priority == DOT_PRIORITY:
4968         if bt.delimiter_count_with_priority(delimiter_priority) == 1:
4969             raise CannotSplit("Splitting a single attribute from its owner looks wrong")
4970
4971     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4972     lowest_depth = sys.maxsize
4973     trailing_comma_safe = True
4974
4975     def append_to_line(leaf: Leaf) -> Iterator[Line]:
4976         """Append `leaf` to current line or to new line if appending impossible."""
4977         nonlocal current_line
4978         try:
4979             current_line.append_safe(leaf, preformatted=True)
4980         except ValueError:
4981             yield current_line
4982
4983             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4984             current_line.append(leaf)
4985
4986     for leaf in line.leaves:
4987         yield from append_to_line(leaf)
4988
4989         for comment_after in line.comments_after(leaf):
4990             yield from append_to_line(comment_after)
4991
4992         lowest_depth = min(lowest_depth, leaf.bracket_depth)
4993         if leaf.bracket_depth == lowest_depth:
4994             if is_vararg(leaf, within={syms.typedargslist}):
4995                 trailing_comma_safe = (
4996                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
4997                 )
4998             elif is_vararg(leaf, within={syms.arglist, syms.argument}):
4999                 trailing_comma_safe = (
5000                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
5001                 )
5002
5003         leaf_priority = bt.delimiters.get(id(leaf))
5004         if leaf_priority == delimiter_priority:
5005             yield current_line
5006
5007             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5008     if current_line:
5009         if (
5010             trailing_comma_safe
5011             and delimiter_priority == COMMA_PRIORITY
5012             and current_line.leaves[-1].type != token.COMMA
5013             and current_line.leaves[-1].type != STANDALONE_COMMENT
5014         ):
5015             new_comma = Leaf(token.COMMA, ",")
5016             current_line.append(new_comma)
5017         yield current_line
5018
5019
5020 @dont_increase_indentation
5021 def standalone_comment_split(
5022     line: Line, features: Collection[Feature] = ()
5023 ) -> Iterator[Line]:
5024     """Split standalone comments from the rest of the line."""
5025     if not line.contains_standalone_comments(0):
5026         raise CannotSplit("Line does not have any standalone comments")
5027
5028     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5029
5030     def append_to_line(leaf: Leaf) -> Iterator[Line]:
5031         """Append `leaf` to current line or to new line if appending impossible."""
5032         nonlocal current_line
5033         try:
5034             current_line.append_safe(leaf, preformatted=True)
5035         except ValueError:
5036             yield current_line
5037
5038             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
5039             current_line.append(leaf)
5040
5041     for leaf in line.leaves:
5042         yield from append_to_line(leaf)
5043
5044         for comment_after in line.comments_after(leaf):
5045             yield from append_to_line(comment_after)
5046
5047     if current_line:
5048         yield current_line
5049
5050
5051 def is_import(leaf: Leaf) -> bool:
5052     """Return True if the given leaf starts an import statement."""
5053     p = leaf.parent
5054     t = leaf.type
5055     v = leaf.value
5056     return bool(
5057         t == token.NAME
5058         and (
5059             (v == "import" and p and p.type == syms.import_name)
5060             or (v == "from" and p and p.type == syms.import_from)
5061         )
5062     )
5063
5064
5065 def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
5066     """Return True if the given leaf is a special comment.
5067     Only returns true for type comments for now."""
5068     t = leaf.type
5069     v = leaf.value
5070     return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix)
5071
5072
5073 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
5074     """Leave existing extra newlines if not `inside_brackets`. Remove everything
5075     else.
5076
5077     Note: don't use backslashes for formatting or you'll lose your voting rights.
5078     """
5079     if not inside_brackets:
5080         spl = leaf.prefix.split("#")
5081         if "\\" not in spl[0]:
5082             nl_count = spl[-1].count("\n")
5083             if len(spl) > 1:
5084                 nl_count -= 1
5085             leaf.prefix = "\n" * nl_count
5086             return
5087
5088     leaf.prefix = ""
5089
5090
5091 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
5092     """Make all string prefixes lowercase.
5093
5094     If remove_u_prefix is given, also removes any u prefix from the string.
5095
5096     Note: Mutates its argument.
5097     """
5098     match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", leaf.value, re.DOTALL)
5099     assert match is not None, f"failed to match string {leaf.value!r}"
5100     orig_prefix = match.group(1)
5101     new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u")
5102     if remove_u_prefix:
5103         new_prefix = new_prefix.replace("u", "")
5104     leaf.value = f"{new_prefix}{match.group(2)}"
5105
5106
5107 def normalize_string_quotes(leaf: Leaf) -> None:
5108     """Prefer double quotes but only if it doesn't cause more escaping.
5109
5110     Adds or removes backslashes as appropriate. Doesn't parse and fix
5111     strings nested in f-strings (yet).
5112
5113     Note: Mutates its argument.
5114     """
5115     value = leaf.value.lstrip(STRING_PREFIX_CHARS)
5116     if value[:3] == '"""':
5117         return
5118
5119     elif value[:3] == "'''":
5120         orig_quote = "'''"
5121         new_quote = '"""'
5122     elif value[0] == '"':
5123         orig_quote = '"'
5124         new_quote = "'"
5125     else:
5126         orig_quote = "'"
5127         new_quote = '"'
5128     first_quote_pos = leaf.value.find(orig_quote)
5129     if first_quote_pos == -1:
5130         return  # There's an internal error
5131
5132     prefix = leaf.value[:first_quote_pos]
5133     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
5134     escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
5135     escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
5136     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
5137     if "r" in prefix.casefold():
5138         if unescaped_new_quote.search(body):
5139             # There's at least one unescaped new_quote in this raw string
5140             # so converting is impossible
5141             return
5142
5143         # Do not introduce or remove backslashes in raw strings
5144         new_body = body
5145     else:
5146         # remove unnecessary escapes
5147         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
5148         if body != new_body:
5149             # Consider the string without unnecessary escapes as the original
5150             body = new_body
5151             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
5152         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
5153         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
5154     if "f" in prefix.casefold():
5155         matches = re.findall(
5156             r"""
5157             (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
5158                 ([^{].*?)  # contents of the brackets except if begins with {{
5159             \}(?:[^}]|$)  # A } followed by end of the string or a non-}
5160             """,
5161             new_body,
5162             re.VERBOSE,
5163         )
5164         for m in matches:
5165             if "\\" in str(m):
5166                 # Do not introduce backslashes in interpolated expressions
5167                 return
5168
5169     if new_quote == '"""' and new_body[-1:] == '"':
5170         # edge case:
5171         new_body = new_body[:-1] + '\\"'
5172     orig_escape_count = body.count("\\")
5173     new_escape_count = new_body.count("\\")
5174     if new_escape_count > orig_escape_count:
5175         return  # Do not introduce more escaping
5176
5177     if new_escape_count == orig_escape_count and orig_quote == '"':
5178         return  # Prefer double quotes
5179
5180     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
5181
5182
5183 def normalize_numeric_literal(leaf: Leaf) -> None:
5184     """Normalizes numeric (float, int, and complex) literals.
5185
5186     All letters used in the representation are normalized to lowercase (except
5187     in Python 2 long literals).
5188     """
5189     text = leaf.value.lower()
5190     if text.startswith(("0o", "0b")):
5191         # Leave octal and binary literals alone.
5192         pass
5193     elif text.startswith("0x"):
5194         # Change hex literals to upper case.
5195         before, after = text[:2], text[2:]
5196         text = f"{before}{after.upper()}"
5197     elif "e" in text:
5198         before, after = text.split("e")
5199         sign = ""
5200         if after.startswith("-"):
5201             after = after[1:]
5202             sign = "-"
5203         elif after.startswith("+"):
5204             after = after[1:]
5205         before = format_float_or_int_string(before)
5206         text = f"{before}e{sign}{after}"
5207     elif text.endswith(("j", "l")):
5208         number = text[:-1]
5209         suffix = text[-1]
5210         # Capitalize in "2L" because "l" looks too similar to "1".
5211         if suffix == "l":
5212             suffix = "L"
5213         text = f"{format_float_or_int_string(number)}{suffix}"
5214     else:
5215         text = format_float_or_int_string(text)
5216     leaf.value = text
5217
5218
5219 def format_float_or_int_string(text: str) -> str:
5220     """Formats a float string like "1.0"."""
5221     if "." not in text:
5222         return text
5223
5224     before, after = text.split(".")
5225     return f"{before or 0}.{after or 0}"
5226
5227
5228 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
5229     """Make existing optional parentheses invisible or create new ones.
5230
5231     `parens_after` is a set of string leaf values immediately after which parens
5232     should be put.
5233
5234     Standardizes on visible parentheses for single-element tuples, and keeps
5235     existing visible parentheses for other tuples and generator expressions.
5236     """
5237     for pc in list_comments(node.prefix, is_endmarker=False):
5238         if pc.value in FMT_OFF:
5239             # This `node` has a prefix with `# fmt: off`, don't mess with parens.
5240             return
5241     check_lpar = False
5242     for index, child in enumerate(list(node.children)):
5243         # Fixes a bug where invisible parens are not properly stripped from
5244         # assignment statements that contain type annotations.
5245         if isinstance(child, Node) and child.type == syms.annassign:
5246             normalize_invisible_parens(child, parens_after=parens_after)
5247
5248         # Add parentheses around long tuple unpacking in assignments.
5249         if (
5250             index == 0
5251             and isinstance(child, Node)
5252             and child.type == syms.testlist_star_expr
5253         ):
5254             check_lpar = True
5255
5256         if check_lpar:
5257             if is_walrus_assignment(child):
5258                 pass
5259
5260             elif child.type == syms.atom:
5261                 if maybe_make_parens_invisible_in_atom(child, parent=node):
5262                     wrap_in_parentheses(node, child, visible=False)
5263             elif is_one_tuple(child):
5264                 wrap_in_parentheses(node, child, visible=True)
5265             elif node.type == syms.import_from:
5266                 # "import from" nodes store parentheses directly as part of
5267                 # the statement
5268                 if child.type == token.LPAR:
5269                     # make parentheses invisible
5270                     child.value = ""  # type: ignore
5271                     node.children[-1].value = ""  # type: ignore
5272                 elif child.type != token.STAR:
5273                     # insert invisible parentheses
5274                     node.insert_child(index, Leaf(token.LPAR, ""))
5275                     node.append_child(Leaf(token.RPAR, ""))
5276                 break
5277
5278             elif not (isinstance(child, Leaf) and is_multiline_string(child)):
5279                 wrap_in_parentheses(node, child, visible=False)
5280
5281         check_lpar = isinstance(child, Leaf) and child.value in parens_after
5282
5283
5284 def normalize_fmt_off(node: Node) -> None:
5285     """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
5286     try_again = True
5287     while try_again:
5288         try_again = convert_one_fmt_off_pair(node)
5289
5290
5291 def convert_one_fmt_off_pair(node: Node) -> bool:
5292     """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
5293
5294     Returns True if a pair was converted.
5295     """
5296     for leaf in node.leaves():
5297         previous_consumed = 0
5298         for comment in list_comments(leaf.prefix, is_endmarker=False):
5299             if comment.value in FMT_OFF:
5300                 # We only want standalone comments. If there's no previous leaf or
5301                 # the previous leaf is indentation, it's a standalone comment in
5302                 # disguise.
5303                 if comment.type != STANDALONE_COMMENT:
5304                     prev = preceding_leaf(leaf)
5305                     if prev and prev.type not in WHITESPACE:
5306                         continue
5307
5308                 ignored_nodes = list(generate_ignored_nodes(leaf))
5309                 if not ignored_nodes:
5310                     continue
5311
5312                 first = ignored_nodes[0]  # Can be a container node with the `leaf`.
5313                 parent = first.parent
5314                 prefix = first.prefix
5315                 first.prefix = prefix[comment.consumed :]
5316                 hidden_value = (
5317                     comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
5318                 )
5319                 if hidden_value.endswith("\n"):
5320                     # That happens when one of the `ignored_nodes` ended with a NEWLINE
5321                     # leaf (possibly followed by a DEDENT).
5322                     hidden_value = hidden_value[:-1]
5323                 first_idx: Optional[int] = None
5324                 for ignored in ignored_nodes:
5325                     index = ignored.remove()
5326                     if first_idx is None:
5327                         first_idx = index
5328                 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
5329                 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
5330                 parent.insert_child(
5331                     first_idx,
5332                     Leaf(
5333                         STANDALONE_COMMENT,
5334                         hidden_value,
5335                         prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
5336                     ),
5337                 )
5338                 return True
5339
5340             previous_consumed = comment.consumed
5341
5342     return False
5343
5344
5345 def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
5346     """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
5347
5348     Stops at the end of the block.
5349     """
5350     container: Optional[LN] = container_of(leaf)
5351     while container is not None and container.type != token.ENDMARKER:
5352         if is_fmt_on(container):
5353             return
5354
5355         # fix for fmt: on in children
5356         if contains_fmt_on_at_column(container, leaf.column):
5357             for child in container.children:
5358                 if contains_fmt_on_at_column(child, leaf.column):
5359                     return
5360                 yield child
5361         else:
5362             yield container
5363             container = container.next_sibling
5364
5365
5366 def is_fmt_on(container: LN) -> bool:
5367     """Determine whether formatting is switched on within a container.
5368     Determined by whether the last `# fmt:` comment is `on` or `off`.
5369     """
5370     fmt_on = False
5371     for comment in list_comments(container.prefix, is_endmarker=False):
5372         if comment.value in FMT_ON:
5373             fmt_on = True
5374         elif comment.value in FMT_OFF:
5375             fmt_on = False
5376     return fmt_on
5377
5378
5379 def contains_fmt_on_at_column(container: LN, column: int) -> bool:
5380     """Determine if children at a given column have formatting switched on."""
5381     for child in container.children:
5382         if (
5383             isinstance(child, Node)
5384             and first_leaf_column(child) == column
5385             or isinstance(child, Leaf)
5386             and child.column == column
5387         ):
5388             if is_fmt_on(child):
5389                 return True
5390
5391     return False
5392
5393
5394 def first_leaf_column(node: Node) -> Optional[int]:
5395     """Returns the column of the first leaf child of a node."""
5396     for child in node.children:
5397         if isinstance(child, Leaf):
5398             return child.column
5399     return None
5400
5401
5402 def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
5403     """If it's safe, make the parens in the atom `node` invisible, recursively.
5404     Additionally, remove repeated, adjacent invisible parens from the atom `node`
5405     as they are redundant.
5406
5407     Returns whether the node should itself be wrapped in invisible parentheses.
5408
5409     """
5410     if (
5411         node.type != syms.atom
5412         or is_empty_tuple(node)
5413         or is_one_tuple(node)
5414         or (is_yield(node) and parent.type != syms.expr_stmt)
5415         or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
5416     ):
5417         return False
5418
5419     first = node.children[0]
5420     last = node.children[-1]
5421     if first.type == token.LPAR and last.type == token.RPAR:
5422         middle = node.children[1]
5423         # make parentheses invisible
5424         first.value = ""  # type: ignore
5425         last.value = ""  # type: ignore
5426         maybe_make_parens_invisible_in_atom(middle, parent=parent)
5427
5428         if is_atom_with_invisible_parens(middle):
5429             # Strip the invisible parens from `middle` by replacing
5430             # it with the child in-between the invisible parens
5431             middle.replace(middle.children[1])
5432
5433         return False
5434
5435     return True
5436
5437
5438 def is_atom_with_invisible_parens(node: LN) -> bool:
5439     """Given a `LN`, determines whether it's an atom `node` with invisible
5440     parens. Useful in dedupe-ing and normalizing parens.
5441     """
5442     if isinstance(node, Leaf) or node.type != syms.atom:
5443         return False
5444
5445     first, last = node.children[0], node.children[-1]
5446     return (
5447         isinstance(first, Leaf)
5448         and first.type == token.LPAR
5449         and first.value == ""
5450         and isinstance(last, Leaf)
5451         and last.type == token.RPAR
5452         and last.value == ""
5453     )
5454
5455
5456 def is_empty_tuple(node: LN) -> bool:
5457     """Return True if `node` holds an empty tuple."""
5458     return (
5459         node.type == syms.atom
5460         and len(node.children) == 2
5461         and node.children[0].type == token.LPAR
5462         and node.children[1].type == token.RPAR
5463     )
5464
5465
5466 def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
5467     """Returns `wrapped` if `node` is of the shape ( wrapped ).
5468
5469     Parenthesis can be optional. Returns None otherwise"""
5470     if len(node.children) != 3:
5471         return None
5472
5473     lpar, wrapped, rpar = node.children
5474     if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
5475         return None
5476
5477     return wrapped
5478
5479
5480 def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
5481     """Wrap `child` in parentheses.
5482
5483     This replaces `child` with an atom holding the parentheses and the old
5484     child.  That requires moving the prefix.
5485
5486     If `visible` is False, the leaves will be valueless (and thus invisible).
5487     """
5488     lpar = Leaf(token.LPAR, "(" if visible else "")
5489     rpar = Leaf(token.RPAR, ")" if visible else "")
5490     prefix = child.prefix
5491     child.prefix = ""
5492     index = child.remove() or 0
5493     new_child = Node(syms.atom, [lpar, child, rpar])
5494     new_child.prefix = prefix
5495     parent.insert_child(index, new_child)
5496
5497
5498 def is_one_tuple(node: LN) -> bool:
5499     """Return True if `node` holds a tuple with one element, with or without parens."""
5500     if node.type == syms.atom:
5501         gexp = unwrap_singleton_parenthesis(node)
5502         if gexp is None or gexp.type != syms.testlist_gexp:
5503             return False
5504
5505         return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
5506
5507     return (
5508         node.type in IMPLICIT_TUPLE
5509         and len(node.children) == 2
5510         and node.children[1].type == token.COMMA
5511     )
5512
5513
5514 def is_walrus_assignment(node: LN) -> bool:
5515     """Return True iff `node` is of the shape ( test := test )"""
5516     inner = unwrap_singleton_parenthesis(node)
5517     return inner is not None and inner.type == syms.namedexpr_test
5518
5519
5520 def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool:
5521     """Return True iff `node` is a trailer valid in a simple decorator"""
5522     return node.type == syms.trailer and (
5523         (
5524             len(node.children) == 2
5525             and node.children[0].type == token.DOT
5526             and node.children[1].type == token.NAME
5527         )
5528         # last trailer can be arguments
5529         or (
5530             last
5531             and len(node.children) == 3
5532             and node.children[0].type == token.LPAR
5533             # and node.children[1].type == syms.argument
5534             and node.children[2].type == token.RPAR
5535         )
5536     )
5537
5538
5539 def is_simple_decorator_expression(node: LN) -> bool:
5540     """Return True iff `node` could be a 'dotted name' decorator
5541
5542     This function takes the node of the 'namedexpr_test' of the new decorator
5543     grammar and test if it would be valid under the old decorator grammar.
5544
5545     The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
5546     The new grammar is : decorator: @ namedexpr_test NEWLINE
5547     """
5548     if node.type == token.NAME:
5549         return True
5550     if node.type == syms.power:
5551         if node.children:
5552             return (
5553                 node.children[0].type == token.NAME
5554                 and all(map(is_simple_decorator_trailer, node.children[1:-1]))
5555                 and (
5556                     len(node.children) < 2
5557                     or is_simple_decorator_trailer(node.children[-1], last=True)
5558                 )
5559             )
5560     return False
5561
5562
5563 def is_yield(node: LN) -> bool:
5564     """Return True if `node` holds a `yield` or `yield from` expression."""
5565     if node.type == syms.yield_expr:
5566         return True
5567
5568     if node.type == token.NAME and node.value == "yield":  # type: ignore
5569         return True
5570
5571     if node.type != syms.atom:
5572         return False
5573
5574     if len(node.children) != 3:
5575         return False
5576
5577     lpar, expr, rpar = node.children
5578     if lpar.type == token.LPAR and rpar.type == token.RPAR:
5579         return is_yield(expr)
5580
5581     return False
5582
5583
5584 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
5585     """Return True if `leaf` is a star or double star in a vararg or kwarg.
5586
5587     If `within` includes VARARGS_PARENTS, this applies to function signatures.
5588     If `within` includes UNPACKING_PARENTS, it applies to right hand-side
5589     extended iterable unpacking (PEP 3132) and additional unpacking
5590     generalizations (PEP 448).
5591     """
5592     if leaf.type not in VARARGS_SPECIALS or not leaf.parent:
5593         return False
5594
5595     p = leaf.parent
5596     if p.type == syms.star_expr:
5597         # Star expressions are also used as assignment targets in extended
5598         # iterable unpacking (PEP 3132).  See what its parent is instead.
5599         if not p.parent:
5600             return False
5601
5602         p = p.parent
5603
5604     return p.type in within
5605
5606
5607 def is_multiline_string(leaf: Leaf) -> bool:
5608     """Return True if `leaf` is a multiline string that actually spans many lines."""
5609     return has_triple_quotes(leaf.value) and "\n" in leaf.value
5610
5611
5612 def is_stub_suite(node: Node) -> bool:
5613     """Return True if `node` is a suite with a stub body."""
5614     if (
5615         len(node.children) != 4
5616         or node.children[0].type != token.NEWLINE
5617         or node.children[1].type != token.INDENT
5618         or node.children[3].type != token.DEDENT
5619     ):
5620         return False
5621
5622     return is_stub_body(node.children[2])
5623
5624
5625 def is_stub_body(node: LN) -> bool:
5626     """Return True if `node` is a simple statement containing an ellipsis."""
5627     if not isinstance(node, Node) or node.type != syms.simple_stmt:
5628         return False
5629
5630     if len(node.children) != 2:
5631         return False
5632
5633     child = node.children[0]
5634     return (
5635         child.type == syms.atom
5636         and len(child.children) == 3
5637         and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
5638     )
5639
5640
5641 def max_delimiter_priority_in_atom(node: LN) -> Priority:
5642     """Return maximum delimiter priority inside `node`.
5643
5644     This is specific to atoms with contents contained in a pair of parentheses.
5645     If `node` isn't an atom or there are no enclosing parentheses, returns 0.
5646     """
5647     if node.type != syms.atom:
5648         return 0
5649
5650     first = node.children[0]
5651     last = node.children[-1]
5652     if not (first.type == token.LPAR and last.type == token.RPAR):
5653         return 0
5654
5655     bt = BracketTracker()
5656     for c in node.children[1:-1]:
5657         if isinstance(c, Leaf):
5658             bt.mark(c)
5659         else:
5660             for leaf in c.leaves():
5661                 bt.mark(leaf)
5662     try:
5663         return bt.max_delimiter_priority()
5664
5665     except ValueError:
5666         return 0
5667
5668
5669 def ensure_visible(leaf: Leaf) -> None:
5670     """Make sure parentheses are visible.
5671
5672     They could be invisible as part of some statements (see
5673     :func:`normalize_invisible_parens` and :func:`visit_import_from`).
5674     """
5675     if leaf.type == token.LPAR:
5676         leaf.value = "("
5677     elif leaf.type == token.RPAR:
5678         leaf.value = ")"
5679
5680
5681 def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
5682     """Should `line` be immediately split with `delimiter_split()` after RHS?"""
5683
5684     if not (opening_bracket.parent and opening_bracket.value in "[{("):
5685         return False
5686
5687     # We're essentially checking if the body is delimited by commas and there's more
5688     # than one of them (we're excluding the trailing comma and if the delimiter priority
5689     # is still commas, that means there's more).
5690     exclude = set()
5691     trailing_comma = False
5692     try:
5693         last_leaf = line.leaves[-1]
5694         if last_leaf.type == token.COMMA:
5695             trailing_comma = True
5696             exclude.add(id(last_leaf))
5697         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
5698     except (IndexError, ValueError):
5699         return False
5700
5701     return max_priority == COMMA_PRIORITY and (
5702         trailing_comma
5703         # always explode imports
5704         or opening_bracket.parent.type in {syms.atom, syms.import_from}
5705     )
5706
5707
5708 def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
5709     """Return True if content between `opening` and `closing` looks like a one-tuple."""
5710     if opening.type != token.LPAR and closing.type != token.RPAR:
5711         return False
5712
5713     depth = closing.bracket_depth + 1
5714     for _opening_index, leaf in enumerate(leaves):
5715         if leaf is opening:
5716             break
5717
5718     else:
5719         raise LookupError("Opening paren not found in `leaves`")
5720
5721     commas = 0
5722     _opening_index += 1
5723     for leaf in leaves[_opening_index:]:
5724         if leaf is closing:
5725             break
5726
5727         bracket_depth = leaf.bracket_depth
5728         if bracket_depth == depth and leaf.type == token.COMMA:
5729             commas += 1
5730             if leaf.parent and leaf.parent.type in {
5731                 syms.arglist,
5732                 syms.typedargslist,
5733             }:
5734                 commas += 1
5735                 break
5736
5737     return commas < 2
5738
5739
5740 def get_features_used(node: Node) -> Set[Feature]:
5741     """Return a set of (relatively) new Python features used in this file.
5742
5743     Currently looking for:
5744     - f-strings;
5745     - underscores in numeric literals;
5746     - trailing commas after * or ** in function signatures and calls;
5747     - positional only arguments in function signatures and lambdas;
5748     - assignment expression;
5749     - relaxed decorator syntax;
5750     """
5751     features: Set[Feature] = set()
5752     for n in node.pre_order():
5753         if n.type == token.STRING:
5754             value_head = n.value[:2]  # type: ignore
5755             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
5756                 features.add(Feature.F_STRINGS)
5757
5758         elif n.type == token.NUMBER:
5759             if "_" in n.value:  # type: ignore
5760                 features.add(Feature.NUMERIC_UNDERSCORES)
5761
5762         elif n.type == token.SLASH:
5763             if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
5764                 features.add(Feature.POS_ONLY_ARGUMENTS)
5765
5766         elif n.type == token.COLONEQUAL:
5767             features.add(Feature.ASSIGNMENT_EXPRESSIONS)
5768
5769         elif n.type == syms.decorator:
5770             if len(n.children) > 1 and not is_simple_decorator_expression(
5771                 n.children[1]
5772             ):
5773                 features.add(Feature.RELAXED_DECORATORS)
5774
5775         elif (
5776             n.type in {syms.typedargslist, syms.arglist}
5777             and n.children
5778             and n.children[-1].type == token.COMMA
5779         ):
5780             if n.type == syms.typedargslist:
5781                 feature = Feature.TRAILING_COMMA_IN_DEF
5782             else:
5783                 feature = Feature.TRAILING_COMMA_IN_CALL
5784
5785             for ch in n.children:
5786                 if ch.type in STARS:
5787                     features.add(feature)
5788
5789                 if ch.type == syms.argument:
5790                     for argch in ch.children:
5791                         if argch.type in STARS:
5792                             features.add(feature)
5793
5794     return features
5795
5796
5797 def detect_target_versions(node: Node) -> Set[TargetVersion]:
5798     """Detect the version to target based on the nodes used."""
5799     features = get_features_used(node)
5800     return {
5801         version for version in TargetVersion if features <= VERSION_TO_FEATURES[version]
5802     }
5803
5804
5805 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
5806     """Generate sets of closing bracket IDs that should be omitted in a RHS.
5807
5808     Brackets can be omitted if the entire trailer up to and including
5809     a preceding closing bracket fits in one line.
5810
5811     Yielded sets are cumulative (contain results of previous yields, too).  First
5812     set is empty, unless the line should explode, in which case bracket pairs until
5813     the one that needs to explode are omitted.
5814     """
5815
5816     omit: Set[LeafID] = set()
5817     if not line.should_explode:
5818         yield omit
5819
5820     length = 4 * line.depth
5821     opening_bracket: Optional[Leaf] = None
5822     closing_bracket: Optional[Leaf] = None
5823     inner_brackets: Set[LeafID] = set()
5824     for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
5825         length += leaf_length
5826         if length > line_length:
5827             break
5828
5829         has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
5830         if leaf.type == STANDALONE_COMMENT or has_inline_comment:
5831             break
5832
5833         if opening_bracket:
5834             if leaf is opening_bracket:
5835                 opening_bracket = None
5836             elif leaf.type in CLOSING_BRACKETS:
5837                 prev = line.leaves[index - 1] if index > 0 else None
5838                 if (
5839                     line.should_explode
5840                     and prev
5841                     and prev.type == token.COMMA
5842                     and not is_one_tuple_between(
5843                         leaf.opening_bracket, leaf, line.leaves
5844                     )
5845                 ):
5846                     # Never omit bracket pairs with trailing commas.
5847                     # We need to explode on those.
5848                     break
5849
5850                 inner_brackets.add(id(leaf))
5851         elif leaf.type in CLOSING_BRACKETS:
5852             prev = line.leaves[index - 1] if index > 0 else None
5853             if prev and prev.type in OPENING_BRACKETS:
5854                 # Empty brackets would fail a split so treat them as "inner"
5855                 # brackets (e.g. only add them to the `omit` set if another
5856                 # pair of brackets was good enough.
5857                 inner_brackets.add(id(leaf))
5858                 continue
5859
5860             if closing_bracket:
5861                 omit.add(id(closing_bracket))
5862                 omit.update(inner_brackets)
5863                 inner_brackets.clear()
5864                 yield omit
5865
5866             if (
5867                 line.should_explode
5868                 and prev
5869                 and prev.type == token.COMMA
5870                 and not is_one_tuple_between(leaf.opening_bracket, leaf, line.leaves)
5871             ):
5872                 # Never omit bracket pairs with trailing commas.
5873                 # We need to explode on those.
5874                 break
5875
5876             if leaf.value:
5877                 opening_bracket = leaf.opening_bracket
5878                 closing_bracket = leaf
5879
5880
5881 def get_future_imports(node: Node) -> Set[str]:
5882     """Return a set of __future__ imports in the file."""
5883     imports: Set[str] = set()
5884
5885     def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
5886         for child in children:
5887             if isinstance(child, Leaf):
5888                 if child.type == token.NAME:
5889                     yield child.value
5890
5891             elif child.type == syms.import_as_name:
5892                 orig_name = child.children[0]
5893                 assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
5894                 assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
5895                 yield orig_name.value
5896
5897             elif child.type == syms.import_as_names:
5898                 yield from get_imports_from_children(child.children)
5899
5900             else:
5901                 raise AssertionError("Invalid syntax parsing imports")
5902
5903     for child in node.children:
5904         if child.type != syms.simple_stmt:
5905             break
5906
5907         first_child = child.children[0]
5908         if isinstance(first_child, Leaf):
5909             # Continue looking if we see a docstring; otherwise stop.
5910             if (
5911                 len(child.children) == 2
5912                 and first_child.type == token.STRING
5913                 and child.children[1].type == token.NEWLINE
5914             ):
5915                 continue
5916
5917             break
5918
5919         elif first_child.type == syms.import_from:
5920             module_name = first_child.children[1]
5921             if not isinstance(module_name, Leaf) or module_name.value != "__future__":
5922                 break
5923
5924             imports |= set(get_imports_from_children(first_child.children[3:]))
5925         else:
5926             break
5927
5928     return imports
5929
5930
5931 @lru_cache()
5932 def get_gitignore(root: Path) -> PathSpec:
5933     """ Return a PathSpec matching gitignore content if present."""
5934     gitignore = root / ".gitignore"
5935     lines: List[str] = []
5936     if gitignore.is_file():
5937         with gitignore.open() as gf:
5938             lines = gf.readlines()
5939     return PathSpec.from_lines("gitwildmatch", lines)
5940
5941
5942 def normalize_path_maybe_ignore(
5943     path: Path, root: Path, report: "Report"
5944 ) -> Optional[str]:
5945     """Normalize `path`. May return `None` if `path` was ignored.
5946
5947     `report` is where "path ignored" output goes.
5948     """
5949     try:
5950         abspath = path if path.is_absolute() else Path.cwd() / path
5951         normalized_path = abspath.resolve().relative_to(root).as_posix()
5952     except OSError as e:
5953         report.path_ignored(path, f"cannot be read because {e}")
5954         return None
5955
5956     except ValueError:
5957         if path.is_symlink():
5958             report.path_ignored(path, f"is a symbolic link that points outside {root}")
5959             return None
5960
5961         raise
5962
5963     return normalized_path
5964
5965
5966 def gen_python_files(
5967     paths: Iterable[Path],
5968     root: Path,
5969     include: Optional[Pattern[str]],
5970     exclude: Pattern[str],
5971     force_exclude: Optional[Pattern[str]],
5972     report: "Report",
5973     gitignore: PathSpec,
5974 ) -> Iterator[Path]:
5975     """Generate all files under `path` whose paths are not excluded by the
5976     `exclude_regex` or `force_exclude` regexes, but are included by the `include` regex.
5977
5978     Symbolic links pointing outside of the `root` directory are ignored.
5979
5980     `report` is where output about exclusions goes.
5981     """
5982     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
5983     for child in paths:
5984         normalized_path = normalize_path_maybe_ignore(child, root, report)
5985         if normalized_path is None:
5986             continue
5987
5988         # First ignore files matching .gitignore
5989         if gitignore.match_file(normalized_path):
5990             report.path_ignored(child, "matches the .gitignore file content")
5991             continue
5992
5993         # Then ignore with `--exclude` and `--force-exclude` options.
5994         normalized_path = "/" + normalized_path
5995         if child.is_dir():
5996             normalized_path += "/"
5997
5998         exclude_match = exclude.search(normalized_path) if exclude else None
5999         if exclude_match and exclude_match.group(0):
6000             report.path_ignored(child, "matches the --exclude regular expression")
6001             continue
6002
6003         force_exclude_match = (
6004             force_exclude.search(normalized_path) if force_exclude else None
6005         )
6006         if force_exclude_match and force_exclude_match.group(0):
6007             report.path_ignored(child, "matches the --force-exclude regular expression")
6008             continue
6009
6010         if child.is_dir():
6011             yield from gen_python_files(
6012                 child.iterdir(),
6013                 root,
6014                 include,
6015                 exclude,
6016                 force_exclude,
6017                 report,
6018                 gitignore,
6019             )
6020
6021         elif child.is_file():
6022             include_match = include.search(normalized_path) if include else True
6023             if include_match:
6024                 yield child
6025
6026
6027 @lru_cache()
6028 def find_project_root(srcs: Iterable[str]) -> Path:
6029     """Return a directory containing .git, .hg, or pyproject.toml.
6030
6031     That directory will be a common parent of all files and directories
6032     passed in `srcs`.
6033
6034     If no directory in the tree contains a marker that would specify it's the
6035     project root, the root of the file system is returned.
6036     """
6037     if not srcs:
6038         return Path("/").resolve()
6039
6040     path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
6041
6042     # A list of lists of parents for each 'src'. 'src' is included as a
6043     # "parent" of itself if it is a directory
6044     src_parents = [
6045         list(path.parents) + ([path] if path.is_dir() else []) for path in path_srcs
6046     ]
6047
6048     common_base = max(
6049         set.intersection(*(set(parents) for parents in src_parents)),
6050         key=lambda path: path.parts,
6051     )
6052
6053     for directory in (common_base, *common_base.parents):
6054         if (directory / ".git").exists():
6055             return directory
6056
6057         if (directory / ".hg").is_dir():
6058             return directory
6059
6060         if (directory / "pyproject.toml").is_file():
6061             return directory
6062
6063     return directory
6064
6065
6066 @dataclass
6067 class Report:
6068     """Provides a reformatting counter. Can be rendered with `str(report)`."""
6069
6070     check: bool = False
6071     diff: bool = False
6072     quiet: bool = False
6073     verbose: bool = False
6074     change_count: int = 0
6075     same_count: int = 0
6076     failure_count: int = 0
6077
6078     def done(self, src: Path, changed: Changed) -> None:
6079         """Increment the counter for successful reformatting. Write out a message."""
6080         if changed is Changed.YES:
6081             reformatted = "would reformat" if self.check or self.diff else "reformatted"
6082             if self.verbose or not self.quiet:
6083                 out(f"{reformatted} {src}")
6084             self.change_count += 1
6085         else:
6086             if self.verbose:
6087                 if changed is Changed.NO:
6088                     msg = f"{src} already well formatted, good job."
6089                 else:
6090                     msg = f"{src} wasn't modified on disk since last run."
6091                 out(msg, bold=False)
6092             self.same_count += 1
6093
6094     def failed(self, src: Path, message: str) -> None:
6095         """Increment the counter for failed reformatting. Write out a message."""
6096         err(f"error: cannot format {src}: {message}")
6097         self.failure_count += 1
6098
6099     def path_ignored(self, path: Path, message: str) -> None:
6100         if self.verbose:
6101             out(f"{path} ignored: {message}", bold=False)
6102
6103     @property
6104     def return_code(self) -> int:
6105         """Return the exit code that the app should use.
6106
6107         This considers the current state of changed files and failures:
6108         - if there were any failures, return 123;
6109         - if any files were changed and --check is being used, return 1;
6110         - otherwise return 0.
6111         """
6112         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
6113         # 126 we have special return codes reserved by the shell.
6114         if self.failure_count:
6115             return 123
6116
6117         elif self.change_count and self.check:
6118             return 1
6119
6120         return 0
6121
6122     def __str__(self) -> str:
6123         """Render a color report of the current state.
6124
6125         Use `click.unstyle` to remove colors.
6126         """
6127         if self.check or self.diff:
6128             reformatted = "would be reformatted"
6129             unchanged = "would be left unchanged"
6130             failed = "would fail to reformat"
6131         else:
6132             reformatted = "reformatted"
6133             unchanged = "left unchanged"
6134             failed = "failed to reformat"
6135         report = []
6136         if self.change_count:
6137             s = "s" if self.change_count > 1 else ""
6138             report.append(
6139                 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
6140             )
6141         if self.same_count:
6142             s = "s" if self.same_count > 1 else ""
6143             report.append(f"{self.same_count} file{s} {unchanged}")
6144         if self.failure_count:
6145             s = "s" if self.failure_count > 1 else ""
6146             report.append(
6147                 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
6148             )
6149         return ", ".join(report) + "."
6150
6151
6152 def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
6153     filename = "<unknown>"
6154     if sys.version_info >= (3, 8):
6155         # TODO: support Python 4+ ;)
6156         for minor_version in range(sys.version_info[1], 4, -1):
6157             try:
6158                 return ast.parse(src, filename, feature_version=(3, minor_version))
6159             except SyntaxError:
6160                 continue
6161     else:
6162         for feature_version in (7, 6):
6163             try:
6164                 return ast3.parse(src, filename, feature_version=feature_version)
6165             except SyntaxError:
6166                 continue
6167
6168     return ast27.parse(src)
6169
6170
6171 def _fixup_ast_constants(
6172     node: Union[ast.AST, ast3.AST, ast27.AST]
6173 ) -> Union[ast.AST, ast3.AST, ast27.AST]:
6174     """Map ast nodes deprecated in 3.8 to Constant."""
6175     if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
6176         return ast.Constant(value=node.s)
6177
6178     if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
6179         return ast.Constant(value=node.n)
6180
6181     if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
6182         return ast.Constant(value=node.value)
6183
6184     return node
6185
6186
6187 def _stringify_ast(
6188     node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
6189 ) -> Iterator[str]:
6190     """Simple visitor generating strings to compare ASTs by content."""
6191
6192     node = _fixup_ast_constants(node)
6193
6194     yield f"{'  ' * depth}{node.__class__.__name__}("
6195
6196     for field in sorted(node._fields):  # noqa: F402
6197         # TypeIgnore has only one field 'lineno' which breaks this comparison
6198         type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
6199         if sys.version_info >= (3, 8):
6200             type_ignore_classes += (ast.TypeIgnore,)
6201         if isinstance(node, type_ignore_classes):
6202             break
6203
6204         try:
6205             value = getattr(node, field)
6206         except AttributeError:
6207             continue
6208
6209         yield f"{'  ' * (depth+1)}{field}="
6210
6211         if isinstance(value, list):
6212             for item in value:
6213                 # Ignore nested tuples within del statements, because we may insert
6214                 # parentheses and they change the AST.
6215                 if (
6216                     field == "targets"
6217                     and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
6218                     and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
6219                 ):
6220                     for item in item.elts:
6221                         yield from _stringify_ast(item, depth + 2)
6222
6223                 elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
6224                     yield from _stringify_ast(item, depth + 2)
6225
6226         elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
6227             yield from _stringify_ast(value, depth + 2)
6228
6229         else:
6230             # Constant strings may be indented across newlines, if they are
6231             # docstrings; fold spaces after newlines when comparing. Similarly,
6232             # trailing and leading space may be removed.
6233             if (
6234                 isinstance(node, ast.Constant)
6235                 and field == "value"
6236                 and isinstance(value, str)
6237             ):
6238                 normalized = re.sub(r" *\n[ \t]*", "\n", value).strip()
6239             else:
6240                 normalized = value
6241             yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"
6242
6243     yield f"{'  ' * depth})  # /{node.__class__.__name__}"
6244
6245
6246 def assert_equivalent(src: str, dst: str) -> None:
6247     """Raise AssertionError if `src` and `dst` aren't equivalent."""
6248     try:
6249         src_ast = parse_ast(src)
6250     except Exception as exc:
6251         raise AssertionError(
6252             "cannot use --safe with this file; failed to parse source file.  AST"
6253             f" error message: {exc}"
6254         )
6255
6256     try:
6257         dst_ast = parse_ast(dst)
6258     except Exception as exc:
6259         log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
6260         raise AssertionError(
6261             f"INTERNAL ERROR: Black produced invalid code: {exc}. Please report a bug"
6262             " on https://github.com/psf/black/issues.  This invalid output might be"
6263             f" helpful: {log}"
6264         ) from None
6265
6266     src_ast_str = "\n".join(_stringify_ast(src_ast))
6267     dst_ast_str = "\n".join(_stringify_ast(dst_ast))
6268     if src_ast_str != dst_ast_str:
6269         log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
6270         raise AssertionError(
6271             "INTERNAL ERROR: Black produced code that is not equivalent to the"
6272             " source.  Please report a bug on https://github.com/psf/black/issues. "
6273             f" This diff might be helpful: {log}"
6274         ) from None
6275
6276
6277 def assert_stable(src: str, dst: str, mode: Mode) -> None:
6278     """Raise AssertionError if `dst` reformats differently the second time."""
6279     newdst = format_str(dst, mode=mode)
6280     if dst != newdst:
6281         log = dump_to_file(
6282             str(mode),
6283             diff(src, dst, "source", "first pass"),
6284             diff(dst, newdst, "first pass", "second pass"),
6285         )
6286         raise AssertionError(
6287             "INTERNAL ERROR: Black produced different code on the second pass of the"
6288             " formatter.  Please report a bug on https://github.com/psf/black/issues."
6289             f"  This diff might be helpful: {log}"
6290         ) from None
6291
6292
6293 @mypyc_attr(patchable=True)
6294 def dump_to_file(*output: str) -> str:
6295     """Dump `output` to a temporary file. Return path to the file."""
6296     with tempfile.NamedTemporaryFile(
6297         mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
6298     ) as f:
6299         for lines in output:
6300             f.write(lines)
6301             if lines and lines[-1] != "\n":
6302                 f.write("\n")
6303     return f.name
6304
6305
6306 @contextmanager
6307 def nullcontext() -> Iterator[None]:
6308     """Return an empty context manager.
6309
6310     To be used like `nullcontext` in Python 3.7.
6311     """
6312     yield
6313
6314
6315 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
6316     """Return a unified diff string between strings `a` and `b`."""
6317     import difflib
6318
6319     a_lines = [line + "\n" for line in a.splitlines()]
6320     b_lines = [line + "\n" for line in b.splitlines()]
6321     return "".join(
6322         difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
6323     )
6324
6325
6326 def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
6327     """asyncio signal handler that cancels all `tasks` and reports to stderr."""
6328     err("Aborted!")
6329     for task in tasks:
6330         task.cancel()
6331
6332
6333 def shutdown(loop: asyncio.AbstractEventLoop) -> None:
6334     """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
6335     try:
6336         if sys.version_info[:2] >= (3, 7):
6337             all_tasks = asyncio.all_tasks
6338         else:
6339             all_tasks = asyncio.Task.all_tasks
6340         # This part is borrowed from asyncio/runners.py in Python 3.7b2.
6341         to_cancel = [task for task in all_tasks(loop) if not task.done()]
6342         if not to_cancel:
6343             return
6344
6345         for task in to_cancel:
6346             task.cancel()
6347         loop.run_until_complete(
6348             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
6349         )
6350     finally:
6351         # `concurrent.futures.Future` objects cannot be cancelled once they
6352         # are already running. There might be some when the `shutdown()` happened.
6353         # Silence their logger's spew about the event loop being closed.
6354         cf_logger = logging.getLogger("concurrent.futures")
6355         cf_logger.setLevel(logging.CRITICAL)
6356         loop.close()
6357
6358
6359 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
6360     """Replace `regex` with `replacement` twice on `original`.
6361
6362     This is used by string normalization to perform replaces on
6363     overlapping matches.
6364     """
6365     return regex.sub(replacement, regex.sub(replacement, original))
6366
6367
6368 def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
6369     """Compile a regular expression string in `regex`.
6370
6371     If it contains newlines, use verbose mode.
6372     """
6373     if "\n" in regex:
6374         regex = "(?x)" + regex
6375     compiled: Pattern[str] = re.compile(regex)
6376     return compiled
6377
6378
6379 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
6380     """Like `reversed(enumerate(sequence))` if that were possible."""
6381     index = len(sequence) - 1
6382     for element in reversed(sequence):
6383         yield (index, element)
6384         index -= 1
6385
6386
6387 def enumerate_with_length(
6388     line: Line, reversed: bool = False
6389 ) -> Iterator[Tuple[Index, Leaf, int]]:
6390     """Return an enumeration of leaves with their length.
6391
6392     Stops prematurely on multiline strings and standalone comments.
6393     """
6394     op = cast(
6395         Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
6396         enumerate_reversed if reversed else enumerate,
6397     )
6398     for index, leaf in op(line.leaves):
6399         length = len(leaf.prefix) + len(leaf.value)
6400         if "\n" in leaf.value:
6401             return  # Multiline strings, we can't continue.
6402
6403         for comment in line.comments_after(leaf):
6404             length += len(comment.value)
6405
6406         yield index, leaf, length
6407
6408
6409 def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
6410     """Return True if `line` is no longer than `line_length`.
6411
6412     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
6413     """
6414     if not line_str:
6415         line_str = line_to_string(line)
6416     return (
6417         len(line_str) <= line_length
6418         and "\n" not in line_str  # multiline strings
6419         and not line.contains_standalone_comments()
6420     )
6421
6422
6423 def can_be_split(line: Line) -> bool:
6424     """Return False if the line cannot be split *for sure*.
6425
6426     This is not an exhaustive search but a cheap heuristic that we can use to
6427     avoid some unfortunate formattings (mostly around wrapping unsplittable code
6428     in unnecessary parentheses).
6429     """
6430     leaves = line.leaves
6431     if len(leaves) < 2:
6432         return False
6433
6434     if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
6435         call_count = 0
6436         dot_count = 0
6437         next = leaves[-1]
6438         for leaf in leaves[-2::-1]:
6439             if leaf.type in OPENING_BRACKETS:
6440                 if next.type not in CLOSING_BRACKETS:
6441                     return False
6442
6443                 call_count += 1
6444             elif leaf.type == token.DOT:
6445                 dot_count += 1
6446             elif leaf.type == token.NAME:
6447                 if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
6448                     return False
6449
6450             elif leaf.type not in CLOSING_BRACKETS:
6451                 return False
6452
6453             if dot_count > 1 and call_count > 1:
6454                 return False
6455
6456     return True
6457
6458
6459 def can_omit_invisible_parens(
6460     line: Line,
6461     line_length: int,
6462     omit_on_explode: Collection[LeafID] = (),
6463 ) -> bool:
6464     """Does `line` have a shape safe to reformat without optional parens around it?
6465
6466     Returns True for only a subset of potentially nice looking formattings but
6467     the point is to not return false positives that end up producing lines that
6468     are too long.
6469     """
6470     bt = line.bracket_tracker
6471     if not bt.delimiters:
6472         # Without delimiters the optional parentheses are useless.
6473         return True
6474
6475     max_priority = bt.max_delimiter_priority()
6476     if bt.delimiter_count_with_priority(max_priority) > 1:
6477         # With more than one delimiter of a kind the optional parentheses read better.
6478         return False
6479
6480     if max_priority == DOT_PRIORITY:
6481         # A single stranded method call doesn't require optional parentheses.
6482         return True
6483
6484     assert len(line.leaves) >= 2, "Stranded delimiter"
6485
6486     # With a single delimiter, omit if the expression starts or ends with
6487     # a bracket.
6488     first = line.leaves[0]
6489     second = line.leaves[1]
6490     if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
6491         if _can_omit_opening_paren(line, first=first, line_length=line_length):
6492             return True
6493
6494         # Note: we are not returning False here because a line might have *both*
6495         # a leading opening bracket and a trailing closing bracket.  If the
6496         # opening bracket doesn't match our rule, maybe the closing will.
6497
6498     penultimate = line.leaves[-2]
6499     last = line.leaves[-1]
6500     if line.should_explode:
6501         try:
6502             penultimate, last = last_two_except(line.leaves, omit=omit_on_explode)
6503         except LookupError:
6504             # Turns out we'd omit everything.  We cannot skip the optional parentheses.
6505             return False
6506
6507     if (
6508         last.type == token.RPAR
6509         or last.type == token.RBRACE
6510         or (
6511             # don't use indexing for omitting optional parentheses;
6512             # it looks weird
6513             last.type == token.RSQB
6514             and last.parent
6515             and last.parent.type != syms.trailer
6516         )
6517     ):
6518         if penultimate.type in OPENING_BRACKETS:
6519             # Empty brackets don't help.
6520             return False
6521
6522         if is_multiline_string(first):
6523             # Additional wrapping of a multiline string in this situation is
6524             # unnecessary.
6525             return True
6526
6527         if line.should_explode and penultimate.type == token.COMMA:
6528             # The rightmost non-omitted bracket pair is the one we want to explode on.
6529             return True
6530
6531         if _can_omit_closing_paren(line, last=last, line_length=line_length):
6532             return True
6533
6534     return False
6535
6536
6537 def _can_omit_opening_paren(line: Line, *, first: Leaf, line_length: int) -> bool:
6538     """See `can_omit_invisible_parens`."""
6539     remainder = False
6540     length = 4 * line.depth
6541     _index = -1
6542     for _index, leaf, leaf_length in enumerate_with_length(line):
6543         if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
6544             remainder = True
6545         if remainder:
6546             length += leaf_length
6547             if length > line_length:
6548                 break
6549
6550             if leaf.type in OPENING_BRACKETS:
6551                 # There are brackets we can further split on.
6552                 remainder = False
6553
6554     else:
6555         # checked the entire string and line length wasn't exceeded
6556         if len(line.leaves) == _index + 1:
6557             return True
6558
6559     return False
6560
6561
6562 def _can_omit_closing_paren(line: Line, *, last: Leaf, line_length: int) -> bool:
6563     """See `can_omit_invisible_parens`."""
6564     length = 4 * line.depth
6565     seen_other_brackets = False
6566     for _index, leaf, leaf_length in enumerate_with_length(line):
6567         length += leaf_length
6568         if leaf is last.opening_bracket:
6569             if seen_other_brackets or length <= line_length:
6570                 return True
6571
6572         elif leaf.type in OPENING_BRACKETS:
6573             # There are brackets we can further split on.
6574             seen_other_brackets = True
6575
6576     return False
6577
6578
6579 def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]:
6580     """Return (penultimate, last) leaves skipping brackets in `omit` and contents."""
6581     stop_after = None
6582     last = None
6583     for leaf in reversed(leaves):
6584         if stop_after:
6585             if leaf is stop_after:
6586                 stop_after = None
6587             continue
6588
6589         if last:
6590             return leaf, last
6591
6592         if id(leaf) in omit:
6593             stop_after = leaf.opening_bracket
6594         else:
6595             last = leaf
6596     else:
6597         raise LookupError("Last two leaves were also skipped")
6598
6599
6600 def run_transformer(
6601     line: Line,
6602     transform: Transformer,
6603     mode: Mode,
6604     features: Collection[Feature],
6605     *,
6606     line_str: str = "",
6607 ) -> List[Line]:
6608     if not line_str:
6609         line_str = line_to_string(line)
6610     result: List[Line] = []
6611     for transformed_line in transform(line, features):
6612         if str(transformed_line).strip("\n") == line_str:
6613             raise CannotTransform("Line transformer returned an unchanged result")
6614
6615         result.extend(transform_line(transformed_line, mode=mode, features=features))
6616
6617     if not (
6618         transform.__name__ == "rhs"
6619         and line.bracket_tracker.invisible
6620         and not any(bracket.value for bracket in line.bracket_tracker.invisible)
6621         and not line.contains_multiline_strings()
6622         and not result[0].contains_uncollapsable_type_comments()
6623         and not result[0].contains_unsplittable_type_ignore()
6624         and not is_line_short_enough(result[0], line_length=mode.line_length)
6625     ):
6626         return result
6627
6628     line_copy = line.clone()
6629     append_leaves(line_copy, line, line.leaves)
6630     features_fop = set(features) | {Feature.FORCE_OPTIONAL_PARENTHESES}
6631     second_opinion = run_transformer(
6632         line_copy, transform, mode, features_fop, line_str=line_str
6633     )
6634     if all(
6635         is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
6636     ):
6637         result = second_opinion
6638     return result
6639
6640
6641 def get_cache_file(mode: Mode) -> Path:
6642     return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"
6643
6644
6645 def read_cache(mode: Mode) -> Cache:
6646     """Read the cache if it exists and is well formed.
6647
6648     If it is not well formed, the call to write_cache later should resolve the issue.
6649     """
6650     cache_file = get_cache_file(mode)
6651     if not cache_file.exists():
6652         return {}
6653
6654     with cache_file.open("rb") as fobj:
6655         try:
6656             cache: Cache = pickle.load(fobj)
6657         except (pickle.UnpicklingError, ValueError):
6658             return {}
6659
6660     return cache
6661
6662
6663 def get_cache_info(path: Path) -> CacheInfo:
6664     """Return the information used to check if a file is already formatted or not."""
6665     stat = path.stat()
6666     return stat.st_mtime, stat.st_size
6667
6668
6669 def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
6670     """Split an iterable of paths in `sources` into two sets.
6671
6672     The first contains paths of files that modified on disk or are not in the
6673     cache. The other contains paths to non-modified files.
6674     """
6675     todo, done = set(), set()
6676     for src in sources:
6677         src = src.resolve()
6678         if cache.get(src) != get_cache_info(src):
6679             todo.add(src)
6680         else:
6681             done.add(src)
6682     return todo, done
6683
6684
6685 def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
6686     """Update the cache file."""
6687     cache_file = get_cache_file(mode)
6688     try:
6689         CACHE_DIR.mkdir(parents=True, exist_ok=True)
6690         new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
6691         with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
6692             pickle.dump(new_cache, f, protocol=4)
6693         os.replace(f.name, cache_file)
6694     except OSError:
6695         pass
6696
6697
6698 def patch_click() -> None:
6699     """Make Click not crash.
6700
6701     On certain misconfigured environments, Python 3 selects the ASCII encoding as the
6702     default which restricts paths that it can access during the lifetime of the
6703     application.  Click refuses to work in this scenario by raising a RuntimeError.
6704
6705     In case of Black the likelihood that non-ASCII characters are going to be used in
6706     file paths is minimal since it's Python source code.  Moreover, this crash was
6707     spurious on Python 3.7 thanks to PEP 538 and PEP 540.
6708     """
6709     try:
6710         from click import core
6711         from click import _unicodefun  # type: ignore
6712     except ModuleNotFoundError:
6713         return
6714
6715     for module in (core, _unicodefun):
6716         if hasattr(module, "_verify_python3_env"):
6717             module._verify_python3_env = lambda: None
6718
6719
6720 def patched_main() -> None:
6721     freeze_support()
6722     patch_click()
6723     main()
6724
6725
6726 def is_docstring(leaf: Leaf) -> bool:
6727     if not is_multiline_string(leaf):
6728         # For the purposes of docstring re-indentation, we don't need to do anything
6729         # with single-line docstrings.
6730         return False
6731
6732     if prev_siblings_are(
6733         leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
6734     ):
6735         return True
6736
6737     # Multiline docstring on the same line as the `def`.
6738     if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
6739         # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
6740         # grammar. We're safe to return True without further checks.
6741         return True
6742
6743     return False
6744
6745
6746 def fix_docstring(docstring: str, prefix: str) -> str:
6747     # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
6748     if not docstring:
6749         return ""
6750     # Convert tabs to spaces (following the normal Python rules)
6751     # and split into a list of lines:
6752     lines = docstring.expandtabs().splitlines()
6753     # Determine minimum indentation (first line doesn't count):
6754     indent = sys.maxsize
6755     for line in lines[1:]:
6756         stripped = line.lstrip()
6757         if stripped:
6758             indent = min(indent, len(line) - len(stripped))
6759     # Remove indentation (first line is special):
6760     trimmed = [lines[0].strip()]
6761     if indent < sys.maxsize:
6762         last_line_idx = len(lines) - 2
6763         for i, line in enumerate(lines[1:]):
6764             stripped_line = line[indent:].rstrip()
6765             if stripped_line or i == last_line_idx:
6766                 trimmed.append(prefix + stripped_line)
6767             else:
6768                 trimmed.append("")
6769     return "\n".join(trimmed)
6770
6771
6772 if __name__ == "__main__":
6773     patched_main()