src/black/__init__.py

   1 import ast
   2 import asyncio
   3 from abc import ABC, abstractmethod
   4 from collections import defaultdict
   5 from concurrent.futures import Executor, ThreadPoolExecutor, ProcessPoolExecutor
   6 from contextlib import contextmanager
   7 from datetime import datetime
   8 from enum import Enum
   9 from functools import lru_cache, partial, wraps
  10 import io
  11 import itertools
  12 import logging
  13 from multiprocessing import Manager, freeze_support
  14 import os
  15 from pathlib import Path
  16 import pickle
  17 import regex as re
  18 import signal
  19 import sys
  20 import tempfile
  21 import tokenize
  22 import traceback
  23 from typing import (
  24     Any,
  25     Callable,
  26     Collection,
  27     Dict,
  28     Generator,
  29     Generic,
  30     Iterable,
  31     Iterator,
  32     List,
  33     Optional,
  34     Pattern,
  35     Sequence,
  36     Set,
  37     Sized,
  38     Tuple,
  39     Type,
  40     TypeVar,
  41     Union,
  42     cast,
  43     TYPE_CHECKING,
  44 )
  45 from typing_extensions import Final
  46 from mypy_extensions import mypyc_attr
  47
  48 from appdirs import user_cache_dir
  49 from dataclasses import dataclass, field, replace
  50 import click
  51 import toml
  52 from typed_ast import ast3, ast27
  53 from pathspec import PathSpec
  54
  55 # lib2to3 fork
  56 from blib2to3.pytree import Node, Leaf, type_repr
  57 from blib2to3 import pygram, pytree
  58 from blib2to3.pgen2 import driver, token
  59 from blib2to3.pgen2.grammar import Grammar
  60 from blib2to3.pgen2.parse import ParseError
  61
  62 from _black_version import version as __version__
  63
  64 if TYPE_CHECKING:
  65     import colorama  # noqa: F401
  66
# Default maximum line length; used as the default of `--line-length` and of
# `Mode.line_length`.
DEFAULT_LINE_LENGTH = 88
# Default value of `--exclude`: a regular expression for paths skipped on
# recursive searches.
DEFAULT_EXCLUDES = r"/(\.direnv|\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
# Default value of `--include`: matches names ending in `.py` or `.pyi`.
DEFAULT_INCLUDES = r"\.pyi?$"
# User cache directory for Black, keyed by the running Black version.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))

STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.


# types
# Plain aliases that give the builtin types used in signatures a descriptive name.
FileContent = str
Encoding = str
NewLine = str
Depth = int
NodeType = int
ParserState = int
LeafID = int
StringID = int
Priority = int
Index = int
# Either kind of blib2to3 tree element.
LN = Union[Leaf, Node]
# A callable that takes a line plus a collection of features and yields lines.
Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
Timestamp = float
FileSize = int
# What the cache stores per file: a (Timestamp, FileSize) pair.
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
# Message helpers; both write to stderr (`err=True`): `out` in bold, `err` in red.
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)

# Initialize the blib2to3 grammars at import time.
# NOTE(review): CACHE_DIR is presumably where the generated grammar tables get
# cached -- confirm against blib2to3.pygram.
pygram.initialize(CACHE_DIR)
syms = pygram.python_symbols
  97
  98
class NothingChanged(UserWarning):
    """Raised when reformatted code is the same as source.

    :func:`format_file_contents` also raises it for sources that consist only of
    whitespace.  :func:`format_file_in_place` and :func:`format_stdin_to_stdout`
    catch it and report the source as unchanged.
    """
 101
 102
class CannotTransform(Exception):
    """Base class for errors raised by Transformers.

    This is also the error type carried by the `TResult` alias.
    """
 105
 106
class CannotSplit(CannotTransform):
    """A readable split that fits the allotted line length is impossible.

    Being a `CannotTransform`, it can be handled wherever transformer errors are.
    """
 109
 110
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts.

    It derives from `ValueError`, so handlers for `ValueError` catch it as well.
    """
 113
 114
 115 T = TypeVar("T")
 116 E = TypeVar("E", bound=Exception)
 117
 118
 119 class Ok(Generic[T]):
 120     def __init__(self, value: T) -> None:
 121         self._value = value
 122
 123     def ok(self) -> T:
 124         return self._value
 125
 126
 127 class Err(Generic[E]):
 128     def __init__(self, e: E) -> None:
 129         self._e = e
 130
 131     def err(self) -> E:
 132         return self._e
 133
 134
# The 'Result' return type is used to implement an error-handling model heavily
# influenced by that used by the Rust programming language
# (see https://doc.rust-lang.org/book/ch09-00-error-handling.html).
# A Result is either an `Ok` wrapping a value or an `Err` wrapping an exception.
Result = Union[Ok[T], Err[E]]
TResult = Result[T, CannotTransform]  # (T)ransform Result
TMatchResult = TResult[Index]  # a TResult whose success value is an Index
 141
 142
 143 class WriteBack(Enum):
 144     NO = 0
 145     YES = 1
 146     DIFF = 2
 147     CHECK = 3
 148     COLOR_DIFF = 4
 149
 150     @classmethod
 151     def from_configuration(
 152         cls, *, check: bool, diff: bool, color: bool = False
 153     ) -> "WriteBack":
 154         if check and not diff:
 155             return cls.CHECK
 156
 157         if diff and color:
 158             return cls.COLOR_DIFF
 159
 160         return cls.DIFF if diff else cls.YES
 161
 162
class Changed(Enum):
    """Outcome of processing one source, as handed to `report.done()`."""

    NO = 0  # the formatter reported no change
    CACHED = 1  # skipped because the cache entry matched the file
    YES = 2  # the formatter reported a change
 167
 168
class TargetVersion(Enum):
    """Python versions that can be selected as formatting targets.

    The lowercased member names are the values accepted by `--target-version`.
    """

    PY27 = 2
    PY33 = 3
    PY34 = 4
    PY35 = 5
    PY36 = 6
    PY37 = 7
    PY38 = 8

    def is_python2(self) -> bool:
        """Return True if this member is the Python 2.7 target."""
        return self is TargetVersion.PY27


# The Python 3.6, 3.7 and 3.8 targets.
PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
 183
 184
class Feature(Enum):
    """Features whose availability is looked up per target version.

    `VERSION_TO_FEATURES` lists which members each `TargetVersion` supports.
    """

    # All string literals are unicode
    UNICODE_LITERALS = 1
    F_STRINGS = 2
    NUMERIC_UNDERSCORES = 3
    TRAILING_COMMA_IN_CALL = 4
    TRAILING_COMMA_IN_DEF = 5
    # The following two feature-flags are mutually exclusive, and exactly one should be
    # set for every version of python.
    ASYNC_IDENTIFIERS = 6
    ASYNC_KEYWORDS = 7
    ASSIGNMENT_EXPRESSIONS = 8
    POS_ONLY_ARGUMENTS = 9
    # Not listed for any version in `VERSION_TO_FEATURES`.
    # NOTE(review): looks like an internal formatting flag rather than a language
    # feature -- confirm against its users.
    FORCE_OPTIONAL_PARENTHESES = 50
 199
 200
# Features supported by each target version; `supports_feature()` looks versions
# up here.  Every version carries exactly one of ASYNC_IDENTIFIERS and
# ASYNC_KEYWORDS.
VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
    TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY35: {
        Feature.UNICODE_LITERALS,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.ASYNC_IDENTIFIERS,
    },
    TargetVersion.PY36: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_IDENTIFIERS,
    },
    # From 3.7 on, ASYNC_KEYWORDS replaces ASYNC_IDENTIFIERS.
    TargetVersion.PY37: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_KEYWORDS,
    },
    TargetVersion.PY38: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_KEYWORDS,
        Feature.ASSIGNMENT_EXPRESSIONS,
        Feature.POS_ONLY_ARGUMENTS,
    },
}
 237
 238
 239 @dataclass
 240 class Mode:
 241     target_versions: Set[TargetVersion] = field(default_factory=set)
 242     line_length: int = DEFAULT_LINE_LENGTH
 243     string_normalization: bool = True
 244     experimental_string_processing: bool = False
 245     is_pyi: bool = False
 246
 247     def get_cache_key(self) -> str:
 248         if self.target_versions:
 249             version_str = ",".join(
 250                 str(version.value)
 251                 for version in sorted(self.target_versions, key=lambda v: v.value)
 252             )
 253         else:
 254             version_str = "-"
 255         parts = [
 256             version_str,
 257             str(self.line_length),
 258             str(int(self.string_normalization)),
 259             str(int(self.is_pyi)),
 260         ]
 261         return ".".join(parts)
 262
 263
 264 # Legacy name, left for integrations.
 265 FileMode = Mode
 266
 267
 268 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 269     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 270
 271
 272 def find_pyproject_toml(path_search_start: Iterable[str]) -> Optional[str]:
 273     """Find the absolute filepath to a pyproject.toml if it exists"""
 274     path_project_root = find_project_root(path_search_start)
 275     path_pyproject_toml = path_project_root / "pyproject.toml"
 276     return str(path_pyproject_toml) if path_pyproject_toml.is_file() else None
 277
 278
 279 def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
 280     """Parse a pyproject toml file, pulling out relevant parts for Black
 281
 282     If parsing fails, will raise a toml.TomlDecodeError
 283     """
 284     pyproject_toml = toml.load(path_config)
 285     config = pyproject_toml.get("tool", {}).get("black", {})
 286     return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 287
 288
 289 def read_pyproject_toml(
 290     ctx: click.Context, param: click.Parameter, value: Optional[str]
 291 ) -> Optional[str]:
 292     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 293
 294     Returns the path to a successfully found and read configuration file, None
 295     otherwise.
 296     """
 297     if not value:
 298         value = find_pyproject_toml(ctx.params.get("src", ()))
 299         if value is None:
 300             return None
 301
 302     try:
 303         config = parse_pyproject_toml(value)
 304     except (toml.TomlDecodeError, OSError) as e:
 305         raise click.FileError(
 306             filename=value, hint=f"Error reading configuration file: {e}"
 307         )
 308
 309     if not config:
 310         return None
 311     else:
 312         # Sanitize the values to be Click friendly. For more information please see:
 313         # https://github.com/psf/black/issues/1458
 314         # https://github.com/pallets/click/issues/1567
 315         config = {
 316             k: str(v) if not isinstance(v, (list, dict)) else v
 317             for k, v in config.items()
 318         }
 319
 320     target_version = config.get("target_version")
 321     if target_version is not None and not isinstance(target_version, list):
 322         raise click.BadOptionUsage(
 323             "target-version", "Config key target-version must be a list"
 324         )
 325
 326     default_map: Dict[str, Any] = {}
 327     if ctx.default_map:
 328         default_map.update(ctx.default_map)
 329     default_map.update(config)
 330
 331     ctx.default_map = default_map
 332     return value
 333
 334
 335 def target_version_option_callback(
 336     c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
 337 ) -> List[TargetVersion]:
 338     """Compute the target versions from a --target-version flag.
 339
 340     This is its own function because mypy couldn't infer the type correctly
 341     when it was a lambda, causing mypyc trouble.
 342     """
 343     return [TargetVersion[val.upper()] for val in v]
 344
 345
 346 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 347 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
 348 @click.option(
 349     "-l",
 350     "--line-length",
 351     type=int,
 352     default=DEFAULT_LINE_LENGTH,
 353     help="How many characters per line to allow.",
 354     show_default=True,
 355 )
 356 @click.option(
 357     "-t",
 358     "--target-version",
 359     type=click.Choice([v.name.lower() for v in TargetVersion]),
 360     callback=target_version_option_callback,
 361     multiple=True,
 362     help=(
 363         "Python versions that should be supported by Black's output. [default: per-file"
 364         " auto-detection]"
 365     ),
 366 )
 367 @click.option(
 368     "--pyi",
 369     is_flag=True,
 370     help=(
 371         "Format all input files like typing stubs regardless of file extension (useful"
 372         " when piping source on standard input)."
 373     ),
 374 )
 375 @click.option(
 376     "-S",
 377     "--skip-string-normalization",
 378     is_flag=True,
 379     help="Don't normalize string quotes or prefixes.",
 380 )
 381 @click.option(
 382     "--experimental-string-processing",
 383     is_flag=True,
 384     hidden=True,
 385     help=(
 386         "Experimental option that performs more normalization on string literals."
 387         " Currently disabled because it leads to some crashes."
 388     ),
 389 )
 390 @click.option(
 391     "--check",
 392     is_flag=True,
 393     help=(
 394         "Don't write the files back, just return the status.  Return code 0 means"
 395         " nothing would change.  Return code 1 means some files would be reformatted."
 396         " Return code 123 means there was an internal error."
 397     ),
 398 )
 399 @click.option(
 400     "--diff",
 401     is_flag=True,
 402     help="Don't write the files back, just output a diff for each file on stdout.",
 403 )
 404 @click.option(
 405     "--color/--no-color",
 406     is_flag=True,
 407     help="Show colored diff. Only applies when `--diff` is given.",
 408 )
 409 @click.option(
 410     "--fast/--safe",
 411     is_flag=True,
 412     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 413 )
 414 @click.option(
 415     "--include",
 416     type=str,
 417     default=DEFAULT_INCLUDES,
 418     help=(
 419         "A regular expression that matches files and directories that should be"
 420         " included on recursive searches.  An empty value means all files are included"
 421         " regardless of the name.  Use forward slashes for directories on all platforms"
 422         " (Windows, too).  Exclusions are calculated first, inclusions later."
 423     ),
 424     show_default=True,
 425 )
 426 @click.option(
 427     "--exclude",
 428     type=str,
 429     default=DEFAULT_EXCLUDES,
 430     help=(
 431         "A regular expression that matches files and directories that should be"
 432         " excluded on recursive searches.  An empty value means no paths are excluded."
 433         " Use forward slashes for directories on all platforms (Windows, too). "
 434         " Exclusions are calculated first, inclusions later."
 435     ),
 436     show_default=True,
 437 )
 438 @click.option(
 439     "--force-exclude",
 440     type=str,
 441     help=(
 442         "Like --exclude, but files and directories matching this regex will be "
 443         "excluded even when they are passed explicitly as arguments"
 444     ),
 445 )
 446 @click.option(
 447     "-q",
 448     "--quiet",
 449     is_flag=True,
 450     help=(
 451         "Don't emit non-error messages to stderr. Errors are still emitted; silence"
 452         " those with 2>/dev/null."
 453     ),
 454 )
 455 @click.option(
 456     "-v",
 457     "--verbose",
 458     is_flag=True,
 459     help=(
 460         "Also emit messages to stderr about files that were not changed or were ignored"
 461         " due to --exclude=."
 462     ),
 463 )
 464 @click.version_option(version=__version__)
 465 @click.argument(
 466     "src",
 467     nargs=-1,
 468     type=click.Path(
 469         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 470     ),
 471     is_eager=True,
 472 )
 473 @click.option(
 474     "--config",
 475     type=click.Path(
 476         exists=True,
 477         file_okay=True,
 478         dir_okay=False,
 479         readable=True,
 480         allow_dash=False,
 481         path_type=str,
 482     ),
 483     is_eager=True,
 484     callback=read_pyproject_toml,
 485     help="Read configuration from FILE path.",
 486 )
 487 @click.pass_context
 488 def main(
 489     ctx: click.Context,
 490     code: Optional[str],
 491     line_length: int,
 492     target_version: List[TargetVersion],
 493     check: bool,
 494     diff: bool,
 495     color: bool,
 496     fast: bool,
 497     pyi: bool,
 498     skip_string_normalization: bool,
 499     experimental_string_processing: bool,
 500     quiet: bool,
 501     verbose: bool,
 502     include: str,
 503     exclude: str,
 504     force_exclude: Optional[str],
 505     src: Tuple[str, ...],
 506     config: Optional[str],
 507 ) -> None:
 508     """The uncompromising code formatter."""
 509     write_back = WriteBack.from_configuration(check=check, diff=diff, color=color)
 510     if target_version:
 511         versions = set(target_version)
 512     else:
 513         # We'll autodetect later.
 514         versions = set()
 515     mode = Mode(
 516         target_versions=versions,
 517         line_length=line_length,
 518         is_pyi=pyi,
 519         string_normalization=not skip_string_normalization,
 520         experimental_string_processing=experimental_string_processing,
 521     )
 522     if config and verbose:
 523         out(f"Using configuration from {config}.", bold=False, fg="blue")
 524     if code is not None:
 525         print(format_str(code, mode=mode))
 526         ctx.exit(0)
 527     report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose)
 528     sources = get_sources(
 529         ctx=ctx,
 530         src=src,
 531         quiet=quiet,
 532         verbose=verbose,
 533         include=include,
 534         exclude=exclude,
 535         force_exclude=force_exclude,
 536         report=report,
 537     )
 538
 539     path_empty(
 540         sources,
 541         "No Python files are present to be formatted. Nothing to do 😴",
 542         quiet,
 543         verbose,
 544         ctx,
 545     )
 546
 547     if len(sources) == 1:
 548         reformat_one(
 549             src=sources.pop(),
 550             fast=fast,
 551             write_back=write_back,
 552             mode=mode,
 553             report=report,
 554         )
 555     else:
 556         reformat_many(
 557             sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
 558         )
 559
 560     if verbose or not quiet:
 561         out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
 562         click.secho(str(report), err=True)
 563     ctx.exit(report.return_code)
 564
 565
 566 def get_sources(
 567     *,
 568     ctx: click.Context,
 569     src: Tuple[str, ...],
 570     quiet: bool,
 571     verbose: bool,
 572     include: str,
 573     exclude: str,
 574     force_exclude: Optional[str],
 575     report: "Report",
 576 ) -> Set[Path]:
 577     """Compute the set of files to be formatted."""
 578     try:
 579         include_regex = re_compile_maybe_verbose(include)
 580     except re.error:
 581         err(f"Invalid regular expression for include given: {include!r}")
 582         ctx.exit(2)
 583     try:
 584         exclude_regex = re_compile_maybe_verbose(exclude)
 585     except re.error:
 586         err(f"Invalid regular expression for exclude given: {exclude!r}")
 587         ctx.exit(2)
 588     try:
 589         force_exclude_regex = (
 590             re_compile_maybe_verbose(force_exclude) if force_exclude else None
 591         )
 592     except re.error:
 593         err(f"Invalid regular expression for force_exclude given: {force_exclude!r}")
 594         ctx.exit(2)
 595
 596     root = find_project_root(src)
 597     sources: Set[Path] = set()
 598     path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
 599     gitignore = get_gitignore(root)
 600
 601     for s in src:
 602         p = Path(s)
 603         if p.is_dir():
 604             sources.update(
 605                 gen_python_files(
 606                     p.iterdir(),
 607                     root,
 608                     include_regex,
 609                     exclude_regex,
 610                     force_exclude_regex,
 611                     report,
 612                     gitignore,
 613                 )
 614             )
 615         elif s == "-":
 616             sources.add(p)
 617         elif p.is_file():
 618             normalized_path = normalize_path_maybe_ignore(p, root, report)
 619             if normalized_path is None:
 620                 continue
 621
 622             normalized_path = "/" + normalized_path
 623             # Hard-exclude any files that matches the `--force-exclude` regex.
 624             if force_exclude_regex:
 625                 force_exclude_match = force_exclude_regex.search(normalized_path)
 626             else:
 627                 force_exclude_match = None
 628             if force_exclude_match and force_exclude_match.group(0):
 629                 report.path_ignored(p, "matches the --force-exclude regular expression")
 630                 continue
 631
 632             sources.add(p)
 633         else:
 634             err(f"invalid path: {s}")
 635     return sources
 636
 637
 638 def path_empty(
 639     src: Sized, msg: str, quiet: bool, verbose: bool, ctx: click.Context
 640 ) -> None:
 641     """
 642     Exit if there is no `src` provided for formatting
 643     """
 644     if not src and (verbose or not quiet):
 645         out(msg)
 646         ctx.exit(0)
 647
 648
 649 def reformat_one(
 650     src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 651 ) -> None:
 652     """Reformat a single file under `src` without spawning child processes.
 653
 654     `fast`, `write_back`, and `mode` options are passed to
 655     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 656     """
 657     try:
 658         changed = Changed.NO
 659         if not src.is_file() and str(src) == "-":
 660             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 661                 changed = Changed.YES
 662         else:
 663             cache: Cache = {}
 664             if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 665                 cache = read_cache(mode)
 666                 res_src = src.resolve()
 667                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 668                     changed = Changed.CACHED
 669             if changed is not Changed.CACHED and format_file_in_place(
 670                 src, fast=fast, write_back=write_back, mode=mode
 671             ):
 672                 changed = Changed.YES
 673             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 674                 write_back is WriteBack.CHECK and changed is Changed.NO
 675             ):
 676                 write_cache(cache, [src], mode)
 677         report.done(src, changed)
 678     except Exception as exc:
 679         if report.verbose:
 680             traceback.print_exc()
 681         report.failed(src, str(exc))
 682
 683
 684 def reformat_many(
 685     sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 686 ) -> None:
 687     """Reformat multiple files using a ProcessPoolExecutor."""
 688     executor: Executor
 689     loop = asyncio.get_event_loop()
 690     worker_count = os.cpu_count()
 691     if sys.platform == "win32":
 692         # Work around https://bugs.python.org/issue26903
 693         worker_count = min(worker_count, 61)
 694     try:
 695         executor = ProcessPoolExecutor(max_workers=worker_count)
 696     except (ImportError, OSError):
 697         # we arrive here if the underlying system does not support multi-processing
 698         # like in AWS Lambda or Termux, in which case we gracefully fallback to
 699         # a ThreadPollExecutor with just a single worker (more workers would not do us
 700         # any good due to the Global Interpreter Lock)
 701         executor = ThreadPoolExecutor(max_workers=1)
 702
 703     try:
 704         loop.run_until_complete(
 705             schedule_formatting(
 706                 sources=sources,
 707                 fast=fast,
 708                 write_back=write_back,
 709                 mode=mode,
 710                 report=report,
 711                 loop=loop,
 712                 executor=executor,
 713             )
 714         )
 715     finally:
 716         shutdown(loop)
 717         if executor is not None:
 718             executor.shutdown()
 719
 720
 721 async def schedule_formatting(
 722     sources: Set[Path],
 723     fast: bool,
 724     write_back: WriteBack,
 725     mode: Mode,
 726     report: "Report",
 727     loop: asyncio.AbstractEventLoop,
 728     executor: Executor,
 729 ) -> None:
 730     """Run formatting of `sources` in parallel using the provided `executor`.
 731
 732     (Use ProcessPoolExecutors for actual parallelism.)
 733
 734     `write_back`, `fast`, and `mode` options are passed to
 735     :func:`format_file_in_place`.
 736     """
 737     cache: Cache = {}
 738     if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 739         cache = read_cache(mode)
 740         sources, cached = filter_cached(cache, sources)
 741         for src in sorted(cached):
 742             report.done(src, Changed.CACHED)
 743     if not sources:
 744         return
 745
 746     cancelled = []
 747     sources_to_cache = []
 748     lock = None
 749     if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 750         # For diff output, we need locks to ensure we don't interleave output
 751         # from different processes.
 752         manager = Manager()
 753         lock = manager.Lock()
 754     tasks = {
 755         asyncio.ensure_future(
 756             loop.run_in_executor(
 757                 executor, format_file_in_place, src, fast, mode, write_back, lock
 758             )
 759         ): src
 760         for src in sorted(sources)
 761     }
 762     pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
 763     try:
 764         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 765         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 766     except NotImplementedError:
 767         # There are no good alternatives for these on Windows.
 768         pass
 769     while pending:
 770         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 771         for task in done:
 772             src = tasks.pop(task)
 773             if task.cancelled():
 774                 cancelled.append(task)
 775             elif task.exception():
 776                 report.failed(src, str(task.exception()))
 777             else:
 778                 changed = Changed.YES if task.result() else Changed.NO
 779                 # If the file was written back or was successfully checked as
 780                 # well-formatted, store this information in the cache.
 781                 if write_back is WriteBack.YES or (
 782                     write_back is WriteBack.CHECK and changed is Changed.NO
 783                 ):
 784                     sources_to_cache.append(src)
 785                 report.done(src, changed)
 786     if cancelled:
 787         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 788     if sources_to_cache:
 789         write_cache(cache, sources_to_cache, mode)
 790
 791
 792 def format_file_in_place(
 793     src: Path,
 794     fast: bool,
 795     mode: Mode,
 796     write_back: WriteBack = WriteBack.NO,
 797     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 798 ) -> bool:
 799     """Format file under `src` path. Return True if changed.
 800
 801     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 802     code to the file.
 803     `mode` and `fast` options are passed to :func:`format_file_contents`.
 804     """
 805     if src.suffix == ".pyi":
 806         mode = replace(mode, is_pyi=True)
 807
 808     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 809     with open(src, "rb") as buf:
 810         src_contents, encoding, newline = decode_bytes(buf.read())
 811     try:
 812         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 813     except NothingChanged:
 814         return False
 815
 816     if write_back == WriteBack.YES:
 817         with open(src, "w", encoding=encoding, newline=newline) as f:
 818             f.write(dst_contents)
 819     elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 820         now = datetime.utcnow()
 821         src_name = f"{src}\t{then} +0000"
 822         dst_name = f"{src}\t{now} +0000"
 823         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 824
 825         if write_back == write_back.COLOR_DIFF:
 826             diff_contents = color_diff(diff_contents)
 827
 828         with lock or nullcontext():
 829             f = io.TextIOWrapper(
 830                 sys.stdout.buffer,
 831                 encoding=encoding,
 832                 newline=newline,
 833                 write_through=True,
 834             )
 835             f = wrap_stream_for_windows(f)
 836             f.write(diff_contents)
 837             f.detach()
 838
 839     return True
 840
 841
 842 def color_diff(contents: str) -> str:
 843     """Inject the ANSI color codes to the diff."""
 844     lines = contents.split("\n")
 845     for i, line in enumerate(lines):
 846         if line.startswith("+++") or line.startswith("---"):
 847             line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
 848         if line.startswith("@@"):
 849             line = "\033[36m" + line + "\033[0m"  # cyan, reset
 850         if line.startswith("+"):
 851             line = "\033[32m" + line + "\033[0m"  # green, reset
 852         elif line.startswith("-"):
 853             line = "\033[31m" + line + "\033[0m"  # red, reset
 854         lines[i] = line
 855     return "\n".join(lines)
 856
 857
 858 def wrap_stream_for_windows(
 859     f: io.TextIOWrapper,
 860 ) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32.AnsiToWin32"]:
 861     """
 862     Wrap the stream in colorama's wrap_stream so colors are shown on Windows.
 863
 864     If `colorama` is not found, then no change is made. If `colorama` does
 865     exist, then it handles the logic to determine whether or not to change
 866     things.
 867     """
 868     try:
 869         from colorama import initialise
 870
 871         # We set `strip=False` so that we can don't have to modify
 872         # test_express_diff_with_color.
 873         f = initialise.wrap_stream(
 874             f, convert=None, strip=False, autoreset=False, wrap=True
 875         )
 876
 877         # wrap_stream returns a `colorama.AnsiToWin32.AnsiToWin32` object
 878         # which does not have a `detach()` method. So we fake one.
 879         f.detach = lambda *args, **kwargs: None  # type: ignore
 880     except ImportError:
 881         pass
 882
 883     return f
 884
 885
 886 def format_stdin_to_stdout(
 887     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode
 888 ) -> bool:
 889     """Format file on stdin. Return True if changed.
 890
 891     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 892     write a diff to stdout. The `mode` argument is passed to
 893     :func:`format_file_contents`.
 894     """
 895     then = datetime.utcnow()
 896     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 897     dst = src
 898     try:
 899         dst = format_file_contents(src, fast=fast, mode=mode)
 900         return True
 901
 902     except NothingChanged:
 903         return False
 904
 905     finally:
 906         f = io.TextIOWrapper(
 907             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 908         )
 909         if write_back == WriteBack.YES:
 910             f.write(dst)
 911         elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 912             now = datetime.utcnow()
 913             src_name = f"STDIN\t{then} +0000"
 914             dst_name = f"STDOUT\t{now} +0000"
 915             d = diff(src, dst, src_name, dst_name)
 916             if write_back == WriteBack.COLOR_DIFF:
 917                 d = color_diff(d)
 918                 f = wrap_stream_for_windows(f)
 919             f.write(d)
 920         f.detach()
 921
 922
 923 def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
 924     """Reformat contents a file and return new contents.
 925
 926     If `fast` is False, additionally confirm that the reformatted code is
 927     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 928     `mode` is passed to :func:`format_str`.
 929     """
 930     if not src_contents.strip():
 931         raise NothingChanged
 932
 933     dst_contents = format_str(src_contents, mode=mode)
 934     if src_contents == dst_contents:
 935         raise NothingChanged
 936
 937     if not fast:
 938         assert_equivalent(src_contents, dst_contents)
 939         assert_stable(src_contents, dst_contents, mode=mode)
 940     return dst_contents
 941
 942
 943 def format_str(src_contents: str, *, mode: Mode) -> FileContent:
 944     """Reformat a string and return new contents.
 945
 946     `mode` determines formatting options, such as how many characters per line are
 947     allowed.  Example:
 948
 949     >>> import black
 950     >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode()))
 951     def f(arg: str = "") -> None:
 952         ...
 953
 954     A more complex example:
 955
 956     >>> print(
 957     ...   black.format_str(
 958     ...     "def f(arg:str='')->None: hey",
 959     ...     mode=black.Mode(
 960     ...       target_versions={black.TargetVersion.PY36},
 961     ...       line_length=10,
 962     ...       string_normalization=False,
 963     ...       is_pyi=False,
 964     ...     ),
 965     ...   ),
 966     ... )
 967     def f(
 968         arg: str = '',
 969     ) -> None:
 970         hey
 971
 972     """
 973     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 974     dst_contents = []
 975     future_imports = get_future_imports(src_node)
 976     if mode.target_versions:
 977         versions = mode.target_versions
 978     else:
 979         versions = detect_target_versions(src_node)
 980     normalize_fmt_off(src_node)
 981     lines = LineGenerator(
 982         remove_u_prefix="unicode_literals" in future_imports
 983         or supports_feature(versions, Feature.UNICODE_LITERALS),
 984         is_pyi=mode.is_pyi,
 985         normalize_strings=mode.string_normalization,
 986     )
 987     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 988     empty_line = Line()
 989     after = 0
 990     split_line_features = {
 991         feature
 992         for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
 993         if supports_feature(versions, feature)
 994     }
 995     for current_line in lines.visit(src_node):
 996         dst_contents.append(str(empty_line) * after)
 997         before, after = elt.maybe_empty_lines(current_line)
 998         dst_contents.append(str(empty_line) * before)
 999         for line in transform_line(
1000             current_line, mode=mode, features=split_line_features
1001         ):
1002             dst_contents.append(str(line))
1003     return "".join(dst_contents)
1004
1005
def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
    """Decode `src` and return a tuple of (decoded_contents, encoding, newline).

    The encoding is detected with `tokenize.detect_encoding`.  `newline` is
    "\\r\\n" when the first line ends with CRLF and "\\n" otherwise, while
    `decoded_contents` is read with universal newlines (i.e. only contains LF).
    Empty input yields an empty string and "\\n".
    """
    buffer = io.BytesIO(src)
    encoding, first_lines = tokenize.detect_encoding(buffer.readline)
    if len(first_lines) == 0:
        return "", encoding, "\n"

    newline = "\r\n" if first_lines[0].endswith(b"\r\n") else "\n"
    # Encoding detection consumed the first line(s); rewind before decoding.
    buffer.seek(0)
    with io.TextIOWrapper(buffer, encoding) as reader:
        contents = reader.read()
    return contents, encoding, newline
1021
1022
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, when parsing for `target_versions`.

    An empty `target_versions` means nothing was specified, in which case all
    grammars are returned.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    python2_only = all(version.is_python2() for version in target_versions)
    if python2_only:
        # Python 2-only code, so try Python 2 grammars.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # Python 3-compatible code, so only try Python 3 grammars.  Each grammar is
    # paired with the feature that rules it out: it is kept only when
    # `supports_feature` is false for that feature.  If we have to parse both,
    # async as a keyword is tried first.
    candidates = (
        (
            # Python 3.7+
            Feature.ASYNC_IDENTIFIERS,
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
        ),
        (
            # Python 3.0-3.6
            Feature.ASYNC_KEYWORDS,
            pygram.python_grammar_no_print_statement_no_exec_statement,
        ),
    )
    # At least one candidate must survive, because every Python version has
    # exactly one of the two 'ASYNC_*' flags
    return [
        grammar
        for excluding_feature, grammar in candidates
        if not supports_feature(target_versions, excluding_feature)
    ]
1060
1061
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse `src_txt` and return the resulting lib2to3 Node.

    A trailing newline is appended when missing.  The grammars returned by
    :func:`get_grammars` for `target_versions` are tried in order and the first
    successful parse is used; a bare Leaf result is wrapped in a `file_input`
    Node.

    Raises `InvalidInput`, describing the failure under the last grammar tried,
    when none of the grammars can parse the source.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar, pytree.convert)
        try:
            tree = drv.parse_string(src_txt, True)
        except ParseError as pe:
            lineno, column = pe.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            # Remember the failure and move on to the next grammar.
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
            continue

        if isinstance(tree, Leaf):
            tree = Node(syms.file_input, [tree])
        return tree

    # Every grammar failed: report the most recent parse error.
    raise exc from None
1087
1088
def lib2to3_unparse(node: Node) -> str:
    """Return the source text of a lib2to3 `node`, i.e. `str(node)`."""
    return str(node)
1093
1094
class Visitor(Generic[T]):
    """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""

    def visit(self, node: LN) -> Iterator[T]:
        """Dispatch `node` to the matching `visit_*()` method and yield its results.

        The method name is derived from `node.type`: types below 256 are looked
        up in `token.tok_name` (e.g. `visit_INDENT` for Leaf objects), all other
        types are named through `type_repr()` (e.g. `visit_simple_stmt` for Node
        objects).  When no dedicated `visit_*()` method exists,
        `visit_default()` is used instead.
        """
        is_token = node.type < 256
        name = token.tok_name[node.type] if is_token else str(type_repr(node.type))
        # The handler is looked up with a `None` fallback and branched on
        # explicitly (instead of passing self.visit_default as the getattr
        # default) to save creating a bound method object and so that mypyc can
        # generate a native call to visit_default.
        handler = getattr(self, f"visit_{name}", None)
        if not handler:
            yield from self.visit_default(node)
        else:
            yield from handler(node)

    def visit_default(self, node: LN) -> Iterator[T]:
        """Fallback `visit_*()` implementation: visit each child of a Node.

        Leaves have no children, so nothing is yielded for them.
        """
        if not isinstance(node, Node):
            return

        for child in node.children:
            yield from self.visit(child)
1127
1128
@dataclass
class DebugVisitor(Visitor[T]):
    """Visitor that prints the visited lib2to3 tree through `out()`."""

    # Current nesting level; every level indents the output by two spaces.
    tree_depth: int = 0

    def visit_default(self, node: LN) -> Iterator[T]:
        """Print `node`; for a Node, also visit its children one level deeper."""
        indent = " " * (2 * self.tree_depth)
        if not isinstance(node, Node):
            # Leaf: token name, optional prefix and value, all on one line.
            leaf_type = token.tok_name.get(node.type, str(node.type))
            out(f"{indent}{leaf_type}", fg="blue", nl=False)
            if node.prefix:
                # We don't have to handle prefixes for `Node` objects since
                # that delegates to the first child anyway.
                out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
            out(f" {node.value!r}", fg="blue", bold=False)
            return

        # Node: an opening line, the children, then a matching closing line.
        node_type = type_repr(node.type)
        out(f"{indent}{node_type}", fg="yellow")
        self.tree_depth += 1
        for child in node.children:
            yield from self.visit(child)

        self.tree_depth -= 1
        out(f"{indent}/{node_type}", fg="yellow", bold=False)

    @classmethod
    def show(cls, code: Union[str, Leaf, Node]) -> None:
        """Pretty-print the lib2to3 AST of `code`.

        `code` may be a string, which is parsed with `lib2to3_parse()` first,
        or an already parsed Leaf or Node.  Convenience method for debugging.
        """
        visitor: DebugVisitor[None] = DebugVisitor()
        if isinstance(code, str):
            code = lib2to3_parse(code)
        # The visitor is a generator; drain it so that everything gets printed.
        for _ in visitor.visit(code):
            pass
1163
1164
# Token types for line breaks and indentation changes.
WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
# Grammar symbols of compound statements (plus `except_clause`).
STATEMENT: Final = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
# Extra token type used for standalone comments.  It is registered in
# `token.tok_name` so that name lookups by token type (e.g. the `visit_*`
# dispatch in `Visitor.visit`) resolve it to "STANDALONE_COMMENT".
# NOTE(review): 153 is presumably chosen to not clash with the token numbers
# blib2to3 already defines -- confirm against blib2to3.pgen2.token.
STANDALONE_COMMENT: Final = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
# Keyword values of the boolean operators.
LOGIC_OPERATORS: Final = {"and", "or"}
# Token types of the comparison operators that have their own token.
COMPARATORS: Final = {
    token.LESS,
    token.GREATER,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESSEQUAL,
    token.GREATEREQUAL,
}
# Token types of the arithmetic and bitwise operators; the same token types are
# the keys of MATH_PRIORITIES below.
MATH_OPERATORS: Final = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
# `*` and `**`.
STARS: Final = {token.STAR, token.DOUBLESTAR}
# The stars plus `/`.
VARARGS_SPECIALS: Final = STARS | {token.SLASH}
# Grammar symbols of argument and parameter lists.
VARARGS_PARENTS: Final = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
# Grammar symbols of collection literals and expression lists.
UNPACKING_PARENTS: Final = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
    syms.testlist_star_expr,
}
# Grammar symbols of expression nodes.  `Line.is_complex_subscript` treats a
# subscript as complex when any node under it has one of these types.
TEST_DESCENDANTS: Final = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
# String values of the assignment and augmented assignment operators.
ASSIGNMENTS: Final = {
    "=",
    "+=",
    "-=",
    "*=",
    "@=",
    "/=",
    "%=",
    "&=",
    "|=",
    "^=",
    "<<=",
    ">>=",
    "**=",
    "//=",
}
# Delimiter priorities.  `BracketTracker.delimiters` stores values like these
# per leaf, and `BracketTracker.max_delimiter_priority` treats the largest
# number as the highest priority.
# NOTE(review): which leaf gets which priority is decided by the
# `is_split_*_delimiter()` helpers, which are not visible in this section.
COMPREHENSION_PRIORITY: Final = 20
COMMA_PRIORITY: Final = 18
TERNARY_PRIORITY: Final = 16
LOGIC_PRIORITY: Final = 14
STRING_PRIORITY: Final = 12
COMPARATOR_PRIORITY: Final = 10
# Priority of each math operator, keyed by token type.
MATH_PRIORITIES: Final = {
    token.VBAR: 9,
    token.CIRCUMFLEX: 8,
    token.AMPER: 7,
    token.LEFTSHIFT: 6,
    token.RIGHTSHIFT: 6,
    token.PLUS: 5,
    token.MINUS: 5,
    token.STAR: 4,
    token.SLASH: 4,
    token.DOUBLESLASH: 4,
    token.PERCENT: 4,
    token.AT: 4,
    token.TILDE: 3,
    token.DOUBLESTAR: 2,
}
DOT_PRIORITY: Final = 1
1275
1276
@dataclass
class BracketTracker:
    """Keeps track of brackets on a line."""

    # Current nesting depth; also bumped between `for`/`in` and `lambda`/`:`.
    depth: int = 0
    # Open brackets, keyed by (depth, type of the expected closing bracket).
    bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict)
    # Priority of every delimiter recorded at depth 0, keyed by `id(leaf)`.
    delimiters: Dict[LeafID, Priority] = field(default_factory=dict)
    # The most recently marked leaf.
    previous: Optional[Leaf] = None
    # Depths at which a `for` (resp. `lambda`) raised `depth`.
    _for_loop_depths: List[int] = field(default_factory=list)
    _lambda_argument_depths: List[int] = field(default_factory=list)
    # Brackets with an empty value that were seen by `mark()`.
    invisible: List[Leaf] = field(default_factory=list)

    def mark(self, leaf: Leaf) -> None:
        """Mark `leaf` with bracket-related metadata. Keep track of delimiters.

        Every leaf gets an int `bracket_depth` attribute telling how deep within
        brackets it is; 0 means there are no enclosing brackets that started on
        this line.

        A closing bracket additionally gets an `opening_bracket` attribute
        pointing at the bracket it pairs with.  The link is one-directional to
        avoid reference cycles.

        A delimiter (a token on which Black can split the line if needed) found
        on depth 0 has its `id()` stored in the tracker's `delimiters` field.

        Comments are ignored entirely.
        """
        if leaf.type == token.COMMENT:
            return

        self.maybe_decrement_after_for_loop_variable(leaf)
        self.maybe_decrement_after_lambda_arguments(leaf)
        if leaf.type in CLOSING_BRACKETS:
            self.depth -= 1
            leaf.opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
            if not leaf.value:
                self.invisible.append(leaf)
        leaf.bracket_depth = self.depth
        if self.depth == 0:
            split_before = is_split_before_delimiter(leaf, self.previous)
            if split_before and self.previous is not None:
                # Splitting before `leaf` is recorded on the preceding leaf.
                self.delimiters[id(self.previous)] = split_before
            else:
                split_after = is_split_after_delimiter(leaf, self.previous)
                if split_after:
                    self.delimiters[id(leaf)] = split_after
        if leaf.type in OPENING_BRACKETS:
            self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
            self.depth += 1
            if not leaf.value:
                self.invisible.append(leaf)
        self.previous = leaf
        self.maybe_increment_lambda_arguments(leaf)
        self.maybe_increment_for_loop_variable(leaf)

    def any_open_brackets(self) -> bool:
        """Return True if there is a yet unmatched open bracket on the line."""
        return len(self.bracket_match) > 0

    def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
        """Return the highest priority among the delimiters found on the line.

        Delimiters whose leaf id is in `exclude` are not considered.  Values are
        consistent with what `is_split_*_delimiter()` return.
        Raises ValueError when no delimiter is left to consider.
        """
        candidates = [
            priority
            for leaf_id, priority in self.delimiters.items()
            if leaf_id not in exclude
        ]
        return max(candidates)

    def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
        """Return how many delimiters have the given `priority`.

        When `priority` is not passed (or is 0), the max priority on the line is
        used.  Returns 0 when the line has no delimiters at all.
        """
        if not self.delimiters:
            return 0

        wanted = priority or self.max_delimiter_priority()
        return list(self.delimiters.values()).count(wanted)

    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
        """Raise the depth after a `for` keyword; return True if that happened.

        In a for loop or comprehension the variables are often unpacks.  Raising
        the depth of the tokens between `for` and `in` avoids splitting on the
        comma in this situation.
        """
        if leaf.type != token.NAME or leaf.value != "for":
            return False

        self.depth += 1
        self._for_loop_depths.append(self.depth)
        return True

    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
        """Undo `maybe_increment_for_loop_variable` on the matching `in`.

        Returns True if the depth was lowered.
        """
        if (
            not self._for_loop_depths
            or self._for_loop_depths[-1] != self.depth
            or leaf.type != token.NAME
            or leaf.value != "in"
        ):
            return False

        self.depth -= 1
        self._for_loop_depths.pop()
        return True

    def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
        """Raise the depth after a `lambda` keyword; return True if that happened.

        A lambda expression might have more than one argument.  Raising the
        depth of the tokens between `lambda` and `:` avoids splitting on the
        comma in this situation.
        """
        if leaf.type != token.NAME or leaf.value != "lambda":
            return False

        self.depth += 1
        self._lambda_argument_depths.append(self.depth)
        return True

    def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
        """Undo `maybe_increment_lambda_arguments` on the matching `:`.

        Returns True if the depth was lowered.
        """
        if (
            not self._lambda_argument_depths
            or self._lambda_argument_depths[-1] != self.depth
            or leaf.type != token.COLON
        ):
            return False

        self.depth -= 1
        self._lambda_argument_depths.pop()
        return True

    def get_open_lsqb(self) -> Optional[Leaf]:
        """Return the most recent opening square bracket (if any)."""
        key = (self.depth - 1, token.RSQB)
        return self.bracket_match.get(key)
1412
1413
@dataclass
class Line:
    """Holds leaves and comments. Can be printed with `str(line)`."""

    # Indentation level; rendered as four spaces per level.
    depth: int = 0
    leaves: List[Leaf] = field(default_factory=list)
    # Comments keyed by the `id()` of the leaf they are attached to;
    # keys ordered like `leaves`
    comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict)
    bracket_tracker: BracketTracker = field(default_factory=BracketTracker)
    inside_brackets: bool = False
    should_explode: bool = False

    def append(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Add a new `leaf` to the end of the line.

        Leaves that are neither brackets nor have a non-blank value are dropped.
        Unless `preformatted` is True, the `leaf` receives a new consistent
        whitespace prefix and metadata applied by :class:`BracketTracker` (the
        metadata is also applied to preformatted leaves when the line is inside
        brackets).  Empty parentheses after a class name are removed once the
        colon arrives.

        Inline comments are put aside.
        """
        if leaf.type not in BRACKETS and not leaf.value.strip():
            return

        if leaf.type == token.COLON and self.is_class_paren_empty:
            # `class A():` -> `class A:`
            del self.leaves[-2:]
        if self.leaves and not preformatted:
            # Note: at this point leaf.prefix should be empty except for
            # imports, for which we only preserve newlines.
            leaf.prefix += whitespace(
                leaf, complex_subscript=self.is_complex_subscript(leaf)
            )
        if self.inside_brackets or not preformatted:
            self.bracket_tracker.mark(leaf)
            if self.maybe_should_explode(leaf):
                self.should_explode = True
        if self.append_comment(leaf):
            return

        self.leaves.append(leaf)

    def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Like :func:`append()` but disallow invalid standalone comment structure.

        Raises ValueError when any `leaf` is appended after a standalone comment
        or when a standalone comment is not the first leaf on the line.  The
        checks only apply while the bracket tracker is at depth 0.
        """
        outside_brackets = self.bracket_tracker.depth == 0
        if outside_brackets and self.is_comment:
            raise ValueError("cannot append to standalone comments")

        if outside_brackets and self.leaves and leaf.type == STANDALONE_COMMENT:
            raise ValueError("cannot append standalone comments to a populated line")

        self.append(leaf, preformatted=preformatted)

    @property
    def is_comment(self) -> bool:
        """Is this line a standalone comment?"""
        if len(self.leaves) != 1:
            return False

        return self.leaves[0].type == STANDALONE_COMMENT

    @property
    def is_decorator(self) -> bool:
        """Is this line a decorator?"""
        if not self:
            return False

        return self.leaves[0].type == token.AT

    @property
    def is_import(self) -> bool:
        """Is this an import line?"""
        if not self:
            return False

        return is_import(self.leaves[0])

    @property
    def is_class(self) -> bool:
        """Is this line a class definition?"""
        if not self:
            return False

        first = self.leaves[0]
        return first.type == token.NAME and first.value == "class"

    @property
    def is_stub_class(self) -> bool:
        """Is this line a class definition with a body consisting only of "..."?"""
        if not self.is_class:
            return False

        three_dots = [Leaf(token.DOT, ".") for _ in range(3)]
        return self.leaves[-3:] == three_dots

    @property
    def is_def(self) -> bool:
        """Is this a function definition? (Also returns True for async defs.)"""
        if not self.leaves:
            return False

        first = self.leaves[0]
        if first.type == token.NAME and first.value == "def":
            return True

        # Otherwise only `async def` qualifies, which needs a second leaf.
        if first.type != token.ASYNC or len(self.leaves) < 2:
            return False

        second = self.leaves[1]
        return second.type == token.NAME and second.value == "def"

    @property
    def is_class_paren_empty(self) -> bool:
        """Is this a class with no base classes but using parentheses?

        Those are unnecessary and should be removed.
        """
        if len(self.leaves) != 4 or not self.is_class:
            return False

        opening, closing = self.leaves[2:]
        return (
            opening.type == token.LPAR
            and opening.value == "("
            and closing.type == token.RPAR
            and closing.value == ")"
        )

    @property
    def is_triple_quoted_string(self) -> bool:
        """Is the line a triple quoted string?"""
        if not self:
            return False

        first = self.leaves[0]
        return first.type == token.STRING and first.value.startswith(('"""', "'''"))

    def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
        """Is there a standalone comment at a bracket depth of at most `depth_limit`?

        If so, needs to be split before emitting.
        """
        return any(
            leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit
            for leaf in self.leaves
        )

    def contains_uncollapsable_type_comments(self) -> bool:
        """Is there a type comment that would break if the line were joined?"""
        if not self.leaves:
            return False

        last_leaf = self.leaves[-1]
        ignored_ids = {id(last_leaf)}
        if last_leaf.type == token.COMMA or (
            last_leaf.type == token.RPAR and not last_leaf.value
        ):
            # When trailing commas or optional parens are inserted by Black for
            # consistency, comments after the previous last element are not moved
            # (they don't have to, rendering will still be correct).  So we ignore
            # trailing commas and invisible parens, and look one leaf further back.
            if len(self.leaves) < 2:
                return False

            ignored_ids.add(id(self.leaves[-2]))

        # A type comment is uncollapsable if it is attached to a leaf
        # that isn't at the end of the line (since that could cause it
        # to get associated to a different argument) or if there are
        # comments before it (since that could cause it to get hidden
        # behind a comment).
        comment_seen = False
        for leaf_id, attached in self.comments.items():
            for comment in attached:
                if is_type_comment(comment) and (
                    comment_seen
                    or (
                        not is_type_comment(comment, " ignore")
                        and leaf_id not in ignored_ids
                    )
                ):
                    return True

                comment_seen = True

        return False

    def contains_unsplittable_type_ignore(self) -> bool:
        """Does a `type: ignore` comment forbid splitting this line?"""
        if not self.leaves:
            return False

        # If a 'type: ignore' is attached to the end of a line, we
        # can't split the line, because we can't know which of the
        # subexpressions the ignore was meant to apply to.
        #
        # We only want this to apply to actual physical lines from the
        # original source, though: we don't want the presence of a
        # 'type: ignore' at the end of a multiline expression to
        # justify pushing it all onto one line. Thus we
        # (unfortunately) need to check the actual source lines and
        # only report an unsplittable 'type: ignore' if this line was
        # one line in the original code.

        # Grab the first and last line numbers, skipping generated leaves
        first_line = next((leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0)
        last_line = next(
            (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0
        )
        if first_line != last_line:
            return False

        # We look at the last two leaves since a comma or an
        # invisible paren could have been added at the end of the
        # line.
        return any(
            is_type_comment(comment, " ignore")
            for leaf in self.leaves[-2:]
            for comment in self.comments.get(id(leaf), [])
        )

    def contains_multiline_strings(self) -> bool:
        """Is any leaf on the line a multiline string?"""
        for leaf in self.leaves:
            if is_multiline_string(leaf):
                return True

        return False

    def maybe_should_explode(self, closing: Leaf) -> bool:
        """Return True if this line should explode (always be split), that is when:
        - `closing` is a closing bracket preceded by a trailing comma; and
        - it's not a one-tuple.
        """
        if closing.type not in CLOSING_BRACKETS:
            return False

        if not self.leaves or self.leaves[-1].type != token.COMMA:
            return False

        # Only parentheses can form a one-tuple; import lines never do.
        if closing.type in {token.RBRACE, token.RSQB} or self.is_import:
            return True

        return not is_one_tuple_between(closing.opening_bracket, closing, self.leaves)

    def append_comment(self, comment: Leaf) -> bool:
        """Add an inline or standalone comment to the line.

        Returns True when `comment` was stored in `comments`.  Returns False
        when the caller should append it as a regular leaf instead, which is
        the case for non-comments and for standalone comments (an inline comment
        with nothing to attach to is converted to a standalone one).
        """
        if (
            comment.type == STANDALONE_COMMENT
            and self.bracket_tracker.any_open_brackets()
        ):
            comment.prefix = ""
            return False

        if comment.type != token.COMMENT:
            return False

        if not self.leaves:
            comment.type = STANDALONE_COMMENT
            comment.prefix = ""
            return False

        anchor = self.leaves[-1]
        if (
            anchor.type == token.RPAR
            and not anchor.value
            and anchor.parent
            and len(list(anchor.parent.leaves())) <= 3
            and not is_type_comment(comment)
        ):
            # Comments on an optional parens wrapping a single leaf should belong to
            # the wrapped node except if it's a type comment. Pinning the comment like
            # this avoids unstable formatting caused by comment migration.
            if len(self.leaves) < 2:
                comment.type = STANDALONE_COMMENT
                comment.prefix = ""
                return False

            anchor = self.leaves[-2]
        self.comments.setdefault(id(anchor), []).append(comment)
        return True

    def comments_after(self, leaf: Leaf) -> List[Leaf]:
        """Return the comments that should appear directly after `leaf`."""
        try:
            return self.comments[id(leaf)]
        except KeyError:
            return []

    def remove_trailing_comma(self) -> None:
        """Remove the trailing comma and move the comments attached to it.

        The comments end up on the leaf that becomes the last one.
        """
        comma = self.leaves.pop()
        orphaned = self.comments.pop(id(comma), [])
        new_last = self.leaves[-1]
        self.comments.setdefault(id(new_last), []).extend(orphaned)

    def is_complex_subscript(self, leaf: Leaf) -> bool:
        """Return True iff `leaf` is part of a slice with non-trivial exprs."""
        open_lsqb = self.bracket_tracker.get_open_lsqb()
        if open_lsqb is None:
            return False

        start = open_lsqb.next_sibling
        if isinstance(start, Node):
            if start.type == syms.listmaker:
                return False

            if start.type == syms.subscriptlist:
                # Only inspect the element of the subscript list holding `leaf`.
                start = child_towards(start, leaf)
        if start is None:
            return False

        return any(n.type in TEST_DESCENDANTS for n in start.pre_order())

    def clone(self) -> "Line":
        """Return a new, empty line with the same depth and flags."""
        return Line(
            depth=self.depth,
            inside_brackets=self.inside_brackets,
            should_explode=self.should_explode,
        )

    def __str__(self) -> str:
        """Render the line."""
        if not self:
            return "\n"

        indent = "    " * self.depth
        remaining = iter(self.leaves)
        first = next(remaining)
        # The indentation goes between the first leaf's prefix and its value.
        parts = [f"{first.prefix}{indent}{first.value}"]
        parts.extend(str(leaf) for leaf in remaining)
        parts.extend(
            str(comment)
            for attached in self.comments.values()
            for comment in attached
        )
        parts.append("\n")
        return "".join(parts)

    def __bool__(self) -> bool:
        """Return True if the line has leaves or comments."""
        return bool(self.leaves) or bool(self.comments)
1743
1744
@dataclass
class EmptyLineTracker:
    """Provides a stateful method that returns the number of potential extra
    empty lines needed before and after the currently processed line.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    # Selects the tighter blank-line limits used below (presumably enabled for
    # `.pyi` stub files -- the caller is outside this class).
    is_pyi: bool = False
    # The line passed to the most recent `maybe_empty_lines()` call, if any.
    previous_line: Optional[Line] = None
    # The `after` count returned for `previous_line`.
    previous_after: int = 0
    # Stack of depths of the `def`/`class` lines seen so far; entries are popped
    # once a line at the same or a shallower depth is processed.
    previous_defs: List[int] = field(default_factory=list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).

        The `before` count is reduced by the `after` count that was already
        returned for the previous line, and is forced to 0 for the very first
        line.  The tracker's `previous_after` and `previous_line` are updated
        as a side effect.
        """
        before, after = self._maybe_empty_lines(current_line)
        before = (
            # Black should not insert empty lines at the beginning
            # of the file
            0
            if self.previous_line is None
            else before - self.previous_after
        )
        self.previous_after = after
        self.previous_line = current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw `(before, after)` counts for `current_line`.

        Side effects: the first leaf's prefix is reset to "" and entries of
        `previous_defs` at the same or a deeper depth are popped.
        """
        # Upper bound on blank lines kept from the source: 2 at depth 0 outside
        # pyi mode, otherwise 1.
        max_allowed = 1
        if current_line.depth == 0:
            max_allowed = 1 if self.is_pyi else 2
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = first_leaf.prefix.count("\n")
            before = min(before, max_allowed)
            first_leaf.prefix = ""
        else:
            before = 0
        depth = current_line.depth
        # Leaving one or more previously seen def/class bodies: override `before`
        # with a fixed count that depends only on pyi mode and on whether the
        # current line is nested.
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            if self.is_pyi:
                before = 0 if depth else 1
            else:
                before = 1 if depth else 2
        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        # First non-import line after an import at the same depth: at least one
        # blank line.
        if (
            self.previous_line
            and self.previous_line.is_import
            and not current_line.is_import
            and depth == self.previous_line.depth
        ):
            return (before or 1), 0

        # Triple-quoted string right after a class line: one blank line after it.
        if (
            self.previous_line
            and self.previous_line.is_class
            and current_line.is_triple_quoted_string
        ):
            return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        """Return `(before, after)` for a decorator, `def` or `class` line.

        `before` is the count computed by `_maybe_empty_lines()`; it is only
        consulted for the "comment directly above" check.  The returned `after`
        is always 0.  Non-decorator lines push their depth onto `previous_defs`.
        """
        if not current_line.is_decorator:
            self.previous_defs.append(current_line.depth)
        if self.previous_line is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        # Nothing between a decorator and whatever follows it.
        if self.previous_line.is_decorator:
            return 0, 0

        # First line nested directly under a class/def line.
        if self.previous_line.depth < current_line.depth and (
            self.previous_line.is_class or self.previous_line.is_def
        ):
            return 0, 0

        # A comment at the same depth with no blank line after it stays attached.
        if (
            self.previous_line.is_comment
            and self.previous_line.depth == current_line.depth
            and before == 0
        ):
            return 0, 0

        if self.is_pyi:
            if self.previous_line.depth > current_line.depth:
                newlines = 1
            elif current_line.is_class or self.previous_line.is_class:
                if current_line.is_stub_class and self.previous_line.is_stub_class:
                    # No blank line between classes with an empty body
                    newlines = 0
                else:
                    newlines = 1
            elif current_line.is_def and not self.previous_line.is_def:
                # Blank line between a block of functions and a block of non-functions
                newlines = 1
            else:
                newlines = 0
        else:
            newlines = 2
        # Nested definitions get one blank line fewer (never below zero).
        if current_line.depth and newlines:
            newlines -= 1
        return newlines, 0
1860
1861
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    # Enables the stub-specific branches in `visit_suite`/`visit_simple_stmt`.
    is_pyi: bool = False
    # When True, STRING leaves get their prefix and quotes normalized.
    normalize_strings: bool = True
    # The line currently being accumulated; replaced each time one is emitted.
    current_line: Line = field(default_factory=Line)
    # Forwarded to `normalize_string_prefix()`.
    remove_u_prefix: bool = False

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the current line is empty, nothing is emitted; only its depth is
        adjusted by `indent`.

        Otherwise the current line is yielded as-is (no splitting happens in
        this method) and a new current_line is set up at the old depth plus
        `indent`.
        """
        if not self.current_line:
            self.current_line.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = Line(depth=complete_line.depth + indent)
        yield complete_line

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`.

        For a leaf, the comments found in its prefix are placed first, then the
        leaf itself is normalized and appended to the current line unless its
        type is in `WHITESPACE`.
        """
        if isinstance(node, Leaf):
            # Sampled once, before any comment is appended to the line.
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if any_open_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                elif comment.type == token.COMMENT:
                    # regular trailing comment
                    self.current_line.append(comment)
                    yield from self.line()

                else:
                    # regular standalone comment
                    yield from self.line()

                    self.current_line.append(comment)
                    yield from self.line()

            normalize_prefix(node, inside_brackets=any_open_brackets)
            if self.normalize_strings and node.type == token.STRING:
                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                normalize_string_quotes(node)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This methods puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            # A keyword child starts a new line: flush what was accumulated.
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite.

        In pyi mode a stub suite is reduced to its child at index 2; the other
        children are not visited.
        """
        if self.is_pyi and is_stub_suite(node):
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        # True when the parent's type is in STATEMENT, i.e. this simple statement
        # hangs directly off a compound statement (presumably a one-line body
        # such as `if x: y` -- TODO confirm against the grammar).
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                # Stub bodies in pyi mode stay on the current line.
                yield from self.visit_default(node)
            else:
                # Emit the statement on its own line, one level deeper.
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            # Flush the current line first, except inside a pyi stub suite.
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        # Visit children up to and including the ASYNC token...
        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # ...then visit the children of the next child directly instead of
        # visiting that child node itself.
        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        # Flush the current line before every child.
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        # The semicolon leaf itself is never appended to a line.
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a standalone comment leaf.

        Outside of brackets the current line is flushed first; inside brackets
        the comment is added to the line being built.
        """
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def visit_factor(self, node: Node) -> Iterator[Line]:
        """Force parentheses between a unary op and a binary power:

        -2 ** 8 -> -(2 ** 8)
        """
        _operator, operand = node.children
        if (
            operand.type == syms.power
            and len(operand.children) == 3
            and operand.children[1].type == token.DOUBLESTAR
        ):
            lpar = Leaf(token.LPAR, "(")
            rpar = Leaf(token.RPAR, ")")
            # Detach the operand and put a parenthesized atom in its place.
            index = operand.remove() or 0
            node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
        yield from self.visit_default(node)

    def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a string leaf, rewriting the body of docstrings first.

        The slicing below assumes three quote characters on each side of the
        body, after the string prefix.
        """
        if is_docstring(leaf) and "\\\n" not in leaf.value:
            # We're ignoring docstrings with backslash newline escapes because changing
            # indentation of those changes the AST representation of the code.
            prefix = get_string_prefix(leaf.value)
            lead_len = len(prefix) + 3
            tail_len = -3
            indent = " " * 4 * self.current_line.depth
            docstring = fix_docstring(leaf.value[lead_len:tail_len], indent)
            if docstring:
                # Pad with a space when the body starts or ends with the quote
                # character itself.
                if leaf.value[lead_len - 1] == docstring[0]:
                    docstring = " " + docstring
                if leaf.value[tail_len + 1] == docstring[-1]:
                    docstring = docstring + " "
            leaf.value = leaf.value[0:lead_len] + docstring + leaf.value[tail_len:]

        yield from self.visit_default(leaf)

    def __post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        # Every statement visitor below is `visit_stmt` with pre-bound `keywords`
        # and `parens`.
        v = self.visit_stmt
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
        # Aliases: these node types reuse an existing visitor.
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
2078
2079
# Node types grouped under the name "implicit tuple" (presumably comma-separated
# expression lists written without parentheses -- usage is outside this view).
IMPLICIT_TUPLE = {
    syms.testlist,
    syms.testlist_star_expr,
    syms.exprlist,
}
# Maps every opening bracket token to its matching closing bracket token.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Leaf types for which `whitespace()` returns an empty prefix unconditionally.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
2086
2087
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    The result is one of "" (no space), " " (single space) or "  " (two spaces,
    used only for trailing comments); for a typed argument default the prefix
    already stored on the preceding `=` leaf is reused.  The decision is made in
    three stages: unconditional cases based on the leaf type, cases based on
    what precedes the leaf, and cases based on the type of the leaf's parent.
    Anything not rejected gets a single space.

    Raises AssertionError for a leaf without a parent (other than the types
    handled unconditionally).
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    # Colons only ever get a space inside subscripts/slices.
    if t == token.COLON and p.type not in {
        syms.subscript,
        syms.subscriptlist,
        syms.sliceop,
    }:
        return NO

    # --- Stage 2: look at what comes right before the leaf. ---
    prev = leaf.prev_sibling
    if not prev:
        # First child of its parent: inspect the leaf preceding the parent.
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist,
                    syms.argument,
                    syms.parameters,
                    syms.varargslist,
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument.  So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in VARARGS_SPECIALS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            # Operand directly after a unary operator.
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    # --- Stage 3: decide based on the type of the parent node. ---
    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument.  So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                # A dot directly after a NUMBER leaf keeps its space.
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        # dict unpacking
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if prevp.type == token.COLON and prevp_parent.type in {
                syms.subscript,
                syms.sliceop,
            }:
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t in {token.NAME, token.NUMBER, token.STRING}:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    # NOTE: a trailing `elif p.type == syms.sliceop` branch used to live here.
    # It could never run because `sliceop` parents are already consumed by the
    # "indexing" branch above, so it was removed.
    return SPACE
2316
2317
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Walks up through the parents of `node` until one of them has a previous
    sibling.  If that sibling is a leaf it is returned; otherwise its last leaf
    is returned, or None when it contains no leaves.  None is also returned
    when no ancestor has a previous sibling.
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if not sibling:
            # Nothing to the left at this level; try one level up.
            current = current.parent
            continue

        if isinstance(sibling, Leaf):
            return sibling

        try:
            last_leaf = list(sibling.leaves())[-1]
        except IndexError:
            return None

        return last_leaf

    return None
2334
2335
def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
    """Return True if `node` and its previous siblings match the types in `tokens`.

    The last element of `tokens` is matched against `node` itself, the one
    before it against `node.prev_sibling`, and so on.  `None` can be used as the
    first element to declare that the start of the list is anchored at the
    start of the parent's children, i.e. that no further sibling exists there.
    An empty `tokens` list always matches.
    """
    current = node
    for expected in reversed(tokens):
        if expected is None:
            # Anchor: matches only when the siblings have run out.
            return current is None

        if not current or current.type != expected:
            return False

        current = current.prev_sibling
    return True
2350
2351
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Climbs from `descendant` through its parents until a node whose parent is
    `ancestor` is found.  Returns None when the parent chain ends without
    reaching `ancestor`.
    """
    candidate: Optional[LN] = descendant
    while candidate:
        parent = candidate.parent
        if parent != ancestor:
            candidate = parent
        else:
            return candidate

    return candidate
2358
2359
def container_of(leaf: Leaf) -> LN:
    """Return `leaf` or one of its ancestors that is the topmost container of it.

    By "container" we mean a node where `leaf` is the very first child.

    The climb stops at the first parent that is missing, whose first child has
    a different prefix than `leaf`, that is the `file_input` node, or whose
    previous sibling is a bracket.
    """
    prefix = leaf.prefix
    current: LN = leaf
    while current:
        above = current.parent
        # Conditions are checked in this order and short-circuit, so later ones
        # only run when `above` exists.
        must_stop = (
            above is None
            or above.children[0].prefix != prefix
            or above.type == syms.file_input
            or (above.prev_sibling is not None and above.prev_sibling.type in BRACKETS)
        )
        if must_stop:
            break

        current = above
    return current
2383
2384
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    Only commas cause a line break after themselves; they get `COMMA_PRIORITY`.
    Every other leaf gets 0.  `previous` is accepted but not used.

    Higher numbers are higher priority.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
2397
2398
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    `previous` is the leaf that comes before `leaf`, if known; it is used to
    recognize method-call dots, adjacent strings and the second word of the
    two-word operators `not in` / `is not`.

    Higher numbers are higher priority.  0 means `leaf` is not such a delimiter.
    The checks below are order-sensitive: the first one that matches wins.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    # A dot outside import/dotted-name nodes counts when `previous` is unknown
    # or a closing bracket.
    if (
        leaf.type == token.DOT
        and leaf.parent
        and leaf.parent.type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    # Binary math operators; unary uses (factor, star_expr parents) are excluded.
    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf.type]

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    # Second of two adjacent string leaves.
    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        return STRING_PRIORITY

    # Everything below is keyword-based and applies to NAME/ASYNC leaves only.
    if leaf.type not in {token.NAME, token.ASYNC}:
        return 0

    # Note the precedence: this reads as
    # `("for" inside a comprehension) or (any ASYNC leaf)`.
    if (
        leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
        or leaf.type == token.ASYNC
    ):
        # Skip a `for` that directly follows an "async" leaf: no priority is
        # returned for it here and the checks below are tried instead.
        if (
            not isinstance(leaf.prev_sibling, Leaf)
            or leaf.prev_sibling.value != "async"
        ):
            return COMPREHENSION_PRIORITY

    if (
        leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
        return TERNARY_PRIORITY

    if leaf.value == "is":
        return COMPARATOR_PRIORITY

    # `in` as a comparison operator, unless it is the second word of `not in`.
    if (
        leaf.value == "in"
        and leaf.parent
        and leaf.parent.type in {syms.comp_op, syms.comparison}
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "not"
        )
    ):
        return COMPARATOR_PRIORITY

    # `not` inside a comparison operator, unless it is the second word of `is not`.
    if (
        leaf.value == "not"
        and leaf.parent
        and leaf.parent.type == syms.comp_op
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "is"
        )
    ):
        return COMPARATOR_PRIORITY

    if leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
2493
2494
# Comment texts recognized as "formatting off" / "formatting on" markers
# (presumably matched against whole comment values -- usage is outside this view).
FMT_OFF = {
    "# fmt: off",
    "# fmt:off",
    "# yapf: disable",
}
FMT_ON = {
    "# fmt: on",
    "# fmt:on",
    "# yapf: enable",
}
2497
2498
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Yield a new comment leaf for every comment found in the prefix of `leaf`.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  The consequence is that
    comments don't "belong" anywhere in the tree, which is why this function
    produces simple parentless Leaf objects for them.

    We really only need to differentiate between inline and standalone
    comments.  The latter don't share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.  Each yielded
    leaf gets a prefix of one "\\n" per newline recorded before the comment.
    """
    at_end_of_file = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_end_of_file):
        leading_newlines = "\n" * proto.newlines
        yield Leaf(proto.type, proto.value, prefix=leading_newlines)
2520
2521
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.

    Instances are created by `list_comments()`.
    """

    # Token type of the comment: token.COMMENT (trailing) or STANDALONE_COMMENT.
    type: int  # token.COMMENT or STANDALONE_COMMENT
    # Comment text as returned by `make_comment()`.
    value: str  # content of the comment
    # Number of blank lines seen in the prefix since the previous comment (or
    # since the start of the prefix for the first one).
    newlines: int  # how many newlines before the comment
    # Running character count of the prefix up to and including this comment's
    # line, counting one extra character per line for the split "\n".
    consumed: int  # how many characters of the original leaf's prefix did we consume
2538
2539
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    The prefix is processed line by line.  A comment is a simple trailing
    comment (`token.COMMENT`) only when every line before it ended with a
    backslash and `is_endmarker` is False; any other comment is a
    `STANDALONE_COMMENT`.  Results are memoized, so the returned list is
    shared between calls with equal arguments.
    """
    comments: List[ProtoComment] = []
    if "#" not in prefix:
        return comments

    offset = 0  # characters of `prefix` covered so far
    blank_run = 0  # blank lines since the previous comment
    escaped_lines = 0  # non-comment lines that ended with a backslash
    for lineno, raw_line in enumerate(prefix.split("\n")):
        offset += len(raw_line) + 1  # adding the length of the split '\n'
        stripped = raw_line.lstrip()
        if stripped.startswith("#"):
            is_trailing = lineno == escaped_lines and not is_endmarker
            comments.append(
                ProtoComment(
                    type=token.COMMENT if is_trailing else STANDALONE_COMMENT,
                    value=make_comment(stripped),
                    newlines=blank_run,
                    consumed=offset,
                )
            )
            blank_run = 0
        elif not stripped:
            blank_run += 1
        elif stripped.endswith("\\"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            escaped_lines += 1
    return comments
2575
2576
def make_comment(content: str) -> str:
    """Return a consistently formatted comment built from the `content` string.

    Trailing whitespace is dropped and exactly one leading hash sign is
    guaranteed: if `content` didn't start with one, it is provided.

    A single space is inserted between the hash sign and the text unless the
    text already begins with a space or with one of ``!``, ``:``, ``#``, ``'``
    or ``%`` (so "#!", "#:", "##", "#'" and "#%%" style comments are left
    untouched).
    """
    text = content.rstrip()
    if text[:1] == "#":
        text = text[1:]
    if not text:
        # Nothing but (at most) a hash sign and whitespace.
        return "#"

    needs_space = text[0] not in " !:#'%"
    return "# " + text if needs_space else "#" + text
2594
2595
def transform_line(
    line: Line, mode: Mode, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Transform a `line`, potentially splitting it into many lines.

    The resulting lines should fit in the allotted `line_length` but might not
    be able to.

    `features` are syntactical features that may be used in the output.

    A list of candidate transforms is chosen from the shape of the line and
    the `mode`; they are tried in order and the output of the first one that
    does not raise `CannotTransform` is yielded.  If none succeeds (or none
    applies), the original `line` is yielded unchanged.
    """
    if line.is_comment:
        yield line
        return

    line_str = line_to_string(line)

    def build(transformer_cls: Type[StringTransformer]) -> StringTransformer:
        """Instantiate a string transformer configured from `mode`."""
        return transformer_cls(mode.line_length, mode.string_normalization)

    string_merge = build(StringMerger)
    string_paren_strip = build(StringParenStripper)
    string_split = build(StringSplitter)
    string_paren_wrap = build(StringParenWrapper)

    needs_split = (
        line.contains_uncollapsable_type_comments()
        or line.should_explode
        or not (
            is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
            or line.contains_unsplittable_type_ignore()
        )
        or (line.inside_brackets and line.contains_standalone_comments())
    )

    transformers: List[Transformer]
    if not needs_split:
        # Only apply basic string preprocessing, since lines shouldn't be split here.
        transformers = []
        if mode.experimental_string_processing:
            transformers = [string_merge, string_paren_strip]
    elif line.is_def:
        transformers = [left_hand_split]
    else:

        def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
            """Wraps calls to `right_hand_split`.

            Successive attempts `omit` more and more right-hand trailers
            (bracket pairs with content), so that the trailers get glued
            together and the split happens on another bracket pair instead.
            """
            for omit in generate_trailers_to_omit(line, mode.line_length):
                attempt = list(
                    right_hand_split(line, mode.line_length, features, omit=omit)
                )
                # Note: this check can only tell whether the first line of the
                # *current* transformation fits in the line length.  That is
                # enough only for simple cases; all others require running more
                # transforms via `transform_line()`, and this check doesn't
                # know if those would succeed.
                if is_line_short_enough(attempt[0], line_length=mode.line_length):
                    yield from attempt
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line, as well as lines that
            # contain trailing commas (those have to be exploded).
            yield from right_hand_split(
                line, line_length=mode.line_length, features=features
            )

        if mode.experimental_string_processing:
            transformers = [string_merge, string_paren_strip, string_split]
            if line.inside_brackets:
                transformers += [delimiter_split, standalone_comment_split]
            transformers += [string_paren_wrap, rhs]
        else:
            transformers = []
            if line.inside_brackets:
                transformers += [delimiter_split, standalone_comment_split]
            transformers.append(rhs)

    for transform in transformers:
        # The transformed lines are collected in `result` before anything is
        # yielded, because we might want to abort mission and return the
        # original line in the end, or attempt a different split altogether.
        try:
            result = run_transformer(line, transform, mode, features, line_str=line_str)
        except CannotTransform:
            continue

        yield from result
        return

    # No transform applied (or there was none to try): keep the line as it is.
    yield line
2705
2706
@dataclass  # type: ignore
class StringTransformer(ABC):
    """
    Template-method implementation of the Transformer protocol: subclasses
    override `do_match(...)` and `do_transform(...)`, and `__call__` ties the
    two together.

    This Transformer works exclusively on strings (for example, by merging
    or splitting them).

    The docstring of each concrete StringTransformer subclass is organized
    into the following sections.

    Requirements:
        Which requirements must be met of the given Line for this
        StringTransformer to be applied?

    Transformations:
        If the given Line meets all of the above requirements, which string
        transformations can you expect to be applied to it by this
        StringTransformer?

    Collaborations:
        What contractual agreements does this StringTransformer have with other
        StringTransformers? Such collaborations should be eliminated/minimized
        as much as possible.
    """

    line_length: int
    normalize_strings: bool
    __name__ = "StringTransformer"

    @abstractmethod
    def do_match(self, line: Line) -> TMatchResult:
        """
        Returns:
            * Ok(string_idx) such that `line.leaves[string_idx]` is our target
            string, if a match was able to be made.
                OR
            * Err(CannotTransform), if a match was not able to be made.
        """

    @abstractmethod
    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Yields:
            * Ok(new_line) where new_line is the new transformed line.
                OR
            * Err(CannotTransform) if the transformation failed for some reason. The
            `do_match(...)` template method should usually be used to reject
            the form of the given Line, but in some cases it is difficult to
            know whether or not a Line meets the StringTransformer's
            requirements until the transformation is already midway.

        Side Effects:
            This method should NOT mutate @line directly, but it MAY mutate the
            Line's underlying Node structure. (WARNING: If the underlying Node
            structure IS altered, then this method should NOT be allowed to
            yield a CannotTransform after that point.)
        """

    def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
        """
        Makes StringTransformer instances callable with a signature that
        mirrors that of the Transformer type.

        Yields:
            Every line wrapped in an Ok(...) produced by `do_transform(...)`.

        Raises:
            CannotTransform(...) if the concrete StringTransformer class is unable
            to transform @line.
        """
        # Cheap pre-check: skip `self.do_match(...)` entirely when the line
        # holds no string leaf at all.
        if all(leaf.type != token.STRING for leaf in line.leaves):
            raise CannotTransform("There are no strings in this line.")

        matched = self.do_match(line)
        if isinstance(matched, Err):
            raise CannotTransform(
                f"The string transformer {self.__class__.__name__} does not recognize"
                " this line as one that it can transform."
            ) from matched.err()

        target_idx = matched.ok()
        for outcome in self.do_transform(line, target_idx):
            if isinstance(outcome, Err):
                raise CannotTransform(
                    "StringTransformer failed while attempting to transform string."
                ) from outcome.err()
            yield outcome.ok()
2801
2802
@dataclass
class CustomSplit:
    """A custom (i.e. manual) string split.

    Each CustomSplit instance stands for exactly one substring of a string
    that was written by hand as several adjacent pieces.

    Examples:
        Consider the following string:
        ```
        "Hi there friend."
        " This is a custom"
        f" string {split}."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```

        NOTE(review): the indices above look one short of what StringMerger
        stores as `break_idx` (it adds one to the substring length, plus the
        prefix length when the substring has a prefix) -- confirm.

    Attributes:
        has_prefix: whether this substring carried its own string prefix.
        break_idx: index at which the string should be broken to recover
            this substring.
    """

    has_prefix: bool
    break_idx: int
2827
2828
class CustomSplitMapMixin:
    """
    Mixin that remembers, for each merged string, the sequence of
    CustomSplits it was assembled from.  Those splits are later used to
    re-split the string iff none of the resultant substrings go over the
    configured max line length.

    The map is a class attribute, so it is shared by every class and instance
    that uses this mixin.
    """

    _Key = Tuple[StringID, str]
    _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)

    @staticmethod
    def _get_key(string: str) -> "CustomSplitMapMixin._Key":
        """
        Returns:
            The identifier under which @string's custom splits are stored
            internally: the pair of the object's `id()` and its value.
        """
        return (id(string), string)

    def add_custom_splits(
        self, string: str, custom_splits: Iterable[CustomSplit]
    ) -> None:
        """Custom Split Map Setter Method

        Side Effects:
            Records @custom_splits (frozen into a tuple) as the custom splits
            of @string, replacing any earlier mapping for it.
        """
        map_key = self._get_key(string)
        self._CUSTOM_SPLIT_MAP[map_key] = tuple(custom_splits)

    def pop_custom_splits(self, string: str) -> List[CustomSplit]:
        """Custom Split Map Getter Method

        Returns:
            * A list of the custom splits that are mapped to @string, if any
            exist.
                OR
            * [], otherwise.

        Side Effects:
            Forgets the mapping between @string and its custom splits; the
            splits themselves are handed to the caller.
        """
        return list(self._CUSTOM_SPLIT_MAP.pop(self._get_key(string), ()))

    def has_custom_splits(self, string: str) -> bool:
        """
        Returns:
            True iff a set of custom splits is currently recorded for @string.
        """
        return self._get_key(string) in self._CUSTOM_SPLIT_MAP
2886
2887
class StringMerger(CustomSplitMapMixin, StringTransformer):
    """StringTransformer that merges strings together.

    Requirements:
        (A) The line contains adjacent strings such that ALL of the validation checks
        listed in StringMerger.__validate_msg(...)'s docstring pass.
            OR
        (B) The line contains a string which uses line continuation backslashes.

    Transformations:
        Depending on which of the two requirements above were met, either:

        (A) The string group associated with the target string is merged.
            OR
        (B) All line-continuation backslashes are removed from the target string.

    Collaborations:
        StringMerger provides custom split information to StringSplitter.
    """

    def do_match(self, line: Line) -> TMatchResult:
        """
        Returns:
            * Ok(i) for the first string leaf that is either immediately
            followed by another string leaf or contains a backslash
            line-continuation.
                OR
            * Err(CannotTransform), if the line has no such string leaf.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        for (i, leaf) in enumerate(LL):
            # Case (A): two string leaves next to each other.
            if (
                leaf.type == token.STRING
                and is_valid_index(i + 1)
                and LL[i + 1].type == token.STRING
            ):
                return Ok(i)

            # Case (B): a string containing a backslash followed by a newline.
            if leaf.type == token.STRING and "\\\n" in leaf.value:
                return Ok(i)

        return TErr("This line has no strings that need merging.")

    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Applies both sub-transformations in turn: first the removal of
        backslash line-continuations, then the merge of the string group
        (performed on the output of the first step when that step succeeded).

        Yields:
            * Ok(new_line), if at least one of the two steps succeeded.
                OR
            * Err(CannotTransform), if both steps failed; the two underlying
            errors are chained to it through `__cause__`.
        """
        new_line = line
        rblc_result = self.__remove_backslash_line_continuation_chars(
            new_line, string_idx
        )
        if isinstance(rblc_result, Ok):
            new_line = rblc_result.ok()

        msg_result = self.__merge_string_group(new_line, string_idx)
        if isinstance(msg_result, Ok):
            new_line = msg_result.ok()

        if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
            msg_cant_transform = msg_result.err()
            rblc_cant_transform = rblc_result.err()
            cant_transform = CannotTransform(
                "StringMerger failed to merge any strings in this line."
            )

            # Chain the errors together using `__cause__`.
            msg_cant_transform.__cause__ = rblc_cant_transform
            cant_transform.__cause__ = msg_cant_transform

            yield Err(cant_transform)
        else:
            yield Ok(new_line)

    @staticmethod
    def __remove_backslash_line_continuation_chars(
        line: Line, string_idx: int
    ) -> TResult[Line]:
        """
        Merge strings that were split across multiple lines using
        line-continuation backslashes.

        Returns:
            Ok(new_line), if the string at `line.leaves[string_idx]` is not
            triple-quoted and contains backslash line-continuation
            characters.
                OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        string_leaf = LL[string_idx]
        if not (
            string_leaf.type == token.STRING
            and "\\\n" in string_leaf.value
            and not has_triple_quotes(string_leaf.value)
        ):
            return TErr(
                f"String leaf {string_leaf} does not contain any backslash line"
                " continuation characters."
            )

        # Work on a copy of the line so that @line itself is left untouched.
        new_line = line.clone()
        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL)

        # Drop every backslash-newline pair from the copied string leaf.
        new_string_leaf = new_line.leaves[string_idx]
        new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")

        return Ok(new_line)

    def __merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
        """
        Merges string group (i.e. set of adjacent strings) where the first
        string in the group is `line.leaves[string_idx]`.

        Returns:
            Ok(new_line), if ALL of the validation checks found in
            __validate_msg(...) pass.
                OR
            Err(CannotTransform), otherwise.

        Side Effects:
            * If the first string of the group has a parent node, the merged
            string leaf is passed to `replace_child(...)` together with that
            parent.
            * The custom splits describing the original substrings are
            registered for the merged string via `add_custom_splits(...)`.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        vresult = self.__validate_msg(line, string_idx)
        if isinstance(vresult, Err):
            return vresult

        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The quote character of the first string; the merged string is built
        # with this same quote character.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)

            # Matches an even-length (possibly empty) run of backslashes that
            # is not itself preceded by a backslash, i.e. the QUOTE that
            # follows it is not already escaped.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have: the first non-empty prefix found among the strings
        # of the group (or "" if none of them has one).
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value)
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS)

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.subf(r"(\{|\})", "{1}{1}", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            # Rebuild the merged string from everything accumulated so far plus
            # this substring, leaving a BREAK_MARK where the substring ends.
            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            normalize_string_quotes(S_leaf)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        # Each pass locates the next BREAK_MARK and then discards everything up
        # to and including it, so `mark_idx` is always relative to the start of
        # the current substring.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        # The leaf that actually ends up in the new line has the markers removed.
        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            replace_child(atom_node, string_leaf)

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        for (i, leaf) in enumerate(LL):
            if i == string_idx:
                new_line.append(string_leaf)

            # The original strings of the group are not copied over (the merged
            # leaf stands in for them), but the comments attached to them are.
            if string_idx <= i < string_idx + num_of_strings:
                for comment_leaf in line.comments_after(LL[i]):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            append_leaves(new_line, line, [leaf])

        self.add_custom_splits(string_leaf.value, custom_splits)
        return Ok(new_line)

    @staticmethod
    def __validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for __merge_string_group(...).

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
                OR
            * Err(CannotTransform), if any of the following are true:
                - The target string group contains a stand-alone comment
                  that is followed (in either scan direction) by a string.
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group has more than one inline comment.
                - The string group has an inline comment that appears to be a pragma.
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.
                - The string group contains a triple-quoted string.
                - The string group contains a string whose prefix includes "r"
                  (i.e. a raw string).
        """
        # We first check for "inner" stand-alone comments (i.e. stand-alone
        # comments that have a string leaf before them AND after them).
        # The group is scanned from the target string in both directions.
        for inc in [1, -1]:
            i = string_idx
            found_sa_comment = False
            is_valid_index = is_valid_index_factory(line.leaves)
            while is_valid_index(i) and line.leaves[i].type in [
                token.STRING,
                STANDALONE_COMMENT,
            ]:
                if line.leaves[i].type == STANDALONE_COMMENT:
                    found_sa_comment = True
                elif found_sa_comment:
                    return TErr(
                        "StringMerger does NOT merge string groups which contain "
                        "stand-alone comments."
                    )

                i += inc

        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                # The first non-string leaf marks the end of the string group.
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value)
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            # `line.comments` is keyed by the id() of the leaf a comment
            # belongs to.
            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
3212
3213
class StringParenStripper(StringTransformer):
    """StringTransformer that strips surrounding parentheses from strings.

    Requirements:
        The line contains a string which is surrounded by parentheses and:
            - The opening parenthesis is NOT preceded by a name or a closing
              bracket (i.e. the parentheses are not those of a call).
            - If the target string contains a PERCENT, the brackets are not
              preceded by an operator with higher or equal precedence than
              PERCENT, nor followed by anything with higher precedence than
              PERCENT.

    Transformations:
        The parentheses mentioned in the 'Requirements' section are stripped.

    Collaborations:
        StringParenStripper has its own inherent usefulness, but it is also
        relied on to clean up the parentheses created by StringParenWrapper (in
        the event that they are no longer needed).
    """

    def do_match(self, line: Line) -> TMatchResult:
        """
        Returns:
            * Ok(string_idx) for the first string leaf in @line that is
            wrapped in strippable parentheses (see the class docstring).
                OR
            * Err(CannotTransform), if there is no such string.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        for (idx, leaf) in enumerate(LL):
            # Should be a string...
            if leaf.type != token.STRING:
                continue

            # Should be preceded by a non-empty LPAR...
            if (
                not is_valid_index(idx - 1)
                or LL[idx - 1].type != token.LPAR
                or is_empty_lpar(LL[idx - 1])
            ):
                continue

            # That LPAR should NOT be preceded by a function name or a closing
            # bracket (which could be a function which returns a function or a
            # list/dictionary that contains a function)...
            if is_valid_index(idx - 2) and (
                LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
            ):
                continue

            string_idx = idx

            # Skip the string trailer, if one exists.
            string_parser = StringParser()
            next_idx = string_parser.parse(LL, string_idx)

            # if the leaves in the parsed string include a PERCENT, we need to
            # make sure the initial LPAR is NOT preceded by an operator with
            # higher or equal precedence to PERCENT
            if is_valid_index(idx - 2):
                # mypy can't quite follow unless we name this
                before_lpar = LL[idx - 2]
                if token.PERCENT in {leaf.type for leaf in LL[idx - 1 : next_idx]} and (
                    (
                        before_lpar.type
                        in {
                            token.STAR,
                            token.AT,
                            token.SLASH,
                            token.DOUBLESLASH,
                            token.PERCENT,
                            token.TILDE,
                            token.DOUBLESTAR,
                            token.AWAIT,
                            token.LSQB,
                            token.LPAR,
                        }
                    )
                    or (
                        # only unary PLUS/MINUS
                        before_lpar.parent
                        and before_lpar.parent.type == syms.factor
                        and (before_lpar.type in {token.PLUS, token.MINUS})
                    )
                ):
                    continue

            # Should be followed by a non-empty RPAR...
            if (
                is_valid_index(next_idx)
                and LL[next_idx].type == token.RPAR
                and not is_empty_rpar(LL[next_idx])
            ):
                # That RPAR should NOT be followed by anything with higher
                # precedence than PERCENT
                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type in {
                    token.DOUBLESTAR,
                    token.LSQB,
                    token.LPAR,
                    token.DOT,
                }:
                    continue

                return Ok(string_idx)

        return TErr("This line has no strings wrapped in parens.")

    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Yields:
            * Ok(new_line), where new_line is @line without the parentheses
            that surround the string at `line.leaves[string_idx]`.
                OR
            * Err(CannotTransform), if either parenthesis has a comment
            attached to it. Nothing else is yielded, and nothing is mutated,
            in that case.

        Side Effects:
            On success, both parenthesis leaves are removed from the
            underlying Node structure and the original string leaf is
            replaced (via `replace_child(...)`) by a fresh one.
        """
        LL = line.leaves

        string_parser = StringParser()
        rpar_idx = string_parser.parse(LL, string_idx)

        for leaf in (LL[string_idx - 1], LL[rpar_idx]):
            if line.comments_after(leaf):
                yield TErr(
                    "Will not strip parentheses which have comments attached to them."
                )
                # Stop right here: without this `return`, a consumer that keeps
                # iterating after the Err would make the generator fall through
                # to the tree-mutating code below (or report a second Err).
                return

        new_line = line.clone()
        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL[: string_idx - 1])

        # Detach the LPAR and swap the string for a fresh leaf in the tree.
        string_leaf = Leaf(token.STRING, LL[string_idx].value)
        LL[string_idx - 1].remove()
        replace_child(LL[string_idx], string_leaf)
        new_line.append(string_leaf)

        # Copy everything after the string except the RPAR itself.
        append_leaves(
            new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :]
        )

        LL[rpar_idx].remove()

        yield Ok(new_line)
3344
3345
class BaseStringSplitter(StringTransformer):
    """
    Abstract base for StringTransformers that bring a Line back under the
    configured line length, either by splitting one of its strings or by
    moving that string onto a line of its own.

    Requirements:
        * The target string value is what makes the line exceed the line
        length limit. Put differently: once all of black's other line split
        methods have been exhausted, this line (or one of the lines those
        splits produce) would still be over the line_length limit unless
        this string is split.
            AND
        * The target string is NOT a "pointless" string (i.e. a string that has
        no parent or siblings).
            AND
        * The target string is not followed by an inline comment that appears
        to be a pragma.
            AND
        * The target string is not a multiline (i.e. triple-quote) string.
    """

    @abstractmethod
    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        The hook that subclasses implement in place of
        `StringTransformer.do_match(...)`.

        It follows the same protocol as `StringTransformer.do_match(...)`;
        see `help(StringTransformer.do_match)` for the details.
        """

    def do_match(self, line: Line) -> TMatchResult:
        """
        Runs the subclass matcher and then checks the matched string against
        the requirements in this class's docstring.

        Returns:
            * The Err produced by `do_splitter_match(...)`, if it failed.
                OR
            * The Err produced by the validation step, if that failed.
                OR
            * The (Ok) result of `do_splitter_match(...)`, otherwise.
        """
        match_result = self.do_splitter_match(line)

        if not isinstance(match_result, Err):
            validation = self.__validate(line, match_result.ok())
            if isinstance(validation, Err):
                return validation

        return match_result

    def __validate(self, line: Line, string_idx: int) -> TResult[None]:
        """
        Verifies that @line satisfies every requirement listed in this
        class's docstring (see `help(BaseStringSplitter)`).

        The checks run in a fixed order and the first one that fails decides
        which error is returned.

        Returns:
            * Ok(None), if ALL of the requirements are met.
                OR
            * Err(CannotTransform), if ANY of the requirements are NOT met.
        """
        target = line.leaves[string_idx]
        value = target.value

        # 1) The string itself has to be the reason the line is too long.
        if len(value) <= self.__get_max_string_length(line, string_idx):
            return TErr(
                "The string itself is not what is causing this line to be too long."
            )

        # 2) A string that is alone in its statement is left untouched.
        parent = target.parent
        if not parent or [L.type for L in parent.children] == [
            token.STRING,
            token.NEWLINE,
        ]:
            return TErr(
                f"This string ({value}) appears to be pointless (i.e. has"
                " no parent)."
            )

        # 3) Comments are keyed by the id() of the leaf they trail; refuse to
        #    move a string whose trailing comment looks like a pragma.
        leaf_id = id(target)
        if leaf_id in line.comments and contains_pragma_comment(
            line.comments[leaf_id]
        ):
            return TErr(
                "Line appears to end with an inline pragma comment. Splitting the line"
                " could modify the pragma's behavior."
            )

        # 4) Triple-quoted strings are never split.
        if has_triple_quotes(value):
            return TErr("We cannot split multiline strings.")

        return Ok(None)

    def __get_max_string_length(self, line: Line, string_idx: int) -> int:
        """
        Works out how long the target string may be before it (rather than
        the rest of the line) has to be blamed for the line going over the
        line length limit.

        WARNING: This method is tightly coupled to both StringSplitter and
        (especially) StringParenWrapper. There is probably a better way to
        accomplish what is being done here.

        Returns:
            max_string_length: such that
            `len(line.leaves[string_idx].value) > max_string_length` implies
            that the target string IS responsible for causing this line to
            exceed the line length limit.
        """
        LL = line.leaves
        is_valid_index = is_valid_index_factory(LL)

        # Legend for the comments below:
        #
        #   WMA4:   "We must account for".
        #   STRING: some/any valid string.
        #   P:      The leaf that is before the target string leaf.
        #   N:      The leaf that is after the target string leaf.
        #   NN:     The leaf that is after N.

        # WMA4 the whitespace at the beginning of the line.
        offset = line.depth * 4

        prev_idx = string_idx - 1
        if is_valid_index(prev_idx):
            before = LL[prev_idx]
            if before.type == token.LPAR and before.value == "" and string_idx >= 2:
                # An empty LPAR placeholder is not the real P; look one
                # leaf further back.
                prev_idx -= 1

            P = LL[prev_idx]
            if P.type == token.PLUS:
                # WMA4 a space and a '+' character (e.g. `+ STRING`).
                offset += 2
            elif P.type == token.COMMA:
                # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
                offset += 3
            elif P.type in [token.COLON, token.EQUAL, token.NAME]:
                # Dictionary keys, variable assignments, 'return STRING'
                # statement lines, and 'else STRING' ternary expression lines.
                #
                # WMA4 a single space plus the lengths of all the leaves that
                # come before that space.
                offset += 1 + sum(len(str(leaf)) for leaf in LL[: prev_idx + 1])

        if is_valid_index(string_idx + 1):
            N = LL[string_idx + 1]
            if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
                # An empty RPAR placeholder is not the real N; skip over it.
                # NOTE(review): NN below is still read from string_idx + 2, so
                # after this skip N and NN are the same leaf and the
                # method-call branch cannot apply.
                N = LL[string_idx + 2]

            if N.type == token.COMMA:
                # WMA4 a single comma at the end of the string (e.g `STRING,`).
                offset += 1

            if is_valid_index(string_idx + 2):
                NN = LL[string_idx + 2]

                if N.type == token.DOT and NN.type == token.NAME:
                    # A method call invoked off of the string literal, up to
                    # and including the LPAR character.
                    #
                    # WMA4 the '.' character and the length of the method's
                    # name.
                    offset += 1 + len(NN.value)

                    if (
                        is_valid_index(string_idx + 3)
                        and LL[string_idx + 3].type == token.LPAR
                    ):
                        # WMA4 the left parenthesis character.
                        offset += 1

        trailing_comments = list(line.comments_after(LL[string_idx]))
        if trailing_comments:
            # WMA4 two spaces before the '#' character (counted once)...
            offset += 2
            # ...and the length of every inline comment.
            offset += sum(len(comment.value) for comment in trailing_comments)

        return self.line_length - offset
3539
3540
class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
    """
    StringTransformer that splits "atom" strings (i.e. strings which exist on
    lines by themselves).

    Requirements:
        * The line consists ONLY of a single string (with the exception of a
        '+' symbol which MAY exist at the start of the line), MAYBE a string
        trailer, and MAYBE a trailing comma.
            AND
        * All of the requirements listed in BaseStringSplitter's docstring.

    Transformations:
        The string mentioned in the 'Requirements' section is split into as
        many substrings as necessary to adhere to the configured line length.

        In the final set of substrings, no substring should be smaller than
        MIN_SUBSTR_SIZE characters.

        The string will ONLY be split on spaces (i.e. each new substring should
        start with a space).

        If the string is an f-string, it will NOT be split in the middle of an
        f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
        else bar()} is an f-expression).

        If the string that is being split has an associated set of custom split
        records and those custom splits will NOT result in any line going over
        the configured line length, those custom splits are used. Otherwise the
        string is split as late as possible (from left-to-right) while still
        adhering to the transformation rules listed above.

    Collaborations:
        StringSplitter relies on StringMerger to construct the appropriate
        CustomSplit objects and add them to the custom split map.
    """

    # No substring produced by a split may be shorter than this.
    MIN_SUBSTR_SIZE = 6
    # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
    # Written for (and always used with) the VERBOSE flag.
    RE_FEXPR = r"""
    (?<!\{)\{
        (?:
            [^\{\}]
            | \{\{
            | \}\}
        )+?
    (?<!\})(?:\}\})*\}(?!\})
    """

    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        Checks that @line has the shape described in the 'Requirements'
        section of this class's docstring.

        Returns:
            * Ok(string_idx), where `line.leaves[string_idx]` is the string
            to split.
                OR
            * Err(CannotTransform), if the line does not start with a string
            or has leaves that are not allowed after it.
        """
        LL = line.leaves
        is_valid_index = is_valid_index_factory(LL)

        cursor = 0

        # Optional: a leading '+' symbol.
        if is_valid_index(cursor) and LL[cursor].type == token.PLUS:
            cursor += 1

        # Optional: an empty LPAR.
        if is_valid_index(cursor) and is_empty_lpar(LL[cursor]):
            cursor += 1

        # Mandatory: the string itself.
        if not (is_valid_index(cursor) and LL[cursor].type == token.STRING):
            return TErr("Line does not start with a string.")

        string_idx = cursor

        # Jump past the string trailer, if one exists.
        cursor = StringParser().parse(LL, string_idx)

        # Optional: an empty RPAR.
        if is_valid_index(cursor) and is_empty_rpar(LL[cursor]):
            cursor += 1

        # Optional: a trailing comma.
        if is_valid_index(cursor) and LL[cursor].type == token.COMMA:
            cursor += 1

        # Anything left over disqualifies the line.
        if is_valid_index(cursor):
            return TErr("This line does not end with a string.")

        return Ok(string_idx)

    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Splits the string at `line.leaves[string_idx]` into substrings and
        places each substring on a line of its own.

        Yields:
            * A single Err(CannotTransform), if @line is indented so deeply
            that no room is left for the string.
                OR
            * Ok(new_line) for every line that is produced: one per
            substring, plus (when the leaves that follow the string do not
            fit next to the last substring) one final line that holds those
            leaves.
        """
        LL = line.leaves

        original_value = LL[string_idx].value
        quote_char = original_value[-1]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        prefix = get_string_prefix(original_value)

        # An 'f' prefix MAY be dropped from substrings that hold no
        # f-expressions, but ONLY when the original f-string holds at least
        # one f-expression. Otherwise the AST of the program would change.
        drop_pointless_f_prefix = ("f" in prefix) and re.search(
            self.RE_FEXPR, original_value, re.VERBOSE
        )

        first_string_line = True
        starts_with_plus = LL[0].type == token.PLUS

        def line_needs_plus() -> bool:
            return first_string_line and starts_with_plus

        def maybe_append_plus(new_line: Line) -> None:
            """
            Side Effects:
                If @line starts with a plus and the line currently being
                built is the first one, a new PLUS leaf is appended to
                @new_line and takes the old PLUS leaf's place in the node
                structure. Otherwise nothing happens.
            """
            if not line_needs_plus():
                return

            plus_leaf = Leaf(token.PLUS, "+")
            replace_child(LL[0], plus_leaf)
            new_line.append(plus_leaf)

        ends_with_comma = (
            is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
        )

        def max_last_string() -> int:
            """
            Returns:
                The max allowed length of the string value used for the last
                line we will construct.
            """
            reserved = line.depth * 4
            if ends_with_comma:
                reserved += 1
            if line_needs_plus():
                reserved += 2
            return self.line_length - reserved

        # --- Calculate Max Break Index (for string value)
        # Start from the line length limit, subtract one because the last
        # index of a string of length N is N-1, and subtract the leading
        # whitespace, which is not part of the string value (Leaf.value).
        max_break_idx = self.line_length - 1 - line.depth * 4
        if max_break_idx < 0:
            yield TErr(
                f"Unable to split {LL[string_idx].value} at such high of a line depth:"
                f" {line.depth}"
            )
            return

        # Custom splits registered by StringMerger are honored ONLY if none
        # of them would produce a line that exceeds the line limit.
        custom_splits = self.pop_custom_splits(original_value)
        use_custom_breakpoints = bool(
            custom_splits
            and all(split.break_idx <= max_break_idx for split in custom_splits)
        )

        # The part of the string that has not been placed on a line yet.
        rest_value = original_value

        def more_splits_should_be_made() -> bool:
            """
            Returns:
                True iff `rest_value` (the remaining string value from the last
                split), should be split again.
            """
            if use_custom_breakpoints:
                return len(custom_splits) > 1
            return len(rest_value) > max_last_string()

        string_line_results: List[Ok[Line]] = []
        while more_splits_should_be_made():
            if not use_custom_breakpoints:
                # Algorithmic Split (automatic)
                plus_room = 2 if line_needs_plus() else 0
                found_idx = self.__get_break_idx(rest_value, max_break_idx - plus_room)

                if found_idx is None and custom_splits:
                    # No good split could be determined algorithmically, but
                    # this string has custom splits registered to it: fall
                    # back to those, which means starting over from the
                    # beginning.
                    rest_value = original_value
                    string_line_results = []
                    first_string_line = True
                    use_custom_breakpoints = True
                    continue

                if found_idx is None:
                    # Nothing to fall back to, so splitting stops here.
                    break

                break_idx = found_idx
            else:
                # Custom User Split (manual)
                csplit = custom_splits.pop(0)
                break_idx = csplit.break_idx

            # --- Construct `next_value`
            next_value = rest_value[:break_idx] + quote_char
            if (
                # Are we allowed to try to drop a pointless 'f' prefix?
                drop_pointless_f_prefix
                # If we are, will we be successful?
                and next_value != self.__normalize_f_string(next_value, prefix)
            ):
                # A custom split that did NOT originally use a prefix has a
                # `break_idx` that is off by one once the 'f' prefix is gone.
                if use_custom_breakpoints and not csplit.has_prefix:
                    break_idx += 1

                next_value = self.__normalize_f_string(
                    rest_value[:break_idx] + quote_char, prefix
                )

            # --- Construct `next_leaf`
            next_leaf = Leaf(token.STRING, next_value)
            insert_str_child(next_leaf)
            self.__maybe_normalize_string_quotes(next_leaf)

            # --- Construct `next_line`
            next_line = line.clone()
            maybe_append_plus(next_line)
            next_line.append(next_leaf)
            string_line_results.append(Ok(next_line))

            rest_value = prefix + quote_char + rest_value[break_idx:]
            first_string_line = False

        yield from string_line_results

        if drop_pointless_f_prefix:
            rest_value = self.__normalize_f_string(rest_value, prefix)

        rest_leaf = Leaf(token.STRING, rest_value)
        insert_str_child(rest_leaf)

        # NOTE: No test case is known to verify that the following line is
        # actually necessary, but without it the last substring might not get
        # normalized.
        self.__maybe_normalize_string_quotes(rest_leaf)

        last_line = line.clone()
        maybe_append_plus(last_line)

        if not is_valid_index(string_idx + 1):
            # The target string was the last leaf.
            last_line.append(rest_leaf)
            last_line.comments = line.comments.copy()
            yield Ok(last_line)
            return

        # There are leaves to the right of the target string. Measure how
        # long the last line would be if they were appended to the last
        # substring (measuring stops after the first LPAR).
        trailing_leaves = LL[string_idx + 1 :]
        measured = rest_value
        for leaf in trailing_leaves:
            measured += str(leaf)
            if leaf.type == token.LPAR:
                break

        fits_on_last_line = (
            len(measured) <= max_last_string()
            or trailing_leaves[0].type == token.COMMA
        )

        last_line.append(rest_leaf)
        if fits_on_last_line:
            # Everything goes on the same line as the last substring.
            append_leaves(last_line, line, trailing_leaves)
            yield Ok(last_line)
        else:
            # The last substring gets one line and everything else goes on a
            # line below that.
            yield Ok(last_line)

            non_string_line = line.clone()
            append_leaves(non_string_line, line, trailing_leaves)
            yield Ok(non_string_line)

    def __get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
        """
        The algorithm StringSplitter uses to decide which character a string
        is split at.

        Args:
            @string: The substring that we are attempting to split.
            @max_break_idx: The ideal break index. It is returned as is if it
            meets all the necessary conditions. In the likely event that it
            doesn't, the closest index BELOW @max_break_idx that does is
            used. If there is none, the search is widened to the valid
            indices ABOVE @max_break_idx.

        Pre-Conditions:
            * assert_is_leaf_string(@string)
            * 0 <= @max_break_idx < len(@string)

        Returns:
            break_idx, if an index is able to be found that meets all of the
            conditions listed in the 'Transformations' section of this class's
            docstring.
                OR
            None, otherwise.
        """
        is_valid_index = is_valid_index_factory(string)

        assert is_valid_index(max_break_idx)
        assert_is_leaf_string(string)

        is_fstring = "f" in get_string_prefix(string)

        # Spans of @string covered by f-expressions; filled in on first use.
        fexpr_spans: Optional[List[Tuple[Index, Index]]] = None

        def breaks_fstring_expression(i: Index) -> bool:
            """
            Returns:
                True iff returning @i would result in the splitting of an
                f-expression (which is NOT allowed).
            """
            nonlocal fexpr_spans

            if not is_fstring:
                return False

            if fexpr_spans is None:
                fexpr_spans = [
                    match.span()
                    for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE)
                ]

            return any(start <= i < end for (start, end) in fexpr_spans)

        def passes_all_checks(i: Index) -> bool:
            """
            Returns:
                True iff ALL of the conditions listed in the 'Transformations'
                section of this class's docstring would be met by returning @i.
            """
            if string[i] != " ":
                return False

            if (
                len(string[i:]) < self.MIN_SUBSTR_SIZE
                or len(string[:i]) < self.MIN_SUBSTR_SIZE
            ):
                return False

            return not breaks_fstring_expression(i)

        def scan(start: Index, step: int) -> Index:
            """
            Walks from @start in direction @step and stops at the first index
            that passes all checks or once the next step would leave @string.
            """
            idx = start
            while is_valid_index(idx + step) and not passes_all_checks(idx):
                idx += step
            return idx

        # First, all indices at or BELOW @max_break_idx are considered.
        break_idx = scan(max_break_idx, -1)
        if passes_all_checks(break_idx):
            return break_idx

        # If that fails, all indices ABOVE @max_break_idx are considered.
        #
        # A valid index found here makes the next line longer than the
        # specified line length, but that is probably better than doing
        # nothing at all.
        break_idx = scan(max_break_idx + 1, 1)
        if is_valid_index(break_idx) and passes_all_checks(break_idx):
            return break_idx

        return None

    def __maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
        """
        Normalizes the quotes of @leaf, but only when string normalization is
        turned on for this transformer.
        """
        if not self.normalize_strings:
            return

        normalize_string_quotes(leaf)

    def __normalize_f_string(self, string: str, prefix: str) -> str:
        """
        Pre-Conditions:
            * assert_is_leaf_string(@string)

        Returns:
            * If @string is an f-string that contains no f-expressions, we
            return a string identical to @string except that the 'f' prefix
            has been stripped and all double braces (i.e. '{{' or '}}') have
            been normalized (i.e. turned into '{' or '}').
                OR
            * Otherwise, we return @string.
        """
        assert_is_leaf_string(string)

        if "f" not in prefix or re.search(self.RE_FEXPR, string, re.VERBOSE):
            return string

        unprefixed = string[len(prefix) :]
        unescaped = unprefixed.replace("{{", "{").replace("}}", "}")
        return prefix.replace("f", "") + unescaped
3957
3958
3959 class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
3960     """
3961     StringTransformer that splits non-"atom" strings (i.e. strings that do not
3962     exist on lines by themselves).
3963
3964     Requirements:
3965         All of the requirements listed in BaseStringSplitter's docstring in
3966         addition to the requirements listed below:
3967
3968         * The line is a return/yield statement, which returns/yields a string.
3969             OR
3970         * The line is part of a ternary expression (e.g. `x = y if cond else
3971         z`) such that the line starts with `else <string>`, where <string> is
3972         some string.
3973             OR
3974         * The line is an assert statement, which ends with a string.
3975             OR
3976         * The line is an assignment statement (e.g. `x = <string>` or `x +=
3977         <string>`) such that the variable is being assigned the value of some
3978         string.
3979             OR
3980         * The line is a dictionary key assignment where some valid key is being
3981         assigned the value of some string.
3982
3983     Transformations:
3984         The chosen string is wrapped in parentheses and then split at the LPAR.
3985
3986         We then have one line which ends with an LPAR and another line that
3987         starts with the chosen string. The latter line is then split again at
3988         the RPAR. This results in the RPAR (and possibly a trailing comma)
3989         being placed on its own line.
3990
3991         NOTE: If any leaves exist to the right of the chosen string (except
3992         for a trailing comma, which would be placed after the RPAR), those
3993         leaves are placed inside the parentheses.  In effect, the chosen
3994         string is not necessarily being "wrapped" by parentheses. We can,
3995         however, count on the LPAR being placed directly before the chosen
3996         string.
3997
3998         In other words, StringParenWrapper creates "atom" strings. These
3999         can then be split again by StringSplitter, if necessary.
4000
4001     Collaborations:
4002         In the event that a string line split by StringParenWrapper is
4003         changed such that it no longer needs to be given its own line,
4004         StringParenWrapper relies on StringParenStripper to clean up the
4005         parentheses it created.
4006     """
4007
4008     def do_splitter_match(self, line: Line) -> TMatchResult:
4009         LL = line.leaves
4010
4011         string_idx = (
4012             self._return_match(LL)
4013             or self._else_match(LL)
4014             or self._assert_match(LL)
4015             or self._assign_match(LL)
4016             or self._dict_match(LL)
4017         )
4018
4019         if string_idx is not None:
4020             string_value = line.leaves[string_idx].value
4021             # If the string has no spaces...
4022             if " " not in string_value:
4023                 # And will still violate the line length limit when split...
4024                 max_string_length = self.line_length - ((line.depth + 1) * 4)
4025                 if len(string_value) > max_string_length:
4026                     # And has no associated custom splits...
4027                     if not self.has_custom_splits(string_value):
4028                         # Then we should NOT put this string on its own line.
4029                         return TErr(
4030                             "We do not wrap long strings in parentheses when the"
4031                             " resultant line would still be over the specified line"
4032                             " length and can't be split further by StringSplitter."
4033                         )
4034             return Ok(string_idx)
4035
4036         return TErr("This line does not contain any non-atomic strings.")
4037
4038     @staticmethod
4039     def _return_match(LL: List[Leaf]) -> Optional[int]:
4040         """
4041         Returns:
4042             string_idx such that @LL[string_idx] is equal to our target (i.e.
4043             matched) string, if this line matches the return/yield statement
4044             requirements listed in the 'Requirements' section of this classes'
4045             docstring.
4046                 OR
4047             None, otherwise.
4048         """
4049         # If this line is apart of a return/yield statement and the first leaf
4050         # contains either the "return" or "yield" keywords...
4051         if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
4052             0
4053         ].value in ["return", "yield"]:
4054             is_valid_index = is_valid_index_factory(LL)
4055
4056             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4057             # The next visible leaf MUST contain a string...
4058             if is_valid_index(idx) and LL[idx].type == token.STRING:
4059                 return idx
4060
4061         return None
4062
4063     @staticmethod
4064     def _else_match(LL: List[Leaf]) -> Optional[int]:
4065         """
4066         Returns:
4067             string_idx such that @LL[string_idx] is equal to our target (i.e.
4068             matched) string, if this line matches the ternary expression
4069             requirements listed in the 'Requirements' section of this classes'
4070             docstring.
4071                 OR
4072             None, otherwise.
4073         """
4074         # If this line is apart of a ternary expression and the first leaf
4075         # contains the "else" keyword...
4076         if (
4077             parent_type(LL[0]) == syms.test
4078             and LL[0].type == token.NAME
4079             and LL[0].value == "else"
4080         ):
4081             is_valid_index = is_valid_index_factory(LL)
4082
4083             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4084             # The next visible leaf MUST contain a string...
4085             if is_valid_index(idx) and LL[idx].type == token.STRING:
4086                 return idx
4087
4088         return None
4089
4090     @staticmethod
4091     def _assert_match(LL: List[Leaf]) -> Optional[int]:
4092         """
4093         Returns:
4094             string_idx such that @LL[string_idx] is equal to our target (i.e.
4095             matched) string, if this line matches the assert statement
4096             requirements listed in the 'Requirements' section of this classes'
4097             docstring.
4098                 OR
4099             None, otherwise.
4100         """
4101         # If this line is apart of an assert statement and the first leaf
4102         # contains the "assert" keyword...
4103         if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
4104             is_valid_index = is_valid_index_factory(LL)
4105
4106             for (i, leaf) in enumerate(LL):
4107                 # We MUST find a comma...
4108                 if leaf.type == token.COMMA:
4109                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4110
4111                     # That comma MUST be followed by a string...
4112                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4113                         string_idx = idx
4114
4115                         # Skip the string trailer, if one exists.
4116                         string_parser = StringParser()
4117                         idx = string_parser.parse(LL, string_idx)
4118
4119                         # But no more leaves are allowed...
4120                         if not is_valid_index(idx):
4121                             return string_idx
4122
4123         return None
4124
4125     @staticmethod
4126     def _assign_match(LL: List[Leaf]) -> Optional[int]:
4127         """
4128         Returns:
4129             string_idx such that @LL[string_idx] is equal to our target (i.e.
4130             matched) string, if this line matches the assignment statement
4131             requirements listed in the 'Requirements' section of this classes'
4132             docstring.
4133                 OR
4134             None, otherwise.
4135         """
4136         # If this line is apart of an expression statement or is a function
4137         # argument AND the first leaf contains a variable name...
4138         if (
4139             parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
4140             and LL[0].type == token.NAME
4141         ):
4142             is_valid_index = is_valid_index_factory(LL)
4143
4144             for (i, leaf) in enumerate(LL):
4145                 # We MUST find either an '=' or '+=' symbol...
4146                 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
4147                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4148
4149                     # That symbol MUST be followed by a string...
4150                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4151                         string_idx = idx
4152
4153                         # Skip the string trailer, if one exists.
4154                         string_parser = StringParser()
4155                         idx = string_parser.parse(LL, string_idx)
4156
4157                         # The next leaf MAY be a comma iff this line is apart
4158                         # of a function argument...
4159                         if (
4160                             parent_type(LL[0]) == syms.argument
4161                             and is_valid_index(idx)
4162                             and LL[idx].type == token.COMMA
4163                         ):
4164                             idx += 1
4165
4166                         # But no more leaves are allowed...
4167                         if not is_valid_index(idx):
4168                             return string_idx
4169
4170         return None
4171
4172     @staticmethod
4173     def _dict_match(LL: List[Leaf]) -> Optional[int]:
4174         """
4175         Returns:
4176             string_idx such that @LL[string_idx] is equal to our target (i.e.
4177             matched) string, if this line matches the dictionary key assignment
4178             statement requirements listed in the 'Requirements' section of this
4179             classes' docstring.
4180                 OR
4181             None, otherwise.
4182         """
4183         # If this line is apart of a dictionary key assignment...
4184         if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
4185             is_valid_index = is_valid_index_factory(LL)
4186
4187             for (i, leaf) in enumerate(LL):
4188                 # We MUST find a colon...
4189                 if leaf.type == token.COLON:
4190                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4191
4192                     # That colon MUST be followed by a string...
4193                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4194                         string_idx = idx
4195
4196                         # Skip the string trailer, if one exists.
4197                         string_parser = StringParser()
4198                         idx = string_parser.parse(LL, string_idx)
4199
4200                         # That string MAY be followed by a comma...
4201                         if is_valid_index(idx) and LL[idx].type == token.COMMA:
4202                             idx += 1
4203
4204                         # But no more leaves are allowed...
4205                         if not is_valid_index(idx):
4206                             return string_idx
4207
4208         return None
4209
    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Wrap the string at @line.leaves[@string_idx] in parentheses by
        splitting @line into three lines.

        Yields (each wrapped in `Ok`):
            1. The leaves to the left of the string followed by a new "(",
               plus the comments taken from the string leaf, the trailing
               comma (if any) and the old LPAR (if any).
            2. A line one level deeper that holds a copy of the string and
               the leaves that followed it (minus a trailing comma and the
               old RPAR, when present).
            3. A new ")" followed by the trailing comma, if @line ended with
               one.

        NOTE: The underlying node tree is mutated along the way (the original
        string leaf is removed and replaced by the new leaves), so the order
        of the statements below matters.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)
        # Calling the factory removes the original string leaf from its parent;
        # every later `insert_str_child` call inserts at the vacated position.
        insert_str_child = insert_str_child_factory(LL[string_idx])

        # A trailing comma can only be the very last leaf of the line.
        comma_idx = -1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            # Reuse the tree position of the old LPAR.
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            depth=line.depth + 1,
            inside_brackets=True,
            should_explode=line.should_explode,
        )
        # The original string leaf was removed from the tree above, so a fresh
        # leaf with the same value takes its place.
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            # Everything after the string (its trailer, if any) stays on the
            # string line, except for the trailing comma and the old RPAR.
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                right_leaves.pop()

            if old_parens_exist:
                assert (
                    right_leaves and right_leaves[-1].type == token.RPAR
                ), "Apparently, old parentheses do NOT exist?!"
                old_rpar_leaf = right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        # Share the first line's bracket tracker with the last line.
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
4304
4305
class StringParser:
    """
    A state machine that finds where a string's "trailer" ends. A trailer is
    either absent, an old-style formatting sequence (e.g. `% varX` or
    `% (varX, varY)`), or a method-call / attribute access (e.g.
    `.format(varX, varY)`).

    NOTE: The parser keeps state between calls, so a new StringParser object
    MUST be instantiated for each string trailer we need to parse.

    Examples:
        We shall assume that `line` equals the `Line` object that corresponds
        to the following line of python code:
        ```
        x = "Some {}.".format("String") + some_other_string
        ```

        Furthermore, we will assume that `string_idx` is some index such that:
        ```
        assert line.leaves[string_idx].value == "Some {}."
        ```

        The following code snippet then holds:
        ```
        string_parser = StringParser()
        idx = string_parser.parse(line.leaves, string_idx)
        assert line.leaves[idx].type == token.PLUS
        ```
    """

    # Wildcard used as the token half of a `_goto` key: "any other token".
    DEFAULT_TOKEN = -1

    # String Parser States
    START = 1
    DOT = 2
    NAME = 3
    PERCENT = 4
    SINGLE_FMT_ARG = 5
    LPAR = 6
    RPAR = 7
    DONE = 8

    # Transition table: (current state, token type) -> next state.
    _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
        # The trailer, when present, begins with '.' or '%'.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # After '.', only an attribute or method name is accepted.
        (DOT, token.NAME): NAME,
        # A name followed by '(' is a method call; any other token means the
        # name was an attribute and the trailer has ended.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # After '%' comes either a parenthesized group or one single argument
        # (e.g. a string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # A single format argument is the last leaf of the trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # The closing ')' is the last leaf of the trailer.
        # (NOTE: There are no entries keyed on the LPAR state; everything up to
        # the matching ')' is handled as a special case in `_next_state`.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        self._state = self.START
        self._unmatched_lpars = 0

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index of the first leaf after the string's trailer, if a
            "trailer" exists.
                OR
            @string_idx + 1, if no string "trailer" exists.
        """
        assert leaves[string_idx].type == token.STRING

        # Feed the leaves after the string to the state machine one by one;
        # the first leaf it rejects marks the end of the trailer.
        for idx in range(string_idx + 1, len(leaves)):
            if not self._next_state(leaves[idx]):
                return idx

        # Every remaining leaf belonged to the trailer (or there were none).
        return len(leaves)

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Advance the state machine by one leaf.

        Pre-conditions:
            * On the first call to this function, @leaf MUST be the leaf that
            was directly after the string leaf in question (e.g. if our target
            string is `line.leaves[i]` then the first call to this method must
            be `line.leaves[i + 1]`).
            * On the next call to this function, the leaf parameter passed in
            MUST be the leaf directly following @leaf.

        Returns:
            True iff @leaf is a part of the string's trailer.

        Raises:
            RuntimeError if the lookup table has neither a specific nor a
            default transition for the current state.
        """
        # Empty LPAR / RPAR leaves never affect the state.
        if is_empty_par(leaf):
            return True

        leaf_type = leaf.type
        if leaf_type == token.LPAR:
            self._unmatched_lpars += 1

        state = self._state

        # Special case: while inside parentheses, every leaf is a part of the
        # trailer. We only watch for the RPAR that balances the count.
        if state == self.LPAR:
            if leaf_type == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
            return True

        # Otherwise consult the lookup table: a transition for this exact
        # token wins, then the state's default transition.
        transitions = self._goto
        exact_key = (state, leaf_type)
        default_key = (state, self.DEFAULT_TOKEN)
        if exact_key in transitions:
            self._state = transitions[exact_key]
        elif default_key in transitions:
            self._state = transitions[default_key]
        else:
            # No transition at all means the table itself is incomplete.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        return self._state != self.DONE
4446
4447
def TErr(err_msg: str) -> Err[CannotTransform]:
    """(T)ransform Err

    Shorthand for building the error variant of a TResult: wraps @err_msg in
    a CannotTransform and that, in turn, in an Err.
    """
    return Err(CannotTransform(err_msg))
4455
4456
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff at least one comment in @comment_list starts with a pragma
        recognized by one of the more common static analysis tools for python
        (e.g. mypy, flake8, pylint).
    """
    pragma_prefixes = ("# type:", "# noqa", "# pylint:")
    return any(comment.value.startswith(pragma_prefixes) for comment in comment_list)
4469
4470
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Detach @string_leaf from its parent and return a function that inserts
    new children, one per call, into the slot @string_leaf used to occupy.
    Consecutive calls place their children one after another.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`. Assume the node `N` has the following
        original structure:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        We then run the code snippet shown below.
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        After which point, it follows that `string_leaf.parent is None` and
        the node `N` now has the following structure:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Remember where the string lived BEFORE removing it from the tree.
    parent_node = string_leaf.parent
    next_slot = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal next_slot

        assert parent_node is not None
        assert next_slot is not None

        parent_node.insert_child(next_slot, child)
        # The next child goes directly after the one just inserted.
        next_slot += 1

    return insert_str_child
4530
4531
def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once its leading STRING_PREFIX_CHARS characters are
        stripped, starts with three identical quotation characters.
    """
    unprefixed = string.lstrip(STRING_PREFIX_CHARS)
    return unprefixed.startswith(('"""', "'''"))
4539
4540
def parent_type(node: Optional[LN]) -> Optional[NodeType]:
    """
    Returns:
        The type of @node's parent, if @node is not None and has a parent.
            OR
        None, otherwise.
    """
    parent = None if node is None else node.parent
    return None if parent is None else parent.type
4552
4553
def is_empty_par(leaf: Leaf) -> bool:
    """Return True iff @leaf is an empty left or an empty right parenthesis."""
    if is_empty_lpar(leaf):
        return True

    return is_empty_rpar(leaf)
4556
4557
def is_empty_lpar(leaf: Leaf) -> bool:
    """Return True iff @leaf is an LPAR token whose value is the empty string."""
    if leaf.type != token.LPAR:
        return False

    return leaf.value == ""
4560
4561
def is_empty_rpar(leaf: Leaf) -> bool:
    """Return True iff @leaf is an RPAR token whose value is the empty string."""
    if leaf.type != token.RPAR:
        return False

    return leaf.value == ""
4564
4565
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Build a predicate that tells whether an index can be used to subscript
    @seq without wrapping around (negative) or overflowing.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        # The length is read on every call, so changes to @seq are honored.
        if idx < 0:
            return False

        return idx < len(seq)

    return is_valid_index
4591
4592
def line_to_string(line: Line) -> str:
    """Returns `str(@line)` with leading and trailing newlines removed.

    WARNING: This is known to be computationally expensive.
    """
    rendered = str(line)
    return rendered.strip("\n")
4599
4600
def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None:
    """
    Copy @leaves (which come from @old_line) onto @new_line while keeping the
    underlying Node structure consistent.

    For every leaf in @leaves a duplicate with the same type and value is
    created. The duplicate takes the original's place in the Node structure
    and is appended to @new_line, followed by the comments that @old_line
    holds for the original leaf.

    Pre-conditions:
        set(@leaves) is a subset of set(@old_line.leaves).
    """
    for original in leaves:
        duplicate = Leaf(original.type, original.value)
        replace_child(original, duplicate)
        new_line.append(duplicate)

        # Comments are looked up by the ORIGINAL leaf on the old line.
        for comment in old_line.comments_after(original):
            new_line.append(comment, preformatted=True)
4621
4622
def replace_child(old_child: LN, new_child: LN) -> None:
    """
    Side Effects:
        * If @old_child has a (truthy) parent, @old_child is removed from it
        and @new_child is inserted at the position @old_child occupied.
            OR
        * Otherwise, this function does nothing.
    """
    parent_node = old_child.parent
    if parent_node:
        vacated_idx = old_child.remove()
        # `remove()` may report no position; then there is nowhere to insert.
        if vacated_idx is not None:
            parent_node.insert_child(vacated_idx, new_child)
4638
4639
def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        The lowercased run of STRING_PREFIX_CHARS characters that @string
        starts with (e.g. '', 'r', 'f', or 'rf').
    """
    assert_is_leaf_string(string)

    # Walk forward until the first character that is not a prefix character.
    prefix_end = 0
    while string[prefix_end] in STRING_PREFIX_CHARS:
        prefix_end += 1

    return string[:prefix_end].lower()
4657
4658
def assert_is_leaf_string(string: str) -> None:
    """
    Checks the pre-condition that @string looks like the `leaf.value` of some
    Leaf such that `leaf.type == token.STRING`. The exact conditions that are
    checked are listed below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the earliest quote character of either kind, or -1 when
    # @string contains no quote character at all.
    quote_positions = [
        pos for pos in (string.find('"'), string.find("'")) if pos != -1
    ]
    quote_idx = min(quote_positions) if quote_positions else -1

    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string[-1] in (
        "'",
        '"',
    ), f"{string!r} is missing an ending quote character (' or \")."
    # Everything before the opening quote must be made of prefix characters.
    assert set(string[:quote_idx]).issubset(
        set(STRING_PREFIX_CHARS)
    ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
4692
4693
def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split `line` into head, body and tail around its first bracket pair.

    The head runs up to and including the first opening bracket, the body is
    what sits inside that bracket pair, and the tail starts at the matching
    closing bracket. Only the non-empty lines among them are yielded.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.

    Raises :exc:`CannotSplit` if `line` contains no opening bracket.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    matching_bracket: Optional[Leaf] = None

    # `bucket` is the list that receives the leaf currently being visited.
    bucket = head_leaves
    for leaf in line.leaves:
        closes_body = (
            bucket is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        )
        if closes_body:
            # An empty body means the closing bracket stays in the head.
            bucket = tail_leaves if body_leaves else head_leaves

        bucket.append(leaf)

        if bucket is head_leaves and leaf.type in OPENING_BRACKETS:
            matching_bracket = leaf
            bucket = body_leaves

    if not matching_bracket:
        raise CannotSplit("No brackets found")

    head = bracket_split_build_line(head_leaves, line, matching_bracket)
    body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)

    for part in (head, body, tail):
        if part:
            yield part
4728
4729
def right_hand_split(
    line: Line,
    line_length: int,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Yields the non-empty lines among head, body and tail, in that order.

    Raises :exc:`CannotSplit` if no usable bracket pair is found, or if the split
    is judged to have failed (see the checks below).

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    # Walk the leaves right-to-left: everything up to and including the last
    # non-omitted closing bracket is the tail, everything up to its opening
    # bracket is the body, and the remainder is the head.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    if not (opening_bracket and closing_bracket and head_leaves):
        # If there is no opening or closing_bracket that means the split failed and
        # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # The lists were filled back-to-front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    head = bracket_split_build_line(head_leaves, line, opening_bracket)
    body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        Feature.FORCE_OPTIONAL_PARENTHESES not in features
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length, omit_on_explode=omit)
    ):
        # Retry with this closing bracket excluded, so the recursive call has to
        # pick a different bracket pair.
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, features=features, omit=omit)
            return

        except CannotSplit:
            # The retry failed. Fall through to the split computed above, unless
            # one of the checks below rejects that split as well.
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to"
                    " satisfy the splitting algorithm because the head or the tail"
                    " contains multiline strings which by definition never fit one"
                    " line."
                )

    # NOTE(review): `ensure_visible` is defined elsewhere; presumably it gives
    # empty (invisible) parentheses their "(" / ")" value - confirm.
    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
4816
4817
4818 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
4819     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
4820
4821     Do nothing otherwise.
4822
4823     A left- or right-hand split is based on a pair of brackets. Content before
4824     (and including) the opening bracket is left on one line, content inside the
4825     brackets is put on a separate line, and finally content starting with and
4826     following the closing bracket is put on a separate line.
4827
4828     Those are called `head`, `body`, and `tail`, respectively. If the split
4829     produced the same line (all content in `head`) or ended up with an empty `body`
4830     and the `tail` is just the closing bracket, then it's considered failed.
4831     """
4832     tail_len = len(str(tail).strip())
4833     if not body:
4834         if tail_len == 0:
4835             raise CannotSplit("Splitting brackets produced the same line")
4836
4837         elif tail_len < 3:
4838             raise CannotSplit(
4839                 f"Splitting brackets on an empty body to save {tail_len} characters is"
4840                 " not worth it"
4841             )
4842
4843
def bracket_split_build_line(
    leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
) -> Line:
    """Return a new line with given `leaves` and respective comments from `original`.

    If `is_body` is True, the result line is one-indented inside brackets and as such
    has its first leaf's prefix normalized and a trailing comma added when expected.

    Note: `leaves` is modified in place when a trailing comma is added.
    """
    result = Line(depth=original.depth)
    if is_body:
        result.inside_brackets = True
        result.depth += 1

    if is_body and leaves:
        # Since body is a new indent level, remove spurious leading whitespace.
        normalize_prefix(leaves[0], inside_brackets=True)

        # A trailing comma is wanted for imports and for parenthesized function
        # definitions that contain no comma yet.
        no_commas = (
            original.is_def
            and opening_bracket.value == "("
            and not any(leaf.type == token.COMMA for leaf in leaves)
        )
        if original.is_import or no_commas:
            # Find the last leaf that is not a standalone comment, so the comma
            # is never placed after a comment. Add it only if that leaf is not
            # a comma already.
            for i in reversed(range(len(leaves))):
                last_leaf = leaves[i]
                if last_leaf.type == STANDALONE_COMMENT:
                    continue

                if last_leaf.type != token.COMMA:
                    leaves.insert(i + 1, Leaf(token.COMMA, ","))
                break

    # Populate the line, carrying over the comments attached to each leaf.
    for leaf in leaves:
        result.append(leaf, preformatted=True)
        for trailing_comment in original.comments_after(leaf):
            result.append(trailing_comment, preformatted=True)

    if is_body and should_split_body_explode(result, opening_bracket):
        result.should_explode = True

    return result
4885
4886
def dont_increase_indentation(split_func: Transformer) -> Transformer:
    """Decorate `split_func` so the first leaf of every line it yields is normalized.

    Each line produced by the wrapped split function has the prefix of its first
    leaf reset via :func:`normalize_prefix` (with `inside_brackets=True`) before
    it is passed on to the caller.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
        for produced in split_func(line, features):
            first_leaf = produced.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield produced

    return split_wrapper
4900
4901
@dont_increase_indentation
def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    Every leaf whose delimiter priority equals the maximum priority found on
    `line` (ignoring the very last leaf) ends the line currently being built.

    If the appropriate Features are given, the split will add trailing commas
    also in function signatures and calls that contain `*` and `**`.

    Raises :exc:`CannotSplit` if `line` has no leaves, if no delimiter is found,
    or if the highest priority is `DOT_PRIORITY` and only one delimiter has it.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    bt = line.bracket_tracker
    try:
        # The last leaf is excluded: a delimiter at the very end of the line is
        # not a place to split at.
        delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    # Smallest bracket depth seen so far; only leaves at that depth influence
    # whether a trailing comma may be added.
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            # `append_safe` refused the leaf: emit what was collected so far and
            # start a new line with this leaf.
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        # Comments attached to the leaf travel with it.
        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        if leaf.bracket_depth == lowest_depth:
            # A `*`/`**` at the outermost level makes a trailing comma safe only
            # when the matching feature is among `features`.
            if is_vararg(leaf, within={syms.typedargslist}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
                )
            elif is_vararg(leaf, within={syms.arglist, syms.argument}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
                )

        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            # This leaf is a delimiter of the chosen priority: the line ends here.
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        # Add a trailing comma to the final line when splitting on commas, unless
        # it already ends with a comma or with a standalone comment.
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            new_comma = Leaf(token.COMMA, ",")
            current_line.append(new_comma)
        yield current_line
4971
4972
@dont_increase_indentation
def standalone_comment_split(
    line: Line, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Split `line` so that standalone comments end up on lines of their own.

    Leaves (and the comments attached to them) are appended one by one to the
    line being built; whenever `append_safe` refuses a leaf, the line collected
    so far is yielded and a new one is started with that leaf.

    Raises :exc:`CannotSplit` if `line` holds no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def new_empty_line() -> Line:
        """Return an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = new_empty_line()

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = new_empty_line()
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for attached_comment in line.comments_after(leaf):
            yield from append_to_line(attached_comment)

    if current_line:
        yield current_line
5002
5003
def is_import(leaf: Leaf) -> bool:
    """Return True if `leaf` is the keyword that opens an import statement.

    That is either the name `import` directly under an `import_name` node or
    the name `from` directly under an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    if not parent:
        return False

    if leaf.value == "import":
        return parent.type == syms.import_name

    if leaf.value == "from":
        return parent.type == syms.import_from

    return False
5016
5017
def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
    """Return True if `leaf` is a comment whose text starts with `# type:`.

    `suffix` is appended to `# type:` before the comparison, which allows
    checking for one specific kind of type comment.  Both regular and
    standalone comments are recognized.
    """
    if leaf.type not in {token.COMMENT, STANDALONE_COMMENT}:
        return False

    return leaf.value.startswith("# type:" + suffix)
5024
5025
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce the prefix of `leaf` to newlines only, or to nothing at all.

    Inside brackets the prefix is always emptied.  Outside of brackets the
    newlines following the last `#` in the prefix are kept (minus one when the
    prefix contains a comment), unless the part before the first `#` contains
    a backslash, in which case the prefix is emptied as well.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    if inside_brackets:
        leaf.prefix = ""
        return

    segments = leaf.prefix.split("#")
    if "\\" in segments[0]:
        leaf.prefix = ""
        return

    newline_total = segments[-1].count("\n")
    if len(segments) > 1:
        # One newline merely terminates the comment; it is not a blank line.
        newline_total -= 1
    leaf.prefix = "\n" * newline_total
5042
5043
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Lowercase the `F`, `B` and `U` characters of a string prefix.

    Other prefix characters (such as `R`) are kept as written.  If
    `remove_u_prefix` is given, any `u` is dropped from the prefix as well.

    Note: Mutates its argument.
    """
    pattern = r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$"
    matched = re.match(pattern, leaf.value, re.DOTALL)
    assert matched is not None, f"failed to match string {leaf.value!r}"
    prefix, remainder = matched.group(1), matched.group(2)
    for upper in "FBU":
        prefix = prefix.replace(upper, upper.lower())
    if remove_u_prefix:
        prefix = prefix.replace("u", "")
    leaf.value = f"{prefix}{remainder}"
5058
5059
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Strings delimited by triple double quotes are left untouched.  For every
    other string the opposite quote style is tried; the leaf is only rewritten
    if that does not increase the number of backslashes and, for a string that
    already uses double quotes, only if it strictly reduces them.

    Note: Mutates its argument.
    """
    # Look at the string without its prefix characters to find the quote style.
    value = leaf.value.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        # Already double-quoted: single quotes are only used if they save escapes.
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    # A `new_quote` preceded by an even number of backslashes (i.e. not escaped).
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    # A `new_quote` / `orig_quote` preceded by an odd number of backslashes
    # (i.e. escaped).
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    # The string content between the opening and the closing quotes.
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        # NOTE(review): `sub_twice` is defined elsewhere; presumably it applies
        # the substitution twice to catch overlapping matches - confirm.
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        # Unescape the old quotes and escape the new ones.
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Collect the contents of the replacement fields of the f-string.
        matches = re.findall(
            r"""
            (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:[^}]|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending in a double quote would run into the closing
        # triple quotes, so that last quote has to be escaped.
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
5134
5135
def normalize_numeric_literal(leaf: Leaf) -> None:
    """Normalize a numeric (float, int, or complex) literal in place.

    The literal is lowercased, with two exceptions: the digits of hexadecimal
    literals are uppercased and the Python 2 long suffix is written as `L`.
    The number in front of an exponent or of a `j`/`L` suffix is passed through
    :func:`format_float_or_int_string`, and a `+` sign on the exponent is
    dropped.  Octal and binary literals are only lowercased.
    """
    text = leaf.value.lower()
    if text.startswith(("0o", "0b")):
        # Leave octal and binary literals alone.
        leaf.value = text
        return

    if text.startswith("0x"):
        # Change hex literals to upper case, keeping the `0x` marker lowercase.
        leaf.value = f"{text[:2]}{text[2:].upper()}"
        return

    if "e" in text:
        mantissa, exponent = text.split("e")
        leading = exponent[:1]
        sign = "-" if leading == "-" else ""
        if leading in ("-", "+"):
            exponent = exponent[1:]
        mantissa = format_float_or_int_string(mantissa)
        leaf.value = f"{mantissa}e{sign}{exponent}"
        return

    if text.endswith(("j", "l")):
        number, suffix = text[:-1], text[-1]
        # Capitalize in "2L" because "l" looks too similar to "1".
        if suffix == "l":
            suffix = "L"
        leaf.value = f"{format_float_or_int_string(number)}{suffix}"
        return

    leaf.value = format_float_or_int_string(text)
5170
5171
def format_float_or_int_string(text: str) -> str:
    """Make sure a float string has digits on both sides of the dot.

    A missing integer or fractional part is filled in with `0`, so `"1."`
    becomes `"1.0"` and `".5"` becomes `"0.5"`.  Text without a dot is
    returned unchanged.
    """
    if "." not in text:
        return text

    integral, fractional = text.split(".")
    return (integral or "0") + "." + (fractional or "0")
5179
5180
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    Does nothing if the prefix of `node` contains a `# fmt: off` comment.
    Mutates the children of `node` in place.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return
    # True when the child being visited directly follows a leaf listed in
    # `parens_after` (or is a leading `testlist_star_expr`, see below).
    check_lpar = False
    # Iterate over a copy because the loop body replaces and inserts children.
    for index, child in enumerate(list(node.children)):
        # Fixes a bug where invisible parens are not properly stripped from
        # assignment statements that contain type annotations.
        if isinstance(child, Node) and child.type == syms.annassign:
            normalize_invisible_parens(child, parens_after=parens_after)

        # Add parentheses around long tuple unpacking in assignments.
        if (
            index == 0
            and isinstance(child, Node)
            and child.type == syms.testlist_star_expr
        ):
            check_lpar = True

        if check_lpar:
            if is_walrus_assignment(child):
                # Parenthesized assignment expressions are left exactly as written.
                pass

            elif child.type == syms.atom:
                # Existing parentheses: hide them if that is safe; a True result
                # means the atom itself still needs invisible parens around it.
                if maybe_make_parens_invisible_in_atom(child, parent=node):
                    wrap_in_parentheses(node, child, visible=False)
            elif is_one_tuple(child):
                wrap_in_parentheses(node, child, visible=True)
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                wrap_in_parentheses(node, child, visible=False)

        # The next child gets parentheses if this one is a leaf from `parens_after`.
        check_lpar = isinstance(child, Leaf) and child.value in parens_after
5235
5236
def normalize_fmt_off(node: Node) -> None:
    """Turn every `# fmt: off`/`# fmt: on` region under `node` into a standalone comment.

    Regions are converted one pair at a time until
    :func:`convert_one_fmt_off_pair` reports that nothing was converted.
    """
    while convert_one_fmt_off_pair(node):
        pass
5242
5243
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    The first qualifying `# fmt: off` comment found in a leaf prefix is handled:
    the nodes produced by :func:`generate_ignored_nodes` are removed from the
    tree and replaced by one `STANDALONE_COMMENT` leaf holding the comment and
    the verbatim source of those nodes.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        # NOTE(review): `consumed` presumably counts the prefix characters up to
        # and including a comment - confirm against `list_comments`.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:
                        continue

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:
                    continue

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                # Drop the part of the prefix up to the `# fmt: off` comment so it
                # is not repeated inside the hidden text built below.
                first.prefix = prefix[comment.consumed :]
                hidden_value = (
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                )
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                # Remove the ignored nodes, remembering where the first one was so
                # the replacement leaf can take its place.
                first_idx: Optional[int] = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                        first_idx = index
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                parent.insert_child(
                    first_idx,
                    Leaf(
                        STANDALONE_COMMENT,
                        hidden_value,
                        # Keep whatever preceded the `# fmt: off` comment in the
                        # original prefix, followed by the comment's newlines.
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                    ),
                )
                # The tree changed under the iteration; the caller is expected to
                # call again to find further pairs.
                return True

            previous_consumed = comment.consumed

    return False
5296
5297
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    Walks the container of `leaf` and its following siblings, yielding each one
    until a container whose prefix switches formatting back on is reached.

    Stops at the end of the block.
    """
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        if is_fmt_on(container):
            return

        # fix for fmt: on in children
        if contains_fmt_on_at_column(container, leaf.column):
            # `# fmt: on` sits somewhere inside this container: yield its children
            # one by one and stop at the first child that contains it.
            # NOTE(review): `container` is not advanced in this branch, so if no
            # child triggers the `return` below, the `while` loop repeats with
            # the same container - confirm that this cannot happen.
            for child in container.children:
                if contains_fmt_on_at_column(child, leaf.column):
                    return
                yield child
        else:
            yield container
            container = container.next_sibling
5317
5318
def is_fmt_on(container: LN) -> bool:
    """Tell whether the prefix of `container` leaves formatting switched on.

    The last `# fmt:` comment in the prefix decides: True if it is an `on`
    comment, False if it is an `off` comment or if there is none at all.
    """
    switched_on = False
    for comment in list_comments(container.prefix, is_endmarker=False):
        text = comment.value
        if text in FMT_ON:
            switched_on = True
        elif text in FMT_OFF:
            switched_on = False
    return switched_on
5330
5331
def contains_fmt_on_at_column(container: LN, column: int) -> bool:
    """Return True if a child of `container` at `column` switches formatting on.

    For a `Node` child the column of its first leaf child is compared, for a
    `Leaf` child its own column.
    """
    for child in container.children:
        if isinstance(child, Node):
            aligned = first_leaf_column(child) == column
        elif isinstance(child, Leaf):
            aligned = child.column == column
        else:
            aligned = False

        if aligned and is_fmt_on(child):
            return True

    return False
5345
5346
def first_leaf_column(node: Node) -> Optional[int]:
    """Return the column of the first direct child of `node` that is a `Leaf`.

    Returns None if none of the direct children is a leaf.
    """
    leaf_columns = (
        child.column for child in node.children if isinstance(child, Leaf)
    )
    return next(leaf_columns, None)
5353
5354
def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
    """Hide the parentheses of the atom `node` when that is safe, recursively.

    Nothing is changed for non-atoms, empty tuples, one-tuples, `yield`
    expressions outside of an expression statement, and atoms whose content has
    a delimiter of at least comma priority.  Redundant invisible parentheses
    directly nested inside `node` are stripped along the way.

    Returns whether the node should itself be wrapped in invisible parentheses.
    """
    if node.type != syms.atom:
        return False

    if is_empty_tuple(node) or is_one_tuple(node):
        return False

    if is_yield(node) and parent.type != syms.expr_stmt:
        return False

    if max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY:
        return False

    opener, closer = node.children[0], node.children[-1]
    if not (opener.type == token.LPAR and closer.type == token.RPAR):
        # An atom without parentheses of its own: the caller should wrap it.
        return True

    inner = node.children[1]
    # make parentheses invisible
    opener.value = ""  # type: ignore
    closer.value = ""  # type: ignore
    maybe_make_parens_invisible_in_atom(inner, parent=parent)

    if is_atom_with_invisible_parens(inner):
        # Two layers of invisible parens are redundant: replace `inner` with
        # the child sitting between its invisible parens.
        inner.replace(inner.children[1])

    return False
5389
5390
def is_atom_with_invisible_parens(node: LN) -> bool:
    """Return True if `node` is an atom enclosed in invisible parentheses.

    That is, an atom `Node` whose first and last children are a left and a
    right parenthesis leaf with an empty value.  Useful in dedupe-ing and
    normalizing parens.
    """
    if isinstance(node, Leaf) or node.type != syms.atom:
        return False

    opener, closer = node.children[0], node.children[-1]
    if not (isinstance(opener, Leaf) and isinstance(closer, Leaf)):
        return False

    if opener.type != token.LPAR or closer.type != token.RPAR:
        return False

    return opener.value == "" and closer.value == ""
5407
5408
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` is an atom made of just a pair of parentheses."""
    if node.type != syms.atom:
        return False

    children = node.children
    if len(children) != 2:
        return False

    opener, closer = children
    return opener.type == token.LPAR and closer.type == token.RPAR
5417
5418
def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
    """Return the middle child if `node` has the shape ( wrapped ).

    Only the token types of the first and last child are checked, so
    parentheses with an empty value match as well.  Returns None for any
    other shape.
    """
    children = node.children
    if len(children) != 3:
        return None

    opener, wrapped, closer = children
    if opener.type == token.LPAR and closer.type == token.RPAR:
        return wrapped

    return None
5431
5432
def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
    """Replace `child` in `parent` with an atom of the shape ( child ).

    The prefix of `child` is moved onto the new atom, which takes the place
    `child` had among the children of `parent`.

    If `visible` is False, the parenthesis leaves will be valueless (and thus
    invisible).
    """
    open_text, close_text = ("(", ")") if visible else ("", "")
    lpar = Leaf(token.LPAR, open_text)
    rpar = Leaf(token.RPAR, close_text)

    # The prefix belongs in front of the opening parenthesis, not the child.
    saved_prefix = child.prefix
    child.prefix = ""

    position = child.remove() or 0
    wrapper = Node(syms.atom, [lpar, child, rpar])
    wrapper.prefix = saved_prefix
    parent.insert_child(position, wrapper)
5449
5450
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens.

    With parentheses that is an atom wrapping a `testlist_gexp`; without them
    it is a node whose type is in `IMPLICIT_TUPLE`.  In both cases the tuple
    must consist of exactly two children, the second one being a comma.
    """
    if node.type == syms.atom:
        candidate = unwrap_singleton_parenthesis(node)
        if candidate is None or candidate.type != syms.testlist_gexp:
            return False

    elif node.type in IMPLICIT_TUPLE:
        candidate = node
    else:
        return False

    elements = candidate.children
    return len(elements) == 2 and elements[1].type == token.COMMA
5465
5466
def is_walrus_assignment(node: LN) -> bool:
    """Return True iff `node` is a parenthesized assignment expression.

    In other words, iff it is of the shape ( test := test ).
    """
    wrapped = unwrap_singleton_parenthesis(node)
    if wrapped is None:
        return False

    return wrapped.type == syms.namedexpr_test
5471
5472
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression.

    Any number of enclosing parentheses around the expression is looked
    through.
    """
    current = node
    while True:
        if current.type == syms.yield_expr:
            return True

        if current.type == token.NAME and current.value == "yield":  # type: ignore
            return True

        if current.type != syms.atom or len(current.children) != 3:
            return False

        lpar, expr, rpar = current.children
        if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
            return False

        # Parenthesized expression: examine what is inside the parentheses.
        current = expr
5492
5493
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    `within` lists the node types the star must sit directly in.  A star that
    belongs to a `star_expr` is judged by the parent of that expression
    instead, since star expressions are also used as assignment targets in
    extended iterable unpacking (PEP 3132).

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in VARARGS_SPECIALS:
        return False

    owner = leaf.parent
    if not owner:
        return False

    if owner.type == syms.star_expr:
        # Look one level further up; see the docstring.
        owner = owner.parent
        if not owner:
            return False

    return owner.type in within
5515
5516
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a triple-quoted string containing a newline."""
    text = leaf.value
    return has_triple_quotes(text) and "\n" in text
5520
5521
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body.

    Such a suite has exactly four children: NEWLINE, INDENT, a statement
    accepted by :func:`is_stub_body`, and DEDENT.
    """
    children = node.children
    if len(children) != 4:
        return False

    newline, indent, statement, dedent = children
    expected = (token.NEWLINE, token.INDENT, token.DEDENT)
    if (newline.type, indent.type, dedent.type) != expected:
        return False

    return is_stub_body(statement)
5533
5534
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis.

    The statement must have exactly two children, the first one being an atom
    made of three dot leaves.
    """
    if not (
        isinstance(node, Node)
        and node.type == syms.simple_stmt
        and len(node.children) == 2
    ):
        return False

    expression = node.children[0]
    if expression.type != syms.atom or len(expression.children) != 3:
        return False

    return all(part == Leaf(token.DOT, ".") for part in expression.children)
5549
5550
def max_delimiter_priority_in_atom(node: LN) -> Priority:
    """Return the highest delimiter priority found inside the atom `node`.

    Only atoms whose content is enclosed in a pair of parentheses are
    examined.  Returns 0 if `node` isn't an atom, if it has no enclosing
    parentheses, or if there are no delimiters between them.
    """
    if node.type != syms.atom:
        return 0

    opener, closer = node.children[0], node.children[-1]
    if opener.type != token.LPAR or closer.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for inner in node.children[1:-1]:
        inner_leaves = [inner] if isinstance(inner, Leaf) else inner.leaves()
        for leaf in inner_leaves:
            tracker.mark(leaf)

    try:
        return tracker.max_delimiter_priority()

    except ValueError:
        # Raised when the tracker holds no delimiters at all.
        return 0
5577
5578
def ensure_visible(leaf: Leaf) -> None:
    """Give a parenthesis leaf its visible value; leave other leaves alone.

    Parentheses could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    visible_text = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_text is not None:
        leaf.value = visible_text
5589
5590
def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` be immediately split with `delimiter_split()` after RHS?

    True when the body is delimited by commas (not counting a trailing one) and
    either ends with a trailing comma or sits in brackets whose parent is an
    atom or an `import_from` statement.
    """
    owner = opening_bracket.parent
    if not owner or opening_bracket.value not in "[{(":
        return False

    # We're essentially checking if the body is delimited by commas and there's more
    # than one of them (we're excluding the trailing comma and if the delimiter priority
    # is still commas, that means there's more).
    try:
        final_leaf = line.leaves[-1]
        has_trailing_comma = final_leaf.type == token.COMMA
        ignored = {id(final_leaf)} if has_trailing_comma else set()
        top_priority = line.bracket_tracker.max_delimiter_priority(exclude=ignored)
    except (IndexError, ValueError):
        # Either the line is empty or it holds no delimiters.
        return False

    if top_priority != COMMA_PRIORITY:
        return False

    # always explode imports
    return has_trailing_comma or owner.type in {syms.atom, syms.import_from}
5616
5617
def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
    """Return True if content between `opening` and `closing` looks like a one-tuple.

    Both brackets have to be parentheses; any other pair yields False.  Commas
    directly inside the pair (one bracket level deeper than `closing`) are
    counted, and a comma that belongs to an argument list of a call or of a
    function definition counts twice and ends the search.  The result is True
    when fewer than two commas were counted.

    Raises :exc:`LookupError` if `opening` is not one of `leaves`.
    """
    # Reject the pair as soon as EITHER bracket is not a parenthesis.
    if opening.type != token.LPAR or closing.type != token.RPAR:
        return False

    depth = closing.bracket_depth + 1
    for _opening_index, leaf in enumerate(leaves):
        if leaf is opening:
            break

    else:
        raise LookupError("Opening paren not found in `leaves`")

    commas = 0
    # Start scanning right after the opening parenthesis.
    _opening_index += 1
    for leaf in leaves[_opening_index:]:
        if leaf is closing:
            break

        bracket_depth = leaf.bracket_depth
        if bracket_depth == depth and leaf.type == token.COMMA:
            commas += 1
            if leaf.parent and leaf.parent.type in {
                syms.arglist,
                syms.typedargslist,
            }:
                # A comma of an argument list never makes a one-tuple.
                commas += 1
                break

    return commas < 2
5648
5649
def get_features_used(node: Node) -> Set[Feature]:
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings;
    - underscores in numeric literals;
    - trailing commas after * or ** in function signatures and calls;
    - positional only arguments in function signatures and lambdas;
    - assignment expressions (`:=`).
    """
    features: Set[Feature] = set()
    for n in node.pre_order():
        if n.type == token.STRING:
            # String prefixes are case-insensitive and may mix cases, so compare
            # in lowercase to also catch spellings such as `Rf"..."` or `fR"..."`.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                features.add(Feature.F_STRINGS)

        elif n.type == token.NUMBER:
            if "_" in n.value:  # type: ignore
                features.add(Feature.NUMERIC_UNDERSCORES)

        elif n.type == token.SLASH:
            # A bare `/` directly inside an argument list marks positional-only
            # parameters.
            if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
                features.add(Feature.POS_ONLY_ARGUMENTS)

        elif n.type == token.COLONEQUAL:
            features.add(Feature.ASSIGNMENT_EXPRESSIONS)

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # An argument list that ends with a trailing comma: that is only a
            # newer feature if the list also unpacks with `*` or `**`.
            if n.type == syms.typedargslist:
                feature = Feature.TRAILING_COMMA_IN_DEF
            else:
                feature = Feature.TRAILING_COMMA_IN_CALL

            for ch in n.children:
                if ch.type in STARS:
                    features.add(feature)

                if ch.type == syms.argument:
                    # In calls the star is nested one level deeper.
                    for argch in ch.children:
                        if argch.type in STARS:
                            features.add(feature)

    return features
5697
5698
def detect_target_versions(node: Node) -> Set[TargetVersion]:
    """Return every target version whose feature set covers what `node` uses."""
    used = get_features_used(node)
    compatible: Set[TargetVersion] = set()
    for candidate in TargetVersion:
        # A version qualifies when all detected features are available in it.
        if used <= VERSION_TO_FEATURES[candidate]:
            compatible.add(candidate)
    return compatible
5705
5706
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
    set is empty, unless the line should explode, in which case bracket pairs until
    the one that needs to explode are omitted.

    Note that the very same set object is yielded every time and is mutated in
    place between yields; a caller that needs a snapshot has to copy it.
    """

    omit: Set[LeafID] = set()
    if not line.should_explode:
        yield omit

    # Running width of the trailer scanned so far, seeded with four columns per
    # `line.depth` level.
    length = 4 * line.depth
    # Bracket pair currently being walked through (right to left), if any.
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    # Closing brackets seen inside the current pair (plus empty pairs); they are
    # only moved into `omit` together with the next qualifying outer pair.
    inner_brackets: Set[LeafID] = set()
    # Leaves are visited from the end of the line towards its beginning.
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # `enumerate_with_length` adds the length of attached comments, so any
        # surplus over prefix + value means a comment follows this leaf.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        if opening_bracket:
            # Still inside the pair recorded below; keep going until its opening
            # bracket is reached.
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                prev = line.leaves[index - 1] if index > 0 else None
                if (
                    line.should_explode
                    and prev
                    and prev.type == token.COMMA
                    and not is_one_tuple_between(
                        leaf.opening_bracket, leaf, line.leaves
                    )
                ):
                    # Never omit bracket pairs with trailing commas.
                    # We need to explode on those.
                    break

                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            prev = line.leaves[index - 1] if index > 0 else None
            if prev and prev.type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (i.e. only add them to the `omit` set if another
                # pair of brackets was good enough).
                inner_brackets.add(id(leaf))
                continue

            if closing_bracket:
                # The previously recorded pair fit within the limit: publish it
                # together with everything nested in it.
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit

            if (
                line.should_explode
                and prev
                and prev.type == token.COMMA
                and not is_one_tuple_between(leaf.opening_bracket, leaf, line.leaves)
            ):
                # Never omit bracket pairs with trailing commas.
                # We need to explode on those.
                break

            # Brackets with an empty value (presumably invisible parentheses --
            # confirm) never start a new pair.
            if leaf.value:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
5781
5782
def get_future_imports(node: Node) -> Set[str]:
    """Collect the names imported from `__future__` at the top of the file.

    Only the leading run of simple statements is inspected: docstring-like
    string statements are skipped, `from __future__ import ...` statements are
    harvested, and anything else ends the scan.
    """

    def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
        for item in children:
            if isinstance(item, Leaf):
                # Non-name leaves are simply skipped.
                if item.type == token.NAME:
                    yield item.value
                continue

            if item.type == syms.import_as_names:
                yield from get_imports_from_children(item.children)
                continue

            if item.type != syms.import_as_name:
                raise AssertionError("Invalid syntax parsing imports")

            # `name as alias`: report the original name, not the alias.
            imported = item.children[0]
            assert isinstance(imported, Leaf), "Invalid syntax parsing imports"
            assert imported.type == token.NAME, "Invalid syntax parsing imports"
            yield imported.value

    found: Set[str] = set()
    for statement in node.children:
        if statement.type != syms.simple_stmt:
            break

        head = statement.children[0]
        if isinstance(head, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            looks_like_docstring = (
                len(statement.children) == 2
                and head.type == token.STRING
                and statement.children[1].type == token.NEWLINE
            )
            if not looks_like_docstring:
                break

            continue

        if head.type != syms.import_from:
            break

        module = head.children[1]
        if not (isinstance(module, Leaf) and module.value == "__future__"):
            break

        found.update(get_imports_from_children(head.children[3:]))

    return found
5831
5832
@lru_cache()
def get_gitignore(root: Path) -> PathSpec:
    """Return a PathSpec matching gitignore content if present.

    Reads `root / ".gitignore"` when that file exists; otherwise the returned
    spec is built from no patterns at all.  Results are cached per `root`.
    """
    gitignore = root / ".gitignore"
    lines: List[str] = []
    if gitignore.is_file():
        # Decode explicitly as UTF-8 instead of relying on the locale's default
        # encoding, which differs between platforms.
        with gitignore.open(encoding="utf-8") as gf:
            lines = gf.readlines()
    return PathSpec.from_lines("gitwildmatch", lines)
5842
5843
def normalize_path_maybe_ignore(
    path: Path, root: Path, report: "Report"
) -> Optional[str]:
    """Return `path` as a POSIX-style string relative to `root`, or None if ignored.

    Relative paths are anchored at the current working directory before being
    resolved.  A path is ignored (and mentioned via `report.path_ignored`) when
    resolving it raises OSError, or when it is a symbolic link that resolves
    outside of `root`.  Any other path outside of `root` re-raises ValueError.
    """
    try:
        if path.is_absolute():
            absolute = path
        else:
            absolute = Path.cwd() / path
        relative = absolute.resolve().relative_to(root)
        return relative.as_posix()
    except OSError as e:
        report.path_ignored(path, f"cannot be read because {e}")
    except ValueError:
        if not path.is_symlink():
            raise

        report.path_ignored(path, f"is a symbolic link that points outside {root}")

    return None
5866
5867
def gen_python_files(
    paths: Iterable[Path],
    root: Path,
    include: Optional[Pattern[str]],
    exclude: Pattern[str],
    force_exclude: Optional[Pattern[str]],
    report: "Report",
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Yield the files found in `paths` that survive all exclusion rules.

    Each entry is normalized relative to `root` and dropped if it matches the
    `gitignore` spec, the `exclude` regex or the `force_exclude` regex (in that
    order).  Surviving directories are descended into recursively; surviving
    files are yielded when they match `include` (or when `include` is falsy).

    Symbolic links pointing outside of the `root` directory are ignored.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for entry in paths:
        relative = normalize_path_maybe_ignore(entry, root, report)
        if relative is None:
            continue

        # First ignore files matching .gitignore
        if gitignore.match_file(relative):
            report.path_ignored(entry, "matches the .gitignore file content")
            continue

        # Then ignore with `--exclude` and `--force-exclude` options.  The
        # regexes see the path with a leading slash, and directories with a
        # trailing one as well.
        candidate = "/" + relative
        if entry.is_dir():
            candidate += "/"

        if exclude:
            hit = exclude.search(candidate)
            # An empty match does not count as an exclusion.
            if hit and hit.group(0):
                report.path_ignored(entry, "matches the --exclude regular expression")
                continue

        if force_exclude:
            forced_hit = force_exclude.search(candidate)
            if forced_hit and forced_hit.group(0):
                report.path_ignored(
                    entry, "matches the --force-exclude regular expression"
                )
                continue

        if entry.is_dir():
            yield from gen_python_files(
                entry.iterdir(),
                root,
                include,
                exclude,
                force_exclude,
                report,
                gitignore,
            )

        elif entry.is_file():
            if not include or include.search(candidate):
                yield entry
5927
5928
@lru_cache()
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return the closest common ancestor holding .git, .hg, or pyproject.toml.

    The search starts at the deepest directory shared by every entry of `srcs`
    (a directory entry counts as its own ancestor) and climbs upwards.

    When no directory on the way up carries a marker, the last directory
    visited -- the top of the file system -- is returned.  An empty `srcs`
    short-circuits to the resolved "/".
    """
    if not srcs:
        return Path("/").resolve()

    # One set of candidate ancestors per source.
    ancestor_sets = []
    for src in srcs:
        resolved = Path(Path.cwd(), src).resolve()
        ancestors = set(resolved.parents)
        if resolved.is_dir():
            ancestors.add(resolved)
        ancestor_sets.append(ancestors)

    # Of the directories shared by all sources, pick the one ranking highest
    # when compared by path components.
    deepest = max(set.intersection(*ancestor_sets), key=lambda path: path.parts)

    for directory in [deepest, *deepest.parents]:
        if (
            (directory / ".git").exists()
            or (directory / ".hg").is_dir()
            or (directory / "pyproject.toml").is_file()
        ):
            return directory

    return directory
5966
5967
@dataclass
class Report:
    """Tallies formatting outcomes; `str(report)` renders a one-line summary."""

    # Mode switches that influence wording and verbosity of the output.
    check: bool = False
    diff: bool = False
    quiet: bool = False
    verbose: bool = False
    # Running totals maintained by `done()` and `failed()`.
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Record a successfully processed `src` and print a message if enabled."""
        if changed is Changed.YES:
            verb = "would reformat" if self.check or self.diff else "reformatted"
            if self.verbose or not self.quiet:
                out(f"{verb} {src}")
            self.change_count += 1
            return

        # Unchanged files are only mentioned in verbose mode.
        if self.verbose:
            if changed is Changed.NO:
                note = f"{src} already well formatted, good job."
            else:
                note = f"{src} wasn't modified on disk since last run."
            out(note, bold=False)
        self.same_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Record a failure for `src` and print the error message."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    def path_ignored(self, path: Path, message: str) -> None:
        """In verbose mode, print why `path` was ignored."""
        if self.verbose:
            out(f"{path} ignored: {message}", bold=False)

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        - 123 if anything failed;
        - 1 if files were changed while --check is in effect;
        - 0 otherwise.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special return codes reserved by the shell.
        if self.failure_count:
            return 123

        if self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        if self.check or self.diff:
            wording = (
                "would be reformatted",
                "would be left unchanged",
                "would fail to reformat",
            )
        else:
            wording = ("reformatted", "left unchanged", "failed to reformat")
        reformatted, unchanged, failed = wording

        def plural(count: int) -> str:
            return "s" if count > 1 else ""

        chunks = []
        if self.change_count:
            text = f"{self.change_count} file{plural(self.change_count)} {reformatted}"
            chunks.append(click.style(text, bold=True))
        if self.same_count:
            chunks.append(
                f"{self.same_count} file{plural(self.same_count)} {unchanged}"
            )
        if self.failure_count:
            text = f"{self.failure_count} file{plural(self.failure_count)} {failed}"
            chunks.append(click.style(text, fg="red"))
        return ", ".join(chunks) + "."
6052
6053
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` with the newest grammar that accepts it.

    On Python 3.8+ the builtin `ast` is tried with feature versions from the
    running minor version down to 3.5; on older interpreters `ast3` is tried
    with feature versions 7 and 6.  If every attempt raises SyntaxError, the
    result (or exception) of `ast27.parse` is returned.
    """
    filename = "<unknown>"
    if sys.version_info >= (3, 8):
        # TODO: support Python 4+ ;)
        attempts = [
            partial(ast.parse, src, filename, feature_version=(3, minor))
            for minor in range(sys.version_info[1], 4, -1)
        ]
    else:
        attempts = [
            partial(ast3.parse, src, filename, feature_version=version)
            for version in (7, 6)
        ]

    for attempt in attempts:
        try:
            return attempt()
        except SyntaxError:
            pass

    return ast27.parse(src)
6071
6072
def _fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    String/bytes, number and name-constant nodes are replaced by a fresh
    `ast.Constant` carrying the same payload; every other node is returned
    untouched.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        payload = node.s
    elif isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        payload = node.n
    elif isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        payload = node.value
    else:
        return node

    return ast.Constant(value=payload)
6087
6088
def _stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Yield an indented, line-by-line rendering of `node` for AST comparison.

    The rendering deliberately blurs a few differences: legacy constant nodes
    are normalized to `Constant`, fields of TypeIgnore nodes are left out,
    tuples directly under `del` targets are flattened, and whitespace around
    newlines in string constants is folded.
    """
    node = _fixup_ast_constants(node)
    class_name = node.__class__.__name__
    own_indent = "  " * depth
    field_indent = "  " * (depth + 1)
    value_indent = "  " * (depth + 2)
    ast_types = (ast.AST, ast3.AST, ast27.AST)

    yield f"{own_indent}{class_name}("

    for name in sorted(node._fields):
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        if isinstance(node, type_ignore_classes):
            break

        try:
            value = getattr(node, name)
        except AttributeError:
            continue

        yield f"{field_indent}{name}="

        if isinstance(value, list):
            # Ignore nested tuples within del statements, because we may insert
            # parentheses and they change the AST.
            flatten_tuples = name == "targets" and isinstance(
                node, (ast.Delete, ast3.Delete, ast27.Delete)
            )
            for entry in value:
                if flatten_tuples and isinstance(
                    entry, (ast.Tuple, ast3.Tuple, ast27.Tuple)
                ):
                    for element in entry.elts:
                        yield from _stringify_ast(element, depth + 2)

                elif isinstance(entry, ast_types):
                    yield from _stringify_ast(entry, depth + 2)

        elif isinstance(value, ast_types):
            yield from _stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            is_text_constant = (
                isinstance(node, ast.Constant)
                and name == "value"
                and isinstance(value, str)
            )
            if is_text_constant:
                normalized = re.sub(r" *\n[ \t]*", "\n", value).strip()
            else:
                normalized = value
            yield f"{value_indent}{normalized!r},  # {value.__class__.__name__}"

    yield f"{own_indent})  # /{class_name}"
6146
6147
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError unless `src` and `dst` parse to equivalent ASTs.

    An AssertionError is also raised when either input cannot be parsed.  For
    problems with `dst`, diagnostic material is dumped to a temporary file
    whose path is included in the message.
    """
    try:
        src_ast = parse_ast(src)
    except Exception as exc:
        message = (
            "cannot use --safe with this file; failed to parse source file.  AST"
            f" error message: {exc}"
        )
        raise AssertionError(message)

    try:
        dst_ast = parse_ast(dst)
    except Exception as exc:
        trace = "".join(traceback.format_tb(exc.__traceback__))
        log = dump_to_file(trace, dst)
        message = (
            f"INTERNAL ERROR: Black produced invalid code: {exc}. Please report a bug"
            " on https://github.com/psf/black/issues.  This invalid output might be"
            f" helpful: {log}"
        )
        raise AssertionError(message) from None

    rendered_src = "\n".join(_stringify_ast(src_ast))
    rendered_dst = "\n".join(_stringify_ast(dst_ast))
    if rendered_src == rendered_dst:
        return

    log = dump_to_file(diff(rendered_src, rendered_dst, "src", "dst"))
    message = (
        "INTERNAL ERROR: Black produced code that is not equivalent to the"
        " source.  Please report a bug on https://github.com/psf/black/issues. "
        f" This diff might be helpful: {log}"
    )
    raise AssertionError(message) from None
6177
6178
def assert_stable(src: str, dst: str, mode: Mode) -> None:
    """Raise AssertionError if formatting `dst` again does not reproduce `dst`.

    On failure the mode and both diffs (source -> first pass, first pass ->
    second pass) are dumped to a temporary file named in the error message.
    """
    second_pass = format_str(dst, mode=mode)
    if second_pass == dst:
        return

    first_diff = diff(src, dst, "source", "first pass")
    second_diff = diff(dst, second_pass, "first pass", "second pass")
    log = dump_to_file(str(mode), first_diff, second_diff)
    message = (
        "INTERNAL ERROR: Black produced different code on the second pass of the"
        " formatter.  Please report a bug on https://github.com/psf/black/issues."
        f"  This diff might be helpful: {log}"
    )
    raise AssertionError(message) from None
6193
6194
@mypyc_attr(patchable=True)
def dump_to_file(*output: str) -> str:
    """Write each item of `output` to a new temporary log file; return its path.

    Every non-empty item is terminated with a newline if it lacks one.  The
    file is not deleted on close.
    """
    handle = tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    )
    with handle:
        for chunk in output:
            handle.write(chunk)
            if chunk and not chunk.endswith("\n"):
                handle.write("\n")
    return handle.name
6206
6207
@contextmanager
def nullcontext() -> Iterator[None]:
    """Do-nothing context manager; the `with` target is bound to None.

    Stand-in for `contextlib.nullcontext`, which the standard library only
    provides from Python 3.7 on.
    """
    yield None
6215
6216
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff (five lines of context) between texts `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header.  Identical
    inputs produce an empty string.
    """
    import difflib

    def as_lines(text: str) -> List[str]:
        # Every line gets a trailing newline, including a final unterminated one.
        return [f"{line}\n" for line in text.splitlines()]

    hunks = difflib.unified_diff(
        as_lines(a), as_lines(b), fromfile=a_name, tofile=b_name, n=5
    )
    return "".join(hunks)
6226
6227
def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
    """Signal handler for asyncio: announce the abort, then cancel every task.

    The "Aborted!" notice is emitted through `err` before any task is touched.
    """
    err("Aborted!")
    for pending in tasks:
        pending.cancel()
6233
6234
def shutdown(loop: asyncio.AbstractEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even when there is nothing to cancel or when waiting for
    the cancelled tasks raises.  As a side effect the "concurrent.futures"
    logger is switched to CRITICAL.
    """
    try:
        if sys.version_info[:2] >= (3, 7):
            all_tasks = asyncio.all_tasks
        else:
            all_tasks = asyncio.Task.all_tasks
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # Do not pass `loop=` to `gather()`: the parameter was deprecated in
        # Python 3.8 and removed in 3.10 (passing it raises TypeError).  The
        # tasks already know which loop they belong to.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
6259
6260
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    A single pass cannot replace overlapping matches; the second pass picks up
    the ones the first pass had to skip.  String normalization relies on this.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
6268
6269
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile `regex`, switching to verbose mode for multi-line patterns.

    A pattern containing a newline is prefixed with the inline `(?x)` flag
    before compilation; any other pattern is compiled as given.
    """
    source = "(?x)" + regex if "\n" in regex else regex
    compiled: Pattern[str] = re.compile(source)
    return compiled
6279
6280
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Yield `(index, element)` pairs of `sequence` from the last to the first.

    Equivalent to `reversed(enumerate(sequence))`, if that were possible.
    """
    descending = range(len(sequence) - 1, -1, -1)
    yield from zip(descending, reversed(sequence))
6287
6288
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Yield `(index, leaf, length)` for the leaves of `line`.

    `length` is the size of the leaf's prefix and value plus the values of all
    comments attached after it.  With `reversed=True` the leaves are visited
    back to front.

    Iteration ends as soon as a leaf whose value contains a newline is met
    (multiline strings); that leaf itself is not yielded.
    """
    walk = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for position, leaf in walk(line.leaves):
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        comment_width = sum(len(comment.value) for comment in line.comments_after(leaf))
        yield position, leaf, len(leaf.prefix) + len(leaf.value) + comment_width
6309
6310
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Tell whether `line` fits in `line_length` columns as a single line.

    `line_str` is used as the rendering of `line` when given (non-empty);
    otherwise the line is rendered here.  Renderings containing a newline
    (multiline strings) and lines with standalone comments never qualify.
    """
    rendered = line_str or line_to_string(line)
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:  # multiline strings
        return False

    return not line.contains_standalone_comments()
6323
6324
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is a cheap heuristic rather than an exhaustive search; it exists to
    avoid some unfortunate formattings (mostly around wrapping unsplittable
    code in unnecessary parentheses).

    Lines with fewer than two leaves are never splittable.  Lines that do not
    start with a string followed by a dot are always reported as splittable;
    the remaining ones go through the leaf-by-leaf inspection below.
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    string_attribute_start = (
        leaves[0].type == token.STRING and leaves[1].type == token.DOT
    )
    if not string_attribute_start:
        return True

    calls = 0
    dots = 0
    # NOTE(review): this reference is never advanced inside the loop, so every
    # comparison below is made against the very last leaf.  It looks like it was
    # meant to track the leaf following the current one -- confirm before
    # changing, since that would alter formatting decisions.
    trailing = leaves[-1]
    for current in reversed(leaves[:-1]):
        kind = current.type
        if kind in OPENING_BRACKETS:
            if trailing.type not in CLOSING_BRACKETS:
                return False

            calls += 1
        elif kind == token.DOT:
            dots += 1
        elif kind == token.NAME:
            if trailing.type != token.DOT and trailing.type not in OPENING_BRACKETS:
                return False

        elif kind not in CLOSING_BRACKETS:
            return False

        if dots > 1 and calls > 1:
            return False

    return True
6359
6360
def can_omit_invisible_parens(
    line: Line,
    line_length: int,
    omit_on_explode: Collection[LeafID] = (),
) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Only a subset of the nice looking formattings is recognized; the priority
    is to never answer True for a line that would then end up too long.

    `omit_on_explode` lists closing brackets (by id) to skip when looking for
    the last two leaves of a line that should explode.
    """
    tracker = line.bracket_tracker
    if not tracker.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    top_priority = tracker.max_delimiter_priority()
    if tracker.delimiter_count_with_priority(top_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if top_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    leaves = line.leaves
    assert len(leaves) >= 2, "Stranded delimiter"

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    first, second = leaves[0], leaves[1]
    if (
        first.type in OPENING_BRACKETS
        and second.type not in CLOSING_BRACKETS
        and _can_omit_opening_paren(line, first=first, line_length=line_length)
    ):
        return True

    # Note: a failed check above is not final because a line might have *both*
    # a leading opening bracket and a trailing closing bracket.  If the
    # opening bracket doesn't match our rule, maybe the closing will.

    if line.should_explode:
        try:
            penultimate, last = last_two_except(leaves, omit=omit_on_explode)
        except LookupError:
            # Turns out we'd omit everything.  We cannot skip the optional parentheses.
            return False
    else:
        penultimate, last = leaves[-2], leaves[-1]

    ends_with_bracket = (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    )
    if not ends_with_bracket:
        return False

    if penultimate.type in OPENING_BRACKETS:
        # Empty brackets don't help.
        return False

    if is_multiline_string(first):
        # Additional wrapping of a multiline string in this situation is
        # unnecessary.
        return True

    if line.should_explode and penultimate.type == token.COMMA:
        # The rightmost non-omitted bracket pair is the one we want to explode on.
        return True

    if _can_omit_closing_paren(line, last=last, line_length=line_length):
        return True

    return False
6437
6438
def _can_omit_opening_paren(line: Line, *, first: Leaf, line_length: int) -> bool:
    """See `can_omit_invisible_parens`.

    Measures what follows the bracket pair opened by `first` (starting with its
    closing bracket) and answers True only when every leaf of `line` was
    visited without that measured width exceeding `line_length`.
    """
    measuring = False
    width = 4 * line.depth
    last_seen = -1
    for last_seen, leaf, leaf_width in enumerate_with_length(line):
        if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
            measuring = True
        if not measuring:
            continue

        width += leaf_width
        if width > line_length:
            return False

        if leaf.type in OPENING_BRACKETS:
            # There are brackets we can further split on.
            measuring = False

    # The enumeration may have stopped early; only a complete walk over the
    # leaves counts as "line length wasn't exceeded".
    return len(line.leaves) == last_seen + 1
6462
6463
def _can_omit_closing_paren(line: Line, *, last: Leaf, line_length: int) -> bool:
    """See `can_omit_invisible_parens`.

    Walks the line up to the opening bracket matching `last` and answers True
    if, at that point, either another opening bracket was already seen or the
    accumulated width still fits in `line_length`.
    """
    width = 4 * line.depth
    other_brackets_seen = False
    for _position, leaf, leaf_width in enumerate_with_length(line):
        width += leaf_width
        if leaf is not last.opening_bracket:
            if leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                other_brackets_seen = True
            continue

        if other_brackets_seen or width <= line_length:
            return True

    return False
6479
6480
def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]:
    """Return (penultimate, last) leaves skipping brackets in `omit` and contents.

    `leaves` is scanned from the end.  A leaf whose `id()` is in `omit` is
    skipped together with every leaf back to (and including) its
    `opening_bracket`.

    Raises LookupError when fewer than two leaves survive the skipping.
    """
    skip_until = None
    trailing = None
    for candidate in reversed(leaves):
        if skip_until:
            # Inside an omitted bracket pair: resume after its opening bracket.
            if candidate is skip_until:
                skip_until = None
        elif trailing:
            return candidate, trailing
        elif id(candidate) in omit:
            skip_until = candidate.opening_bracket
        else:
            trailing = candidate

    raise LookupError("Last two leaves were also skipped")
6500
6501
def run_transformer(
    line: Line,
    transform: Transformer,
    mode: Mode,
    features: Collection[Feature],
    *,
    line_str: str = "",
) -> List[Line]:
    """Apply `transform` to `line` and format each line it produces.

    Every line yielded by `transform` is passed through `transform_line` and the
    results are collected.  `line_str` is the rendered form of `line`; it is
    computed with `line_to_string` when not supplied.

    When the transform is named "rhs", the line has invisible parentheses that
    are all still empty, the line has no multiline strings, and the first
    produced line neither fits in `mode.line_length` nor carries uncollapsable
    type comments or an unsplittable type: ignore, the transform is run again on
    a copy of the line with `Feature.FORCE_OPTIONAL_PARENTHESES` added.  That
    second result is used only if every one of its lines is short enough.

    Raises CannotTransform if the transform yields a line identical to
    `line_str`.
    """
    line_str = line_str or line_to_string(line)

    result: List[Line] = []
    for produced in transform(line, features):
        if str(produced).strip("\n") == line_str:
            raise CannotTransform("Line transformer returned an unchanged result")

        result.extend(transform_line(produced, mode=mode, features=features))

    # Each guard below mirrors one clause of the "ask for a second opinion"
    # condition, checked in the same order.
    if transform.__name__ != "rhs":
        return result

    invisible = line.bracket_tracker.invisible
    if not invisible or any(bracket.value for bracket in invisible):
        return result

    if line.contains_multiline_strings():
        return result

    head = result[0]
    if (
        head.contains_uncollapsable_type_comments()
        or head.contains_unsplittable_type_ignore()
        or is_line_short_enough(head, line_length=mode.line_length)
    ):
        return result

    # Retry on a fresh copy of the line, this time forcing optional parentheses.
    retry_line = line.clone()
    append_leaves(retry_line, line, line.leaves)
    retry_features = {*features, Feature.FORCE_OPTIONAL_PARENTHESES}
    second_opinion = run_transformer(
        retry_line, transform, mode, retry_features, line_str=line_str
    )
    every_line_fits = all(
        is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
    )
    return second_opinion if every_line_fits else result
6541
6542
def get_cache_file(mode: Mode) -> Path:
    """Return the path of the pickle cache file for `mode` inside `CACHE_DIR`."""
    file_name = f"cache.{mode.get_cache_key()}.pickle"
    return CACHE_DIR / file_name
6545
6546
def read_cache(mode: Mode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.

    Returns an empty cache when the cache file for `mode` does not exist or
    cannot be unpickled.
    """
    cache_file = get_cache_file(mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            # NOTE(review): pickle.load can execute code embedded in the file;
            # this relies on the cache directory only being written by trusted
            # users.
            cache: Cache = pickle.load(fobj)
        except (
            pickle.UnpicklingError,
            ValueError,
            # An empty or truncated file raises EOFError; other kinds of
            # corruption or a cache written by an incompatible interpreter may
            # surface as the remaining exception types.
            EOFError,
            IndexError,
            AttributeError,
            ImportError,
        ):
            return {}

    return cache
6563
6564
def get_cache_info(path: Path) -> CacheInfo:
    """Return the `(st_mtime, st_size)` pair of `path`.

    This pair is the information used to check if a file is already formatted
    or not.
    """
    file_stat = path.stat()
    modified_at = file_stat.st_mtime
    size = file_stat.st_size
    return (modified_at, size)
6569
6570
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that were modified on disk or are not in
    the cache. The other contains paths to non-modified files.  All returned
    paths are resolved.
    """
    changed: Set[Path] = set()
    unchanged: Set[Path] = set()
    for source in sources:
        resolved = source.resolve()
        # A path is stale when its recorded info is missing or differs from the
        # file's current info.
        is_stale = cache.get(resolved) != get_cache_info(resolved)
        bucket = changed if is_stale else unchanged
        bucket.add(resolved)
    return changed, unchanged
6585
6586
def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
    """Update the cache file.

    The current info of every path in `sources` is merged into `cache`, the
    result is pickled to a temporary file next to the cache file, and that file
    is then moved over the cache file with `os.replace`.

    Writing is best effort: an `OSError` at any step is swallowed.  The
    temporary file is removed whenever it could not be moved into place, so
    failed writes do not accumulate files in the cache directory.
    """
    cache_file = get_cache_file(mode)
    tmp_name: Optional[str] = None
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
            # Remember the name right away: delete=False means nobody else will
            # clean this file up if the dump or the replace below fails.
            tmp_name = f.name
            pickle.dump(new_cache, f, protocol=4)
        os.replace(tmp_name, cache_file)
        # The temporary file now *is* the cache file; nothing left to remove.
        tmp_name = None
    except OSError:
        pass
    finally:
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
6598
6599
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application.  Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code.  Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.

    The two Click modules are imported independently of each other, and a module
    that cannot be imported is skipped.  Each imported module that defines
    `_verify_python3_env` gets it replaced with a no-op.
    """
    modules: List[Any] = []
    try:
        from click import core
    except ImportError:
        pass
    else:
        modules.append(core)
    try:
        # `from click import _unicodefun` raises a plain ImportError (not
        # ModuleNotFoundError) when the installed Click has no such module,
        # as is the case for Click 8.1 and later.
        from click import _unicodefun  # type: ignore
    except ImportError:
        pass
    else:
        modules.append(_unicodefun)

    for module in modules:
        if hasattr(module, "_verify_python3_env"):
            module._verify_python3_env = lambda: None
6620
6621
def patched_main() -> None:
    """Run `main()` after calling `freeze_support()` and `patch_click()`."""
    # multiprocessing's freeze_support() is called first, before anything else.
    freeze_support()
    # Disable Click's Python 3 environment check before Click gets to run.
    patch_click()
    main()
6626
6627
def is_docstring(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string sitting in a docstring position.

    Single-line strings are never reported: for the purposes of docstring
    re-indentation, we don't need to do anything with single-line docstrings.
    """
    if not is_multiline_string(leaf):
        return False

    sibling_patterns = (
        # Nothing, NEWLINE, INDENT, then the enclosing simple statement
        # (presumably the first statement of an indented block).
        [None, token.NEWLINE, token.INDENT, syms.simple_stmt],
        # Multiline docstring on the same line as the `def`.
        # `syms.parameters` is only used in funcdefs and async_funcdefs in the
        # Python grammar, so no further checks are needed for this pattern.
        [syms.parameters, token.COLON, syms.simple_stmt],
    )
    return any(prev_siblings_are(leaf.parent, pattern) for pattern in sibling_patterns)
6646
6647
def fix_docstring(docstring: str, prefix: str) -> str:
    """Re-indent `docstring` so that its continuation lines start with `prefix`.

    Follows the algorithm from
    https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation

    Tabs are expanded to spaces, the first line is stripped on both sides, and
    the common indentation of the remaining non-blank lines is replaced with
    `prefix`.  Trailing whitespace is dropped.  Blank lines stay empty, except
    for the last line, which always gets `prefix`.  If every line after the
    first one is blank, only the first line is returned.
    """
    if not docstring:
        return ""

    # Convert tabs to spaces (following the normal Python rules) and split off
    # the first line, which doesn't count towards the common indentation.
    first, *rest = docstring.expandtabs().splitlines()
    margins = [len(text) - len(text.lstrip()) for text in rest if text.lstrip()]

    result = [first.strip()]
    if margins:
        margin = min(margins)
        final_position = len(rest) - 1
        for position, text in enumerate(rest):
            body = text[margin:].rstrip()
            keep_prefix = bool(body) or position == final_position
            result.append((prefix + body) if keep_prefix else "")
    return "\n".join(result)
6672
6673
# Run the patched entry point when this module is executed as a script.
if __name__ == "__main__":
    patched_main()