black.py

   1 import ast
   2 import asyncio
   3 from abc import ABC, abstractmethod
   4 from collections import defaultdict
   5 from concurrent.futures import Executor, ProcessPoolExecutor
   6 from contextlib import contextmanager
   7 from datetime import datetime
   8 from enum import Enum
   9 from functools import lru_cache, partial, wraps
  10 import io
  11 import itertools
  12 import logging
  13 from multiprocessing import Manager, freeze_support
  14 import os
  15 from pathlib import Path
  16 import pickle
  17 import regex as re
  18 import signal
  19 import sys
  20 import tempfile
  21 import tokenize
  22 import traceback
  23 from typing import (
  24     Any,
  25     Callable,
  26     Collection,
  27     Dict,
  28     Generator,
  29     Generic,
  30     Iterable,
  31     Iterator,
  32     List,
  33     Optional,
  34     Pattern,
  35     Sequence,
  36     Set,
  37     Tuple,
  38     Type,
  39     TypeVar,
  40     Union,
  41     cast,
  42     TYPE_CHECKING,
  43 )
  44 from typing_extensions import Final
  45 from mypy_extensions import mypyc_attr
  46
  47 from appdirs import user_cache_dir
  48 from dataclasses import dataclass, field, replace
  49 import click
  50 import toml
  51 from typed_ast import ast3, ast27
  52 from pathspec import PathSpec
  53
  54 # lib2to3 fork
  55 from blib2to3.pytree import Node, Leaf, type_repr
  56 from blib2to3 import pygram, pytree
  57 from blib2to3.pgen2 import driver, token
  58 from blib2to3.pgen2.grammar import Grammar
  59 from blib2to3.pgen2.parse import ParseError
  60
  61 from _black_version import version as __version__
  62
  63 if TYPE_CHECKING:
  64     import colorama  # noqa: F401
  65
# Default for the --line-length option.
DEFAULT_LINE_LENGTH = 88
# Default for the --exclude option: a regex of directory names to skip.
DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
# Default for the --include option: paths ending in ".py" or ".pyi".
DEFAULT_INCLUDES = r"\.pyi?$"
# Per-user cache directory, keyed by the running Black version.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))

STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.
  72
  73
# types
# Descriptive names for primitive types, used purely in annotations.
FileContent = str
Encoding = str
NewLine = str
Depth = int
NodeType = int
ParserState = int
LeafID = int
StringID = int
Priority = int
Index = int
# Either kind of blib2to3 tree element.
LN = Union[Leaf, Node]
# A transformer takes a line plus a collection of features and yields lines.
Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
Timestamp = float
FileSize = int
# One cache entry is a (timestamp, size) pair; the cache maps paths to entries.
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
# click.secho preconfigured to write to stderr: `out` in bold, `err` in red.
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)
  93
# Module-level side effect: initialize the blib2to3 grammars, handing pygram the
# cache directory computed above.
pygram.initialize(CACHE_DIR)
# Short alias for pygram.python_symbols.
syms = pygram.python_symbols
  96
  97
class NothingChanged(UserWarning):
    """Raised when reformatted code is the same as source.

    `format_file_contents` raises it for blank input and for input that
    formatting leaves unchanged.
    """
 100
 101
class CannotTransform(Exception):
    """Base class for errors raised by Transformers.

    It is also the error type carried by the `TResult` alias.
    """
 104
 105
class CannotSplit(CannotTransform):
    """Raised when no readable split fits the allotted line length."""
 108
 109
class InvalidInput(ValueError):
    """Raised when the input source code fails every parse attempt."""
 112
 113
# Type variables for the Ok/Err result wrappers: T is the wrapped success value,
# E the wrapped error, which must be an Exception subclass.
T = TypeVar("T")
E = TypeVar("E", bound=Exception)
 116
 117
class Ok(Generic[T]):
    """Success variant of `Result`: wraps a value of type `T`."""

    def __init__(self, value: T) -> None:
        self._value = value

    def ok(self) -> T:
        """Return the wrapped value."""
        return self._value
 124
 125
class Err(Generic[E]):
    """Failure variant of `Result`: wraps an exception of type `E`."""

    def __init__(self, e: E) -> None:
        self._e = e

    def err(self) -> E:
        """Return the wrapped exception (it is returned, not raised)."""
        return self._e
 132
 133
# The 'Result' return type is used to implement an error-handling model heavily
# influenced by that used by the Rust programming language
# (see https://doc.rust-lang.org/book/ch09-00-error-handling.html).
# A Result is either an Ok carrying a value or an Err carrying an exception.
Result = Union[Ok[T], Err[E]]
# Result specialised so that the error side is always a CannotTransform.
TResult = Result[T, CannotTransform]  # (T)ransform Result
# Transform result whose success value is an Index.
TMatchResult = TResult[Index]
 140
 141
 142 class WriteBack(Enum):
 143     NO = 0
 144     YES = 1
 145     DIFF = 2
 146     CHECK = 3
 147     COLOR_DIFF = 4
 148
 149     @classmethod
 150     def from_configuration(
 151         cls, *, check: bool, diff: bool, color: bool = False
 152     ) -> "WriteBack":
 153         if check and not diff:
 154             return cls.CHECK
 155
 156         if diff and color:
 157             return cls.COLOR_DIFF
 158
 159         return cls.DIFF if diff else cls.YES
 160
 161
class Changed(Enum):
    """Outcome of processing one source, as passed to `report.done`."""

    NO = 0  # formatting reported no change
    CACHED = 1  # skipped because the cache entry matched the file
    YES = 2  # formatting reported a change
 166
 167
 168 class TargetVersion(Enum):
 169     PY27 = 2
 170     PY33 = 3
 171     PY34 = 4
 172     PY35 = 5
 173     PY36 = 6
 174     PY37 = 7
 175     PY38 = 8
 176
 177     def is_python2(self) -> bool:
 178         return self is TargetVersion.PY27
 179
 180
# Target versions selected by the deprecated --py36 flag (see `main`).
PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
 182
 183
class Feature(Enum):
    """Syntax features whose availability differs between target versions."""

    # All string literals are unicode
    UNICODE_LITERALS = 1
    F_STRINGS = 2
    NUMERIC_UNDERSCORES = 3
    TRAILING_COMMA_IN_CALL = 4
    TRAILING_COMMA_IN_DEF = 5
    # The following two feature-flags are mutually exclusive, and exactly one should be
    # set for every version of python.
    ASYNC_IDENTIFIERS = 6
    ASYNC_KEYWORDS = 7
    ASSIGNMENT_EXPRESSIONS = 8
    POS_ONLY_ARGUMENTS = 9
 197
 198
# Features available under each target version.  `supports_feature` looks
# versions up here and requires the feature to be present for every one of them.
VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
    TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
    TargetVersion.PY35: {
        Feature.UNICODE_LITERALS,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.ASYNC_IDENTIFIERS,
    },
    TargetVersion.PY36: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_IDENTIFIERS,
    },
    # From PY37 on, ASYNC_KEYWORDS replaces ASYNC_IDENTIFIERS.
    TargetVersion.PY37: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_KEYWORDS,
    },
    TargetVersion.PY38: {
        Feature.UNICODE_LITERALS,
        Feature.F_STRINGS,
        Feature.NUMERIC_UNDERSCORES,
        Feature.TRAILING_COMMA_IN_CALL,
        Feature.TRAILING_COMMA_IN_DEF,
        Feature.ASYNC_KEYWORDS,
        Feature.ASSIGNMENT_EXPRESSIONS,
        Feature.POS_ONLY_ARGUMENTS,
    },
}
 235
 236
 237 @dataclass
 238 class Mode:
 239     target_versions: Set[TargetVersion] = field(default_factory=set)
 240     line_length: int = DEFAULT_LINE_LENGTH
 241     string_normalization: bool = True
 242     is_pyi: bool = False
 243
 244     def get_cache_key(self) -> str:
 245         if self.target_versions:
 246             version_str = ",".join(
 247                 str(version.value)
 248                 for version in sorted(self.target_versions, key=lambda v: v.value)
 249             )
 250         else:
 251             version_str = "-"
 252         parts = [
 253             version_str,
 254             str(self.line_length),
 255             str(int(self.string_normalization)),
 256             str(int(self.is_pyi)),
 257         ]
 258         return ".".join(parts)
 259
 260
# Legacy name for `Mode`, left for integrations that still use `FileMode`.
FileMode = Mode
 263
 264
 265 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 266     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 267
 268
 269 def find_pyproject_toml(path_search_start: str) -> Optional[str]:
 270     """Find the absolute filepath to a pyproject.toml if it exists"""
 271     path_project_root = find_project_root(path_search_start)
 272     path_pyproject_toml = path_project_root / "pyproject.toml"
 273     return str(path_pyproject_toml) if path_pyproject_toml.is_file() else None
 274
 275
 276 def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
 277     """Parse a pyproject toml file, pulling out relevant parts for Black
 278
 279     If parsing fails, will raise a toml.TomlDecodeError
 280     """
 281     pyproject_toml = toml.load(path_config)
 282     config = pyproject_toml.get("tool", {}).get("black", {})
 283     return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 284
 285
 286 def read_pyproject_toml(
 287     ctx: click.Context, param: click.Parameter, value: Optional[str]
 288 ) -> Optional[str]:
 289     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 290
 291     Returns the path to a successfully found and read configuration file, None
 292     otherwise.
 293     """
 294     if not value:
 295         value = find_pyproject_toml(ctx.params.get("src", ()))
 296         if value is None:
 297             return None
 298
 299     try:
 300         config = parse_pyproject_toml(value)
 301     except (toml.TomlDecodeError, OSError) as e:
 302         raise click.FileError(
 303             filename=value, hint=f"Error reading configuration file: {e}"
 304         )
 305
 306     if not config:
 307         return None
 308
 309     target_version = config.get("target_version")
 310     if target_version is not None and not isinstance(target_version, list):
 311         raise click.BadOptionUsage(
 312             "target-version", f"Config key target-version must be a list"
 313         )
 314
 315     default_map: Dict[str, Any] = {}
 316     if ctx.default_map:
 317         default_map.update(ctx.default_map)
 318     default_map.update(config)
 319
 320     ctx.default_map = default_map
 321     return value
 322
 323
 324 def target_version_option_callback(
 325     c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
 326 ) -> List[TargetVersion]:
 327     """Compute the target versions from a --target-version flag.
 328
 329     This is its own function because mypy couldn't infer the type correctly
 330     when it was a lambda, causing mypyc trouble.
 331     """
 332     return [TargetVersion[val.upper()] for val in v]
 333
 334
# NOTE: the docstring of `main` doubles as the command's help text, so it is
# deliberately left as a single line.
@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
@click.option(
    "-l",
    "--line-length",
    type=int,
    default=DEFAULT_LINE_LENGTH,
    help="How many characters per line to allow.",
    show_default=True,
)
@click.option(
    "-t",
    "--target-version",
    type=click.Choice([v.name.lower() for v in TargetVersion]),
    callback=target_version_option_callback,
    multiple=True,
    help=(
        "Python versions that should be supported by Black's output. [default: per-file"
        " auto-detection]"
    ),
)
@click.option(
    "--py36",
    is_flag=True,
    help=(
        "Allow using Python 3.6-only syntax on all input files.  This will put trailing"
        " commas in function signatures and calls also after *args and **kwargs."
        " Deprecated; use --target-version instead. [default: per-file auto-detection]"
    ),
)
@click.option(
    "--pyi",
    is_flag=True,
    help=(
        "Format all input files like typing stubs regardless of file extension (useful"
        " when piping source on standard input)."
    ),
)
@click.option(
    "-S",
    "--skip-string-normalization",
    is_flag=True,
    help="Don't normalize string quotes or prefixes.",
)
@click.option(
    "--check",
    is_flag=True,
    help=(
        "Don't write the files back, just return the status.  Return code 0 means"
        " nothing would change.  Return code 1 means some files would be reformatted."
        " Return code 123 means there was an internal error."
    ),
)
@click.option(
    "--diff",
    is_flag=True,
    help="Don't write the files back, just output a diff for each file on stdout.",
)
@click.option(
    "--color/--no-color",
    is_flag=True,
    help="Show colored diff. Only applies when `--diff` is given.",
)
@click.option(
    "--fast/--safe",
    is_flag=True,
    help="If --fast given, skip temporary sanity checks. [default: --safe]",
)
@click.option(
    "--include",
    type=str,
    default=DEFAULT_INCLUDES,
    help=(
        "A regular expression that matches files and directories that should be"
        " included on recursive searches.  An empty value means all files are included"
        " regardless of the name.  Use forward slashes for directories on all platforms"
        " (Windows, too).  Exclusions are calculated first, inclusions later."
    ),
    show_default=True,
)
@click.option(
    "--exclude",
    type=str,
    default=DEFAULT_EXCLUDES,
    help=(
        "A regular expression that matches files and directories that should be"
        " excluded on recursive searches.  An empty value means no paths are excluded."
        " Use forward slashes for directories on all platforms (Windows, too). "
        " Exclusions are calculated first, inclusions later."
    ),
    show_default=True,
)
@click.option(
    "-q",
    "--quiet",
    is_flag=True,
    help=(
        "Don't emit non-error messages to stderr. Errors are still emitted; silence"
        " those with 2>/dev/null."
    ),
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help=(
        "Also emit messages to stderr about files that were not changed or were ignored"
        " due to --exclude=."
    ),
)
@click.version_option(version=__version__)
# `src` is eager so it is already in `ctx.params` when the (also eager) --config
# callback looks it up to locate pyproject.toml.
@click.argument(
    "src",
    nargs=-1,
    type=click.Path(
        exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
    ),
    is_eager=True,
)
@click.option(
    "--config",
    type=click.Path(
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        allow_dash=False,
        path_type=str,
    ),
    is_eager=True,
    callback=read_pyproject_toml,
    help="Read configuration from PATH.",
)
@click.pass_context
def main(
    ctx: click.Context,
    code: Optional[str],
    line_length: int,
    target_version: List[TargetVersion],
    check: bool,
    diff: bool,
    color: bool,
    fast: bool,
    pyi: bool,
    py36: bool,
    skip_string_normalization: bool,
    quiet: bool,
    verbose: bool,
    include: str,
    exclude: str,
    src: Tuple[str, ...],
    config: Optional[str],
) -> None:
    """The uncompromising code formatter."""
    write_back = WriteBack.from_configuration(check=check, diff=diff, color=color)
    # Resolve the target versions: explicit --target-version, the deprecated
    # --py36 shortcut, or an empty set meaning per-file auto-detection.
    if target_version:
        if py36:
            err("Cannot use both --target-version and --py36")
            ctx.exit(2)
        else:
            versions = set(target_version)
    elif py36:
        err(
            "--py36 is deprecated and will be removed in a future version. Use"
            " --target-version py36 instead."
        )
        versions = PY36_VERSIONS
    else:
        # We'll autodetect later.
        versions = set()
    mode = Mode(
        target_versions=versions,
        line_length=line_length,
        is_pyi=pyi,
        string_normalization=not skip_string_normalization,
    )
    if config and verbose:
        out(f"Using configuration from {config}.", bold=False, fg="blue")
    # -c/--code: format the given string, print it and stop; no files are touched.
    if code is not None:
        print(format_str(code, mode=mode))
        ctx.exit(0)
    # Compile the include/exclude patterns; an invalid regex exits with status 2.
    try:
        include_regex = re_compile_maybe_verbose(include)
    except re.error:
        err(f"Invalid regular expression for include given: {include!r}")
        ctx.exit(2)
    try:
        exclude_regex = re_compile_maybe_verbose(exclude)
    except re.error:
        err(f"Invalid regular expression for exclude given: {exclude!r}")
        ctx.exit(2)
    report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose)
    root = find_project_root(src)
    sources: Set[Path] = set()
    path_empty(src, quiet, verbose, ctx)
    # Expand each command-line path into the set of files to format.
    for s in src:
        p = Path(s)
        if p.is_dir():
            sources.update(
                gen_python_files_in_dir(
                    p, root, include_regex, exclude_regex, report, get_gitignore(root)
                )
            )
        elif p.is_file() or s == "-":
            # if a file was explicitly given, we don't care about its extension
            sources.add(p)
        else:
            err(f"invalid path: {s}")
    if len(sources) == 0:
        if verbose or not quiet:
            out("No Python files are present to be formatted. Nothing to do 😴")
        ctx.exit(0)

    # A single source is formatted in this process; several go through
    # `reformat_many`.
    if len(sources) == 1:
        reformat_one(
            src=sources.pop(),
            fast=fast,
            write_back=write_back,
            mode=mode,
            report=report,
        )
    else:
        reformat_many(
            sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
        )

    if verbose or not quiet:
        out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
        click.secho(str(report), err=True)
    # The process exit status is whatever the report accumulated.
    ctx.exit(report.return_code)
 565
 566
 567 def path_empty(
 568     src: Tuple[str, ...], quiet: bool, verbose: bool, ctx: click.Context
 569 ) -> None:
 570     """
 571     Exit if there is no `src` provided for formatting
 572     """
 573     if not src:
 574         if verbose or not quiet:
 575             out("No Path provided. Nothing to do 😴")
 576             ctx.exit(0)
 577
 578
 579 def reformat_one(
 580     src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 581 ) -> None:
 582     """Reformat a single file under `src` without spawning child processes.
 583
 584     `fast`, `write_back`, and `mode` options are passed to
 585     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 586     """
 587     try:
 588         changed = Changed.NO
 589         if not src.is_file() and str(src) == "-":
 590             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 591                 changed = Changed.YES
 592         else:
 593             cache: Cache = {}
 594             if write_back != WriteBack.DIFF:
 595                 cache = read_cache(mode)
 596                 res_src = src.resolve()
 597                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 598                     changed = Changed.CACHED
 599             if changed is not Changed.CACHED and format_file_in_place(
 600                 src, fast=fast, write_back=write_back, mode=mode
 601             ):
 602                 changed = Changed.YES
 603             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 604                 write_back is WriteBack.CHECK and changed is Changed.NO
 605             ):
 606                 write_cache(cache, [src], mode)
 607         report.done(src, changed)
 608     except Exception as exc:
 609         report.failed(src, str(exc))
 610
 611
 612 def reformat_many(
 613     sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 614 ) -> None:
 615     """Reformat multiple files using a ProcessPoolExecutor."""
 616     loop = asyncio.get_event_loop()
 617     worker_count = os.cpu_count()
 618     if sys.platform == "win32":
 619         # Work around https://bugs.python.org/issue26903
 620         worker_count = min(worker_count, 61)
 621     try:
 622         executor = ProcessPoolExecutor(max_workers=worker_count)
 623     except OSError:
 624         # we arrive here if the underlying system does not support multi-processing
 625         # like in AWS Lambda, in which case we gracefully fallback to the default
 626         # mono-process Executor by using None
 627         executor = None
 628
 629     try:
 630         loop.run_until_complete(
 631             schedule_formatting(
 632                 sources=sources,
 633                 fast=fast,
 634                 write_back=write_back,
 635                 mode=mode,
 636                 report=report,
 637                 loop=loop,
 638                 executor=executor,
 639             )
 640         )
 641     finally:
 642         shutdown(loop)
 643         if executor is not None:
 644             executor.shutdown()
 645
 646
 647 async def schedule_formatting(
 648     sources: Set[Path],
 649     fast: bool,
 650     write_back: WriteBack,
 651     mode: Mode,
 652     report: "Report",
 653     loop: asyncio.AbstractEventLoop,
 654     executor: Optional[Executor],
 655 ) -> None:
 656     """Run formatting of `sources` in parallel using the provided `executor`.
 657
 658     (Use ProcessPoolExecutors for actual parallelism.)
 659
 660     `write_back`, `fast`, and `mode` options are passed to
 661     :func:`format_file_in_place`.
 662     """
 663     cache: Cache = {}
 664     if write_back != WriteBack.DIFF:
 665         cache = read_cache(mode)
 666         sources, cached = filter_cached(cache, sources)
 667         for src in sorted(cached):
 668             report.done(src, Changed.CACHED)
 669     if not sources:
 670         return
 671
 672     cancelled = []
 673     sources_to_cache = []
 674     lock = None
 675     if write_back == WriteBack.DIFF:
 676         # For diff output, we need locks to ensure we don't interleave output
 677         # from different processes.
 678         manager = Manager()
 679         lock = manager.Lock()
 680     tasks = {
 681         asyncio.ensure_future(
 682             loop.run_in_executor(
 683                 executor, format_file_in_place, src, fast, mode, write_back, lock
 684             )
 685         ): src
 686         for src in sorted(sources)
 687     }
 688     pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
 689     try:
 690         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 691         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 692     except NotImplementedError:
 693         # There are no good alternatives for these on Windows.
 694         pass
 695     while pending:
 696         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 697         for task in done:
 698             src = tasks.pop(task)
 699             if task.cancelled():
 700                 cancelled.append(task)
 701             elif task.exception():
 702                 report.failed(src, str(task.exception()))
 703             else:
 704                 changed = Changed.YES if task.result() else Changed.NO
 705                 # If the file was written back or was successfully checked as
 706                 # well-formatted, store this information in the cache.
 707                 if write_back is WriteBack.YES or (
 708                     write_back is WriteBack.CHECK and changed is Changed.NO
 709                 ):
 710                     sources_to_cache.append(src)
 711                 report.done(src, changed)
 712     if cancelled:
 713         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 714     if sources_to_cache:
 715         write_cache(cache, sources_to_cache, mode)
 716
 717
 718 def format_file_in_place(
 719     src: Path,
 720     fast: bool,
 721     mode: Mode,
 722     write_back: WriteBack = WriteBack.NO,
 723     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 724 ) -> bool:
 725     """Format file under `src` path. Return True if changed.
 726
 727     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 728     code to the file.
 729     `mode` and `fast` options are passed to :func:`format_file_contents`.
 730     """
 731     if src.suffix == ".pyi":
 732         mode = replace(mode, is_pyi=True)
 733
 734     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 735     with open(src, "rb") as buf:
 736         src_contents, encoding, newline = decode_bytes(buf.read())
 737     try:
 738         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 739     except NothingChanged:
 740         return False
 741
 742     if write_back == WriteBack.YES:
 743         with open(src, "w", encoding=encoding, newline=newline) as f:
 744             f.write(dst_contents)
 745     elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 746         now = datetime.utcnow()
 747         src_name = f"{src}\t{then} +0000"
 748         dst_name = f"{src}\t{now} +0000"
 749         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 750
 751         if write_back == write_back.COLOR_DIFF:
 752             diff_contents = color_diff(diff_contents)
 753
 754         with lock or nullcontext():
 755             f = io.TextIOWrapper(
 756                 sys.stdout.buffer,
 757                 encoding=encoding,
 758                 newline=newline,
 759                 write_through=True,
 760             )
 761             f = wrap_stream_for_windows(f)
 762             f.write(diff_contents)
 763             f.detach()
 764
 765     return True
 766
 767
 768 def color_diff(contents: str) -> str:
 769     """Inject the ANSI color codes to the diff."""
 770     lines = contents.split("\n")
 771     for i, line in enumerate(lines):
 772         if line.startswith("+++") or line.startswith("---"):
 773             line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
 774         if line.startswith("@@"):
 775             line = "\033[36m" + line + "\033[0m"  # cyan, reset
 776         if line.startswith("+"):
 777             line = "\033[32m" + line + "\033[0m"  # green, reset
 778         elif line.startswith("-"):
 779             line = "\033[31m" + line + "\033[0m"  # red, reset
 780         lines[i] = line
 781     return "\n".join(lines)
 782
 783
 784 def wrap_stream_for_windows(
 785     f: io.TextIOWrapper,
 786 ) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32.AnsiToWin32"]:
 787     """
 788     Wrap the stream in colorama's wrap_stream so colors are shown on Windows.
 789
 790     If `colorama` is not found, then no change is made. If `colorama` does
 791     exist, then it handles the logic to determine whether or not to change
 792     things.
 793     """
 794     try:
 795         from colorama import initialise
 796
 797         # We set `strip=False` so that we can don't have to modify
 798         # test_express_diff_with_color.
 799         f = initialise.wrap_stream(
 800             f, convert=None, strip=False, autoreset=False, wrap=True
 801         )
 802
 803         # wrap_stream returns a `colorama.AnsiToWin32.AnsiToWin32` object
 804         # which does not have a `detach()` method. So we fake one.
 805         f.detach = lambda *args, **kwargs: None  # type: ignore
 806     except ImportError:
 807         pass
 808
 809     return f
 810
 811
 812 def format_stdin_to_stdout(
 813     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode
 814 ) -> bool:
 815     """Format file on stdin. Return True if changed.
 816
 817     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 818     write a diff to stdout. The `mode` argument is passed to
 819     :func:`format_file_contents`.
 820     """
 821     then = datetime.utcnow()
 822     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 823     dst = src
 824     try:
 825         dst = format_file_contents(src, fast=fast, mode=mode)
 826         return True
 827
 828     except NothingChanged:
 829         return False
 830
 831     finally:
 832         f = io.TextIOWrapper(
 833             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 834         )
 835         if write_back == WriteBack.YES:
 836             f.write(dst)
 837         elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 838             now = datetime.utcnow()
 839             src_name = f"STDIN\t{then} +0000"
 840             dst_name = f"STDOUT\t{now} +0000"
 841             d = diff(src, dst, src_name, dst_name)
 842             if write_back == WriteBack.COLOR_DIFF:
 843                 d = color_diff(d)
 844                 f = wrap_stream_for_windows(f)
 845             f.write(d)
 846         f.detach()
 847
 848
 849 def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
 850     """Reformat contents a file and return new contents.
 851
 852     If `fast` is False, additionally confirm that the reformatted code is
 853     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 854     `mode` is passed to :func:`format_str`.
 855     """
 856     if src_contents.strip() == "":
 857         raise NothingChanged
 858
 859     dst_contents = format_str(src_contents, mode=mode)
 860     if src_contents == dst_contents:
 861         raise NothingChanged
 862
 863     if not fast:
 864         assert_equivalent(src_contents, dst_contents)
 865         assert_stable(src_contents, dst_contents, mode=mode)
 866     return dst_contents
 867
 868
 869 def format_str(src_contents: str, *, mode: Mode) -> FileContent:
 870     """Reformat a string and return new contents.
 871
 872     `mode` determines formatting options, such as how many characters per line are
 873     allowed.  Example:
 874
 875     >>> import black
 876     >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode()))
 877     def f(arg: str = "") -> None:
 878         ...
 879
 880     A more complex example:
 881     >>> print(
 882     ...   black.format_str(
 883     ...     "def f(arg:str='')->None: hey",
 884     ...     mode=black.Mode(
 885     ...       target_versions={black.TargetVersion.PY36},
 886     ...       line_length=10,
 887     ...       string_normalization=False,
 888     ...       is_pyi=False,
 889     ...     ),
 890     ...   ),
 891     ... )
 892     def f(
 893         arg: str = '',
 894     ) -> None:
 895         hey
 896
 897     """
 898     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 899     dst_contents = []
 900     future_imports = get_future_imports(src_node)
 901     if mode.target_versions:
 902         versions = mode.target_versions
 903     else:
 904         versions = detect_target_versions(src_node)
 905     normalize_fmt_off(src_node)
 906     lines = LineGenerator(
 907         remove_u_prefix="unicode_literals" in future_imports
 908         or supports_feature(versions, Feature.UNICODE_LITERALS),
 909         is_pyi=mode.is_pyi,
 910         normalize_strings=mode.string_normalization,
 911     )
 912     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 913     empty_line = Line()
 914     after = 0
 915     split_line_features = {
 916         feature
 917         for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
 918         if supports_feature(versions, feature)
 919     }
 920     for current_line in lines.visit(src_node):
 921         dst_contents.append(str(empty_line) * after)
 922         before, after = elt.maybe_empty_lines(current_line)
 923         dst_contents.append(str(empty_line) * before)
 924         for line in transform_line(
 925             current_line,
 926             line_length=mode.line_length,
 927             normalize_strings=mode.string_normalization,
 928             features=split_line_features,
 929         ):
 930             dst_contents.append(str(line))
 931     return "".join(dst_contents)
 932
 933
 934 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 935     """Return a tuple of (decoded_contents, encoding, newline).
 936
 937     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 938     universal newlines (i.e. only contains LF).
 939     """
 940     srcbuf = io.BytesIO(src)
 941     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 942     if not lines:
 943         return "", encoding, "\n"
 944
 945     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 946     srcbuf.seek(0)
 947     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 948         return tiow.read(), encoding, newline
 949
 950
 951 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
 952     if not target_versions:
 953         # No target_version specified, so try all grammars.
 954         return [
 955             # Python 3.7+
 956             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
 957             # Python 3.0-3.6
 958             pygram.python_grammar_no_print_statement_no_exec_statement,
 959             # Python 2.7 with future print_function import
 960             pygram.python_grammar_no_print_statement,
 961             # Python 2.7
 962             pygram.python_grammar,
 963         ]
 964
 965     if all(version.is_python2() for version in target_versions):
 966         # Python 2-only code, so try Python 2 grammars.
 967         return [
 968             # Python 2.7 with future print_function import
 969             pygram.python_grammar_no_print_statement,
 970             # Python 2.7
 971             pygram.python_grammar,
 972         ]
 973
 974     # Python 3-compatible code, so only try Python 3 grammar.
 975     grammars = []
 976     # If we have to parse both, try to parse async as a keyword first
 977     if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
 978         # Python 3.7+
 979         grammars.append(
 980             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
 981         )
 982     if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
 983         # Python 3.0-3.6
 984         grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
 985     # At least one of the above branches must have been taken, because every Python
 986     # version has exactly one of the two 'ASYNC_*' flags
 987     return grammars
 988
 989
 990 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
 991     """Given a string with source, return the lib2to3 Node."""
 992     if src_txt[-1:] != "\n":
 993         src_txt += "\n"
 994
 995     for grammar in get_grammars(set(target_versions)):
 996         drv = driver.Driver(grammar, pytree.convert)
 997         try:
 998             result = drv.parse_string(src_txt, True)
 999             break
1000
1001         except ParseError as pe:
1002             lineno, column = pe.context[1]
1003             lines = src_txt.splitlines()
1004             try:
1005                 faulty_line = lines[lineno - 1]
1006             except IndexError:
1007                 faulty_line = "<line number missing in source>"
1008             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
1009     else:
1010         raise exc from None
1011
1012     if isinstance(result, Leaf):
1013         result = Node(syms.file_input, [result])
1014     return result
1015
1016
def lib2to3_unparse(node: Node) -> str:
    """Return the string representation of the lib2to3 `node`."""
    return str(node)
1021
1022
class Visitor(Generic[T]):
    """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""

    def visit(self, node: LN) -> Iterator[T]:
        """Dispatch `node` to the matching `visit_*()` method and yield its results.

        The method name is derived from `node.type`: token types (below 256) use
        their name from `token.tok_name`, e.g. `visit_INDENT`, while grammar
        symbols use their `type_repr()`, e.g. `visit_simple_stmt`.  When the
        visitor has no such method, `visit_default()` handles the node.
        """
        name = (
            token.tok_name[node.type]
            if node.type < 256
            else str(type_repr(node.type))
        )
        # Deliberately an explicit branch rather than passing
        # self.visit_default as the getattr fallback: that avoids creating a
        # bound method object and lets mypyc generate a native call to
        # visit_default.
        handler = getattr(self, f"visit_{name}", None)
        if not handler:
            yield from self.visit_default(node)
        else:
            yield from handler(node)

    def visit_default(self, node: LN) -> Iterator[T]:
        """Fallback `visit_*()`: visit every child of `node`; leaves yield nothing."""
        if not isinstance(node, Node):
            return

        for child in node.children:
            yield from self.visit(child)
1055
1056
@dataclass
class DebugVisitor(Visitor[T]):
    """Visitor that prints the visited tree via `out()`, indented by depth."""

    # Current nesting level; two spaces of indentation are printed per level.
    tree_depth: int = 0

    def visit_default(self, node: LN) -> Iterator[T]:
        """Print `node`; for a Node, also visit its children one level deeper."""
        padding = "  " * self.tree_depth
        if not isinstance(node, Node):
            leaf_name = token.tok_name.get(node.type, str(node.type))
            out(f"{padding}{leaf_name}", fg="blue", nl=False)
            if node.prefix:
                # We don't have to handle prefixes for `Node` objects since
                # that delegates to the first child anyway.
                out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
            out(f" {node.value!r}", fg="blue", bold=False)
            return

        symbol_name = type_repr(node.type)
        out(f"{padding}{symbol_name}", fg="yellow")
        self.tree_depth += 1
        for child in node.children:
            yield from self.visit(child)

        self.tree_depth -= 1
        out(f"{padding}/{symbol_name}", fg="yellow", bold=False)

    @classmethod
    def show(cls, code: Union[str, Leaf, Node]) -> None:
        """Pretty-print the lib2to3 AST of `code`.

        `code` may be source text, which is parsed first, or an already parsed
        Leaf or Node.  Convenience method for debugging.
        """
        printer: DebugVisitor[None] = DebugVisitor()
        if isinstance(code, str):
            code = lib2to3_parse(code)
        # The visitor is lazy; drain it so that everything gets printed.
        for _ in printer.visit(code):
            pass
1091
1092
# Token types for line breaks and indentation changes.
WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
# Grammar symbols for compound statements and definitions (plus `except` clauses).
STATEMENT: Final = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
# Custom token type for standalone comments.  It is registered in
# `token.tok_name` so that name lookups by token type (as done by
# `Visitor.visit()` and `DebugVisitor`) find a readable name for it.
STANDALONE_COMMENT: Final = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
# Values of the boolean operator keywords.
LOGIC_OPERATORS: Final = {"and", "or"}
# Token types of the comparison operators.
COMPARATORS: Final = {
    token.LESS,
    token.GREATER,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESSEQUAL,
    token.GREATEREQUAL,
}
# Token types of the arithmetic and bitwise operators.
MATH_OPERATORS: Final = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
# `*` and `**`.
STARS: Final = {token.STAR, token.DOUBLESTAR}
# `*`, `**` and `/`.
VARARGS_SPECIALS: Final = STARS | {token.SLASH}
# Parent node types listed for the varargs tokens above.
# NOTE(review): presumably consulted when deciding whether `*`/`**`/`/` is an
# operator or argument syntax; the code using it is outside this section.
VARARGS_PARENTS: Final = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
# Parent node types listed for unpacking (see the per-entry notes).
UNPACKING_PARENTS: Final = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
    syms.testlist_star_expr,
}
# Expression node types; `Line.is_complex_subscript()` treats a subscript
# containing any of these as non-trivial.
TEST_DESCENDANTS: Final = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
# Values of the plain and augmented assignment operators.
ASSIGNMENTS: Final = {
    "=",
    "+=",
    "-=",
    "*=",
    "@=",
    "/=",
    "%=",
    "&=",
    "|=",
    "^=",
    "<<=",
    ">>=",
    "**=",
    "//=",
}
# Delimiter priorities.  `BracketTracker.max_delimiter_priority()` takes the
# maximum of the recorded values, so a larger number means a higher priority.
# NOTE(review): presumably these are the values `is_split_*_delimiter()`
# return; those helpers are outside this section -- confirm there.
COMPREHENSION_PRIORITY: Final = 20
COMMA_PRIORITY: Final = 18
TERNARY_PRIORITY: Final = 16
LOGIC_PRIORITY: Final = 14
STRING_PRIORITY: Final = 12
COMPARATOR_PRIORITY: Final = 10
# Priority per math operator token type; every value is below
# COMPARATOR_PRIORITY.
MATH_PRIORITIES: Final = {
    token.VBAR: 9,
    token.CIRCUMFLEX: 8,
    token.AMPER: 7,
    token.LEFTSHIFT: 6,
    token.RIGHTSHIFT: 6,
    token.PLUS: 5,
    token.MINUS: 5,
    token.STAR: 4,
    token.SLASH: 4,
    token.DOUBLESLASH: 4,
    token.PERCENT: 4,
    token.AT: 4,
    token.TILDE: 3,
    token.DOUBLESTAR: 2,
}
# The lowest priority defined in this table.
DOT_PRIORITY: Final = 1
1203
1204
@dataclass
class BracketTracker:
    """Bookkeeping for brackets and delimiters seen on a single line."""

    # Current nesting depth; also bumped temporarily between `for`/`in` and
    # between `lambda`/`:` (see the `maybe_*` methods).
    depth: int = 0
    # Opening brackets still waiting for their closer, keyed by
    # (depth, expected closing bracket type).
    bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict)
    # Priority of each depth-0 delimiter, keyed by the `id()` of its leaf.
    delimiters: Dict[LeafID, Priority] = field(default_factory=dict)
    # The last non-comment leaf passed to `mark()`.
    previous: Optional[Leaf] = None
    _for_loop_depths: List[int] = field(default_factory=list)
    _lambda_argument_depths: List[int] = field(default_factory=list)

    def mark(self, leaf: Leaf) -> None:
        """Attach bracket metadata to `leaf` and record it if it is a delimiter.

        Every leaf gets an int `bracket_depth` attribute telling how deeply it
        is nested in brackets; 0 means no enclosing bracket was opened on this
        line.

        A closing bracket additionally gets an `opening_bracket` attribute
        pointing at the leaf it pairs with.  The link only goes one way to
        avoid reference cycles.

        When a leaf at depth 0 is a delimiter (a token on which Black can split
        the line if needed), its `id()` is stored in `delimiters`.

        Comments are ignored entirely.
        """
        if leaf.type == token.COMMENT:
            return

        self.maybe_decrement_after_for_loop_variable(leaf)
        self.maybe_decrement_after_lambda_arguments(leaf)
        if leaf.type in CLOSING_BRACKETS:
            # A closer lives at the depth of its opener, so step out first.
            self.depth -= 1
            leaf.opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
        leaf.bracket_depth = self.depth
        if self.depth == 0:
            before_priority = is_split_before_delimiter(leaf, self.previous)
            if before_priority and self.previous is not None:
                # Splitting happens before `leaf`, so the delimiter is
                # recorded on the leaf that precedes it.
                self.delimiters[id(self.previous)] = before_priority
            else:
                after_priority = is_split_after_delimiter(leaf, self.previous)
                if after_priority:
                    self.delimiters[id(leaf)] = after_priority
        if leaf.type in OPENING_BRACKETS:
            match_key = (self.depth, BRACKET[leaf.type])
            self.bracket_match[match_key] = leaf
            self.depth += 1
        self.previous = leaf
        self.maybe_increment_lambda_arguments(leaf)
        self.maybe_increment_for_loop_variable(leaf)

    def any_open_brackets(self) -> bool:
        """Return True if some opening bracket on the line is still unmatched."""
        return len(self.bracket_match) > 0

    def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
        """Return the highest delimiter priority on the line.

        Delimiters whose leaf id is in `exclude` are not considered.  Values
        are consistent with what `is_split_*_delimiter()` return.
        Raises ValueError when no delimiter is left to consider.
        """
        return max(
            priority
            for leaf_id, priority in self.delimiters.items()
            if leaf_id not in exclude
        )

    def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
        """Return how many delimiters have the given `priority`.

        A falsy `priority` (the default) means the max priority on the line.
        """
        if not self.delimiters:
            return 0

        wanted = priority if priority else self.max_delimiter_priority()
        return list(self.delimiters.values()).count(wanted)

    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
        """Go one level deeper when `leaf` is the `for` keyword.

        The variables of a for loop or comprehension are often unpacks.  To
        avoid splitting on their commas, tokens between `for` and `in` are
        treated as one level deeper.  Returns True if the depth was increased.
        """
        if leaf.type != token.NAME or leaf.value != "for":
            return False

        self.depth += 1
        self._for_loop_depths.append(self.depth)
        return True

    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
        """Undo `maybe_increment_for_loop_variable()` on the matching `in`.

        Returns True if the depth was decreased.
        """
        closes_for_target = (
            self._for_loop_depths
            and self._for_loop_depths[-1] == self.depth
            and leaf.type == token.NAME
            and leaf.value == "in"
        )
        if not closes_for_target:
            return False

        self.depth -= 1
        self._for_loop_depths.pop()
        return True

    def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
        """Go one level deeper when `leaf` is the `lambda` keyword.

        A lambda may take more than one argument.  To avoid splitting on the
        commas between them, tokens between `lambda` and `:` are treated as one
        level deeper.  Returns True if the depth was increased.
        """
        if leaf.type != token.NAME or leaf.value != "lambda":
            return False

        self.depth += 1
        self._lambda_argument_depths.append(self.depth)
        return True

    def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
        """Undo `maybe_increment_lambda_arguments()` on the matching `:`.

        Returns True if the depth was decreased.
        """
        closes_lambda_arguments = (
            self._lambda_argument_depths
            and self._lambda_argument_depths[-1] == self.depth
            and leaf.type == token.COLON
        )
        if not closes_lambda_arguments:
            return False

        self.depth -= 1
        self._lambda_argument_depths.pop()
        return True

    def get_open_lsqb(self) -> Optional[Leaf]:
        """Return the most recent opening square bracket (if any)."""
        # Openers are stored under the type of the closer they expect.
        enclosing_key = (self.depth - 1, token.RSQB)
        return self.bracket_match.get(enclosing_key)
1335
1336
@dataclass
class Line:
    """Holds leaves and comments. Can be printed with `str(line)`."""

    # Indentation level; `__str__()` renders four spaces per level.
    depth: int = 0
    # Leaves of the line, in the order they were appended.
    leaves: List[Leaf] = field(default_factory=list)
    # Trailing comments, keyed by the `id()` of the leaf they are attached to;
    # keys ordered like `leaves`
    comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict)
    # Bracket and delimiter bookkeeping, fed by `append()`.
    bracket_tracker: BracketTracker = field(default_factory=BracketTracker)
    inside_brackets: bool = False
    should_explode: bool = False

    def append(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Add a new `leaf` to the end of the line.

        Unless `preformatted` is True, the `leaf` will receive a new consistent
        whitespace prefix and metadata applied by :class:`BracketTracker`.
        Trailing commas are maybe removed, unpacked for loop variables are
        demoted from being delimiters.

        Inline comments are put aside.
        """
        # Leaves without visible content are dropped, except brackets (which
        # may be invisible, i.e. have an empty value).
        has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
        if not has_value:
            return

        if token.COLON == leaf.type and self.is_class_paren_empty:
            # `class A():` -> drop the empty parentheses before the colon.
            del self.leaves[-2:]
        if self.leaves and not preformatted:
            # Note: at this point leaf.prefix should be empty except for
            # imports, for which we only preserve newlines.
            leaf.prefix += whitespace(
                leaf, complex_subscript=self.is_complex_subscript(leaf)
            )
        if self.inside_brackets or not preformatted:
            self.bracket_tracker.mark(leaf)
            self.maybe_remove_trailing_comma(leaf)
        if not self.append_comment(leaf):
            self.leaves.append(leaf)

    def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Like :func:`append()` but disallow invalid standalone comment structure.

        Raises ValueError when any `leaf` is appended after a standalone comment
        or when a standalone comment is not the first leaf on the line.
        """
        # The restrictions only apply outside of brackets.
        if self.bracket_tracker.depth == 0:
            if self.is_comment:
                raise ValueError("cannot append to standalone comments")

            if self.leaves and leaf.type == STANDALONE_COMMENT:
                raise ValueError(
                    "cannot append standalone comments to a populated line"
                )

        self.append(leaf, preformatted=preformatted)

    @property
    def is_comment(self) -> bool:
        """Is this line a standalone comment?"""
        return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT

    @property
    def is_decorator(self) -> bool:
        """Is this line a decorator?"""
        return bool(self) and self.leaves[0].type == token.AT

    @property
    def is_import(self) -> bool:
        """Is this an import line?"""
        return bool(self) and is_import(self.leaves[0])

    @property
    def is_class(self) -> bool:
        """Is this line a class definition?"""
        return (
            bool(self)
            and self.leaves[0].type == token.NAME
            and self.leaves[0].value == "class"
        )

    @property
    def is_stub_class(self) -> bool:
        """Is this line a class definition with a body consisting only of "..."?"""
        return self.is_class and self.leaves[-3:] == [
            Leaf(token.DOT, ".") for _ in range(3)
        ]

    @property
    def is_collection_with_optional_trailing_comma(self) -> bool:
        """Is this line a collection literal with a trailing comma that's optional?

        Note that the trailing comma in a 1-tuple is not optional.
        """
        if len(self.leaves) < 4:
            return False

        # Look for and address a trailing colon.
        if self.leaves[-1].type == token.COLON:
            closer = self.leaves[-2]
            close_index = -2
        else:
            closer = self.leaves[-1]
            close_index = -1
        if closer.type not in CLOSING_BRACKETS or self.inside_brackets:
            return False

        if closer.type == token.RPAR:
            # Tuples require an extra check, because if there's only
            # one element in the tuple removing the comma unmakes the
            # tuple.
            #
            # We also check for parens before looking for the trailing
            # comma because in some cases (eg assigning a dict
            # literal) the literal gets wrapped in temporary parens
            # during parsing. This case is covered by the
            # collections.py test data.
            opener = closer.opening_bracket
            for _open_index, leaf in enumerate(self.leaves):
                if leaf is opener:
                    break

            else:
                # Couldn't find the matching opening paren, play it safe.
                return False

            # Count only the commas at the nesting depth of the last element.
            commas = 0
            comma_depth = self.leaves[close_index - 1].bracket_depth
            for leaf in self.leaves[_open_index + 1 : close_index]:
                if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA:
                    commas += 1
            if commas > 1:
                # We haven't looked yet for the trailing comma because
                # we might also have caught noop parens.
                return self.leaves[close_index - 1].type == token.COMMA

            elif commas == 1:
                return False  # it's either a one-tuple or didn't have a trailing comma

            # No commas directly inside the parens: they may merely wrap
            # another bracketed collection, so look one closer further in.
            if self.leaves[close_index - 1].type in CLOSING_BRACKETS:
                close_index -= 1
                closer = self.leaves[close_index]
                if closer.type == token.RPAR:
                    # TODO: this is a gut feeling. Will we ever see this?
                    return False

        return self.leaves[close_index - 1].type == token.COMMA

    @property
    def is_def(self) -> bool:
        """Is this a function definition? (Also returns True for async defs.)"""
        try:
            first_leaf = self.leaves[0]
        except IndexError:
            return False

        try:
            second_leaf: Optional[Leaf] = self.leaves[1]
        except IndexError:
            second_leaf = None
        return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
            first_leaf.type == token.ASYNC
            and second_leaf is not None
            and second_leaf.type == token.NAME
            and second_leaf.value == "def"
        )

    @property
    def is_class_paren_empty(self) -> bool:
        """Is this a class with no base classes but using parentheses?

        Those are unnecessary and should be removed.
        """
        return (
            bool(self)
            and len(self.leaves) == 4
            and self.is_class
            and self.leaves[2].type == token.LPAR
            and self.leaves[2].value == "("
            and self.leaves[3].type == token.RPAR
            and self.leaves[3].value == ")"
        )

    @property
    def is_triple_quoted_string(self) -> bool:
        """Is the line a triple quoted string?"""
        return (
            bool(self)
            and self.leaves[0].type == token.STRING
            and self.leaves[0].value.startswith(('"""', "'''"))
        )

    def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
        """Is there a standalone comment at bracket depth `depth_limit` or less?

        If so, needs to be split before emitting.
        """
        return any(
            leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit
            for leaf in self.leaves
        )

    def contains_uncollapsable_type_comments(self) -> bool:
        """Is there a type comment that would be misplaced on a single line?

        Returns False for a line without leaves, or whose only leaf is a
        trailing comma or an invisible closing paren.
        """
        ignored_ids = set()
        try:
            last_leaf = self.leaves[-1]
            ignored_ids.add(id(last_leaf))
            if last_leaf.type == token.COMMA or (
                last_leaf.type == token.RPAR and not last_leaf.value
            ):
                # When trailing commas or optional parens are inserted by Black for
                # consistency, comments after the previous last element are not moved
                # (they don't have to, rendering will still be correct).  So we ignore
                # trailing commas and invisible parens.
                last_leaf = self.leaves[-2]
                ignored_ids.add(id(last_leaf))
        except IndexError:
            return False

        # A type comment is uncollapsable if it is attached to a leaf
        # that isn't at the end of the line (since that could cause it
        # to get associated to a different argument) or if there are
        # comments before it (since that could cause it to get hidden
        # behind a comment).
        comment_seen = False
        for leaf_id, comments in self.comments.items():
            for comment in comments:
                if is_type_comment(comment):
                    if comment_seen or (
                        not is_type_comment(comment, " ignore")
                        and leaf_id not in ignored_ids
                    ):
                        return True

                comment_seen = True

        return False

    def contains_unsplittable_type_ignore(self) -> bool:
        """Is a `type: ignore` attached to the end of a line that was one source line?"""
        if not self.leaves:
            return False

        # If a 'type: ignore' is attached to the end of a line, we
        # can't split the line, because we can't know which of the
        # subexpressions the ignore was meant to apply to.
        #
        # We only want this to apply to actual physical lines from the
        # original source, though: we don't want the presence of a
        # 'type: ignore' at the end of a multiline expression to
        # justify pushing it all onto one line. Thus we
        # (unfortunately) need to check the actual source lines and
        # only report an unsplittable 'type: ignore' if this line was
        # one line in the original code.

        # Grab the first and last line numbers, skipping generated leaves
        first_line = next(
            (leaf.lineno for leaf in self.leaves if leaf.lineno != 0), 0
        )
        last_line = next(
            (leaf.lineno for leaf in reversed(self.leaves) if leaf.lineno != 0), 0
        )

        if first_line == last_line:
            # We look at the last two leaves since a comma or an
            # invisible paren could have been added at the end of the
            # line.
            for node in self.leaves[-2:]:
                for comment in self.comments.get(id(node), []):
                    if is_type_comment(comment, " ignore"):
                        return True

        return False

    def contains_multiline_strings(self) -> bool:
        """Is any leaf on the line a multiline string?"""
        return any(is_multiline_string(leaf) for leaf in self.leaves)

    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
        """Remove trailing comma if there is one and it's safe.

        `closing` is the leaf about to follow the comma.  Returns True if the
        comma was removed.
        """
        if not (self.leaves and self.leaves[-1].type == token.COMMA):
            return False

        # We remove trailing commas only in the case of importing a
        # single name from a module.
        if not (
            self.is_import
            and len(self.leaves) > 4
            and closing.type in CLOSING_BRACKETS
            and self.leaves[-4].type == token.NAME
            and (
                # regular `from foo import bar,`
                self.leaves[-4].value == "import"
                # `from foo import (bar as baz,)
                or (
                    len(self.leaves) > 6
                    and self.leaves[-6].value == "import"
                    and self.leaves[-3].value == "as"
                )
                # `from foo import bar as baz,`
                or (
                    len(self.leaves) > 5
                    and self.leaves[-5].value == "import"
                    and self.leaves[-3].value == "as"
                )
            )
            and closing.type == token.RPAR
        ):
            return False

        self.remove_trailing_comma()
        return True

    def append_comment(self, comment: Leaf) -> bool:
        """Add an inline or standalone comment to the line.

        Returns True if `comment` was stored in `comments`.  Returns False if
        it is not a comment at all, or if it has to be appended to `leaves`
        as a standalone comment instead.
        """
        if (
            comment.type == STANDALONE_COMMENT
            and self.bracket_tracker.any_open_brackets()
        ):
            comment.prefix = ""
            return False

        if comment.type != token.COMMENT:
            return False

        if not self.leaves:
            # Nothing to attach the comment to: it becomes standalone.
            comment.type = STANDALONE_COMMENT
            comment.prefix = ""
            return False

        last_leaf = self.leaves[-1]
        if (
            last_leaf.type == token.RPAR
            and not last_leaf.value
            and last_leaf.parent
            and len(list(last_leaf.parent.leaves())) <= 3
            and not is_type_comment(comment)
        ):
            # Comments on an optional parens wrapping a single leaf should belong to
            # the wrapped node except if it's a type comment. Pinning the comment like
            # this avoids unstable formatting caused by comment migration.
            if len(self.leaves) < 2:
                comment.type = STANDALONE_COMMENT
                comment.prefix = ""
                return False

            last_leaf = self.leaves[-2]
        self.comments.setdefault(id(last_leaf), []).append(comment)
        return True

    def comments_after(self, leaf: Leaf) -> List[Leaf]:
        """Return the comments that should appear directly after `leaf`."""
        return self.comments.get(id(leaf), [])

    def remove_trailing_comma(self) -> None:
        """Remove the trailing comma and moves the comments attached to it."""
        trailing_comma = self.leaves.pop()
        trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
        # Re-attach the comma's comments to the leaf that is now last.
        self.comments.setdefault(id(self.leaves[-1]), []).extend(
            trailing_comma_comments
        )

    def is_complex_subscript(self, leaf: Leaf) -> bool:
        """Return True iff `leaf` is part of a slice with non-trivial exprs."""
        open_lsqb = self.bracket_tracker.get_open_lsqb()
        if open_lsqb is None:
            return False

        subscript_start = open_lsqb.next_sibling

        if isinstance(subscript_start, Node):
            if subscript_start.type == syms.listmaker:
                return False

            if subscript_start.type == syms.subscriptlist:
                subscript_start = child_towards(subscript_start, leaf)
        return subscript_start is not None and any(
            n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
        )

    def clone(self) -> "Line":
        """Return a new, empty Line with the same depth and flags.

        Leaves, comments and the bracket tracker are not copied.
        """
        return Line(
            depth=self.depth,
            inside_brackets=self.inside_brackets,
            should_explode=self.should_explode,
        )

    def __str__(self) -> str:
        """Render the line."""
        if not self:
            return "\n"

        indent = "    " * self.depth
        leaves = iter(self.leaves)
        first = next(leaves)
        # The first leaf's prefix is emitted before the indentation; every
        # other leaf renders itself, prefix included.
        parts = [f"{first.prefix}{indent}{first.value}"]
        parts.extend(str(leaf) for leaf in leaves)
        parts.extend(
            str(comment)
            for comment in itertools.chain.from_iterable(self.comments.values())
        )
        parts.append("\n")
        return "".join(parts)

    def __bool__(self) -> bool:
        """Return True if the line has leaves or comments."""
        return bool(self.leaves or self.comments)
1740
1741
@dataclass
class EmptyLineTracker:
    """Stateful helper computing the blank lines around each processed line.

    `maybe_empty_lines()` must be fed lines in order; it returns how many extra
    empty lines may be needed before and after the line it is given.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    is_pyi: bool = False
    previous_line: Optional[Line] = None
    previous_after: int = 0
    previous_defs: List[int] = field(default_factory=list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).
        """
        before, after = self._maybe_empty_lines(current_line)
        if self.previous_line is None:
            # Black should not insert empty lines at the beginning of the file.
            before = 0
        else:
            # Lines already requested after the previous line count towards
            # the lines wanted before this one.
            before -= self.previous_after
        self.previous_line = current_line
        self.previous_after = after
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw (before, after) counts for `current_line`.

        Side effects: clears the prefix of the first leaf of `current_line` and
        pops entries of `previous_defs` that are at or below the current depth.
        """
        depth = current_line.depth
        max_allowed = 2 if depth == 0 and not self.is_pyi else 1

        before = 0
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = min(first_leaf.prefix.count("\n"), max_allowed)
            first_leaf.prefix = ""

        # Forget every recorded definition whose depth is not shallower than
        # the current line; if any was dropped, the count is overridden.
        dropped_def = False
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            dropped_def = True
        if dropped_def:
            if self.is_pyi:
                before = 0 if depth else 1
            else:
                before = 1 if depth else 2

        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        previous = self.previous_line
        if (
            previous
            and previous.is_import
            and not current_line.is_import
            and depth == previous.depth
        ):
            # At least one empty line after a block of imports.
            return (before or 1), 0

        if previous and previous.is_class and current_line.is_triple_quoted_string:
            return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        """Compute (before, after) for a decorator, `def` or `class` line.

        Records the depth of `current_line` in `previous_defs` unless the line
        is a decorator.
        """
        previous = self.previous_line
        depth = current_line.depth
        if not current_line.is_decorator:
            self.previous_defs.append(depth)

        if previous is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if previous.is_decorator:
            return 0, 0

        opens_body = previous.depth < depth and (previous.is_class or previous.is_def)
        if opens_body:
            return 0, 0

        follows_comment = previous.is_comment and previous.depth == depth and before == 0
        if follows_comment:
            return 0, 0

        if self.is_pyi:
            newlines = self._stub_newlines(previous, current_line)
        else:
            newlines = 2
        if depth and newlines:
            newlines -= 1
        return newlines, 0

    @staticmethod
    def _stub_newlines(previous: Line, current_line: Line) -> int:
        """Return the unadjusted blank line count used in `.pyi` mode."""
        if previous.depth > current_line.depth:
            return 1

        if current_line.is_class or previous.is_class:
            # No blank line between classes with an empty body
            both_stubs = current_line.is_stub_class and previous.is_stub_class
            return 0 if both_stubs else 1

        if current_line.is_def and not previous.is_def:
            # Blank line between a block of functions and a block of non-functions
            return 1

        return 0
1857
1858
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    is_pyi: bool = False
    normalize_strings: bool = True
    current_line: Line = field(default_factory=Line)
    remove_u_prefix: bool = False

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the current line is empty, nothing is emitted and only its depth is
        adjusted by `indent`.

        Otherwise the current line is yielded and a new current_line is set up
        at the adjusted depth.
        """
        if not self.current_line:
            self.current_line.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = Line(depth=complete_line.depth + indent)
        yield complete_line

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`."""
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if any_open_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                elif comment.type == token.COMMENT:
                    # regular trailing comment
                    self.current_line.append(comment)
                    yield from self.line()

                else:
                    # regular standalone comment
                    yield from self.line()

                    self.current_line.append(comment)
                    yield from self.line()

            normalize_prefix(node, inside_brackets=any_open_brackets)
            if self.normalize_strings and node.type == token.STRING:
                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                normalize_string_quotes(node)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This methods puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite."""
        if self.is_pyi and is_stub_suite(node):
            # In stub files only the third child of a stub suite is visited.
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                yield from self.visit_default(node)
            else:
                # The statement sits directly under a compound statement: emit
                # it one level deeper, then restore the depth.
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # The child following ASYNC is the wrapped statement.  Its children are
        # visited directly rather than through its own `visit_*()` method.
        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a standalone comment, emitting the pending line first.

        The pending line is only emitted when no brackets are open.
        """
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def visit_factor(self, node: Node) -> Iterator[Line]:
        """Force parentheses between a unary op and a binary power:

        -2 ** 8 -> -(2 ** 8)
        """
        _operator, operand = node.children
        if (
            operand.type == syms.power
            and len(operand.children) == 3
            and operand.children[1].type == token.DOUBLESTAR
        ):
            lpar = Leaf(token.LPAR, "(")
            rpar = Leaf(token.RPAR, ")")
            index = operand.remove() or 0
            node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
        yield from self.visit_default(node)

    def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a string leaf, re-indenting it first when it is a docstring.

        A string is treated as a docstring when it is multiline and its parent
        is a simple statement that directly follows NEWLINE and INDENT at the
        start of its parent's children.
        """
        # Check if it's a docstring
        if prev_siblings_are(
            leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
        ) and is_multiline_string(leaf):
            # The literal may carry a prefix (e.g. r"""...""").  Skip over it so
            # that exactly the prefix plus the three opening quote characters
            # are kept out of the text handed to `fix_docstring()`.
            unprefixed = leaf.value.lstrip(STRING_PREFIX_CHARS)
            lead_len = len(leaf.value) - len(unprefixed) + 3
            indent = "    " * self.current_line.depth
            docstring = fix_docstring(leaf.value[lead_len:-3], indent)
            leaf.value = leaf.value[:lead_len] + docstring + leaf.value[-3:]
            # Honor `normalize_strings` here just like `visit_default()` does.
            if self.normalize_strings:
                normalize_string_quotes(leaf)

        yield from self.visit_default(leaf)

    def __post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        # Every statement visitor below is `visit_stmt` with its keyword and
        # invisible-parens sets pre-bound.
        v = self.visit_stmt
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
2069
2070
# Node types that form a tuple without enclosing parentheses.
IMPLICIT_TUPLE = {
    syms.testlist,
    syms.testlist_star_expr,
    syms.exprlist,
}
# Maps every opening bracket token to its matching closing bracket token.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Token types for which `whitespace()` always returns an empty prefix.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
2077
2078
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    The decision is made in two stages.  The first stage looks at what precedes
    the leaf (its previous sibling, or the preceding leaf when it is the first
    child).  The second stage dispatches on the type of the leaf's parent.
    Anything not rejected by either stage gets a single space.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    if t == token.COLON and p.type not in {
        syms.subscript,
        syms.subscriptlist,
        syms.sliceop,
    }:
        return NO

    # Stage one: decide based on what comes right before the leaf.
    prev = leaf.prev_sibling
    if not prev:
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            # Only slice colons get here; other colons returned above.
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist,
                    syms.argument,
                    syms.parameters,
                    syms.varargslist,
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument.  So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in VARARGS_SPECIALS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            # No space between a unary operator and its operand.
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    # Stage two: decide based on the kind of node the leaf lives in.
    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument.  So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        # dict unpacking
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if prevp.type == token.COLON and prevp_parent.type in {
                syms.subscript,
                syms.sliceop,
            }:
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t in {token.NAME, token.NUMBER, token.STRING}:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    # Note: `syms.sliceop` parents are fully handled by the "indexing" branch
    # above, so no separate branch for them is needed here.
    return SPACE
2307
2308
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Walks up through the parents of `node` until one of them has a previous
    sibling.  The result is that sibling itself when it is a leaf, otherwise
    its last leaf (or None when it contains no leaves).
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if not sibling:
            current = current.parent
            continue

        if isinstance(sibling, Leaf):
            return sibling

        leaves = list(sibling.leaves())
        return leaves[-1] if leaves else None

    return None
2325
2326
def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
    """Return if the `node` and its previous siblings match types against the provided
    list of tokens; the provided `node` has its type matched against the last element
    in the list.  `None` can be used as the first element to declare that the start of
    the list is anchored at the start of its parent's children."""
    current = node
    # Walk the expected types right-to-left while stepping through the
    # previous siblings of `node`.
    for expected in reversed(tokens):
        if expected is None:
            return current is None

        if not current or current.type != expected:
            return False

        current = current.prev_sibling
    return True
2341
2342
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Climbs from `descendant` through its parents until a node whose parent
    compares equal to `ancestor` is found.  Returns None when the chain of
    parents runs out first.
    """
    current: Optional[LN] = descendant
    while current:
        if current.parent != ancestor:
            current = current.parent
        else:
            break
    return current
2349
2350
def container_of(leaf: Leaf) -> LN:
    """Return `leaf` or one of its ancestors that is the topmost container of it.

    By "container" we mean a node where `leaf` is the very first child.

    The climb stops below a parent whose first child has a different prefix
    than `leaf`, below a `file_input` node, and below a parent that directly
    follows a bracket.
    """
    prefix = leaf.prefix
    current: LN = leaf
    while current:
        above = current.parent
        if (
            above is None
            or above.children[0].prefix != prefix
            or above.type == syms.file_input
        ):
            break

        before_above = above.prev_sibling
        if before_above is not None and before_above.type in BRACKETS:
            break

        current = above
    return current
2374
2375
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    Only commas cause a line break after themselves; every other leaf gets
    priority 0.  `previous` is accepted but not consulted.

    Higher numbers are higher priority.
    """
    is_comma = leaf.type == token.COMMA
    return COMMA_PRIORITY if is_comma else 0
2388
2389
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    `previous` is the leaf that comes before `leaf`, if known.  It is used to
    recognize attribute access on a closing bracket, implicit string
    concatenation, and the second word of the two-word operators `not in` and
    `is not`.

    Higher numbers are higher priority.  0 means `leaf` is not a delimiter.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    # Attribute access dots, but not the dots of imports and dotted names.
    if (
        leaf.type == token.DOT
        and leaf.parent
        and leaf.parent.type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    # Binary math operators; unary operators and star expressions are excluded.
    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf.type]

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    # A string directly following another string.
    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        return STRING_PRIORITY

    # Everything below deals with keyword-like leaves only.
    if leaf.type not in {token.NAME, token.ASYNC}:
        return 0

    # Note the precedence: this reads as
    # `("for" inside a comprehension) or (an ASYNC token)`.
    if (
        leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
        or leaf.type == token.ASYNC
    ):
        # A `for` whose previous sibling is `async` is not a delimiter itself.
        if (
            not isinstance(leaf.prev_sibling, Leaf)
            or leaf.prev_sibling.value != "async"
        ):
            return COMPREHENSION_PRIORITY

    if (
        leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
        return TERNARY_PRIORITY

    if leaf.value == "is":
        return COMPARATOR_PRIORITY

    # `in` is a delimiter unless it is the second word of `not in`.
    if (
        leaf.value == "in"
        and leaf.parent
        and leaf.parent.type in {syms.comp_op, syms.comparison}
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "not"
        )
    ):
        return COMPARATOR_PRIORITY

    # `not` is a delimiter unless it is the second word of `is not`.
    if (
        leaf.value == "not"
        and leaf.parent
        and leaf.parent.type == syms.comp_op
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "is"
        )
    ):
        return COMPARATOR_PRIORITY

    if leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
2484
2485
# Comment texts recognized as "formatting off" markers.
FMT_OFF = {
    "# fmt: off",
    "# fmt:off",
    "# yapf: disable",
}
# Comment texts recognized as "formatting on" markers.
FMT_ON = {
    "# fmt: on",
    "# fmt:on",
    "# yapf: enable",
}
2488
2489
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.

    Each generated leaf gets a prefix made only of the newlines that preceded
    the comment.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        leading_newlines = "\n" * proto.newlines
        yield Leaf(proto.type, proto.value, prefix=leading_newlines)
2511
2512
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    # Token type: token.COMMENT for a trailing comment, STANDALONE_COMMENT for a
    # comment that does not share its line with code.
    type: int  # token.COMMENT or STANDALONE_COMMENT
    # Text of the comment.
    value: str  # content of the comment
    # Number of empty lines seen in the prefix since the previous comment (or
    # since the start of the prefix).
    newlines: int  # how many newlines before the comment
    # Running count of prefix characters up to and including the comment's line.
    consumed: int  # how many characters of the original leaf's prefix did we consume
2529
2530
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    A comment found on the very first line of the prefix (not counting lines
    that end with an escaped newline) is a trailing comment, unless the prefix
    belongs to the end marker.  Every other comment is a standalone comment.
    """
    comments: List[ProtoComment] = []
    if "#" not in prefix:
        return comments

    offset = 0
    pending_newlines = 0
    escaped_lines = 0
    for lineno, raw_line in enumerate(prefix.split("\n")):
        offset += len(raw_line) + 1  # adding the length of the split '\n'
        text = raw_line.lstrip()
        if text.startswith("#"):
            is_trailing = lineno == escaped_lines and not is_endmarker
            comments.append(
                ProtoComment(
                    type=token.COMMENT if is_trailing else STANDALONE_COMMENT,
                    value=make_comment(text),
                    newlines=pending_newlines,
                    consumed=offset,
                )
            )
            pending_newlines = 0
            continue

        if not text:
            pending_newlines += 1
        elif text.endswith("\\"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            escaped_lines += 1
    return comments
2566
2567
def make_comment(content: str) -> str:
    """Normalize `content` into a consistently formatted comment string.

    Trailing whitespace is dropped and a leading hash sign is added when
    missing.  A single space is inserted between the hash and the text unless
    the text already starts with a space or with one of the special markers
    that must stay glued to the hash ("##", "#!", "#:", "#'", "#%%").

    Empty (or whitespace-only) input yields a bare "#".
    """
    body = content.rstrip()
    if not body:
        return "#"

    if body.startswith("#"):
        body = body[1:]

    needs_space = bool(body) and body[0] not in " !:#'%"
    return ("# " if needs_space else "#") + body
2585
2586
def transform_line(
    line: Line,
    line_length: int,
    normalize_strings: bool,
    features: Collection[Feature] = (),
) -> Iterator[Line]:
    """Yield the line(s) that `line` is transformed into.

    A list of candidate transformers is selected based on the shape of the
    line, and they are tried in order.  The first one that produces a changed
    result wins; each line it produces is recursively transformed again.  If
    no transformer succeeds, `line` is yielded unchanged.

    The produced lines should fit in `line_length`, but this is not guaranteed.

    `features` are syntactical features that may be used in the output.
    """
    if line.is_comment:
        yield line
        return

    line_str = line_to_string(line)

    # One instance of every string transformer, configured for this call.
    string_merge = StringMerger(line_length, normalize_strings)
    string_paren_strip = StringParenStripper(line_length, normalize_strings)
    string_split = StringSplitter(line_length, normalize_strings)
    string_paren_wrap = StringParenWrapper(line_length, normalize_strings)

    transformers: List[Transformer]
    if (
        not (
            line.contains_uncollapsable_type_comments()
            or line.should_explode
            or line.is_collection_with_optional_trailing_comma
        )
        and (
            is_line_short_enough(line, line_length=line_length, line_str=line_str)
            or line.contains_unsplittable_type_ignore()
        )
        and not (line.contains_standalone_comments() and line.inside_brackets)
    ):
        # The line should not be split, so only basic string preprocessing is
        # applied.
        transformers = [string_merge, string_paren_strip]
    elif line.is_def:
        transformers = [left_hand_split]
    else:

        def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
            # Try progressively larger sets of trailers to omit and accept the
            # first split whose first line fits.
            for omit in generate_trailers_to_omit(line, line_length):
                parts = list(right_hand_split(line, line_length, features, omit=omit))
                if not is_line_short_enough(parts[0], line_length=line_length):
                    continue
                yield from parts
                return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line.
            # line_length=1 here was historically a bug that somehow became a feature.
            # See #762 and #781 for the full story.
            yield from right_hand_split(line, line_length=1, features=features)

        # Delimiter and standalone-comment splits are only attempted for lines
        # that live inside brackets.
        bracket_only: List[Transformer] = (
            [delimiter_split, standalone_comment_split] if line.inside_brackets else []
        )
        transformers = [
            string_merge,
            string_paren_strip,
            *bracket_only,
            string_split,
            string_paren_wrap,
            rhs,
        ]

    for transform in transformers:
        # Lines are collected first rather than yielded immediately, because
        # the attempt may have to be abandoned in favour of the next
        # transformer (or of the original line).
        pieces: List[Line] = []
        try:
            for transformed in transform(line, features):
                if str(transformed).strip("\n") == line_str:
                    raise CannotTransform(
                        "Line transformer returned an unchanged result"
                    )

                pieces.extend(
                    transform_line(
                        transformed,
                        line_length=line_length,
                        normalize_strings=normalize_strings,
                        features=features,
                    )
                )
        except CannotTransform:
            continue

        yield from pieces
        return

    # Every transformer gave up: emit the line as it was.
    yield line
2692
2693
@dataclass  # type: ignore
class StringTransformer(ABC):
    """
    Template-method base class implementing the Transformer protocol for
    transformations that work exclusively on strings (for example, merging or
    splitting them).

    Subclasses supply the actual behaviour by overriding `do_match(...)` and
    `do_transform(...)`; `__call__(...)` ties the two together.

    The docstring of each concrete StringTransformer subclass is organised
    into the following sections.

    Requirements:
        Which requirements must be met of the given Line for this
        StringTransformer to be applied?

    Transformations:
        If the given Line meets all of the above requirements, which string
        transformations can you expect to be applied to it by this
        StringTransformer?

    Collaborations:
        What contractual agreements does this StringTransformer have with other
        StringTransformers? Such collaborations should be eliminated/minimized
        as much as possible.
    """

    line_length: int
    normalize_strings: bool

    @abstractmethod
    def do_match(self, line: Line) -> TMatchResult:
        """
        Decide whether this transformer applies to @line.

        Returns:
            * Ok(string_idx) such that `line.leaves[string_idx]` is our target
            string, if a match was able to be made.
                OR
            * Err(CannotTransform), if a match was not able to be made.
        """

    @abstractmethod
    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Perform the transformation on the string at `line.leaves[string_idx]`.

        Yields:
            * Ok(new_line) where new_line is the new transformed line.
                OR
            * Err(CannotTransform) if the transformation failed for some reason. The
            `do_match(...)` template method should usually be used to reject
            the form of the given Line, but in some cases it is difficult to
            know whether or not a Line meets the StringTransformer's
            requirements until the transformation is already midway.

        Side Effects:
            This method should NOT mutate @line directly, but it MAY mutate the
            Line's underlying Node structure. (WARNING: If the underlying Node
            structure IS altered, then this method should NOT be allowed to
            yield a CannotTransform after that point.)
        """

    def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
        """
        Run this transformer on @line, using the same call signature as the
        Transformer type.

        Yields:
            Every line produced by `do_transform(...)`.

        Raises:
            CannotTransform(...) if the concrete StringTransformer class is unable
            to transform @line.
        """
        # Cheap early exit: without any string leaf there is no point in
        # calling `self.do_match(...)` at all.
        if all(leaf.type != token.STRING for leaf in line.leaves):
            raise CannotTransform("There are no strings in this line.")

        matched = self.do_match(line)
        if isinstance(matched, Err):
            match_failure = matched.err()
            raise CannotTransform(
                f"The string transformer {self.__class__.__name__} does not recognize"
                " this line as one that it can transform."
            ) from match_failure

        target_idx = matched.ok()
        for outcome in self.do_transform(line, target_idx):
            if isinstance(outcome, Err):
                transform_failure = outcome.err()
                raise CannotTransform(
                    "StringTransformer failed while attempting to transform string."
                ) from transform_failure
            yield outcome.ok()
2787
2788
@dataclass
class CustomSplit:
    """Record of one custom (i.e. manual) split point inside a string.

    Each CustomSplit instance stands for exactly one substring of a string
    that was originally written as several adjacent pieces.

    Examples:
        Consider the following string:
        ```
        "Hi there friend."
        " This is a custom"
        f" string {split}."
        ```

        This string will correspond to the following three CustomSplit instances:
        ```
        CustomSplit(False, 16)
        CustomSplit(False, 17)
        CustomSplit(True, 16)
        ```
    """

    # Whether the original substring carried a string prefix (e.g. "f").
    has_prefix: bool
    # Index at which the substring is to be broken off.
    break_idx: int
2813
2814
class CustomSplitMapMixin:
    """
    Mixin that remembers, for each merged string, the sequence of CustomSplits
    it was merged from, so that the string can later be re-split at those
    points iff none of the resultant substrings go over the configured max
    line length.

    The map is a class attribute, so it is shared by every class and instance
    that uses this mixin.
    """

    _Key = Tuple[StringID, str]
    _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)

    @staticmethod
    def _get_key(string: str) -> "CustomSplitMapMixin._Key":
        """
        Returns:
            The identifier used internally to look up the custom splits of
            @string: the object's `id` paired with its value.
        """
        return (id(string), string)

    def add_custom_splits(
        self, string: str, custom_splits: Iterable[CustomSplit]
    ) -> None:
        """Register @custom_splits for @string.

        Side Effects:
            Stores (or overwrites) the mapping from @string to a tuple built
            from @custom_splits.
        """
        map_key = self._get_key(string)
        self._CUSTOM_SPLIT_MAP[map_key] = tuple(custom_splits)

    def pop_custom_splits(self, string: str) -> List[CustomSplit]:
        """Retrieve and forget the custom splits registered for @string.

        Returns:
            * A list of the custom splits that are mapped to @string, if any
            exist.
                OR
            * [], otherwise.

        Side Effects:
            Deletes the mapping between @string and its associated custom
            splits (which are returned to the caller).
        """
        map_key = self._get_key(string)
        return list(self._CUSTOM_SPLIT_MAP.pop(map_key, ()))

    def has_custom_splits(self, string: str) -> bool:
        """
        Returns:
            True iff @string is associated with a set of custom splits.
        """
        return self._get_key(string) in self._CUSTOM_SPLIT_MAP
2872
2873
class StringMerger(CustomSplitMapMixin, StringTransformer):
    """StringTransformer that merges strings together.

    Requirements:
        (A) The line contains adjacent strings such that at most one substring
        has inline comments AND none of those inline comments are pragmas AND
        the set of all substring prefixes is either of length 1 or equal to
        {"", "f"} AND none of the substrings are raw strings (i.e. are prefixed
        with 'r').
            OR
        (B) The line contains a string which uses line continuation backslashes.

    Transformations:
        Depending on which of the two requirements above were met, either:

        (A) The string group associated with the target string is merged.
            OR
        (B) All line-continuation backslashes are removed from the target string.

    Collaborations:
        StringMerger provides custom split information to StringSplitter.
    """

    def do_match(self, line: Line) -> TMatchResult:
        """
        Find the first string leaf that is either directly followed by another
        string leaf or contains a backslash line continuation.

        Returns:
            * Ok(string_idx), the index of that leaf in `line.leaves`.
                OR
            * Err(CannotTransform), if no such leaf exists.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        for (i, leaf) in enumerate(LL):
            if (
                leaf.type == token.STRING
                and is_valid_index(i + 1)
                and LL[i + 1].type == token.STRING
            ):
                return Ok(i)

            if leaf.type == token.STRING and "\\\n" in leaf.value:
                return Ok(i)

        return TErr("This line has no strings that need merging.")

    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Apply both sub-transformations in sequence: first remove backslash
        line continuations from the target string, then merge the string
        group that starts at @string_idx.

        Yields:
            * Ok(new_line), if at least one of the two steps succeeded.
                OR
            * Err(CannotTransform), with the two underlying failures chained
            through `__cause__`, if both steps failed.
        """
        new_line = line
        rblc_result = self.__remove_backslash_line_continuation_chars(
            new_line, string_idx
        )
        if isinstance(rblc_result, Ok):
            new_line = rblc_result.ok()

        # The merge step works on the output of the previous step (if any).
        msg_result = self.__merge_string_group(new_line, string_idx)
        if isinstance(msg_result, Ok):
            new_line = msg_result.ok()

        if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
            msg_cant_transform = msg_result.err()
            rblc_cant_transform = rblc_result.err()
            cant_transform = CannotTransform(
                "StringMerger failed to merge any strings in this line."
            )

            # Chain the errors together using `__cause__`.
            msg_cant_transform.__cause__ = rblc_cant_transform
            cant_transform.__cause__ = msg_cant_transform

            yield Err(cant_transform)
        else:
            yield Ok(new_line)

    @staticmethod
    def __remove_backslash_line_continuation_chars(
        line: Line, string_idx: int
    ) -> TResult[Line]:
        """
        Merge strings that were split across multiple lines using
        line-continuation backslashes.

        Triple-quoted strings are left alone.

        Returns:
            Ok(new_line), if @line contains backslash line-continuation
            characters.
                OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        string_leaf = LL[string_idx]
        if not (
            string_leaf.type == token.STRING
            and "\\\n" in string_leaf.value
            and not has_triple_quotes(string_leaf.value)
        ):
            return TErr(
                f"String leaf {string_leaf} does not contain any backslash line"
                " continuation characters."
            )

        new_line = line.clone()
        # Give the new line its own comments dict. Sharing the original dict
        # would let the comment entries added while appending leaves below
        # leak back into @line, which may still be used if this transform is
        # later rejected.
        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL)

        new_string_leaf = new_line.leaves[string_idx]
        new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")

        return Ok(new_line)

    def __merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
        """
        Merges string group (i.e. set of adjacent strings) where the first
        string in the group is `line.leaves[string_idx]`.

        Side Effects:
            On success, the merged string is registered (together with the
            split points of the original substrings) in the custom split map.

        Returns:
            Ok(new_line), if ALL of the validation checks found in
            __validate_msg(...) pass.
                OR
            Err(CannotTransform), otherwise.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        vresult = self.__validate_msg(line, string_idx)
        if isinstance(vresult, Err):
            return vresult

        # If the string group is wrapped inside an Atom node, we must make sure
        # to later replace that Atom with our new (merged) string leaf.
        atom_node = LL[string_idx].parent

        # We will place BREAK_MARK in between every two substrings that we
        # merge. We will then later go through our final result and use the
        # various instances of BREAK_MARK we find to add the right values to
        # the custom split map.
        BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"

        # The quote character of the first string is used for the merged string.
        QUOTE = LL[string_idx].value[-1]

        def make_naked(string: str, string_prefix: str) -> str:
            """Strip @string (i.e. make it a "naked" string)

            Pre-conditions:
                * assert_is_leaf_string(@string)

            Returns:
                A string that is identical to @string except that
                @string_prefix has been stripped, the surrounding QUOTE
                characters have been removed, and any remaining QUOTE
                characters have been escaped.
            """
            assert_is_leaf_string(string)

            # Matches an even number (possibly zero) of consecutive
            # backslashes, i.e. the QUOTE that follows is not yet escaped.
            RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
            naked_string = string[len(string_prefix) + 1 : -1]
            naked_string = re.sub(
                "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
            )
            return naked_string

        # Holds the CustomSplit objects that will later be added to the custom
        # split map.
        custom_splits: List[CustomSplit] = []

        # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
        prefix_tracker: List[bool] = []

        # Sets the 'prefix' variable. This is the prefix that the final merged
        # string will have: the first non-empty prefix found in the group.
        next_str_idx = string_idx
        prefix = ""
        while (
            not prefix
            and is_valid_index(next_str_idx)
            and LL[next_str_idx].type == token.STRING
        ):
            prefix = get_string_prefix(LL[next_str_idx].value)
            next_str_idx += 1

        # The next loop merges the string group. The final string will be
        # contained in 'S'.
        #
        # The following convenience variables are used:
        #
        #   S: string
        #   NS: naked string
        #   SS: next string
        #   NSS: naked next string
        S = ""
        NS = ""
        num_of_strings = 0
        next_str_idx = string_idx
        while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
            num_of_strings += 1

            SS = LL[next_str_idx].value
            next_prefix = get_string_prefix(SS)

            # If this is an f-string group but this substring is not prefixed
            # with 'f'...
            if "f" in prefix and "f" not in next_prefix:
                # Then we must escape any braces contained in this substring.
                SS = re.subf(r"(\{|\})", "{1}{1}", SS)

            NSS = make_naked(SS, next_prefix)

            has_prefix = bool(next_prefix)
            prefix_tracker.append(has_prefix)

            S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
            NS = make_naked(S, prefix)

            next_str_idx += 1

        S_leaf = Leaf(token.STRING, S)
        if self.normalize_strings:
            normalize_string_quotes(S_leaf)

        # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
        temp_string = S_leaf.value[len(prefix) + 1 : -1]
        for has_prefix in prefix_tracker:
            mark_idx = temp_string.find(BREAK_MARK)
            assert (
                mark_idx >= 0
            ), "Logic error while filling the custom string breakpoint cache."

            # Drop everything up to and including this marker so that the next
            # iteration measures the following substring only.
            temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
            breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
            custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))

        string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))

        if atom_node is not None:
            replace_child(atom_node, string_leaf)

        # Build the final line ('new_line') that this method will later return.
        new_line = line.clone()
        for (i, leaf) in enumerate(LL):
            if i == string_idx:
                new_line.append(string_leaf)

            if string_idx <= i < string_idx + num_of_strings:
                # The original substrings are not copied over; only the
                # comments attached to them are carried to the new line.
                for comment_leaf in line.comments_after(LL[i]):
                    new_line.append(comment_leaf, preformatted=True)
                continue

            append_leaves(new_line, line, [leaf])

        self.add_custom_splits(string_leaf.value, custom_splits)
        return Ok(new_line)

    @staticmethod
    def __validate_msg(line: Line, string_idx: int) -> TResult[None]:
        """Validate (M)erge (S)tring (G)roup

        Transform-time string validation logic for __merge_string_group(...).

        Returns:
            * Ok(None), if ALL validation checks (listed below) pass.
                OR
            * Err(CannotTransform), if any of the following are true:
                - The target string is not in a string group (i.e. it has no
                  adjacent strings).
                - The string group contains a multiline (triple-quoted) string.
                - The string group has more than one inline comment.
                - The string group has an inline comment that appears to be a pragma.
                - The set of all string prefixes in the string group is of
                  length greater than one and is not equal to {"", "f"}.
                - The string group consists of raw strings.
        """
        num_of_inline_string_comments = 0
        set_of_prefixes = set()
        num_of_strings = 0
        for leaf in line.leaves[string_idx:]:
            if leaf.type != token.STRING:
                # If the string group is trailed by a comma, we count the
                # comments trailing the comma to be one of the string group's
                # comments.
                if leaf.type == token.COMMA and id(leaf) in line.comments:
                    num_of_inline_string_comments += 1
                break

            if has_triple_quotes(leaf.value):
                return TErr("StringMerger does NOT merge multiline strings.")

            num_of_strings += 1
            prefix = get_string_prefix(leaf.value)
            if "r" in prefix:
                return TErr("StringMerger does NOT merge raw strings.")

            set_of_prefixes.add(prefix)

            if id(leaf) in line.comments:
                num_of_inline_string_comments += 1
                if contains_pragma_comment(line.comments[id(leaf)]):
                    return TErr("Cannot merge strings which have pragma comments.")

        if num_of_strings < 2:
            return TErr(
                f"Not enough strings to merge (num_of_strings={num_of_strings})."
            )

        if num_of_inline_string_comments > 1:
            return TErr(
                f"Too many inline string comments ({num_of_inline_string_comments})."
            )

        if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
            return TErr(f"Too many different prefixes ({set_of_prefixes}).")

        return Ok(None)
3180
3181
class StringParenStripper(StringTransformer):
    """StringTransformer that strips surrounding parentheses from strings.

    Requirements:
        The line contains a string which is surrounded by parentheses and:
            - The target string is NOT the only argument to a function call.
            - The RPAR is NOT followed by an attribute access (i.e. a dot).

    Transformations:
        The parentheses mentioned in the 'Requirements' section are stripped.

    Collaborations:
        StringParenStripper has its own inherent usefulness, but it is also
        relied on to clean up the parentheses created by StringParenWrapper (in
        the event that they are no longer needed).
    """

    def do_match(self, line: Line) -> TMatchResult:
        """
        Find the first string leaf that is wrapped in strippable parentheses.

        Returns:
            * Ok(string_idx), the index of that leaf in `line.leaves`.
                OR
            * Err(CannotTransform), if no such leaf exists.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        for (idx, leaf) in enumerate(LL):
            # Should be a string...
            if leaf.type != token.STRING:
                continue

            # Should be preceded by a non-empty LPAR...
            if (
                not is_valid_index(idx - 1)
                or LL[idx - 1].type != token.LPAR
                or is_empty_lpar(LL[idx - 1])
            ):
                continue

            # That LPAR should NOT be preceded by a function name or a closing
            # bracket (which could be a function which returns a function or a
            # list/dictionary that contains a function)...
            if is_valid_index(idx - 2) and (
                LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
            ):
                continue

            string_idx = idx

            # Skip the string trailer, if one exists.
            string_parser = StringParser()
            next_idx = string_parser.parse(LL, string_idx)

            # Should be followed by a non-empty RPAR...
            if (
                is_valid_index(next_idx)
                and LL[next_idx].type == token.RPAR
                and not is_empty_rpar(LL[next_idx])
            ):
                # That RPAR should NOT be followed by a '.' symbol.
                if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT:
                    continue

                return Ok(string_idx)

        return TErr("This line has no strings wrapped in parens.")

    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Build a copy of @line without the parentheses that surround the string
        at `line.leaves[string_idx]`.

        Yields:
            * Ok(new_line), the line with the parentheses removed.
                OR
            * Err(CannotTransform), if either parenthesis has a comment
            attached to it. Nothing is mutated in that case.

        Side Effects:
            On success, the two parenthesis leaves are removed from the
            underlying Node structure and the remaining leaves are replaced by
            their duplicates.
        """
        LL = line.leaves

        string_parser = StringParser()
        rpar_idx = string_parser.parse(LL, string_idx)

        for leaf in (LL[string_idx - 1], LL[rpar_idx]):
            if line.comments_after(leaf):
                yield TErr(
                    "Will not strip parentheses which have comments attached to them."
                )
                # Stop here: if the generator is resumed after reporting the
                # failure, it must not go on to mutate the Node structure.
                return

        new_line = line.clone()
        new_line.comments = line.comments.copy()

        # Everything before the LPAR is carried over unchanged.
        append_leaves(new_line, line, LL[: string_idx - 1])

        string_leaf = Leaf(token.STRING, LL[string_idx].value)
        LL[string_idx - 1].remove()
        replace_child(LL[string_idx], string_leaf)
        new_line.append(string_leaf)

        # Everything after the string is carried over, except the RPAR.
        append_leaves(
            new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :],
        )

        LL[rpar_idx].remove()

        yield Ok(new_line)
3274
3275
3276 class BaseStringSplitter(StringTransformer):
3277     """
3278     Abstract class for StringTransformers which transform a Line's strings by splitting
3279     them or placing them on their own lines where necessary to avoid going over
3280     the configured line length.
3281
3282     Requirements:
3283         * The target string value is responsible for the line going over the
3284         line length limit. It follows that after all of black's other line
3285         split methods have been exhausted, this line (or one of the resulting
3286         lines after all line splits are performed) would still be over the
3287         line_length limit unless we split this string.
3288             AND
3289         * The target string is NOT a "pointless" string (i.e. a string that has
3290         no parent or siblings).
3291             AND
3292         * The target string is not followed by an inline comment that appears
3293         to be a pragma.
3294             AND
3295         * The target string is not a multiline (i.e. triple-quote) string.
3296     """
3297
    @abstractmethod
    def do_splitter_match(self, line: Line) -> TMatchResult:
        """
        BaseStringSplitter asks its clients to override this method instead of
        `StringTransformer.do_match(...)`.

        Follows the same protocol as `StringTransformer.do_match(...)`, i.e.
        the result is either Ok(string_idx) or Err(CannotTransform).
        `BaseStringSplitter.do_match(...)` calls this method first and then
        runs its own validation on the returned index.

        Refer to `help(StringTransformer.do_match)` for more information.
        """
3308
3309     def do_match(self, line: Line) -> TMatchResult:
3310         match_result = self.do_splitter_match(line)
3311         if isinstance(match_result, Err):
3312             return match_result
3313
3314         string_idx = match_result.ok()
3315         vresult = self.__validate(line, string_idx)
3316         if isinstance(vresult, Err):
3317             return vresult
3318
3319         return match_result
3320
3321     def __validate(self, line: Line, string_idx: int) -> TResult[None]:
3322         """
3323         Checks that @line meets all of the requirements listed in this classes'
3324         docstring. Refer to `help(BaseStringSplitter)` for a detailed
3325         description of those requirements.
3326
3327         Returns:
3328             * Ok(None), if ALL of the requirements are met.
3329                 OR
3330             * Err(CannotTransform), if ANY of the requirements are NOT met.
3331         """
3332         LL = line.leaves
3333
3334         string_leaf = LL[string_idx]
3335
3336         max_string_length = self.__get_max_string_length(line, string_idx)
3337         if len(string_leaf.value) <= max_string_length:
3338             return TErr(
3339                 "The string itself is not what is causing this line to be too long."
3340             )
3341
3342         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
3343             token.STRING,
3344             token.NEWLINE,
3345         ]:
3346             return TErr(
3347                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
3348                 " no parent)."
3349             )
3350
3351         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
3352             line.comments[id(line.leaves[string_idx])]
3353         ):
3354             return TErr(
3355                 "Line appears to end with an inline pragma comment. Splitting the line"
3356                 " could modify the pragma's behavior."
3357             )
3358
3359         if has_triple_quotes(string_leaf.value):
3360             return TErr("We cannot split multiline strings.")
3361
3362         return Ok(None)
3363
    def __get_max_string_length(self, line: Line, string_idx: int) -> int:
        """
        Calculates the max string length used when attempting to determine
        whether or not the target string is responsible for causing the line to
        go over the line length limit.

        The result is `self.line_length` minus an "offset" that adds up the
        characters on the line that are NOT part of the target string:
        indentation (4 columns per depth level), selected leaves directly
        before/after the string, and any comments attached to the string.

        WARNING: This method is tightly coupled to both StringSplitter and
        (especially) StringParenWrapper. There is probably a better way to
        accomplish what is being done here.

        Args:
            @line: The line that contains the target string.
            @string_idx: The index of the target string in @line.leaves.

        Returns:
            max_string_length: such that `len(line.leaves[string_idx].value) >
            max_string_length` implies that the target string IS responsible
            for causing this line to exceed the line length limit.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)

        # We use the shorthand "WMA4" in comments to abbreviate "We must
        # account for". When giving examples, we use STRING to mean some/any
        # valid string.
        #
        # Finally, we use the following convenience variables:
        #
        #   P:  The leaf that is before the target string leaf.
        #   N:  The leaf that is after the target string leaf.
        #   NN: The leaf that is after N.

        # WMA4 the whitespace at the beginning of the line.
        offset = line.depth * 4

        if is_valid_index(string_idx - 1):
            p_idx = string_idx - 1
            if (
                LL[string_idx - 1].type == token.LPAR
                and LL[string_idx - 1].value == ""
                and string_idx >= 2
            ):
                # If the previous leaf is an empty LPAR placeholder, we should skip it.
                p_idx -= 1

            P = LL[p_idx]
            if P.type == token.PLUS:
                # WMA4 a space and a '+' character (e.g. `+ STRING`).
                offset += 2

            if P.type == token.COMMA:
                # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
                offset += 3

            if P.type in [token.COLON, token.EQUAL, token.NAME]:
                # This conditional branch is meant to handle dictionary keys,
                # variable assignments, 'return STRING' statement lines, and
                # 'else STRING' ternary expression lines.

                # WMA4 a single space.
                offset += 1

                # WMA4 the lengths of any leaves that came before that space
                # (i.e. every leaf up to and including P).
                for leaf in LL[: p_idx + 1]:
                    offset += len(str(leaf))

        if is_valid_index(string_idx + 1):
            N = LL[string_idx + 1]
            if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
                # If the next leaf is an empty RPAR placeholder, we should skip it.
                N = LL[string_idx + 2]

            if N.type == token.COMMA:
                # WMA4 a single comma at the end of the string (e.g `STRING,`).
                offset += 1

            if is_valid_index(string_idx + 2):
                # NOTE(review): NN is always taken from `string_idx + 2`. When
                # the empty-RPAR skip above fired, N and NN are the very same
                # leaf, so the DOT/NAME test below cannot succeed in that
                # case -- confirm that this is intended.
                NN = LL[string_idx + 2]

                if N.type == token.DOT and NN.type == token.NAME:
                    # This conditional branch is meant to handle method calls invoked
                    # off of a string literal up to and including the LPAR character.

                    # WMA4 the '.' character.
                    offset += 1

                    if (
                        is_valid_index(string_idx + 3)
                        and LL[string_idx + 3].type == token.LPAR
                    ):
                        # WMA4 the left parenthesis character.
                        offset += 1

                    # WMA4 the length of the method's name.
                    offset += len(NN.value)

        # The two-space gap is only counted once, no matter how many comment
        # leaves are attached to the string.
        has_comments = False
        for comment_leaf in line.comments_after(LL[string_idx]):
            if not has_comments:
                has_comments = True
                # WMA4 two spaces before the '#' character.
                offset += 2

            # WMA4 the length of the inline comment.
            offset += len(comment_leaf.value)

        max_string_length = self.line_length - offset
        return max_string_length
3469
3470
3471 class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
3472     """
3473     StringTransformer that splits "atom" strings (i.e. strings which exist on
3474     lines by themselves).
3475
3476     Requirements:
3477         * The line consists ONLY of a single string (with the exception of a
3478         '+' symbol which MAY exist at the start of the line), MAYBE a string
3479         trailer, and MAYBE a trailing comma.
3480             AND
3481         * All of the requirements listed in BaseStringSplitter's docstring.
3482
3483     Transformations:
3484         The string mentioned in the 'Requirements' section is split into as
3485         many substrings as necessary to adhere to the configured line length.
3486
3487         In the final set of substrings, no substring should be smaller than
3488         MIN_SUBSTR_SIZE characters.
3489
3490         The string will ONLY be split on spaces (i.e. each new substring should
3491         start with a space).
3492
3493         If the string is an f-string, it will NOT be split in the middle of an
3494         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
3495         else bar()} is an f-expression).
3496
3497         If the string that is being split has an associated set of custom split
3498         records and those custom splits will NOT result in any line going over
3499         the configured line length, those custom splits are used. Otherwise the
3500         string is split as late as possible (from left-to-right) while still
3501         adhering to the transformation rules listed above.
3502
3503     Collaborations:
3504         StringSplitter relies on StringMerger to construct the appropriate
3505         CustomSplit objects and add them to the custom split map.
3506     """
3507
    # Lower bound that `__get_break_idx` enforces on the length of BOTH pieces
    # that a candidate break index would produce.
    MIN_SUBSTR_SIZE = 6
    # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
    #
    # The pattern is written in verbose form, so it must be used together with
    # the `re.VERBOSE` flag. Reading it top to bottom:
    #   * an opening '{' that is not preceded by another '{';
    #   * one or more (as few as possible) of: any non-brace character, a
    #     literal '{{', or a literal '}}';
    #   * starting at a position that is not preceded by '}': zero or more
    #     '}}' pairs followed by a closing '}' that is not followed by
    #     another '}'.
    RE_FEXPR = r"""
    (?<!\{)\{
        (?:
            [^\{\}]
            | \{\{
            | \}\}
        )+?
    (?<!\})(?:\}\})*\}(?!\})
    """
3519
3520     def do_splitter_match(self, line: Line) -> TMatchResult:
3521         LL = line.leaves
3522
3523         is_valid_index = is_valid_index_factory(LL)
3524
3525         idx = 0
3526
3527         # The first leaf MAY be a '+' symbol...
3528         if is_valid_index(idx) and LL[idx].type == token.PLUS:
3529             idx += 1
3530
3531         # The next/first leaf MAY be an empty LPAR...
3532         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
3533             idx += 1
3534
3535         # The next/first leaf MUST be a string...
3536         if not is_valid_index(idx) or LL[idx].type != token.STRING:
3537             return TErr("Line does not start with a string.")
3538
3539         string_idx = idx
3540
3541         # Skip the string trailer, if one exists.
3542         string_parser = StringParser()
3543         idx = string_parser.parse(LL, string_idx)
3544
3545         # That string MAY be followed by an empty RPAR...
3546         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
3547             idx += 1
3548
3549         # That string / empty RPAR leaf MAY be followed by a comma...
3550         if is_valid_index(idx) and LL[idx].type == token.COMMA:
3551             idx += 1
3552
3553         # But no more leaves are allowed...
3554         if is_valid_index(idx):
3555             return TErr("This line does not end with a string.")
3556
3557         return Ok(string_idx)
3558
    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Splits the string leaf at @line.leaves[@string_idx] into substrings and
        yields the resulting lines in order.

        Every substring except the last is cut from the front of the remaining
        string value -- at a custom split's `break_idx` or at the index chosen
        by `__get_break_idx(...)` -- has the closing quote appended, and is
        placed on its own clone of @line. What remains is re-opened with the
        original prefix and quote before the next round.

        Yields:
            * A single Err(CannotTransform), if @line.depth is so high that
            there is no room for any string content (nothing else is yielded
            in that case).
                OR
            * Ok(new_line) for each substring line, followed by the line
            holding the last substring. Leaves to the right of the target
            string are either appended to that last line or yielded as one
            additional line.
        """
        LL = line.leaves

        # The closing quote character of the target string.
        QUOTE = LL[string_idx].value[-1]

        is_valid_index = is_valid_index_factory(LL)
        insert_str_child = insert_str_child_factory(LL[string_idx])

        prefix = get_string_prefix(LL[string_idx].value)

        # We MAY choose to drop the 'f' prefix from substrings that don't
        # contain any f-expressions, but ONLY if the original f-string
        # contains at least one f-expression. Otherwise, we will alter the AST
        # of the program.
        drop_pointless_f_prefix = ("f" in prefix) and re.search(
            self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
        )

        # `first_string_line` is flipped to False at the end of every loop
        # iteration below; the nested helpers read it through their closure.
        first_string_line = True
        starts_with_plus = LL[0].type == token.PLUS

        def line_needs_plus() -> bool:
            return first_string_line and starts_with_plus

        def maybe_append_plus(new_line: Line) -> None:
            """
            Side Effects:
                If @line starts with a plus and this is the first line we are
                constructing, this function appends a PLUS leaf to @new_line
                and replaces the old PLUS leaf in the node structure. Otherwise
                this function does nothing.
            """
            if line_needs_plus():
                plus_leaf = Leaf(token.PLUS, "+")
                replace_child(LL[0], plus_leaf)
                new_line.append(plus_leaf)

        # NOTE(review): only the leaf DIRECTLY after the string is inspected
        # here, so a comma that follows a string trailer or an empty RPAR is
        # not counted -- confirm whether that is intended.
        ends_with_comma = (
            is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
        )

        def max_last_string() -> int:
            """
            Returns:
                The max allowed length of the string value used for the last
                line we will construct.
            """
            result = self.line_length
            result -= line.depth * 4
            result -= 1 if ends_with_comma else 0
            result -= 2 if line_needs_plus() else 0
            return result

        # --- Calculate Max Break Index (for string value)
        # We start with the line length limit
        max_break_idx = self.line_length
        # The last index of a string of length N is N-1.
        max_break_idx -= 1
        # Leading whitespace is not present in the string value (e.g. Leaf.value).
        max_break_idx -= line.depth * 4
        if max_break_idx < 0:
            yield TErr(
                f"Unable to split {LL[string_idx].value} at such high of a line depth:"
                f" {line.depth}"
            )
            return

        # Check if StringMerger registered any custom splits.
        custom_splits = self.pop_custom_splits(LL[string_idx].value)
        # We use them ONLY if none of them would produce lines that exceed the
        # line limit.
        use_custom_breakpoints = bool(
            custom_splits
            and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
        )

        # Temporary storage for the remaining chunk of the string line that
        # can't fit onto the line currently being constructed.
        rest_value = LL[string_idx].value

        def more_splits_should_be_made() -> bool:
            """
            Returns:
                True iff `rest_value` (the remaining string value from the last
                split), should be split again.
            """
            if use_custom_breakpoints:
                # The final custom split is never popped by the loop below.
                return len(custom_splits) > 1
            else:
                return len(rest_value) > max_last_string()

        string_line_results: List[Ok[Line]] = []
        while more_splits_should_be_made():
            if use_custom_breakpoints:
                # Custom User Split (manual)
                csplit = custom_splits.pop(0)
                break_idx = csplit.break_idx
            else:
                # Algorithmic Split (automatic)
                #
                # NOTE(review): `max_break_idx` was only checked to be >= 0
                # above; subtracting 2 here could make `max_bidx` negative at
                # extreme depths, which `__get_break_idx` asserts against --
                # confirm whether that can happen in practice.
                max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
                maybe_break_idx = self.__get_break_idx(rest_value, max_bidx)
                if maybe_break_idx is None:
                    # If we are unable to algorithmically determine a good split
                    # and this string has custom splits registered to it, we
                    # fall back to using them--which means we have to start
                    # over from the beginning.
                    if custom_splits:
                        rest_value = LL[string_idx].value
                        string_line_results = []
                        first_string_line = True
                        use_custom_breakpoints = True
                        continue

                    # Otherwise, we stop splitting here.
                    break

                break_idx = maybe_break_idx

            # --- Construct `next_value`
            next_value = rest_value[:break_idx] + QUOTE
            if (
                # Are we allowed to try to drop a pointless 'f' prefix?
                drop_pointless_f_prefix
                # If we are, will we be successful?
                and next_value != self.__normalize_f_string(next_value, prefix)
            ):
                # If the current custom split did NOT originally use a prefix,
                # then `csplit.break_idx` will be off by one after removing
                # the 'f' prefix.
                break_idx = (
                    break_idx + 1
                    if use_custom_breakpoints and not csplit.has_prefix
                    else break_idx
                )
                next_value = rest_value[:break_idx] + QUOTE
                next_value = self.__normalize_f_string(next_value, prefix)

            # --- Construct `next_leaf`
            next_leaf = Leaf(token.STRING, next_value)
            insert_str_child(next_leaf)
            self.__maybe_normalize_string_quotes(next_leaf)

            # --- Construct `next_line`
            next_line = line.clone()
            maybe_append_plus(next_line)
            next_line.append(next_leaf)
            string_line_results.append(Ok(next_line))

            # Re-open the remainder as a string of its own (original prefix
            # and quote character).
            rest_value = prefix + QUOTE + rest_value[break_idx:]
            first_string_line = False

        yield from string_line_results

        if drop_pointless_f_prefix:
            rest_value = self.__normalize_f_string(rest_value, prefix)

        rest_leaf = Leaf(token.STRING, rest_value)
        insert_str_child(rest_leaf)

        # NOTE: I could not find a test case that verifies that the following
        # line is actually necessary, but it seems to be. Otherwise we risk
        # not normalizing the last substring, right?
        self.__maybe_normalize_string_quotes(rest_leaf)

        last_line = line.clone()
        maybe_append_plus(last_line)

        # If there are any leaves to the right of the target string...
        if is_valid_index(string_idx + 1):
            # We use `temp_value` here to determine how long the last line
            # would be if we were to append all the leaves to the right of the
            # target string to the last string line. Only leaves up to and
            # including the first LPAR are counted.
            temp_value = rest_value
            for leaf in LL[string_idx + 1 :]:
                temp_value += str(leaf)
                if leaf.type == token.LPAR:
                    break

            # Try to fit them all on the same line with the last substring...
            # (a directly following comma always stays with the substring).
            if (
                len(temp_value) <= max_last_string()
                or LL[string_idx + 1].type == token.COMMA
            ):
                last_line.append(rest_leaf)
                append_leaves(last_line, line, LL[string_idx + 1 :])
                yield Ok(last_line)
            # Otherwise, place the last substring on one line and everything
            # else on a line below that...
            else:
                last_line.append(rest_leaf)
                yield Ok(last_line)

                non_string_line = line.clone()
                append_leaves(non_string_line, line, LL[string_idx + 1 :])
                yield Ok(non_string_line)
        # Else the target string was the last leaf...
        else:
            last_line.append(rest_leaf)
            # Carry over a copy of the original line's comments mapping.
            last_line.comments = line.comments.copy()
            yield Ok(last_line)
3759
3760     def __get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
3761         """
3762         This method contains the algorithm that StringSplitter uses to
3763         determine which character to split each string at.
3764
3765         Args:
3766             @string: The substring that we are attempting to split.
3767             @max_break_idx: The ideal break index. We will return this value if it
3768             meets all the necessary conditions. In the likely event that it
3769             doesn't we will try to find the closest index BELOW @max_break_idx
3770             that does. If that fails, we will expand our search by also
3771             considering all valid indices ABOVE @max_break_idx.
3772
3773         Pre-Conditions:
3774             * assert_is_leaf_string(@string)
3775             * 0 <= @max_break_idx < len(@string)
3776
3777         Returns:
3778             break_idx, if an index is able to be found that meets all of the
3779             conditions listed in the 'Transformations' section of this classes'
3780             docstring.
3781                 OR
3782             None, otherwise.
3783         """
3784         is_valid_index = is_valid_index_factory(string)
3785
3786         assert is_valid_index(max_break_idx)
3787         assert_is_leaf_string(string)
3788
3789         _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
3790
3791         def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
3792             """
3793             Yields:
3794                 All ranges of @string which, if @string were to be split there,
3795                 would result in the splitting of an f-expression (which is NOT
3796                 allowed).
3797             """
3798             nonlocal _fexpr_slices
3799
3800             if _fexpr_slices is None:
3801                 _fexpr_slices = []
3802                 for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
3803                     _fexpr_slices.append(match.span())
3804
3805             yield from _fexpr_slices
3806
3807         is_fstring = "f" in get_string_prefix(string)
3808
3809         def breaks_fstring_expression(i: Index) -> bool:
3810             """
3811             Returns:
3812                 True iff returning @i would result in the splitting of an
3813                 f-expression (which is NOT allowed).
3814             """
3815             if not is_fstring:
3816                 return False
3817
3818             for (start, end) in fexpr_slices():
3819                 if start <= i < end:
3820                     return True
3821
3822             return False
3823
3824         def passes_all_checks(i: Index) -> bool:
3825             """
3826             Returns:
3827                 True iff ALL of the conditions listed in the 'Transformations'
3828                 section of this classes' docstring would be be met by returning @i.
3829             """
3830             is_space = string[i] == " "
3831             is_big_enough = (
3832                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
3833                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
3834             )
3835             return is_space and is_big_enough and not breaks_fstring_expression(i)
3836
3837         # First, we check all indices BELOW @max_break_idx.
3838         break_idx = max_break_idx
3839         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
3840             break_idx -= 1
3841
3842         if not passes_all_checks(break_idx):
3843             # If that fails, we check all indices ABOVE @max_break_idx.
3844             #
3845             # If we are able to find a valid index here, the next line is going
3846             # to be longer than the specified line length, but it's probably
3847             # better than doing nothing at all.
3848             break_idx = max_break_idx + 1
3849             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
3850                 break_idx += 1
3851
3852             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
3853                 return None
3854
3855         return break_idx
3856
3857     def __maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
3858         if self.normalize_strings:
3859             normalize_string_quotes(leaf)
3860
3861     def __normalize_f_string(self, string: str, prefix: str) -> str:
3862         """
3863         Pre-Conditions:
3864             * assert_is_leaf_string(@string)
3865
3866         Returns:
3867             * If @string is an f-string that contains no f-expressions, we
3868             return a string identical to @string except that the 'f' prefix
3869             has been stripped and all double braces (i.e. '{{' or '}}') have
3870             been normalized (i.e. turned into '{' or '}').
3871                 OR
3872             * Otherwise, we return @string.
3873         """
3874         assert_is_leaf_string(string)
3875
3876         if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
3877             new_prefix = prefix.replace("f", "")
3878
3879             temp = string[len(prefix) :]
3880             temp = re.sub(r"\{\{", "{", temp)
3881             temp = re.sub(r"\}\}", "}", temp)
3882             new_string = temp
3883
3884             return f"{new_prefix}{new_string}"
3885         else:
3886             return string
3887
3888
3889 class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
3890     """
3891     StringTransformer that splits non-"atom" strings (i.e. strings that do not
3892     exist on lines by themselves).
3893
3894     Requirements:
3895         All of the requirements listed in BaseStringSplitter's docstring in
3896         addition to the requirements listed below:
3897
3898         * The line is a return/yield statement, which returns/yields a string.
3899             OR
3900         * The line is part of a ternary expression (e.g. `x = y if cond else
3901         z`) such that the line starts with `else <string>`, where <string> is
3902         some string.
3903             OR
3904         * The line is an assert statement, which ends with a string.
3905             OR
3906         * The line is an assignment statement (e.g. `x = <string>` or `x +=
3907         <string>`) such that the variable is being assigned the value of some
3908         string.
3909             OR
3910         * The line is a dictionary key assignment where some valid key is being
3911         assigned the value of some string.
3912
3913     Transformations:
3914         The chosen string is wrapped in parentheses and then split at the LPAR.
3915
3916         We then have one line which ends with an LPAR and another line that
3917         starts with the chosen string. The latter line is then split again at
3918         the RPAR. This results in the RPAR (and possibly a trailing comma)
3919         being placed on its own line.
3920
3921         NOTE: If any leaves exist to the right of the chosen string (except
3922         for a trailing comma, which would be placed after the RPAR), those
3923         leaves are placed inside the parentheses.  In effect, the chosen
3924         string is not necessarily being "wrapped" by parentheses. We can,
3925         however, count on the LPAR being placed directly before the chosen
3926         string.
3927
3928         In other words, StringParenWrapper creates "atom" strings. These
3929         can then be split again by StringSplitter, if necessary.
3930
3931     Collaborations:
3932         In the event that a string line split by StringParenWrapper is
3933         changed such that it no longer needs to be given its own line,
3934         StringParenWrapper relies on StringParenStripper to clean up the
3935         parentheses it created.
3936     """
3937
3938     def do_splitter_match(self, line: Line) -> TMatchResult:
3939         LL = line.leaves
3940
3941         string_idx = None
3942         string_idx = string_idx or self._return_match(LL)
3943         string_idx = string_idx or self._else_match(LL)
3944         string_idx = string_idx or self._assert_match(LL)
3945         string_idx = string_idx or self._assign_match(LL)
3946         string_idx = string_idx or self._dict_match(LL)
3947
3948         if string_idx is not None:
3949             string_value = line.leaves[string_idx].value
3950             # If the string has no spaces...
3951             if " " not in string_value:
3952                 # And will still violate the line length limit when split...
3953                 max_string_length = self.line_length - ((line.depth + 1) * 4)
3954                 if len(string_value) > max_string_length:
3955                     # And has no associated custom splits...
3956                     if not self.has_custom_splits(string_value):
3957                         # Then we should NOT put this string on its own line.
3958                         return TErr(
3959                             "We do not wrap long strings in parentheses when the"
3960                             " resultant line would still be over the specified line"
3961                             " length and can't be split further by StringSplitter."
3962                         )
3963             return Ok(string_idx)
3964
3965         return TErr("This line does not contain any non-atomic strings.")
3966
3967     @staticmethod
3968     def _return_match(LL: List[Leaf]) -> Optional[int]:
3969         """
3970         Returns:
3971             string_idx such that @LL[string_idx] is equal to our target (i.e.
3972             matched) string, if this line matches the return/yield statement
3973             requirements listed in the 'Requirements' section of this classes'
3974             docstring.
3975                 OR
3976             None, otherwise.
3977         """
3978         # If this line is apart of a return/yield statement and the first leaf
3979         # contains either the "return" or "yield" keywords...
3980         if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
3981             0
3982         ].value in ["return", "yield"]:
3983             is_valid_index = is_valid_index_factory(LL)
3984
3985             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
3986             # The next visible leaf MUST contain a string...
3987             if is_valid_index(idx) and LL[idx].type == token.STRING:
3988                 return idx
3989
3990         return None
3991
3992     @staticmethod
3993     def _else_match(LL: List[Leaf]) -> Optional[int]:
3994         """
3995         Returns:
3996             string_idx such that @LL[string_idx] is equal to our target (i.e.
3997             matched) string, if this line matches the ternary expression
3998             requirements listed in the 'Requirements' section of this classes'
3999             docstring.
4000                 OR
4001             None, otherwise.
4002         """
4003         # If this line is apart of a ternary expression and the first leaf
4004         # contains the "else" keyword...
4005         if (
4006             parent_type(LL[0]) == syms.test
4007             and LL[0].type == token.NAME
4008             and LL[0].value == "else"
4009         ):
4010             is_valid_index = is_valid_index_factory(LL)
4011
4012             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4013             # The next visible leaf MUST contain a string...
4014             if is_valid_index(idx) and LL[idx].type == token.STRING:
4015                 return idx
4016
4017         return None
4018
4019     @staticmethod
4020     def _assert_match(LL: List[Leaf]) -> Optional[int]:
4021         """
4022         Returns:
4023             string_idx such that @LL[string_idx] is equal to our target (i.e.
4024             matched) string, if this line matches the assert statement
4025             requirements listed in the 'Requirements' section of this classes'
4026             docstring.
4027                 OR
4028             None, otherwise.
4029         """
4030         # If this line is apart of an assert statement and the first leaf
4031         # contains the "assert" keyword...
4032         if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
4033             is_valid_index = is_valid_index_factory(LL)
4034
4035             for (i, leaf) in enumerate(LL):
4036                 # We MUST find a comma...
4037                 if leaf.type == token.COMMA:
4038                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4039
4040                     # That comma MUST be followed by a string...
4041                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4042                         string_idx = idx
4043
4044                         # Skip the string trailer, if one exists.
4045                         string_parser = StringParser()
4046                         idx = string_parser.parse(LL, string_idx)
4047
4048                         # But no more leaves are allowed...
4049                         if not is_valid_index(idx):
4050                             return string_idx
4051
4052         return None
4053
4054     @staticmethod
4055     def _assign_match(LL: List[Leaf]) -> Optional[int]:
4056         """
4057         Returns:
4058             string_idx such that @LL[string_idx] is equal to our target (i.e.
4059             matched) string, if this line matches the assignment statement
4060             requirements listed in the 'Requirements' section of this classes'
4061             docstring.
4062                 OR
4063             None, otherwise.
4064         """
4065         # If this line is apart of an expression statement or is a function
4066         # argument AND the first leaf contains a variable name...
4067         if (
4068             parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
4069             and LL[0].type == token.NAME
4070         ):
4071             is_valid_index = is_valid_index_factory(LL)
4072
4073             for (i, leaf) in enumerate(LL):
4074                 # We MUST find either an '=' or '+=' symbol...
4075                 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
4076                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4077
4078                     # That symbol MUST be followed by a string...
4079                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4080                         string_idx = idx
4081
4082                         # Skip the string trailer, if one exists.
4083                         string_parser = StringParser()
4084                         idx = string_parser.parse(LL, string_idx)
4085
4086                         # The next leaf MAY be a comma iff this line is apart
4087                         # of a function argument...
4088                         if (
4089                             parent_type(LL[0]) == syms.argument
4090                             and is_valid_index(idx)
4091                             and LL[idx].type == token.COMMA
4092                         ):
4093                             idx += 1
4094
4095                         # But no more leaves are allowed...
4096                         if not is_valid_index(idx):
4097                             return string_idx
4098
4099         return None
4100
4101     @staticmethod
4102     def _dict_match(LL: List[Leaf]) -> Optional[int]:
4103         """
4104         Returns:
4105             string_idx such that @LL[string_idx] is equal to our target (i.e.
4106             matched) string, if this line matches the dictionary key assignment
4107             statement requirements listed in the 'Requirements' section of this
4108             classes' docstring.
4109                 OR
4110             None, otherwise.
4111         """
4112         # If this line is apart of a dictionary key assignment...
4113         if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
4114             is_valid_index = is_valid_index_factory(LL)
4115
4116             for (i, leaf) in enumerate(LL):
4117                 # We MUST find a colon...
4118                 if leaf.type == token.COLON:
4119                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4120
4121                     # That colon MUST be followed by a string...
4122                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4123                         string_idx = idx
4124
4125                         # Skip the string trailer, if one exists.
4126                         string_parser = StringParser()
4127                         idx = string_parser.parse(LL, string_idx)
4128
4129                         # That string MAY be followed by a comma...
4130                         if is_valid_index(idx) and LL[idx].type == token.COMMA:
4131                             idx += 1
4132
4133                         # But no more leaves are allowed...
4134                         if not is_valid_index(idx):
4135                             return string_idx
4136
4137         return None
4138
    def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
        """
        Wraps the string at @line.leaves[@string_idx] in parentheses by
        yielding three lines (each wrapped in `Ok`):

            1. A first line holding every leaf to the left of the string,
               followed by a new "(" leaf and any stolen inline comments.
            2. A string line (one level deeper than @line) holding a new
               string leaf with the same value plus the leaves that followed
               the original string.
            3. A last line holding a new ")" leaf and, if @line ended with a
               comma, a new "," leaf.

        The underlying node structure is updated as the new leaves are
        created, so the order of the statements below matters.
        """
        LL = line.leaves

        is_valid_index = is_valid_index_factory(LL)
        # NOTE: Creating this helper detaches LL[string_idx] from its parent
        # right away; every later call inserts a leaf where the string was.
        insert_str_child = insert_str_child_factory(LL[string_idx])

        # Only a comma in the very last position counts as a trailing comma.
        comma_idx = len(LL) - 1
        ends_with_comma = False
        if LL[comma_idx].type == token.COMMA:
            ends_with_comma = True

        leaves_to_steal_comments_from = [LL[string_idx]]
        if ends_with_comma:
            leaves_to_steal_comments_from.append(LL[comma_idx])

        # --- First Line
        first_line = line.clone()
        left_leaves = LL[:string_idx]

        # We have to remember to account for (possibly invisible) LPAR and RPAR
        # leaves that already wrapped the target string. If these leaves do
        # exist, we will replace them with our own LPAR and RPAR leaves.
        old_parens_exist = False
        if left_leaves and left_leaves[-1].type == token.LPAR:
            old_parens_exist = True
            leaves_to_steal_comments_from.append(left_leaves[-1])
            left_leaves.pop()

        append_leaves(first_line, line, left_leaves)

        # The new LPAR either takes the place of the old one in the node
        # structure or is inserted where the string used to be.
        lpar_leaf = Leaf(token.LPAR, "(")
        if old_parens_exist:
            replace_child(LL[string_idx - 1], lpar_leaf)
        else:
            insert_str_child(lpar_leaf)
        first_line.append(lpar_leaf)

        # We throw inline comments that were originally to the right of the
        # target string to the top line. They will now be shown to the right of
        # the LPAR.
        for leaf in leaves_to_steal_comments_from:
            for comment_leaf in line.comments_after(leaf):
                first_line.append(comment_leaf, preformatted=True)

        yield Ok(first_line)

        # --- Middle (String) Line
        # We only need to yield one (possibly too long) string line, since the
        # `StringSplitter` will break it down further if necessary.
        string_value = LL[string_idx].value
        string_line = Line(
            depth=line.depth + 1,
            inside_brackets=True,
            should_explode=line.should_explode,
        )
        string_leaf = Leaf(token.STRING, string_value)
        insert_str_child(string_leaf)
        string_line.append(string_leaf)

        # Everything after the string (except a trailing comma and the old
        # RPAR, if present) stays on the string line.
        old_rpar_leaf = None
        if is_valid_index(string_idx + 1):
            right_leaves = LL[string_idx + 1 :]
            if ends_with_comma:
                right_leaves.pop()

            if old_parens_exist:
                assert (
                    right_leaves and right_leaves[-1].type == token.RPAR
                ), "Apparently, old parentheses do NOT exist?!"
                old_rpar_leaf = right_leaves.pop()

            append_leaves(string_line, line, right_leaves)

        yield Ok(string_line)

        # --- Last Line
        last_line = line.clone()
        # The last line shares the first line's bracket tracker, which has
        # already seen the new LPAR appended above.
        last_line.bracket_tracker = first_line.bracket_tracker

        new_rpar_leaf = Leaf(token.RPAR, ")")
        if old_rpar_leaf is not None:
            replace_child(old_rpar_leaf, new_rpar_leaf)
        else:
            insert_str_child(new_rpar_leaf)
        last_line.append(new_rpar_leaf)

        # If the target string ended with a comma, we place this comma to the
        # right of the RPAR on the last line.
        if ends_with_comma:
            comma_leaf = Leaf(token.COMMA, ",")
            replace_child(LL[comma_idx], comma_leaf)
            last_line.append(comma_leaf)

        yield Ok(last_line)
4233
4234
class StringParser:
    """
    A small state machine that locates the end of a string's "trailer". A
    trailer is either absent, an old-style formatting sequence (e.g. `% varX`
    or `% (varX, varY)`), or a method call / attribute access (e.g.
    `.format(varX, varY)`).

    NOTE: Instances are single-use. Create a new StringParser object for every
    string trailer that needs to be parsed.

    Examples:
        Assume that `line` is the `Line` object that corresponds to the
        following line of python code:
        ```
        x = "Some {}.".format("String") + some_other_string
        ```

        Also assume that `string_idx` is an index such that:
        ```
        assert line.leaves[string_idx].value == "Some {}."
        ```

        Then the following holds:
        ```
        string_parser = StringParser()
        idx = string_parser.parse(line.leaves, string_idx)
        assert line.leaves[idx].type == token.PLUS
        ```
    """

    # Wildcard used in the lookup table for "any other token".
    DEFAULT_TOKEN = -1

    # String Parser States
    START = 1
    DOT = 2
    NAME = 3
    PERCENT = 4
    SINGLE_FMT_ARG = 5
    LPAR = 6
    RPAR = 7
    DONE = 8

    # Lookup Table for Next State
    _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
        # A trailer begins with either '.' or '%'; anything else means there
        # is no trailer at all.
        (START, token.DOT): DOT,
        (START, token.PERCENT): PERCENT,
        (START, DEFAULT_TOKEN): DONE,
        # After a '.' we require an attribute or method name.
        (DOT, token.NAME): NAME,
        # A name followed by '(' is a method call; otherwise it was an
        # attribute and the trailer ends with it.
        (NAME, token.LPAR): LPAR,
        (NAME, DEFAULT_TOKEN): DONE,
        # After a '%' comes either a parenthesized group or one single
        # argument (e.g. a string or variable name).
        (PERCENT, token.LPAR): LPAR,
        (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
        # A single '%' argument is the final leaf of the trailer.
        (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
        # The closing ')' (when present) is the final leaf of the trailer.
        # (NOTE: There are no entries for the LPAR state. Matching nested
        # parentheses is handled separately in `_next_state`.)
        (RPAR, DEFAULT_TOKEN): DONE,
    }

    def __init__(self) -> None:
        self._state = self.START
        self._unmatched_lpars = 0

    def parse(self, leaves: List[Leaf], string_idx: int) -> int:
        """
        Pre-conditions:
            * @leaves[@string_idx].type == token.STRING

        Returns:
            The index of the first leaf after the string's trailer, if a
            trailer exists.
                OR
            @string_idx + 1, if the string has no trailer.
        """
        assert leaves[string_idx].type == token.STRING

        idx = string_idx + 1
        leaf_count = len(leaves)
        while idx < leaf_count:
            # Stop at the first leaf that does not belong to the trailer.
            if not self._next_state(leaves[idx]):
                break
            idx += 1
        return idx

    def _next_state(self, leaf: Leaf) -> bool:
        """
        Pre-conditions:
            * The first call MUST receive the leaf that directly follows the
            string leaf (e.g. if the target string is `line.leaves[i]`, the
            first call receives `line.leaves[i + 1]`).
            * Each later call MUST receive the leaf that directly follows the
            leaf passed to the previous call.

        Returns:
            True iff @leaf is a part of the string's trailer.
        """
        # Empty LPAR / RPAR leaves never affect the state.
        if is_empty_par(leaf):
            return True

        leaf_type = leaf.type
        if leaf_type == token.LPAR:
            self._unmatched_lpars += 1

        state = self._state

        # Inside parentheses every leaf belongs to the trailer. We only watch
        # for the RPAR that closes the outermost LPAR.
        if state == self.LPAR:
            if leaf_type == token.RPAR:
                self._unmatched_lpars -= 1
                if self._unmatched_lpars == 0:
                    self._state = self.RPAR
            return True

        # Everywhere else the lookup table decides: first the exact
        # (state, token) entry, then the state's default entry.
        transition = self._goto.get((state, leaf_type))
        if transition is None:
            transition = self._goto.get((state, self.DEFAULT_TOKEN))
        if transition is None:
            # A state with neither entry means the table itself is broken.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")

        self._state = transition
        return transition != self.DONE
4375
4376
def TErr(err_msg: str) -> Err[CannotTransform]:
    """(T)ransform Err

    Shortcut for code that works with the TResult type: builds a
    CannotTransform from @err_msg and returns it wrapped in an Err.
    """
    return Err(CannotTransform(err_msg))
4384
4385
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff at least one comment in @comment_list starts with a pragma
        prefix used by common static analysis tools for python (mypy's
        "# type:", flake8's "# noqa", pylint's "# pylint:").
    """
    pragma_prefixes = ("# type:", "# noqa", "# pylint:")
    return any(comment.value.startswith(pragma_prefixes) for comment in comment_list)
4398
4399
def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
    """
    Detaches @string_leaf from its parent and returns a function that inserts
    new children, one after another, at the position @string_leaf used to
    occupy in that parent.

    Examples:
        Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
        string_leaf.parent`, where `N` starts out as:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(STRING, '"foo"'),
            ]
        )

        Running
        ```
        insert_str_child = insert_str_child_factory(string_leaf)

        lpar = Leaf(token.LPAR, '(')
        insert_str_child(lpar)

        bar = Leaf(token.STRING, '"bar"')
        insert_str_child(bar)

        rpar = Leaf(token.RPAR, ')')
        insert_str_child(rpar)
        ```

        leaves `string_leaf.parent is None` and turns `N` into:

        Node(
            expr_stmt, [
                Leaf(NAME, 'x'),
                Leaf(EQUAL, '='),
                Leaf(LPAR, '('),
                Leaf(STRING, '"bar"'),
                Leaf(RPAR, ')'),
            ]
        )
    """
    # Remember the parent before `remove()` orphans the leaf.
    former_parent = string_leaf.parent
    next_slot = string_leaf.remove()

    def insert_str_child(child: LN) -> None:
        nonlocal next_slot

        assert former_parent is not None
        assert next_slot is not None

        former_parent.insert_child(next_slot, child)
        # Advance so that the next child lands right after this one.
        next_slot += 1

    return insert_str_child
4459
4460
def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once any leading string prefix characters have been
        stripped, starts with three double quotes or three single quotes.
    """
    unprefixed = string.lstrip(STRING_PREFIX_CHARS)
    return unprefixed.startswith(('"""', "'''"))
4468
4469
def parent_type(node: Optional[LN]) -> Optional[NodeType]:
    """
    Returns:
        The type of @node's parent, if @node is not None and has a parent.
            OR
        None, otherwise.
    """
    if node is not None:
        parent = node.parent
        if parent is not None:
            return parent.type

    return None
4481
4482
def is_empty_par(leaf: Leaf) -> bool:
    """
    Returns:
        True iff @leaf is an empty left parenthesis or an empty right
        parenthesis (see `is_empty_lpar` and `is_empty_rpar`).
    """
    if is_empty_lpar(leaf):
        return True

    return is_empty_rpar(leaf)
4485
4486
def is_empty_lpar(leaf: Leaf) -> bool:
    """
    Returns:
        True iff @leaf is an LPAR leaf whose value is the empty string.
    """
    if leaf.type != token.LPAR:
        return False

    return leaf.value == ""
4489
4490
def is_empty_rpar(leaf: Leaf) -> bool:
    """
    Returns:
        True iff @leaf is an RPAR leaf whose value is the empty string.
    """
    if leaf.type != token.RPAR:
        return False

    return leaf.value == ""
4493
4494
def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
    """
    Builds a bounds checker for @seq. The length of @seq is looked up on
    every call, not when the checker is created.

    Examples:
        ```
        my_list = [1, 2, 3]

        is_valid_index = is_valid_index_factory(my_list)

        assert is_valid_index(0)
        assert is_valid_index(2)

        assert not is_valid_index(3)
        assert not is_valid_index(-1)
        ```
    """

    def is_valid_index(idx: int) -> bool:
        """
        Returns:
            True iff @idx is non-negative AND seq[@idx] does NOT raise an
            IndexError.
        """
        if idx < 0:
            return False

        return idx < len(seq)

    return is_valid_index
4520
4521
def line_to_string(line: Line) -> str:
    """Returns `str(line)` with leading and trailing newlines removed.

    WARNING: This is known to be computationally expensive.
    """
    rendered = str(line)
    return rendered.strip("\n")
4528
4529
def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None:
    """
    Copies @leaves (all taken from @old_line) onto @new_line while keeping
    the underlying Node structure consistent.

    For every leaf in @leaves a new Leaf with the same type and value is
    created. The copy replaces the original in the Node structure and is
    appended to @new_line, followed by any comments that @old_line had
    attached to the original leaf.

    Pre-conditions:
        set(@leaves) is a subset of set(@old_line.leaves).
    """
    for original_leaf in leaves:
        assert original_leaf in old_line.leaves

        duplicate = Leaf(original_leaf.type, original_leaf.value)
        replace_child(original_leaf, duplicate)
        new_line.append(duplicate)

        # Comments are looked up by the ORIGINAL leaf on the old line.
        for attached_comment in old_line.comments_after(original_leaf):
            new_line.append(attached_comment, preformatted=True)
4552
4553
def replace_child(old_child: LN, new_child: LN) -> None:
    """
    Side Effects:
        * If @old_child has a parent, @old_child is removed from it and
        @new_child is inserted at the position @old_child occupied.
            OR
        * Otherwise, this function does nothing.
    """
    parent = old_child.parent
    if parent:
        position = old_child.remove()
        # `remove()` reports where the child was (or None); only insert when
        # a position is known.
        if position is not None:
            parent.insert_child(position, new_child)
4569
4570
def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        @string's prefix in lowercase (e.g. '', 'r', 'f', or 'rf').
    """
    assert_is_leaf_string(string)

    # Walk forward until the first character that is not a prefix character.
    # The pre-condition guarantees a quote character, so the scan terminates.
    prefix_end = 0
    while string[prefix_end] in STRING_PREFIX_CHARS:
        prefix_end += 1

    return string[:prefix_end].lower()
4588
4589
def assert_is_leaf_string(string: str) -> None:
    """
    Checks that @string looks like the `value` of a Leaf whose type is
    `token.STRING`. The exact conditions that are checked are listed below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the first quote character of either kind, or -1 if @string
    # contains no quote character at all.
    quote_positions = [
        position
        for position in (string.find('"'), string.find("'"))
        if position != -1
    ]
    quote_idx = min(quote_positions, default=-1)

    # The opening quote must exist and must not be the final character.
    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string[-1] in (
        "'",
        '"',
    ), f"{string!r} is missing an ending quote character (' or \")."
    # Everything in front of the opening quote must be a prefix character.
    assert set(string[:quote_idx]).issubset(
        set(STRING_PREFIX_CHARS)
    ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
4623
4624
def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Leaves up to and including the first opening bracket form the head, leaves
    up to its matching closing bracket form the body, and the closing bracket
    with everything after it forms the tail.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    matching_bracket: Optional[Leaf] = None

    # `target` is the bucket that currently receives leaves.
    target = head_leaves
    for leaf in line.leaves:
        closes_body = (
            target is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        )
        if closes_body:
            # An empty body sends the closing bracket back to the head.
            target = tail_leaves if body_leaves else head_leaves
        target.append(leaf)
        if target is head_leaves and leaf.type in OPENING_BRACKETS:
            matching_bracket = leaf
            target = body_leaves

    if not matching_bracket:
        raise CannotSplit("No brackets found")

    head = bracket_split_build_line(head_leaves, line, matching_bracket)
    body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    # Empty lines are not emitted.
    yield from (part for part in (head, body, tail) if part)
4659
4660
def right_hand_split(
    line: Line,
    line_length: int,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Yields the non-empty lines among head, body and tail, in that order.

    Raises :exc:`CannotSplit` when no usable bracket pair is found, when the
    resulting split is rejected by :func:`bracket_split_succeeded_or_raise`, or
    when the retry without optional parentheses fails and the current split is
    not acceptable either.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    # Walk the leaves right-to-left: tail first, then body (after the first
    # closing bracket that is not omitted), then head (from its opening bracket).
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    if not (opening_bracket and closing_bracket and head_leaves):
        # If there is no opening or closing_bracket that means the split failed and
        # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # The buckets were filled back-to-front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    head = bracket_split_build_line(head_leaves, line, opening_bracket)
    body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        # Retry with this closing bracket excluded, so that an earlier bracket
        # pair gets picked instead of the optional parentheses.
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, features=features, omit=omit)
            return

        except CannotSplit:
            # The retry failed. Fall back to the split computed above, unless
            # that split cannot produce an acceptable result either.
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to"
                    " satisfy the splitting algorithm because the head or the tail"
                    " contains multiline strings which by definition never fit one"
                    " line."
                )

    # We split on this bracket pair, so the brackets must be visible.
    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
4748
4749
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split works on one pair of brackets. Everything up to
    (and including) the opening bracket stays on one line, the content between
    the brackets goes on a line of its own, and the closing bracket together
    with whatever follows it goes on a final line.

    These lines are called `head`, `body`, and `tail`, respectively. The split
    is considered failed when `body` is empty and the stripped `tail` is shorter
    than three characters: an empty `tail` means the split reproduced the
    original line, and a one or two character `tail` is not worth a new line.
    """
    tail_len = len(str(tail).strip())
    if body:
        return

    if tail_len == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if tail_len < 3:
        raise CannotSplit(
            f"Splitting brackets on an empty body to save {tail_len} characters is"
            " not worth it"
        )
4774
4775
def bracket_split_build_line(
    leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
) -> Line:
    """Return a new line with given `leaves` and respective comments from `original`.

    If `is_body` is True, the result line is one-indented inside brackets and as such
    has its first leaf's prefix normalized and a trailing comma added when expected.
    Note that the trailing comma is inserted into `leaves` itself.
    """
    result = Line(depth=original.depth)
    if is_body:
        result.inside_brackets = True
        result.depth += 1

    if is_body and leaves:
        # Since body is a new indent level, remove spurious leading whitespace.
        normalize_prefix(leaves[0], inside_brackets=True)
        # A trailing comma is wanted for imports and for parenthesized `def`
        # bodies that hold no comma yet (i.e. a standalone argument).
        no_commas = (
            original.is_def
            and opening_bracket.value == "("
            and not any(leaf.type == token.COMMA for leaf in leaves)
        )

        if original.is_import or no_commas:
            # Find the last leaf that is not a standalone comment, so that the
            # comma never ends up after a comment.
            last_code_idx = next(
                (
                    idx
                    for idx in reversed(range(len(leaves)))
                    if leaves[idx].type != STANDALONE_COMMENT
                ),
                None,
            )
            if last_code_idx is not None and leaves[last_code_idx].type != token.COMMA:
                leaves.insert(last_code_idx + 1, Leaf(token.COMMA, ","))

    # Populate the line
    for leaf in leaves:
        result.append(leaf, preformatted=True)
        for trailing_comment in original.comments_after(leaf):
            result.append(trailing_comment, preformatted=True)

    if is_body:
        result.should_explode = should_explode(result, opening_bracket)
    return result
4816
4817
def dont_increase_indentation(split_func: Transformer) -> Transformer:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions. Each line produced by
    `split_func` has its first leaf passed to `normalize_prefix` (with
    `inside_brackets=True`) before the line is yielded.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
        for split_line in split_func(line, features):
            first_leaf = split_line.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield split_line

    return split_wrapper
4831
4832
@dont_increase_indentation
def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If the appropriate Features are given, the split will add trailing commas
    also in function signatures and calls that contain `*` and `**`.

    Raises CannotSplit when `line` has no leaves, when looking up the maximum
    delimiter priority (with the last leaf excluded) fails, or when the only
    delimiter is a single attribute-access dot.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        # The IndexError is an implementation detail; do not chain it onto
        # the CannotSplit that callers handle.
        raise CannotSplit("Line empty") from None

    bt = line.bracket_tracker
    try:
        delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found") from None

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    # Becomes False once a vararg is seen at the lowest bracket depth and the
    # matching trailing-comma Feature is not among `features`.
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            # `append_safe` refused the leaf: emit what we have and start over.
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        if leaf.bracket_depth == lowest_depth:
            if is_vararg(leaf, within={syms.typedargslist}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
                )
            elif is_vararg(leaf, within={syms.arglist, syms.argument}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
                )

        # A leaf registered as a delimiter of the chosen priority ends the
        # current output line.
        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        # Add a trailing comma to the last piece of a comma split, unless it
        # already ends with a comma or a standalone comment.
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
4901
4902
@dont_increase_indentation
def standalone_comment_split(
    line: Line, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Break `line` apart around its standalone comments.

    Leaves and the comments that follow them are added to a pending line with
    `append_safe`; whenever that raises ValueError the pending line is yielded
    and a new one is started with the rejected leaf.  Raises CannotSplit if
    `line` reports no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def fresh_line() -> Line:
        """Create an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    pending = fresh_line()

    def place(item: Leaf) -> Iterator[Line]:
        """Put `item` on the pending line, flushing it first if necessary."""
        nonlocal pending
        try:
            pending.append_safe(item, preformatted=True)
        except ValueError:
            yield pending

            pending = fresh_line()
            pending.append(item)

    for leaf in line.leaves:
        yield from place(leaf)

        for trailing_comment in line.comments_after(leaf):
            yield from place(trailing_comment)

    if pending:
        yield pending
4932
4933
def is_import(leaf: Leaf) -> bool:
    """Tell whether `leaf` is the keyword that opens an import statement.

    That is either an `import` NAME whose parent is an `import_name` node or a
    `from` NAME whose parent is an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    keyword = leaf.value
    if keyword == "import":
        return bool(parent and parent.type == syms.import_name)

    if keyword == "from":
        return bool(parent and parent.type == syms.import_from)

    return False
4946
4947
def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
    """Tell whether `leaf` is a `# type:` comment.

    `leaf` must be a COMMENT or STANDALONE_COMMENT whose text starts with
    "# type:" immediately followed by `suffix`.
    """
    if leaf.type not in {token.COMMENT, STANDALONE_COMMENT}:
        return False

    return leaf.value.startswith("# type:" + suffix)
4954
4955
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce the prefix of `leaf` to newlines only, or to nothing at all.

    Inside brackets the prefix is always cleared.  Outside brackets it is also
    cleared when the text before the first "#" contains a backslash; otherwise
    it is replaced by the newlines counted in the text after the last "#"
    (one fewer when the prefix contains a "#").

    Note: Mutates its argument.
    """
    if inside_brackets:
        leaf.prefix = ""
        return

    before_comment, *after_hashes = leaf.prefix.split("#")
    if "\\" in before_comment:
        leaf.prefix = ""
        return

    if after_hashes:
        # One newline of the tail is not counted when the prefix has a "#".
        newline_count = after_hashes[-1].count("\n") - 1
    else:
        newline_count = before_comment.count("\n")
    leaf.prefix = "\n" * newline_count
4972
4973
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Lowercase the `F`, `B` and `U` characters of a string's prefix.

    Other prefix characters (such as `R`) are left as they are.  With
    `remove_u_prefix` set, every `u` is additionally dropped from the prefix.

    Note: Mutates its argument.
    """
    match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    prefix, remainder = match.group(1), match.group(2)
    for upper in ("F", "B", "U"):
        prefix = prefix.replace(upper, upper.lower())
    if remove_u_prefix:
        prefix = prefix.replace("u", "")
    leaf.value = f"{prefix}{remainder}"
4988
4989
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    The string is left untouched when it already uses triple double quotes,
    when it is a raw string containing an unescaped new quote, when an f-string
    replacement field would end up containing a backslash, or when switching
    quotes would not reduce (or, for single quotes, at least preserve) the
    number of backslashes.  Unnecessary escapes of the *other* quote character
    are removed from non-raw strings even if the quotes themselves are kept.

    Note: Mutates its argument.
    """
    # Look at the literal without its prefix to find out which quotes it uses.
    value = leaf.value.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        # Already double-quoted: the only candidate change is to single quotes,
        # which is accepted further down only if it saves escapes.
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    # new_quote preceded by an even number (possibly zero) of backslashes.
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    # new_quote / orig_quote preceded by an odd number of backslashes.
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    # Everything between the opening and the closing quotes.
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        # Build the body as it would look with the new quotes: the old quote no
        # longer needs escaping, the new quote now does.
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Collect the contents of the f-string's replacement fields.
        matches = re.findall(
            r"""
            (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:[^}]|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending in `"` would run into the closing `"""`,
        # so the trailing quote is escaped.
        new_body = new_body[:-1] + '\\"'
    # Compare how many backslashes each variant needs.
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
5064
5065
def normalize_numeric_literal(leaf: Leaf) -> None:
    """Rewrite the numeric literal held by `leaf` in a canonical spelling.

    The literal is lowercased, then: hex digits after `0x` are uppercased, a
    `+` sign after the exponent marker is dropped, an `l` suffix becomes `L`,
    and decimal parts are padded by `format_float_or_int_string`.  Octal and
    binary literals get no treatment beyond lowercasing.

    Note: Mutates its argument.
    """
    lowered = leaf.value.lower()
    if lowered.startswith(("0o", "0b")):
        # Octal and binary literals need nothing beyond lowercasing.
        normalized = lowered
    elif lowered.startswith("0x"):
        # Hex digits are spelled in upper case, the "0x" marker in lower case.
        normalized = f"{lowered[:2]}{lowered[2:].upper()}"
    elif "e" in lowered:
        mantissa, exponent = lowered.split("e")
        exponent_sign = ""
        if exponent.startswith("-"):
            exponent_sign = "-"
            exponent = exponent[1:]
        elif exponent.startswith("+"):
            exponent = exponent[1:]
        mantissa = format_float_or_int_string(mantissa)
        normalized = f"{mantissa}e{exponent_sign}{exponent}"
    elif lowered.endswith(("j", "l")):
        digits, suffix = lowered[:-1], lowered[-1]
        # Capitalize in "2L" because "l" looks too similar to "1".
        if suffix == "l":
            suffix = "L"
        normalized = f"{format_float_or_int_string(digits)}{suffix}"
    else:
        normalized = format_float_or_int_string(lowered)
    leaf.value = normalized
5100
5101
def format_float_or_int_string(text: str) -> str:
    """Pad a decimal literal so that something stands on both sides of the point.

    An empty integral or fractional part is replaced by "0" (e.g. "1." becomes
    "1.0" and ".5" becomes "0.5").  Text without a "." is returned unchanged.
    """
    if "." in text:
        integral, fractional = text.split(".")
        return f"{integral or 0}.{fractional or 0}"

    return text
5109
5110
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    Nothing is done when the prefix of `node` contains a `# fmt: off` comment.

    Note: Mutates `node` in place.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return
    # True when the child currently being visited is a candidate for (invisible)
    # parentheses; set from the previous child at the bottom of the loop.
    check_lpar = False
    # Iterate over a copy because children are replaced/inserted along the way.
    for index, child in enumerate(list(node.children)):
        # Fixes a bug where invisible parens are not properly stripped from
        # assignment statements that contain type annotations.
        if isinstance(child, Node) and child.type == syms.annassign:
            normalize_invisible_parens(child, parens_after=parens_after)

        # Add parentheses around long tuple unpacking in assignments.
        if (
            index == 0
            and isinstance(child, Node)
            and child.type == syms.testlist_star_expr
        ):
            check_lpar = True

        if check_lpar:
            if is_walrus_assignment(child):
                # NOTE: `continue` also skips the `check_lpar` update below, so
                # the flag stays set for the next child.
                continue

            if child.type == syms.atom:
                # The helper reports whether the atom itself should get
                # invisible parentheses around it.
                if maybe_make_parens_invisible_in_atom(child, parent=node):
                    wrap_in_parentheses(node, child, visible=False)
            elif is_one_tuple(child):
                wrap_in_parentheses(node, child, visible=True)
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                # Nothing after the imported names needs processing.
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                wrap_in_parentheses(node, child, visible=False)

        # The next child gets parentheses if this child is one of the trigger
        # leaves listed in `parens_after`.
        check_lpar = isinstance(child, Leaf) and child.value in parens_after
5165
5166
def normalize_fmt_off(node: Node) -> None:
    """Turn every `# fmt: off`/`# fmt: on` region under `node` into a standalone comment.

    Pairs are converted one at a time until `convert_one_fmt_off_pair` reports
    that nothing was converted.
    """
    while convert_one_fmt_off_pair(node):
        pass
5172
5173
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    The nodes produced by `generate_ignored_nodes` are removed from the tree
    and replaced by one STANDALONE_COMMENT leaf whose value is the `# fmt: off`
    comment followed by the source text of the removed nodes.

    Returns True if a pair was converted.

    Note: Mutates the tree under `node`.
    """
    for leaf in node.leaves():
        # `consumed` value of the comment preceding the current one in this
        # prefix (0 for the first comment).
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:
                        # NOTE(review): this `continue` (and the one below) skips
                        # the `previous_consumed` update at the end of the loop
                        # body -- confirm that is intended.
                        continue

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:
                    continue

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                # Drop the part of the prefix up to and including the
                # `# fmt: off` comment; it is re-emitted as part of `hidden_value`.
                first.prefix = prefix[comment.consumed :]
                hidden_value = (
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                )
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                # Remove the ignored nodes, remembering the index returned by
                # the first `remove()` call so the replacement goes there.
                first_idx: Optional[int] = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                        first_idx = index
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                # The replacement keeps whatever preceded the `# fmt: off`
                # comment in the original prefix, plus the comment's newlines.
                parent.insert_child(
                    first_idx,
                    Leaf(
                        STANDALONE_COMMENT,
                        hidden_value,
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                    ),
                )
                return True

            previous_consumed = comment.consumed

    return False
5226
5227
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    Stops at the end of the block, at the ENDMARKER, or at the first container
    (or child of a container) for which formatting is switched back on.
    """
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        if is_fmt_on(container):
            return

        # fix for fmt: on in children
        if contains_fmt_on_at_column(container, leaf.column):
            for child in container.children:
                if (
                    isinstance(child, Leaf)
                    and child.column == leaf.column
                    and is_fmt_on(child)
                ):
                    # The `# fmt: on` comment lives in the prefix of this leaf
                    # itself; a leaf has no children for the check below to
                    # find it in, so stop here explicitly.
                    return

                if contains_fmt_on_at_column(child, leaf.column):
                    return

                yield child

            # Every child has been yielded and `container` was not advanced:
            # going around the `while` loop again would repeat the same
            # children forever, so generation ends here.
            return
        else:
            yield container
            container = container.next_sibling
5247
5248
def is_fmt_on(container: LN) -> bool:
    """Tell whether the prefix of `container` leaves formatting switched on.

    The last `# fmt: on` / `# fmt: off` comment found in the prefix decides;
    without any such comment the answer is False.
    """
    enabled = False
    for comment in list_comments(container.prefix, is_endmarker=False):
        text = comment.value
        if text in FMT_ON:
            enabled = True
            continue

        if text in FMT_OFF:
            enabled = False
    return enabled
5260
5261
def contains_fmt_on_at_column(container: LN, column: int) -> bool:
    """Tell whether a direct child of `container` at `column` turns formatting on.

    The column of a Node child is taken from `first_leaf_column`, that of a
    Leaf child from its own `column` attribute.
    """
    for child in container.children:
        if isinstance(child, Node):
            child_column = first_leaf_column(child)
        elif isinstance(child, Leaf):
            child_column = child.column
        else:
            continue

        if child_column == column and is_fmt_on(child):
            return True

    return False
5275
5276
def first_leaf_column(node: Node) -> Optional[int]:
    """Return the column of the first direct child of `node` that is a Leaf.

    Returns None when none of the direct children is a Leaf.
    """
    return next(
        (child.column for child in node.children if isinstance(child, Leaf)), None
    )
5283
5284
def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
    """Make the parentheses of the atom `node` invisible where allowed, recursively.

    Nothing is touched (and False is returned) when `node` is not an atom, is
    an empty or one-element tuple, is a yield whose `parent` is not an
    expression statement, or contains a delimiter of at least comma priority.
    Redundant invisible parentheses directly inside the atom are removed.

    Returns whether the node should itself be wrapped in invisible parentheses.
    """
    if node.type != syms.atom:
        return False

    if is_empty_tuple(node) or is_one_tuple(node):
        return False

    if is_yield(node) and parent.type != syms.expr_stmt:
        return False

    if max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY:
        return False

    opening, closing = node.children[0], node.children[-1]
    if not (opening.type == token.LPAR and closing.type == token.RPAR):
        # An atom without its own parentheses: the caller should wrap it.
        return True

    inner = node.children[1]
    # Blank out the values so the parentheses are no longer rendered.
    opening.value = ""  # type: ignore
    closing.value = ""  # type: ignore
    maybe_make_parens_invisible_in_atom(inner, parent=parent)

    if is_atom_with_invisible_parens(inner):
        # Two adjacent pairs of invisible parentheses are redundant: replace
        # `inner` with the child sitting between its parentheses.
        inner.replace(inner.children[1])

    return False
5319
5320
def is_atom_with_invisible_parens(node: LN) -> bool:
    """Tell whether `node` is an atom enclosed in invisible parentheses.

    That is the case when its first and last children are LPAR and RPAR leaves
    with an empty value.  Useful for de-duplicating and normalizing parens.
    """
    if isinstance(node, Leaf) or node.type != syms.atom:
        return False

    opening = node.children[0]
    closing = node.children[-1]
    if not (isinstance(opening, Leaf) and isinstance(closing, Leaf)):
        return False

    return (
        opening.type == token.LPAR
        and opening.value == ""
        and closing.type == token.RPAR
        and closing.value == ""
    )
5337
5338
def is_empty_tuple(node: LN) -> bool:
    """Tell whether `node` is an atom made of just an LPAR and an RPAR."""
    if node.type != syms.atom or len(node.children) != 2:
        return False

    opening, closing = node.children
    return opening.type == token.LPAR and closing.type == token.RPAR
5347
5348
def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
    """Return the middle child if `node` has the shape ( wrapped ).

    The shape is recognized by token type only, so the parentheses may be
    invisible.  Returns None for any other shape.
    """
    children = node.children
    if len(children) == 3:
        opening, wrapped, closing = children
        if opening.type == token.LPAR and closing.type == token.RPAR:
            return wrapped

    return None
5361
5362
def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
    """Replace `child` under `parent` with an atom of the form ( child ).

    The prefix of `child` moves to the new atom.  With `visible` set to False
    the parenthesis leaves carry an empty value and are thus invisible.

    Note: Mutates `parent` and `child`.
    """
    open_text, close_text = ("(", ")") if visible else ("", "")
    saved_prefix = child.prefix
    child.prefix = ""
    # Detach the child; index 0 is used when `remove()` returns a falsy value.
    position = child.remove() or 0
    wrapper = Node(
        syms.atom,
        [Leaf(token.LPAR, open_text), child, Leaf(token.RPAR, close_text)],
    )
    wrapper.prefix = saved_prefix
    parent.insert_child(position, wrapper)
5379
5380
def is_one_tuple(node: LN) -> bool:
    """Tell whether `node` is a one-element tuple, parenthesized or not.

    For an atom the parenthesized content must be a `testlist_gexp`; otherwise
    the type of `node` must be in IMPLICIT_TUPLE.  In both cases the tuple
    needs exactly two children, the second being a comma.
    """
    if node.type == syms.atom:
        wrapped = unwrap_singleton_parenthesis(node)
        if wrapped is None or wrapped.type != syms.testlist_gexp:
            return False

        candidate = wrapped
    elif node.type in IMPLICIT_TUPLE:
        candidate = node
    else:
        return False

    elements = candidate.children
    return len(elements) == 2 and elements[1].type == token.COMMA
5395
5396
def is_walrus_assignment(node: LN) -> bool:
    """Tell whether `node` has the shape ( test := test )."""
    wrapped = unwrap_singleton_parenthesis(node)
    if wrapped is None:
        return False

    return wrapped.type == syms.namedexpr_test
5401
5402
def is_yield(node: LN) -> bool:
    """Tell whether `node` is a `yield` expression, possibly wrapped in parentheses.

    A `yield_expr` node and a bare `yield` NAME leaf both count; any number of
    enclosing ( ... ) atoms is looked through.
    """
    current = node
    while True:
        if current.type == syms.yield_expr:
            return True

        if current.type == token.NAME and current.value == "yield":  # type: ignore
            return True

        if current.type != syms.atom or len(current.children) != 3:
            return False

        opening, inner, closing = current.children
        if not (opening.type == token.LPAR and closing.type == token.RPAR):
            return False

        # Look inside the parentheses.
        current = inner
5422
5423
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Tell whether `leaf` is a star or double star whose owner is in `within`.

    The owner is the parent of `leaf`, except when that parent is a
    `star_expr`: star expressions are also used as assignment targets in
    extended iterable unpacking (PEP 3132), so its own parent is checked
    instead.  Leaves whose type is not in VARARGS_SPECIALS, or that lack the
    required parent, never qualify.
    """
    if leaf.type not in VARARGS_SPECIALS:
        return False

    owner = leaf.parent
    if not owner:
        return False

    if owner.type == syms.star_expr:
        owner = owner.parent
        if not owner:
            return False

    return owner.type in within
5445
5446
def is_multiline_string(leaf: Leaf) -> bool:
    """Tell whether `leaf` is a triple-quoted string that contains a newline."""
    text = leaf.value
    if not has_triple_quotes(text):
        return False

    return "\n" in text
5450
5451
def is_stub_suite(node: Node) -> bool:
    """Tell whether `node` is a suite whose only statement is a stub body.

    The suite must consist of exactly NEWLINE, INDENT, one statement and
    DEDENT, and the statement must satisfy `is_stub_body`.
    """
    if len(node.children) != 4:
        return False

    newline, indent, body, dedent = node.children
    layout = (newline.type, indent.type, dedent.type)
    if layout != (token.NEWLINE, token.INDENT, token.DEDENT):
        return False

    return is_stub_body(body)
5463
5464
def is_stub_body(node: LN) -> bool:
    """Tell whether `node` is a simple statement holding just an ellipsis.

    The statement must have two children, the first being an atom made of
    exactly three "." leaves.
    """
    is_simple_stmt = isinstance(node, Node) and node.type == syms.simple_stmt
    if not is_simple_stmt or len(node.children) != 2:
        return False

    statement = node.children[0]
    if statement.type != syms.atom or len(statement.children) != 3:
        return False

    dot = Leaf(token.DOT, ".")
    return all(part == dot for part in statement.children)
5479
5480
def max_delimiter_priority_in_atom(node: LN) -> Priority:
    """Return the highest delimiter priority found between the parentheses of `node`.

    Returns 0 when `node` is not an atom, when it is not enclosed in a pair of
    parentheses, or when the bracket tracker finds no delimiter at all.
    """
    if node.type != syms.atom:
        return 0

    opening, closing = node.children[0], node.children[-1]
    if opening.type != token.LPAR or closing.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for inner in node.children[1:-1]:
        # Feed every leaf between the parentheses to the tracker.
        inner_leaves = [inner] if isinstance(inner, Leaf) else inner.leaves()
        for leaf in inner_leaves:
            tracker.mark(leaf)

    try:
        return tracker.max_delimiter_priority()
    except ValueError:
        return 0
5507
5508
def ensure_visible(leaf: Leaf) -> None:
    """Give a parenthesis leaf its visible value.

    LPAR leaves get "(" and RPAR leaves get ")"; leaves of any other type are
    left alone.  Parentheses could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    visible_text = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_text is not None:
        leaf.value = visible_text
5519
5520
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?

    True only when `opening_bracket` belongs to an atom or an `import_from`
    node and the highest delimiter priority in `line` (ignoring a trailing
    comma) is the comma priority.
    """
    owner = opening_bracket.parent
    if not owner:
        return False

    if owner.type not in {syms.atom, syms.import_from}:
        return False

    # Substring test: an empty value (invisible parenthesis) passes it as well.
    if opening_bracket.value not in "[{(":
        return False

    try:
        trailing = line.leaves[-1]
        exclude: Set[LeafID] = set()
        if trailing.type == token.COMMA:
            exclude.add(id(trailing))
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        # Empty line, or no delimiters found.
        return False

    return max_priority == COMMA_PRIORITY
5539
5540
def get_features_used(node: Node) -> Set[Feature]:
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings;
    - underscores in numeric literals;
    - trailing commas after * or ** in function signatures and calls;
    - positional only arguments in function signatures and lambdas;
    - assignment expressions (`:=`).
    """
    features: Set[Feature] = set()
    for n in node.pre_order():
        if n.type == token.STRING:
            # Compare the head case-insensitively so that mixed-case prefixes
            # such as `Rf"..."` or `fR"..."` are recognized as f-strings too.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                features.add(Feature.F_STRINGS)

        elif n.type == token.NUMBER:
            if "_" in n.value:  # type: ignore
                features.add(Feature.NUMERIC_UNDERSCORES)

        elif n.type == token.SLASH:
            # A bare `/` directly inside an argument list marks positional-only
            # parameters.
            if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
                features.add(Feature.POS_ONLY_ARGUMENTS)

        elif n.type == token.COLONEQUAL:
            features.add(Feature.ASSIGNMENT_EXPRESSIONS)

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # An argument list ending in a comma: this only counts as a feature
            # when the list also contains a star or double star.
            if n.type == syms.typedargslist:
                feature = Feature.TRAILING_COMMA_IN_DEF
            else:
                feature = Feature.TRAILING_COMMA_IN_CALL

            for ch in n.children:
                if ch.type in STARS:
                    features.add(feature)

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            features.add(feature)

    return features
5588
5589
def detect_target_versions(node: Node) -> Set[TargetVersion]:
    """Return every target version that supports all features used under `node`."""
    used = get_features_used(node)
    compatible: Set[TargetVersion] = set()
    for candidate in TargetVersion:
        if used <= VERSION_TO_FEATURES[candidate]:
            compatible.add(candidate)
    return compatible
5596
5597
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
    set is empty.

    Note: the very same set object is yielded every time and keeps being
    updated in place between yields.
    """

    omit: Set[LeafID] = set()
    yield omit

    # Running length of the trailing part of the line examined so far; starts
    # at `4 * line.depth` (presumably the width of the indentation).
    length = 4 * line.depth
    # Opening bracket matching the current `closing_bracket`, until it is reached.
    opening_bracket: Optional[Leaf] = None
    # Most recent closing bracket that is a candidate for omission.
    closing_bracket: Optional[Leaf] = None
    # Closing brackets seen since the last yield that are not candidates
    # themselves; they are added to `omit` together with the next candidate.
    inner_brackets: Set[LeafID] = set()
    # Called with `reversed=True`; presumably this walks the leaves from the
    # end of the line backwards.
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # A reported length larger than value plus prefix is taken to mean that
        # an inline comment is attached to the leaf.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        if opening_bracket:
            # Still between `closing_bracket` and its opening bracket.
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (i.e. only add them to the `omit` set if another
                # pair of brackets was good enough).
                inner_brackets.add(id(leaf))
                continue

            if closing_bracket:
                # The previous candidate and everything after it fit in the
                # line, so it can be omitted.
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit

            # Brackets with an empty value (invisible ones) are not candidates.
            if leaf.value:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
5645                 closing_bracket = leaf
5646
5647
def get_future_imports(node: Node) -> Set[str]:
    """Collect the names imported from `__future__` at the top of the file.

    Only the leading simple statements of `node` are examined: a docstring is
    skipped, `from __future__ import ...` statements are collected, and the
    first statement of any other kind ends the search.
    """

    def imported_names(children: List[LN]) -> Generator[str, None, None]:
        """Yield the original (un-aliased) names found among `children`."""
        for item in children:
            if isinstance(item, Leaf):
                # Leaves other than names are skipped.
                if item.type == token.NAME:
                    yield item.value
                continue

            if item.type == syms.import_as_name:
                source_name = item.children[0]
                assert isinstance(source_name, Leaf), "Invalid syntax parsing imports"
                assert source_name.type == token.NAME, "Invalid syntax parsing imports"
                yield source_name.value
            elif item.type == syms.import_as_names:
                yield from imported_names(item.children)
            else:
                raise AssertionError("Invalid syntax parsing imports")

    found: Set[str] = set()
    for statement in node.children:
        if statement.type != syms.simple_stmt:
            break

        head = statement.children[0]
        if isinstance(head, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            is_docstring = (
                len(statement.children) == 2
                and head.type == token.STRING
                and statement.children[1].type == token.NEWLINE
            )
            if not is_docstring:
                break

            continue

        if head.type != syms.import_from:
            break

        module_name = head.children[1]
        if not isinstance(module_name, Leaf) or module_name.value != "__future__":
            break

        # Everything from the fourth child on is scanned for imported names.
        found.update(imported_names(head.children[3:]))

    return found
5696
5697
@lru_cache()
def get_gitignore(root: Path) -> PathSpec:
    """Return a PathSpec matching gitignore content if present.

    Reads `root / ".gitignore"` when it is a regular file; otherwise the
    returned spec is built from no patterns.  The file is always decoded as
    UTF-8 so the result does not depend on the platform's locale encoding.
    Results are memoized per `root` by `lru_cache`.
    """
    gitignore = root / ".gitignore"
    lines: List[str] = []
    if gitignore.is_file():
        # Explicit encoding: the locale default may be unable to decode
        # non-ASCII patterns.
        with gitignore.open(encoding="utf-8") as gf:
            lines = gf.readlines()
    return PathSpec.from_lines("gitwildmatch", lines)
5707
5708
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
    gitignore: PathSpec,
) -> Iterator[Path]:
    """Yield the files below `path` selected by `include` and not by `exclude`.

    Entries matched by `gitignore` are dropped first.  Remaining entries are
    compared against the regexes using their resolved path relative to `root`,
    written with a leading "/" (and a trailing "/" for directories).
    Directories are descended into recursively.

    Symbolic links pointing outside of the `root` directory are ignored.

    Every skipped entry is announced through `report.path_ignored()`.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for entry in path.iterdir():
        # .gitignore rules are consulted before anything else.
        # NOTE(review): the match uses the path as given rather than one
        # relative to `root` -- confirm this is what callers expect.
        if gitignore.match_file(entry.as_posix()):
            report.path_ignored(entry, "matches the .gitignore file content")
            continue

        try:
            relative = "/" + entry.resolve().relative_to(root).as_posix()
        except OSError as e:
            report.path_ignored(entry, f"cannot be read because {e}")
            continue

        except ValueError:
            # `relative_to` failed: only tolerated for symlinks leaving `root`.
            if not entry.is_symlink():
                raise

            report.path_ignored(entry, f"is a symbolic link that points outside {root}")
            continue

        if entry.is_dir():
            relative += "/"

        # An empty match does not count as an exclusion.
        excluded = exclude.search(relative)
        if excluded and excluded.group(0):
            report.path_ignored(entry, "matches the --exclude regular expression")
            continue

        if entry.is_dir():
            yield from gen_python_files_in_dir(
                entry, root, include, exclude, report, gitignore
            )

        elif entry.is_file() and include.search(relative):
            yield entry
5764
5765
@lru_cache()
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.

    `srcs` must be hashable (e.g. a tuple) because results are memoized with
    `lru_cache`.
    """
    if not srcs:
        return Path("/").resolve()

    # The directory each source lives in: the source itself when it is a
    # directory, its parent otherwise.
    base_dirs: List[Path] = []
    for src in srcs:
        resolved = Path(src).resolve()
        base_dirs.append(resolved if resolved.is_dir() else resolved.parent)

    try:
        # Start from the real common ancestor so that the returned root
        # contains every source, not only the lexicographically smallest one.
        common_base = Path(os.path.commonpath([str(base) for base in base_dirs]))
    except ValueError:
        # No common ancestor exists (e.g. different drives on Windows).
        common_base = min(base_dirs)

    for directory in (common_base, *common_base.parents):
        if (directory / ".git").exists():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # No marker found: `directory` is the last parent visited, i.e. the
    # file system root.
    return directory
5794
5795
@dataclass
class Report:
    """Provides a reformatting counter. Can be rendered with `str(report)`."""

    # Option flags that influence wording and how much is printed.
    check: bool = False
    diff: bool = False
    quiet: bool = False
    verbose: bool = False
    # Running tallies updated by `done()` and `failed()`.
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Record a successfully processed file and optionally print a message."""
        if changed is Changed.YES:
            if self.check or self.diff:
                verb = "would reformat"
            else:
                verb = "reformatted"
            if self.verbose or not self.quiet:
                out(f"{verb} {src}")
            self.change_count += 1
            return

        # Unchanged files are only mentioned in verbose mode.
        if self.verbose:
            if changed is Changed.NO:
                note = f"{src} already well formatted, good job."
            else:
                note = f"{src} wasn't modified on disk since last run."
            out(note, bold=False)
        self.same_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Record a file that could not be formatted and print the error."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    def path_ignored(self, path: Path, message: str) -> None:
        """Print why `path` was skipped; silent unless `verbose` is set."""
        if not self.verbose:
            return

        out(f"{path} ignored: {message}", bold=False)

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        This considers the current state of changed files and failures:
        - if there were any failures, return 123;
        - if any files were changed and --check is being used, return 1;
        - otherwise return 0.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special return codes reserved by the shell.
        if self.failure_count:
            return 123

        if self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        dry_run = self.check or self.diff
        reformatted = "would be reformatted" if dry_run else "reformatted"
        unchanged = "would be left unchanged" if dry_run else "left unchanged"
        failed = "would fail to reformat" if dry_run else "failed to reformat"

        def files(count: int) -> str:
            # "1 file" but "2 files".
            return f"{count} file{'s' if count > 1 else ''}"

        parts = []
        if self.change_count:
            parts.append(
                click.style(f"{files(self.change_count)} {reformatted}", bold=True)
            )
        if self.same_count:
            parts.append(f"{files(self.same_count)} {unchanged}")
        if self.failure_count:
            parts.append(
                click.style(f"{files(self.failure_count)} {failed}", fg="red")
            )
        return ", ".join(parts) + "."
5880
5881
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` into an AST, trying successively older grammars.

    On Python 3.8+ the builtin `ast` is tried with feature versions from the
    running minor version down to 3.5; otherwise `ast3` is tried with feature
    versions 7 and 6.  If every attempt raises `SyntaxError`, the result of
    `ast27.parse(src)` is returned.
    """
    filename = "<unknown>"
    if sys.version_info >= (3, 8):
        # TODO: support Python 4+ ;)
        attempts = [
            (ast.parse, (3, minor)) for minor in range(sys.version_info[1], 4, -1)
        ]
    else:
        attempts = [(ast3.parse, minor) for minor in (7, 6)]

    for parse, feature_version in attempts:
        try:
            return parse(src, filename, feature_version=feature_version)
        except SyntaxError:
            pass

    return ast27.parse(src)
5899
5900
def _fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    String, bytes, number and name-constant nodes are replaced by a fresh
    `ast.Constant` holding the same value; any other node is returned as is.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        payload = node.s
    elif isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        payload = node.n
    elif isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        payload = node.value
    else:
        return node

    return ast.Constant(value=payload)
5915
5916
def _stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Yield an indented, line-by-line textual rendering of `node`.

    The rendering is meant for comparing ASTs by content: fields are visited
    in sorted order, deprecated constant nodes are normalized first, and
    `depth` controls the indentation (two spaces per level).
    """
    node = _fixup_ast_constants(node)

    pad = "  " * depth
    label = node.__class__.__name__
    any_ast = (ast.AST, ast3.AST, ast27.AST)

    yield f"{pad}{label}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison
    type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
    if sys.version_info >= (3, 8):
        type_ignore_classes += (ast.TypeIgnore,)
    names = [] if isinstance(node, type_ignore_classes) else sorted(node._fields)

    for name in names:
        try:
            value = getattr(node, name)
        except AttributeError:
            continue

        yield f"{pad}  {name}="

        if isinstance(value, list):
            # Ignore nested tuples within del statements, because we may insert
            # parentheses and they change the AST.
            flatten_tuples = name == "targets" and isinstance(
                node, (ast.Delete, ast3.Delete, ast27.Delete)
            )
            for item in value:
                if flatten_tuples and isinstance(
                    item, (ast.Tuple, ast3.Tuple, ast27.Tuple)
                ):
                    for element in item.elts:
                        yield from _stringify_ast(element, depth + 2)

                elif isinstance(item, any_ast):
                    yield from _stringify_ast(item, depth + 2)

            continue

        if isinstance(value, any_ast):
            yield from _stringify_ast(value, depth + 2)
            continue

        # Constant strings may be indented across newlines, if they are
        # docstrings; fold spaces after newlines when comparing
        is_string_constant = (
            isinstance(node, ast.Constant)
            and name == "value"
            and isinstance(value, str)
        )
        if is_string_constant:
            normalized = re.sub(r"\n[ \t]+", "\n ", value)
        else:
            normalized = value
        yield f"{pad}    {normalized!r},  # {value.__class__.__name__}"

    yield f"{pad})  # /{label}"
5973
5974
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed and their stringified ASTs compared.  A source that
    cannot be parsed, a destination that cannot be parsed, and a mismatch each
    produce an AssertionError; the latter two also dump details to a log file
    whose path is included in the message.
    """
    try:
        before = parse_ast(src)
    except Exception as exc:
        raise AssertionError(
            "cannot use --safe with this file; failed to parse source file.  AST"
            f" error message: {exc}"
        )

    try:
        after = parse_ast(dst)
    except Exception as exc:
        trace = "".join(traceback.format_tb(exc.__traceback__))
        log = dump_to_file(trace, dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. Please report a bug"
            " on https://github.com/psf/black/issues.  This invalid output might be"
            f" helpful: {log}"
        ) from None

    before_text = "\n".join(_stringify_ast(before))
    after_text = "\n".join(_stringify_ast(after))
    if before_text == after_text:
        return

    log = dump_to_file(diff(before_text, after_text, "src", "dst"))
    raise AssertionError(
        "INTERNAL ERROR: Black produced code that is not equivalent to the"
        " source.  Please report a bug on https://github.com/psf/black/issues. "
        f" This diff might be helpful: {log}"
    ) from None
6004
6005
def assert_stable(src: str, dst: str, mode: Mode) -> None:
    """Raise AssertionError if `dst` reformats differently the second time.

    `dst` is formatted again with `mode`; when the result differs, both diffs
    (source to first pass, first pass to second pass) are dumped to a log file
    whose path is included in the error message.
    """
    second_pass = format_str(dst, mode=mode)
    if second_pass == dst:
        return

    log = dump_to_file(
        diff(src, dst, "source", "first pass"),
        diff(dst, second_pass, "first pass", "second pass"),
    )
    raise AssertionError(
        "INTERNAL ERROR: Black produced different code on the second pass of the"
        " formatter.  Please report a bug on https://github.com/psf/black/issues."
        f"  This diff might be helpful: {log}"
    ) from None
6019
6020
@mypyc_attr(patchable=True)
def dump_to_file(*output: str) -> str:
    """Write each string in `output` to a new temporary file and return its path.

    Every non-empty chunk is terminated with a newline if it does not already
    end with one.  The file (prefix "blk_", suffix ".log") is not deleted.
    """
    log_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    )
    with log_file:
        for chunk in output:
            log_file.write(chunk)
            if chunk and not chunk.endswith("\n"):
                log_file.write("\n")
    return log_file.name
6032
6033
@contextmanager
def nullcontext() -> Iterator[None]:
    """Return a context manager that does nothing and binds `None`.

    To be used like `nullcontext` in Python 3.7.
    """
    yield None
6041
6042
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header; five lines of
    context surround each change.  Identical inputs give an empty string.
    """
    import difflib

    def terminated(text: str) -> List[str]:
        # Re-split so that every line, including the last, ends with "\n".
        return [f"{row}\n" for row in text.splitlines()]

    hunks = difflib.unified_diff(
        terminated(a), terminated(b), fromfile=a_name, tofile=b_name, n=5
    )
    return "".join(hunks)
6052
6053
def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
    """asyncio signal handler: report "Aborted!" via `err`, then cancel all `tasks`."""
    err("Aborted!")
    for pending in tasks:
        pending.cancel()
6059
6060
def shutdown(loop: asyncio.AbstractEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even when there is nothing to cancel or when waiting
    for the cancelled tasks raises.
    """
    try:
        if sys.version_info[:2] >= (3, 7):
            all_tasks = asyncio.all_tasks
        else:
            all_tasks = asyncio.Task.all_tasks
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No explicit `loop=` here: that parameter of `gather()` was removed in
        # Python 3.10, and the tasks already carry the loop they belong to.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
6085
6086
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply `regex.sub(replacement, ...)` to `original` two times in a row.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
6094
6095
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile the regular expression string `regex`.

    A pattern containing a newline is compiled in verbose mode by prefixing
    it with the inline `(?x)` flag.
    """
    source = "(?x)" + regex if "\n" in regex else regex
    compiled: Pattern[str] = re.compile(source)
    return compiled
6105
6106
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Like `reversed(enumerate(sequence))` if that were possible.

    Yields `(index, element)` pairs from the last element to the first, with
    `index` being the element's position in `sequence`.
    """
    descending = range(len(sequence) - 1, -1, -1)
    yield from zip(descending, reversed(sequence))
6113
6114
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Yield `(index, leaf, length)` for the leaves of `line`.

    `length` is the size of the leaf's prefix and value plus the values of the
    comments attached after it.  With `reversed` set, leaves are visited from
    last to first.

    Iteration ends at the first leaf whose value contains a newline (multiline
    strings; presumably also multi-line standalone comments).
    """
    if reversed:
        pairs: Iterator[Tuple[Index, Leaf]] = enumerate_reversed(line.leaves)
    else:
        pairs = enumerate(line.leaves)

    for index, leaf in pairs:
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        trailing = sum(len(comment.value) for comment in line.comments_after(leaf))
        yield index, leaf, len(leaf.prefix) + len(leaf.value) + trailing
6135
6136
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    A rendering that contains a newline, or a line with standalone comments,
    is never considered short enough.
    """
    rendered = line_str or line_to_string(line)
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:  # multiline strings
        return False

    return not line.contains_standalone_comments()
6149
6150
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is not an exhaustive search but a cheap heuristic that we can use to
    avoid some unfortunate formattings (mostly around wrapping unsplittable code
    in unnecessary parentheses).

    Lines with fewer than two leaves are never splittable.  The detailed check
    only applies to lines that start with a string followed by a dot; all
    other lines with two or more leaves return True.
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
        call_count = 0
        dot_count = 0
        # NOTE(review): `next` is bound to the last leaf once and is never
        # updated inside the loop below, so every comparison is made against
        # the final leaf of the line -- confirm this is intended.
        next = leaves[-1]
        # Walk the leaves backwards, starting from the one before the last.
        for leaf in leaves[-2::-1]:
            if leaf.type in OPENING_BRACKETS:
                if next.type not in CLOSING_BRACKETS:
                    return False

                call_count += 1
            elif leaf.type == token.DOT:
                dot_count += 1
            elif leaf.type == token.NAME:
                if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
                    return False

            elif leaf.type not in CLOSING_BRACKETS:
                # Anything other than brackets, dots and names: give up.
                return False

            # More than one dot together with more than one opening bracket.
            if dot_count > 1 and call_count > 1:
                return False

    return True
6185
6186
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.

    `line_length` is the limit the accumulated leaf lengths are compared
    against.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        # `remainder` becomes True once the bracket closing `first` is reached;
        # from then on leaf lengths are accumulated.
        remainder = False
        # Start from 4 columns per depth level (presumably the indentation).
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                remainder = True
            if remainder:
                length += leaf_length
                if length > line_length:
                    break

                if leaf.type in OPENING_BRACKETS:
                    # There are brackets we can further split on.
                    remainder = False

        else:
            # The loop ended without `break`, i.e. the limit was never exceeded.
            # checked the entire string and line length wasn't exceeded
            # (`enumerate_with_length` may stop early, hence the index check.)
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket.  If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        # Accumulate lengths up to the bracket that `last` closes.
        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                # Fine if everything up to and including the trailing opening
                # bracket fits, or if an earlier bracket was seen.
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
6274
6275
def get_cache_file(mode: Mode) -> Path:
    """Return the path of the cache file for `mode` inside `CACHE_DIR`."""
    file_name = f"cache.{mode.get_cache_key()}.pickle"
    return CACHE_DIR / file_name
6278
6279
def read_cache(mode: Mode) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.

    Returns an empty cache when the file is missing or cannot be unpickled
    (including a truncated or empty file).
    """
    cache_file = get_cache_file(mode)
    if not cache_file.exists():
        return {}

    # NOTE: unpickling executes whatever the file encodes; the cache file is
    # assumed to be trusted, locally written data.
    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except (
            pickle.UnpicklingError,
            ValueError,
            # Other exceptions documented as possible during unpickling of
            # damaged data; EOFError covers empty and truncated files.
            AttributeError,
            EOFError,
            ImportError,
            IndexError,
        ):
            return {}

    return cache
6296
6297
def get_cache_info(path: Path) -> CacheInfo:
    """Return `(st_mtime, st_size)` of `path`.

    This is the information used to check if a file is already formatted or not.
    """
    file_stat = path.stat()
    return (file_stat.st_mtime, file_stat.st_size)
6302
6303
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Partition the paths in `sources` into `(todo, done)`.

    `todo` holds the resolved paths that are missing from `cache` or whose
    current cache info differs from the cached one; `done` holds the resolved
    paths whose cache info is unchanged.
    """
    todo, done = set(), set()
    for source in sources:
        resolved = source.resolve()
        is_stale = cache.get(resolved) != get_cache_info(resolved)
        bucket = todo if is_stale else done
        bucket.add(resolved)
    return todo, done
6318
6319
def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
    """Update the cache file.

    The entries of `cache` are merged with fresh cache info for `sources`,
    pickled to a temporary file in the cache file's directory and then moved
    over the cache file.  `OSError` raised along the way is ignored.
    """
    cache_file = get_cache_file(mode)
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        merged = dict(cache)
        merged.update((src.resolve(), get_cache_info(src)) for src in sources)
        scratch = tempfile.NamedTemporaryFile(
            dir=str(cache_file.parent), delete=False
        )
        with scratch:
            pickle.dump(merged, scratch, protocol=4)
        # Write-then-replace so the cache file is swapped in one step.
        os.replace(scratch.name, cache_file)
    except OSError:
        pass
6331
6332
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application.  Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code.  Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.

    Each Click module is imported independently; a module that cannot be
    imported is skipped instead of aborting the whole patch.
    """
    modules: List[Any] = []
    try:
        from click import core
    except ImportError:
        pass
    else:
        modules.append(core)
    try:
        # Not every Click release ships this module; a failing `from` import
        # raises plain ImportError, not ModuleNotFoundError.
        from click import _unicodefun  # type: ignore
    except ImportError:
        pass
    else:
        modules.append(_unicodefun)

    for module in modules:
        if hasattr(module, "_verify_python3_env"):
            module._verify_python3_env = lambda: None
6353
6354
def patched_main() -> None:
    """Call `freeze_support()`, apply `patch_click()`, then invoke `main()`."""
    freeze_support()
    patch_click()
    main()
6359
6360
def fix_docstring(docstring: str, prefix: str) -> str:
    """Re-indent the lines of `docstring` after the first one with `prefix`.

    Tabs are expanded, the first line is stripped, and the indentation shared
    by the remaining non-blank lines is replaced by `prefix`.  Blank lines
    become empty, except a blank final line, which is rendered as `prefix`
    alone.  If all lines after the first are blank they are dropped.
    """
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    if not docstring:
        return ""

    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    first, *rest = docstring.expandtabs().splitlines()

    # Indentation widths of the non-blank lines (first line doesn't count):
    widths = [len(row) - len(row.lstrip()) for row in rest if row.lstrip()]

    result = [first.strip()]
    if widths:
        margin = min(widths)
        final = len(rest) - 1
        for position, row in enumerate(rest):
            body = row[margin:].rstrip()
            result.append(prefix + body if body or position == final else "")

    # Return a single string:
    return "\n".join(result)
6386
6387
# Run the patched entry point only when this file is executed as a script.
if __name__ == "__main__":
    patched_main()