black.py

   1 import ast
   2 import asyncio
   3 from abc import ABC, abstractmethod
   4 from collections import defaultdict
   5 from concurrent.futures import Executor, ThreadPoolExecutor, ProcessPoolExecutor
   6 from contextlib import contextmanager
   7 from datetime import datetime
   8 from enum import Enum
   9 from functools import lru_cache, partial, wraps
  10 import io
  11 import itertools
  12 import logging
  13 from multiprocessing import Manager, freeze_support
  14 import os
  15 from pathlib import Path
  16 import pickle
  17 import regex as re
  18 import signal
  19 import sys
  20 import tempfile
  21 import tokenize
  22 import traceback
  23 from typing import (
  24     Any,
  25     Callable,
  26     Collection,
  27     Dict,
  28     Generator,
  29     Generic,
  30     Iterable,
  31     Iterator,
  32     List,
  33     Optional,
  34     Pattern,
  35     Sequence,
  36     Set,
  37     Tuple,
  38     Type,
  39     TypeVar,
  40     Union,
  41     cast,
  42     TYPE_CHECKING,
  43 )
  44 from typing_extensions import Final
  45 from mypy_extensions import mypyc_attr
  46
  47 from appdirs import user_cache_dir
  48 from dataclasses import dataclass, field, replace
  49 import click
  50 import toml
  51 from typed_ast import ast3, ast27
  52 from pathspec import PathSpec
  53
  54 # lib2to3 fork
  55 from blib2to3.pytree import Node, Leaf, type_repr
  56 from blib2to3 import pygram, pytree
  57 from blib2to3.pgen2 import driver, token
  58 from blib2to3.pgen2.grammar import Grammar
  59 from blib2to3.pgen2.parse import ParseError
  60
  61 from _black_version import version as __version__
  62
  63 if TYPE_CHECKING:
  64     import colorama  # noqa: F401
  65
  66 DEFAULT_LINE_LENGTH = 88
  67 DEFAULT_EXCLUDES = r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist)/"  # noqa: B950
  68 DEFAULT_INCLUDES = r"\.pyi?$"
  69 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
  70
  71 STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.
  72
  73
  74 # types
  75 FileContent = str
  76 Encoding = str
  77 NewLine = str
  78 Depth = int
  79 NodeType = int
  80 ParserState = int
  81 LeafID = int
  82 StringID = int
  83 Priority = int
  84 Index = int
  85 LN = Union[Leaf, Node]
  86 Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
  87 Timestamp = float
  88 FileSize = int
  89 CacheInfo = Tuple[Timestamp, FileSize]
  90 Cache = Dict[Path, CacheInfo]
  91 out = partial(click.secho, bold=True, err=True)
  92 err = partial(click.secho, fg="red", err=True)
  93
  94 pygram.initialize(CACHE_DIR)
  95 syms = pygram.python_symbols
  96
  97
  98 class NothingChanged(UserWarning):
  99     """Raised when reformatted code is the same as source."""
 100
 101
 102 class CannotTransform(Exception):
 103     """Base class for errors raised by Transformers."""
 104
 105
 106 class CannotSplit(CannotTransform):
 107     """A readable split that fits the allotted line length is impossible."""
 108
 109
 110 class InvalidInput(ValueError):
 111     """Raised when input source code fails all parse attempts."""
 112
 113
 114 T = TypeVar("T")
 115 E = TypeVar("E", bound=Exception)
 116
 117
 118 class Ok(Generic[T]):
 119     def __init__(self, value: T) -> None:
 120         self._value = value
 121
 122     def ok(self) -> T:
 123         return self._value
 124
 125
 126 class Err(Generic[E]):
 127     def __init__(self, e: E) -> None:
 128         self._e = e
 129
 130     def err(self) -> E:
 131         return self._e
 132
 133
 134 # The 'Result' return type is used to implement an error-handling model heavily
 135 # influenced by that used by the Rust programming language
 136 # (see https://doc.rust-lang.org/book/ch09-00-error-handling.html).
 137 Result = Union[Ok[T], Err[E]]
 138 TResult = Result[T, CannotTransform]  # (T)ransform Result
 139 TMatchResult = TResult[Index]
 140
 141
 142 class WriteBack(Enum):
 143     NO = 0
 144     YES = 1
 145     DIFF = 2
 146     CHECK = 3
 147     COLOR_DIFF = 4
 148
 149     @classmethod
 150     def from_configuration(
 151         cls, *, check: bool, diff: bool, color: bool = False
 152     ) -> "WriteBack":
 153         if check and not diff:
 154             return cls.CHECK
 155
 156         if diff and color:
 157             return cls.COLOR_DIFF
 158
 159         return cls.DIFF if diff else cls.YES
 160
 161
 162 class Changed(Enum):
 163     NO = 0
 164     CACHED = 1
 165     YES = 2
 166
 167
 168 class TargetVersion(Enum):
 169     PY27 = 2
 170     PY33 = 3
 171     PY34 = 4
 172     PY35 = 5
 173     PY36 = 6
 174     PY37 = 7
 175     PY38 = 8
 176
 177     def is_python2(self) -> bool:
 178         return self is TargetVersion.PY27
 179
 180
 181 PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
 182
 183
 184 class Feature(Enum):
 185     # All string literals are unicode
 186     UNICODE_LITERALS = 1
 187     F_STRINGS = 2
 188     NUMERIC_UNDERSCORES = 3
 189     TRAILING_COMMA_IN_CALL = 4
 190     TRAILING_COMMA_IN_DEF = 5
 191     # The following two feature-flags are mutually exclusive, and exactly one should be
 192     # set for every version of python.
 193     ASYNC_IDENTIFIERS = 6
 194     ASYNC_KEYWORDS = 7
 195     ASSIGNMENT_EXPRESSIONS = 8
 196     POS_ONLY_ARGUMENTS = 9
 197
 198
 199 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
 200     TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
 201     TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 202     TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 203     TargetVersion.PY35: {
 204         Feature.UNICODE_LITERALS,
 205         Feature.TRAILING_COMMA_IN_CALL,
 206         Feature.ASYNC_IDENTIFIERS,
 207     },
 208     TargetVersion.PY36: {
 209         Feature.UNICODE_LITERALS,
 210         Feature.F_STRINGS,
 211         Feature.NUMERIC_UNDERSCORES,
 212         Feature.TRAILING_COMMA_IN_CALL,
 213         Feature.TRAILING_COMMA_IN_DEF,
 214         Feature.ASYNC_IDENTIFIERS,
 215     },
 216     TargetVersion.PY37: {
 217         Feature.UNICODE_LITERALS,
 218         Feature.F_STRINGS,
 219         Feature.NUMERIC_UNDERSCORES,
 220         Feature.TRAILING_COMMA_IN_CALL,
 221         Feature.TRAILING_COMMA_IN_DEF,
 222         Feature.ASYNC_KEYWORDS,
 223     },
 224     TargetVersion.PY38: {
 225         Feature.UNICODE_LITERALS,
 226         Feature.F_STRINGS,
 227         Feature.NUMERIC_UNDERSCORES,
 228         Feature.TRAILING_COMMA_IN_CALL,
 229         Feature.TRAILING_COMMA_IN_DEF,
 230         Feature.ASYNC_KEYWORDS,
 231         Feature.ASSIGNMENT_EXPRESSIONS,
 232         Feature.POS_ONLY_ARGUMENTS,
 233     },
 234 }
 235
 236
 237 @dataclass
 238 class Mode:
 239     target_versions: Set[TargetVersion] = field(default_factory=set)
 240     line_length: int = DEFAULT_LINE_LENGTH
 241     string_normalization: bool = True
 242     is_pyi: bool = False
 243
 244     def get_cache_key(self) -> str:
 245         if self.target_versions:
 246             version_str = ",".join(
 247                 str(version.value)
 248                 for version in sorted(self.target_versions, key=lambda v: v.value)
 249             )
 250         else:
 251             version_str = "-"
 252         parts = [
 253             version_str,
 254             str(self.line_length),
 255             str(int(self.string_normalization)),
 256             str(int(self.is_pyi)),
 257         ]
 258         return ".".join(parts)
 259
 260
 261 # Legacy name, left for integrations.
 262 FileMode = Mode
 263
 264
 265 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 266     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 267
 268
 269 def find_pyproject_toml(path_search_start: str) -> Optional[str]:
 270     """Find the absolute filepath to a pyproject.toml if it exists"""
 271     path_project_root = find_project_root(path_search_start)
 272     path_pyproject_toml = path_project_root / "pyproject.toml"
 273     return str(path_pyproject_toml) if path_pyproject_toml.is_file() else None
 274
 275
 276 def parse_pyproject_toml(path_config: str) -> Dict[str, Any]:
 277     """Parse a pyproject toml file, pulling out relevant parts for Black
 278
 279     If parsing fails, will raise a toml.TomlDecodeError
 280     """
 281     pyproject_toml = toml.load(path_config)
 282     config = pyproject_toml.get("tool", {}).get("black", {})
 283     return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 284
 285
 286 def read_pyproject_toml(
 287     ctx: click.Context, param: click.Parameter, value: Optional[str]
 288 ) -> Optional[str]:
 289     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 290
 291     Returns the path to a successfully found and read configuration file, None
 292     otherwise.
 293     """
 294     if not value:
 295         value = find_pyproject_toml(ctx.params.get("src", ()))
 296         if value is None:
 297             return None
 298
 299     try:
 300         config = parse_pyproject_toml(value)
 301     except (toml.TomlDecodeError, OSError) as e:
 302         raise click.FileError(
 303             filename=value, hint=f"Error reading configuration file: {e}"
 304         )
 305
 306     if not config:
 307         return None
 308
 309     target_version = config.get("target_version")
 310     if target_version is not None and not isinstance(target_version, list):
 311         raise click.BadOptionUsage(
 312             "target-version", f"Config key target-version must be a list"
 313         )
 314
 315     default_map: Dict[str, Any] = {}
 316     if ctx.default_map:
 317         default_map.update(ctx.default_map)
 318     default_map.update(config)
 319
 320     ctx.default_map = default_map
 321     return value
 322
 323
 324 def target_version_option_callback(
 325     c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...]
 326 ) -> List[TargetVersion]:
 327     """Compute the target versions from a --target-version flag.
 328
 329     This is its own function because mypy couldn't infer the type correctly
 330     when it was a lambda, causing mypyc trouble.
 331     """
 332     return [TargetVersion[val.upper()] for val in v]
 333
 334
 335 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 336 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
 337 @click.option(
 338     "-l",
 339     "--line-length",
 340     type=int,
 341     default=DEFAULT_LINE_LENGTH,
 342     help="How many characters per line to allow.",
 343     show_default=True,
 344 )
 345 @click.option(
 346     "-t",
 347     "--target-version",
 348     type=click.Choice([v.name.lower() for v in TargetVersion]),
 349     callback=target_version_option_callback,
 350     multiple=True,
 351     help=(
 352         "Python versions that should be supported by Black's output. [default: per-file"
 353         " auto-detection]"
 354     ),
 355 )
 356 @click.option(
 357     "--py36",
 358     is_flag=True,
 359     help=(
 360         "Allow using Python 3.6-only syntax on all input files.  This will put trailing"
 361         " commas in function signatures and calls also after *args and **kwargs."
 362         " Deprecated; use --target-version instead. [default: per-file auto-detection]"
 363     ),
 364 )
 365 @click.option(
 366     "--pyi",
 367     is_flag=True,
 368     help=(
 369         "Format all input files like typing stubs regardless of file extension (useful"
 370         " when piping source on standard input)."
 371     ),
 372 )
 373 @click.option(
 374     "-S",
 375     "--skip-string-normalization",
 376     is_flag=True,
 377     help="Don't normalize string quotes or prefixes.",
 378 )
 379 @click.option(
 380     "--check",
 381     is_flag=True,
 382     help=(
 383         "Don't write the files back, just return the status.  Return code 0 means"
 384         " nothing would change.  Return code 1 means some files would be reformatted."
 385         " Return code 123 means there was an internal error."
 386     ),
 387 )
 388 @click.option(
 389     "--diff",
 390     is_flag=True,
 391     help="Don't write the files back, just output a diff for each file on stdout.",
 392 )
 393 @click.option(
 394     "--color/--no-color",
 395     is_flag=True,
 396     help="Show colored diff. Only applies when `--diff` is given.",
 397 )
 398 @click.option(
 399     "--fast/--safe",
 400     is_flag=True,
 401     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 402 )
 403 @click.option(
 404     "--include",
 405     type=str,
 406     default=DEFAULT_INCLUDES,
 407     help=(
 408         "A regular expression that matches files and directories that should be"
 409         " included on recursive searches.  An empty value means all files are included"
 410         " regardless of the name.  Use forward slashes for directories on all platforms"
 411         " (Windows, too).  Exclusions are calculated first, inclusions later."
 412     ),
 413     show_default=True,
 414 )
 415 @click.option(
 416     "--exclude",
 417     type=str,
 418     default=DEFAULT_EXCLUDES,
 419     help=(
 420         "A regular expression that matches files and directories that should be"
 421         " excluded on recursive searches.  An empty value means no paths are excluded."
 422         " Use forward slashes for directories on all platforms (Windows, too). "
 423         " Exclusions are calculated first, inclusions later."
 424     ),
 425     show_default=True,
 426 )
 427 @click.option(
 428     "-q",
 429     "--quiet",
 430     is_flag=True,
 431     help=(
 432         "Don't emit non-error messages to stderr. Errors are still emitted; silence"
 433         " those with 2>/dev/null."
 434     ),
 435 )
 436 @click.option(
 437     "-v",
 438     "--verbose",
 439     is_flag=True,
 440     help=(
 441         "Also emit messages to stderr about files that were not changed or were ignored"
 442         " due to --exclude=."
 443     ),
 444 )
 445 @click.version_option(version=__version__)
 446 @click.argument(
 447     "src",
 448     nargs=-1,
 449     type=click.Path(
 450         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 451     ),
 452     is_eager=True,
 453 )
 454 @click.option(
 455     "--config",
 456     type=click.Path(
 457         exists=True,
 458         file_okay=True,
 459         dir_okay=False,
 460         readable=True,
 461         allow_dash=False,
 462         path_type=str,
 463     ),
 464     is_eager=True,
 465     callback=read_pyproject_toml,
 466     help="Read configuration from PATH.",
 467 )
 468 @click.pass_context
 469 def main(
 470     ctx: click.Context,
 471     code: Optional[str],
 472     line_length: int,
 473     target_version: List[TargetVersion],
 474     check: bool,
 475     diff: bool,
 476     color: bool,
 477     fast: bool,
 478     pyi: bool,
 479     py36: bool,
 480     skip_string_normalization: bool,
 481     quiet: bool,
 482     verbose: bool,
 483     include: str,
 484     exclude: str,
 485     src: Tuple[str, ...],
 486     config: Optional[str],
 487 ) -> None:
 488     """The uncompromising code formatter."""
 489     write_back = WriteBack.from_configuration(check=check, diff=diff, color=color)
 490     if target_version:
 491         if py36:
 492             err("Cannot use both --target-version and --py36")
 493             ctx.exit(2)
 494         else:
 495             versions = set(target_version)
 496     elif py36:
 497         err(
 498             "--py36 is deprecated and will be removed in a future version. Use"
 499             " --target-version py36 instead."
 500         )
 501         versions = PY36_VERSIONS
 502     else:
 503         # We'll autodetect later.
 504         versions = set()
 505     mode = Mode(
 506         target_versions=versions,
 507         line_length=line_length,
 508         is_pyi=pyi,
 509         string_normalization=not skip_string_normalization,
 510     )
 511     if config and verbose:
 512         out(f"Using configuration from {config}.", bold=False, fg="blue")
 513     if code is not None:
 514         print(format_str(code, mode=mode))
 515         ctx.exit(0)
 516     try:
 517         include_regex = re_compile_maybe_verbose(include)
 518     except re.error:
 519         err(f"Invalid regular expression for include given: {include!r}")
 520         ctx.exit(2)
 521     try:
 522         exclude_regex = re_compile_maybe_verbose(exclude)
 523     except re.error:
 524         err(f"Invalid regular expression for exclude given: {exclude!r}")
 525         ctx.exit(2)
 526     report = Report(check=check, diff=diff, quiet=quiet, verbose=verbose)
 527     root = find_project_root(src)
 528     sources: Set[Path] = set()
 529     path_empty(src, quiet, verbose, ctx)
 530     for s in src:
 531         p = Path(s)
 532         if p.is_dir():
 533             sources.update(
 534                 gen_python_files_in_dir(
 535                     p, root, include_regex, exclude_regex, report, get_gitignore(root)
 536                 )
 537             )
 538         elif p.is_file() or s == "-":
 539             # if a file was explicitly given, we don't care about its extension
 540             sources.add(p)
 541         else:
 542             err(f"invalid path: {s}")
 543     if len(sources) == 0:
 544         if verbose or not quiet:
 545             out("No Python files are present to be formatted. Nothing to do 😴")
 546         ctx.exit(0)
 547
 548     if len(sources) == 1:
 549         reformat_one(
 550             src=sources.pop(),
 551             fast=fast,
 552             write_back=write_back,
 553             mode=mode,
 554             report=report,
 555         )
 556     else:
 557         reformat_many(
 558             sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
 559         )
 560
 561     if verbose or not quiet:
 562         out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
 563         click.secho(str(report), err=True)
 564     ctx.exit(report.return_code)
 565
 566
 567 def path_empty(
 568     src: Tuple[str, ...], quiet: bool, verbose: bool, ctx: click.Context
 569 ) -> None:
 570     """
 571     Exit if there is no `src` provided for formatting
 572     """
 573     if not src:
 574         if verbose or not quiet:
 575             out("No Path provided. Nothing to do 😴")
 576             ctx.exit(0)
 577
 578
 579 def reformat_one(
 580     src: Path, fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 581 ) -> None:
 582     """Reformat a single file under `src` without spawning child processes.
 583
 584     `fast`, `write_back`, and `mode` options are passed to
 585     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 586     """
 587     try:
 588         changed = Changed.NO
 589         if not src.is_file() and str(src) == "-":
 590             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 591                 changed = Changed.YES
 592         else:
 593             cache: Cache = {}
 594             if write_back != WriteBack.DIFF:
 595                 cache = read_cache(mode)
 596                 res_src = src.resolve()
 597                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 598                     changed = Changed.CACHED
 599             if changed is not Changed.CACHED and format_file_in_place(
 600                 src, fast=fast, write_back=write_back, mode=mode
 601             ):
 602                 changed = Changed.YES
 603             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 604                 write_back is WriteBack.CHECK and changed is Changed.NO
 605             ):
 606                 write_cache(cache, [src], mode)
 607         report.done(src, changed)
 608     except Exception as exc:
 609         report.failed(src, str(exc))
 610
 611
 612 def reformat_many(
 613     sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report"
 614 ) -> None:
 615     """Reformat multiple files using a ProcessPoolExecutor."""
 616     executor: Executor
 617     loop = asyncio.get_event_loop()
 618     worker_count = os.cpu_count()
 619     if sys.platform == "win32":
 620         # Work around https://bugs.python.org/issue26903
 621         worker_count = min(worker_count, 61)
 622     try:
 623         executor = ProcessPoolExecutor(max_workers=worker_count)
 624     except OSError:
 625         # we arrive here if the underlying system does not support multi-processing
 626         # like in AWS Lambda, in which case we gracefully fallback to
 627         # a ThreadPollExecutor with just a single worker (more workers would not do us
 628         # any good due to the Global Interpreter Lock)
 629         executor = ThreadPoolExecutor(max_workers=1)
 630
 631     try:
 632         loop.run_until_complete(
 633             schedule_formatting(
 634                 sources=sources,
 635                 fast=fast,
 636                 write_back=write_back,
 637                 mode=mode,
 638                 report=report,
 639                 loop=loop,
 640                 executor=executor,
 641             )
 642         )
 643     finally:
 644         shutdown(loop)
 645         if executor is not None:
 646             executor.shutdown()
 647
 648
 649 async def schedule_formatting(
 650     sources: Set[Path],
 651     fast: bool,
 652     write_back: WriteBack,
 653     mode: Mode,
 654     report: "Report",
 655     loop: asyncio.AbstractEventLoop,
 656     executor: Executor,
 657 ) -> None:
 658     """Run formatting of `sources` in parallel using the provided `executor`.
 659
 660     (Use ProcessPoolExecutors for actual parallelism.)
 661
 662     `write_back`, `fast`, and `mode` options are passed to
 663     :func:`format_file_in_place`.
 664     """
 665     cache: Cache = {}
 666     if write_back != WriteBack.DIFF:
 667         cache = read_cache(mode)
 668         sources, cached = filter_cached(cache, sources)
 669         for src in sorted(cached):
 670             report.done(src, Changed.CACHED)
 671     if not sources:
 672         return
 673
 674     cancelled = []
 675     sources_to_cache = []
 676     lock = None
 677     if write_back == WriteBack.DIFF:
 678         # For diff output, we need locks to ensure we don't interleave output
 679         # from different processes.
 680         manager = Manager()
 681         lock = manager.Lock()
 682     tasks = {
 683         asyncio.ensure_future(
 684             loop.run_in_executor(
 685                 executor, format_file_in_place, src, fast, mode, write_back, lock
 686             )
 687         ): src
 688         for src in sorted(sources)
 689     }
 690     pending: Iterable["asyncio.Future[bool]"] = tasks.keys()
 691     try:
 692         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 693         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 694     except NotImplementedError:
 695         # There are no good alternatives for these on Windows.
 696         pass
 697     while pending:
 698         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 699         for task in done:
 700             src = tasks.pop(task)
 701             if task.cancelled():
 702                 cancelled.append(task)
 703             elif task.exception():
 704                 report.failed(src, str(task.exception()))
 705             else:
 706                 changed = Changed.YES if task.result() else Changed.NO
 707                 # If the file was written back or was successfully checked as
 708                 # well-formatted, store this information in the cache.
 709                 if write_back is WriteBack.YES or (
 710                     write_back is WriteBack.CHECK and changed is Changed.NO
 711                 ):
 712                     sources_to_cache.append(src)
 713                 report.done(src, changed)
 714     if cancelled:
 715         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 716     if sources_to_cache:
 717         write_cache(cache, sources_to_cache, mode)
 718
 719
 720 def format_file_in_place(
 721     src: Path,
 722     fast: bool,
 723     mode: Mode,
 724     write_back: WriteBack = WriteBack.NO,
 725     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 726 ) -> bool:
 727     """Format file under `src` path. Return True if changed.
 728
 729     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 730     code to the file.
 731     `mode` and `fast` options are passed to :func:`format_file_contents`.
 732     """
 733     if src.suffix == ".pyi":
 734         mode = replace(mode, is_pyi=True)
 735
 736     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 737     with open(src, "rb") as buf:
 738         src_contents, encoding, newline = decode_bytes(buf.read())
 739     try:
 740         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 741     except NothingChanged:
 742         return False
 743
 744     if write_back == WriteBack.YES:
 745         with open(src, "w", encoding=encoding, newline=newline) as f:
 746             f.write(dst_contents)
 747     elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 748         now = datetime.utcnow()
 749         src_name = f"{src}\t{then} +0000"
 750         dst_name = f"{src}\t{now} +0000"
 751         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 752
 753         if write_back == write_back.COLOR_DIFF:
 754             diff_contents = color_diff(diff_contents)
 755
 756         with lock or nullcontext():
 757             f = io.TextIOWrapper(
 758                 sys.stdout.buffer,
 759                 encoding=encoding,
 760                 newline=newline,
 761                 write_through=True,
 762             )
 763             f = wrap_stream_for_windows(f)
 764             f.write(diff_contents)
 765             f.detach()
 766
 767     return True
 768
 769
 770 def color_diff(contents: str) -> str:
 771     """Inject the ANSI color codes to the diff."""
 772     lines = contents.split("\n")
 773     for i, line in enumerate(lines):
 774         if line.startswith("+++") or line.startswith("---"):
 775             line = "\033[1;37m" + line + "\033[0m"  # bold white, reset
 776         if line.startswith("@@"):
 777             line = "\033[36m" + line + "\033[0m"  # cyan, reset
 778         if line.startswith("+"):
 779             line = "\033[32m" + line + "\033[0m"  # green, reset
 780         elif line.startswith("-"):
 781             line = "\033[31m" + line + "\033[0m"  # red, reset
 782         lines[i] = line
 783     return "\n".join(lines)
 784
 785
 786 def wrap_stream_for_windows(
 787     f: io.TextIOWrapper,
 788 ) -> Union[io.TextIOWrapper, "colorama.AnsiToWin32.AnsiToWin32"]:
 789     """
 790     Wrap the stream in colorama's wrap_stream so colors are shown on Windows.
 791
 792     If `colorama` is not found, then no change is made. If `colorama` does
 793     exist, then it handles the logic to determine whether or not to change
 794     things.
 795     """
 796     try:
 797         from colorama import initialise
 798
 799         # We set `strip=False` so that we can don't have to modify
 800         # test_express_diff_with_color.
 801         f = initialise.wrap_stream(
 802             f, convert=None, strip=False, autoreset=False, wrap=True
 803         )
 804
 805         # wrap_stream returns a `colorama.AnsiToWin32.AnsiToWin32` object
 806         # which does not have a `detach()` method. So we fake one.
 807         f.detach = lambda *args, **kwargs: None  # type: ignore
 808     except ImportError:
 809         pass
 810
 811     return f
 812
 813
 814 def format_stdin_to_stdout(
 815     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: Mode
 816 ) -> bool:
 817     """Format file on stdin. Return True if changed.
 818
 819     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 820     write a diff to stdout. The `mode` argument is passed to
 821     :func:`format_file_contents`.
 822     """
 823     then = datetime.utcnow()
 824     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 825     dst = src
 826     try:
 827         dst = format_file_contents(src, fast=fast, mode=mode)
 828         return True
 829
 830     except NothingChanged:
 831         return False
 832
 833     finally:
 834         f = io.TextIOWrapper(
 835             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 836         )
 837         if write_back == WriteBack.YES:
 838             f.write(dst)
 839         elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
 840             now = datetime.utcnow()
 841             src_name = f"STDIN\t{then} +0000"
 842             dst_name = f"STDOUT\t{now} +0000"
 843             d = diff(src, dst, src_name, dst_name)
 844             if write_back == WriteBack.COLOR_DIFF:
 845                 d = color_diff(d)
 846                 f = wrap_stream_for_windows(f)
 847             f.write(d)
 848         f.detach()
 849
 850
 851 def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileContent:
 852     """Reformat contents a file and return new contents.
 853
 854     If `fast` is False, additionally confirm that the reformatted code is
 855     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 856     `mode` is passed to :func:`format_str`.
 857     """
 858     if src_contents.strip() == "":
 859         raise NothingChanged
 860
 861     dst_contents = format_str(src_contents, mode=mode)
 862     if src_contents == dst_contents:
 863         raise NothingChanged
 864
 865     if not fast:
 866         assert_equivalent(src_contents, dst_contents)
 867         assert_stable(src_contents, dst_contents, mode=mode)
 868     return dst_contents
 869
 870
 871 def format_str(src_contents: str, *, mode: Mode) -> FileContent:
 872     """Reformat a string and return new contents.
 873
 874     `mode` determines formatting options, such as how many characters per line are
 875     allowed.  Example:
 876
 877     >>> import black
 878     >>> print(black.format_str("def f(arg:str='')->None:...", mode=Mode()))
 879     def f(arg: str = "") -> None:
 880         ...
 881
 882     A more complex example:
 883     >>> print(
 884     ...   black.format_str(
 885     ...     "def f(arg:str='')->None: hey",
 886     ...     mode=black.Mode(
 887     ...       target_versions={black.TargetVersion.PY36},
 888     ...       line_length=10,
 889     ...       string_normalization=False,
 890     ...       is_pyi=False,
 891     ...     ),
 892     ...   ),
 893     ... )
 894     def f(
 895         arg: str = '',
 896     ) -> None:
 897         hey
 898
 899     """
 900     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 901     dst_contents = []
 902     future_imports = get_future_imports(src_node)
 903     if mode.target_versions:
 904         versions = mode.target_versions
 905     else:
 906         versions = detect_target_versions(src_node)
 907     normalize_fmt_off(src_node)
 908     lines = LineGenerator(
 909         remove_u_prefix="unicode_literals" in future_imports
 910         or supports_feature(versions, Feature.UNICODE_LITERALS),
 911         is_pyi=mode.is_pyi,
 912         normalize_strings=mode.string_normalization,
 913     )
 914     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 915     empty_line = Line()
 916     after = 0
 917     split_line_features = {
 918         feature
 919         for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
 920         if supports_feature(versions, feature)
 921     }
 922     for current_line in lines.visit(src_node):
 923         dst_contents.append(str(empty_line) * after)
 924         before, after = elt.maybe_empty_lines(current_line)
 925         dst_contents.append(str(empty_line) * before)
 926         for line in transform_line(
 927             current_line,
 928             line_length=mode.line_length,
 929             normalize_strings=mode.string_normalization,
 930             features=split_line_features,
 931         ):
 932             dst_contents.append(str(line))
 933     return "".join(dst_contents)
 934
 935
 936 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 937     """Return a tuple of (decoded_contents, encoding, newline).
 938
 939     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 940     universal newlines (i.e. only contains LF).
 941     """
 942     srcbuf = io.BytesIO(src)
 943     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 944     if not lines:
 945         return "", encoding, "\n"
 946
 947     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 948     srcbuf.seek(0)
 949     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 950         return tiow.read(), encoding, newline
 951
 952
 953 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
 954     if not target_versions:
 955         # No target_version specified, so try all grammars.
 956         return [
 957             # Python 3.7+
 958             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
 959             # Python 3.0-3.6
 960             pygram.python_grammar_no_print_statement_no_exec_statement,
 961             # Python 2.7 with future print_function import
 962             pygram.python_grammar_no_print_statement,
 963             # Python 2.7
 964             pygram.python_grammar,
 965         ]
 966
 967     if all(version.is_python2() for version in target_versions):
 968         # Python 2-only code, so try Python 2 grammars.
 969         return [
 970             # Python 2.7 with future print_function import
 971             pygram.python_grammar_no_print_statement,
 972             # Python 2.7
 973             pygram.python_grammar,
 974         ]
 975
 976     # Python 3-compatible code, so only try Python 3 grammar.
 977     grammars = []
 978     # If we have to parse both, try to parse async as a keyword first
 979     if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
 980         # Python 3.7+
 981         grammars.append(
 982             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
 983         )
 984     if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
 985         # Python 3.0-3.6
 986         grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
 987     # At least one of the above branches must have been taken, because every Python
 988     # version has exactly one of the two 'ASYNC_*' flags
 989     return grammars
 990
 991
 992 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
 993     """Given a string with source, return the lib2to3 Node."""
 994     if src_txt[-1:] != "\n":
 995         src_txt += "\n"
 996
 997     for grammar in get_grammars(set(target_versions)):
 998         drv = driver.Driver(grammar, pytree.convert)
 999         try:
1000             result = drv.parse_string(src_txt, True)
1001             break
1002
1003         except ParseError as pe:
1004             lineno, column = pe.context[1]
1005             lines = src_txt.splitlines()
1006             try:
1007                 faulty_line = lines[lineno - 1]
1008             except IndexError:
1009                 faulty_line = "<line number missing in source>"
1010             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
1011     else:
1012         raise exc from None
1013
1014     if isinstance(result, Leaf):
1015         result = Node(syms.file_input, [result])
1016     return result
1017
1018
1019 def lib2to3_unparse(node: Node) -> str:
1020     """Given a lib2to3 node, return its string representation."""
1021     code = str(node)
1022     return code
1023
1024
1025 class Visitor(Generic[T]):
1026     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
1027
1028     def visit(self, node: LN) -> Iterator[T]:
1029         """Main method to visit `node` and its children.
1030
1031         It tries to find a `visit_*()` method for the given `node.type`, like
1032         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
1033         If no dedicated `visit_*()` method is found, chooses `visit_default()`
1034         instead.
1035
1036         Then yields objects of type `T` from the selected visitor.
1037         """
1038         if node.type < 256:
1039             name = token.tok_name[node.type]
1040         else:
1041             name = str(type_repr(node.type))
1042         # We explicitly branch on whether a visitor exists (instead of
1043         # using self.visit_default as the default arg to getattr) in order
1044         # to save needing to create a bound method object and so mypyc can
1045         # generate a native call to visit_default.
1046         visitf = getattr(self, f"visit_{name}", None)
1047         if visitf:
1048             yield from visitf(node)
1049         else:
1050             yield from self.visit_default(node)
1051
1052     def visit_default(self, node: LN) -> Iterator[T]:
1053         """Default `visit_*()` implementation. Recurses to children of `node`."""
1054         if isinstance(node, Node):
1055             for child in node.children:
1056                 yield from self.visit(child)
1057
1058
1059 @dataclass
1060 class DebugVisitor(Visitor[T]):
1061     tree_depth: int = 0
1062
1063     def visit_default(self, node: LN) -> Iterator[T]:
1064         indent = " " * (2 * self.tree_depth)
1065         if isinstance(node, Node):
1066             _type = type_repr(node.type)
1067             out(f"{indent}{_type}", fg="yellow")
1068             self.tree_depth += 1
1069             for child in node.children:
1070                 yield from self.visit(child)
1071
1072             self.tree_depth -= 1
1073             out(f"{indent}/{_type}", fg="yellow", bold=False)
1074         else:
1075             _type = token.tok_name.get(node.type, str(node.type))
1076             out(f"{indent}{_type}", fg="blue", nl=False)
1077             if node.prefix:
1078                 # We don't have to handle prefixes for `Node` objects since
1079                 # that delegates to the first child anyway.
1080                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
1081             out(f" {node.value!r}", fg="blue", bold=False)
1082
1083     @classmethod
1084     def show(cls, code: Union[str, Leaf, Node]) -> None:
1085         """Pretty-print the lib2to3 AST of a given string of `code`.
1086
1087         Convenience method for debugging.
1088         """
1089         v: DebugVisitor[None] = DebugVisitor()
1090         if isinstance(code, str):
1091             code = lib2to3_parse(code)
1092         list(v.visit(code))
1093
1094
1095 WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
1096 STATEMENT: Final = {
1097     syms.if_stmt,
1098     syms.while_stmt,
1099     syms.for_stmt,
1100     syms.try_stmt,
1101     syms.except_clause,
1102     syms.with_stmt,
1103     syms.funcdef,
1104     syms.classdef,
1105 }
1106 STANDALONE_COMMENT: Final = 153
1107 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
1108 LOGIC_OPERATORS: Final = {"and", "or"}
1109 COMPARATORS: Final = {
1110     token.LESS,
1111     token.GREATER,
1112     token.EQEQUAL,
1113     token.NOTEQUAL,
1114     token.LESSEQUAL,
1115     token.GREATEREQUAL,
1116 }
1117 MATH_OPERATORS: Final = {
1118     token.VBAR,
1119     token.CIRCUMFLEX,
1120     token.AMPER,
1121     token.LEFTSHIFT,
1122     token.RIGHTSHIFT,
1123     token.PLUS,
1124     token.MINUS,
1125     token.STAR,
1126     token.SLASH,
1127     token.DOUBLESLASH,
1128     token.PERCENT,
1129     token.AT,
1130     token.TILDE,
1131     token.DOUBLESTAR,
1132 }
1133 STARS: Final = {token.STAR, token.DOUBLESTAR}
1134 VARARGS_SPECIALS: Final = STARS | {token.SLASH}
1135 VARARGS_PARENTS: Final = {
1136     syms.arglist,
1137     syms.argument,  # double star in arglist
1138     syms.trailer,  # single argument to call
1139     syms.typedargslist,
1140     syms.varargslist,  # lambdas
1141 }
1142 UNPACKING_PARENTS: Final = {
1143     syms.atom,  # single element of a list or set literal
1144     syms.dictsetmaker,
1145     syms.listmaker,
1146     syms.testlist_gexp,
1147     syms.testlist_star_expr,
1148 }
1149 TEST_DESCENDANTS: Final = {
1150     syms.test,
1151     syms.lambdef,
1152     syms.or_test,
1153     syms.and_test,
1154     syms.not_test,
1155     syms.comparison,
1156     syms.star_expr,
1157     syms.expr,
1158     syms.xor_expr,
1159     syms.and_expr,
1160     syms.shift_expr,
1161     syms.arith_expr,
1162     syms.trailer,
1163     syms.term,
1164     syms.power,
1165 }
1166 ASSIGNMENTS: Final = {
1167     "=",
1168     "+=",
1169     "-=",
1170     "*=",
1171     "@=",
1172     "/=",
1173     "%=",
1174     "&=",
1175     "|=",
1176     "^=",
1177     "<<=",
1178     ">>=",
1179     "**=",
1180     "//=",
1181 }
1182 COMPREHENSION_PRIORITY: Final = 20
1183 COMMA_PRIORITY: Final = 18
1184 TERNARY_PRIORITY: Final = 16
1185 LOGIC_PRIORITY: Final = 14
1186 STRING_PRIORITY: Final = 12
1187 COMPARATOR_PRIORITY: Final = 10
1188 MATH_PRIORITIES: Final = {
1189     token.VBAR: 9,
1190     token.CIRCUMFLEX: 8,
1191     token.AMPER: 7,
1192     token.LEFTSHIFT: 6,
1193     token.RIGHTSHIFT: 6,
1194     token.PLUS: 5,
1195     token.MINUS: 5,
1196     token.STAR: 4,
1197     token.SLASH: 4,
1198     token.DOUBLESLASH: 4,
1199     token.PERCENT: 4,
1200     token.AT: 4,
1201     token.TILDE: 3,
1202     token.DOUBLESTAR: 2,
1203 }
1204 DOT_PRIORITY: Final = 1
1205
1206
1207 @dataclass
1208 class BracketTracker:
1209     """Keeps track of brackets on a line."""
1210
1211     depth: int = 0
1212     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = field(default_factory=dict)
1213     delimiters: Dict[LeafID, Priority] = field(default_factory=dict)
1214     previous: Optional[Leaf] = None
1215     _for_loop_depths: List[int] = field(default_factory=list)
1216     _lambda_argument_depths: List[int] = field(default_factory=list)
1217
1218     def mark(self, leaf: Leaf) -> None:
1219         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
1220
1221         All leaves receive an int `bracket_depth` field that stores how deep
1222         within brackets a given leaf is. 0 means there are no enclosing brackets
1223         that started on this line.
1224
1225         If a leaf is itself a closing bracket, it receives an `opening_bracket`
1226         field that it forms a pair with. This is a one-directional link to
1227         avoid reference cycles.
1228
1229         If a leaf is a delimiter (a token on which Black can split the line if
1230         needed) and it's on depth 0, its `id()` is stored in the tracker's
1231         `delimiters` field.
1232         """
1233         if leaf.type == token.COMMENT:
1234             return
1235
1236         self.maybe_decrement_after_for_loop_variable(leaf)
1237         self.maybe_decrement_after_lambda_arguments(leaf)
1238         if leaf.type in CLOSING_BRACKETS:
1239             self.depth -= 1
1240             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
1241             leaf.opening_bracket = opening_bracket
1242         leaf.bracket_depth = self.depth
1243         if self.depth == 0:
1244             delim = is_split_before_delimiter(leaf, self.previous)
1245             if delim and self.previous is not None:
1246                 self.delimiters[id(self.previous)] = delim
1247             else:
1248                 delim = is_split_after_delimiter(leaf, self.previous)
1249                 if delim:
1250                     self.delimiters[id(leaf)] = delim
1251         if leaf.type in OPENING_BRACKETS:
1252             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
1253             self.depth += 1
1254         self.previous = leaf
1255         self.maybe_increment_lambda_arguments(leaf)
1256         self.maybe_increment_for_loop_variable(leaf)
1257
1258     def any_open_brackets(self) -> bool:
1259         """Return True if there is an yet unmatched open bracket on the line."""
1260         return bool(self.bracket_match)
1261
1262     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
1263         """Return the highest priority of a delimiter found on the line.
1264
1265         Values are consistent with what `is_split_*_delimiter()` return.
1266         Raises ValueError on no delimiters.
1267         """
1268         return max(v for k, v in self.delimiters.items() if k not in exclude)
1269
1270     def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
1271         """Return the number of delimiters with the given `priority`.
1272
1273         If no `priority` is passed, defaults to max priority on the line.
1274         """
1275         if not self.delimiters:
1276             return 0
1277
1278         priority = priority or self.max_delimiter_priority()
1279         return sum(1 for p in self.delimiters.values() if p == priority)
1280
1281     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1282         """In a for loop, or comprehension, the variables are often unpacks.
1283
1284         To avoid splitting on the comma in this situation, increase the depth of
1285         tokens between `for` and `in`.
1286         """
1287         if leaf.type == token.NAME and leaf.value == "for":
1288             self.depth += 1
1289             self._for_loop_depths.append(self.depth)
1290             return True
1291
1292         return False
1293
1294     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
1295         """See `maybe_increment_for_loop_variable` above for explanation."""
1296         if (
1297             self._for_loop_depths
1298             and self._for_loop_depths[-1] == self.depth
1299             and leaf.type == token.NAME
1300             and leaf.value == "in"
1301         ):
1302             self.depth -= 1
1303             self._for_loop_depths.pop()
1304             return True
1305
1306         return False
1307
1308     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
1309         """In a lambda expression, there might be more than one argument.
1310
1311         To avoid splitting on the comma in this situation, increase the depth of
1312         tokens between `lambda` and `:`.
1313         """
1314         if leaf.type == token.NAME and leaf.value == "lambda":
1315             self.depth += 1
1316             self._lambda_argument_depths.append(self.depth)
1317             return True
1318
1319         return False
1320
1321     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1322         """See `maybe_increment_lambda_arguments` above for explanation."""
1323         if (
1324             self._lambda_argument_depths
1325             and self._lambda_argument_depths[-1] == self.depth
1326             and leaf.type == token.COLON
1327         ):
1328             self.depth -= 1
1329             self._lambda_argument_depths.pop()
1330             return True
1331
1332         return False
1333
1334     def get_open_lsqb(self) -> Optional[Leaf]:
1335         """Return the most recent opening square bracket (if any)."""
1336         return self.bracket_match.get((self.depth - 1, token.RSQB))
1337
1338
1339 @dataclass
1340 class Line:
1341     """Holds leaves and comments. Can be printed with `str(line)`."""
1342
1343     depth: int = 0
1344     leaves: List[Leaf] = field(default_factory=list)
1345     # keys ordered like `leaves`
1346     comments: Dict[LeafID, List[Leaf]] = field(default_factory=dict)
1347     bracket_tracker: BracketTracker = field(default_factory=BracketTracker)
1348     inside_brackets: bool = False
1349     should_explode: bool = False
1350
1351     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1352         """Add a new `leaf` to the end of the line.
1353
1354         Unless `preformatted` is True, the `leaf` will receive a new consistent
1355         whitespace prefix and metadata applied by :class:`BracketTracker`.
1356         Trailing commas are maybe removed, unpacked for loop variables are
1357         demoted from being delimiters.
1358
1359         Inline comments are put aside.
1360         """
1361         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1362         if not has_value:
1363             return
1364
1365         if token.COLON == leaf.type and self.is_class_paren_empty:
1366             del self.leaves[-2:]
1367         if self.leaves and not preformatted:
1368             # Note: at this point leaf.prefix should be empty except for
1369             # imports, for which we only preserve newlines.
1370             leaf.prefix += whitespace(
1371                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1372             )
1373         if self.inside_brackets or not preformatted:
1374             self.bracket_tracker.mark(leaf)
1375             self.maybe_remove_trailing_comma(leaf)
1376         if not self.append_comment(leaf):
1377             self.leaves.append(leaf)
1378
1379     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1380         """Like :func:`append()` but disallow invalid standalone comment structure.
1381
1382         Raises ValueError when any `leaf` is appended after a standalone comment
1383         or when a standalone comment is not the first leaf on the line.
1384         """
1385         if self.bracket_tracker.depth == 0:
1386             if self.is_comment:
1387                 raise ValueError("cannot append to standalone comments")
1388
1389             if self.leaves and leaf.type == STANDALONE_COMMENT:
1390                 raise ValueError(
1391                     "cannot append standalone comments to a populated line"
1392                 )
1393
1394         self.append(leaf, preformatted=preformatted)
1395
1396     @property
1397     def is_comment(self) -> bool:
1398         """Is this line a standalone comment?"""
1399         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1400
1401     @property
1402     def is_decorator(self) -> bool:
1403         """Is this line a decorator?"""
1404         return bool(self) and self.leaves[0].type == token.AT
1405
1406     @property
1407     def is_import(self) -> bool:
1408         """Is this an import line?"""
1409         return bool(self) and is_import(self.leaves[0])
1410
1411     @property
1412     def is_class(self) -> bool:
1413         """Is this line a class definition?"""
1414         return (
1415             bool(self)
1416             and self.leaves[0].type == token.NAME
1417             and self.leaves[0].value == "class"
1418         )
1419
1420     @property
1421     def is_stub_class(self) -> bool:
1422         """Is this line a class definition with a body consisting only of "..."?"""
1423         return self.is_class and self.leaves[-3:] == [
1424             Leaf(token.DOT, ".") for _ in range(3)
1425         ]
1426
1427     @property
1428     def is_collection_with_optional_trailing_comma(self) -> bool:
1429         """Is this line a collection literal with a trailing comma that's optional?
1430
1431         Note that the trailing comma in a 1-tuple is not optional.
1432         """
1433         if not self.leaves or len(self.leaves) < 4:
1434             return False
1435
1436         # Look for and address a trailing colon.
1437         if self.leaves[-1].type == token.COLON:
1438             closer = self.leaves[-2]
1439             close_index = -2
1440         else:
1441             closer = self.leaves[-1]
1442             close_index = -1
1443         if closer.type not in CLOSING_BRACKETS or self.inside_brackets:
1444             return False
1445
1446         if closer.type == token.RPAR:
1447             # Tuples require an extra check, because if there's only
1448             # one element in the tuple removing the comma unmakes the
1449             # tuple.
1450             #
1451             # We also check for parens before looking for the trailing
1452             # comma because in some cases (eg assigning a dict
1453             # literal) the literal gets wrapped in temporary parens
1454             # during parsing. This case is covered by the
1455             # collections.py test data.
1456             opener = closer.opening_bracket
1457             for _open_index, leaf in enumerate(self.leaves):
1458                 if leaf is opener:
1459                     break
1460
1461             else:
1462                 # Couldn't find the matching opening paren, play it safe.
1463                 return False
1464
1465             commas = 0
1466             comma_depth = self.leaves[close_index - 1].bracket_depth
1467             for leaf in self.leaves[_open_index + 1 : close_index]:
1468                 if leaf.bracket_depth == comma_depth and leaf.type == token.COMMA:
1469                     commas += 1
1470             if commas > 1:
1471                 # We haven't looked yet for the trailing comma because
1472                 # we might also have caught noop parens.
1473                 return self.leaves[close_index - 1].type == token.COMMA
1474
1475             elif commas == 1:
1476                 return False  # it's either a one-tuple or didn't have a trailing comma
1477
1478             if self.leaves[close_index - 1].type in CLOSING_BRACKETS:
1479                 close_index -= 1
1480                 closer = self.leaves[close_index]
1481                 if closer.type == token.RPAR:
1482                     # TODO: this is a gut feeling. Will we ever see this?
1483                     return False
1484
1485         if self.leaves[close_index - 1].type != token.COMMA:
1486             return False
1487
1488         return True
1489
1490     @property
1491     def is_def(self) -> bool:
1492         """Is this a function definition? (Also returns True for async defs.)"""
1493         try:
1494             first_leaf = self.leaves[0]
1495         except IndexError:
1496             return False
1497
1498         try:
1499             second_leaf: Optional[Leaf] = self.leaves[1]
1500         except IndexError:
1501             second_leaf = None
1502         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1503             first_leaf.type == token.ASYNC
1504             and second_leaf is not None
1505             and second_leaf.type == token.NAME
1506             and second_leaf.value == "def"
1507         )
1508
1509     @property
1510     def is_class_paren_empty(self) -> bool:
1511         """Is this a class with no base classes but using parentheses?
1512
1513         Those are unnecessary and should be removed.
1514         """
1515         return (
1516             bool(self)
1517             and len(self.leaves) == 4
1518             and self.is_class
1519             and self.leaves[2].type == token.LPAR
1520             and self.leaves[2].value == "("
1521             and self.leaves[3].type == token.RPAR
1522             and self.leaves[3].value == ")"
1523         )
1524
1525     @property
1526     def is_triple_quoted_string(self) -> bool:
1527         """Is the line a triple quoted string?"""
1528         return (
1529             bool(self)
1530             and self.leaves[0].type == token.STRING
1531             and self.leaves[0].value.startswith(('"""', "'''"))
1532         )
1533
1534     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1535         """If so, needs to be split before emitting."""
1536         for leaf in self.leaves:
1537             if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit:
1538                 return True
1539
1540         return False
1541
1542     def contains_uncollapsable_type_comments(self) -> bool:
1543         ignored_ids = set()
1544         try:
1545             last_leaf = self.leaves[-1]
1546             ignored_ids.add(id(last_leaf))
1547             if last_leaf.type == token.COMMA or (
1548                 last_leaf.type == token.RPAR and not last_leaf.value
1549             ):
1550                 # When trailing commas or optional parens are inserted by Black for
1551                 # consistency, comments after the previous last element are not moved
1552                 # (they don't have to, rendering will still be correct).  So we ignore
1553                 # trailing commas and invisible.
1554                 last_leaf = self.leaves[-2]
1555                 ignored_ids.add(id(last_leaf))
1556         except IndexError:
1557             return False
1558
1559         # A type comment is uncollapsable if it is attached to a leaf
1560         # that isn't at the end of the line (since that could cause it
1561         # to get associated to a different argument) or if there are
1562         # comments before it (since that could cause it to get hidden
1563         # behind a comment.
1564         comment_seen = False
1565         for leaf_id, comments in self.comments.items():
1566             for comment in comments:
1567                 if is_type_comment(comment):
1568                     if comment_seen or (
1569                         not is_type_comment(comment, " ignore")
1570                         and leaf_id not in ignored_ids
1571                     ):
1572                         return True
1573
1574                 comment_seen = True
1575
1576         return False
1577
1578     def contains_unsplittable_type_ignore(self) -> bool:
1579         if not self.leaves:
1580             return False
1581
1582         # If a 'type: ignore' is attached to the end of a line, we
1583         # can't split the line, because we can't know which of the
1584         # subexpressions the ignore was meant to apply to.
1585         #
1586         # We only want this to apply to actual physical lines from the
1587         # original source, though: we don't want the presence of a
1588         # 'type: ignore' at the end of a multiline expression to
1589         # justify pushing it all onto one line. Thus we
1590         # (unfortunately) need to check the actual source lines and
1591         # only report an unsplittable 'type: ignore' if this line was
1592         # one line in the original code.
1593
1594         # Grab the first and last line numbers, skipping generated leaves
1595         first_line = next((l.lineno for l in self.leaves if l.lineno != 0), 0)
1596         last_line = next((l.lineno for l in reversed(self.leaves) if l.lineno != 0), 0)
1597
1598         if first_line == last_line:
1599             # We look at the last two leaves since a comma or an
1600             # invisible paren could have been added at the end of the
1601             # line.
1602             for node in self.leaves[-2:]:
1603                 for comment in self.comments.get(id(node), []):
1604                     if is_type_comment(comment, " ignore"):
1605                         return True
1606
1607         return False
1608
1609     def contains_multiline_strings(self) -> bool:
1610         return any(is_multiline_string(leaf) for leaf in self.leaves)
1611
1612     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1613         """Remove trailing comma if there is one and it's safe."""
1614         if not (self.leaves and self.leaves[-1].type == token.COMMA):
1615             return False
1616
1617         # We remove trailing commas only in the case of importing a
1618         # single name from a module.
1619         if not (
1620             self.leaves
1621             and self.is_import
1622             and len(self.leaves) > 4
1623             and self.leaves[-1].type == token.COMMA
1624             and closing.type in CLOSING_BRACKETS
1625             and self.leaves[-4].type == token.NAME
1626             and (
1627                 # regular `from foo import bar,`
1628                 self.leaves[-4].value == "import"
1629                 # `from foo import (bar as baz,)
1630                 or (
1631                     len(self.leaves) > 6
1632                     and self.leaves[-6].value == "import"
1633                     and self.leaves[-3].value == "as"
1634                 )
1635                 # `from foo import bar as baz,`
1636                 or (
1637                     len(self.leaves) > 5
1638                     and self.leaves[-5].value == "import"
1639                     and self.leaves[-3].value == "as"
1640                 )
1641             )
1642             and closing.type == token.RPAR
1643         ):
1644             return False
1645
1646         self.remove_trailing_comma()
1647         return True
1648
1649     def append_comment(self, comment: Leaf) -> bool:
1650         """Add an inline or standalone comment to the line."""
1651         if (
1652             comment.type == STANDALONE_COMMENT
1653             and self.bracket_tracker.any_open_brackets()
1654         ):
1655             comment.prefix = ""
1656             return False
1657
1658         if comment.type != token.COMMENT:
1659             return False
1660
1661         if not self.leaves:
1662             comment.type = STANDALONE_COMMENT
1663             comment.prefix = ""
1664             return False
1665
1666         last_leaf = self.leaves[-1]
1667         if (
1668             last_leaf.type == token.RPAR
1669             and not last_leaf.value
1670             and last_leaf.parent
1671             and len(list(last_leaf.parent.leaves())) <= 3
1672             and not is_type_comment(comment)
1673         ):
1674             # Comments on an optional parens wrapping a single leaf should belong to
1675             # the wrapped node except if it's a type comment. Pinning the comment like
1676             # this avoids unstable formatting caused by comment migration.
1677             if len(self.leaves) < 2:
1678                 comment.type = STANDALONE_COMMENT
1679                 comment.prefix = ""
1680                 return False
1681
1682             last_leaf = self.leaves[-2]
1683         self.comments.setdefault(id(last_leaf), []).append(comment)
1684         return True
1685
1686     def comments_after(self, leaf: Leaf) -> List[Leaf]:
1687         """Generate comments that should appear directly after `leaf`."""
1688         return self.comments.get(id(leaf), [])
1689
1690     def remove_trailing_comma(self) -> None:
1691         """Remove the trailing comma and moves the comments attached to it."""
1692         trailing_comma = self.leaves.pop()
1693         trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
1694         self.comments.setdefault(id(self.leaves[-1]), []).extend(
1695             trailing_comma_comments
1696         )
1697
1698     def is_complex_subscript(self, leaf: Leaf) -> bool:
1699         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1700         open_lsqb = self.bracket_tracker.get_open_lsqb()
1701         if open_lsqb is None:
1702             return False
1703
1704         subscript_start = open_lsqb.next_sibling
1705
1706         if isinstance(subscript_start, Node):
1707             if subscript_start.type == syms.listmaker:
1708                 return False
1709
1710             if subscript_start.type == syms.subscriptlist:
1711                 subscript_start = child_towards(subscript_start, leaf)
1712         return subscript_start is not None and any(
1713             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1714         )
1715
1716     def clone(self) -> "Line":
1717         return Line(
1718             depth=self.depth,
1719             inside_brackets=self.inside_brackets,
1720             should_explode=self.should_explode,
1721         )
1722
1723     def __str__(self) -> str:
1724         """Render the line."""
1725         if not self:
1726             return "\n"
1727
1728         indent = "    " * self.depth
1729         leaves = iter(self.leaves)
1730         first = next(leaves)
1731         res = f"{first.prefix}{indent}{first.value}"
1732         for leaf in leaves:
1733             res += str(leaf)
1734         for comment in itertools.chain.from_iterable(self.comments.values()):
1735             res += str(comment)
1736
1737         return res + "\n"
1738
1739     def __bool__(self) -> bool:
1740         """Return True if the line has leaves or comments."""
1741         return bool(self.leaves or self.comments)
1742
1743
1744 @dataclass
1745 class EmptyLineTracker:
1746     """Provides a stateful method that returns the number of potential extra
1747     empty lines needed before and after the currently processed line.
1748
1749     Note: this tracker works on lines that haven't been split yet.  It assumes
1750     the prefix of the first leaf consists of optional newlines.  Those newlines
1751     are consumed by `maybe_empty_lines()` and included in the computation.
1752     """
1753
1754     is_pyi: bool = False
1755     previous_line: Optional[Line] = None
1756     previous_after: int = 0
1757     previous_defs: List[int] = field(default_factory=list)
1758
1759     def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1760         """Return the number of extra empty lines before and after the `current_line`.
1761
1762         This is for separating `def`, `async def` and `class` with extra empty
1763         lines (two on module-level).
1764         """
1765         before, after = self._maybe_empty_lines(current_line)
1766         before = (
1767             # Black should not insert empty lines at the beginning
1768             # of the file
1769             0
1770             if self.previous_line is None
1771             else before - self.previous_after
1772         )
1773         self.previous_after = after
1774         self.previous_line = current_line
1775         return before, after
1776
1777     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1778         max_allowed = 1
1779         if current_line.depth == 0:
1780             max_allowed = 1 if self.is_pyi else 2
1781         if current_line.leaves:
1782             # Consume the first leaf's extra newlines.
1783             first_leaf = current_line.leaves[0]
1784             before = first_leaf.prefix.count("\n")
1785             before = min(before, max_allowed)
1786             first_leaf.prefix = ""
1787         else:
1788             before = 0
1789         depth = current_line.depth
1790         while self.previous_defs and self.previous_defs[-1] >= depth:
1791             self.previous_defs.pop()
1792             if self.is_pyi:
1793                 before = 0 if depth else 1
1794             else:
1795                 before = 1 if depth else 2
1796         if current_line.is_decorator or current_line.is_def or current_line.is_class:
1797             return self._maybe_empty_lines_for_class_or_def(current_line, before)
1798
1799         if (
1800             self.previous_line
1801             and self.previous_line.is_import
1802             and not current_line.is_import
1803             and depth == self.previous_line.depth
1804         ):
1805             return (before or 1), 0
1806
1807         if (
1808             self.previous_line
1809             and self.previous_line.is_class
1810             and current_line.is_triple_quoted_string
1811         ):
1812             return before, 1
1813
1814         return before, 0
1815
1816     def _maybe_empty_lines_for_class_or_def(
1817         self, current_line: Line, before: int
1818     ) -> Tuple[int, int]:
1819         if not current_line.is_decorator:
1820             self.previous_defs.append(current_line.depth)
1821         if self.previous_line is None:
1822             # Don't insert empty lines before the first line in the file.
1823             return 0, 0
1824
1825         if self.previous_line.is_decorator:
1826             return 0, 0
1827
1828         if self.previous_line.depth < current_line.depth and (
1829             self.previous_line.is_class or self.previous_line.is_def
1830         ):
1831             return 0, 0
1832
1833         if (
1834             self.previous_line.is_comment
1835             and self.previous_line.depth == current_line.depth
1836             and before == 0
1837         ):
1838             return 0, 0
1839
1840         if self.is_pyi:
1841             if self.previous_line.depth > current_line.depth:
1842                 newlines = 1
1843             elif current_line.is_class or self.previous_line.is_class:
1844                 if current_line.is_stub_class and self.previous_line.is_stub_class:
1845                     # No blank line between classes with an empty body
1846                     newlines = 0
1847                 else:
1848                     newlines = 1
1849             elif current_line.is_def and not self.previous_line.is_def:
1850                 # Blank line between a block of functions and a block of non-functions
1851                 newlines = 1
1852             else:
1853                 newlines = 0
1854         else:
1855             newlines = 2
1856         if current_line.depth and newlines:
1857             newlines -= 1
1858         return newlines, 0
1859
1860
1861 @dataclass
1862 class LineGenerator(Visitor[Line]):
1863     """Generates reformatted Line objects.  Empty lines are not emitted.
1864
1865     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1866     in ways that will no longer stringify to valid Python code on the tree.
1867     """
1868
1869     is_pyi: bool = False
1870     normalize_strings: bool = True
1871     current_line: Line = field(default_factory=Line)
1872     remove_u_prefix: bool = False
1873
1874     def line(self, indent: int = 0) -> Iterator[Line]:
1875         """Generate a line.
1876
1877         If the line is empty, only emit if it makes sense.
1878         If the line is too long, split it first and then generate.
1879
1880         If any lines were generated, set up a new current_line.
1881         """
1882         if not self.current_line:
1883             self.current_line.depth += indent
1884             return  # Line is empty, don't emit. Creating a new one unnecessary.
1885
1886         complete_line = self.current_line
1887         self.current_line = Line(depth=complete_line.depth + indent)
1888         yield complete_line
1889
1890     def visit_default(self, node: LN) -> Iterator[Line]:
1891         """Default `visit_*()` implementation. Recurses to children of `node`."""
1892         if isinstance(node, Leaf):
1893             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1894             for comment in generate_comments(node):
1895                 if any_open_brackets:
1896                     # any comment within brackets is subject to splitting
1897                     self.current_line.append(comment)
1898                 elif comment.type == token.COMMENT:
1899                     # regular trailing comment
1900                     self.current_line.append(comment)
1901                     yield from self.line()
1902
1903                 else:
1904                     # regular standalone comment
1905                     yield from self.line()
1906
1907                     self.current_line.append(comment)
1908                     yield from self.line()
1909
1910             normalize_prefix(node, inside_brackets=any_open_brackets)
1911             if self.normalize_strings and node.type == token.STRING:
1912                 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1913                 normalize_string_quotes(node)
1914             if node.type == token.NUMBER:
1915                 normalize_numeric_literal(node)
1916             if node.type not in WHITESPACE:
1917                 self.current_line.append(node)
1918         yield from super().visit_default(node)
1919
1920     def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
1921         """Increase indentation level, maybe yield a line."""
1922         # In blib2to3 INDENT never holds comments.
1923         yield from self.line(+1)
1924         yield from self.visit_default(node)
1925
1926     def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
1927         """Decrease indentation level, maybe yield a line."""
1928         # The current line might still wait for trailing comments.  At DEDENT time
1929         # there won't be any (they would be prefixes on the preceding NEWLINE).
1930         # Emit the line then.
1931         yield from self.line()
1932
1933         # While DEDENT has no value, its prefix may contain standalone comments
1934         # that belong to the current indentation level.  Get 'em.
1935         yield from self.visit_default(node)
1936
1937         # Finally, emit the dedent.
1938         yield from self.line(-1)
1939
1940     def visit_stmt(
1941         self, node: Node, keywords: Set[str], parens: Set[str]
1942     ) -> Iterator[Line]:
1943         """Visit a statement.
1944
1945         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1946         `def`, `with`, `class`, `assert` and assignments.
1947
1948         The relevant Python language `keywords` for a given statement will be
1949         NAME leaves within it. This methods puts those on a separate line.
1950
1951         `parens` holds a set of string leaf values immediately after which
1952         invisible parens should be put.
1953         """
1954         normalize_invisible_parens(node, parens_after=parens)
1955         for child in node.children:
1956             if child.type == token.NAME and child.value in keywords:  # type: ignore
1957                 yield from self.line()
1958
1959             yield from self.visit(child)
1960
1961     def visit_suite(self, node: Node) -> Iterator[Line]:
1962         """Visit a suite."""
1963         if self.is_pyi and is_stub_suite(node):
1964             yield from self.visit(node.children[2])
1965         else:
1966             yield from self.visit_default(node)
1967
1968     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1969         """Visit a statement without nested statements."""
1970         is_suite_like = node.parent and node.parent.type in STATEMENT
1971         if is_suite_like:
1972             if self.is_pyi and is_stub_body(node):
1973                 yield from self.visit_default(node)
1974             else:
1975                 yield from self.line(+1)
1976                 yield from self.visit_default(node)
1977                 yield from self.line(-1)
1978
1979         else:
1980             if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1981                 yield from self.line()
1982             yield from self.visit_default(node)
1983
1984     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1985         """Visit `async def`, `async for`, `async with`."""
1986         yield from self.line()
1987
1988         children = iter(node.children)
1989         for child in children:
1990             yield from self.visit(child)
1991
1992             if child.type == token.ASYNC:
1993                 break
1994
1995         internal_stmt = next(children)
1996         for child in internal_stmt.children:
1997             yield from self.visit(child)
1998
1999     def visit_decorators(self, node: Node) -> Iterator[Line]:
2000         """Visit decorators."""
2001         for child in node.children:
2002             yield from self.line()
2003             yield from self.visit(child)
2004
2005     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
2006         """Remove a semicolon and put the other statement on a separate line."""
2007         yield from self.line()
2008
2009     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
2010         """End of file. Process outstanding comments and end with a newline."""
2011         yield from self.visit_default(leaf)
2012         yield from self.line()
2013
2014     def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
2015         if not self.current_line.bracket_tracker.any_open_brackets():
2016             yield from self.line()
2017         yield from self.visit_default(leaf)
2018
2019     def visit_factor(self, node: Node) -> Iterator[Line]:
2020         """Force parentheses between a unary op and a binary power:
2021
2022         -2 ** 8 -> -(2 ** 8)
2023         """
2024         _operator, operand = node.children
2025         if (
2026             operand.type == syms.power
2027             and len(operand.children) == 3
2028             and operand.children[1].type == token.DOUBLESTAR
2029         ):
2030             lpar = Leaf(token.LPAR, "(")
2031             rpar = Leaf(token.RPAR, ")")
2032             index = operand.remove() or 0
2033             node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
2034         yield from self.visit_default(node)
2035
2036     def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
2037         # Check if it's a docstring
2038         if prev_siblings_are(
2039             leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
2040         ) and is_multiline_string(leaf):
2041             prefix = "    " * self.current_line.depth
2042             docstring = fix_docstring(leaf.value[3:-3], prefix)
2043             leaf.value = leaf.value[0:3] + docstring + leaf.value[-3:]
2044             normalize_string_quotes(leaf)
2045
2046         yield from self.visit_default(leaf)
2047
2048     def __post_init__(self) -> None:
2049         """You are in a twisty little maze of passages."""
2050         v = self.visit_stmt
2051         Ø: Set[str] = set()
2052         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
2053         self.visit_if_stmt = partial(
2054             v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
2055         )
2056         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
2057         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
2058         self.visit_try_stmt = partial(
2059             v, keywords={"try", "except", "else", "finally"}, parens=Ø
2060         )
2061         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
2062         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
2063         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
2064         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
2065         self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
2066         self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
2067         self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
2068         self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
2069         self.visit_async_funcdef = self.visit_async_stmt
2070         self.visit_decorated = self.visit_decorators
2071
2072
2073 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
2074 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
2075 OPENING_BRACKETS = set(BRACKET.keys())
2076 CLOSING_BRACKETS = set(BRACKET.values())
2077 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
2078 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
2079
2080
2081 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
2082     """Return whitespace prefix if needed for the given `leaf`.
2083
2084     `complex_subscript` signals whether the given leaf is part of a subscription
2085     which has non-trivial arguments, like arithmetic expressions or function calls.
2086     """
2087     NO = ""
2088     SPACE = " "
2089     DOUBLESPACE = "  "
2090     t = leaf.type
2091     p = leaf.parent
2092     v = leaf.value
2093     if t in ALWAYS_NO_SPACE:
2094         return NO
2095
2096     if t == token.COMMENT:
2097         return DOUBLESPACE
2098
2099     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
2100     if t == token.COLON and p.type not in {
2101         syms.subscript,
2102         syms.subscriptlist,
2103         syms.sliceop,
2104     }:
2105         return NO
2106
2107     prev = leaf.prev_sibling
2108     if not prev:
2109         prevp = preceding_leaf(p)
2110         if not prevp or prevp.type in OPENING_BRACKETS:
2111             return NO
2112
2113         if t == token.COLON:
2114             if prevp.type == token.COLON:
2115                 return NO
2116
2117             elif prevp.type != token.COMMA and not complex_subscript:
2118                 return NO
2119
2120             return SPACE
2121
2122         if prevp.type == token.EQUAL:
2123             if prevp.parent:
2124                 if prevp.parent.type in {
2125                     syms.arglist,
2126                     syms.argument,
2127                     syms.parameters,
2128                     syms.varargslist,
2129                 }:
2130                     return NO
2131
2132                 elif prevp.parent.type == syms.typedargslist:
2133                     # A bit hacky: if the equal sign has whitespace, it means we
2134                     # previously found it's a typed argument.  So, we're using
2135                     # that, too.
2136                     return prevp.prefix
2137
2138         elif prevp.type in VARARGS_SPECIALS:
2139             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2140                 return NO
2141
2142         elif prevp.type == token.COLON:
2143             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
2144                 return SPACE if complex_subscript else NO
2145
2146         elif (
2147             prevp.parent
2148             and prevp.parent.type == syms.factor
2149             and prevp.type in MATH_OPERATORS
2150         ):
2151             return NO
2152
2153         elif (
2154             prevp.type == token.RIGHTSHIFT
2155             and prevp.parent
2156             and prevp.parent.type == syms.shift_expr
2157             and prevp.prev_sibling
2158             and prevp.prev_sibling.type == token.NAME
2159             and prevp.prev_sibling.value == "print"  # type: ignore
2160         ):
2161             # Python 2 print chevron
2162             return NO
2163
2164     elif prev.type in OPENING_BRACKETS:
2165         return NO
2166
2167     if p.type in {syms.parameters, syms.arglist}:
2168         # untyped function signatures or calls
2169         if not prev or prev.type != token.COMMA:
2170             return NO
2171
2172     elif p.type == syms.varargslist:
2173         # lambdas
2174         if prev and prev.type != token.COMMA:
2175             return NO
2176
2177     elif p.type == syms.typedargslist:
2178         # typed function signatures
2179         if not prev:
2180             return NO
2181
2182         if t == token.EQUAL:
2183             if prev.type != syms.tname:
2184                 return NO
2185
2186         elif prev.type == token.EQUAL:
2187             # A bit hacky: if the equal sign has whitespace, it means we
2188             # previously found it's a typed argument.  So, we're using that, too.
2189             return prev.prefix
2190
2191         elif prev.type != token.COMMA:
2192             return NO
2193
2194     elif p.type == syms.tname:
2195         # type names
2196         if not prev:
2197             prevp = preceding_leaf(p)
2198             if not prevp or prevp.type != token.COMMA:
2199                 return NO
2200
2201     elif p.type == syms.trailer:
2202         # attributes and calls
2203         if t == token.LPAR or t == token.RPAR:
2204             return NO
2205
2206         if not prev:
2207             if t == token.DOT:
2208                 prevp = preceding_leaf(p)
2209                 if not prevp or prevp.type != token.NUMBER:
2210                     return NO
2211
2212             elif t == token.LSQB:
2213                 return NO
2214
2215         elif prev.type != token.COMMA:
2216             return NO
2217
2218     elif p.type == syms.argument:
2219         # single argument
2220         if t == token.EQUAL:
2221             return NO
2222
2223         if not prev:
2224             prevp = preceding_leaf(p)
2225             if not prevp or prevp.type == token.LPAR:
2226                 return NO
2227
2228         elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
2229             return NO
2230
2231     elif p.type == syms.decorator:
2232         # decorators
2233         return NO
2234
2235     elif p.type == syms.dotted_name:
2236         if prev:
2237             return NO
2238
2239         prevp = preceding_leaf(p)
2240         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
2241             return NO
2242
2243     elif p.type == syms.classdef:
2244         if t == token.LPAR:
2245             return NO
2246
2247         if prev and prev.type == token.LPAR:
2248             return NO
2249
2250     elif p.type in {syms.subscript, syms.sliceop}:
2251         # indexing
2252         if not prev:
2253             assert p.parent is not None, "subscripts are always parented"
2254             if p.parent.type == syms.subscriptlist:
2255                 return SPACE
2256
2257             return NO
2258
2259         elif not complex_subscript:
2260             return NO
2261
2262     elif p.type == syms.atom:
2263         if prev and t == token.DOT:
2264             # dots, but not the first one.
2265             return NO
2266
2267     elif p.type == syms.dictsetmaker:
2268         # dict unpacking
2269         if prev and prev.type == token.DOUBLESTAR:
2270             return NO
2271
2272     elif p.type in {syms.factor, syms.star_expr}:
2273         # unary ops
2274         if not prev:
2275             prevp = preceding_leaf(p)
2276             if not prevp or prevp.type in OPENING_BRACKETS:
2277                 return NO
2278
2279             prevp_parent = prevp.parent
2280             assert prevp_parent is not None
2281             if prevp.type == token.COLON and prevp_parent.type in {
2282                 syms.subscript,
2283                 syms.sliceop,
2284             }:
2285                 return NO
2286
2287             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
2288                 return NO
2289
2290         elif t in {token.NAME, token.NUMBER, token.STRING}:
2291             return NO
2292
2293     elif p.type == syms.import_from:
2294         if t == token.DOT:
2295             if prev and prev.type == token.DOT:
2296                 return NO
2297
2298         elif t == token.NAME:
2299             if v == "import":
2300                 return SPACE
2301
2302             if prev and prev.type == token.DOT:
2303                 return NO
2304
2305     elif p.type == syms.sliceop:
2306         return NO
2307
2308     return SPACE
2309
2310
2311 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
2312     """Return the first leaf that precedes `node`, if any."""
2313     while node:
2314         res = node.prev_sibling
2315         if res:
2316             if isinstance(res, Leaf):
2317                 return res
2318
2319             try:
2320                 return list(res.leaves())[-1]
2321
2322             except IndexError:
2323                 return None
2324
2325         node = node.parent
2326     return None
2327
2328
2329 def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
2330     """Return if the `node` and its previous siblings match types against the provided
2331     list of tokens; the provided `node`has its type matched against the last element in
2332     the list.  `None` can be used as the first element to declare that the start of the
2333     list is anchored at the start of its parent's children."""
2334     if not tokens:
2335         return True
2336     if tokens[-1] is None:
2337         return node is None
2338     if not node:
2339         return False
2340     if node.type != tokens[-1]:
2341         return False
2342     return prev_siblings_are(node.prev_sibling, tokens[:-1])
2343
2344
2345 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
2346     """Return the child of `ancestor` that contains `descendant`."""
2347     node: Optional[LN] = descendant
2348     while node and node.parent != ancestor:
2349         node = node.parent
2350     return node
2351
2352
2353 def container_of(leaf: Leaf) -> LN:
2354     """Return `leaf` or one of its ancestors that is the topmost container of it.
2355
2356     By "container" we mean a node where `leaf` is the very first child.
2357     """
2358     same_prefix = leaf.prefix
2359     container: LN = leaf
2360     while container:
2361         parent = container.parent
2362         if parent is None:
2363             break
2364
2365         if parent.children[0].prefix != same_prefix:
2366             break
2367
2368         if parent.type == syms.file_input:
2369             break
2370
2371         if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
2372             break
2373
2374         container = parent
2375     return container
2376
2377
2378 def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2379     """Return the priority of the `leaf` delimiter, given a line break after it.
2380
2381     The delimiter priorities returned here are from those delimiters that would
2382     cause a line break after themselves.
2383
2384     Higher numbers are higher priority.
2385     """
2386     if leaf.type == token.COMMA:
2387         return COMMA_PRIORITY
2388
2389     return 0
2390
2391
2392 def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2393     """Return the priority of the `leaf` delimiter, given a line break before it.
2394
2395     The delimiter priorities returned here are from those delimiters that would
2396     cause a line break before themselves.
2397
2398     Higher numbers are higher priority.
2399     """
2400     if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2401         # * and ** might also be MATH_OPERATORS but in this case they are not.
2402         # Don't treat them as a delimiter.
2403         return 0
2404
2405     if (
2406         leaf.type == token.DOT
2407         and leaf.parent
2408         and leaf.parent.type not in {syms.import_from, syms.dotted_name}
2409         and (previous is None or previous.type in CLOSING_BRACKETS)
2410     ):
2411         return DOT_PRIORITY
2412
2413     if (
2414         leaf.type in MATH_OPERATORS
2415         and leaf.parent
2416         and leaf.parent.type not in {syms.factor, syms.star_expr}
2417     ):
2418         return MATH_PRIORITIES[leaf.type]
2419
2420     if leaf.type in COMPARATORS:
2421         return COMPARATOR_PRIORITY
2422
2423     if (
2424         leaf.type == token.STRING
2425         and previous is not None
2426         and previous.type == token.STRING
2427     ):
2428         return STRING_PRIORITY
2429
2430     if leaf.type not in {token.NAME, token.ASYNC}:
2431         return 0
2432
2433     if (
2434         leaf.value == "for"
2435         and leaf.parent
2436         and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
2437         or leaf.type == token.ASYNC
2438     ):
2439         if (
2440             not isinstance(leaf.prev_sibling, Leaf)
2441             or leaf.prev_sibling.value != "async"
2442         ):
2443             return COMPREHENSION_PRIORITY
2444
2445     if (
2446         leaf.value == "if"
2447         and leaf.parent
2448         and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
2449     ):
2450         return COMPREHENSION_PRIORITY
2451
2452     if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
2453         return TERNARY_PRIORITY
2454
2455     if leaf.value == "is":
2456         return COMPARATOR_PRIORITY
2457
2458     if (
2459         leaf.value == "in"
2460         and leaf.parent
2461         and leaf.parent.type in {syms.comp_op, syms.comparison}
2462         and not (
2463             previous is not None
2464             and previous.type == token.NAME
2465             and previous.value == "not"
2466         )
2467     ):
2468         return COMPARATOR_PRIORITY
2469
2470     if (
2471         leaf.value == "not"
2472         and leaf.parent
2473         and leaf.parent.type == syms.comp_op
2474         and not (
2475             previous is not None
2476             and previous.type == token.NAME
2477             and previous.value == "is"
2478         )
2479     ):
2480         return COMPARATOR_PRIORITY
2481
2482     if leaf.value in LOGIC_OPERATORS and leaf.parent:
2483         return LOGIC_PRIORITY
2484
2485     return 0
2486
2487
2488 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2489 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
2490
2491
2492 def generate_comments(leaf: LN) -> Iterator[Leaf]:
2493     """Clean the prefix of the `leaf` and generate comments from it, if any.
2494
2495     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
2496     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
2497     move because it does away with modifying the grammar to include all the
2498     possible places in which comments can be placed.
2499
2500     The sad consequence for us though is that comments don't "belong" anywhere.
2501     This is why this function generates simple parentless Leaf objects for
2502     comments.  We simply don't know what the correct parent should be.
2503
2504     No matter though, we can live without this.  We really only need to
2505     differentiate between inline and standalone comments.  The latter don't
2506     share the line with any code.
2507
2508     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
2509     are emitted with a fake STANDALONE_COMMENT token identifier.
2510     """
2511     for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
2512         yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
2513
2514
2515 @dataclass
2516 class ProtoComment:
2517     """Describes a piece of syntax that is a comment.
2518
2519     It's not a :class:`blib2to3.pytree.Leaf` so that:
2520
2521     * it can be cached (`Leaf` objects should not be reused more than once as
2522       they store their lineno, column, prefix, and parent information);
2523     * `newlines` and `consumed` fields are kept separate from the `value`. This
2524       simplifies handling of special marker comments like ``# fmt: off/on``.
2525     """
2526
2527     type: int  # token.COMMENT or STANDALONE_COMMENT
2528     value: str  # content of the comment
2529     newlines: int  # how many newlines before the comment
2530     consumed: int  # how many characters of the original leaf's prefix did we consume
2531
2532
2533 @lru_cache(maxsize=4096)
2534 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2535     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2536     result: List[ProtoComment] = []
2537     if not prefix or "#" not in prefix:
2538         return result
2539
2540     consumed = 0
2541     nlines = 0
2542     ignored_lines = 0
2543     for index, line in enumerate(prefix.split("\n")):
2544         consumed += len(line) + 1  # adding the length of the split '\n'
2545         line = line.lstrip()
2546         if not line:
2547             nlines += 1
2548         if not line.startswith("#"):
2549             # Escaped newlines outside of a comment are not really newlines at
2550             # all. We treat a single-line comment following an escaped newline
2551             # as a simple trailing comment.
2552             if line.endswith("\\"):
2553                 ignored_lines += 1
2554             continue
2555
2556         if index == ignored_lines and not is_endmarker:
2557             comment_type = token.COMMENT  # simple trailing comment
2558         else:
2559             comment_type = STANDALONE_COMMENT
2560         comment = make_comment(line)
2561         result.append(
2562             ProtoComment(
2563                 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2564             )
2565         )
2566         nlines = 0
2567     return result
2568
2569
2570 def make_comment(content: str) -> str:
2571     """Return a consistently formatted comment from the given `content` string.
2572
2573     All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
2574     space between the hash sign and the content.
2575
2576     If `content` didn't start with a hash sign, one is provided.
2577     """
2578     content = content.rstrip()
2579     if not content:
2580         return "#"
2581
2582     if content[0] == "#":
2583         content = content[1:]
2584     if content and content[0] not in " !:#'%":
2585         content = " " + content
2586     return "#" + content
2587
2588
2589 def transform_line(
2590     line: Line,
2591     line_length: int,
2592     normalize_strings: bool,
2593     features: Collection[Feature] = (),
2594 ) -> Iterator[Line]:
2595     """Transform a `line`, potentially splitting it into many lines.
2596
2597     They should fit in the allotted `line_length` but might not be able to.
2598
2599     `features` are syntactical features that may be used in the output.
2600     """
2601     if line.is_comment:
2602         yield line
2603         return
2604
2605     line_str = line_to_string(line)
2606
2607     def init_st(ST: Type[StringTransformer]) -> StringTransformer:
2608         """Initialize StringTransformer"""
2609         return ST(line_length, normalize_strings)
2610
2611     string_merge = init_st(StringMerger)
2612     string_paren_strip = init_st(StringParenStripper)
2613     string_split = init_st(StringSplitter)
2614     string_paren_wrap = init_st(StringParenWrapper)
2615
2616     transformers: List[Transformer]
2617     if (
2618         not line.contains_uncollapsable_type_comments()
2619         and not line.should_explode
2620         and not line.is_collection_with_optional_trailing_comma
2621         and (
2622             is_line_short_enough(line, line_length=line_length, line_str=line_str)
2623             or line.contains_unsplittable_type_ignore()
2624         )
2625         and not (line.contains_standalone_comments() and line.inside_brackets)
2626     ):
2627         # Only apply basic string preprocessing, since lines shouldn't be split here.
2628         transformers = [string_merge, string_paren_strip]
2629     elif line.is_def:
2630         transformers = [left_hand_split]
2631     else:
2632
2633         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
2634             for omit in generate_trailers_to_omit(line, line_length):
2635                 lines = list(right_hand_split(line, line_length, features, omit=omit))
2636                 if is_line_short_enough(lines[0], line_length=line_length):
2637                     yield from lines
2638                     return
2639
2640             # All splits failed, best effort split with no omits.
2641             # This mostly happens to multiline strings that are by definition
2642             # reported as not fitting a single line.
2643             # line_length=1 here was historically a bug that somehow became a feature.
2644             # See #762 and #781 for the full story.
2645             yield from right_hand_split(line, line_length=1, features=features)
2646
2647         if line.inside_brackets:
2648             transformers = [
2649                 string_merge,
2650                 string_paren_strip,
2651                 delimiter_split,
2652                 standalone_comment_split,
2653                 string_split,
2654                 string_paren_wrap,
2655                 rhs,
2656             ]
2657         else:
2658             transformers = [
2659                 string_merge,
2660                 string_paren_strip,
2661                 string_split,
2662                 string_paren_wrap,
2663                 rhs,
2664             ]
2665
2666     for transform in transformers:
2667         # We are accumulating lines in `result` because we might want to abort
2668         # mission and return the original line in the end, or attempt a different
2669         # split altogether.
2670         result: List[Line] = []
2671         try:
2672             for l in transform(line, features):
2673                 if str(l).strip("\n") == line_str:
2674                     raise CannotTransform(
2675                         "Line transformer returned an unchanged result"
2676                     )
2677
2678                 result.extend(
2679                     transform_line(
2680                         l,
2681                         line_length=line_length,
2682                         normalize_strings=normalize_strings,
2683                         features=features,
2684                     )
2685                 )
2686         except CannotTransform:
2687             continue
2688         else:
2689             yield from result
2690             break
2691
2692     else:
2693         yield line
2694
2695
2696 @dataclass  # type: ignore
2697 class StringTransformer(ABC):
2698     """
2699     An implementation of the Transformer protocol that relies on its
2700     subclasses overriding the template methods `do_match(...)` and
2701     `do_transform(...)`.
2702
2703     This Transformer works exclusively on strings (for example, by merging
2704     or splitting them).
2705
2706     The following sections can be found among the docstrings of each concrete
2707     StringTransformer subclass.
2708
2709     Requirements:
2710         Which requirements must be met of the given Line for this
2711         StringTransformer to be applied?
2712
2713     Transformations:
2714         If the given Line meets all of the above requirments, which string
2715         transformations can you expect to be applied to it by this
2716         StringTransformer?
2717
2718     Collaborations:
2719         What contractual agreements does this StringTransformer have with other
2720         StringTransfomers? Such collaborations should be eliminated/minimized
2721         as much as possible.
2722     """
2723
2724     line_length: int
2725     normalize_strings: bool
2726
2727     @abstractmethod
2728     def do_match(self, line: Line) -> TMatchResult:
2729         """
2730         Returns:
2731             * Ok(string_idx) such that `line.leaves[string_idx]` is our target
2732             string, if a match was able to be made.
2733                 OR
2734             * Err(CannotTransform), if a match was not able to be made.
2735         """
2736
2737     @abstractmethod
2738     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2739         """
2740         Yields:
2741             * Ok(new_line) where new_line is the new transformed line.
2742                 OR
2743             * Err(CannotTransform) if the transformation failed for some reason. The
2744             `do_match(...)` template method should usually be used to reject
2745             the form of the given Line, but in some cases it is difficult to
2746             know whether or not a Line meets the StringTransformer's
2747             requirements until the transformation is already midway.
2748
2749         Side Effects:
2750             This method should NOT mutate @line directly, but it MAY mutate the
2751             Line's underlying Node structure. (WARNING: If the underlying Node
2752             structure IS altered, then this method should NOT be allowed to
2753             yield an CannotTransform after that point.)
2754         """
2755
2756     def __call__(self, line: Line, _features: Collection[Feature]) -> Iterator[Line]:
2757         """
2758         StringTransformer instances have a call signature that mirrors that of
2759         the Transformer type.
2760
2761         Raises:
2762             CannotTransform(...) if the concrete StringTransformer class is unable
2763             to transform @line.
2764         """
2765         # Optimization to avoid calling `self.do_match(...)` when the line does
2766         # not contain any string.
2767         if not any(leaf.type == token.STRING for leaf in line.leaves):
2768             raise CannotTransform("There are no strings in this line.")
2769
2770         match_result = self.do_match(line)
2771
2772         if isinstance(match_result, Err):
2773             cant_transform = match_result.err()
2774             raise CannotTransform(
2775                 f"The string transformer {self.__class__.__name__} does not recognize"
2776                 " this line as one that it can transform."
2777             ) from cant_transform
2778
2779         string_idx = match_result.ok()
2780
2781         for line_result in self.do_transform(line, string_idx):
2782             if isinstance(line_result, Err):
2783                 cant_transform = line_result.err()
2784                 raise CannotTransform(
2785                     "StringTransformer failed while attempting to transform string."
2786                 ) from cant_transform
2787             line = line_result.ok()
2788             yield line
2789
2790
2791 @dataclass
2792 class CustomSplit:
2793     """A custom (i.e. manual) string split.
2794
2795     A single CustomSplit instance represents a single substring.
2796
2797     Examples:
2798         Consider the following string:
2799         ```
2800         "Hi there friend."
2801         " This is a custom"
2802         f" string {split}."
2803         ```
2804
2805         This string will correspond to the following three CustomSplit instances:
2806         ```
2807         CustomSplit(False, 16)
2808         CustomSplit(False, 17)
2809         CustomSplit(True, 16)
2810         ```
2811     """
2812
2813     has_prefix: bool
2814     break_idx: int
2815
2816
2817 class CustomSplitMapMixin:
2818     """
2819     This mixin class is used to map merged strings to a sequence of
2820     CustomSplits, which will then be used to re-split the strings iff none of
2821     the resultant substrings go over the configured max line length.
2822     """
2823
2824     _Key = Tuple[StringID, str]
2825     _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple)
2826
2827     @staticmethod
2828     def _get_key(string: str) -> "CustomSplitMapMixin._Key":
2829         """
2830         Returns:
2831             A unique identifier that is used internally to map @string to a
2832             group of custom splits.
2833         """
2834         return (id(string), string)
2835
2836     def add_custom_splits(
2837         self, string: str, custom_splits: Iterable[CustomSplit]
2838     ) -> None:
2839         """Custom Split Map Setter Method
2840
2841         Side Effects:
2842             Adds a mapping from @string to the custom splits @custom_splits.
2843         """
2844         key = self._get_key(string)
2845         self._CUSTOM_SPLIT_MAP[key] = tuple(custom_splits)
2846
2847     def pop_custom_splits(self, string: str) -> List[CustomSplit]:
2848         """Custom Split Map Getter Method
2849
2850         Returns:
2851             * A list of the custom splits that are mapped to @string, if any
2852             exist.
2853                 OR
2854             * [], otherwise.
2855
2856         Side Effects:
2857             Deletes the mapping between @string and its associated custom
2858             splits (which are returned to the caller).
2859         """
2860         key = self._get_key(string)
2861
2862         custom_splits = self._CUSTOM_SPLIT_MAP[key]
2863         del self._CUSTOM_SPLIT_MAP[key]
2864
2865         return list(custom_splits)
2866
2867     def has_custom_splits(self, string: str) -> bool:
2868         """
2869         Returns:
2870             True iff @string is associated with a set of custom splits.
2871         """
2872         key = self._get_key(string)
2873         return key in self._CUSTOM_SPLIT_MAP
2874
2875
2876 class StringMerger(CustomSplitMapMixin, StringTransformer):
2877     """StringTransformer that merges strings together.
2878
2879     Requirements:
2880         (A) The line contains adjacent strings such that at most one substring
2881         has inline comments AND none of those inline comments are pragmas AND
2882         the set of all substring prefixes is either of length 1 or equal to
2883         {"", "f"} AND none of the substrings are raw strings (i.e. are prefixed
2884         with 'r').
2885             OR
2886         (B) The line contains a string which uses line continuation backslashes.
2887
2888     Transformations:
2889         Depending on which of the two requirements above where met, either:
2890
2891         (A) The string group associated with the target string is merged.
2892             OR
2893         (B) All line-continuation backslashes are removed from the target string.
2894
2895     Collaborations:
2896         StringMerger provides custom split information to StringSplitter.
2897     """
2898
2899     def do_match(self, line: Line) -> TMatchResult:
2900         LL = line.leaves
2901
2902         is_valid_index = is_valid_index_factory(LL)
2903
2904         for (i, leaf) in enumerate(LL):
2905             if (
2906                 leaf.type == token.STRING
2907                 and is_valid_index(i + 1)
2908                 and LL[i + 1].type == token.STRING
2909             ):
2910                 return Ok(i)
2911
2912             if leaf.type == token.STRING and "\\\n" in leaf.value:
2913                 return Ok(i)
2914
2915         return TErr("This line has no strings that need merging.")
2916
2917     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
2918         new_line = line
2919         rblc_result = self.__remove_backslash_line_continuation_chars(
2920             new_line, string_idx
2921         )
2922         if isinstance(rblc_result, Ok):
2923             new_line = rblc_result.ok()
2924
2925         msg_result = self.__merge_string_group(new_line, string_idx)
2926         if isinstance(msg_result, Ok):
2927             new_line = msg_result.ok()
2928
2929         if isinstance(rblc_result, Err) and isinstance(msg_result, Err):
2930             msg_cant_transform = msg_result.err()
2931             rblc_cant_transform = rblc_result.err()
2932             cant_transform = CannotTransform(
2933                 "StringMerger failed to merge any strings in this line."
2934             )
2935
2936             # Chain the errors together using `__cause__`.
2937             msg_cant_transform.__cause__ = rblc_cant_transform
2938             cant_transform.__cause__ = msg_cant_transform
2939
2940             yield Err(cant_transform)
2941         else:
2942             yield Ok(new_line)
2943
2944     @staticmethod
2945     def __remove_backslash_line_continuation_chars(
2946         line: Line, string_idx: int
2947     ) -> TResult[Line]:
2948         """
2949         Merge strings that were split across multiple lines using
2950         line-continuation backslashes.
2951
2952         Returns:
2953             Ok(new_line), if @line contains backslash line-continuation
2954             characters.
2955                 OR
2956             Err(CannotTransform), otherwise.
2957         """
2958         LL = line.leaves
2959
2960         string_leaf = LL[string_idx]
2961         if not (
2962             string_leaf.type == token.STRING
2963             and "\\\n" in string_leaf.value
2964             and not has_triple_quotes(string_leaf.value)
2965         ):
2966             return TErr(
2967                 f"String leaf {string_leaf} does not contain any backslash line"
2968                 " continuation characters."
2969             )
2970
2971         new_line = line.clone()
2972         new_line.comments = line.comments
2973         append_leaves(new_line, line, LL)
2974
2975         new_string_leaf = new_line.leaves[string_idx]
2976         new_string_leaf.value = new_string_leaf.value.replace("\\\n", "")
2977
2978         return Ok(new_line)
2979
2980     def __merge_string_group(self, line: Line, string_idx: int) -> TResult[Line]:
2981         """
2982         Merges string group (i.e. set of adjacent strings) where the first
2983         string in the group is `line.leaves[string_idx]`.
2984
2985         Returns:
2986             Ok(new_line), if ALL of the validation checks found in
2987             __validate_msg(...) pass.
2988                 OR
2989             Err(CannotTransform), otherwise.
2990         """
2991         LL = line.leaves
2992
2993         is_valid_index = is_valid_index_factory(LL)
2994
2995         vresult = self.__validate_msg(line, string_idx)
2996         if isinstance(vresult, Err):
2997             return vresult
2998
2999         # If the string group is wrapped inside an Atom node, we must make sure
3000         # to later replace that Atom with our new (merged) string leaf.
3001         atom_node = LL[string_idx].parent
3002
3003         # We will place BREAK_MARK in between every two substrings that we
3004         # merge. We will then later go through our final result and use the
3005         # various instances of BREAK_MARK we find to add the right values to
3006         # the custom split map.
3007         BREAK_MARK = "@@@@@ BLACK BREAKPOINT MARKER @@@@@"
3008
3009         QUOTE = LL[string_idx].value[-1]
3010
3011         def make_naked(string: str, string_prefix: str) -> str:
3012             """Strip @string (i.e. make it a "naked" string)
3013
3014             Pre-conditions:
3015                 * assert_is_leaf_string(@string)
3016
3017             Returns:
3018                 A string that is identical to @string except that
3019                 @string_prefix has been stripped, the surrounding QUOTE
3020                 characters have been removed, and any remaining QUOTE
3021                 characters have been escaped.
3022             """
3023             assert_is_leaf_string(string)
3024
3025             RE_EVEN_BACKSLASHES = r"(?:(?<!\\)(?:\\\\)*)"
3026             naked_string = string[len(string_prefix) + 1 : -1]
3027             naked_string = re.sub(
3028                 "(" + RE_EVEN_BACKSLASHES + ")" + QUOTE, r"\1\\" + QUOTE, naked_string
3029             )
3030             return naked_string
3031
3032         # Holds the CustomSplit objects that will later be added to the custom
3033         # split map.
3034         custom_splits = []
3035
3036         # Temporary storage for the 'has_prefix' part of the CustomSplit objects.
3037         prefix_tracker = []
3038
3039         # Sets the 'prefix' variable. This is the prefix that the final merged
3040         # string will have.
3041         next_str_idx = string_idx
3042         prefix = ""
3043         while (
3044             not prefix
3045             and is_valid_index(next_str_idx)
3046             and LL[next_str_idx].type == token.STRING
3047         ):
3048             prefix = get_string_prefix(LL[next_str_idx].value)
3049             next_str_idx += 1
3050
3051         # The next loop merges the string group. The final string will be
3052         # contained in 'S'.
3053         #
3054         # The following convenience variables are used:
3055         #
3056         #   S: string
3057         #   NS: naked string
3058         #   SS: next string
3059         #   NSS: naked next string
3060         S = ""
3061         NS = ""
3062         num_of_strings = 0
3063         next_str_idx = string_idx
3064         while is_valid_index(next_str_idx) and LL[next_str_idx].type == token.STRING:
3065             num_of_strings += 1
3066
3067             SS = LL[next_str_idx].value
3068             next_prefix = get_string_prefix(SS)
3069
3070             # If this is an f-string group but this substring is not prefixed
3071             # with 'f'...
3072             if "f" in prefix and "f" not in next_prefix:
3073                 # Then we must escape any braces contained in this substring.
3074                 SS = re.subf(r"(\{|\})", "{1}{1}", SS)
3075
3076             NSS = make_naked(SS, next_prefix)
3077
3078             has_prefix = bool(next_prefix)
3079             prefix_tracker.append(has_prefix)
3080
3081             S = prefix + QUOTE + NS + NSS + BREAK_MARK + QUOTE
3082             NS = make_naked(S, prefix)
3083
3084             next_str_idx += 1
3085
3086         S_leaf = Leaf(token.STRING, S)
3087         if self.normalize_strings:
3088             normalize_string_quotes(S_leaf)
3089
3090         # Fill the 'custom_splits' list with the appropriate CustomSplit objects.
3091         temp_string = S_leaf.value[len(prefix) + 1 : -1]
3092         for has_prefix in prefix_tracker:
3093             mark_idx = temp_string.find(BREAK_MARK)
3094             assert (
3095                 mark_idx >= 0
3096             ), "Logic error while filling the custom string breakpoint cache."
3097
3098             temp_string = temp_string[mark_idx + len(BREAK_MARK) :]
3099             breakpoint_idx = mark_idx + (len(prefix) if has_prefix else 0) + 1
3100             custom_splits.append(CustomSplit(has_prefix, breakpoint_idx))
3101
3102         string_leaf = Leaf(token.STRING, S_leaf.value.replace(BREAK_MARK, ""))
3103
3104         if atom_node is not None:
3105             replace_child(atom_node, string_leaf)
3106
3107         # Build the final line ('new_line') that this method will later return.
3108         new_line = line.clone()
3109         for (i, leaf) in enumerate(LL):
3110             if i == string_idx:
3111                 new_line.append(string_leaf)
3112
3113             if string_idx <= i < string_idx + num_of_strings:
3114                 for comment_leaf in line.comments_after(LL[i]):
3115                     new_line.append(comment_leaf, preformatted=True)
3116                 continue
3117
3118             append_leaves(new_line, line, [leaf])
3119
3120         self.add_custom_splits(string_leaf.value, custom_splits)
3121         return Ok(new_line)
3122
3123     @staticmethod
3124     def __validate_msg(line: Line, string_idx: int) -> TResult[None]:
3125         """Validate (M)erge (S)tring (G)roup
3126
3127         Transform-time string validation logic for __merge_string_group(...).
3128
3129         Returns:
3130             * Ok(None), if ALL validation checks (listed below) pass.
3131                 OR
3132             * Err(CannotTransform), if any of the following are true:
3133                 - The target string is not in a string group (i.e. it has no
3134                   adjacent strings).
3135                 - The string group has more than one inline comment.
3136                 - The string group has an inline comment that appears to be a pragma.
3137                 - The set of all string prefixes in the string group is of
3138                   length greater than one and is not equal to {"", "f"}.
3139                 - The string group consists of raw strings.
3140         """
3141         num_of_inline_string_comments = 0
3142         set_of_prefixes = set()
3143         num_of_strings = 0
3144         for leaf in line.leaves[string_idx:]:
3145             if leaf.type != token.STRING:
3146                 # If the string group is trailed by a comma, we count the
3147                 # comments trailing the comma to be one of the string group's
3148                 # comments.
3149                 if leaf.type == token.COMMA and id(leaf) in line.comments:
3150                     num_of_inline_string_comments += 1
3151                 break
3152
3153             if has_triple_quotes(leaf.value):
3154                 return TErr("StringMerger does NOT merge multiline strings.")
3155
3156             num_of_strings += 1
3157             prefix = get_string_prefix(leaf.value)
3158             if "r" in prefix:
3159                 return TErr("StringMerger does NOT merge raw strings.")
3160
3161             set_of_prefixes.add(prefix)
3162
3163             if id(leaf) in line.comments:
3164                 num_of_inline_string_comments += 1
3165                 if contains_pragma_comment(line.comments[id(leaf)]):
3166                     return TErr("Cannot merge strings which have pragma comments.")
3167
3168         if num_of_strings < 2:
3169             return TErr(
3170                 f"Not enough strings to merge (num_of_strings={num_of_strings})."
3171             )
3172
3173         if num_of_inline_string_comments > 1:
3174             return TErr(
3175                 f"Too many inline string comments ({num_of_inline_string_comments})."
3176             )
3177
3178         if len(set_of_prefixes) > 1 and set_of_prefixes != {"", "f"}:
3179             return TErr(f"Too many different prefixes ({set_of_prefixes}).")
3180
3181         return Ok(None)
3182
3183
3184 class StringParenStripper(StringTransformer):
3185     """StringTransformer that strips surrounding parentheses from strings.
3186
3187     Requirements:
3188         The line contains a string which is surrounded by parentheses and:
3189             - The target string is NOT the only argument to a function call).
3190             - The RPAR is NOT followed by an attribute access (i.e. a dot).
3191
3192     Transformations:
3193         The parentheses mentioned in the 'Requirements' section are stripped.
3194
3195     Collaborations:
3196         StringParenStripper has its own inherent usefulness, but it is also
3197         relied on to clean up the parentheses created by StringParenWrapper (in
3198         the event that they are no longer needed).
3199     """
3200
3201     def do_match(self, line: Line) -> TMatchResult:
3202         LL = line.leaves
3203
3204         is_valid_index = is_valid_index_factory(LL)
3205
3206         for (idx, leaf) in enumerate(LL):
3207             # Should be a string...
3208             if leaf.type != token.STRING:
3209                 continue
3210
3211             # Should be preceded by a non-empty LPAR...
3212             if (
3213                 not is_valid_index(idx - 1)
3214                 or LL[idx - 1].type != token.LPAR
3215                 or is_empty_lpar(LL[idx - 1])
3216             ):
3217                 continue
3218
3219             # That LPAR should NOT be preceded by a function name or a closing
3220             # bracket (which could be a function which returns a function or a
3221             # list/dictionary that contains a function)...
3222             if is_valid_index(idx - 2) and (
3223                 LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
3224             ):
3225                 continue
3226
3227             string_idx = idx
3228
3229             # Skip the string trailer, if one exists.
3230             string_parser = StringParser()
3231             next_idx = string_parser.parse(LL, string_idx)
3232
3233             # Should be followed by a non-empty RPAR...
3234             if (
3235                 is_valid_index(next_idx)
3236                 and LL[next_idx].type == token.RPAR
3237                 and not is_empty_rpar(LL[next_idx])
3238             ):
3239                 # That RPAR should NOT be followed by a '.' symbol.
3240                 if is_valid_index(next_idx + 1) and LL[next_idx + 1].type == token.DOT:
3241                     continue
3242
3243                 return Ok(string_idx)
3244
3245         return TErr("This line has no strings wrapped in parens.")
3246
3247     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3248         LL = line.leaves
3249
3250         string_parser = StringParser()
3251         rpar_idx = string_parser.parse(LL, string_idx)
3252
3253         for leaf in (LL[string_idx - 1], LL[rpar_idx]):
3254             if line.comments_after(leaf):
3255                 yield TErr(
3256                     "Will not strip parentheses which have comments attached to them."
3257                 )
3258
3259         new_line = line.clone()
3260         new_line.comments = line.comments.copy()
3261
3262         append_leaves(new_line, line, LL[: string_idx - 1])
3263
3264         string_leaf = Leaf(token.STRING, LL[string_idx].value)
3265         LL[string_idx - 1].remove()
3266         replace_child(LL[string_idx], string_leaf)
3267         new_line.append(string_leaf)
3268
3269         append_leaves(
3270             new_line, line, LL[string_idx + 1 : rpar_idx] + LL[rpar_idx + 1 :],
3271         )
3272
3273         LL[rpar_idx].remove()
3274
3275         yield Ok(new_line)
3276
3277
3278 class BaseStringSplitter(StringTransformer):
3279     """
3280     Abstract class for StringTransformers which transform a Line's strings by splitting
3281     them or placing them on their own lines where necessary to avoid going over
3282     the configured line length.
3283
3284     Requirements:
3285         * The target string value is responsible for the line going over the
3286         line length limit. It follows that after all of black's other line
3287         split methods have been exhausted, this line (or one of the resulting
3288         lines after all line splits are performed) would still be over the
3289         line_length limit unless we split this string.
3290             AND
3291         * The target string is NOT a "pointless" string (i.e. a string that has
3292         no parent or siblings).
3293             AND
3294         * The target string is not followed by an inline comment that appears
3295         to be a pragma.
3296             AND
3297         * The target string is not a multiline (i.e. triple-quote) string.
3298     """
3299
3300     @abstractmethod
3301     def do_splitter_match(self, line: Line) -> TMatchResult:
3302         """
3303         BaseStringSplitter asks its clients to override this method instead of
3304         `StringTransformer.do_match(...)`.
3305
3306         Follows the same protocol as `StringTransformer.do_match(...)`.
3307
3308         Refer to `help(StringTransformer.do_match)` for more information.
3309         """
3310
3311     def do_match(self, line: Line) -> TMatchResult:
3312         match_result = self.do_splitter_match(line)
3313         if isinstance(match_result, Err):
3314             return match_result
3315
3316         string_idx = match_result.ok()
3317         vresult = self.__validate(line, string_idx)
3318         if isinstance(vresult, Err):
3319             return vresult
3320
3321         return match_result
3322
3323     def __validate(self, line: Line, string_idx: int) -> TResult[None]:
3324         """
3325         Checks that @line meets all of the requirements listed in this classes'
3326         docstring. Refer to `help(BaseStringSplitter)` for a detailed
3327         description of those requirements.
3328
3329         Returns:
3330             * Ok(None), if ALL of the requirements are met.
3331                 OR
3332             * Err(CannotTransform), if ANY of the requirements are NOT met.
3333         """
3334         LL = line.leaves
3335
3336         string_leaf = LL[string_idx]
3337
3338         max_string_length = self.__get_max_string_length(line, string_idx)
3339         if len(string_leaf.value) <= max_string_length:
3340             return TErr(
3341                 "The string itself is not what is causing this line to be too long."
3342             )
3343
3344         if not string_leaf.parent or [L.type for L in string_leaf.parent.children] == [
3345             token.STRING,
3346             token.NEWLINE,
3347         ]:
3348             return TErr(
3349                 f"This string ({string_leaf.value}) appears to be pointless (i.e. has"
3350                 " no parent)."
3351             )
3352
3353         if id(line.leaves[string_idx]) in line.comments and contains_pragma_comment(
3354             line.comments[id(line.leaves[string_idx])]
3355         ):
3356             return TErr(
3357                 "Line appears to end with an inline pragma comment. Splitting the line"
3358                 " could modify the pragma's behavior."
3359             )
3360
3361         if has_triple_quotes(string_leaf.value):
3362             return TErr("We cannot split multiline strings.")
3363
3364         return Ok(None)
3365
3366     def __get_max_string_length(self, line: Line, string_idx: int) -> int:
3367         """
3368         Calculates the max string length used when attempting to determine
3369         whether or not the target string is responsible for causing the line to
3370         go over the line length limit.
3371
3372         WARNING: This method is tightly coupled to both StringSplitter and
3373         (especially) StringParenWrapper. There is probably a better way to
3374         accomplish what is being done here.
3375
3376         Returns:
3377             max_string_length: such that `line.leaves[string_idx].value >
3378             max_string_length` implies that the target string IS responsible
3379             for causing this line to exceed the line length limit.
3380         """
3381         LL = line.leaves
3382
3383         is_valid_index = is_valid_index_factory(LL)
3384
3385         # We use the shorthand "WMA4" in comments to abbreviate "We must
3386         # account for". When giving examples, we use STRING to mean some/any
3387         # valid string.
3388         #
3389         # Finally, we use the following convenience variables:
3390         #
3391         #   P:  The leaf that is before the target string leaf.
3392         #   N:  The leaf that is after the target string leaf.
3393         #   NN: The leaf that is after N.
3394
3395         # WMA4 the whitespace at the beginning of the line.
3396         offset = line.depth * 4
3397
3398         if is_valid_index(string_idx - 1):
3399             p_idx = string_idx - 1
3400             if (
3401                 LL[string_idx - 1].type == token.LPAR
3402                 and LL[string_idx - 1].value == ""
3403                 and string_idx >= 2
3404             ):
3405                 # If the previous leaf is an empty LPAR placeholder, we should skip it.
3406                 p_idx -= 1
3407
3408             P = LL[p_idx]
3409             if P.type == token.PLUS:
3410                 # WMA4 a space and a '+' character (e.g. `+ STRING`).
3411                 offset += 2
3412
3413             if P.type == token.COMMA:
3414                 # WMA4 a space, a comma, and a closing bracket [e.g. `), STRING`].
3415                 offset += 3
3416
3417             if P.type in [token.COLON, token.EQUAL, token.NAME]:
3418                 # This conditional branch is meant to handle dictionary keys,
3419                 # variable assignments, 'return STRING' statement lines, and
3420                 # 'else STRING' ternary expression lines.
3421
3422                 # WMA4 a single space.
3423                 offset += 1
3424
3425                 # WMA4 the lengths of any leaves that came before that space.
3426                 for leaf in LL[: p_idx + 1]:
3427                     offset += len(str(leaf))
3428
3429         if is_valid_index(string_idx + 1):
3430             N = LL[string_idx + 1]
3431             if N.type == token.RPAR and N.value == "" and len(LL) > string_idx + 2:
3432                 # If the next leaf is an empty RPAR placeholder, we should skip it.
3433                 N = LL[string_idx + 2]
3434
3435             if N.type == token.COMMA:
3436                 # WMA4 a single comma at the end of the string (e.g `STRING,`).
3437                 offset += 1
3438
3439             if is_valid_index(string_idx + 2):
3440                 NN = LL[string_idx + 2]
3441
3442                 if N.type == token.DOT and NN.type == token.NAME:
3443                     # This conditional branch is meant to handle method calls invoked
3444                     # off of a string literal up to and including the LPAR character.
3445
3446                     # WMA4 the '.' character.
3447                     offset += 1
3448
3449                     if (
3450                         is_valid_index(string_idx + 3)
3451                         and LL[string_idx + 3].type == token.LPAR
3452                     ):
3453                         # WMA4 the left parenthesis character.
3454                         offset += 1
3455
3456                     # WMA4 the length of the method's name.
3457                     offset += len(NN.value)
3458
3459         has_comments = False
3460         for comment_leaf in line.comments_after(LL[string_idx]):
3461             if not has_comments:
3462                 has_comments = True
3463                 # WMA4 two spaces before the '#' character.
3464                 offset += 2
3465
3466             # WMA4 the length of the inline comment.
3467             offset += len(comment_leaf.value)
3468
3469         max_string_length = self.line_length - offset
3470         return max_string_length
3471
3472
3473 class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
3474     """
3475     StringTransformer that splits "atom" strings (i.e. strings which exist on
3476     lines by themselves).
3477
3478     Requirements:
3479         * The line consists ONLY of a single string (with the exception of a
3480         '+' symbol which MAY exist at the start of the line), MAYBE a string
3481         trailer, and MAYBE a trailing comma.
3482             AND
3483         * All of the requirements listed in BaseStringSplitter's docstring.
3484
3485     Transformations:
3486         The string mentioned in the 'Requirements' section is split into as
3487         many substrings as necessary to adhere to the configured line length.
3488
3489         In the final set of substrings, no substring should be smaller than
3490         MIN_SUBSTR_SIZE characters.
3491
3492         The string will ONLY be split on spaces (i.e. each new substring should
3493         start with a space).
3494
3495         If the string is an f-string, it will NOT be split in the middle of an
3496         f-expression (e.g. in f"FooBar: {foo() if x else bar()}", {foo() if x
3497         else bar()} is an f-expression).
3498
3499         If the string that is being split has an associated set of custom split
3500         records and those custom splits will NOT result in any line going over
3501         the configured line length, those custom splits are used. Otherwise the
3502         string is split as late as possible (from left-to-right) while still
3503         adhering to the transformation rules listed above.
3504
3505     Collaborations:
3506         StringSplitter relies on StringMerger to construct the appropriate
3507         CustomSplit objects and add them to the custom split map.
3508     """
3509
3510     MIN_SUBSTR_SIZE = 6
3511     # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
3512     RE_FEXPR = r"""
3513     (?<!\{)\{
3514         (?:
3515             [^\{\}]
3516             | \{\{
3517             | \}\}
3518         )+?
3519     (?<!\})(?:\}\})*\}(?!\})
3520     """
3521
3522     def do_splitter_match(self, line: Line) -> TMatchResult:
3523         LL = line.leaves
3524
3525         is_valid_index = is_valid_index_factory(LL)
3526
3527         idx = 0
3528
3529         # The first leaf MAY be a '+' symbol...
3530         if is_valid_index(idx) and LL[idx].type == token.PLUS:
3531             idx += 1
3532
3533         # The next/first leaf MAY be an empty LPAR...
3534         if is_valid_index(idx) and is_empty_lpar(LL[idx]):
3535             idx += 1
3536
3537         # The next/first leaf MUST be a string...
3538         if not is_valid_index(idx) or LL[idx].type != token.STRING:
3539             return TErr("Line does not start with a string.")
3540
3541         string_idx = idx
3542
3543         # Skip the string trailer, if one exists.
3544         string_parser = StringParser()
3545         idx = string_parser.parse(LL, string_idx)
3546
3547         # That string MAY be followed by an empty RPAR...
3548         if is_valid_index(idx) and is_empty_rpar(LL[idx]):
3549             idx += 1
3550
3551         # That string / empty RPAR leaf MAY be followed by a comma...
3552         if is_valid_index(idx) and LL[idx].type == token.COMMA:
3553             idx += 1
3554
3555         # But no more leaves are allowed...
3556         if is_valid_index(idx):
3557             return TErr("This line does not end with a string.")
3558
3559         return Ok(string_idx)
3560
3561     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
3562         LL = line.leaves
3563
3564         QUOTE = LL[string_idx].value[-1]
3565
3566         is_valid_index = is_valid_index_factory(LL)
3567         insert_str_child = insert_str_child_factory(LL[string_idx])
3568
3569         prefix = get_string_prefix(LL[string_idx].value)
3570
3571         # We MAY choose to drop the 'f' prefix from substrings that don't
3572         # contain any f-expressions, but ONLY if the original f-string
3573         # containes at least one f-expression. Otherwise, we will alter the AST
3574         # of the program.
3575         drop_pointless_f_prefix = ("f" in prefix) and re.search(
3576             self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
3577         )
3578
3579         first_string_line = True
3580         starts_with_plus = LL[0].type == token.PLUS
3581
3582         def line_needs_plus() -> bool:
3583             return first_string_line and starts_with_plus
3584
3585         def maybe_append_plus(new_line: Line) -> None:
3586             """
3587             Side Effects:
3588                 If @line starts with a plus and this is the first line we are
3589                 constructing, this function appends a PLUS leaf to @new_line
3590                 and replaces the old PLUS leaf in the node structure. Otherwise
3591                 this function does nothing.
3592             """
3593             if line_needs_plus():
3594                 plus_leaf = Leaf(token.PLUS, "+")
3595                 replace_child(LL[0], plus_leaf)
3596                 new_line.append(plus_leaf)
3597
3598         ends_with_comma = (
3599             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
3600         )
3601
3602         def max_last_string() -> int:
3603             """
3604             Returns:
3605                 The max allowed length of the string value used for the last
3606                 line we will construct.
3607             """
3608             result = self.line_length
3609             result -= line.depth * 4
3610             result -= 1 if ends_with_comma else 0
3611             result -= 2 if line_needs_plus() else 0
3612             return result
3613
3614         # --- Calculate Max Break Index (for string value)
3615         # We start with the line length limit
3616         max_break_idx = self.line_length
3617         # The last index of a string of length N is N-1.
3618         max_break_idx -= 1
3619         # Leading whitespace is not present in the string value (e.g. Leaf.value).
3620         max_break_idx -= line.depth * 4
3621         if max_break_idx < 0:
3622             yield TErr(
3623                 f"Unable to split {LL[string_idx].value} at such high of a line depth:"
3624                 f" {line.depth}"
3625             )
3626             return
3627
3628         # Check if StringMerger registered any custom splits.
3629         custom_splits = self.pop_custom_splits(LL[string_idx].value)
3630         # We use them ONLY if none of them would produce lines that exceed the
3631         # line limit.
3632         use_custom_breakpoints = bool(
3633             custom_splits
3634             and all(csplit.break_idx <= max_break_idx for csplit in custom_splits)
3635         )
3636
3637         # Temporary storage for the remaining chunk of the string line that
3638         # can't fit onto the line currently being constructed.
3639         rest_value = LL[string_idx].value
3640
3641         def more_splits_should_be_made() -> bool:
3642             """
3643             Returns:
3644                 True iff `rest_value` (the remaining string value from the last
3645                 split), should be split again.
3646             """
3647             if use_custom_breakpoints:
3648                 return len(custom_splits) > 1
3649             else:
3650                 return len(rest_value) > max_last_string()
3651
3652         string_line_results: List[Ok[Line]] = []
3653         while more_splits_should_be_made():
3654             if use_custom_breakpoints:
3655                 # Custom User Split (manual)
3656                 csplit = custom_splits.pop(0)
3657                 break_idx = csplit.break_idx
3658             else:
3659                 # Algorithmic Split (automatic)
3660                 max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
3661                 maybe_break_idx = self.__get_break_idx(rest_value, max_bidx)
3662                 if maybe_break_idx is None:
3663                     # If we are unable to algorthmically determine a good split
3664                     # and this string has custom splits registered to it, we
3665                     # fall back to using them--which means we have to start
3666                     # over from the beginning.
3667                     if custom_splits:
3668                         rest_value = LL[string_idx].value
3669                         string_line_results = []
3670                         first_string_line = True
3671                         use_custom_breakpoints = True
3672                         continue
3673
3674                     # Otherwise, we stop splitting here.
3675                     break
3676
3677                 break_idx = maybe_break_idx
3678
3679             # --- Construct `next_value`
3680             next_value = rest_value[:break_idx] + QUOTE
3681             if (
3682                 # Are we allowed to try to drop a pointless 'f' prefix?
3683                 drop_pointless_f_prefix
3684                 # If we are, will we be successful?
3685                 and next_value != self.__normalize_f_string(next_value, prefix)
3686             ):
3687                 # If the current custom split did NOT originally use a prefix,
3688                 # then `csplit.break_idx` will be off by one after removing
3689                 # the 'f' prefix.
3690                 break_idx = (
3691                     break_idx + 1
3692                     if use_custom_breakpoints and not csplit.has_prefix
3693                     else break_idx
3694                 )
3695                 next_value = rest_value[:break_idx] + QUOTE
3696                 next_value = self.__normalize_f_string(next_value, prefix)
3697
3698             # --- Construct `next_leaf`
3699             next_leaf = Leaf(token.STRING, next_value)
3700             insert_str_child(next_leaf)
3701             self.__maybe_normalize_string_quotes(next_leaf)
3702
3703             # --- Construct `next_line`
3704             next_line = line.clone()
3705             maybe_append_plus(next_line)
3706             next_line.append(next_leaf)
3707             string_line_results.append(Ok(next_line))
3708
3709             rest_value = prefix + QUOTE + rest_value[break_idx:]
3710             first_string_line = False
3711
3712         yield from string_line_results
3713
3714         if drop_pointless_f_prefix:
3715             rest_value = self.__normalize_f_string(rest_value, prefix)
3716
3717         rest_leaf = Leaf(token.STRING, rest_value)
3718         insert_str_child(rest_leaf)
3719
3720         # NOTE: I could not find a test case that verifies that the following
3721         # line is actually necessary, but it seems to be. Otherwise we risk
3722         # not normalizing the last substring, right?
3723         self.__maybe_normalize_string_quotes(rest_leaf)
3724
3725         last_line = line.clone()
3726         maybe_append_plus(last_line)
3727
3728         # If there are any leaves to the right of the target string...
3729         if is_valid_index(string_idx + 1):
3730             # We use `temp_value` here to determine how long the last line
3731             # would be if we were to append all the leaves to the right of the
3732             # target string to the last string line.
3733             temp_value = rest_value
3734             for leaf in LL[string_idx + 1 :]:
3735                 temp_value += str(leaf)
3736                 if leaf.type == token.LPAR:
3737                     break
3738
3739             # Try to fit them all on the same line with the last substring...
3740             if (
3741                 len(temp_value) <= max_last_string()
3742                 or LL[string_idx + 1].type == token.COMMA
3743             ):
3744                 last_line.append(rest_leaf)
3745                 append_leaves(last_line, line, LL[string_idx + 1 :])
3746                 yield Ok(last_line)
3747             # Otherwise, place the last substring on one line and everything
3748             # else on a line below that...
3749             else:
3750                 last_line.append(rest_leaf)
3751                 yield Ok(last_line)
3752
3753                 non_string_line = line.clone()
3754                 append_leaves(non_string_line, line, LL[string_idx + 1 :])
3755                 yield Ok(non_string_line)
3756         # Else the target string was the last leaf...
3757         else:
3758             last_line.append(rest_leaf)
3759             last_line.comments = line.comments.copy()
3760             yield Ok(last_line)
3761
3762     def __get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
3763         """
3764         This method contains the algorithm that StringSplitter uses to
3765         determine which character to split each string at.
3766
3767         Args:
3768             @string: The substring that we are attempting to split.
3769             @max_break_idx: The ideal break index. We will return this value if it
3770             meets all the necessary conditions. In the likely event that it
3771             doesn't we will try to find the closest index BELOW @max_break_idx
3772             that does. If that fails, we will expand our search by also
3773             considering all valid indices ABOVE @max_break_idx.
3774
3775         Pre-Conditions:
3776             * assert_is_leaf_string(@string)
3777             * 0 <= @max_break_idx < len(@string)
3778
3779         Returns:
3780             break_idx, if an index is able to be found that meets all of the
3781             conditions listed in the 'Transformations' section of this classes'
3782             docstring.
3783                 OR
3784             None, otherwise.
3785         """
3786         is_valid_index = is_valid_index_factory(string)
3787
3788         assert is_valid_index(max_break_idx)
3789         assert_is_leaf_string(string)
3790
3791         _fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
3792
3793         def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
3794             """
3795             Yields:
3796                 All ranges of @string which, if @string were to be split there,
3797                 would result in the splitting of an f-expression (which is NOT
3798                 allowed).
3799             """
3800             nonlocal _fexpr_slices
3801
3802             if _fexpr_slices is None:
3803                 _fexpr_slices = []
3804                 for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
3805                     _fexpr_slices.append(match.span())
3806
3807             yield from _fexpr_slices
3808
3809         is_fstring = "f" in get_string_prefix(string)
3810
3811         def breaks_fstring_expression(i: Index) -> bool:
3812             """
3813             Returns:
3814                 True iff returning @i would result in the splitting of an
3815                 f-expression (which is NOT allowed).
3816             """
3817             if not is_fstring:
3818                 return False
3819
3820             for (start, end) in fexpr_slices():
3821                 if start <= i < end:
3822                     return True
3823
3824             return False
3825
3826         def passes_all_checks(i: Index) -> bool:
3827             """
3828             Returns:
3829                 True iff ALL of the conditions listed in the 'Transformations'
3830                 section of this classes' docstring would be be met by returning @i.
3831             """
3832             is_space = string[i] == " "
3833             is_big_enough = (
3834                 len(string[i:]) >= self.MIN_SUBSTR_SIZE
3835                 and len(string[:i]) >= self.MIN_SUBSTR_SIZE
3836             )
3837             return is_space and is_big_enough and not breaks_fstring_expression(i)
3838
3839         # First, we check all indices BELOW @max_break_idx.
3840         break_idx = max_break_idx
3841         while is_valid_index(break_idx - 1) and not passes_all_checks(break_idx):
3842             break_idx -= 1
3843
3844         if not passes_all_checks(break_idx):
3845             # If that fails, we check all indices ABOVE @max_break_idx.
3846             #
3847             # If we are able to find a valid index here, the next line is going
3848             # to be longer than the specified line length, but it's probably
3849             # better than doing nothing at all.
3850             break_idx = max_break_idx + 1
3851             while is_valid_index(break_idx + 1) and not passes_all_checks(break_idx):
3852                 break_idx += 1
3853
3854             if not is_valid_index(break_idx) or not passes_all_checks(break_idx):
3855                 return None
3856
3857         return break_idx
3858
3859     def __maybe_normalize_string_quotes(self, leaf: Leaf) -> None:
3860         if self.normalize_strings:
3861             normalize_string_quotes(leaf)
3862
3863     def __normalize_f_string(self, string: str, prefix: str) -> str:
3864         """
3865         Pre-Conditions:
3866             * assert_is_leaf_string(@string)
3867
3868         Returns:
3869             * If @string is an f-string that contains no f-expressions, we
3870             return a string identical to @string except that the 'f' prefix
3871             has been stripped and all double braces (i.e. '{{' or '}}') have
3872             been normalized (i.e. turned into '{' or '}').
3873                 OR
3874             * Otherwise, we return @string.
3875         """
3876         assert_is_leaf_string(string)
3877
3878         if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
3879             new_prefix = prefix.replace("f", "")
3880
3881             temp = string[len(prefix) :]
3882             temp = re.sub(r"\{\{", "{", temp)
3883             temp = re.sub(r"\}\}", "}", temp)
3884             new_string = temp
3885
3886             return f"{new_prefix}{new_string}"
3887         else:
3888             return string
3889
3890
3891 class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
3892     """
3893     StringTransformer that splits non-"atom" strings (i.e. strings that do not
3894     exist on lines by themselves).
3895
3896     Requirements:
3897         All of the requirements listed in BaseStringSplitter's docstring in
3898         addition to the requirements listed below:
3899
3900         * The line is a return/yield statement, which returns/yields a string.
3901             OR
3902         * The line is part of a ternary expression (e.g. `x = y if cond else
3903         z`) such that the line starts with `else <string>`, where <string> is
3904         some string.
3905             OR
3906         * The line is an assert statement, which ends with a string.
3907             OR
3908         * The line is an assignment statement (e.g. `x = <string>` or `x +=
3909         <string>`) such that the variable is being assigned the value of some
3910         string.
3911             OR
3912         * The line is a dictionary key assignment where some valid key is being
3913         assigned the value of some string.
3914
3915     Transformations:
3916         The chosen string is wrapped in parentheses and then split at the LPAR.
3917
3918         We then have one line which ends with an LPAR and another line that
3919         starts with the chosen string. The latter line is then split again at
3920         the RPAR. This results in the RPAR (and possibly a trailing comma)
3921         being placed on its own line.
3922
3923         NOTE: If any leaves exist to the right of the chosen string (except
3924         for a trailing comma, which would be placed after the RPAR), those
3925         leaves are placed inside the parentheses.  In effect, the chosen
3926         string is not necessarily being "wrapped" by parentheses. We can,
3927         however, count on the LPAR being placed directly before the chosen
3928         string.
3929
3930         In other words, StringParenWrapper creates "atom" strings. These
3931         can then be split again by StringSplitter, if necessary.
3932
3933     Collaborations:
3934         In the event that a string line split by StringParenWrapper is
3935         changed such that it no longer needs to be given its own line,
3936         StringParenWrapper relies on StringParenStripper to clean up the
3937         parentheses it created.
3938     """
3939
3940     def do_splitter_match(self, line: Line) -> TMatchResult:
3941         LL = line.leaves
3942
3943         string_idx = None
3944         string_idx = string_idx or self._return_match(LL)
3945         string_idx = string_idx or self._else_match(LL)
3946         string_idx = string_idx or self._assert_match(LL)
3947         string_idx = string_idx or self._assign_match(LL)
3948         string_idx = string_idx or self._dict_match(LL)
3949
3950         if string_idx is not None:
3951             string_value = line.leaves[string_idx].value
3952             # If the string has no spaces...
3953             if " " not in string_value:
3954                 # And will still violate the line length limit when split...
3955                 max_string_length = self.line_length - ((line.depth + 1) * 4)
3956                 if len(string_value) > max_string_length:
3957                     # And has no associated custom splits...
3958                     if not self.has_custom_splits(string_value):
3959                         # Then we should NOT put this string on its own line.
3960                         return TErr(
3961                             "We do not wrap long strings in parentheses when the"
3962                             " resultant line would still be over the specified line"
3963                             " length and can't be split further by StringSplitter."
3964                         )
3965             return Ok(string_idx)
3966
3967         return TErr("This line does not contain any non-atomic strings.")
3968
3969     @staticmethod
3970     def _return_match(LL: List[Leaf]) -> Optional[int]:
3971         """
3972         Returns:
3973             string_idx such that @LL[string_idx] is equal to our target (i.e.
3974             matched) string, if this line matches the return/yield statement
3975             requirements listed in the 'Requirements' section of this classes'
3976             docstring.
3977                 OR
3978             None, otherwise.
3979         """
3980         # If this line is apart of a return/yield statement and the first leaf
3981         # contains either the "return" or "yield" keywords...
3982         if parent_type(LL[0]) in [syms.return_stmt, syms.yield_expr] and LL[
3983             0
3984         ].value in ["return", "yield"]:
3985             is_valid_index = is_valid_index_factory(LL)
3986
3987             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
3988             # The next visible leaf MUST contain a string...
3989             if is_valid_index(idx) and LL[idx].type == token.STRING:
3990                 return idx
3991
3992         return None
3993
3994     @staticmethod
3995     def _else_match(LL: List[Leaf]) -> Optional[int]:
3996         """
3997         Returns:
3998             string_idx such that @LL[string_idx] is equal to our target (i.e.
3999             matched) string, if this line matches the ternary expression
4000             requirements listed in the 'Requirements' section of this classes'
4001             docstring.
4002                 OR
4003             None, otherwise.
4004         """
4005         # If this line is apart of a ternary expression and the first leaf
4006         # contains the "else" keyword...
4007         if (
4008             parent_type(LL[0]) == syms.test
4009             and LL[0].type == token.NAME
4010             and LL[0].value == "else"
4011         ):
4012             is_valid_index = is_valid_index_factory(LL)
4013
4014             idx = 2 if is_valid_index(1) and is_empty_par(LL[1]) else 1
4015             # The next visible leaf MUST contain a string...
4016             if is_valid_index(idx) and LL[idx].type == token.STRING:
4017                 return idx
4018
4019         return None
4020
4021     @staticmethod
4022     def _assert_match(LL: List[Leaf]) -> Optional[int]:
4023         """
4024         Returns:
4025             string_idx such that @LL[string_idx] is equal to our target (i.e.
4026             matched) string, if this line matches the assert statement
4027             requirements listed in the 'Requirements' section of this classes'
4028             docstring.
4029                 OR
4030             None, otherwise.
4031         """
4032         # If this line is apart of an assert statement and the first leaf
4033         # contains the "assert" keyword...
4034         if parent_type(LL[0]) == syms.assert_stmt and LL[0].value == "assert":
4035             is_valid_index = is_valid_index_factory(LL)
4036
4037             for (i, leaf) in enumerate(LL):
4038                 # We MUST find a comma...
4039                 if leaf.type == token.COMMA:
4040                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4041
4042                     # That comma MUST be followed by a string...
4043                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4044                         string_idx = idx
4045
4046                         # Skip the string trailer, if one exists.
4047                         string_parser = StringParser()
4048                         idx = string_parser.parse(LL, string_idx)
4049
4050                         # But no more leaves are allowed...
4051                         if not is_valid_index(idx):
4052                             return string_idx
4053
4054         return None
4055
4056     @staticmethod
4057     def _assign_match(LL: List[Leaf]) -> Optional[int]:
4058         """
4059         Returns:
4060             string_idx such that @LL[string_idx] is equal to our target (i.e.
4061             matched) string, if this line matches the assignment statement
4062             requirements listed in the 'Requirements' section of this classes'
4063             docstring.
4064                 OR
4065             None, otherwise.
4066         """
4067         # If this line is apart of an expression statement or is a function
4068         # argument AND the first leaf contains a variable name...
4069         if (
4070             parent_type(LL[0]) in [syms.expr_stmt, syms.argument, syms.power]
4071             and LL[0].type == token.NAME
4072         ):
4073             is_valid_index = is_valid_index_factory(LL)
4074
4075             for (i, leaf) in enumerate(LL):
4076                 # We MUST find either an '=' or '+=' symbol...
4077                 if leaf.type in [token.EQUAL, token.PLUSEQUAL]:
4078                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4079
4080                     # That symbol MUST be followed by a string...
4081                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4082                         string_idx = idx
4083
4084                         # Skip the string trailer, if one exists.
4085                         string_parser = StringParser()
4086                         idx = string_parser.parse(LL, string_idx)
4087
4088                         # The next leaf MAY be a comma iff this line is apart
4089                         # of a function argument...
4090                         if (
4091                             parent_type(LL[0]) == syms.argument
4092                             and is_valid_index(idx)
4093                             and LL[idx].type == token.COMMA
4094                         ):
4095                             idx += 1
4096
4097                         # But no more leaves are allowed...
4098                         if not is_valid_index(idx):
4099                             return string_idx
4100
4101         return None
4102
4103     @staticmethod
4104     def _dict_match(LL: List[Leaf]) -> Optional[int]:
4105         """
4106         Returns:
4107             string_idx such that @LL[string_idx] is equal to our target (i.e.
4108             matched) string, if this line matches the dictionary key assignment
4109             statement requirements listed in the 'Requirements' section of this
4110             classes' docstring.
4111                 OR
4112             None, otherwise.
4113         """
4114         # If this line is apart of a dictionary key assignment...
4115         if syms.dictsetmaker in [parent_type(LL[0]), parent_type(LL[0].parent)]:
4116             is_valid_index = is_valid_index_factory(LL)
4117
4118             for (i, leaf) in enumerate(LL):
4119                 # We MUST find a colon...
4120                 if leaf.type == token.COLON:
4121                     idx = i + 2 if is_empty_par(LL[i + 1]) else i + 1
4122
4123                     # That colon MUST be followed by a string...
4124                     if is_valid_index(idx) and LL[idx].type == token.STRING:
4125                         string_idx = idx
4126
4127                         # Skip the string trailer, if one exists.
4128                         string_parser = StringParser()
4129                         idx = string_parser.parse(LL, string_idx)
4130
4131                         # That string MAY be followed by a comma...
4132                         if is_valid_index(idx) and LL[idx].type == token.COMMA:
4133                             idx += 1
4134
4135                         # But no more leaves are allowed...
4136                         if not is_valid_index(idx):
4137                             return string_idx
4138
4139         return None
4140
4141     def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
4142         LL = line.leaves
4143
4144         is_valid_index = is_valid_index_factory(LL)
4145         insert_str_child = insert_str_child_factory(LL[string_idx])
4146
4147         comma_idx = len(LL) - 1
4148         ends_with_comma = False
4149         if LL[comma_idx].type == token.COMMA:
4150             ends_with_comma = True
4151
4152         leaves_to_steal_comments_from = [LL[string_idx]]
4153         if ends_with_comma:
4154             leaves_to_steal_comments_from.append(LL[comma_idx])
4155
4156         # --- First Line
4157         first_line = line.clone()
4158         left_leaves = LL[:string_idx]
4159
4160         # We have to remember to account for (possibly invisible) LPAR and RPAR
4161         # leaves that already wrapped the target string. If these leaves do
4162         # exist, we will replace them with our own LPAR and RPAR leaves.
4163         old_parens_exist = False
4164         if left_leaves and left_leaves[-1].type == token.LPAR:
4165             old_parens_exist = True
4166             leaves_to_steal_comments_from.append(left_leaves[-1])
4167             left_leaves.pop()
4168
4169         append_leaves(first_line, line, left_leaves)
4170
4171         lpar_leaf = Leaf(token.LPAR, "(")
4172         if old_parens_exist:
4173             replace_child(LL[string_idx - 1], lpar_leaf)
4174         else:
4175             insert_str_child(lpar_leaf)
4176         first_line.append(lpar_leaf)
4177
4178         # We throw inline comments that were originally to the right of the
4179         # target string to the top line. They will now be shown to the right of
4180         # the LPAR.
4181         for leaf in leaves_to_steal_comments_from:
4182             for comment_leaf in line.comments_after(leaf):
4183                 first_line.append(comment_leaf, preformatted=True)
4184
4185         yield Ok(first_line)
4186
4187         # --- Middle (String) Line
4188         # We only need to yield one (possibly too long) string line, since the
4189         # `StringSplitter` will break it down further if necessary.
4190         string_value = LL[string_idx].value
4191         string_line = Line(
4192             depth=line.depth + 1,
4193             inside_brackets=True,
4194             should_explode=line.should_explode,
4195         )
4196         string_leaf = Leaf(token.STRING, string_value)
4197         insert_str_child(string_leaf)
4198         string_line.append(string_leaf)
4199
4200         old_rpar_leaf = None
4201         if is_valid_index(string_idx + 1):
4202             right_leaves = LL[string_idx + 1 :]
4203             if ends_with_comma:
4204                 right_leaves.pop()
4205
4206             if old_parens_exist:
4207                 assert (
4208                     right_leaves and right_leaves[-1].type == token.RPAR
4209                 ), "Apparently, old parentheses do NOT exist?!"
4210                 old_rpar_leaf = right_leaves.pop()
4211
4212             append_leaves(string_line, line, right_leaves)
4213
4214         yield Ok(string_line)
4215
4216         # --- Last Line
4217         last_line = line.clone()
4218         last_line.bracket_tracker = first_line.bracket_tracker
4219
4220         new_rpar_leaf = Leaf(token.RPAR, ")")
4221         if old_rpar_leaf is not None:
4222             replace_child(old_rpar_leaf, new_rpar_leaf)
4223         else:
4224             insert_str_child(new_rpar_leaf)
4225         last_line.append(new_rpar_leaf)
4226
4227         # If the target string ended with a comma, we place this comma to the
4228         # right of the RPAR on the last line.
4229         if ends_with_comma:
4230             comma_leaf = Leaf(token.COMMA, ",")
4231             replace_child(LL[comma_idx], comma_leaf)
4232             last_line.append(comma_leaf)
4233
4234         yield Ok(last_line)
4235
4236
4237 class StringParser:
4238     """
4239     A state machine that aids in parsing a string's "trailer", which can be
4240     either non-existant, an old-style formatting sequence (e.g. `% varX` or `%
4241     (varX, varY)`), or a method-call / attribute access (e.g. `.format(varX,
4242     varY)`).
4243
4244     NOTE: A new StringParser object MUST be instantiated for each string
4245     trailer we need to parse.
4246
4247     Examples:
4248         We shall assume that `line` equals the `Line` object that corresponds
4249         to the following line of python code:
4250         ```
4251         x = "Some {}.".format("String") + some_other_string
4252         ```
4253
4254         Furthermore, we will assume that `string_idx` is some index such that:
4255         ```
4256         assert line.leaves[string_idx].value == "Some {}."
4257         ```
4258
4259         The following code snippet then holds:
4260         ```
4261         string_parser = StringParser()
4262         idx = string_parser.parse(line.leaves, string_idx)
4263         assert line.leaves[idx].type == token.PLUS
4264         ```
4265     """
4266
4267     DEFAULT_TOKEN = -1
4268
4269     # String Parser States
4270     START = 1
4271     DOT = 2
4272     NAME = 3
4273     PERCENT = 4
4274     SINGLE_FMT_ARG = 5
4275     LPAR = 6
4276     RPAR = 7
4277     DONE = 8
4278
4279     # Lookup Table for Next State
4280     _goto: Dict[Tuple[ParserState, NodeType], ParserState] = {
4281         # A string trailer may start with '.' OR '%'.
4282         (START, token.DOT): DOT,
4283         (START, token.PERCENT): PERCENT,
4284         (START, DEFAULT_TOKEN): DONE,
4285         # A '.' MUST be followed by an attribute or method name.
4286         (DOT, token.NAME): NAME,
4287         # A method name MUST be followed by an '(', whereas an attribute name
4288         # is the last symbol in the string trailer.
4289         (NAME, token.LPAR): LPAR,
4290         (NAME, DEFAULT_TOKEN): DONE,
4291         # A '%' symbol can be followed by an '(' or a single argument (e.g. a
4292         # string or variable name).
4293         (PERCENT, token.LPAR): LPAR,
4294         (PERCENT, DEFAULT_TOKEN): SINGLE_FMT_ARG,
4295         # If a '%' symbol is followed by a single argument, that argument is
4296         # the last leaf in the string trailer.
4297         (SINGLE_FMT_ARG, DEFAULT_TOKEN): DONE,
4298         # If present, a ')' symbol is the last symbol in a string trailer.
4299         # (NOTE: LPARS and nested RPARS are not included in this lookup table,
4300         # since they are treated as a special case by the parsing logic in this
4301         # classes' implementation.)
4302         (RPAR, DEFAULT_TOKEN): DONE,
4303     }
4304
4305     def __init__(self) -> None:
4306         self._state = self.START
4307         self._unmatched_lpars = 0
4308
4309     def parse(self, leaves: List[Leaf], string_idx: int) -> int:
4310         """
4311         Pre-conditions:
4312             * @leaves[@string_idx].type == token.STRING
4313
4314         Returns:
4315             The index directly after the last leaf which is apart of the string
4316             trailer, if a "trailer" exists.
4317                 OR
4318             @string_idx + 1, if no string "trailer" exists.
4319         """
4320         assert leaves[string_idx].type == token.STRING
4321
4322         idx = string_idx + 1
4323         while idx < len(leaves) and self._next_state(leaves[idx]):
4324             idx += 1
4325         return idx
4326
4327     def _next_state(self, leaf: Leaf) -> bool:
4328         """
4329         Pre-conditions:
4330             * On the first call to this function, @leaf MUST be the leaf that
4331             was directly after the string leaf in question (e.g. if our target
4332             string is `line.leaves[i]` then the first call to this method must
4333             be `line.leaves[i + 1]`).
4334             * On the next call to this function, the leaf paramater passed in
4335             MUST be the leaf directly following @leaf.
4336
4337         Returns:
4338             True iff @leaf is apart of the string's trailer.
4339         """
4340         # We ignore empty LPAR or RPAR leaves.
4341         if is_empty_par(leaf):
4342             return True
4343
4344         next_token = leaf.type
4345         if next_token == token.LPAR:
4346             self._unmatched_lpars += 1
4347
4348         current_state = self._state
4349
4350         # The LPAR parser state is a special case. We will return True until we
4351         # find the matching RPAR token.
4352         if current_state == self.LPAR:
4353             if next_token == token.RPAR:
4354                 self._unmatched_lpars -= 1
4355                 if self._unmatched_lpars == 0:
4356                     self._state = self.RPAR
4357         # Otherwise, we use a lookup table to determine the next state.
4358         else:
4359             # If the lookup table matches the current state to the next
4360             # token, we use the lookup table.
4361             if (current_state, next_token) in self._goto:
4362                 self._state = self._goto[current_state, next_token]
4363             else:
4364                 # Otherwise, we check if a the current state was assigned a
4365                 # default.
4366                 if (current_state, self.DEFAULT_TOKEN) in self._goto:
4367                     self._state = self._goto[current_state, self.DEFAULT_TOKEN]
4368                 # If no default has been assigned, then this parser has a logic
4369                 # error.
4370                 else:
4371                     raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
4372
4373             if self._state == self.DONE:
4374                 return False
4375
4376         return True
4377
4378
4379 def TErr(err_msg: str) -> Err[CannotTransform]:
4380     """(T)ransform Err
4381
4382     Convenience function used when working with the TResult type.
4383     """
4384     cant_transform = CannotTransform(err_msg)
4385     return Err(cant_transform)
4386
4387
4388 def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
4389     """
4390     Returns:
4391         True iff one of the comments in @comment_list is a pragma used by one
4392         of the more common static analysis tools for python (e.g. mypy, flake8,
4393         pylint).
4394     """
4395     for comment in comment_list:
4396         if comment.value.startswith(("# type:", "# noqa", "# pylint:")):
4397             return True
4398
4399     return False
4400
4401
4402 def insert_str_child_factory(string_leaf: Leaf) -> Callable[[LN], None]:
4403     """
4404     Factory for a convenience function that is used to orphan @string_leaf
4405     and then insert multiple new leaves into the same part of the node
4406     structure that @string_leaf had originally occupied.
4407
4408     Examples:
4409         Let `string_leaf = Leaf(token.STRING, '"foo"')` and `N =
4410         string_leaf.parent`. Assume the node `N` has the following
4411         original structure:
4412
4413         Node(
4414             expr_stmt, [
4415                 Leaf(NAME, 'x'),
4416                 Leaf(EQUAL, '='),
4417                 Leaf(STRING, '"foo"'),
4418             ]
4419         )
4420
4421         We then run the code snippet shown below.
4422         ```
4423         insert_str_child = insert_str_child_factory(string_leaf)
4424
4425         lpar = Leaf(token.LPAR, '(')
4426         insert_str_child(lpar)
4427
4428         bar = Leaf(token.STRING, '"bar"')
4429         insert_str_child(bar)
4430
4431         rpar = Leaf(token.RPAR, ')')
4432         insert_str_child(rpar)
4433         ```
4434
4435         After which point, it follows that `string_leaf.parent is None` and
4436         the node `N` now has the following structure:
4437
4438         Node(
4439             expr_stmt, [
4440                 Leaf(NAME, 'x'),
4441                 Leaf(EQUAL, '='),
4442                 Leaf(LPAR, '('),
4443                 Leaf(STRING, '"bar"'),
4444                 Leaf(RPAR, ')'),
4445             ]
4446         )
4447     """
4448     string_parent = string_leaf.parent
4449     string_child_idx = string_leaf.remove()
4450
4451     def insert_str_child(child: LN) -> None:
4452         nonlocal string_child_idx
4453
4454         assert string_parent is not None
4455         assert string_child_idx is not None
4456
4457         string_parent.insert_child(string_child_idx, child)
4458         string_child_idx += 1
4459
4460     return insert_str_child
4461
4462
4463 def has_triple_quotes(string: str) -> bool:
4464     """
4465     Returns:
4466         True iff @string starts with three quotation characters.
4467     """
4468     raw_string = string.lstrip(STRING_PREFIX_CHARS)
4469     return raw_string[:3] in {'"""', "'''"}
4470
4471
4472 def parent_type(node: Optional[LN]) -> Optional[NodeType]:
4473     """
4474     Returns:
4475         @node.parent.type, if @node is not None and has a parent.
4476             OR
4477         None, otherwise.
4478     """
4479     if node is None or node.parent is None:
4480         return None
4481
4482     return node.parent.type
4483
4484
4485 def is_empty_par(leaf: Leaf) -> bool:
4486     return is_empty_lpar(leaf) or is_empty_rpar(leaf)
4487
4488
4489 def is_empty_lpar(leaf: Leaf) -> bool:
4490     return leaf.type == token.LPAR and leaf.value == ""
4491
4492
4493 def is_empty_rpar(leaf: Leaf) -> bool:
4494     return leaf.type == token.RPAR and leaf.value == ""
4495
4496
4497 def is_valid_index_factory(seq: Sequence[Any]) -> Callable[[int], bool]:
4498     """
4499     Examples:
4500         ```
4501         my_list = [1, 2, 3]
4502
4503         is_valid_index = is_valid_index_factory(my_list)
4504
4505         assert is_valid_index(0)
4506         assert is_valid_index(2)
4507
4508         assert not is_valid_index(3)
4509         assert not is_valid_index(-1)
4510         ```
4511     """
4512
4513     def is_valid_index(idx: int) -> bool:
4514         """
4515         Returns:
4516             True iff @idx is positive AND seq[@idx] does NOT raise an
4517             IndexError.
4518         """
4519         return 0 <= idx < len(seq)
4520
4521     return is_valid_index
4522
4523
4524 def line_to_string(line: Line) -> str:
4525     """Returns the string representation of @line.
4526
4527     WARNING: This is known to be computationally expensive.
4528     """
4529     return str(line).strip("\n")
4530
4531
4532 def append_leaves(new_line: Line, old_line: Line, leaves: List[Leaf]) -> None:
4533     """
4534     Append leaves (taken from @old_line) to @new_line, making sure to fix the
4535     underlying Node structure where appropriate.
4536
4537     All of the leaves in @leaves are duplicated. The duplicates are then
4538     appended to @new_line and used to replace their originals in the underlying
4539     Node structure. Any comments attatched to the old leaves are reattached to
4540     the new leaves.
4541
4542     Pre-conditions:
4543         set(@leaves) is a subset of set(@old_line.leaves).
4544     """
4545     for old_leaf in leaves:
4546         assert old_leaf in old_line.leaves
4547
4548         new_leaf = Leaf(old_leaf.type, old_leaf.value)
4549         replace_child(old_leaf, new_leaf)
4550         new_line.append(new_leaf)
4551
4552         for comment_leaf in old_line.comments_after(old_leaf):
4553             new_line.append(comment_leaf, preformatted=True)
4554
4555
4556 def replace_child(old_child: LN, new_child: LN) -> None:
4557     """
4558     Side Effects:
4559         * If @old_child.parent is set, replace @old_child with @new_child in
4560         @old_child's underlying Node structure.
4561             OR
4562         * Otherwise, this function does nothing.
4563     """
4564     parent = old_child.parent
4565     if not parent:
4566         return
4567
4568     child_idx = old_child.remove()
4569     if child_idx is not None:
4570         parent.insert_child(child_idx, new_child)
4571
4572
4573 def get_string_prefix(string: str) -> str:
4574     """
4575     Pre-conditions:
4576         * assert_is_leaf_string(@string)
4577
4578     Returns:
4579         @string's prefix (e.g. '', 'r', 'f', or 'rf').
4580     """
4581     assert_is_leaf_string(string)
4582
4583     prefix = ""
4584     prefix_idx = 0
4585     while string[prefix_idx] in STRING_PREFIX_CHARS:
4586         prefix += string[prefix_idx].lower()
4587         prefix_idx += 1
4588
4589     return prefix
4590
4591
4592 def assert_is_leaf_string(string: str) -> None:
4593     """
4594     Checks the pre-condition that @string has the format that you would expect
4595     of `leaf.value` where `leaf` is some Leaf such that `leaf.type ==
4596     token.STRING`. A more precise description of the pre-conditions that are
4597     checked are listed below.
4598
4599     Pre-conditions:
4600         * @string starts with either ', ", <prefix>', or <prefix>" where
4601         `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
4602         * @string ends with a quote character (' or ").
4603
4604     Raises:
4605         AssertionError(...) if the pre-conditions listed above are not
4606         satisfied.
4607     """
4608     dquote_idx = string.find('"')
4609     squote_idx = string.find("'")
4610     if -1 in [dquote_idx, squote_idx]:
4611         quote_idx = max(dquote_idx, squote_idx)
4612     else:
4613         quote_idx = min(squote_idx, dquote_idx)
4614
4615     assert (
4616         0 <= quote_idx < len(string) - 1
4617     ), f"{string!r} is missing a starting quote character (' or \")."
4618     assert string[-1] in (
4619         "'",
4620         '"',
4621     ), f"{string!r} is missing an ending quote character (' or \")."
4622     assert set(string[:quote_idx]).issubset(
4623         set(STRING_PREFIX_CHARS)
4624     ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
4625
4626
4627 def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator[Line]:
4628     """Split line into many lines, starting with the first matching bracket pair.
4629
4630     Note: this usually looks weird, only use this for function definitions.
4631     Prefer RHS otherwise.  This is why this function is not symmetrical with
4632     :func:`right_hand_split` which also handles optional parentheses.
4633     """
4634     tail_leaves: List[Leaf] = []
4635     body_leaves: List[Leaf] = []
4636     head_leaves: List[Leaf] = []
4637     current_leaves = head_leaves
4638     matching_bracket: Optional[Leaf] = None
4639     for leaf in line.leaves:
4640         if (
4641             current_leaves is body_leaves
4642             and leaf.type in CLOSING_BRACKETS
4643             and leaf.opening_bracket is matching_bracket
4644         ):
4645             current_leaves = tail_leaves if body_leaves else head_leaves
4646         current_leaves.append(leaf)
4647         if current_leaves is head_leaves:
4648             if leaf.type in OPENING_BRACKETS:
4649                 matching_bracket = leaf
4650                 current_leaves = body_leaves
4651     if not matching_bracket:
4652         raise CannotSplit("No brackets found")
4653
4654     head = bracket_split_build_line(head_leaves, line, matching_bracket)
4655     body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
4656     tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
4657     bracket_split_succeeded_or_raise(head, body, tail)
4658     for result in (head, body, tail):
4659         if result:
4660             yield result
4661
4662
4663 def right_hand_split(
4664     line: Line,
4665     line_length: int,
4666     features: Collection[Feature] = (),
4667     omit: Collection[LeafID] = (),
4668 ) -> Iterator[Line]:
4669     """Split line into many lines, starting with the last matching bracket pair.
4670
4671     If the split was by optional parentheses, attempt splitting without them, too.
4672     `omit` is a collection of closing bracket IDs that shouldn't be considered for
4673     this split.
4674
4675     Note: running this function modifies `bracket_depth` on the leaves of `line`.
4676     """
4677     tail_leaves: List[Leaf] = []
4678     body_leaves: List[Leaf] = []
4679     head_leaves: List[Leaf] = []
4680     current_leaves = tail_leaves
4681     opening_bracket: Optional[Leaf] = None
4682     closing_bracket: Optional[Leaf] = None
4683     for leaf in reversed(line.leaves):
4684         if current_leaves is body_leaves:
4685             if leaf is opening_bracket:
4686                 current_leaves = head_leaves if body_leaves else tail_leaves
4687         current_leaves.append(leaf)
4688         if current_leaves is tail_leaves:
4689             if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
4690                 opening_bracket = leaf.opening_bracket
4691                 closing_bracket = leaf
4692                 current_leaves = body_leaves
4693     if not (opening_bracket and closing_bracket and head_leaves):
4694         # If there is no opening or closing_bracket that means the split failed and
4695         # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
4696         # the matching `opening_bracket` wasn't available on `line` anymore.
4697         raise CannotSplit("No brackets found")
4698
4699     tail_leaves.reverse()
4700     body_leaves.reverse()
4701     head_leaves.reverse()
4702     head = bracket_split_build_line(head_leaves, line, opening_bracket)
4703     body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
4704     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
4705     bracket_split_succeeded_or_raise(head, body, tail)
4706     if (
4707         # the body shouldn't be exploded
4708         not body.should_explode
4709         # the opening bracket is an optional paren
4710         and opening_bracket.type == token.LPAR
4711         and not opening_bracket.value
4712         # the closing bracket is an optional paren
4713         and closing_bracket.type == token.RPAR
4714         and not closing_bracket.value
4715         # it's not an import (optional parens are the only thing we can split on
4716         # in this case; attempting a split without them is a waste of time)
4717         and not line.is_import
4718         # there are no standalone comments in the body
4719         and not body.contains_standalone_comments(0)
4720         # and we can actually remove the parens
4721         and can_omit_invisible_parens(body, line_length)
4722     ):
4723         omit = {id(closing_bracket), *omit}
4724         try:
4725             yield from right_hand_split(line, line_length, features=features, omit=omit)
4726             return
4727
4728         except CannotSplit:
4729             if not (
4730                 can_be_split(body)
4731                 or is_line_short_enough(body, line_length=line_length)
4732             ):
4733                 raise CannotSplit(
4734                     "Splitting failed, body is still too long and can't be split."
4735                 )
4736
4737             elif head.contains_multiline_strings() or tail.contains_multiline_strings():
4738                 raise CannotSplit(
4739                     "The current optional pair of parentheses is bound to fail to"
4740                     " satisfy the splitting algorithm because the head or the tail"
4741                     " contains multiline strings which by definition never fit one"
4742                     " line."
4743                 )
4744
4745     ensure_visible(opening_bracket)
4746     ensure_visible(closing_bracket)
4747     for result in (head, body, tail):
4748         if result:
4749             yield result
4750
4751
4752 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
4753     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
4754
4755     Do nothing otherwise.
4756
4757     A left- or right-hand split is based on a pair of brackets. Content before
4758     (and including) the opening bracket is left on one line, content inside the
4759     brackets is put on a separate line, and finally content starting with and
4760     following the closing bracket is put on a separate line.
4761
4762     Those are called `head`, `body`, and `tail`, respectively. If the split
4763     produced the same line (all content in `head`) or ended up with an empty `body`
4764     and the `tail` is just the closing bracket, then it's considered failed.
4765     """
4766     tail_len = len(str(tail).strip())
4767     if not body:
4768         if tail_len == 0:
4769             raise CannotSplit("Splitting brackets produced the same line")
4770
4771         elif tail_len < 3:
4772             raise CannotSplit(
4773                 f"Splitting brackets on an empty body to save {tail_len} characters is"
4774                 " not worth it"
4775             )
4776
4777
4778 def bracket_split_build_line(
4779     leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
4780 ) -> Line:
4781     """Return a new line with given `leaves` and respective comments from `original`.
4782
4783     If `is_body` is True, the result line is one-indented inside brackets and as such
4784     has its first leaf's prefix normalized and a trailing comma added when expected.
4785     """
4786     result = Line(depth=original.depth)
4787     if is_body:
4788         result.inside_brackets = True
4789         result.depth += 1
4790         if leaves:
4791             # Since body is a new indent level, remove spurious leading whitespace.
4792             normalize_prefix(leaves[0], inside_brackets=True)
4793             # Ensure a trailing comma for imports and standalone function arguments, but
4794             # be careful not to add one after any comments or within type annotations.
4795             no_commas = (
4796                 original.is_def
4797                 and opening_bracket.value == "("
4798                 and not any(l.type == token.COMMA for l in leaves)
4799             )
4800
4801             if original.is_import or no_commas:
4802                 for i in range(len(leaves) - 1, -1, -1):
4803                     if leaves[i].type == STANDALONE_COMMENT:
4804                         continue
4805
4806                     if leaves[i].type != token.COMMA:
4807                         leaves.insert(i + 1, Leaf(token.COMMA, ","))
4808                     break
4809
4810     # Populate the line
4811     for leaf in leaves:
4812         result.append(leaf, preformatted=True)
4813         for comment_after in original.comments_after(leaf):
4814             result.append(comment_after, preformatted=True)
4815     if is_body:
4816         result.should_explode = should_explode(result, opening_bracket)
4817     return result
4818
4819
4820 def dont_increase_indentation(split_func: Transformer) -> Transformer:
4821     """Normalize prefix of the first leaf in every line returned by `split_func`.
4822
4823     This is a decorator over relevant split functions.
4824     """
4825
4826     @wraps(split_func)
4827     def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4828         for l in split_func(line, features):
4829             normalize_prefix(l.leaves[0], inside_brackets=True)
4830             yield l
4831
4832     return split_wrapper
4833
4834
4835 @dont_increase_indentation
4836 def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
4837     """Split according to delimiters of the highest priority.
4838
4839     If the appropriate Features are given, the split will add trailing commas
4840     also in function signatures and calls that contain `*` and `**`.
4841     """
4842     try:
4843         last_leaf = line.leaves[-1]
4844     except IndexError:
4845         raise CannotSplit("Line empty")
4846
4847     bt = line.bracket_tracker
4848     try:
4849         delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
4850     except ValueError:
4851         raise CannotSplit("No delimiters found")
4852
4853     if delimiter_priority == DOT_PRIORITY:
4854         if bt.delimiter_count_with_priority(delimiter_priority) == 1:
4855             raise CannotSplit("Splitting a single attribute from its owner looks wrong")
4856
4857     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4858     lowest_depth = sys.maxsize
4859     trailing_comma_safe = True
4860
4861     def append_to_line(leaf: Leaf) -> Iterator[Line]:
4862         """Append `leaf` to current line or to new line if appending impossible."""
4863         nonlocal current_line
4864         try:
4865             current_line.append_safe(leaf, preformatted=True)
4866         except ValueError:
4867             yield current_line
4868
4869             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4870             current_line.append(leaf)
4871
4872     for leaf in line.leaves:
4873         yield from append_to_line(leaf)
4874
4875         for comment_after in line.comments_after(leaf):
4876             yield from append_to_line(comment_after)
4877
4878         lowest_depth = min(lowest_depth, leaf.bracket_depth)
4879         if leaf.bracket_depth == lowest_depth:
4880             if is_vararg(leaf, within={syms.typedargslist}):
4881                 trailing_comma_safe = (
4882                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
4883                 )
4884             elif is_vararg(leaf, within={syms.arglist, syms.argument}):
4885                 trailing_comma_safe = (
4886                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
4887                 )
4888
4889         leaf_priority = bt.delimiters.get(id(leaf))
4890         if leaf_priority == delimiter_priority:
4891             yield current_line
4892
4893             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4894     if current_line:
4895         if (
4896             trailing_comma_safe
4897             and delimiter_priority == COMMA_PRIORITY
4898             and current_line.leaves[-1].type != token.COMMA
4899             and current_line.leaves[-1].type != STANDALONE_COMMENT
4900         ):
4901             current_line.append(Leaf(token.COMMA, ","))
4902         yield current_line
4903
4904
4905 @dont_increase_indentation
4906 def standalone_comment_split(
4907     line: Line, features: Collection[Feature] = ()
4908 ) -> Iterator[Line]:
4909     """Split standalone comments from the rest of the line."""
4910     if not line.contains_standalone_comments(0):
4911         raise CannotSplit("Line does not have any standalone comments")
4912
4913     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4914
4915     def append_to_line(leaf: Leaf) -> Iterator[Line]:
4916         """Append `leaf` to current line or to new line if appending impossible."""
4917         nonlocal current_line
4918         try:
4919             current_line.append_safe(leaf, preformatted=True)
4920         except ValueError:
4921             yield current_line
4922
4923             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
4924             current_line.append(leaf)
4925
4926     for leaf in line.leaves:
4927         yield from append_to_line(leaf)
4928
4929         for comment_after in line.comments_after(leaf):
4930             yield from append_to_line(comment_after)
4931
4932     if current_line:
4933         yield current_line
4934
4935
4936 def is_import(leaf: Leaf) -> bool:
4937     """Return True if the given leaf starts an import statement."""
4938     p = leaf.parent
4939     t = leaf.type
4940     v = leaf.value
4941     return bool(
4942         t == token.NAME
4943         and (
4944             (v == "import" and p and p.type == syms.import_name)
4945             or (v == "from" and p and p.type == syms.import_from)
4946         )
4947     )
4948
4949
4950 def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
4951     """Return True if the given leaf is a special comment.
4952     Only returns true for type comments for now."""
4953     t = leaf.type
4954     v = leaf.value
4955     return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix)
4956
4957
4958 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
4959     """Leave existing extra newlines if not `inside_brackets`. Remove everything
4960     else.
4961
4962     Note: don't use backslashes for formatting or you'll lose your voting rights.
4963     """
4964     if not inside_brackets:
4965         spl = leaf.prefix.split("#")
4966         if "\\" not in spl[0]:
4967             nl_count = spl[-1].count("\n")
4968             if len(spl) > 1:
4969                 nl_count -= 1
4970             leaf.prefix = "\n" * nl_count
4971             return
4972
4973     leaf.prefix = ""
4974
4975
4976 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
4977     """Make all string prefixes lowercase.
4978
4979     If remove_u_prefix is given, also removes any u prefix from the string.
4980
4981     Note: Mutates its argument.
4982     """
4983     match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", leaf.value, re.DOTALL)
4984     assert match is not None, f"failed to match string {leaf.value!r}"
4985     orig_prefix = match.group(1)
4986     new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u")
4987     if remove_u_prefix:
4988         new_prefix = new_prefix.replace("u", "")
4989     leaf.value = f"{new_prefix}{match.group(2)}"
4990
4991
4992 def normalize_string_quotes(leaf: Leaf) -> None:
4993     """Prefer double quotes but only if it doesn't cause more escaping.
4994
4995     Adds or removes backslashes as appropriate. Doesn't parse and fix
4996     strings nested in f-strings (yet).
4997
4998     Note: Mutates its argument.
4999     """
5000     value = leaf.value.lstrip(STRING_PREFIX_CHARS)
5001     if value[:3] == '"""':
5002         return
5003
5004     elif value[:3] == "'''":
5005         orig_quote = "'''"
5006         new_quote = '"""'
5007     elif value[0] == '"':
5008         orig_quote = '"'
5009         new_quote = "'"
5010     else:
5011         orig_quote = "'"
5012         new_quote = '"'
5013     first_quote_pos = leaf.value.find(orig_quote)
5014     if first_quote_pos == -1:
5015         return  # There's an internal error
5016
5017     prefix = leaf.value[:first_quote_pos]
5018     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
5019     escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
5020     escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
5021     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
5022     if "r" in prefix.casefold():
5023         if unescaped_new_quote.search(body):
5024             # There's at least one unescaped new_quote in this raw string
5025             # so converting is impossible
5026             return
5027
5028         # Do not introduce or remove backslashes in raw strings
5029         new_body = body
5030     else:
5031         # remove unnecessary escapes
5032         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
5033         if body != new_body:
5034             # Consider the string without unnecessary escapes as the original
5035             body = new_body
5036             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
5037         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
5038         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
5039     if "f" in prefix.casefold():
5040         matches = re.findall(
5041             r"""
5042             (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
5043                 ([^{].*?)  # contents of the brackets except if begins with {{
5044             \}(?:[^}]|$)  # A } followed by end of the string or a non-}
5045             """,
5046             new_body,
5047             re.VERBOSE,
5048         )
5049         for m in matches:
5050             if "\\" in str(m):
5051                 # Do not introduce backslashes in interpolated expressions
5052                 return
5053
5054     if new_quote == '"""' and new_body[-1:] == '"':
5055         # edge case:
5056         new_body = new_body[:-1] + '\\"'
5057     orig_escape_count = body.count("\\")
5058     new_escape_count = new_body.count("\\")
5059     if new_escape_count > orig_escape_count:
5060         return  # Do not introduce more escaping
5061
5062     if new_escape_count == orig_escape_count and orig_quote == '"':
5063         return  # Prefer double quotes
5064
5065     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
5066
5067
5068 def normalize_numeric_literal(leaf: Leaf) -> None:
5069     """Normalizes numeric (float, int, and complex) literals.
5070
5071     All letters used in the representation are normalized to lowercase (except
5072     in Python 2 long literals).
5073     """
5074     text = leaf.value.lower()
5075     if text.startswith(("0o", "0b")):
5076         # Leave octal and binary literals alone.
5077         pass
5078     elif text.startswith("0x"):
5079         # Change hex literals to upper case.
5080         before, after = text[:2], text[2:]
5081         text = f"{before}{after.upper()}"
5082     elif "e" in text:
5083         before, after = text.split("e")
5084         sign = ""
5085         if after.startswith("-"):
5086             after = after[1:]
5087             sign = "-"
5088         elif after.startswith("+"):
5089             after = after[1:]
5090         before = format_float_or_int_string(before)
5091         text = f"{before}e{sign}{after}"
5092     elif text.endswith(("j", "l")):
5093         number = text[:-1]
5094         suffix = text[-1]
5095         # Capitalize in "2L" because "l" looks too similar to "1".
5096         if suffix == "l":
5097             suffix = "L"
5098         text = f"{format_float_or_int_string(number)}{suffix}"
5099     else:
5100         text = format_float_or_int_string(text)
5101     leaf.value = text
5102
5103
5104 def format_float_or_int_string(text: str) -> str:
5105     """Formats a float string like "1.0"."""
5106     if "." not in text:
5107         return text
5108
5109     before, after = text.split(".")
5110     return f"{before or 0}.{after or 0}"
5111
5112
5113 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
5114     """Make existing optional parentheses invisible or create new ones.
5115
5116     `parens_after` is a set of string leaf values immediately after which parens
5117     should be put.
5118
5119     Standardizes on visible parentheses for single-element tuples, and keeps
5120     existing visible parentheses for other tuples and generator expressions.
5121     """
5122     for pc in list_comments(node.prefix, is_endmarker=False):
5123         if pc.value in FMT_OFF:
5124             # This `node` has a prefix with `# fmt: off`, don't mess with parens.
5125             return
5126     check_lpar = False
5127     for index, child in enumerate(list(node.children)):
5128         # Fixes a bug where invisible parens are not properly stripped from
5129         # assignment statements that contain type annotations.
5130         if isinstance(child, Node) and child.type == syms.annassign:
5131             normalize_invisible_parens(child, parens_after=parens_after)
5132
5133         # Add parentheses around long tuple unpacking in assignments.
5134         if (
5135             index == 0
5136             and isinstance(child, Node)
5137             and child.type == syms.testlist_star_expr
5138         ):
5139             check_lpar = True
5140
5141         if check_lpar:
5142             if is_walrus_assignment(child):
5143                 continue
5144
5145             if child.type == syms.atom:
5146                 if maybe_make_parens_invisible_in_atom(child, parent=node):
5147                     wrap_in_parentheses(node, child, visible=False)
5148             elif is_one_tuple(child):
5149                 wrap_in_parentheses(node, child, visible=True)
5150             elif node.type == syms.import_from:
5151                 # "import from" nodes store parentheses directly as part of
5152                 # the statement
5153                 if child.type == token.LPAR:
5154                     # make parentheses invisible
5155                     child.value = ""  # type: ignore
5156                     node.children[-1].value = ""  # type: ignore
5157                 elif child.type != token.STAR:
5158                     # insert invisible parentheses
5159                     node.insert_child(index, Leaf(token.LPAR, ""))
5160                     node.append_child(Leaf(token.RPAR, ""))
5161                 break
5162
5163             elif not (isinstance(child, Leaf) and is_multiline_string(child)):
5164                 wrap_in_parentheses(node, child, visible=False)
5165
5166         check_lpar = isinstance(child, Leaf) and child.value in parens_after
5167
5168
5169 def normalize_fmt_off(node: Node) -> None:
5170     """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
5171     try_again = True
5172     while try_again:
5173         try_again = convert_one_fmt_off_pair(node)
5174
5175
5176 def convert_one_fmt_off_pair(node: Node) -> bool:
5177     """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
5178
5179     Returns True if a pair was converted.
5180     """
5181     for leaf in node.leaves():
5182         previous_consumed = 0
5183         for comment in list_comments(leaf.prefix, is_endmarker=False):
5184             if comment.value in FMT_OFF:
5185                 # We only want standalone comments. If there's no previous leaf or
5186                 # the previous leaf is indentation, it's a standalone comment in
5187                 # disguise.
5188                 if comment.type != STANDALONE_COMMENT:
5189                     prev = preceding_leaf(leaf)
5190                     if prev and prev.type not in WHITESPACE:
5191                         continue
5192
5193                 ignored_nodes = list(generate_ignored_nodes(leaf))
5194                 if not ignored_nodes:
5195                     continue
5196
5197                 first = ignored_nodes[0]  # Can be a container node with the `leaf`.
5198                 parent = first.parent
5199                 prefix = first.prefix
5200                 first.prefix = prefix[comment.consumed :]
5201                 hidden_value = (
5202                     comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
5203                 )
5204                 if hidden_value.endswith("\n"):
5205                     # That happens when one of the `ignored_nodes` ended with a NEWLINE
5206                     # leaf (possibly followed by a DEDENT).
5207                     hidden_value = hidden_value[:-1]
5208                 first_idx: Optional[int] = None
5209                 for ignored in ignored_nodes:
5210                     index = ignored.remove()
5211                     if first_idx is None:
5212                         first_idx = index
5213                 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
5214                 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
5215                 parent.insert_child(
5216                     first_idx,
5217                     Leaf(
5218                         STANDALONE_COMMENT,
5219                         hidden_value,
5220                         prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
5221                     ),
5222                 )
5223                 return True
5224
5225             previous_consumed = comment.consumed
5226
5227     return False
5228
5229
5230 def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
5231     """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
5232
5233     Stops at the end of the block.
5234     """
5235     container: Optional[LN] = container_of(leaf)
5236     while container is not None and container.type != token.ENDMARKER:
5237         if is_fmt_on(container):
5238             return
5239
5240         # fix for fmt: on in children
5241         if contains_fmt_on_at_column(container, leaf.column):
5242             for child in container.children:
5243                 if contains_fmt_on_at_column(child, leaf.column):
5244                     return
5245                 yield child
5246         else:
5247             yield container
5248             container = container.next_sibling
5249
5250
5251 def is_fmt_on(container: LN) -> bool:
5252     """Determine whether formatting is switched on within a container.
5253     Determined by whether the last `# fmt:` comment is `on` or `off`.
5254     """
5255     fmt_on = False
5256     for comment in list_comments(container.prefix, is_endmarker=False):
5257         if comment.value in FMT_ON:
5258             fmt_on = True
5259         elif comment.value in FMT_OFF:
5260             fmt_on = False
5261     return fmt_on
5262
5263
5264 def contains_fmt_on_at_column(container: LN, column: int) -> bool:
5265     """Determine if children at a given column have formatting switched on."""
5266     for child in container.children:
5267         if (
5268             isinstance(child, Node)
5269             and first_leaf_column(child) == column
5270             or isinstance(child, Leaf)
5271             and child.column == column
5272         ):
5273             if is_fmt_on(child):
5274                 return True
5275
5276     return False
5277
5278
5279 def first_leaf_column(node: Node) -> Optional[int]:
5280     """Returns the column of the first leaf child of a node."""
5281     for child in node.children:
5282         if isinstance(child, Leaf):
5283             return child.column
5284     return None
5285
5286
5287 def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
5288     """If it's safe, make the parens in the atom `node` invisible, recursively.
5289     Additionally, remove repeated, adjacent invisible parens from the atom `node`
5290     as they are redundant.
5291
5292     Returns whether the node should itself be wrapped in invisible parentheses.
5293
5294     """
5295     if (
5296         node.type != syms.atom
5297         or is_empty_tuple(node)
5298         or is_one_tuple(node)
5299         or (is_yield(node) and parent.type != syms.expr_stmt)
5300         or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
5301     ):
5302         return False
5303
5304     first = node.children[0]
5305     last = node.children[-1]
5306     if first.type == token.LPAR and last.type == token.RPAR:
5307         middle = node.children[1]
5308         # make parentheses invisible
5309         first.value = ""  # type: ignore
5310         last.value = ""  # type: ignore
5311         maybe_make_parens_invisible_in_atom(middle, parent=parent)
5312
5313         if is_atom_with_invisible_parens(middle):
5314             # Strip the invisible parens from `middle` by replacing
5315             # it with the child in-between the invisible parens
5316             middle.replace(middle.children[1])
5317
5318         return False
5319
5320     return True
5321
5322
5323 def is_atom_with_invisible_parens(node: LN) -> bool:
5324     """Given a `LN`, determines whether it's an atom `node` with invisible
5325     parens. Useful in dedupe-ing and normalizing parens.
5326     """
5327     if isinstance(node, Leaf) or node.type != syms.atom:
5328         return False
5329
5330     first, last = node.children[0], node.children[-1]
5331     return (
5332         isinstance(first, Leaf)
5333         and first.type == token.LPAR
5334         and first.value == ""
5335         and isinstance(last, Leaf)
5336         and last.type == token.RPAR
5337         and last.value == ""
5338     )
5339
5340
5341 def is_empty_tuple(node: LN) -> bool:
5342     """Return True if `node` holds an empty tuple."""
5343     return (
5344         node.type == syms.atom
5345         and len(node.children) == 2
5346         and node.children[0].type == token.LPAR
5347         and node.children[1].type == token.RPAR
5348     )
5349
5350
5351 def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
5352     """Returns `wrapped` if `node` is of the shape ( wrapped ).
5353
5354     Parenthesis can be optional. Returns None otherwise"""
5355     if len(node.children) != 3:
5356         return None
5357
5358     lpar, wrapped, rpar = node.children
5359     if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
5360         return None
5361
5362     return wrapped
5363
5364
5365 def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
5366     """Wrap `child` in parentheses.
5367
5368     This replaces `child` with an atom holding the parentheses and the old
5369     child.  That requires moving the prefix.
5370
5371     If `visible` is False, the leaves will be valueless (and thus invisible).
5372     """
5373     lpar = Leaf(token.LPAR, "(" if visible else "")
5374     rpar = Leaf(token.RPAR, ")" if visible else "")
5375     prefix = child.prefix
5376     child.prefix = ""
5377     index = child.remove() or 0
5378     new_child = Node(syms.atom, [lpar, child, rpar])
5379     new_child.prefix = prefix
5380     parent.insert_child(index, new_child)
5381
5382
5383 def is_one_tuple(node: LN) -> bool:
5384     """Return True if `node` holds a tuple with one element, with or without parens."""
5385     if node.type == syms.atom:
5386         gexp = unwrap_singleton_parenthesis(node)
5387         if gexp is None or gexp.type != syms.testlist_gexp:
5388             return False
5389
5390         return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
5391
5392     return (
5393         node.type in IMPLICIT_TUPLE
5394         and len(node.children) == 2
5395         and node.children[1].type == token.COMMA
5396     )
5397
5398
5399 def is_walrus_assignment(node: LN) -> bool:
5400     """Return True iff `node` is of the shape ( test := test )"""
5401     inner = unwrap_singleton_parenthesis(node)
5402     return inner is not None and inner.type == syms.namedexpr_test
5403
5404
5405 def is_yield(node: LN) -> bool:
5406     """Return True if `node` holds a `yield` or `yield from` expression."""
5407     if node.type == syms.yield_expr:
5408         return True
5409
5410     if node.type == token.NAME and node.value == "yield":  # type: ignore
5411         return True
5412
5413     if node.type != syms.atom:
5414         return False
5415
5416     if len(node.children) != 3:
5417         return False
5418
5419     lpar, expr, rpar = node.children
5420     if lpar.type == token.LPAR and rpar.type == token.RPAR:
5421         return is_yield(expr)
5422
5423     return False
5424
5425
5426 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
5427     """Return True if `leaf` is a star or double star in a vararg or kwarg.
5428
5429     If `within` includes VARARGS_PARENTS, this applies to function signatures.
5430     If `within` includes UNPACKING_PARENTS, it applies to right hand-side
5431     extended iterable unpacking (PEP 3132) and additional unpacking
5432     generalizations (PEP 448).
5433     """
5434     if leaf.type not in VARARGS_SPECIALS or not leaf.parent:
5435         return False
5436
5437     p = leaf.parent
5438     if p.type == syms.star_expr:
5439         # Star expressions are also used as assignment targets in extended
5440         # iterable unpacking (PEP 3132).  See what its parent is instead.
5441         if not p.parent:
5442             return False
5443
5444         p = p.parent
5445
5446     return p.type in within
5447
5448
5449 def is_multiline_string(leaf: Leaf) -> bool:
5450     """Return True if `leaf` is a multiline string that actually spans many lines."""
5451     return has_triple_quotes(leaf.value) and "\n" in leaf.value
5452
5453
5454 def is_stub_suite(node: Node) -> bool:
5455     """Return True if `node` is a suite with a stub body."""
5456     if (
5457         len(node.children) != 4
5458         or node.children[0].type != token.NEWLINE
5459         or node.children[1].type != token.INDENT
5460         or node.children[3].type != token.DEDENT
5461     ):
5462         return False
5463
5464     return is_stub_body(node.children[2])
5465
5466
5467 def is_stub_body(node: LN) -> bool:
5468     """Return True if `node` is a simple statement containing an ellipsis."""
5469     if not isinstance(node, Node) or node.type != syms.simple_stmt:
5470         return False
5471
5472     if len(node.children) != 2:
5473         return False
5474
5475     child = node.children[0]
5476     return (
5477         child.type == syms.atom
5478         and len(child.children) == 3
5479         and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
5480     )
5481
5482
5483 def max_delimiter_priority_in_atom(node: LN) -> Priority:
5484     """Return maximum delimiter priority inside `node`.
5485
5486     This is specific to atoms with contents contained in a pair of parentheses.
5487     If `node` isn't an atom or there are no enclosing parentheses, returns 0.
5488     """
5489     if node.type != syms.atom:
5490         return 0
5491
5492     first = node.children[0]
5493     last = node.children[-1]
5494     if not (first.type == token.LPAR and last.type == token.RPAR):
5495         return 0
5496
5497     bt = BracketTracker()
5498     for c in node.children[1:-1]:
5499         if isinstance(c, Leaf):
5500             bt.mark(c)
5501         else:
5502             for leaf in c.leaves():
5503                 bt.mark(leaf)
5504     try:
5505         return bt.max_delimiter_priority()
5506
5507     except ValueError:
5508         return 0
5509
5510
5511 def ensure_visible(leaf: Leaf) -> None:
5512     """Make sure parentheses are visible.
5513
5514     They could be invisible as part of some statements (see
5515     :func:`normalize_invisible_parens` and :func:`visit_import_from`).
5516     """
5517     if leaf.type == token.LPAR:
5518         leaf.value = "("
5519     elif leaf.type == token.RPAR:
5520         leaf.value = ")"
5521
5522
5523 def should_explode(line: Line, opening_bracket: Leaf) -> bool:
5524     """Should `line` immediately be split with `delimiter_split()` after RHS?"""
5525
5526     if not (
5527         opening_bracket.parent
5528         and opening_bracket.parent.type in {syms.atom, syms.import_from}
5529         and opening_bracket.value in "[{("
5530     ):
5531         return False
5532
5533     try:
5534         last_leaf = line.leaves[-1]
5535         exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
5536         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
5537     except (IndexError, ValueError):
5538         return False
5539
5540     return max_priority == COMMA_PRIORITY
5541
5542
5543 def get_features_used(node: Node) -> Set[Feature]:
5544     """Return a set of (relatively) new Python features used in this file.
5545
5546     Currently looking for:
5547     - f-strings;
5548     - underscores in numeric literals;
5549     - trailing commas after * or ** in function signatures and calls;
5550     - positional only arguments in function signatures and lambdas;
5551     """
5552     features: Set[Feature] = set()
5553     for n in node.pre_order():
5554         if n.type == token.STRING:
5555             value_head = n.value[:2]  # type: ignore
5556             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
5557                 features.add(Feature.F_STRINGS)
5558
5559         elif n.type == token.NUMBER:
5560             if "_" in n.value:  # type: ignore
5561                 features.add(Feature.NUMERIC_UNDERSCORES)
5562
5563         elif n.type == token.SLASH:
5564             if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
5565                 features.add(Feature.POS_ONLY_ARGUMENTS)
5566
5567         elif n.type == token.COLONEQUAL:
5568             features.add(Feature.ASSIGNMENT_EXPRESSIONS)
5569
5570         elif (
5571             n.type in {syms.typedargslist, syms.arglist}
5572             and n.children
5573             and n.children[-1].type == token.COMMA
5574         ):
5575             if n.type == syms.typedargslist:
5576                 feature = Feature.TRAILING_COMMA_IN_DEF
5577             else:
5578                 feature = Feature.TRAILING_COMMA_IN_CALL
5579
5580             for ch in n.children:
5581                 if ch.type in STARS:
5582                     features.add(feature)
5583
5584                 if ch.type == syms.argument:
5585                     for argch in ch.children:
5586                         if argch.type in STARS:
5587                             features.add(feature)
5588
5589     return features
5590
5591
5592 def detect_target_versions(node: Node) -> Set[TargetVersion]:
5593     """Detect the version to target based on the nodes used."""
5594     features = get_features_used(node)
5595     return {
5596         version for version in TargetVersion if features <= VERSION_TO_FEATURES[version]
5597     }
5598
5599
5600 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
5601     """Generate sets of closing bracket IDs that should be omitted in a RHS.
5602
5603     Brackets can be omitted if the entire trailer up to and including
5604     a preceding closing bracket fits in one line.
5605
5606     Yielded sets are cumulative (contain results of previous yields, too).  First
5607     set is empty.
5608     """
5609
5610     omit: Set[LeafID] = set()
5611     yield omit
5612
5613     length = 4 * line.depth
5614     opening_bracket: Optional[Leaf] = None
5615     closing_bracket: Optional[Leaf] = None
5616     inner_brackets: Set[LeafID] = set()
5617     for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
5618         length += leaf_length
5619         if length > line_length:
5620             break
5621
5622         has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
5623         if leaf.type == STANDALONE_COMMENT or has_inline_comment:
5624             break
5625
5626         if opening_bracket:
5627             if leaf is opening_bracket:
5628                 opening_bracket = None
5629             elif leaf.type in CLOSING_BRACKETS:
5630                 inner_brackets.add(id(leaf))
5631         elif leaf.type in CLOSING_BRACKETS:
5632             if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
5633                 # Empty brackets would fail a split so treat them as "inner"
5634                 # brackets (e.g. only add them to the `omit` set if another
5635                 # pair of brackets was good enough.
5636                 inner_brackets.add(id(leaf))
5637                 continue
5638
5639             if closing_bracket:
5640                 omit.add(id(closing_bracket))
5641                 omit.update(inner_brackets)
5642                 inner_brackets.clear()
5643                 yield omit
5644
5645             if leaf.value:
5646                 opening_bracket = leaf.opening_bracket
5647                 closing_bracket = leaf
5648
5649
5650 def get_future_imports(node: Node) -> Set[str]:
5651     """Return a set of __future__ imports in the file."""
5652     imports: Set[str] = set()
5653
5654     def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
5655         for child in children:
5656             if isinstance(child, Leaf):
5657                 if child.type == token.NAME:
5658                     yield child.value
5659
5660             elif child.type == syms.import_as_name:
5661                 orig_name = child.children[0]
5662                 assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
5663                 assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
5664                 yield orig_name.value
5665
5666             elif child.type == syms.import_as_names:
5667                 yield from get_imports_from_children(child.children)
5668
5669             else:
5670                 raise AssertionError("Invalid syntax parsing imports")
5671
5672     for child in node.children:
5673         if child.type != syms.simple_stmt:
5674             break
5675
5676         first_child = child.children[0]
5677         if isinstance(first_child, Leaf):
5678             # Continue looking if we see a docstring; otherwise stop.
5679             if (
5680                 len(child.children) == 2
5681                 and first_child.type == token.STRING
5682                 and child.children[1].type == token.NEWLINE
5683             ):
5684                 continue
5685
5686             break
5687
5688         elif first_child.type == syms.import_from:
5689             module_name = first_child.children[1]
5690             if not isinstance(module_name, Leaf) or module_name.value != "__future__":
5691                 break
5692
5693             imports |= set(get_imports_from_children(first_child.children[3:]))
5694         else:
5695             break
5696
5697     return imports
5698
5699
5700 @lru_cache()
5701 def get_gitignore(root: Path) -> PathSpec:
5702     """ Return a PathSpec matching gitignore content if present."""
5703     gitignore = root / ".gitignore"
5704     lines: List[str] = []
5705     if gitignore.is_file():
5706         with gitignore.open() as gf:
5707             lines = gf.readlines()
5708     return PathSpec.from_lines("gitwildmatch", lines)
5709
5710
5711 def gen_python_files_in_dir(
5712     path: Path,
5713     root: Path,
5714     include: Pattern[str],
5715     exclude: Pattern[str],
5716     report: "Report",
5717     gitignore: PathSpec,
5718 ) -> Iterator[Path]:
5719     """Generate all files under `path` whose paths are not excluded by the
5720     `exclude` regex, but are included by the `include` regex.
5721
5722     Symbolic links pointing outside of the `root` directory are ignored.
5723
5724     `report` is where output about exclusions goes.
5725     """
5726     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
5727     for child in path.iterdir():
5728         # First ignore files matching .gitignore
5729         if gitignore.match_file(child.as_posix()):
5730             report.path_ignored(child, "matches the .gitignore file content")
5731             continue
5732
5733         # Then ignore with `exclude` option.
5734         try:
5735             normalized_path = "/" + child.resolve().relative_to(root).as_posix()
5736         except OSError as e:
5737             report.path_ignored(child, f"cannot be read because {e}")
5738             continue
5739
5740         except ValueError:
5741             if child.is_symlink():
5742                 report.path_ignored(
5743                     child, f"is a symbolic link that points outside {root}"
5744                 )
5745                 continue
5746
5747             raise
5748
5749         if child.is_dir():
5750             normalized_path += "/"
5751
5752         exclude_match = exclude.search(normalized_path)
5753         if exclude_match and exclude_match.group(0):
5754             report.path_ignored(child, "matches the --exclude regular expression")
5755             continue
5756
5757         if child.is_dir():
5758             yield from gen_python_files_in_dir(
5759                 child, root, include, exclude, report, gitignore
5760             )
5761
5762         elif child.is_file():
5763             include_match = include.search(normalized_path)
5764             if include_match:
5765                 yield child
5766
5767
5768 @lru_cache()
5769 def find_project_root(srcs: Iterable[str]) -> Path:
5770     """Return a directory containing .git, .hg, or pyproject.toml.
5771
5772     That directory can be one of the directories passed in `srcs` or their
5773     common parent.
5774
5775     If no directory in the tree contains a marker that would specify it's the
5776     project root, the root of the file system is returned.
5777     """
5778     if not srcs:
5779         return Path("/").resolve()
5780
5781     common_base = min(Path(src).resolve() for src in srcs)
5782     if common_base.is_dir():
5783         # Append a fake file so `parents` below returns `common_base_dir`, too.
5784         common_base /= "fake-file"
5785     for directory in common_base.parents:
5786         if (directory / ".git").exists():
5787             return directory
5788
5789         if (directory / ".hg").is_dir():
5790             return directory
5791
5792         if (directory / "pyproject.toml").is_file():
5793             return directory
5794
5795     return directory
5796
5797
5798 @dataclass
5799 class Report:
5800     """Provides a reformatting counter. Can be rendered with `str(report)`."""
5801
5802     check: bool = False
5803     diff: bool = False
5804     quiet: bool = False
5805     verbose: bool = False
5806     change_count: int = 0
5807     same_count: int = 0
5808     failure_count: int = 0
5809
5810     def done(self, src: Path, changed: Changed) -> None:
5811         """Increment the counter for successful reformatting. Write out a message."""
5812         if changed is Changed.YES:
5813             reformatted = "would reformat" if self.check or self.diff else "reformatted"
5814             if self.verbose or not self.quiet:
5815                 out(f"{reformatted} {src}")
5816             self.change_count += 1
5817         else:
5818             if self.verbose:
5819                 if changed is Changed.NO:
5820                     msg = f"{src} already well formatted, good job."
5821                 else:
5822                     msg = f"{src} wasn't modified on disk since last run."
5823                 out(msg, bold=False)
5824             self.same_count += 1
5825
5826     def failed(self, src: Path, message: str) -> None:
5827         """Increment the counter for failed reformatting. Write out a message."""
5828         err(f"error: cannot format {src}: {message}")
5829         self.failure_count += 1
5830
5831     def path_ignored(self, path: Path, message: str) -> None:
5832         if self.verbose:
5833             out(f"{path} ignored: {message}", bold=False)
5834
5835     @property
5836     def return_code(self) -> int:
5837         """Return the exit code that the app should use.
5838
5839         This considers the current state of changed files and failures:
5840         - if there were any failures, return 123;
5841         - if any files were changed and --check is being used, return 1;
5842         - otherwise return 0.
5843         """
5844         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
5845         # 126 we have special return codes reserved by the shell.
5846         if self.failure_count:
5847             return 123
5848
5849         elif self.change_count and self.check:
5850             return 1
5851
5852         return 0
5853
5854     def __str__(self) -> str:
5855         """Render a color report of the current state.
5856
5857         Use `click.unstyle` to remove colors.
5858         """
5859         if self.check or self.diff:
5860             reformatted = "would be reformatted"
5861             unchanged = "would be left unchanged"
5862             failed = "would fail to reformat"
5863         else:
5864             reformatted = "reformatted"
5865             unchanged = "left unchanged"
5866             failed = "failed to reformat"
5867         report = []
5868         if self.change_count:
5869             s = "s" if self.change_count > 1 else ""
5870             report.append(
5871                 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
5872             )
5873         if self.same_count:
5874             s = "s" if self.same_count > 1 else ""
5875             report.append(f"{self.same_count} file{s} {unchanged}")
5876         if self.failure_count:
5877             s = "s" if self.failure_count > 1 else ""
5878             report.append(
5879                 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
5880             )
5881         return ", ".join(report) + "."
5882
5883
5884 def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
5885     filename = "<unknown>"
5886     if sys.version_info >= (3, 8):
5887         # TODO: support Python 4+ ;)
5888         for minor_version in range(sys.version_info[1], 4, -1):
5889             try:
5890                 return ast.parse(src, filename, feature_version=(3, minor_version))
5891             except SyntaxError:
5892                 continue
5893     else:
5894         for feature_version in (7, 6):
5895             try:
5896                 return ast3.parse(src, filename, feature_version=feature_version)
5897             except SyntaxError:
5898                 continue
5899
5900     return ast27.parse(src)
5901
5902
5903 def _fixup_ast_constants(
5904     node: Union[ast.AST, ast3.AST, ast27.AST]
5905 ) -> Union[ast.AST, ast3.AST, ast27.AST]:
5906     """Map ast nodes deprecated in 3.8 to Constant."""
5907     if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
5908         return ast.Constant(value=node.s)
5909
5910     if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
5911         return ast.Constant(value=node.n)
5912
5913     if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
5914         return ast.Constant(value=node.value)
5915
5916     return node
5917
5918
5919 def _stringify_ast(
5920     node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
5921 ) -> Iterator[str]:
5922     """Simple visitor generating strings to compare ASTs by content."""
5923
5924     node = _fixup_ast_constants(node)
5925
5926     yield f"{'  ' * depth}{node.__class__.__name__}("
5927
5928     for field in sorted(node._fields):  # noqa: F402
5929         # TypeIgnore has only one field 'lineno' which breaks this comparison
5930         type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
5931         if sys.version_info >= (3, 8):
5932             type_ignore_classes += (ast.TypeIgnore,)
5933         if isinstance(node, type_ignore_classes):
5934             break
5935
5936         try:
5937             value = getattr(node, field)
5938         except AttributeError:
5939             continue
5940
5941         yield f"{'  ' * (depth+1)}{field}="
5942
5943         if isinstance(value, list):
5944             for item in value:
5945                 # Ignore nested tuples within del statements, because we may insert
5946                 # parentheses and they change the AST.
5947                 if (
5948                     field == "targets"
5949                     and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
5950                     and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
5951                 ):
5952                     for item in item.elts:
5953                         yield from _stringify_ast(item, depth + 2)
5954
5955                 elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
5956                     yield from _stringify_ast(item, depth + 2)
5957
5958         elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
5959             yield from _stringify_ast(value, depth + 2)
5960
5961         else:
5962             # Constant strings may be indented across newlines, if they are
5963             # docstrings; fold spaces after newlines when comparing
5964             if (
5965                 isinstance(node, ast.Constant)
5966                 and field == "value"
5967                 and isinstance(value, str)
5968             ):
5969                 normalized = re.sub(r"\n[ \t]+", "\n ", value)
5970             else:
5971                 normalized = value
5972             yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"
5973
5974     yield f"{'  ' * depth})  # /{node.__class__.__name__}"
5975
5976
5977 def assert_equivalent(src: str, dst: str) -> None:
5978     """Raise AssertionError if `src` and `dst` aren't equivalent."""
5979     try:
5980         src_ast = parse_ast(src)
5981     except Exception as exc:
5982         raise AssertionError(
5983             "cannot use --safe with this file; failed to parse source file.  AST"
5984             f" error message: {exc}"
5985         )
5986
5987     try:
5988         dst_ast = parse_ast(dst)
5989     except Exception as exc:
5990         log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
5991         raise AssertionError(
5992             f"INTERNAL ERROR: Black produced invalid code: {exc}. Please report a bug"
5993             " on https://github.com/psf/black/issues.  This invalid output might be"
5994             f" helpful: {log}"
5995         ) from None
5996
5997     src_ast_str = "\n".join(_stringify_ast(src_ast))
5998     dst_ast_str = "\n".join(_stringify_ast(dst_ast))
5999     if src_ast_str != dst_ast_str:
6000         log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
6001         raise AssertionError(
6002             "INTERNAL ERROR: Black produced code that is not equivalent to the"
6003             " source.  Please report a bug on https://github.com/psf/black/issues. "
6004             f" This diff might be helpful: {log}"
6005         ) from None
6006
6007
6008 def assert_stable(src: str, dst: str, mode: Mode) -> None:
6009     """Raise AssertionError if `dst` reformats differently the second time."""
6010     newdst = format_str(dst, mode=mode)
6011     if dst != newdst:
6012         log = dump_to_file(
6013             diff(src, dst, "source", "first pass"),
6014             diff(dst, newdst, "first pass", "second pass"),
6015         )
6016         raise AssertionError(
6017             "INTERNAL ERROR: Black produced different code on the second pass of the"
6018             " formatter.  Please report a bug on https://github.com/psf/black/issues."
6019             f"  This diff might be helpful: {log}"
6020         ) from None
6021
6022
6023 @mypyc_attr(patchable=True)
6024 def dump_to_file(*output: str) -> str:
6025     """Dump `output` to a temporary file. Return path to the file."""
6026     with tempfile.NamedTemporaryFile(
6027         mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
6028     ) as f:
6029         for lines in output:
6030             f.write(lines)
6031             if lines and lines[-1] != "\n":
6032                 f.write("\n")
6033     return f.name
6034
6035
6036 @contextmanager
6037 def nullcontext() -> Iterator[None]:
6038     """Return an empty context manager.
6039
6040     To be used like `nullcontext` in Python 3.7.
6041     """
6042     yield
6043
6044
6045 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
6046     """Return a unified diff string between strings `a` and `b`."""
6047     import difflib
6048
6049     a_lines = [line + "\n" for line in a.splitlines()]
6050     b_lines = [line + "\n" for line in b.splitlines()]
6051     return "".join(
6052         difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
6053     )
6054
6055
6056 def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None:
6057     """asyncio signal handler that cancels all `tasks` and reports to stderr."""
6058     err("Aborted!")
6059     for task in tasks:
6060         task.cancel()
6061
6062
6063 def shutdown(loop: asyncio.AbstractEventLoop) -> None:
6064     """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
6065     try:
6066         if sys.version_info[:2] >= (3, 7):
6067             all_tasks = asyncio.all_tasks
6068         else:
6069             all_tasks = asyncio.Task.all_tasks
6070         # This part is borrowed from asyncio/runners.py in Python 3.7b2.
6071         to_cancel = [task for task in all_tasks(loop) if not task.done()]
6072         if not to_cancel:
6073             return
6074
6075         for task in to_cancel:
6076             task.cancel()
6077         loop.run_until_complete(
6078             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
6079         )
6080     finally:
6081         # `concurrent.futures.Future` objects cannot be cancelled once they
6082         # are already running. There might be some when the `shutdown()` happened.
6083         # Silence their logger's spew about the event loop being closed.
6084         cf_logger = logging.getLogger("concurrent.futures")
6085         cf_logger.setLevel(logging.CRITICAL)
6086         loop.close()
6087
6088
6089 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
6090     """Replace `regex` with `replacement` twice on `original`.
6091
6092     This is used by string normalization to perform replaces on
6093     overlapping matches.
6094     """
6095     return regex.sub(replacement, regex.sub(replacement, original))
6096
6097
6098 def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
6099     """Compile a regular expression string in `regex`.
6100
6101     If it contains newlines, use verbose mode.
6102     """
6103     if "\n" in regex:
6104         regex = "(?x)" + regex
6105     compiled: Pattern[str] = re.compile(regex)
6106     return compiled
6107
6108
6109 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
6110     """Like `reversed(enumerate(sequence))` if that were possible."""
6111     index = len(sequence) - 1
6112     for element in reversed(sequence):
6113         yield (index, element)
6114         index -= 1
6115
6116
6117 def enumerate_with_length(
6118     line: Line, reversed: bool = False
6119 ) -> Iterator[Tuple[Index, Leaf, int]]:
6120     """Return an enumeration of leaves with their length.
6121
6122     Stops prematurely on multiline strings and standalone comments.
6123     """
6124     op = cast(
6125         Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
6126         enumerate_reversed if reversed else enumerate,
6127     )
6128     for index, leaf in op(line.leaves):
6129         length = len(leaf.prefix) + len(leaf.value)
6130         if "\n" in leaf.value:
6131             return  # Multiline strings, we can't continue.
6132
6133         for comment in line.comments_after(leaf):
6134             length += len(comment.value)
6135
6136         yield index, leaf, length
6137
6138
6139 def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
6140     """Return True if `line` is no longer than `line_length`.
6141
6142     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
6143     """
6144     if not line_str:
6145         line_str = line_to_string(line)
6146     return (
6147         len(line_str) <= line_length
6148         and "\n" not in line_str  # multiline strings
6149         and not line.contains_standalone_comments()
6150     )
6151
6152
6153 def can_be_split(line: Line) -> bool:
6154     """Return False if the line cannot be split *for sure*.
6155
6156     This is not an exhaustive search but a cheap heuristic that we can use to
6157     avoid some unfortunate formattings (mostly around wrapping unsplittable code
6158     in unnecessary parentheses).
6159     """
6160     leaves = line.leaves
6161     if len(leaves) < 2:
6162         return False
6163
6164     if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
6165         call_count = 0
6166         dot_count = 0
6167         next = leaves[-1]
6168         for leaf in leaves[-2::-1]:
6169             if leaf.type in OPENING_BRACKETS:
6170                 if next.type not in CLOSING_BRACKETS:
6171                     return False
6172
6173                 call_count += 1
6174             elif leaf.type == token.DOT:
6175                 dot_count += 1
6176             elif leaf.type == token.NAME:
6177                 if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
6178                     return False
6179
6180             elif leaf.type not in CLOSING_BRACKETS:
6181                 return False
6182
6183             if dot_count > 1 and call_count > 1:
6184                 return False
6185
6186     return True
6187
6188
6189 def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
6190     """Does `line` have a shape safe to reformat without optional parens around it?
6191
6192     Returns True for only a subset of potentially nice looking formattings but
6193     the point is to not return false positives that end up producing lines that
6194     are too long.
6195     """
6196     bt = line.bracket_tracker
6197     if not bt.delimiters:
6198         # Without delimiters the optional parentheses are useless.
6199         return True
6200
6201     max_priority = bt.max_delimiter_priority()
6202     if bt.delimiter_count_with_priority(max_priority) > 1:
6203         # With more than one delimiter of a kind the optional parentheses read better.
6204         return False
6205
6206     if max_priority == DOT_PRIORITY:
6207         # A single stranded method call doesn't require optional parentheses.
6208         return True
6209
6210     assert len(line.leaves) >= 2, "Stranded delimiter"
6211
6212     first = line.leaves[0]
6213     second = line.leaves[1]
6214     penultimate = line.leaves[-2]
6215     last = line.leaves[-1]
6216
6217     # With a single delimiter, omit if the expression starts or ends with
6218     # a bracket.
6219     if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
6220         remainder = False
6221         length = 4 * line.depth
6222         for _index, leaf, leaf_length in enumerate_with_length(line):
6223             if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
6224                 remainder = True
6225             if remainder:
6226                 length += leaf_length
6227                 if length > line_length:
6228                     break
6229
6230                 if leaf.type in OPENING_BRACKETS:
6231                     # There are brackets we can further split on.
6232                     remainder = False
6233
6234         else:
6235             # checked the entire string and line length wasn't exceeded
6236             if len(line.leaves) == _index + 1:
6237                 return True
6238
6239         # Note: we are not returning False here because a line might have *both*
6240         # a leading opening bracket and a trailing closing bracket.  If the
6241         # opening bracket doesn't match our rule, maybe the closing will.
6242
6243     if (
6244         last.type == token.RPAR
6245         or last.type == token.RBRACE
6246         or (
6247             # don't use indexing for omitting optional parentheses;
6248             # it looks weird
6249             last.type == token.RSQB
6250             and last.parent
6251             and last.parent.type != syms.trailer
6252         )
6253     ):
6254         if penultimate.type in OPENING_BRACKETS:
6255             # Empty brackets don't help.
6256             return False
6257
6258         if is_multiline_string(first):
6259             # Additional wrapping of a multiline string in this situation is
6260             # unnecessary.
6261             return True
6262
6263         length = 4 * line.depth
6264         seen_other_brackets = False
6265         for _index, leaf, leaf_length in enumerate_with_length(line):
6266             length += leaf_length
6267             if leaf is last.opening_bracket:
6268                 if seen_other_brackets or length <= line_length:
6269                     return True
6270
6271             elif leaf.type in OPENING_BRACKETS:
6272                 # There are brackets we can further split on.
6273                 seen_other_brackets = True
6274
6275     return False
6276
6277
6278 def get_cache_file(mode: Mode) -> Path:
6279     return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"
6280
6281
6282 def read_cache(mode: Mode) -> Cache:
6283     """Read the cache if it exists and is well formed.
6284
6285     If it is not well formed, the call to write_cache later should resolve the issue.
6286     """
6287     cache_file = get_cache_file(mode)
6288     if not cache_file.exists():
6289         return {}
6290
6291     with cache_file.open("rb") as fobj:
6292         try:
6293             cache: Cache = pickle.load(fobj)
6294         except (pickle.UnpicklingError, ValueError):
6295             return {}
6296
6297     return cache
6298
6299
6300 def get_cache_info(path: Path) -> CacheInfo:
6301     """Return the information used to check if a file is already formatted or not."""
6302     stat = path.stat()
6303     return stat.st_mtime, stat.st_size
6304
6305
6306 def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
6307     """Split an iterable of paths in `sources` into two sets.
6308
6309     The first contains paths of files that modified on disk or are not in the
6310     cache. The other contains paths to non-modified files.
6311     """
6312     todo, done = set(), set()
6313     for src in sources:
6314         src = src.resolve()
6315         if cache.get(src) != get_cache_info(src):
6316             todo.add(src)
6317         else:
6318             done.add(src)
6319     return todo, done
6320
6321
6322 def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
6323     """Update the cache file."""
6324     cache_file = get_cache_file(mode)
6325     try:
6326         CACHE_DIR.mkdir(parents=True, exist_ok=True)
6327         new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
6328         with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
6329             pickle.dump(new_cache, f, protocol=4)
6330         os.replace(f.name, cache_file)
6331     except OSError:
6332         pass
6333
6334
6335 def patch_click() -> None:
6336     """Make Click not crash.
6337
6338     On certain misconfigured environments, Python 3 selects the ASCII encoding as the
6339     default which restricts paths that it can access during the lifetime of the
6340     application.  Click refuses to work in this scenario by raising a RuntimeError.
6341
6342     In case of Black the likelihood that non-ASCII characters are going to be used in
6343     file paths is minimal since it's Python source code.  Moreover, this crash was
6344     spurious on Python 3.7 thanks to PEP 538 and PEP 540.
6345     """
6346     try:
6347         from click import core
6348         from click import _unicodefun  # type: ignore
6349     except ModuleNotFoundError:
6350         return
6351
6352     for module in (core, _unicodefun):
6353         if hasattr(module, "_verify_python3_env"):
6354             module._verify_python3_env = lambda: None
6355
6356
6357 def patched_main() -> None:
6358     freeze_support()
6359     patch_click()
6360     main()
6361
6362
6363 def fix_docstring(docstring: str, prefix: str) -> str:
6364     # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
6365     if not docstring:
6366         return ""
6367     # Convert tabs to spaces (following the normal Python rules)
6368     # and split into a list of lines:
6369     lines = docstring.expandtabs().splitlines()
6370     # Determine minimum indentation (first line doesn't count):
6371     indent = sys.maxsize
6372     for line in lines[1:]:
6373         stripped = line.lstrip()
6374         if stripped:
6375             indent = min(indent, len(line) - len(stripped))
6376     # Remove indentation (first line is special):
6377     trimmed = [lines[0].strip()]
6378     if indent < sys.maxsize:
6379         last_line_idx = len(lines) - 2
6380         for i, line in enumerate(lines[1:]):
6381             stripped_line = line[indent:].rstrip()
6382             if stripped_line or i == last_line_idx:
6383                 trimmed.append(prefix + stripped_line)
6384             else:
6385                 trimmed.append("")
6386     # Return a single string:
6387     return "\n".join(trimmed)
6388
6389
6390 if __name__ == "__main__":
6391     patched_main()