black.py

   1 import ast
   2 import asyncio
   3 from concurrent.futures import Executor, ProcessPoolExecutor
   4 from contextlib import contextmanager
   5 from datetime import datetime
   6 from enum import Enum
   7 from functools import lru_cache, partial, wraps
   8 import io
   9 import itertools
  10 import logging
  11 from multiprocessing import Manager, freeze_support
  12 import os
  13 from pathlib import Path
  14 import pickle
  15 import re
  16 import signal
  17 import sys
  18 import tempfile
  19 import tokenize
  20 import traceback
  21 from typing import (
  22     Any,
  23     Callable,
  24     Collection,
  25     Dict,
  26     Generator,
  27     Generic,
  28     Iterable,
  29     Iterator,
  30     List,
  31     Optional,
  32     Pattern,
  33     Sequence,
  34     Set,
  35     Tuple,
  36     TypeVar,
  37     Union,
  38     cast,
  39 )
  40
  41 from appdirs import user_cache_dir
  42 from attr import dataclass, evolve, Factory
  43 import click
  44 import toml
  45 from typed_ast import ast3, ast27
  46
  47 # lib2to3 fork
  48 from blib2to3.pytree import Node, Leaf, type_repr
  49 from blib2to3 import pygram, pytree
  50 from blib2to3.pgen2 import driver, token
  51 from blib2to3.pgen2.grammar import Grammar
  52 from blib2to3.pgen2.parse import ParseError
  53
  54 from _version import get_versions
  55
  56 v = get_versions()
  57 __version__ = v.get("closest-tag", v["version"])
  58 __git_version__ = v.get("full-revisionid")
  59
  60 DEFAULT_LINE_LENGTH = 88
  61 DEFAULT_EXCLUDES = (
  62     r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/"
  63 )
  64 DEFAULT_INCLUDES = r"\.pyi?$"
  65 CACHE_DIR = Path(user_cache_dir("black", version=__git_version__))
  66
  67
  68 # types
  69 FileContent = str
  70 Encoding = str
  71 NewLine = str
  72 Depth = int
  73 NodeType = int
  74 LeafID = int
  75 Priority = int
  76 Index = int
  77 LN = Union[Leaf, Node]
  78 SplitFunc = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
  79 Timestamp = float
  80 FileSize = int
  81 CacheInfo = Tuple[Timestamp, FileSize]
  82 Cache = Dict[Path, CacheInfo]
  83 out = partial(click.secho, bold=True, err=True)
  84 err = partial(click.secho, fg="red", err=True)
  85
  86 pygram.initialize(CACHE_DIR)
  87 syms = pygram.python_symbols
  88
  89
  90 class NothingChanged(UserWarning):
  91     """Raised when reformatted code is the same as source."""
  92
  93
  94 class CannotSplit(Exception):
  95     """A readable split that fits the allotted line length is impossible."""
  96
  97
  98 class InvalidInput(ValueError):
  99     """Raised when input source code fails all parse attempts."""
 100
 101
 102 class WriteBack(Enum):
 103     NO = 0
 104     YES = 1
 105     DIFF = 2
 106     CHECK = 3
 107
 108     @classmethod
 109     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
 110         if check and not diff:
 111             return cls.CHECK
 112
 113         return cls.DIFF if diff else cls.YES
 114
 115
 116 class Changed(Enum):
 117     NO = 0
 118     CACHED = 1
 119     YES = 2
 120
 121
 122 class TargetVersion(Enum):
 123     PY27 = 2
 124     PY33 = 3
 125     PY34 = 4
 126     PY35 = 5
 127     PY36 = 6
 128     PY37 = 7
 129     PY38 = 8
 130
 131     def is_python2(self) -> bool:
 132         return self is TargetVersion.PY27
 133
 134
 135 PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
 136
 137
 138 class Feature(Enum):
 139     # All string literals are unicode
 140     UNICODE_LITERALS = 1
 141     F_STRINGS = 2
 142     NUMERIC_UNDERSCORES = 3
 143     TRAILING_COMMA_IN_CALL = 4
 144     TRAILING_COMMA_IN_DEF = 5
 145     # The following two feature-flags are mutually exclusive, and exactly one should be
 146     # set for every version of python.
 147     ASYNC_IDENTIFIERS = 6
 148     ASYNC_KEYWORDS = 7
 149     ASSIGNMENT_EXPRESSIONS = 8
 150     POS_ONLY_ARGUMENTS = 9
 151
 152
 153 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
 154     TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
 155     TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 156     TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 157     TargetVersion.PY35: {
 158         Feature.UNICODE_LITERALS,
 159         Feature.TRAILING_COMMA_IN_CALL,
 160         Feature.ASYNC_IDENTIFIERS,
 161     },
 162     TargetVersion.PY36: {
 163         Feature.UNICODE_LITERALS,
 164         Feature.F_STRINGS,
 165         Feature.NUMERIC_UNDERSCORES,
 166         Feature.TRAILING_COMMA_IN_CALL,
 167         Feature.TRAILING_COMMA_IN_DEF,
 168         Feature.ASYNC_IDENTIFIERS,
 169     },
 170     TargetVersion.PY37: {
 171         Feature.UNICODE_LITERALS,
 172         Feature.F_STRINGS,
 173         Feature.NUMERIC_UNDERSCORES,
 174         Feature.TRAILING_COMMA_IN_CALL,
 175         Feature.TRAILING_COMMA_IN_DEF,
 176         Feature.ASYNC_KEYWORDS,
 177     },
 178     TargetVersion.PY38: {
 179         Feature.UNICODE_LITERALS,
 180         Feature.F_STRINGS,
 181         Feature.NUMERIC_UNDERSCORES,
 182         Feature.TRAILING_COMMA_IN_CALL,
 183         Feature.TRAILING_COMMA_IN_DEF,
 184         Feature.ASYNC_KEYWORDS,
 185         Feature.ASSIGNMENT_EXPRESSIONS,
 186         Feature.POS_ONLY_ARGUMENTS,
 187     },
 188 }
 189
 190
 191 @dataclass
 192 class FileMode:
 193     target_versions: Set[TargetVersion] = Factory(set)
 194     line_length: int = DEFAULT_LINE_LENGTH
 195     string_normalization: bool = True
 196     is_pyi: bool = False
 197
 198     def get_cache_key(self) -> str:
 199         if self.target_versions:
 200             version_str = ",".join(
 201                 str(version.value)
 202                 for version in sorted(self.target_versions, key=lambda v: v.value)
 203             )
 204         else:
 205             version_str = "-"
 206         parts = [
 207             version_str,
 208             str(self.line_length),
 209             str(int(self.string_normalization)),
 210             str(int(self.is_pyi)),
 211         ]
 212         return ".".join(parts)
 213
 214
 215 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 216     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 217
 218
 219 def read_pyproject_toml(
 220     ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
 221 ) -> Optional[str]:
 222     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 223
 224     Returns the path to a successfully found and read configuration file, None
 225     otherwise.
 226     """
 227     assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
 228     if not value:
 229         root = find_project_root(ctx.params.get("src", ()))
 230         path = root / "pyproject.toml"
 231         if path.is_file():
 232             value = str(path)
 233         else:
 234             return None
 235
 236     try:
 237         pyproject_toml = toml.load(value)
 238         config = pyproject_toml.get("tool", {}).get("black", {})
 239     except (toml.TomlDecodeError, OSError) as e:
 240         raise click.FileError(
 241             filename=value, hint=f"Error reading configuration file: {e}"
 242         )
 243
 244     if not config:
 245         return None
 246
 247     if ctx.default_map is None:
 248         ctx.default_map = {}
 249     ctx.default_map.update(  # type: ignore  # bad types in .pyi
 250         {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 251     )
 252     return value
 253
 254
 255 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 256 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
 257 @click.option(
 258     "-l",
 259     "--line-length",
 260     type=int,
 261     default=DEFAULT_LINE_LENGTH,
 262     help="How many characters per line to allow.",
 263     show_default=True,
 264 )
 265 @click.option(
 266     "-t",
 267     "--target-version",
 268     type=click.Choice([v.name.lower() for v in TargetVersion]),
 269     callback=lambda c, p, v: [TargetVersion[val.upper()] for val in v],
 270     multiple=True,
 271     help=(
 272         "Python versions that should be supported by Black's output. [default: "
 273         "per-file auto-detection]"
 274     ),
 275 )
 276 @click.option(
 277     "--py36",
 278     is_flag=True,
 279     help=(
 280         "Allow using Python 3.6-only syntax on all input files.  This will put "
 281         "trailing commas in function signatures and calls also after *args and "
 282         "**kwargs. Deprecated; use --target-version instead. "
 283         "[default: per-file auto-detection]"
 284     ),
 285 )
 286 @click.option(
 287     "--pyi",
 288     is_flag=True,
 289     help=(
 290         "Format all input files like typing stubs regardless of file extension "
 291         "(useful when piping source on standard input)."
 292     ),
 293 )
 294 @click.option(
 295     "-S",
 296     "--skip-string-normalization",
 297     is_flag=True,
 298     help="Don't normalize string quotes or prefixes.",
 299 )
 300 @click.option(
 301     "--check",
 302     is_flag=True,
 303     help=(
 304         "Don't write the files back, just return the status.  Return code 0 "
 305         "means nothing would change.  Return code 1 means some files would be "
 306         "reformatted.  Return code 123 means there was an internal error."
 307     ),
 308 )
 309 @click.option(
 310     "--diff",
 311     is_flag=True,
 312     help="Don't write the files back, just output a diff for each file on stdout.",
 313 )
 314 @click.option(
 315     "--fast/--safe",
 316     is_flag=True,
 317     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 318 )
 319 @click.option(
 320     "--include",
 321     type=str,
 322     default=DEFAULT_INCLUDES,
 323     help=(
 324         "A regular expression that matches files and directories that should be "
 325         "included on recursive searches.  An empty value means all files are "
 326         "included regardless of the name.  Use forward slashes for directories on "
 327         "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
 328         "later."
 329     ),
 330     show_default=True,
 331 )
 332 @click.option(
 333     "--exclude",
 334     type=str,
 335     default=DEFAULT_EXCLUDES,
 336     help=(
 337         "A regular expression that matches files and directories that should be "
 338         "excluded on recursive searches.  An empty value means no paths are excluded. "
 339         "Use forward slashes for directories on all platforms (Windows, too).  "
 340         "Exclusions are calculated first, inclusions later."
 341     ),
 342     show_default=True,
 343 )
 344 @click.option(
 345     "-q",
 346     "--quiet",
 347     is_flag=True,
 348     help=(
 349         "Don't emit non-error messages to stderr. Errors are still emitted; "
 350         "silence those with 2>/dev/null."
 351     ),
 352 )
 353 @click.option(
 354     "-v",
 355     "--verbose",
 356     is_flag=True,
 357     help=(
 358         "Also emit messages to stderr about files that were not changed or were "
 359         "ignored due to --exclude=."
 360     ),
 361 )
 362 @click.version_option(version=__version__)
 363 @click.argument(
 364     "src",
 365     nargs=-1,
 366     type=click.Path(
 367         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 368     ),
 369     is_eager=True,
 370 )
 371 @click.option(
 372     "--config",
 373     type=click.Path(
 374         exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
 375     ),
 376     is_eager=True,
 377     callback=read_pyproject_toml,
 378     help="Read configuration from PATH.",
 379 )
 380 @click.pass_context
 381 def main(
 382     ctx: click.Context,
 383     code: Optional[str],
 384     line_length: int,
 385     target_version: List[TargetVersion],
 386     check: bool,
 387     diff: bool,
 388     fast: bool,
 389     pyi: bool,
 390     py36: bool,
 391     skip_string_normalization: bool,
 392     quiet: bool,
 393     verbose: bool,
 394     include: str,
 395     exclude: str,
 396     src: Tuple[str],
 397     config: Optional[str],
 398 ) -> None:
 399     """The uncompromising code formatter."""
 400     write_back = WriteBack.from_configuration(check=check, diff=diff)
 401     if target_version:
 402         if py36:
 403             err(f"Cannot use both --target-version and --py36")
 404             ctx.exit(2)
 405         else:
 406             versions = set(target_version)
 407     elif py36:
 408         err(
 409             "--py36 is deprecated and will be removed in a future version. "
 410             "Use --target-version py36 instead."
 411         )
 412         versions = PY36_VERSIONS
 413     else:
 414         # We'll autodetect later.
 415         versions = set()
 416     mode = FileMode(
 417         target_versions=versions,
 418         line_length=line_length,
 419         is_pyi=pyi,
 420         string_normalization=not skip_string_normalization,
 421     )
 422     if config and verbose:
 423         out(f"Using configuration from {config}.", bold=False, fg="blue")
 424     if code is not None:
 425         print(format_str(code, mode=mode))
 426         ctx.exit(0)
 427     try:
 428         include_regex = re_compile_maybe_verbose(include)
 429     except re.error:
 430         err(f"Invalid regular expression for include given: {include!r}")
 431         ctx.exit(2)
 432     try:
 433         exclude_regex = re_compile_maybe_verbose(exclude)
 434     except re.error:
 435         err(f"Invalid regular expression for exclude given: {exclude!r}")
 436         ctx.exit(2)
 437     report = Report(check=check, quiet=quiet, verbose=verbose)
 438     root = find_project_root(src)
 439     sources: Set[Path] = set()
 440     for s in src:
 441         p = Path(s)
 442         if p.is_dir():
 443             sources.update(
 444                 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
 445             )
 446         elif p.is_file() or s == "-":
 447             # if a file was explicitly given, we don't care about its extension
 448             sources.add(p)
 449         else:
 450             err(f"invalid path: {s}")
 451     if len(sources) == 0:
 452         if verbose or not quiet:
 453             out("No paths given. Nothing to do 😴")
 454         ctx.exit(0)
 455
 456     if len(sources) == 1:
 457         reformat_one(
 458             src=sources.pop(),
 459             fast=fast,
 460             write_back=write_back,
 461             mode=mode,
 462             report=report,
 463         )
 464     else:
 465         reformat_many(
 466             sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
 467         )
 468
 469     if verbose or not quiet:
 470         out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
 471         click.secho(str(report), err=True)
 472     ctx.exit(report.return_code)
 473
 474
 475 def reformat_one(
 476     src: Path, fast: bool, write_back: WriteBack, mode: FileMode, report: "Report"
 477 ) -> None:
 478     """Reformat a single file under `src` without spawning child processes.
 479
 480     `fast`, `write_back`, and `mode` options are passed to
 481     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 482     """
 483     try:
 484         changed = Changed.NO
 485         if not src.is_file() and str(src) == "-":
 486             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 487                 changed = Changed.YES
 488         else:
 489             cache: Cache = {}
 490             if write_back != WriteBack.DIFF:
 491                 cache = read_cache(mode)
 492                 res_src = src.resolve()
 493                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 494                     changed = Changed.CACHED
 495             if changed is not Changed.CACHED and format_file_in_place(
 496                 src, fast=fast, write_back=write_back, mode=mode
 497             ):
 498                 changed = Changed.YES
 499             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 500                 write_back is WriteBack.CHECK and changed is Changed.NO
 501             ):
 502                 write_cache(cache, [src], mode)
 503         report.done(src, changed)
 504     except Exception as exc:
 505         report.failed(src, str(exc))
 506
 507
 508 def reformat_many(
 509     sources: Set[Path],
 510     fast: bool,
 511     write_back: WriteBack,
 512     mode: FileMode,
 513     report: "Report",
 514 ) -> None:
 515     """Reformat multiple files using a ProcessPoolExecutor."""
 516     loop = asyncio.get_event_loop()
 517     worker_count = os.cpu_count()
 518     if sys.platform == "win32":
 519         # Work around https://bugs.python.org/issue26903
 520         worker_count = min(worker_count, 61)
 521     executor = ProcessPoolExecutor(max_workers=worker_count)
 522     try:
 523         loop.run_until_complete(
 524             schedule_formatting(
 525                 sources=sources,
 526                 fast=fast,
 527                 write_back=write_back,
 528                 mode=mode,
 529                 report=report,
 530                 loop=loop,
 531                 executor=executor,
 532             )
 533         )
 534     finally:
 535         shutdown(loop)
 536         executor.shutdown()
 537
 538
 539 async def schedule_formatting(
 540     sources: Set[Path],
 541     fast: bool,
 542     write_back: WriteBack,
 543     mode: FileMode,
 544     report: "Report",
 545     loop: asyncio.AbstractEventLoop,
 546     executor: Executor,
 547 ) -> None:
 548     """Run formatting of `sources` in parallel using the provided `executor`.
 549
 550     (Use ProcessPoolExecutors for actual parallelism.)
 551
 552     `write_back`, `fast`, and `mode` options are passed to
 553     :func:`format_file_in_place`.
 554     """
 555     cache: Cache = {}
 556     if write_back != WriteBack.DIFF:
 557         cache = read_cache(mode)
 558         sources, cached = filter_cached(cache, sources)
 559         for src in sorted(cached):
 560             report.done(src, Changed.CACHED)
 561     if not sources:
 562         return
 563
 564     cancelled = []
 565     sources_to_cache = []
 566     lock = None
 567     if write_back == WriteBack.DIFF:
 568         # For diff output, we need locks to ensure we don't interleave output
 569         # from different processes.
 570         manager = Manager()
 571         lock = manager.Lock()
 572     tasks = {
 573         asyncio.ensure_future(
 574             loop.run_in_executor(
 575                 executor, format_file_in_place, src, fast, mode, write_back, lock
 576             )
 577         ): src
 578         for src in sorted(sources)
 579     }
 580     pending: Iterable[asyncio.Future] = tasks.keys()
 581     try:
 582         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 583         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 584     except NotImplementedError:
 585         # There are no good alternatives for these on Windows.
 586         pass
 587     while pending:
 588         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 589         for task in done:
 590             src = tasks.pop(task)
 591             if task.cancelled():
 592                 cancelled.append(task)
 593             elif task.exception():
 594                 report.failed(src, str(task.exception()))
 595             else:
 596                 changed = Changed.YES if task.result() else Changed.NO
 597                 # If the file was written back or was successfully checked as
 598                 # well-formatted, store this information in the cache.
 599                 if write_back is WriteBack.YES or (
 600                     write_back is WriteBack.CHECK and changed is Changed.NO
 601                 ):
 602                     sources_to_cache.append(src)
 603                 report.done(src, changed)
 604     if cancelled:
 605         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 606     if sources_to_cache:
 607         write_cache(cache, sources_to_cache, mode)
 608
 609
 610 def format_file_in_place(
 611     src: Path,
 612     fast: bool,
 613     mode: FileMode,
 614     write_back: WriteBack = WriteBack.NO,
 615     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 616 ) -> bool:
 617     """Format file under `src` path. Return True if changed.
 618
 619     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 620     code to the file.
 621     `mode` and `fast` options are passed to :func:`format_file_contents`.
 622     """
 623     if src.suffix == ".pyi":
 624         mode = evolve(mode, is_pyi=True)
 625
 626     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 627     with open(src, "rb") as buf:
 628         src_contents, encoding, newline = decode_bytes(buf.read())
 629     try:
 630         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 631     except NothingChanged:
 632         return False
 633
 634     if write_back == write_back.YES:
 635         with open(src, "w", encoding=encoding, newline=newline) as f:
 636             f.write(dst_contents)
 637     elif write_back == write_back.DIFF:
 638         now = datetime.utcnow()
 639         src_name = f"{src}\t{then} +0000"
 640         dst_name = f"{src}\t{now} +0000"
 641         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 642
 643         with lock or nullcontext():
 644             f = io.TextIOWrapper(
 645                 sys.stdout.buffer,
 646                 encoding=encoding,
 647                 newline=newline,
 648                 write_through=True,
 649             )
 650             f.write(diff_contents)
 651             f.detach()
 652
 653     return True
 654
 655
 656 def format_stdin_to_stdout(
 657     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: FileMode
 658 ) -> bool:
 659     """Format file on stdin. Return True if changed.
 660
 661     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 662     write a diff to stdout. The `mode` argument is passed to
 663     :func:`format_file_contents`.
 664     """
 665     then = datetime.utcnow()
 666     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 667     dst = src
 668     try:
 669         dst = format_file_contents(src, fast=fast, mode=mode)
 670         return True
 671
 672     except NothingChanged:
 673         return False
 674
 675     finally:
 676         f = io.TextIOWrapper(
 677             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 678         )
 679         if write_back == WriteBack.YES:
 680             f.write(dst)
 681         elif write_back == WriteBack.DIFF:
 682             now = datetime.utcnow()
 683             src_name = f"STDIN\t{then} +0000"
 684             dst_name = f"STDOUT\t{now} +0000"
 685             f.write(diff(src, dst, src_name, dst_name))
 686         f.detach()
 687
 688
 689 def format_file_contents(
 690     src_contents: str, *, fast: bool, mode: FileMode
 691 ) -> FileContent:
 692     """Reformat contents a file and return new contents.
 693
 694     If `fast` is False, additionally confirm that the reformatted code is
 695     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 696     `mode` is passed to :func:`format_str`.
 697     """
 698     if src_contents.strip() == "":
 699         raise NothingChanged
 700
 701     dst_contents = format_str(src_contents, mode=mode)
 702     if src_contents == dst_contents:
 703         raise NothingChanged
 704
 705     if not fast:
 706         assert_equivalent(src_contents, dst_contents)
 707         assert_stable(src_contents, dst_contents, mode=mode)
 708     return dst_contents
 709
 710
 711 def format_str(src_contents: str, *, mode: FileMode) -> FileContent:
 712     """Reformat a string and return new contents.
 713
 714     `mode` determines formatting options, such as how many characters per line are
 715     allowed.
 716     """
 717     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 718     dst_contents = []
 719     future_imports = get_future_imports(src_node)
 720     if mode.target_versions:
 721         versions = mode.target_versions
 722     else:
 723         versions = detect_target_versions(src_node)
 724     normalize_fmt_off(src_node)
 725     lines = LineGenerator(
 726         remove_u_prefix="unicode_literals" in future_imports
 727         or supports_feature(versions, Feature.UNICODE_LITERALS),
 728         is_pyi=mode.is_pyi,
 729         normalize_strings=mode.string_normalization,
 730     )
 731     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 732     empty_line = Line()
 733     after = 0
 734     split_line_features = {
 735         feature
 736         for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
 737         if supports_feature(versions, feature)
 738     }
 739     for current_line in lines.visit(src_node):
 740         for _ in range(after):
 741             dst_contents.append(str(empty_line))
 742         before, after = elt.maybe_empty_lines(current_line)
 743         for _ in range(before):
 744             dst_contents.append(str(empty_line))
 745         for line in split_line(
 746             current_line, line_length=mode.line_length, features=split_line_features
 747         ):
 748             dst_contents.append(str(line))
 749     return "".join(dst_contents)
 750
 751
 752 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 753     """Return a tuple of (decoded_contents, encoding, newline).
 754
 755     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 756     universal newlines (i.e. only contains LF).
 757     """
 758     srcbuf = io.BytesIO(src)
 759     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 760     if not lines:
 761         return "", encoding, "\n"
 762
 763     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 764     srcbuf.seek(0)
 765     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 766         return tiow.read(), encoding, newline
 767
 768
 769 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
 770     if not target_versions:
 771         # No target_version specified, so try all grammars.
 772         return [
 773             # Python 3.7+
 774             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
 775             # Python 3.0-3.6
 776             pygram.python_grammar_no_print_statement_no_exec_statement,
 777             # Python 2.7 with future print_function import
 778             pygram.python_grammar_no_print_statement,
 779             # Python 2.7
 780             pygram.python_grammar,
 781         ]
 782     elif all(version.is_python2() for version in target_versions):
 783         # Python 2-only code, so try Python 2 grammars.
 784         return [
 785             # Python 2.7 with future print_function import
 786             pygram.python_grammar_no_print_statement,
 787             # Python 2.7
 788             pygram.python_grammar,
 789         ]
 790     else:
 791         # Python 3-compatible code, so only try Python 3 grammar.
 792         grammars = []
 793         # If we have to parse both, try to parse async as a keyword first
 794         if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
 795             # Python 3.7+
 796             grammars.append(
 797                 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords  # noqa: B950
 798             )
 799         if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
 800             # Python 3.0-3.6
 801             grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
 802         # At least one of the above branches must have been taken, because every Python
 803         # version has exactly one of the two 'ASYNC_*' flags
 804         return grammars
 805
 806
 807 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
 808     """Given a string with source, return the lib2to3 Node."""
 809     if src_txt[-1:] != "\n":
 810         src_txt += "\n"
 811
 812     for grammar in get_grammars(set(target_versions)):
 813         drv = driver.Driver(grammar, pytree.convert)
 814         try:
 815             result = drv.parse_string(src_txt, True)
 816             break
 817
 818         except ParseError as pe:
 819             lineno, column = pe.context[1]
 820             lines = src_txt.splitlines()
 821             try:
 822                 faulty_line = lines[lineno - 1]
 823             except IndexError:
 824                 faulty_line = "<line number missing in source>"
 825             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 826     else:
 827         raise exc from None
 828
 829     if isinstance(result, Leaf):
 830         result = Node(syms.file_input, [result])
 831     return result
 832
 833
 834 def lib2to3_unparse(node: Node) -> str:
 835     """Given a lib2to3 node, return its string representation."""
 836     code = str(node)
 837     return code
 838
 839
 840 T = TypeVar("T")
 841
 842
 843 class Visitor(Generic[T]):
 844     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 845
 846     def visit(self, node: LN) -> Iterator[T]:
 847         """Main method to visit `node` and its children.
 848
 849         It tries to find a `visit_*()` method for the given `node.type`, like
 850         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 851         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 852         instead.
 853
 854         Then yields objects of type `T` from the selected visitor.
 855         """
 856         if node.type < 256:
 857             name = token.tok_name[node.type]
 858         else:
 859             name = type_repr(node.type)
 860         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 861
 862     def visit_default(self, node: LN) -> Iterator[T]:
 863         """Default `visit_*()` implementation. Recurses to children of `node`."""
 864         if isinstance(node, Node):
 865             for child in node.children:
 866                 yield from self.visit(child)
 867
 868
 869 @dataclass
 870 class DebugVisitor(Visitor[T]):
 871     tree_depth: int = 0
 872
 873     def visit_default(self, node: LN) -> Iterator[T]:
 874         indent = " " * (2 * self.tree_depth)
 875         if isinstance(node, Node):
 876             _type = type_repr(node.type)
 877             out(f"{indent}{_type}", fg="yellow")
 878             self.tree_depth += 1
 879             for child in node.children:
 880                 yield from self.visit(child)
 881
 882             self.tree_depth -= 1
 883             out(f"{indent}/{_type}", fg="yellow", bold=False)
 884         else:
 885             _type = token.tok_name.get(node.type, str(node.type))
 886             out(f"{indent}{_type}", fg="blue", nl=False)
 887             if node.prefix:
 888                 # We don't have to handle prefixes for `Node` objects since
 889                 # that delegates to the first child anyway.
 890                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 891             out(f" {node.value!r}", fg="blue", bold=False)
 892
 893     @classmethod
 894     def show(cls, code: Union[str, Leaf, Node]) -> None:
 895         """Pretty-print the lib2to3 AST of a given string of `code`.
 896
 897         Convenience method for debugging.
 898         """
 899         v: DebugVisitor[None] = DebugVisitor()
 900         if isinstance(code, str):
 901             code = lib2to3_parse(code)
 902         list(v.visit(code))
 903
 904
# Token types of leaves that only carry layout.  `LineGenerator.visit_default()`
# never appends leaves of these types to the line being built.
WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
# Node types of compound statements.  A `simple_stmt` whose direct parent is one
# of these is handled as a suite-like body by `LineGenerator.visit_simple_stmt()`.
STATEMENT = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
# Synthetic token type for comments that sit on a line of their own.  The value
# is below 256, so `Visitor.visit()` resolves its name through `token.tok_name`;
# the name is registered right below.
STANDALONE_COMMENT = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
# Values of the NAME leaves that spell boolean operators.
LOGIC_OPERATORS = {"and", "or"}
# Token types of comparison operators.
COMPARATORS = {
    token.LESS,
    token.GREATER,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESSEQUAL,
    token.GREATEREQUAL,
}
# Token types of arithmetic and bitwise operators.  Same keys as
# `MATH_PRIORITIES` below.
MATH_OPERATORS = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
# `*` and `**` tokens.
STARS = {token.STAR, token.DOUBLESTAR}
# `STARS` plus `/`.
# NOTE(review): presumably the tokens that may be argument-list markers rather
# than operators -- confirm against the users of this set outside this section.
VARARGS_SPECIALS = STARS | {token.SLASH}
# Node types under which a `VARARGS_SPECIALS` token is expected.
# NOTE(review): the consumers are outside this section -- confirm exact use.
VARARGS_PARENTS = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
# Node types under which a star is expected to mean unpacking.
# NOTE(review): the consumers are outside this section -- confirm exact use.
UNPACKING_PARENTS = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
    syms.testlist_star_expr,
}
# Node types that mark an expression as non-trivial.  `Line.is_complex_subscript()`
# reports a subscript as complex when any node in it has one of these types.
TEST_DESCENDANTS = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
# Source text of the plain and augmented assignment operators.
ASSIGNMENTS = {
    "=",
    "+=",
    "-=",
    "*=",
    "@=",
    "/=",
    "%=",
    "&=",
    "|=",
    "^=",
    "<<=",
    ">>=",
    "**=",
    "//=",
}
# Delimiter priorities.  A larger number is a higher priority:
# `BracketTracker.max_delimiter_priority()` picks the maximum.
# NOTE(review): presumably these are the values the `is_split_*_delimiter()`
# helpers return -- those helpers are outside this section.
COMPREHENSION_PRIORITY = 20
COMMA_PRIORITY = 18
TERNARY_PRIORITY = 16
LOGIC_PRIORITY = 14
STRING_PRIORITY = 12
COMPARATOR_PRIORITY = 10
# Priority per operator token type; operators sharing a number rank equally.
MATH_PRIORITIES = {
    token.VBAR: 9,
    token.CIRCUMFLEX: 8,
    token.AMPER: 7,
    token.LEFTSHIFT: 6,
    token.RIGHTSHIFT: 6,
    token.PLUS: 5,
    token.MINUS: 5,
    token.STAR: 4,
    token.SLASH: 4,
    token.DOUBLESLASH: 4,
    token.PERCENT: 4,
    token.AT: 4,
    token.TILDE: 3,
    token.DOUBLESTAR: 2,
}
DOT_PRIORITY = 1
1015
1016
@dataclass
class BracketTracker:
    """Keeps track of brackets on a line."""

    # Current nesting depth.  Besides real brackets, `for ... in` and
    # `lambda ...:` temporarily raise it (see the `maybe_*` methods).
    depth: int = 0
    # Opening brackets still waiting for their partner, keyed by the depth they
    # were opened at and the token type of the closing bracket they expect.
    bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
    # `id()` of each depth-0 delimiter leaf mapped to its priority.
    delimiters: Dict[LeafID, Priority] = Factory(dict)
    # The leaf most recently passed to `mark()` (comments excluded).
    previous: Optional[Leaf] = None
    # Depths at which a `for` / `lambda` raised `depth`; used to undo the bump
    # at the matching `in` / `:`.
    _for_loop_depths: List[int] = Factory(list)
    _lambda_argument_depths: List[int] = Factory(list)

    def mark(self, leaf: Leaf) -> None:
        """Mark `leaf` with bracket-related metadata. Keep track of delimiters.

        All leaves receive an int `bracket_depth` field that stores how deep
        within brackets a given leaf is. 0 means there are no enclosing brackets
        that started on this line.

        If a leaf is itself a closing bracket, it receives an `opening_bracket`
        field that it forms a pair with. This is a one-directional link to
        avoid reference cycles.

        If a leaf is a delimiter (a token on which Black can split the line if
        needed) and it's on depth 0, its `id()` is stored in the tracker's
        `delimiters` field.

        Comment leaves are ignored entirely.  Raises KeyError when `leaf` is a
        closing bracket with no matching opening bracket recorded.
        """
        if leaf.type == token.COMMENT:
            return

        # Undo the artificial depth bumps first so that `in` and the lambda's
        # `:` are recorded at the depth of the surrounding expression.
        self.maybe_decrement_after_for_loop_variable(leaf)
        self.maybe_decrement_after_lambda_arguments(leaf)
        if leaf.type in CLOSING_BRACKETS:
            self.depth -= 1
            opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
            leaf.opening_bracket = opening_bracket
        leaf.bracket_depth = self.depth
        if self.depth == 0:
            delim = is_split_before_delimiter(leaf, self.previous)
            if delim and self.previous is not None:
                # Splitting *before* `leaf` means the delimiter is the leaf
                # that precedes it.
                self.delimiters[id(self.previous)] = delim
            else:
                delim = is_split_after_delimiter(leaf, self.previous)
                if delim:
                    self.delimiters[id(leaf)] = delim
        if leaf.type in OPENING_BRACKETS:
            # The opening bracket itself stays at the outer depth; only what
            # follows it is one level deeper.
            self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
            self.depth += 1
        self.previous = leaf
        self.maybe_increment_lambda_arguments(leaf)
        self.maybe_increment_for_loop_variable(leaf)

    def any_open_brackets(self) -> bool:
        """Return True if there is an yet unmatched open bracket on the line."""
        return bool(self.bracket_match)

    def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
        """Return the highest priority of a delimiter found on the line.

        Delimiters whose leaf id is listed in `exclude` are not considered.

        Values are consistent with what `is_split_*_delimiter()` return.
        Raises ValueError on no delimiters.
        """
        # Materialize once: `exclude` is only promised to be an iterable, and a
        # one-shot iterator would be used up by the first membership test.
        excluded = frozenset(exclude)
        return max(v for k, v in self.delimiters.items() if k not in excluded)

    def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
        """Return the number of delimiters with the given `priority`.

        If no `priority` is passed, defaults to max priority on the line.
        Returns 0 when the line has no delimiters at all.
        """
        if not self.delimiters:
            return 0

        priority = priority or self.max_delimiter_priority()
        return sum(1 for p in self.delimiters.values() if p == priority)

    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
        """In a for loop, or comprehension, the variables are often unpacks.

        To avoid splitting on the comma in this situation, increase the depth of
        tokens between `for` and `in`.

        Returns True if `leaf` was a `for` and the depth was increased.
        """
        if leaf.type == token.NAME and leaf.value == "for":
            self.depth += 1
            self._for_loop_depths.append(self.depth)
            return True

        return False

    def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
        """See `maybe_increment_for_loop_variable` above for explanation.

        Returns True if `leaf` was the `in` closing the innermost pending `for`
        and the depth was decreased.
        """
        if (
            self._for_loop_depths
            and self._for_loop_depths[-1] == self.depth
            and leaf.type == token.NAME
            and leaf.value == "in"
        ):
            self.depth -= 1
            self._for_loop_depths.pop()
            return True

        return False

    def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
        """In a lambda expression, there might be more than one argument.

        To avoid splitting on the comma in this situation, increase the depth of
        tokens between `lambda` and `:`.

        Returns True if `leaf` was a `lambda` and the depth was increased.
        """
        if leaf.type == token.NAME and leaf.value == "lambda":
            self.depth += 1
            self._lambda_argument_depths.append(self.depth)
            return True

        return False

    def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
        """See `maybe_increment_lambda_arguments` above for explanation.

        Returns True if `leaf` was the `:` closing the innermost pending
        `lambda` and the depth was decreased.
        """
        if (
            self._lambda_argument_depths
            and self._lambda_argument_depths[-1] == self.depth
            and leaf.type == token.COLON
        ):
            self.depth -= 1
            self._lambda_argument_depths.pop()
            return True

        return False

    def get_open_lsqb(self) -> Optional[Leaf]:
        """Return the most recent opening square bracket (if any).

        Only the bracket opened one level below the current depth is looked at;
        None is returned when that bracket is not a `[` or nothing is open there.
        """
        return self.bracket_match.get((self.depth - 1, token.RSQB))
1147
1148
@dataclass
class Line:
    """Holds leaves and comments. Can be printed with `str(line)`."""

    # Indentation level; rendered as four spaces per level by `__str__()`.
    depth: int = 0
    leaves: List[Leaf] = Factory(list)
    # Inline comments keyed by the `id()` of the leaf they are attached to.
    comments: Dict[LeafID, List[Leaf]] = Factory(dict)  # keys ordered like `leaves`
    bracket_tracker: BracketTracker = Factory(BracketTracker)
    # When True, even preformatted leaves are run through the bracket tracker.
    inside_brackets: bool = False
    # Not consulted inside this class.
    # NOTE(review): presumably read by the line-splitting code -- confirm.
    should_explode: bool = False

    def append(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Add a new `leaf` to the end of the line.

        Unless `preformatted` is True, the `leaf` will receive a new consistent
        whitespace prefix and metadata applied by :class:`BracketTracker`.
        Trailing commas are maybe removed, unpacked for loop variables are
        demoted from being delimiters.

        Leaves that are neither brackets nor have a non-blank value are dropped.
        Inline comments are put aside.
        """
        has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
        if not has_value:
            return

        if token.COLON == leaf.type and self.is_class_paren_empty:
            # `class A():` -> `class A:`; drop the redundant pair of parens.
            del self.leaves[-2:]
        if self.leaves and not preformatted:
            # Note: at this point leaf.prefix should be empty except for
            # imports, for which we only preserve newlines.
            leaf.prefix += whitespace(
                leaf, complex_subscript=self.is_complex_subscript(leaf)
            )
        if self.inside_brackets or not preformatted:
            self.bracket_tracker.mark(leaf)
            self.maybe_remove_trailing_comma(leaf)
        if not self.append_comment(leaf):
            self.leaves.append(leaf)

    def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Like :func:`append()` but disallow invalid standalone comment structure.

        Raises ValueError when any `leaf` is appended after a standalone comment
        or when a standalone comment is not the first leaf on the line.  Both
        checks apply only while the bracket tracker is at depth 0.
        """
        if self.bracket_tracker.depth == 0:
            if self.is_comment:
                raise ValueError("cannot append to standalone comments")

            if self.leaves and leaf.type == STANDALONE_COMMENT:
                raise ValueError(
                    "cannot append standalone comments to a populated line"
                )

        self.append(leaf, preformatted=preformatted)

    @property
    def is_comment(self) -> bool:
        """Is this line a standalone comment?"""
        return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT

    @property
    def is_decorator(self) -> bool:
        """Is this line a decorator?"""
        return bool(self) and self.leaves[0].type == token.AT

    @property
    def is_import(self) -> bool:
        """Is this an import line?"""
        return bool(self) and is_import(self.leaves[0])

    @property
    def is_class(self) -> bool:
        """Is this line a class definition?"""
        return (
            bool(self)
            and self.leaves[0].type == token.NAME
            and self.leaves[0].value == "class"
        )

    @property
    def is_stub_class(self) -> bool:
        """Is this line a class definition with a body consisting only of "..."?"""
        return self.is_class and self.leaves[-3:] == [
            Leaf(token.DOT, ".") for _ in range(3)
        ]

    @property
    def is_def(self) -> bool:
        """Is this a function definition? (Also returns True for async defs.)"""
        try:
            first_leaf = self.leaves[0]
        except IndexError:
            return False

        try:
            second_leaf: Optional[Leaf] = self.leaves[1]
        except IndexError:
            second_leaf = None
        return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
            first_leaf.type == token.ASYNC
            and second_leaf is not None
            and second_leaf.type == token.NAME
            and second_leaf.value == "def"
        )

    @property
    def is_class_paren_empty(self) -> bool:
        """Is this a class with no base classes but using parentheses?

        Those are unnecessary and should be removed.
        """
        return (
            bool(self)
            and len(self.leaves) == 4
            and self.is_class
            and self.leaves[2].type == token.LPAR
            and self.leaves[2].value == "("
            and self.leaves[3].type == token.RPAR
            and self.leaves[3].value == ")"
        )

    @property
    def is_triple_quoted_string(self) -> bool:
        """Is the line a triple quoted string?"""
        return (
            bool(self)
            and self.leaves[0].type == token.STRING
            and self.leaves[0].value.startswith(('"""', "'''"))
        )

    def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
        """Does the line hold a standalone comment at `depth_limit` or shallower?

        If so, needs to be split before emitting.
        """
        return any(
            leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit
            for leaf in self.leaves
        )

    def contains_inner_type_comments(self) -> bool:
        """Is a type comment attached to a leaf other than the trailing ones?

        Comments on the last leaf are ignored; when that leaf is a comma or an
        invisible closing paren, comments on the leaf before it are ignored too.
        Returns False for a line too short to have those leaves.
        """
        ignored_ids = set()
        try:
            last_leaf = self.leaves[-1]
            ignored_ids.add(id(last_leaf))
            if last_leaf.type == token.COMMA or (
                last_leaf.type == token.RPAR and not last_leaf.value
            ):
                # When trailing commas or optional parens are inserted by Black for
                # consistency, comments after the previous last element are not moved
                # (they don't have to, rendering will still be correct).  So we ignore
                # trailing commas and invisible parens.
                last_leaf = self.leaves[-2]
                ignored_ids.add(id(last_leaf))
        except IndexError:
            return False

        for leaf_id, comments in self.comments.items():
            if leaf_id in ignored_ids:
                continue

            for comment in comments:
                if is_type_comment(comment):
                    return True

        return False

    def contains_multiline_strings(self) -> bool:
        """Is any leaf on the line a multiline string?"""
        return any(is_multiline_string(leaf) for leaf in self.leaves)

    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
        """Remove trailing comma if there is one and it's safe.

        `closing` is the leaf about to be appended; nothing happens unless it is
        a closing bracket and the last leaf on the line is a comma.  Returns True
        if the comma was removed.
        """
        if not (
            self.leaves
            and self.leaves[-1].type == token.COMMA
            and closing.type in CLOSING_BRACKETS
        ):
            return False

        if closing.type == token.RBRACE:
            self.remove_trailing_comma()
            return True

        if closing.type == token.RSQB:
            comma = self.leaves[-1]
            if comma.parent and comma.parent.type == syms.listmaker:
                self.remove_trailing_comma()
                return True

        # For parens let's check if it's safe to remove the comma.
        # Imports are always safe.
        if self.is_import:
            self.remove_trailing_comma()
            return True

        # Otherwise, if the trailing one is the only one, we might mistakenly
        # change a tuple into a different type by removing the comma.
        depth = closing.bracket_depth + 1
        commas = 0
        opening = closing.opening_bracket
        for _opening_index, leaf in enumerate(self.leaves):
            if leaf is opening:
                break

        else:
            # The opening bracket is not on this line.
            return False

        for leaf in self.leaves[_opening_index + 1 :]:
            if leaf is closing:
                break

            bracket_depth = leaf.bracket_depth
            if bracket_depth == depth and leaf.type == token.COMMA:
                commas += 1
                if leaf.parent and leaf.parent.type in {
                    syms.arglist,
                    syms.typedargslist,
                }:
                    # In an argument list even a single comma is safe to drop;
                    # count it twice so that the check below passes.
                    commas += 1
                    break

        if commas > 1:
            self.remove_trailing_comma()
            return True

        return False

    def append_comment(self, comment: Leaf) -> bool:
        """Add an inline or standalone comment to the line.

        Returns True when `comment` was stored in `comments` as an inline
        comment.  Returns False when the caller has to add the leaf to `leaves`
        itself: it is not a comment at all, or it is (or was just turned into)
        a standalone comment.
        """
        if (
            comment.type == STANDALONE_COMMENT
            and self.bracket_tracker.any_open_brackets()
        ):
            comment.prefix = ""
            return False

        if comment.type != token.COMMENT:
            return False

        if not self.leaves:
            # Nothing to attach to: the comment becomes the line's only leaf.
            comment.type = STANDALONE_COMMENT
            comment.prefix = ""
            return False

        last_leaf = self.leaves[-1]
        if (
            last_leaf.type == token.RPAR
            and not last_leaf.value
            and last_leaf.parent
            and len(list(last_leaf.parent.leaves())) <= 3
            and not is_type_comment(comment)
        ):
            # Comments on an optional parens wrapping a single leaf should belong to
            # the wrapped node except if it's a type comment. Pinning the comment like
            # this avoids unstable formatting caused by comment migration.
            if len(self.leaves) < 2:
                comment.type = STANDALONE_COMMENT
                comment.prefix = ""
                return False
            last_leaf = self.leaves[-2]
        self.comments.setdefault(id(last_leaf), []).append(comment)
        return True

    def comments_after(self, leaf: Leaf) -> List[Leaf]:
        """Return the comments that should appear directly after `leaf`.

        The result is the list stored for `leaf` (not a copy), or a new empty
        list when no comments are attached to it.
        """
        return self.comments.get(id(leaf), [])

    def remove_trailing_comma(self) -> None:
        """Remove the trailing comma and moves the comments attached to it."""
        trailing_comma = self.leaves.pop()
        trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
        self.comments.setdefault(id(self.leaves[-1]), []).extend(
            trailing_comma_comments
        )

    def is_complex_subscript(self, leaf: Leaf) -> bool:
        """Return True iff `leaf` is part of a slice with non-trivial exprs."""
        open_lsqb = self.bracket_tracker.get_open_lsqb()
        if open_lsqb is None:
            return False

        subscript_start = open_lsqb.next_sibling

        if isinstance(subscript_start, Node):
            if subscript_start.type == syms.listmaker:
                return False

            if subscript_start.type == syms.subscriptlist:
                # Only look at the subscript that `leaf` belongs to.
                subscript_start = child_towards(subscript_start, leaf)
        return subscript_start is not None and any(
            n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
        )

    def __str__(self) -> str:
        """Render the line.

        The first leaf is written as its prefix, the indentation and its value;
        every other leaf is written with `str(leaf)`.  All inline comments
        follow the last leaf, and the result ends with a newline.
        """
        if not self:
            return "\n"

        indent = "    " * self.depth
        leaves = iter(self.leaves)
        first = next(leaves)
        # Collect the pieces and join once instead of growing a string.
        parts = [f"{first.prefix}{indent}{first.value}"]
        parts.extend(str(leaf) for leaf in leaves)
        parts.extend(
            str(comment)
            for comment in itertools.chain.from_iterable(self.comments.values())
        )
        parts.append("\n")
        return "".join(parts)

    def __bool__(self) -> bool:
        """Return True if the line has leaves or comments."""
        return bool(self.leaves or self.comments)
1463
1464
@dataclass
class EmptyLineTracker:
    """Stateful calculator of the blank lines wanted around each processed line.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    is_pyi: bool = False
    # The line handed to the previous `maybe_empty_lines()` call, if any.
    previous_line: Optional[Line] = None
    # The "after" count returned for `previous_line`.
    previous_after: int = 0
    # Depths of the def/class lines whose bodies have not been left yet.
    previous_defs: List[int] = Factory(list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).
        """
        wanted_before, after = self._maybe_empty_lines(current_line)
        if self.previous_line is None:
            # Black should not insert empty lines at the beginning
            # of the file
            before = 0
        else:
            # Lines already promised after the previous line count towards the
            # ones wanted before this one.
            before = wanted_before - self.previous_after
        self.previous_after = after
        self.previous_line = current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw (before, after) counts for `current_line`.

        Clears the prefix of the line's first leaf and drops the entries of
        `previous_defs` that are at least as deep as the line.
        """
        depth = current_line.depth
        if depth == 0 and not self.is_pyi:
            max_allowed = 2
        else:
            max_allowed = 1

        before = 0
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = min(first_leaf.prefix.count("\n"), max_allowed)
            first_leaf.prefix = ""

        # Leaving the body of one or more defs/classes overrides whatever the
        # source had in front of this line.
        left_a_def = False
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            left_a_def = True
        if left_a_def:
            if self.is_pyi:
                before = 0 if depth else 1
            else:
                before = 1 if depth else 2

        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        previous = self.previous_line
        if previous:
            if (
                previous.is_import
                and not current_line.is_import
                and depth == previous.depth
            ):
                # At least one empty line after a block of imports.
                return (before or 1), 0

            if previous.is_class and current_line.is_triple_quoted_string:
                # One empty line after a class docstring.
                return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        """Compute (before, after) for a decorator, `def` or `class` line.

        `before` is the count derived from the source so far; here it only
        decides whether a comment directly above the line stays attached to it.
        """
        depth = current_line.depth
        if not current_line.is_decorator:
            self.previous_defs.append(depth)

        previous = self.previous_line
        if previous is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if previous.is_decorator:
            return 0, 0

        is_first_in_body = previous.depth < depth and (
            previous.is_class or previous.is_def
        )
        if is_first_in_body:
            return 0, 0

        comment_is_attached = (
            previous.is_comment and previous.depth == depth and before == 0
        )
        if comment_is_attached:
            return 0, 0

        if not self.is_pyi:
            newlines = 2
        elif previous.depth > depth:
            newlines = 1
        elif current_line.is_class or previous.is_class:
            # No blank line between classes with an empty body
            both_are_stubs = current_line.is_stub_class and previous.is_stub_class
            newlines = 0 if both_are_stubs else 1
        elif current_line.is_def and not previous.is_def:
            # Blank line between a block of functions and a block of non-functions
            newlines = 1
        else:
            newlines = 0

        if depth and newlines:
            # Nested definitions get one line less than module-level ones.
            newlines -= 1
        return newlines, 0
1580
1581
1582 @dataclass
1583 class LineGenerator(Visitor[Line]):
1584     """Generates reformatted Line objects.  Empty lines are not emitted.
1585
1586     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1587     in ways that will no longer stringify to valid Python code on the tree.
1588     """
1589
1590     is_pyi: bool = False
1591     normalize_strings: bool = True
1592     current_line: Line = Factory(Line)
1593     remove_u_prefix: bool = False
1594
1595     def line(self, indent: int = 0) -> Iterator[Line]:
1596         """Generate a line.
1597
1598         If the line is empty, only emit if it makes sense.
1599         If the line is too long, split it first and then generate.
1600
1601         If any lines were generated, set up a new current_line.
1602         """
1603         if not self.current_line:
1604             self.current_line.depth += indent
1605             return  # Line is empty, don't emit. Creating a new one unnecessary.
1606
1607         complete_line = self.current_line
1608         self.current_line = Line(depth=complete_line.depth + indent)
1609         yield complete_line
1610
1611     def visit_default(self, node: LN) -> Iterator[Line]:
1612         """Default `visit_*()` implementation. Recurses to children of `node`."""
1613         if isinstance(node, Leaf):
1614             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1615             for comment in generate_comments(node):
1616                 if any_open_brackets:
1617                     # any comment within brackets is subject to splitting
1618                     self.current_line.append(comment)
1619                 elif comment.type == token.COMMENT:
1620                     # regular trailing comment
1621                     self.current_line.append(comment)
1622                     yield from self.line()
1623
1624                 else:
1625                     # regular standalone comment
1626                     yield from self.line()
1627
1628                     self.current_line.append(comment)
1629                     yield from self.line()
1630
1631             normalize_prefix(node, inside_brackets=any_open_brackets)
1632             if self.normalize_strings and node.type == token.STRING:
1633                 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1634                 normalize_string_quotes(node)
1635             if node.type == token.NUMBER:
1636                 normalize_numeric_literal(node)
1637             if node.type not in WHITESPACE:
1638                 self.current_line.append(node)
1639         yield from super().visit_default(node)
1640
1641     def visit_atom(self, node: Node) -> Iterator[Line]:
1642         # Always make parentheses invisible around a single node, because it should
1643         # not be needed (except in the case of yield, where removing the parentheses
1644         # produces a SyntaxError).
1645         if (
1646             len(node.children) == 3
1647             and isinstance(node.children[0], Leaf)
1648             and node.children[0].type == token.LPAR
1649             and isinstance(node.children[2], Leaf)
1650             and node.children[2].type == token.RPAR
1651             and isinstance(node.children[1], Leaf)
1652             and not (
1653                 node.children[1].type == token.NAME
1654                 and node.children[1].value == "yield"
1655             )
1656         ):
1657             node.children[0].value = ""
1658             node.children[2].value = ""
1659         yield from super().visit_default(node)
1660
1661     def visit_factor(self, node: Node) -> Iterator[Line]:
1662         """Force parentheses between a unary op and a binary power:
1663
1664         -2 ** 8 -> -(2 ** 8)
1665         """
1666         child = node.children[1]
1667         if child.type == syms.power and len(child.children) == 3:
1668             lpar = Leaf(token.LPAR, "(")
1669             rpar = Leaf(token.RPAR, ")")
1670             index = child.remove() or 0
1671             node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
1672         yield from self.visit_default(node)
1673
1674     def visit_INDENT(self, node: Node) -> Iterator[Line]:
1675         """Increase indentation level, maybe yield a line."""
1676         # In blib2to3 INDENT never holds comments.
1677         yield from self.line(+1)
1678         yield from self.visit_default(node)
1679
1680     def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1681         """Decrease indentation level, maybe yield a line."""
1682         # The current line might still wait for trailing comments.  At DEDENT time
1683         # there won't be any (they would be prefixes on the preceding NEWLINE).
1684         # Emit the line then.
1685         yield from self.line()
1686
1687         # While DEDENT has no value, its prefix may contain standalone comments
1688         # that belong to the current indentation level.  Get 'em.
1689         yield from self.visit_default(node)
1690
1691         # Finally, emit the dedent.
1692         yield from self.line(-1)
1693
1694     def visit_stmt(
1695         self, node: Node, keywords: Set[str], parens: Set[str]
1696     ) -> Iterator[Line]:
1697         """Visit a statement.
1698
1699         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1700         `def`, `with`, `class`, `assert` and assignments.
1701
1702         The relevant Python language `keywords` for a given statement will be
1703         NAME leaves within it. This methods puts those on a separate line.
1704
1705         `parens` holds a set of string leaf values immediately after which
1706         invisible parens should be put.
1707         """
1708         normalize_invisible_parens(node, parens_after=parens)
1709         for child in node.children:
1710             if child.type == token.NAME and child.value in keywords:  # type: ignore
1711                 yield from self.line()
1712
1713             yield from self.visit(child)
1714
1715     def visit_suite(self, node: Node) -> Iterator[Line]:
1716         """Visit a suite."""
1717         if self.is_pyi and is_stub_suite(node):
1718             yield from self.visit(node.children[2])
1719         else:
1720             yield from self.visit_default(node)
1721
1722     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1723         """Visit a statement without nested statements."""
1724         is_suite_like = node.parent and node.parent.type in STATEMENT
1725         if is_suite_like:
1726             if self.is_pyi and is_stub_body(node):
1727                 yield from self.visit_default(node)
1728             else:
1729                 yield from self.line(+1)
1730                 yield from self.visit_default(node)
1731                 yield from self.line(-1)
1732
1733         else:
1734             if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1735                 yield from self.line()
1736             yield from self.visit_default(node)
1737
1738     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1739         """Visit `async def`, `async for`, `async with`."""
1740         yield from self.line()
1741
1742         children = iter(node.children)
1743         for child in children:
1744             yield from self.visit(child)
1745
1746             if child.type == token.ASYNC:
1747                 break
1748
1749         internal_stmt = next(children)
1750         for child in internal_stmt.children:
1751             yield from self.visit(child)
1752
1753     def visit_decorators(self, node: Node) -> Iterator[Line]:
1754         """Visit decorators."""
1755         for child in node.children:
1756             yield from self.line()
1757             yield from self.visit(child)
1758
1759     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1760         """Remove a semicolon and put the other statement on a separate line."""
1761         yield from self.line()
1762
1763     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1764         """End of file. Process outstanding comments and end with a newline."""
1765         yield from self.visit_default(leaf)
1766         yield from self.line()
1767
1768     def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
1769         if not self.current_line.bracket_tracker.any_open_brackets():
1770             yield from self.line()
1771         yield from self.visit_default(leaf)
1772
1773     def __attrs_post_init__(self) -> None:
1774         """You are in a twisty little maze of passages."""
1775         v = self.visit_stmt
1776         Ø: Set[str] = set()
1777         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1778         self.visit_if_stmt = partial(
1779             v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1780         )
1781         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1782         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1783         self.visit_try_stmt = partial(
1784             v, keywords={"try", "except", "else", "finally"}, parens=Ø
1785         )
1786         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1787         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1788         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1789         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1790         self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1791         self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1792         self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
1793         self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
1794         self.visit_async_funcdef = self.visit_async_stmt
1795         self.visit_decorated = self.visit_decorators
1796
1797
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Opening bracket token type -> matching closing bracket token type.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Leaf types for which `whitespace()` returns an empty prefix right away.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
1804
1805
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    The result is one of: the empty string, a single space, two spaces (only
    for `token.COMMENT` leaves), or -- for the value that follows `=` in a
    typed argument list -- whatever prefix that `=` leaf currently has.

    The decision is made in two stages.  First, checks based on what precedes
    `leaf` (its previous sibling, or the leaf preceding its parent when it is
    a first child).  Second, checks based on the type of the parent node.  A
    single space is returned when no rule says otherwise.

    A leaf without a parent trips the assertion below, unless its type is in
    `ALWAYS_NO_SPACE` or is `token.COMMENT`.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    # Colons only ever get a space inside subscripts/slices; everywhere else
    # they hug the preceding token.
    if t == token.COLON and p.type not in {
        syms.subscript,
        syms.subscriptlist,
        syms.sliceop,
    }:
        return NO

    # Stage 1: look at what comes right before `leaf`.
    prev = leaf.prev_sibling
    if not prev:
        # `leaf` is the first child of its parent, so the relevant neighbour
        # is the leaf that precedes the parent as a whole.
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist,
                    syms.argument,
                    syms.parameters,
                    syms.varargslist,
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument.  So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in VARARGS_SPECIALS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    # Stage 2: rules that depend on the type of the parent node.  Any branch
    # that does not return falls through to the final `return SPACE`.
    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument.  So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                # A dot directly after a number keeps its space; any other
                # leading dot does not.
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        # dict unpacking
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if prevp.type == token.COLON and prevp_parent.type in {
                syms.subscript,
                syms.sliceop,
            }:
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t in {token.NAME, token.NUMBER, token.STRING}:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    # NOTE(review): this branch looks unreachable -- `syms.sliceop` parents are
    # already matched by the `{syms.subscript, syms.sliceop}` branch above.
    elif p.type == syms.sliceop:
        return NO

    return SPACE
2034
2035
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the leaf that comes right before `node`, or None.

    The search climbs from `node` towards the root until some ancestor (or
    `node` itself) has a previous sibling.  If that sibling is a leaf, it is
    the answer; otherwise the answer is its last leaf, or None when it has no
    leaves at all.
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if not sibling:
            # Nothing before us on this level; retry one level up.
            current = current.parent
            continue

        if isinstance(sibling, Leaf):
            return sibling

        sibling_leaves = list(sibling.leaves())
        return sibling_leaves[-1] if sibling_leaves else None

    return None
2052
2053
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` on the path down to `descendant`.

    Walks up from `descendant` until a node whose parent compares equal to
    `ancestor` is found.  When the walk runs out of parents first, the falsy
    end of the chain is returned instead.
    """
    candidate: Optional[LN] = descendant
    while candidate:
        if candidate.parent != ancestor:
            candidate = candidate.parent
        else:
            break
    return candidate
2060
2061
def container_of(leaf: Leaf) -> LN:
    """Return `leaf` or the topmost ancestor that still counts as its container.

    A "container" is a node in which `leaf` is the very first child.  Climbing
    stops as soon as the parent:

    * does not exist;
    * has a first child whose prefix differs from the prefix of `leaf`;
    * is the `file_input` node;
    * directly follows a bracket.
    """
    leaf_prefix = leaf.prefix
    current: LN = leaf
    while current:
        parent = current.parent
        if (
            parent is None
            or parent.children[0].prefix != leaf_prefix
            or parent.type == syms.file_input
            or (
                parent.prev_sibling is not None
                and parent.prev_sibling.type in BRACKETS
            )
        ):
            break

        current = parent
    return current
2085
2086
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of `leaf` as a delimiter with a line break after it.

    Only delimiters that cause a line break after themselves are considered
    here, which currently means commas.  Every other leaf gets 0.  `previous`
    is accepted but not consulted.

    Higher numbers are higher priority.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
2099
2100
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    `previous`, when given, is consulted for a few context-dependent cases
    (attribute access dots, adjacent strings, `not in`, `is not`).
    NOTE(review): presumably the leaf right before `leaf` on the line --
    confirm against callers.

    Returns 0 when `leaf` is not treated as a delimiter.

    Higher numbers are higher priority.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    # Attribute access dots (not the dots of imports or dotted names), and
    # only when there is no previous leaf or it is a closing bracket.
    if (
        leaf.type == token.DOT
        and leaf.parent
        and leaf.parent.type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    # Binary math operators; unary ones (children of `factor`/`star_expr`)
    # are excluded.
    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf.type]

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    # A string directly following another string.
    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        return STRING_PRIORITY

    # Everything below deals with keyword-like leaves only.
    if leaf.type not in {token.NAME, token.ASYNC}:
        return 0

    # `and` binds tighter than `or`: this matches either a comprehension `for`
    # or any ASYNC token.
    if (
        leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
        or leaf.type == token.ASYNC
    ):
        # When the previous sibling is an `async` leaf, no priority is
        # returned here and the checks below still run.
        if (
            not isinstance(leaf.prev_sibling, Leaf)
            or leaf.prev_sibling.value != "async"
        ):
            return COMPREHENSION_PRIORITY

    if (
        leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
        return TERNARY_PRIORITY

    if leaf.value == "is":
        return COMPARATOR_PRIORITY

    # `in` is a delimiter unless it directly follows `not` (`not in`).
    if (
        leaf.value == "in"
        and leaf.parent
        and leaf.parent.type in {syms.comp_op, syms.comparison}
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "not"
        )
    ):
        return COMPARATOR_PRIORITY

    # `not` inside a comparison operator is a delimiter unless it directly
    # follows `is` (`is not`).
    if (
        leaf.value == "not"
        and leaf.parent
        and leaf.parent.type == syms.comp_op
        and not (
            previous is not None
            and previous.type == token.NAME
            and previous.value == "is"
        )
    ):
        return COMPARATOR_PRIORITY

    if leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
2195
2196
# Exact comment texts of the special "formatting off" / "formatting on"
# marker comments.
FMT_OFF = {
    "# fmt: off",
    "# fmt:off",
    "# yapf: disable",
}
FMT_ON = {
    "# fmt: on",
    "# fmt:on",
    "# yapf: enable",
}
2199
2200
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Generate comment leaves out of the prefix of `leaf`, if it holds any.

    lib2to3 shoves comments into the whitespace prefix of the token that
    follows them (see `pgen2/driver.py:Driver.parse_tokens()`).  That spares
    the grammar from listing every place a comment may appear, but it also
    means comments don't "belong" to any node.  The leaves generated here are
    therefore parentless: the correct parent is simply unknown.

    That is enough for our purposes.  All we need is to tell inline comments
    from standalone ones, i.e. those that don't share their line with code:

    * inline comments come out as regular `token.COMMENT` leaves;
    * standalone comments come out with the fake `STANDALONE_COMMENT` type.

    The prefix of each generated leaf consists of one newline per blank line
    counted before the comment.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        yield Leaf(proto.type, proto.value, prefix="\n" * proto.newlines)
2222
2223
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.

    Instances are built by `list_comments()` from a leaf prefix.
    """

    # Either token.COMMENT (trailing comment) or STANDALONE_COMMENT.
    type: int  # token.COMMENT or STANDALONE_COMMENT
    # Comment text as returned by `make_comment()`.
    value: str  # content of the comment
    # Blank prefix lines counted since the previous comment (or the start).
    newlines: int  # how many newlines before the comment
    # Running total up to the end of the comment's line; every prefix line
    # counts as its length plus one for the newline it was split on.
    consumed: int  # how many characters of the original leaf's prefix did we consume
2240
2241
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Parse `prefix` into a list of :class:`ProtoComment` objects.

    The prefix is processed line by line.  A comment is a simple trailing
    comment (`token.COMMENT`) only when every prefix line before it ended with
    an escaped newline and the prefix does not belong to the end marker; any
    other comment is a `STANDALONE_COMMENT`.

    Results are memoized by `lru_cache`, so the returned list is shared
    between calls made with the same arguments.
    """
    comments: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return comments

    chars_consumed = 0
    blank_lines = 0
    escaped_lines = 0
    for lineno, raw_line in enumerate(prefix.split("\n")):
        # The extra 1 accounts for the "\n" the prefix was split on.
        chars_consumed += len(raw_line) + 1
        text = raw_line.lstrip()
        if text.startswith("#"):
            is_trailing = lineno == escaped_lines and not is_endmarker
            comments.append(
                ProtoComment(
                    type=token.COMMENT if is_trailing else STANDALONE_COMMENT,
                    value=make_comment(text),
                    newlines=blank_lines,
                    consumed=chars_consumed,
                )
            )
            blank_lines = 0
        elif not text:
            blank_lines += 1
        elif text.endswith("\\"):
            # Escaped newlines outside of a comment are not really newlines at
            # all.  A single-line comment following an escaped newline is
            # treated as a simple trailing comment.
            escaped_lines += 1
    return comments
2277
2278
def make_comment(content: str) -> str:
    """Return `content` as a consistently formatted comment.

    Trailing whitespace is dropped and the result always starts with a hash
    sign, whether or not `content` had one.  Exactly one space is put between
    the hash and the text, unless the text already begins with a space or with
    one of the characters that mark special comments: `!`, `:`, `#`, `'`, `%`
    (as in "#!", "#:", "##", "#'", "#%%").

    Empty or whitespace-only `content` gives a bare "#".
    """
    text = content.rstrip()
    if not text:
        return "#"

    if text.startswith("#"):
        text = text[1:]
    needs_space = bool(text) and not text.startswith(tuple(" !:#'%"))
    return f"# {text}" if needs_space else f"#{text}"
2296
2297
def split_line(
    line: Line,
    line_length: int,
    inner: bool = False,
    features: Collection[Feature] = (),
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    The pieces should fit in the allotted `line_length`, but there is no
    guarantee that they will.

    `inner` signifies that there was a pair of brackets somewhere around the
    current `line`, possibly transitively; recursive calls always pass True.

    `features` are syntactical features that may be used in the output.

    The line is yielded untouched when it is a comment, when it already fits
    (and neither holds inner type comments nor is marked to be exploded), or
    when every applicable split function gave up.
    """
    if line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")

    fits_as_is = (
        not line.contains_inner_type_comments()
        and not line.should_explode
        and is_line_short_enough(line, line_length=line_length, line_str=line_str)
    )
    if fits_as_is:
        yield line
        return

    def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
        """Right-hand split, trying progressively larger sets of omitted trailers."""
        for omit in generate_trailers_to_omit(line, line_length):
            attempt = list(right_hand_split(line, line_length, features, omit=omit))
            if is_line_short_enough(attempt[0], line_length=line_length):
                yield from attempt
                return

        # All splits failed, best effort split with no omits.
        # This mostly happens to multiline strings that are by definition
        # reported as not fitting a single line.
        yield from right_hand_split(line, line_length, features=features)

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    elif line.inside_brackets:
        split_funcs = [delimiter_split, standalone_comment_split, rhs]
    else:
        split_funcs = [rhs]

    for split_func in split_funcs:
        # Pieces are collected in `result` rather than yielded right away: the
        # attempt may have to be abandoned in favor of the next split function
        # or of the original line.
        result: List[Line] = []
        try:
            for piece in split_func(line, features):
                if str(piece).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(
                        piece, line_length=line_length, inner=True, features=features
                    )
                )
        except CannotSplit:
            continue

        yield from result
        return

    # No split function succeeded.
    yield line
2372
2373
def left_hand_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split `line` around its first matching bracket pair.

    The leaves are distributed over three lines: the head (up to and including
    the opening bracket), the body (what the brackets enclose) and the tail
    (from the closing bracket on).  Empty lines are not yielded.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.

    Raises :exc:`CannotSplit` when `line` has no brackets, or when
    `bracket_split_succeeded_or_raise()` rejects the result.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = head_leaves
    matching_bracket = None
    for leaf in line.leaves:
        closes_body = (
            bucket is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        )
        if closes_body:
            # With nothing collected in the body, the closing bracket goes
            # back to the head and the search for a bracket pair resumes.
            bucket = tail_leaves if body_leaves else head_leaves
        bucket.append(leaf)
        if bucket is head_leaves and leaf.type in OPENING_BRACKETS:
            matching_bracket = leaf
            bucket = body_leaves
    if not matching_bracket:
        raise CannotSplit("No brackets found")

    head = bracket_split_build_line(head_leaves, line, matching_bracket)
    body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    yield from filter(None, (head, body, tail))
2408
2409
def right_hand_split(
    line: Line,
    line_length: int,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Yields the non-empty lines among head, body and tail, in that order.

    Raises :exc:`CannotSplit` when no usable bracket pair is found, when
    `bracket_split_succeeded_or_raise()` rejects the result, or when the split
    on optional parentheses is deemed bound to fail (see the `except` branch).

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Walk the leaves backwards: the tail fills up first, then the body (once
    # an eligible closing bracket was seen), then the head (from the matching
    # opening bracket on).
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    if not (opening_bracket and closing_bracket and head_leaves):
        # If there is no opening or closing_bracket that means the split failed and
        # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # The lists were filled back to front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    head = bracket_split_build_line(head_leaves, line, opening_bracket)
    body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
    tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        # Retry with this pair of optional parentheses excluded as well.
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, features=features, omit=omit)
            return

        except CannotSplit:
            # The retry failed.  Unless one of the checks below raises, fall
            # through and use the split on the optional parentheses.
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to "
                    "satisfy the splitting algorithm because the head or the tail "
                    "contains multiline strings which by definition never fit one "
                    "line."
                )

    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
2497
2498
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    Such a split is based on a pair of brackets and produces three lines:

    * `head`: content before and including the opening bracket;
    * `body`: content inside the brackets;
    * `tail`: content starting with and following the closing bracket.

    The split is only ever rejected when `body` is empty: either `tail` is
    empty as well (the same line was produced, all content is in `head`), or
    `tail` is shorter than three characters once stripped (not worth it).
    """
    tail_len = len(str(tail).strip())
    if body:
        return

    if tail_len == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if tail_len < 3:
        raise CannotSplit(
            f"Splitting brackets on an empty body to save "
            f"{tail_len} characters is not worth it"
        )
2523
2524
def bracket_split_build_line(
    leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
) -> Line:
    """Build a new line out of `leaves`, carrying over comments from `original`.

    With `is_body` set, the new line is marked as being inside brackets and is
    indented one level deeper than `original`.  In addition, when `leaves` is
    not empty:

    * the prefix of the first leaf is normalized;
    * a trailing comma is ensured for imports, and for function definitions
      whose body holds no comma at all.  The comma is inserted into `leaves`
      itself, before any trailing standalone comments.
    """
    result = Line(depth=original.depth)
    if is_body:
        result.inside_brackets = True
        result.depth += 1

    if is_body and leaves:
        # Since body is a new indent level, remove spurious leading whitespace.
        normalize_prefix(leaves[0], inside_brackets=True)
        no_commas = original.is_def and not any(
            item.type == token.COMMA for item in leaves
        )
        if original.is_import or no_commas:
            # Look at the last leaf that is not a standalone comment; add a
            # comma right after it unless it already is one.
            for index in reversed(range(len(leaves))):
                last_type = leaves[index].type
                if last_type == STANDALONE_COMMENT:
                    continue

                if last_type != token.COMMA:
                    leaves.insert(index + 1, Leaf(token.COMMA, ","))
                break

    # Populate the line, each leaf followed by its own comments.
    for leaf in leaves:
        result.append(leaf, preformatted=True)
        for comment in original.comments_after(leaf):
            result.append(comment, preformatted=True)

    if is_body:
        result.should_explode = should_explode(result, opening_bracket)
    return result
2563
2564
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Decorate `split_func` so the first leaf of each line it yields is normalized.

    Every line produced by the wrapped split function has the prefix of its
    first leaf passed through `normalize_prefix(..., inside_brackets=True)`
    before being handed on to the caller.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
        for produced in split_func(line, features):
            first_leaf = produced.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield produced

    return split_wrapper
2578
2579
@dont_increase_indentation
def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If the appropriate Features are given, the split will add trailing commas
    also in function signatures and calls that contain `*` and `**`.

    Yields the resulting lines one by one; each has the depth and
    `inside_brackets` flag of `line`.

    Raises :exc:`CannotSplit` (once iteration starts, since this is a
    generator) when `line` has no leaves, when the bracket tracker reports no
    delimiters, or when the only delimiter is a single DOT_PRIORITY one.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    bt = line.bracket_tracker
    try:
        # The last leaf is excluded when looking for the delimiter to split on.
        delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    # Smallest bracket depth seen so far among the visited leaves.
    lowest_depth = sys.maxsize
    # Turns False once a vararg is seen whose trailing-comma Feature is missing
    # from `features`; it is never switched back to True.
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            # `append_safe` refused the leaf: emit what was collected so far and
            # start a fresh line with this leaf.
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        # Comments attached to the leaf are appended right after it.
        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        # Only leaves at the lowest depth seen so far affect trailing comma safety.
        if leaf.bracket_depth == lowest_depth:
            if is_vararg(leaf, within={syms.typedargslist}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
                )
            elif is_vararg(leaf, within={syms.arglist, syms.argument}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
                )

        # A delimiter of the chosen priority closes the current line.
        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        # Add a trailing comma to the last line of a comma split, unless it is
        # unsafe or the line already ends with a comma or a standalone comment.
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
2648
2649
@dont_increase_indentation
def standalone_comment_split(
    line: Line, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Break `line` apart wherever a leaf or comment cannot be safely appended.

    Leaves and their trailing comments are copied in order into a working
    line; whenever `append_safe` rejects an item, the working line is yielded
    and a new one is started with that item.  `features` is not used.

    Raises :exc:`CannotSplit` if `line` holds no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def fresh_line() -> Line:
        """Return an empty line shaped like `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = fresh_line()

    def append_to_line(item: Leaf) -> Iterator[Line]:
        """Put `item` on the working line, flushing that line first if needed."""
        nonlocal current_line
        try:
            current_line.append_safe(item, preformatted=True)
        except ValueError:
            yield current_line

            current_line = fresh_line()
            current_line.append(item)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for trailing_comment in line.comments_after(leaf):
            yield from append_to_line(trailing_comment)

    if current_line:
        yield current_line
2679
2680
def is_import(leaf: Leaf) -> bool:
    """Tell whether `leaf` is the keyword that opens an import statement.

    That is the case for a NAME leaf `import` whose parent is an `import_name`
    node, or a NAME leaf `from` whose parent is an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    if not parent:
        return False

    keyword = leaf.value
    if keyword == "import":
        return bool(parent.type == syms.import_name)

    if keyword == "from":
        return bool(parent.type == syms.import_from)

    return False
2693
2694
def is_type_comment(leaf: Leaf) -> bool:
    """Return True if the given leaf is a special comment.

    Only returns true for type comments for now, i.e. comments whose text
    starts with `# type:`.  Both regular comments (`token.COMMENT`) and
    standalone comments (`STANDALONE_COMMENT`) qualify.
    """
    # The set must hold the two comment token types themselves.  Putting the
    # comparison `t == STANDALONE_COMMENT` in the set stores a boolean instead,
    # so standalone type comments would never be matched.
    comment_types = {token.COMMENT, STANDALONE_COMMENT}
    return leaf.type in comment_types and leaf.value.startswith("# type:")
2701
2702
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce the prefix of `leaf` to newlines only, or to nothing at all.

    Inside brackets the prefix is always emptied.  Outside brackets, if the
    part of the prefix before the first `#` holds no backslash, the prefix
    becomes as many newlines as are found after the last `#` (minus one when
    the prefix contains a `#` at all).  In any other case it is emptied.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    replacement = ""
    if not inside_brackets:
        before_comment, *comment_parts = leaf.prefix.split("#")
        if "\\" not in before_comment:
            if comment_parts:
                # One newline is discounted when the prefix contains comments.
                newline_total = comment_parts[-1].count("\n") - 1
            else:
                newline_total = before_comment.count("\n")
            replacement = "\n" * newline_total

    leaf.prefix = replacement
2719
2720
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Lowercase the prefix letters of the string literal stored in `leaf`.

    The prefix is the leading run of `f`, `u`, `r` and `b` characters in either
    case.  With `remove_u_prefix` set, every `u` is dropped from it as well.

    Note: Mutates its argument.
    """
    match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    raw_prefix, remainder = match.groups()
    lowered = raw_prefix.lower()
    if remove_u_prefix:
        lowered = lowered.replace("u", "")
    leaf.value = lowered + remainder
2735
2736
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    `leaf.value` is rewritten only when the converted body needs no more
    backslashes than the original one; on a tie, a string that already uses
    double quotes is kept as it is.

    Note: Mutates its argument.
    """
    # Ignore the string prefix letters when looking at the quote style.
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        # Already the preferred triple quote.
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        # Already double-quoted; the single-quoted form is still computed below
        # and is used only if it needs strictly fewer escapes.
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    # `new_quote` preceded by an even number of backslashes (so not escaped).
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    # `new_quote` / `orig_quote` preceded by an odd number of backslashes
    # (so escaped).
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    # The text between the opening and the closing quote.
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        # NOTE(review): `sub_twice` is defined elsewhere; presumably it applies
        # the substitution twice to catch overlapping matches. Confirm.
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        # Unescape the old quote character and escape the new one.
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Collect the contents of the `{...}` replacement fields of the f-string.
        matches = re.findall(
            r"""
            (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:[^}]|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return
    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending in `"` would run into the closing `"""`,
        # so that final quote is escaped.
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2810
2811
def normalize_numeric_literal(leaf: Leaf) -> None:
    """Rewrite the numeric literal held by `leaf` in its canonical spelling.

    The literal is lowercased first.  Hex digits (but not the `0x` marker) are
    then uppercased, a `+` sign in an exponent is dropped, and a Python 2 long
    suffix is written as `L`.  The decimal part of exponent literals, suffixed
    literals and plain literals goes through `format_float_or_int_string`.
    Octal and binary literals are only lowercased.

    Note: Mutates its argument.
    """
    text = leaf.value.lower()
    if text.startswith("0x"):
        # Keep the marker lowercase, uppercase the hex digits.
        text = text[:2] + text[2:].upper()
    elif text.startswith(("0o", "0b")):
        # Octal and binary literals need nothing beyond lowercasing.
        pass
    elif "e" in text:
        mantissa, exponent = text.split("e")
        # A minus sign is preserved, a plus sign is redundant and removed.
        sign = "-" if exponent.startswith("-") else ""
        if exponent.startswith(("-", "+")):
            exponent = exponent[1:]
        mantissa = format_float_or_int_string(mantissa)
        text = f"{mantissa}e{sign}{exponent}"
    elif text.endswith(("j", "l")):
        digits, suffix = text[:-1], text[-1]
        # Capitalize in "2L" because "l" looks too similar to "1".
        suffix = "L" if suffix == "l" else suffix
        text = f"{format_float_or_int_string(digits)}{suffix}"
    else:
        text = format_float_or_int_string(text)
    leaf.value = text
2846
2847
def format_float_or_int_string(text: str) -> str:
    """Give a float literal explicit digits on both sides of the dot.

    Text without a dot is returned unchanged.  Otherwise an empty integral or
    fractional part is filled in with `0`, e.g. "1." becomes "1.0" and ".5"
    becomes "0.5".
    """
    pieces = text.split(".")
    if len(pieces) == 1:
        return text

    integral, fractional = pieces
    return (integral or "0") + "." + (fractional or "0")
2855
2856
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    Mutates the children of `node` in place.  Nothing is changed when the
    prefix of `node` contains a comment listed in `FMT_OFF`.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return

    # True when the child being visited is a candidate for optional parentheses,
    # i.e. it directly follows a leaf whose value is in `parens_after`.
    check_lpar = False
    # Iterate over a copy: children of `node` are removed and inserted below.
    for index, child in enumerate(list(node.children)):
        # Add parentheses around long tuple unpacking in assignments.
        if (
            index == 0
            and isinstance(child, Node)
            and child.type == syms.testlist_star_expr
        ):
            check_lpar = True

        if check_lpar:
            if is_walrus_assignment(child):
                # Parenthesized walrus expressions are left untouched.  Note that
                # `continue` also skips the `check_lpar` update at the bottom of
                # the loop, so the flag stays set for the next child.
                continue
            if child.type == syms.atom:
                if maybe_make_parens_invisible_in_atom(child, parent=node):
                    # The atom should itself be wrapped in invisible parentheses.
                    lpar = Leaf(token.LPAR, "")
                    rpar = Leaf(token.RPAR, "")
                    # NOTE(review): `remove()` presumably returns the former
                    # position of the child in its parent; `or 0` covers a None
                    # result. Confirm.
                    index = child.remove() or 0
                    node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                lpar = Leaf(token.LPAR, "(")
                rpar = Leaf(token.RPAR, ")")
                child.remove()
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                # Nothing after this point of an import statement is processed.
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # wrap child in invisible parentheses
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                index = child.remove() or 0
                # Move the prefix from the child onto the new wrapping atom.
                prefix = child.prefix
                child.prefix = ""
                new_child = Node(syms.atom, [lpar, child, rpar])
                new_child.prefix = prefix
                node.insert_child(index, new_child)

        # The next child is a candidate iff this one is a leaf in `parens_after`.
        check_lpar = isinstance(child, Leaf) and child.value in parens_after
2921
2922
def normalize_fmt_off(node: Node) -> None:
    """Turn every `# fmt: off`/`# fmt: on` region under `node` into standalone comments.

    Pairs are converted one at a time until `convert_one_fmt_off_pair` reports
    that there was nothing left to convert.
    """
    while convert_one_fmt_off_pair(node):
        pass
2928
2929
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    The nodes yielded by `generate_ignored_nodes` are removed from the tree and
    replaced with one STANDALONE_COMMENT leaf whose value is the `fmt: off`
    comment followed by the text of those nodes.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        # Value of `consumed` of the comment preceding the current one in this
        # prefix (0 for the first comment).
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:
                        continue

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:
                    continue

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                # Strip the prefix text up to and including the `fmt: off`
                # comment, so it is not repeated when `first` is stringified.
                first.prefix = prefix[comment.consumed :]
                hidden_value = (
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                )
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                # Detach the ignored nodes, remembering what the first `remove()`
                # call returned as the insertion index for the replacement.
                first_idx = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                        first_idx = index
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                # The replacement keeps the prefix text that preceded the
                # `fmt: off` comment plus the newlines recorded on the comment.
                parent.insert_child(
                    first_idx,
                    Leaf(
                        STANDALONE_COMMENT,
                        hidden_value,
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                    ),
                )
                # The tree changed: stop here, the caller runs another pass.
                return True

            previous_consumed = comment.consumed

    return False
2982
2983
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Yield the container of `leaf` and its following siblings up to `# fmt: on`.

    Generation stops before the first sibling whose prefix holds a comment
    listed in `FMT_ON`, before an ENDMARKER, or when the siblings run out.
    """
    current: Optional[LN] = container_of(leaf)
    while current is not None:
        if current.type == token.ENDMARKER:
            break

        prefix_comments = list_comments(current.prefix, is_endmarker=False)
        if any(comment.value in FMT_ON for comment in prefix_comments):
            return

        yield current

        current = current.next_sibling
2998
2999
def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
    """Hide the parentheses of the atom `node`, recursively, when that is safe.

    Nothing is changed for non-atoms, empty tuples, one-tuples, yield
    expressions whose `parent` is not an `expr_stmt`, and atoms containing a
    delimiter of COMMA_PRIORITY or higher.

    Returns whether the node should itself be wrapped in invisible parentheses.
    """
    # The checks run in this exact order and stop at the first that applies.
    if node.type != syms.atom:
        return False

    if is_empty_tuple(node) or is_one_tuple(node):
        return False

    if is_yield(node) and parent.type != syms.expr_stmt:
        return False

    if max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY:
        return False

    opening, closing = node.children[0], node.children[-1]
    if not (opening.type == token.LPAR and closing.type == token.RPAR):
        return True

    # Blank out this pair, then try the same on whatever it wraps.
    opening.value = ""  # type: ignore
    closing.value = ""  # type: ignore
    maybe_make_parens_invisible_in_atom(node.children[1], parent=parent)
    return False
3025
3026
def is_empty_tuple(node: LN) -> bool:
    """Tell whether `node` is an atom made of just `(` and `)`."""
    if node.type != syms.atom:
        return False

    children = node.children
    if len(children) != 2:
        return False

    opening, closing = children
    return opening.type == token.LPAR and closing.type == token.RPAR
3035
3036
def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
    """Return the child wrapped by `node` when `node` has the shape ( wrapped ).

    The shape requires exactly three children, the first being an LPAR and the
    last an RPAR.  Returns None for any other node.
    """
    children = node.children
    if len(children) != 3:
        return None

    opening, wrapped, closing = children
    if opening.type == token.LPAR and closing.type == token.RPAR:
        return wrapped

    return None
3048
3049
def is_one_tuple(node: LN) -> bool:
    """Tell whether `node` is a single-element tuple, parenthesized or not.

    For an atom, the parenthesized content must be a `testlist_gexp`; without
    parentheses, the node type must be one of IMPLICIT_TUPLE.  In both cases
    the tuple node must have exactly two children, the second one a comma.
    """
    if node.type == syms.atom:
        candidate = unwrap_singleton_parenthesis(node)
        if candidate is None or candidate.type != syms.testlist_gexp:
            return False

    elif node.type in IMPLICIT_TUPLE:
        candidate = node
    else:
        return False

    elements = candidate.children
    return len(elements) == 2 and elements[1].type == token.COMMA
3064
3065
def is_walrus_assignment(node: LN) -> bool:
    """Tell whether `node` has the shape ( test := test ).

    True iff `node` is a parenthesized singleton whose content is a
    `namedexpr_test` node.
    """
    wrapped = unwrap_singleton_parenthesis(node)
    if wrapped is None:
        return False

    return wrapped.type == syms.namedexpr_test
3070
3071
def is_yield(node: LN) -> bool:
    """Tell whether `node` is a `yield` expression, possibly wrapped in parentheses.

    A `yield_expr` node and a bare NAME leaf `yield` both count.  Any number of
    enclosing `( ... )` atoms is peeled off before checking.
    """
    while True:
        if node.type == syms.yield_expr:
            return True

        if node.type == token.NAME and node.value == "yield":  # type: ignore
            return True

        if node.type != syms.atom or len(node.children) != 3:
            return False

        opening, wrapped, closing = node.children
        if not (opening.type == token.LPAR and closing.type == token.RPAR):
            return False

        # Look inside the parentheses on the next round.
        node = wrapped
3091
3092
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Tell whether `leaf` is a star or double star used as a vararg or kwarg.

    `leaf` must have a type listed in VARARGS_SPECIALS and must have a parent.
    The answer is then whether the type of that parent is in `within`; when
    the parent is a `star_expr`, the grandparent is examined instead.
    """
    owner = leaf.parent
    if leaf.type not in VARARGS_SPECIALS or not owner:
        return False

    if owner.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132), so the enclosing node decides.
        owner = owner.parent
        if not owner:
            return False

    return owner.type in within
3114
3115
def is_multiline_string(leaf: Leaf) -> bool:
    """Tell whether `leaf` is a triple-quoted string that contains a newline.

    String prefix letters (`f`, `u`, `r`, `b` in either case) are ignored.
    """
    body = leaf.value.lstrip("furbFURB")
    if "\n" not in body:
        return False

    return body.startswith(('"""', "'''"))
3120
3121
def is_stub_suite(node: Node) -> bool:
    """Tell whether `node` is a suite whose whole body is a stub.

    The suite must consist of exactly NEWLINE, INDENT, one statement and
    DEDENT, and that statement must satisfy `is_stub_body`.
    """
    children = node.children
    if len(children) != 4:
        return False

    newline, indent, statement, dedent = children
    expected = (token.NEWLINE, token.INDENT, token.DEDENT)
    if (newline.type, indent.type, dedent.type) != expected:
        return False

    return is_stub_body(statement)
3133
3134
def is_stub_body(node: LN) -> bool:
    """Tell whether `node` is a simple statement consisting of an ellipsis.

    The statement must have two children, the first being an atom made of
    exactly three `.` leaves.
    """
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    expression = node.children[0]
    if expression.type != syms.atom or len(expression.children) != 3:
        return False

    dot = Leaf(token.DOT, ".")
    return all(part == dot for part in expression.children)
3149
3150
def max_delimiter_priority_in_atom(node: LN) -> Priority:
    """Compute the highest delimiter priority found between the parens of `node`.

    Only atoms whose first and last children are `(` and `)` are examined:
    every leaf in between is fed to a fresh BracketTracker.  Returns 0 for any
    other node, and also when the tracker raises ValueError on being asked for
    the maximum priority.
    """
    if node.type != syms.atom:
        return 0

    opening, closing = node.children[0], node.children[-1]
    if opening.type != token.LPAR or closing.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for child in node.children[1:-1]:
        # A direct leaf is marked as is, a subtree contributes all its leaves.
        inner_leaves = [child] if isinstance(child, Leaf) else child.leaves()
        for leaf in inner_leaves:
            tracker.mark(leaf)

    try:
        return tracker.max_delimiter_priority()

    except ValueError:
        return 0
3177
3178
def ensure_visible(leaf: Leaf) -> None:
    """Give a parenthesis leaf its visible text.

    An LPAR leaf gets the value "(" and an RPAR leaf the value ")"; any other
    leaf is left alone.  Parentheses could be invisible as part of some
    statements (see :func:`normalize_invisible_parens` and
    :func:`visit_import_from`).
    """
    visible_text = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_text is not None:
        leaf.value = visible_text
3189
3190
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?

    True only when `opening_bracket` belongs to an atom or an `import_from`
    node, its value is found in "[{(" (an empty value passes this substring
    test too), and the highest delimiter priority in `line` is COMMA_PRIORITY.
    A trailing comma at the end of `line` is not counted as a delimiter.
    """
    bracket_owner = opening_bracket.parent
    if not bracket_owner:
        return False

    if bracket_owner.type not in {syms.atom, syms.import_from}:
        return False

    if opening_bracket.value not in "[{(":
        return False

    try:
        trailing_leaf = line.leaves[-1]
        if trailing_leaf.type == token.COMMA:
            excluded = {id(trailing_leaf)}
        else:
            excluded = set()
        top_priority = line.bracket_tracker.max_delimiter_priority(exclude=excluded)
    except (IndexError, ValueError):
        # Either the line has no leaves or the tracker raised ValueError.
        return False

    return top_priority == COMMA_PRIORITY
3209
3210
def get_features_used(node: Node) -> Set[Feature]:
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings (with any capitalization of the `f`, `rf` or `fr` prefix);
    - underscores in numeric literals;
    - trailing commas after * or ** in function signatures and calls;
    - positional only argument markers (`/`) directly inside a
      `typedargslist` or `arglist` node;
    - assignment expressions (`:=`).
    """
    features: Set[Feature] = set()
    for n in node.pre_order():
        if n.type == token.STRING:
            # Compare case-insensitively: besides `rf`, `fr`, `RF` and `FR`,
            # Python also accepts mixed-case prefixes such as `Rf` or `fR`.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                features.add(Feature.F_STRINGS)

        elif n.type == token.NUMBER:
            if "_" in n.value:  # type: ignore
                features.add(Feature.NUMERIC_UNDERSCORES)

        elif n.type == token.SLASH:
            if n.parent and n.parent.type in {syms.typedargslist, syms.arglist}:
                features.add(Feature.POS_ONLY_ARGUMENTS)

        elif n.type == token.COLONEQUAL:
            features.add(Feature.ASSIGNMENT_EXPRESSIONS)

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # An argument list ending in a comma: it needs the trailing comma
            # feature only if it also contains a star argument.
            if n.type == syms.typedargslist:
                feature = Feature.TRAILING_COMMA_IN_DEF
            else:
                feature = Feature.TRAILING_COMMA_IN_CALL

            for ch in n.children:
                if ch.type in STARS:
                    features.add(feature)

                # The star may also sit one level down, inside an `argument` node.
                if ch.type == syms.argument:
                    if any(argch.type in STARS for argch in ch.children):
                        features.add(feature)

    return features
3258
3259
def detect_target_versions(node: Node) -> Set[TargetVersion]:
    """Work out which target versions can run the code under `node`.

    A version qualifies when every feature reported by `get_features_used` is
    listed for it in VERSION_TO_FEATURES.
    """
    used_features = get_features_used(node)
    compatible: Set[TargetVersion] = set()
    for version in TargetVersion:
        if used_features <= VERSION_TO_FEATURES[version]:
            compatible.add(version)
    return compatible
3266
3267
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
    set is empty.

    Note that the very same set object is yielded every time and is updated in
    place between yields.
    """

    omit: Set[LeafID] = set()
    yield omit

    # Running length of the scanned part of the line, starting with 4 columns
    # per depth level.
    length = 4 * line.depth
    # Opening bracket matching `closing_bracket` while the scan is still inside
    # that pair; None otherwise.
    opening_bracket = None
    # The most recent closing bracket that started a trailer.
    closing_bracket = None
    # Closing brackets seen since the last yield that are not trailers
    # themselves (nested or empty pairs).
    inner_brackets: Set[LeafID] = set()
    # NOTE(review): `enumerate_with_length` is defined elsewhere; with
    # `reversed=True` it presumably walks the leaves from the end of the line.
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # A reported length larger than value plus prefix is treated as an
        # attached inline comment.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        if opening_bracket:
            # Still inside the current bracket pair.
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))
                continue

            if closing_bracket:
                # The previously found trailer fits, so it can be omitted
                # together with the inner brackets collected along the way.
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit

            # Only brackets with a non-empty value start a new trailer.
            if leaf.value:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
3316
3317
def get_future_imports(node: Node) -> Set[str]:
    """Collect the names imported from `__future__` at the top of the file.

    Only the leading run of simple statements is scanned.  A statement made of
    a string followed by a newline is skipped, `from __future__ import ...`
    statements contribute their names, and scanning ends at the first
    statement of any other kind.
    """
    found: Set[str] = set()

    def names_in(children: List[LN]) -> Generator[str, None, None]:
        """Yield the original (not aliased) names imported by `children`."""
        for item in children:
            if isinstance(item, Leaf):
                # Leaves other than names are skipped.
                if item.type == token.NAME:
                    yield item.value
                continue

            if item.type == syms.import_as_name:
                orig_name = item.children[0]
                assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
                assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
                yield orig_name.value
            elif item.type == syms.import_as_names:
                yield from names_in(item.children)
            else:
                raise AssertionError("Invalid syntax parsing imports")

    for statement in node.children:
        if statement.type != syms.simple_stmt:
            break

        first_child = statement.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            looks_like_docstring = (
                len(statement.children) == 2
                and first_child.type == token.STRING
                and statement.children[1].type == token.NEWLINE
            )
            if looks_like_docstring:
                continue

            break

        if first_child.type != syms.import_from:
            break

        module_name = first_child.children[1]
        if not isinstance(module_name, Leaf) or module_name.value != "__future__":
            break

        found.update(names_in(first_child.children[3:]))
    return found
3359
3360
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Recursively yield the files under `path` that `include` selects.

    Each entry is matched by its resolved path relative to `root`, rendered
    POSIX-style with a leading "/" (directories also get a trailing "/").
    An entry with a non-empty `exclude` match is skipped; for a directory this
    means it is never descended into.  Remaining regular files are yielded when
    `include` matches them.

    Symbolic links pointing outside of the `root` directory are ignored; any
    other entry that resolves outside `root` re-raises the `ValueError`.

    `report.path_ignored()` is called for symlink and `exclude` skips.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for entry in path.iterdir():
        try:
            normalized = "/" + entry.resolve().relative_to(root).as_posix()
        except ValueError:
            if not entry.is_symlink():
                raise

            report.path_ignored(
                entry, f"is a symbolic link that points outside {root}"
            )
            continue

        is_directory = entry.is_dir()
        if is_directory:
            normalized += "/"

        excluded = exclude.search(normalized)
        # An empty match (e.g. from an empty regex) does not count as excluded.
        if excluded and excluded.group(0):
            report.path_ignored(entry, "matches the --exclude regular expression")
            continue

        if is_directory:
            yield from gen_python_files_in_dir(entry, root, include, exclude, report)
        elif entry.is_file() and include.search(normalized):
            yield entry
3402
3403
@lru_cache()
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.  The search starts at the deepest directory that contains
    every path in `srcs` and walks up towards the file system root.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.

    `srcs` must be hashable (e.g. a tuple) because results are memoized with
    `lru_cache`.
    """
    if not srcs:
        return Path("/").resolve()

    path_srcs = [Path(src).resolve() for src in srcs]

    # For every source collect the directories that contain it: all of its
    # ancestors, plus the source itself when it is a directory.
    containing_dirs = [
        set(path.parents) | ({path} if path.is_dir() else set()) for path in path_srcs
    ]
    shared = set.intersection(*containing_dirs)
    if not shared:
        # No common ancestor at all (e.g. sources on different drives).
        return Path("/").resolve()

    # Shared ancestors form a single chain, so the one with the most parts is
    # the deepest common directory.  (Taking `min()` of the paths, as before,
    # picked the lexicographically smallest source rather than a common base.)
    common_base = max(shared, key=lambda candidate: len(candidate.parts))

    for directory in (common_base, *common_base.parents):
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # No marker found: `directory` is the last ancestor visited, i.e. the
    # file system root.
    return directory
3432
3433
@dataclass
class Report:
    """Tally per-file formatting outcomes.  `str(report)` renders a summary."""

    # `check` switches messages to "would ..." wording and affects `return_code`.
    check: bool = False
    # `quiet` suppresses the per-file "reformatted" message unless `verbose`.
    quiet: bool = False
    # `verbose` additionally reports unchanged and ignored paths.
    verbose: bool = False
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Record a successfully processed `src` and write out a message.

        `Changed.YES` counts towards `change_count`; every other value counts
        towards `same_count`.
        """
        if changed is Changed.YES:
            verb = "would reformat" if self.check else "reformatted"
            if self.verbose or not self.quiet:
                out(f"{verb} {src}")
            self.change_count += 1
            return

        if self.verbose:
            if changed is Changed.NO:
                out(f"{src} already well formatted, good job.", bold=False)
            else:
                out(f"{src} wasn't modified on disk since last run.", bold=False)
        self.same_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Record that `src` could not be formatted and write out the error."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    def path_ignored(self, path: Path, message: str) -> None:
        """Mention an ignored `path` and the reason; only when `verbose`."""
        if not self.verbose:
            return

        out(f"{path} ignored: {message}", bold=False)

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        Derived from the counters collected so far:
        - 123 when at least one file failed;
        - 1 when --check is in effect and at least one file changed;
        - 0 in every other case.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special return codes reserved by the shell.
        if self.failure_count:
            return 123

        if self.check and self.change_count:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        if self.check:
            reformatted, unchanged, failed = (
                "would be reformatted",
                "would be left unchanged",
                "would fail to reformat",
            )
        else:
            reformatted, unchanged, failed = (
                "reformatted",
                "left unchanged",
                "failed to reformat",
            )

        def plural(count: int) -> str:
            return "s" if count > 1 else ""

        parts = []
        if self.change_count:
            text = f"{self.change_count} file{plural(self.change_count)} {reformatted}"
            parts.append(click.style(text, bold=True))
        if self.same_count:
            parts.append(
                f"{self.same_count} file{plural(self.same_count)} {unchanged}"
            )
        if self.failure_count:
            text = f"{self.failure_count} file{plural(self.failure_count)} {failed}"
            parts.append(click.style(text, fg="red"))
        return ", ".join(parts) + "."
3517
3518
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` into an AST, trying the newest grammar first.

    On Python 3.8+ the builtin `ast` module is tried with feature versions
    from the running minor version down to 3.5; on older interpreters `ast3`
    is tried with feature versions 7 and 6.  If every attempt raises
    `SyntaxError`, the result of `ast27.parse()` is returned (and any error
    it raises propagates).
    """
    filename = "<unknown>"
    if sys.version_info < (3, 8):
        for typed_ast_version in (7, 6):
            try:
                return ast3.parse(src, filename, feature_version=typed_ast_version)
            except SyntaxError:
                pass
    else:
        # TODO: support Python 4+ ;)
        newest_minor = sys.version_info[1]
        for minor in range(newest_minor, 4, -1):
            try:
                return ast.parse(src, filename, feature_version=(3, minor))
            except SyntaxError:
                pass

    return ast27.parse(src)
3536
3537
def _fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    String/bytes, number and name-constant nodes are replaced by a fresh
    `ast.Constant` carrying the same value; any other node is returned as is.
    """
    # casts are required until this is released:
    # https://github.com/python/typeshed/pull/3142
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        replacement = ast.Constant(value=node.s)
        return cast(ast.AST, replacement)

    if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        replacement = ast.Constant(value=node.n)
        return cast(ast.AST, replacement)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        replacement = ast.Constant(value=node.value)
        return cast(ast.AST, replacement)

    return node
3551
3552
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with `parse_ast()`, each tree is rendered to a
    textual dump by the nested visitor `_v`, and the two dumps are compared.

    Raises AssertionError when `src` cannot be parsed, when `dst` cannot be
    parsed, or when the dumps differ.  In the last two cases diagnostic output
    is written with `dump_to_file()` and its path is included in the message.
    """

    def _v(node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content.

        Yields one line per node opening, field name, leaf value and node
        closing, indented by two spaces per `depth` level.
        """

        # Normalize pre-3.8 literal nodes so both trees use `Constant`.
        node = _fixup_ast_constants(node)

        yield f"{'  ' * depth}{node.__class__.__name__}("

        # Fields are visited in sorted name order, not declaration order.
        for field in sorted(node._fields):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

            try:
                value = getattr(node, field)
            except AttributeError:
                # A declared field that is not set on this node is skipped.
                continue

            yield f"{'  ' * (depth+1)}{field}="

            if isinstance(value, list):
                # List items that are not AST nodes produce no output.
                for item in value:
                    # Ignore nested tuples within del statements, because we may insert
                    # parentheses and they change the AST.
                    if (
                        field == "targets"
                        and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                        and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                    ):
                        # Emit the tuple's elements directly, as if they were
                        # listed without the enclosing tuple.
                        for item in item.elts:
                            yield from _v(item, depth + 2)
                    elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                        yield from _v(item, depth + 2)

            elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
                yield from _v(value, depth + 2)

            else:
                # Plain value: emit its repr together with its type name.
                yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"

        yield f"{'  ' * depth})  # /{node.__class__.__name__}"

    try:
        src_ast = parse_ast(src)
    except Exception as exc:
        # The input itself does not parse, so equivalence cannot be checked.
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file.  "
            f"AST error message: {exc}"
        )

    try:
        dst_ast = parse_ast(dst)
    except Exception as exc:
        # The formatted output does not parse: dump the traceback and `dst`.
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/psf/black/issues.  "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        # Dump a diff of the two renderings to help locate the difference.
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source.  "
            f"Please report a bug on https://github.com/psf/black/issues.  "
            f"This diff might be helpful: {log}"
        ) from None
3628
3629
def assert_stable(src: str, dst: str, mode: FileMode) -> None:
    """Raise AssertionError if `dst` reformats differently the second time.

    `dst` is run through `format_str()` again with the same `mode`.  When the
    result differs, both the source -> first pass and the first pass -> second
    pass diffs are written with `dump_to_file()` and the path is reported.
    """
    second_pass = format_str(dst, mode=mode)
    if second_pass == dst:
        return

    first_diff = diff(src, dst, "source", "first pass")
    second_diff = diff(dst, second_pass, "first pass", "second pass")
    log = dump_to_file(first_diff, second_diff)
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter.  "
        f"Please report a bug on https://github.com/psf/black/issues.  "
        f"This diff might be helpful: {log}"
    ) from None
3644
3645
def dump_to_file(*output: str) -> str:
    """Write each string in `output` to a new temporary file; return its path.

    The file is named `blk_*.log`, is UTF-8 encoded and is not deleted on
    close.  A newline is appended after every non-empty chunk that does not
    already end with one.
    """
    log_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    )
    with log_file:
        for chunk in output:
            log_file.write(chunk)
            if chunk and not chunk.endswith("\n"):
                log_file.write("\n")
    return log_file.name
3656
3657
@contextmanager
def nullcontext() -> Iterator[None]:
    """Return a context manager that does nothing on enter or exit.

    Entering yields `None`.  Similar to `nullcontext` from python 3.7.
    """
    yield None
3663
3664
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header.  Five lines
    of context are shown around each change.
    """
    import difflib

    def as_lines(text: str) -> List[str]:
        # Every piece gets a trailing newline, including the last one.
        return [f"{piece}\n" for piece in text.split("\n")]

    delta = difflib.unified_diff(
        as_lines(a), as_lines(b), fromfile=a_name, tofile=b_name, n=5
    )
    return "".join(delta)
3674
3675
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """asyncio signal handler: report the abort to stderr, then cancel `tasks`."""
    err("Aborted!")
    for pending in tasks:
        pending.cancel()
3681
3682
def shutdown(loop: asyncio.AbstractEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even when there is nothing to cancel or when waiting
    for the cancelled tasks raises.  As a side effect the "concurrent.futures"
    logger is set to CRITICAL.
    """
    try:
        if sys.version_info[:2] >= (3, 7):
            all_tasks = asyncio.all_tasks
        else:
            all_tasks = asyncio.Task.all_tasks
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # Do not pass `loop=` to gather(): the parameter was deprecated in
        # Python 3.8 and removed in 3.10, where it raises TypeError.  gather()
        # picks up the loop from the tasks themselves.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
3707
3708
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply `regex.sub(replacement, ...)` to `original` two times in a row.

    This is used by string normalization to perform replaces on
    overlapping matches, which a single pass would miss.
    """
    first_pass = regex.sub(replacement, original)
    second_pass = regex.sub(replacement, first_pass)
    return second_pass
3716
3717
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile the regular expression string `regex`.

    A pattern that contains a newline is compiled in verbose mode by
    prefixing it with the inline `(?x)` flag.
    """
    verbose_flag = "(?x)" if "\n" in regex else ""
    return re.compile(verbose_flag + regex)
3726
3727
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Like `reversed(enumerate(sequence))` if that were possible.

    Yields `(index, element)` pairs from the last element to the first, with
    `index` being the element's position in `sequence`.
    """
    descending_indexes = range(len(sequence) - 1, -1, -1)
    yield from zip(descending_indexes, reversed(sequence))
3734
3735
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Yield `(index, leaf, length)` for the leaves of `line`.

    `length` is the size of the leaf's prefix and value plus the values of the
    comments `line.comments_after(leaf)` returns.  With `reversed` set the
    leaves are walked from last to first.

    Stops prematurely at the first leaf whose value contains a newline
    (multiline strings).
    """
    walk = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for position, leaf in walk(line.leaves):
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        comments_length = sum(
            len(comment.value) for comment in line.comments_after(leaf)
        )
        yield position, leaf, len(leaf.prefix) + len(leaf.value) + comments_length
3756
3757
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` fits within `line_length` on a single row.

    Uses the provided `line_str` rendering, if any, otherwise renders `line`
    with `str()` and strips surrounding newlines.  A rendering that contains a
    newline, or a line with standalone comments, is never short enough.
    """
    rendered = line_str or str(line).strip("\n")
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:  # multiline strings
        return False

    return not line.contains_standalone_comments()
3770
3771
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is not an exhaustive search but a cheap heuristic that we can use to
    avoid some unfortunate formattings (mostly around wrapping unsplittable code
    in unnecessary parentheses).

    Lines with fewer than two leaves are never splittable.  Other lines are
    only examined further when they start with a string followed by a dot.
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    starts_with_string_attribute = (
        leaves[0].type == token.STRING and leaves[1].type == token.DOT
    )
    if not starts_with_string_attribute:
        return True

    # NOTE(review): every comparison below is against the *last* leaf of the
    # line; it is not advanced while walking backwards.  Kept as is.
    last_leaf = leaves[-1]
    calls = 0
    dots = 0
    for leaf in reversed(leaves[:-1]):
        kind = leaf.type
        if kind in OPENING_BRACKETS:
            if last_leaf.type not in CLOSING_BRACKETS:
                return False

            calls += 1
        elif kind == token.DOT:
            dots += 1
        elif kind == token.NAME:
            if last_leaf.type != token.DOT and last_leaf.type not in OPENING_BRACKETS:
                return False

        elif kind not in CLOSING_BRACKETS:
            return False

        if calls > 1 and dots > 1:
            return False

    return True
3806
3807
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.

    The decision is based on the delimiters tracked by `line.bracket_tracker`
    and on whether the line starts with an opening bracket or ends with a
    closing one, measured against `line_length`.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        # `remainder` is True while measuring what follows the bracket pair
        # opened by `first`.
        remainder = False
        # Starting width; presumably 4 columns of indentation per depth level.
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                remainder = True
            if remainder:
                length += leaf_length
                if length > line_length:
                    break

                if leaf.type in OPENING_BRACKETS:
                    # There are brackets we can further split on.
                    remainder = False

        else:
            # Reached only when the loop above finished without `break`.
            # checked the entire string and line length wasn't exceeded
            # (`enumerate_with_length` may stop early, hence the index check).
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket.  If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                # Omit when an earlier bracket was seen, or when everything up
                # to and including this opening bracket fits in `line_length`.
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
3895
3896
def get_cache_file(mode: FileMode) -> Path:
    """Return the path of the cache file used for `mode` inside `CACHE_DIR`."""
    cache_key = mode.get_cache_key()
    return CACHE_DIR / f"cache.{cache_key}.pickle"
3899
3900
def read_cache(mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    Returns an empty cache when the cache file for `mode` is missing or cannot
    be unpickled (corrupted, truncated or empty file, or data written by an
    incompatible version).

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(mode)
    try:
        with cache_file.open("rb") as fobj:
            # NOTE: pickle can run arbitrary code while loading.  This reads a
            # file from Black's own cache directory; never point it at
            # untrusted data.
            cache: Cache = pickle.load(fobj)
    except FileNotFoundError:
        # No cache has been written yet.
        return {}
    except (
        pickle.UnpicklingError,
        AttributeError,
        EOFError,
        ImportError,
        IndexError,
        ValueError,
    ):
        # Besides UnpicklingError, a damaged pickle commonly surfaces as one of
        # these (e.g. EOFError for an empty or truncated file).
        return {}

    return cache
3917
3918
def get_cache_info(path: Path) -> CacheInfo:
    """Return `(modification time, size)` of `path` as reported by `stat()`.

    This pair is what the cache compares to tell if a file is already formatted.
    """
    file_stat = path.stat()
    return (file_stat.st_mtime, file_stat.st_size)
3923
3924
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that were modified on disk or are not in
    the cache. The other contains paths to non-modified files.  All returned
    paths are resolved.
    """
    changed: Set[Path] = set()
    unchanged: Set[Path] = set()
    for source in sources:
        resolved = source.resolve()
        is_current = cache.get(resolved) == get_cache_info(resolved)
        bucket = unchanged if is_current else changed
        bucket.add(resolved)
    return changed, unchanged
3939
3940
def write_cache(cache: Cache, sources: Iterable[Path], mode: FileMode) -> None:
    """Update the cache file for `mode` with fresh entries for `sources`.

    The merged cache is pickled into a temporary file next to the cache file
    and then moved over it with `os.replace()`.  Any `OSError` along the way is
    ignored: failing to write the cache is not an error for the caller.
    """
    cache_file = get_cache_file(mode)
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        merged = dict(cache)
        merged.update((src.resolve(), get_cache_info(src)) for src in sources)
        staging_dir = str(cache_file.parent)
        with tempfile.NamedTemporaryFile(dir=staging_dir, delete=False) as staging:
            pickle.dump(merged, staging, protocol=pickle.HIGHEST_PROTOCOL)
        os.replace(staging.name, cache_file)
    except OSError:
        pass
3952
3953
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application.  Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code.  Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.

    Every Click module that can be imported and defines `_verify_python3_env`
    gets that function replaced with a no-op.  Modules missing from the
    installed Click are skipped.
    """
    modules: List[Any] = []
    # Import each module on its own: a Click release without `_unicodefun`
    # raises ImportError ("cannot import name"), which is not a
    # ModuleNotFoundError, and must not prevent patching `core`.
    try:
        from click import core
    except ImportError:
        pass
    else:
        modules.append(core)

    try:
        from click import _unicodefun  # type: ignore
    except ImportError:
        pass
    else:
        modules.append(_unicodefun)

    for module in modules:
        if hasattr(module, "_verify_python3_env"):
            # Accept any arguments so the stub works whatever the signature is.
            module._verify_python3_env = lambda *args, **kwargs: None
3974
3975
def patched_main() -> None:
    """Entry point: prepare the process, patch Click, then run `main()`."""
    # Must come first; multiprocessing needs it in frozen Windows executables.
    freeze_support()
    # Disable Click's environment check before Click gets to run it.
    patch_click()
    main()
3980
3981
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    patched_main()