black.py

   1 import asyncio
   2 from concurrent.futures import Executor, ProcessPoolExecutor
   3 from datetime import datetime
   4 from enum import Enum
   5 from functools import lru_cache, partial, wraps
   6 import io
   7 import itertools
   8 import logging
   9 from multiprocessing import Manager, freeze_support
  10 import os
  11 from pathlib import Path
  12 import pickle
  13 import re
  14 import signal
  15 import sys
  16 import tempfile
  17 import tokenize
  18 import traceback
  19 from typing import (
  20     Any,
  21     Callable,
  22     Collection,
  23     Dict,
  24     Generator,
  25     Generic,
  26     Iterable,
  27     Iterator,
  28     List,
  29     Optional,
  30     Pattern,
  31     Sequence,
  32     Set,
  33     Tuple,
  34     TypeVar,
  35     Union,
  36     cast,
  37 )
  38
  39 from appdirs import user_cache_dir
  40 from attr import dataclass, evolve, Factory
  41 import click
  42 import toml
  43 from typed_ast import ast3, ast27
  44
  45 # lib2to3 fork
  46 from blib2to3.pytree import Node, Leaf, type_repr
  47 from blib2to3 import pygram, pytree
  48 from blib2to3.pgen2 import driver, token
  49 from blib2to3.pgen2.grammar import Grammar
  50 from blib2to3.pgen2.parse import ParseError
  51
  52
  53 __version__ = "19.3b0"
  54 DEFAULT_LINE_LENGTH = 88
  55 DEFAULT_EXCLUDES = (
  56     r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/"
  57 )
  58 DEFAULT_INCLUDES = r"\.pyi?$"
  59 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
  60
  61
  62 # types
  63 FileContent = str
  64 Encoding = str
  65 NewLine = str
  66 Depth = int
  67 NodeType = int
  68 LeafID = int
  69 Priority = int
  70 Index = int
  71 LN = Union[Leaf, Node]
  72 SplitFunc = Callable[["Line", Collection["Feature"]], Iterator["Line"]]
  73 Timestamp = float
  74 FileSize = int
  75 CacheInfo = Tuple[Timestamp, FileSize]
  76 Cache = Dict[Path, CacheInfo]
  77 out = partial(click.secho, bold=True, err=True)
  78 err = partial(click.secho, fg="red", err=True)
  79
  80 pygram.initialize(CACHE_DIR)
  81 syms = pygram.python_symbols
  82
  83
  84 class NothingChanged(UserWarning):
  85     """Raised when reformatted code is the same as source."""
  86
  87
  88 class CannotSplit(Exception):
  89     """A readable split that fits the allotted line length is impossible."""
  90
  91
  92 class InvalidInput(ValueError):
  93     """Raised when input source code fails all parse attempts."""
  94
  95
  96 class WriteBack(Enum):
  97     NO = 0
  98     YES = 1
  99     DIFF = 2
 100     CHECK = 3
 101
 102     @classmethod
 103     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
 104         if check and not diff:
 105             return cls.CHECK
 106
 107         return cls.DIFF if diff else cls.YES
 108
 109
 110 class Changed(Enum):
 111     NO = 0
 112     CACHED = 1
 113     YES = 2
 114
 115
 116 class TargetVersion(Enum):
 117     PY27 = 2
 118     PY33 = 3
 119     PY34 = 4
 120     PY35 = 5
 121     PY36 = 6
 122     PY37 = 7
 123     PY38 = 8
 124
 125     def is_python2(self) -> bool:
 126         return self is TargetVersion.PY27
 127
 128
 129 PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
 130
 131
 132 class Feature(Enum):
 133     # All string literals are unicode
 134     UNICODE_LITERALS = 1
 135     F_STRINGS = 2
 136     NUMERIC_UNDERSCORES = 3
 137     TRAILING_COMMA_IN_CALL = 4
 138     TRAILING_COMMA_IN_DEF = 5
 139     # The following two feature-flags are mutually exclusive, and exactly one should be
 140     # set for every version of python.
 141     ASYNC_IDENTIFIERS = 6
 142     ASYNC_KEYWORDS = 7
 143
 144
 145 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
 146     TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
 147     TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 148     TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
 149     TargetVersion.PY35: {
 150         Feature.UNICODE_LITERALS,
 151         Feature.TRAILING_COMMA_IN_CALL,
 152         Feature.ASYNC_IDENTIFIERS,
 153     },
 154     TargetVersion.PY36: {
 155         Feature.UNICODE_LITERALS,
 156         Feature.F_STRINGS,
 157         Feature.NUMERIC_UNDERSCORES,
 158         Feature.TRAILING_COMMA_IN_CALL,
 159         Feature.TRAILING_COMMA_IN_DEF,
 160         Feature.ASYNC_IDENTIFIERS,
 161     },
 162     TargetVersion.PY37: {
 163         Feature.UNICODE_LITERALS,
 164         Feature.F_STRINGS,
 165         Feature.NUMERIC_UNDERSCORES,
 166         Feature.TRAILING_COMMA_IN_CALL,
 167         Feature.TRAILING_COMMA_IN_DEF,
 168         Feature.ASYNC_KEYWORDS,
 169     },
 170     TargetVersion.PY38: {
 171         Feature.UNICODE_LITERALS,
 172         Feature.F_STRINGS,
 173         Feature.NUMERIC_UNDERSCORES,
 174         Feature.TRAILING_COMMA_IN_CALL,
 175         Feature.TRAILING_COMMA_IN_DEF,
 176         Feature.ASYNC_KEYWORDS,
 177     },
 178 }
 179
 180
 181 @dataclass
 182 class FileMode:
 183     target_versions: Set[TargetVersion] = Factory(set)
 184     line_length: int = DEFAULT_LINE_LENGTH
 185     string_normalization: bool = True
 186     is_pyi: bool = False
 187
 188     def get_cache_key(self) -> str:
 189         if self.target_versions:
 190             version_str = ",".join(
 191                 str(version.value)
 192                 for version in sorted(self.target_versions, key=lambda v: v.value)
 193             )
 194         else:
 195             version_str = "-"
 196         parts = [
 197             version_str,
 198             str(self.line_length),
 199             str(int(self.string_normalization)),
 200             str(int(self.is_pyi)),
 201         ]
 202         return ".".join(parts)
 203
 204
 205 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 206     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 207
 208
 209 def read_pyproject_toml(
 210     ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
 211 ) -> Optional[str]:
 212     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 213
 214     Returns the path to a successfully found and read configuration file, None
 215     otherwise.
 216     """
 217     assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
 218     if not value:
 219         root = find_project_root(ctx.params.get("src", ()))
 220         path = root / "pyproject.toml"
 221         if path.is_file():
 222             value = str(path)
 223         else:
 224             return None
 225
 226     try:
 227         pyproject_toml = toml.load(value)
 228         config = pyproject_toml.get("tool", {}).get("black", {})
 229     except (toml.TomlDecodeError, OSError) as e:
 230         raise click.FileError(
 231             filename=value, hint=f"Error reading configuration file: {e}"
 232         )
 233
 234     if not config:
 235         return None
 236
 237     if ctx.default_map is None:
 238         ctx.default_map = {}
 239     ctx.default_map.update(  # type: ignore  # bad types in .pyi
 240         {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 241     )
 242     return value
 243
 244
 245 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 246 @click.option("-c", "--code", type=str, help="Format the code passed in as a string.")
 247 @click.option(
 248     "-l",
 249     "--line-length",
 250     type=int,
 251     default=DEFAULT_LINE_LENGTH,
 252     help="How many characters per line to allow.",
 253     show_default=True,
 254 )
 255 @click.option(
 256     "-t",
 257     "--target-version",
 258     type=click.Choice([v.name.lower() for v in TargetVersion]),
 259     callback=lambda c, p, v: [TargetVersion[val.upper()] for val in v],
 260     multiple=True,
 261     help=(
 262         "Python versions that should be supported by Black's output. [default: "
 263         "per-file auto-detection]"
 264     ),
 265 )
 266 @click.option(
 267     "--py36",
 268     is_flag=True,
 269     help=(
 270         "Allow using Python 3.6-only syntax on all input files.  This will put "
 271         "trailing commas in function signatures and calls also after *args and "
 272         "**kwargs. Deprecated; use --target-version instead. "
 273         "[default: per-file auto-detection]"
 274     ),
 275 )
 276 @click.option(
 277     "--pyi",
 278     is_flag=True,
 279     help=(
 280         "Format all input files like typing stubs regardless of file extension "
 281         "(useful when piping source on standard input)."
 282     ),
 283 )
 284 @click.option(
 285     "-S",
 286     "--skip-string-normalization",
 287     is_flag=True,
 288     help="Don't normalize string quotes or prefixes.",
 289 )
 290 @click.option(
 291     "--check",
 292     is_flag=True,
 293     help=(
 294         "Don't write the files back, just return the status.  Return code 0 "
 295         "means nothing would change.  Return code 1 means some files would be "
 296         "reformatted.  Return code 123 means there was an internal error."
 297     ),
 298 )
 299 @click.option(
 300     "--diff",
 301     is_flag=True,
 302     help="Don't write the files back, just output a diff for each file on stdout.",
 303 )
 304 @click.option(
 305     "--fast/--safe",
 306     is_flag=True,
 307     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 308 )
 309 @click.option(
 310     "--include",
 311     type=str,
 312     default=DEFAULT_INCLUDES,
 313     help=(
 314         "A regular expression that matches files and directories that should be "
 315         "included on recursive searches.  An empty value means all files are "
 316         "included regardless of the name.  Use forward slashes for directories on "
 317         "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
 318         "later."
 319     ),
 320     show_default=True,
 321 )
 322 @click.option(
 323     "--exclude",
 324     type=str,
 325     default=DEFAULT_EXCLUDES,
 326     help=(
 327         "A regular expression that matches files and directories that should be "
 328         "excluded on recursive searches.  An empty value means no paths are excluded. "
 329         "Use forward slashes for directories on all platforms (Windows, too).  "
 330         "Exclusions are calculated first, inclusions later."
 331     ),
 332     show_default=True,
 333 )
 334 @click.option(
 335     "-q",
 336     "--quiet",
 337     is_flag=True,
 338     help=(
 339         "Don't emit non-error messages to stderr. Errors are still emitted; "
 340         "silence those with 2>/dev/null."
 341     ),
 342 )
 343 @click.option(
 344     "-v",
 345     "--verbose",
 346     is_flag=True,
 347     help=(
 348         "Also emit messages to stderr about files that were not changed or were "
 349         "ignored due to --exclude=."
 350     ),
 351 )
 352 @click.version_option(version=__version__)
 353 @click.argument(
 354     "src",
 355     nargs=-1,
 356     type=click.Path(
 357         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 358     ),
 359     is_eager=True,
 360 )
 361 @click.option(
 362     "--config",
 363     type=click.Path(
 364         exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
 365     ),
 366     is_eager=True,
 367     callback=read_pyproject_toml,
 368     help="Read configuration from PATH.",
 369 )
 370 @click.pass_context
 371 def main(
 372     ctx: click.Context,
 373     code: Optional[str],
 374     line_length: int,
 375     target_version: List[TargetVersion],
 376     check: bool,
 377     diff: bool,
 378     fast: bool,
 379     pyi: bool,
 380     py36: bool,
 381     skip_string_normalization: bool,
 382     quiet: bool,
 383     verbose: bool,
 384     include: str,
 385     exclude: str,
 386     src: Tuple[str],
 387     config: Optional[str],
 388 ) -> None:
 389     """The uncompromising code formatter."""
 390     write_back = WriteBack.from_configuration(check=check, diff=diff)
 391     if target_version:
 392         if py36:
 393             err(f"Cannot use both --target-version and --py36")
 394             ctx.exit(2)
 395         else:
 396             versions = set(target_version)
 397     elif py36:
 398         err(
 399             "--py36 is deprecated and will be removed in a future version. "
 400             "Use --target-version py36 instead."
 401         )
 402         versions = PY36_VERSIONS
 403     else:
 404         # We'll autodetect later.
 405         versions = set()
 406     mode = FileMode(
 407         target_versions=versions,
 408         line_length=line_length,
 409         is_pyi=pyi,
 410         string_normalization=not skip_string_normalization,
 411     )
 412     if config and verbose:
 413         out(f"Using configuration from {config}.", bold=False, fg="blue")
 414     if code is not None:
 415         print(format_str(code, mode=mode))
 416         ctx.exit(0)
 417     try:
 418         include_regex = re_compile_maybe_verbose(include)
 419     except re.error:
 420         err(f"Invalid regular expression for include given: {include!r}")
 421         ctx.exit(2)
 422     try:
 423         exclude_regex = re_compile_maybe_verbose(exclude)
 424     except re.error:
 425         err(f"Invalid regular expression for exclude given: {exclude!r}")
 426         ctx.exit(2)
 427     report = Report(check=check, quiet=quiet, verbose=verbose)
 428     root = find_project_root(src)
 429     sources: Set[Path] = set()
 430     for s in src:
 431         p = Path(s)
 432         if p.is_dir():
 433             sources.update(
 434                 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
 435             )
 436         elif p.is_file() or s == "-":
 437             # if a file was explicitly given, we don't care about its extension
 438             sources.add(p)
 439         else:
 440             err(f"invalid path: {s}")
 441     if len(sources) == 0:
 442         if verbose or not quiet:
 443             out("No paths given. Nothing to do 😴")
 444         ctx.exit(0)
 445
 446     if len(sources) == 1:
 447         reformat_one(
 448             src=sources.pop(),
 449             fast=fast,
 450             write_back=write_back,
 451             mode=mode,
 452             report=report,
 453         )
 454     else:
 455         reformat_many(
 456             sources=sources, fast=fast, write_back=write_back, mode=mode, report=report
 457         )
 458
 459     if verbose or not quiet:
 460         out("Oh no! 💥 💔 💥" if report.return_code else "All done! ✨ 🍰 ✨")
 461         click.secho(str(report), err=True)
 462     ctx.exit(report.return_code)
 463
 464
 465 def reformat_one(
 466     src: Path, fast: bool, write_back: WriteBack, mode: FileMode, report: "Report"
 467 ) -> None:
 468     """Reformat a single file under `src` without spawning child processes.
 469
 470     `fast`, `write_back`, and `mode` options are passed to
 471     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 472     """
 473     try:
 474         changed = Changed.NO
 475         if not src.is_file() and str(src) == "-":
 476             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 477                 changed = Changed.YES
 478         else:
 479             cache: Cache = {}
 480             if write_back != WriteBack.DIFF:
 481                 cache = read_cache(mode)
 482                 res_src = src.resolve()
 483                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 484                     changed = Changed.CACHED
 485             if changed is not Changed.CACHED and format_file_in_place(
 486                 src, fast=fast, write_back=write_back, mode=mode
 487             ):
 488                 changed = Changed.YES
 489             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 490                 write_back is WriteBack.CHECK and changed is Changed.NO
 491             ):
 492                 write_cache(cache, [src], mode)
 493         report.done(src, changed)
 494     except Exception as exc:
 495         report.failed(src, str(exc))
 496
 497
 498 def reformat_many(
 499     sources: Set[Path],
 500     fast: bool,
 501     write_back: WriteBack,
 502     mode: FileMode,
 503     report: "Report",
 504 ) -> None:
 505     """Reformat multiple files using a ProcessPoolExecutor."""
 506     loop = asyncio.get_event_loop()
 507     worker_count = os.cpu_count()
 508     if sys.platform == "win32":
 509         # Work around https://bugs.python.org/issue26903
 510         worker_count = min(worker_count, 61)
 511     executor = ProcessPoolExecutor(max_workers=worker_count)
 512     try:
 513         loop.run_until_complete(
 514             schedule_formatting(
 515                 sources=sources,
 516                 fast=fast,
 517                 write_back=write_back,
 518                 mode=mode,
 519                 report=report,
 520                 loop=loop,
 521                 executor=executor,
 522             )
 523         )
 524     finally:
 525         shutdown(loop)
 526
 527
 528 async def schedule_formatting(
 529     sources: Set[Path],
 530     fast: bool,
 531     write_back: WriteBack,
 532     mode: FileMode,
 533     report: "Report",
 534     loop: asyncio.AbstractEventLoop,
 535     executor: Executor,
 536 ) -> None:
 537     """Run formatting of `sources` in parallel using the provided `executor`.
 538
 539     (Use ProcessPoolExecutors for actual parallelism.)
 540
 541     `line_length`, `write_back`, `fast`, and `pyi` options are passed to
 542     :func:`format_file_in_place`.
 543     """
 544     cache: Cache = {}
 545     if write_back != WriteBack.DIFF:
 546         cache = read_cache(mode)
 547         sources, cached = filter_cached(cache, sources)
 548         for src in sorted(cached):
 549             report.done(src, Changed.CACHED)
 550     if not sources:
 551         return
 552
 553     cancelled = []
 554     sources_to_cache = []
 555     lock = None
 556     if write_back == WriteBack.DIFF:
 557         # For diff output, we need locks to ensure we don't interleave output
 558         # from different processes.
 559         manager = Manager()
 560         lock = manager.Lock()
 561     tasks = {
 562         asyncio.ensure_future(
 563             loop.run_in_executor(
 564                 executor, format_file_in_place, src, fast, mode, write_back, lock
 565             )
 566         ): src
 567         for src in sorted(sources)
 568     }
 569     pending: Iterable[asyncio.Future] = tasks.keys()
 570     try:
 571         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 572         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 573     except NotImplementedError:
 574         # There are no good alternatives for these on Windows.
 575         pass
 576     while pending:
 577         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 578         for task in done:
 579             src = tasks.pop(task)
 580             if task.cancelled():
 581                 cancelled.append(task)
 582             elif task.exception():
 583                 report.failed(src, str(task.exception()))
 584             else:
 585                 changed = Changed.YES if task.result() else Changed.NO
 586                 # If the file was written back or was successfully checked as
 587                 # well-formatted, store this information in the cache.
 588                 if write_back is WriteBack.YES or (
 589                     write_back is WriteBack.CHECK and changed is Changed.NO
 590                 ):
 591                     sources_to_cache.append(src)
 592                 report.done(src, changed)
 593     if cancelled:
 594         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 595     if sources_to_cache:
 596         write_cache(cache, sources_to_cache, mode)
 597
 598
 599 def format_file_in_place(
 600     src: Path,
 601     fast: bool,
 602     mode: FileMode,
 603     write_back: WriteBack = WriteBack.NO,
 604     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 605 ) -> bool:
 606     """Format file under `src` path. Return True if changed.
 607
 608     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 609     code to the file.
 610     `mode` and `fast` options are passed to :func:`format_file_contents`.
 611     """
 612     if src.suffix == ".pyi":
 613         mode = evolve(mode, is_pyi=True)
 614
 615     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 616     with open(src, "rb") as buf:
 617         src_contents, encoding, newline = decode_bytes(buf.read())
 618     try:
 619         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 620     except NothingChanged:
 621         return False
 622
 623     if write_back == write_back.YES:
 624         with open(src, "w", encoding=encoding, newline=newline) as f:
 625             f.write(dst_contents)
 626     elif write_back == write_back.DIFF:
 627         now = datetime.utcnow()
 628         src_name = f"{src}\t{then} +0000"
 629         dst_name = f"{src}\t{now} +0000"
 630         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 631         if lock:
 632             lock.acquire()
 633         try:
 634             f = io.TextIOWrapper(
 635                 sys.stdout.buffer,
 636                 encoding=encoding,
 637                 newline=newline,
 638                 write_through=True,
 639             )
 640             f.write(diff_contents)
 641             f.detach()
 642         finally:
 643             if lock:
 644                 lock.release()
 645     return True
 646
 647
 648 def format_stdin_to_stdout(
 649     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: FileMode
 650 ) -> bool:
 651     """Format file on stdin. Return True if changed.
 652
 653     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 654     write a diff to stdout. The `mode` argument is passed to
 655     :func:`format_file_contents`.
 656     """
 657     then = datetime.utcnow()
 658     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 659     dst = src
 660     try:
 661         dst = format_file_contents(src, fast=fast, mode=mode)
 662         return True
 663
 664     except NothingChanged:
 665         return False
 666
 667     finally:
 668         f = io.TextIOWrapper(
 669             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 670         )
 671         if write_back == WriteBack.YES:
 672             f.write(dst)
 673         elif write_back == WriteBack.DIFF:
 674             now = datetime.utcnow()
 675             src_name = f"STDIN\t{then} +0000"
 676             dst_name = f"STDOUT\t{now} +0000"
 677             f.write(diff(src, dst, src_name, dst_name))
 678         f.detach()
 679
 680
 681 def format_file_contents(
 682     src_contents: str, *, fast: bool, mode: FileMode
 683 ) -> FileContent:
 684     """Reformat contents a file and return new contents.
 685
 686     If `fast` is False, additionally confirm that the reformatted code is
 687     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 688     `mode` is passed to :func:`format_str`.
 689     """
 690     if src_contents.strip() == "":
 691         raise NothingChanged
 692
 693     dst_contents = format_str(src_contents, mode=mode)
 694     if src_contents == dst_contents:
 695         raise NothingChanged
 696
 697     if not fast:
 698         assert_equivalent(src_contents, dst_contents)
 699         assert_stable(src_contents, dst_contents, mode=mode)
 700     return dst_contents
 701
 702
 703 def format_str(src_contents: str, *, mode: FileMode) -> FileContent:
 704     """Reformat a string and return new contents.
 705
 706     `mode` determines formatting options, such as how many characters per line are
 707     allowed.
 708     """
 709     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 710     dst_contents = []
 711     future_imports = get_future_imports(src_node)
 712     if mode.target_versions:
 713         versions = mode.target_versions
 714     else:
 715         versions = detect_target_versions(src_node)
 716     normalize_fmt_off(src_node)
 717     lines = LineGenerator(
 718         remove_u_prefix="unicode_literals" in future_imports
 719         or supports_feature(versions, Feature.UNICODE_LITERALS),
 720         is_pyi=mode.is_pyi,
 721         normalize_strings=mode.string_normalization,
 722     )
 723     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 724     empty_line = Line()
 725     after = 0
 726     split_line_features = {
 727         feature
 728         for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
 729         if supports_feature(versions, feature)
 730     }
 731     for current_line in lines.visit(src_node):
 732         for _ in range(after):
 733             dst_contents.append(str(empty_line))
 734         before, after = elt.maybe_empty_lines(current_line)
 735         for _ in range(before):
 736             dst_contents.append(str(empty_line))
 737         for line in split_line(
 738             current_line, line_length=mode.line_length, features=split_line_features
 739         ):
 740             dst_contents.append(str(line))
 741     return "".join(dst_contents)
 742
 743
 744 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 745     """Return a tuple of (decoded_contents, encoding, newline).
 746
 747     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 748     universal newlines (i.e. only contains LF).
 749     """
 750     srcbuf = io.BytesIO(src)
 751     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 752     if not lines:
 753         return "", encoding, "\n"
 754
 755     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 756     srcbuf.seek(0)
 757     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 758         return tiow.read(), encoding, newline
 759
 760
 761 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
 762     if not target_versions:
 763         # No target_version specified, so try all grammars.
 764         return [
 765             # Python 3.7+
 766             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
 767             # Python 3.0-3.6
 768             pygram.python_grammar_no_print_statement_no_exec_statement,
 769             # Python 2.7 with future print_function import
 770             pygram.python_grammar_no_print_statement,
 771             # Python 2.7
 772             pygram.python_grammar,
 773         ]
 774     elif all(version.is_python2() for version in target_versions):
 775         # Python 2-only code, so try Python 2 grammars.
 776         return [
 777             # Python 2.7 with future print_function import
 778             pygram.python_grammar_no_print_statement,
 779             # Python 2.7
 780             pygram.python_grammar,
 781         ]
 782     else:
 783         # Python 3-compatible code, so only try Python 3 grammar.
 784         grammars = []
 785         # If we have to parse both, try to parse async as a keyword first
 786         if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
 787             # Python 3.7+
 788             grammars.append(
 789                 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords  # noqa: B950
 790             )
 791         if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
 792             # Python 3.0-3.6
 793             grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
 794         # At least one of the above branches must have been taken, because every Python
 795         # version has exactly one of the two 'ASYNC_*' flags
 796         return grammars
 797
 798
 799 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
 800     """Given a string with source, return the lib2to3 Node."""
 801     if src_txt[-1:] != "\n":
 802         src_txt += "\n"
 803
 804     for grammar in get_grammars(set(target_versions)):
 805         drv = driver.Driver(grammar, pytree.convert)
 806         try:
 807             result = drv.parse_string(src_txt, True)
 808             break
 809
 810         except ParseError as pe:
 811             lineno, column = pe.context[1]
 812             lines = src_txt.splitlines()
 813             try:
 814                 faulty_line = lines[lineno - 1]
 815             except IndexError:
 816                 faulty_line = "<line number missing in source>"
 817             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 818     else:
 819         raise exc from None
 820
 821     if isinstance(result, Leaf):
 822         result = Node(syms.file_input, [result])
 823     return result
 824
 825
 826 def lib2to3_unparse(node: Node) -> str:
 827     """Given a lib2to3 node, return its string representation."""
 828     code = str(node)
 829     return code
 830
 831
 832 T = TypeVar("T")
 833
 834
 835 class Visitor(Generic[T]):
 836     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 837
 838     def visit(self, node: LN) -> Iterator[T]:
 839         """Main method to visit `node` and its children.
 840
 841         It tries to find a `visit_*()` method for the given `node.type`, like
 842         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 843         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 844         instead.
 845
 846         Then yields objects of type `T` from the selected visitor.
 847         """
 848         if node.type < 256:
 849             name = token.tok_name[node.type]
 850         else:
 851             name = type_repr(node.type)
 852         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 853
 854     def visit_default(self, node: LN) -> Iterator[T]:
 855         """Default `visit_*()` implementation. Recurses to children of `node`."""
 856         if isinstance(node, Node):
 857             for child in node.children:
 858                 yield from self.visit(child)
 859
 860
 861 @dataclass
 862 class DebugVisitor(Visitor[T]):
 863     tree_depth: int = 0
 864
 865     def visit_default(self, node: LN) -> Iterator[T]:
 866         indent = " " * (2 * self.tree_depth)
 867         if isinstance(node, Node):
 868             _type = type_repr(node.type)
 869             out(f"{indent}{_type}", fg="yellow")
 870             self.tree_depth += 1
 871             for child in node.children:
 872                 yield from self.visit(child)
 873
 874             self.tree_depth -= 1
 875             out(f"{indent}/{_type}", fg="yellow", bold=False)
 876         else:
 877             _type = token.tok_name.get(node.type, str(node.type))
 878             out(f"{indent}{_type}", fg="blue", nl=False)
 879             if node.prefix:
 880                 # We don't have to handle prefixes for `Node` objects since
 881                 # that delegates to the first child anyway.
 882                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 883             out(f" {node.value!r}", fg="blue", bold=False)
 884
 885     @classmethod
 886     def show(cls, code: Union[str, Leaf, Node]) -> None:
 887         """Pretty-print the lib2to3 AST of a given string of `code`.
 888
 889         Convenience method for debugging.
 890         """
 891         v: DebugVisitor[None] = DebugVisitor()
 892         if isinstance(code, str):
 893             code = lib2to3_parse(code)
 894         list(v.visit(code))
 895
 896
 897 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
 898 STATEMENT = {
 899     syms.if_stmt,
 900     syms.while_stmt,
 901     syms.for_stmt,
 902     syms.try_stmt,
 903     syms.except_clause,
 904     syms.with_stmt,
 905     syms.funcdef,
 906     syms.classdef,
 907 }
 908 STANDALONE_COMMENT = 153
 909 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
 910 LOGIC_OPERATORS = {"and", "or"}
 911 COMPARATORS = {
 912     token.LESS,
 913     token.GREATER,
 914     token.EQEQUAL,
 915     token.NOTEQUAL,
 916     token.LESSEQUAL,
 917     token.GREATEREQUAL,
 918 }
 919 MATH_OPERATORS = {
 920     token.VBAR,
 921     token.CIRCUMFLEX,
 922     token.AMPER,
 923     token.LEFTSHIFT,
 924     token.RIGHTSHIFT,
 925     token.PLUS,
 926     token.MINUS,
 927     token.STAR,
 928     token.SLASH,
 929     token.DOUBLESLASH,
 930     token.PERCENT,
 931     token.AT,
 932     token.TILDE,
 933     token.DOUBLESTAR,
 934 }
 935 STARS = {token.STAR, token.DOUBLESTAR}
 936 VARARGS_PARENTS = {
 937     syms.arglist,
 938     syms.argument,  # double star in arglist
 939     syms.trailer,  # single argument to call
 940     syms.typedargslist,
 941     syms.varargslist,  # lambdas
 942 }
 943 UNPACKING_PARENTS = {
 944     syms.atom,  # single element of a list or set literal
 945     syms.dictsetmaker,
 946     syms.listmaker,
 947     syms.testlist_gexp,
 948     syms.testlist_star_expr,
 949 }
 950 TEST_DESCENDANTS = {
 951     syms.test,
 952     syms.lambdef,
 953     syms.or_test,
 954     syms.and_test,
 955     syms.not_test,
 956     syms.comparison,
 957     syms.star_expr,
 958     syms.expr,
 959     syms.xor_expr,
 960     syms.and_expr,
 961     syms.shift_expr,
 962     syms.arith_expr,
 963     syms.trailer,
 964     syms.term,
 965     syms.power,
 966 }
 967 ASSIGNMENTS = {
 968     "=",
 969     "+=",
 970     "-=",
 971     "*=",
 972     "@=",
 973     "/=",
 974     "%=",
 975     "&=",
 976     "|=",
 977     "^=",
 978     "<<=",
 979     ">>=",
 980     "**=",
 981     "//=",
 982 }
 983 COMPREHENSION_PRIORITY = 20
 984 COMMA_PRIORITY = 18
 985 TERNARY_PRIORITY = 16
 986 LOGIC_PRIORITY = 14
 987 STRING_PRIORITY = 12
 988 COMPARATOR_PRIORITY = 10
 989 MATH_PRIORITIES = {
 990     token.VBAR: 9,
 991     token.CIRCUMFLEX: 8,
 992     token.AMPER: 7,
 993     token.LEFTSHIFT: 6,
 994     token.RIGHTSHIFT: 6,
 995     token.PLUS: 5,
 996     token.MINUS: 5,
 997     token.STAR: 4,
 998     token.SLASH: 4,
 999     token.DOUBLESLASH: 4,
1000     token.PERCENT: 4,
1001     token.AT: 4,
1002     token.TILDE: 3,
1003     token.DOUBLESTAR: 2,
1004 }
1005 DOT_PRIORITY = 1
1006
1007
1008 @dataclass
1009 class BracketTracker:
1010     """Keeps track of brackets on a line."""
1011
1012     depth: int = 0
1013     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
1014     delimiters: Dict[LeafID, Priority] = Factory(dict)
1015     previous: Optional[Leaf] = None
1016     _for_loop_depths: List[int] = Factory(list)
1017     _lambda_argument_depths: List[int] = Factory(list)
1018
1019     def mark(self, leaf: Leaf) -> None:
1020         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
1021
1022         All leaves receive an int `bracket_depth` field that stores how deep
1023         within brackets a given leaf is. 0 means there are no enclosing brackets
1024         that started on this line.
1025
1026         If a leaf is itself a closing bracket, it receives an `opening_bracket`
1027         field that it forms a pair with. This is a one-directional link to
1028         avoid reference cycles.
1029
1030         If a leaf is a delimiter (a token on which Black can split the line if
1031         needed) and it's on depth 0, its `id()` is stored in the tracker's
1032         `delimiters` field.
1033         """
1034         if leaf.type == token.COMMENT:
1035             return
1036
1037         self.maybe_decrement_after_for_loop_variable(leaf)
1038         self.maybe_decrement_after_lambda_arguments(leaf)
1039         if leaf.type in CLOSING_BRACKETS:
1040             self.depth -= 1
1041             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
1042             leaf.opening_bracket = opening_bracket
1043         leaf.bracket_depth = self.depth
1044         if self.depth == 0:
1045             delim = is_split_before_delimiter(leaf, self.previous)
1046             if delim and self.previous is not None:
1047                 self.delimiters[id(self.previous)] = delim
1048             else:
1049                 delim = is_split_after_delimiter(leaf, self.previous)
1050                 if delim:
1051                     self.delimiters[id(leaf)] = delim
1052         if leaf.type in OPENING_BRACKETS:
1053             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
1054             self.depth += 1
1055         self.previous = leaf
1056         self.maybe_increment_lambda_arguments(leaf)
1057         self.maybe_increment_for_loop_variable(leaf)
1058
1059     def any_open_brackets(self) -> bool:
1060         """Return True if there is an yet unmatched open bracket on the line."""
1061         return bool(self.bracket_match)
1062
1063     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> Priority:
1064         """Return the highest priority of a delimiter found on the line.
1065
1066         Values are consistent with what `is_split_*_delimiter()` return.
1067         Raises ValueError on no delimiters.
1068         """
1069         return max(v for k, v in self.delimiters.items() if k not in exclude)
1070
1071     def delimiter_count_with_priority(self, priority: Priority = 0) -> int:
1072         """Return the number of delimiters with the given `priority`.
1073
1074         If no `priority` is passed, defaults to max priority on the line.
1075         """
1076         if not self.delimiters:
1077             return 0
1078
1079         priority = priority or self.max_delimiter_priority()
1080         return sum(1 for p in self.delimiters.values() if p == priority)
1081
1082     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1083         """In a for loop, or comprehension, the variables are often unpacks.
1084
1085         To avoid splitting on the comma in this situation, increase the depth of
1086         tokens between `for` and `in`.
1087         """
1088         if leaf.type == token.NAME and leaf.value == "for":
1089             self.depth += 1
1090             self._for_loop_depths.append(self.depth)
1091             return True
1092
1093         return False
1094
1095     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
1096         """See `maybe_increment_for_loop_variable` above for explanation."""
1097         if (
1098             self._for_loop_depths
1099             and self._for_loop_depths[-1] == self.depth
1100             and leaf.type == token.NAME
1101             and leaf.value == "in"
1102         ):
1103             self.depth -= 1
1104             self._for_loop_depths.pop()
1105             return True
1106
1107         return False
1108
1109     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
1110         """In a lambda expression, there might be more than one argument.
1111
1112         To avoid splitting on the comma in this situation, increase the depth of
1113         tokens between `lambda` and `:`.
1114         """
1115         if leaf.type == token.NAME and leaf.value == "lambda":
1116             self.depth += 1
1117             self._lambda_argument_depths.append(self.depth)
1118             return True
1119
1120         return False
1121
1122     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1123         """See `maybe_increment_lambda_arguments` above for explanation."""
1124         if (
1125             self._lambda_argument_depths
1126             and self._lambda_argument_depths[-1] == self.depth
1127             and leaf.type == token.COLON
1128         ):
1129             self.depth -= 1
1130             self._lambda_argument_depths.pop()
1131             return True
1132
1133         return False
1134
1135     def get_open_lsqb(self) -> Optional[Leaf]:
1136         """Return the most recent opening square bracket (if any)."""
1137         return self.bracket_match.get((self.depth - 1, token.RSQB))
1138
1139
1140 @dataclass
1141 class Line:
1142     """Holds leaves and comments. Can be printed with `str(line)`."""
1143
1144     depth: int = 0
1145     leaves: List[Leaf] = Factory(list)
1146     comments: Dict[LeafID, List[Leaf]] = Factory(dict)  # keys ordered like `leaves`
1147     bracket_tracker: BracketTracker = Factory(BracketTracker)
1148     inside_brackets: bool = False
1149     should_explode: bool = False
1150
1151     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1152         """Add a new `leaf` to the end of the line.
1153
1154         Unless `preformatted` is True, the `leaf` will receive a new consistent
1155         whitespace prefix and metadata applied by :class:`BracketTracker`.
1156         Trailing commas are maybe removed, unpacked for loop variables are
1157         demoted from being delimiters.
1158
1159         Inline comments are put aside.
1160         """
1161         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1162         if not has_value:
1163             return
1164
1165         if token.COLON == leaf.type and self.is_class_paren_empty:
1166             del self.leaves[-2:]
1167         if self.leaves and not preformatted:
1168             # Note: at this point leaf.prefix should be empty except for
1169             # imports, for which we only preserve newlines.
1170             leaf.prefix += whitespace(
1171                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1172             )
1173         if self.inside_brackets or not preformatted:
1174             self.bracket_tracker.mark(leaf)
1175             self.maybe_remove_trailing_comma(leaf)
1176         if not self.append_comment(leaf):
1177             self.leaves.append(leaf)
1178
1179     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1180         """Like :func:`append()` but disallow invalid standalone comment structure.
1181
1182         Raises ValueError when any `leaf` is appended after a standalone comment
1183         or when a standalone comment is not the first leaf on the line.
1184         """
1185         if self.bracket_tracker.depth == 0:
1186             if self.is_comment:
1187                 raise ValueError("cannot append to standalone comments")
1188
1189             if self.leaves and leaf.type == STANDALONE_COMMENT:
1190                 raise ValueError(
1191                     "cannot append standalone comments to a populated line"
1192                 )
1193
1194         self.append(leaf, preformatted=preformatted)
1195
1196     @property
1197     def is_comment(self) -> bool:
1198         """Is this line a standalone comment?"""
1199         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1200
1201     @property
1202     def is_decorator(self) -> bool:
1203         """Is this line a decorator?"""
1204         return bool(self) and self.leaves[0].type == token.AT
1205
1206     @property
1207     def is_import(self) -> bool:
1208         """Is this an import line?"""
1209         return bool(self) and is_import(self.leaves[0])
1210
1211     @property
1212     def is_class(self) -> bool:
1213         """Is this line a class definition?"""
1214         return (
1215             bool(self)
1216             and self.leaves[0].type == token.NAME
1217             and self.leaves[0].value == "class"
1218         )
1219
1220     @property
1221     def is_stub_class(self) -> bool:
1222         """Is this line a class definition with a body consisting only of "..."?"""
1223         return self.is_class and self.leaves[-3:] == [
1224             Leaf(token.DOT, ".") for _ in range(3)
1225         ]
1226
1227     @property
1228     def is_def(self) -> bool:
1229         """Is this a function definition? (Also returns True for async defs.)"""
1230         try:
1231             first_leaf = self.leaves[0]
1232         except IndexError:
1233             return False
1234
1235         try:
1236             second_leaf: Optional[Leaf] = self.leaves[1]
1237         except IndexError:
1238             second_leaf = None
1239         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1240             first_leaf.type == token.ASYNC
1241             and second_leaf is not None
1242             and second_leaf.type == token.NAME
1243             and second_leaf.value == "def"
1244         )
1245
1246     @property
1247     def is_class_paren_empty(self) -> bool:
1248         """Is this a class with no base classes but using parentheses?
1249
1250         Those are unnecessary and should be removed.
1251         """
1252         return (
1253             bool(self)
1254             and len(self.leaves) == 4
1255             and self.is_class
1256             and self.leaves[2].type == token.LPAR
1257             and self.leaves[2].value == "("
1258             and self.leaves[3].type == token.RPAR
1259             and self.leaves[3].value == ")"
1260         )
1261
1262     @property
1263     def is_triple_quoted_string(self) -> bool:
1264         """Is the line a triple quoted string?"""
1265         return (
1266             bool(self)
1267             and self.leaves[0].type == token.STRING
1268             and self.leaves[0].value.startswith(('"""', "'''"))
1269         )
1270
1271     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1272         """If so, needs to be split before emitting."""
1273         for leaf in self.leaves:
1274             if leaf.type == STANDALONE_COMMENT:
1275                 if leaf.bracket_depth <= depth_limit:
1276                     return True
1277         return False
1278
1279     def contains_inner_type_comments(self) -> bool:
1280         ignored_ids = set()
1281         try:
1282             last_leaf = self.leaves[-1]
1283             ignored_ids.add(id(last_leaf))
1284             if last_leaf.type == token.COMMA or (
1285                 last_leaf.type == token.RPAR and not last_leaf.value
1286             ):
1287                 # When trailing commas or optional parens are inserted by Black for
1288                 # consistency, comments after the previous last element are not moved
1289                 # (they don't have to, rendering will still be correct).  So we ignore
1290                 # trailing commas and invisible.
1291                 last_leaf = self.leaves[-2]
1292                 ignored_ids.add(id(last_leaf))
1293         except IndexError:
1294             return False
1295
1296         for leaf_id, comments in self.comments.items():
1297             if leaf_id in ignored_ids:
1298                 continue
1299
1300             for comment in comments:
1301                 if is_type_comment(comment):
1302                     return True
1303
1304         return False
1305
1306     def contains_multiline_strings(self) -> bool:
1307         for leaf in self.leaves:
1308             if is_multiline_string(leaf):
1309                 return True
1310
1311         return False
1312
1313     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1314         """Remove trailing comma if there is one and it's safe."""
1315         if not (
1316             self.leaves
1317             and self.leaves[-1].type == token.COMMA
1318             and closing.type in CLOSING_BRACKETS
1319         ):
1320             return False
1321
1322         if closing.type == token.RBRACE:
1323             self.remove_trailing_comma()
1324             return True
1325
1326         if closing.type == token.RSQB:
1327             comma = self.leaves[-1]
1328             if comma.parent and comma.parent.type == syms.listmaker:
1329                 self.remove_trailing_comma()
1330                 return True
1331
1332         # For parens let's check if it's safe to remove the comma.
1333         # Imports are always safe.
1334         if self.is_import:
1335             self.remove_trailing_comma()
1336             return True
1337
1338         # Otherwise, if the trailing one is the only one, we might mistakenly
1339         # change a tuple into a different type by removing the comma.
1340         depth = closing.bracket_depth + 1
1341         commas = 0
1342         opening = closing.opening_bracket
1343         for _opening_index, leaf in enumerate(self.leaves):
1344             if leaf is opening:
1345                 break
1346
1347         else:
1348             return False
1349
1350         for leaf in self.leaves[_opening_index + 1 :]:
1351             if leaf is closing:
1352                 break
1353
1354             bracket_depth = leaf.bracket_depth
1355             if bracket_depth == depth and leaf.type == token.COMMA:
1356                 commas += 1
1357                 if leaf.parent and leaf.parent.type in {
1358                     syms.arglist,
1359                     syms.typedargslist,
1360                 }:
1361                     commas += 1
1362                     break
1363
1364         if commas > 1:
1365             self.remove_trailing_comma()
1366             return True
1367
1368         return False
1369
1370     def append_comment(self, comment: Leaf) -> bool:
1371         """Add an inline or standalone comment to the line."""
1372         if (
1373             comment.type == STANDALONE_COMMENT
1374             and self.bracket_tracker.any_open_brackets()
1375         ):
1376             comment.prefix = ""
1377             return False
1378
1379         if comment.type != token.COMMENT:
1380             return False
1381
1382         if not self.leaves:
1383             comment.type = STANDALONE_COMMENT
1384             comment.prefix = ""
1385             return False
1386
1387         last_leaf = self.leaves[-1]
1388         if (
1389             last_leaf.type == token.RPAR
1390             and not last_leaf.value
1391             and last_leaf.parent
1392             and len(list(last_leaf.parent.leaves())) <= 3
1393             and not is_type_comment(comment)
1394         ):
1395             # Comments on an optional parens wrapping a single leaf should belong to
1396             # the wrapped node except if it's a type comment. Pinning the comment like
1397             # this avoids unstable formatting caused by comment migration.
1398             if len(self.leaves) < 2:
1399                 comment.type = STANDALONE_COMMENT
1400                 comment.prefix = ""
1401                 return False
1402             last_leaf = self.leaves[-2]
1403         self.comments.setdefault(id(last_leaf), []).append(comment)
1404         return True
1405
1406     def comments_after(self, leaf: Leaf) -> List[Leaf]:
1407         """Generate comments that should appear directly after `leaf`."""
1408         return self.comments.get(id(leaf), [])
1409
1410     def remove_trailing_comma(self) -> None:
1411         """Remove the trailing comma and moves the comments attached to it."""
1412         trailing_comma = self.leaves.pop()
1413         trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
1414         self.comments.setdefault(id(self.leaves[-1]), []).extend(
1415             trailing_comma_comments
1416         )
1417
1418     def is_complex_subscript(self, leaf: Leaf) -> bool:
1419         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1420         open_lsqb = self.bracket_tracker.get_open_lsqb()
1421         if open_lsqb is None:
1422             return False
1423
1424         subscript_start = open_lsqb.next_sibling
1425
1426         if isinstance(subscript_start, Node):
1427             if subscript_start.type == syms.listmaker:
1428                 return False
1429
1430             if subscript_start.type == syms.subscriptlist:
1431                 subscript_start = child_towards(subscript_start, leaf)
1432         return subscript_start is not None and any(
1433             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1434         )
1435
1436     def __str__(self) -> str:
1437         """Render the line."""
1438         if not self:
1439             return "\n"
1440
1441         indent = "    " * self.depth
1442         leaves = iter(self.leaves)
1443         first = next(leaves)
1444         res = f"{first.prefix}{indent}{first.value}"
1445         for leaf in leaves:
1446             res += str(leaf)
1447         for comment in itertools.chain.from_iterable(self.comments.values()):
1448             res += str(comment)
1449         return res + "\n"
1450
1451     def __bool__(self) -> bool:
1452         """Return True if the line has leaves or comments."""
1453         return bool(self.leaves or self.comments)
1454
1455
1456 @dataclass
1457 class EmptyLineTracker:
1458     """Provides a stateful method that returns the number of potential extra
1459     empty lines needed before and after the currently processed line.
1460
1461     Note: this tracker works on lines that haven't been split yet.  It assumes
1462     the prefix of the first leaf consists of optional newlines.  Those newlines
1463     are consumed by `maybe_empty_lines()` and included in the computation.
1464     """
1465
1466     is_pyi: bool = False
1467     previous_line: Optional[Line] = None
1468     previous_after: int = 0
1469     previous_defs: List[int] = Factory(list)
1470
1471     def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1472         """Return the number of extra empty lines before and after the `current_line`.
1473
1474         This is for separating `def`, `async def` and `class` with extra empty
1475         lines (two on module-level).
1476         """
1477         before, after = self._maybe_empty_lines(current_line)
1478         before -= self.previous_after
1479         self.previous_after = after
1480         self.previous_line = current_line
1481         return before, after
1482
1483     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1484         max_allowed = 1
1485         if current_line.depth == 0:
1486             max_allowed = 1 if self.is_pyi else 2
1487         if current_line.leaves:
1488             # Consume the first leaf's extra newlines.
1489             first_leaf = current_line.leaves[0]
1490             before = first_leaf.prefix.count("\n")
1491             before = min(before, max_allowed)
1492             first_leaf.prefix = ""
1493         else:
1494             before = 0
1495         depth = current_line.depth
1496         while self.previous_defs and self.previous_defs[-1] >= depth:
1497             self.previous_defs.pop()
1498             if self.is_pyi:
1499                 before = 0 if depth else 1
1500             else:
1501                 before = 1 if depth else 2
1502         if current_line.is_decorator or current_line.is_def or current_line.is_class:
1503             return self._maybe_empty_lines_for_class_or_def(current_line, before)
1504
1505         if (
1506             self.previous_line
1507             and self.previous_line.is_import
1508             and not current_line.is_import
1509             and depth == self.previous_line.depth
1510         ):
1511             return (before or 1), 0
1512
1513         if (
1514             self.previous_line
1515             and self.previous_line.is_class
1516             and current_line.is_triple_quoted_string
1517         ):
1518             return before, 1
1519
1520         return before, 0
1521
1522     def _maybe_empty_lines_for_class_or_def(
1523         self, current_line: Line, before: int
1524     ) -> Tuple[int, int]:
1525         if not current_line.is_decorator:
1526             self.previous_defs.append(current_line.depth)
1527         if self.previous_line is None:
1528             # Don't insert empty lines before the first line in the file.
1529             return 0, 0
1530
1531         if self.previous_line.is_decorator:
1532             return 0, 0
1533
1534         if self.previous_line.depth < current_line.depth and (
1535             self.previous_line.is_class or self.previous_line.is_def
1536         ):
1537             return 0, 0
1538
1539         if (
1540             self.previous_line.is_comment
1541             and self.previous_line.depth == current_line.depth
1542             and before == 0
1543         ):
1544             return 0, 0
1545
1546         if self.is_pyi:
1547             if self.previous_line.depth > current_line.depth:
1548                 newlines = 1
1549             elif current_line.is_class or self.previous_line.is_class:
1550                 if current_line.is_stub_class and self.previous_line.is_stub_class:
1551                     # No blank line between classes with an empty body
1552                     newlines = 0
1553                 else:
1554                     newlines = 1
1555             elif current_line.is_def and not self.previous_line.is_def:
1556                 # Blank line between a block of functions and a block of non-functions
1557                 newlines = 1
1558             else:
1559                 newlines = 0
1560         else:
1561             newlines = 2
1562         if current_line.depth and newlines:
1563             newlines -= 1
1564         return newlines, 0
1565
1566
1567 @dataclass
1568 class LineGenerator(Visitor[Line]):
1569     """Generates reformatted Line objects.  Empty lines are not emitted.
1570
1571     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1572     in ways that will no longer stringify to valid Python code on the tree.
1573     """
1574
1575     is_pyi: bool = False
1576     normalize_strings: bool = True
1577     current_line: Line = Factory(Line)
1578     remove_u_prefix: bool = False
1579
1580     def line(self, indent: int = 0) -> Iterator[Line]:
1581         """Generate a line.
1582
1583         If the line is empty, only emit if it makes sense.
1584         If the line is too long, split it first and then generate.
1585
1586         If any lines were generated, set up a new current_line.
1587         """
1588         if not self.current_line:
1589             self.current_line.depth += indent
1590             return  # Line is empty, don't emit. Creating a new one unnecessary.
1591
1592         complete_line = self.current_line
1593         self.current_line = Line(depth=complete_line.depth + indent)
1594         yield complete_line
1595
1596     def visit_default(self, node: LN) -> Iterator[Line]:
1597         """Default `visit_*()` implementation. Recurses to children of `node`."""
1598         if isinstance(node, Leaf):
1599             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1600             for comment in generate_comments(node):
1601                 if any_open_brackets:
1602                     # any comment within brackets is subject to splitting
1603                     self.current_line.append(comment)
1604                 elif comment.type == token.COMMENT:
1605                     # regular trailing comment
1606                     self.current_line.append(comment)
1607                     yield from self.line()
1608
1609                 else:
1610                     # regular standalone comment
1611                     yield from self.line()
1612
1613                     self.current_line.append(comment)
1614                     yield from self.line()
1615
1616             normalize_prefix(node, inside_brackets=any_open_brackets)
1617             if self.normalize_strings and node.type == token.STRING:
1618                 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1619                 normalize_string_quotes(node)
1620             if node.type == token.NUMBER:
1621                 normalize_numeric_literal(node)
1622             if node.type not in WHITESPACE:
1623                 self.current_line.append(node)
1624         yield from super().visit_default(node)
1625
1626     def visit_atom(self, node: Node) -> Iterator[Line]:
1627         # Always make parentheses invisible around a single node, because it should
1628         # not be needed (except in the case of yield, where removing the parentheses
1629         # produces a SyntaxError).
1630         if (
1631             len(node.children) == 3
1632             and isinstance(node.children[0], Leaf)
1633             and node.children[0].type == token.LPAR
1634             and isinstance(node.children[2], Leaf)
1635             and node.children[2].type == token.RPAR
1636             and isinstance(node.children[1], Leaf)
1637             and not (
1638                 node.children[1].type == token.NAME
1639                 and node.children[1].value == "yield"
1640             )
1641         ):
1642             node.children[0].value = ""
1643             node.children[2].value = ""
1644         yield from super().visit_default(node)
1645
1646     def visit_INDENT(self, node: Node) -> Iterator[Line]:
1647         """Increase indentation level, maybe yield a line."""
1648         # In blib2to3 INDENT never holds comments.
1649         yield from self.line(+1)
1650         yield from self.visit_default(node)
1651
1652     def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1653         """Decrease indentation level, maybe yield a line."""
1654         # The current line might still wait for trailing comments.  At DEDENT time
1655         # there won't be any (they would be prefixes on the preceding NEWLINE).
1656         # Emit the line then.
1657         yield from self.line()
1658
1659         # While DEDENT has no value, its prefix may contain standalone comments
1660         # that belong to the current indentation level.  Get 'em.
1661         yield from self.visit_default(node)
1662
1663         # Finally, emit the dedent.
1664         yield from self.line(-1)
1665
1666     def visit_stmt(
1667         self, node: Node, keywords: Set[str], parens: Set[str]
1668     ) -> Iterator[Line]:
1669         """Visit a statement.
1670
1671         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1672         `def`, `with`, `class`, `assert` and assignments.
1673
1674         The relevant Python language `keywords` for a given statement will be
1675         NAME leaves within it. This methods puts those on a separate line.
1676
1677         `parens` holds a set of string leaf values immediately after which
1678         invisible parens should be put.
1679         """
1680         normalize_invisible_parens(node, parens_after=parens)
1681         for child in node.children:
1682             if child.type == token.NAME and child.value in keywords:  # type: ignore
1683                 yield from self.line()
1684
1685             yield from self.visit(child)
1686
1687     def visit_suite(self, node: Node) -> Iterator[Line]:
1688         """Visit a suite."""
1689         if self.is_pyi and is_stub_suite(node):
1690             yield from self.visit(node.children[2])
1691         else:
1692             yield from self.visit_default(node)
1693
1694     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1695         """Visit a statement without nested statements."""
1696         is_suite_like = node.parent and node.parent.type in STATEMENT
1697         if is_suite_like:
1698             if self.is_pyi and is_stub_body(node):
1699                 yield from self.visit_default(node)
1700             else:
1701                 yield from self.line(+1)
1702                 yield from self.visit_default(node)
1703                 yield from self.line(-1)
1704
1705         else:
1706             if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1707                 yield from self.line()
1708             yield from self.visit_default(node)
1709
1710     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1711         """Visit `async def`, `async for`, `async with`."""
1712         yield from self.line()
1713
1714         children = iter(node.children)
1715         for child in children:
1716             yield from self.visit(child)
1717
1718             if child.type == token.ASYNC:
1719                 break
1720
1721         internal_stmt = next(children)
1722         for child in internal_stmt.children:
1723             yield from self.visit(child)
1724
1725     def visit_decorators(self, node: Node) -> Iterator[Line]:
1726         """Visit decorators."""
1727         for child in node.children:
1728             yield from self.line()
1729             yield from self.visit(child)
1730
1731     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1732         """Remove a semicolon and put the other statement on a separate line."""
1733         yield from self.line()
1734
1735     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1736         """End of file. Process outstanding comments and end with a newline."""
1737         yield from self.visit_default(leaf)
1738         yield from self.line()
1739
1740     def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
1741         if not self.current_line.bracket_tracker.any_open_brackets():
1742             yield from self.line()
1743         yield from self.visit_default(leaf)
1744
1745     def __attrs_post_init__(self) -> None:
1746         """You are in a twisty little maze of passages."""
1747         v = self.visit_stmt
1748         Ø: Set[str] = set()
1749         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1750         self.visit_if_stmt = partial(
1751             v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1752         )
1753         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1754         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1755         self.visit_try_stmt = partial(
1756             v, keywords={"try", "except", "else", "finally"}, parens=Ø
1757         )
1758         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1759         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1760         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1761         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1762         self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1763         self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1764         self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
1765         self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
1766         self.visit_async_funcdef = self.visit_async_stmt
1767         self.visit_decorated = self.visit_decorators
1768
1769
1770 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1771 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1772 OPENING_BRACKETS = set(BRACKET.keys())
1773 CLOSING_BRACKETS = set(BRACKET.values())
1774 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1775 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1776
1777
1778 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
1779     """Return whitespace prefix if needed for the given `leaf`.
1780
1781     `complex_subscript` signals whether the given leaf is part of a subscription
1782     which has non-trivial arguments, like arithmetic expressions or function calls.
1783     """
1784     NO = ""
1785     SPACE = " "
1786     DOUBLESPACE = "  "
1787     t = leaf.type
1788     p = leaf.parent
1789     v = leaf.value
1790     if t in ALWAYS_NO_SPACE:
1791         return NO
1792
1793     if t == token.COMMENT:
1794         return DOUBLESPACE
1795
1796     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1797     if t == token.COLON and p.type not in {
1798         syms.subscript,
1799         syms.subscriptlist,
1800         syms.sliceop,
1801     }:
1802         return NO
1803
1804     prev = leaf.prev_sibling
1805     if not prev:
1806         prevp = preceding_leaf(p)
1807         if not prevp or prevp.type in OPENING_BRACKETS:
1808             return NO
1809
1810         if t == token.COLON:
1811             if prevp.type == token.COLON:
1812                 return NO
1813
1814             elif prevp.type != token.COMMA and not complex_subscript:
1815                 return NO
1816
1817             return SPACE
1818
1819         if prevp.type == token.EQUAL:
1820             if prevp.parent:
1821                 if prevp.parent.type in {
1822                     syms.arglist,
1823                     syms.argument,
1824                     syms.parameters,
1825                     syms.varargslist,
1826                 }:
1827                     return NO
1828
1829                 elif prevp.parent.type == syms.typedargslist:
1830                     # A bit hacky: if the equal sign has whitespace, it means we
1831                     # previously found it's a typed argument.  So, we're using
1832                     # that, too.
1833                     return prevp.prefix
1834
1835         elif prevp.type in STARS:
1836             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1837                 return NO
1838
1839         elif prevp.type == token.COLON:
1840             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1841                 return SPACE if complex_subscript else NO
1842
1843         elif (
1844             prevp.parent
1845             and prevp.parent.type == syms.factor
1846             and prevp.type in MATH_OPERATORS
1847         ):
1848             return NO
1849
1850         elif (
1851             prevp.type == token.RIGHTSHIFT
1852             and prevp.parent
1853             and prevp.parent.type == syms.shift_expr
1854             and prevp.prev_sibling
1855             and prevp.prev_sibling.type == token.NAME
1856             and prevp.prev_sibling.value == "print"  # type: ignore
1857         ):
1858             # Python 2 print chevron
1859             return NO
1860
1861     elif prev.type in OPENING_BRACKETS:
1862         return NO
1863
1864     if p.type in {syms.parameters, syms.arglist}:
1865         # untyped function signatures or calls
1866         if not prev or prev.type != token.COMMA:
1867             return NO
1868
1869     elif p.type == syms.varargslist:
1870         # lambdas
1871         if prev and prev.type != token.COMMA:
1872             return NO
1873
1874     elif p.type == syms.typedargslist:
1875         # typed function signatures
1876         if not prev:
1877             return NO
1878
1879         if t == token.EQUAL:
1880             if prev.type != syms.tname:
1881                 return NO
1882
1883         elif prev.type == token.EQUAL:
1884             # A bit hacky: if the equal sign has whitespace, it means we
1885             # previously found it's a typed argument.  So, we're using that, too.
1886             return prev.prefix
1887
1888         elif prev.type != token.COMMA:
1889             return NO
1890
1891     elif p.type == syms.tname:
1892         # type names
1893         if not prev:
1894             prevp = preceding_leaf(p)
1895             if not prevp or prevp.type != token.COMMA:
1896                 return NO
1897
1898     elif p.type == syms.trailer:
1899         # attributes and calls
1900         if t == token.LPAR or t == token.RPAR:
1901             return NO
1902
1903         if not prev:
1904             if t == token.DOT:
1905                 prevp = preceding_leaf(p)
1906                 if not prevp or prevp.type != token.NUMBER:
1907                     return NO
1908
1909             elif t == token.LSQB:
1910                 return NO
1911
1912         elif prev.type != token.COMMA:
1913             return NO
1914
1915     elif p.type == syms.argument:
1916         # single argument
1917         if t == token.EQUAL:
1918             return NO
1919
1920         if not prev:
1921             prevp = preceding_leaf(p)
1922             if not prevp or prevp.type == token.LPAR:
1923                 return NO
1924
1925         elif prev.type in {token.EQUAL} | STARS:
1926             return NO
1927
1928     elif p.type == syms.decorator:
1929         # decorators
1930         return NO
1931
1932     elif p.type == syms.dotted_name:
1933         if prev:
1934             return NO
1935
1936         prevp = preceding_leaf(p)
1937         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1938             return NO
1939
1940     elif p.type == syms.classdef:
1941         if t == token.LPAR:
1942             return NO
1943
1944         if prev and prev.type == token.LPAR:
1945             return NO
1946
1947     elif p.type in {syms.subscript, syms.sliceop}:
1948         # indexing
1949         if not prev:
1950             assert p.parent is not None, "subscripts are always parented"
1951             if p.parent.type == syms.subscriptlist:
1952                 return SPACE
1953
1954             return NO
1955
1956         elif not complex_subscript:
1957             return NO
1958
1959     elif p.type == syms.atom:
1960         if prev and t == token.DOT:
1961             # dots, but not the first one.
1962             return NO
1963
1964     elif p.type == syms.dictsetmaker:
1965         # dict unpacking
1966         if prev and prev.type == token.DOUBLESTAR:
1967             return NO
1968
1969     elif p.type in {syms.factor, syms.star_expr}:
1970         # unary ops
1971         if not prev:
1972             prevp = preceding_leaf(p)
1973             if not prevp or prevp.type in OPENING_BRACKETS:
1974                 return NO
1975
1976             prevp_parent = prevp.parent
1977             assert prevp_parent is not None
1978             if prevp.type == token.COLON and prevp_parent.type in {
1979                 syms.subscript,
1980                 syms.sliceop,
1981             }:
1982                 return NO
1983
1984             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1985                 return NO
1986
1987         elif t in {token.NAME, token.NUMBER, token.STRING}:
1988             return NO
1989
1990     elif p.type == syms.import_from:
1991         if t == token.DOT:
1992             if prev and prev.type == token.DOT:
1993                 return NO
1994
1995         elif t == token.NAME:
1996             if v == "import":
1997                 return SPACE
1998
1999             if prev and prev.type == token.DOT:
2000                 return NO
2001
2002     elif p.type == syms.sliceop:
2003         return NO
2004
2005     return SPACE
2006
2007
2008 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
2009     """Return the first leaf that precedes `node`, if any."""
2010     while node:
2011         res = node.prev_sibling
2012         if res:
2013             if isinstance(res, Leaf):
2014                 return res
2015
2016             try:
2017                 return list(res.leaves())[-1]
2018
2019             except IndexError:
2020                 return None
2021
2022         node = node.parent
2023     return None
2024
2025
2026 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
2027     """Return the child of `ancestor` that contains `descendant`."""
2028     node: Optional[LN] = descendant
2029     while node and node.parent != ancestor:
2030         node = node.parent
2031     return node
2032
2033
2034 def container_of(leaf: Leaf) -> LN:
2035     """Return `leaf` or one of its ancestors that is the topmost container of it.
2036
2037     By "container" we mean a node where `leaf` is the very first child.
2038     """
2039     same_prefix = leaf.prefix
2040     container: LN = leaf
2041     while container:
2042         parent = container.parent
2043         if parent is None:
2044             break
2045
2046         if parent.children[0].prefix != same_prefix:
2047             break
2048
2049         if parent.type == syms.file_input:
2050             break
2051
2052         if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
2053             break
2054
2055         container = parent
2056     return container
2057
2058
2059 def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2060     """Return the priority of the `leaf` delimiter, given a line break after it.
2061
2062     The delimiter priorities returned here are from those delimiters that would
2063     cause a line break after themselves.
2064
2065     Higher numbers are higher priority.
2066     """
2067     if leaf.type == token.COMMA:
2068         return COMMA_PRIORITY
2069
2070     return 0
2071
2072
2073 def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> Priority:
2074     """Return the priority of the `leaf` delimiter, given a line break before it.
2075
2076     The delimiter priorities returned here are from those delimiters that would
2077     cause a line break before themselves.
2078
2079     Higher numbers are higher priority.
2080     """
2081     if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
2082         # * and ** might also be MATH_OPERATORS but in this case they are not.
2083         # Don't treat them as a delimiter.
2084         return 0
2085
2086     if (
2087         leaf.type == token.DOT
2088         and leaf.parent
2089         and leaf.parent.type not in {syms.import_from, syms.dotted_name}
2090         and (previous is None or previous.type in CLOSING_BRACKETS)
2091     ):
2092         return DOT_PRIORITY
2093
2094     if (
2095         leaf.type in MATH_OPERATORS
2096         and leaf.parent
2097         and leaf.parent.type not in {syms.factor, syms.star_expr}
2098     ):
2099         return MATH_PRIORITIES[leaf.type]
2100
2101     if leaf.type in COMPARATORS:
2102         return COMPARATOR_PRIORITY
2103
2104     if (
2105         leaf.type == token.STRING
2106         and previous is not None
2107         and previous.type == token.STRING
2108     ):
2109         return STRING_PRIORITY
2110
2111     if leaf.type not in {token.NAME, token.ASYNC}:
2112         return 0
2113
2114     if (
2115         leaf.value == "for"
2116         and leaf.parent
2117         and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
2118         or leaf.type == token.ASYNC
2119     ):
2120         if (
2121             not isinstance(leaf.prev_sibling, Leaf)
2122             or leaf.prev_sibling.value != "async"
2123         ):
2124             return COMPREHENSION_PRIORITY
2125
2126     if (
2127         leaf.value == "if"
2128         and leaf.parent
2129         and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
2130     ):
2131         return COMPREHENSION_PRIORITY
2132
2133     if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
2134         return TERNARY_PRIORITY
2135
2136     if leaf.value == "is":
2137         return COMPARATOR_PRIORITY
2138
2139     if (
2140         leaf.value == "in"
2141         and leaf.parent
2142         and leaf.parent.type in {syms.comp_op, syms.comparison}
2143         and not (
2144             previous is not None
2145             and previous.type == token.NAME
2146             and previous.value == "not"
2147         )
2148     ):
2149         return COMPARATOR_PRIORITY
2150
2151     if (
2152         leaf.value == "not"
2153         and leaf.parent
2154         and leaf.parent.type == syms.comp_op
2155         and not (
2156             previous is not None
2157             and previous.type == token.NAME
2158             and previous.value == "is"
2159         )
2160     ):
2161         return COMPARATOR_PRIORITY
2162
2163     if leaf.value in LOGIC_OPERATORS and leaf.parent:
2164         return LOGIC_PRIORITY
2165
2166     return 0
2167
2168
2169 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2170 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
2171
2172
2173 def generate_comments(leaf: LN) -> Iterator[Leaf]:
2174     """Clean the prefix of the `leaf` and generate comments from it, if any.
2175
2176     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
2177     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
2178     move because it does away with modifying the grammar to include all the
2179     possible places in which comments can be placed.
2180
2181     The sad consequence for us though is that comments don't "belong" anywhere.
2182     This is why this function generates simple parentless Leaf objects for
2183     comments.  We simply don't know what the correct parent should be.
2184
2185     No matter though, we can live without this.  We really only need to
2186     differentiate between inline and standalone comments.  The latter don't
2187     share the line with any code.
2188
2189     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
2190     are emitted with a fake STANDALONE_COMMENT token identifier.
2191     """
2192     for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
2193         yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
2194
2195
2196 @dataclass
2197 class ProtoComment:
2198     """Describes a piece of syntax that is a comment.
2199
2200     It's not a :class:`blib2to3.pytree.Leaf` so that:
2201
2202     * it can be cached (`Leaf` objects should not be reused more than once as
2203       they store their lineno, column, prefix, and parent information);
2204     * `newlines` and `consumed` fields are kept separate from the `value`. This
2205       simplifies handling of special marker comments like ``# fmt: off/on``.
2206     """
2207
2208     type: int  # token.COMMENT or STANDALONE_COMMENT
2209     value: str  # content of the comment
2210     newlines: int  # how many newlines before the comment
2211     consumed: int  # how many characters of the original leaf's prefix did we consume
2212
2213
2214 @lru_cache(maxsize=4096)
2215 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2216     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2217     result: List[ProtoComment] = []
2218     if not prefix or "#" not in prefix:
2219         return result
2220
2221     consumed = 0
2222     nlines = 0
2223     ignored_lines = 0
2224     for index, line in enumerate(prefix.split("\n")):
2225         consumed += len(line) + 1  # adding the length of the split '\n'
2226         line = line.lstrip()
2227         if not line:
2228             nlines += 1
2229         if not line.startswith("#"):
2230             # Escaped newlines outside of a comment are not really newlines at
2231             # all. We treat a single-line comment following an escaped newline
2232             # as a simple trailing comment.
2233             if line.endswith("\\"):
2234                 ignored_lines += 1
2235             continue
2236
2237         if index == ignored_lines and not is_endmarker:
2238             comment_type = token.COMMENT  # simple trailing comment
2239         else:
2240             comment_type = STANDALONE_COMMENT
2241         comment = make_comment(line)
2242         result.append(
2243             ProtoComment(
2244                 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2245             )
2246         )
2247         nlines = 0
2248     return result
2249
2250
2251 def make_comment(content: str) -> str:
2252     """Return a consistently formatted comment from the given `content` string.
2253
2254     All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
2255     space between the hash sign and the content.
2256
2257     If `content` didn't start with a hash sign, one is provided.
2258     """
2259     content = content.rstrip()
2260     if not content:
2261         return "#"
2262
2263     if content[0] == "#":
2264         content = content[1:]
2265     if content and content[0] not in " !:#'%":
2266         content = " " + content
2267     return "#" + content
2268
2269
2270 def split_line(
2271     line: Line,
2272     line_length: int,
2273     inner: bool = False,
2274     features: Collection[Feature] = (),
2275 ) -> Iterator[Line]:
2276     """Split a `line` into potentially many lines.
2277
2278     They should fit in the allotted `line_length` but might not be able to.
2279     `inner` signifies that there were a pair of brackets somewhere around the
2280     current `line`, possibly transitively. This means we can fallback to splitting
2281     by delimiters if the LHS/RHS don't yield any results.
2282
2283     `features` are syntactical features that may be used in the output.
2284     """
2285     if line.is_comment:
2286         yield line
2287         return
2288
2289     line_str = str(line).strip("\n")
2290
2291     if (
2292         not line.contains_inner_type_comments()
2293         and not line.should_explode
2294         and is_line_short_enough(line, line_length=line_length, line_str=line_str)
2295     ):
2296         yield line
2297         return
2298
2299     split_funcs: List[SplitFunc]
2300     if line.is_def:
2301         split_funcs = [left_hand_split]
2302     else:
2303
2304         def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
2305             for omit in generate_trailers_to_omit(line, line_length):
2306                 lines = list(right_hand_split(line, line_length, features, omit=omit))
2307                 if is_line_short_enough(lines[0], line_length=line_length):
2308                     yield from lines
2309                     return
2310
2311             # All splits failed, best effort split with no omits.
2312             # This mostly happens to multiline strings that are by definition
2313             # reported as not fitting a single line.
2314             yield from right_hand_split(line, line_length, features=features)
2315
2316         if line.inside_brackets:
2317             split_funcs = [delimiter_split, standalone_comment_split, rhs]
2318         else:
2319             split_funcs = [rhs]
2320     for split_func in split_funcs:
2321         # We are accumulating lines in `result` because we might want to abort
2322         # mission and return the original line in the end, or attempt a different
2323         # split altogether.
2324         result: List[Line] = []
2325         try:
2326             for l in split_func(line, features):
2327                 if str(l).strip("\n") == line_str:
2328                     raise CannotSplit("Split function returned an unchanged result")
2329
2330                 result.extend(
2331                     split_line(
2332                         l, line_length=line_length, inner=True, features=features
2333                     )
2334                 )
2335         except CannotSplit:
2336             continue
2337
2338         else:
2339             yield from result
2340             break
2341
2342     else:
2343         yield line
2344
2345
2346 def left_hand_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
2347     """Split line into many lines, starting with the first matching bracket pair.
2348
2349     Note: this usually looks weird, only use this for function definitions.
2350     Prefer RHS otherwise.  This is why this function is not symmetrical with
2351     :func:`right_hand_split` which also handles optional parentheses.
2352     """
2353     tail_leaves: List[Leaf] = []
2354     body_leaves: List[Leaf] = []
2355     head_leaves: List[Leaf] = []
2356     current_leaves = head_leaves
2357     matching_bracket = None
2358     for leaf in line.leaves:
2359         if (
2360             current_leaves is body_leaves
2361             and leaf.type in CLOSING_BRACKETS
2362             and leaf.opening_bracket is matching_bracket
2363         ):
2364             current_leaves = tail_leaves if body_leaves else head_leaves
2365         current_leaves.append(leaf)
2366         if current_leaves is head_leaves:
2367             if leaf.type in OPENING_BRACKETS:
2368                 matching_bracket = leaf
2369                 current_leaves = body_leaves
2370     if not matching_bracket:
2371         raise CannotSplit("No brackets found")
2372
2373     head = bracket_split_build_line(head_leaves, line, matching_bracket)
2374     body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
2375     tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
2376     bracket_split_succeeded_or_raise(head, body, tail)
2377     for result in (head, body, tail):
2378         if result:
2379             yield result
2380
2381
2382 def right_hand_split(
2383     line: Line,
2384     line_length: int,
2385     features: Collection[Feature] = (),
2386     omit: Collection[LeafID] = (),
2387 ) -> Iterator[Line]:
2388     """Split line into many lines, starting with the last matching bracket pair.
2389
2390     If the split was by optional parentheses, attempt splitting without them, too.
2391     `omit` is a collection of closing bracket IDs that shouldn't be considered for
2392     this split.
2393
2394     Note: running this function modifies `bracket_depth` on the leaves of `line`.
2395     """
2396     tail_leaves: List[Leaf] = []
2397     body_leaves: List[Leaf] = []
2398     head_leaves: List[Leaf] = []
2399     current_leaves = tail_leaves
2400     opening_bracket = None
2401     closing_bracket = None
2402     for leaf in reversed(line.leaves):
2403         if current_leaves is body_leaves:
2404             if leaf is opening_bracket:
2405                 current_leaves = head_leaves if body_leaves else tail_leaves
2406         current_leaves.append(leaf)
2407         if current_leaves is tail_leaves:
2408             if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
2409                 opening_bracket = leaf.opening_bracket
2410                 closing_bracket = leaf
2411                 current_leaves = body_leaves
2412     if not (opening_bracket and closing_bracket and head_leaves):
2413         # If there is no opening or closing_bracket that means the split failed and
2414         # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
2415         # the matching `opening_bracket` wasn't available on `line` anymore.
2416         raise CannotSplit("No brackets found")
2417
2418     tail_leaves.reverse()
2419     body_leaves.reverse()
2420     head_leaves.reverse()
2421     head = bracket_split_build_line(head_leaves, line, opening_bracket)
2422     body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
2423     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
2424     bracket_split_succeeded_or_raise(head, body, tail)
2425     if (
2426         # the body shouldn't be exploded
2427         not body.should_explode
2428         # the opening bracket is an optional paren
2429         and opening_bracket.type == token.LPAR
2430         and not opening_bracket.value
2431         # the closing bracket is an optional paren
2432         and closing_bracket.type == token.RPAR
2433         and not closing_bracket.value
2434         # it's not an import (optional parens are the only thing we can split on
2435         # in this case; attempting a split without them is a waste of time)
2436         and not line.is_import
2437         # there are no standalone comments in the body
2438         and not body.contains_standalone_comments(0)
2439         # and we can actually remove the parens
2440         and can_omit_invisible_parens(body, line_length)
2441     ):
2442         omit = {id(closing_bracket), *omit}
2443         try:
2444             yield from right_hand_split(line, line_length, features=features, omit=omit)
2445             return
2446
2447         except CannotSplit:
2448             if not (
2449                 can_be_split(body)
2450                 or is_line_short_enough(body, line_length=line_length)
2451             ):
2452                 raise CannotSplit(
2453                     "Splitting failed, body is still too long and can't be split."
2454                 )
2455
2456             elif head.contains_multiline_strings() or tail.contains_multiline_strings():
2457                 raise CannotSplit(
2458                     "The current optional pair of parentheses is bound to fail to "
2459                     "satisfy the splitting algorithm because the head or the tail "
2460                     "contains multiline strings which by definition never fit one "
2461                     "line."
2462                 )
2463
2464     ensure_visible(opening_bracket)
2465     ensure_visible(closing_bracket)
2466     for result in (head, body, tail):
2467         if result:
2468             yield result
2469
2470
2471 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
2472     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
2473
2474     Do nothing otherwise.
2475
2476     A left- or right-hand split is based on a pair of brackets. Content before
2477     (and including) the opening bracket is left on one line, content inside the
2478     brackets is put on a separate line, and finally content starting with and
2479     following the closing bracket is put on a separate line.
2480
2481     Those are called `head`, `body`, and `tail`, respectively. If the split
2482     produced the same line (all content in `head`) or ended up with an empty `body`
2483     and the `tail` is just the closing bracket, then it's considered failed.
2484     """
2485     tail_len = len(str(tail).strip())
2486     if not body:
2487         if tail_len == 0:
2488             raise CannotSplit("Splitting brackets produced the same line")
2489
2490         elif tail_len < 3:
2491             raise CannotSplit(
2492                 f"Splitting brackets on an empty body to save "
2493                 f"{tail_len} characters is not worth it"
2494             )
2495
2496
2497 def bracket_split_build_line(
2498     leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
2499 ) -> Line:
2500     """Return a new line with given `leaves` and respective comments from `original`.
2501
2502     If `is_body` is True, the result line is one-indented inside brackets and as such
2503     has its first leaf's prefix normalized and a trailing comma added when expected.
2504     """
2505     result = Line(depth=original.depth)
2506     if is_body:
2507         result.inside_brackets = True
2508         result.depth += 1
2509         if leaves:
2510             # Since body is a new indent level, remove spurious leading whitespace.
2511             normalize_prefix(leaves[0], inside_brackets=True)
2512             # Ensure a trailing comma for imports and standalone function arguments, but
2513             # be careful not to add one after any comments.
2514             no_commas = original.is_def and not any(
2515                 l.type == token.COMMA for l in leaves
2516             )
2517
2518             if original.is_import or no_commas:
2519                 for i in range(len(leaves) - 1, -1, -1):
2520                     if leaves[i].type == STANDALONE_COMMENT:
2521                         continue
2522                     elif leaves[i].type == token.COMMA:
2523                         break
2524                     else:
2525                         leaves.insert(i + 1, Leaf(token.COMMA, ","))
2526                         break
2527     # Populate the line
2528     for leaf in leaves:
2529         result.append(leaf, preformatted=True)
2530         for comment_after in original.comments_after(leaf):
2531             result.append(comment_after, preformatted=True)
2532     if is_body:
2533         result.should_explode = should_explode(result, opening_bracket)
2534     return result
2535
2536
2537 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
2538     """Normalize prefix of the first leaf in every line returned by `split_func`.
2539
2540     This is a decorator over relevant split functions.
2541     """
2542
2543     @wraps(split_func)
2544     def split_wrapper(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
2545         for l in split_func(line, features):
2546             normalize_prefix(l.leaves[0], inside_brackets=True)
2547             yield l
2548
2549     return split_wrapper
2550
2551
2552 @dont_increase_indentation
2553 def delimiter_split(line: Line, features: Collection[Feature] = ()) -> Iterator[Line]:
2554     """Split according to delimiters of the highest priority.
2555
2556     If the appropriate Features are given, the split will add trailing commas
2557     also in function signatures and calls that contain `*` and `**`.
2558     """
2559     try:
2560         last_leaf = line.leaves[-1]
2561     except IndexError:
2562         raise CannotSplit("Line empty")
2563
2564     bt = line.bracket_tracker
2565     try:
2566         delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
2567     except ValueError:
2568         raise CannotSplit("No delimiters found")
2569
2570     if delimiter_priority == DOT_PRIORITY:
2571         if bt.delimiter_count_with_priority(delimiter_priority) == 1:
2572             raise CannotSplit("Splitting a single attribute from its owner looks wrong")
2573
2574     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2575     lowest_depth = sys.maxsize
2576     trailing_comma_safe = True
2577
2578     def append_to_line(leaf: Leaf) -> Iterator[Line]:
2579         """Append `leaf` to current line or to new line if appending impossible."""
2580         nonlocal current_line
2581         try:
2582             current_line.append_safe(leaf, preformatted=True)
2583         except ValueError:
2584             yield current_line
2585
2586             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2587             current_line.append(leaf)
2588
2589     for leaf in line.leaves:
2590         yield from append_to_line(leaf)
2591
2592         for comment_after in line.comments_after(leaf):
2593             yield from append_to_line(comment_after)
2594
2595         lowest_depth = min(lowest_depth, leaf.bracket_depth)
2596         if leaf.bracket_depth == lowest_depth:
2597             if is_vararg(leaf, within={syms.typedargslist}):
2598                 trailing_comma_safe = (
2599                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
2600                 )
2601             elif is_vararg(leaf, within={syms.arglist, syms.argument}):
2602                 trailing_comma_safe = (
2603                     trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
2604                 )
2605
2606         leaf_priority = bt.delimiters.get(id(leaf))
2607         if leaf_priority == delimiter_priority:
2608             yield current_line
2609
2610             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2611     if current_line:
2612         if (
2613             trailing_comma_safe
2614             and delimiter_priority == COMMA_PRIORITY
2615             and current_line.leaves[-1].type != token.COMMA
2616             and current_line.leaves[-1].type != STANDALONE_COMMENT
2617         ):
2618             current_line.append(Leaf(token.COMMA, ","))
2619         yield current_line
2620
2621
2622 @dont_increase_indentation
2623 def standalone_comment_split(
2624     line: Line, features: Collection[Feature] = ()
2625 ) -> Iterator[Line]:
2626     """Split standalone comments from the rest of the line."""
2627     if not line.contains_standalone_comments(0):
2628         raise CannotSplit("Line does not have any standalone comments")
2629
2630     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2631
2632     def append_to_line(leaf: Leaf) -> Iterator[Line]:
2633         """Append `leaf` to current line or to new line if appending impossible."""
2634         nonlocal current_line
2635         try:
2636             current_line.append_safe(leaf, preformatted=True)
2637         except ValueError:
2638             yield current_line
2639
2640             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2641             current_line.append(leaf)
2642
2643     for leaf in line.leaves:
2644         yield from append_to_line(leaf)
2645
2646         for comment_after in line.comments_after(leaf):
2647             yield from append_to_line(comment_after)
2648
2649     if current_line:
2650         yield current_line
2651
2652
2653 def is_import(leaf: Leaf) -> bool:
2654     """Return True if the given leaf starts an import statement."""
2655     p = leaf.parent
2656     t = leaf.type
2657     v = leaf.value
2658     return bool(
2659         t == token.NAME
2660         and (
2661             (v == "import" and p and p.type == syms.import_name)
2662             or (v == "from" and p and p.type == syms.import_from)
2663         )
2664     )
2665
2666
2667 def is_type_comment(leaf: Leaf) -> bool:
2668     """Return True if the given leaf is a special comment.
2669     Only returns true for type comments for now."""
2670     t = leaf.type
2671     v = leaf.value
2672     return t in {token.COMMENT, t == STANDALONE_COMMENT} and v.startswith("# type:")
2673
2674
2675 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2676     """Leave existing extra newlines if not `inside_brackets`. Remove everything
2677     else.
2678
2679     Note: don't use backslashes for formatting or you'll lose your voting rights.
2680     """
2681     if not inside_brackets:
2682         spl = leaf.prefix.split("#")
2683         if "\\" not in spl[0]:
2684             nl_count = spl[-1].count("\n")
2685             if len(spl) > 1:
2686                 nl_count -= 1
2687             leaf.prefix = "\n" * nl_count
2688             return
2689
2690     leaf.prefix = ""
2691
2692
2693 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
2694     """Make all string prefixes lowercase.
2695
2696     If remove_u_prefix is given, also removes any u prefix from the string.
2697
2698     Note: Mutates its argument.
2699     """
2700     match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
2701     assert match is not None, f"failed to match string {leaf.value!r}"
2702     orig_prefix = match.group(1)
2703     new_prefix = orig_prefix.lower()
2704     if remove_u_prefix:
2705         new_prefix = new_prefix.replace("u", "")
2706     leaf.value = f"{new_prefix}{match.group(2)}"
2707
2708
2709 def normalize_string_quotes(leaf: Leaf) -> None:
2710     """Prefer double quotes but only if it doesn't cause more escaping.
2711
2712     Adds or removes backslashes as appropriate. Doesn't parse and fix
2713     strings nested in f-strings (yet).
2714
2715     Note: Mutates its argument.
2716     """
2717     value = leaf.value.lstrip("furbFURB")
2718     if value[:3] == '"""':
2719         return
2720
2721     elif value[:3] == "'''":
2722         orig_quote = "'''"
2723         new_quote = '"""'
2724     elif value[0] == '"':
2725         orig_quote = '"'
2726         new_quote = "'"
2727     else:
2728         orig_quote = "'"
2729         new_quote = '"'
2730     first_quote_pos = leaf.value.find(orig_quote)
2731     if first_quote_pos == -1:
2732         return  # There's an internal error
2733
2734     prefix = leaf.value[:first_quote_pos]
2735     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2736     escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
2737     escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
2738     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2739     if "r" in prefix.casefold():
2740         if unescaped_new_quote.search(body):
2741             # There's at least one unescaped new_quote in this raw string
2742             # so converting is impossible
2743             return
2744
2745         # Do not introduce or remove backslashes in raw strings
2746         new_body = body
2747     else:
2748         # remove unnecessary escapes
2749         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2750         if body != new_body:
2751             # Consider the string without unnecessary escapes as the original
2752             body = new_body
2753             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2754         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2755         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2756     if "f" in prefix.casefold():
2757         matches = re.findall(
2758             r"""
2759             (?:[^{]|^)\{  # start of the string or a non-{ followed by a single {
2760                 ([^{].*?)  # contents of the brackets except if begins with {{
2761             \}(?:[^}]|$)  # A } followed by end of the string or a non-}
2762             """,
2763             new_body,
2764             re.VERBOSE,
2765         )
2766         for m in matches:
2767             if "\\" in str(m):
2768                 # Do not introduce backslashes in interpolated expressions
2769                 return
2770     if new_quote == '"""' and new_body[-1:] == '"':
2771         # edge case:
2772         new_body = new_body[:-1] + '\\"'
2773     orig_escape_count = body.count("\\")
2774     new_escape_count = new_body.count("\\")
2775     if new_escape_count > orig_escape_count:
2776         return  # Do not introduce more escaping
2777
2778     if new_escape_count == orig_escape_count and orig_quote == '"':
2779         return  # Prefer double quotes
2780
2781     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2782
2783
2784 def normalize_numeric_literal(leaf: Leaf) -> None:
2785     """Normalizes numeric (float, int, and complex) literals.
2786
2787     All letters used in the representation are normalized to lowercase (except
2788     in Python 2 long literals).
2789     """
2790     text = leaf.value.lower()
2791     if text.startswith(("0o", "0b")):
2792         # Leave octal and binary literals alone.
2793         pass
2794     elif text.startswith("0x"):
2795         # Change hex literals to upper case.
2796         before, after = text[:2], text[2:]
2797         text = f"{before}{after.upper()}"
2798     elif "e" in text:
2799         before, after = text.split("e")
2800         sign = ""
2801         if after.startswith("-"):
2802             after = after[1:]
2803             sign = "-"
2804         elif after.startswith("+"):
2805             after = after[1:]
2806         before = format_float_or_int_string(before)
2807         text = f"{before}e{sign}{after}"
2808     elif text.endswith(("j", "l")):
2809         number = text[:-1]
2810         suffix = text[-1]
2811         # Capitalize in "2L" because "l" looks too similar to "1".
2812         if suffix == "l":
2813             suffix = "L"
2814         text = f"{format_float_or_int_string(number)}{suffix}"
2815     else:
2816         text = format_float_or_int_string(text)
2817     leaf.value = text
2818
2819
2820 def format_float_or_int_string(text: str) -> str:
2821     """Formats a float string like "1.0"."""
2822     if "." not in text:
2823         return text
2824
2825     before, after = text.split(".")
2826     return f"{before or 0}.{after or 0}"
2827
2828
2829 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2830     """Make existing optional parentheses invisible or create new ones.
2831
2832     `parens_after` is a set of string leaf values immediately after which parens
2833     should be put.
2834
2835     Standardizes on visible parentheses for single-element tuples, and keeps
2836     existing visible parentheses for other tuples and generator expressions.
2837     """
2838     for pc in list_comments(node.prefix, is_endmarker=False):
2839         if pc.value in FMT_OFF:
2840             # This `node` has a prefix with `# fmt: off`, don't mess with parens.
2841             return
2842
2843     check_lpar = False
2844     for index, child in enumerate(list(node.children)):
2845         # Add parentheses around long tuple unpacking in assignments.
2846         if (
2847             index == 0
2848             and isinstance(child, Node)
2849             and child.type == syms.testlist_star_expr
2850         ):
2851             check_lpar = True
2852
2853         if check_lpar:
2854             if child.type == syms.atom:
2855                 if maybe_make_parens_invisible_in_atom(child, parent=node):
2856                     lpar = Leaf(token.LPAR, "")
2857                     rpar = Leaf(token.RPAR, "")
2858                     index = child.remove() or 0
2859                     node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2860             elif is_one_tuple(child):
2861                 # wrap child in visible parentheses
2862                 lpar = Leaf(token.LPAR, "(")
2863                 rpar = Leaf(token.RPAR, ")")
2864                 child.remove()
2865                 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2866             elif node.type == syms.import_from:
2867                 # "import from" nodes store parentheses directly as part of
2868                 # the statement
2869                 if child.type == token.LPAR:
2870                     # make parentheses invisible
2871                     child.value = ""  # type: ignore
2872                     node.children[-1].value = ""  # type: ignore
2873                 elif child.type != token.STAR:
2874                     # insert invisible parentheses
2875                     node.insert_child(index, Leaf(token.LPAR, ""))
2876                     node.append_child(Leaf(token.RPAR, ""))
2877                 break
2878
2879             elif not (isinstance(child, Leaf) and is_multiline_string(child)):
2880                 # wrap child in invisible parentheses
2881                 lpar = Leaf(token.LPAR, "")
2882                 rpar = Leaf(token.RPAR, "")
2883                 index = child.remove() or 0
2884                 prefix = child.prefix
2885                 child.prefix = ""
2886                 new_child = Node(syms.atom, [lpar, child, rpar])
2887                 new_child.prefix = prefix
2888                 node.insert_child(index, new_child)
2889
2890         check_lpar = isinstance(child, Leaf) and child.value in parens_after
2891
2892
2893 def normalize_fmt_off(node: Node) -> None:
2894     """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
2895     try_again = True
2896     while try_again:
2897         try_again = convert_one_fmt_off_pair(node)
2898
2899
2900 def convert_one_fmt_off_pair(node: Node) -> bool:
2901     """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
2902
2903     Returns True if a pair was converted.
2904     """
2905     for leaf in node.leaves():
2906         previous_consumed = 0
2907         for comment in list_comments(leaf.prefix, is_endmarker=False):
2908             if comment.value in FMT_OFF:
2909                 # We only want standalone comments. If there's no previous leaf or
2910                 # the previous leaf is indentation, it's a standalone comment in
2911                 # disguise.
2912                 if comment.type != STANDALONE_COMMENT:
2913                     prev = preceding_leaf(leaf)
2914                     if prev and prev.type not in WHITESPACE:
2915                         continue
2916
2917                 ignored_nodes = list(generate_ignored_nodes(leaf))
2918                 if not ignored_nodes:
2919                     continue
2920
2921                 first = ignored_nodes[0]  # Can be a container node with the `leaf`.
2922                 parent = first.parent
2923                 prefix = first.prefix
2924                 first.prefix = prefix[comment.consumed :]
2925                 hidden_value = (
2926                     comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
2927                 )
2928                 if hidden_value.endswith("\n"):
2929                     # That happens when one of the `ignored_nodes` ended with a NEWLINE
2930                     # leaf (possibly followed by a DEDENT).
2931                     hidden_value = hidden_value[:-1]
2932                 first_idx = None
2933                 for ignored in ignored_nodes:
2934                     index = ignored.remove()
2935                     if first_idx is None:
2936                         first_idx = index
2937                 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
2938                 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
2939                 parent.insert_child(
2940                     first_idx,
2941                     Leaf(
2942                         STANDALONE_COMMENT,
2943                         hidden_value,
2944                         prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
2945                     ),
2946                 )
2947                 return True
2948
2949             previous_consumed = comment.consumed
2950
2951     return False
2952
2953
2954 def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
2955     """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
2956
2957     Stops at the end of the block.
2958     """
2959     container: Optional[LN] = container_of(leaf)
2960     while container is not None and container.type != token.ENDMARKER:
2961         for comment in list_comments(container.prefix, is_endmarker=False):
2962             if comment.value in FMT_ON:
2963                 return
2964
2965         yield container
2966
2967         container = container.next_sibling
2968
2969
2970 def maybe_make_parens_invisible_in_atom(node: LN, parent: LN) -> bool:
2971     """If it's safe, make the parens in the atom `node` invisible, recursively.
2972
2973     Returns whether the node should itself be wrapped in invisible parentheses.
2974
2975     """
2976     if (
2977         node.type != syms.atom
2978         or is_empty_tuple(node)
2979         or is_one_tuple(node)
2980         or (is_yield(node) and parent.type != syms.expr_stmt)
2981         or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
2982     ):
2983         return False
2984
2985     first = node.children[0]
2986     last = node.children[-1]
2987     if first.type == token.LPAR and last.type == token.RPAR:
2988         # make parentheses invisible
2989         first.value = ""  # type: ignore
2990         last.value = ""  # type: ignore
2991         if len(node.children) > 1:
2992             maybe_make_parens_invisible_in_atom(node.children[1], parent=parent)
2993         return False
2994
2995     return True
2996
2997
2998 def is_empty_tuple(node: LN) -> bool:
2999     """Return True if `node` holds an empty tuple."""
3000     return (
3001         node.type == syms.atom
3002         and len(node.children) == 2
3003         and node.children[0].type == token.LPAR
3004         and node.children[1].type == token.RPAR
3005     )
3006
3007
3008 def is_one_tuple(node: LN) -> bool:
3009     """Return True if `node` holds a tuple with one element, with or without parens."""
3010     if node.type == syms.atom:
3011         if len(node.children) != 3:
3012             return False
3013
3014         lpar, gexp, rpar = node.children
3015         if not (
3016             lpar.type == token.LPAR
3017             and gexp.type == syms.testlist_gexp
3018             and rpar.type == token.RPAR
3019         ):
3020             return False
3021
3022         return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
3023
3024     return (
3025         node.type in IMPLICIT_TUPLE
3026         and len(node.children) == 2
3027         and node.children[1].type == token.COMMA
3028     )
3029
3030
3031 def is_yield(node: LN) -> bool:
3032     """Return True if `node` holds a `yield` or `yield from` expression."""
3033     if node.type == syms.yield_expr:
3034         return True
3035
3036     if node.type == token.NAME and node.value == "yield":  # type: ignore
3037         return True
3038
3039     if node.type != syms.atom:
3040         return False
3041
3042     if len(node.children) != 3:
3043         return False
3044
3045     lpar, expr, rpar = node.children
3046     if lpar.type == token.LPAR and rpar.type == token.RPAR:
3047         return is_yield(expr)
3048
3049     return False
3050
3051
3052 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
3053     """Return True if `leaf` is a star or double star in a vararg or kwarg.
3054
3055     If `within` includes VARARGS_PARENTS, this applies to function signatures.
3056     If `within` includes UNPACKING_PARENTS, it applies to right hand-side
3057     extended iterable unpacking (PEP 3132) and additional unpacking
3058     generalizations (PEP 448).
3059     """
3060     if leaf.type not in STARS or not leaf.parent:
3061         return False
3062
3063     p = leaf.parent
3064     if p.type == syms.star_expr:
3065         # Star expressions are also used as assignment targets in extended
3066         # iterable unpacking (PEP 3132).  See what its parent is instead.
3067         if not p.parent:
3068             return False
3069
3070         p = p.parent
3071
3072     return p.type in within
3073
3074
3075 def is_multiline_string(leaf: Leaf) -> bool:
3076     """Return True if `leaf` is a multiline string that actually spans many lines."""
3077     value = leaf.value.lstrip("furbFURB")
3078     return value[:3] in {'"""', "'''"} and "\n" in value
3079
3080
3081 def is_stub_suite(node: Node) -> bool:
3082     """Return True if `node` is a suite with a stub body."""
3083     if (
3084         len(node.children) != 4
3085         or node.children[0].type != token.NEWLINE
3086         or node.children[1].type != token.INDENT
3087         or node.children[3].type != token.DEDENT
3088     ):
3089         return False
3090
3091     return is_stub_body(node.children[2])
3092
3093
3094 def is_stub_body(node: LN) -> bool:
3095     """Return True if `node` is a simple statement containing an ellipsis."""
3096     if not isinstance(node, Node) or node.type != syms.simple_stmt:
3097         return False
3098
3099     if len(node.children) != 2:
3100         return False
3101
3102     child = node.children[0]
3103     return (
3104         child.type == syms.atom
3105         and len(child.children) == 3
3106         and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
3107     )
3108
3109
3110 def max_delimiter_priority_in_atom(node: LN) -> Priority:
3111     """Return maximum delimiter priority inside `node`.
3112
3113     This is specific to atoms with contents contained in a pair of parentheses.
3114     If `node` isn't an atom or there are no enclosing parentheses, returns 0.
3115     """
3116     if node.type != syms.atom:
3117         return 0
3118
3119     first = node.children[0]
3120     last = node.children[-1]
3121     if not (first.type == token.LPAR and last.type == token.RPAR):
3122         return 0
3123
3124     bt = BracketTracker()
3125     for c in node.children[1:-1]:
3126         if isinstance(c, Leaf):
3127             bt.mark(c)
3128         else:
3129             for leaf in c.leaves():
3130                 bt.mark(leaf)
3131     try:
3132         return bt.max_delimiter_priority()
3133
3134     except ValueError:
3135         return 0
3136
3137
3138 def ensure_visible(leaf: Leaf) -> None:
3139     """Make sure parentheses are visible.
3140
3141     They could be invisible as part of some statements (see
3142     :func:`normalize_invible_parens` and :func:`visit_import_from`).
3143     """
3144     if leaf.type == token.LPAR:
3145         leaf.value = "("
3146     elif leaf.type == token.RPAR:
3147         leaf.value = ")"
3148
3149
3150 def should_explode(line: Line, opening_bracket: Leaf) -> bool:
3151     """Should `line` immediately be split with `delimiter_split()` after RHS?"""
3152
3153     if not (
3154         opening_bracket.parent
3155         and opening_bracket.parent.type in {syms.atom, syms.import_from}
3156         and opening_bracket.value in "[{("
3157     ):
3158         return False
3159
3160     try:
3161         last_leaf = line.leaves[-1]
3162         exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
3163         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
3164     except (IndexError, ValueError):
3165         return False
3166
3167     return max_priority == COMMA_PRIORITY
3168
3169
3170 def get_features_used(node: Node) -> Set[Feature]:
3171     """Return a set of (relatively) new Python features used in this file.
3172
3173     Currently looking for:
3174     - f-strings;
3175     - underscores in numeric literals; and
3176     - trailing commas after * or ** in function signatures and calls.
3177     """
3178     features: Set[Feature] = set()
3179     for n in node.pre_order():
3180         if n.type == token.STRING:
3181             value_head = n.value[:2]  # type: ignore
3182             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
3183                 features.add(Feature.F_STRINGS)
3184
3185         elif n.type == token.NUMBER:
3186             if "_" in n.value:  # type: ignore
3187                 features.add(Feature.NUMERIC_UNDERSCORES)
3188
3189         elif (
3190             n.type in {syms.typedargslist, syms.arglist}
3191             and n.children
3192             and n.children[-1].type == token.COMMA
3193         ):
3194             if n.type == syms.typedargslist:
3195                 feature = Feature.TRAILING_COMMA_IN_DEF
3196             else:
3197                 feature = Feature.TRAILING_COMMA_IN_CALL
3198
3199             for ch in n.children:
3200                 if ch.type in STARS:
3201                     features.add(feature)
3202
3203                 if ch.type == syms.argument:
3204                     for argch in ch.children:
3205                         if argch.type in STARS:
3206                             features.add(feature)
3207
3208     return features
3209
3210
3211 def detect_target_versions(node: Node) -> Set[TargetVersion]:
3212     """Detect the version to target based on the nodes used."""
3213     features = get_features_used(node)
3214     return {
3215         version for version in TargetVersion if features <= VERSION_TO_FEATURES[version]
3216     }
3217
3218
3219 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
3220     """Generate sets of closing bracket IDs that should be omitted in a RHS.
3221
3222     Brackets can be omitted if the entire trailer up to and including
3223     a preceding closing bracket fits in one line.
3224
3225     Yielded sets are cumulative (contain results of previous yields, too).  First
3226     set is empty.
3227     """
3228
3229     omit: Set[LeafID] = set()
3230     yield omit
3231
3232     length = 4 * line.depth
3233     opening_bracket = None
3234     closing_bracket = None
3235     inner_brackets: Set[LeafID] = set()
3236     for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
3237         length += leaf_length
3238         if length > line_length:
3239             break
3240
3241         has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
3242         if leaf.type == STANDALONE_COMMENT or has_inline_comment:
3243             break
3244
3245         if opening_bracket:
3246             if leaf is opening_bracket:
3247                 opening_bracket = None
3248             elif leaf.type in CLOSING_BRACKETS:
3249                 inner_brackets.add(id(leaf))
3250         elif leaf.type in CLOSING_BRACKETS:
3251             if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
3252                 # Empty brackets would fail a split so treat them as "inner"
3253                 # brackets (e.g. only add them to the `omit` set if another
3254                 # pair of brackets was good enough.
3255                 inner_brackets.add(id(leaf))
3256                 continue
3257
3258             if closing_bracket:
3259                 omit.add(id(closing_bracket))
3260                 omit.update(inner_brackets)
3261                 inner_brackets.clear()
3262                 yield omit
3263
3264             if leaf.value:
3265                 opening_bracket = leaf.opening_bracket
3266                 closing_bracket = leaf
3267
3268
3269 def get_future_imports(node: Node) -> Set[str]:
3270     """Return a set of __future__ imports in the file."""
3271     imports: Set[str] = set()
3272
3273     def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
3274         for child in children:
3275             if isinstance(child, Leaf):
3276                 if child.type == token.NAME:
3277                     yield child.value
3278             elif child.type == syms.import_as_name:
3279                 orig_name = child.children[0]
3280                 assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
3281                 assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
3282                 yield orig_name.value
3283             elif child.type == syms.import_as_names:
3284                 yield from get_imports_from_children(child.children)
3285             else:
3286                 raise AssertionError("Invalid syntax parsing imports")
3287
3288     for child in node.children:
3289         if child.type != syms.simple_stmt:
3290             break
3291         first_child = child.children[0]
3292         if isinstance(first_child, Leaf):
3293             # Continue looking if we see a docstring; otherwise stop.
3294             if (
3295                 len(child.children) == 2
3296                 and first_child.type == token.STRING
3297                 and child.children[1].type == token.NEWLINE
3298             ):
3299                 continue
3300             else:
3301                 break
3302         elif first_child.type == syms.import_from:
3303             module_name = first_child.children[1]
3304             if not isinstance(module_name, Leaf) or module_name.value != "__future__":
3305                 break
3306             imports |= set(get_imports_from_children(first_child.children[3:]))
3307         else:
3308             break
3309     return imports
3310
3311
3312 def gen_python_files_in_dir(
3313     path: Path,
3314     root: Path,
3315     include: Pattern[str],
3316     exclude: Pattern[str],
3317     report: "Report",
3318 ) -> Iterator[Path]:
3319     """Generate all files under `path` whose paths are not excluded by the
3320     `exclude` regex, but are included by the `include` regex.
3321
3322     Symbolic links pointing outside of the `root` directory are ignored.
3323
3324     `report` is where output about exclusions goes.
3325     """
3326     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
3327     for child in path.iterdir():
3328         try:
3329             normalized_path = "/" + child.resolve().relative_to(root).as_posix()
3330         except ValueError:
3331             if child.is_symlink():
3332                 report.path_ignored(
3333                     child, f"is a symbolic link that points outside {root}"
3334                 )
3335                 continue
3336
3337             raise
3338
3339         if child.is_dir():
3340             normalized_path += "/"
3341         exclude_match = exclude.search(normalized_path)
3342         if exclude_match and exclude_match.group(0):
3343             report.path_ignored(child, f"matches the --exclude regular expression")
3344             continue
3345
3346         if child.is_dir():
3347             yield from gen_python_files_in_dir(child, root, include, exclude, report)
3348
3349         elif child.is_file():
3350             include_match = include.search(normalized_path)
3351             if include_match:
3352                 yield child
3353
3354
3355 @lru_cache()
3356 def find_project_root(srcs: Iterable[str]) -> Path:
3357     """Return a directory containing .git, .hg, or pyproject.toml.
3358
3359     That directory can be one of the directories passed in `srcs` or their
3360     common parent.
3361
3362     If no directory in the tree contains a marker that would specify it's the
3363     project root, the root of the file system is returned.
3364     """
3365     if not srcs:
3366         return Path("/").resolve()
3367
3368     common_base = min(Path(src).resolve() for src in srcs)
3369     if common_base.is_dir():
3370         # Append a fake file so `parents` below returns `common_base_dir`, too.
3371         common_base /= "fake-file"
3372     for directory in common_base.parents:
3373         if (directory / ".git").is_dir():
3374             return directory
3375
3376         if (directory / ".hg").is_dir():
3377             return directory
3378
3379         if (directory / "pyproject.toml").is_file():
3380             return directory
3381
3382     return directory
3383
3384
3385 @dataclass
3386 class Report:
3387     """Provides a reformatting counter. Can be rendered with `str(report)`."""
3388
3389     check: bool = False
3390     quiet: bool = False
3391     verbose: bool = False
3392     change_count: int = 0
3393     same_count: int = 0
3394     failure_count: int = 0
3395
3396     def done(self, src: Path, changed: Changed) -> None:
3397         """Increment the counter for successful reformatting. Write out a message."""
3398         if changed is Changed.YES:
3399             reformatted = "would reformat" if self.check else "reformatted"
3400             if self.verbose or not self.quiet:
3401                 out(f"{reformatted} {src}")
3402             self.change_count += 1
3403         else:
3404             if self.verbose:
3405                 if changed is Changed.NO:
3406                     msg = f"{src} already well formatted, good job."
3407                 else:
3408                     msg = f"{src} wasn't modified on disk since last run."
3409                 out(msg, bold=False)
3410             self.same_count += 1
3411
3412     def failed(self, src: Path, message: str) -> None:
3413         """Increment the counter for failed reformatting. Write out a message."""
3414         err(f"error: cannot format {src}: {message}")
3415         self.failure_count += 1
3416
3417     def path_ignored(self, path: Path, message: str) -> None:
3418         if self.verbose:
3419             out(f"{path} ignored: {message}", bold=False)
3420
3421     @property
3422     def return_code(self) -> int:
3423         """Return the exit code that the app should use.
3424
3425         This considers the current state of changed files and failures:
3426         - if there were any failures, return 123;
3427         - if any files were changed and --check is being used, return 1;
3428         - otherwise return 0.
3429         """
3430         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
3431         # 126 we have special return codes reserved by the shell.
3432         if self.failure_count:
3433             return 123
3434
3435         elif self.change_count and self.check:
3436             return 1
3437
3438         return 0
3439
3440     def __str__(self) -> str:
3441         """Render a color report of the current state.
3442
3443         Use `click.unstyle` to remove colors.
3444         """
3445         if self.check:
3446             reformatted = "would be reformatted"
3447             unchanged = "would be left unchanged"
3448             failed = "would fail to reformat"
3449         else:
3450             reformatted = "reformatted"
3451             unchanged = "left unchanged"
3452             failed = "failed to reformat"
3453         report = []
3454         if self.change_count:
3455             s = "s" if self.change_count > 1 else ""
3456             report.append(
3457                 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
3458             )
3459         if self.same_count:
3460             s = "s" if self.same_count > 1 else ""
3461             report.append(f"{self.same_count} file{s} {unchanged}")
3462         if self.failure_count:
3463             s = "s" if self.failure_count > 1 else ""
3464             report.append(
3465                 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
3466             )
3467         return ", ".join(report) + "."
3468
3469
3470 def parse_ast(src: str) -> Union[ast3.AST, ast27.AST]:
3471     for feature_version in (7, 6):
3472         try:
3473             return ast3.parse(src, feature_version=feature_version)
3474         except SyntaxError:
3475             continue
3476
3477     return ast27.parse(src)
3478
3479
3480 def assert_equivalent(src: str, dst: str) -> None:
3481     """Raise AssertionError if `src` and `dst` aren't equivalent."""
3482
3483     def _v(node: Union[ast3.AST, ast27.AST], depth: int = 0) -> Iterator[str]:
3484         """Simple visitor generating strings to compare ASTs by content."""
3485         yield f"{'  ' * depth}{node.__class__.__name__}("
3486
3487         for field in sorted(node._fields):
3488             # TypeIgnore has only one field 'lineno' which breaks this comparison
3489             if isinstance(node, (ast3.TypeIgnore, ast27.TypeIgnore)):
3490                 break
3491
3492             # Ignore str kind which is case sensitive / and ignores unicode_literals
3493             if isinstance(node, (ast3.Str, ast27.Str, ast3.Bytes)) and field == "kind":
3494                 continue
3495
3496             try:
3497                 value = getattr(node, field)
3498             except AttributeError:
3499                 continue
3500
3501             yield f"{'  ' * (depth+1)}{field}="
3502
3503             if isinstance(value, list):
3504                 for item in value:
3505                     # Ignore nested tuples within del statements, because we may insert
3506                     # parentheses and they change the AST.
3507                     if (
3508                         field == "targets"
3509                         and isinstance(node, (ast3.Delete, ast27.Delete))
3510                         and isinstance(item, (ast3.Tuple, ast27.Tuple))
3511                     ):
3512                         for item in item.elts:
3513                             yield from _v(item, depth + 2)
3514                     elif isinstance(item, (ast3.AST, ast27.AST)):
3515                         yield from _v(item, depth + 2)
3516
3517             elif isinstance(value, (ast3.AST, ast27.AST)):
3518                 yield from _v(value, depth + 2)
3519
3520             else:
3521                 yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"
3522
3523         yield f"{'  ' * depth})  # /{node.__class__.__name__}"
3524
3525     try:
3526         src_ast = parse_ast(src)
3527     except Exception as exc:
3528         raise AssertionError(
3529             f"cannot use --safe with this file; failed to parse source file.  "
3530             f"AST error message: {exc}"
3531         )
3532
3533     try:
3534         dst_ast = parse_ast(dst)
3535     except Exception as exc:
3536         log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
3537         raise AssertionError(
3538             f"INTERNAL ERROR: Black produced invalid code: {exc}. "
3539             f"Please report a bug on https://github.com/python/black/issues.  "
3540             f"This invalid output might be helpful: {log}"
3541         ) from None
3542
3543     src_ast_str = "\n".join(_v(src_ast))
3544     dst_ast_str = "\n".join(_v(dst_ast))
3545     if src_ast_str != dst_ast_str:
3546         log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
3547         raise AssertionError(
3548             f"INTERNAL ERROR: Black produced code that is not equivalent to "
3549             f"the source.  "
3550             f"Please report a bug on https://github.com/python/black/issues.  "
3551             f"This diff might be helpful: {log}"
3552         ) from None
3553
3554
3555 def assert_stable(src: str, dst: str, mode: FileMode) -> None:
3556     """Raise AssertionError if `dst` reformats differently the second time."""
3557     newdst = format_str(dst, mode=mode)
3558     if dst != newdst:
3559         log = dump_to_file(
3560             diff(src, dst, "source", "first pass"),
3561             diff(dst, newdst, "first pass", "second pass"),
3562         )
3563         raise AssertionError(
3564             f"INTERNAL ERROR: Black produced different code on the second pass "
3565             f"of the formatter.  "
3566             f"Please report a bug on https://github.com/python/black/issues.  "
3567             f"This diff might be helpful: {log}"
3568         ) from None
3569
3570
3571 def dump_to_file(*output: str) -> str:
3572     """Dump `output` to a temporary file. Return path to the file."""
3573     with tempfile.NamedTemporaryFile(
3574         mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
3575     ) as f:
3576         for lines in output:
3577             f.write(lines)
3578             if lines and lines[-1] != "\n":
3579                 f.write("\n")
3580     return f.name
3581
3582
3583 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
3584     """Return a unified diff string between strings `a` and `b`."""
3585     import difflib
3586
3587     a_lines = [line + "\n" for line in a.split("\n")]
3588     b_lines = [line + "\n" for line in b.split("\n")]
3589     return "".join(
3590         difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
3591     )
3592
3593
3594 def cancel(tasks: Iterable[asyncio.Task]) -> None:
3595     """asyncio signal handler that cancels all `tasks` and reports to stderr."""
3596     err("Aborted!")
3597     for task in tasks:
3598         task.cancel()
3599
3600
3601 def shutdown(loop: asyncio.AbstractEventLoop) -> None:
3602     """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
3603     try:
3604         if sys.version_info[:2] >= (3, 7):
3605             all_tasks = asyncio.all_tasks
3606         else:
3607             all_tasks = asyncio.Task.all_tasks
3608         # This part is borrowed from asyncio/runners.py in Python 3.7b2.
3609         to_cancel = [task for task in all_tasks(loop) if not task.done()]
3610         if not to_cancel:
3611             return
3612
3613         for task in to_cancel:
3614             task.cancel()
3615         loop.run_until_complete(
3616             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
3617         )
3618     finally:
3619         # `concurrent.futures.Future` objects cannot be cancelled once they
3620         # are already running. There might be some when the `shutdown()` happened.
3621         # Silence their logger's spew about the event loop being closed.
3622         cf_logger = logging.getLogger("concurrent.futures")
3623         cf_logger.setLevel(logging.CRITICAL)
3624         loop.close()
3625
3626
3627 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
3628     """Replace `regex` with `replacement` twice on `original`.
3629
3630     This is used by string normalization to perform replaces on
3631     overlapping matches.
3632     """
3633     return regex.sub(replacement, regex.sub(replacement, original))
3634
3635
3636 def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
3637     """Compile a regular expression string in `regex`.
3638
3639     If it contains newlines, use verbose mode.
3640     """
3641     if "\n" in regex:
3642         regex = "(?x)" + regex
3643     return re.compile(regex)
3644
3645
3646 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
3647     """Like `reversed(enumerate(sequence))` if that were possible."""
3648     index = len(sequence) - 1
3649     for element in reversed(sequence):
3650         yield (index, element)
3651         index -= 1
3652
3653
3654 def enumerate_with_length(
3655     line: Line, reversed: bool = False
3656 ) -> Iterator[Tuple[Index, Leaf, int]]:
3657     """Return an enumeration of leaves with their length.
3658
3659     Stops prematurely on multiline strings and standalone comments.
3660     """
3661     op = cast(
3662         Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
3663         enumerate_reversed if reversed else enumerate,
3664     )
3665     for index, leaf in op(line.leaves):
3666         length = len(leaf.prefix) + len(leaf.value)
3667         if "\n" in leaf.value:
3668             return  # Multiline strings, we can't continue.
3669
3670         for comment in line.comments_after(leaf):
3671             length += len(comment.value)
3672
3673         yield index, leaf, length
3674
3675
3676 def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
3677     """Return True if `line` is no longer than `line_length`.
3678
3679     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
3680     """
3681     if not line_str:
3682         line_str = str(line).strip("\n")
3683     return (
3684         len(line_str) <= line_length
3685         and "\n" not in line_str  # multiline strings
3686         and not line.contains_standalone_comments()
3687     )
3688
3689
3690 def can_be_split(line: Line) -> bool:
3691     """Return False if the line cannot be split *for sure*.
3692
3693     This is not an exhaustive search but a cheap heuristic that we can use to
3694     avoid some unfortunate formattings (mostly around wrapping unsplittable code
3695     in unnecessary parentheses).
3696     """
3697     leaves = line.leaves
3698     if len(leaves) < 2:
3699         return False
3700
3701     if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
3702         call_count = 0
3703         dot_count = 0
3704         next = leaves[-1]
3705         for leaf in leaves[-2::-1]:
3706             if leaf.type in OPENING_BRACKETS:
3707                 if next.type not in CLOSING_BRACKETS:
3708                     return False
3709
3710                 call_count += 1
3711             elif leaf.type == token.DOT:
3712                 dot_count += 1
3713             elif leaf.type == token.NAME:
3714                 if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
3715                     return False
3716
3717             elif leaf.type not in CLOSING_BRACKETS:
3718                 return False
3719
3720             if dot_count > 1 and call_count > 1:
3721                 return False
3722
3723     return True
3724
3725
3726 def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
3727     """Does `line` have a shape safe to reformat without optional parens around it?
3728
3729     Returns True for only a subset of potentially nice looking formattings but
3730     the point is to not return false positives that end up producing lines that
3731     are too long.
3732     """
3733     bt = line.bracket_tracker
3734     if not bt.delimiters:
3735         # Without delimiters the optional parentheses are useless.
3736         return True
3737
3738     max_priority = bt.max_delimiter_priority()
3739     if bt.delimiter_count_with_priority(max_priority) > 1:
3740         # With more than one delimiter of a kind the optional parentheses read better.
3741         return False
3742
3743     if max_priority == DOT_PRIORITY:
3744         # A single stranded method call doesn't require optional parentheses.
3745         return True
3746
3747     assert len(line.leaves) >= 2, "Stranded delimiter"
3748
3749     first = line.leaves[0]
3750     second = line.leaves[1]
3751     penultimate = line.leaves[-2]
3752     last = line.leaves[-1]
3753
3754     # With a single delimiter, omit if the expression starts or ends with
3755     # a bracket.
3756     if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
3757         remainder = False
3758         length = 4 * line.depth
3759         for _index, leaf, leaf_length in enumerate_with_length(line):
3760             if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
3761                 remainder = True
3762             if remainder:
3763                 length += leaf_length
3764                 if length > line_length:
3765                     break
3766
3767                 if leaf.type in OPENING_BRACKETS:
3768                     # There are brackets we can further split on.
3769                     remainder = False
3770
3771         else:
3772             # checked the entire string and line length wasn't exceeded
3773             if len(line.leaves) == _index + 1:
3774                 return True
3775
3776         # Note: we are not returning False here because a line might have *both*
3777         # a leading opening bracket and a trailing closing bracket.  If the
3778         # opening bracket doesn't match our rule, maybe the closing will.
3779
3780     if (
3781         last.type == token.RPAR
3782         or last.type == token.RBRACE
3783         or (
3784             # don't use indexing for omitting optional parentheses;
3785             # it looks weird
3786             last.type == token.RSQB
3787             and last.parent
3788             and last.parent.type != syms.trailer
3789         )
3790     ):
3791         if penultimate.type in OPENING_BRACKETS:
3792             # Empty brackets don't help.
3793             return False
3794
3795         if is_multiline_string(first):
3796             # Additional wrapping of a multiline string in this situation is
3797             # unnecessary.
3798             return True
3799
3800         length = 4 * line.depth
3801         seen_other_brackets = False
3802         for _index, leaf, leaf_length in enumerate_with_length(line):
3803             length += leaf_length
3804             if leaf is last.opening_bracket:
3805                 if seen_other_brackets or length <= line_length:
3806                     return True
3807
3808             elif leaf.type in OPENING_BRACKETS:
3809                 # There are brackets we can further split on.
3810                 seen_other_brackets = True
3811
3812     return False
3813
3814
3815 def get_cache_file(mode: FileMode) -> Path:
3816     return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"
3817
3818
3819 def read_cache(mode: FileMode) -> Cache:
3820     """Read the cache if it exists and is well formed.
3821
3822     If it is not well formed, the call to write_cache later should resolve the issue.
3823     """
3824     cache_file = get_cache_file(mode)
3825     if not cache_file.exists():
3826         return {}
3827
3828     with cache_file.open("rb") as fobj:
3829         try:
3830             cache: Cache = pickle.load(fobj)
3831         except pickle.UnpicklingError:
3832             return {}
3833
3834     return cache
3835
3836
3837 def get_cache_info(path: Path) -> CacheInfo:
3838     """Return the information used to check if a file is already formatted or not."""
3839     stat = path.stat()
3840     return stat.st_mtime, stat.st_size
3841
3842
3843 def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
3844     """Split an iterable of paths in `sources` into two sets.
3845
3846     The first contains paths of files that modified on disk or are not in the
3847     cache. The other contains paths to non-modified files.
3848     """
3849     todo, done = set(), set()
3850     for src in sources:
3851         src = src.resolve()
3852         if cache.get(src) != get_cache_info(src):
3853             todo.add(src)
3854         else:
3855             done.add(src)
3856     return todo, done
3857
3858
3859 def write_cache(cache: Cache, sources: Iterable[Path], mode: FileMode) -> None:
3860     """Update the cache file."""
3861     cache_file = get_cache_file(mode)
3862     try:
3863         CACHE_DIR.mkdir(parents=True, exist_ok=True)
3864         new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
3865         with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
3866             pickle.dump(new_cache, f, protocol=pickle.HIGHEST_PROTOCOL)
3867         os.replace(f.name, cache_file)
3868     except OSError:
3869         pass
3870
3871
3872 def patch_click() -> None:
3873     """Make Click not crash.
3874
3875     On certain misconfigured environments, Python 3 selects the ASCII encoding as the
3876     default which restricts paths that it can access during the lifetime of the
3877     application.  Click refuses to work in this scenario by raising a RuntimeError.
3878
3879     In case of Black the likelihood that non-ASCII characters are going to be used in
3880     file paths is minimal since it's Python source code.  Moreover, this crash was
3881     spurious on Python 3.7 thanks to PEP 538 and PEP 540.
3882     """
3883     try:
3884         from click import core
3885         from click import _unicodefun  # type: ignore
3886     except ModuleNotFoundError:
3887         return
3888
3889     for module in (core, _unicodefun):
3890         if hasattr(module, "_verify_python3_env"):
3891             module._verify_python3_env = lambda: None
3892
3893
3894 def patched_main() -> None:
3895     freeze_support()
3896     patch_click()
3897     main()
3898
3899
3900 if __name__ == "__main__":
3901     patched_main()