black.py

   1 import asyncio
   2 from asyncio.base_events import BaseEventLoop
   3 from concurrent.futures import Executor, ProcessPoolExecutor
   4 from datetime import datetime
   5 from enum import Enum
   6 from functools import lru_cache, partial, wraps
   7 import io
   8 import itertools
   9 import logging
  10 from multiprocessing import Manager, freeze_support
  11 import os
  12 from pathlib import Path
  13 import pickle
  14 import re
  15 import signal
  16 import sys
  17 import tempfile
  18 import tokenize
  19 from typing import (
  20     Any,
  21     Callable,
  22     Collection,
  23     Dict,
  24     Generator,
  25     Generic,
  26     Iterable,
  27     Iterator,
  28     List,
  29     Optional,
  30     Pattern,
  31     Sequence,
  32     Set,
  33     Tuple,
  34     TypeVar,
  35     Union,
  36     cast,
  37 )
  38
  39 from appdirs import user_cache_dir
  40 from attr import dataclass, evolve, Factory
  41 import click
  42 import toml
  43
  44 # lib2to3 fork
  45 from blib2to3.pytree import Node, Leaf, type_repr
  46 from blib2to3 import pygram, pytree
  47 from blib2to3.pgen2 import driver, token
  48 from blib2to3.pgen2.grammar import Grammar
  49 from blib2to3.pgen2.parse import ParseError
  50
  51
  52 __version__ = "18.9b0"
  53 DEFAULT_LINE_LENGTH = 88
  54 DEFAULT_EXCLUDES = (
  55     r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/"
  56 )
  57 DEFAULT_INCLUDES = r"\.pyi?$"
  58 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
  59
  60
  61 # types
  62 FileContent = str
  63 Encoding = str
  64 NewLine = str
  65 Depth = int
  66 NodeType = int
  67 LeafID = int
  68 Priority = int
  69 Index = int
  70 LN = Union[Leaf, Node]
  71 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
  72 Timestamp = float
  73 FileSize = int
  74 CacheInfo = Tuple[Timestamp, FileSize]
  75 Cache = Dict[Path, CacheInfo]
  76 out = partial(click.secho, bold=True, err=True)
  77 err = partial(click.secho, fg="red", err=True)
  78
  79 pygram.initialize(CACHE_DIR)
  80 syms = pygram.python_symbols
  81
  82
  83 class NothingChanged(UserWarning):
  84     """Raised when reformatted code is the same as source."""
  85
  86
  87 class CannotSplit(Exception):
  88     """A readable split that fits the allotted line length is impossible."""
  89
  90
  91 class InvalidInput(ValueError):
  92     """Raised when input source code fails all parse attempts."""
  93
  94
  95 class WriteBack(Enum):
  96     NO = 0
  97     YES = 1
  98     DIFF = 2
  99     CHECK = 3
 100
 101     @classmethod
 102     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
 103         if check and not diff:
 104             return cls.CHECK
 105
 106         return cls.DIFF if diff else cls.YES
 107
 108
 109 class Changed(Enum):
 110     NO = 0
 111     CACHED = 1
 112     YES = 2
 113
 114
 115 class TargetVersion(Enum):
 116     PY27 = 2
 117     PY33 = 3
 118     PY34 = 4
 119     PY35 = 5
 120     PY36 = 6
 121     PY37 = 7
 122     PY38 = 8
 123
 124     def is_python2(self) -> bool:
 125         return self is TargetVersion.PY27
 126
 127
 128 PY36_VERSIONS = {TargetVersion.PY36, TargetVersion.PY37, TargetVersion.PY38}
 129
 130
 131 class Feature(Enum):
 132     # All string literals are unicode
 133     UNICODE_LITERALS = 1
 134     F_STRINGS = 2
 135     NUMERIC_UNDERSCORES = 3
 136     TRAILING_COMMA = 4
 137
 138
 139 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
 140     TargetVersion.PY27: set(),
 141     TargetVersion.PY33: {Feature.UNICODE_LITERALS},
 142     TargetVersion.PY34: {Feature.UNICODE_LITERALS},
 143     TargetVersion.PY35: {Feature.UNICODE_LITERALS, Feature.TRAILING_COMMA},
 144     TargetVersion.PY36: {
 145         Feature.UNICODE_LITERALS,
 146         Feature.F_STRINGS,
 147         Feature.NUMERIC_UNDERSCORES,
 148         Feature.TRAILING_COMMA,
 149     },
 150     TargetVersion.PY37: {
 151         Feature.UNICODE_LITERALS,
 152         Feature.F_STRINGS,
 153         Feature.NUMERIC_UNDERSCORES,
 154         Feature.TRAILING_COMMA,
 155     },
 156     TargetVersion.PY38: {
 157         Feature.UNICODE_LITERALS,
 158         Feature.F_STRINGS,
 159         Feature.NUMERIC_UNDERSCORES,
 160         Feature.TRAILING_COMMA,
 161     },
 162 }
 163
 164
 165 @dataclass
 166 class FileMode:
 167     target_versions: Set[TargetVersion] = Factory(set)
 168     line_length: int = DEFAULT_LINE_LENGTH
 169     string_normalization: bool = True
 170     is_pyi: bool = False
 171
 172     def get_cache_key(self) -> str:
 173         if self.target_versions:
 174             version_str = ",".join(
 175                 str(version.value)
 176                 for version in sorted(self.target_versions, key=lambda v: v.value)
 177             )
 178         else:
 179             version_str = "-"
 180         parts = [
 181             version_str,
 182             str(self.line_length),
 183             str(int(self.string_normalization)),
 184             str(int(self.is_pyi)),
 185         ]
 186         return ".".join(parts)
 187
 188
 189 def supports_feature(target_versions: Set[TargetVersion], feature: Feature) -> bool:
 190     return all(feature in VERSION_TO_FEATURES[version] for version in target_versions)
 191
 192
 193 def read_pyproject_toml(
 194     ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
 195 ) -> Optional[str]:
 196     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 197
 198     Returns the path to a successfully found and read configuration file, None
 199     otherwise.
 200     """
 201     assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
 202     if not value:
 203         root = find_project_root(ctx.params.get("src", ()))
 204         path = root / "pyproject.toml"
 205         if path.is_file():
 206             value = str(path)
 207         else:
 208             return None
 209
 210     try:
 211         pyproject_toml = toml.load(value)
 212         config = pyproject_toml.get("tool", {}).get("black", {})
 213     except (toml.TomlDecodeError, OSError) as e:
 214         raise click.FileError(
 215             filename=value, hint=f"Error reading configuration file: {e}"
 216         )
 217
 218     if not config:
 219         return None
 220
 221     if ctx.default_map is None:
 222         ctx.default_map = {}
 223     ctx.default_map.update(  # type: ignore  # bad types in .pyi
 224         {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 225     )
 226     return value
 227
 228
 229 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 230 @click.option(
 231     "-l",
 232     "--line-length",
 233     type=int,
 234     default=DEFAULT_LINE_LENGTH,
 235     help="How many characters per line to allow.",
 236     show_default=True,
 237 )
 238 @click.option(
 239     "-t",
 240     "--target-version",
 241     type=click.Choice([v.name.lower() for v in TargetVersion]),
 242     callback=lambda c, p, v: [TargetVersion[val.upper()] for val in v],
 243     multiple=True,
 244     help=(
 245         "Python versions that should be supported by Black's output. [default: "
 246         "per-file auto-detection]"
 247     ),
 248 )
 249 @click.option(
 250     "--pyi",
 251     is_flag=True,
 252     help=(
 253         "Format all input files like typing stubs regardless of file extension "
 254         "(useful when piping source on standard input)."
 255     ),
 256 )
 257 @click.option(
 258     "-S",
 259     "--skip-string-normalization",
 260     is_flag=True,
 261     help="Don't normalize string quotes or prefixes.",
 262 )
 263 @click.option(
 264     "--check",
 265     is_flag=True,
 266     help=(
 267         "Don't write the files back, just return the status.  Return code 0 "
 268         "means nothing would change.  Return code 1 means some files would be "
 269         "reformatted.  Return code 123 means there was an internal error."
 270     ),
 271 )
 272 @click.option(
 273     "--diff",
 274     is_flag=True,
 275     help="Don't write the files back, just output a diff for each file on stdout.",
 276 )
 277 @click.option(
 278     "--fast/--safe",
 279     is_flag=True,
 280     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 281 )
 282 @click.option(
 283     "--include",
 284     type=str,
 285     default=DEFAULT_INCLUDES,
 286     help=(
 287         "A regular expression that matches files and directories that should be "
 288         "included on recursive searches.  An empty value means all files are "
 289         "included regardless of the name.  Use forward slashes for directories on "
 290         "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
 291         "later."
 292     ),
 293     show_default=True,
 294 )
 295 @click.option(
 296     "--exclude",
 297     type=str,
 298     default=DEFAULT_EXCLUDES,
 299     help=(
 300         "A regular expression that matches files and directories that should be "
 301         "excluded on recursive searches.  An empty value means no paths are excluded. "
 302         "Use forward slashes for directories on all platforms (Windows, too).  "
 303         "Exclusions are calculated first, inclusions later."
 304     ),
 305     show_default=True,
 306 )
 307 @click.option(
 308     "-q",
 309     "--quiet",
 310     is_flag=True,
 311     help=(
 312         "Don't emit non-error messages to stderr. Errors are still emitted, "
 313         "silence those with 2>/dev/null."
 314     ),
 315 )
 316 @click.option(
 317     "-v",
 318     "--verbose",
 319     is_flag=True,
 320     help=(
 321         "Also emit messages to stderr about files that were not changed or were "
 322         "ignored due to --exclude=."
 323     ),
 324 )
 325 @click.version_option(version=__version__)
 326 @click.argument(
 327     "src",
 328     nargs=-1,
 329     type=click.Path(
 330         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 331     ),
 332     is_eager=True,
 333 )
 334 @click.option(
 335     "--config",
 336     type=click.Path(
 337         exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
 338     ),
 339     is_eager=True,
 340     callback=read_pyproject_toml,
 341     help="Read configuration from PATH.",
 342 )
 343 @click.pass_context
 344 def main(
 345     ctx: click.Context,
 346     line_length: int,
 347     target_version: List[TargetVersion],
 348     check: bool,
 349     diff: bool,
 350     fast: bool,
 351     pyi: bool,
 352     skip_string_normalization: bool,
 353     quiet: bool,
 354     verbose: bool,
 355     include: str,
 356     exclude: str,
 357     src: Tuple[str],
 358     config: Optional[str],
 359 ) -> None:
 360     """The uncompromising code formatter."""
 361     write_back = WriteBack.from_configuration(check=check, diff=diff)
 362     if target_version:
 363         versions = set(target_version)
 364     else:
 365         # We'll autodetect later.
 366         versions = set()
 367     mode = FileMode(
 368         target_versions=versions,
 369         line_length=line_length,
 370         is_pyi=pyi,
 371         string_normalization=not skip_string_normalization,
 372     )
 373     if config and verbose:
 374         out(f"Using configuration from {config}.", bold=False, fg="blue")
 375     try:
 376         include_regex = re_compile_maybe_verbose(include)
 377     except re.error:
 378         err(f"Invalid regular expression for include given: {include!r}")
 379         ctx.exit(2)
 380     try:
 381         exclude_regex = re_compile_maybe_verbose(exclude)
 382     except re.error:
 383         err(f"Invalid regular expression for exclude given: {exclude!r}")
 384         ctx.exit(2)
 385     report = Report(check=check, quiet=quiet, verbose=verbose)
 386     root = find_project_root(src)
 387     sources: Set[Path] = set()
 388     for s in src:
 389         p = Path(s)
 390         if p.is_dir():
 391             sources.update(
 392                 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
 393             )
 394         elif p.is_file() or s == "-":
 395             # if a file was explicitly given, we don't care about its extension
 396             sources.add(p)
 397         else:
 398             err(f"invalid path: {s}")
 399     if len(sources) == 0:
 400         if verbose or not quiet:
 401             out("No paths given. Nothing to do 😴")
 402         ctx.exit(0)
 403
 404     if len(sources) == 1:
 405         reformat_one(
 406             src=sources.pop(),
 407             fast=fast,
 408             write_back=write_back,
 409             mode=mode,
 410             report=report,
 411         )
 412     else:
 413         loop = asyncio.get_event_loop()
 414         executor = ProcessPoolExecutor(max_workers=os.cpu_count())
 415         try:
 416             loop.run_until_complete(
 417                 schedule_formatting(
 418                     sources=sources,
 419                     fast=fast,
 420                     write_back=write_back,
 421                     mode=mode,
 422                     report=report,
 423                     loop=loop,
 424                     executor=executor,
 425                 )
 426             )
 427         finally:
 428             shutdown(loop)
 429     if verbose or not quiet:
 430         bang = "💥 💔 💥" if report.return_code else "✨ 🍰 ✨"
 431         out(f"All done! {bang}")
 432         click.secho(str(report), err=True)
 433     ctx.exit(report.return_code)
 434
 435
 436 def reformat_one(
 437     src: Path, fast: bool, write_back: WriteBack, mode: FileMode, report: "Report"
 438 ) -> None:
 439     """Reformat a single file under `src` without spawning child processes.
 440
 441     If `quiet` is True, non-error messages are not output. `line_length`,
 442     `write_back`, `fast` and `pyi` options are passed to
 443     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 444     """
 445     try:
 446         changed = Changed.NO
 447         if not src.is_file() and str(src) == "-":
 448             if format_stdin_to_stdout(fast=fast, write_back=write_back, mode=mode):
 449                 changed = Changed.YES
 450         else:
 451             cache: Cache = {}
 452             if write_back != WriteBack.DIFF:
 453                 cache = read_cache(mode)
 454                 res_src = src.resolve()
 455                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 456                     changed = Changed.CACHED
 457             if changed is not Changed.CACHED and format_file_in_place(
 458                 src, fast=fast, write_back=write_back, mode=mode
 459             ):
 460                 changed = Changed.YES
 461             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 462                 write_back is WriteBack.CHECK and changed is Changed.NO
 463             ):
 464                 write_cache(cache, [src], mode)
 465         report.done(src, changed)
 466     except Exception as exc:
 467         report.failed(src, str(exc))
 468
 469
 470 async def schedule_formatting(
 471     sources: Set[Path],
 472     fast: bool,
 473     write_back: WriteBack,
 474     mode: FileMode,
 475     report: "Report",
 476     loop: BaseEventLoop,
 477     executor: Executor,
 478 ) -> None:
 479     """Run formatting of `sources` in parallel using the provided `executor`.
 480
 481     (Use ProcessPoolExecutors for actual parallelism.)
 482
 483     `line_length`, `write_back`, `fast`, and `pyi` options are passed to
 484     :func:`format_file_in_place`.
 485     """
 486     cache: Cache = {}
 487     if write_back != WriteBack.DIFF:
 488         cache = read_cache(mode)
 489         sources, cached = filter_cached(cache, sources)
 490         for src in sorted(cached):
 491             report.done(src, Changed.CACHED)
 492     if not sources:
 493         return
 494
 495     cancelled = []
 496     sources_to_cache = []
 497     lock = None
 498     if write_back == WriteBack.DIFF:
 499         # For diff output, we need locks to ensure we don't interleave output
 500         # from different processes.
 501         manager = Manager()
 502         lock = manager.Lock()
 503     tasks = {
 504         loop.run_in_executor(
 505             executor, format_file_in_place, src, fast, mode, write_back, lock
 506         ): src
 507         for src in sorted(sources)
 508     }
 509     pending: Iterable[asyncio.Task] = tasks.keys()
 510     try:
 511         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 512         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 513     except NotImplementedError:
 514         # There are no good alternatives for these on Windows.
 515         pass
 516     while pending:
 517         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 518         for task in done:
 519             src = tasks.pop(task)
 520             if task.cancelled():
 521                 cancelled.append(task)
 522             elif task.exception():
 523                 report.failed(src, str(task.exception()))
 524             else:
 525                 changed = Changed.YES if task.result() else Changed.NO
 526                 # If the file was written back or was successfully checked as
 527                 # well-formatted, store this information in the cache.
 528                 if write_back is WriteBack.YES or (
 529                     write_back is WriteBack.CHECK and changed is Changed.NO
 530                 ):
 531                     sources_to_cache.append(src)
 532                 report.done(src, changed)
 533     if cancelled:
 534         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 535     if sources_to_cache:
 536         write_cache(cache, sources_to_cache, mode)
 537
 538
 539 def format_file_in_place(
 540     src: Path,
 541     fast: bool,
 542     mode: FileMode,
 543     write_back: WriteBack = WriteBack.NO,
 544     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 545 ) -> bool:
 546     """Format file under `src` path. Return True if changed.
 547
 548     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 549     code to the file.
 550     `line_length` and `fast` options are passed to :func:`format_file_contents`.
 551     """
 552     if src.suffix == ".pyi":
 553         mode = evolve(mode, is_pyi=True)
 554
 555     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 556     with open(src, "rb") as buf:
 557         src_contents, encoding, newline = decode_bytes(buf.read())
 558     try:
 559         dst_contents = format_file_contents(src_contents, fast=fast, mode=mode)
 560     except NothingChanged:
 561         return False
 562
 563     if write_back == write_back.YES:
 564         with open(src, "w", encoding=encoding, newline=newline) as f:
 565             f.write(dst_contents)
 566     elif write_back == write_back.DIFF:
 567         now = datetime.utcnow()
 568         src_name = f"{src}\t{then} +0000"
 569         dst_name = f"{src}\t{now} +0000"
 570         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 571         if lock:
 572             lock.acquire()
 573         try:
 574             f = io.TextIOWrapper(
 575                 sys.stdout.buffer,
 576                 encoding=encoding,
 577                 newline=newline,
 578                 write_through=True,
 579             )
 580             f.write(diff_contents)
 581             f.detach()
 582         finally:
 583             if lock:
 584                 lock.release()
 585     return True
 586
 587
 588 def format_stdin_to_stdout(
 589     fast: bool, *, write_back: WriteBack = WriteBack.NO, mode: FileMode
 590 ) -> bool:
 591     """Format file on stdin. Return True if changed.
 592
 593     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 594     write a diff to stdout. The `mode` argument is passed to
 595     :func:`format_file_contents`.
 596     """
 597     then = datetime.utcnow()
 598     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 599     dst = src
 600     try:
 601         dst = format_file_contents(src, fast=fast, mode=mode)
 602         return True
 603
 604     except NothingChanged:
 605         return False
 606
 607     finally:
 608         f = io.TextIOWrapper(
 609             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 610         )
 611         if write_back == WriteBack.YES:
 612             f.write(dst)
 613         elif write_back == WriteBack.DIFF:
 614             now = datetime.utcnow()
 615             src_name = f"STDIN\t{then} +0000"
 616             dst_name = f"STDOUT\t{now} +0000"
 617             f.write(diff(src, dst, src_name, dst_name))
 618         f.detach()
 619
 620
 621 def format_file_contents(
 622     src_contents: str, *, fast: bool, mode: FileMode
 623 ) -> FileContent:
 624     """Reformat contents a file and return new contents.
 625
 626     If `fast` is False, additionally confirm that the reformatted code is
 627     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 628     `line_length` is passed to :func:`format_str`.
 629     """
 630     if src_contents.strip() == "":
 631         raise NothingChanged
 632
 633     dst_contents = format_str(src_contents, mode=mode)
 634     if src_contents == dst_contents:
 635         raise NothingChanged
 636
 637     if not fast:
 638         assert_equivalent(src_contents, dst_contents)
 639         assert_stable(src_contents, dst_contents, mode=mode)
 640     return dst_contents
 641
 642
 643 def format_str(src_contents: str, *, mode: FileMode) -> FileContent:
 644     """Reformat a string and return new contents.
 645
 646     `line_length` determines how many characters per line are allowed.
 647     """
 648     src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions)
 649     dst_contents = ""
 650     future_imports = get_future_imports(src_node)
 651     if mode.target_versions:
 652         versions = mode.target_versions
 653     else:
 654         versions = detect_target_versions(src_node)
 655     normalize_fmt_off(src_node)
 656     lines = LineGenerator(
 657         remove_u_prefix="unicode_literals" in future_imports
 658         or supports_feature(versions, Feature.UNICODE_LITERALS),
 659         is_pyi=mode.is_pyi,
 660         normalize_strings=mode.string_normalization,
 661     )
 662     elt = EmptyLineTracker(is_pyi=mode.is_pyi)
 663     empty_line = Line()
 664     after = 0
 665     for current_line in lines.visit(src_node):
 666         for _ in range(after):
 667             dst_contents += str(empty_line)
 668         before, after = elt.maybe_empty_lines(current_line)
 669         for _ in range(before):
 670             dst_contents += str(empty_line)
 671         for line in split_line(
 672             current_line,
 673             line_length=mode.line_length,
 674             supports_trailing_commas=supports_feature(versions, Feature.TRAILING_COMMA),
 675         ):
 676             dst_contents += str(line)
 677     return dst_contents
 678
 679
 680 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 681     """Return a tuple of (decoded_contents, encoding, newline).
 682
 683     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 684     universal newlines (i.e. only contains LF).
 685     """
 686     srcbuf = io.BytesIO(src)
 687     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 688     if not lines:
 689         return "", encoding, "\n"
 690
 691     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 692     srcbuf.seek(0)
 693     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 694         return tiow.read(), encoding, newline
 695
 696
 697 GRAMMARS = [
 698     pygram.python_grammar_no_print_statement_no_exec_statement,
 699     pygram.python_grammar_no_print_statement,
 700     pygram.python_grammar,
 701 ]
 702
 703
 704 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
 705     if not target_versions:
 706         return GRAMMARS
 707     elif all(not version.is_python2() for version in target_versions):
 708         # Python 2-compatible code, so don't try Python 3 grammar.
 709         return [
 710             pygram.python_grammar_no_print_statement_no_exec_statement,
 711             pygram.python_grammar_no_print_statement,
 712         ]
 713     else:
 714         return [pygram.python_grammar]
 715
 716
 717 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
 718     """Given a string with source, return the lib2to3 Node."""
 719     if src_txt[-1:] != "\n":
 720         src_txt += "\n"
 721
 722     for grammar in get_grammars(set(target_versions)):
 723         drv = driver.Driver(grammar, pytree.convert)
 724         try:
 725             result = drv.parse_string(src_txt, True)
 726             break
 727
 728         except ParseError as pe:
 729             lineno, column = pe.context[1]
 730             lines = src_txt.splitlines()
 731             try:
 732                 faulty_line = lines[lineno - 1]
 733             except IndexError:
 734                 faulty_line = "<line number missing in source>"
 735             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 736     else:
 737         raise exc from None
 738
 739     if isinstance(result, Leaf):
 740         result = Node(syms.file_input, [result])
 741     return result
 742
 743
 744 def lib2to3_unparse(node: Node) -> str:
 745     """Given a lib2to3 node, return its string representation."""
 746     code = str(node)
 747     return code
 748
 749
 750 T = TypeVar("T")
 751
 752
 753 class Visitor(Generic[T]):
 754     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 755
 756     def visit(self, node: LN) -> Iterator[T]:
 757         """Main method to visit `node` and its children.
 758
 759         It tries to find a `visit_*()` method for the given `node.type`, like
 760         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 761         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 762         instead.
 763
 764         Then yields objects of type `T` from the selected visitor.
 765         """
 766         if node.type < 256:
 767             name = token.tok_name[node.type]
 768         else:
 769             name = type_repr(node.type)
 770         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 771
 772     def visit_default(self, node: LN) -> Iterator[T]:
 773         """Default `visit_*()` implementation. Recurses to children of `node`."""
 774         if isinstance(node, Node):
 775             for child in node.children:
 776                 yield from self.visit(child)
 777
 778
 779 @dataclass
 780 class DebugVisitor(Visitor[T]):
 781     tree_depth: int = 0
 782
 783     def visit_default(self, node: LN) -> Iterator[T]:
 784         indent = " " * (2 * self.tree_depth)
 785         if isinstance(node, Node):
 786             _type = type_repr(node.type)
 787             out(f"{indent}{_type}", fg="yellow")
 788             self.tree_depth += 1
 789             for child in node.children:
 790                 yield from self.visit(child)
 791
 792             self.tree_depth -= 1
 793             out(f"{indent}/{_type}", fg="yellow", bold=False)
 794         else:
 795             _type = token.tok_name.get(node.type, str(node.type))
 796             out(f"{indent}{_type}", fg="blue", nl=False)
 797             if node.prefix:
 798                 # We don't have to handle prefixes for `Node` objects since
 799                 # that delegates to the first child anyway.
 800                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 801             out(f" {node.value!r}", fg="blue", bold=False)
 802
 803     @classmethod
 804     def show(cls, code: Union[str, Leaf, Node]) -> None:
 805         """Pretty-print the lib2to3 AST of a given string of `code`.
 806
 807         Convenience method for debugging.
 808         """
 809         v: DebugVisitor[None] = DebugVisitor()
 810         if isinstance(code, str):
 811             code = lib2to3_parse(code)
 812         list(v.visit(code))
 813
 814
 815 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
 816 STATEMENT = {
 817     syms.if_stmt,
 818     syms.while_stmt,
 819     syms.for_stmt,
 820     syms.try_stmt,
 821     syms.except_clause,
 822     syms.with_stmt,
 823     syms.funcdef,
 824     syms.classdef,
 825 }
 826 STANDALONE_COMMENT = 153
 827 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
 828 LOGIC_OPERATORS = {"and", "or"}
 829 COMPARATORS = {
 830     token.LESS,
 831     token.GREATER,
 832     token.EQEQUAL,
 833     token.NOTEQUAL,
 834     token.LESSEQUAL,
 835     token.GREATEREQUAL,
 836 }
 837 MATH_OPERATORS = {
 838     token.VBAR,
 839     token.CIRCUMFLEX,
 840     token.AMPER,
 841     token.LEFTSHIFT,
 842     token.RIGHTSHIFT,
 843     token.PLUS,
 844     token.MINUS,
 845     token.STAR,
 846     token.SLASH,
 847     token.DOUBLESLASH,
 848     token.PERCENT,
 849     token.AT,
 850     token.TILDE,
 851     token.DOUBLESTAR,
 852 }
 853 STARS = {token.STAR, token.DOUBLESTAR}
 854 VARARGS_PARENTS = {
 855     syms.arglist,
 856     syms.argument,  # double star in arglist
 857     syms.trailer,  # single argument to call
 858     syms.typedargslist,
 859     syms.varargslist,  # lambdas
 860 }
 861 UNPACKING_PARENTS = {
 862     syms.atom,  # single element of a list or set literal
 863     syms.dictsetmaker,
 864     syms.listmaker,
 865     syms.testlist_gexp,
 866     syms.testlist_star_expr,
 867 }
 868 TEST_DESCENDANTS = {
 869     syms.test,
 870     syms.lambdef,
 871     syms.or_test,
 872     syms.and_test,
 873     syms.not_test,
 874     syms.comparison,
 875     syms.star_expr,
 876     syms.expr,
 877     syms.xor_expr,
 878     syms.and_expr,
 879     syms.shift_expr,
 880     syms.arith_expr,
 881     syms.trailer,
 882     syms.term,
 883     syms.power,
 884 }
 885 ASSIGNMENTS = {
 886     "=",
 887     "+=",
 888     "-=",
 889     "*=",
 890     "@=",
 891     "/=",
 892     "%=",
 893     "&=",
 894     "|=",
 895     "^=",
 896     "<<=",
 897     ">>=",
 898     "**=",
 899     "//=",
 900 }
 901 COMPREHENSION_PRIORITY = 20
 902 COMMA_PRIORITY = 18
 903 TERNARY_PRIORITY = 16
 904 LOGIC_PRIORITY = 14
 905 STRING_PRIORITY = 12
 906 COMPARATOR_PRIORITY = 10
 907 MATH_PRIORITIES = {
 908     token.VBAR: 9,
 909     token.CIRCUMFLEX: 8,
 910     token.AMPER: 7,
 911     token.LEFTSHIFT: 6,
 912     token.RIGHTSHIFT: 6,
 913     token.PLUS: 5,
 914     token.MINUS: 5,
 915     token.STAR: 4,
 916     token.SLASH: 4,
 917     token.DOUBLESLASH: 4,
 918     token.PERCENT: 4,
 919     token.AT: 4,
 920     token.TILDE: 3,
 921     token.DOUBLESTAR: 2,
 922 }
 923 DOT_PRIORITY = 1
 924
 925
 926 @dataclass
 927 class BracketTracker:
 928     """Keeps track of brackets on a line."""
 929
 930     depth: int = 0
 931     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
 932     delimiters: Dict[LeafID, Priority] = Factory(dict)
 933     previous: Optional[Leaf] = None
 934     _for_loop_depths: List[int] = Factory(list)
 935     _lambda_argument_depths: List[int] = Factory(list)
 936
 937     def mark(self, leaf: Leaf) -> None:
 938         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
 939
 940         All leaves receive an int `bracket_depth` field that stores how deep
 941         within brackets a given leaf is. 0 means there are no enclosing brackets
 942         that started on this line.
 943
 944         If a leaf is itself a closing bracket, it receives an `opening_bracket`
 945         field that it forms a pair with. This is a one-directional link to
 946         avoid reference cycles.
 947
 948         If a leaf is a delimiter (a token on which Black can split the line if
 949         needed) and it's on depth 0, its `id()` is stored in the tracker's
 950         `delimiters` field.
 951         """
 952         if leaf.type == token.COMMENT:
 953             return
 954
 955         self.maybe_decrement_after_for_loop_variable(leaf)
 956         self.maybe_decrement_after_lambda_arguments(leaf)
 957         if leaf.type in CLOSING_BRACKETS:
 958             self.depth -= 1
 959             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
 960             leaf.opening_bracket = opening_bracket
 961         leaf.bracket_depth = self.depth
 962         if self.depth == 0:
 963             delim = is_split_before_delimiter(leaf, self.previous)
 964             if delim and self.previous is not None:
 965                 self.delimiters[id(self.previous)] = delim
 966             else:
 967                 delim = is_split_after_delimiter(leaf, self.previous)
 968                 if delim:
 969                     self.delimiters[id(leaf)] = delim
 970         if leaf.type in OPENING_BRACKETS:
 971             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
 972             self.depth += 1
 973         self.previous = leaf
 974         self.maybe_increment_lambda_arguments(leaf)
 975         self.maybe_increment_for_loop_variable(leaf)
 976
 977     def any_open_brackets(self) -> bool:
 978         """Return True if there is an yet unmatched open bracket on the line."""
 979         return bool(self.bracket_match)
 980
 981     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
 982         """Return the highest priority of a delimiter found on the line.
 983
 984         Values are consistent with what `is_split_*_delimiter()` return.
 985         Raises ValueError on no delimiters.
 986         """
 987         return max(v for k, v in self.delimiters.items() if k not in exclude)
 988
 989     def delimiter_count_with_priority(self, priority: int = 0) -> int:
 990         """Return the number of delimiters with the given `priority`.
 991
 992         If no `priority` is passed, defaults to max priority on the line.
 993         """
 994         if not self.delimiters:
 995             return 0
 996
 997         priority = priority or self.max_delimiter_priority()
 998         return sum(1 for p in self.delimiters.values() if p == priority)
 999
1000     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1001         """In a for loop, or comprehension, the variables are often unpacks.
1002
1003         To avoid splitting on the comma in this situation, increase the depth of
1004         tokens between `for` and `in`.
1005         """
1006         if leaf.type == token.NAME and leaf.value == "for":
1007             self.depth += 1
1008             self._for_loop_depths.append(self.depth)
1009             return True
1010
1011         return False
1012
1013     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
1014         """See `maybe_increment_for_loop_variable` above for explanation."""
1015         if (
1016             self._for_loop_depths
1017             and self._for_loop_depths[-1] == self.depth
1018             and leaf.type == token.NAME
1019             and leaf.value == "in"
1020         ):
1021             self.depth -= 1
1022             self._for_loop_depths.pop()
1023             return True
1024
1025         return False
1026
1027     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
1028         """In a lambda expression, there might be more than one argument.
1029
1030         To avoid splitting on the comma in this situation, increase the depth of
1031         tokens between `lambda` and `:`.
1032         """
1033         if leaf.type == token.NAME and leaf.value == "lambda":
1034             self.depth += 1
1035             self._lambda_argument_depths.append(self.depth)
1036             return True
1037
1038         return False
1039
1040     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1041         """See `maybe_increment_lambda_arguments` above for explanation."""
1042         if (
1043             self._lambda_argument_depths
1044             and self._lambda_argument_depths[-1] == self.depth
1045             and leaf.type == token.COLON
1046         ):
1047             self.depth -= 1
1048             self._lambda_argument_depths.pop()
1049             return True
1050
1051         return False
1052
1053     def get_open_lsqb(self) -> Optional[Leaf]:
1054         """Return the most recent opening square bracket (if any)."""
1055         return self.bracket_match.get((self.depth - 1, token.RSQB))
1056
1057
1058 @dataclass
1059 class Line:
1060     """Holds leaves and comments. Can be printed with `str(line)`."""
1061
1062     depth: int = 0
1063     leaves: List[Leaf] = Factory(list)
1064     comments: Dict[LeafID, List[Leaf]] = Factory(dict)  # keys ordered like `leaves`
1065     bracket_tracker: BracketTracker = Factory(BracketTracker)
1066     inside_brackets: bool = False
1067     should_explode: bool = False
1068
1069     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1070         """Add a new `leaf` to the end of the line.
1071
1072         Unless `preformatted` is True, the `leaf` will receive a new consistent
1073         whitespace prefix and metadata applied by :class:`BracketTracker`.
1074         Trailing commas are maybe removed, unpacked for loop variables are
1075         demoted from being delimiters.
1076
1077         Inline comments are put aside.
1078         """
1079         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1080         if not has_value:
1081             return
1082
1083         if token.COLON == leaf.type and self.is_class_paren_empty:
1084             del self.leaves[-2:]
1085         if self.leaves and not preformatted:
1086             # Note: at this point leaf.prefix should be empty except for
1087             # imports, for which we only preserve newlines.
1088             leaf.prefix += whitespace(
1089                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1090             )
1091         if self.inside_brackets or not preformatted:
1092             self.bracket_tracker.mark(leaf)
1093             self.maybe_remove_trailing_comma(leaf)
1094         if not self.append_comment(leaf):
1095             self.leaves.append(leaf)
1096
1097     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1098         """Like :func:`append()` but disallow invalid standalone comment structure.
1099
1100         Raises ValueError when any `leaf` is appended after a standalone comment
1101         or when a standalone comment is not the first leaf on the line.
1102         """
1103         if self.bracket_tracker.depth == 0:
1104             if self.is_comment:
1105                 raise ValueError("cannot append to standalone comments")
1106
1107             if self.leaves and leaf.type == STANDALONE_COMMENT:
1108                 raise ValueError(
1109                     "cannot append standalone comments to a populated line"
1110                 )
1111
1112         self.append(leaf, preformatted=preformatted)
1113
1114     @property
1115     def is_comment(self) -> bool:
1116         """Is this line a standalone comment?"""
1117         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1118
1119     @property
1120     def is_decorator(self) -> bool:
1121         """Is this line a decorator?"""
1122         return bool(self) and self.leaves[0].type == token.AT
1123
1124     @property
1125     def is_import(self) -> bool:
1126         """Is this an import line?"""
1127         return bool(self) and is_import(self.leaves[0])
1128
1129     @property
1130     def is_class(self) -> bool:
1131         """Is this line a class definition?"""
1132         return (
1133             bool(self)
1134             and self.leaves[0].type == token.NAME
1135             and self.leaves[0].value == "class"
1136         )
1137
1138     @property
1139     def is_stub_class(self) -> bool:
1140         """Is this line a class definition with a body consisting only of "..."?"""
1141         return self.is_class and self.leaves[-3:] == [
1142             Leaf(token.DOT, ".") for _ in range(3)
1143         ]
1144
1145     @property
1146     def is_def(self) -> bool:
1147         """Is this a function definition? (Also returns True for async defs.)"""
1148         try:
1149             first_leaf = self.leaves[0]
1150         except IndexError:
1151             return False
1152
1153         try:
1154             second_leaf: Optional[Leaf] = self.leaves[1]
1155         except IndexError:
1156             second_leaf = None
1157         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1158             first_leaf.type == token.ASYNC
1159             and second_leaf is not None
1160             and second_leaf.type == token.NAME
1161             and second_leaf.value == "def"
1162         )
1163
1164     @property
1165     def is_class_paren_empty(self) -> bool:
1166         """Is this a class with no base classes but using parentheses?
1167
1168         Those are unnecessary and should be removed.
1169         """
1170         return (
1171             bool(self)
1172             and len(self.leaves) == 4
1173             and self.is_class
1174             and self.leaves[2].type == token.LPAR
1175             and self.leaves[2].value == "("
1176             and self.leaves[3].type == token.RPAR
1177             and self.leaves[3].value == ")"
1178         )
1179
1180     @property
1181     def is_triple_quoted_string(self) -> bool:
1182         """Is the line a triple quoted string?"""
1183         return (
1184             bool(self)
1185             and self.leaves[0].type == token.STRING
1186             and self.leaves[0].value.startswith(('"""', "'''"))
1187         )
1188
1189     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1190         """If so, needs to be split before emitting."""
1191         for leaf in self.leaves:
1192             if leaf.type == STANDALONE_COMMENT:
1193                 if leaf.bracket_depth <= depth_limit:
1194                     return True
1195         return False
1196
1197     def contains_inner_type_comments(self) -> bool:
1198         ignored_ids = set()
1199         try:
1200             last_leaf = self.leaves[-1]
1201             ignored_ids.add(id(last_leaf))
1202             if last_leaf.type == token.COMMA:
1203                 # When trailing commas are inserted by Black for consistency, comments
1204                 # after the previous last element are not moved (they don't have to,
1205                 # rendering will still be correct).  So we ignore trailing commas.
1206                 last_leaf = self.leaves[-2]
1207                 ignored_ids.add(id(last_leaf))
1208         except IndexError:
1209             return False
1210
1211         for leaf_id, comments in self.comments.items():
1212             if leaf_id in ignored_ids:
1213                 continue
1214
1215             for comment in comments:
1216                 if is_type_comment(comment):
1217                     return True
1218
1219         return False
1220
1221     def contains_multiline_strings(self) -> bool:
1222         for leaf in self.leaves:
1223             if is_multiline_string(leaf):
1224                 return True
1225
1226         return False
1227
1228     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1229         """Remove trailing comma if there is one and it's safe."""
1230         if not (
1231             self.leaves
1232             and self.leaves[-1].type == token.COMMA
1233             and closing.type in CLOSING_BRACKETS
1234         ):
1235             return False
1236
1237         if closing.type == token.RBRACE:
1238             self.remove_trailing_comma()
1239             return True
1240
1241         if closing.type == token.RSQB:
1242             comma = self.leaves[-1]
1243             if comma.parent and comma.parent.type == syms.listmaker:
1244                 self.remove_trailing_comma()
1245                 return True
1246
1247         # For parens let's check if it's safe to remove the comma.
1248         # Imports are always safe.
1249         if self.is_import:
1250             self.remove_trailing_comma()
1251             return True
1252
1253         # Otherwise, if the trailing one is the only one, we might mistakenly
1254         # change a tuple into a different type by removing the comma.
1255         depth = closing.bracket_depth + 1
1256         commas = 0
1257         opening = closing.opening_bracket
1258         for _opening_index, leaf in enumerate(self.leaves):
1259             if leaf is opening:
1260                 break
1261
1262         else:
1263             return False
1264
1265         for leaf in self.leaves[_opening_index + 1 :]:
1266             if leaf is closing:
1267                 break
1268
1269             bracket_depth = leaf.bracket_depth
1270             if bracket_depth == depth and leaf.type == token.COMMA:
1271                 commas += 1
1272                 if leaf.parent and leaf.parent.type == syms.arglist:
1273                     commas += 1
1274                     break
1275
1276         if commas > 1:
1277             self.remove_trailing_comma()
1278             return True
1279
1280         return False
1281
1282     def append_comment(self, comment: Leaf) -> bool:
1283         """Add an inline or standalone comment to the line."""
1284         if (
1285             comment.type == STANDALONE_COMMENT
1286             and self.bracket_tracker.any_open_brackets()
1287         ):
1288             comment.prefix = ""
1289             return False
1290
1291         if comment.type != token.COMMENT:
1292             return False
1293
1294         if not self.leaves:
1295             comment.type = STANDALONE_COMMENT
1296             comment.prefix = ""
1297             return False
1298
1299         self.comments.setdefault(id(self.leaves[-1]), []).append(comment)
1300         return True
1301
1302     def comments_after(self, leaf: Leaf) -> List[Leaf]:
1303         """Generate comments that should appear directly after `leaf`."""
1304         return self.comments.get(id(leaf), [])
1305
1306     def remove_trailing_comma(self) -> None:
1307         """Remove the trailing comma and moves the comments attached to it."""
1308         trailing_comma = self.leaves.pop()
1309         trailing_comma_comments = self.comments.pop(id(trailing_comma), [])
1310         self.comments.setdefault(id(self.leaves[-1]), []).extend(
1311             trailing_comma_comments
1312         )
1313
1314     def is_complex_subscript(self, leaf: Leaf) -> bool:
1315         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1316         open_lsqb = self.bracket_tracker.get_open_lsqb()
1317         if open_lsqb is None:
1318             return False
1319
1320         subscript_start = open_lsqb.next_sibling
1321
1322         if isinstance(subscript_start, Node):
1323             if subscript_start.type == syms.listmaker:
1324                 return False
1325
1326             if subscript_start.type == syms.subscriptlist:
1327                 subscript_start = child_towards(subscript_start, leaf)
1328         return subscript_start is not None and any(
1329             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1330         )
1331
1332     def __str__(self) -> str:
1333         """Render the line."""
1334         if not self:
1335             return "\n"
1336
1337         indent = "    " * self.depth
1338         leaves = iter(self.leaves)
1339         first = next(leaves)
1340         res = f"{first.prefix}{indent}{first.value}"
1341         for leaf in leaves:
1342             res += str(leaf)
1343         for comment in itertools.chain.from_iterable(self.comments.values()):
1344             res += str(comment)
1345         return res + "\n"
1346
1347     def __bool__(self) -> bool:
1348         """Return True if the line has leaves or comments."""
1349         return bool(self.leaves or self.comments)
1350
1351
1352 @dataclass
1353 class EmptyLineTracker:
1354     """Provides a stateful method that returns the number of potential extra
1355     empty lines needed before and after the currently processed line.
1356
1357     Note: this tracker works on lines that haven't been split yet.  It assumes
1358     the prefix of the first leaf consists of optional newlines.  Those newlines
1359     are consumed by `maybe_empty_lines()` and included in the computation.
1360     """
1361
1362     is_pyi: bool = False
1363     previous_line: Optional[Line] = None
1364     previous_after: int = 0
1365     previous_defs: List[int] = Factory(list)
1366
1367     def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1368         """Return the number of extra empty lines before and after the `current_line`.
1369
1370         This is for separating `def`, `async def` and `class` with extra empty
1371         lines (two on module-level).
1372         """
1373         before, after = self._maybe_empty_lines(current_line)
1374         before -= self.previous_after
1375         self.previous_after = after
1376         self.previous_line = current_line
1377         return before, after
1378
1379     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1380         max_allowed = 1
1381         if current_line.depth == 0:
1382             max_allowed = 1 if self.is_pyi else 2
1383         if current_line.leaves:
1384             # Consume the first leaf's extra newlines.
1385             first_leaf = current_line.leaves[0]
1386             before = first_leaf.prefix.count("\n")
1387             before = min(before, max_allowed)
1388             first_leaf.prefix = ""
1389         else:
1390             before = 0
1391         depth = current_line.depth
1392         while self.previous_defs and self.previous_defs[-1] >= depth:
1393             self.previous_defs.pop()
1394             if self.is_pyi:
1395                 before = 0 if depth else 1
1396             else:
1397                 before = 1 if depth else 2
1398         if current_line.is_decorator or current_line.is_def or current_line.is_class:
1399             return self._maybe_empty_lines_for_class_or_def(current_line, before)
1400
1401         if (
1402             self.previous_line
1403             and self.previous_line.is_import
1404             and not current_line.is_import
1405             and depth == self.previous_line.depth
1406         ):
1407             return (before or 1), 0
1408
1409         if (
1410             self.previous_line
1411             and self.previous_line.is_class
1412             and current_line.is_triple_quoted_string
1413         ):
1414             return before, 1
1415
1416         return before, 0
1417
1418     def _maybe_empty_lines_for_class_or_def(
1419         self, current_line: Line, before: int
1420     ) -> Tuple[int, int]:
1421         if not current_line.is_decorator:
1422             self.previous_defs.append(current_line.depth)
1423         if self.previous_line is None:
1424             # Don't insert empty lines before the first line in the file.
1425             return 0, 0
1426
1427         if self.previous_line.is_decorator:
1428             return 0, 0
1429
1430         if self.previous_line.depth < current_line.depth and (
1431             self.previous_line.is_class or self.previous_line.is_def
1432         ):
1433             return 0, 0
1434
1435         if (
1436             self.previous_line.is_comment
1437             and self.previous_line.depth == current_line.depth
1438             and before == 0
1439         ):
1440             return 0, 0
1441
1442         if self.is_pyi:
1443             if self.previous_line.depth > current_line.depth:
1444                 newlines = 1
1445             elif current_line.is_class or self.previous_line.is_class:
1446                 if current_line.is_stub_class and self.previous_line.is_stub_class:
1447                     # No blank line between classes with an empty body
1448                     newlines = 0
1449                 else:
1450                     newlines = 1
1451             elif current_line.is_def and not self.previous_line.is_def:
1452                 # Blank line between a block of functions and a block of non-functions
1453                 newlines = 1
1454             else:
1455                 newlines = 0
1456         else:
1457             newlines = 2
1458         if current_line.depth and newlines:
1459             newlines -= 1
1460         return newlines, 0
1461
1462
1463 @dataclass
1464 class LineGenerator(Visitor[Line]):
1465     """Generates reformatted Line objects.  Empty lines are not emitted.
1466
1467     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1468     in ways that will no longer stringify to valid Python code on the tree.
1469     """
1470
1471     is_pyi: bool = False
1472     normalize_strings: bool = True
1473     current_line: Line = Factory(Line)
1474     remove_u_prefix: bool = False
1475
1476     def line(self, indent: int = 0) -> Iterator[Line]:
1477         """Generate a line.
1478
1479         If the line is empty, only emit if it makes sense.
1480         If the line is too long, split it first and then generate.
1481
1482         If any lines were generated, set up a new current_line.
1483         """
1484         if not self.current_line:
1485             self.current_line.depth += indent
1486             return  # Line is empty, don't emit. Creating a new one unnecessary.
1487
1488         complete_line = self.current_line
1489         self.current_line = Line(depth=complete_line.depth + indent)
1490         yield complete_line
1491
1492     def visit_default(self, node: LN) -> Iterator[Line]:
1493         """Default `visit_*()` implementation. Recurses to children of `node`."""
1494         if isinstance(node, Leaf):
1495             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1496             for comment in generate_comments(node):
1497                 if any_open_brackets:
1498                     # any comment within brackets is subject to splitting
1499                     self.current_line.append(comment)
1500                 elif comment.type == token.COMMENT:
1501                     # regular trailing comment
1502                     self.current_line.append(comment)
1503                     yield from self.line()
1504
1505                 else:
1506                     # regular standalone comment
1507                     yield from self.line()
1508
1509                     self.current_line.append(comment)
1510                     yield from self.line()
1511
1512             normalize_prefix(node, inside_brackets=any_open_brackets)
1513             if self.normalize_strings and node.type == token.STRING:
1514                 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1515                 normalize_string_quotes(node)
1516             if node.type == token.NUMBER:
1517                 normalize_numeric_literal(node)
1518             if node.type not in WHITESPACE:
1519                 self.current_line.append(node)
1520         yield from super().visit_default(node)
1521
1522     def visit_INDENT(self, node: Node) -> Iterator[Line]:
1523         """Increase indentation level, maybe yield a line."""
1524         # In blib2to3 INDENT never holds comments.
1525         yield from self.line(+1)
1526         yield from self.visit_default(node)
1527
1528     def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1529         """Decrease indentation level, maybe yield a line."""
1530         # The current line might still wait for trailing comments.  At DEDENT time
1531         # there won't be any (they would be prefixes on the preceding NEWLINE).
1532         # Emit the line then.
1533         yield from self.line()
1534
1535         # While DEDENT has no value, its prefix may contain standalone comments
1536         # that belong to the current indentation level.  Get 'em.
1537         yield from self.visit_default(node)
1538
1539         # Finally, emit the dedent.
1540         yield from self.line(-1)
1541
1542     def visit_stmt(
1543         self, node: Node, keywords: Set[str], parens: Set[str]
1544     ) -> Iterator[Line]:
1545         """Visit a statement.
1546
1547         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1548         `def`, `with`, `class`, `assert` and assignments.
1549
1550         The relevant Python language `keywords` for a given statement will be
1551         NAME leaves within it. This methods puts those on a separate line.
1552
1553         `parens` holds a set of string leaf values immediately after which
1554         invisible parens should be put.
1555         """
1556         normalize_invisible_parens(node, parens_after=parens)
1557         for child in node.children:
1558             if child.type == token.NAME and child.value in keywords:  # type: ignore
1559                 yield from self.line()
1560
1561             yield from self.visit(child)
1562
1563     def visit_suite(self, node: Node) -> Iterator[Line]:
1564         """Visit a suite."""
1565         if self.is_pyi and is_stub_suite(node):
1566             yield from self.visit(node.children[2])
1567         else:
1568             yield from self.visit_default(node)
1569
1570     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1571         """Visit a statement without nested statements."""
1572         is_suite_like = node.parent and node.parent.type in STATEMENT
1573         if is_suite_like:
1574             if self.is_pyi and is_stub_body(node):
1575                 yield from self.visit_default(node)
1576             else:
1577                 yield from self.line(+1)
1578                 yield from self.visit_default(node)
1579                 yield from self.line(-1)
1580
1581         else:
1582             if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1583                 yield from self.line()
1584             yield from self.visit_default(node)
1585
1586     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1587         """Visit `async def`, `async for`, `async with`."""
1588         yield from self.line()
1589
1590         children = iter(node.children)
1591         for child in children:
1592             yield from self.visit(child)
1593
1594             if child.type == token.ASYNC:
1595                 break
1596
1597         internal_stmt = next(children)
1598         for child in internal_stmt.children:
1599             yield from self.visit(child)
1600
1601     def visit_decorators(self, node: Node) -> Iterator[Line]:
1602         """Visit decorators."""
1603         for child in node.children:
1604             yield from self.line()
1605             yield from self.visit(child)
1606
1607     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1608         """Remove a semicolon and put the other statement on a separate line."""
1609         yield from self.line()
1610
1611     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1612         """End of file. Process outstanding comments and end with a newline."""
1613         yield from self.visit_default(leaf)
1614         yield from self.line()
1615
1616     def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
1617         if not self.current_line.bracket_tracker.any_open_brackets():
1618             yield from self.line()
1619         yield from self.visit_default(leaf)
1620
1621     def __attrs_post_init__(self) -> None:
1622         """You are in a twisty little maze of passages."""
1623         v = self.visit_stmt
1624         Ø: Set[str] = set()
1625         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1626         self.visit_if_stmt = partial(
1627             v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1628         )
1629         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1630         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1631         self.visit_try_stmt = partial(
1632             v, keywords={"try", "except", "else", "finally"}, parens=Ø
1633         )
1634         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1635         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1636         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1637         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1638         self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1639         self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1640         self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
1641         self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
1642         self.visit_async_funcdef = self.visit_async_stmt
1643         self.visit_decorated = self.visit_decorators
1644
1645
1646 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1647 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1648 OPENING_BRACKETS = set(BRACKET.keys())
1649 CLOSING_BRACKETS = set(BRACKET.values())
1650 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1651 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1652
1653
1654 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa: C901
1655     """Return whitespace prefix if needed for the given `leaf`.
1656
1657     `complex_subscript` signals whether the given leaf is part of a subscription
1658     which has non-trivial arguments, like arithmetic expressions or function calls.
1659     """
1660     NO = ""
1661     SPACE = " "
1662     DOUBLESPACE = "  "
1663     t = leaf.type
1664     p = leaf.parent
1665     v = leaf.value
1666     if t in ALWAYS_NO_SPACE:
1667         return NO
1668
1669     if t == token.COMMENT:
1670         return DOUBLESPACE
1671
1672     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1673     if t == token.COLON and p.type not in {
1674         syms.subscript,
1675         syms.subscriptlist,
1676         syms.sliceop,
1677     }:
1678         return NO
1679
1680     prev = leaf.prev_sibling
1681     if not prev:
1682         prevp = preceding_leaf(p)
1683         if not prevp or prevp.type in OPENING_BRACKETS:
1684             return NO
1685
1686         if t == token.COLON:
1687             if prevp.type == token.COLON:
1688                 return NO
1689
1690             elif prevp.type != token.COMMA and not complex_subscript:
1691                 return NO
1692
1693             return SPACE
1694
1695         if prevp.type == token.EQUAL:
1696             if prevp.parent:
1697                 if prevp.parent.type in {
1698                     syms.arglist,
1699                     syms.argument,
1700                     syms.parameters,
1701                     syms.varargslist,
1702                 }:
1703                     return NO
1704
1705                 elif prevp.parent.type == syms.typedargslist:
1706                     # A bit hacky: if the equal sign has whitespace, it means we
1707                     # previously found it's a typed argument.  So, we're using
1708                     # that, too.
1709                     return prevp.prefix
1710
1711         elif prevp.type in STARS:
1712             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1713                 return NO
1714
1715         elif prevp.type == token.COLON:
1716             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1717                 return SPACE if complex_subscript else NO
1718
1719         elif (
1720             prevp.parent
1721             and prevp.parent.type == syms.factor
1722             and prevp.type in MATH_OPERATORS
1723         ):
1724             return NO
1725
1726         elif (
1727             prevp.type == token.RIGHTSHIFT
1728             and prevp.parent
1729             and prevp.parent.type == syms.shift_expr
1730             and prevp.prev_sibling
1731             and prevp.prev_sibling.type == token.NAME
1732             and prevp.prev_sibling.value == "print"  # type: ignore
1733         ):
1734             # Python 2 print chevron
1735             return NO
1736
1737     elif prev.type in OPENING_BRACKETS:
1738         return NO
1739
1740     if p.type in {syms.parameters, syms.arglist}:
1741         # untyped function signatures or calls
1742         if not prev or prev.type != token.COMMA:
1743             return NO
1744
1745     elif p.type == syms.varargslist:
1746         # lambdas
1747         if prev and prev.type != token.COMMA:
1748             return NO
1749
1750     elif p.type == syms.typedargslist:
1751         # typed function signatures
1752         if not prev:
1753             return NO
1754
1755         if t == token.EQUAL:
1756             if prev.type != syms.tname:
1757                 return NO
1758
1759         elif prev.type == token.EQUAL:
1760             # A bit hacky: if the equal sign has whitespace, it means we
1761             # previously found it's a typed argument.  So, we're using that, too.
1762             return prev.prefix
1763
1764         elif prev.type != token.COMMA:
1765             return NO
1766
1767     elif p.type == syms.tname:
1768         # type names
1769         if not prev:
1770             prevp = preceding_leaf(p)
1771             if not prevp or prevp.type != token.COMMA:
1772                 return NO
1773
1774     elif p.type == syms.trailer:
1775         # attributes and calls
1776         if t == token.LPAR or t == token.RPAR:
1777             return NO
1778
1779         if not prev:
1780             if t == token.DOT:
1781                 prevp = preceding_leaf(p)
1782                 if not prevp or prevp.type != token.NUMBER:
1783                     return NO
1784
1785             elif t == token.LSQB:
1786                 return NO
1787
1788         elif prev.type != token.COMMA:
1789             return NO
1790
1791     elif p.type == syms.argument:
1792         # single argument
1793         if t == token.EQUAL:
1794             return NO
1795
1796         if not prev:
1797             prevp = preceding_leaf(p)
1798             if not prevp or prevp.type == token.LPAR:
1799                 return NO
1800
1801         elif prev.type in {token.EQUAL} | STARS:
1802             return NO
1803
1804     elif p.type == syms.decorator:
1805         # decorators
1806         return NO
1807
1808     elif p.type == syms.dotted_name:
1809         if prev:
1810             return NO
1811
1812         prevp = preceding_leaf(p)
1813         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1814             return NO
1815
1816     elif p.type == syms.classdef:
1817         if t == token.LPAR:
1818             return NO
1819
1820         if prev and prev.type == token.LPAR:
1821             return NO
1822
1823     elif p.type in {syms.subscript, syms.sliceop}:
1824         # indexing
1825         if not prev:
1826             assert p.parent is not None, "subscripts are always parented"
1827             if p.parent.type == syms.subscriptlist:
1828                 return SPACE
1829
1830             return NO
1831
1832         elif not complex_subscript:
1833             return NO
1834
1835     elif p.type == syms.atom:
1836         if prev and t == token.DOT:
1837             # dots, but not the first one.
1838             return NO
1839
1840     elif p.type == syms.dictsetmaker:
1841         # dict unpacking
1842         if prev and prev.type == token.DOUBLESTAR:
1843             return NO
1844
1845     elif p.type in {syms.factor, syms.star_expr}:
1846         # unary ops
1847         if not prev:
1848             prevp = preceding_leaf(p)
1849             if not prevp or prevp.type in OPENING_BRACKETS:
1850                 return NO
1851
1852             prevp_parent = prevp.parent
1853             assert prevp_parent is not None
1854             if prevp.type == token.COLON and prevp_parent.type in {
1855                 syms.subscript,
1856                 syms.sliceop,
1857             }:
1858                 return NO
1859
1860             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1861                 return NO
1862
1863         elif t in {token.NAME, token.NUMBER, token.STRING}:
1864             return NO
1865
1866     elif p.type == syms.import_from:
1867         if t == token.DOT:
1868             if prev and prev.type == token.DOT:
1869                 return NO
1870
1871         elif t == token.NAME:
1872             if v == "import":
1873                 return SPACE
1874
1875             if prev and prev.type == token.DOT:
1876                 return NO
1877
1878     elif p.type == syms.sliceop:
1879         return NO
1880
1881     return SPACE
1882
1883
1884 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1885     """Return the first leaf that precedes `node`, if any."""
1886     while node:
1887         res = node.prev_sibling
1888         if res:
1889             if isinstance(res, Leaf):
1890                 return res
1891
1892             try:
1893                 return list(res.leaves())[-1]
1894
1895             except IndexError:
1896                 return None
1897
1898         node = node.parent
1899     return None
1900
1901
1902 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1903     """Return the child of `ancestor` that contains `descendant`."""
1904     node: Optional[LN] = descendant
1905     while node and node.parent != ancestor:
1906         node = node.parent
1907     return node
1908
1909
1910 def container_of(leaf: Leaf) -> LN:
1911     """Return `leaf` or one of its ancestors that is the topmost container of it.
1912
1913     By "container" we mean a node where `leaf` is the very first child.
1914     """
1915     same_prefix = leaf.prefix
1916     container: LN = leaf
1917     while container:
1918         parent = container.parent
1919         if parent is None:
1920             break
1921
1922         if parent.children[0].prefix != same_prefix:
1923             break
1924
1925         if parent.type == syms.file_input:
1926             break
1927
1928         if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
1929             break
1930
1931         container = parent
1932     return container
1933
1934
1935 def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
1936     """Return the priority of the `leaf` delimiter, given a line break after it.
1937
1938     The delimiter priorities returned here are from those delimiters that would
1939     cause a line break after themselves.
1940
1941     Higher numbers are higher priority.
1942     """
1943     if leaf.type == token.COMMA:
1944         return COMMA_PRIORITY
1945
1946     return 0
1947
1948
1949 def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
1950     """Return the priority of the `leaf` delimiter, given a line break before it.
1951
1952     The delimiter priorities returned here are from those delimiters that would
1953     cause a line break before themselves.
1954
1955     Higher numbers are higher priority.
1956     """
1957     if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1958         # * and ** might also be MATH_OPERATORS but in this case they are not.
1959         # Don't treat them as a delimiter.
1960         return 0
1961
1962     if (
1963         leaf.type == token.DOT
1964         and leaf.parent
1965         and leaf.parent.type not in {syms.import_from, syms.dotted_name}
1966         and (previous is None or previous.type in CLOSING_BRACKETS)
1967     ):
1968         return DOT_PRIORITY
1969
1970     if (
1971         leaf.type in MATH_OPERATORS
1972         and leaf.parent
1973         and leaf.parent.type not in {syms.factor, syms.star_expr}
1974     ):
1975         return MATH_PRIORITIES[leaf.type]
1976
1977     if leaf.type in COMPARATORS:
1978         return COMPARATOR_PRIORITY
1979
1980     if (
1981         leaf.type == token.STRING
1982         and previous is not None
1983         and previous.type == token.STRING
1984     ):
1985         return STRING_PRIORITY
1986
1987     if leaf.type not in {token.NAME, token.ASYNC}:
1988         return 0
1989
1990     if (
1991         leaf.value == "for"
1992         and leaf.parent
1993         and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1994         or leaf.type == token.ASYNC
1995     ):
1996         if (
1997             not isinstance(leaf.prev_sibling, Leaf)
1998             or leaf.prev_sibling.value != "async"
1999         ):
2000             return COMPREHENSION_PRIORITY
2001
2002     if (
2003         leaf.value == "if"
2004         and leaf.parent
2005         and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
2006     ):
2007         return COMPREHENSION_PRIORITY
2008
2009     if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
2010         return TERNARY_PRIORITY
2011
2012     if leaf.value == "is":
2013         return COMPARATOR_PRIORITY
2014
2015     if (
2016         leaf.value == "in"
2017         and leaf.parent
2018         and leaf.parent.type in {syms.comp_op, syms.comparison}
2019         and not (
2020             previous is not None
2021             and previous.type == token.NAME
2022             and previous.value == "not"
2023         )
2024     ):
2025         return COMPARATOR_PRIORITY
2026
2027     if (
2028         leaf.value == "not"
2029         and leaf.parent
2030         and leaf.parent.type == syms.comp_op
2031         and not (
2032             previous is not None
2033             and previous.type == token.NAME
2034             and previous.value == "is"
2035         )
2036     ):
2037         return COMPARATOR_PRIORITY
2038
2039     if leaf.value in LOGIC_OPERATORS and leaf.parent:
2040         return LOGIC_PRIORITY
2041
2042     return 0
2043
2044
2045 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2046 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
2047
2048
2049 def generate_comments(leaf: LN) -> Iterator[Leaf]:
2050     """Clean the prefix of the `leaf` and generate comments from it, if any.
2051
2052     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
2053     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
2054     move because it does away with modifying the grammar to include all the
2055     possible places in which comments can be placed.
2056
2057     The sad consequence for us though is that comments don't "belong" anywhere.
2058     This is why this function generates simple parentless Leaf objects for
2059     comments.  We simply don't know what the correct parent should be.
2060
2061     No matter though, we can live without this.  We really only need to
2062     differentiate between inline and standalone comments.  The latter don't
2063     share the line with any code.
2064
2065     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
2066     are emitted with a fake STANDALONE_COMMENT token identifier.
2067     """
2068     for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
2069         yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
2070
2071
2072 @dataclass
2073 class ProtoComment:
2074     """Describes a piece of syntax that is a comment.
2075
2076     It's not a :class:`blib2to3.pytree.Leaf` so that:
2077
2078     * it can be cached (`Leaf` objects should not be reused more than once as
2079       they store their lineno, column, prefix, and parent information);
2080     * `newlines` and `consumed` fields are kept separate from the `value`. This
2081       simplifies handling of special marker comments like ``# fmt: off/on``.
2082     """
2083
2084     type: int  # token.COMMENT or STANDALONE_COMMENT
2085     value: str  # content of the comment
2086     newlines: int  # how many newlines before the comment
2087     consumed: int  # how many characters of the original leaf's prefix did we consume
2088
2089
2090 @lru_cache(maxsize=4096)
2091 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2092     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2093     result: List[ProtoComment] = []
2094     if not prefix or "#" not in prefix:
2095         return result
2096
2097     consumed = 0
2098     nlines = 0
2099     for index, line in enumerate(prefix.split("\n")):
2100         consumed += len(line) + 1  # adding the length of the split '\n'
2101         line = line.lstrip()
2102         if not line:
2103             nlines += 1
2104         if not line.startswith("#"):
2105             continue
2106
2107         if index == 0 and not is_endmarker:
2108             comment_type = token.COMMENT  # simple trailing comment
2109         else:
2110             comment_type = STANDALONE_COMMENT
2111         comment = make_comment(line)
2112         result.append(
2113             ProtoComment(
2114                 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2115             )
2116         )
2117         nlines = 0
2118     return result
2119
2120
2121 def make_comment(content: str) -> str:
2122     """Return a consistently formatted comment from the given `content` string.
2123
2124     All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
2125     space between the hash sign and the content.
2126
2127     If `content` didn't start with a hash sign, one is provided.
2128     """
2129     content = content.rstrip()
2130     if not content:
2131         return "#"
2132
2133     if content[0] == "#":
2134         content = content[1:]
2135     if content and content[0] not in " !:#'%":
2136         content = " " + content
2137     return "#" + content
2138
2139
2140 def split_line(
2141     line: Line,
2142     line_length: int,
2143     inner: bool = False,
2144     supports_trailing_commas: bool = False,
2145 ) -> Iterator[Line]:
2146     """Split a `line` into potentially many lines.
2147
2148     They should fit in the allotted `line_length` but might not be able to.
2149     `inner` signifies that there were a pair of brackets somewhere around the
2150     current `line`, possibly transitively. This means we can fallback to splitting
2151     by delimiters if the LHS/RHS don't yield any results.
2152
2153     If `supports_trailing_commas` is True, splitting may use the TRAILING_COMMA feature.
2154     """
2155     if line.is_comment:
2156         yield line
2157         return
2158
2159     line_str = str(line).strip("\n")
2160
2161     if (
2162         not line.contains_inner_type_comments()
2163         and not line.should_explode
2164         and is_line_short_enough(line, line_length=line_length, line_str=line_str)
2165     ):
2166         yield line
2167         return
2168
2169     split_funcs: List[SplitFunc]
2170     if line.is_def:
2171         split_funcs = [left_hand_split]
2172     else:
2173
2174         def rhs(line: Line, supports_trailing_commas: bool = False) -> Iterator[Line]:
2175             for omit in generate_trailers_to_omit(line, line_length):
2176                 lines = list(
2177                     right_hand_split(
2178                         line, line_length, supports_trailing_commas, omit=omit
2179                     )
2180                 )
2181                 if is_line_short_enough(lines[0], line_length=line_length):
2182                     yield from lines
2183                     return
2184
2185             # All splits failed, best effort split with no omits.
2186             # This mostly happens to multiline strings that are by definition
2187             # reported as not fitting a single line.
2188             yield from right_hand_split(line, supports_trailing_commas)
2189
2190         if line.inside_brackets:
2191             split_funcs = [delimiter_split, standalone_comment_split, rhs]
2192         else:
2193             split_funcs = [rhs]
2194     for split_func in split_funcs:
2195         # We are accumulating lines in `result` because we might want to abort
2196         # mission and return the original line in the end, or attempt a different
2197         # split altogether.
2198         result: List[Line] = []
2199         try:
2200             for l in split_func(line, supports_trailing_commas):
2201                 if str(l).strip("\n") == line_str:
2202                     raise CannotSplit("Split function returned an unchanged result")
2203
2204                 result.extend(
2205                     split_line(
2206                         l,
2207                         line_length=line_length,
2208                         inner=True,
2209                         supports_trailing_commas=supports_trailing_commas,
2210                     )
2211                 )
2212         except CannotSplit:
2213             continue
2214
2215         else:
2216             yield from result
2217             break
2218
2219     else:
2220         yield line
2221
2222
2223 def left_hand_split(
2224     line: Line, supports_trailing_commas: bool = False
2225 ) -> Iterator[Line]:
2226     """Split line into many lines, starting with the first matching bracket pair.
2227
2228     Note: this usually looks weird, only use this for function definitions.
2229     Prefer RHS otherwise.  This is why this function is not symmetrical with
2230     :func:`right_hand_split` which also handles optional parentheses.
2231     """
2232     tail_leaves: List[Leaf] = []
2233     body_leaves: List[Leaf] = []
2234     head_leaves: List[Leaf] = []
2235     current_leaves = head_leaves
2236     matching_bracket = None
2237     for leaf in line.leaves:
2238         if (
2239             current_leaves is body_leaves
2240             and leaf.type in CLOSING_BRACKETS
2241             and leaf.opening_bracket is matching_bracket
2242         ):
2243             current_leaves = tail_leaves if body_leaves else head_leaves
2244         current_leaves.append(leaf)
2245         if current_leaves is head_leaves:
2246             if leaf.type in OPENING_BRACKETS:
2247                 matching_bracket = leaf
2248                 current_leaves = body_leaves
2249     if not matching_bracket:
2250         raise CannotSplit("No brackets found")
2251
2252     head = bracket_split_build_line(head_leaves, line, matching_bracket)
2253     body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
2254     tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
2255     bracket_split_succeeded_or_raise(head, body, tail)
2256     for result in (head, body, tail):
2257         if result:
2258             yield result
2259
2260
2261 def right_hand_split(
2262     line: Line,
2263     line_length: int,
2264     supports_trailing_commas: bool = False,
2265     omit: Collection[LeafID] = (),
2266 ) -> Iterator[Line]:
2267     """Split line into many lines, starting with the last matching bracket pair.
2268
2269     If the split was by optional parentheses, attempt splitting without them, too.
2270     `omit` is a collection of closing bracket IDs that shouldn't be considered for
2271     this split.
2272
2273     Note: running this function modifies `bracket_depth` on the leaves of `line`.
2274     """
2275     tail_leaves: List[Leaf] = []
2276     body_leaves: List[Leaf] = []
2277     head_leaves: List[Leaf] = []
2278     current_leaves = tail_leaves
2279     opening_bracket = None
2280     closing_bracket = None
2281     for leaf in reversed(line.leaves):
2282         if current_leaves is body_leaves:
2283             if leaf is opening_bracket:
2284                 current_leaves = head_leaves if body_leaves else tail_leaves
2285         current_leaves.append(leaf)
2286         if current_leaves is tail_leaves:
2287             if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
2288                 opening_bracket = leaf.opening_bracket
2289                 closing_bracket = leaf
2290                 current_leaves = body_leaves
2291     if not (opening_bracket and closing_bracket and head_leaves):
2292         # If there is no opening or closing_bracket that means the split failed and
2293         # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
2294         # the matching `opening_bracket` wasn't available on `line` anymore.
2295         raise CannotSplit("No brackets found")
2296
2297     tail_leaves.reverse()
2298     body_leaves.reverse()
2299     head_leaves.reverse()
2300     head = bracket_split_build_line(head_leaves, line, opening_bracket)
2301     body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
2302     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
2303     bracket_split_succeeded_or_raise(head, body, tail)
2304     if (
2305         # the body shouldn't be exploded
2306         not body.should_explode
2307         # the opening bracket is an optional paren
2308         and opening_bracket.type == token.LPAR
2309         and not opening_bracket.value
2310         # the closing bracket is an optional paren
2311         and closing_bracket.type == token.RPAR
2312         and not closing_bracket.value
2313         # it's not an import (optional parens are the only thing we can split on
2314         # in this case; attempting a split without them is a waste of time)
2315         and not line.is_import
2316         # there are no standalone comments in the body
2317         and not body.contains_standalone_comments(0)
2318         # and we can actually remove the parens
2319         and can_omit_invisible_parens(body, line_length)
2320     ):
2321         omit = {id(closing_bracket), *omit}
2322         try:
2323             yield from right_hand_split(
2324                 line,
2325                 line_length,
2326                 supports_trailing_commas=supports_trailing_commas,
2327                 omit=omit,
2328             )
2329             return
2330
2331         except CannotSplit:
2332             if not (
2333                 can_be_split(body)
2334                 or is_line_short_enough(body, line_length=line_length)
2335             ):
2336                 raise CannotSplit(
2337                     "Splitting failed, body is still too long and can't be split."
2338                 )
2339
2340             elif head.contains_multiline_strings() or tail.contains_multiline_strings():
2341                 raise CannotSplit(
2342                     "The current optional pair of parentheses is bound to fail to "
2343                     "satisfy the splitting algorithm because the head or the tail "
2344                     "contains multiline strings which by definition never fit one "
2345                     "line."
2346                 )
2347
2348     ensure_visible(opening_bracket)
2349     ensure_visible(closing_bracket)
2350     for result in (head, body, tail):
2351         if result:
2352             yield result
2353
2354
2355 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
2356     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
2357
2358     Do nothing otherwise.
2359
2360     A left- or right-hand split is based on a pair of brackets. Content before
2361     (and including) the opening bracket is left on one line, content inside the
2362     brackets is put on a separate line, and finally content starting with and
2363     following the closing bracket is put on a separate line.
2364
2365     Those are called `head`, `body`, and `tail`, respectively. If the split
2366     produced the same line (all content in `head`) or ended up with an empty `body`
2367     and the `tail` is just the closing bracket, then it's considered failed.
2368     """
2369     tail_len = len(str(tail).strip())
2370     if not body:
2371         if tail_len == 0:
2372             raise CannotSplit("Splitting brackets produced the same line")
2373
2374         elif tail_len < 3:
2375             raise CannotSplit(
2376                 f"Splitting brackets on an empty body to save "
2377                 f"{tail_len} characters is not worth it"
2378             )
2379
2380
2381 def bracket_split_build_line(
2382     leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
2383 ) -> Line:
2384     """Return a new line with given `leaves` and respective comments from `original`.
2385
2386     If `is_body` is True, the result line is one-indented inside brackets and as such
2387     has its first leaf's prefix normalized and a trailing comma added when expected.
2388     """
2389     result = Line(depth=original.depth)
2390     if is_body:
2391         result.inside_brackets = True
2392         result.depth += 1
2393         if leaves:
2394             # Since body is a new indent level, remove spurious leading whitespace.
2395             normalize_prefix(leaves[0], inside_brackets=True)
2396             # Ensure a trailing comma when expected.
2397             if original.is_import:
2398                 if leaves[-1].type != token.COMMA:
2399                     leaves.append(Leaf(token.COMMA, ","))
2400     # Populate the line
2401     for leaf in leaves:
2402         result.append(leaf, preformatted=True)
2403         for comment_after in original.comments_after(leaf):
2404             result.append(comment_after, preformatted=True)
2405     if is_body:
2406         result.should_explode = should_explode(result, opening_bracket)
2407     return result
2408
2409
2410 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
2411     """Normalize prefix of the first leaf in every line returned by `split_func`.
2412
2413     This is a decorator over relevant split functions.
2414     """
2415
2416     @wraps(split_func)
2417     def split_wrapper(
2418         line: Line, supports_trailing_commas: bool = False
2419     ) -> Iterator[Line]:
2420         for l in split_func(line, supports_trailing_commas):
2421             normalize_prefix(l.leaves[0], inside_brackets=True)
2422             yield l
2423
2424     return split_wrapper
2425
2426
2427 @dont_increase_indentation
2428 def delimiter_split(
2429     line: Line, supports_trailing_commas: bool = False
2430 ) -> Iterator[Line]:
2431     """Split according to delimiters of the highest priority.
2432
2433     If `supports_trailing_commas` is True, the split will add trailing commas
2434     also in function signatures that contain `*` and `**`.
2435     """
2436     try:
2437         last_leaf = line.leaves[-1]
2438     except IndexError:
2439         raise CannotSplit("Line empty")
2440
2441     bt = line.bracket_tracker
2442     try:
2443         delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
2444     except ValueError:
2445         raise CannotSplit("No delimiters found")
2446
2447     if delimiter_priority == DOT_PRIORITY:
2448         if bt.delimiter_count_with_priority(delimiter_priority) == 1:
2449             raise CannotSplit("Splitting a single attribute from its owner looks wrong")
2450
2451     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2452     lowest_depth = sys.maxsize
2453     trailing_comma_safe = True
2454
2455     def append_to_line(leaf: Leaf) -> Iterator[Line]:
2456         """Append `leaf` to current line or to new line if appending impossible."""
2457         nonlocal current_line
2458         try:
2459             current_line.append_safe(leaf, preformatted=True)
2460         except ValueError:
2461             yield current_line
2462
2463             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2464             current_line.append(leaf)
2465
2466     for leaf in line.leaves:
2467         yield from append_to_line(leaf)
2468
2469         for comment_after in line.comments_after(leaf):
2470             yield from append_to_line(comment_after)
2471
2472         lowest_depth = min(lowest_depth, leaf.bracket_depth)
2473         if leaf.bracket_depth == lowest_depth and is_vararg(
2474             leaf, within=VARARGS_PARENTS
2475         ):
2476             trailing_comma_safe = trailing_comma_safe and supports_trailing_commas
2477         leaf_priority = bt.delimiters.get(id(leaf))
2478         if leaf_priority == delimiter_priority:
2479             yield current_line
2480
2481             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2482     if current_line:
2483         if (
2484             trailing_comma_safe
2485             and delimiter_priority == COMMA_PRIORITY
2486             and current_line.leaves[-1].type != token.COMMA
2487             and current_line.leaves[-1].type != STANDALONE_COMMENT
2488         ):
2489             current_line.append(Leaf(token.COMMA, ","))
2490         yield current_line
2491
2492
2493 @dont_increase_indentation
2494 def standalone_comment_split(
2495     line: Line, supports_trailing_commas: bool = False
2496 ) -> Iterator[Line]:
2497     """Split standalone comments from the rest of the line."""
2498     if not line.contains_standalone_comments(0):
2499         raise CannotSplit("Line does not have any standalone comments")
2500
2501     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2502
2503     def append_to_line(leaf: Leaf) -> Iterator[Line]:
2504         """Append `leaf` to current line or to new line if appending impossible."""
2505         nonlocal current_line
2506         try:
2507             current_line.append_safe(leaf, preformatted=True)
2508         except ValueError:
2509             yield current_line
2510
2511             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2512             current_line.append(leaf)
2513
2514     for leaf in line.leaves:
2515         yield from append_to_line(leaf)
2516
2517         for comment_after in line.comments_after(leaf):
2518             yield from append_to_line(comment_after)
2519
2520     if current_line:
2521         yield current_line
2522
2523
2524 def is_import(leaf: Leaf) -> bool:
2525     """Return True if the given leaf starts an import statement."""
2526     p = leaf.parent
2527     t = leaf.type
2528     v = leaf.value
2529     return bool(
2530         t == token.NAME
2531         and (
2532             (v == "import" and p and p.type == syms.import_name)
2533             or (v == "from" and p and p.type == syms.import_from)
2534         )
2535     )
2536
2537
2538 def is_type_comment(leaf: Leaf) -> bool:
2539     """Return True if the given leaf is a special comment.
2540     Only returns true for type comments for now."""
2541     t = leaf.type
2542     v = leaf.value
2543     return t in {token.COMMENT, t == STANDALONE_COMMENT} and v.startswith("# type:")
2544
2545
2546 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2547     """Leave existing extra newlines if not `inside_brackets`. Remove everything
2548     else.
2549
2550     Note: don't use backslashes for formatting or you'll lose your voting rights.
2551     """
2552     if not inside_brackets:
2553         spl = leaf.prefix.split("#")
2554         if "\\" not in spl[0]:
2555             nl_count = spl[-1].count("\n")
2556             if len(spl) > 1:
2557                 nl_count -= 1
2558             leaf.prefix = "\n" * nl_count
2559             return
2560
2561     leaf.prefix = ""
2562
2563
2564 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
2565     """Make all string prefixes lowercase.
2566
2567     If remove_u_prefix is given, also removes any u prefix from the string.
2568
2569     Note: Mutates its argument.
2570     """
2571     match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
2572     assert match is not None, f"failed to match string {leaf.value!r}"
2573     orig_prefix = match.group(1)
2574     new_prefix = orig_prefix.lower()
2575     if remove_u_prefix:
2576         new_prefix = new_prefix.replace("u", "")
2577     leaf.value = f"{new_prefix}{match.group(2)}"
2578
2579
2580 def normalize_string_quotes(leaf: Leaf) -> None:
2581     """Prefer double quotes but only if it doesn't cause more escaping.
2582
2583     Adds or removes backslashes as appropriate. Doesn't parse and fix
2584     strings nested in f-strings (yet).
2585
2586     Note: Mutates its argument.
2587     """
2588     value = leaf.value.lstrip("furbFURB")
2589     if value[:3] == '"""':
2590         return
2591
2592     elif value[:3] == "'''":
2593         orig_quote = "'''"
2594         new_quote = '"""'
2595     elif value[0] == '"':
2596         orig_quote = '"'
2597         new_quote = "'"
2598     else:
2599         orig_quote = "'"
2600         new_quote = '"'
2601     first_quote_pos = leaf.value.find(orig_quote)
2602     if first_quote_pos == -1:
2603         return  # There's an internal error
2604
2605     prefix = leaf.value[:first_quote_pos]
2606     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2607     escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
2608     escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
2609     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2610     if "r" in prefix.casefold():
2611         if unescaped_new_quote.search(body):
2612             # There's at least one unescaped new_quote in this raw string
2613             # so converting is impossible
2614             return
2615
2616         # Do not introduce or remove backslashes in raw strings
2617         new_body = body
2618     else:
2619         # remove unnecessary escapes
2620         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2621         if body != new_body:
2622             # Consider the string without unnecessary escapes as the original
2623             body = new_body
2624             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2625         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2626         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2627     if "f" in prefix.casefold():
2628         matches = re.findall(r"[^{]\{(.*?)\}[^}]", new_body)
2629         for m in matches:
2630             if "\\" in str(m):
2631                 # Do not introduce backslashes in interpolated expressions
2632                 return
2633     if new_quote == '"""' and new_body[-1:] == '"':
2634         # edge case:
2635         new_body = new_body[:-1] + '\\"'
2636     orig_escape_count = body.count("\\")
2637     new_escape_count = new_body.count("\\")
2638     if new_escape_count > orig_escape_count:
2639         return  # Do not introduce more escaping
2640
2641     if new_escape_count == orig_escape_count and orig_quote == '"':
2642         return  # Prefer double quotes
2643
2644     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2645
2646
2647 def normalize_numeric_literal(leaf: Leaf) -> None:
2648     """Normalizes numeric (float, int, and complex) literals.
2649
2650     All letters used in the representation are normalized to lowercase (except
2651     in Python 2 long literals).
2652     """
2653     text = leaf.value.lower()
2654     if text.startswith(("0o", "0b")):
2655         # Leave octal and binary literals alone.
2656         pass
2657     elif text.startswith("0x"):
2658         # Change hex literals to upper case.
2659         before, after = text[:2], text[2:]
2660         text = f"{before}{after.upper()}"
2661     elif "e" in text:
2662         before, after = text.split("e")
2663         sign = ""
2664         if after.startswith("-"):
2665             after = after[1:]
2666             sign = "-"
2667         elif after.startswith("+"):
2668             after = after[1:]
2669         before = format_float_or_int_string(before)
2670         text = f"{before}e{sign}{after}"
2671     elif text.endswith(("j", "l")):
2672         number = text[:-1]
2673         suffix = text[-1]
2674         # Capitalize in "2L" because "l" looks too similar to "1".
2675         if suffix == "l":
2676             suffix = "L"
2677         text = f"{format_float_or_int_string(number)}{suffix}"
2678     else:
2679         text = format_float_or_int_string(text)
2680     leaf.value = text
2681
2682
2683 def format_float_or_int_string(text: str) -> str:
2684     """Formats a float string like "1.0"."""
2685     if "." not in text:
2686         return text
2687
2688     before, after = text.split(".")
2689     return f"{before or 0}.{after or 0}"
2690
2691
2692 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2693     """Make existing optional parentheses invisible or create new ones.
2694
2695     `parens_after` is a set of string leaf values immeditely after which parens
2696     should be put.
2697
2698     Standardizes on visible parentheses for single-element tuples, and keeps
2699     existing visible parentheses for other tuples and generator expressions.
2700     """
2701     for pc in list_comments(node.prefix, is_endmarker=False):
2702         if pc.value in FMT_OFF:
2703             # This `node` has a prefix with `# fmt: off`, don't mess with parens.
2704             return
2705
2706     check_lpar = False
2707     for index, child in enumerate(list(node.children)):
2708         if check_lpar:
2709             if child.type == syms.atom:
2710                 if maybe_make_parens_invisible_in_atom(child):
2711                     lpar = Leaf(token.LPAR, "")
2712                     rpar = Leaf(token.RPAR, "")
2713                     index = child.remove() or 0
2714                     node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2715             elif is_one_tuple(child):
2716                 # wrap child in visible parentheses
2717                 lpar = Leaf(token.LPAR, "(")
2718                 rpar = Leaf(token.RPAR, ")")
2719                 child.remove()
2720                 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2721             elif node.type == syms.import_from:
2722                 # "import from" nodes store parentheses directly as part of
2723                 # the statement
2724                 if child.type == token.LPAR:
2725                     # make parentheses invisible
2726                     child.value = ""  # type: ignore
2727                     node.children[-1].value = ""  # type: ignore
2728                 elif child.type != token.STAR:
2729                     # insert invisible parentheses
2730                     node.insert_child(index, Leaf(token.LPAR, ""))
2731                     node.append_child(Leaf(token.RPAR, ""))
2732                 break
2733
2734             elif not (isinstance(child, Leaf) and is_multiline_string(child)):
2735                 # wrap child in invisible parentheses
2736                 lpar = Leaf(token.LPAR, "")
2737                 rpar = Leaf(token.RPAR, "")
2738                 index = child.remove() or 0
2739                 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2740
2741         check_lpar = isinstance(child, Leaf) and child.value in parens_after
2742
2743
2744 def normalize_fmt_off(node: Node) -> None:
2745     """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
2746     try_again = True
2747     while try_again:
2748         try_again = convert_one_fmt_off_pair(node)
2749
2750
2751 def convert_one_fmt_off_pair(node: Node) -> bool:
2752     """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
2753
2754     Returns True if a pair was converted.
2755     """
2756     for leaf in node.leaves():
2757         previous_consumed = 0
2758         for comment in list_comments(leaf.prefix, is_endmarker=False):
2759             if comment.value in FMT_OFF:
2760                 # We only want standalone comments. If there's no previous leaf or
2761                 # the previous leaf is indentation, it's a standalone comment in
2762                 # disguise.
2763                 if comment.type != STANDALONE_COMMENT:
2764                     prev = preceding_leaf(leaf)
2765                     if prev and prev.type not in WHITESPACE:
2766                         continue
2767
2768                 ignored_nodes = list(generate_ignored_nodes(leaf))
2769                 if not ignored_nodes:
2770                     continue
2771
2772                 first = ignored_nodes[0]  # Can be a container node with the `leaf`.
2773                 parent = first.parent
2774                 prefix = first.prefix
2775                 first.prefix = prefix[comment.consumed :]
2776                 hidden_value = (
2777                     comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
2778                 )
2779                 if hidden_value.endswith("\n"):
2780                     # That happens when one of the `ignored_nodes` ended with a NEWLINE
2781                     # leaf (possibly followed by a DEDENT).
2782                     hidden_value = hidden_value[:-1]
2783                 first_idx = None
2784                 for ignored in ignored_nodes:
2785                     index = ignored.remove()
2786                     if first_idx is None:
2787                         first_idx = index
2788                 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
2789                 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
2790                 parent.insert_child(
2791                     first_idx,
2792                     Leaf(
2793                         STANDALONE_COMMENT,
2794                         hidden_value,
2795                         prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
2796                     ),
2797                 )
2798                 return True
2799
2800             previous_consumed = comment.consumed
2801
2802     return False
2803
2804
2805 def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
2806     """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
2807
2808     Stops at the end of the block.
2809     """
2810     container: Optional[LN] = container_of(leaf)
2811     while container is not None and container.type != token.ENDMARKER:
2812         for comment in list_comments(container.prefix, is_endmarker=False):
2813             if comment.value in FMT_ON:
2814                 return
2815
2816         yield container
2817
2818         container = container.next_sibling
2819
2820
2821 def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
2822     """If it's safe, make the parens in the atom `node` invisible, recursively.
2823
2824     Returns whether the node should itself be wrapped in invisible parentheses.
2825
2826     """
2827     if (
2828         node.type != syms.atom
2829         or is_empty_tuple(node)
2830         or is_one_tuple(node)
2831         or is_yield(node)
2832         or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
2833     ):
2834         return False
2835
2836     first = node.children[0]
2837     last = node.children[-1]
2838     if first.type == token.LPAR and last.type == token.RPAR:
2839         # make parentheses invisible
2840         first.value = ""  # type: ignore
2841         last.value = ""  # type: ignore
2842         if len(node.children) > 1:
2843             maybe_make_parens_invisible_in_atom(node.children[1])
2844         return False
2845
2846     return True
2847
2848
2849 def is_empty_tuple(node: LN) -> bool:
2850     """Return True if `node` holds an empty tuple."""
2851     return (
2852         node.type == syms.atom
2853         and len(node.children) == 2
2854         and node.children[0].type == token.LPAR
2855         and node.children[1].type == token.RPAR
2856     )
2857
2858
2859 def is_one_tuple(node: LN) -> bool:
2860     """Return True if `node` holds a tuple with one element, with or without parens."""
2861     if node.type == syms.atom:
2862         if len(node.children) != 3:
2863             return False
2864
2865         lpar, gexp, rpar = node.children
2866         if not (
2867             lpar.type == token.LPAR
2868             and gexp.type == syms.testlist_gexp
2869             and rpar.type == token.RPAR
2870         ):
2871             return False
2872
2873         return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2874
2875     return (
2876         node.type in IMPLICIT_TUPLE
2877         and len(node.children) == 2
2878         and node.children[1].type == token.COMMA
2879     )
2880
2881
2882 def is_yield(node: LN) -> bool:
2883     """Return True if `node` holds a `yield` or `yield from` expression."""
2884     if node.type == syms.yield_expr:
2885         return True
2886
2887     if node.type == token.NAME and node.value == "yield":  # type: ignore
2888         return True
2889
2890     if node.type != syms.atom:
2891         return False
2892
2893     if len(node.children) != 3:
2894         return False
2895
2896     lpar, expr, rpar = node.children
2897     if lpar.type == token.LPAR and rpar.type == token.RPAR:
2898         return is_yield(expr)
2899
2900     return False
2901
2902
2903 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2904     """Return True if `leaf` is a star or double star in a vararg or kwarg.
2905
2906     If `within` includes VARARGS_PARENTS, this applies to function signatures.
2907     If `within` includes UNPACKING_PARENTS, it applies to right hand-side
2908     extended iterable unpacking (PEP 3132) and additional unpacking
2909     generalizations (PEP 448).
2910     """
2911     if leaf.type not in STARS or not leaf.parent:
2912         return False
2913
2914     p = leaf.parent
2915     if p.type == syms.star_expr:
2916         # Star expressions are also used as assignment targets in extended
2917         # iterable unpacking (PEP 3132).  See what its parent is instead.
2918         if not p.parent:
2919             return False
2920
2921         p = p.parent
2922
2923     return p.type in within
2924
2925
2926 def is_multiline_string(leaf: Leaf) -> bool:
2927     """Return True if `leaf` is a multiline string that actually spans many lines."""
2928     value = leaf.value.lstrip("furbFURB")
2929     return value[:3] in {'"""', "'''"} and "\n" in value
2930
2931
2932 def is_stub_suite(node: Node) -> bool:
2933     """Return True if `node` is a suite with a stub body."""
2934     if (
2935         len(node.children) != 4
2936         or node.children[0].type != token.NEWLINE
2937         or node.children[1].type != token.INDENT
2938         or node.children[3].type != token.DEDENT
2939     ):
2940         return False
2941
2942     return is_stub_body(node.children[2])
2943
2944
2945 def is_stub_body(node: LN) -> bool:
2946     """Return True if `node` is a simple statement containing an ellipsis."""
2947     if not isinstance(node, Node) or node.type != syms.simple_stmt:
2948         return False
2949
2950     if len(node.children) != 2:
2951         return False
2952
2953     child = node.children[0]
2954     return (
2955         child.type == syms.atom
2956         and len(child.children) == 3
2957         and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
2958     )
2959
2960
2961 def max_delimiter_priority_in_atom(node: LN) -> int:
2962     """Return maximum delimiter priority inside `node`.
2963
2964     This is specific to atoms with contents contained in a pair of parentheses.
2965     If `node` isn't an atom or there are no enclosing parentheses, returns 0.
2966     """
2967     if node.type != syms.atom:
2968         return 0
2969
2970     first = node.children[0]
2971     last = node.children[-1]
2972     if not (first.type == token.LPAR and last.type == token.RPAR):
2973         return 0
2974
2975     bt = BracketTracker()
2976     for c in node.children[1:-1]:
2977         if isinstance(c, Leaf):
2978             bt.mark(c)
2979         else:
2980             for leaf in c.leaves():
2981                 bt.mark(leaf)
2982     try:
2983         return bt.max_delimiter_priority()
2984
2985     except ValueError:
2986         return 0
2987
2988
2989 def ensure_visible(leaf: Leaf) -> None:
2990     """Make sure parentheses are visible.
2991
2992     They could be invisible as part of some statements (see
2993     :func:`normalize_invible_parens` and :func:`visit_import_from`).
2994     """
2995     if leaf.type == token.LPAR:
2996         leaf.value = "("
2997     elif leaf.type == token.RPAR:
2998         leaf.value = ")"
2999
3000
3001 def should_explode(line: Line, opening_bracket: Leaf) -> bool:
3002     """Should `line` immediately be split with `delimiter_split()` after RHS?"""
3003
3004     if not (
3005         opening_bracket.parent
3006         and opening_bracket.parent.type in {syms.atom, syms.import_from}
3007         and opening_bracket.value in "[{("
3008     ):
3009         return False
3010
3011     try:
3012         last_leaf = line.leaves[-1]
3013         exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
3014         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
3015     except (IndexError, ValueError):
3016         return False
3017
3018     return max_priority == COMMA_PRIORITY
3019
3020
3021 def get_features_used(node: Node) -> Set[Feature]:
3022     """Return a set of (relatively) new Python features used in this file.
3023
3024     Currently looking for:
3025     - f-strings;
3026     - underscores in numeric literals; and
3027     - trailing commas after * or ** in function signatures and calls.
3028     """
3029     features: Set[Feature] = set()
3030     for n in node.pre_order():
3031         if n.type == token.STRING:
3032             value_head = n.value[:2]  # type: ignore
3033             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
3034                 features.add(Feature.F_STRINGS)
3035
3036         elif n.type == token.NUMBER:
3037             if "_" in n.value:  # type: ignore
3038                 features.add(Feature.NUMERIC_UNDERSCORES)
3039
3040         elif (
3041             n.type in {syms.typedargslist, syms.arglist}
3042             and n.children
3043             and n.children[-1].type == token.COMMA
3044         ):
3045             for ch in n.children:
3046                 if ch.type in STARS:
3047                     features.add(Feature.TRAILING_COMMA)
3048
3049                 if ch.type == syms.argument:
3050                     for argch in ch.children:
3051                         if argch.type in STARS:
3052                             features.add(Feature.TRAILING_COMMA)
3053
3054     return features
3055
3056
3057 def detect_target_versions(node: Node) -> Set[TargetVersion]:
3058     """Detect the version to target based on the nodes used."""
3059     features = get_features_used(node)
3060     return {
3061         version for version in TargetVersion if features <= VERSION_TO_FEATURES[version]
3062     }
3063
3064
3065 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
3066     """Generate sets of closing bracket IDs that should be omitted in a RHS.
3067
3068     Brackets can be omitted if the entire trailer up to and including
3069     a preceding closing bracket fits in one line.
3070
3071     Yielded sets are cumulative (contain results of previous yields, too).  First
3072     set is empty.
3073     """
3074
3075     omit: Set[LeafID] = set()
3076     yield omit
3077
3078     length = 4 * line.depth
3079     opening_bracket = None
3080     closing_bracket = None
3081     inner_brackets: Set[LeafID] = set()
3082     for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
3083         length += leaf_length
3084         if length > line_length:
3085             break
3086
3087         has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
3088         if leaf.type == STANDALONE_COMMENT or has_inline_comment:
3089             break
3090
3091         if opening_bracket:
3092             if leaf is opening_bracket:
3093                 opening_bracket = None
3094             elif leaf.type in CLOSING_BRACKETS:
3095                 inner_brackets.add(id(leaf))
3096         elif leaf.type in CLOSING_BRACKETS:
3097             if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
3098                 # Empty brackets would fail a split so treat them as "inner"
3099                 # brackets (e.g. only add them to the `omit` set if another
3100                 # pair of brackets was good enough.
3101                 inner_brackets.add(id(leaf))
3102                 continue
3103
3104             if closing_bracket:
3105                 omit.add(id(closing_bracket))
3106                 omit.update(inner_brackets)
3107                 inner_brackets.clear()
3108                 yield omit
3109
3110             if leaf.value:
3111                 opening_bracket = leaf.opening_bracket
3112                 closing_bracket = leaf
3113
3114
3115 def get_future_imports(node: Node) -> Set[str]:
3116     """Return a set of __future__ imports in the file."""
3117     imports: Set[str] = set()
3118
3119     def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
3120         for child in children:
3121             if isinstance(child, Leaf):
3122                 if child.type == token.NAME:
3123                     yield child.value
3124             elif child.type == syms.import_as_name:
3125                 orig_name = child.children[0]
3126                 assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
3127                 assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
3128                 yield orig_name.value
3129             elif child.type == syms.import_as_names:
3130                 yield from get_imports_from_children(child.children)
3131             else:
3132                 assert False, "Invalid syntax parsing imports"
3133
3134     for child in node.children:
3135         if child.type != syms.simple_stmt:
3136             break
3137         first_child = child.children[0]
3138         if isinstance(first_child, Leaf):
3139             # Continue looking if we see a docstring; otherwise stop.
3140             if (
3141                 len(child.children) == 2
3142                 and first_child.type == token.STRING
3143                 and child.children[1].type == token.NEWLINE
3144             ):
3145                 continue
3146             else:
3147                 break
3148         elif first_child.type == syms.import_from:
3149             module_name = first_child.children[1]
3150             if not isinstance(module_name, Leaf) or module_name.value != "__future__":
3151                 break
3152             imports |= set(get_imports_from_children(first_child.children[3:]))
3153         else:
3154             break
3155     return imports
3156
3157
3158 def gen_python_files_in_dir(
3159     path: Path,
3160     root: Path,
3161     include: Pattern[str],
3162     exclude: Pattern[str],
3163     report: "Report",
3164 ) -> Iterator[Path]:
3165     """Generate all files under `path` whose paths are not excluded by the
3166     `exclude` regex, but are included by the `include` regex.
3167
3168     Symbolic links pointing outside of the `root` directory are ignored.
3169
3170     `report` is where output about exclusions goes.
3171     """
3172     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
3173     for child in path.iterdir():
3174         try:
3175             normalized_path = "/" + child.resolve().relative_to(root).as_posix()
3176         except ValueError:
3177             if child.is_symlink():
3178                 report.path_ignored(
3179                     child, f"is a symbolic link that points outside {root}"
3180                 )
3181                 continue
3182
3183             raise
3184
3185         if child.is_dir():
3186             normalized_path += "/"
3187         exclude_match = exclude.search(normalized_path)
3188         if exclude_match and exclude_match.group(0):
3189             report.path_ignored(child, f"matches the --exclude regular expression")
3190             continue
3191
3192         if child.is_dir():
3193             yield from gen_python_files_in_dir(child, root, include, exclude, report)
3194
3195         elif child.is_file():
3196             include_match = include.search(normalized_path)
3197             if include_match:
3198                 yield child
3199
3200
3201 @lru_cache()
3202 def find_project_root(srcs: Iterable[str]) -> Path:
3203     """Return a directory containing .git, .hg, or pyproject.toml.
3204
3205     That directory can be one of the directories passed in `srcs` or their
3206     common parent.
3207
3208     If no directory in the tree contains a marker that would specify it's the
3209     project root, the root of the file system is returned.
3210     """
3211     if not srcs:
3212         return Path("/").resolve()
3213
3214     common_base = min(Path(src).resolve() for src in srcs)
3215     if common_base.is_dir():
3216         # Append a fake file so `parents` below returns `common_base_dir`, too.
3217         common_base /= "fake-file"
3218     for directory in common_base.parents:
3219         if (directory / ".git").is_dir():
3220             return directory
3221
3222         if (directory / ".hg").is_dir():
3223             return directory
3224
3225         if (directory / "pyproject.toml").is_file():
3226             return directory
3227
3228     return directory
3229
3230
3231 @dataclass
3232 class Report:
3233     """Provides a reformatting counter. Can be rendered with `str(report)`."""
3234
3235     check: bool = False
3236     quiet: bool = False
3237     verbose: bool = False
3238     change_count: int = 0
3239     same_count: int = 0
3240     failure_count: int = 0
3241
3242     def done(self, src: Path, changed: Changed) -> None:
3243         """Increment the counter for successful reformatting. Write out a message."""
3244         if changed is Changed.YES:
3245             reformatted = "would reformat" if self.check else "reformatted"
3246             if self.verbose or not self.quiet:
3247                 out(f"{reformatted} {src}")
3248             self.change_count += 1
3249         else:
3250             if self.verbose:
3251                 if changed is Changed.NO:
3252                     msg = f"{src} already well formatted, good job."
3253                 else:
3254                     msg = f"{src} wasn't modified on disk since last run."
3255                 out(msg, bold=False)
3256             self.same_count += 1
3257
3258     def failed(self, src: Path, message: str) -> None:
3259         """Increment the counter for failed reformatting. Write out a message."""
3260         err(f"error: cannot format {src}: {message}")
3261         self.failure_count += 1
3262
3263     def path_ignored(self, path: Path, message: str) -> None:
3264         if self.verbose:
3265             out(f"{path} ignored: {message}", bold=False)
3266
3267     @property
3268     def return_code(self) -> int:
3269         """Return the exit code that the app should use.
3270
3271         This considers the current state of changed files and failures:
3272         - if there were any failures, return 123;
3273         - if any files were changed and --check is being used, return 1;
3274         - otherwise return 0.
3275         """
3276         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
3277         # 126 we have special return codes reserved by the shell.
3278         if self.failure_count:
3279             return 123
3280
3281         elif self.change_count and self.check:
3282             return 1
3283
3284         return 0
3285
3286     def __str__(self) -> str:
3287         """Render a color report of the current state.
3288
3289         Use `click.unstyle` to remove colors.
3290         """
3291         if self.check:
3292             reformatted = "would be reformatted"
3293             unchanged = "would be left unchanged"
3294             failed = "would fail to reformat"
3295         else:
3296             reformatted = "reformatted"
3297             unchanged = "left unchanged"
3298             failed = "failed to reformat"
3299         report = []
3300         if self.change_count:
3301             s = "s" if self.change_count > 1 else ""
3302             report.append(
3303                 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
3304             )
3305         if self.same_count:
3306             s = "s" if self.same_count > 1 else ""
3307             report.append(f"{self.same_count} file{s} {unchanged}")
3308         if self.failure_count:
3309             s = "s" if self.failure_count > 1 else ""
3310             report.append(
3311                 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
3312             )
3313         return ", ".join(report) + "."
3314
3315
3316 def assert_equivalent(src: str, dst: str) -> None:
3317     """Raise AssertionError if `src` and `dst` aren't equivalent."""
3318
3319     import ast
3320     import traceback
3321
3322     def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
3323         """Simple visitor generating strings to compare ASTs by content."""
3324         yield f"{'  ' * depth}{node.__class__.__name__}("
3325
3326         for field in sorted(node._fields):
3327             try:
3328                 value = getattr(node, field)
3329             except AttributeError:
3330                 continue
3331
3332             yield f"{'  ' * (depth+1)}{field}="
3333
3334             if isinstance(value, list):
3335                 for item in value:
3336                     # Ignore nested tuples within del statements, because we may insert
3337                     # parentheses and they change the AST.
3338                     if (
3339                         field == "targets"
3340                         and isinstance(node, ast.Delete)
3341                         and isinstance(item, ast.Tuple)
3342                     ):
3343                         for item in item.elts:
3344                             yield from _v(item, depth + 2)
3345                     elif isinstance(item, ast.AST):
3346                         yield from _v(item, depth + 2)
3347
3348             elif isinstance(value, ast.AST):
3349                 yield from _v(value, depth + 2)
3350
3351             else:
3352                 yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"
3353
3354         yield f"{'  ' * depth})  # /{node.__class__.__name__}"
3355
3356     try:
3357         src_ast = ast.parse(src)
3358     except Exception as exc:
3359         major, minor = sys.version_info[:2]
3360         raise AssertionError(
3361             f"cannot use --safe with this file; failed to parse source file "
3362             f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
3363             f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
3364         )
3365
3366     try:
3367         dst_ast = ast.parse(dst)
3368     except Exception as exc:
3369         log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
3370         raise AssertionError(
3371             f"INTERNAL ERROR: Black produced invalid code: {exc}. "
3372             f"Please report a bug on https://github.com/ambv/black/issues.  "
3373             f"This invalid output might be helpful: {log}"
3374         ) from None
3375
3376     src_ast_str = "\n".join(_v(src_ast))
3377     dst_ast_str = "\n".join(_v(dst_ast))
3378     if src_ast_str != dst_ast_str:
3379         log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
3380         raise AssertionError(
3381             f"INTERNAL ERROR: Black produced code that is not equivalent to "
3382             f"the source.  "
3383             f"Please report a bug on https://github.com/ambv/black/issues.  "
3384             f"This diff might be helpful: {log}"
3385         ) from None
3386
3387
3388 def assert_stable(src: str, dst: str, mode: FileMode) -> None:
3389     """Raise AssertionError if `dst` reformats differently the second time."""
3390     newdst = format_str(dst, mode=mode)
3391     if dst != newdst:
3392         log = dump_to_file(
3393             diff(src, dst, "source", "first pass"),
3394             diff(dst, newdst, "first pass", "second pass"),
3395         )
3396         raise AssertionError(
3397             f"INTERNAL ERROR: Black produced different code on the second pass "
3398             f"of the formatter.  "
3399             f"Please report a bug on https://github.com/ambv/black/issues.  "
3400             f"This diff might be helpful: {log}"
3401         ) from None
3402
3403
3404 def dump_to_file(*output: str) -> str:
3405     """Dump `output` to a temporary file. Return path to the file."""
3406     import tempfile
3407
3408     with tempfile.NamedTemporaryFile(
3409         mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
3410     ) as f:
3411         for lines in output:
3412             f.write(lines)
3413             if lines and lines[-1] != "\n":
3414                 f.write("\n")
3415     return f.name
3416
3417
3418 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
3419     """Return a unified diff string between strings `a` and `b`."""
3420     import difflib
3421
3422     a_lines = [line + "\n" for line in a.split("\n")]
3423     b_lines = [line + "\n" for line in b.split("\n")]
3424     return "".join(
3425         difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
3426     )
3427
3428
3429 def cancel(tasks: Iterable[asyncio.Task]) -> None:
3430     """asyncio signal handler that cancels all `tasks` and reports to stderr."""
3431     err("Aborted!")
3432     for task in tasks:
3433         task.cancel()
3434
3435
3436 def shutdown(loop: BaseEventLoop) -> None:
3437     """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
3438     try:
3439         if sys.version_info[:2] >= (3, 7):
3440             all_tasks = asyncio.all_tasks
3441         else:
3442             all_tasks = asyncio.Task.all_tasks
3443         # This part is borrowed from asyncio/runners.py in Python 3.7b2.
3444         to_cancel = [task for task in all_tasks(loop) if not task.done()]
3445         if not to_cancel:
3446             return
3447
3448         for task in to_cancel:
3449             task.cancel()
3450         loop.run_until_complete(
3451             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
3452         )
3453     finally:
3454         # `concurrent.futures.Future` objects cannot be cancelled once they
3455         # are already running. There might be some when the `shutdown()` happened.
3456         # Silence their logger's spew about the event loop being closed.
3457         cf_logger = logging.getLogger("concurrent.futures")
3458         cf_logger.setLevel(logging.CRITICAL)
3459         loop.close()
3460
3461
3462 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
3463     """Replace `regex` with `replacement` twice on `original`.
3464
3465     This is used by string normalization to perform replaces on
3466     overlapping matches.
3467     """
3468     return regex.sub(replacement, regex.sub(replacement, original))
3469
3470
3471 def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
3472     """Compile a regular expression string in `regex`.
3473
3474     If it contains newlines, use verbose mode.
3475     """
3476     if "\n" in regex:
3477         regex = "(?x)" + regex
3478     return re.compile(regex)
3479
3480
3481 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
3482     """Like `reversed(enumerate(sequence))` if that were possible."""
3483     index = len(sequence) - 1
3484     for element in reversed(sequence):
3485         yield (index, element)
3486         index -= 1
3487
3488
3489 def enumerate_with_length(
3490     line: Line, reversed: bool = False
3491 ) -> Iterator[Tuple[Index, Leaf, int]]:
3492     """Return an enumeration of leaves with their length.
3493
3494     Stops prematurely on multiline strings and standalone comments.
3495     """
3496     op = cast(
3497         Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
3498         enumerate_reversed if reversed else enumerate,
3499     )
3500     for index, leaf in op(line.leaves):
3501         length = len(leaf.prefix) + len(leaf.value)
3502         if "\n" in leaf.value:
3503             return  # Multiline strings, we can't continue.
3504
3505         comment: Optional[Leaf]
3506         for comment in line.comments_after(leaf):
3507             length += len(comment.value)
3508
3509         yield index, leaf, length
3510
3511
3512 def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
3513     """Return True if `line` is no longer than `line_length`.
3514
3515     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
3516     """
3517     if not line_str:
3518         line_str = str(line).strip("\n")
3519     return (
3520         len(line_str) <= line_length
3521         and "\n" not in line_str  # multiline strings
3522         and not line.contains_standalone_comments()
3523     )
3524
3525
3526 def can_be_split(line: Line) -> bool:
3527     """Return False if the line cannot be split *for sure*.
3528
3529     This is not an exhaustive search but a cheap heuristic that we can use to
3530     avoid some unfortunate formattings (mostly around wrapping unsplittable code
3531     in unnecessary parentheses).
3532     """
3533     leaves = line.leaves
3534     if len(leaves) < 2:
3535         return False
3536
3537     if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
3538         call_count = 0
3539         dot_count = 0
3540         next = leaves[-1]
3541         for leaf in leaves[-2::-1]:
3542             if leaf.type in OPENING_BRACKETS:
3543                 if next.type not in CLOSING_BRACKETS:
3544                     return False
3545
3546                 call_count += 1
3547             elif leaf.type == token.DOT:
3548                 dot_count += 1
3549             elif leaf.type == token.NAME:
3550                 if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
3551                     return False
3552
3553             elif leaf.type not in CLOSING_BRACKETS:
3554                 return False
3555
3556             if dot_count > 1 and call_count > 1:
3557                 return False
3558
3559     return True
3560
3561
3562 def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
3563     """Does `line` have a shape safe to reformat without optional parens around it?
3564
3565     Returns True for only a subset of potentially nice looking formattings but
3566     the point is to not return false positives that end up producing lines that
3567     are too long.
3568     """
3569     bt = line.bracket_tracker
3570     if not bt.delimiters:
3571         # Without delimiters the optional parentheses are useless.
3572         return True
3573
3574     max_priority = bt.max_delimiter_priority()
3575     if bt.delimiter_count_with_priority(max_priority) > 1:
3576         # With more than one delimiter of a kind the optional parentheses read better.
3577         return False
3578
3579     if max_priority == DOT_PRIORITY:
3580         # A single stranded method call doesn't require optional parentheses.
3581         return True
3582
3583     assert len(line.leaves) >= 2, "Stranded delimiter"
3584
3585     first = line.leaves[0]
3586     second = line.leaves[1]
3587     penultimate = line.leaves[-2]
3588     last = line.leaves[-1]
3589
3590     # With a single delimiter, omit if the expression starts or ends with
3591     # a bracket.
3592     if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
3593         remainder = False
3594         length = 4 * line.depth
3595         for _index, leaf, leaf_length in enumerate_with_length(line):
3596             if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
3597                 remainder = True
3598             if remainder:
3599                 length += leaf_length
3600                 if length > line_length:
3601                     break
3602
3603                 if leaf.type in OPENING_BRACKETS:
3604                     # There are brackets we can further split on.
3605                     remainder = False
3606
3607         else:
3608             # checked the entire string and line length wasn't exceeded
3609             if len(line.leaves) == _index + 1:
3610                 return True
3611
3612         # Note: we are not returning False here because a line might have *both*
3613         # a leading opening bracket and a trailing closing bracket.  If the
3614         # opening bracket doesn't match our rule, maybe the closing will.
3615
3616     if (
3617         last.type == token.RPAR
3618         or last.type == token.RBRACE
3619         or (
3620             # don't use indexing for omitting optional parentheses;
3621             # it looks weird
3622             last.type == token.RSQB
3623             and last.parent
3624             and last.parent.type != syms.trailer
3625         )
3626     ):
3627         if penultimate.type in OPENING_BRACKETS:
3628             # Empty brackets don't help.
3629             return False
3630
3631         if is_multiline_string(first):
3632             # Additional wrapping of a multiline string in this situation is
3633             # unnecessary.
3634             return True
3635
3636         length = 4 * line.depth
3637         seen_other_brackets = False
3638         for _index, leaf, leaf_length in enumerate_with_length(line):
3639             length += leaf_length
3640             if leaf is last.opening_bracket:
3641                 if seen_other_brackets or length <= line_length:
3642                     return True
3643
3644             elif leaf.type in OPENING_BRACKETS:
3645                 # There are brackets we can further split on.
3646                 seen_other_brackets = True
3647
3648     return False
3649
3650
3651 def get_cache_file(mode: FileMode) -> Path:
3652     return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"
3653
3654
3655 def read_cache(mode: FileMode) -> Cache:
3656     """Read the cache if it exists and is well formed.
3657
3658     If it is not well formed, the call to write_cache later should resolve the issue.
3659     """
3660     cache_file = get_cache_file(mode)
3661     if not cache_file.exists():
3662         return {}
3663
3664     with cache_file.open("rb") as fobj:
3665         try:
3666             cache: Cache = pickle.load(fobj)
3667         except pickle.UnpicklingError:
3668             return {}
3669
3670     return cache
3671
3672
3673 def get_cache_info(path: Path) -> CacheInfo:
3674     """Return the information used to check if a file is already formatted or not."""
3675     stat = path.stat()
3676     return stat.st_mtime, stat.st_size
3677
3678
3679 def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
3680     """Split an iterable of paths in `sources` into two sets.
3681
3682     The first contains paths of files that modified on disk or are not in the
3683     cache. The other contains paths to non-modified files.
3684     """
3685     todo, done = set(), set()
3686     for src in sources:
3687         src = src.resolve()
3688         if cache.get(src) != get_cache_info(src):
3689             todo.add(src)
3690         else:
3691             done.add(src)
3692     return todo, done
3693
3694
3695 def write_cache(cache: Cache, sources: Iterable[Path], mode: FileMode) -> None:
3696     """Update the cache file."""
3697     cache_file = get_cache_file(mode)
3698     try:
3699         CACHE_DIR.mkdir(parents=True, exist_ok=True)
3700         new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
3701         with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
3702             pickle.dump(new_cache, f, protocol=pickle.HIGHEST_PROTOCOL)
3703         os.replace(f.name, cache_file)
3704     except OSError:
3705         pass
3706
3707
3708 def patch_click() -> None:
3709     """Make Click not crash.
3710
3711     On certain misconfigured environments, Python 3 selects the ASCII encoding as the
3712     default which restricts paths that it can access during the lifetime of the
3713     application.  Click refuses to work in this scenario by raising a RuntimeError.
3714
3715     In case of Black the likelihood that non-ASCII characters are going to be used in
3716     file paths is minimal since it's Python source code.  Moreover, this crash was
3717     spurious on Python 3.7 thanks to PEP 538 and PEP 540.
3718     """
3719     try:
3720         from click import core
3721         from click import _unicodefun  # type: ignore
3722     except ModuleNotFoundError:
3723         return
3724
3725     for module in (core, _unicodefun):
3726         if hasattr(module, "_verify_python3_env"):
3727             module._verify_python3_env = lambda: None
3728
3729
3730 def patched_main() -> None:
3731     freeze_support()
3732     patch_click()
3733     main()
3734
3735
3736 if __name__ == "__main__":
3737     patched_main()