black.py

   1 import asyncio
   2 from asyncio.base_events import BaseEventLoop
   3 from concurrent.futures import Executor, ProcessPoolExecutor
   4 from datetime import datetime
   5 from enum import Enum, Flag
   6 from functools import lru_cache, partial, wraps
   7 import io
   8 import keyword
   9 import logging
  10 from multiprocessing import Manager
  11 import os
  12 from pathlib import Path
  13 import pickle
  14 import re
  15 import signal
  16 import sys
  17 import tokenize
  18 from typing import (
  19     Any,
  20     Callable,
  21     Collection,
  22     Dict,
  23     Generator,
  24     Generic,
  25     Iterable,
  26     Iterator,
  27     List,
  28     Optional,
  29     Pattern,
  30     Sequence,
  31     Set,
  32     Tuple,
  33     TypeVar,
  34     Union,
  35     cast,
  36 )
  37
  38 from appdirs import user_cache_dir
  39 from attr import dataclass, Factory
  40 import click
  41 import toml
  42
  43 # lib2to3 fork
  44 from blib2to3.pytree import Node, Leaf, type_repr
  45 from blib2to3 import pygram, pytree
  46 from blib2to3.pgen2 import driver, token
  47 from blib2to3.pgen2.parse import ParseError
  48
  49
__version__ = "18.6b4"
# Default value of the `--line-length` option.
DEFAULT_LINE_LENGTH = 88
# Default value of the `--exclude` option: a regex over forward-slash paths.
DEFAULT_EXCLUDES = (
    r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
)
# Default value of the `--include` option: names ending in ".py" or ".pyi".
DEFAULT_INCLUDES = r"\.pyi?$"
# Per-user cache directory keyed by the Black version; also handed to
# `pygram.initialize()` below.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))


# types
FileContent = str
Encoding = str
NewLine = str
Depth = int
NodeType = int
LeafID = int
Priority = int
Index = int
LN = Union[Leaf, Node]
SplitFunc = Callable[["Line", bool], Iterator["Line"]]
Timestamp = float
FileSize = int
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
# Message helpers: both write to stderr (`err=True`); `out` is bold, `err` is red.
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)

# Import-time side effect: set up the blib2to3 grammars using CACHE_DIR, then
# keep a short alias for the Python grammar symbols.
pygram.initialize(CACHE_DIR)
syms = pygram.python_symbols
  79
  80
  81 class NothingChanged(UserWarning):
  82     """Raised by :func:`format_file` when reformatted code is the same as source."""
  83
  84
  85 class CannotSplit(Exception):
  86     """A readable split that fits the allotted line length is impossible.
  87
  88     Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
  89     :func:`delimiter_split`.
  90     """
  91
  92
  93 class WriteBack(Enum):
  94     NO = 0
  95     YES = 1
  96     DIFF = 2
  97
  98     @classmethod
  99     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
 100         if check and not diff:
 101             return cls.NO
 102
 103         return cls.DIFF if diff else cls.YES
 104
 105
class Changed(Enum):
    """Per-file outcome passed to `report.done()`."""

    # Formatting ran and reported no change.
    NO = 0
    # Formatting was skipped because the cache entry matched the file.
    CACHED = 1
    # Formatting ran and reported a change.
    YES = 2
 110
 111
 112 class FileMode(Flag):
 113     AUTO_DETECT = 0
 114     PYTHON36 = 1
 115     PYI = 2
 116     NO_STRING_NORMALIZATION = 4
 117
 118     @classmethod
 119     def from_configuration(
 120         cls, *, py36: bool, pyi: bool, skip_string_normalization: bool
 121     ) -> "FileMode":
 122         mode = cls.AUTO_DETECT
 123         if py36:
 124             mode |= cls.PYTHON36
 125         if pyi:
 126             mode |= cls.PYI
 127         if skip_string_normalization:
 128             mode |= cls.NO_STRING_NORMALIZATION
 129         return mode
 130
 131
 132 def read_pyproject_toml(
 133     ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
 134 ) -> Optional[str]:
 135     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 136
 137     Returns the path to a successfully found and read configuration file, None
 138     otherwise.
 139     """
 140     assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
 141     if not value:
 142         root = find_project_root(ctx.params.get("src", ()))
 143         path = root / "pyproject.toml"
 144         if path.is_file():
 145             value = str(path)
 146         else:
 147             return None
 148
 149     try:
 150         pyproject_toml = toml.load(value)
 151         config = pyproject_toml.get("tool", {}).get("black", {})
 152     except (toml.TomlDecodeError, OSError) as e:
 153         raise click.BadOptionUsage(f"Error reading configuration file: {e}", ctx)
 154
 155     if not config:
 156         return None
 157
 158     if ctx.default_map is None:
 159         ctx.default_map = {}
 160     ctx.default_map.update(  # type: ignore  # bad types in .pyi
 161         {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 162     )
 163     return value
 164
 165
# NOTE: the decorator stack below defines the command-line interface; the
# function's docstring is kept short because click uses it as the help text.
@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.option(
    "-l",
    "--line-length",
    type=int,
    default=DEFAULT_LINE_LENGTH,
    help="How many character per line to allow.",
    show_default=True,
)
@click.option(
    "--py36",
    is_flag=True,
    help=(
        "Allow using Python 3.6-only syntax on all input files.  This will put "
        "trailing commas in function signatures and calls also after *args and "
        "**kwargs.  [default: per-file auto-detection]"
    ),
)
@click.option(
    "--pyi",
    is_flag=True,
    help=(
        "Format all input files like typing stubs regardless of file extension "
        "(useful when piping source on standard input)."
    ),
)
@click.option(
    "-S",
    "--skip-string-normalization",
    is_flag=True,
    help="Don't normalize string quotes or prefixes.",
)
@click.option(
    "--check",
    is_flag=True,
    help=(
        "Don't write the files back, just return the status.  Return code 0 "
        "means nothing would change.  Return code 1 means some files would be "
        "reformatted.  Return code 123 means there was an internal error."
    ),
)
@click.option(
    "--diff",
    is_flag=True,
    help="Don't write the files back, just output a diff for each file on stdout.",
)
@click.option(
    "--fast/--safe",
    is_flag=True,
    help="If --fast given, skip temporary sanity checks. [default: --safe]",
)
@click.option(
    "--include",
    type=str,
    default=DEFAULT_INCLUDES,
    help=(
        "A regular expression that matches files and directories that should be "
        "included on recursive searches.  An empty value means all files are "
        "included regardless of the name.  Use forward slashes for directories on "
        "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
        "later."
    ),
    show_default=True,
)
@click.option(
    "--exclude",
    type=str,
    default=DEFAULT_EXCLUDES,
    help=(
        "A regular expression that matches files and directories that should be "
        "excluded on recursive searches.  An empty value means no paths are excluded. "
        "Use forward slashes for directories on all platforms (Windows, too).  "
        "Exclusions are calculated first, inclusions later."
    ),
    show_default=True,
)
@click.option(
    "-q",
    "--quiet",
    is_flag=True,
    help=(
        "Don't emit non-error messages to stderr. Errors are still emitted, "
        "silence those with 2>/dev/null."
    ),
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help=(
        "Also emit messages to stderr about files that were not changed or were "
        "ignored due to --exclude=."
    ),
)
@click.version_option(version=__version__)
@click.argument(
    "src",
    nargs=-1,
    type=click.Path(
        exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
    ),
    is_eager=True,
)
@click.option(
    "--config",
    type=click.Path(
        exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
    ),
    is_eager=True,
    callback=read_pyproject_toml,
    help="Read configuration from PATH.",
)
@click.pass_context
def main(
    ctx: click.Context,
    line_length: int,
    check: bool,
    diff: bool,
    fast: bool,
    pyi: bool,
    py36: bool,
    skip_string_normalization: bool,
    quiet: bool,
    verbose: bool,
    include: str,
    exclude: str,
    src: Tuple[str],
    config: Optional[str],
) -> None:
    """The uncompromising code formatter."""
    # Translate the boolean flags into the enums the formatting functions take.
    write_back = WriteBack.from_configuration(check=check, diff=diff)
    mode = FileMode.from_configuration(
        py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
    )
    # `config` is the value returned by the `read_pyproject_toml` callback.
    if config and verbose:
        out(f"Using configuration from {config}.", bold=False, fg="blue")
    # Compile the include/exclude patterns; an invalid pattern is reported and
    # the command exits with status 2.
    try:
        include_regex = re_compile_maybe_verbose(include)
    except re.error:
        err(f"Invalid regular expression for include given: {include!r}")
        ctx.exit(2)
    try:
        exclude_regex = re_compile_maybe_verbose(exclude)
    except re.error:
        err(f"Invalid regular expression for exclude given: {exclude!r}")
        ctx.exit(2)
    report = Report(check=check, quiet=quiet, verbose=verbose)
    root = find_project_root(src)
    # Expand directories into the files they contain; explicitly named files
    # and "-" are taken as given.
    sources: Set[Path] = set()
    for s in src:
        p = Path(s)
        if p.is_dir():
            sources.update(
                gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
            )
        elif p.is_file() or s == "-":
            # if a file was explicitly given, we don't care about its extension
            sources.add(p)
        else:
            err(f"invalid path: {s}")
    if len(sources) == 0:
        if verbose or not quiet:
            out("No paths given. Nothing to do 😴")
        ctx.exit(0)

    # One source is formatted in this process; several are scheduled on a
    # process pool driven by the asyncio event loop.
    if len(sources) == 1:
        reformat_one(
            src=sources.pop(),
            line_length=line_length,
            fast=fast,
            write_back=write_back,
            mode=mode,
            report=report,
        )
    else:
        loop = asyncio.get_event_loop()
        executor = ProcessPoolExecutor(max_workers=os.cpu_count())
        try:
            loop.run_until_complete(
                schedule_formatting(
                    sources=sources,
                    line_length=line_length,
                    fast=fast,
                    write_back=write_back,
                    mode=mode,
                    report=report,
                    loop=loop,
                    executor=executor,
                )
            )
        finally:
            shutdown(loop)
    # Print the summary unless silenced, then exit with the report's code.
    if verbose or not quiet:
        bang = "💥 💔 💥" if report.return_code else "✨ 🍰 ✨"
        out(f"All done! {bang}")
        click.secho(str(report), err=True)
    ctx.exit(report.return_code)
 363
 364
 365 def reformat_one(
 366     src: Path,
 367     line_length: int,
 368     fast: bool,
 369     write_back: WriteBack,
 370     mode: FileMode,
 371     report: "Report",
 372 ) -> None:
 373     """Reformat a single file under `src` without spawning child processes.
 374
 375     If `quiet` is True, non-error messages are not output. `line_length`,
 376     `write_back`, `fast` and `pyi` options are passed to
 377     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 378     """
 379     try:
 380         changed = Changed.NO
 381         if not src.is_file() and str(src) == "-":
 382             if format_stdin_to_stdout(
 383                 line_length=line_length, fast=fast, write_back=write_back, mode=mode
 384             ):
 385                 changed = Changed.YES
 386         else:
 387             cache: Cache = {}
 388             if write_back != WriteBack.DIFF:
 389                 cache = read_cache(line_length, mode)
 390                 res_src = src.resolve()
 391                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 392                     changed = Changed.CACHED
 393             if changed is not Changed.CACHED and format_file_in_place(
 394                 src,
 395                 line_length=line_length,
 396                 fast=fast,
 397                 write_back=write_back,
 398                 mode=mode,
 399             ):
 400                 changed = Changed.YES
 401             if write_back == WriteBack.YES and changed is not Changed.NO:
 402                 write_cache(cache, [src], line_length, mode)
 403         report.done(src, changed)
 404     except Exception as exc:
 405         report.failed(src, str(exc))
 406
 407
 408 async def schedule_formatting(
 409     sources: Set[Path],
 410     line_length: int,
 411     fast: bool,
 412     write_back: WriteBack,
 413     mode: FileMode,
 414     report: "Report",
 415     loop: BaseEventLoop,
 416     executor: Executor,
 417 ) -> None:
 418     """Run formatting of `sources` in parallel using the provided `executor`.
 419
 420     (Use ProcessPoolExecutors for actual parallelism.)
 421
 422     `line_length`, `write_back`, `fast`, and `pyi` options are passed to
 423     :func:`format_file_in_place`.
 424     """
 425     cache: Cache = {}
 426     if write_back != WriteBack.DIFF:
 427         cache = read_cache(line_length, mode)
 428         sources, cached = filter_cached(cache, sources)
 429         for src in sorted(cached):
 430             report.done(src, Changed.CACHED)
 431     cancelled = []
 432     formatted = []
 433     if sources:
 434         lock = None
 435         if write_back == WriteBack.DIFF:
 436             # For diff output, we need locks to ensure we don't interleave output
 437             # from different processes.
 438             manager = Manager()
 439             lock = manager.Lock()
 440         tasks = {
 441             loop.run_in_executor(
 442                 executor,
 443                 format_file_in_place,
 444                 src,
 445                 line_length,
 446                 fast,
 447                 write_back,
 448                 mode,
 449                 lock,
 450             ): src
 451             for src in sorted(sources)
 452         }
 453         pending: Iterable[asyncio.Task] = tasks.keys()
 454         try:
 455             loop.add_signal_handler(signal.SIGINT, cancel, pending)
 456             loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 457         except NotImplementedError:
 458             # There are no good alternatives for these on Windows
 459             pass
 460         while pending:
 461             done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 462             for task in done:
 463                 src = tasks.pop(task)
 464                 if task.cancelled():
 465                     cancelled.append(task)
 466                 elif task.exception():
 467                     report.failed(src, str(task.exception()))
 468                 else:
 469                     formatted.append(src)
 470                     report.done(src, Changed.YES if task.result() else Changed.NO)
 471     if cancelled:
 472         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 473     if write_back == WriteBack.YES and formatted:
 474         write_cache(cache, formatted, line_length, mode)
 475
 476
 477 def format_file_in_place(
 478     src: Path,
 479     line_length: int,
 480     fast: bool,
 481     write_back: WriteBack = WriteBack.NO,
 482     mode: FileMode = FileMode.AUTO_DETECT,
 483     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 484 ) -> bool:
 485     """Format file under `src` path. Return True if changed.
 486
 487     If `write_back` is True, write reformatted code back to stdout.
 488     `line_length` and `fast` options are passed to :func:`format_file_contents`.
 489     """
 490     if src.suffix == ".pyi":
 491         mode |= FileMode.PYI
 492
 493     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 494     with open(src, "rb") as buf:
 495         src_contents, encoding, newline = decode_bytes(buf.read())
 496     try:
 497         dst_contents = format_file_contents(
 498             src_contents, line_length=line_length, fast=fast, mode=mode
 499         )
 500     except NothingChanged:
 501         return False
 502
 503     if write_back == write_back.YES:
 504         with open(src, "w", encoding=encoding, newline=newline) as f:
 505             f.write(dst_contents)
 506     elif write_back == write_back.DIFF:
 507         now = datetime.utcnow()
 508         src_name = f"{src}\t{then} +0000"
 509         dst_name = f"{src}\t{now} +0000"
 510         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 511         if lock:
 512             lock.acquire()
 513         try:
 514             f = io.TextIOWrapper(
 515                 sys.stdout.buffer,
 516                 encoding=encoding,
 517                 newline=newline,
 518                 write_through=True,
 519             )
 520             f.write(diff_contents)
 521             f.detach()
 522         finally:
 523             if lock:
 524                 lock.release()
 525     return True
 526
 527
 528 def format_stdin_to_stdout(
 529     line_length: int,
 530     fast: bool,
 531     write_back: WriteBack = WriteBack.NO,
 532     mode: FileMode = FileMode.AUTO_DETECT,
 533 ) -> bool:
 534     """Format file on stdin. Return True if changed.
 535
 536     If `write_back` is True, write reformatted code back to stdout.
 537     `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
 538     :func:`format_file_contents`.
 539     """
 540     then = datetime.utcnow()
 541     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 542     dst = src
 543     try:
 544         dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
 545         return True
 546
 547     except NothingChanged:
 548         return False
 549
 550     finally:
 551         f = io.TextIOWrapper(
 552             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 553         )
 554         if write_back == WriteBack.YES:
 555             f.write(dst)
 556         elif write_back == WriteBack.DIFF:
 557             now = datetime.utcnow()
 558             src_name = f"STDIN\t{then} +0000"
 559             dst_name = f"STDOUT\t{now} +0000"
 560             f.write(diff(src, dst, src_name, dst_name))
 561         f.detach()
 562
 563
 564 def format_file_contents(
 565     src_contents: str,
 566     *,
 567     line_length: int,
 568     fast: bool,
 569     mode: FileMode = FileMode.AUTO_DETECT,
 570 ) -> FileContent:
 571     """Reformat contents a file and return new contents.
 572
 573     If `fast` is False, additionally confirm that the reformatted code is
 574     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 575     `line_length` is passed to :func:`format_str`.
 576     """
 577     if src_contents.strip() == "":
 578         raise NothingChanged
 579
 580     dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
 581     if src_contents == dst_contents:
 582         raise NothingChanged
 583
 584     if not fast:
 585         assert_equivalent(src_contents, dst_contents)
 586         assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
 587     return dst_contents
 588
 589
 590 def format_str(
 591     src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
 592 ) -> FileContent:
 593     """Reformat a string and return new contents.
 594
 595     `line_length` determines how many characters per line are allowed.
 596     """
 597     src_node = lib2to3_parse(src_contents)
 598     dst_contents = ""
 599     future_imports = get_future_imports(src_node)
 600     is_pyi = bool(mode & FileMode.PYI)
 601     py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
 602     normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
 603     normalize_fmt_off(src_node)
 604     lines = LineGenerator(
 605         remove_u_prefix=py36 or "unicode_literals" in future_imports,
 606         is_pyi=is_pyi,
 607         normalize_strings=normalize_strings,
 608         allow_underscores=py36,
 609     )
 610     elt = EmptyLineTracker(is_pyi=is_pyi)
 611     empty_line = Line()
 612     after = 0
 613     for current_line in lines.visit(src_node):
 614         for _ in range(after):
 615             dst_contents += str(empty_line)
 616         before, after = elt.maybe_empty_lines(current_line)
 617         for _ in range(before):
 618             dst_contents += str(empty_line)
 619         for line in split_line(current_line, line_length=line_length, py36=py36):
 620             dst_contents += str(line)
 621     return dst_contents
 622
 623
 624 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 625     """Return a tuple of (decoded_contents, encoding, newline).
 626
 627     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 628     universal newlines (i.e. only contains LF).
 629     """
 630     srcbuf = io.BytesIO(src)
 631     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 632     if not lines:
 633         return "", encoding, "\n"
 634
 635     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 636     srcbuf.seek(0)
 637     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 638         return tiow.read(), encoding, newline
 639
 640
# Grammars that `lib2to3_parse()` tries in this order; the first one that
# parses the source without a `ParseError` is used.
GRAMMARS = [
    pygram.python_grammar_no_print_statement_no_exec_statement,
    pygram.python_grammar_no_print_statement,
    pygram.python_grammar,
]
 646
 647
 648 def lib2to3_parse(src_txt: str) -> Node:
 649     """Given a string with source, return the lib2to3 Node."""
 650     grammar = pygram.python_grammar_no_print_statement
 651     if src_txt[-1:] != "\n":
 652         src_txt += "\n"
 653     for grammar in GRAMMARS:
 654         drv = driver.Driver(grammar, pytree.convert)
 655         try:
 656             result = drv.parse_string(src_txt, True)
 657             break
 658
 659         except ParseError as pe:
 660             lineno, column = pe.context[1]
 661             lines = src_txt.splitlines()
 662             try:
 663                 faulty_line = lines[lineno - 1]
 664             except IndexError:
 665                 faulty_line = "<line number missing in source>"
 666             exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 667     else:
 668         raise exc from None
 669
 670     if isinstance(result, Leaf):
 671         result = Node(syms.file_input, [result])
 672     return result
 673
 674
 675 def lib2to3_unparse(node: Node) -> str:
 676     """Given a lib2to3 node, return its string representation."""
 677     code = str(node)
 678     return code
 679
 680
# Type of the objects a `Visitor` yields from its `visit()` methods.
T = TypeVar("T")
 682
 683
 684 class Visitor(Generic[T]):
 685     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 686
 687     def visit(self, node: LN) -> Iterator[T]:
 688         """Main method to visit `node` and its children.
 689
 690         It tries to find a `visit_*()` method for the given `node.type`, like
 691         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 692         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 693         instead.
 694
 695         Then yields objects of type `T` from the selected visitor.
 696         """
 697         if node.type < 256:
 698             name = token.tok_name[node.type]
 699         else:
 700             name = type_repr(node.type)
 701         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 702
 703     def visit_default(self, node: LN) -> Iterator[T]:
 704         """Default `visit_*()` implementation. Recurses to children of `node`."""
 705         if isinstance(node, Node):
 706             for child in node.children:
 707                 yield from self.visit(child)
 708
 709
 710 @dataclass
 711 class DebugVisitor(Visitor[T]):
 712     tree_depth: int = 0
 713
 714     def visit_default(self, node: LN) -> Iterator[T]:
 715         indent = " " * (2 * self.tree_depth)
 716         if isinstance(node, Node):
 717             _type = type_repr(node.type)
 718             out(f"{indent}{_type}", fg="yellow")
 719             self.tree_depth += 1
 720             for child in node.children:
 721                 yield from self.visit(child)
 722
 723             self.tree_depth -= 1
 724             out(f"{indent}/{_type}", fg="yellow", bold=False)
 725         else:
 726             _type = token.tok_name.get(node.type, str(node.type))
 727             out(f"{indent}{_type}", fg="blue", nl=False)
 728             if node.prefix:
 729                 # We don't have to handle prefixes for `Node` objects since
 730                 # that delegates to the first child anyway.
 731                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 732             out(f" {node.value!r}", fg="blue", bold=False)
 733
 734     @classmethod
 735     def show(cls, code: Union[str, Leaf, Node]) -> None:
 736         """Pretty-print the lib2to3 AST of a given string of `code`.
 737
 738         Convenience method for debugging.
 739         """
 740         v: DebugVisitor[None] = DebugVisitor()
 741         if isinstance(code, str):
 742             code = lib2to3_parse(code)
 743         list(v.visit(code))
 744
 745
# Python keywords, taken from the stdlib `keyword` module.
KEYWORDS = set(keyword.kwlist)
# Token types that carry only layout information.
WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
FLOW_CONTROL = {"return", "raise", "break", "continue"}
# Grammar symbols for compound statements and definitions.
STATEMENT = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
# Extra token type added by this module.  It is registered in `token.tok_name`
# so name lookups for types below 256 (see `Visitor.visit`) resolve it.
STANDALONE_COMMENT = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
LOGIC_OPERATORS = {"and", "or"}
COMPARATORS = {
    token.LESS,
    token.GREATER,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESSEQUAL,
    token.GREATEREQUAL,
}
MATH_OPERATORS = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
STARS = {token.STAR, token.DOUBLESTAR}
VARARGS_PARENTS = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
UNPACKING_PARENTS = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
    syms.testlist_star_expr,
}
TEST_DESCENDANTS = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
# Assignment and augmented-assignment operators, as source text.
ASSIGNMENTS = {
    "=",
    "+=",
    "-=",
    "*=",
    "@=",
    "/=",
    "%=",
    "&=",
    "|=",
    "^=",
    "<<=",
    ">>=",
    "**=",
    "//=",
}
# Delimiter priorities; a larger number is a higher priority.
# NOTE(review): presumably these are the values returned by the
# `is_split_*_delimiter()` helpers and stored in `BracketTracker.delimiters`
# -- confirm against those helpers, which are outside this section.
COMPREHENSION_PRIORITY = 20
COMMA_PRIORITY = 18
TERNARY_PRIORITY = 16
LOGIC_PRIORITY = 14
STRING_PRIORITY = 12
COMPARATOR_PRIORITY = 10
# Priority per math operator token; all are below COMPARATOR_PRIORITY.
MATH_PRIORITIES = {
    token.VBAR: 9,
    token.CIRCUMFLEX: 8,
    token.AMPER: 7,
    token.LEFTSHIFT: 6,
    token.RIGHTSHIFT: 6,
    token.PLUS: 5,
    token.MINUS: 5,
    token.STAR: 4,
    token.SLASH: 4,
    token.DOUBLESLASH: 4,
    token.PERCENT: 4,
    token.AT: 4,
    token.TILDE: 3,
    token.DOUBLESTAR: 2,
}
DOT_PRIORITY = 1
 857
 858
 859 @dataclass
 860 class BracketTracker:
 861     """Keeps track of brackets on a line."""
 862
 863     depth: int = 0
 864     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
 865     delimiters: Dict[LeafID, Priority] = Factory(dict)
 866     previous: Optional[Leaf] = None
 867     _for_loop_variable: int = 0
 868     _lambda_arguments: int = 0
 869
    def mark(self, leaf: Leaf) -> None:
        """Mark `leaf` with bracket-related metadata. Keep track of delimiters.

        All leaves receive an int `bracket_depth` field that stores how deep
        within brackets a given leaf is. 0 means there are no enclosing brackets
        that started on this line.

        If a leaf is itself a closing bracket, it receives an `opening_bracket`
        field that it forms a pair with. This is a one-directional link to
        avoid reference cycles.

        If a leaf is a delimiter (a token on which Black can split the line if
        needed) and it's on depth 0, its `id()` is stored in the tracker's
        `delimiters` field.
        """
        # Comments are ignored entirely: no depth, no delimiter bookkeeping.
        if leaf.type == token.COMMENT:
            return

        # Undo the artificial depth added after `for` / `lambda` once the
        # terminating `in` / `:` is reached, before depth is recorded.
        self.maybe_decrement_after_for_loop_variable(leaf)
        self.maybe_decrement_after_lambda_arguments(leaf)
        if leaf.type in CLOSING_BRACKETS:
            # A closing bracket sits at the depth of its opening counterpart.
            self.depth -= 1
            opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
            leaf.opening_bracket = opening_bracket
        leaf.bracket_depth = self.depth
        if self.depth == 0:
            # A "split before" delimiter is recorded on the previous leaf,
            # a "split after" delimiter on the leaf itself.
            delim = is_split_before_delimiter(leaf, self.previous)
            if delim and self.previous is not None:
                self.delimiters[id(self.previous)] = delim
            else:
                delim = is_split_after_delimiter(leaf, self.previous)
                if delim:
                    self.delimiters[id(leaf)] = delim
        if leaf.type in OPENING_BRACKETS:
            # Keyed by the current depth and the closing type from `BRACKET`,
            # which is exactly what the closing-bracket branch above pops.
            self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
            self.depth += 1
        self.previous = leaf
        # Leaves following `lambda` / `for` are tracked one level deeper.
        self.maybe_increment_lambda_arguments(leaf)
        self.maybe_increment_for_loop_variable(leaf)
 909
 910     def any_open_brackets(self) -> bool:
 911         """Return True if there is an yet unmatched open bracket on the line."""
 912         return bool(self.bracket_match)
 913
 914     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
 915         """Return the highest priority of a delimiter found on the line.
 916
 917         Values are consistent with what `is_split_*_delimiter()` return.
 918         Raises ValueError on no delimiters.
 919         """
 920         return max(v for k, v in self.delimiters.items() if k not in exclude)
 921
 922     def delimiter_count_with_priority(self, priority: int = 0) -> int:
 923         """Return the number of delimiters with the given `priority`.
 924
 925         If no `priority` is passed, defaults to max priority on the line.
 926         """
 927         if not self.delimiters:
 928             return 0
 929
 930         priority = priority or self.max_delimiter_priority()
 931         return sum(1 for p in self.delimiters.values() if p == priority)
 932
 933     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
 934         """In a for loop, or comprehension, the variables are often unpacks.
 935
 936         To avoid splitting on the comma in this situation, increase the depth of
 937         tokens between `for` and `in`.
 938         """
 939         if leaf.type == token.NAME and leaf.value == "for":
 940             self.depth += 1
 941             self._for_loop_variable += 1
 942             return True
 943
 944         return False
 945
 946     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
 947         """See `maybe_increment_for_loop_variable` above for explanation."""
 948         if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
 949             self.depth -= 1
 950             self._for_loop_variable -= 1
 951             return True
 952
 953         return False
 954
 955     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
 956         """In a lambda expression, there might be more than one argument.
 957
 958         To avoid splitting on the comma in this situation, increase the depth of
 959         tokens between `lambda` and `:`.
 960         """
 961         if leaf.type == token.NAME and leaf.value == "lambda":
 962             self.depth += 1
 963             self._lambda_arguments += 1
 964             return True
 965
 966         return False
 967
 968     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
 969         """See `maybe_increment_lambda_arguments` above for explanation."""
 970         if self._lambda_arguments and leaf.type == token.COLON:
 971             self.depth -= 1
 972             self._lambda_arguments -= 1
 973             return True
 974
 975         return False
 976
 977     def get_open_lsqb(self) -> Optional[Leaf]:
 978         """Return the most recent opening square bracket (if any)."""
 979         return self.bracket_match.get((self.depth - 1, token.RSQB))
 980
 981
 982 @dataclass
 983 class Line:
 984     """Holds leaves and comments. Can be printed with `str(line)`."""
 985
 986     depth: int = 0
 987     leaves: List[Leaf] = Factory(list)
 988     comments: List[Tuple[Index, Leaf]] = Factory(list)
 989     bracket_tracker: BracketTracker = Factory(BracketTracker)
 990     inside_brackets: bool = False
 991     should_explode: bool = False
 992
 993     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
 994         """Add a new `leaf` to the end of the line.
 995
 996         Unless `preformatted` is True, the `leaf` will receive a new consistent
 997         whitespace prefix and metadata applied by :class:`BracketTracker`.
 998         Trailing commas are maybe removed, unpacked for loop variables are
 999         demoted from being delimiters.
1000
1001         Inline comments are put aside.
1002         """
1003         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1004         if not has_value:
1005             return
1006
1007         if token.COLON == leaf.type and self.is_class_paren_empty:
1008             del self.leaves[-2:]
1009         if self.leaves and not preformatted:
1010             # Note: at this point leaf.prefix should be empty except for
1011             # imports, for which we only preserve newlines.
1012             leaf.prefix += whitespace(
1013                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1014             )
1015         if self.inside_brackets or not preformatted:
1016             self.bracket_tracker.mark(leaf)
1017             self.maybe_remove_trailing_comma(leaf)
1018         if not self.append_comment(leaf):
1019             self.leaves.append(leaf)
1020
1021     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1022         """Like :func:`append()` but disallow invalid standalone comment structure.
1023
1024         Raises ValueError when any `leaf` is appended after a standalone comment
1025         or when a standalone comment is not the first leaf on the line.
1026         """
1027         if self.bracket_tracker.depth == 0:
1028             if self.is_comment:
1029                 raise ValueError("cannot append to standalone comments")
1030
1031             if self.leaves and leaf.type == STANDALONE_COMMENT:
1032                 raise ValueError(
1033                     "cannot append standalone comments to a populated line"
1034                 )
1035
1036         self.append(leaf, preformatted=preformatted)
1037
1038     @property
1039     def is_comment(self) -> bool:
1040         """Is this line a standalone comment?"""
1041         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1042
1043     @property
1044     def is_decorator(self) -> bool:
1045         """Is this line a decorator?"""
1046         return bool(self) and self.leaves[0].type == token.AT
1047
1048     @property
1049     def is_import(self) -> bool:
1050         """Is this an import line?"""
1051         return bool(self) and is_import(self.leaves[0])
1052
1053     @property
1054     def is_class(self) -> bool:
1055         """Is this line a class definition?"""
1056         return (
1057             bool(self)
1058             and self.leaves[0].type == token.NAME
1059             and self.leaves[0].value == "class"
1060         )
1061
1062     @property
1063     def is_stub_class(self) -> bool:
1064         """Is this line a class definition with a body consisting only of "..."?"""
1065         return self.is_class and self.leaves[-3:] == [
1066             Leaf(token.DOT, ".") for _ in range(3)
1067         ]
1068
1069     @property
1070     def is_def(self) -> bool:
1071         """Is this a function definition? (Also returns True for async defs.)"""
1072         try:
1073             first_leaf = self.leaves[0]
1074         except IndexError:
1075             return False
1076
1077         try:
1078             second_leaf: Optional[Leaf] = self.leaves[1]
1079         except IndexError:
1080             second_leaf = None
1081         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1082             first_leaf.type == token.ASYNC
1083             and second_leaf is not None
1084             and second_leaf.type == token.NAME
1085             and second_leaf.value == "def"
1086         )
1087
1088     @property
1089     def is_class_paren_empty(self) -> bool:
1090         """Is this a class with no base classes but using parentheses?
1091
1092         Those are unnecessary and should be removed.
1093         """
1094         return (
1095             bool(self)
1096             and len(self.leaves) == 4
1097             and self.is_class
1098             and self.leaves[2].type == token.LPAR
1099             and self.leaves[2].value == "("
1100             and self.leaves[3].type == token.RPAR
1101             and self.leaves[3].value == ")"
1102         )
1103
1104     @property
1105     def is_triple_quoted_string(self) -> bool:
1106         """Is the line a triple quoted string?"""
1107         return (
1108             bool(self)
1109             and self.leaves[0].type == token.STRING
1110             and self.leaves[0].value.startswith(('"""', "'''"))
1111         )
1112
1113     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1114         """If so, needs to be split before emitting."""
1115         for leaf in self.leaves:
1116             if leaf.type == STANDALONE_COMMENT:
1117                 if leaf.bracket_depth <= depth_limit:
1118                     return True
1119
1120         return False
1121
1122     def contains_multiline_strings(self) -> bool:
1123         for leaf in self.leaves:
1124             if is_multiline_string(leaf):
1125                 return True
1126
1127         return False
1128
1129     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1130         """Remove trailing comma if there is one and it's safe."""
1131         if not (
1132             self.leaves
1133             and self.leaves[-1].type == token.COMMA
1134             and closing.type in CLOSING_BRACKETS
1135         ):
1136             return False
1137
1138         if closing.type == token.RBRACE:
1139             self.remove_trailing_comma()
1140             return True
1141
1142         if closing.type == token.RSQB:
1143             comma = self.leaves[-1]
1144             if comma.parent and comma.parent.type == syms.listmaker:
1145                 self.remove_trailing_comma()
1146                 return True
1147
1148         # For parens let's check if it's safe to remove the comma.
1149         # Imports are always safe.
1150         if self.is_import:
1151             self.remove_trailing_comma()
1152             return True
1153
1154         # Otheriwsse, if the trailing one is the only one, we might mistakenly
1155         # change a tuple into a different type by removing the comma.
1156         depth = closing.bracket_depth + 1
1157         commas = 0
1158         opening = closing.opening_bracket
1159         for _opening_index, leaf in enumerate(self.leaves):
1160             if leaf is opening:
1161                 break
1162
1163         else:
1164             return False
1165
1166         for leaf in self.leaves[_opening_index + 1 :]:
1167             if leaf is closing:
1168                 break
1169
1170             bracket_depth = leaf.bracket_depth
1171             if bracket_depth == depth and leaf.type == token.COMMA:
1172                 commas += 1
1173                 if leaf.parent and leaf.parent.type == syms.arglist:
1174                     commas += 1
1175                     break
1176
1177         if commas > 1:
1178             self.remove_trailing_comma()
1179             return True
1180
1181         return False
1182
1183     def append_comment(self, comment: Leaf) -> bool:
1184         """Add an inline or standalone comment to the line."""
1185         if (
1186             comment.type == STANDALONE_COMMENT
1187             and self.bracket_tracker.any_open_brackets()
1188         ):
1189             comment.prefix = ""
1190             return False
1191
1192         if comment.type != token.COMMENT:
1193             return False
1194
1195         after = len(self.leaves) - 1
1196         if after == -1:
1197             comment.type = STANDALONE_COMMENT
1198             comment.prefix = ""
1199             return False
1200
1201         else:
1202             self.comments.append((after, comment))
1203             return True
1204
1205     def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
1206         """Generate comments that should appear directly after `leaf`.
1207
1208         Provide a non-negative leaf `_index` to speed up the function.
1209         """
1210         if not self.comments:
1211             return
1212
1213         if _index == -1:
1214             for _index, _leaf in enumerate(self.leaves):
1215                 if leaf is _leaf:
1216                     break
1217
1218             else:
1219                 return
1220
1221         for index, comment_after in self.comments:
1222             if _index == index:
1223                 yield comment_after
1224
1225     def remove_trailing_comma(self) -> None:
1226         """Remove the trailing comma and moves the comments attached to it."""
1227         comma_index = len(self.leaves) - 1
1228         for i in range(len(self.comments)):
1229             comment_index, comment = self.comments[i]
1230             if comment_index == comma_index:
1231                 self.comments[i] = (comma_index - 1, comment)
1232         self.leaves.pop()
1233
1234     def is_complex_subscript(self, leaf: Leaf) -> bool:
1235         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1236         open_lsqb = self.bracket_tracker.get_open_lsqb()
1237         if open_lsqb is None:
1238             return False
1239
1240         subscript_start = open_lsqb.next_sibling
1241
1242         if isinstance(subscript_start, Node):
1243             if subscript_start.type == syms.listmaker:
1244                 return False
1245
1246             if subscript_start.type == syms.subscriptlist:
1247                 subscript_start = child_towards(subscript_start, leaf)
1248         return subscript_start is not None and any(
1249             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1250         )
1251
1252     def __str__(self) -> str:
1253         """Render the line."""
1254         if not self:
1255             return "\n"
1256
1257         indent = "    " * self.depth
1258         leaves = iter(self.leaves)
1259         first = next(leaves)
1260         res = f"{first.prefix}{indent}{first.value}"
1261         for leaf in leaves:
1262             res += str(leaf)
1263         for _, comment in self.comments:
1264             res += str(comment)
1265         return res + "\n"
1266
1267     def __bool__(self) -> bool:
1268         """Return True if the line has leaves or comments."""
1269         return bool(self.leaves or self.comments)
1270
1271
@dataclass
class EmptyLineTracker:
    """Provides a stateful method that returns the number of potential extra
    empty lines needed before and after the currently processed line.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    # Selects the more compact limits and rules used for stubs.
    is_pyi: bool = False
    # The line passed to the previous `maybe_empty_lines()` call, if any.
    previous_line: Optional[Line] = None
    # The "after" count returned by the previous `maybe_empty_lines()` call.
    previous_after: int = 0
    # Depths of the `def`/`class` lines that have not been closed yet.
    previous_defs: List[int] = Factory(list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).  The "before" count is reduced by the
        "after" count handed out for the previous line.
        """
        raw_before, after = self._maybe_empty_lines(current_line)
        before = raw_before - self.previous_after
        self.previous_after, self.previous_line = after, current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw (before, after) counts; also clears the first prefix."""
        depth = current_line.depth
        max_allowed = 2 if depth == 0 and not self.is_pyi else 1
        before = 0
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = min(first_leaf.prefix.count("\n"), max_allowed)
            first_leaf.prefix = ""

        # Leaving the body of one or more definitions overrides `before`.
        if self.is_pyi:
            after_closed_def = 0 if depth else 1
        else:
            after_closed_def = 1 if depth else 2
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            before = after_closed_def

        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        previous = self.previous_line
        if not previous:
            return before, 0

        if (
            previous.is_import
            and not current_line.is_import
            and depth == previous.depth
        ):
            # At least one empty line after a block of imports.
            return (before or 1), 0

        if previous.is_class and current_line.is_triple_quoted_string:
            # One empty line after a triple quoted string that follows `class`.
            return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        """Compute the counts for a decorator, `def` or `class` line.

        `before` is the count computed so far by `_maybe_empty_lines()`.
        """
        if not current_line.is_decorator:
            self.previous_defs.append(current_line.depth)

        previous = self.previous_line
        if previous is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if previous.is_decorator:
            return 0, 0

        opens_previous_body = previous.depth < current_line.depth and (
            previous.is_class or previous.is_def
        )
        if opens_previous_body:
            return 0, 0

        follows_comment = (
            previous.is_comment
            and previous.depth == current_line.depth
            and before == 0
        )
        if follows_comment:
            return 0, 0

        if not self.is_pyi:
            newlines = 2
        elif previous.depth > current_line.depth:
            newlines = 1
        elif current_line.is_class or previous.is_class:
            # No blank line between classes with an empty body
            both_stubs = current_line.is_stub_class and previous.is_stub_class
            newlines = 0 if both_stubs else 1
        elif current_line.is_def and not previous.is_def:
            # Blank line between a block of functions and a block of non-functions
            newlines = 1
        else:
            newlines = 0
        if current_line.depth and newlines:
            # Nested definitions get one empty line fewer.
            newlines -= 1
        return newlines, 0
1381
1382
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    # Enables the stub-specific handling in `visit_suite()` and
    # `visit_simple_stmt()`.
    is_pyi: bool = False
    # When True, STRING leaves get their prefix and quotes normalized.
    normalize_strings: bool = True
    # The line currently being filled with leaves.
    current_line: Line = Factory(Line)
    # Forwarded to `normalize_string_prefix()`.
    remove_u_prefix: bool = False
    # Forwarded to `normalize_numeric_literal()`.
    allow_underscores: bool = False

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the line is empty, nothing is emitted; only its depth is adjusted
        by `indent`.  Otherwise the line is emitted and a new current_line is
        set up at the adjusted depth.
        """
        finished = self.current_line
        if not finished:
            finished.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        self.current_line = Line(depth=finished.depth + indent)
        yield finished

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`.

        For a leaf, the comments generated from it are placed first, then the
        leaf is normalized and appended to the current line.
        """
        if isinstance(node, Leaf):
            in_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if in_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                    continue

                if comment.type != token.COMMENT:
                    # regular standalone comment: finish the pending line first
                    yield from self.line()

                # a regular trailing comment ends its line; a standalone
                # comment gets a line of its own
                self.current_line.append(comment)
                yield from self.line()

            normalize_prefix(node, inside_brackets=in_brackets)
            if self.normalize_strings and node.type == token.STRING:
                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                normalize_string_quotes(node)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node, self.allow_underscores)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Node) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Node) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This method puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            starts_new_line = (
                child.type == token.NAME and child.value in keywords  # type: ignore
            )
            if starts_new_line:
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite.

        In stub mode, a suite accepted by `is_stub_suite()` is reduced to a
        visit of its child at index 2.
        """
        if not (self.is_pyi and is_stub_suite(node)):
            yield from self.visit_default(node)
            return

        yield from self.visit(node.children[2])

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        parent = node.parent
        if parent and parent.type in STATEMENT:
            # The statement acts as the body of its parent: emit it one level
            # deeper, except for stub bodies in stub mode.
            indent_body = not (self.is_pyi and is_stub_body(node))
            if indent_body:
                yield from self.line(+1)
            yield from self.visit_default(node)
            if indent_body:
                yield from self.line(-1)

        else:
            in_stub_suite = self.is_pyi and parent and is_stub_suite(parent)
            if not in_stub_suite:
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        remaining = iter(node.children)
        for child in remaining:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # The child right after ASYNC is the wrapped statement; its children
        # are visited directly.
        wrapped_stmt = next(remaining)
        for child in wrapped_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators, finishing the pending line before each child."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a standalone comment; outside of brackets it starts a new line."""
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def __attrs_post_init__(self) -> None:
        """Create the statement-specific `visit_*` attributes.

        Each one is `visit_stmt()` with `keywords` and `parens` pre-filled; the
        two last ones are aliases of existing methods.
        """
        stmt = self.visit_stmt
        nothing: Set[str] = set()
        self.visit_assert_stmt = partial(
            stmt, keywords={"assert"}, parens={"assert", ","}
        )
        self.visit_if_stmt = partial(
            stmt, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(
            stmt, keywords={"while", "else"}, parens={"while"}
        )
        self.visit_for_stmt = partial(
            stmt, keywords={"for", "else"}, parens={"for", "in"}
        )
        self.visit_try_stmt = partial(
            stmt, keywords={"try", "except", "else", "finally"}, parens=nothing
        )
        self.visit_except_clause = partial(stmt, keywords={"except"}, parens=nothing)
        self.visit_with_stmt = partial(stmt, keywords={"with"}, parens=nothing)
        self.visit_funcdef = partial(stmt, keywords={"def"}, parens=nothing)
        self.visit_classdef = partial(stmt, keywords={"class"}, parens=nothing)
        self.visit_expr_stmt = partial(stmt, keywords=nothing, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(
            stmt, keywords={"return"}, parens={"return"}
        )
        self.visit_import_from = partial(stmt, keywords=nothing, parens={"import"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
1564
1565
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps the token type of every opening bracket to the type of its closing one.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Leaf types for which `whitespace()` never returns a prefix.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
1572
1573
1574 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
1575     """Return whitespace prefix if needed for the given `leaf`.
1576
1577     `complex_subscript` signals whether the given leaf is part of a subscription
1578     which has non-trivial arguments, like arithmetic expressions or function calls.
1579     """
1580     NO = ""
1581     SPACE = " "
1582     DOUBLESPACE = "  "
1583     t = leaf.type
1584     p = leaf.parent
1585     v = leaf.value
1586     if t in ALWAYS_NO_SPACE:
1587         return NO
1588
1589     if t == token.COMMENT:
1590         return DOUBLESPACE
1591
1592     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1593     if t == token.COLON and p.type not in {
1594         syms.subscript,
1595         syms.subscriptlist,
1596         syms.sliceop,
1597     }:
1598         return NO
1599
1600     prev = leaf.prev_sibling
1601     if not prev:
1602         prevp = preceding_leaf(p)
1603         if not prevp or prevp.type in OPENING_BRACKETS:
1604             return NO
1605
1606         if t == token.COLON:
1607             if prevp.type == token.COLON:
1608                 return NO
1609
1610             elif prevp.type != token.COMMA and not complex_subscript:
1611                 return NO
1612
1613             return SPACE
1614
1615         if prevp.type == token.EQUAL:
1616             if prevp.parent:
1617                 if prevp.parent.type in {
1618                     syms.arglist,
1619                     syms.argument,
1620                     syms.parameters,
1621                     syms.varargslist,
1622                 }:
1623                     return NO
1624
1625                 elif prevp.parent.type == syms.typedargslist:
1626                     # A bit hacky: if the equal sign has whitespace, it means we
1627                     # previously found it's a typed argument.  So, we're using
1628                     # that, too.
1629                     return prevp.prefix
1630
1631         elif prevp.type in STARS:
1632             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1633                 return NO
1634
1635         elif prevp.type == token.COLON:
1636             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1637                 return SPACE if complex_subscript else NO
1638
1639         elif (
1640             prevp.parent
1641             and prevp.parent.type == syms.factor
1642             and prevp.type in MATH_OPERATORS
1643         ):
1644             return NO
1645
1646         elif (
1647             prevp.type == token.RIGHTSHIFT
1648             and prevp.parent
1649             and prevp.parent.type == syms.shift_expr
1650             and prevp.prev_sibling
1651             and prevp.prev_sibling.type == token.NAME
1652             and prevp.prev_sibling.value == "print"  # type: ignore
1653         ):
1654             # Python 2 print chevron
1655             return NO
1656
1657     elif prev.type in OPENING_BRACKETS:
1658         return NO
1659
1660     if p.type in {syms.parameters, syms.arglist}:
1661         # untyped function signatures or calls
1662         if not prev or prev.type != token.COMMA:
1663             return NO
1664
1665     elif p.type == syms.varargslist:
1666         # lambdas
1667         if prev and prev.type != token.COMMA:
1668             return NO
1669
1670     elif p.type == syms.typedargslist:
1671         # typed function signatures
1672         if not prev:
1673             return NO
1674
1675         if t == token.EQUAL:
1676             if prev.type != syms.tname:
1677                 return NO
1678
1679         elif prev.type == token.EQUAL:
1680             # A bit hacky: if the equal sign has whitespace, it means we
1681             # previously found it's a typed argument.  So, we're using that, too.
1682             return prev.prefix
1683
1684         elif prev.type != token.COMMA:
1685             return NO
1686
1687     elif p.type == syms.tname:
1688         # type names
1689         if not prev:
1690             prevp = preceding_leaf(p)
1691             if not prevp or prevp.type != token.COMMA:
1692                 return NO
1693
1694     elif p.type == syms.trailer:
1695         # attributes and calls
1696         if t == token.LPAR or t == token.RPAR:
1697             return NO
1698
1699         if not prev:
1700             if t == token.DOT:
1701                 prevp = preceding_leaf(p)
1702                 if not prevp or prevp.type != token.NUMBER:
1703                     return NO
1704
1705             elif t == token.LSQB:
1706                 return NO
1707
1708         elif prev.type != token.COMMA:
1709             return NO
1710
1711     elif p.type == syms.argument:
1712         # single argument
1713         if t == token.EQUAL:
1714             return NO
1715
1716         if not prev:
1717             prevp = preceding_leaf(p)
1718             if not prevp or prevp.type == token.LPAR:
1719                 return NO
1720
1721         elif prev.type in {token.EQUAL} | STARS:
1722             return NO
1723
1724     elif p.type == syms.decorator:
1725         # decorators
1726         return NO
1727
1728     elif p.type == syms.dotted_name:
1729         if prev:
1730             return NO
1731
1732         prevp = preceding_leaf(p)
1733         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1734             return NO
1735
1736     elif p.type == syms.classdef:
1737         if t == token.LPAR:
1738             return NO
1739
1740         if prev and prev.type == token.LPAR:
1741             return NO
1742
1743     elif p.type in {syms.subscript, syms.sliceop}:
1744         # indexing
1745         if not prev:
1746             assert p.parent is not None, "subscripts are always parented"
1747             if p.parent.type == syms.subscriptlist:
1748                 return SPACE
1749
1750             return NO
1751
1752         elif not complex_subscript:
1753             return NO
1754
1755     elif p.type == syms.atom:
1756         if prev and t == token.DOT:
1757             # dots, but not the first one.
1758             return NO
1759
1760     elif p.type == syms.dictsetmaker:
1761         # dict unpacking
1762         if prev and prev.type == token.DOUBLESTAR:
1763             return NO
1764
1765     elif p.type in {syms.factor, syms.star_expr}:
1766         # unary ops
1767         if not prev:
1768             prevp = preceding_leaf(p)
1769             if not prevp or prevp.type in OPENING_BRACKETS:
1770                 return NO
1771
1772             prevp_parent = prevp.parent
1773             assert prevp_parent is not None
1774             if prevp.type == token.COLON and prevp_parent.type in {
1775                 syms.subscript,
1776                 syms.sliceop,
1777             }:
1778                 return NO
1779
1780             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1781                 return NO
1782
1783         elif t in {token.NAME, token.NUMBER, token.STRING}:
1784             return NO
1785
1786     elif p.type == syms.import_from:
1787         if t == token.DOT:
1788             if prev and prev.type == token.DOT:
1789                 return NO
1790
1791         elif t == token.NAME:
1792             if v == "import":
1793                 return SPACE
1794
1795             if prev and prev.type == token.DOT:
1796                 return NO
1797
1798     elif p.type == syms.sliceop:
1799         return NO
1800
1801     return SPACE
1802
1803
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the leaf that comes right before `node`, or None if there is none.

    Climbs from `node` through its ancestors until one of them has a previous
    sibling.  If that sibling is a leaf it is the answer; otherwise the answer
    is the last leaf of the sibling's subtree.
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if not sibling:
            # Nothing to the left on this level, retry one level up.
            current = current.parent
            continue

        if isinstance(sibling, Leaf):
            return sibling

        # The sibling is a subtree: its final leaf directly precedes `node`.
        # A subtree without any leaves gives no answer at all.
        trailing = list(sibling.leaves())
        return trailing[-1] if trailing else None

    return None
1820
1821
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Starts at `descendant` and follows parent links, stopping at the first node
    whose parent is `ancestor`.  If the chain of parents ends before `ancestor`
    is met, the falsy end of the chain is returned instead.
    """
    candidate: Optional[LN] = descendant
    while candidate:
        parent = candidate.parent
        if parent != ancestor:
            candidate = parent
            continue

        break

    return candidate
1828
1829
def container_of(leaf: Leaf) -> LN:
    """Return `leaf` or the topmost of its ancestors that still "contains" it.

    A "container" here is a node whose very first child is `leaf` (possibly
    through several levels).  The climb stops as soon as the next parent:

    * does not exist,
    * starts with a child whose prefix differs from the prefix of `leaf`,
    * is the `file_input` node, or
    * directly follows a bracket.
    """
    prefix = leaf.prefix
    topmost: LN = leaf
    while topmost:
        candidate = topmost.parent
        if candidate is None:
            break

        must_stop = (
            candidate.children[0].prefix != prefix
            or candidate.type == syms.file_input
            or (
                candidate.prev_sibling is not None
                and candidate.prev_sibling.type in BRACKETS
            )
        )
        if must_stop:
            break

        topmost = candidate
    return topmost
1853
1854
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of `leaf` as a delimiter that is split *after*.

    Only delimiters that cause a line break after themselves get a non-zero
    priority here; in practice that means commas.  Any other leaf gets 0.
    `previous` is accepted but not consulted.

    Higher numbers are higher priority.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
1867
1868
def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    Only delimiters that cause a line break before themselves get a non-zero
    priority here.  `previous` is the leaf that comes right before `leaf` on the
    line, if any; it is used to tell apart compound operators such as
    ``not in`` and ``is not`` and to spot implicit string concatenation.

    Higher numbers are higher priority.  0 means "not a delimiter".
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    parent = leaf.parent
    leaf_type = leaf.type

    def follows_keyword(word: str) -> bool:
        """Tell whether `previous` is the NAME leaf spelled `word`."""
        return (
            previous is not None
            and previous.type == token.NAME
            and previous.value == word
        )

    # Attribute access, but only the first dot of a chain or a dot that follows
    # a closing bracket; dots in imports and dotted names never count.
    if (
        leaf_type == token.DOT
        and parent
        and parent.type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    # Binary arithmetic; unary operators and star-expressions are excluded.
    if (
        leaf_type in MATH_OPERATORS
        and parent
        and parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf_type]

    if leaf_type in COMPARATORS:
        return COMPARATOR_PRIORITY

    # Implicit string concatenation: a string right after another string.
    if (
        leaf_type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        return STRING_PRIORITY

    # Everything below deals with keywords, which are NAME leaves.
    if leaf_type != token.NAME:
        return 0

    word = leaf.value
    parent_type = parent.type if parent else None

    if word == "for" and parent_type in {syms.comp_for, syms.old_comp_for}:
        return COMPREHENSION_PRIORITY

    if word == "if" and parent_type in {syms.comp_if, syms.old_comp_if}:
        return COMPREHENSION_PRIORITY

    if word in {"if", "else"} and parent_type == syms.test:
        return TERNARY_PRIORITY

    if word == "is":
        return COMPARATOR_PRIORITY

    # `in` on its own; the `in` of `not in` is handled through the `not`.
    if (
        word == "in"
        and parent_type in {syms.comp_op, syms.comparison}
        and not follows_keyword("not")
    ):
        return COMPARATOR_PRIORITY

    # `not` opening `not in`; the `not` of `is not` is handled through the `is`.
    if word == "not" and parent_type == syms.comp_op and not follows_keyword("is"):
        return COMPARATOR_PRIORITY

    if word in LOGIC_OPERATORS and parent:
        return LOGIC_PRIORITY

    return 0
1958
1959
# Comment texts treated as formatting directives.  `normalize_invisible_parens`
# leaves a node untouched when a comment in its prefix is one of FMT_OFF.
# NOTE(review): FMT_ON presumably holds the matching "resume" markers; its
# consumer is not visible in this part of the file.
FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
1962
1963
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Generate a comment leaf for every comment found in the prefix of `leaf`.

    lib2to3 stores comments inside the whitespace prefix of the token that
    follows them (see `pgen2/driver.py:Driver.parse_tokens()`).  That keeps the
    grammar free of comment handling, but it also means that comments don't
    "belong" to any node.  The leaves generated here are therefore parentless.

    What matters for formatting is only whether a comment is inline or
    standalone, i.e. whether it shares its line with code:

    * inline comments are emitted as regular `token.COMMENT` leaves;
    * standalone comments are emitted with the fake `STANDALONE_COMMENT` type.

    Each generated leaf gets a prefix made of as many newlines as there were
    blank lines before the comment.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        blank_lines = "\n" * proto.newlines
        yield Leaf(proto.type, proto.value, prefix=blank_lines)
1985
1986
@dataclass
class ProtoComment:
    """Description of a single comment found in a leaf's prefix.

    Instances are built by `list_comments` and converted into actual comment
    leaves by `generate_comments`.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
1993
1994
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return the comments found in `prefix` as a list of `ProtoComment`.

    `prefix` is scanned line by line.  A comment on the very first line is an
    inline (trailing) comment unless `is_endmarker` is set; every other comment
    is a standalone one.  Each entry records the number of blank lines seen
    since the previous comment and how many characters of `prefix` were read up
    to and including the comment's line.

    Results are memoized with `lru_cache`, so the returned list is shared
    between callers passing the same arguments and should not be mutated.
    """
    comments: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return comments

    chars_read = 0
    blank_lines = 0
    for lineno, raw_line in enumerate(prefix.split("\n")):
        chars_read += len(raw_line) + 1  # the +1 is the "\n" removed by split
        stripped = raw_line.lstrip()
        if not stripped:
            blank_lines += 1
            continue

        if not stripped.startswith("#"):
            continue

        is_trailing = lineno == 0 and not is_endmarker
        kind = token.COMMENT if is_trailing else STANDALONE_COMMENT
        text = make_comment(stripped)
        comments.append(
            ProtoComment(
                type=kind, value=text, newlines=blank_lines, consumed=chars_read
            )
        )
        blank_lines = 0
    return comments
2023
2024
def make_comment(content: str) -> str:
    """Return `content` as a consistently formatted comment.

    Trailing whitespace is dropped.  A single space separates the hash sign
    from the text, except when the text starts with a space already or with
    one of "!", ":" or "#" (so "#!", "#:" and "##" stay glued together).

    A missing leading hash sign is added.  Empty content gives a bare "#".
    """
    text = content.rstrip()
    if not text:
        return "#"

    if text.startswith("#"):
        text = text[1:]
    needs_space = bool(text) and text[0] not in " !:#"
    return f"# {text}" if needs_space else f"#{text}"
2042
2043
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively. This means we can fallback to splitting
    by delimiters if the LHS/RHS don't yield any results.

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.

    Comment lines, and lines that already fit and are not marked
    `should_explode`, are yielded unchanged.  Otherwise the applicable split
    functions are tried in order and the first one that succeeds wins; every
    line it produces is split further recursively.  If all of them raise
    :exc:`CannotSplit`, the original `line` is yielded as is.
    """
    if line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    if not line.should_explode and is_line_short_enough(
        line, line_length=line_length, line_str=line_str
    ):
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    else:

        def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
            """Right-hand split that tries progressively longer trailer omissions."""
            for omit in generate_trailers_to_omit(line, line_length):
                lines = list(right_hand_split(line, line_length, py36, omit=omit))
                if is_line_short_enough(lines[0], line_length=line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line.
            # Both `line_length` and `py36` have to be forwarded: passing `py36`
            # positionally would land it in the `line_length` slot.
            yield from right_hand_split(line, line_length, py36=py36)

        if line.inside_brackets:
            split_funcs = [delimiter_split, standalone_comment_split, rhs]
        else:
            split_funcs = [rhs]
    for split_func in split_funcs:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        result: List[Line] = []
        try:
            for split in split_func(line, py36):
                if str(split).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(split, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            # This strategy didn't work out, try the next one.
            continue

        else:
            yield from result
            break

    else:
        # No strategy succeeded; emit the line untouched.
        yield line
2111
2112
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    The leaves up to and including the first opening bracket form the head,
    the leaves inside that bracket pair form the body (one level deeper), and
    the matching closing bracket with everything after it forms the tail.
    Empty parts are not yielded.  `py36` is accepted but not consulted.

    Raises :exc:`CannotSplit` (through `bracket_split_succeeded_or_raise`) when
    the split is not considered successful.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = head_leaves
    first_opener = None
    for leaf in line.leaves:
        closes_body = (
            bucket is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is first_opener
        )
        if closes_body:
            # An empty bracket pair stays glued to the head.
            bucket = tail_leaves if body_leaves else head_leaves
        bucket.append(leaf)
        if bucket is head_leaves and leaf.type in OPENING_BRACKETS:
            first_opener = leaf
            bucket = body_leaves
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    # Build the new lines, keeping each leaf's comments right behind it.
    parts = ((head, head_leaves), (body, body_leaves), (tail, tail_leaves))
    for target, leaves in parts:
        for leaf in leaves:
            target.append(leaf, preformatted=True)
            for comment in line.comments_after(leaf):
                target.append(comment, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    yield from (part for part in (head, body, tail) if part)
2153
2154
def right_hand_split(
    line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Yields the non-empty lines among the head (everything up to and including
    the opening bracket), the body (the bracket contents, one level deeper) and
    the tail (the closing bracket and everything after it).

    Raises :exc:`CannotSplit` when no bracket pair leaves content in the head,
    when `bracket_split_succeeded_or_raise` rejects the split, or when the
    retry without optional parentheses fails and the current split is not
    acceptable either.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Walk the leaves right to left: collect the tail until the first closing
    # bracket that is not in `omit`, then the body until its opening bracket,
    # then the head.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                # An empty bracket pair is kept in the tail as a whole.
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    # The lists were filled back to front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    if not head_leaves:
        # No `head` means the split failed. Either `tail` has all content or
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    # A non-empty head implies that a bracket pair was found above.
    assert opening_bracket and closing_bracket
    body.should_explode = should_explode(body, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        # Retry with this pair of invisible parens excluded, so that the split
        # lands on the previous bracket pair instead.
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, py36=py36, omit=omit)
            return

        except CannotSplit:
            # The retry failed.  Fall back to the split computed above, unless
            # that one is hopeless as well.
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to "
                    "satisfy the splitting algorithm because the head or the tail "
                    "contains multiline strings which by definition never fit one "
                    "line."
                )

    # The split happens on this bracket pair, so it has to show up in the output
    # even if it was an invisible (optional) pair so far.
    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
2249
2250
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split works on a pair of brackets and produces three
    lines: `head` is the content up to and including the opening bracket,
    `body` is the content inside the brackets, and `tail` starts with the
    closing bracket and holds whatever follows it.

    The split counts as failed when `body` is empty and `tail`, stripped of
    surrounding whitespace, is either empty (the split reproduced the original
    line) or shorter than three characters (too little gained to be worth it).
    """
    tail_width = len(str(tail).strip())
    if body:
        return

    if tail_width == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if tail_width < 3:
        raise CannotSplit(
            f"Splitting brackets on an empty body to save "
            f"{tail_width} characters is not worth it"
        )
2275
2276
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.  The wrapped function
    keeps its signature; each line it generates is passed on after the prefix
    of its first leaf has been normalized as for content inside brackets.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for split in split_func(line, py36):
            first_leaf = split.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield split

    return split_wrapper
2290
2291
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    A new line is started after every delimiter of that priority.  When the
    split is on commas, a trailing comma is added to the last line unless it
    ends with a comma or a standalone comment already.

    If `py36` is True, the split will add trailing commas also in function
    signatures that contain `*` and `**`.

    Raises :exc:`CannotSplit` if `line` has no leaves, if it has no delimiters,
    or if the only delimiter of the highest priority is a single attribute
    access.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    tracker = line.bracket_tracker
    try:
        split_priority = tracker.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    if (
        split_priority == DOT_PRIORITY
        and tracker.delimiter_count_with_priority(split_priority) == 1
    ):
        raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    def fresh_line() -> Line:
        """Return an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    pending = fresh_line()
    shallowest = sys.maxsize
    comma_allowed = True

    def place(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to the pending line, or to a new one if that fails."""
        nonlocal pending
        try:
            pending.append_safe(leaf, preformatted=True)
        except ValueError:
            # The pending line refused the leaf: flush it and start over.
            yield pending

            pending = fresh_line()
            pending.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from place(leaf)

        for comment in line.comments_after(leaf, index):
            yield from place(comment)

        shallowest = min(shallowest, leaf.bracket_depth)
        if leaf.bracket_depth == shallowest and is_vararg(
            leaf, within=VARARGS_PARENTS
        ):
            # A vararg on the outermost level seen so far: a trailing comma is
            # only acceptable when py36-only output is allowed.
            comma_allowed = comma_allowed and py36
        if tracker.delimiters.get(id(leaf)) == split_priority:
            yield pending

            pending = fresh_line()
    if pending:
        if (
            comma_allowed
            and split_priority == COMMA_PRIORITY
            and pending.leaves[-1].type not in {token.COMMA, STANDALONE_COMMENT}
        ):
            pending.append(Leaf(token.COMMA, ","))
        yield pending
2354
2355
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split standalone comments from the rest of the line.

    Leaves and their comments are appended to one line for as long as
    `append_safe` accepts them; each time it refuses (signalled by ValueError)
    the line built so far is yielded and a new one is started.  `py36` is
    accepted but not consulted.

    Raises :exc:`CannotSplit` if `line` contains no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def fresh_line() -> Line:
        """Return an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    pending = fresh_line()

    def place(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to the pending line, or to a new one if that fails."""
        nonlocal pending
        try:
            pending.append_safe(leaf, preformatted=True)
        except ValueError:
            yield pending

            pending = fresh_line()
            pending.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from place(leaf)

        for comment in line.comments_after(leaf, index):
            yield from place(comment)

    if pending:
        yield pending
2383
2384
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement.

    That is the case for the name `import` directly under an `import_name` node
    and for the name `from` directly under an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    if not parent:
        return False

    keyword_value = leaf.value
    if keyword_value == "import":
        return bool(parent.type == syms.import_name)

    if keyword_value == "from":
        return bool(parent.type == syms.import_from)

    return False
2397
2398
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce the prefix of `leaf` to newlines only, or to nothing at all.

    Outside of brackets the existing extra newlines are kept: the new prefix
    consists of the newlines that follow the last comment in the old prefix,
    minus the one that ends the comment line (or of all newlines if the old
    prefix has no comment).  Inside brackets, or when the part of the prefix
    before the first comment contains a backslash, the prefix is emptied.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    Note: mutates its argument.
    """
    new_prefix = ""
    if not inside_brackets:
        before_comment, *after_hashes = leaf.prefix.split("#")
        if "\\" not in before_comment:
            if after_hashes:
                # Don't count the newline that terminates the last comment.
                newline_count = after_hashes[-1].count("\n") - 1
            else:
                newline_count = before_comment.count("\n")
            new_prefix = "\n" * newline_count
    leaf.prefix = new_prefix
2415
2416
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Make all string prefixes lowercase.

    The prefix is the leading run of the letters f, u, r and b (in either case)
    of `leaf.value`; the remainder of the value is left as it is.

    If remove_u_prefix is given, also removes any u prefix from the string.

    Note: Mutates its argument.
    """
    match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    old_prefix, remainder = match.groups()
    prefix = old_prefix.lower()
    if remove_u_prefix:
        prefix = prefix.replace("u", "")
    leaf.value = prefix + remainder
2431
2432
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    The value of `leaf` is left alone when:

    * it is a triple-double-quoted string already;
    * it is a raw string whose body contains the preferred quote unescaped;
    * it is an f-string and switching quotes would put a backslash inside one
      of its replacement fields;
    * switching quotes would need more backslashes than there are now, or just
      as many while the string uses double quotes already.

    Unnecessary escapes of the other quote character are removed even if the
    quotes themselves end up unchanged.

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Find the replacement fields: a single `{` (one that is not part of an
        # escaped `{{`) up to a single `}`.  The field may sit at the very start
        # or the very end of the body, hence the `^` and `$` alternatives;
        # requiring a neighbouring character on both sides would skip such
        # fields and let a backslash slip into them.
        matches = re.findall(r"(?:[^{]|^)\{([^{].*?)\}(?:[^}]|$)", new_body)
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return
    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending in a double quote would merge with the
        # closing triple quote, so that last quote has to be escaped.
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2498
2499
def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
    """Normalizes numeric (float, int, and complex) literals.

    All letters are made lowercase.  Octal, hex and binary literals are not
    changed beyond that.  In literals with an exponent, a "+" sign of the
    exponent is dropped.  The digit groups are formatted with
    `format_float_or_int_string` and `format_int_string`, which get
    `allow_underscores` passed on.

    Note: Mutates its argument.
    """
    # We want all letters (e in exponents, j in complex literals, a-f
    # in hex literals) to be lowercase.
    text = leaf.value.lower()
    if text.startswith(("0o", "0x", "0b")):
        # Leave octal, hex, and binary literals alone for now.
        pass
    elif "e" in text:
        before, after = text.split("e")
        sign = ""
        if after.startswith("-"):
            after = after[1:]
            sign = "-"
        elif after.startswith("+"):
            after = after[1:]
        # An imaginary literal may carry an exponent, too (e.g. "1e5j").  Take
        # the suffix off so that only digits reach the integer formatter.
        suffix = ""
        if after.endswith("j"):
            after = after[:-1]
            suffix = "j"
        before = format_float_or_int_string(before, allow_underscores)
        after = format_int_string(after, allow_underscores)
        text = f"{before}e{sign}{after}{suffix}"
    # Complex numbers and Python 2 longs
    elif "j" in text or "l" in text:
        number = text[:-1]
        suffix = text[-1]
        text = f"{format_float_or_int_string(number, allow_underscores)}{suffix}"
    else:
        text = format_float_or_int_string(text, allow_underscores)
    leaf.value = text
2529
2530
def format_float_or_int_string(text: str, allow_underscores: bool) -> str:
    """Formats a number string that may have a fractional part, like "1.0".

    Text without a dot is handed to `format_int_string` as a whole.  Otherwise
    the parts before and after the dot are formatted separately, and a missing
    part is replaced by "0" (so "1." gives "1.0" and ".5" gives "0.5").
    """
    if "." in text:
        integral, fractional = text.split(".")
        parts = [
            format_int_string(part, allow_underscores) if part else "0"
            for part in (integral, fractional)
        ]
        return ".".join(parts)

    return format_int_string(text, allow_underscores)
2539
2540
def format_int_string(text: str, allow_underscores: bool) -> str:
    """Normalizes underscores in a string to e.g. 1_000_000.

    Input must be a string consisting only of digits and underscores.

    If `allow_underscores` is False, `text` is returned unchanged.  Otherwise
    existing underscores are removed and, for more than six digits, the digits
    are regrouped in threes counting from the right.

    The grouping works on the digit string itself, so leading zeros survive.
    This matters for the fractional part of floats: "0000001" has to become
    "0_000_001", not "1".
    """
    if not allow_underscores:
        return text

    digits = text.replace("_", "")
    if len(digits) <= 6:
        # No underscores for numbers <= 6 digits long.
        return digits

    # The leftmost group takes whatever doesn't fill a complete group of three.
    lead = len(digits) % 3 or 3
    groups = [digits[:lead]]
    groups.extend(digits[pos : pos + 3] for pos in range(lead, len(digits), 3))
    return "_".join(groups)
2553
2554
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    The children of `node` are modified in place; nothing is returned.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return

    # True when the previous child was a leaf whose value is in `parens_after`,
    # i.e. the current child is the one that may need parentheses.
    check_lpar = False
    # Iterate over a copy because the loop body replaces children of `node`.
    for index, child in enumerate(list(node.children)):
        if check_lpar:
            if child.type == syms.atom:
                # Already an atom: let the helper decide whether its parentheses
                # can be hidden.
                maybe_make_parens_invisible_in_atom(child)
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                lpar = Leaf(token.LPAR, "(")
                rpar = Leaf(token.RPAR, ")")
                child.remove()
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                # Nothing else is processed for this statement after the
                # import list has been handled.
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # wrap child in invisible parentheses
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                # NOTE(review): `remove()` presumably returns the child's former
                # position; `or 0` covers a falsy result (0 or None).
                index = child.remove() or 0
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))

        # Decide, based on the current child, whether the *next* child needs
        # the parenthesis treatment.
        check_lpar = isinstance(child, Leaf) and child.value in parens_after
2601
2602
def normalize_fmt_off(node: Node) -> None:
    """Turn every `# fmt: off`/`# fmt: on` region under `node` into standalone comments.

    Pairs are converted one at a time until `convert_one_fmt_off_pair()` reports
    that nothing was left to convert.
    """
    while convert_one_fmt_off_pair(node):
        pass
2608
2609
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Walks the leaves of `node` looking for a `# fmt: off` comment in a leaf's
    prefix.  For the first one that qualifies, the nodes produced by
    `generate_ignored_nodes()` are removed from the tree and replaced by one
    STANDALONE_COMMENT leaf holding the comment plus their source text.

    Returns True if a pair was converted.  The tree is modified in place, so
    callers re-run this function until it returns False.
    """
    for leaf in node.leaves():
        # `consumed` value of the comment preceding the current one in this
        # prefix (0 for the first comment).  Used below to keep the part of the
        # prefix that comes before the `# fmt: off` comment.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:
                        continue

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:
                    continue

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                # Drop everything up to and including the `# fmt: off` comment
                # from the prefix; the comment is re-emitted via `hidden_value`.
                first.prefix = prefix[comment.consumed :]
                hidden_value = (
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                )
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                # Detach all ignored nodes, remembering where the first one was
                # so the replacement leaf can take its place.
                first_idx = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                        first_idx = index
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                parent.insert_child(
                    first_idx,
                    Leaf(
                        STANDALONE_COMMENT,
                        hidden_value,
                        # Keep the prefix text before this comment, followed by
                        # `comment.newlines` newline characters.
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                    ),
                )
                return True

            previous_consumed = comment.consumed

    return False
2662
2663
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Yield the container of `leaf` and its following siblings up to `# fmt: on`.

    Generation ends at the first sibling whose prefix carries a `# fmt: on`
    comment, at an ENDMARKER, or when there are no more siblings.
    """
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        prefix_comments = list_comments(container.prefix, is_endmarker=False)
        if any(comment.value in FMT_ON for comment in prefix_comments):
            return

        yield container

        container = container.next_sibling
2678
2679
def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
    """Hide the parentheses of the atom `node` (and of nested atoms) when allowed.

    Nothing is changed, and False is returned, for non-atoms, empty tuples,
    one-tuples, yields, and atoms containing a delimiter of at least
    COMMA_PRIORITY.  Returns True when the outer parentheses were made invisible.
    """
    if node.type != syms.atom:
        return False

    if is_empty_tuple(node) or is_one_tuple(node) or is_yield(node):
        return False

    if max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY:
        return False

    opening, closing = node.children[0], node.children[-1]
    if opening.type != token.LPAR or closing.type != token.RPAR:
        return False

    # An empty value is what makes a parenthesis "invisible".
    opening.value = ""  # type: ignore
    closing.value = ""  # type: ignore
    if len(node.children) > 1:
        # The content might itself be a redundantly parenthesized atom.
        maybe_make_parens_invisible_in_atom(node.children[1])
    return True
2702
2703
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` is an atom made of just `(` and `)`."""
    if node.type != syms.atom:
        return False

    children = node.children
    if len(children) != 2:
        return False

    opening, closing = children
    return opening.type == token.LPAR and closing.type == token.RPAR
2712
2713
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` is a single-element tuple, parenthesized or not."""
    if node.type != syms.atom:
        # Bare form: an implicit tuple consisting of one element and a comma.
        return (
            node.type in IMPLICIT_TUPLE
            and len(node.children) == 2
            and node.children[1].type == token.COMMA
        )

    if len(node.children) != 3:
        return False

    lpar, gexp, rpar = node.children
    is_parenthesized_gexp = (
        lpar.type == token.LPAR
        and gexp.type == syms.testlist_gexp
        and rpar.type == token.RPAR
    )
    if not is_parenthesized_gexp:
        return False

    return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2735
2736
def is_yield(node: LN) -> bool:
    """Return True if `node` is a `yield`, possibly wrapped in parentheses.

    Matches a `yield_expr` node, a bare `yield` NAME leaf, and any such node
    nested inside one or more `( ... )` atoms.
    """
    while True:
        if node.type == syms.yield_expr:
            return True

        if node.type == token.NAME and node.value == "yield":  # type: ignore
            return True

        if node.type != syms.atom or len(node.children) != 3:
            return False

        lpar, expr, rpar = node.children
        if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
            return False

        # Peel one layer of parentheses and look again.
        node = expr
2756
2757
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a `*` or `**` whose context is listed in `within`.

    With VARARGS_PARENTS in `within` this covers function signatures.  With
    UNPACKING_PARENTS it covers right hand-side extended iterable unpacking
    (PEP 3132) and additional unpacking generalizations (PEP 448).
    """
    context = leaf.parent
    if leaf.type not in STARS or not context:
        return False

    if context.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132), so judge by the grandparent instead.
        context = context.parent
        if not context:
            return False

    return context.type in within
2779
2780
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a triple-quoted string that contains a newline."""
    unprefixed = leaf.value.lstrip("furbFURB")
    if "\n" not in unprefixed:
        return False

    return unprefixed.startswith(('"""', "'''"))
2785
2786
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is NEWLINE, INDENT, a stub body, DEDENT and nothing else."""
    if len(node.children) != 4:
        return False

    newline, indent, body, dedent = node.children
    expected = (token.NEWLINE, token.INDENT, token.DEDENT)
    if (newline.type, indent.type, dedent.type) != expected:
        return False

    return is_stub_body(body)
2798
2799
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement whose first child is `...`.

    The ellipsis is recognized as an atom made of exactly three `.` leaves.
    """
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    candidate = node.children[0]
    if candidate.type != syms.atom or len(candidate.children) != 3:
        return False

    return all(part == Leaf(token.DOT, ".") for part in candidate.children)
2814
2815
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return the highest delimiter priority found between the parens of `node`.

    Only atoms that start with `(` and end with `)` are inspected.  For anything
    else, or when the tracker raises ValueError, the result is 0.
    """
    if node.type != syms.atom:
        return 0

    opening, closing = node.children[0], node.children[-1]
    if opening.type != token.LPAR or closing.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for inner in node.children[1:-1]:
        inner_leaves = [inner] if isinstance(inner, Leaf) else inner.leaves()
        for leaf in inner_leaves:
            tracker.mark(leaf)

    try:
        return tracker.max_delimiter_priority()
    except ValueError:
        return 0
2842
2843
def ensure_visible(leaf: Leaf) -> None:
    """Give a parenthesis leaf its visible value back.

    Parentheses may have been made invisible (empty value) as part of some
    statements, see :func:`normalize_invisible_parens` and
    :func:`visit_import_from`.  Leaves of any other type are left untouched.
    """
    visible_value = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_value is not None:
        leaf.value = visible_value
2854
2855
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?

    True only for brackets that belong to an atom or an `import from`
    statement, when the highest delimiter priority in `line` (ignoring a
    trailing comma) is COMMA_PRIORITY.
    """
    parent = opening_bracket.parent
    if not parent:
        return False

    if parent.type not in {syms.atom, syms.import_from}:
        return False

    # Substring test on purpose: kept exactly as a containment check on the
    # string, which an empty (invisible) bracket value also satisfies.
    if opening_bracket.value not in "[{(":
        return False

    try:
        trailing = line.leaves[-1]
        exclude = {id(trailing)} if trailing.type == token.COMMA else set()
        max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
    except (IndexError, ValueError):
        return False

    return max_priority == COMMA_PRIORITY
2873
2874
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings (any capitalization of the `f`, `fr`, and `rf` prefixes); and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # Prefixes are case-insensitive, so `Rf"..."` or `fR'...'` are
            # f-strings just like `rf"..."`.  Compare the lowercased head.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # The argument list ends with a comma; that is only a 3.6+ feature
            # when a star argument is present.
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
2903
2904
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
    set is empty.

    Note that the very same set object is yielded every time and is updated in
    place between yields.
    """

    omit: Set[LeafID] = set()
    yield omit

    # Running width of the leaves seen so far, starting with the indentation
    # (4 columns per depth level).
    length = 4 * line.depth
    # Opening bracket matching the closing bracket currently being walked
    # through (leaves are visited right to left), or None when outside of one.
    opening_bracket = None
    # Most recent closing bracket that started a trailer; it is added to `omit`
    # once the next trailer to its left is found.
    closing_bracket = None
    optional_brackets: Set[LeafID] = set()
    # Closing brackets seen inside the current trailer (and empty bracket
    # pairs); they join `omit` together with `closing_bracket`.
    inner_brackets: Set[LeafID] = set()
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # `leaf_length` exceeds the leaf's own text only when comment lengths
        # were added to it.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        optional_brackets.discard(id(leaf))
        if opening_bracket:
            if leaf is opening_bracket:
                # Reached the start of the current bracket pair.
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if not leaf.value:
                # NOTE(review): `opening_bracket` is always falsy in this branch
                # (it is only reached when `if opening_bracket:` fails), so this
                # records the id of that falsy value rather than of a bracket.
                # Possibly `leaf.opening_bracket` was intended; within this
                # function `optional_brackets` is only ever discarded from.
                optional_brackets.add(id(opening_bracket))
                continue

            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (i.e. only add them to the `omit` set if another
                # pair of brackets was good enough).
                inner_brackets.add(id(leaf))
                continue

            opening_bracket = leaf.opening_bracket
            if closing_bracket:
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit
            closing_bracket = leaf
2957
2958
def get_future_imports(node: Node) -> Set[str]:
    """Collect the names imported from `__future__` at the top of the file.

    Only leading simple statements are inspected: a docstring is skipped,
    `from __future__ import ...` statements are harvested, and anything else
    ends the search.
    """
    found: Set[str] = set()

    def imported_names(children: List[LN]) -> Generator[str, None, None]:
        for item in children:
            if isinstance(item, Leaf):
                if item.type == token.NAME:
                    yield item.value
            elif item.type == syms.import_as_name:
                # `name as alias`: the first child is the original name.
                original = item.children[0]
                assert isinstance(original, Leaf), "Invalid syntax parsing imports"
                assert original.type == token.NAME, "Invalid syntax parsing imports"
                yield original.value
            elif item.type == syms.import_as_names:
                yield from imported_names(item.children)
            else:
                assert False, "Invalid syntax parsing imports"

    for statement in node.children:
        if statement.type != syms.simple_stmt:
            break

        head = statement.children[0]
        if isinstance(head, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            is_docstring = (
                len(statement.children) == 2
                and head.type == token.STRING
                and statement.children[1].type == token.NEWLINE
            )
            if not is_docstring:
                break
            continue

        if head.type != syms.import_from:
            break

        module_name = head.children[1]
        if not isinstance(module_name, Leaf) or module_name.value != "__future__":
            break

        found.update(imported_names(head.children[3:]))
    return found
3000
3001
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Recursively yield files under `path` that pass both regular expressions.

    Each entry is matched by its resolved path relative to `root`, written
    POSIX-style with a leading slash (and a trailing slash for directories).
    Entries matching `exclude` are skipped; files are yielded when they match
    `include`.

    Symbolic links pointing outside of the `root` directory are ignored.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for entry in path.iterdir():
        try:
            normalized_path = "/" + entry.resolve().relative_to(root).as_posix()
        except ValueError:
            if not entry.is_symlink():
                raise

            report.path_ignored(
                entry, f"is a symbolic link that points outside {root}"
            )
            continue

        if entry.is_dir():
            normalized_path += "/"

        excluded = exclude.search(normalized_path)
        # An empty match doesn't count as an exclusion.
        if excluded and excluded.group(0):
            report.path_ignored(entry, f"matches the --exclude regular expression")
            continue

        if entry.is_dir():
            yield from gen_python_files_in_dir(entry, root, include, exclude, report)

        elif entry.is_file():
            if include.search(normalized_path):
                yield entry
3043
3044
@lru_cache()
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.

    Because results are cached with `lru_cache`, `srcs` has to be hashable
    (e.g. a tuple of strings).
    """
    if not srcs:
        return Path("/").resolve()

    # Work with directories only: a file contributes the directory it lives in.
    base_dirs = []
    for src in srcs:
        resolved = Path(src).resolve()
        base_dirs.append(resolved if resolved.is_dir() else resolved.parent)

    try:
        # The deepest directory that contains *all* sources.  Merely taking the
        # smallest path in sort order can pick a directory that is not an
        # ancestor of the other sources.
        common_base = Path(os.path.commonpath([str(d) for d in base_dirs]))
    except ValueError:
        # No common ancestor at all (e.g. sources on different Windows drives);
        # fall back to the first directory in sort order.
        common_base = min(base_dirs)

    directory = common_base
    for directory in (common_base, *common_base.parents):
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # No marker found: `directory` is now the top-most ancestor.
    return directory
3073
3074
@dataclass
class Report:
    """Keeps a tally of formatting outcomes. `str(report)` renders the summary."""

    check: bool = False
    quiet: bool = False
    verbose: bool = False
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Record that `src` was processed successfully and write out a message."""
        if changed is not Changed.YES:
            # Unchanged files are only mentioned in verbose mode.
            if self.verbose:
                if changed is Changed.NO:
                    msg = f"{src} already well formatted, good job."
                else:
                    msg = f"{src} wasn't modified on disk since last run."
                out(msg, bold=False)
            self.same_count += 1
            return

        if self.verbose or not self.quiet:
            verb = "would reformat" if self.check else "reformatted"
            out(f"{verb} {src}")
        self.change_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Record that `src` could not be formatted and write out the error."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    def path_ignored(self, path: Path, message: str) -> None:
        """In verbose mode, write out why `path` was skipped."""
        if self.verbose:
            out(f"{path} ignored: {message}", bold=False)

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        Derived from the counters:
        - 123 if anything failed;
        - 1 if files were changed while --check is in effect;
        - 0 otherwise.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special returncodes reserved by the shell.
        if self.failure_count:
            return 123

        if self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        if self.check:
            reformatted, unchanged, failed = (
                "would be reformatted",
                "would be left unchanged",
                "would fail to reformat",
            )
        else:
            reformatted, unchanged, failed = (
                "reformatted",
                "left unchanged",
                "failed to reformat",
            )

        parts = []
        if self.change_count:
            plural = "s" if self.change_count > 1 else ""
            parts.append(
                click.style(f"{self.change_count} file{plural} {reformatted}", bold=True)
            )
        if self.same_count:
            plural = "s" if self.same_count > 1 else ""
            parts.append(f"{self.same_count} file{plural} {unchanged}")
        if self.failure_count:
            plural = "s" if self.failure_count > 1 else ""
            parts.append(
                click.style(f"{self.failure_count} file{plural} {failed}", fg="red")
            )
        return ", ".join(parts) + "."
3158
3159
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with the builtin `ast` module, each tree is
    rendered to text by walking the `_fields` of every node, and the two
    renderings are compared.

    An AssertionError is raised when `src` cannot be parsed, when `dst` cannot
    be parsed, or when the renderings differ.  In the latter two cases details
    are written to a log file whose path is included in the message.
    """

    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{'  ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{'  ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)
                    else:
                        # Plain list items carry meaning as well, e.g. the
                        # `None` placeholders in `kw_defaults` or the names of
                        # a `global` statement.  Skipping them would make
                        # different programs look identical.
                        yield (
                            f"{'  ' * (depth+2)}{item!r},"
                            f"  # {item.__class__.__name__}"
                        )

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"

        yield f"{'  ' * depth})  # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        # The parser's message is already part of the text, so the original
        # traceback is suppressed like in the other failure paths below.
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        ) from None

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues.  "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source.  "
            f"Please report a bug on https://github.com/ambv/black/issues.  "
            f"This diff might be helpful: {log}"
        ) from None
3221
3222
def assert_stable(
    src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
) -> None:
    """Raise AssertionError if formatting `dst` again changes it.

    On failure, the source-to-first-pass and first-pass-to-second-pass diffs
    are written to a log file that is named in the error message.
    """
    second_pass = format_str(dst, line_length=line_length, mode=mode)
    if second_pass == dst:
        return

    log = dump_to_file(
        diff(src, dst, "source", "first pass"),
        diff(dst, second_pass, "first pass", "second pass"),
    )
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter.  "
        f"Please report a bug on https://github.com/ambv/black/issues.  "
        f"This diff might be helpful: {log}"
    ) from None
3239
3240
def dump_to_file(*output: str) -> str:
    """Write every string in `output` to a new temporary file and return its path.

    Each non-empty string that doesn't already end with a newline gets one
    appended.  The file (`blk_*.log`, UTF-8) is left on disk.
    """
    import tempfile

    log_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    )
    with log_file:
        for chunk in output:
            log_file.write(chunk)
            if chunk and not chunk.endswith("\n"):
                log_file.write("\n")
    return log_file.name
3253
3254
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return the unified diff (5 lines of context) turning `a` into `b`.

    `a_name` and `b_name` label the two sides in the diff header.
    """
    import difflib

    def newline_terminated(text: str) -> List[str]:
        return [f"{line}\n" for line in text.split("\n")]

    delta = difflib.unified_diff(
        newline_terminated(a),
        newline_terminated(b),
        fromfile=a_name,
        tofile=b_name,
        n=5,
    )
    return "".join(delta)
3264
3265
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """Signal handler for asyncio: announce the abort, then cancel each of `tasks`."""
    err("Aborted!")
    for pending in tasks:
        pending.cancel()
3271
3272
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even when there was nothing to cancel or when waiting
    for the cancelled tasks raised.
    """
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.all_tasks()` appeared in Python 3.7 and the older
        # `asyncio.Task.all_tasks()` was removed in 3.9, so use whichever exists.
        all_tasks = getattr(asyncio, "all_tasks", None)
        if all_tasks is None:
            all_tasks = asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No explicit `loop=` here: that parameter was removed in Python 3.10,
        # and `gather()` takes the loop from the tasks it is given.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
3293
3294
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply `regex.sub(replacement, ...)` to `original`, then again to the result.

    This is used by string normalization to perform replaces on
    overlapping matches, which a single pass would miss.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
3302
3303
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile `regex`, switching on verbose mode when it spans several lines.

    Verbose mode is enabled by prefixing the pattern with the inline `(?x)` flag.
    """
    pattern = f"(?x){regex}" if "\n" in regex else regex
    return re.compile(pattern)
3312
3313
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Yield `(index, element)` pairs of `sequence` from the last one to the first."""
    descending_indices = range(len(sequence) - 1, -1, -1)
    yield from zip(descending_indices, reversed(sequence))
3320
3321
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Yield `(index, leaf, length)` for the leaves of `line`.

    `length` is the size of the leaf's prefix and value plus the values of the
    comments attached after it.  With `reversed` set, leaves are visited from
    last to first.

    Stops as soon as a leaf whose value contains a newline is reached.
    """
    enumerator = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in enumerator(line.leaves):
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        own_length = len(leaf.prefix) + len(leaf.value)
        comments_length = sum(
            len(comment.value) for comment in line.comments_after(leaf, index)
        )
        yield index, leaf, own_length + comments_length
3343
3344
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` fits in `line_length` characters on a single row.

    A non-empty `line_str` is used as the rendering of `line`; otherwise the
    line is rendered here.  Renderings that contain a newline and lines with
    standalone comments never qualify.
    """
    rendered = line_str or str(line).strip("\n")
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:  # multiline strings
        return False

    return not line.contains_standalone_comments()
3357
3358
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is a cheap heuristic rather than an exhaustive search.  It exists to
    avoid some unfortunate formattings, mostly wrapping unsplittable code in
    unnecessary parentheses.

    Lines with fewer than two leaves are never splittable.  Lines that do not
    start with a string followed by a dot are always reported as splittable.
    Only the `"string".attribute...` shape gets the closer inspection below.
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    starts_with_string_attribute = (
        leaves[0].type == token.STRING and leaves[1].type == token.DOT
    )
    if not starts_with_string_attribute:
        return True

    calls = 0
    dots = 0
    # NOTE(review): `trailing` is never reassigned inside the loop, so every check
    # below compares against the LAST leaf of the line rather than the leaf that
    # follows the current one.  Confirm this is intended before changing it.
    trailing = leaves[-1]
    for leaf in reversed(leaves[:-1]):
        kind = leaf.type
        if kind in OPENING_BRACKETS:
            if trailing.type not in CLOSING_BRACKETS:
                return False

            calls += 1
        elif kind == token.DOT:
            dots += 1
        elif kind == token.NAME:
            if trailing.type != token.DOT and trailing.type not in OPENING_BRACKETS:
                return False

        elif kind not in CLOSING_BRACKETS:
            return False

        if dots > 1 and calls > 1:
            return False

    return True
3393
3394
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Only a subset of the potentially nice looking formattings is accepted.  The
    point is to avoid false positives, which would end up producing lines that
    are too long.
    """
    tracker = line.bracket_tracker
    if not tracker.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    top_priority = tracker.max_delimiter_priority()
    if tracker.delimiter_count_with_priority(top_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if top_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    leaves = line.leaves
    assert len(leaves) >= 2, "Stranded delimiter"

    first, second = leaves[0], leaves[1]
    penultimate, last = leaves[-2], leaves[-1]
    # Both length checks below start counting from this offset.
    base_length = 4 * line.depth

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        counting = False
        overflowed = False
        length = base_length
        for position, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                # The leading bracket pair is closed: measure what follows it.
                counting = True
            if not counting:
                continue

            length += leaf_length
            if length > line_length:
                overflowed = True
                break

            if leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                counting = False

        # The enumeration ends early on leaves containing newlines, so also make
        # sure the very last leaf was reached before accepting.
        if not overflowed and len(leaves) == position + 1:
            return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket.  If the
        # opening bracket doesn't match our rule, maybe the closing will.

    ends_with_usable_bracket = (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    )
    if not ends_with_usable_bracket:
        return False

    if penultimate.type in OPENING_BRACKETS:
        # Empty brackets don't help.
        return False

    if is_multiline_string(first):
        # Additional wrapping of a multiline string in this situation is
        # unnecessary.
        return True

    length = base_length
    saw_inner_brackets = False
    for _position, leaf, leaf_length in enumerate_with_length(line):
        length += leaf_length
        if leaf is last.opening_bracket:
            if saw_inner_brackets or length <= line_length:
                return True

        elif leaf.type in OPENING_BRACKETS:
            # There are brackets we can further split on.
            saw_inner_brackets = True

    return False
3482
3483
def get_cache_file(line_length: int, mode: FileMode) -> Path:
    """Return the path of the cache file used for `line_length` and `mode`.

    The file is located in `CACHE_DIR`; its name embeds the line length and
    `mode.value`.
    """
    file_name = f"cache.{line_length}.{mode.value}.pickle"
    return CACHE_DIR / file_name
3486
3487
def read_cache(line_length: int, mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    Returns an empty cache when the cache file is missing or cannot be unpickled,
    e.g. because it is empty, truncated, corrupted, or was written with a pickle
    protocol this interpreter does not support.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(line_length, mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            # NOTE: unpickling executes whatever the file describes.  This is only
            # acceptable because the file lives in the user's own cache directory.
            cache: Cache = pickle.load(fobj)
        except (
            # Malformed data does not always surface as UnpicklingError: an empty
            # or truncated file raises EOFError, an unsupported protocol raises
            # ValueError, and the pickle docs also list the remaining types.
            pickle.UnpicklingError,
            EOFError,
            ValueError,
            IndexError,
            AttributeError,
            ImportError,
        ):
            return {}

    return cache
3504
3505
def get_cache_info(path: Path) -> CacheInfo:
    """Return the `(modification time, size)` pair used to detect changes to `path`.

    Both values come from a single `path.stat()` call.
    """
    file_stat = path.stat()
    return (file_stat.st_mtime, file_stat.st_size)
3510
3511
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets of resolved paths.

    The first contains paths of files that were modified on disk or are not in
    the cache.  The other contains paths of files whose cache entry still matches
    their current cache info.
    """
    changed: Set[Path] = set()
    unchanged: Set[Path] = set()
    for source in sources:
        resolved = source.resolve()
        is_stale = cache.get(resolved) != get_cache_info(resolved)
        (changed if is_stale else unchanged).add(resolved)
    return changed, unchanged
3526
3527
def write_cache(
    cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
) -> None:
    """Update the cache file.

    The entries of `cache` are merged with fresh cache info for every path in
    `sources` (keyed by resolved path) and pickled to the cache file selected by
    `line_length` and `mode`.  Writing is best effort: any `OSError` is ignored.
    """
    cache_file = get_cache_file(line_length, mode)
    try:
        # `exist_ok=True` avoids the check-then-create race: if another process
        # creates the directory first we still go on to write the cache, instead
        # of bailing out on FileExistsError.
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # The cache is only an optimization; failing to persist it must not
        # abort the run.
        pass
3541
3542
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application.  Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code.  Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.

    Every Click module that can be imported and exposes `_verify_python3_env` gets
    that function replaced with a no-op.  Modules that are missing are skipped.
    """
    modules: List[Any] = []
    # Import each module on its own: `from click import <name>` raises a plain
    # ImportError (not ModuleNotFoundError) when Click is installed but lacks that
    # private module, and one missing module must not prevent patching the other.
    try:
        from click import core
    except ImportError:
        pass
    else:
        modules.append(core)

    try:
        from click import _unicodefun  # type: ignore
    except ImportError:
        pass
    else:
        modules.append(_unicodefun)

    for module in modules:
        if hasattr(module, "_verify_python3_env"):
            module._verify_python3_env = lambda: None
3563
3564
# Script entry point.  `patch_click()` runs first so that Click's
# `_verify_python3_env` check is already disabled when `main()` is called.
if __name__ == "__main__":
    patch_click()
    main()