black.py

   1 import asyncio
   2 from asyncio.base_events import BaseEventLoop
   3 from concurrent.futures import Executor, ProcessPoolExecutor
   4 from datetime import datetime
   5 from enum import Enum, Flag
   6 from functools import lru_cache, partial, wraps
   7 import io
   8 import keyword
   9 import logging
  10 from multiprocessing import Manager
  11 import os
  12 from pathlib import Path
  13 import pickle
  14 import re
  15 import signal
  16 import sys
  17 import tokenize
  18 from typing import (
  19     Any,
  20     Callable,
  21     Collection,
  22     Dict,
  23     Generator,
  24     Generic,
  25     Iterable,
  26     Iterator,
  27     List,
  28     Optional,
  29     Pattern,
  30     Sequence,
  31     Set,
  32     Tuple,
  33     TypeVar,
  34     Union,
  35     cast,
  36 )
  37
  38 from appdirs import user_cache_dir
  39 from attr import dataclass, Factory
  40 import click
  41 import toml
  42
  43 # lib2to3 fork
  44 from blib2to3.pytree import Node, Leaf, type_repr
  45 from blib2to3 import pygram, pytree
  46 from blib2to3.pgen2 import driver, token
  47 from blib2to3.pgen2.parse import ParseError
  48
  49
__version__ = "18.6b4"
# Default value of the `--line-length` option.
DEFAULT_LINE_LENGTH = 88
# Default value of the `--exclude` option: a regex of directory names skipped
# on recursive searches.
DEFAULT_EXCLUDES = (
    r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
)
# Default value of the `--include` option: matches `.py` and `.pyi` names.
DEFAULT_INCLUDES = r"\.pyi?$"
# Per-user cache directory, keyed by the Black version above.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))


# types
# Plain aliases used purely to make signatures self-describing.
FileContent = str
Encoding = str
NewLine = str
Depth = int
NodeType = int
LeafID = int
Priority = int
Index = int
LN = Union[Leaf, Node]
SplitFunc = Callable[["Line", bool], Iterator["Line"]]
Timestamp = float
FileSize = int
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
# Message helpers; both write to stderr (`err=True`): `out` in bold, `err` in red.
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)

# Import-time side effect: pygram is initialized with the cache directory.
pygram.initialize(CACHE_DIR)
syms = pygram.python_symbols
  79
  80
class NothingChanged(UserWarning):
    """Raised by :func:`format_file_contents` when there is nothing to reformat.

    That is the case when the source is blank or when the reformatted code is
    identical to the source.
    """
  83
  84
class CannotSplit(Exception):
    """A readable split that fits the allotted line length is impossible.

    Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
    :func:`delimiter_split`.

    A plain :class:`Exception` subclass; it defines no extra state.
    """
  91
  92
  93 class WriteBack(Enum):
  94     NO = 0
  95     YES = 1
  96     DIFF = 2
  97     CHECK = 3
  98
  99     @classmethod
 100     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
 101         if check and not diff:
 102             return cls.CHECK
 103
 104         return cls.DIFF if diff else cls.YES
 105
 106
class Changed(Enum):
    """Outcome of processing one source, as passed to `report.done()`."""

    # Formatting ran and the file did not change.
    NO = 0
    # Formatting was skipped because the cache entry matched the file.
    CACHED = 1
    # Formatting ran and produced different contents.
    YES = 2
 111
 112
 113 class FileMode(Flag):
 114     AUTO_DETECT = 0
 115     PYTHON36 = 1
 116     PYI = 2
 117     NO_STRING_NORMALIZATION = 4
 118
 119     @classmethod
 120     def from_configuration(
 121         cls, *, py36: bool, pyi: bool, skip_string_normalization: bool
 122     ) -> "FileMode":
 123         mode = cls.AUTO_DETECT
 124         if py36:
 125             mode |= cls.PYTHON36
 126         if pyi:
 127             mode |= cls.PYI
 128         if skip_string_normalization:
 129             mode |= cls.NO_STRING_NORMALIZATION
 130         return mode
 131
 132
 133 def read_pyproject_toml(
 134     ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
 135 ) -> Optional[str]:
 136     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 137
 138     Returns the path to a successfully found and read configuration file, None
 139     otherwise.
 140     """
 141     assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
 142     if not value:
 143         root = find_project_root(ctx.params.get("src", ()))
 144         path = root / "pyproject.toml"
 145         if path.is_file():
 146             value = str(path)
 147         else:
 148             return None
 149
 150     try:
 151         pyproject_toml = toml.load(value)
 152         config = pyproject_toml.get("tool", {}).get("black", {})
 153     except (toml.TomlDecodeError, OSError) as e:
 154         raise click.BadOptionUsage(f"Error reading configuration file: {e}", ctx)
 155
 156     if not config:
 157         return None
 158
 159     if ctx.default_map is None:
 160         ctx.default_map = {}
 161     ctx.default_map.update(  # type: ignore  # bad types in .pyi
 162         {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 163     )
 164     return value
 165
 166
 167 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 168 @click.option(
 169     "-l",
 170     "--line-length",
 171     type=int,
 172     default=DEFAULT_LINE_LENGTH,
 173     help="How many character per line to allow.",
 174     show_default=True,
 175 )
 176 @click.option(
 177     "--py36",
 178     is_flag=True,
 179     help=(
 180         "Allow using Python 3.6-only syntax on all input files.  This will put "
 181         "trailing commas in function signatures and calls also after *args and "
 182         "**kwargs.  [default: per-file auto-detection]"
 183     ),
 184 )
 185 @click.option(
 186     "--pyi",
 187     is_flag=True,
 188     help=(
 189         "Format all input files like typing stubs regardless of file extension "
 190         "(useful when piping source on standard input)."
 191     ),
 192 )
 193 @click.option(
 194     "-S",
 195     "--skip-string-normalization",
 196     is_flag=True,
 197     help="Don't normalize string quotes or prefixes.",
 198 )
 199 @click.option(
 200     "--check",
 201     is_flag=True,
 202     help=(
 203         "Don't write the files back, just return the status.  Return code 0 "
 204         "means nothing would change.  Return code 1 means some files would be "
 205         "reformatted.  Return code 123 means there was an internal error."
 206     ),
 207 )
 208 @click.option(
 209     "--diff",
 210     is_flag=True,
 211     help="Don't write the files back, just output a diff for each file on stdout.",
 212 )
 213 @click.option(
 214     "--fast/--safe",
 215     is_flag=True,
 216     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 217 )
 218 @click.option(
 219     "--include",
 220     type=str,
 221     default=DEFAULT_INCLUDES,
 222     help=(
 223         "A regular expression that matches files and directories that should be "
 224         "included on recursive searches.  An empty value means all files are "
 225         "included regardless of the name.  Use forward slashes for directories on "
 226         "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
 227         "later."
 228     ),
 229     show_default=True,
 230 )
 231 @click.option(
 232     "--exclude",
 233     type=str,
 234     default=DEFAULT_EXCLUDES,
 235     help=(
 236         "A regular expression that matches files and directories that should be "
 237         "excluded on recursive searches.  An empty value means no paths are excluded. "
 238         "Use forward slashes for directories on all platforms (Windows, too).  "
 239         "Exclusions are calculated first, inclusions later."
 240     ),
 241     show_default=True,
 242 )
 243 @click.option(
 244     "-q",
 245     "--quiet",
 246     is_flag=True,
 247     help=(
 248         "Don't emit non-error messages to stderr. Errors are still emitted, "
 249         "silence those with 2>/dev/null."
 250     ),
 251 )
 252 @click.option(
 253     "-v",
 254     "--verbose",
 255     is_flag=True,
 256     help=(
 257         "Also emit messages to stderr about files that were not changed or were "
 258         "ignored due to --exclude=."
 259     ),
 260 )
 261 @click.version_option(version=__version__)
 262 @click.argument(
 263     "src",
 264     nargs=-1,
 265     type=click.Path(
 266         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 267     ),
 268     is_eager=True,
 269 )
 270 @click.option(
 271     "--config",
 272     type=click.Path(
 273         exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
 274     ),
 275     is_eager=True,
 276     callback=read_pyproject_toml,
 277     help="Read configuration from PATH.",
 278 )
 279 @click.pass_context
 280 def main(
 281     ctx: click.Context,
 282     line_length: int,
 283     check: bool,
 284     diff: bool,
 285     fast: bool,
 286     pyi: bool,
 287     py36: bool,
 288     skip_string_normalization: bool,
 289     quiet: bool,
 290     verbose: bool,
 291     include: str,
 292     exclude: str,
 293     src: Tuple[str],
 294     config: Optional[str],
 295 ) -> None:
 296     """The uncompromising code formatter."""
 297     write_back = WriteBack.from_configuration(check=check, diff=diff)
 298     mode = FileMode.from_configuration(
 299         py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
 300     )
 301     if config and verbose:
 302         out(f"Using configuration from {config}.", bold=False, fg="blue")
 303     try:
 304         include_regex = re_compile_maybe_verbose(include)
 305     except re.error:
 306         err(f"Invalid regular expression for include given: {include!r}")
 307         ctx.exit(2)
 308     try:
 309         exclude_regex = re_compile_maybe_verbose(exclude)
 310     except re.error:
 311         err(f"Invalid regular expression for exclude given: {exclude!r}")
 312         ctx.exit(2)
 313     report = Report(check=check, quiet=quiet, verbose=verbose)
 314     root = find_project_root(src)
 315     sources: Set[Path] = set()
 316     for s in src:
 317         p = Path(s)
 318         if p.is_dir():
 319             sources.update(
 320                 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
 321             )
 322         elif p.is_file() or s == "-":
 323             # if a file was explicitly given, we don't care about its extension
 324             sources.add(p)
 325         else:
 326             err(f"invalid path: {s}")
 327     if len(sources) == 0:
 328         if verbose or not quiet:
 329             out("No paths given. Nothing to do 😴")
 330         ctx.exit(0)
 331
 332     if len(sources) == 1:
 333         reformat_one(
 334             src=sources.pop(),
 335             line_length=line_length,
 336             fast=fast,
 337             write_back=write_back,
 338             mode=mode,
 339             report=report,
 340         )
 341     else:
 342         loop = asyncio.get_event_loop()
 343         executor = ProcessPoolExecutor(max_workers=os.cpu_count())
 344         try:
 345             loop.run_until_complete(
 346                 schedule_formatting(
 347                     sources=sources,
 348                     line_length=line_length,
 349                     fast=fast,
 350                     write_back=write_back,
 351                     mode=mode,
 352                     report=report,
 353                     loop=loop,
 354                     executor=executor,
 355                 )
 356             )
 357         finally:
 358             shutdown(loop)
 359     if verbose or not quiet:
 360         bang = "💥 💔 💥" if report.return_code else "✨ 🍰 ✨"
 361         out(f"All done! {bang}")
 362         click.secho(str(report), err=True)
 363     ctx.exit(report.return_code)
 364
 365
 366 def reformat_one(
 367     src: Path,
 368     line_length: int,
 369     fast: bool,
 370     write_back: WriteBack,
 371     mode: FileMode,
 372     report: "Report",
 373 ) -> None:
 374     """Reformat a single file under `src` without spawning child processes.
 375
 376     If `quiet` is True, non-error messages are not output. `line_length`,
 377     `write_back`, `fast` and `pyi` options are passed to
 378     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 379     """
 380     try:
 381         changed = Changed.NO
 382         if not src.is_file() and str(src) == "-":
 383             if format_stdin_to_stdout(
 384                 line_length=line_length, fast=fast, write_back=write_back, mode=mode
 385             ):
 386                 changed = Changed.YES
 387         else:
 388             cache: Cache = {}
 389             if write_back != WriteBack.DIFF:
 390                 cache = read_cache(line_length, mode)
 391                 res_src = src.resolve()
 392                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 393                     changed = Changed.CACHED
 394             if changed is not Changed.CACHED and format_file_in_place(
 395                 src,
 396                 line_length=line_length,
 397                 fast=fast,
 398                 write_back=write_back,
 399                 mode=mode,
 400             ):
 401                 changed = Changed.YES
 402             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 403                 write_back is WriteBack.CHECK and changed is Changed.NO
 404             ):
 405                 write_cache(cache, [src], line_length, mode)
 406         report.done(src, changed)
 407     except Exception as exc:
 408         report.failed(src, str(exc))
 409
 410
 411 async def schedule_formatting(
 412     sources: Set[Path],
 413     line_length: int,
 414     fast: bool,
 415     write_back: WriteBack,
 416     mode: FileMode,
 417     report: "Report",
 418     loop: BaseEventLoop,
 419     executor: Executor,
 420 ) -> None:
 421     """Run formatting of `sources` in parallel using the provided `executor`.
 422
 423     (Use ProcessPoolExecutors for actual parallelism.)
 424
 425     `line_length`, `write_back`, `fast`, and `pyi` options are passed to
 426     :func:`format_file_in_place`.
 427     """
 428     cache: Cache = {}
 429     if write_back != WriteBack.DIFF:
 430         cache = read_cache(line_length, mode)
 431         sources, cached = filter_cached(cache, sources)
 432         for src in sorted(cached):
 433             report.done(src, Changed.CACHED)
 434     cancelled = []
 435     sources_to_cache = []
 436     if sources:
 437         lock = None
 438         if write_back == WriteBack.DIFF:
 439             # For diff output, we need locks to ensure we don't interleave output
 440             # from different processes.
 441             manager = Manager()
 442             lock = manager.Lock()
 443         tasks = {
 444             loop.run_in_executor(
 445                 executor,
 446                 format_file_in_place,
 447                 src,
 448                 line_length,
 449                 fast,
 450                 write_back,
 451                 mode,
 452                 lock,
 453             ): src
 454             for src in sorted(sources)
 455         }
 456         pending: Iterable[asyncio.Task] = tasks.keys()
 457         try:
 458             loop.add_signal_handler(signal.SIGINT, cancel, pending)
 459             loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 460         except NotImplementedError:
 461             # There are no good alternatives for these on Windows
 462             pass
 463         while pending:
 464             done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 465             for task in done:
 466                 src = tasks.pop(task)
 467                 if task.cancelled():
 468                     cancelled.append(task)
 469                 elif task.exception():
 470                     report.failed(src, str(task.exception()))
 471                 else:
 472                     changed = Changed.YES if task.result() else Changed.NO
 473                     # If the file was written back or was successfully checked as
 474                     # well-formatted, store this information in the cache.
 475                     if write_back is WriteBack.YES or (
 476                         write_back is WriteBack.CHECK and changed is Changed.NO
 477                     ):
 478                         sources_to_cache.append(src)
 479                     report.done(src, changed)
 480     if cancelled:
 481         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 482     if sources_to_cache:
 483         write_cache(cache, sources_to_cache, line_length, mode)
 484
 485
 486 def format_file_in_place(
 487     src: Path,
 488     line_length: int,
 489     fast: bool,
 490     write_back: WriteBack = WriteBack.NO,
 491     mode: FileMode = FileMode.AUTO_DETECT,
 492     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 493 ) -> bool:
 494     """Format file under `src` path. Return True if changed.
 495
 496     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 497     code to the file.
 498     `line_length` and `fast` options are passed to :func:`format_file_contents`.
 499     """
 500     if src.suffix == ".pyi":
 501         mode |= FileMode.PYI
 502
 503     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 504     with open(src, "rb") as buf:
 505         src_contents, encoding, newline = decode_bytes(buf.read())
 506     try:
 507         dst_contents = format_file_contents(
 508             src_contents, line_length=line_length, fast=fast, mode=mode
 509         )
 510     except NothingChanged:
 511         return False
 512
 513     if write_back == write_back.YES:
 514         with open(src, "w", encoding=encoding, newline=newline) as f:
 515             f.write(dst_contents)
 516     elif write_back == write_back.DIFF:
 517         now = datetime.utcnow()
 518         src_name = f"{src}\t{then} +0000"
 519         dst_name = f"{src}\t{now} +0000"
 520         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 521         if lock:
 522             lock.acquire()
 523         try:
 524             f = io.TextIOWrapper(
 525                 sys.stdout.buffer,
 526                 encoding=encoding,
 527                 newline=newline,
 528                 write_through=True,
 529             )
 530             f.write(diff_contents)
 531             f.detach()
 532         finally:
 533             if lock:
 534                 lock.release()
 535     return True
 536
 537
 538 def format_stdin_to_stdout(
 539     line_length: int,
 540     fast: bool,
 541     write_back: WriteBack = WriteBack.NO,
 542     mode: FileMode = FileMode.AUTO_DETECT,
 543 ) -> bool:
 544     """Format file on stdin. Return True if changed.
 545
 546     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 547     write a diff to stdout.
 548     `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
 549     :func:`format_file_contents`.
 550     """
 551     then = datetime.utcnow()
 552     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 553     dst = src
 554     try:
 555         dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
 556         return True
 557
 558     except NothingChanged:
 559         return False
 560
 561     finally:
 562         f = io.TextIOWrapper(
 563             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 564         )
 565         if write_back == WriteBack.YES:
 566             f.write(dst)
 567         elif write_back == WriteBack.DIFF:
 568             now = datetime.utcnow()
 569             src_name = f"STDIN\t{then} +0000"
 570             dst_name = f"STDOUT\t{now} +0000"
 571             f.write(diff(src, dst, src_name, dst_name))
 572         f.detach()
 573
 574
 575 def format_file_contents(
 576     src_contents: str,
 577     *,
 578     line_length: int,
 579     fast: bool,
 580     mode: FileMode = FileMode.AUTO_DETECT,
 581 ) -> FileContent:
 582     """Reformat contents a file and return new contents.
 583
 584     If `fast` is False, additionally confirm that the reformatted code is
 585     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 586     `line_length` is passed to :func:`format_str`.
 587     """
 588     if src_contents.strip() == "":
 589         raise NothingChanged
 590
 591     dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
 592     if src_contents == dst_contents:
 593         raise NothingChanged
 594
 595     if not fast:
 596         assert_equivalent(src_contents, dst_contents)
 597         assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
 598     return dst_contents
 599
 600
 601 def format_str(
 602     src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
 603 ) -> FileContent:
 604     """Reformat a string and return new contents.
 605
 606     `line_length` determines how many characters per line are allowed.
 607     """
 608     src_node = lib2to3_parse(src_contents)
 609     dst_contents = ""
 610     future_imports = get_future_imports(src_node)
 611     is_pyi = bool(mode & FileMode.PYI)
 612     py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
 613     normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
 614     normalize_fmt_off(src_node)
 615     lines = LineGenerator(
 616         remove_u_prefix=py36 or "unicode_literals" in future_imports,
 617         is_pyi=is_pyi,
 618         normalize_strings=normalize_strings,
 619         allow_underscores=py36,
 620     )
 621     elt = EmptyLineTracker(is_pyi=is_pyi)
 622     empty_line = Line()
 623     after = 0
 624     for current_line in lines.visit(src_node):
 625         for _ in range(after):
 626             dst_contents += str(empty_line)
 627         before, after = elt.maybe_empty_lines(current_line)
 628         for _ in range(before):
 629             dst_contents += str(empty_line)
 630         for line in split_line(current_line, line_length=line_length, py36=py36):
 631             dst_contents += str(line)
 632     return dst_contents
 633
 634
 635 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 636     """Return a tuple of (decoded_contents, encoding, newline).
 637
 638     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 639     universal newlines (i.e. only contains LF).
 640     """
 641     srcbuf = io.BytesIO(src)
 642     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 643     if not lines:
 644         return "", encoding, "\n"
 645
 646     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 647     srcbuf.seek(0)
 648     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 649         return tiow.read(), encoding, newline
 650
 651
# Grammars that `lib2to3_parse` tries, in this order, until one of them parses
# the source.
GRAMMARS = [
    pygram.python_grammar_no_print_statement_no_exec_statement,
    pygram.python_grammar_no_print_statement,
    pygram.python_grammar,
]
 657
 658
 659 def lib2to3_parse(src_txt: str) -> Node:
 660     """Given a string with source, return the lib2to3 Node."""
 661     grammar = pygram.python_grammar_no_print_statement
 662     if src_txt[-1:] != "\n":
 663         src_txt += "\n"
 664     for grammar in GRAMMARS:
 665         drv = driver.Driver(grammar, pytree.convert)
 666         try:
 667             result = drv.parse_string(src_txt, True)
 668             break
 669
 670         except ParseError as pe:
 671             lineno, column = pe.context[1]
 672             lines = src_txt.splitlines()
 673             try:
 674                 faulty_line = lines[lineno - 1]
 675             except IndexError:
 676                 faulty_line = "<line number missing in source>"
 677             exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 678     else:
 679         raise exc from None
 680
 681     if isinstance(result, Leaf):
 682         result = Node(syms.file_input, [result])
 683     return result
 684
 685
 686 def lib2to3_unparse(node: Node) -> str:
 687     """Given a lib2to3 node, return its string representation."""
 688     code = str(node)
 689     return code
 690
 691
 692 T = TypeVar("T")
 693
 694
 695 class Visitor(Generic[T]):
 696     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 697
 698     def visit(self, node: LN) -> Iterator[T]:
 699         """Main method to visit `node` and its children.
 700
 701         It tries to find a `visit_*()` method for the given `node.type`, like
 702         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 703         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 704         instead.
 705
 706         Then yields objects of type `T` from the selected visitor.
 707         """
 708         if node.type < 256:
 709             name = token.tok_name[node.type]
 710         else:
 711             name = type_repr(node.type)
 712         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 713
 714     def visit_default(self, node: LN) -> Iterator[T]:
 715         """Default `visit_*()` implementation. Recurses to children of `node`."""
 716         if isinstance(node, Node):
 717             for child in node.children:
 718                 yield from self.visit(child)
 719
 720
 721 @dataclass
 722 class DebugVisitor(Visitor[T]):
 723     tree_depth: int = 0
 724
 725     def visit_default(self, node: LN) -> Iterator[T]:
 726         indent = " " * (2 * self.tree_depth)
 727         if isinstance(node, Node):
 728             _type = type_repr(node.type)
 729             out(f"{indent}{_type}", fg="yellow")
 730             self.tree_depth += 1
 731             for child in node.children:
 732                 yield from self.visit(child)
 733
 734             self.tree_depth -= 1
 735             out(f"{indent}/{_type}", fg="yellow", bold=False)
 736         else:
 737             _type = token.tok_name.get(node.type, str(node.type))
 738             out(f"{indent}{_type}", fg="blue", nl=False)
 739             if node.prefix:
 740                 # We don't have to handle prefixes for `Node` objects since
 741                 # that delegates to the first child anyway.
 742                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 743             out(f" {node.value!r}", fg="blue", bold=False)
 744
 745     @classmethod
 746     def show(cls, code: Union[str, Leaf, Node]) -> None:
 747         """Pretty-print the lib2to3 AST of a given string of `code`.
 748
 749         Convenience method for debugging.
 750         """
 751         v: DebugVisitor[None] = DebugVisitor()
 752         if isinstance(code, str):
 753             code = lib2to3_parse(code)
 754         list(v.visit(code))
 755
 756
# Python keywords, taken from the standard `keyword` module.
KEYWORDS = set(keyword.kwlist)
# Token types that carry only layout information.
WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
# Keywords that start a flow-control statement.
FLOW_CONTROL = {"return", "raise", "break", "continue"}
# Grammar symbols of compound statements (and `except` clauses).
STATEMENT = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
# Extra token type defined by this module.  It is registered in
# `token.tok_name` so that a name exists for it (`Visitor.visit` looks up
# token names there for types below 256).
STANDALONE_COMMENT = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
LOGIC_OPERATORS = {"and", "or"}
COMPARATORS = {
    token.LESS,
    token.GREATER,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESSEQUAL,
    token.GREATEREQUAL,
}
MATH_OPERATORS = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
STARS = {token.STAR, token.DOUBLESTAR}
# Parent node types listed with the situations they stand for.
# NOTE(review): the consumers of the three sets below are outside this part
# of the file; presumably they classify `*`/`**` and operand contexts - confirm.
VARARGS_PARENTS = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
UNPACKING_PARENTS = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
    syms.testlist_star_expr,
}
TEST_DESCENDANTS = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
# Spellings of the plain and augmented assignment operators.
ASSIGNMENTS = {
    "=",
    "+=",
    "-=",
    "*=",
    "@=",
    "/=",
    "%=",
    "&=",
    "|=",
    "^=",
    "<<=",
    ">>=",
    "**=",
    "//=",
}
# Delimiter priorities.  `BracketTracker.max_delimiter_priority` treats the
# numerically largest value as the highest priority.
COMPREHENSION_PRIORITY = 20
COMMA_PRIORITY = 18
TERNARY_PRIORITY = 16
LOGIC_PRIORITY = 14
STRING_PRIORITY = 12
COMPARATOR_PRIORITY = 10
# Priority per math operator token type; all lie below COMPARATOR_PRIORITY.
MATH_PRIORITIES = {
    token.VBAR: 9,
    token.CIRCUMFLEX: 8,
    token.AMPER: 7,
    token.LEFTSHIFT: 6,
    token.RIGHTSHIFT: 6,
    token.PLUS: 5,
    token.MINUS: 5,
    token.STAR: 4,
    token.SLASH: 4,
    token.DOUBLESLASH: 4,
    token.PERCENT: 4,
    token.AT: 4,
    token.TILDE: 3,
    token.DOUBLESTAR: 2,
}
DOT_PRIORITY = 1
 868
 869
@dataclass
class BracketTracker:
    """Keeps track of brackets on a line."""

    # Current nesting depth; `mark()` copies it onto each leaf as
    # `bracket_depth`.
    depth: int = 0
    # Opening bracket leaves still waiting for their closing bracket, keyed by
    # (depth, token type of the expected closing bracket).
    bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
    # Priority of each delimiter found at depth 0, keyed by `id()` of the leaf.
    delimiters: Dict[LeafID, Priority] = Factory(dict)
    # The leaf most recently passed to `mark()` (comments are not recorded).
    previous: Optional[Leaf] = None
    # Number of `for` keywords seen whose matching `in` has not been seen yet.
    _for_loop_variable: int = 0
    # NOTE(review): presumably the lambda counterpart of the counter above;
    # the code maintaining it is outside this part of the file - confirm.
    _lambda_arguments: int = 0
 880
    def mark(self, leaf: Leaf) -> None:
        """Mark `leaf` with bracket-related metadata. Keep track of delimiters.

        All leaves receive an int `bracket_depth` field that stores how deep
        within brackets a given leaf is. 0 means there are no enclosing brackets
        that started on this line.

        If a leaf is itself a closing bracket, it receives an `opening_bracket`
        field that it forms a pair with. This is a one-directional link to
        avoid reference cycles.

        If a leaf is a delimiter (a token on which Black can split the line if
        needed) and it's on depth 0, its `id()` is stored in the tracker's
        `delimiters` field.

        Comment leaves are ignored entirely.
        """
        if leaf.type == token.COMMENT:
            return

        # The artificial depth added after `for` / `lambda` has to be removed
        # before this leaf's own depth is computed.
        self.maybe_decrement_after_for_loop_variable(leaf)
        self.maybe_decrement_after_lambda_arguments(leaf)
        if leaf.type in CLOSING_BRACKETS:
            # A closing bracket sits at the depth of its opening bracket, so
            # the depth is lowered before it is recorded on the leaf.
            self.depth -= 1
            opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
            leaf.opening_bracket = opening_bracket
        leaf.bracket_depth = self.depth
        if self.depth == 0:
            delim = is_split_before_delimiter(leaf, self.previous)
            if delim and self.previous is not None:
                # A split *before* this leaf is recorded on the previous leaf.
                self.delimiters[id(self.previous)] = delim
            else:
                delim = is_split_after_delimiter(leaf, self.previous)
                if delim:
                    self.delimiters[id(leaf)] = delim
        if leaf.type in OPENING_BRACKETS:
            # Register under the closing bracket type so that the closing leaf
            # can look its partner up by (depth, own type).
            self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
            self.depth += 1
        self.previous = leaf
        self.maybe_increment_lambda_arguments(leaf)
        self.maybe_increment_for_loop_variable(leaf)
 920
 921     def any_open_brackets(self) -> bool:
 922         """Return True if there is an yet unmatched open bracket on the line."""
 923         return bool(self.bracket_match)
 924
 925     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
 926         """Return the highest priority of a delimiter found on the line.
 927
 928         Values are consistent with what `is_split_*_delimiter()` return.
 929         Raises ValueError on no delimiters.
 930         """
 931         return max(v for k, v in self.delimiters.items() if k not in exclude)
 932
 933     def delimiter_count_with_priority(self, priority: int = 0) -> int:
 934         """Return the number of delimiters with the given `priority`.
 935
 936         If no `priority` is passed, defaults to max priority on the line.
 937         """
 938         if not self.delimiters:
 939             return 0
 940
 941         priority = priority or self.max_delimiter_priority()
 942         return sum(1 for p in self.delimiters.values() if p == priority)
 943
 944     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
 945         """In a for loop, or comprehension, the variables are often unpacks.
 946
 947         To avoid splitting on the comma in this situation, increase the depth of
 948         tokens between `for` and `in`.
 949         """
 950         if leaf.type == token.NAME and leaf.value == "for":
 951             self.depth += 1
 952             self._for_loop_variable += 1
 953             return True
 954
 955         return False
 956
 957     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
 958         """See `maybe_increment_for_loop_variable` above for explanation."""
 959         if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
 960             self.depth -= 1
 961             self._for_loop_variable -= 1
 962             return True
 963
 964         return False
 965
 966     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
 967         """In a lambda expression, there might be more than one argument.
 968
 969         To avoid splitting on the comma in this situation, increase the depth of
 970         tokens between `lambda` and `:`.
 971         """
 972         if leaf.type == token.NAME and leaf.value == "lambda":
 973             self.depth += 1
 974             self._lambda_arguments += 1
 975             return True
 976
 977         return False
 978
 979     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
 980         """See `maybe_increment_lambda_arguments` above for explanation."""
 981         if self._lambda_arguments and leaf.type == token.COLON:
 982             self.depth -= 1
 983             self._lambda_arguments -= 1
 984             return True
 985
 986         return False
 987
 988     def get_open_lsqb(self) -> Optional[Leaf]:
 989         """Return the most recent opening square bracket (if any)."""
 990         return self.bracket_match.get((self.depth - 1, token.RSQB))
 991
 992
 993 @dataclass
 994 class Line:
 995     """Holds leaves and comments. Can be printed with `str(line)`."""
 996
 997     depth: int = 0
 998     leaves: List[Leaf] = Factory(list)
 999     comments: List[Tuple[Index, Leaf]] = Factory(list)
1000     bracket_tracker: BracketTracker = Factory(BracketTracker)
1001     inside_brackets: bool = False
1002     should_explode: bool = False
1003
1004     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1005         """Add a new `leaf` to the end of the line.
1006
1007         Unless `preformatted` is True, the `leaf` will receive a new consistent
1008         whitespace prefix and metadata applied by :class:`BracketTracker`.
1009         Trailing commas are maybe removed, unpacked for loop variables are
1010         demoted from being delimiters.
1011
1012         Inline comments are put aside.
1013         """
1014         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1015         if not has_value:
1016             return
1017
1018         if token.COLON == leaf.type and self.is_class_paren_empty:
1019             del self.leaves[-2:]
1020         if self.leaves and not preformatted:
1021             # Note: at this point leaf.prefix should be empty except for
1022             # imports, for which we only preserve newlines.
1023             leaf.prefix += whitespace(
1024                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1025             )
1026         if self.inside_brackets or not preformatted:
1027             self.bracket_tracker.mark(leaf)
1028             self.maybe_remove_trailing_comma(leaf)
1029         if not self.append_comment(leaf):
1030             self.leaves.append(leaf)
1031
1032     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1033         """Like :func:`append()` but disallow invalid standalone comment structure.
1034
1035         Raises ValueError when any `leaf` is appended after a standalone comment
1036         or when a standalone comment is not the first leaf on the line.
1037         """
1038         if self.bracket_tracker.depth == 0:
1039             if self.is_comment:
1040                 raise ValueError("cannot append to standalone comments")
1041
1042             if self.leaves and leaf.type == STANDALONE_COMMENT:
1043                 raise ValueError(
1044                     "cannot append standalone comments to a populated line"
1045                 )
1046
1047         self.append(leaf, preformatted=preformatted)
1048
1049     @property
1050     def is_comment(self) -> bool:
1051         """Is this line a standalone comment?"""
1052         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1053
1054     @property
1055     def is_decorator(self) -> bool:
1056         """Is this line a decorator?"""
1057         return bool(self) and self.leaves[0].type == token.AT
1058
1059     @property
1060     def is_import(self) -> bool:
1061         """Is this an import line?"""
1062         return bool(self) and is_import(self.leaves[0])
1063
1064     @property
1065     def is_class(self) -> bool:
1066         """Is this line a class definition?"""
1067         return (
1068             bool(self)
1069             and self.leaves[0].type == token.NAME
1070             and self.leaves[0].value == "class"
1071         )
1072
1073     @property
1074     def is_stub_class(self) -> bool:
1075         """Is this line a class definition with a body consisting only of "..."?"""
1076         return self.is_class and self.leaves[-3:] == [
1077             Leaf(token.DOT, ".") for _ in range(3)
1078         ]
1079
1080     @property
1081     def is_def(self) -> bool:
1082         """Is this a function definition? (Also returns True for async defs.)"""
1083         try:
1084             first_leaf = self.leaves[0]
1085         except IndexError:
1086             return False
1087
1088         try:
1089             second_leaf: Optional[Leaf] = self.leaves[1]
1090         except IndexError:
1091             second_leaf = None
1092         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1093             first_leaf.type == token.ASYNC
1094             and second_leaf is not None
1095             and second_leaf.type == token.NAME
1096             and second_leaf.value == "def"
1097         )
1098
1099     @property
1100     def is_class_paren_empty(self) -> bool:
1101         """Is this a class with no base classes but using parentheses?
1102
1103         Those are unnecessary and should be removed.
1104         """
1105         return (
1106             bool(self)
1107             and len(self.leaves) == 4
1108             and self.is_class
1109             and self.leaves[2].type == token.LPAR
1110             and self.leaves[2].value == "("
1111             and self.leaves[3].type == token.RPAR
1112             and self.leaves[3].value == ")"
1113         )
1114
1115     @property
1116     def is_triple_quoted_string(self) -> bool:
1117         """Is the line a triple quoted string?"""
1118         return (
1119             bool(self)
1120             and self.leaves[0].type == token.STRING
1121             and self.leaves[0].value.startswith(('"""', "'''"))
1122         )
1123
1124     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1125         """If so, needs to be split before emitting."""
1126         for leaf in self.leaves:
1127             if leaf.type == STANDALONE_COMMENT:
1128                 if leaf.bracket_depth <= depth_limit:
1129                     return True
1130
1131         return False
1132
1133     def contains_multiline_strings(self) -> bool:
1134         for leaf in self.leaves:
1135             if is_multiline_string(leaf):
1136                 return True
1137
1138         return False
1139
1140     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1141         """Remove trailing comma if there is one and it's safe."""
1142         if not (
1143             self.leaves
1144             and self.leaves[-1].type == token.COMMA
1145             and closing.type in CLOSING_BRACKETS
1146         ):
1147             return False
1148
1149         if closing.type == token.RBRACE:
1150             self.remove_trailing_comma()
1151             return True
1152
1153         if closing.type == token.RSQB:
1154             comma = self.leaves[-1]
1155             if comma.parent and comma.parent.type == syms.listmaker:
1156                 self.remove_trailing_comma()
1157                 return True
1158
1159         # For parens let's check if it's safe to remove the comma.
1160         # Imports are always safe.
1161         if self.is_import:
1162             self.remove_trailing_comma()
1163             return True
1164
1165         # Otheriwsse, if the trailing one is the only one, we might mistakenly
1166         # change a tuple into a different type by removing the comma.
1167         depth = closing.bracket_depth + 1
1168         commas = 0
1169         opening = closing.opening_bracket
1170         for _opening_index, leaf in enumerate(self.leaves):
1171             if leaf is opening:
1172                 break
1173
1174         else:
1175             return False
1176
1177         for leaf in self.leaves[_opening_index + 1 :]:
1178             if leaf is closing:
1179                 break
1180
1181             bracket_depth = leaf.bracket_depth
1182             if bracket_depth == depth and leaf.type == token.COMMA:
1183                 commas += 1
1184                 if leaf.parent and leaf.parent.type == syms.arglist:
1185                     commas += 1
1186                     break
1187
1188         if commas > 1:
1189             self.remove_trailing_comma()
1190             return True
1191
1192         return False
1193
1194     def append_comment(self, comment: Leaf) -> bool:
1195         """Add an inline or standalone comment to the line."""
1196         if (
1197             comment.type == STANDALONE_COMMENT
1198             and self.bracket_tracker.any_open_brackets()
1199         ):
1200             comment.prefix = ""
1201             return False
1202
1203         if comment.type != token.COMMENT:
1204             return False
1205
1206         after = len(self.leaves) - 1
1207         if after == -1:
1208             comment.type = STANDALONE_COMMENT
1209             comment.prefix = ""
1210             return False
1211
1212         else:
1213             self.comments.append((after, comment))
1214             return True
1215
1216     def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
1217         """Generate comments that should appear directly after `leaf`.
1218
1219         Provide a non-negative leaf `_index` to speed up the function.
1220         """
1221         if not self.comments:
1222             return
1223
1224         if _index == -1:
1225             for _index, _leaf in enumerate(self.leaves):
1226                 if leaf is _leaf:
1227                     break
1228
1229             else:
1230                 return
1231
1232         for index, comment_after in self.comments:
1233             if _index == index:
1234                 yield comment_after
1235
1236     def remove_trailing_comma(self) -> None:
1237         """Remove the trailing comma and moves the comments attached to it."""
1238         comma_index = len(self.leaves) - 1
1239         for i in range(len(self.comments)):
1240             comment_index, comment = self.comments[i]
1241             if comment_index == comma_index:
1242                 self.comments[i] = (comma_index - 1, comment)
1243         self.leaves.pop()
1244
1245     def is_complex_subscript(self, leaf: Leaf) -> bool:
1246         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1247         open_lsqb = self.bracket_tracker.get_open_lsqb()
1248         if open_lsqb is None:
1249             return False
1250
1251         subscript_start = open_lsqb.next_sibling
1252
1253         if isinstance(subscript_start, Node):
1254             if subscript_start.type == syms.listmaker:
1255                 return False
1256
1257             if subscript_start.type == syms.subscriptlist:
1258                 subscript_start = child_towards(subscript_start, leaf)
1259         return subscript_start is not None and any(
1260             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1261         )
1262
1263     def __str__(self) -> str:
1264         """Render the line."""
1265         if not self:
1266             return "\n"
1267
1268         indent = "    " * self.depth
1269         leaves = iter(self.leaves)
1270         first = next(leaves)
1271         res = f"{first.prefix}{indent}{first.value}"
1272         for leaf in leaves:
1273             res += str(leaf)
1274         for _, comment in self.comments:
1275             res += str(comment)
1276         return res + "\n"
1277
1278     def __bool__(self) -> bool:
1279         """Return True if the line has leaves or comments."""
1280         return bool(self.leaves or self.comments)
1281
1282
@dataclass
class EmptyLineTracker:
    """Computes how many blank lines belong before and after each line.

    The tracker is stateful: it remembers the previously processed line, the
    "after" count it handed out for that line and the depths of the
    definitions that are still open.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    is_pyi: bool = False
    previous_line: Optional[Line] = None
    previous_after: int = 0
    previous_defs: List[int] = Factory(list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).
        """
        wanted_before, after = self._maybe_empty_lines(current_line)
        # Blank lines already handed out after the previous line count towards
        # the ones wanted before this one.
        before = wanted_before - self.previous_after
        self.previous_after = after
        self.previous_line = current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw (before, after) pair for `current_line`.

        Consumes the newlines stored in the prefix of the line's first leaf and
        pops every remembered definition at the same or a deeper depth.
        """
        depth = current_line.depth
        # Only module-level code of regular sources may keep two blank lines.
        max_allowed = 2 if depth == 0 and not self.is_pyi else 1

        before = 0
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = min(first_leaf.prefix.count("\n"), max_allowed)
            first_leaf.prefix = ""

        if self.is_pyi:
            after_definition = 0 if depth else 1
        else:
            after_definition = 1 if depth else 2
        while self.previous_defs and self.previous_defs[-1] >= depth:
            # A definition at this depth or deeper has ended: forget it and
            # force the spacing that follows a finished definition.
            self.previous_defs.pop()
            before = after_definition

        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        previous = self.previous_line
        if previous:
            if (
                previous.is_import
                and not current_line.is_import
                and depth == previous.depth
            ):
                # At least one blank line after a block of imports.
                return (before or 1), 0

            if previous.is_class and current_line.is_triple_quoted_string:
                # One blank line after a triple-quoted string that directly
                # follows a class line.
                return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        """Compute (before, after) for a decorator, `def` or `class` line.

        Non-decorator lines also get their depth recorded in `previous_defs`.
        """
        if not current_line.is_decorator:
            self.previous_defs.append(current_line.depth)

        previous = self.previous_line
        if previous is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if previous.is_decorator:
            return 0, 0

        nested_deeper = previous.depth < current_line.depth
        if nested_deeper and (previous.is_class or previous.is_def):
            # First definition directly inside a class or function body.
            return 0, 0

        if (
            before == 0
            and previous.is_comment
            and previous.depth == current_line.depth
        ):
            # Keep a comment glued to the definition it directly precedes.
            return 0, 0

        if not self.is_pyi:
            newlines = 2
        elif previous.depth > current_line.depth:
            newlines = 1
        elif current_line.is_class or previous.is_class:
            # No blank line between classes with an empty body.
            both_stubs = current_line.is_stub_class and previous.is_stub_class
            newlines = 0 if both_stubs else 1
        elif current_line.is_def and not previous.is_def:
            # Blank line between a block of functions and a block of non-functions
            newlines = 1
        else:
            newlines = 0

        if current_line.depth and newlines:
            # Nested definitions get one blank line fewer.
            newlines -= 1
        return newlines, 0
1392
1393
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    # Enables the stub-specific handling in `visit_suite()` and
    # `visit_simple_stmt()`.
    is_pyi: bool = False
    # When False, string prefixes and quotes are left untouched.
    normalize_strings: bool = True
    # The line currently being filled with leaves.
    current_line: Line = Factory(Line)
    # Forwarded to `normalize_string_prefix()`.
    remove_u_prefix: bool = False
    # Forwarded to `normalize_numeric_literal()`.
    allow_underscores: bool = False

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the current line is empty, nothing is emitted; only its depth is
        adjusted by `indent`.

        Otherwise the current line is yielded and a new current_line is set up
        at the old depth plus `indent`.
        """
        if not self.current_line:
            self.current_line.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = Line(depth=complete_line.depth + indent)
        yield complete_line

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`.

        For leaves, the comments produced by `generate_comments()` are placed
        first, then the leaf is normalized and appended to the current line.
        """
        if isinstance(node, Leaf):
            # Sampled once, before any comment or the leaf itself is appended.
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if any_open_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                elif comment.type == token.COMMENT:
                    # regular trailing comment
                    self.current_line.append(comment)
                    yield from self.line()

                else:
                    # regular standalone comment
                    yield from self.line()

                    self.current_line.append(comment)
                    yield from self.line()

            normalize_prefix(node, inside_brackets=any_open_brackets)
            if self.normalize_strings and node.type == token.STRING:
                normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                normalize_string_quotes(node)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node, self.allow_underscores)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Node) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Node) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This method puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                # Finish whatever was collected so the keyword starts a line.
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite.

        In stub files, a suite accepted by `is_stub_suite()` is reduced to its
        child at index 2; every other suite is visited the default way.
        """
        if self.is_pyi and is_stub_suite(node):
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        # True when the statement hangs directly off a node whose type is in
        # STATEMENT rather than sitting inside a suite.
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                # Stub body: no line break and no indentation change.
                yield from self.visit_default(node)
            else:
                # Emit the statement as an indented block of its own.
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            # The line break is skipped only for stub files whose parent suite
            # is accepted by `is_stub_suite()`.
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # The child following ASYNC is the wrapped statement.  Its children are
        # visited directly, bypassing the statement's own `visit_*` method.
        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            # Flush before each child so every one starts on a fresh line.
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a standalone comment leaf.

        Outside of brackets the current line is emitted first, so the comment
        starts on a fresh line.
        """
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def __attrs_post_init__(self) -> None:
        """You are in a twisty little maze of passages.

        Binds the statement-specific `visit_*` attributes to `visit_stmt()`
        with the `keywords` and `parens` preset for each kind of statement.
        """
        v = self.visit_stmt
        # Shared empty set for statements that need no keywords or no parens.
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        # These two reuse existing visitors under a second name.
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
1575
1576
# NOTE(review): presumably the node types of tuples written without
# parentheses -- confirm against the users of this set.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps every opening bracket token type to its closing counterpart.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Leaf types for which `whitespace()` returns an empty prefix right away.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
1583
1584
1585 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
1586     """Return whitespace prefix if needed for the given `leaf`.
1587
1588     `complex_subscript` signals whether the given leaf is part of a subscription
1589     which has non-trivial arguments, like arithmetic expressions or function calls.
1590     """
1591     NO = ""
1592     SPACE = " "
1593     DOUBLESPACE = "  "
1594     t = leaf.type
1595     p = leaf.parent
1596     v = leaf.value
1597     if t in ALWAYS_NO_SPACE:
1598         return NO
1599
1600     if t == token.COMMENT:
1601         return DOUBLESPACE
1602
1603     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1604     if t == token.COLON and p.type not in {
1605         syms.subscript,
1606         syms.subscriptlist,
1607         syms.sliceop,
1608     }:
1609         return NO
1610
1611     prev = leaf.prev_sibling
1612     if not prev:
1613         prevp = preceding_leaf(p)
1614         if not prevp or prevp.type in OPENING_BRACKETS:
1615             return NO
1616
1617         if t == token.COLON:
1618             if prevp.type == token.COLON:
1619                 return NO
1620
1621             elif prevp.type != token.COMMA and not complex_subscript:
1622                 return NO
1623
1624             return SPACE
1625
1626         if prevp.type == token.EQUAL:
1627             if prevp.parent:
1628                 if prevp.parent.type in {
1629                     syms.arglist,
1630                     syms.argument,
1631                     syms.parameters,
1632                     syms.varargslist,
1633                 }:
1634                     return NO
1635
1636                 elif prevp.parent.type == syms.typedargslist:
1637                     # A bit hacky: if the equal sign has whitespace, it means we
1638                     # previously found it's a typed argument.  So, we're using
1639                     # that, too.
1640                     return prevp.prefix
1641
1642         elif prevp.type in STARS:
1643             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1644                 return NO
1645
1646         elif prevp.type == token.COLON:
1647             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1648                 return SPACE if complex_subscript else NO
1649
1650         elif (
1651             prevp.parent
1652             and prevp.parent.type == syms.factor
1653             and prevp.type in MATH_OPERATORS
1654         ):
1655             return NO
1656
1657         elif (
1658             prevp.type == token.RIGHTSHIFT
1659             and prevp.parent
1660             and prevp.parent.type == syms.shift_expr
1661             and prevp.prev_sibling
1662             and prevp.prev_sibling.type == token.NAME
1663             and prevp.prev_sibling.value == "print"  # type: ignore
1664         ):
1665             # Python 2 print chevron
1666             return NO
1667
1668     elif prev.type in OPENING_BRACKETS:
1669         return NO
1670
1671     if p.type in {syms.parameters, syms.arglist}:
1672         # untyped function signatures or calls
1673         if not prev or prev.type != token.COMMA:
1674             return NO
1675
1676     elif p.type == syms.varargslist:
1677         # lambdas
1678         if prev and prev.type != token.COMMA:
1679             return NO
1680
1681     elif p.type == syms.typedargslist:
1682         # typed function signatures
1683         if not prev:
1684             return NO
1685
1686         if t == token.EQUAL:
1687             if prev.type != syms.tname:
1688                 return NO
1689
1690         elif prev.type == token.EQUAL:
1691             # A bit hacky: if the equal sign has whitespace, it means we
1692             # previously found it's a typed argument.  So, we're using that, too.
1693             return prev.prefix
1694
1695         elif prev.type != token.COMMA:
1696             return NO
1697
1698     elif p.type == syms.tname:
1699         # type names
1700         if not prev:
1701             prevp = preceding_leaf(p)
1702             if not prevp or prevp.type != token.COMMA:
1703                 return NO
1704
1705     elif p.type == syms.trailer:
1706         # attributes and calls
1707         if t == token.LPAR or t == token.RPAR:
1708             return NO
1709
1710         if not prev:
1711             if t == token.DOT:
1712                 prevp = preceding_leaf(p)
1713                 if not prevp or prevp.type != token.NUMBER:
1714                     return NO
1715
1716             elif t == token.LSQB:
1717                 return NO
1718
1719         elif prev.type != token.COMMA:
1720             return NO
1721
1722     elif p.type == syms.argument:
1723         # single argument
1724         if t == token.EQUAL:
1725             return NO
1726
1727         if not prev:
1728             prevp = preceding_leaf(p)
1729             if not prevp or prevp.type == token.LPAR:
1730                 return NO
1731
1732         elif prev.type in {token.EQUAL} | STARS:
1733             return NO
1734
1735     elif p.type == syms.decorator:
1736         # decorators
1737         return NO
1738
1739     elif p.type == syms.dotted_name:
1740         if prev:
1741             return NO
1742
1743         prevp = preceding_leaf(p)
1744         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1745             return NO
1746
1747     elif p.type == syms.classdef:
1748         if t == token.LPAR:
1749             return NO
1750
1751         if prev and prev.type == token.LPAR:
1752             return NO
1753
1754     elif p.type in {syms.subscript, syms.sliceop}:
1755         # indexing
1756         if not prev:
1757             assert p.parent is not None, "subscripts are always parented"
1758             if p.parent.type == syms.subscriptlist:
1759                 return SPACE
1760
1761             return NO
1762
1763         elif not complex_subscript:
1764             return NO
1765
1766     elif p.type == syms.atom:
1767         if prev and t == token.DOT:
1768             # dots, but not the first one.
1769             return NO
1770
1771     elif p.type == syms.dictsetmaker:
1772         # dict unpacking
1773         if prev and prev.type == token.DOUBLESTAR:
1774             return NO
1775
1776     elif p.type in {syms.factor, syms.star_expr}:
1777         # unary ops
1778         if not prev:
1779             prevp = preceding_leaf(p)
1780             if not prevp or prevp.type in OPENING_BRACKETS:
1781                 return NO
1782
1783             prevp_parent = prevp.parent
1784             assert prevp_parent is not None
1785             if prevp.type == token.COLON and prevp_parent.type in {
1786                 syms.subscript,
1787                 syms.sliceop,
1788             }:
1789                 return NO
1790
1791             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1792                 return NO
1793
1794         elif t in {token.NAME, token.NUMBER, token.STRING}:
1795             return NO
1796
1797     elif p.type == syms.import_from:
1798         if t == token.DOT:
1799             if prev and prev.type == token.DOT:
1800                 return NO
1801
1802         elif t == token.NAME:
1803             if v == "import":
1804                 return SPACE
1805
1806             if prev and prev.type == token.DOT:
1807                 return NO
1808
1809     elif p.type == syms.sliceop:
1810         return NO
1811
1812     return SPACE
1813
1814
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Find the leaf that comes directly before `node` in the tree, if any.

    Walks upwards from `node` until an ancestor (or `node` itself) has a
    previous sibling.  If that sibling is a leaf it is returned as-is,
    otherwise its last leaf is returned.  Returns None when no previous
    sibling exists anywhere up the chain, or when the sibling has no leaves.
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if not sibling:
            # Nothing to the left at this level, look one level higher.
            current = current.parent
            continue

        if isinstance(sibling, Leaf):
            return sibling

        sibling_leaves = list(sibling.leaves())
        return sibling_leaves[-1] if sibling_leaves else None

    return None
1831
1832
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the direct child of `ancestor` on the path down to `descendant`.

    Climbs from `descendant` through its parents.  Returns None if the climb
    runs out of parents without ever meeting `ancestor`.
    """
    current: Optional[LN] = descendant
    while current:
        if current.parent != ancestor:
            current = current.parent
        else:
            break
    return current
1839
1840
def container_of(leaf: Leaf) -> LN:
    """Return `leaf` or the topmost ancestor that still starts with `leaf`.

    The climb stops as soon as the parent is missing, the parent's first
    child has a different prefix than `leaf`, the parent is the `file_input`
    node, or the parent directly follows a bracket.
    """
    same_prefix = leaf.prefix
    container: LN = leaf
    while container:
        parent = container.parent
        if (
            parent is None
            or parent.children[0].prefix != same_prefix
            or parent.type == syms.file_input
            or (
                parent.prev_sibling is not None
                and parent.prev_sibling.type in BRACKETS
            )
        ):
            break

        container = parent
    return container
1864
1865
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of `leaf` as a delimiter to break the line *after*.

    Only commas qualify; every other leaf yields 0.  `previous` is accepted
    for signature parity with `is_split_before_delimiter` and is not used.

    Higher numbers are higher priority.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
1878
1879
def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of `leaf` as a delimiter to break the line *before*.

    `previous` is the leaf that comes right before `leaf` on the line, if
    any.  It is used to tell attribute access chains, implicit string
    concatenation and the two-word operators `not in` / `is not` apart from
    their look-alikes.

    Returns 0 when `leaf` is not a delimiter of this kind.  Higher numbers
    are higher priority.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    kind = leaf.type
    parent = leaf.parent
    parent_type = parent.type if parent else None

    def follows_name(word: str) -> bool:
        """Tell whether `previous` is the NAME leaf spelled `word`."""
        return (
            previous is not None
            and previous.type == token.NAME
            and previous.value == word
        )

    if (
        kind == token.DOT
        and parent
        and parent_type not in {syms.import_from, syms.dotted_name}
        and (previous is None or previous.type in CLOSING_BRACKETS)
    ):
        return DOT_PRIORITY

    if (
        kind in MATH_OPERATORS
        and parent
        and parent_type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[kind]

    if kind in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        kind == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        # Second and later parts of an implicit string concatenation.
        return STRING_PRIORITY

    if kind != token.NAME:
        return 0

    # From here on only keyword-like NAME leaves can be delimiters.
    word = leaf.value
    if word == "for" and parent_type in {syms.comp_for, syms.old_comp_for}:
        return COMPREHENSION_PRIORITY

    if word == "if" and parent_type in {syms.comp_if, syms.old_comp_if}:
        return COMPREHENSION_PRIORITY

    if word in {"if", "else"} and parent_type == syms.test:
        return TERNARY_PRIORITY

    if word == "is":
        return COMPARATOR_PRIORITY

    if (
        word == "in"
        and parent_type in {syms.comp_op, syms.comparison}
        # In `not in`, the split belongs before `not`.
        and not follows_name("not")
    ):
        return COMPARATOR_PRIORITY

    if (
        word == "not"
        and parent_type == syms.comp_op
        # In `is not`, the split belongs before `is`.
        and not follows_name("is")
    ):
        return COMPARATOR_PRIORITY

    if word in LOGIC_OPERATORS and parent:
        return LOGIC_PRIORITY

    return 0
1969
1970
# Normalized comment texts recognized as "formatting off" and "formatting on"
# markers.  Besides the `fmt:` spellings, the yapf-style `disable` / `enable`
# comments are listed as well.
FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
1973
1974
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Yield a parentless comment leaf for every comment in `leaf`'s prefix.

    lib2to3 keeps comments inside the whitespace prefix of the token that
    follows them instead of giving them a place in the tree, so comments
    don't "belong" to any node.  The leaves produced here therefore have no
    parent; all that is preserved is the distinction between the two kinds:

    * inline comments, emitted with the regular token.COMMENT type;
    * standalone comments, emitted with the fake STANDALONE_COMMENT type.

    Each generated leaf gets a prefix made of as many newlines as preceded
    the comment in the original prefix.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        blank_lines_before = "\n" * proto.newlines
        yield Leaf(proto.type, proto.value, prefix=blank_lines_before)
1996
1997
@dataclass
class ProtoComment:
    """Description of a single comment found in a leaf's prefix.

    Instances are created by `list_comments()`; `generate_comments()` turns
    them into actual `Leaf` objects.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
2004
2005
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a `ProtoComment` for every comment line contained in `prefix`.

    `prefix` is scanned line by line.  A comment on the very first line is a
    trailing comment (token.COMMENT) unless `is_endmarker` is set; every
    other comment is a STANDALONE_COMMENT.  `newlines` counts the blank
    lines seen since the previous comment and `consumed` is the number of
    prefix characters read up to and including the comment's line.

    Results are memoized, so callers share the returned list.
    """
    comments: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return comments

    consumed = 0
    blank_lines = 0
    for lineno, raw_line in enumerate(prefix.split("\n")):
        consumed += len(raw_line) + 1  # adding the length of the split '\n'
        stripped = raw_line.lstrip()
        if not stripped:
            blank_lines += 1
            continue

        if not stripped.startswith("#"):
            continue

        is_trailing = lineno == 0 and not is_endmarker
        comments.append(
            ProtoComment(
                type=token.COMMENT if is_trailing else STANDALONE_COMMENT,
                value=make_comment(stripped),
                newlines=blank_lines,
                consumed=consumed,
            )
        )
        blank_lines = 0
    return comments
2034
2035
def make_comment(content: str) -> str:
    """Normalize `content` into a consistently formatted comment string.

    Trailing whitespace is removed and a single leading hash sign is
    guaranteed.  A space is inserted after the hash unless the text right
    behind it already starts with a space, "!", ":" or "#" (so "#!", "#:"
    and "##" comments are left untouched).

    Empty input yields a bare "#".
    """
    stripped = content.rstrip()
    if not stripped:
        return "#"

    text = stripped[1:] if stripped[0] == "#" else stripped
    needs_space = bool(text) and text[0] not in " !:#"
    if needs_space:
        text = " " + text
    return "#" + text
2053
2054
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively.  It is passed as True on recursive
    calls; the body itself relies on `line.inside_brackets` to decide whether
    to try splitting by delimiters first.

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.

    Comment lines, and lines that already fit (and are not marked as
    `should_explode`), are yielded unchanged.  If every split function fails
    with :exc:`CannotSplit`, the original `line` is yielded as is.
    """
    if line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    if not line.should_explode and is_line_short_enough(
        line, line_length=line_length, line_str=line_str
    ):
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    else:

        def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
            """Right-hand split that prefers a first line fitting the limit."""
            for omit in generate_trailers_to_omit(line, line_length):
                lines = list(right_hand_split(line, line_length, py36, omit=omit))
                if is_line_short_enough(lines[0], line_length=line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line.
            # `line_length` has to be passed explicitly here: the second
            # positional parameter of `right_hand_split` is the line length,
            # not the `py36` flag.
            yield from right_hand_split(line, line_length, py36=py36)

        if line.inside_brackets:
            split_funcs = [delimiter_split, standalone_comment_split, rhs]
        else:
            split_funcs = [rhs]
    for split_func in split_funcs:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        result: List[Line] = []
        try:
            for split in split_func(line, py36):
                if str(split).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(split, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            # This strategy didn't work out, try the next one.
            continue

        else:
            yield from result
            break

    else:
        # No split function succeeded.
        yield line
2122
2123
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split `line` around its first bracket pair into head, body and tail.

    Everything up to and including the first opening bracket forms the head,
    the content of that bracket pair forms the body (one indentation level
    deeper), and the matching closing bracket with whatever follows forms
    the tail.  Empty parts are not yielded.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    target = head_leaves
    first_opening_bracket = None
    for leaf in line.leaves:
        closes_body = (
            target is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is first_opening_bracket
        )
        if closes_body:
            # An empty bracket pair stays on the head in its entirety.
            target = tail_leaves if body_leaves else head_leaves
        target.append(leaf)
        if target is head_leaves and leaf.type in OPENING_BRACKETS:
            first_opening_bracket = leaf
            target = body_leaves

    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)

    # Build the new lines.
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    parts = ((head, head_leaves), (body, body_leaves), (tail, tail_leaves))
    for new_line, leaves in parts:
        for leaf in leaves:
            new_line.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                new_line.append(comment_after, preformatted=True)

    bracket_split_succeeded_or_raise(head, body, tail)
    for new_line, _ in parts:
        if new_line:
            yield new_line
2164
2165
def right_hand_split(
    line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Yields the non-empty ones of `head`, `body` and `tail`, in that order.
    Raises :exc:`CannotSplit` if no usable bracket pair was found or the
    resulting split is not worth it.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Walk the leaves right to left: first collect the tail up to and including
    # the last closing bracket that is not in `omit`, then the body up to its
    # opening bracket, then the head.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                # An empty bracket pair is kept together on the tail.
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    # The lists were filled back to front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    if not head_leaves:
        # No `head` means the split failed. Either `tail` has all content or
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    assert opening_bracket and closing_bracket
    body.should_explode = should_explode(body, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        # Retry with this pair of optional parentheses excluded as well.
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, py36=py36, omit=omit)
            return

        except CannotSplit:
            # The retry failed.  Unless one of the checks below gives up, fall
            # through and emit the split computed above for the current pair.
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to "
                    "satisfy the splitting algorithm because the head or the tail "
                    "contains multiline strings which by definition never fit one "
                    "line."
                )

    # NOTE(review): presumably turns invisible (empty-valued) parentheses into
    # real ones now that the split happens on them -- confirm in `ensure_visible`.
    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
2260
2261
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    Such a split is based on one pair of brackets: `head` holds everything
    up to and including the opening bracket, `body` holds the content of the
    brackets, and `tail` starts with the closing bracket.

    The split counts as failed when `body` is empty and `tail` is either
    empty as well (everything stayed on `head`) or shorter than three
    characters (moving so little to a separate line is not worth it).
    """
    tail_len = len(str(tail).strip())
    if body:
        return

    if tail_len == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if tail_len < 3:
        raise CannotSplit(
            f"Splitting brackets on an empty body to save "
            f"{tail_len} characters is not worth it"
        )
2286
2287
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Decorate `split_func` so every line it yields starts with a clean prefix.

    The first leaf of each produced line is run through `normalize_prefix`
    with `inside_brackets=True` before the line is passed on.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for produced in split_func(line, py36):
            first_leaf = produced.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield produced

    return split_wrapper
2301
2302
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split `line` after/before every delimiter of the highest priority found.

    A new line is started after each leaf whose recorded delimiter priority
    equals the maximum priority on `line` (the last leaf is ignored when
    looking for that maximum).  When splitting on commas, a trailing comma
    is added to the last produced line if that is safe.

    If `py36` is True, the split will add trailing commas also in function
    signatures that contain `*` and `**`.

    Raises :exc:`CannotSplit` for an empty line, a line without delimiters,
    or a line whose only top-priority delimiter is a single attribute dot.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    tracker = line.bracket_tracker
    try:
        delimiter_priority = tracker.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    if (
        delimiter_priority == DOT_PRIORITY
        and tracker.delimiter_count_with_priority(delimiter_priority) == 1
    ):
        raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    def fresh_line() -> Line:
        """Create an empty line at the same depth as the line being split."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = fresh_line()
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = fresh_line()
            current_line.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf, index):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        is_outermost_vararg = leaf.bracket_depth == lowest_depth and is_vararg(
            leaf, within=VARARGS_PARENTS
        )
        if is_outermost_vararg:
            # A trailing comma after * or ** arguments needs `py36`.
            trailing_comma_safe = trailing_comma_safe and py36
        if tracker.delimiters.get(id(leaf)) == delimiter_priority:
            yield current_line

            current_line = fresh_line()

    if current_line:
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
2365
2366
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split `line` so that its standalone comments end up on lines of their own.

    Leaves and their trailing comments are appended to the current output
    line one by one; whenever `append_safe` refuses a leaf, the current line
    is yielded and a new one is started with that leaf.

    Raises :exc:`CannotSplit` if `line` contains no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def fresh_line() -> Line:
        """Create an empty line at the same depth as the line being split."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    pending = fresh_line()

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal pending
        try:
            pending.append_safe(leaf, preformatted=True)
        except ValueError:
            yield pending

            pending = fresh_line()
            pending.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf, index):
            yield from append_to_line(comment_after)

    if pending:
        yield pending
2394
2395
def is_import(leaf: Leaf) -> bool:
    """Tell whether `leaf` is the keyword that opens an import statement.

    That is either the name `import` directly inside an `import_name` node
    or the name `from` directly inside an `import_from` node.
    """
    parent, kind, word = leaf.parent, leaf.type, leaf.value
    if kind != token.NAME or not parent:
        return False

    if word == "import":
        return bool(parent.type == syms.import_name)

    if word == "from":
        return bool(parent.type == syms.import_from)

    return False
2408
2409
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce the prefix of `leaf` to (at most) a run of newlines.

    Inside brackets the prefix is always emptied.  Outside brackets the
    newlines following the last comment in the prefix (or all of them when
    there is no comment) are kept, minus the one that ends the comment line.
    A backslash before the first comment empties the prefix as well.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    if inside_brackets:
        leaf.prefix = ""
        return

    segments = leaf.prefix.split("#")
    if "\\" in segments[0]:
        leaf.prefix = ""
        return

    has_comment = len(segments) > 1
    newline_count = segments[-1].count("\n") - (1 if has_comment else 0)
    leaf.prefix = "\n" * newline_count
2426
2427
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Lowercase the prefix letters (f, u, r, b) of the string in `leaf`.

    With `remove_u_prefix` set, any "u" is dropped from the prefix as well.

    Note: Mutates its argument.
    """
    match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    prefix_letters, remainder = match.group(1), match.group(2)
    normalized = prefix_letters.lower()
    if remove_u_prefix:
        normalized = normalized.replace("u", "")
    leaf.value = normalized + remainder
2442
2443
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Strings that already use triple double quotes are left alone.  The value
    of `leaf` is only replaced with the re-quoted form if that doesn't need
    more backslashes than the original; unnecessary escapes of the other
    quote character are removed either way (except in raw strings).

    Note: Mutates its argument.
    """
    # Look at the string without its prefix letters to find out which quotes
    # it uses now (`orig_quote`) and which ones it would be converted to
    # (`new_quote`).
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    # A quote is escaped when it is preceded by an odd number of backslashes,
    # and unescaped when preceded by an even number (including none).
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        # Unescape the original quotes and escape the new ones, as needed once
        # the string is delimited by `new_quote`.
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Roughly extract the `{...}` replacement fields of the f-string.
        matches = re.findall(r"[^{]\{(.*?)\}[^}]", new_body)
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return
    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case:
        # a double quote at the very end of the body would run into the
        # closing triple quotes, so it has to be escaped.
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2509
2510
def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
    """Normalize the numeric (float, int, and complex) literal in `leaf`.

    All letters (e in exponents, j in complex literals, a-f in hex literals,
    the base prefixes) are lowercased.  Octal, hex and binary literals get
    no further treatment.  For the rest, the mantissa and exponent are
    passed through the number formatting helpers together with
    `allow_underscores`, and an explicit "+" sign on the exponent is dropped.

    Note: Mutates its argument.
    """
    text = leaf.value.lower()
    # Leave octal, hex, and binary literals alone for now.
    if not text.startswith(("0o", "0x", "0b")):
        if "e" in text:
            mantissa, exponent = text.split("e")
            sign = ""
            if exponent.startswith("-"):
                sign = "-"
                exponent = exponent[1:]
            elif exponent.startswith("+"):
                exponent = exponent[1:]
            mantissa = format_float_or_int_string(mantissa, allow_underscores)
            exponent = format_int_string(exponent, allow_underscores)
            text = f"{mantissa}e{sign}{exponent}"
        elif "j" in text or "l" in text:
            # Complex numbers and Python 2 longs
            digits, suffix = text[:-1], text[-1]
            text = f"{format_float_or_int_string(digits, allow_underscores)}{suffix}"
        else:
            text = format_float_or_int_string(text, allow_underscores)
    leaf.value = text
2540
2541
def format_float_or_int_string(text: str, allow_underscores: bool) -> str:
    """Format a number given as text, e.g. "1.0", ".5", "10." or "42".

    Text without a decimal point is handed to `format_int_string()` as is.
    Otherwise both sides of the point are formatted separately, and a missing
    side is filled in with "0".
    """
    if "." in text:
        whole, fraction = text.split(".")
        whole = format_int_string(whole, allow_underscores) if whole else "0"
        fraction = format_int_string(fraction, allow_underscores) if fraction else "0"
        return f"{whole}.{fraction}"

    return format_int_string(text, allow_underscores)
2550
2551
def format_int_string(text: str, allow_underscores: bool) -> str:
    """Normalizes underscores in a string to e.g. 1_000_000.

    Input must be a string consisting only of digits and underscores.

    When `allow_underscores` is false, `text` is returned untouched.  Otherwise
    existing underscores are removed and, for more than six digits, the digits
    are regrouped in threes counted from the right.

    The grouping works on the digit string itself rather than on `int(text)`,
    so leading zeros survive.  This matters for the fractional part of a float
    ("0000001" in `1.0000001`) and for all-zero literals, whose value or
    spelling would otherwise change.
    """
    if not allow_underscores:
        return text

    digits = text.replace("_", "")
    if len(digits) <= 6:
        # No underscores for numbers <= 6 digits long.
        return digits

    # The leftmost group holds the 1-3 digits left over after cutting threes.
    head = len(digits) % 3 or 3
    groups = [digits[:head]]
    groups.extend(digits[start : start + 3] for start in range(head, len(digits), 3))
    return "_".join(groups)
2564
2565
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    The tree rooted at `node` is modified in place; nothing is returned.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return

    # True when the previously visited child was a leaf listed in `parens_after`,
    # i.e. the current child is the one that may need (in)visible parentheses.
    check_lpar = False
    # Iterate over a snapshot because children get replaced/inserted below.
    for index, child in enumerate(list(node.children)):
        if check_lpar:
            if child.type == syms.atom:
                maybe_make_parens_invisible_in_atom(child)
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                lpar = Leaf(token.LPAR, "(")
                rpar = Leaf(token.RPAR, ")")
                child.remove()
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    # The last child is blanked as the counterpart of `child`.
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                # Nothing after this point is inspected for import statements.
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # wrap child in invisible parentheses
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                # The result of `remove()` is used as the insertion index,
                # falling back to 0 when it is falsy.
                index = child.remove() or 0
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))

        check_lpar = isinstance(child, Leaf) and child.value in parens_after
2612
2613
def normalize_fmt_off(node: Node) -> None:
    """Turn every `# fmt: off`/`# fmt: on` region under `node` into a standalone comment.

    Pairs are converted one at a time; the loop ends once
    `convert_one_fmt_off_pair()` reports that nothing was converted.
    """
    while convert_one_fmt_off_pair(node):
        pass
2619
2620
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Scans the prefix comments of every leaf under `node` for a `# fmt: off`
    marker.  For the first usable one, the nodes produced by
    `generate_ignored_nodes()` are removed from the tree and replaced with one
    STANDALONE_COMMENT leaf holding the marker followed by their source text.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        # Value of `consumed` for the comment handled just before the current
        # one within this leaf's prefix (0 for the first comment).
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value in FMT_OFF:
                # We only want standalone comments. If there's no previous leaf or
                # the previous leaf is indentation, it's a standalone comment in
                # disguise.
                if comment.type != STANDALONE_COMMENT:
                    prev = preceding_leaf(leaf)
                    if prev and prev.type not in WHITESPACE:
                        continue

                ignored_nodes = list(generate_ignored_nodes(leaf))
                if not ignored_nodes:
                    continue

                first = ignored_nodes[0]  # Can be a container node with the `leaf`.
                parent = first.parent
                prefix = first.prefix
                # Drop the part of the prefix up to and including this comment;
                # the comment text itself is re-emitted in `hidden_value` below.
                first.prefix = prefix[comment.consumed :]
                hidden_value = (
                    comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
                )
                if hidden_value.endswith("\n"):
                    # That happens when one of the `ignored_nodes` ended with a NEWLINE
                    # leaf (possibly followed by a DEDENT).
                    hidden_value = hidden_value[:-1]
                # Remove all ignored nodes, remembering where the first one was
                # so the replacement leaf can be inserted at the same position.
                first_idx = None
                for ignored in ignored_nodes:
                    index = ignored.remove()
                    if first_idx is None:
                        first_idx = index
                assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
                assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
                parent.insert_child(
                    first_idx,
                    Leaf(
                        STANDALONE_COMMENT,
                        hidden_value,
                        # Keep whatever preceded this comment in the old prefix,
                        # followed by `comment.newlines` newline characters.
                        prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                    ),
                )
                # The tree changed; the caller is expected to start over.
                return True

            # Not reached for comments skipped via `continue` above.
            previous_consumed = comment.consumed

    return False
2673
2674
def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
    """Yield the container of `leaf` and its following siblings until `# fmt: on`.

    Generation stops before the first sibling whose prefix carries a
    `# fmt: on` comment, at an ENDMARKER, or when there are no more siblings.
    """
    current: Optional[LN] = container_of(leaf)
    while current is not None and current.type != token.ENDMARKER:
        prefix_comments = list_comments(current.prefix, is_endmarker=False)
        if any(comment.value in FMT_ON for comment in prefix_comments):
            return

        yield current

        current = current.next_sibling
2689
2690
def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
    """Blank out the parentheses of the atom `node` when that is safe, recursively.

    Nothing is changed for non-atoms, empty tuples, one-tuples, yields, and
    atoms whose content has a delimiter of comma priority or higher.

    Returns True if the parentheses of `node` itself were made invisible.
    """
    if node.type != syms.atom:
        return False

    if is_empty_tuple(node) or is_one_tuple(node) or is_yield(node):
        return False

    if max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY:
        return False

    opener, closer = node.children[0], node.children[-1]
    if not (opener.type == token.LPAR and closer.type == token.RPAR):
        return False

    # An empty value is what makes a parenthesis leaf "invisible".
    opener.value = ""  # type: ignore
    closer.value = ""  # type: ignore
    if len(node.children) > 1:
        # Redundant nested parentheses are handled by recursing into the content.
        maybe_make_parens_invisible_in_atom(node.children[1])
    return True
2713
2714
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` is an atom made of just `(` and `)`."""
    if node.type != syms.atom or len(node.children) != 2:
        return False

    opener, closer = node.children
    return opener.type == token.LPAR and closer.type == token.RPAR
2723
2724
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type != syms.atom:
        # Unparenthesized form: an implicit tuple made of an element and a comma.
        return (
            node.type in IMPLICIT_TUPLE
            and len(node.children) == 2
            and node.children[1].type == token.COMMA
        )

    if len(node.children) != 3:
        return False

    lpar, gexp, rpar = node.children
    if (
        lpar.type != token.LPAR
        or gexp.type != syms.testlist_gexp
        or rpar.type != token.RPAR
    ):
        return False

    # Parenthesized form: the content is exactly one element plus a comma.
    return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2746
2747
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression.

    Any number of enclosing parentheses around the expression is looked through.
    """
    while True:
        if node.type == syms.yield_expr:
            return True

        if node.type == token.NAME and node.value == "yield":  # type: ignore
            return True

        if node.type != syms.atom or len(node.children) != 3:
            return False

        lpar, inner, rpar = node.children
        if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
            return False

        # Peel one pair of parentheses and examine what is inside.
        node = inner
2767
2768
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    parent = leaf.parent
    if leaf.type not in STARS or not parent:
        return False

    if parent.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132), so the decision is made on the node
        # one level further up.
        parent = parent.parent
        if not parent:
            return False

    return parent.type in within
2790
2791
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a triple-quoted string that really contains a newline."""
    unprefixed = leaf.value.lstrip("furbFURB")
    if "\n" not in unprefixed:
        return False

    return unprefixed.startswith(('"""', "'''"))
2796
2797
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body.

    Such a suite has exactly four children: NEWLINE, INDENT, a statement
    accepted by `is_stub_body()`, and DEDENT.
    """
    children = node.children
    if len(children) != 4:
        return False

    newline, indent, body, dedent = children
    expected = (token.NEWLINE, token.INDENT, token.DEDENT)
    if (newline.type, indent.type, dedent.type) != expected:
        return False

    return is_stub_body(body)
2809
2810
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis.

    The statement must have two children, the first being an atom that
    consists of exactly three `.` leaves.
    """
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    expr = node.children[0]
    if expr.type != syms.atom or len(expr.children) != 3:
        return False

    dot = Leaf(token.DOT, ".")
    return all(part == dot for part in expr.children)
2825
2826
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return the highest delimiter priority found inside the atom `node`.

    Only atoms whose content is wrapped in a pair of parentheses are examined.
    For anything else, and when the bracket tracker raises ValueError, the
    result is 0.
    """
    if node.type != syms.atom:
        return 0

    opener, closer = node.children[0], node.children[-1]
    if opener.type != token.LPAR or closer.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for inner in node.children[1:-1]:
        # Feed every leaf between the parentheses to the tracker, in order.
        inner_leaves = [inner] if isinstance(inner, Leaf) else inner.leaves()
        for leaf in inner_leaves:
            tracker.mark(leaf)

    try:
        return tracker.max_delimiter_priority()
    except ValueError:
        return 0
2853
2854
def ensure_visible(leaf: Leaf) -> None:
    """Give a parenthesis leaf its visible text back.

    Parentheses could have been made invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).  Leaves
    of any other type are left alone.
    """
    visible_text = {token.LPAR: "(", token.RPAR: ")"}
    if leaf.type in visible_text:
        leaf.value = visible_text[leaf.type]
2865
2866
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?

    True only when `opening_bracket` belongs to an atom or an import-from
    statement and the highest delimiter priority in `line` (not counting a
    trailing comma) is the comma priority.
    """
    parent = opening_bracket.parent
    if not parent or parent.type not in {syms.atom, syms.import_from}:
        return False

    # Substring test on purpose: an empty value passes it as well.
    if opening_bracket.value not in "[{(":
        return False

    try:
        trailing = line.leaves[-1]
        ignored = {id(trailing)} if trailing.type == token.COMMA else set()
        top_priority = line.bracket_tracker.max_delimiter_priority(exclude=ignored)
    except (IndexError, ValueError):
        return False

    return top_priority == COMMA_PRIORITY
2884
2885
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures and calls.

    String prefixes are compared case-insensitively, so mixed-case spellings
    such as `rF"..."` or `Fr'...'` are recognized as f-strings too.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # The first two characters are either `f` plus the opening quote
            # or a two-letter prefix combining `f` and `r`, in any letter case.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # An argument list that ends with a comma: look for star-args.
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
2914
2915
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
    set is empty.

    Note that the very same set object is yielded each time and is mutated in
    place between yields.
    """

    omit: Set[LeafID] = set()
    yield omit

    # Running width of the trailing part of the line scanned so far; starts
    # with four columns per depth level (presumably the indentation).
    length = 4 * line.depth
    # Set while scanning inside a bracket pair (leaves are visited right to
    # left, so the closing bracket is seen first).
    opening_bracket = None
    # The most recent outermost closing bracket that was accepted.
    closing_bracket = None
    # NOTE(review): within this function `optional_brackets` is only added to
    # and discarded from; it is never read.
    optional_brackets: Set[LeafID] = set()
    # Closing brackets seen inside the pair currently being scanned, plus
    # empty bracket pairs; merged into `omit` on the next yield.
    inner_brackets: Set[LeafID] = set()
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # `leaf_length` also counts attached comments, so anything beyond
        # prefix + value means the leaf has a trailing comment.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        optional_brackets.discard(id(leaf))
        if opening_bracket:
            if leaf is opening_bracket:
                # Reached the start of the pair being scanned.
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if not leaf.value:
                # An invisible bracket.  `opening_bracket` is falsy in this branch.
                optional_brackets.add(id(opening_bracket))
                continue

            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))
                continue

            opening_bracket = leaf.opening_bracket
            if closing_bracket:
                # The previously accepted trailer fits; offer it for omission
                # together with everything collected inside it.
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit
            closing_bracket = leaf
2968
2969
def get_future_imports(node: Node) -> Set[str]:
    """Return the set of names imported from `__future__` at the top of the file.

    Only leading simple statements are examined: docstring-like string
    statements are skipped, `from __future__ import ...` statements are
    collected, and anything else ends the search.
    """
    found: Set[str] = set()

    def imported_names(children: List[LN]) -> Generator[str, None, None]:
        """Yield the original (pre-`as`) names listed in an import statement."""
        for item in children:
            if isinstance(item, Leaf):
                # Non-name leaves are skipped.
                if item.type == token.NAME:
                    yield item.value
            elif item.type == syms.import_as_names:
                yield from imported_names(item.children)
            elif item.type == syms.import_as_name:
                original = item.children[0]
                assert isinstance(original, Leaf), "Invalid syntax parsing imports"
                assert original.type == token.NAME, "Invalid syntax parsing imports"
                yield original.value
            else:
                assert False, "Invalid syntax parsing imports"

    for statement in node.children:
        if statement.type != syms.simple_stmt:
            break

        head = statement.children[0]
        if isinstance(head, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            looks_like_docstring = (
                len(statement.children) == 2
                and head.type == token.STRING
                and statement.children[1].type == token.NEWLINE
            )
            if not looks_like_docstring:
                break
            continue

        if head.type != syms.import_from:
            break

        module_name = head.children[1]
        if not isinstance(module_name, Leaf) or module_name.value != "__future__":
            break

        # Everything from the fourth child on is the list of imported names.
        found.update(imported_names(head.children[3:]))

    return found
3011
3012
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Recursively yield files under `path` that pass both regex filters.

    Each entry is matched by its resolved path relative to `root`, written
    POSIX-style with a leading slash (and a trailing slash for directories).
    Entries matching `exclude` with a non-empty match are skipped, directories
    included; of the remaining files only those matching `include` are yielded.

    Symbolic links pointing outside of the `root` directory are ignored.

    `report` is where output about exclusions goes.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for entry in path.iterdir():
        try:
            relative = entry.resolve().relative_to(root)
        except ValueError:
            if not entry.is_symlink():
                raise

            report.path_ignored(
                entry, f"is a symbolic link that points outside {root}"
            )
            continue

        normalized_path = "/" + relative.as_posix()
        if entry.is_dir():
            normalized_path += "/"

        excluded = exclude.search(normalized_path)
        if excluded and excluded.group(0):
            report.path_ignored(entry, "matches the --exclude regular expression")
            continue

        if entry.is_dir():
            yield from gen_python_files_in_dir(entry, root, include, exclude, report)

        elif entry.is_file() and include.search(normalized_path):
            yield entry
3054
3055
@lru_cache()
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.

    The search starts at the deepest directory that contains every entry of
    `srcs` (a source that is itself a directory counts as containing itself)
    and walks up towards the file system root.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.

    `srcs` must be hashable (e.g. a tuple) because results are cached.
    """
    if not srcs:
        return Path("/").resolve()

    # Intersect the ancestor chains of all sources.  Taking `min()` of the
    # paths, as was done before, only picks the lexicographically smallest
    # source, which is not necessarily an ancestor of the others.
    shared: Optional[Set[Path]] = None
    for src in srcs:
        path = Path(src).resolve()
        lineage = set(path.parents)
        if path.is_dir():
            lineage.add(path)
        shared = lineage if shared is None else shared & lineage

    if not shared:
        # The sources have no ancestor in common (e.g. different drives).
        return Path("/").resolve()

    # All members of `shared` lie on a single ancestor chain, so the one with
    # the most components is the deepest common directory.
    common_base = max(shared, key=lambda candidate: len(candidate.parts))
    search_path = [common_base, *common_base.parents]
    for directory in search_path:
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # No marker anywhere: fall back to the topmost directory of the chain.
    return search_path[-1]
3084
3085
@dataclass
class Report:
    """Keeps a tally of formatting outcomes. Render the summary with `str(report)`."""

    # Mode switches; they affect wording, verbosity, and the exit code.
    check: bool = False
    quiet: bool = False
    verbose: bool = False
    # Counters updated by `done()` and `failed()`.
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Record that `src` was processed successfully and print a message.

        `Changed.YES` counts as a changed file; any other value counts as an
        unchanged one.
        """
        if changed is not Changed.YES:
            if self.verbose:
                if changed is Changed.NO:
                    message = f"{src} already well formatted, good job."
                else:
                    message = f"{src} wasn't modified on disk since last run."
                out(message, bold=False)
            self.same_count += 1
            return

        if self.verbose or not self.quiet:
            verb = "would reformat" if self.check else "reformatted"
            out(f"{verb} {src}")
        self.change_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Record that `src` could not be formatted and print the error."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    def path_ignored(self, path: Path, message: str) -> None:
        """Print why `path` was skipped; only does so in verbose mode."""
        if self.verbose:
            out(f"{path} ignored: {message}", bold=False)

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        This considers the current state of changed files and failures:
        - if there were any failures, return 123;
        - if any files were changed and --check is being used, return 1;
        - otherwise return 0.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special returncodes reserved by the shell.
        if self.failure_count:
            return 123

        if self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """

        def files(count: int) -> str:
            """Return e.g. "1 file" or "3 files"."""
            return f"{count} files" if count > 1 else f"{count} file"

        if self.check:
            reformatted, unchanged, failed = (
                "would be reformatted",
                "would be left unchanged",
                "would fail to reformat",
            )
        else:
            reformatted, unchanged, failed = (
                "reformatted",
                "left unchanged",
                "failed to reformat",
            )

        parts = []
        if self.change_count:
            summary = f"{files(self.change_count)} {reformatted}"
            parts.append(click.style(summary, bold=True))
        if self.same_count:
            parts.append(f"{files(self.same_count)} {unchanged}")
        if self.failure_count:
            summary = f"{files(self.failure_count)} {failed}"
            parts.append(click.style(summary, fg="red"))
        return ", ".join(parts) + "."
3169
3170
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both texts are parsed with the running interpreter's `ast` module and the
    trees are compared through a textual rendering of node class names and
    field values.  An AssertionError is also raised when either text fails to
    parse.
    """

    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{'  ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{'  ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)
                    else:
                        # Plain list items (e.g. the names in `global` and
                        # `nonlocal` statements) are part of the program too.
                        yield (
                            f"{'  ' * (depth+2)}{item!r},"
                            f"  # {item.__class__.__name__}"
                        )

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"

        yield f"{'  ' * depth})  # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        # The parser's message is embedded below, so the chained traceback is
        # suppressed like in the other failure paths.
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        ) from None

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues.  "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source.  "
            f"Please report a bug on https://github.com/ambv/black/issues.  "
            f"This diff might be helpful: {log}"
        ) from None
3232
3233
def assert_stable(
    src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
) -> None:
    """Raise AssertionError if formatting `dst` again does not reproduce `dst`.

    On failure, the diffs source -> first pass and first pass -> second pass
    are written to a log file whose path is included in the error message.
    """
    second_pass = format_str(dst, line_length=line_length, mode=mode)
    if dst == second_pass:
        return

    log = dump_to_file(
        diff(src, dst, "source", "first pass"),
        diff(dst, second_pass, "first pass", "second pass"),
    )
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter.  "
        f"Please report a bug on https://github.com/ambv/black/issues.  "
        f"This diff might be helpful: {log}"
    ) from None
3250
3251
def dump_to_file(*output: str) -> str:
    """Write every string in `output` to a new temporary log file.

    Each non-empty string that does not already end with a newline gets one
    appended.  The file is kept on disk; its path is returned.
    """
    import tempfile

    log_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    )
    with log_file:
        for chunk in output:
            log_file.write(chunk)
            if chunk and not chunk.endswith("\n"):
                log_file.write("\n")
    return log_file.name
3264
3265
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff of `a` against `b` with five lines of context.

    `a_name` and `b_name` are used as the file names in the diff header.
    """
    import difflib

    delta = difflib.unified_diff(
        [f"{line}\n" for line in a.split("\n")],
        [f"{line}\n" for line in b.split("\n")],
        fromfile=a_name,
        tofile=b_name,
        n=5,
    )
    return "".join(delta)
3275
3276
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """Signal handler for asyncio: print "Aborted!" to stderr, then cancel each of `tasks`."""
    err("Aborted!")
    for pending in tasks:
        pending.cancel()
3282
3283
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even if there was nothing to cancel or if waiting for
    the cancelled tasks raised.
    """
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.Task.all_tasks()` was deprecated in Python 3.7 and removed in
        # 3.9, so prefer the module-level function whenever it exists.
        all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No explicit `loop=` here: that parameter was removed in Python 3.10,
        # and `gather()` picks up the loop from the tasks it is given.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
3304
3305
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    This is used by string normalization to perform replaces on
    overlapping matches, which a single pass cannot handle.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
3313
3314
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile `regex`, switching on verbose mode if it spans several lines.

    A pattern containing a newline is prefixed with the inline `(?x)` flag
    before compilation.
    """
    pattern = f"(?x){regex}" if "\n" in regex else regex
    return re.compile(pattern)
3323
3324
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Yield `(index, element)` pairs of `sequence` from the last one to the first.

    Like `reversed(enumerate(sequence))` if that were possible.
    """
    last_index = len(sequence) - 1
    for offset, element in enumerate(reversed(sequence)):
        yield (last_index - offset, element)
3331
3332
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Yield `(index, leaf, length)` for the leaves of `line`.

    `length` is the size of the leaf's prefix and value plus the values of the
    comments returned by `line.comments_after()` for it.  With `reversed` set,
    leaves are visited from last to first.

    Stops prematurely at the first leaf whose value contains a newline.
    """
    enumerator = cast(
        Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
        enumerate_reversed if reversed else enumerate,
    )
    for index, leaf in enumerator(line.leaves):
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        comments_length = sum(
            len(comment.value) for comment in line.comments_after(leaf, index)
        )
        yield index, leaf, len(leaf.prefix) + len(leaf.value) + comments_length
3354
3355
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Tell whether `line` fits in `line_length` characters on one physical line.

    A non-empty `line_str` is taken as the rendering of `line`; otherwise the
    rendering is `str(line)` with leading and trailing newlines stripped.
    A rendering that still contains a newline (multiline strings) or a line
    with standalone comments is never considered short enough.
    """
    rendered = line_str or str(line).strip("\n")
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:  # multiline strings
        return False

    return not line.contains_standalone_comments()
3368
3369
def can_be_split(line: Line) -> bool:
    """Return False when `line` certainly cannot be split.

    This is a cheap heuristic rather than an exhaustive search.  It exists to
    avoid some unfortunate formattings, mostly wrapping unsplittable code in
    unnecessary parentheses.

    Lines with fewer than two leaves are never splittable.  Lines that start
    with a string followed by a dot get an extra back-to-front inspection of
    their leaves; every other line is reported as splittable.
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    starts_with_string_attribute = (
        leaves[0].type == token.STRING and leaves[1].type == token.DOT
    )
    if not starts_with_string_attribute:
        return True

    calls = 0
    dots = 0
    # NOTE(review): `trailing` stays bound to the very last leaf for the whole
    # walk; it is never advanced to the previously visited leaf.  Confirm that
    # this is intended before relying on the per-leaf checks below.
    trailing = leaves[-1]
    for current in reversed(leaves[:-1]):
        kind = current.type
        if kind in OPENING_BRACKETS:
            if trailing.type not in CLOSING_BRACKETS:
                return False

            calls += 1
        elif kind == token.DOT:
            dots += 1
        elif kind == token.NAME:
            if trailing.type != token.DOT and trailing.type not in OPENING_BRACKETS:
                return False

        elif kind not in CLOSING_BRACKETS:
            # Anything other than brackets, dots and names rules a split out.
            return False

        if dots > 1 and calls > 1:
            return False

    return True
3404
3405
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.

    The decision is made in this order:

    1. no delimiters tracked on the line -> True;
    2. more than one delimiter of the highest priority -> False;
    3. the highest priority is `DOT_PRIORITY` -> True;
    4. the line starts with a non-empty bracket pair and what follows that
       pair's closing bracket fits in `line_length` -> True;
    5. the line ends with a closing bracket (see the conditions below) and the
       text up to its opening bracket fits, or another opening bracket was
       seen before it -> True;
    6. otherwise -> False.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        # `remainder` is True while we are measuring leaves that come after
        # the bracket pair opened by `first` (starting at its closing bracket).
        remainder = False
        # Start from the indentation; presumably four columns per depth level.
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                remainder = True
            if remainder:
                length += leaf_length
                if length > line_length:
                    # Too long: skip the `else` clause below.
                    break

                if leaf.type in OPENING_BRACKETS:
                    # There are brackets we can further split on.
                    remainder = False

        else:
            # checked the entire string and line length wasn't exceeded
            # `enumerate_with_length()` may stop before the last leaf (it
            # bails out on multiline strings), so make sure the final index
            # we saw really is the last leaf of the line.
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket.  If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        # Measure the line from its start up to and including the bracket
        # that `last` closes.
        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                # Either an earlier bracket offers another split point, or
                # everything up to this bracket fits on the line.
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    return False
3493
3494
def get_cache_file(line_length: int, mode: FileMode) -> Path:
    """Return the path of the cache file used for `line_length` and `mode`.

    The file lives directly in `CACHE_DIR`; its name embeds `line_length` and
    `mode.value`.
    """
    file_name = f"cache.{line_length}.{mode.value}.pickle"
    return CACHE_DIR / file_name
3497
3498
def read_cache(line_length: int, mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    Returns an empty cache when the cache file for `line_length` and `mode` is
    missing, cannot be unpickled, or does not hold a dictionary.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(line_length, mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            # NOTE(security): unpickling can execute arbitrary code.  This
            # presumes the cache directory is writable only by the current
            # user -- confirm before pointing CACHE_DIR at a shared location.
            cache: Cache = pickle.load(fobj)
        except (
            pickle.UnpicklingError,
            # An empty or truncated file (e.g. an interrupted write).
            EOFError,
            # Other errors the pickle documentation lists for malformed or
            # incompatible data.
            AttributeError,
            ImportError,
            IndexError,
            ValueError,
        ):
            return {}

    if not isinstance(cache, dict):
        # A valid pickle of the wrong shape is just as unusable as a broken one.
        return {}

    return cache
3515
3516
def get_cache_info(path: Path) -> CacheInfo:
    """Return the `(modification time, size)` pair recorded in the cache for `path`.

    The pair is compared against a stored entry to decide whether a file is
    already formatted or not.  Both values come from a single `path.stat()` call.
    """
    file_status = path.stat()
    modified_at: Timestamp = file_status.st_mtime
    size: FileSize = file_status.st_size
    return modified_at, size
3521
3522
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Partition the paths in `sources` into `(todo, done)` sets.

    `todo` holds the files that were modified on disk or are missing from
    `cache`; `done` holds the files whose current cache info equals the cached
    entry.  Every path is resolved before it is looked up and stored.
    """
    changed: Set[Path] = set()
    unchanged: Set[Path] = set()
    for source in sources:
        resolved = source.resolve()
        is_up_to_date = cache.get(resolved) == get_cache_info(resolved)
        bucket = unchanged if is_up_to_date else changed
        bucket.add(resolved)
    return changed, unchanged
3537
3538
def write_cache(
    cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
) -> None:
    """Update the cache file.

    The entries of `cache` are merged with fresh cache info for every path in
    `sources` (keyed by the resolved path) and the result is pickled to the
    cache file for `line_length` and `mode`.

    Writing the cache is best effort: any `OSError` raised while creating the
    directory, stat-ing the sources or writing the file is ignored.
    """
    cache_file = get_cache_file(line_length, mode)
    try:
        # `exist_ok=True` instead of a separate `exists()` check: another
        # process creating the directory in between must not make us skip
        # the write.
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # Deliberately swallowed: failing to cache must not fail formatting.
        pass
3552
3553
def patch_click() -> None:
    """Make Click not crash.

    On certain misconfigured environments, Python 3 selects the ASCII encoding as the
    default which restricts paths that it can access during the lifetime of the
    application.  Click refuses to work in this scenario by raising a RuntimeError.

    In case of Black the likelihood that non-ASCII characters are going to be used in
    file paths is minimal since it's Python source code.  Moreover, this crash was
    spurious on Python 3.7 thanks to PEP 538 and PEP 540.

    The environment check is replaced with a no-op in every Click module that
    defines it.  Modules that cannot be imported are skipped.
    """
    modules: List[Any] = []
    # Import each module on its own: `_unicodefun` is private and absent from
    # some Click releases, in which case `from click import _unicodefun`
    # raises a plain ImportError (not ModuleNotFoundError).  A missing module
    # must neither crash Black nor prevent patching the other one.
    try:
        from click import core
    except ImportError:
        pass
    else:
        modules.append(core)
    try:
        from click import _unicodefun  # type: ignore
    except ImportError:
        pass
    else:
        modules.append(_unicodefun)

    # The hook is named `_verify_python3_env` in older Click releases and
    # `_verify_python_env` in later ones; patch whichever is present.
    for module in modules:
        for hook in ("_verify_python3_env", "_verify_python_env"):
            if hasattr(module, hook):
                setattr(module, hook, lambda: None)
3574
3575
# Script entry point: neutralize Click's environment check first, then hand
# control to the command-line interface.
if __name__ == "__main__":
    patch_click()
    main()