black.py

   1 import asyncio
   2 from asyncio.base_events import BaseEventLoop
   3 from concurrent.futures import Executor, ProcessPoolExecutor
   4 from datetime import datetime
   5 from enum import Enum, Flag
   6 from functools import lru_cache, partial, wraps
   7 import io
   8 import itertools
   9 import logging
  10 from multiprocessing import Manager, freeze_support
  11 import os
  12 from pathlib import Path
  13 import pickle
  14 import re
  15 import signal
  16 import sys
  17 import tokenize
  18 from typing import (
  19     Any,
  20     Callable,
  21     Collection,
  22     Dict,
  23     Generator,
  24     Generic,
  25     Iterable,
  26     Iterator,
  27     List,
  28     Optional,
  29     Pattern,
  30     Sequence,
  31     Set,
  32     Tuple,
  33     TypeVar,
  34     Union,
  35     cast,
  36 )
  37
  38 from appdirs import user_cache_dir
  39 from attr import dataclass, Factory
  40 import click
  41 import toml
  42
  43 # lib2to3 fork
  44 from blib2to3.pytree import Node, Leaf, type_repr
  45 from blib2to3 import pygram, pytree
  46 from blib2to3.pgen2 import driver, token
  47 from blib2to3.pgen2.parse import ParseError
  48
  49
  50 __version__ = "18.9b0"
  51 DEFAULT_LINE_LENGTH = 88
  52 DEFAULT_EXCLUDES = (
  53     r"/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)/"
  54 )
  55 DEFAULT_INCLUDES = r"\.pyi?$"
  56 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
  57
  58
  59 # types
  60 FileContent = str
  61 Encoding = str
  62 NewLine = str
  63 Depth = int
  64 NodeType = int
  65 LeafID = int
  66 Priority = int
  67 Index = int
  68 LN = Union[Leaf, Node]
  69 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
  70 Timestamp = float
  71 FileSize = int
  72 CacheInfo = Tuple[Timestamp, FileSize]
  73 Cache = Dict[Path, CacheInfo]
  74 out = partial(click.secho, bold=True, err=True)
  75 err = partial(click.secho, fg="red", err=True)
  76
  77 pygram.initialize(CACHE_DIR)
  78 syms = pygram.python_symbols
  79
  80
  81 class NothingChanged(UserWarning):
  82     """Raised when reformatted code is the same as source."""
  83
  84
  85 class CannotSplit(Exception):
  86     """A readable split that fits the allotted line length is impossible."""
  87
  88
  89 class InvalidInput(ValueError):
  90     """Raised when input source code fails all parse attempts."""
  91
  92
  93 class WriteBack(Enum):
  94     NO = 0
  95     YES = 1
  96     DIFF = 2
  97     CHECK = 3
  98
  99     @classmethod
 100     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
 101         if check and not diff:
 102             return cls.CHECK
 103
 104         return cls.DIFF if diff else cls.YES
 105
 106
 107 class Changed(Enum):
 108     NO = 0
 109     CACHED = 1
 110     YES = 2
 111
 112
 113 class FileMode(Flag):
 114     AUTO_DETECT = 0
 115     PYTHON36 = 1
 116     PYI = 2
 117     NO_STRING_NORMALIZATION = 4
 118     NO_NUMERIC_UNDERSCORE_NORMALIZATION = 8
 119
 120     @classmethod
 121     def from_configuration(
 122         cls,
 123         *,
 124         py36: bool,
 125         pyi: bool,
 126         skip_string_normalization: bool,
 127         skip_numeric_underscore_normalization: bool,
 128     ) -> "FileMode":
 129         mode = cls.AUTO_DETECT
 130         if py36:
 131             mode |= cls.PYTHON36
 132         if pyi:
 133             mode |= cls.PYI
 134         if skip_string_normalization:
 135             mode |= cls.NO_STRING_NORMALIZATION
 136         if skip_numeric_underscore_normalization:
 137             mode |= cls.NO_NUMERIC_UNDERSCORE_NORMALIZATION
 138         return mode
 139
 140
 141 def read_pyproject_toml(
 142     ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
 143 ) -> Optional[str]:
 144     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 145
 146     Returns the path to a successfully found and read configuration file, None
 147     otherwise.
 148     """
 149     assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
 150     if not value:
 151         root = find_project_root(ctx.params.get("src", ()))
 152         path = root / "pyproject.toml"
 153         if path.is_file():
 154             value = str(path)
 155         else:
 156             return None
 157
 158     try:
 159         pyproject_toml = toml.load(value)
 160         config = pyproject_toml.get("tool", {}).get("black", {})
 161     except (toml.TomlDecodeError, OSError) as e:
 162         raise click.FileError(
 163             filename=value, hint=f"Error reading configuration file: {e}"
 164         )
 165
 166     if not config:
 167         return None
 168
 169     if ctx.default_map is None:
 170         ctx.default_map = {}
 171     ctx.default_map.update(  # type: ignore  # bad types in .pyi
 172         {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 173     )
 174     return value
 175
 176
 177 @click.command(context_settings=dict(help_option_names=["-h", "--help"]))
 178 @click.option(
 179     "-l",
 180     "--line-length",
 181     type=int,
 182     default=DEFAULT_LINE_LENGTH,
 183     help="How many characters per line to allow.",
 184     show_default=True,
 185 )
 186 @click.option(
 187     "--py36",
 188     is_flag=True,
 189     help=(
 190         "Allow using Python 3.6-only syntax on all input files.  This will put "
 191         "trailing commas in function signatures and calls also after *args and "
 192         "**kwargs.  [default: per-file auto-detection]"
 193     ),
 194 )
 195 @click.option(
 196     "--pyi",
 197     is_flag=True,
 198     help=(
 199         "Format all input files like typing stubs regardless of file extension "
 200         "(useful when piping source on standard input)."
 201     ),
 202 )
 203 @click.option(
 204     "-S",
 205     "--skip-string-normalization",
 206     is_flag=True,
 207     help="Don't normalize string quotes or prefixes.",
 208 )
 209 @click.option(
 210     "-N",
 211     "--skip-numeric-underscore-normalization",
 212     is_flag=True,
 213     help="Don't normalize underscores in numeric literals.",
 214 )
 215 @click.option(
 216     "--check",
 217     is_flag=True,
 218     help=(
 219         "Don't write the files back, just return the status.  Return code 0 "
 220         "means nothing would change.  Return code 1 means some files would be "
 221         "reformatted.  Return code 123 means there was an internal error."
 222     ),
 223 )
 224 @click.option(
 225     "--diff",
 226     is_flag=True,
 227     help="Don't write the files back, just output a diff for each file on stdout.",
 228 )
 229 @click.option(
 230     "--fast/--safe",
 231     is_flag=True,
 232     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 233 )
 234 @click.option(
 235     "--include",
 236     type=str,
 237     default=DEFAULT_INCLUDES,
 238     help=(
 239         "A regular expression that matches files and directories that should be "
 240         "included on recursive searches.  An empty value means all files are "
 241         "included regardless of the name.  Use forward slashes for directories on "
 242         "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
 243         "later."
 244     ),
 245     show_default=True,
 246 )
 247 @click.option(
 248     "--exclude",
 249     type=str,
 250     default=DEFAULT_EXCLUDES,
 251     help=(
 252         "A regular expression that matches files and directories that should be "
 253         "excluded on recursive searches.  An empty value means no paths are excluded. "
 254         "Use forward slashes for directories on all platforms (Windows, too).  "
 255         "Exclusions are calculated first, inclusions later."
 256     ),
 257     show_default=True,
 258 )
 259 @click.option(
 260     "-q",
 261     "--quiet",
 262     is_flag=True,
 263     help=(
 264         "Don't emit non-error messages to stderr. Errors are still emitted, "
 265         "silence those with 2>/dev/null."
 266     ),
 267 )
 268 @click.option(
 269     "-v",
 270     "--verbose",
 271     is_flag=True,
 272     help=(
 273         "Also emit messages to stderr about files that were not changed or were "
 274         "ignored due to --exclude=."
 275     ),
 276 )
 277 @click.version_option(version=__version__)
 278 @click.argument(
 279     "src",
 280     nargs=-1,
 281     type=click.Path(
 282         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 283     ),
 284     is_eager=True,
 285 )
 286 @click.option(
 287     "--config",
 288     type=click.Path(
 289         exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
 290     ),
 291     is_eager=True,
 292     callback=read_pyproject_toml,
 293     help="Read configuration from PATH.",
 294 )
 295 @click.pass_context
 296 def main(
 297     ctx: click.Context,
 298     line_length: int,
 299     check: bool,
 300     diff: bool,
 301     fast: bool,
 302     pyi: bool,
 303     py36: bool,
 304     skip_string_normalization: bool,
 305     skip_numeric_underscore_normalization: bool,
 306     quiet: bool,
 307     verbose: bool,
 308     include: str,
 309     exclude: str,
 310     src: Tuple[str],
 311     config: Optional[str],
 312 ) -> None:
 313     """The uncompromising code formatter."""
 314     write_back = WriteBack.from_configuration(check=check, diff=diff)
 315     mode = FileMode.from_configuration(
 316         py36=py36,
 317         pyi=pyi,
 318         skip_string_normalization=skip_string_normalization,
 319         skip_numeric_underscore_normalization=skip_numeric_underscore_normalization,
 320     )
 321     if config and verbose:
 322         out(f"Using configuration from {config}.", bold=False, fg="blue")
 323     try:
 324         include_regex = re_compile_maybe_verbose(include)
 325     except re.error:
 326         err(f"Invalid regular expression for include given: {include!r}")
 327         ctx.exit(2)
 328     try:
 329         exclude_regex = re_compile_maybe_verbose(exclude)
 330     except re.error:
 331         err(f"Invalid regular expression for exclude given: {exclude!r}")
 332         ctx.exit(2)
 333     report = Report(check=check, quiet=quiet, verbose=verbose)
 334     root = find_project_root(src)
 335     sources: Set[Path] = set()
 336     for s in src:
 337         p = Path(s)
 338         if p.is_dir():
 339             sources.update(
 340                 gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
 341             )
 342         elif p.is_file() or s == "-":
 343             # if a file was explicitly given, we don't care about its extension
 344             sources.add(p)
 345         else:
 346             err(f"invalid path: {s}")
 347     if len(sources) == 0:
 348         if verbose or not quiet:
 349             out("No paths given. Nothing to do 😴")
 350         ctx.exit(0)
 351
 352     if len(sources) == 1:
 353         reformat_one(
 354             src=sources.pop(),
 355             line_length=line_length,
 356             fast=fast,
 357             write_back=write_back,
 358             mode=mode,
 359             report=report,
 360         )
 361     else:
 362         loop = asyncio.get_event_loop()
 363         executor = ProcessPoolExecutor(max_workers=os.cpu_count())
 364         try:
 365             loop.run_until_complete(
 366                 schedule_formatting(
 367                     sources=sources,
 368                     line_length=line_length,
 369                     fast=fast,
 370                     write_back=write_back,
 371                     mode=mode,
 372                     report=report,
 373                     loop=loop,
 374                     executor=executor,
 375                 )
 376             )
 377         finally:
 378             shutdown(loop)
 379     if verbose or not quiet:
 380         bang = "💥 💔 💥" if report.return_code else "✨ 🍰 ✨"
 381         out(f"All done! {bang}")
 382         click.secho(str(report), err=True)
 383     ctx.exit(report.return_code)
 384
 385
 386 def reformat_one(
 387     src: Path,
 388     line_length: int,
 389     fast: bool,
 390     write_back: WriteBack,
 391     mode: FileMode,
 392     report: "Report",
 393 ) -> None:
 394     """Reformat a single file under `src` without spawning child processes.
 395
 396     If `quiet` is True, non-error messages are not output. `line_length`,
 397     `write_back`, `fast` and `pyi` options are passed to
 398     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 399     """
 400     try:
 401         changed = Changed.NO
 402         if not src.is_file() and str(src) == "-":
 403             if format_stdin_to_stdout(
 404                 line_length=line_length, fast=fast, write_back=write_back, mode=mode
 405             ):
 406                 changed = Changed.YES
 407         else:
 408             cache: Cache = {}
 409             if write_back != WriteBack.DIFF:
 410                 cache = read_cache(line_length, mode)
 411                 res_src = src.resolve()
 412                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 413                     changed = Changed.CACHED
 414             if changed is not Changed.CACHED and format_file_in_place(
 415                 src,
 416                 line_length=line_length,
 417                 fast=fast,
 418                 write_back=write_back,
 419                 mode=mode,
 420             ):
 421                 changed = Changed.YES
 422             if (write_back is WriteBack.YES and changed is not Changed.CACHED) or (
 423                 write_back is WriteBack.CHECK and changed is Changed.NO
 424             ):
 425                 write_cache(cache, [src], line_length, mode)
 426         report.done(src, changed)
 427     except Exception as exc:
 428         report.failed(src, str(exc))
 429
 430
 431 async def schedule_formatting(
 432     sources: Set[Path],
 433     line_length: int,
 434     fast: bool,
 435     write_back: WriteBack,
 436     mode: FileMode,
 437     report: "Report",
 438     loop: BaseEventLoop,
 439     executor: Executor,
 440 ) -> None:
 441     """Run formatting of `sources` in parallel using the provided `executor`.
 442
 443     (Use ProcessPoolExecutors for actual parallelism.)
 444
 445     `line_length`, `write_back`, `fast`, and `pyi` options are passed to
 446     :func:`format_file_in_place`.
 447     """
 448     cache: Cache = {}
 449     if write_back != WriteBack.DIFF:
 450         cache = read_cache(line_length, mode)
 451         sources, cached = filter_cached(cache, sources)
 452         for src in sorted(cached):
 453             report.done(src, Changed.CACHED)
 454     if not sources:
 455         return
 456
 457     cancelled = []
 458     sources_to_cache = []
 459     lock = None
 460     if write_back == WriteBack.DIFF:
 461         # For diff output, we need locks to ensure we don't interleave output
 462         # from different processes.
 463         manager = Manager()
 464         lock = manager.Lock()
 465     tasks = {
 466         loop.run_in_executor(
 467             executor,
 468             format_file_in_place,
 469             src,
 470             line_length,
 471             fast,
 472             write_back,
 473             mode,
 474             lock,
 475         ): src
 476         for src in sorted(sources)
 477     }
 478     pending: Iterable[asyncio.Task] = tasks.keys()
 479     try:
 480         loop.add_signal_handler(signal.SIGINT, cancel, pending)
 481         loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 482     except NotImplementedError:
 483         # There are no good alternatives for these on Windows.
 484         pass
 485     while pending:
 486         done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 487         for task in done:
 488             src = tasks.pop(task)
 489             if task.cancelled():
 490                 cancelled.append(task)
 491             elif task.exception():
 492                 report.failed(src, str(task.exception()))
 493             else:
 494                 changed = Changed.YES if task.result() else Changed.NO
 495                 # If the file was written back or was successfully checked as
 496                 # well-formatted, store this information in the cache.
 497                 if write_back is WriteBack.YES or (
 498                     write_back is WriteBack.CHECK and changed is Changed.NO
 499                 ):
 500                     sources_to_cache.append(src)
 501                 report.done(src, changed)
 502     if cancelled:
 503         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 504     if sources_to_cache:
 505         write_cache(cache, sources_to_cache, line_length, mode)
 506
 507
 508 def format_file_in_place(
 509     src: Path,
 510     line_length: int,
 511     fast: bool,
 512     write_back: WriteBack = WriteBack.NO,
 513     mode: FileMode = FileMode.AUTO_DETECT,
 514     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 515 ) -> bool:
 516     """Format file under `src` path. Return True if changed.
 517
 518     If `write_back` is DIFF, write a diff to stdout. If it is YES, write reformatted
 519     code to the file.
 520     `line_length` and `fast` options are passed to :func:`format_file_contents`.
 521     """
 522     if src.suffix == ".pyi":
 523         mode |= FileMode.PYI
 524
 525     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 526     with open(src, "rb") as buf:
 527         src_contents, encoding, newline = decode_bytes(buf.read())
 528     try:
 529         dst_contents = format_file_contents(
 530             src_contents, line_length=line_length, fast=fast, mode=mode
 531         )
 532     except NothingChanged:
 533         return False
 534
 535     if write_back == write_back.YES:
 536         with open(src, "w", encoding=encoding, newline=newline) as f:
 537             f.write(dst_contents)
 538     elif write_back == write_back.DIFF:
 539         now = datetime.utcnow()
 540         src_name = f"{src}\t{then} +0000"
 541         dst_name = f"{src}\t{now} +0000"
 542         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 543         if lock:
 544             lock.acquire()
 545         try:
 546             f = io.TextIOWrapper(
 547                 sys.stdout.buffer,
 548                 encoding=encoding,
 549                 newline=newline,
 550                 write_through=True,
 551             )
 552             f.write(diff_contents)
 553             f.detach()
 554         finally:
 555             if lock:
 556                 lock.release()
 557     return True
 558
 559
 560 def format_stdin_to_stdout(
 561     line_length: int,
 562     fast: bool,
 563     write_back: WriteBack = WriteBack.NO,
 564     mode: FileMode = FileMode.AUTO_DETECT,
 565 ) -> bool:
 566     """Format file on stdin. Return True if changed.
 567
 568     If `write_back` is YES, write reformatted code back to stdout. If it is DIFF,
 569     write a diff to stdout.
 570     `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
 571     :func:`format_file_contents`.
 572     """
 573     then = datetime.utcnow()
 574     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 575     dst = src
 576     try:
 577         dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
 578         return True
 579
 580     except NothingChanged:
 581         return False
 582
 583     finally:
 584         f = io.TextIOWrapper(
 585             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 586         )
 587         if write_back == WriteBack.YES:
 588             f.write(dst)
 589         elif write_back == WriteBack.DIFF:
 590             now = datetime.utcnow()
 591             src_name = f"STDIN\t{then} +0000"
 592             dst_name = f"STDOUT\t{now} +0000"
 593             f.write(diff(src, dst, src_name, dst_name))
 594         f.detach()
 595
 596
 597 def format_file_contents(
 598     src_contents: str,
 599     *,
 600     line_length: int,
 601     fast: bool,
 602     mode: FileMode = FileMode.AUTO_DETECT,
 603 ) -> FileContent:
 604     """Reformat contents a file and return new contents.
 605
 606     If `fast` is False, additionally confirm that the reformatted code is
 607     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 608     `line_length` is passed to :func:`format_str`.
 609     """
 610     if src_contents.strip() == "":
 611         raise NothingChanged
 612
 613     dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
 614     if src_contents == dst_contents:
 615         raise NothingChanged
 616
 617     if not fast:
 618         assert_equivalent(src_contents, dst_contents)
 619         assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
 620     return dst_contents
 621
 622
 623 def format_str(
 624     src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
 625 ) -> FileContent:
 626     """Reformat a string and return new contents.
 627
 628     `line_length` determines how many characters per line are allowed.
 629     """
 630     src_node = lib2to3_parse(src_contents.lstrip())
 631     dst_contents = ""
 632     future_imports = get_future_imports(src_node)
 633     is_pyi = bool(mode & FileMode.PYI)
 634     py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
 635     normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
 636     normalize_fmt_off(src_node)
 637     lines = LineGenerator(
 638         remove_u_prefix=py36 or "unicode_literals" in future_imports,
 639         is_pyi=is_pyi,
 640         normalize_strings=normalize_strings,
 641         allow_underscores=py36
 642         and not bool(mode & FileMode.NO_NUMERIC_UNDERSCORE_NORMALIZATION),
 643     )
 644     elt = EmptyLineTracker(is_pyi=is_pyi)
 645     empty_line = Line()
 646     after = 0
 647     for current_line in lines.visit(src_node):
 648         for _ in range(after):
 649             dst_contents += str(empty_line)
 650         before, after = elt.maybe_empty_lines(current_line)
 651         for _ in range(before):
 652             dst_contents += str(empty_line)
 653         for line in split_line(current_line, line_length=line_length, py36=py36):
 654             dst_contents += str(line)
 655     return dst_contents
 656
 657
 658 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 659     """Return a tuple of (decoded_contents, encoding, newline).
 660
 661     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 662     universal newlines (i.e. only contains LF).
 663     """
 664     srcbuf = io.BytesIO(src)
 665     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 666     if not lines:
 667         return "", encoding, "\n"
 668
 669     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 670     srcbuf.seek(0)
 671     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 672         return tiow.read(), encoding, newline
 673
 674
 675 GRAMMARS = [
 676     pygram.python_grammar_no_print_statement_no_exec_statement,
 677     pygram.python_grammar_no_print_statement,
 678     pygram.python_grammar,
 679 ]
 680
 681
 682 def lib2to3_parse(src_txt: str) -> Node:
 683     """Given a string with source, return the lib2to3 Node."""
 684     if src_txt[-1:] != "\n":
 685         src_txt += "\n"
 686     for grammar in GRAMMARS:
 687         drv = driver.Driver(grammar, pytree.convert)
 688         try:
 689             result = drv.parse_string(src_txt, True)
 690             break
 691
 692         except ParseError as pe:
 693             lineno, column = pe.context[1]
 694             lines = src_txt.splitlines()
 695             try:
 696                 faulty_line = lines[lineno - 1]
 697             except IndexError:
 698                 faulty_line = "<line number missing in source>"
 699             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 700     else:
 701         raise exc from None
 702
 703     if isinstance(result, Leaf):
 704         result = Node(syms.file_input, [result])
 705     return result
 706
 707
 708 def lib2to3_unparse(node: Node) -> str:
 709     """Given a lib2to3 node, return its string representation."""
 710     code = str(node)
 711     return code
 712
 713
 714 T = TypeVar("T")
 715
 716
 717 class Visitor(Generic[T]):
 718     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 719
 720     def visit(self, node: LN) -> Iterator[T]:
 721         """Main method to visit `node` and its children.
 722
 723         It tries to find a `visit_*()` method for the given `node.type`, like
 724         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 725         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 726         instead.
 727
 728         Then yields objects of type `T` from the selected visitor.
 729         """
 730         if node.type < 256:
 731             name = token.tok_name[node.type]
 732         else:
 733             name = type_repr(node.type)
 734         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 735
 736     def visit_default(self, node: LN) -> Iterator[T]:
 737         """Default `visit_*()` implementation. Recurses to children of `node`."""
 738         if isinstance(node, Node):
 739             for child in node.children:
 740                 yield from self.visit(child)
 741
 742
 743 @dataclass
 744 class DebugVisitor(Visitor[T]):
 745     tree_depth: int = 0
 746
 747     def visit_default(self, node: LN) -> Iterator[T]:
 748         indent = " " * (2 * self.tree_depth)
 749         if isinstance(node, Node):
 750             _type = type_repr(node.type)
 751             out(f"{indent}{_type}", fg="yellow")
 752             self.tree_depth += 1
 753             for child in node.children:
 754                 yield from self.visit(child)
 755
 756             self.tree_depth -= 1
 757             out(f"{indent}/{_type}", fg="yellow", bold=False)
 758         else:
 759             _type = token.tok_name.get(node.type, str(node.type))
 760             out(f"{indent}{_type}", fg="blue", nl=False)
 761             if node.prefix:
 762                 # We don't have to handle prefixes for `Node` objects since
 763                 # that delegates to the first child anyway.
 764                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 765             out(f" {node.value!r}", fg="blue", bold=False)
 766
 767     @classmethod
 768     def show(cls, code: Union[str, Leaf, Node]) -> None:
 769         """Pretty-print the lib2to3 AST of a given string of `code`.
 770
 771         Convenience method for debugging.
 772         """
 773         v: DebugVisitor[None] = DebugVisitor()
 774         if isinstance(code, str):
 775             code = lib2to3_parse(code)
 776         list(v.visit(code))
 777
 778
 779 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
 780 STATEMENT = {
 781     syms.if_stmt,
 782     syms.while_stmt,
 783     syms.for_stmt,
 784     syms.try_stmt,
 785     syms.except_clause,
 786     syms.with_stmt,
 787     syms.funcdef,
 788     syms.classdef,
 789 }
 790 STANDALONE_COMMENT = 153
 791 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
 792 LOGIC_OPERATORS = {"and", "or"}
 793 COMPARATORS = {
 794     token.LESS,
 795     token.GREATER,
 796     token.EQEQUAL,
 797     token.NOTEQUAL,
 798     token.LESSEQUAL,
 799     token.GREATEREQUAL,
 800 }
 801 MATH_OPERATORS = {
 802     token.VBAR,
 803     token.CIRCUMFLEX,
 804     token.AMPER,
 805     token.LEFTSHIFT,
 806     token.RIGHTSHIFT,
 807     token.PLUS,
 808     token.MINUS,
 809     token.STAR,
 810     token.SLASH,
 811     token.DOUBLESLASH,
 812     token.PERCENT,
 813     token.AT,
 814     token.TILDE,
 815     token.DOUBLESTAR,
 816 }
 817 STARS = {token.STAR, token.DOUBLESTAR}
 818 VARARGS_PARENTS = {
 819     syms.arglist,
 820     syms.argument,  # double star in arglist
 821     syms.trailer,  # single argument to call
 822     syms.typedargslist,
 823     syms.varargslist,  # lambdas
 824 }
 825 UNPACKING_PARENTS = {
 826     syms.atom,  # single element of a list or set literal
 827     syms.dictsetmaker,
 828     syms.listmaker,
 829     syms.testlist_gexp,
 830     syms.testlist_star_expr,
 831 }
 832 TEST_DESCENDANTS = {
 833     syms.test,
 834     syms.lambdef,
 835     syms.or_test,
 836     syms.and_test,
 837     syms.not_test,
 838     syms.comparison,
 839     syms.star_expr,
 840     syms.expr,
 841     syms.xor_expr,
 842     syms.and_expr,
 843     syms.shift_expr,
 844     syms.arith_expr,
 845     syms.trailer,
 846     syms.term,
 847     syms.power,
 848 }
 849 ASSIGNMENTS = {
 850     "=",
 851     "+=",
 852     "-=",
 853     "*=",
 854     "@=",
 855     "/=",
 856     "%=",
 857     "&=",
 858     "|=",
 859     "^=",
 860     "<<=",
 861     ">>=",
 862     "**=",
 863     "//=",
 864 }
 865 COMPREHENSION_PRIORITY = 20
 866 COMMA_PRIORITY = 18
 867 TERNARY_PRIORITY = 16
 868 LOGIC_PRIORITY = 14
 869 STRING_PRIORITY = 12
 870 COMPARATOR_PRIORITY = 10
 871 MATH_PRIORITIES = {
 872     token.VBAR: 9,
 873     token.CIRCUMFLEX: 8,
 874     token.AMPER: 7,
 875     token.LEFTSHIFT: 6,
 876     token.RIGHTSHIFT: 6,
 877     token.PLUS: 5,
 878     token.MINUS: 5,
 879     token.STAR: 4,
 880     token.SLASH: 4,
 881     token.DOUBLESLASH: 4,
 882     token.PERCENT: 4,
 883     token.AT: 4,
 884     token.TILDE: 3,
 885     token.DOUBLESTAR: 2,
 886 }
 887 DOT_PRIORITY = 1
 888
 889
 890 @dataclass
 891 class BracketTracker:
 892     """Keeps track of brackets on a line."""
 893
 894     depth: int = 0
 895     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
 896     delimiters: Dict[LeafID, Priority] = Factory(dict)
 897     previous: Optional[Leaf] = None
 898     _for_loop_depths: List[int] = Factory(list)
 899     _lambda_argument_depths: List[int] = Factory(list)
 900
 901     def mark(self, leaf: Leaf) -> None:
 902         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
 903
 904         All leaves receive an int `bracket_depth` field that stores how deep
 905         within brackets a given leaf is. 0 means there are no enclosing brackets
 906         that started on this line.
 907
 908         If a leaf is itself a closing bracket, it receives an `opening_bracket`
 909         field that it forms a pair with. This is a one-directional link to
 910         avoid reference cycles.
 911
 912         If a leaf is a delimiter (a token on which Black can split the line if
 913         needed) and it's on depth 0, its `id()` is stored in the tracker's
 914         `delimiters` field.
 915         """
 916         if leaf.type == token.COMMENT:
 917             return
 918
 919         self.maybe_decrement_after_for_loop_variable(leaf)
 920         self.maybe_decrement_after_lambda_arguments(leaf)
 921         if leaf.type in CLOSING_BRACKETS:
 922             self.depth -= 1
 923             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
 924             leaf.opening_bracket = opening_bracket
 925         leaf.bracket_depth = self.depth
 926         if self.depth == 0:
 927             delim = is_split_before_delimiter(leaf, self.previous)
 928             if delim and self.previous is not None:
 929                 self.delimiters[id(self.previous)] = delim
 930             else:
 931                 delim = is_split_after_delimiter(leaf, self.previous)
 932                 if delim:
 933                     self.delimiters[id(leaf)] = delim
 934         if leaf.type in OPENING_BRACKETS:
 935             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
 936             self.depth += 1
 937         self.previous = leaf
 938         self.maybe_increment_lambda_arguments(leaf)
 939         self.maybe_increment_for_loop_variable(leaf)
 940
 941     def any_open_brackets(self) -> bool:
 942         """Return True if there is an yet unmatched open bracket on the line."""
 943         return bool(self.bracket_match)
 944
 945     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
 946         """Return the highest priority of a delimiter found on the line.
 947
 948         Values are consistent with what `is_split_*_delimiter()` return.
 949         Raises ValueError on no delimiters.
 950         """
 951         return max(v for k, v in self.delimiters.items() if k not in exclude)
 952
 953     def delimiter_count_with_priority(self, priority: int = 0) -> int:
 954         """Return the number of delimiters with the given `priority`.
 955
 956         If no `priority` is passed, defaults to max priority on the line.
 957         """
 958         if not self.delimiters:
 959             return 0
 960
 961         priority = priority or self.max_delimiter_priority()
 962         return sum(1 for p in self.delimiters.values() if p == priority)
 963
 964     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
 965         """In a for loop, or comprehension, the variables are often unpacks.
 966
 967         To avoid splitting on the comma in this situation, increase the depth of
 968         tokens between `for` and `in`.
 969         """
 970         if leaf.type == token.NAME and leaf.value == "for":
 971             self.depth += 1
 972             self._for_loop_depths.append(self.depth)
 973             return True
 974
 975         return False
 976
 977     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
 978         """See `maybe_increment_for_loop_variable` above for explanation."""
 979         if (
 980             self._for_loop_depths
 981             and self._for_loop_depths[-1] == self.depth
 982             and leaf.type == token.NAME
 983             and leaf.value == "in"
 984         ):
 985             self.depth -= 1
 986             self._for_loop_depths.pop()
 987             return True
 988
 989         return False
 990
 991     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
 992         """In a lambda expression, there might be more than one argument.
 993
 994         To avoid splitting on the comma in this situation, increase the depth of
 995         tokens between `lambda` and `:`.
 996         """
 997         if leaf.type == token.NAME and leaf.value == "lambda":
 998             self.depth += 1
 999             self._lambda_argument_depths.append(self.depth)
1000             return True
1001
1002         return False
1003
1004     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
1005         """See `maybe_increment_lambda_arguments` above for explanation."""
1006         if (
1007             self._lambda_argument_depths
1008             and self._lambda_argument_depths[-1] == self.depth
1009             and leaf.type == token.COLON
1010         ):
1011             self.depth -= 1
1012             self._lambda_argument_depths.pop()
1013             return True
1014
1015         return False
1016
1017     def get_open_lsqb(self) -> Optional[Leaf]:
1018         """Return the most recent opening square bracket (if any)."""
1019         return self.bracket_match.get((self.depth - 1, token.RSQB))
1020
1021
1022 @dataclass
1023 class Line:
1024     """Holds leaves and comments. Can be printed with `str(line)`."""
1025
1026     depth: int = 0
1027     leaves: List[Leaf] = Factory(list)
1028     # The LeafID keys of comments must remain ordered by the corresponding leaf's index
1029     # in leaves
1030     comments: Dict[LeafID, List[Leaf]] = Factory(dict)
1031     bracket_tracker: BracketTracker = Factory(BracketTracker)
1032     inside_brackets: bool = False
1033     should_explode: bool = False
1034
1035     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
1036         """Add a new `leaf` to the end of the line.
1037
1038         Unless `preformatted` is True, the `leaf` will receive a new consistent
1039         whitespace prefix and metadata applied by :class:`BracketTracker`.
1040         Trailing commas are maybe removed, unpacked for loop variables are
1041         demoted from being delimiters.
1042
1043         Inline comments are put aside.
1044         """
1045         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
1046         if not has_value:
1047             return
1048
1049         if token.COLON == leaf.type and self.is_class_paren_empty:
1050             del self.leaves[-2:]
1051         if self.leaves and not preformatted:
1052             # Note: at this point leaf.prefix should be empty except for
1053             # imports, for which we only preserve newlines.
1054             leaf.prefix += whitespace(
1055                 leaf, complex_subscript=self.is_complex_subscript(leaf)
1056             )
1057         if self.inside_brackets or not preformatted:
1058             self.bracket_tracker.mark(leaf)
1059             self.maybe_remove_trailing_comma(leaf)
1060         if not self.append_comment(leaf):
1061             self.leaves.append(leaf)
1062
1063     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
1064         """Like :func:`append()` but disallow invalid standalone comment structure.
1065
1066         Raises ValueError when any `leaf` is appended after a standalone comment
1067         or when a standalone comment is not the first leaf on the line.
1068         """
1069         if self.bracket_tracker.depth == 0:
1070             if self.is_comment:
1071                 raise ValueError("cannot append to standalone comments")
1072
1073             if self.leaves and leaf.type == STANDALONE_COMMENT:
1074                 raise ValueError(
1075                     "cannot append standalone comments to a populated line"
1076                 )
1077
1078         self.append(leaf, preformatted=preformatted)
1079
1080     @property
1081     def is_comment(self) -> bool:
1082         """Is this line a standalone comment?"""
1083         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
1084
1085     @property
1086     def is_decorator(self) -> bool:
1087         """Is this line a decorator?"""
1088         return bool(self) and self.leaves[0].type == token.AT
1089
1090     @property
1091     def is_import(self) -> bool:
1092         """Is this an import line?"""
1093         return bool(self) and is_import(self.leaves[0])
1094
1095     @property
1096     def is_class(self) -> bool:
1097         """Is this line a class definition?"""
1098         return (
1099             bool(self)
1100             and self.leaves[0].type == token.NAME
1101             and self.leaves[0].value == "class"
1102         )
1103
1104     @property
1105     def is_stub_class(self) -> bool:
1106         """Is this line a class definition with a body consisting only of "..."?"""
1107         return self.is_class and self.leaves[-3:] == [
1108             Leaf(token.DOT, ".") for _ in range(3)
1109         ]
1110
1111     @property
1112     def is_def(self) -> bool:
1113         """Is this a function definition? (Also returns True for async defs.)"""
1114         try:
1115             first_leaf = self.leaves[0]
1116         except IndexError:
1117             return False
1118
1119         try:
1120             second_leaf: Optional[Leaf] = self.leaves[1]
1121         except IndexError:
1122             second_leaf = None
1123         return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
1124             first_leaf.type == token.ASYNC
1125             and second_leaf is not None
1126             and second_leaf.type == token.NAME
1127             and second_leaf.value == "def"
1128         )
1129
1130     @property
1131     def is_class_paren_empty(self) -> bool:
1132         """Is this a class with no base classes but using parentheses?
1133
1134         Those are unnecessary and should be removed.
1135         """
1136         return (
1137             bool(self)
1138             and len(self.leaves) == 4
1139             and self.is_class
1140             and self.leaves[2].type == token.LPAR
1141             and self.leaves[2].value == "("
1142             and self.leaves[3].type == token.RPAR
1143             and self.leaves[3].value == ")"
1144         )
1145
1146     @property
1147     def is_triple_quoted_string(self) -> bool:
1148         """Is the line a triple quoted string?"""
1149         return (
1150             bool(self)
1151             and self.leaves[0].type == token.STRING
1152             and self.leaves[0].value.startswith(('"""', "'''"))
1153         )
1154
1155     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
1156         """If so, needs to be split before emitting."""
1157         for leaf in self.leaves:
1158             if leaf.type == STANDALONE_COMMENT:
1159                 if leaf.bracket_depth <= depth_limit:
1160                     return True
1161
1162         return False
1163
1164     def contains_multiline_strings(self) -> bool:
1165         for leaf in self.leaves:
1166             if is_multiline_string(leaf):
1167                 return True
1168
1169         return False
1170
1171     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1172         """Remove trailing comma if there is one and it's safe."""
1173         if not (
1174             self.leaves
1175             and self.leaves[-1].type == token.COMMA
1176             and closing.type in CLOSING_BRACKETS
1177         ):
1178             return False
1179
1180         if closing.type == token.RBRACE:
1181             self.remove_trailing_comma()
1182             return True
1183
1184         if closing.type == token.RSQB:
1185             comma = self.leaves[-1]
1186             if comma.parent and comma.parent.type == syms.listmaker:
1187                 self.remove_trailing_comma()
1188                 return True
1189
1190         # For parens let's check if it's safe to remove the comma.
1191         # Imports are always safe.
1192         if self.is_import:
1193             self.remove_trailing_comma()
1194             return True
1195
1196         # Otherwise, if the trailing one is the only one, we might mistakenly
1197         # change a tuple into a different type by removing the comma.
1198         depth = closing.bracket_depth + 1
1199         commas = 0
1200         opening = closing.opening_bracket
1201         for _opening_index, leaf in enumerate(self.leaves):
1202             if leaf is opening:
1203                 break
1204
1205         else:
1206             return False
1207
1208         for leaf in self.leaves[_opening_index + 1 :]:
1209             if leaf is closing:
1210                 break
1211
1212             bracket_depth = leaf.bracket_depth
1213             if bracket_depth == depth and leaf.type == token.COMMA:
1214                 commas += 1
1215                 if leaf.parent and leaf.parent.type == syms.arglist:
1216                     commas += 1
1217                     break
1218
1219         if commas > 1:
1220             self.remove_trailing_comma()
1221             return True
1222
1223         return False
1224
1225     def append_comment(self, comment: Leaf) -> bool:
1226         """Add an inline or standalone comment to the line."""
1227         if (
1228             comment.type == STANDALONE_COMMENT
1229             and self.bracket_tracker.any_open_brackets()
1230         ):
1231             comment.prefix = ""
1232             return False
1233
1234         if comment.type != token.COMMENT:
1235             return False
1236
1237         if not self.leaves:
1238             comment.type = STANDALONE_COMMENT
1239             comment.prefix = ""
1240             return False
1241
1242         else:
1243             leaf_id = id(self.leaves[-1])
1244             if leaf_id not in self.comments:
1245                 self.comments[leaf_id] = [comment]
1246             else:
1247                 self.comments[leaf_id].append(comment)
1248             return True
1249
1250     def comments_after(self, leaf: Leaf) -> List[Leaf]:
1251         """Generate comments that should appear directly after `leaf`."""
1252         return self.comments.get(id(leaf), [])
1253
1254     def remove_trailing_comma(self) -> None:
1255         """Remove the trailing comma and moves the comments attached to it."""
1256         # Remember, the LeafID keys of self.comments are ordered by the
1257         # corresponding leaf's index in self.leaves
1258         # If id(self.leaves[-2]) is in self.comments, the order doesn't change.
1259         # Otherwise, we insert it into self.comments, and it becomes the last entry.
1260         # However, since we delete id(self.leaves[-1]) from self.comments, the invariant
1261         # is maintained
1262         self.comments.setdefault(id(self.leaves[-2]), []).extend(
1263             self.comments.get(id(self.leaves[-1]), [])
1264         )
1265         self.comments.pop(id(self.leaves[-1]), None)
1266         self.leaves.pop()
1267
1268     def is_complex_subscript(self, leaf: Leaf) -> bool:
1269         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1270         open_lsqb = self.bracket_tracker.get_open_lsqb()
1271         if open_lsqb is None:
1272             return False
1273
1274         subscript_start = open_lsqb.next_sibling
1275
1276         if isinstance(subscript_start, Node):
1277             if subscript_start.type == syms.listmaker:
1278                 return False
1279
1280             if subscript_start.type == syms.subscriptlist:
1281                 subscript_start = child_towards(subscript_start, leaf)
1282         return subscript_start is not None and any(
1283             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1284         )
1285
1286     def __str__(self) -> str:
1287         """Render the line."""
1288         if not self:
1289             return "\n"
1290
1291         indent = "    " * self.depth
1292         leaves = iter(self.leaves)
1293         first = next(leaves)
1294         res = f"{first.prefix}{indent}{first.value}"
1295         for leaf in leaves:
1296             res += str(leaf)
1297         for comment in itertools.chain.from_iterable(self.comments.values()):
1298             res += str(comment)
1299         return res + "\n"
1300
1301     def __bool__(self) -> bool:
1302         """Return True if the line has leaves or comments."""
1303         return bool(self.leaves or self.comments)
1304
1305
1306 @dataclass
1307 class EmptyLineTracker:
1308     """Provides a stateful method that returns the number of potential extra
1309     empty lines needed before and after the currently processed line.
1310
1311     Note: this tracker works on lines that haven't been split yet.  It assumes
1312     the prefix of the first leaf consists of optional newlines.  Those newlines
1313     are consumed by `maybe_empty_lines()` and included in the computation.
1314     """
1315
1316     is_pyi: bool = False
1317     previous_line: Optional[Line] = None
1318     previous_after: int = 0
1319     previous_defs: List[int] = Factory(list)
1320
1321     def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1322         """Return the number of extra empty lines before and after the `current_line`.
1323
1324         This is for separating `def`, `async def` and `class` with extra empty
1325         lines (two on module-level).
1326         """
1327         before, after = self._maybe_empty_lines(current_line)
1328         before -= self.previous_after
1329         self.previous_after = after
1330         self.previous_line = current_line
1331         return before, after
1332
1333     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1334         max_allowed = 1
1335         if current_line.depth == 0:
1336             max_allowed = 1 if self.is_pyi else 2
1337         if current_line.leaves:
1338             # Consume the first leaf's extra newlines.
1339             first_leaf = current_line.leaves[0]
1340             before = first_leaf.prefix.count("\n")
1341             before = min(before, max_allowed)
1342             first_leaf.prefix = ""
1343         else:
1344             before = 0
1345         depth = current_line.depth
1346         while self.previous_defs and self.previous_defs[-1] >= depth:
1347             self.previous_defs.pop()
1348             if self.is_pyi:
1349                 before = 0 if depth else 1
1350             else:
1351                 before = 1 if depth else 2
1352         if current_line.is_decorator or current_line.is_def or current_line.is_class:
1353             return self._maybe_empty_lines_for_class_or_def(current_line, before)
1354
1355         if (
1356             self.previous_line
1357             and self.previous_line.is_import
1358             and not current_line.is_import
1359             and depth == self.previous_line.depth
1360         ):
1361             return (before or 1), 0
1362
1363         if (
1364             self.previous_line
1365             and self.previous_line.is_class
1366             and current_line.is_triple_quoted_string
1367         ):
1368             return before, 1
1369
1370         return before, 0
1371
1372     def _maybe_empty_lines_for_class_or_def(
1373         self, current_line: Line, before: int
1374     ) -> Tuple[int, int]:
1375         if not current_line.is_decorator:
1376             self.previous_defs.append(current_line.depth)
1377         if self.previous_line is None:
1378             # Don't insert empty lines before the first line in the file.
1379             return 0, 0
1380
1381         if self.previous_line.is_decorator:
1382             return 0, 0
1383
1384         if self.previous_line.depth < current_line.depth and (
1385             self.previous_line.is_class or self.previous_line.is_def
1386         ):
1387             return 0, 0
1388
1389         if (
1390             self.previous_line.is_comment
1391             and self.previous_line.depth == current_line.depth
1392             and before == 0
1393         ):
1394             return 0, 0
1395
1396         if self.is_pyi:
1397             if self.previous_line.depth > current_line.depth:
1398                 newlines = 1
1399             elif current_line.is_class or self.previous_line.is_class:
1400                 if current_line.is_stub_class and self.previous_line.is_stub_class:
1401                     # No blank line between classes with an empty body
1402                     newlines = 0
1403                 else:
1404                     newlines = 1
1405             elif current_line.is_def and not self.previous_line.is_def:
1406                 # Blank line between a block of functions and a block of non-functions
1407                 newlines = 1
1408             else:
1409                 newlines = 0
1410         else:
1411             newlines = 2
1412         if current_line.depth and newlines:
1413             newlines -= 1
1414         return newlines, 0
1415
1416
1417 @dataclass
1418 class LineGenerator(Visitor[Line]):
1419     """Generates reformatted Line objects.  Empty lines are not emitted.
1420
1421     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1422     in ways that will no longer stringify to valid Python code on the tree.
1423     """
1424
1425     is_pyi: bool = False
1426     normalize_strings: bool = True
1427     current_line: Line = Factory(Line)
1428     remove_u_prefix: bool = False
1429     allow_underscores: bool = False
1430
1431     def line(self, indent: int = 0) -> Iterator[Line]:
1432         """Generate a line.
1433
1434         If the line is empty, only emit if it makes sense.
1435         If the line is too long, split it first and then generate.
1436
1437         If any lines were generated, set up a new current_line.
1438         """
1439         if not self.current_line:
1440             self.current_line.depth += indent
1441             return  # Line is empty, don't emit. Creating a new one unnecessary.
1442
1443         complete_line = self.current_line
1444         self.current_line = Line(depth=complete_line.depth + indent)
1445         yield complete_line
1446
1447     def visit_default(self, node: LN) -> Iterator[Line]:
1448         """Default `visit_*()` implementation. Recurses to children of `node`."""
1449         if isinstance(node, Leaf):
1450             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1451             for comment in generate_comments(node):
1452                 if any_open_brackets:
1453                     # any comment within brackets is subject to splitting
1454                     self.current_line.append(comment)
1455                 elif comment.type == token.COMMENT:
1456                     # regular trailing comment
1457                     self.current_line.append(comment)
1458                     yield from self.line()
1459
1460                 else:
1461                     # regular standalone comment
1462                     yield from self.line()
1463
1464                     self.current_line.append(comment)
1465                     yield from self.line()
1466
1467             normalize_prefix(node, inside_brackets=any_open_brackets)
1468             if self.normalize_strings and node.type == token.STRING:
1469                 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1470                 normalize_string_quotes(node)
1471             if node.type == token.NUMBER:
1472                 normalize_numeric_literal(node, self.allow_underscores)
1473             if node.type not in WHITESPACE:
1474                 self.current_line.append(node)
1475         yield from super().visit_default(node)
1476
1477     def visit_INDENT(self, node: Node) -> Iterator[Line]:
1478         """Increase indentation level, maybe yield a line."""
1479         # In blib2to3 INDENT never holds comments.
1480         yield from self.line(+1)
1481         yield from self.visit_default(node)
1482
1483     def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1484         """Decrease indentation level, maybe yield a line."""
1485         # The current line might still wait for trailing comments.  At DEDENT time
1486         # there won't be any (they would be prefixes on the preceding NEWLINE).
1487         # Emit the line then.
1488         yield from self.line()
1489
1490         # While DEDENT has no value, its prefix may contain standalone comments
1491         # that belong to the current indentation level.  Get 'em.
1492         yield from self.visit_default(node)
1493
1494         # Finally, emit the dedent.
1495         yield from self.line(-1)
1496
1497     def visit_stmt(
1498         self, node: Node, keywords: Set[str], parens: Set[str]
1499     ) -> Iterator[Line]:
1500         """Visit a statement.
1501
1502         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1503         `def`, `with`, `class`, `assert` and assignments.
1504
1505         The relevant Python language `keywords` for a given statement will be
1506         NAME leaves within it. This methods puts those on a separate line.
1507
1508         `parens` holds a set of string leaf values immediately after which
1509         invisible parens should be put.
1510         """
1511         normalize_invisible_parens(node, parens_after=parens)
1512         for child in node.children:
1513             if child.type == token.NAME and child.value in keywords:  # type: ignore
1514                 yield from self.line()
1515
1516             yield from self.visit(child)
1517
1518     def visit_suite(self, node: Node) -> Iterator[Line]:
1519         """Visit a suite."""
1520         if self.is_pyi and is_stub_suite(node):
1521             yield from self.visit(node.children[2])
1522         else:
1523             yield from self.visit_default(node)
1524
1525     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1526         """Visit a statement without nested statements."""
1527         is_suite_like = node.parent and node.parent.type in STATEMENT
1528         if is_suite_like:
1529             if self.is_pyi and is_stub_body(node):
1530                 yield from self.visit_default(node)
1531             else:
1532                 yield from self.line(+1)
1533                 yield from self.visit_default(node)
1534                 yield from self.line(-1)
1535
1536         else:
1537             if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1538                 yield from self.line()
1539             yield from self.visit_default(node)
1540
1541     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1542         """Visit `async def`, `async for`, `async with`."""
1543         yield from self.line()
1544
1545         children = iter(node.children)
1546         for child in children:
1547             yield from self.visit(child)
1548
1549             if child.type == token.ASYNC:
1550                 break
1551
1552         internal_stmt = next(children)
1553         for child in internal_stmt.children:
1554             yield from self.visit(child)
1555
1556     def visit_decorators(self, node: Node) -> Iterator[Line]:
1557         """Visit decorators."""
1558         for child in node.children:
1559             yield from self.line()
1560             yield from self.visit(child)
1561
1562     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1563         """Remove a semicolon and put the other statement on a separate line."""
1564         yield from self.line()
1565
1566     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1567         """End of file. Process outstanding comments and end with a newline."""
1568         yield from self.visit_default(leaf)
1569         yield from self.line()
1570
1571     def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
1572         if not self.current_line.bracket_tracker.any_open_brackets():
1573             yield from self.line()
1574         yield from self.visit_default(leaf)
1575
1576     def __attrs_post_init__(self) -> None:
1577         """You are in a twisty little maze of passages."""
1578         v = self.visit_stmt
1579         Ø: Set[str] = set()
1580         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1581         self.visit_if_stmt = partial(
1582             v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1583         )
1584         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1585         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1586         self.visit_try_stmt = partial(
1587             v, keywords={"try", "except", "else", "finally"}, parens=Ø
1588         )
1589         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1590         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1591         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1592         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1593         self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1594         self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1595         self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
1596         self.visit_async_funcdef = self.visit_async_stmt
1597         self.visit_decorated = self.visit_decorators
1598
1599
1600 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1601 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1602 OPENING_BRACKETS = set(BRACKET.keys())
1603 CLOSING_BRACKETS = set(BRACKET.values())
1604 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1605 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1606
1607
1608 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
1609     """Return whitespace prefix if needed for the given `leaf`.
1610
1611     `complex_subscript` signals whether the given leaf is part of a subscription
1612     which has non-trivial arguments, like arithmetic expressions or function calls.
1613     """
1614     NO = ""
1615     SPACE = " "
1616     DOUBLESPACE = "  "
1617     t = leaf.type
1618     p = leaf.parent
1619     v = leaf.value
1620     if t in ALWAYS_NO_SPACE:
1621         return NO
1622
1623     if t == token.COMMENT:
1624         return DOUBLESPACE
1625
1626     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1627     if t == token.COLON and p.type not in {
1628         syms.subscript,
1629         syms.subscriptlist,
1630         syms.sliceop,
1631     }:
1632         return NO
1633
1634     prev = leaf.prev_sibling
1635     if not prev:
1636         prevp = preceding_leaf(p)
1637         if not prevp or prevp.type in OPENING_BRACKETS:
1638             return NO
1639
1640         if t == token.COLON:
1641             if prevp.type == token.COLON:
1642                 return NO
1643
1644             elif prevp.type != token.COMMA and not complex_subscript:
1645                 return NO
1646
1647             return SPACE
1648
1649         if prevp.type == token.EQUAL:
1650             if prevp.parent:
1651                 if prevp.parent.type in {
1652                     syms.arglist,
1653                     syms.argument,
1654                     syms.parameters,
1655                     syms.varargslist,
1656                 }:
1657                     return NO
1658
1659                 elif prevp.parent.type == syms.typedargslist:
1660                     # A bit hacky: if the equal sign has whitespace, it means we
1661                     # previously found it's a typed argument.  So, we're using
1662                     # that, too.
1663                     return prevp.prefix
1664
1665         elif prevp.type in STARS:
1666             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1667                 return NO
1668
1669         elif prevp.type == token.COLON:
1670             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1671                 return SPACE if complex_subscript else NO
1672
1673         elif (
1674             prevp.parent
1675             and prevp.parent.type == syms.factor
1676             and prevp.type in MATH_OPERATORS
1677         ):
1678             return NO
1679
1680         elif (
1681             prevp.type == token.RIGHTSHIFT
1682             and prevp.parent
1683             and prevp.parent.type == syms.shift_expr
1684             and prevp.prev_sibling
1685             and prevp.prev_sibling.type == token.NAME
1686             and prevp.prev_sibling.value == "print"  # type: ignore
1687         ):
1688             # Python 2 print chevron
1689             return NO
1690
1691     elif prev.type in OPENING_BRACKETS:
1692         return NO
1693
1694     if p.type in {syms.parameters, syms.arglist}:
1695         # untyped function signatures or calls
1696         if not prev or prev.type != token.COMMA:
1697             return NO
1698
1699     elif p.type == syms.varargslist:
1700         # lambdas
1701         if prev and prev.type != token.COMMA:
1702             return NO
1703
1704     elif p.type == syms.typedargslist:
1705         # typed function signatures
1706         if not prev:
1707             return NO
1708
1709         if t == token.EQUAL:
1710             if prev.type != syms.tname:
1711                 return NO
1712
1713         elif prev.type == token.EQUAL:
1714             # A bit hacky: if the equal sign has whitespace, it means we
1715             # previously found it's a typed argument.  So, we're using that, too.
1716             return prev.prefix
1717
1718         elif prev.type != token.COMMA:
1719             return NO
1720
1721     elif p.type == syms.tname:
1722         # type names
1723         if not prev:
1724             prevp = preceding_leaf(p)
1725             if not prevp or prevp.type != token.COMMA:
1726                 return NO
1727
1728     elif p.type == syms.trailer:
1729         # attributes and calls
1730         if t == token.LPAR or t == token.RPAR:
1731             return NO
1732
1733         if not prev:
1734             if t == token.DOT:
1735                 prevp = preceding_leaf(p)
1736                 if not prevp or prevp.type != token.NUMBER:
1737                     return NO
1738
1739             elif t == token.LSQB:
1740                 return NO
1741
1742         elif prev.type != token.COMMA:
1743             return NO
1744
1745     elif p.type == syms.argument:
1746         # single argument
1747         if t == token.EQUAL:
1748             return NO
1749
1750         if not prev:
1751             prevp = preceding_leaf(p)
1752             if not prevp or prevp.type == token.LPAR:
1753                 return NO
1754
1755         elif prev.type in {token.EQUAL} | STARS:
1756             return NO
1757
1758     elif p.type == syms.decorator:
1759         # decorators
1760         return NO
1761
1762     elif p.type == syms.dotted_name:
1763         if prev:
1764             return NO
1765
1766         prevp = preceding_leaf(p)
1767         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1768             return NO
1769
1770     elif p.type == syms.classdef:
1771         if t == token.LPAR:
1772             return NO
1773
1774         if prev and prev.type == token.LPAR:
1775             return NO
1776
1777     elif p.type in {syms.subscript, syms.sliceop}:
1778         # indexing
1779         if not prev:
1780             assert p.parent is not None, "subscripts are always parented"
1781             if p.parent.type == syms.subscriptlist:
1782                 return SPACE
1783
1784             return NO
1785
1786         elif not complex_subscript:
1787             return NO
1788
1789     elif p.type == syms.atom:
1790         if prev and t == token.DOT:
1791             # dots, but not the first one.
1792             return NO
1793
1794     elif p.type == syms.dictsetmaker:
1795         # dict unpacking
1796         if prev and prev.type == token.DOUBLESTAR:
1797             return NO
1798
1799     elif p.type in {syms.factor, syms.star_expr}:
1800         # unary ops
1801         if not prev:
1802             prevp = preceding_leaf(p)
1803             if not prevp or prevp.type in OPENING_BRACKETS:
1804                 return NO
1805
1806             prevp_parent = prevp.parent
1807             assert prevp_parent is not None
1808             if prevp.type == token.COLON and prevp_parent.type in {
1809                 syms.subscript,
1810                 syms.sliceop,
1811             }:
1812                 return NO
1813
1814             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1815                 return NO
1816
1817         elif t in {token.NAME, token.NUMBER, token.STRING}:
1818             return NO
1819
1820     elif p.type == syms.import_from:
1821         if t == token.DOT:
1822             if prev and prev.type == token.DOT:
1823                 return NO
1824
1825         elif t == token.NAME:
1826             if v == "import":
1827                 return SPACE
1828
1829             if prev and prev.type == token.DOT:
1830                 return NO
1831
1832     elif p.type == syms.sliceop:
1833         return NO
1834
1835     return SPACE
1836
1837
1838 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1839     """Return the first leaf that precedes `node`, if any."""
1840     while node:
1841         res = node.prev_sibling
1842         if res:
1843             if isinstance(res, Leaf):
1844                 return res
1845
1846             try:
1847                 return list(res.leaves())[-1]
1848
1849             except IndexError:
1850                 return None
1851
1852         node = node.parent
1853     return None
1854
1855
1856 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1857     """Return the child of `ancestor` that contains `descendant`."""
1858     node: Optional[LN] = descendant
1859     while node and node.parent != ancestor:
1860         node = node.parent
1861     return node
1862
1863
1864 def container_of(leaf: Leaf) -> LN:
1865     """Return `leaf` or one of its ancestors that is the topmost container of it.
1866
1867     By "container" we mean a node where `leaf` is the very first child.
1868     """
1869     same_prefix = leaf.prefix
1870     container: LN = leaf
1871     while container:
1872         parent = container.parent
1873         if parent is None:
1874             break
1875
1876         if parent.children[0].prefix != same_prefix:
1877             break
1878
1879         if parent.type == syms.file_input:
1880             break
1881
1882         if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
1883             break
1884
1885         container = parent
1886     return container
1887
1888
1889 def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
1890     """Return the priority of the `leaf` delimiter, given a line break after it.
1891
1892     The delimiter priorities returned here are from those delimiters that would
1893     cause a line break after themselves.
1894
1895     Higher numbers are higher priority.
1896     """
1897     if leaf.type == token.COMMA:
1898         return COMMA_PRIORITY
1899
1900     return 0
1901
1902
1903 def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
1904     """Return the priority of the `leaf` delimiter, given a line break before it.
1905
1906     The delimiter priorities returned here are from those delimiters that would
1907     cause a line break before themselves.
1908
1909     Higher numbers are higher priority.
1910     """
1911     if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1912         # * and ** might also be MATH_OPERATORS but in this case they are not.
1913         # Don't treat them as a delimiter.
1914         return 0
1915
1916     if (
1917         leaf.type == token.DOT
1918         and leaf.parent
1919         and leaf.parent.type not in {syms.import_from, syms.dotted_name}
1920         and (previous is None or previous.type in CLOSING_BRACKETS)
1921     ):
1922         return DOT_PRIORITY
1923
1924     if (
1925         leaf.type in MATH_OPERATORS
1926         and leaf.parent
1927         and leaf.parent.type not in {syms.factor, syms.star_expr}
1928     ):
1929         return MATH_PRIORITIES[leaf.type]
1930
1931     if leaf.type in COMPARATORS:
1932         return COMPARATOR_PRIORITY
1933
1934     if (
1935         leaf.type == token.STRING
1936         and previous is not None
1937         and previous.type == token.STRING
1938     ):
1939         return STRING_PRIORITY
1940
1941     if leaf.type not in {token.NAME, token.ASYNC}:
1942         return 0
1943
1944     if (
1945         leaf.value == "for"
1946         and leaf.parent
1947         and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1948         or leaf.type == token.ASYNC
1949     ):
1950         if (
1951             not isinstance(leaf.prev_sibling, Leaf)
1952             or leaf.prev_sibling.value != "async"
1953         ):
1954             return COMPREHENSION_PRIORITY
1955
1956     if (
1957         leaf.value == "if"
1958         and leaf.parent
1959         and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1960     ):
1961         return COMPREHENSION_PRIORITY
1962
1963     if leaf.value in {"if", "else"} and leaf.parent and leaf.parent.type == syms.test:
1964         return TERNARY_PRIORITY
1965
1966     if leaf.value == "is":
1967         return COMPARATOR_PRIORITY
1968
1969     if (
1970         leaf.value == "in"
1971         and leaf.parent
1972         and leaf.parent.type in {syms.comp_op, syms.comparison}
1973         and not (
1974             previous is not None
1975             and previous.type == token.NAME
1976             and previous.value == "not"
1977         )
1978     ):
1979         return COMPARATOR_PRIORITY
1980
1981     if (
1982         leaf.value == "not"
1983         and leaf.parent
1984         and leaf.parent.type == syms.comp_op
1985         and not (
1986             previous is not None
1987             and previous.type == token.NAME
1988             and previous.value == "is"
1989         )
1990     ):
1991         return COMPARATOR_PRIORITY
1992
1993     if leaf.value in LOGIC_OPERATORS and leaf.parent:
1994         return LOGIC_PRIORITY
1995
1996     return 0
1997
1998
1999 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
2000 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
2001
2002
2003 def generate_comments(leaf: LN) -> Iterator[Leaf]:
2004     """Clean the prefix of the `leaf` and generate comments from it, if any.
2005
2006     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
2007     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
2008     move because it does away with modifying the grammar to include all the
2009     possible places in which comments can be placed.
2010
2011     The sad consequence for us though is that comments don't "belong" anywhere.
2012     This is why this function generates simple parentless Leaf objects for
2013     comments.  We simply don't know what the correct parent should be.
2014
2015     No matter though, we can live without this.  We really only need to
2016     differentiate between inline and standalone comments.  The latter don't
2017     share the line with any code.
2018
2019     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
2020     are emitted with a fake STANDALONE_COMMENT token identifier.
2021     """
2022     for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
2023         yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
2024
2025
2026 @dataclass
2027 class ProtoComment:
2028     """Describes a piece of syntax that is a comment.
2029
2030     It's not a :class:`blib2to3.pytree.Leaf` so that:
2031
2032     * it can be cached (`Leaf` objects should not be reused more than once as
2033       they store their lineno, column, prefix, and parent information);
2034     * `newlines` and `consumed` fields are kept separate from the `value`. This
2035       simplifies handling of special marker comments like ``# fmt: off/on``.
2036     """
2037
2038     type: int  # token.COMMENT or STANDALONE_COMMENT
2039     value: str  # content of the comment
2040     newlines: int  # how many newlines before the comment
2041     consumed: int  # how many characters of the original leaf's prefix did we consume
2042
2043
2044 @lru_cache(maxsize=4096)
2045 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
2046     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
2047     result: List[ProtoComment] = []
2048     if not prefix or "#" not in prefix:
2049         return result
2050
2051     consumed = 0
2052     nlines = 0
2053     for index, line in enumerate(prefix.split("\n")):
2054         consumed += len(line) + 1  # adding the length of the split '\n'
2055         line = line.lstrip()
2056         if not line:
2057             nlines += 1
2058         if not line.startswith("#"):
2059             continue
2060
2061         if index == 0 and not is_endmarker:
2062             comment_type = token.COMMENT  # simple trailing comment
2063         else:
2064             comment_type = STANDALONE_COMMENT
2065         comment = make_comment(line)
2066         result.append(
2067             ProtoComment(
2068                 type=comment_type, value=comment, newlines=nlines, consumed=consumed
2069             )
2070         )
2071         nlines = 0
2072     return result
2073
2074
2075 def make_comment(content: str) -> str:
2076     """Return a consistently formatted comment from the given `content` string.
2077
2078     All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
2079     space between the hash sign and the content.
2080
2081     If `content` didn't start with a hash sign, one is provided.
2082     """
2083     content = content.rstrip()
2084     if not content:
2085         return "#"
2086
2087     if content[0] == "#":
2088         content = content[1:]
2089     if content and content[0] not in " !:#'%":
2090         content = " " + content
2091     return "#" + content
2092
2093
2094 def split_line(
2095     line: Line, line_length: int, inner: bool = False, py36: bool = False
2096 ) -> Iterator[Line]:
2097     """Split a `line` into potentially many lines.
2098
2099     They should fit in the allotted `line_length` but might not be able to.
2100     `inner` signifies that there were a pair of brackets somewhere around the
2101     current `line`, possibly transitively. This means we can fallback to splitting
2102     by delimiters if the LHS/RHS don't yield any results.
2103
2104     If `py36` is True, splitting may generate syntax that is only compatible
2105     with Python 3.6 and later.
2106     """
2107     if line.is_comment:
2108         yield line
2109         return
2110
2111     line_str = str(line).strip("\n")
2112
2113     # we don't want to split special comments like type annotations
2114     # https://github.com/python/typing/issues/186
2115     has_special_comment = False
2116     for leaf in line.leaves:
2117         for comment in line.comments_after(leaf):
2118             if leaf.type == token.COMMA and is_special_comment(comment):
2119                 has_special_comment = True
2120
2121     if (
2122         not has_special_comment
2123         and not line.should_explode
2124         and is_line_short_enough(line, line_length=line_length, line_str=line_str)
2125     ):
2126         yield line
2127         return
2128
2129     split_funcs: List[SplitFunc]
2130     if line.is_def:
2131         split_funcs = [left_hand_split]
2132     else:
2133
2134         def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
2135             for omit in generate_trailers_to_omit(line, line_length):
2136                 lines = list(right_hand_split(line, line_length, py36, omit=omit))
2137                 if is_line_short_enough(lines[0], line_length=line_length):
2138                     yield from lines
2139                     return
2140
2141             # All splits failed, best effort split with no omits.
2142             # This mostly happens to multiline strings that are by definition
2143             # reported as not fitting a single line.
2144             yield from right_hand_split(line, py36)
2145
2146         if line.inside_brackets:
2147             split_funcs = [delimiter_split, standalone_comment_split, rhs]
2148         else:
2149             split_funcs = [rhs]
2150     for split_func in split_funcs:
2151         # We are accumulating lines in `result` because we might want to abort
2152         # mission and return the original line in the end, or attempt a different
2153         # split altogether.
2154         result: List[Line] = []
2155         try:
2156             for l in split_func(line, py36):
2157                 if str(l).strip("\n") == line_str:
2158                     raise CannotSplit("Split function returned an unchanged result")
2159
2160                 result.extend(
2161                     split_line(l, line_length=line_length, inner=True, py36=py36)
2162                 )
2163         except CannotSplit:
2164             continue
2165
2166         else:
2167             yield from result
2168             break
2169
2170     else:
2171         yield line
2172
2173
2174 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
2175     """Split line into many lines, starting with the first matching bracket pair.
2176
2177     Note: this usually looks weird, only use this for function definitions.
2178     Prefer RHS otherwise.  This is why this function is not symmetrical with
2179     :func:`right_hand_split` which also handles optional parentheses.
2180     """
2181     tail_leaves: List[Leaf] = []
2182     body_leaves: List[Leaf] = []
2183     head_leaves: List[Leaf] = []
2184     current_leaves = head_leaves
2185     matching_bracket = None
2186     for leaf in line.leaves:
2187         if (
2188             current_leaves is body_leaves
2189             and leaf.type in CLOSING_BRACKETS
2190             and leaf.opening_bracket is matching_bracket
2191         ):
2192             current_leaves = tail_leaves if body_leaves else head_leaves
2193         current_leaves.append(leaf)
2194         if current_leaves is head_leaves:
2195             if leaf.type in OPENING_BRACKETS:
2196                 matching_bracket = leaf
2197                 current_leaves = body_leaves
2198     if not matching_bracket:
2199         raise CannotSplit("No brackets found")
2200
2201     head = bracket_split_build_line(head_leaves, line, matching_bracket)
2202     body = bracket_split_build_line(body_leaves, line, matching_bracket, is_body=True)
2203     tail = bracket_split_build_line(tail_leaves, line, matching_bracket)
2204     bracket_split_succeeded_or_raise(head, body, tail)
2205     for result in (head, body, tail):
2206         if result:
2207             yield result
2208
2209
2210 def right_hand_split(
2211     line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
2212 ) -> Iterator[Line]:
2213     """Split line into many lines, starting with the last matching bracket pair.
2214
2215     If the split was by optional parentheses, attempt splitting without them, too.
2216     `omit` is a collection of closing bracket IDs that shouldn't be considered for
2217     this split.
2218
2219     Note: running this function modifies `bracket_depth` on the leaves of `line`.
2220     """
2221     tail_leaves: List[Leaf] = []
2222     body_leaves: List[Leaf] = []
2223     head_leaves: List[Leaf] = []
2224     current_leaves = tail_leaves
2225     opening_bracket = None
2226     closing_bracket = None
2227     for leaf in reversed(line.leaves):
2228         if current_leaves is body_leaves:
2229             if leaf is opening_bracket:
2230                 current_leaves = head_leaves if body_leaves else tail_leaves
2231         current_leaves.append(leaf)
2232         if current_leaves is tail_leaves:
2233             if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
2234                 opening_bracket = leaf.opening_bracket
2235                 closing_bracket = leaf
2236                 current_leaves = body_leaves
2237     if not (opening_bracket and closing_bracket and head_leaves):
2238         # If there is no opening or closing_bracket that means the split failed and
2239         # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
2240         # the matching `opening_bracket` wasn't available on `line` anymore.
2241         raise CannotSplit("No brackets found")
2242
2243     tail_leaves.reverse()
2244     body_leaves.reverse()
2245     head_leaves.reverse()
2246     head = bracket_split_build_line(head_leaves, line, opening_bracket)
2247     body = bracket_split_build_line(body_leaves, line, opening_bracket, is_body=True)
2248     tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
2249     bracket_split_succeeded_or_raise(head, body, tail)
2250     if (
2251         # the body shouldn't be exploded
2252         not body.should_explode
2253         # the opening bracket is an optional paren
2254         and opening_bracket.type == token.LPAR
2255         and not opening_bracket.value
2256         # the closing bracket is an optional paren
2257         and closing_bracket.type == token.RPAR
2258         and not closing_bracket.value
2259         # it's not an import (optional parens are the only thing we can split on
2260         # in this case; attempting a split without them is a waste of time)
2261         and not line.is_import
2262         # there are no standalone comments in the body
2263         and not body.contains_standalone_comments(0)
2264         # and we can actually remove the parens
2265         and can_omit_invisible_parens(body, line_length)
2266     ):
2267         omit = {id(closing_bracket), *omit}
2268         try:
2269             yield from right_hand_split(line, line_length, py36=py36, omit=omit)
2270             return
2271
2272         except CannotSplit:
2273             if not (
2274                 can_be_split(body)
2275                 or is_line_short_enough(body, line_length=line_length)
2276             ):
2277                 raise CannotSplit(
2278                     "Splitting failed, body is still too long and can't be split."
2279                 )
2280
2281             elif head.contains_multiline_strings() or tail.contains_multiline_strings():
2282                 raise CannotSplit(
2283                     "The current optional pair of parentheses is bound to fail to "
2284                     "satisfy the splitting algorithm because the head or the tail "
2285                     "contains multiline strings which by definition never fit one "
2286                     "line."
2287                 )
2288
2289     ensure_visible(opening_bracket)
2290     ensure_visible(closing_bracket)
2291     for result in (head, body, tail):
2292         if result:
2293             yield result
2294
2295
2296 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
2297     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
2298
2299     Do nothing otherwise.
2300
2301     A left- or right-hand split is based on a pair of brackets. Content before
2302     (and including) the opening bracket is left on one line, content inside the
2303     brackets is put on a separate line, and finally content starting with and
2304     following the closing bracket is put on a separate line.
2305
2306     Those are called `head`, `body`, and `tail`, respectively. If the split
2307     produced the same line (all content in `head`) or ended up with an empty `body`
2308     and the `tail` is just the closing bracket, then it's considered failed.
2309     """
2310     tail_len = len(str(tail).strip())
2311     if not body:
2312         if tail_len == 0:
2313             raise CannotSplit("Splitting brackets produced the same line")
2314
2315         elif tail_len < 3:
2316             raise CannotSplit(
2317                 f"Splitting brackets on an empty body to save "
2318                 f"{tail_len} characters is not worth it"
2319             )
2320
2321
2322 def bracket_split_build_line(
2323     leaves: List[Leaf], original: Line, opening_bracket: Leaf, *, is_body: bool = False
2324 ) -> Line:
2325     """Return a new line with given `leaves` and respective comments from `original`.
2326
2327     If `is_body` is True, the result line is one-indented inside brackets and as such
2328     has its first leaf's prefix normalized and a trailing comma added when expected.
2329     """
2330     result = Line(depth=original.depth)
2331     if is_body:
2332         result.inside_brackets = True
2333         result.depth += 1
2334         if leaves:
2335             # Since body is a new indent level, remove spurious leading whitespace.
2336             normalize_prefix(leaves[0], inside_brackets=True)
2337             # Ensure a trailing comma when expected.
2338             if original.is_import:
2339                 if leaves[-1].type != token.COMMA:
2340                     leaves.append(Leaf(token.COMMA, ","))
2341     # Populate the line
2342     for leaf in leaves:
2343         result.append(leaf, preformatted=True)
2344         for comment_after in original.comments_after(leaf):
2345             result.append(comment_after, preformatted=True)
2346     if is_body:
2347         result.should_explode = should_explode(result, opening_bracket)
2348     return result
2349
2350
2351 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
2352     """Normalize prefix of the first leaf in every line returned by `split_func`.
2353
2354     This is a decorator over relevant split functions.
2355     """
2356
2357     @wraps(split_func)
2358     def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
2359         for l in split_func(line, py36):
2360             normalize_prefix(l.leaves[0], inside_brackets=True)
2361             yield l
2362
2363     return split_wrapper
2364
2365
2366 @dont_increase_indentation
2367 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
2368     """Split according to delimiters of the highest priority.
2369
2370     If `py36` is True, the split will add trailing commas also in function
2371     signatures that contain `*` and `**`.
2372     """
2373     try:
2374         last_leaf = line.leaves[-1]
2375     except IndexError:
2376         raise CannotSplit("Line empty")
2377
2378     bt = line.bracket_tracker
2379     try:
2380         delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
2381     except ValueError:
2382         raise CannotSplit("No delimiters found")
2383
2384     if delimiter_priority == DOT_PRIORITY:
2385         if bt.delimiter_count_with_priority(delimiter_priority) == 1:
2386             raise CannotSplit("Splitting a single attribute from its owner looks wrong")
2387
2388     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2389     lowest_depth = sys.maxsize
2390     trailing_comma_safe = True
2391
2392     def append_to_line(leaf: Leaf) -> Iterator[Line]:
2393         """Append `leaf` to current line or to new line if appending impossible."""
2394         nonlocal current_line
2395         try:
2396             current_line.append_safe(leaf, preformatted=True)
2397         except ValueError:
2398             yield current_line
2399
2400             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2401             current_line.append(leaf)
2402
2403     for leaf in line.leaves:
2404         yield from append_to_line(leaf)
2405
2406         for comment_after in line.comments_after(leaf):
2407             yield from append_to_line(comment_after)
2408
2409         lowest_depth = min(lowest_depth, leaf.bracket_depth)
2410         if leaf.bracket_depth == lowest_depth and is_vararg(
2411             leaf, within=VARARGS_PARENTS
2412         ):
2413             trailing_comma_safe = trailing_comma_safe and py36
2414         leaf_priority = bt.delimiters.get(id(leaf))
2415         if leaf_priority == delimiter_priority:
2416             yield current_line
2417
2418             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2419     if current_line:
2420         if (
2421             trailing_comma_safe
2422             and delimiter_priority == COMMA_PRIORITY
2423             and current_line.leaves[-1].type != token.COMMA
2424             and current_line.leaves[-1].type != STANDALONE_COMMENT
2425         ):
2426             current_line.append(Leaf(token.COMMA, ","))
2427         yield current_line
2428
2429
2430 @dont_increase_indentation
2431 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2432     """Split standalone comments from the rest of the line."""
2433     if not line.contains_standalone_comments(0):
2434         raise CannotSplit("Line does not have any standalone comments")
2435
2436     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2437
2438     def append_to_line(leaf: Leaf) -> Iterator[Line]:
2439         """Append `leaf` to current line or to new line if appending impossible."""
2440         nonlocal current_line
2441         try:
2442             current_line.append_safe(leaf, preformatted=True)
2443         except ValueError:
2444             yield current_line
2445
2446             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2447             current_line.append(leaf)
2448
2449     for leaf in line.leaves:
2450         yield from append_to_line(leaf)
2451
2452         for comment_after in line.comments_after(leaf):
2453             yield from append_to_line(comment_after)
2454
2455     if current_line:
2456         yield current_line
2457
2458
2459 def is_import(leaf: Leaf) -> bool:
2460     """Return True if the given leaf starts an import statement."""
2461     p = leaf.parent
2462     t = leaf.type
2463     v = leaf.value
2464     return bool(
2465         t == token.NAME
2466         and (
2467             (v == "import" and p and p.type == syms.import_name)
2468             or (v == "from" and p and p.type == syms.import_from)
2469         )
2470     )
2471
2472
2473 def is_special_comment(leaf: Leaf) -> bool:
2474     """Return True if the given leaf is a special comment.
2475     Only returns true for type comments for now."""
2476     t = leaf.type
2477     v = leaf.value
2478     return bool(
2479         (t == token.COMMENT or t == STANDALONE_COMMENT) and (v.startswith("# type:"))
2480     )
2481
2482
2483 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2484     """Leave existing extra newlines if not `inside_brackets`. Remove everything
2485     else.
2486
2487     Note: don't use backslashes for formatting or you'll lose your voting rights.
2488     """
2489     if not inside_brackets:
2490         spl = leaf.prefix.split("#")
2491         if "\\" not in spl[0]:
2492             nl_count = spl[-1].count("\n")
2493             if len(spl) > 1:
2494                 nl_count -= 1
2495             leaf.prefix = "\n" * nl_count
2496             return
2497
2498     leaf.prefix = ""
2499
2500
2501 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
2502     """Make all string prefixes lowercase.
2503
2504     If remove_u_prefix is given, also removes any u prefix from the string.
2505
2506     Note: Mutates its argument.
2507     """
2508     match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
2509     assert match is not None, f"failed to match string {leaf.value!r}"
2510     orig_prefix = match.group(1)
2511     new_prefix = orig_prefix.lower()
2512     if remove_u_prefix:
2513         new_prefix = new_prefix.replace("u", "")
2514     leaf.value = f"{new_prefix}{match.group(2)}"
2515
2516
2517 def normalize_string_quotes(leaf: Leaf) -> None:
2518     """Prefer double quotes but only if it doesn't cause more escaping.
2519
2520     Adds or removes backslashes as appropriate. Doesn't parse and fix
2521     strings nested in f-strings (yet).
2522
2523     Note: Mutates its argument.
2524     """
2525     value = leaf.value.lstrip("furbFURB")
2526     if value[:3] == '"""':
2527         return
2528
2529     elif value[:3] == "'''":
2530         orig_quote = "'''"
2531         new_quote = '"""'
2532     elif value[0] == '"':
2533         orig_quote = '"'
2534         new_quote = "'"
2535     else:
2536         orig_quote = "'"
2537         new_quote = '"'
2538     first_quote_pos = leaf.value.find(orig_quote)
2539     if first_quote_pos == -1:
2540         return  # There's an internal error
2541
2542     prefix = leaf.value[:first_quote_pos]
2543     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2544     escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
2545     escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
2546     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2547     if "r" in prefix.casefold():
2548         if unescaped_new_quote.search(body):
2549             # There's at least one unescaped new_quote in this raw string
2550             # so converting is impossible
2551             return
2552
2553         # Do not introduce or remove backslashes in raw strings
2554         new_body = body
2555     else:
2556         # remove unnecessary escapes
2557         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2558         if body != new_body:
2559             # Consider the string without unnecessary escapes as the original
2560             body = new_body
2561             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2562         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2563         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2564     if "f" in prefix.casefold():
2565         matches = re.findall(r"[^{]\{(.*?)\}[^}]", new_body)
2566         for m in matches:
2567             if "\\" in str(m):
2568                 # Do not introduce backslashes in interpolated expressions
2569                 return
2570     if new_quote == '"""' and new_body[-1:] == '"':
2571         # edge case:
2572         new_body = new_body[:-1] + '\\"'
2573     orig_escape_count = body.count("\\")
2574     new_escape_count = new_body.count("\\")
2575     if new_escape_count > orig_escape_count:
2576         return  # Do not introduce more escaping
2577
2578     if new_escape_count == orig_escape_count and orig_quote == '"':
2579         return  # Prefer double quotes
2580
2581     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2582
2583
2584 def normalize_numeric_literal(leaf: Leaf, allow_underscores: bool) -> None:
2585     """Normalizes numeric (float, int, and complex) literals.
2586
2587     All letters used in the representation are normalized to lowercase (except
2588     in Python 2 long literals), and long number literals are split using underscores.
2589     """
2590     text = leaf.value.lower()
2591     if text.startswith(("0o", "0b")):
2592         # Leave octal and binary literals alone.
2593         pass
2594     elif text.startswith("0x"):
2595         # Change hex literals to upper case.
2596         before, after = text[:2], text[2:]
2597         text = f"{before}{after.upper()}"
2598     elif "e" in text:
2599         before, after = text.split("e")
2600         sign = ""
2601         if after.startswith("-"):
2602             after = after[1:]
2603             sign = "-"
2604         elif after.startswith("+"):
2605             after = after[1:]
2606         before = format_float_or_int_string(before, allow_underscores)
2607         after = format_int_string(after, allow_underscores)
2608         text = f"{before}e{sign}{after}"
2609     elif text.endswith(("j", "l")):
2610         number = text[:-1]
2611         suffix = text[-1]
2612         # Capitalize in "2L" because "l" looks too similar to "1".
2613         if suffix == "l":
2614             suffix = "L"
2615         text = f"{format_float_or_int_string(number, allow_underscores)}{suffix}"
2616     else:
2617         text = format_float_or_int_string(text, allow_underscores)
2618     leaf.value = text
2619
2620
2621 def format_float_or_int_string(text: str, allow_underscores: bool) -> str:
2622     """Formats a float string like "1.0"."""
2623     if "." not in text:
2624         return format_int_string(text, allow_underscores)
2625
2626     before, after = text.split(".")
2627     before = format_int_string(before, allow_underscores) if before else "0"
2628     if after:
2629         after = format_int_string(after, allow_underscores, count_from_end=False)
2630     else:
2631         after = "0"
2632     return f"{before}.{after}"
2633
2634
2635 def format_int_string(
2636     text: str, allow_underscores: bool, count_from_end: bool = True
2637 ) -> str:
2638     """Normalizes underscores in a string to e.g. 1_000_000.
2639
2640     Input must be a string of digits and optional underscores.
2641     If count_from_end is False, we add underscores after groups of three digits
2642     counting from the beginning instead of the end of the strings. This is used
2643     for the fractional part of float literals.
2644     """
2645     if not allow_underscores:
2646         return text
2647
2648     text = text.replace("_", "")
2649     if len(text) <= 5:
2650         # No underscores for numbers <= 5 digits long.
2651         return text
2652
2653     if count_from_end:
2654         # Avoid removing leading zeros, which are important if we're formatting
2655         # part of a number like "0.001".
2656         return format(int("1" + text), "3_")[1:].lstrip("_")
2657     else:
2658         return "_".join(text[i : i + 3] for i in range(0, len(text), 3))
2659
2660
2661 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2662     """Make existing optional parentheses invisible or create new ones.
2663
2664     `parens_after` is a set of string leaf values immeditely after which parens
2665     should be put.
2666
2667     Standardizes on visible parentheses for single-element tuples, and keeps
2668     existing visible parentheses for other tuples and generator expressions.
2669     """
2670     for pc in list_comments(node.prefix, is_endmarker=False):
2671         if pc.value in FMT_OFF:
2672             # This `node` has a prefix with `# fmt: off`, don't mess with parens.
2673             return
2674
2675     check_lpar = False
2676     for index, child in enumerate(list(node.children)):
2677         if check_lpar:
2678             if child.type == syms.atom:
2679                 if maybe_make_parens_invisible_in_atom(child):
2680                     lpar = Leaf(token.LPAR, "")
2681                     rpar = Leaf(token.RPAR, "")
2682                     index = child.remove() or 0
2683                     node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2684             elif is_one_tuple(child):
2685                 # wrap child in visible parentheses
2686                 lpar = Leaf(token.LPAR, "(")
2687                 rpar = Leaf(token.RPAR, ")")
2688                 child.remove()
2689                 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2690             elif node.type == syms.import_from:
2691                 # "import from" nodes store parentheses directly as part of
2692                 # the statement
2693                 if child.type == token.LPAR:
2694                     # make parentheses invisible
2695                     child.value = ""  # type: ignore
2696                     node.children[-1].value = ""  # type: ignore
2697                 elif child.type != token.STAR:
2698                     # insert invisible parentheses
2699                     node.insert_child(index, Leaf(token.LPAR, ""))
2700                     node.append_child(Leaf(token.RPAR, ""))
2701                 break
2702
2703             elif not (isinstance(child, Leaf) and is_multiline_string(child)):
2704                 # wrap child in invisible parentheses
2705                 lpar = Leaf(token.LPAR, "")
2706                 rpar = Leaf(token.RPAR, "")
2707                 index = child.remove() or 0
2708                 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2709
2710         check_lpar = isinstance(child, Leaf) and child.value in parens_after
2711
2712
2713 def normalize_fmt_off(node: Node) -> None:
2714     """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
2715     try_again = True
2716     while try_again:
2717         try_again = convert_one_fmt_off_pair(node)
2718
2719
2720 def convert_one_fmt_off_pair(node: Node) -> bool:
2721     """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
2722
2723     Returns True if a pair was converted.
2724     """
2725     for leaf in node.leaves():
2726         previous_consumed = 0
2727         for comment in list_comments(leaf.prefix, is_endmarker=False):
2728             if comment.value in FMT_OFF:
2729                 # We only want standalone comments. If there's no previous leaf or
2730                 # the previous leaf is indentation, it's a standalone comment in
2731                 # disguise.
2732                 if comment.type != STANDALONE_COMMENT:
2733                     prev = preceding_leaf(leaf)
2734                     if prev and prev.type not in WHITESPACE:
2735                         continue
2736
2737                 ignored_nodes = list(generate_ignored_nodes(leaf))
2738                 if not ignored_nodes:
2739                     continue
2740
2741                 first = ignored_nodes[0]  # Can be a container node with the `leaf`.
2742                 parent = first.parent
2743                 prefix = first.prefix
2744                 first.prefix = prefix[comment.consumed :]
2745                 hidden_value = (
2746                     comment.value + "\n" + "".join(str(n) for n in ignored_nodes)
2747                 )
2748                 if hidden_value.endswith("\n"):
2749                     # That happens when one of the `ignored_nodes` ended with a NEWLINE
2750                     # leaf (possibly followed by a DEDENT).
2751                     hidden_value = hidden_value[:-1]
2752                 first_idx = None
2753                 for ignored in ignored_nodes:
2754                     index = ignored.remove()
2755                     if first_idx is None:
2756                         first_idx = index
2757                 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
2758                 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
2759                 parent.insert_child(
2760                     first_idx,
2761                     Leaf(
2762                         STANDALONE_COMMENT,
2763                         hidden_value,
2764                         prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
2765                     ),
2766                 )
2767                 return True
2768
2769             previous_consumed = comment.consumed
2770
2771     return False
2772
2773
2774 def generate_ignored_nodes(leaf: Leaf) -> Iterator[LN]:
2775     """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
2776
2777     Stops at the end of the block.
2778     """
2779     container: Optional[LN] = container_of(leaf)
2780     while container is not None and container.type != token.ENDMARKER:
2781         for comment in list_comments(container.prefix, is_endmarker=False):
2782             if comment.value in FMT_ON:
2783                 return
2784
2785         yield container
2786
2787         container = container.next_sibling
2788
2789
2790 def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
2791     """If it's safe, make the parens in the atom `node` invisible, recursively.
2792
2793     Returns whether the node should itself be wrapped in invisible parentheses.
2794
2795     """
2796     if (
2797         node.type != syms.atom
2798         or is_empty_tuple(node)
2799         or is_one_tuple(node)
2800         or is_yield(node)
2801         or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
2802     ):
2803         return False
2804
2805     first = node.children[0]
2806     last = node.children[-1]
2807     if first.type == token.LPAR and last.type == token.RPAR:
2808         # make parentheses invisible
2809         first.value = ""  # type: ignore
2810         last.value = ""  # type: ignore
2811         if len(node.children) > 1:
2812             maybe_make_parens_invisible_in_atom(node.children[1])
2813         return False
2814
2815     return True
2816
2817
2818 def is_empty_tuple(node: LN) -> bool:
2819     """Return True if `node` holds an empty tuple."""
2820     return (
2821         node.type == syms.atom
2822         and len(node.children) == 2
2823         and node.children[0].type == token.LPAR
2824         and node.children[1].type == token.RPAR
2825     )
2826
2827
2828 def is_one_tuple(node: LN) -> bool:
2829     """Return True if `node` holds a tuple with one element, with or without parens."""
2830     if node.type == syms.atom:
2831         if len(node.children) != 3:
2832             return False
2833
2834         lpar, gexp, rpar = node.children
2835         if not (
2836             lpar.type == token.LPAR
2837             and gexp.type == syms.testlist_gexp
2838             and rpar.type == token.RPAR
2839         ):
2840             return False
2841
2842         return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2843
2844     return (
2845         node.type in IMPLICIT_TUPLE
2846         and len(node.children) == 2
2847         and node.children[1].type == token.COMMA
2848     )
2849
2850
2851 def is_yield(node: LN) -> bool:
2852     """Return True if `node` holds a `yield` or `yield from` expression."""
2853     if node.type == syms.yield_expr:
2854         return True
2855
2856     if node.type == token.NAME and node.value == "yield":  # type: ignore
2857         return True
2858
2859     if node.type != syms.atom:
2860         return False
2861
2862     if len(node.children) != 3:
2863         return False
2864
2865     lpar, expr, rpar = node.children
2866     if lpar.type == token.LPAR and rpar.type == token.RPAR:
2867         return is_yield(expr)
2868
2869     return False
2870
2871
2872 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2873     """Return True if `leaf` is a star or double star in a vararg or kwarg.
2874
2875     If `within` includes VARARGS_PARENTS, this applies to function signatures.
2876     If `within` includes UNPACKING_PARENTS, it applies to right hand-side
2877     extended iterable unpacking (PEP 3132) and additional unpacking
2878     generalizations (PEP 448).
2879     """
2880     if leaf.type not in STARS or not leaf.parent:
2881         return False
2882
2883     p = leaf.parent
2884     if p.type == syms.star_expr:
2885         # Star expressions are also used as assignment targets in extended
2886         # iterable unpacking (PEP 3132).  See what its parent is instead.
2887         if not p.parent:
2888             return False
2889
2890         p = p.parent
2891
2892     return p.type in within
2893
2894
2895 def is_multiline_string(leaf: Leaf) -> bool:
2896     """Return True if `leaf` is a multiline string that actually spans many lines."""
2897     value = leaf.value.lstrip("furbFURB")
2898     return value[:3] in {'"""', "'''"} and "\n" in value
2899
2900
2901 def is_stub_suite(node: Node) -> bool:
2902     """Return True if `node` is a suite with a stub body."""
2903     if (
2904         len(node.children) != 4
2905         or node.children[0].type != token.NEWLINE
2906         or node.children[1].type != token.INDENT
2907         or node.children[3].type != token.DEDENT
2908     ):
2909         return False
2910
2911     return is_stub_body(node.children[2])
2912
2913
2914 def is_stub_body(node: LN) -> bool:
2915     """Return True if `node` is a simple statement containing an ellipsis."""
2916     if not isinstance(node, Node) or node.type != syms.simple_stmt:
2917         return False
2918
2919     if len(node.children) != 2:
2920         return False
2921
2922     child = node.children[0]
2923     return (
2924         child.type == syms.atom
2925         and len(child.children) == 3
2926         and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
2927     )
2928
2929
2930 def max_delimiter_priority_in_atom(node: LN) -> int:
2931     """Return maximum delimiter priority inside `node`.
2932
2933     This is specific to atoms with contents contained in a pair of parentheses.
2934     If `node` isn't an atom or there are no enclosing parentheses, returns 0.
2935     """
2936     if node.type != syms.atom:
2937         return 0
2938
2939     first = node.children[0]
2940     last = node.children[-1]
2941     if not (first.type == token.LPAR and last.type == token.RPAR):
2942         return 0
2943
2944     bt = BracketTracker()
2945     for c in node.children[1:-1]:
2946         if isinstance(c, Leaf):
2947             bt.mark(c)
2948         else:
2949             for leaf in c.leaves():
2950                 bt.mark(leaf)
2951     try:
2952         return bt.max_delimiter_priority()
2953
2954     except ValueError:
2955         return 0
2956
2957
2958 def ensure_visible(leaf: Leaf) -> None:
2959     """Make sure parentheses are visible.
2960
2961     They could be invisible as part of some statements (see
2962     :func:`normalize_invible_parens` and :func:`visit_import_from`).
2963     """
2964     if leaf.type == token.LPAR:
2965         leaf.value = "("
2966     elif leaf.type == token.RPAR:
2967         leaf.value = ")"
2968
2969
2970 def should_explode(line: Line, opening_bracket: Leaf) -> bool:
2971     """Should `line` immediately be split with `delimiter_split()` after RHS?"""
2972
2973     if not (
2974         opening_bracket.parent
2975         and opening_bracket.parent.type in {syms.atom, syms.import_from}
2976         and opening_bracket.value in "[{("
2977     ):
2978         return False
2979
2980     try:
2981         last_leaf = line.leaves[-1]
2982         exclude = {id(last_leaf)} if last_leaf.type == token.COMMA else set()
2983         max_priority = line.bracket_tracker.max_delimiter_priority(exclude=exclude)
2984     except (IndexError, ValueError):
2985         return False
2986
2987     return max_priority == COMMA_PRIORITY
2988
2989
2990 def is_python36(node: Node) -> bool:
2991     """Return True if the current file is using Python 3.6+ features.
2992
2993     Currently looking for:
2994     - f-strings;
2995     - underscores in numeric literals; and
2996     - trailing commas after * or ** in function signatures and calls.
2997     """
2998     for n in node.pre_order():
2999         if n.type == token.STRING:
3000             value_head = n.value[:2]  # type: ignore
3001             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
3002                 return True
3003
3004         elif n.type == token.NUMBER:
3005             if "_" in n.value:  # type: ignore
3006                 return True
3007
3008         elif (
3009             n.type in {syms.typedargslist, syms.arglist}
3010             and n.children
3011             and n.children[-1].type == token.COMMA
3012         ):
3013             for ch in n.children:
3014                 if ch.type in STARS:
3015                     return True
3016
3017                 if ch.type == syms.argument:
3018                     for argch in ch.children:
3019                         if argch.type in STARS:
3020                             return True
3021
3022     return False
3023
3024
3025 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
3026     """Generate sets of closing bracket IDs that should be omitted in a RHS.
3027
3028     Brackets can be omitted if the entire trailer up to and including
3029     a preceding closing bracket fits in one line.
3030
3031     Yielded sets are cumulative (contain results of previous yields, too).  First
3032     set is empty.
3033     """
3034
3035     omit: Set[LeafID] = set()
3036     yield omit
3037
3038     length = 4 * line.depth
3039     opening_bracket = None
3040     closing_bracket = None
3041     inner_brackets: Set[LeafID] = set()
3042     for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
3043         length += leaf_length
3044         if length > line_length:
3045             break
3046
3047         has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
3048         if leaf.type == STANDALONE_COMMENT or has_inline_comment:
3049             break
3050
3051         if opening_bracket:
3052             if leaf is opening_bracket:
3053                 opening_bracket = None
3054             elif leaf.type in CLOSING_BRACKETS:
3055                 inner_brackets.add(id(leaf))
3056         elif leaf.type in CLOSING_BRACKETS:
3057             if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
3058                 # Empty brackets would fail a split so treat them as "inner"
3059                 # brackets (e.g. only add them to the `omit` set if another
3060                 # pair of brackets was good enough.
3061                 inner_brackets.add(id(leaf))
3062                 continue
3063
3064             if closing_bracket:
3065                 omit.add(id(closing_bracket))
3066                 omit.update(inner_brackets)
3067                 inner_brackets.clear()
3068                 yield omit
3069
3070             if leaf.value:
3071                 opening_bracket = leaf.opening_bracket
3072                 closing_bracket = leaf
3073
3074
3075 def get_future_imports(node: Node) -> Set[str]:
3076     """Return a set of __future__ imports in the file."""
3077     imports: Set[str] = set()
3078
3079     def get_imports_from_children(children: List[LN]) -> Generator[str, None, None]:
3080         for child in children:
3081             if isinstance(child, Leaf):
3082                 if child.type == token.NAME:
3083                     yield child.value
3084             elif child.type == syms.import_as_name:
3085                 orig_name = child.children[0]
3086                 assert isinstance(orig_name, Leaf), "Invalid syntax parsing imports"
3087                 assert orig_name.type == token.NAME, "Invalid syntax parsing imports"
3088                 yield orig_name.value
3089             elif child.type == syms.import_as_names:
3090                 yield from get_imports_from_children(child.children)
3091             else:
3092                 assert False, "Invalid syntax parsing imports"
3093
3094     for child in node.children:
3095         if child.type != syms.simple_stmt:
3096             break
3097         first_child = child.children[0]
3098         if isinstance(first_child, Leaf):
3099             # Continue looking if we see a docstring; otherwise stop.
3100             if (
3101                 len(child.children) == 2
3102                 and first_child.type == token.STRING
3103                 and child.children[1].type == token.NEWLINE
3104             ):
3105                 continue
3106             else:
3107                 break
3108         elif first_child.type == syms.import_from:
3109             module_name = first_child.children[1]
3110             if not isinstance(module_name, Leaf) or module_name.value != "__future__":
3111                 break
3112             imports |= set(get_imports_from_children(first_child.children[3:]))
3113         else:
3114             break
3115     return imports
3116
3117
3118 def gen_python_files_in_dir(
3119     path: Path,
3120     root: Path,
3121     include: Pattern[str],
3122     exclude: Pattern[str],
3123     report: "Report",
3124 ) -> Iterator[Path]:
3125     """Generate all files under `path` whose paths are not excluded by the
3126     `exclude` regex, but are included by the `include` regex.
3127
3128     Symbolic links pointing outside of the `root` directory are ignored.
3129
3130     `report` is where output about exclusions goes.
3131     """
3132     assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
3133     for child in path.iterdir():
3134         try:
3135             normalized_path = "/" + child.resolve().relative_to(root).as_posix()
3136         except ValueError:
3137             if child.is_symlink():
3138                 report.path_ignored(
3139                     child, f"is a symbolic link that points outside {root}"
3140                 )
3141                 continue
3142
3143             raise
3144
3145         if child.is_dir():
3146             normalized_path += "/"
3147         exclude_match = exclude.search(normalized_path)
3148         if exclude_match and exclude_match.group(0):
3149             report.path_ignored(child, f"matches the --exclude regular expression")
3150             continue
3151
3152         if child.is_dir():
3153             yield from gen_python_files_in_dir(child, root, include, exclude, report)
3154
3155         elif child.is_file():
3156             include_match = include.search(normalized_path)
3157             if include_match:
3158                 yield child
3159
3160
3161 @lru_cache()
3162 def find_project_root(srcs: Iterable[str]) -> Path:
3163     """Return a directory containing .git, .hg, or pyproject.toml.
3164
3165     That directory can be one of the directories passed in `srcs` or their
3166     common parent.
3167
3168     If no directory in the tree contains a marker that would specify it's the
3169     project root, the root of the file system is returned.
3170     """
3171     if not srcs:
3172         return Path("/").resolve()
3173
3174     common_base = min(Path(src).resolve() for src in srcs)
3175     if common_base.is_dir():
3176         # Append a fake file so `parents` below returns `common_base_dir`, too.
3177         common_base /= "fake-file"
3178     for directory in common_base.parents:
3179         if (directory / ".git").is_dir():
3180             return directory
3181
3182         if (directory / ".hg").is_dir():
3183             return directory
3184
3185         if (directory / "pyproject.toml").is_file():
3186             return directory
3187
3188     return directory
3189
3190
3191 @dataclass
3192 class Report:
3193     """Provides a reformatting counter. Can be rendered with `str(report)`."""
3194
3195     check: bool = False
3196     quiet: bool = False
3197     verbose: bool = False
3198     change_count: int = 0
3199     same_count: int = 0
3200     failure_count: int = 0
3201
3202     def done(self, src: Path, changed: Changed) -> None:
3203         """Increment the counter for successful reformatting. Write out a message."""
3204         if changed is Changed.YES:
3205             reformatted = "would reformat" if self.check else "reformatted"
3206             if self.verbose or not self.quiet:
3207                 out(f"{reformatted} {src}")
3208             self.change_count += 1
3209         else:
3210             if self.verbose:
3211                 if changed is Changed.NO:
3212                     msg = f"{src} already well formatted, good job."
3213                 else:
3214                     msg = f"{src} wasn't modified on disk since last run."
3215                 out(msg, bold=False)
3216             self.same_count += 1
3217
3218     def failed(self, src: Path, message: str) -> None:
3219         """Increment the counter for failed reformatting. Write out a message."""
3220         err(f"error: cannot format {src}: {message}")
3221         self.failure_count += 1
3222
3223     def path_ignored(self, path: Path, message: str) -> None:
3224         if self.verbose:
3225             out(f"{path} ignored: {message}", bold=False)
3226
3227     @property
3228     def return_code(self) -> int:
3229         """Return the exit code that the app should use.
3230
3231         This considers the current state of changed files and failures:
3232         - if there were any failures, return 123;
3233         - if any files were changed and --check is being used, return 1;
3234         - otherwise return 0.
3235         """
3236         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
3237         # 126 we have special return codes reserved by the shell.
3238         if self.failure_count:
3239             return 123
3240
3241         elif self.change_count and self.check:
3242             return 1
3243
3244         return 0
3245
3246     def __str__(self) -> str:
3247         """Render a color report of the current state.
3248
3249         Use `click.unstyle` to remove colors.
3250         """
3251         if self.check:
3252             reformatted = "would be reformatted"
3253             unchanged = "would be left unchanged"
3254             failed = "would fail to reformat"
3255         else:
3256             reformatted = "reformatted"
3257             unchanged = "left unchanged"
3258             failed = "failed to reformat"
3259         report = []
3260         if self.change_count:
3261             s = "s" if self.change_count > 1 else ""
3262             report.append(
3263                 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
3264             )
3265         if self.same_count:
3266             s = "s" if self.same_count > 1 else ""
3267             report.append(f"{self.same_count} file{s} {unchanged}")
3268         if self.failure_count:
3269             s = "s" if self.failure_count > 1 else ""
3270             report.append(
3271                 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
3272             )
3273         return ", ".join(report) + "."
3274
3275
3276 def assert_equivalent(src: str, dst: str) -> None:
3277     """Raise AssertionError if `src` and `dst` aren't equivalent."""
3278
3279     import ast
3280     import traceback
3281
3282     def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
3283         """Simple visitor generating strings to compare ASTs by content."""
3284         yield f"{'  ' * depth}{node.__class__.__name__}("
3285
3286         for field in sorted(node._fields):
3287             try:
3288                 value = getattr(node, field)
3289             except AttributeError:
3290                 continue
3291
3292             yield f"{'  ' * (depth+1)}{field}="
3293
3294             if isinstance(value, list):
3295                 for item in value:
3296                     if isinstance(item, ast.AST):
3297                         yield from _v(item, depth + 2)
3298
3299             elif isinstance(value, ast.AST):
3300                 yield from _v(value, depth + 2)
3301
3302             else:
3303                 yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"
3304
3305         yield f"{'  ' * depth})  # /{node.__class__.__name__}"
3306
3307     try:
3308         src_ast = ast.parse(src)
3309     except Exception as exc:
3310         major, minor = sys.version_info[:2]
3311         raise AssertionError(
3312             f"cannot use --safe with this file; failed to parse source file "
3313             f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
3314             f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
3315         )
3316
3317     try:
3318         dst_ast = ast.parse(dst)
3319     except Exception as exc:
3320         log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
3321         raise AssertionError(
3322             f"INTERNAL ERROR: Black produced invalid code: {exc}. "
3323             f"Please report a bug on https://github.com/ambv/black/issues.  "
3324             f"This invalid output might be helpful: {log}"
3325         ) from None
3326
3327     src_ast_str = "\n".join(_v(src_ast))
3328     dst_ast_str = "\n".join(_v(dst_ast))
3329     if src_ast_str != dst_ast_str:
3330         log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
3331         raise AssertionError(
3332             f"INTERNAL ERROR: Black produced code that is not equivalent to "
3333             f"the source.  "
3334             f"Please report a bug on https://github.com/ambv/black/issues.  "
3335             f"This diff might be helpful: {log}"
3336         ) from None
3337
3338
3339 def assert_stable(
3340     src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
3341 ) -> None:
3342     """Raise AssertionError if `dst` reformats differently the second time."""
3343     newdst = format_str(dst, line_length=line_length, mode=mode)
3344     if dst != newdst:
3345         log = dump_to_file(
3346             diff(src, dst, "source", "first pass"),
3347             diff(dst, newdst, "first pass", "second pass"),
3348         )
3349         raise AssertionError(
3350             f"INTERNAL ERROR: Black produced different code on the second pass "
3351             f"of the formatter.  "
3352             f"Please report a bug on https://github.com/ambv/black/issues.  "
3353             f"This diff might be helpful: {log}"
3354         ) from None
3355
3356
3357 def dump_to_file(*output: str) -> str:
3358     """Dump `output` to a temporary file. Return path to the file."""
3359     import tempfile
3360
3361     with tempfile.NamedTemporaryFile(
3362         mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
3363     ) as f:
3364         for lines in output:
3365             f.write(lines)
3366             if lines and lines[-1] != "\n":
3367                 f.write("\n")
3368     return f.name
3369
3370
3371 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
3372     """Return a unified diff string between strings `a` and `b`."""
3373     import difflib
3374
3375     a_lines = [line + "\n" for line in a.split("\n")]
3376     b_lines = [line + "\n" for line in b.split("\n")]
3377     return "".join(
3378         difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
3379     )
3380
3381
3382 def cancel(tasks: Iterable[asyncio.Task]) -> None:
3383     """asyncio signal handler that cancels all `tasks` and reports to stderr."""
3384     err("Aborted!")
3385     for task in tasks:
3386         task.cancel()
3387
3388
3389 def shutdown(loop: BaseEventLoop) -> None:
3390     """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
3391     try:
3392         # This part is borrowed from asyncio/runners.py in Python 3.7b2.
3393         to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
3394         if not to_cancel:
3395             return
3396
3397         for task in to_cancel:
3398             task.cancel()
3399         loop.run_until_complete(
3400             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
3401         )
3402     finally:
3403         # `concurrent.futures.Future` objects cannot be cancelled once they
3404         # are already running. There might be some when the `shutdown()` happened.
3405         # Silence their logger's spew about the event loop being closed.
3406         cf_logger = logging.getLogger("concurrent.futures")
3407         cf_logger.setLevel(logging.CRITICAL)
3408         loop.close()
3409
3410
3411 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
3412     """Replace `regex` with `replacement` twice on `original`.
3413
3414     This is used by string normalization to perform replaces on
3415     overlapping matches.
3416     """
3417     return regex.sub(replacement, regex.sub(replacement, original))
3418
3419
3420 def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
3421     """Compile a regular expression string in `regex`.
3422
3423     If it contains newlines, use verbose mode.
3424     """
3425     if "\n" in regex:
3426         regex = "(?x)" + regex
3427     return re.compile(regex)
3428
3429
3430 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
3431     """Like `reversed(enumerate(sequence))` if that were possible."""
3432     index = len(sequence) - 1
3433     for element in reversed(sequence):
3434         yield (index, element)
3435         index -= 1
3436
3437
3438 def enumerate_with_length(
3439     line: Line, reversed: bool = False
3440 ) -> Iterator[Tuple[Index, Leaf, int]]:
3441     """Return an enumeration of leaves with their length.
3442
3443     Stops prematurely on multiline strings and standalone comments.
3444     """
3445     op = cast(
3446         Callable[[Sequence[Leaf]], Iterator[Tuple[Index, Leaf]]],
3447         enumerate_reversed if reversed else enumerate,
3448     )
3449     for index, leaf in op(line.leaves):
3450         length = len(leaf.prefix) + len(leaf.value)
3451         if "\n" in leaf.value:
3452             return  # Multiline strings, we can't continue.
3453
3454         comment: Optional[Leaf]
3455         for comment in line.comments_after(leaf):
3456             length += len(comment.value)
3457
3458         yield index, leaf, length
3459
3460
3461 def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
3462     """Return True if `line` is no longer than `line_length`.
3463
3464     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
3465     """
3466     if not line_str:
3467         line_str = str(line).strip("\n")
3468     return (
3469         len(line_str) <= line_length
3470         and "\n" not in line_str  # multiline strings
3471         and not line.contains_standalone_comments()
3472     )
3473
3474
3475 def can_be_split(line: Line) -> bool:
3476     """Return False if the line cannot be split *for sure*.
3477
3478     This is not an exhaustive search but a cheap heuristic that we can use to
3479     avoid some unfortunate formattings (mostly around wrapping unsplittable code
3480     in unnecessary parentheses).
3481     """
3482     leaves = line.leaves
3483     if len(leaves) < 2:
3484         return False
3485
3486     if leaves[0].type == token.STRING and leaves[1].type == token.DOT:
3487         call_count = 0
3488         dot_count = 0
3489         next = leaves[-1]
3490         for leaf in leaves[-2::-1]:
3491             if leaf.type in OPENING_BRACKETS:
3492                 if next.type not in CLOSING_BRACKETS:
3493                     return False
3494
3495                 call_count += 1
3496             elif leaf.type == token.DOT:
3497                 dot_count += 1
3498             elif leaf.type == token.NAME:
3499                 if not (next.type == token.DOT or next.type in OPENING_BRACKETS):
3500                     return False
3501
3502             elif leaf.type not in CLOSING_BRACKETS:
3503                 return False
3504
3505             if dot_count > 1 and call_count > 1:
3506                 return False
3507
3508     return True
3509
3510
3511 def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
3512     """Does `line` have a shape safe to reformat without optional parens around it?
3513
3514     Returns True for only a subset of potentially nice looking formattings but
3515     the point is to not return false positives that end up producing lines that
3516     are too long.
3517     """
3518     bt = line.bracket_tracker
3519     if not bt.delimiters:
3520         # Without delimiters the optional parentheses are useless.
3521         return True
3522
3523     max_priority = bt.max_delimiter_priority()
3524     if bt.delimiter_count_with_priority(max_priority) > 1:
3525         # With more than one delimiter of a kind the optional parentheses read better.
3526         return False
3527
3528     if max_priority == DOT_PRIORITY:
3529         # A single stranded method call doesn't require optional parentheses.
3530         return True
3531
3532     assert len(line.leaves) >= 2, "Stranded delimiter"
3533
3534     first = line.leaves[0]
3535     second = line.leaves[1]
3536     penultimate = line.leaves[-2]
3537     last = line.leaves[-1]
3538
3539     # With a single delimiter, omit if the expression starts or ends with
3540     # a bracket.
3541     if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
3542         remainder = False
3543         length = 4 * line.depth
3544         for _index, leaf, leaf_length in enumerate_with_length(line):
3545             if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
3546                 remainder = True
3547             if remainder:
3548                 length += leaf_length
3549                 if length > line_length:
3550                     break
3551
3552                 if leaf.type in OPENING_BRACKETS:
3553                     # There are brackets we can further split on.
3554                     remainder = False
3555
3556         else:
3557             # checked the entire string and line length wasn't exceeded
3558             if len(line.leaves) == _index + 1:
3559                 return True
3560
3561         # Note: we are not returning False here because a line might have *both*
3562         # a leading opening bracket and a trailing closing bracket.  If the
3563         # opening bracket doesn't match our rule, maybe the closing will.
3564
3565     if (
3566         last.type == token.RPAR
3567         or last.type == token.RBRACE
3568         or (
3569             # don't use indexing for omitting optional parentheses;
3570             # it looks weird
3571             last.type == token.RSQB
3572             and last.parent
3573             and last.parent.type != syms.trailer
3574         )
3575     ):
3576         if penultimate.type in OPENING_BRACKETS:
3577             # Empty brackets don't help.
3578             return False
3579
3580         if is_multiline_string(first):
3581             # Additional wrapping of a multiline string in this situation is
3582             # unnecessary.
3583             return True
3584
3585         length = 4 * line.depth
3586         seen_other_brackets = False
3587         for _index, leaf, leaf_length in enumerate_with_length(line):
3588             length += leaf_length
3589             if leaf is last.opening_bracket:
3590                 if seen_other_brackets or length <= line_length:
3591                     return True
3592
3593             elif leaf.type in OPENING_BRACKETS:
3594                 # There are brackets we can further split on.
3595                 seen_other_brackets = True
3596
3597     return False
3598
3599
3600 def get_cache_file(line_length: int, mode: FileMode) -> Path:
3601     return CACHE_DIR / f"cache.{line_length}.{mode.value}.pickle"
3602
3603
3604 def read_cache(line_length: int, mode: FileMode) -> Cache:
3605     """Read the cache if it exists and is well formed.
3606
3607     If it is not well formed, the call to write_cache later should resolve the issue.
3608     """
3609     cache_file = get_cache_file(line_length, mode)
3610     if not cache_file.exists():
3611         return {}
3612
3613     with cache_file.open("rb") as fobj:
3614         try:
3615             cache: Cache = pickle.load(fobj)
3616         except pickle.UnpicklingError:
3617             return {}
3618
3619     return cache
3620
3621
3622 def get_cache_info(path: Path) -> CacheInfo:
3623     """Return the information used to check if a file is already formatted or not."""
3624     stat = path.stat()
3625     return stat.st_mtime, stat.st_size
3626
3627
3628 def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
3629     """Split an iterable of paths in `sources` into two sets.
3630
3631     The first contains paths of files that modified on disk or are not in the
3632     cache. The other contains paths to non-modified files.
3633     """
3634     todo, done = set(), set()
3635     for src in sources:
3636         src = src.resolve()
3637         if cache.get(src) != get_cache_info(src):
3638             todo.add(src)
3639         else:
3640             done.add(src)
3641     return todo, done
3642
3643
3644 def write_cache(
3645     cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
3646 ) -> None:
3647     """Update the cache file."""
3648     cache_file = get_cache_file(line_length, mode)
3649     try:
3650         if not CACHE_DIR.exists():
3651             CACHE_DIR.mkdir(parents=True)
3652         new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
3653         with cache_file.open("wb") as fobj:
3654             pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
3655     except OSError:
3656         pass
3657
3658
3659 def patch_click() -> None:
3660     """Make Click not crash.
3661
3662     On certain misconfigured environments, Python 3 selects the ASCII encoding as the
3663     default which restricts paths that it can access during the lifetime of the
3664     application.  Click refuses to work in this scenario by raising a RuntimeError.
3665
3666     In case of Black the likelihood that non-ASCII characters are going to be used in
3667     file paths is minimal since it's Python source code.  Moreover, this crash was
3668     spurious on Python 3.7 thanks to PEP 538 and PEP 540.
3669     """
3670     try:
3671         from click import core
3672         from click import _unicodefun  # type: ignore
3673     except ModuleNotFoundError:
3674         return
3675
3676     for module in (core, _unicodefun):
3677         if hasattr(module, "_verify_python3_env"):
3678             module._verify_python3_env = lambda: None
3679
3680
3681 def patched_main() -> None:
3682     freeze_support()
3683     patch_click()
3684     main()
3685
3686
3687 if __name__ == "__main__":
3688     patched_main()