black.py

   1 import asyncio
   2 from asyncio.base_events import BaseEventLoop
   3 from concurrent.futures import Executor, ProcessPoolExecutor
   4 from datetime import datetime
   5 from enum import Enum, Flag
   6 from functools import lru_cache, partial, wraps
   7 import io
   8 import keyword
   9 import logging
  10 from multiprocessing import Manager
  11 import os
  12 from pathlib import Path
  13 import pickle
  14 import re
  15 import signal
  16 import sys
  17 import tokenize
  18 from typing import (
  19     Any,
  20     Callable,
  21     Collection,
  22     Dict,
  23     Generic,
  24     Iterable,
  25     Iterator,
  26     List,
  27     Optional,
  28     Pattern,
  29     Sequence,
  30     Set,
  31     Tuple,
  32     Type,
  33     TypeVar,
  34     Union,
  35     cast,
  36 )
  37
  38 from appdirs import user_cache_dir
  39 from attr import dataclass, Factory
  40 import click
  41 import toml
  42
  43 # lib2to3 fork
  44 from blib2to3.pytree import Node, Leaf, type_repr
  45 from blib2to3 import pygram, pytree
  46 from blib2to3.pgen2 import driver, token
  47 from blib2to3.pgen2.parse import ParseError
  48
  49
# Version of Black; also used below to namespace the cache directory.
__version__ = "18.6b2"
# Default for the `--line-length` option.
DEFAULT_LINE_LENGTH = 88
# Default for the `--exclude` option (a regular expression).
DEFAULT_EXCLUDES = (
    r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
)
# Default for the `--include` option: names ending in `.py` or `.pyi`.
DEFAULT_INCLUDES = r"\.pyi?$"
# Directory that `appdirs` reports as the user cache dir for "black" at this
# version.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))


# types
# Plain aliases: they document intent in signatures and add no runtime checks.
FileContent = str
Encoding = str
NewLine = str
Depth = int
NodeType = int
LeafID = int  # `id()` of a leaf, as stored in `BracketTracker.delimiters`
Priority = int
Index = int
LN = Union[Leaf, Node]
# A split function takes a `Line` and a bool and yields `Line` objects.
SplitFunc = Callable[["Line", bool], Iterator["Line"]]
Timestamp = float
FileSize = int
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
# Output helpers built on `click.secho`; both pass `err=True`.
# `out` prints in bold, `err` prints in red.
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)

# NOTE(review): presumably loads the grammars, using CACHE_DIR for cached
# tables -- confirm against `blib2to3.pygram.initialize`.
pygram.initialize(CACHE_DIR)
syms = pygram.python_symbols
  79
  80
  81 class NothingChanged(UserWarning):
  82     """Raised by :func:`format_file` when reformatted code is the same as source."""
  83
  84
  85 class CannotSplit(Exception):
  86     """A readable split that fits the allotted line length is impossible.
  87
  88     Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
  89     :func:`delimiter_split`.
  90     """
  91
  92
  93 class FormatError(Exception):
  94     """Base exception for `# fmt: on` and `# fmt: off` handling.
  95
  96     It holds the number of bytes of the prefix consumed before the format
  97     control comment appeared.
  98     """
  99
 100     def __init__(self, consumed: int) -> None:
 101         super().__init__(consumed)
 102         self.consumed = consumed
 103
 104     def trim_prefix(self, leaf: Leaf) -> None:
 105         leaf.prefix = leaf.prefix[self.consumed :]
 106
 107     def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
 108         """Returns a new Leaf from the consumed part of the prefix."""
 109         unformatted_prefix = leaf.prefix[: self.consumed]
 110         return Leaf(token.NEWLINE, unformatted_prefix)
 111
 112
 113 class FormatOn(FormatError):
 114     """Found a comment like `# fmt: on` in the file."""
 115
 116
 117 class FormatOff(FormatError):
 118     """Found a comment like `# fmt: off` in the file."""
 119
 120
 121 class WriteBack(Enum):
 122     NO = 0
 123     YES = 1
 124     DIFF = 2
 125
 126     @classmethod
 127     def from_configuration(cls, *, check: bool, diff: bool) -> "WriteBack":
 128         if check and not diff:
 129             return cls.NO
 130
 131         return cls.DIFF if diff else cls.YES
 132
 133
class Changed(Enum):
    """Outcome of processing one source, as passed to `Report.done()`."""

    # Formatting ran and reported no change.
    NO = 0
    # The file matched its cache entry, so formatting was skipped.
    CACHED = 1
    # Formatting ran and reported a change.
    YES = 2
 138
 139
 140 class FileMode(Flag):
 141     AUTO_DETECT = 0
 142     PYTHON36 = 1
 143     PYI = 2
 144     NO_STRING_NORMALIZATION = 4
 145
 146     @classmethod
 147     def from_configuration(
 148         cls, *, py36: bool, pyi: bool, skip_string_normalization: bool
 149     ) -> "FileMode":
 150         mode = cls.AUTO_DETECT
 151         if py36:
 152             mode |= cls.PYTHON36
 153         if pyi:
 154             mode |= cls.PYI
 155         if skip_string_normalization:
 156             mode |= cls.NO_STRING_NORMALIZATION
 157         return mode
 158
 159
 160 def read_pyproject_toml(
 161     ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
 162 ) -> Optional[str]:
 163     """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
 164
 165     Returns the path to a successfully found and read configuration file, None
 166     otherwise.
 167     """
 168     assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
 169     if not value:
 170         root = find_project_root(ctx.params.get("src", ()))
 171         path = root / "pyproject.toml"
 172         if path.is_file():
 173             value = str(path)
 174         else:
 175             return None
 176
 177     try:
 178         pyproject_toml = toml.load(value)
 179         config = pyproject_toml.get("tool", {}).get("black", {})
 180     except (toml.TomlDecodeError, OSError) as e:
 181         raise click.BadOptionUsage(f"Error reading configuration file: {e}", ctx)
 182
 183     if not config:
 184         return None
 185
 186     if ctx.default_map is None:
 187         ctx.default_map = {}
 188     ctx.default_map.update(  # type: ignore  # bad types in .pyi
 189         {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
 190     )
 191     return value
 192
 193
# Command-line entry point.  The docstring of `main` is kept short because
# click shows it as the command's help text.
@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.option(
    "-l",
    "--line-length",
    type=int,
    default=DEFAULT_LINE_LENGTH,
    help="How many character per line to allow.",
    show_default=True,
)
@click.option(
    "--py36",
    is_flag=True,
    help=(
        "Allow using Python 3.6-only syntax on all input files.  This will put "
        "trailing commas in function signatures and calls also after *args and "
        "**kwargs.  [default: per-file auto-detection]"
    ),
)
@click.option(
    "--pyi",
    is_flag=True,
    help=(
        "Format all input files like typing stubs regardless of file extension "
        "(useful when piping source on standard input)."
    ),
)
@click.option(
    "-S",
    "--skip-string-normalization",
    is_flag=True,
    help="Don't normalize string quotes or prefixes.",
)
@click.option(
    "--check",
    is_flag=True,
    help=(
        "Don't write the files back, just return the status.  Return code 0 "
        "means nothing would change.  Return code 1 means some files would be "
        "reformatted.  Return code 123 means there was an internal error."
    ),
)
@click.option(
    "--diff",
    is_flag=True,
    help="Don't write the files back, just output a diff for each file on stdout.",
)
@click.option(
    "--fast/--safe",
    is_flag=True,
    help="If --fast given, skip temporary sanity checks. [default: --safe]",
)
@click.option(
    "--include",
    type=str,
    default=DEFAULT_INCLUDES,
    help=(
        "A regular expression that matches files and directories that should be "
        "included on recursive searches.  An empty value means all files are "
        "included regardless of the name.  Use forward slashes for directories on "
        "all platforms (Windows, too).  Exclusions are calculated first, inclusions "
        "later."
    ),
    show_default=True,
)
@click.option(
    "--exclude",
    type=str,
    default=DEFAULT_EXCLUDES,
    help=(
        "A regular expression that matches files and directories that should be "
        "excluded on recursive searches.  An empty value means no paths are excluded. "
        "Use forward slashes for directories on all platforms (Windows, too).  "
        "Exclusions are calculated first, inclusions later."
    ),
    show_default=True,
)
@click.option(
    "-q",
    "--quiet",
    is_flag=True,
    help=(
        "Don't emit non-error messages to stderr. Errors are still emitted, "
        "silence those with 2>/dev/null."
    ),
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help=(
        "Also emit messages to stderr about files that were not changed or were "
        "ignored due to --exclude=."
    ),
)
@click.version_option(version=__version__)
# `src` is eager, like `--config` below, whose callback reads
# `ctx.params["src"]` to locate the project root.
@click.argument(
    "src",
    nargs=-1,
    type=click.Path(
        exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
    ),
    is_eager=True,
)
@click.option(
    "--config",
    type=click.Path(
        exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
    ),
    is_eager=True,
    callback=read_pyproject_toml,
    help="Read configuration from PATH.",
)
@click.pass_context
def main(
    ctx: click.Context,
    line_length: int,
    check: bool,
    diff: bool,
    fast: bool,
    pyi: bool,
    py36: bool,
    skip_string_normalization: bool,
    quiet: bool,
    verbose: bool,
    include: str,
    exclude: str,
    src: Tuple[str, ...],
    config: Optional[str],
) -> None:
    """The uncompromising code formatter."""
    write_back = WriteBack.from_configuration(check=check, diff=diff)
    mode = FileMode.from_configuration(
        py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
    )
    if config and verbose:
        out(f"Using configuration from {config}.", bold=False, fg="blue")
    # An invalid include/exclude pattern is reported and ends the run with
    # exit code 2.
    try:
        include_regex = re_compile_maybe_verbose(include)
    except re.error:
        err(f"Invalid regular expression for include given: {include!r}")
        ctx.exit(2)
    try:
        exclude_regex = re_compile_maybe_verbose(exclude)
    except re.error:
        err(f"Invalid regular expression for exclude given: {exclude!r}")
        ctx.exit(2)
    report = Report(check=check, quiet=quiet, verbose=verbose)
    root = find_project_root(src)
    # Expand the given paths into the set of sources to format: directories
    # are searched with the include/exclude patterns, explicit files and "-"
    # (standard input) are taken as they are.
    sources: Set[Path] = set()
    for s in src:
        p = Path(s)
        if p.is_dir():
            sources.update(
                gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
            )
        elif p.is_file() or s == "-":
            # if a file was explicitly given, we don't care about its extension
            sources.add(p)
        else:
            err(f"invalid path: {s}")
    if len(sources) == 0:
        if verbose or not quiet:
            out("No paths given. Nothing to do 😴")
        ctx.exit(0)

    if len(sources) == 1:
        # A single source is formatted in this process.
        reformat_one(
            src=sources.pop(),
            line_length=line_length,
            fast=fast,
            write_back=write_back,
            mode=mode,
            report=report,
        )
    else:
        # Several sources are formatted through a process pool, driven by the
        # asyncio event loop.
        loop = asyncio.get_event_loop()
        executor = ProcessPoolExecutor(max_workers=os.cpu_count())
        try:
            loop.run_until_complete(
                schedule_formatting(
                    sources=sources,
                    line_length=line_length,
                    fast=fast,
                    write_back=write_back,
                    mode=mode,
                    report=report,
                    loop=loop,
                    executor=executor,
                )
            )
        finally:
            shutdown(loop)
    if verbose or not quiet:
        bang = "💥 💔 💥" if report.return_code else "✨ 🍰 ✨"
        out(f"All done! {bang}")
        click.secho(str(report), err=True)
    # The process exit status is the report's return code.
    ctx.exit(report.return_code)
 391
 392
 393 def reformat_one(
 394     src: Path,
 395     line_length: int,
 396     fast: bool,
 397     write_back: WriteBack,
 398     mode: FileMode,
 399     report: "Report",
 400 ) -> None:
 401     """Reformat a single file under `src` without spawning child processes.
 402
 403     If `quiet` is True, non-error messages are not output. `line_length`,
 404     `write_back`, `fast` and `pyi` options are passed to
 405     :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
 406     """
 407     try:
 408         changed = Changed.NO
 409         if not src.is_file() and str(src) == "-":
 410             if format_stdin_to_stdout(
 411                 line_length=line_length, fast=fast, write_back=write_back, mode=mode
 412             ):
 413                 changed = Changed.YES
 414         else:
 415             cache: Cache = {}
 416             if write_back != WriteBack.DIFF:
 417                 cache = read_cache(line_length, mode)
 418                 res_src = src.resolve()
 419                 if res_src in cache and cache[res_src] == get_cache_info(res_src):
 420                     changed = Changed.CACHED
 421             if changed is not Changed.CACHED and format_file_in_place(
 422                 src,
 423                 line_length=line_length,
 424                 fast=fast,
 425                 write_back=write_back,
 426                 mode=mode,
 427             ):
 428                 changed = Changed.YES
 429             if write_back == WriteBack.YES and changed is not Changed.NO:
 430                 write_cache(cache, [src], line_length, mode)
 431         report.done(src, changed)
 432     except Exception as exc:
 433         report.failed(src, str(exc))
 434
 435
 436 async def schedule_formatting(
 437     sources: Set[Path],
 438     line_length: int,
 439     fast: bool,
 440     write_back: WriteBack,
 441     mode: FileMode,
 442     report: "Report",
 443     loop: BaseEventLoop,
 444     executor: Executor,
 445 ) -> None:
 446     """Run formatting of `sources` in parallel using the provided `executor`.
 447
 448     (Use ProcessPoolExecutors for actual parallelism.)
 449
 450     `line_length`, `write_back`, `fast`, and `pyi` options are passed to
 451     :func:`format_file_in_place`.
 452     """
 453     cache: Cache = {}
 454     if write_back != WriteBack.DIFF:
 455         cache = read_cache(line_length, mode)
 456         sources, cached = filter_cached(cache, sources)
 457         for src in sorted(cached):
 458             report.done(src, Changed.CACHED)
 459     cancelled = []
 460     formatted = []
 461     if sources:
 462         lock = None
 463         if write_back == WriteBack.DIFF:
 464             # For diff output, we need locks to ensure we don't interleave output
 465             # from different processes.
 466             manager = Manager()
 467             lock = manager.Lock()
 468         tasks = {
 469             loop.run_in_executor(
 470                 executor,
 471                 format_file_in_place,
 472                 src,
 473                 line_length,
 474                 fast,
 475                 write_back,
 476                 mode,
 477                 lock,
 478             ): src
 479             for src in sorted(sources)
 480         }
 481         pending: Iterable[asyncio.Task] = tasks.keys()
 482         try:
 483             loop.add_signal_handler(signal.SIGINT, cancel, pending)
 484             loop.add_signal_handler(signal.SIGTERM, cancel, pending)
 485         except NotImplementedError:
 486             # There are no good alternatives for these on Windows
 487             pass
 488         while pending:
 489             done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
 490             for task in done:
 491                 src = tasks.pop(task)
 492                 if task.cancelled():
 493                     cancelled.append(task)
 494                 elif task.exception():
 495                     report.failed(src, str(task.exception()))
 496                 else:
 497                     formatted.append(src)
 498                     report.done(src, Changed.YES if task.result() else Changed.NO)
 499     if cancelled:
 500         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 501     if write_back == WriteBack.YES and formatted:
 502         write_cache(cache, formatted, line_length, mode)
 503
 504
 505 def format_file_in_place(
 506     src: Path,
 507     line_length: int,
 508     fast: bool,
 509     write_back: WriteBack = WriteBack.NO,
 510     mode: FileMode = FileMode.AUTO_DETECT,
 511     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 512 ) -> bool:
 513     """Format file under `src` path. Return True if changed.
 514
 515     If `write_back` is True, write reformatted code back to stdout.
 516     `line_length` and `fast` options are passed to :func:`format_file_contents`.
 517     """
 518     if src.suffix == ".pyi":
 519         mode |= FileMode.PYI
 520
 521     then = datetime.utcfromtimestamp(src.stat().st_mtime)
 522     with open(src, "rb") as buf:
 523         src_contents, encoding, newline = decode_bytes(buf.read())
 524     try:
 525         dst_contents = format_file_contents(
 526             src_contents, line_length=line_length, fast=fast, mode=mode
 527         )
 528     except NothingChanged:
 529         return False
 530
 531     if write_back == write_back.YES:
 532         with open(src, "w", encoding=encoding, newline=newline) as f:
 533             f.write(dst_contents)
 534     elif write_back == write_back.DIFF:
 535         now = datetime.utcnow()
 536         src_name = f"{src}\t{then} +0000"
 537         dst_name = f"{src}\t{now} +0000"
 538         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 539         if lock:
 540             lock.acquire()
 541         try:
 542             f = io.TextIOWrapper(
 543                 sys.stdout.buffer,
 544                 encoding=encoding,
 545                 newline=newline,
 546                 write_through=True,
 547             )
 548             f.write(diff_contents)
 549             f.detach()
 550         finally:
 551             if lock:
 552                 lock.release()
 553     return True
 554
 555
 556 def format_stdin_to_stdout(
 557     line_length: int,
 558     fast: bool,
 559     write_back: WriteBack = WriteBack.NO,
 560     mode: FileMode = FileMode.AUTO_DETECT,
 561 ) -> bool:
 562     """Format file on stdin. Return True if changed.
 563
 564     If `write_back` is True, write reformatted code back to stdout.
 565     `line_length`, `fast`, `is_pyi`, and `force_py36` arguments are passed to
 566     :func:`format_file_contents`.
 567     """
 568     then = datetime.utcnow()
 569     src, encoding, newline = decode_bytes(sys.stdin.buffer.read())
 570     dst = src
 571     try:
 572         dst = format_file_contents(src, line_length=line_length, fast=fast, mode=mode)
 573         return True
 574
 575     except NothingChanged:
 576         return False
 577
 578     finally:
 579         f = io.TextIOWrapper(
 580             sys.stdout.buffer, encoding=encoding, newline=newline, write_through=True
 581         )
 582         if write_back == WriteBack.YES:
 583             f.write(dst)
 584         elif write_back == WriteBack.DIFF:
 585             now = datetime.utcnow()
 586             src_name = f"STDIN\t{then} +0000"
 587             dst_name = f"STDOUT\t{now} +0000"
 588             f.write(diff(src, dst, src_name, dst_name))
 589         f.detach()
 590
 591
 592 def format_file_contents(
 593     src_contents: str,
 594     *,
 595     line_length: int,
 596     fast: bool,
 597     mode: FileMode = FileMode.AUTO_DETECT,
 598 ) -> FileContent:
 599     """Reformat contents a file and return new contents.
 600
 601     If `fast` is False, additionally confirm that the reformatted code is
 602     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 603     `line_length` is passed to :func:`format_str`.
 604     """
 605     if src_contents.strip() == "":
 606         raise NothingChanged
 607
 608     dst_contents = format_str(src_contents, line_length=line_length, mode=mode)
 609     if src_contents == dst_contents:
 610         raise NothingChanged
 611
 612     if not fast:
 613         assert_equivalent(src_contents, dst_contents)
 614         assert_stable(src_contents, dst_contents, line_length=line_length, mode=mode)
 615     return dst_contents
 616
 617
 618 def format_str(
 619     src_contents: str, line_length: int, *, mode: FileMode = FileMode.AUTO_DETECT
 620 ) -> FileContent:
 621     """Reformat a string and return new contents.
 622
 623     `line_length` determines how many characters per line are allowed.
 624     """
 625     src_node = lib2to3_parse(src_contents)
 626     dst_contents = ""
 627     future_imports = get_future_imports(src_node)
 628     is_pyi = bool(mode & FileMode.PYI)
 629     py36 = bool(mode & FileMode.PYTHON36) or is_python36(src_node)
 630     normalize_strings = not bool(mode & FileMode.NO_STRING_NORMALIZATION)
 631     lines = LineGenerator(
 632         remove_u_prefix=py36 or "unicode_literals" in future_imports,
 633         is_pyi=is_pyi,
 634         normalize_strings=normalize_strings,
 635     )
 636     elt = EmptyLineTracker(is_pyi=is_pyi)
 637     empty_line = Line()
 638     after = 0
 639     for current_line in lines.visit(src_node):
 640         for _ in range(after):
 641             dst_contents += str(empty_line)
 642         before, after = elt.maybe_empty_lines(current_line)
 643         for _ in range(before):
 644             dst_contents += str(empty_line)
 645         for line in split_line(current_line, line_length=line_length, py36=py36):
 646             dst_contents += str(line)
 647     return dst_contents
 648
 649
 650 def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
 651     """Return a tuple of (decoded_contents, encoding, newline).
 652
 653     `newline` is either CRLF or LF but `decoded_contents` is decoded with
 654     universal newlines (i.e. only contains LF).
 655     """
 656     srcbuf = io.BytesIO(src)
 657     encoding, lines = tokenize.detect_encoding(srcbuf.readline)
 658     if not lines:
 659         return "", encoding, "\n"
 660
 661     newline = "\r\n" if b"\r\n" == lines[0][-2:] else "\n"
 662     srcbuf.seek(0)
 663     with io.TextIOWrapper(srcbuf, encoding) as tiow:
 664         return tiow.read(), encoding, newline
 665
 666
# Grammars that `lib2to3_parse` tries, in this order, until one of them
# parses the source.
GRAMMARS = [
    pygram.python_grammar_no_print_statement_no_exec_statement,
    pygram.python_grammar_no_print_statement,
    pygram.python_grammar,
]
 672
 673
 674 def lib2to3_parse(src_txt: str) -> Node:
 675     """Given a string with source, return the lib2to3 Node."""
 676     grammar = pygram.python_grammar_no_print_statement
 677     if src_txt[-1:] != "\n":
 678         src_txt += "\n"
 679     for grammar in GRAMMARS:
 680         drv = driver.Driver(grammar, pytree.convert)
 681         try:
 682             result = drv.parse_string(src_txt, True)
 683             break
 684
 685         except ParseError as pe:
 686             lineno, column = pe.context[1]
 687             lines = src_txt.splitlines()
 688             try:
 689                 faulty_line = lines[lineno - 1]
 690             except IndexError:
 691                 faulty_line = "<line number missing in source>"
 692             exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 693     else:
 694         raise exc from None
 695
 696     if isinstance(result, Leaf):
 697         result = Node(syms.file_input, [result])
 698     return result
 699
 700
 701 def lib2to3_unparse(node: Node) -> str:
 702     """Given a lib2to3 node, return its string representation."""
 703     code = str(node)
 704     return code
 705
 706
# Type of the objects a `Visitor` yields from `visit()`.
T = TypeVar("T")
 708
 709
 710 class Visitor(Generic[T]):
 711     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 712
 713     def visit(self, node: LN) -> Iterator[T]:
 714         """Main method to visit `node` and its children.
 715
 716         It tries to find a `visit_*()` method for the given `node.type`, like
 717         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 718         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 719         instead.
 720
 721         Then yields objects of type `T` from the selected visitor.
 722         """
 723         if node.type < 256:
 724             name = token.tok_name[node.type]
 725         else:
 726             name = type_repr(node.type)
 727         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 728
 729     def visit_default(self, node: LN) -> Iterator[T]:
 730         """Default `visit_*()` implementation. Recurses to children of `node`."""
 731         if isinstance(node, Node):
 732             for child in node.children:
 733                 yield from self.visit(child)
 734
 735
 736 @dataclass
 737 class DebugVisitor(Visitor[T]):
 738     tree_depth: int = 0
 739
 740     def visit_default(self, node: LN) -> Iterator[T]:
 741         indent = " " * (2 * self.tree_depth)
 742         if isinstance(node, Node):
 743             _type = type_repr(node.type)
 744             out(f"{indent}{_type}", fg="yellow")
 745             self.tree_depth += 1
 746             for child in node.children:
 747                 yield from self.visit(child)
 748
 749             self.tree_depth -= 1
 750             out(f"{indent}/{_type}", fg="yellow", bold=False)
 751         else:
 752             _type = token.tok_name.get(node.type, str(node.type))
 753             out(f"{indent}{_type}", fg="blue", nl=False)
 754             if node.prefix:
 755                 # We don't have to handle prefixes for `Node` objects since
 756                 # that delegates to the first child anyway.
 757                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 758             out(f" {node.value!r}", fg="blue", bold=False)
 759
 760     @classmethod
 761     def show(cls, code: str) -> None:
 762         """Pretty-print the lib2to3 AST of a given string of `code`.
 763
 764         Convenience method for debugging.
 765         """
 766         v: DebugVisitor[None] = DebugVisitor()
 767         list(v.visit(lib2to3_parse(code)))
 768
 769
# All Python keywords, as listed by the `keyword` module.
KEYWORDS = set(keyword.kwlist)
# Token types of whitespace tokens.
WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
# Keywords that start a flow control statement.
FLOW_CONTROL = {"return", "raise", "break", "continue"}
# Grammar symbols of the compound statements (and of `except` clauses).
STATEMENT = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
# NOTE(review): looks like a custom token type number for standalone
# comments -- confirm it cannot collide with the types in `token`.
STANDALONE_COMMENT = 153
LOGIC_OPERATORS = {"and", "or"}
# Token types of the comparison operators.
COMPARATORS = {
    token.LESS,
    token.GREATER,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESSEQUAL,
    token.GREATEREQUAL,
}
# Token types of the arithmetic and bitwise operators.
MATH_OPERATORS = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
STARS = {token.STAR, token.DOUBLESTAR}
# NOTE(review): judging by the name, parent node types under which a star is
# a varargs marker -- confirm against the users of this set.
VARARGS_PARENTS = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
# NOTE(review): judging by the name, parent node types under which a star is
# an unpacking -- confirm against the users of this set.
UNPACKING_PARENTS = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
    syms.testlist_star_expr,
}
# Expression-level grammar symbols, from `test` down to `power`.
TEST_DESCENDANTS = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
# The plain and augmented assignment operators.
ASSIGNMENTS = {
    "=",
    "+=",
    "-=",
    "*=",
    "@=",
    "/=",
    "%=",
    "&=",
    "|=",
    "^=",
    "<<=",
    ">>=",
    "**=",
    "//=",
}
# Delimiter priorities.  A larger number is a higher priority:
# `BracketTracker.max_delimiter_priority()` returns the maximum on a line.
COMPREHENSION_PRIORITY = 20
COMMA_PRIORITY = 18
TERNARY_PRIORITY = 16
LOGIC_PRIORITY = 14
STRING_PRIORITY = 12
COMPARATOR_PRIORITY = 10
# Priority of each operator in MATH_OPERATORS, keyed by token type.
MATH_PRIORITIES = {
    token.VBAR: 9,
    token.CIRCUMFLEX: 8,
    token.AMPER: 7,
    token.LEFTSHIFT: 6,
    token.RIGHTSHIFT: 6,
    token.PLUS: 5,
    token.MINUS: 5,
    token.STAR: 4,
    token.SLASH: 4,
    token.DOUBLESLASH: 4,
    token.PERCENT: 4,
    token.AT: 4,
    token.TILDE: 3,
    token.DOUBLESTAR: 2,
}
DOT_PRIORITY = 1
 880
 881
 882 @dataclass
 883 class BracketTracker:
 884     """Keeps track of brackets on a line."""
 885
 886     depth: int = 0
 887     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
 888     delimiters: Dict[LeafID, Priority] = Factory(dict)
 889     previous: Optional[Leaf] = None
 890     _for_loop_variable: int = 0
 891     _lambda_arguments: int = 0
 892
 893     def mark(self, leaf: Leaf) -> None:
 894         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
 895
 896         All leaves receive an int `bracket_depth` field that stores how deep
 897         within brackets a given leaf is. 0 means there are no enclosing brackets
 898         that started on this line.
 899
 900         If a leaf is itself a closing bracket, it receives an `opening_bracket`
 901         field that it forms a pair with. This is a one-directional link to
 902         avoid reference cycles.
 903
 904         If a leaf is a delimiter (a token on which Black can split the line if
 905         needed) and it's on depth 0, its `id()` is stored in the tracker's
 906         `delimiters` field.
 907         """
 908         if leaf.type == token.COMMENT:
 909             return
 910
 911         self.maybe_decrement_after_for_loop_variable(leaf)
 912         self.maybe_decrement_after_lambda_arguments(leaf)
 913         if leaf.type in CLOSING_BRACKETS:
 914             self.depth -= 1
 915             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
 916             leaf.opening_bracket = opening_bracket
 917         leaf.bracket_depth = self.depth
 918         if self.depth == 0:
 919             delim = is_split_before_delimiter(leaf, self.previous)
 920             if delim and self.previous is not None:
 921                 self.delimiters[id(self.previous)] = delim
 922             else:
 923                 delim = is_split_after_delimiter(leaf, self.previous)
 924                 if delim:
 925                     self.delimiters[id(leaf)] = delim
 926         if leaf.type in OPENING_BRACKETS:
 927             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
 928             self.depth += 1
 929         self.previous = leaf
 930         self.maybe_increment_lambda_arguments(leaf)
 931         self.maybe_increment_for_loop_variable(leaf)
 932
 933     def any_open_brackets(self) -> bool:
 934         """Return True if there is an yet unmatched open bracket on the line."""
 935         return bool(self.bracket_match)
 936
 937     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
 938         """Return the highest priority of a delimiter found on the line.
 939
 940         Values are consistent with what `is_split_*_delimiter()` return.
 941         Raises ValueError on no delimiters.
 942         """
 943         return max(v for k, v in self.delimiters.items() if k not in exclude)
 944
 945     def delimiter_count_with_priority(self, priority: int = 0) -> int:
 946         """Return the number of delimiters with the given `priority`.
 947
 948         If no `priority` is passed, defaults to max priority on the line.
 949         """
 950         if not self.delimiters:
 951             return 0
 952
 953         priority = priority or self.max_delimiter_priority()
 954         return sum(1 for p in self.delimiters.values() if p == priority)
 955
 956     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
 957         """In a for loop, or comprehension, the variables are often unpacks.
 958
 959         To avoid splitting on the comma in this situation, increase the depth of
 960         tokens between `for` and `in`.
 961         """
 962         if leaf.type == token.NAME and leaf.value == "for":
 963             self.depth += 1
 964             self._for_loop_variable += 1
 965             return True
 966
 967         return False
 968
 969     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
 970         """See `maybe_increment_for_loop_variable` above for explanation."""
 971         if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
 972             self.depth -= 1
 973             self._for_loop_variable -= 1
 974             return True
 975
 976         return False
 977
 978     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
 979         """In a lambda expression, there might be more than one argument.
 980
 981         To avoid splitting on the comma in this situation, increase the depth of
 982         tokens between `lambda` and `:`.
 983         """
 984         if leaf.type == token.NAME and leaf.value == "lambda":
 985             self.depth += 1
 986             self._lambda_arguments += 1
 987             return True
 988
 989         return False
 990
 991     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
 992         """See `maybe_increment_lambda_arguments` above for explanation."""
 993         if self._lambda_arguments and leaf.type == token.COLON:
 994             self.depth -= 1
 995             self._lambda_arguments -= 1
 996             return True
 997
 998         return False
 999
1000     def get_open_lsqb(self) -> Optional[Leaf]:
1001         """Return the most recent opening square bracket (if any)."""
1002         return self.bracket_match.get((self.depth - 1, token.RSQB))
1003
1004
@dataclass
class Line:
    """Holds leaves and comments. Can be printed with `str(line)`.

    Leaves are kept in `leaves` in the order they were appended.  Inline
    comments are stored apart in `comments`, each paired with the index of
    the leaf it follows.
    """

    # Indentation level; rendered as four spaces per level by `__str__()`.
    depth: int = 0
    # Leaves in the order they were appended (standalone comments included).
    leaves: List[Leaf] = Factory(list)
    # Inline comments as `(index of the leaf they follow, comment leaf)`.
    comments: List[Tuple[Index, Leaf]] = Factory(list)
    # Tracks bracket depth and delimiters of the leaves appended so far.
    bracket_tracker: BracketTracker = Factory(BracketTracker)
    # When True, even preformatted leaves are run through the bracket tracker.
    inside_brackets: bool = False
    # Not read inside this class.  NOTE(review): presumably a hint for the
    # line-splitting code -- confirm against its users.
    should_explode: bool = False

    def append(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Add a new `leaf` to the end of the line.

        Unless `preformatted` is True, the `leaf` will receive a new consistent
        whitespace prefix and metadata applied by :class:`BracketTracker`.
        Trailing commas are maybe removed, unpacked for loop variables are
        demoted from being delimiters.

        Leaves that are neither brackets nor carry a non-blank value are
        dropped.  Inline comments are put aside.
        """
        has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
        if not has_value:
            return

        if token.COLON == leaf.type and self.is_class_paren_empty:
            # `class A():` -> `class A:`; drop the redundant empty parentheses.
            del self.leaves[-2:]
        if self.leaves and not preformatted:
            # Note: at this point leaf.prefix should be empty except for
            # imports, for which we only preserve newlines.
            leaf.prefix += whitespace(
                leaf, complex_subscript=self.is_complex_subscript(leaf)
            )
        if self.inside_brackets or not preformatted:
            self.bracket_tracker.mark(leaf)
            self.maybe_remove_trailing_comma(leaf)
        if not self.append_comment(leaf):
            self.leaves.append(leaf)

    def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
        """Like :func:`append()` but disallow invalid standalone comment structure.

        Raises ValueError when any `leaf` is appended after a standalone comment
        or when a standalone comment is not the first leaf on the line.  The
        checks only apply while the bracket tracker is at depth 0.
        """
        if self.bracket_tracker.depth == 0:
            if self.is_comment:
                raise ValueError("cannot append to standalone comments")

            if self.leaves and leaf.type == STANDALONE_COMMENT:
                raise ValueError(
                    "cannot append standalone comments to a populated line"
                )

        self.append(leaf, preformatted=preformatted)

    @property
    def is_comment(self) -> bool:
        """Is this line a standalone comment?"""
        return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT

    @property
    def is_decorator(self) -> bool:
        """Is this line a decorator?"""
        return bool(self) and self.leaves[0].type == token.AT

    @property
    def is_import(self) -> bool:
        """Is this an import line?"""
        return bool(self) and is_import(self.leaves[0])

    @property
    def is_class(self) -> bool:
        """Is this line a class definition?"""
        return (
            bool(self)
            and self.leaves[0].type == token.NAME
            and self.leaves[0].value == "class"
        )

    @property
    def is_stub_class(self) -> bool:
        """Is this line a class definition with a body consisting only of "..."?"""
        return self.is_class and self.leaves[-3:] == [
            Leaf(token.DOT, ".") for _ in range(3)
        ]

    @property
    def is_def(self) -> bool:
        """Is this a function definition? (Also returns True for async defs.)"""
        leaves = self.leaves
        if not leaves:
            return False

        first_leaf = leaves[0]
        if first_leaf.type == token.NAME and first_leaf.value == "def":
            return True

        # Otherwise only `async def` qualifies, which needs a second leaf.
        if first_leaf.type != token.ASYNC or len(leaves) < 2:
            return False

        second_leaf = leaves[1]
        return second_leaf.type == token.NAME and second_leaf.value == "def"

    @property
    def is_class_paren_empty(self) -> bool:
        """Is this a class with no base classes but using parentheses?

        Those are unnecessary and should be removed.
        """
        return (
            bool(self)
            and len(self.leaves) == 4
            and self.is_class
            and self.leaves[2].type == token.LPAR
            and self.leaves[2].value == "("
            and self.leaves[3].type == token.RPAR
            and self.leaves[3].value == ")"
        )

    @property
    def is_triple_quoted_string(self) -> bool:
        """Is the line a triple quoted string?"""
        return (
            bool(self)
            and self.leaves[0].type == token.STRING
            and self.leaves[0].value.startswith(('"""', "'''"))
        )

    def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
        """Does the line hold a standalone comment at or below `depth_limit`?

        If so, needs to be split before emitting.
        """
        return any(
            leaf.type == STANDALONE_COMMENT and leaf.bracket_depth <= depth_limit
            for leaf in self.leaves
        )

    def contains_multiline_strings(self) -> bool:
        """Does any leaf on the line satisfy `is_multiline_string()`?"""
        return any(is_multiline_string(leaf) for leaf in self.leaves)

    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
        """Remove trailing comma if there is one and it's safe.

        `closing` is the leaf about to be appended.  Nothing happens unless it
        is a closing bracket and the last leaf on the line is a comma.

        Returns True if the comma was removed, False otherwise.
        """
        if not (
            self.leaves
            and self.leaves[-1].type == token.COMMA
            and closing.type in CLOSING_BRACKETS
        ):
            return False

        if closing.type == token.RBRACE:
            self.remove_trailing_comma()
            return True

        if closing.type == token.RSQB:
            comma = self.leaves[-1]
            if comma.parent and comma.parent.type == syms.listmaker:
                self.remove_trailing_comma()
                return True

        # For parens let's check if it's safe to remove the comma.
        # Imports are always safe.
        if self.is_import:
            self.remove_trailing_comma()
            return True

        # Otherwise, if the trailing one is the only one, we might mistakenly
        # change a tuple into a different type by removing the comma.
        depth = closing.bracket_depth + 1
        commas = 0
        opening = closing.opening_bracket
        for opening_index, leaf in enumerate(self.leaves):
            if leaf is opening:
                break

        else:
            # The matching opening bracket is not on this line.
            return False

        for leaf in self.leaves[opening_index + 1 :]:
            if leaf is closing:
                break

            if leaf.bracket_depth == depth and leaf.type == token.COMMA:
                commas += 1
                if leaf.parent and leaf.parent.type == syms.arglist:
                    # A comma in an argument list counts double, which makes
                    # the trailing comma removable right away.
                    commas += 1
                    break

        if commas > 1:
            self.remove_trailing_comma()
            return True

        return False

    def append_comment(self, comment: Leaf) -> bool:
        """Add an inline or standalone comment to the line.

        Returns True if `comment` was stored in `comments` as an inline
        comment.  Returns False when the caller should treat it as a regular
        leaf instead: it is not a comment at all, it is a standalone comment
        inside open brackets, or it is a comment on a line without leaves (in
        which case it is converted to a standalone comment first).
        """
        if (
            comment.type == STANDALONE_COMMENT
            and self.bracket_tracker.any_open_brackets()
        ):
            comment.prefix = ""
            return False

        if comment.type != token.COMMENT:
            return False

        after = len(self.leaves) - 1
        if after == -1:
            # Nothing to attach the comment to; it becomes a standalone one.
            comment.type = STANDALONE_COMMENT
            comment.prefix = ""
            return False

        else:
            self.comments.append((after, comment))
            return True

    def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
        """Generate comments that should appear directly after `leaf`.

        Provide a non-negative leaf `_index` to speed up the function.
        Nothing is generated if `leaf` is not on this line.
        """
        if not self.comments:
            return

        if _index == -1:
            for _index, _leaf in enumerate(self.leaves):
                if leaf is _leaf:
                    break

            else:
                return

        for index, comment_after in self.comments:
            if _index == index:
                yield comment_after

    def remove_trailing_comma(self) -> None:
        """Remove the trailing comma and move the comments attached to it.

        Comments that followed the comma are re-attached to the leaf before
        it.  The last leaf is removed without checking that it is a comma.
        """
        comma_index = len(self.leaves) - 1
        for i, (comment_index, comment) in enumerate(self.comments):
            if comment_index == comma_index:
                self.comments[i] = (comma_index - 1, comment)
        self.leaves.pop()

    def is_complex_subscript(self, leaf: Leaf) -> bool:
        """Return True iff `leaf` is part of a slice with non-trivial exprs."""
        open_lsqb = self.bracket_tracker.get_open_lsqb()
        if open_lsqb is None:
            return False

        subscript_start = open_lsqb.next_sibling

        if isinstance(subscript_start, Node):
            if subscript_start.type == syms.listmaker:
                return False

            if subscript_start.type == syms.subscriptlist:
                subscript_start = child_towards(subscript_start, leaf)
        return subscript_start is not None and any(
            n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
        )

    def __str__(self) -> str:
        """Render the line.

        The first leaf is rendered as prefix, indentation, value; all other
        leaves and then all comments are rendered with `str()`.  A falsy line
        renders as a single newline.
        """
        if not self:
            return "\n"

        indent = "    " * self.depth
        leaves = iter(self.leaves)
        first = next(leaves)
        parts = [f"{first.prefix}{indent}{first.value}"]
        parts.extend(str(leaf) for leaf in leaves)
        parts.extend(str(comment) for _, comment in self.comments)
        parts.append("\n")
        return "".join(parts)

    def __bool__(self) -> bool:
        """Return True if the line has leaves or comments."""
        return bool(self.leaves or self.comments)
1293
1294
class UnformattedLines(Line):
    """A :class:`Line` whose leaves are stored verbatim, without reformatting."""

    def append(self, leaf: Leaf, preformatted: bool = True) -> None:
        """Store `leaf` as-is at the end of the lines.

        `preformatted` is accepted for interface compatibility and ignored.

        The comments of `leaf` are generated only so that a `FormatOn` raised
        along the way can be noticed.  In that case the leaf returned by
        `leaf_from_consumed()` is stored instead and the exception is
        re-raised for the caller to handle.

        INDENT and DEDENT leaves adjust the indentation `depth`, which is
        useful when the user says `# fmt: on`.
        """
        try:
            for _ in generate_comments(leaf):
                pass
        except FormatOn as format_on:
            self.leaves.append(format_on.leaf_from_consumed(leaf))
            raise

        self.leaves.append(leaf)
        leaf_type = leaf.type
        if leaf_type == token.INDENT:
            self.depth += 1
        elif leaf_type == token.DEDENT:
            self.depth -= 1

    def __str__(self) -> str:
        """Render the stored leaves one after another, exactly as appended.

        Unlike in :class:`Line`, `depth` plays no role in the output.
        """
        if not self:
            return "\n"

        return "".join(str(leaf) for leaf in self.leaves)

    def append_comment(self, comment: Leaf) -> bool:
        """Unsupported here; always raises `NotImplementedError`."""
        raise NotImplementedError("Unformatted lines don't store comments separately.")

    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
        """No-op: trailing commas are never removed.  Always returns False."""
        return False

    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
        """No-op: the depth is never adjusted.  Always returns False."""
        return False
1342
1343
@dataclass
class EmptyLineTracker:
    """Computes, line by line, how many extra empty lines surround each line.

    The tracker is stateful: it remembers the previously seen line, the
    number of lines requested after it, and the depths of the definitions
    that are still open.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """

    is_pyi: bool = False
    previous_line: Optional[Line] = None
    previous_after: int = 0
    previous_defs: List[int] = Factory(list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level).

        Lines already requested after the previous line are subtracted from
        the "before" count.  Unformatted lines never get extra empty lines and
        leave the tracker's state untouched.
        """
        if isinstance(current_line, UnformattedLines):
            return 0, 0

        before, after = self._maybe_empty_lines(current_line)
        adjusted_before = before - self.previous_after
        self.previous_line = current_line
        self.previous_after = after
        return adjusted_before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw (before, after) counts for `current_line`."""
        depth = current_line.depth
        # Two empty lines are only ever allowed on module level outside stubs.
        max_allowed = 2 if depth == 0 and not self.is_pyi else 1
        before = 0
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = min(first_leaf.prefix.count("\n"), max_allowed)
            first_leaf.prefix = ""

        # Leaving the body of one or more definitions forces a fixed count.
        if self.is_pyi:
            before_after_def = 0 if depth else 1
        else:
            before_after_def = 1 if depth else 2
        open_defs = self.previous_defs
        while open_defs and open_defs[-1] >= depth:
            open_defs.pop()
            before = before_after_def

        if current_line.is_decorator or current_line.is_def or current_line.is_class:
            return self._maybe_empty_lines_for_class_or_def(current_line, before)

        previous_line = self.previous_line
        if previous_line:
            if (
                previous_line.is_import
                and not current_line.is_import
                and depth == previous_line.depth
            ):
                # At least one empty line after a block of imports.
                return (before or 1), 0

            if previous_line.is_class and current_line.is_triple_quoted_string:
                # One empty line after a class docstring.
                return before, 1

        return before, 0

    def _maybe_empty_lines_for_class_or_def(
        self, current_line: Line, before: int
    ) -> Tuple[int, int]:
        """Compute the counts for a decorator, `def` or `class` line."""
        if not current_line.is_decorator:
            self.previous_defs.append(current_line.depth)

        previous_line = self.previous_line
        if previous_line is None:
            # Don't insert empty lines before the first line in the file.
            return 0, 0

        if previous_line.is_decorator:
            return 0, 0

        if previous_line.depth < current_line.depth and (
            previous_line.is_class or previous_line.is_def
        ):
            # First definition nested directly inside a class or function.
            return 0, 0

        if (
            previous_line.is_comment
            and previous_line.depth == current_line.depth
            and before == 0
        ):
            # A comment directly above stays attached to the definition.
            return 0, 0

        if not self.is_pyi:
            newlines = 2
        elif previous_line.depth > current_line.depth:
            newlines = 1
        elif current_line.is_class or previous_line.is_class:
            # No blank line between classes with an empty body
            both_stubs = current_line.is_stub_class and previous_line.is_stub_class
            newlines = 0 if both_stubs else 1
        elif current_line.is_def and not previous_line.is_def:
            # Blank line between a block of functions and a block of non-functions
            newlines = 1
        else:
            newlines = 0

        if current_line.depth and newlines:
            newlines -= 1
        return newlines, 0
1456
1457
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Leaves are accumulated in `current_line`; the `visit_*()` methods decide
    when that line is complete and yield it through `line()`.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    # Enables the special handling of stub suites and stub bodies in
    # `visit_suite()` and `visit_simple_stmt()`.
    is_pyi: bool = False
    # When True, STRING leaves get their prefix and quotes normalized.
    normalize_strings: bool = True
    # The line currently being filled with leaves.
    current_line: Line = Factory(Line)
    # Passed through to `normalize_string_prefix()`.
    remove_u_prefix: bool = False

    def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
        """Yield the current line if it isn't empty and start a new one.

        `indent` is added to the depth of the line that comes next, `type` is
        the class that line is created from.

        If the current line is empty, nothing is yielded: its depth is
        adjusted in place when it already is an instance of exactly `type`,
        otherwise it is replaced by a new `type` instance.
        """
        if not self.current_line:
            # `type` is shadowed by the parameter, hence the `__class__` check.
            if self.current_line.__class__ == type:
                self.current_line.depth += indent
            else:
                self.current_line = type(depth=self.current_line.depth + indent)
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = type(depth=complete_line.depth + indent)
        yield complete_line

    def visit(self, node: LN) -> Iterator[Line]:
        """Main method to visit `node` and its children.

        While the current line is an :class:`UnformattedLines`, everything is
        routed to `visit_unformatted()` instead of the regular dispatch.

        Yields :class:`Line` objects.
        """
        if isinstance(self.current_line, UnformattedLines):
            # File contained `# fmt: off`
            yield from self.visit_unformatted(node)

        else:
            yield from super().visit(node)

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`.

        For a leaf, the comments produced by `generate_comments()` are handled
        first, then the leaf itself is normalized and appended to the current
        line unless its type is in `WHITESPACE`.
        """
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            try:
                for comment in generate_comments(node):
                    if any_open_brackets:
                        # any comment within brackets is subject to splitting
                        self.current_line.append(comment)
                    elif comment.type == token.COMMENT:
                        # regular trailing comment
                        self.current_line.append(comment)
                        yield from self.line()

                    else:
                        # regular standalone comment
                        yield from self.line()

                        self.current_line.append(comment)
                        yield from self.line()

            except FormatOff as f_off:
                # Switch to an unformatted line; `visit()` then routes `node`
                # to `visit_unformatted()`.
                f_off.trim_prefix(node)
                yield from self.line(type=UnformattedLines)
                yield from self.visit(node)

            except FormatOn as f_on:
                # This only happens here if somebody says "fmt: on" multiple
                # times in a row.
                f_on.trim_prefix(node)
                yield from self.visit_default(node)

            else:
                # No formatting directive was raised: handle the leaf itself.
                normalize_prefix(node, inside_brackets=any_open_brackets)
                if self.normalize_strings and node.type == token.STRING:
                    normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
                    normalize_string_quotes(node)
                if node.type not in WHITESPACE:
                    self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Node) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Node) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert` and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This method puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                # Finish whatever was accumulated before the keyword.
                yield from self.line()

            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite.

        In stub mode a stub suite is reduced to its third child; all other
        children of the suite are skipped.
        """
        if self.is_pyi and is_stub_suite(node):
            # NOTE(review): presumably children[2] is the only statement of
            # the suite -- confirm against `is_stub_suite()`.
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        # NOTE(review): a parent in STATEMENT presumably means the statement
        # directly forms the body of a compound statement -- confirm.
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if self.is_pyi and is_stub_body(node):
                # Stub bodies are visited without starting a new line.
                yield from self.visit_default(node)
            else:
                # Emit the statement on its own line, one level deeper.
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # The child right after ASYNC is the wrapped statement.  Its children
        # are visited directly, so its own `visit_*()` method is not invoked.
        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators.

        The current line is finished before each child is visited.
        """
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        # The semicolon leaf itself is never appended to a line.
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_unformatted(self, node: LN) -> Iterator[Line]:
        """Used when file contained a `# fmt: off`.

        Nodes are recursed into; leaves are appended to the current line,
        which `visit()` guarantees to be an :class:`UnformattedLines` here.
        """
        if isinstance(node, Node):
            for child in node.children:
                yield from self.visit(child)

        else:
            try:
                self.current_line.append(node)
            except FormatOn as f_on:
                # Formatting was switched back on: emit the unformatted lines
                # and visit `node` again, now through the regular dispatch.
                f_on.trim_prefix(node)
                yield from self.line()
                yield from self.visit(node)

            if node.type == token.ENDMARKER:
                # somebody decided not to put a final `# fmt: on`
                yield from self.line()

    def __attrs_post_init__(self) -> None:
        """You are in a twisty little maze of passages.

        Binds the statement-specific `visit_*()` methods as partials of
        `visit_stmt()`, each with its own `keywords` and `parens`.
        """
        v = self.visit_stmt
        # Shared empty set for statements without keywords or parens.
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ø, parens={"import"})
        # Plain aliases: these node types reuse existing visitors unchanged.
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
1677
1678
# Grammar symbols of comma-separated expression lists.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps every opening bracket token to its closing counterpart.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Leaf types for which `whitespace()` never returns a prefix.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
1685
1686
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    The result is the empty string, a single space, two spaces (inline comments
    only) or, right after a default-value equals sign in a typed argument list,
    whatever prefix that equals sign already carries.

    Raises AssertionError when a leaf that is neither a comment nor one of the
    `ALWAYS_NO_SPACE` tokens has no parent.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    # Colons only ever get a space when they are part of a subscript/slice.
    if t == token.COLON and p.type not in {
        syms.subscript,
        syms.subscriptlist,
        syms.sliceop,
    }:
        return NO

    prev = leaf.prev_sibling
    if not prev:
        # `leaf` is the first child of its parent: decide based on the leaf
        # that textually precedes the parent node instead.
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist,
                    syms.argument,
                    syms.parameters,
                    syms.varargslist,
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument.  So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in STARS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    # From here on the decision depends on the kind of node `leaf` lives in.
    # Any branch that does not return falls through to the final SPACE.
    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument.  So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | STARS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        # dict unpacking
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if prevp.type == token.COLON and prevp_parent.type in {
                syms.subscript,
                syms.sliceop,
            }:
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t in {token.NAME, token.NUMBER, token.STRING}:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    # A trailing `elif p.type == syms.sliceop` branch used to sit here; it could
    # never run because sliceop parents are already handled by the "indexing"
    # branch above, so it has been dropped.
    return SPACE
1915
1916
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the leaf that comes right before `node`, or None if there is none.

    Walks up through the parents of `node` until one of them has a previous
    sibling.  If that sibling is a leaf it is the answer; otherwise the answer
    is the sibling's last leaf (None when the sibling has no leaves at all).
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if not sibling:
            # Nothing to the left at this level; try one level up.
            current = current.parent
            continue

        if isinstance(sibling, Leaf):
            return sibling

        sibling_leaves = list(sibling.leaves())
        return sibling_leaves[-1] if sibling_leaves else None

    return None
1933
1934
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the direct child of `ancestor` on the path down to `descendant`.

    Climbs from `descendant` through its parents and stops at the first node
    whose parent compares equal to `ancestor`.  When the chain of parents runs
    out first, the falsy value that ended the walk is returned.
    """
    candidate: Optional[LN] = descendant
    while candidate:
        if candidate.parent != ancestor:
            candidate = candidate.parent
        else:
            break
    return candidate
1941
1942
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the delimiter priority of `leaf` for a line break placed after it.

    Only delimiters that cause a line break after themselves are reported here:
    a comma yields `COMMA_PRIORITY`, every other leaf yields 0.  `previous` is
    accepted for signature parity but is not consulted.

    Higher numbers are higher priority.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
1955
1956
def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the delimiter priority of `leaf` for a line break placed before it.

    Only delimiters that cause a line break before themselves are reported
    here; any other leaf yields 0.  `previous` is the leaf preceding `leaf`,
    if known; it is used to tell apart e.g. `not in` from a bare `in`.

    Higher numbers are higher priority.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    kind = leaf.type
    parent = leaf.parent

    if kind == token.DOT:
        first_or_after_bracket = previous is None or previous.type in CLOSING_BRACKETS
        if (
            parent
            and parent.type not in {syms.import_from, syms.dotted_name}
            and first_or_after_bracket
        ):
            return DOT_PRIORITY

    if (
        kind in MATH_OPERATORS
        and parent
        and parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[kind]

    if kind in COMPARATORS:
        return COMPARATOR_PRIORITY

    if kind == token.STRING:
        if previous is not None and previous.type == token.STRING:
            return STRING_PRIORITY

    if kind != token.NAME:
        return 0

    # Everything below deals with keyword-like NAME leaves.
    word = leaf.value
    parent_type = parent.type if parent else None
    if previous is not None and previous.type == token.NAME:
        previous_word = previous.value
    else:
        previous_word = None

    if word == "for" and parent_type in {syms.comp_for, syms.old_comp_for}:
        return COMPREHENSION_PRIORITY

    if word == "if" and parent_type in {syms.comp_if, syms.old_comp_if}:
        return COMPREHENSION_PRIORITY

    if word in {"if", "else"} and parent_type == syms.test:
        return TERNARY_PRIORITY

    if word == "is":
        return COMPARATOR_PRIORITY

    if word == "in" and parent_type in {syms.comp_op, syms.comparison}:
        # The `in` of `not in` is not a split point of its own.
        if previous_word != "not":
            return COMPARATOR_PRIORITY

    if word == "not" and parent_type == syms.comp_op:
        # The `not` of `is not` is not a split point of its own.
        if previous_word != "is":
            return COMPARATOR_PRIORITY

    if word in LOGIC_OPERATORS and parent:
        return LOGIC_PRIORITY

    return 0
2046
2047
# Comment texts that act as "stop formatting" markers: `generate_comments`
# raises FormatOff for them under the conditions checked there.
FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
# Comment texts that act as "resume formatting" markers: `generate_comments`
# raises FormatOn right after yielding one of them.
FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
2050
2051
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Yield comment leaves built from the prefix of `leaf`, if it holds any.

    lib2to3 stores comments inside the whitespace prefix of the following
    token (see `pgen2/driver.py:Driver.parse_tokens()`).  That spares the
    grammar from describing every place a comment may appear, but it also means
    comments don't "belong" to any node.  The leaves produced here are
    therefore parentless: we simply don't know what the right parent would be.

    That is fine for our purposes, since all we need is to tell inline comments
    from standalone ones (those that don't share their line with code).  Inline
    comments come out as regular token.COMMENT leaves, standalone ones carry
    the fake STANDALONE_COMMENT token identifier.

    After yielding a comment whose text is in `FMT_ON`, FormatOn is raised.
    After yielding one whose text is in `FMT_OFF`, FormatOff is raised if the
    comment is standalone, or if nothing (or only whitespace) precedes `leaf`.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        yield Leaf(proto.type, proto.value, prefix="\n" * proto.newlines)

        text = proto.value
        if text in FMT_ON:
            raise FormatOn(proto.consumed)

        if text not in FMT_OFF:
            continue

        if proto.type == STANDALONE_COMMENT:
            raise FormatOff(proto.consumed)

        before = preceding_leaf(leaf)
        if not before or before.type in WHITESPACE:  # standalone comment in disguise
            raise FormatOff(proto.consumed)
2083
2084
@dataclass
class ProtoComment:
    """Description of one comment found in a leaf's prefix.

    Instances are built by `list_comments` and turned into real comment leaves
    by `generate_comments`.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
2091
2092
@lru_cache(maxsize=4096)
def list_comments(prefix: str, is_endmarker: bool) -> List[ProtoComment]:
    """Return a `ProtoComment` for every comment line found in `prefix`.

    A comment on the very first line of the prefix is an inline
    (token.COMMENT) comment unless `is_endmarker` is set; every other comment
    is a STANDALONE_COMMENT.  `newlines` counts the blank lines seen since the
    previous comment and `consumed` the characters of `prefix` read so far,
    counting one separator per line.

    Results are memoized, so the same list object is handed back for repeated
    calls with equal arguments.
    """
    found: List[ProtoComment] = []
    if "#" not in prefix:
        return found

    chars_read = 0
    blank_lines = 0
    for position, raw_line in enumerate(prefix.split("\n")):
        chars_read += len(raw_line) + 1  # adding the length of the split '\n'
        text = raw_line.lstrip()
        if text.startswith("#"):
            is_trailing = position == 0 and not is_endmarker
            found.append(
                ProtoComment(
                    type=token.COMMENT if is_trailing else STANDALONE_COMMENT,
                    value=make_comment(text),
                    newlines=blank_lines,
                    consumed=chars_read,
                )
            )
            blank_lines = 0
        elif not text:
            blank_lines += 1
    return found
2121
2122
def make_comment(content: str) -> str:
    """Return `content` as a consistently formatted comment.

    Trailing whitespace is dropped and a leading hash sign is supplied when
    missing.  Exactly one space is inserted between the hash sign and the text,
    unless the text already starts with a space, "!", ":" or "#" (so "##",
    "#!" and "#:" comments are kept as they are).
    """
    text = content.rstrip()
    if text == "":
        return "#"

    body = text[1:] if text.startswith("#") else text
    needs_space = bool(body) and body[0] not in {" ", "!", ":", "#"}
    if needs_space:
        return f"# {body}"

    return f"#{body}"
2140
2141
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively. This means we can fallback to splitting
    by delimiters if the LHS/RHS don't yield any results.  (Recursive calls pass
    `inner=True`; the flag is not otherwise consulted in this function.)

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.

    Yields `line` itself, unchanged, when it is a comment or an
    `UnformattedLines` instance, when it already fits, or when every split
    strategy raised `CannotSplit`.
    """
    if isinstance(line, UnformattedLines) or line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    if not line.should_explode and is_line_short_enough(
        line, line_length=line_length, line_str=line_str
    ):
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    else:

        def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
            for omit in generate_trailers_to_omit(line, line_length):
                lines = list(right_hand_split(line, line_length, py36, omit=omit))
                if is_line_short_enough(lines[0], line_length=line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line.
            # `line_length` must be passed explicitly here: the second
            # positional parameter of `right_hand_split` is the line length,
            # not the `py36` flag.
            yield from right_hand_split(line, line_length, py36=py36)

        if line.inside_brackets:
            split_funcs = [delimiter_split, standalone_comment_split, rhs]
        else:
            split_funcs = [rhs]
    for split_func in split_funcs:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        result: List[Line] = []
        try:
            for l in split_func(line, py36):
                if str(l).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(l, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            # This strategy did not work out; try the next one.
            continue

        else:
            yield from result
            break

    else:
        # No strategy succeeded: emit the line as it was.
        yield line
2209
2210
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split `line` around its first bracket pair into head, body and tail.

    The head runs up to and including the first opening bracket, the body is
    what that bracket encloses (one indentation level deeper) and the tail
    starts at the matching closing bracket.  Empty parts are not yielded.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = head_leaves
    first_opener = None
    for leaf in line.leaves:
        closes_body = (
            bucket is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is first_opener
        )
        if closes_body:
            # An empty body is not worth a line of its own: keep going in head.
            bucket = tail_leaves if body_leaves else head_leaves
        bucket.append(leaf)
        if bucket is head_leaves and leaf.type in OPENING_BRACKETS:
            first_opener = leaf
            bucket = body_leaves

    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)

    # Build the new lines.
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    parts = ((head, head_leaves), (body, body_leaves), (tail, tail_leaves))
    for target, part_leaves in parts:
        for leaf in part_leaves:
            target.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                target.append(comment_after, preformatted=True)

    bracket_split_succeeded_or_raise(head, body, tail)
    yield from (part for part in (head, body, tail) if part)
2251
2252
def right_hand_split(
    line: Line, line_length: int, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Yields the non-empty ones of head, body and tail, in that order.

    Raises :exc:`CannotSplit` when no usable bracket pair is found, when
    :func:`bracket_split_succeeded_or_raise` rejects the result, or when the
    retry without optional parentheses failed and the current split is judged
    hopeless as well.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Walk the leaves right to left: everything up to the last closing bracket
    # that is not omitted is tail, then body until its opening bracket, then head.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    # The lists were filled back to front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    if not head_leaves:
        # No `head` means the split failed. Either `tail` has all content or
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    assert opening_bracket and closing_bracket
    body.should_explode = should_explode(body, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
        # the body shouldn't be exploded
        not body.should_explode
        # the opening bracket is an optional paren
        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(body, line_length)
    ):
        # Retry with this closing bracket excluded, i.e. split on an earlier pair.
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, line_length, py36=py36, omit=omit)
            return

        except CannotSplit:
            # The retry failed.  Give up entirely in the two cases below;
            # otherwise fall through and use the split computed above.
            if not (
                can_be_split(body)
                or is_line_short_enough(body, line_length=line_length)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                )

            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to "
                    "satisfy the splitting algorithm because the head or the tail "
                    "contains multiline strings which by definition never fit one "
                    "line."
                )

    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
2347
2348
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    Such a split is built around one pair of brackets: `head` holds everything
    up to and including the opening bracket, `body` the content between the
    brackets, and `tail` the closing bracket plus whatever follows it.

    The split counts as failed when `body` is empty and the stripped `tail`
    is either empty as well (the split produced the same line) or shorter than
    three characters (too little to be worth a separate line).
    """
    leftover = len(str(tail).strip())
    if body:
        return

    if leftover == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if leftover < 3:
        raise CannotSplit(
            "Splitting brackets on an empty body to save "
            f"{leftover} characters is not worth it"
        )
2373
2374
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Decorate `split_func` so each line it yields starts with a clean prefix.

    The wrapper calls `normalize_prefix(..., inside_brackets=True)` on the
    first leaf of every yielded line before passing the line on.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for produced in split_func(line, py36):
            first_leaf = produced.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield produced

    return split_wrapper
2388
2389
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split `line` at every delimiter of the highest priority it contains.

    When the split is on commas, a trailing comma is added to the last line
    unless it already ends with a comma or a standalone comment.  If `py36` is
    True, that trailing comma is added also in function signatures that
    contain `*` and `**`.

    Raises :exc:`CannotSplit` for an empty line, a line without delimiters, or
    a line whose only top-priority delimiter is a single attribute access dot.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    tracker = line.bracket_tracker
    try:
        split_priority = tracker.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    if (
        split_priority == DOT_PRIORITY
        and tracker.delimiter_count_with_priority(split_priority) == 1
    ):
        raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    def fresh_line() -> Line:
        """Return an empty line shaped like `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = fresh_line()
    shallowest = sys.maxsize
    comma_allowed = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = fresh_line()
            current_line.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for trailing_comment in line.comments_after(leaf, index):
            yield from append_to_line(trailing_comment)

        if leaf.bracket_depth < shallowest:
            shallowest = leaf.bracket_depth
        if leaf.bracket_depth == shallowest and is_vararg(
            leaf, within=VARARGS_PARENTS
        ):
            # A vararg at the outermost depth: a trailing comma after it is
            # only acceptable when `py36` is set.
            comma_allowed = comma_allowed and py36
        if tracker.delimiters.get(id(leaf)) == split_priority:
            yield current_line

            current_line = fresh_line()

    if current_line:
        wants_comma = comma_allowed and split_priority == COMMA_PRIORITY
        if wants_comma and current_line.leaves[-1].type not in {
            token.COMMA,
            STANDALONE_COMMENT,
        }:
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
2452
2453
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split `line` so that standalone comments end up on lines of their own.

    Leaves and their trailing comments are appended to the current output line
    with `append_safe`; whenever that refuses (ValueError), the current line is
    yielded and a new one is started.

    Raises :exc:`CannotSplit` if `line` holds no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def fresh_line() -> Line:
        """Return an empty line shaped like `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = fresh_line()

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = fresh_line()
            current_line.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for trailing_comment in line.comments_after(leaf, index):
            yield from append_to_line(trailing_comment)

    if current_line:
        yield current_line
2481
2482
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement.

    That is the case for the name `import` inside an `import_name` node and
    for the name `from` inside an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    if not parent:
        return False

    starters = (("import", syms.import_name), ("from", syms.import_from))
    return any(
        leaf.value == word and parent.type == statement
        for word, statement in starters
    )
2495
2496
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce the prefix of `leaf` to newlines only, or to nothing at all.

    Outside of brackets, and provided there is no backslash before the first
    "#" of the prefix, the newlines found after the last "#" are kept (minus
    one when the prefix contains a "#").  In every other case the prefix
    becomes empty.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    cleaned = ""
    if not inside_brackets:
        before_hash, *after_hash = leaf.prefix.split("#")
        if "\\" not in before_hash:
            if after_hash:
                kept_newlines = after_hash[-1].count("\n") - 1
            else:
                kept_newlines = before_hash.count("\n")
            cleaned = "\n" * kept_newlines
    leaf.prefix = cleaned
2513
2514
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Lowercase the string prefix characters of the string held by `leaf`.

    With `remove_u_prefix` set, every "u" is dropped from the prefix as well.

    Note: Mutates its argument.
    """
    match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    old_prefix, remainder = match.group(1), match.group(2)
    lowered = old_prefix.lower()
    if remove_u_prefix:
        lowered = lowered.replace("u", "")
    leaf.value = lowered + remainder
2529
2530
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Strings nested in f-string
    replacement fields are not parsed or fixed; when switching quotes would
    put a backslash inside such a field, the quotes are left as they are.

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    # A quote is "unescaped" when it follows an even number of backslashes and
    # "escaped" when it follows an odd number; each pattern anchors on the
    # preceding non-backslash character (or the start of the body).
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Find `{...}` replacement fields while skipping the `{{` / `}}`
        # literal-brace escapes.  Lookarounds are used instead of consuming a
        # neighbouring character so that a field at the very start or end of
        # the body, or directly next to another field, is inspected as well.
        fields = re.findall(r"(?<!\{)\{([^{].*?)\}(?!\})", new_body)
        if any("\\" in field for field in fields):
            # Do not introduce backslashes in interpolated expressions
            return

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending in `"` would run into the closing `"""`
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2596
2597
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Turn optional parentheses invisible, or add invisible ones where missing.

    `parens_after` is a set of string leaf values; the child that comes
    immediately after such a leaf is the one whose parentheses are normalized.

    One-element tuples are always wrapped in visible parentheses.  Existing
    visible parentheses around other tuples and generator expressions are kept.
    """
    if any(
        comment.value in FMT_OFF
        for comment in list_comments(node.prefix, is_endmarker=False)
    ):
        # This `node` has a prefix with `# fmt: off`, don't mess with parens.
        return

    wrap_next = False
    # Iterate over a snapshot because children are replaced along the way.
    for position, child in enumerate(list(node.children)):
        if wrap_next:
            if child.type == syms.atom:
                maybe_make_parens_invisible_in_atom(child)
            elif is_one_tuple(child):
                # A bare one-tuple gets real, visible parentheses.
                child.remove()
                visible = Node(
                    syms.atom, [Leaf(token.LPAR, "("), child, Leaf(token.RPAR, ")")]
                )
                node.insert_child(position, visible)
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement, not inside an atom.
                if child.type == token.LPAR:
                    # Existing parentheses: blank out both of them.
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                elif child.type != token.STAR:
                    # No parentheses yet: add an invisible pair.
                    node.insert_child(position, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                break

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # Anything else, except a multiline string leaf, is wrapped in
                # an invisible pair at the position it was removed from.
                slot = child.remove() or 0
                invisible = Node(
                    syms.atom, [Leaf(token.LPAR, ""), child, Leaf(token.RPAR, "")]
                )
                node.insert_child(slot, invisible)

        wrap_next = isinstance(child, Leaf) and child.value in parens_after
2644
2645
def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
    """Blank out the parentheses of the atom `node` when that is safe.

    Nested atoms are processed recursively.  Returns True if the parentheses
    of `node` itself were made invisible, False otherwise.
    """
    if node.type != syms.atom:
        return False

    # Empty tuples, one-tuples and yield expressions keep their parentheses.
    if is_empty_tuple(node) or is_one_tuple(node) or is_yield(node):
        return False

    # So does any content holding a delimiter of comma priority or higher.
    if max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY:
        return False

    opening, closing = node.children[0], node.children[-1]
    if not (opening.type == token.LPAR and closing.type == token.RPAR):
        return False

    opening.value = ""  # type: ignore
    closing.value = ""  # type: ignore
    if len(node.children) > 1:
        maybe_make_parens_invisible_in_atom(node.children[1])
    return True
2668
2669
def is_empty_tuple(node: LN) -> bool:
    """Tell whether `node` is an atom consisting of just `(` and `)`."""
    if node.type != syms.atom or len(node.children) != 2:
        return False

    opening, closing = node.children
    return opening.type == token.LPAR and closing.type == token.RPAR
2678
2679
def is_one_tuple(node: LN) -> bool:
    """Tell whether `node` is a one-element tuple, parenthesized or not."""
    if node.type == syms.atom:
        # Parenthesized form: `(` testlist_gexp `)`.
        if len(node.children) != 3:
            return False

        lpar, gexp, rpar = node.children
        if (
            lpar.type != token.LPAR
            or gexp.type != syms.testlist_gexp
            or rpar.type != token.RPAR
        ):
            return False

        elements = gexp.children
    elif node.type in IMPLICIT_TUPLE:
        # Bare form: the node itself holds the element and the comma.
        elements = node.children
    else:
        return False

    # Exactly one element followed by a comma.
    return len(elements) == 2 and elements[1].type == token.COMMA
2701
2702
def is_yield(node: LN) -> bool:
    """Tell whether `node` is a `yield` or `yield from` expression.

    Any number of enclosing parentheses around the expression is looked
    through.
    """
    current = node
    while True:
        if current.type == syms.yield_expr:
            return True

        if current.type == token.NAME and current.value == "yield":  # type: ignore
            return True

        if current.type != syms.atom or len(current.children) != 3:
            return False

        lpar, inner, rpar = current.children
        if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
            return False

        # Peel one layer of parentheses and examine what is inside.
        current = inner
2722
2723
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Tell whether `leaf` is a star or double star of a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    owner = leaf.parent
    if leaf.type not in STARS or not owner:
        return False

    if owner.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132), so it is the node one level further
        # up that decides.
        owner = owner.parent
        if not owner:
            return False

    return owner.type in within
2745
2746
def is_multiline_string(leaf: Leaf) -> bool:
    """Tell whether `leaf` is a triple-quoted string that really spans lines."""
    # Drop the prefix letters so the literal starts at its opening quotes.
    literal = leaf.value.lstrip("furbFURB")
    if "\n" not in literal:
        return False

    return literal.startswith(('"""', "'''"))
2751
2752
def is_stub_suite(node: Node) -> bool:
    """Tell whether `node` is a suite whose only statement is a stub body."""
    children = node.children
    if len(children) != 4:
        return False

    # Expected shape: NEWLINE, INDENT, <the statement>, DEDENT.
    newline, indent, body, dedent = children
    if (
        newline.type != token.NEWLINE
        or indent.type != token.INDENT
        or dedent.type != token.DEDENT
    ):
        return False

    return is_stub_body(body)
2764
2765
def is_stub_body(node: LN) -> bool:
    """Tell whether `node` is a simple statement consisting of an ellipsis."""
    if not (
        isinstance(node, Node)
        and node.type == syms.simple_stmt
        and len(node.children) == 2
    ):
        return False

    statement = node.children[0]
    if statement.type != syms.atom or len(statement.children) != 3:
        return False

    # The ellipsis shows up as an atom made of three separate dot leaves.
    dot = Leaf(token.DOT, ".")
    return all(part == dot for part in statement.children)
2780
2781
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return the highest delimiter priority found inside the atom `node`.

    Only atoms whose content sits between a pair of parentheses are examined.
    For anything else, and when the content has no delimiters, 0 is returned.
    """
    if node.type != syms.atom:
        return 0

    opening, closing = node.children[0], node.children[-1]
    if opening.type != token.LPAR or closing.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for inner in node.children[1:-1]:
        # Feed every leaf between the parentheses to the tracker, in order.
        leaves = [inner] if isinstance(inner, Leaf) else inner.leaves()
        for leaf in leaves:
            tracker.mark(leaf)

    try:
        return tracker.max_delimiter_priority()

    except ValueError:
        # Treated as "no delimiters"; mirrors the 0 returned for non-atoms.
        return 0
2808
2809
def ensure_visible(leaf: Leaf) -> None:
    """Give a parenthesis leaf its visible text back.

    Parentheses may have been blanked out as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    Leaves of any other type are left untouched.
    """
    visible_text = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_text is not None:
        leaf.value = visible_text
2820
2821
def should_explode(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` immediately be split with `delimiter_split()` after RHS?

    True only when `opening_bracket` belongs to an atom or an "import from"
    statement and the highest delimiter priority in `line` (a trailing comma
    not counted) is the comma priority.
    """
    parent = opening_bracket.parent
    if not parent:
        return False

    if parent.type not in {syms.atom, syms.import_from}:
        return False

    # NOTE: an invisible (empty-valued) bracket also passes this substring test.
    if opening_bracket.value not in "[{(":
        return False

    try:
        trailing = line.leaves[-1]
        # A trailing comma must not count as a delimiter of its own.
        ignored = {id(trailing)} if trailing.type == token.COMMA else set()
        priority = line.bracket_tracker.max_delimiter_priority(exclude=ignored)
    except (IndexError, ValueError):
        return False

    return priority == COMMA_PRIORITY
2839
2840
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings (any spelling of the prefix, e.g. `f`, `F`, `rf`, `Rf`, `fR`);
      and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            value = n.value  # type: ignore
            # Everything in front of the opening quote is the prefix.  Checking
            # it case-insensitively also catches mixed-case forms such as
            # `Rf"..."` that a fixed list of two-character heads misses.
            prefix = value[: len(value) - len(value.lstrip("furbFURB"))]
            if "f" in prefix.casefold():
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # The argument list ends in a comma; that only needs 3.6+ when a
            # star or double star appears among the arguments.
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
2869
2870
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
    set is empty.  Note that it is the same set object that is yielded every
    time; it keeps growing between yields.
    """

    omit: Set[LeafID] = set()
    yield omit

    # Running width of the trailer, starting with four columns per depth level.
    length = 4 * line.depth
    # Opening bracket of the pair currently being crossed, if any.
    opening_bracket = None
    # Most recent closing bracket that started such a pair.
    closing_bracket = None
    # Closing brackets seen since the last yield that are not candidates
    # themselves: nested ones and those of empty bracket pairs.
    inner_brackets: Set[LeafID] = set()
    # The line is walked backwards, from its last leaf to its first.
    for index, leaf, leaf_length in enumerate_with_length(line, reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # `leaf_length` exceeds prefix + value only when comments are attached.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        if opening_bracket:
            if leaf is opening_bracket:
                # Back out of the bracket pair.
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if not leaf.value:
                # Invisible closing bracket: never a candidate.
                continue

            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))
                continue

            opening_bracket = leaf.opening_bracket
            if closing_bracket:
                # The previously found trailer fits, so it may be omitted along
                # with every inner bracket collected on the way here.
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit
            closing_bracket = leaf
2923
2924
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file.

    Only the leading statements of `node` are examined: an optional docstring
    followed by `from __future__ import ...` statements.  Scanning stops at
    the first statement of any other kind.  For `feature as alias`, the
    feature name is recorded, not the alias.
    """
    imports: Set[str] = set()

    def collect(children: Iterable[LN]) -> None:
        """Add the feature names found among `children` to `imports`."""
        for child in children:
            if isinstance(child, Leaf):
                # Skips punctuation such as commas and parentheses.
                if child.type == token.NAME:
                    imports.add(child.value)
            elif child.type == syms.import_as_name:
                # `feature as alias`: the first child is the feature itself.
                feature = child.children[0]
                if isinstance(feature, Leaf) and feature.type == token.NAME:
                    imports.add(feature.value)
            else:
                assert child.type == syms.import_as_names
                collect(child.children)

    for child in node.children:
        if child.type != syms.simple_stmt:
            break
        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            if (
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE
            ):
                continue
            else:
                break
        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                break
            # Children 0-2 are `from`, the module name and `import`.
            collect(first_child.children[3:])
        else:
            break
    return imports
2958
2959
def gen_python_files_in_dir(
    path: Path,
    root: Path,
    include: Pattern[str],
    exclude: Pattern[str],
    report: "Report",
) -> Iterator[Path]:
    """Yield the files below `path` that `include` selects and `exclude` spares.

    Both patterns are searched in a "/"-prefixed POSIX path relative to `root`;
    directories get a trailing "/" and are descended into recursively.  An
    `exclude` match that is an empty string does not exclude anything.

    Symbolic links that resolve outside of `root` are skipped.  Skipped and
    excluded paths are reported through `report.path_ignored()`.
    """
    assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
    for entry in path.iterdir():
        try:
            relative = entry.resolve().relative_to(root)
        except ValueError:
            # Only a symlink is expected to resolve outside of `root`.
            if not entry.is_symlink():
                raise

            report.path_ignored(
                entry, "is a symbolic link that points outside of the root directory"
            )
            continue

        normalized_path = f"/{relative.as_posix()}"
        if entry.is_dir():
            normalized_path += "/"
        excluded = exclude.search(normalized_path)
        if excluded and excluded.group(0):
            report.path_ignored(entry, "matches the --exclude regular expression")
            continue

        if entry.is_dir():
            yield from gen_python_files_in_dir(entry, root, include, exclude, report)

        elif entry.is_file() and include.search(normalized_path):
            yield entry
3002
3003
@lru_cache()
def find_project_root(srcs: Iterable[str]) -> Path:
    """Return a directory containing .git, .hg, or pyproject.toml.

    That directory can be one of the directories passed in `srcs` or their
    common parent.  The search starts at the deepest directory that contains
    every path in `srcs` and walks up from there.

    If no directory in the tree contains a marker that would specify it's the
    project root, the root of the file system is returned.

    Note: results are memoized with `lru_cache`, so `srcs` must be hashable
    (e.g. a tuple of strings).
    """
    if not srcs:
        return Path("/").resolve()

    # Intersect, over all sources, the directories that contain each source.
    # A source that is itself a directory counts as containing itself.
    shared: Optional[Set[Path]] = None
    for src in srcs:
        path = Path(src).resolve()
        containing = set(path.parents)
        if path.is_dir():
            containing.add(path)
        shared = containing if shared is None else shared & containing

    if not shared:
        # Nothing contains every source (or `srcs` yielded nothing).
        return Path("/").resolve()

    # All shared directories are ancestors of one another; the one with the
    # most path components is the deepest common base.
    common_base = max(shared, key=lambda candidate: len(candidate.parts))
    for directory in (common_base, *common_base.parents):
        if (directory / ".git").is_dir():
            return directory

        if (directory / ".hg").is_dir():
            return directory

        if (directory / "pyproject.toml").is_file():
            return directory

    # No marker anywhere: `directory` is now the topmost ancestor.
    return directory
3032
3033
@dataclass
class Report:
    """Tallies formatting outcomes. `str(report)` renders the summary line."""

    check: bool = False
    quiet: bool = False
    verbose: bool = False
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Record that `src` was processed successfully and print a message.

        `Changed.YES` counts as a change; every other value counts as
        "left unchanged".
        """
        if changed is not Changed.YES:
            if self.verbose:
                if changed is Changed.NO:
                    msg = f"{src} already well formatted, good job."
                else:
                    msg = f"{src} wasn't modified on disk since last run."
                out(msg, bold=False)
            self.same_count += 1
            return

        verb = "would reformat" if self.check else "reformatted"
        if self.verbose or not self.quiet:
            out(f"{verb} {src}")
        self.change_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Record that `src` could not be formatted and print the error."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    def path_ignored(self, path: Path, message: str) -> None:
        """Say why `path` was skipped; prints only in verbose mode."""
        if not self.verbose:
            return

        out(f"{path} ignored: {message}", bold=False)

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        This considers the current state of changed files and failures:
        - if there were any failures, return 123;
        - if any files were changed and --check is being used, return 1;
        - otherwise return 0.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special returncodes reserved by the shell.
        if self.failure_count:
            return 123

        return 1 if self.change_count and self.check else 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        if self.check:
            reformatted, unchanged, failed = (
                "would be reformatted",
                "would be left unchanged",
                "would fail to reformat",
            )
        else:
            reformatted, unchanged, failed = (
                "reformatted",
                "left unchanged",
                "failed to reformat",
            )

        def files(count: int) -> str:
            """Return `count` followed by "file" or "files"."""
            return f"{count} files" if count > 1 else f"{count} file"

        # Categories with a zero count are left out of the summary.
        parts = []
        if self.change_count:
            parts.append(
                click.style(f"{files(self.change_count)} {reformatted}", bold=True)
            )
        if self.same_count:
            parts.append(f"{files(self.same_count)} {unchanged}")
        if self.failure_count:
            parts.append(
                click.style(f"{files(self.failure_count)} {failed}", fg="red")
            )
        return ", ".join(parts) + "."
3117
3118
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both are parsed with the builtin `ast` module and compared through a
    textual rendering of their trees.  An AssertionError is also raised when
    either of them fails to parse.
    """

    import ast
    import traceback

    def _render(tree: ast.AST, level: int = 0) -> Iterator[str]:
        """Yield the lines of an indented textual rendering of `tree`."""
        pad = "  " * level
        kind = tree.__class__.__name__
        yield f"{pad}{kind}("

        # Fields are visited in sorted order.
        for name in sorted(tree._fields):
            try:
                value = getattr(tree, name)
            except AttributeError:
                continue

            yield f"{'  ' * (level + 1)}{name}="

            if isinstance(value, ast.AST):
                yield from _render(value, level + 2)

            elif isinstance(value, list):
                # Only AST items are rendered; other list items are skipped.
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _render(item, level + 2)

            else:
                yield f"{'  ' * (level + 2)}{value!r},  # {value.__class__.__name__}"

        yield f"{pad})  # /{kind}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        # Keep the traceback and the offending output around for a bug report.
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues.  "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_render(src_ast))
    dst_ast_str = "\n".join(_render(dst_ast))
    if src_ast_str == dst_ast_str:
        return

    log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
    raise AssertionError(
        f"INTERNAL ERROR: Black produced code that is not equivalent to "
        f"the source.  "
        f"Please report a bug on https://github.com/ambv/black/issues.  "
        f"This diff might be helpful: {log}"
    ) from None
3180
3181
def assert_stable(
    src: str, dst: str, line_length: int, mode: FileMode = FileMode.AUTO_DETECT
) -> None:
    """Raise AssertionError if formatting `dst` again does not give `dst` back.

    `src` is only used to add a "source" vs. "first pass" diff to the log file
    mentioned in the error message.
    """
    second_pass = format_str(dst, line_length=line_length, mode=mode)
    if second_pass == dst:
        return

    first_diff = diff(src, dst, "source", "first pass")
    second_diff = diff(dst, second_pass, "first pass", "second pass")
    log = dump_to_file(first_diff, second_diff)
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter.  "
        f"Please report a bug on https://github.com/ambv/black/issues.  "
        f"This diff might be helpful: {log}"
    ) from None
3198
3199
def dump_to_file(*output: str) -> str:
    """Write every string in `output` to a new temporary file; return its path.

    A newline is added after each non-empty string that does not already end
    with one.  The file is named `blk_*.log` and is not deleted afterwards.
    """
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as handle:
        log_path = handle.name
        for chunk in output:
            handle.write(chunk)
            if chunk and not chunk.endswith("\n"):
                handle.write("\n")
    return log_path
3212
3213
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return the unified diff of `a` and `b`, labelled `a_name` and `b_name`.

    Five lines of context are included.  The result is an empty string when
    `a` and `b` are equal.
    """
    import difflib

    def as_lines(text: str) -> List[str]:
        """Split `text` on newlines, ending every piece with a newline."""
        return [piece + "\n" for piece in text.split("\n")]

    hunks = difflib.unified_diff(
        as_lines(a), as_lines(b), fromfile=a_name, tofile=b_name, n=5
    )
    return "".join(hunks)
3223
3224
def cancel(tasks: Iterable[asyncio.Task]) -> None:
    """asyncio signal handler: report the abort, then cancel all `tasks`."""
    err("Aborted!")
    for pending in tasks:
        pending.cancel()
3230
3231
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even when cancelling or waiting raises.  As a side
    effect the "concurrent.futures" logger is set to the CRITICAL level.
    """
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.Task.all_tasks()` was removed in Python 3.9; use the
        # module-level `asyncio.all_tasks()` where it exists (3.7+) and fall
        # back to the old spelling only on older interpreters.
        all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No `loop=` argument: it was removed from `gather()` in Python 3.10,
        # and `gather()` picks up the loop from the tasks it is given.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
3252
3253
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply `regex.sub(replacement, ...)` to `original` two times in a row.

    String normalization uses this for overlapping matches, which a single
    substitution pass cannot replace all at once.
    """
    result = original
    for _ in range(2):
        result = regex.sub(replacement, result)
    return result
3261
3262
def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
    """Compile `regex`, in verbose mode if it contains a newline.

    Verbose mode is selected by prepending the inline flag `(?x)`.
    """
    source = "(?x)" + regex if "\n" in regex else regex
    return re.compile(source)
3271
3272
def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
    """Yield `(index, element)` pairs of `sequence` from last to first.

    Like `reversed(enumerate(sequence))` if that were possible.
    """
    descending = range(len(sequence) - 1, -1, -1)
    yield from zip(descending, reversed(sequence))
3279
3280
def enumerate_with_length(
    line: Line, reversed: bool = False
) -> Iterator[Tuple[Index, Leaf, int]]:
    """Yield `(index, leaf, length)` for the leaves of `line`.

    `length` is the leaf's prefix plus its value plus the values of the
    comments that `line.comments_after()` reports for it.  With `reversed`
    set, the leaves are visited from last to first.

    Stops at the first leaf whose value contains a newline (multiline strings;
    the previous docstring also lists standalone comments here).
    """
    pairs: Iterator[Tuple[Index, Leaf]]
    if reversed:
        pairs = enumerate_reversed(line.leaves)
    else:
        pairs = enumerate(line.leaves)

    for index, leaf in pairs:
        if "\n" in leaf.value:
            return  # Multiline strings, we can't continue.

        comments_length = sum(
            len(comment.value) for comment in line.comments_after(leaf, index)
        )
        yield index, leaf, len(leaf.prefix) + len(leaf.value) + comments_length
3302
3303
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
    """Tell whether `line` fits in `line_length` characters.

    A non-empty `line_str` is taken as the rendering of `line`; otherwise
    `str(line)` with surrounding newlines stripped is used.  A rendering that
    still contains a newline, or a line holding standalone comments, never
    counts as short enough.
    """
    rendered = line_str or str(line).strip("\n")
    if len(rendered) > line_length:
        return False

    if "\n" in rendered:  # multiline strings
        return False

    return not line.contains_standalone_comments()
3316
3317
def can_be_split(line: Line) -> bool:
    """Return False if the line cannot be split *for sure*.

    This is a cheap heuristic rather than an exhaustive search.  It is meant
    to avoid some unfortunate formattings, mostly around wrapping unsplittable
    code in unnecessary parentheses.
    """
    leaves = line.leaves
    if len(leaves) < 2:
        return False

    # Only lines that begin with `<string>.` get the closer inspection below.
    if not (leaves[0].type == token.STRING and leaves[1].type == token.DOT):
        return True

    call_count = 0
    dot_count = 0
    # NOTE(review): this reference is never advanced inside the loop, so it
    # always denotes the final leaf of the line.  Kept as is on purpose:
    # changing it would change formatting results.
    trailing = leaves[-1]
    # Walk from the second-to-last leaf back to the first one.
    for leaf in leaves[-2::-1]:
        kind = leaf.type
        if kind in OPENING_BRACKETS:
            if trailing.type not in CLOSING_BRACKETS:
                return False

            call_count += 1
        elif kind == token.DOT:
            dot_count += 1
        elif kind == token.NAME:
            if not (trailing.type == token.DOT or trailing.type in OPENING_BRACKETS):
                return False

        elif kind not in CLOSING_BRACKETS:
            return False

        if dot_count > 1 and call_count > 1:
            return False

    return True
3352
3353
def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
    the point is to not return false positives that end up producing lines that
    are too long.

    The decision is based on the delimiters recorded by `line.bracket_tracker`
    and on the first two and last two leaves of `line`.  `line_length` is the
    limit that accumulated leaf lengths (starting from `4 * line.depth`) are
    compared against.
    """
    bt = line.bracket_tracker
    if not bt.delimiters:
        # Without delimiters the optional parentheses are useless.
        return True

    max_priority = bt.max_delimiter_priority()
    if bt.delimiter_count_with_priority(max_priority) > 1:
        # With more than one delimiter of a kind the optional parentheses read better.
        return False

    if max_priority == DOT_PRIORITY:
        # A single stranded method call doesn't require optional parentheses.
        return True

    # The code below indexes the first two and the last two leaves.
    assert len(line.leaves) >= 2, "Stranded delimiter"

    first = line.leaves[0]
    second = line.leaves[1]
    penultimate = line.leaves[-2]
    last = line.leaves[-1]

    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
        # Leading, non-empty bracket pair.  `remainder` is switched on at the
        # bracket that closes `first`; from then on leaf lengths are summed
        # (closing bracket included) on top of 4 columns per depth level.
        remainder = False
        length = 4 * line.depth
        for _index, leaf, leaf_length in enumerate_with_length(line):
            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
                remainder = True
            if remainder:
                length += leaf_length
                if length > line_length:
                    # Too long; skips the `else` clause of the loop below.
                    break

                if leaf.type in OPENING_BRACKETS:
                    # There are brackets we can further split on.
                    remainder = False

        else:
            # Only reached when the loop finished without `break`.
            # checked the entire string and line length wasn't exceeded
            # NOTE(review): the index comparison presumably guards against
            # enumerate_with_length() stopping before the last leaf; that
            # helper is not visible here -- confirm.
            if len(line.leaves) == _index + 1:
                return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket.  If the
        # opening bracket doesn't match our rule, maybe the closing will.

    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
        or (
            # don't use indexing for omitting optional parentheses;
            # it looks weird
            last.type == token.RSQB
            and last.parent
            and last.parent.type != syms.trailer
        )
    ):
        if penultimate.type in OPENING_BRACKETS:
            # Empty brackets don't help.
            return False

        if is_multiline_string(first):
            # Additional wrapping of a multiline string in this situation is
            # unnecessary.
            return True

        # Sum leaf lengths from the start of the line up to and including the
        # bracket that `last` closes.  Omitting is fine if that prefix fits in
        # `line_length`, or if an earlier opening bracket was seen on the way.
        length = 4 * line.depth
        seen_other_brackets = False
        for _index, leaf, leaf_length in enumerate_with_length(line):
            length += leaf_length
            if leaf is last.opening_bracket:
                if seen_other_brackets or length <= line_length:
                    return True

            elif leaf.type in OPENING_BRACKETS:
                # There are brackets we can further split on.
                seen_other_brackets = True

    # None of the accepted shapes matched.
    return False
3441
3442
def get_cache_file(line_length: int, mode: FileMode) -> Path:
    """Return the path of the cache file for the given `line_length` and `mode`.

    The file is located directly inside `CACHE_DIR`; its name embeds the line
    length and `mode.value`.
    """
    file_name = f"cache.{line_length}.{mode.value}.pickle"
    return CACHE_DIR / file_name
3445
3446
def read_cache(line_length: int, mode: FileMode) -> Cache:
    """Read the cache if it exists and is well formed.

    Returns an empty cache when the cache file is missing or when its content
    cannot be unpickled.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(line_length, mode)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except (
            pickle.UnpicklingError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            ValueError,
        ):
            # Damaged data does not always surface as UnpicklingError: an empty
            # or truncated file raises EOFError, and the pickle documentation
            # lists AttributeError, ImportError and IndexError as possible too.
            # All of them mean "not well formed", so fall back to an empty cache.
            return {}

    return cache
3463
3464
def get_cache_info(path: Path) -> CacheInfo:
    """Return the (modification time, size) pair stored in the cache for `path`.

    Both values come from a single `path.stat()` call.
    """
    file_stat = path.stat()
    mtime, size = file_stat.st_mtime, file_stat.st_size
    return mtime, size
3469
3470
def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Partition the paths in `sources` into two sets of resolved paths.

    The first set holds files whose current cache info differs from what
    `cache` records for them (including files missing from `cache`).  The
    second set holds files whose cache info is unchanged.
    """
    changed: Set[Path] = set()
    unchanged: Set[Path] = set()
    for source in sources:
        resolved = source.resolve()
        is_stale = cache.get(resolved) != get_cache_info(resolved)
        bucket = changed if is_stale else unchanged
        bucket.add(resolved)
    return changed, unchanged
3485
3486
def write_cache(
    cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
) -> None:
    """Update the cache file.

    The entries of `cache` are merged with fresh cache info for every path in
    `sources` and the result is pickled to the cache file for `line_length`
    and `mode`.  Writing is best effort: any OSError is silently ignored.
    """
    import tempfile

    cache_file = get_cache_file(line_length, mode)
    tmp_name: Optional[str] = None
    try:
        # exist_ok avoids a race with concurrent runs creating the directory
        # between an exists() check and mkdir().
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        # Write to a temporary file in the same directory and move it into
        # place afterwards, so that readers never observe a partially written
        # pickle.
        with tempfile.NamedTemporaryFile(
            dir=str(cache_file.parent), delete=False
        ) as fobj:
            tmp_name = fobj.name
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
        os.replace(tmp_name, str(cache_file))
        tmp_name = None
    except OSError:
        pass
    finally:
        # Do not leave the temporary file behind if it was never moved.
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
3500
3501
# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()