black.py

   1 import asyncio
   2 import pickle
   3 from asyncio.base_events import BaseEventLoop
   4 from concurrent.futures import Executor, ProcessPoolExecutor
   5 from enum import Enum
   6 from functools import partial, wraps
   7 import keyword
   8 import logging
   9 from multiprocessing import Manager
  10 import os
  11 from pathlib import Path
  12 import re
  13 import tokenize
  14 import signal
  15 import sys
  16 from typing import (
  17     Any,
  18     Callable,
  19     Collection,
  20     Dict,
  21     Generic,
  22     Iterable,
  23     Iterator,
  24     List,
  25     Optional,
  26     Pattern,
  27     Set,
  28     Tuple,
  29     Type,
  30     TypeVar,
  31     Union,
  32 )
  33
  34 from appdirs import user_cache_dir
  35 from attr import dataclass, Factory
  36 import click
  37
  38 # lib2to3 fork
  39 from blib2to3.pytree import Node, Leaf, type_repr
  40 from blib2to3 import pygram, pytree
  41 from blib2to3.pgen2 import driver, token
  42 from blib2to3.pgen2.parse import ParseError
  43
# Version string passed to `click.version_option` on `main`.
__version__ = "18.4a6"
# Default value of the `--line-length` option.
DEFAULT_LINE_LENGTH = 88

# types
# Shorthand for the grammar symbols of the lib2to3 fork.
syms = pygram.python_symbols
# The aliases below only give descriptive names to plain builtin types.
FileContent = str
Encoding = str
Depth = int
NodeType = int
LeafID = int
Priority = int
Index = int
# Any element of a lib2to3 tree: a terminal `Leaf` or a non-terminal `Node`.
LN = Union[Leaf, Node]
# A callable that takes a `Line` and a bool and yields `Line` objects.
SplitFunc = Callable[["Line", bool], Iterator["Line"]]
Timestamp = float
FileSize = int
# Per-file cache record, and the cache mapping paths to those records.
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
# Message helpers built on `click.secho` with `err=True`: `out` prints bold
# text, `err` prints red text.
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)
  64
  65
  66 class NothingChanged(UserWarning):
  67     """Raised by :func:`format_file` when reformatted code is the same as source."""
  68
  69
  70 class CannotSplit(Exception):
  71     """A readable split that fits the allotted line length is impossible.
  72
  73     Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
  74     :func:`delimiter_split`.
  75     """
  76
  77
  78 class FormatError(Exception):
  79     """Base exception for `# fmt: on` and `# fmt: off` handling.
  80
  81     It holds the number of bytes of the prefix consumed before the format
  82     control comment appeared.
  83     """
  84
  85     def __init__(self, consumed: int) -> None:
  86         super().__init__(consumed)
  87         self.consumed = consumed
  88
  89     def trim_prefix(self, leaf: Leaf) -> None:
  90         leaf.prefix = leaf.prefix[self.consumed :]
  91
  92     def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
  93         """Returns a new Leaf from the consumed part of the prefix."""
  94         unformatted_prefix = leaf.prefix[: self.consumed]
  95         return Leaf(token.NEWLINE, unformatted_prefix)
  96
  97
  98 class FormatOn(FormatError):
  99     """Found a comment like `# fmt: on` in the file."""
 100
 101
 102 class FormatOff(FormatError):
 103     """Found a comment like `# fmt: off` in the file."""
 104
 105
 106 class WriteBack(Enum):
 107     NO = 0
 108     YES = 1
 109     DIFF = 2
 110
 111
 112 class Changed(Enum):
 113     NO = 0
 114     CACHED = 1
 115     YES = 2
 116
 117
 118 @click.command()
 119 @click.option(
 120     "-l",
 121     "--line-length",
 122     type=int,
 123     default=DEFAULT_LINE_LENGTH,
 124     help="How many character per line to allow.",
 125     show_default=True,
 126 )
 127 @click.option(
 128     "--check",
 129     is_flag=True,
 130     help=(
 131         "Don't write the files back, just return the status.  Return code 0 "
 132         "means nothing would change.  Return code 1 means some files would be "
 133         "reformatted.  Return code 123 means there was an internal error."
 134     ),
 135 )
 136 @click.option(
 137     "--diff",
 138     is_flag=True,
 139     help="Don't write the files back, just output a diff for each file on stdout.",
 140 )
 141 @click.option(
 142     "--fast/--safe",
 143     is_flag=True,
 144     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 145 )
 146 @click.option(
 147     "-q",
 148     "--quiet",
 149     is_flag=True,
 150     help=(
 151         "Don't emit non-error messages to stderr. Errors are still emitted, "
 152         "silence those with 2>/dev/null."
 153     ),
 154 )
 155 @click.version_option(version=__version__)
 156 @click.argument(
 157     "src",
 158     nargs=-1,
 159     type=click.Path(
 160         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 161     ),
 162 )
 163 @click.pass_context
 164 def main(
 165     ctx: click.Context,
 166     line_length: int,
 167     check: bool,
 168     diff: bool,
 169     fast: bool,
 170     quiet: bool,
 171     src: List[str],
 172 ) -> None:
 173     """The uncompromising code formatter."""
 174     sources: List[Path] = []
 175     for s in src:
 176         p = Path(s)
 177         if p.is_dir():
 178             sources.extend(gen_python_files_in_dir(p))
 179         elif p.is_file():
 180             # if a file was explicitly given, we don't care about its extension
 181             sources.append(p)
 182         elif s == "-":
 183             sources.append(Path("-"))
 184         else:
 185             err(f"invalid path: {s}")
 186
 187     if check and not diff:
 188         write_back = WriteBack.NO
 189     elif diff:
 190         write_back = WriteBack.DIFF
 191     else:
 192         write_back = WriteBack.YES
 193     report = Report(check=check, quiet=quiet)
 194     if len(sources) == 0:
 195         out("No paths given. Nothing to do 😴")
 196         ctx.exit(0)
 197         return
 198
 199     elif len(sources) == 1:
 200         reformat_one(sources[0], line_length, fast, write_back, report)
 201     else:
 202         loop = asyncio.get_event_loop()
 203         executor = ProcessPoolExecutor(max_workers=os.cpu_count())
 204         try:
 205             loop.run_until_complete(
 206                 schedule_formatting(
 207                     sources, line_length, fast, write_back, report, loop, executor
 208                 )
 209             )
 210         finally:
 211             shutdown(loop)
 212         if not quiet:
 213             out("All done! ✨ 🍰 ✨")
 214             click.echo(str(report))
 215     ctx.exit(report.return_code)
 216
 217
 218 def reformat_one(
 219     src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
 220 ) -> None:
 221     """Reformat a single file under `src` without spawning child processes.
 222
 223     If `quiet` is True, non-error messages are not output. `line_length`,
 224     `write_back`, and `fast` options are passed to :func:`format_file_in_place`.
 225     """
 226     try:
 227         changed = Changed.NO
 228         if not src.is_file() and str(src) == "-":
 229             if format_stdin_to_stdout(
 230                 line_length=line_length, fast=fast, write_back=write_back
 231             ):
 232                 changed = Changed.YES
 233         else:
 234             cache: Cache = {}
 235             if write_back != WriteBack.DIFF:
 236                 cache = read_cache(line_length)
 237                 src = src.resolve()
 238                 if src in cache and cache[src] == get_cache_info(src):
 239                     changed = Changed.CACHED
 240             if (
 241                 changed is not Changed.CACHED
 242                 and format_file_in_place(
 243                     src, line_length=line_length, fast=fast, write_back=write_back
 244                 )
 245             ):
 246                 changed = Changed.YES
 247             if write_back == WriteBack.YES and changed is not Changed.NO:
 248                 write_cache(cache, [src], line_length)
 249         report.done(src, changed)
 250     except Exception as exc:
 251         report.failed(src, str(exc))
 252
 253
 254 async def schedule_formatting(
 255     sources: List[Path],
 256     line_length: int,
 257     fast: bool,
 258     write_back: WriteBack,
 259     report: "Report",
 260     loop: BaseEventLoop,
 261     executor: Executor,
 262 ) -> None:
 263     """Run formatting of `sources` in parallel using the provided `executor`.
 264
 265     (Use ProcessPoolExecutors for actual parallelism.)
 266
 267     `line_length`, `write_back`, and `fast` options are passed to
 268     :func:`format_file_in_place`.
 269     """
 270     cache: Cache = {}
 271     if write_back != WriteBack.DIFF:
 272         cache = read_cache(line_length)
 273         sources, cached = filter_cached(cache, sources)
 274         for src in cached:
 275             report.done(src, Changed.CACHED)
 276     cancelled = []
 277     formatted = []
 278     if sources:
 279         lock = None
 280         if write_back == WriteBack.DIFF:
 281             # For diff output, we need locks to ensure we don't interleave output
 282             # from different processes.
 283             manager = Manager()
 284             lock = manager.Lock()
 285         tasks = {
 286             src: loop.run_in_executor(
 287                 executor, format_file_in_place, src, line_length, fast, write_back, lock
 288             )
 289             for src in sources
 290         }
 291         _task_values = list(tasks.values())
 292         try:
 293             loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
 294             loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
 295         except NotImplementedError:
 296             # There are no good alternatives for these on Windows
 297             pass
 298         await asyncio.wait(_task_values)
 299         for src, task in tasks.items():
 300             if not task.done():
 301                 report.failed(src, "timed out, cancelling")
 302                 task.cancel()
 303                 cancelled.append(task)
 304             elif task.cancelled():
 305                 cancelled.append(task)
 306             elif task.exception():
 307                 report.failed(src, str(task.exception()))
 308             else:
 309                 formatted.append(src)
 310                 report.done(src, Changed.YES if task.result() else Changed.NO)
 311
 312     if cancelled:
 313         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 314     if write_back == WriteBack.YES and formatted:
 315         write_cache(cache, formatted, line_length)
 316
 317
 318 def format_file_in_place(
 319     src: Path,
 320     line_length: int,
 321     fast: bool,
 322     write_back: WriteBack = WriteBack.NO,
 323     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 324 ) -> bool:
 325     """Format file under `src` path. Return True if changed.
 326
 327     If `write_back` is True, write reformatted code back to stdout.
 328     `line_length` and `fast` options are passed to :func:`format_file_contents`.
 329     """
 330
 331     with tokenize.open(src) as src_buffer:
 332         src_contents = src_buffer.read()
 333     try:
 334         dst_contents = format_file_contents(
 335             src_contents, line_length=line_length, fast=fast
 336         )
 337     except NothingChanged:
 338         return False
 339
 340     if write_back == write_back.YES:
 341         with open(src, "w", encoding=src_buffer.encoding) as f:
 342             f.write(dst_contents)
 343     elif write_back == write_back.DIFF:
 344         src_name = f"{src}  (original)"
 345         dst_name = f"{src}  (formatted)"
 346         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 347         if lock:
 348             lock.acquire()
 349         try:
 350             sys.stdout.write(diff_contents)
 351         finally:
 352             if lock:
 353                 lock.release()
 354     return True
 355
 356
 357 def format_stdin_to_stdout(
 358     line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
 359 ) -> bool:
 360     """Format file on stdin. Return True if changed.
 361
 362     If `write_back` is True, write reformatted code back to stdout.
 363     `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
 364     """
 365     src = sys.stdin.read()
 366     dst = src
 367     try:
 368         dst = format_file_contents(src, line_length=line_length, fast=fast)
 369         return True
 370
 371     except NothingChanged:
 372         return False
 373
 374     finally:
 375         if write_back == WriteBack.YES:
 376             sys.stdout.write(dst)
 377         elif write_back == WriteBack.DIFF:
 378             src_name = "<stdin>  (original)"
 379             dst_name = "<stdin>  (formatted)"
 380             sys.stdout.write(diff(src, dst, src_name, dst_name))
 381
 382
 383 def format_file_contents(
 384     src_contents: str, line_length: int, fast: bool
 385 ) -> FileContent:
 386     """Reformat contents a file and return new contents.
 387
 388     If `fast` is False, additionally confirm that the reformatted code is
 389     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 390     `line_length` is passed to :func:`format_str`.
 391     """
 392     if src_contents.strip() == "":
 393         raise NothingChanged
 394
 395     dst_contents = format_str(src_contents, line_length=line_length)
 396     if src_contents == dst_contents:
 397         raise NothingChanged
 398
 399     if not fast:
 400         assert_equivalent(src_contents, dst_contents)
 401         assert_stable(src_contents, dst_contents, line_length=line_length)
 402     return dst_contents
 403
 404
 405 def format_str(src_contents: str, line_length: int) -> FileContent:
 406     """Reformat a string and return new contents.
 407
 408     `line_length` determines how many characters per line are allowed.
 409     """
 410     src_node = lib2to3_parse(src_contents)
 411     dst_contents = ""
 412     lines = LineGenerator()
 413     elt = EmptyLineTracker()
 414     py36 = is_python36(src_node)
 415     empty_line = Line()
 416     after = 0
 417     for current_line in lines.visit(src_node):
 418         for _ in range(after):
 419             dst_contents += str(empty_line)
 420         before, after = elt.maybe_empty_lines(current_line)
 421         for _ in range(before):
 422             dst_contents += str(empty_line)
 423         for line in split_line(current_line, line_length=line_length, py36=py36):
 424             dst_contents += str(line)
 425     return dst_contents
 426
 427
# Grammars tried, in this order, by `lib2to3_parse()` until one of them parses
# the source without a ParseError.
GRAMMARS = [
    pygram.python_grammar_no_print_statement_no_exec_statement,
    pygram.python_grammar_no_print_statement,
    pygram.python_grammar,
]
 433
 434
 435 def lib2to3_parse(src_txt: str) -> Node:
 436     """Given a string with source, return the lib2to3 Node."""
 437     grammar = pygram.python_grammar_no_print_statement
 438     if src_txt[-1] != "\n":
 439         nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
 440         src_txt += nl
 441     for grammar in GRAMMARS:
 442         drv = driver.Driver(grammar, pytree.convert)
 443         try:
 444             result = drv.parse_string(src_txt, True)
 445             break
 446
 447         except ParseError as pe:
 448             lineno, column = pe.context[1]
 449             lines = src_txt.splitlines()
 450             try:
 451                 faulty_line = lines[lineno - 1]
 452             except IndexError:
 453                 faulty_line = "<line number missing in source>"
 454             exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 455     else:
 456         raise exc from None
 457
 458     if isinstance(result, Leaf):
 459         result = Node(syms.file_input, [result])
 460     return result
 461
 462
 463 def lib2to3_unparse(node: Node) -> str:
 464     """Given a lib2to3 node, return its string representation."""
 465     code = str(node)
 466     return code
 467
 468
# Type of the objects yielded by a `Visitor`.
T = TypeVar("T")
 470
 471
 472 class Visitor(Generic[T]):
 473     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 474
 475     def visit(self, node: LN) -> Iterator[T]:
 476         """Main method to visit `node` and its children.
 477
 478         It tries to find a `visit_*()` method for the given `node.type`, like
 479         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 480         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 481         instead.
 482
 483         Then yields objects of type `T` from the selected visitor.
 484         """
 485         if node.type < 256:
 486             name = token.tok_name[node.type]
 487         else:
 488             name = type_repr(node.type)
 489         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 490
 491     def visit_default(self, node: LN) -> Iterator[T]:
 492         """Default `visit_*()` implementation. Recurses to children of `node`."""
 493         if isinstance(node, Node):
 494             for child in node.children:
 495                 yield from self.visit(child)
 496
 497
 498 @dataclass
 499 class DebugVisitor(Visitor[T]):
 500     tree_depth: int = 0
 501
 502     def visit_default(self, node: LN) -> Iterator[T]:
 503         indent = " " * (2 * self.tree_depth)
 504         if isinstance(node, Node):
 505             _type = type_repr(node.type)
 506             out(f"{indent}{_type}", fg="yellow")
 507             self.tree_depth += 1
 508             for child in node.children:
 509                 yield from self.visit(child)
 510
 511             self.tree_depth -= 1
 512             out(f"{indent}/{_type}", fg="yellow", bold=False)
 513         else:
 514             _type = token.tok_name.get(node.type, str(node.type))
 515             out(f"{indent}{_type}", fg="blue", nl=False)
 516             if node.prefix:
 517                 # We don't have to handle prefixes for `Node` objects since
 518                 # that delegates to the first child anyway.
 519                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 520             out(f" {node.value!r}", fg="blue", bold=False)
 521
 522     @classmethod
 523     def show(cls, code: str) -> None:
 524         """Pretty-print the lib2to3 AST of a given string of `code`.
 525
 526         Convenience method for debugging.
 527         """
 528         v: DebugVisitor[None] = DebugVisitor()
 529         list(v.visit(lib2to3_parse(code)))
 530
 531
# All reserved words listed by the `keyword` module of the running interpreter.
KEYWORDS = set(keyword.kwlist)
# Token types for indentation changes and line ends.
WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
# Leading keywords recognised by `Line.is_flow_control`.
FLOW_CONTROL = {"return", "raise", "break", "continue"}
# Grammar symbols of compound statements and definitions.
STATEMENT = {
    syms.if_stmt,
    syms.while_stmt,
    syms.for_stmt,
    syms.try_stmt,
    syms.except_clause,
    syms.with_stmt,
    syms.funcdef,
    syms.classdef,
}
# Leaf type that `Line.is_comment` and `Line.append_safe` test for to detect
# standalone comments.
STANDALONE_COMMENT = 153
LOGIC_OPERATORS = {"and", "or"}
# Token types of the comparison operators.
COMPARATORS = {
    token.LESS,
    token.GREATER,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESSEQUAL,
    token.GREATEREQUAL,
}
# Token types of arithmetic and bitwise operators; the same set of keys as
# MATH_PRIORITIES below.
MATH_OPERATORS = {
    token.VBAR,
    token.CIRCUMFLEX,
    token.AMPER,
    token.LEFTSHIFT,
    token.RIGHTSHIFT,
    token.PLUS,
    token.MINUS,
    token.STAR,
    token.SLASH,
    token.DOUBLESLASH,
    token.PERCENT,
    token.AT,
    token.TILDE,
    token.DOUBLESTAR,
}
STARS = {token.STAR, token.DOUBLESTAR}
# Parent node types of a star leaf in argument lists (per the inline notes).
# NOTE(review): the consumers of this set are outside this chunk.
VARARGS_PARENTS = {
    syms.arglist,
    syms.argument,  # double star in arglist
    syms.trailer,  # single argument to call
    syms.typedargslist,
    syms.varargslist,  # lambdas
}
# Parent node types of a star leaf in collection literals (per the inline
# notes).  NOTE(review): the consumers of this set are outside this chunk.
UNPACKING_PARENTS = {
    syms.atom,  # single element of a list or set literal
    syms.dictsetmaker,
    syms.listmaker,
    syms.testlist_gexp,
}
# Expression-level grammar symbols.
# NOTE(review): the consumers of this set are outside this chunk.
TEST_DESCENDANTS = {
    syms.test,
    syms.lambdef,
    syms.or_test,
    syms.and_test,
    syms.not_test,
    syms.comparison,
    syms.star_expr,
    syms.expr,
    syms.xor_expr,
    syms.and_expr,
    syms.shift_expr,
    syms.arith_expr,
    syms.trailer,
    syms.term,
    syms.power,
}
# Delimiter priorities.  `BracketTracker.max_delimiter_priority()` returns the
# largest priority recorded on a line.  Presumably these are the values the
# `is_split_*_delimiter()` helpers return -- those are outside this chunk.
COMPREHENSION_PRIORITY = 20
COMMA_PRIORITY = 18
TERNARY_PRIORITY = 16
LOGIC_PRIORITY = 14
STRING_PRIORITY = 12
COMPARATOR_PRIORITY = 10
# Priority per math operator token type; all are below COMPARATOR_PRIORITY.
MATH_PRIORITIES = {
    token.VBAR: 8,
    token.CIRCUMFLEX: 7,
    token.AMPER: 6,
    token.LEFTSHIFT: 5,
    token.RIGHTSHIFT: 5,
    token.PLUS: 4,
    token.MINUS: 4,
    token.STAR: 3,
    token.SLASH: 3,
    token.DOUBLESLASH: 3,
    token.PERCENT: 3,
    token.AT: 3,
    token.TILDE: 2,
    token.DOUBLESTAR: 1,
}
 624
 625
 626 @dataclass
 627 class BracketTracker:
 628     """Keeps track of brackets on a line."""
 629
 630     depth: int = 0
 631     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
 632     delimiters: Dict[LeafID, Priority] = Factory(dict)
 633     previous: Optional[Leaf] = None
 634     _for_loop_variable: bool = False
 635     _lambda_arguments: bool = False
 636
 637     def mark(self, leaf: Leaf) -> None:
 638         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
 639
 640         All leaves receive an int `bracket_depth` field that stores how deep
 641         within brackets a given leaf is. 0 means there are no enclosing brackets
 642         that started on this line.
 643
 644         If a leaf is itself a closing bracket, it receives an `opening_bracket`
 645         field that it forms a pair with. This is a one-directional link to
 646         avoid reference cycles.
 647
 648         If a leaf is a delimiter (a token on which Black can split the line if
 649         needed) and it's on depth 0, its `id()` is stored in the tracker's
 650         `delimiters` field.
 651         """
 652         if leaf.type == token.COMMENT:
 653             return
 654
 655         self.maybe_decrement_after_for_loop_variable(leaf)
 656         self.maybe_decrement_after_lambda_arguments(leaf)
 657         if leaf.type in CLOSING_BRACKETS:
 658             self.depth -= 1
 659             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
 660             leaf.opening_bracket = opening_bracket
 661         leaf.bracket_depth = self.depth
 662         if self.depth == 0:
 663             delim = is_split_before_delimiter(leaf, self.previous)
 664             if delim and self.previous is not None:
 665                 self.delimiters[id(self.previous)] = delim
 666             else:
 667                 delim = is_split_after_delimiter(leaf, self.previous)
 668                 if delim:
 669                     self.delimiters[id(leaf)] = delim
 670         if leaf.type in OPENING_BRACKETS:
 671             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
 672             self.depth += 1
 673         self.previous = leaf
 674         self.maybe_increment_lambda_arguments(leaf)
 675         self.maybe_increment_for_loop_variable(leaf)
 676
 677     def any_open_brackets(self) -> bool:
 678         """Return True if there is an yet unmatched open bracket on the line."""
 679         return bool(self.bracket_match)
 680
 681     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
 682         """Return the highest priority of a delimiter found on the line.
 683
 684         Values are consistent with what `is_split_*_delimiter()` return.
 685         Raises ValueError on no delimiters.
 686         """
 687         return max(v for k, v in self.delimiters.items() if k not in exclude)
 688
 689     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
 690         """In a for loop, or comprehension, the variables are often unpacks.
 691
 692         To avoid splitting on the comma in this situation, increase the depth of
 693         tokens between `for` and `in`.
 694         """
 695         if leaf.type == token.NAME and leaf.value == "for":
 696             self.depth += 1
 697             self._for_loop_variable = True
 698             return True
 699
 700         return False
 701
 702     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
 703         """See `maybe_increment_for_loop_variable` above for explanation."""
 704         if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
 705             self.depth -= 1
 706             self._for_loop_variable = False
 707             return True
 708
 709         return False
 710
 711     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
 712         """In a lambda expression, there might be more than one argument.
 713
 714         To avoid splitting on the comma in this situation, increase the depth of
 715         tokens between `lambda` and `:`.
 716         """
 717         if leaf.type == token.NAME and leaf.value == "lambda":
 718             self.depth += 1
 719             self._lambda_arguments = True
 720             return True
 721
 722         return False
 723
 724     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
 725         """See `maybe_increment_lambda_arguments` above for explanation."""
 726         if self._lambda_arguments and leaf.type == token.COLON:
 727             self.depth -= 1
 728             self._lambda_arguments = False
 729             return True
 730
 731         return False
 732
 733     def get_open_lsqb(self) -> Optional[Leaf]:
 734         """Return the most recent opening square bracket (if any)."""
 735         return self.bracket_match.get((self.depth - 1, token.RSQB))
 736
 737
 738 @dataclass
 739 class Line:
 740     """Holds leaves and comments. Can be printed with `str(line)`."""
 741
 742     depth: int = 0
 743     leaves: List[Leaf] = Factory(list)
 744     comments: List[Tuple[Index, Leaf]] = Factory(list)
 745     bracket_tracker: BracketTracker = Factory(BracketTracker)
 746     inside_brackets: bool = False
 747
 748     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
 749         """Add a new `leaf` to the end of the line.
 750
 751         Unless `preformatted` is True, the `leaf` will receive a new consistent
 752         whitespace prefix and metadata applied by :class:`BracketTracker`.
 753         Trailing commas are maybe removed, unpacked for loop variables are
 754         demoted from being delimiters.
 755
 756         Inline comments are put aside.
 757         """
 758         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
 759         if not has_value:
 760             return
 761
 762         if token.COLON == leaf.type and self.is_class_paren_empty:
 763             del self.leaves[-2:]
 764         if self.leaves and not preformatted:
 765             # Note: at this point leaf.prefix should be empty except for
 766             # imports, for which we only preserve newlines.
 767             leaf.prefix += whitespace(
 768                 leaf, complex_subscript=self.is_complex_subscript(leaf)
 769             )
 770         if self.inside_brackets or not preformatted:
 771             self.bracket_tracker.mark(leaf)
 772             self.maybe_remove_trailing_comma(leaf)
 773         if not self.append_comment(leaf):
 774             self.leaves.append(leaf)
 775
 776     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
 777         """Like :func:`append()` but disallow invalid standalone comment structure.
 778
 779         Raises ValueError when any `leaf` is appended after a standalone comment
 780         or when a standalone comment is not the first leaf on the line.
 781         """
 782         if self.bracket_tracker.depth == 0:
 783             if self.is_comment:
 784                 raise ValueError("cannot append to standalone comments")
 785
 786             if self.leaves and leaf.type == STANDALONE_COMMENT:
 787                 raise ValueError(
 788                     "cannot append standalone comments to a populated line"
 789                 )
 790
 791         self.append(leaf, preformatted=preformatted)
 792
 793     @property
 794     def is_comment(self) -> bool:
 795         """Is this line a standalone comment?"""
 796         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
 797
 798     @property
 799     def is_decorator(self) -> bool:
 800         """Is this line a decorator?"""
 801         return bool(self) and self.leaves[0].type == token.AT
 802
 803     @property
 804     def is_import(self) -> bool:
 805         """Is this an import line?"""
 806         return bool(self) and is_import(self.leaves[0])
 807
 808     @property
 809     def is_class(self) -> bool:
 810         """Is this line a class definition?"""
 811         return (
 812             bool(self)
 813             and self.leaves[0].type == token.NAME
 814             and self.leaves[0].value == "class"
 815         )
 816
 817     @property
 818     def is_def(self) -> bool:
 819         """Is this a function definition? (Also returns True for async defs.)"""
 820         try:
 821             first_leaf = self.leaves[0]
 822         except IndexError:
 823             return False
 824
 825         try:
 826             second_leaf: Optional[Leaf] = self.leaves[1]
 827         except IndexError:
 828             second_leaf = None
 829         return (
 830             (first_leaf.type == token.NAME and first_leaf.value == "def")
 831             or (
 832                 first_leaf.type == token.ASYNC
 833                 and second_leaf is not None
 834                 and second_leaf.type == token.NAME
 835                 and second_leaf.value == "def"
 836             )
 837         )
 838
 839     @property
 840     def is_flow_control(self) -> bool:
 841         """Is this line a flow control statement?
 842
 843         Those are `return`, `raise`, `break`, and `continue`.
 844         """
 845         return (
 846             bool(self)
 847             and self.leaves[0].type == token.NAME
 848             and self.leaves[0].value in FLOW_CONTROL
 849         )
 850
 851     @property
 852     def is_yield(self) -> bool:
 853         """Is this line a yield statement?"""
 854         return (
 855             bool(self)
 856             and self.leaves[0].type == token.NAME
 857             and self.leaves[0].value == "yield"
 858         )
 859
 860     @property
 861     def is_class_paren_empty(self) -> bool:
 862         """Is this a class with no base classes but using parentheses?
 863
 864         Those are unnecessary and should be removed.
 865         """
 866         return (
 867             bool(self)
 868             and len(self.leaves) == 4
 869             and self.is_class
 870             and self.leaves[2].type == token.LPAR
 871             and self.leaves[2].value == "("
 872             and self.leaves[3].type == token.RPAR
 873             and self.leaves[3].value == ")"
 874         )
 875
 876     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
 877         """If so, needs to be split before emitting."""
 878         for leaf in self.leaves:
 879             if leaf.type == STANDALONE_COMMENT:
 880                 if leaf.bracket_depth <= depth_limit:
 881                     return True
 882
 883         return False
 884
    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
        """Remove trailing comma if there is one and it's safe.

        `closing` is the leaf about to be appended.  Nothing happens unless it
        is a closing bracket and the line currently ends with a comma.
        Returns True if the comma was removed, False otherwise.
        """
        if not (
            self.leaves
            and self.leaves[-1].type == token.COMMA
            and closing.type in CLOSING_BRACKETS
        ):
            return False

        # A comma before a closing brace is always removed.
        if closing.type == token.RBRACE:
            self.remove_trailing_comma()
            return True

        # Before a closing square bracket the comma is removed right away only
        # when it belongs to a `listmaker` node; other cases fall through to
        # the checks below.
        if closing.type == token.RSQB:
            comma = self.leaves[-1]
            if comma.parent and comma.parent.type == syms.listmaker:
                self.remove_trailing_comma()
                return True

        # For parens let's check if it's safe to remove the comma.
        # Imports are always safe.
        if self.is_import:
            self.remove_trailing_comma()
            return True

        # Otherwise, if the trailing one is the only one, we might mistakenly
        # change a tuple into a different type by removing the comma.
        depth = closing.bracket_depth + 1
        commas = 0
        opening = closing.opening_bracket
        # Locate the matching opening bracket on this line; give up if it is
        # not among the leaves.
        for _opening_index, leaf in enumerate(self.leaves):
            if leaf is opening:
                break

        else:
            return False

        # Count the commas directly inside the bracket pair (one level deeper
        # than the closing bracket itself).
        for leaf in self.leaves[_opening_index + 1 :]:
            if leaf is closing:
                break

            bracket_depth = leaf.bracket_depth
            if bracket_depth == depth and leaf.type == token.COMMA:
                commas += 1
                if leaf.parent and leaf.parent.type == syms.arglist:
                    # A comma belonging to an `arglist` counts twice, which
                    # by itself satisfies the `commas > 1` test below.
                    commas += 1
                    break

        if commas > 1:
            self.remove_trailing_comma()
            return True

        return False
 938
 939     def append_comment(self, comment: Leaf) -> bool:
 940         """Add an inline or standalone comment to the line."""
 941         if (
 942             comment.type == STANDALONE_COMMENT
 943             and self.bracket_tracker.any_open_brackets()
 944         ):
 945             comment.prefix = ""
 946             return False
 947
 948         if comment.type != token.COMMENT:
 949             return False
 950
 951         after = len(self.leaves) - 1
 952         if after == -1:
 953             comment.type = STANDALONE_COMMENT
 954             comment.prefix = ""
 955             return False
 956
 957         else:
 958             self.comments.append((after, comment))
 959             return True
 960
 961     def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
 962         """Generate comments that should appear directly after `leaf`."""
 963         for _leaf_index, _leaf in enumerate(self.leaves):
 964             if leaf is _leaf:
 965                 break
 966
 967         else:
 968             return
 969
 970         for index, comment_after in self.comments:
 971             if _leaf_index == index:
 972                 yield comment_after
 973
 974     def remove_trailing_comma(self) -> None:
 975         """Remove the trailing comma and moves the comments attached to it."""
 976         comma_index = len(self.leaves) - 1
 977         for i in range(len(self.comments)):
 978             comment_index, comment = self.comments[i]
 979             if comment_index == comma_index:
 980                 self.comments[i] = (comma_index - 1, comment)
 981         self.leaves.pop()
 982
 983     def is_complex_subscript(self, leaf: Leaf) -> bool:
 984         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
 985         open_lsqb = (
 986             leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
 987         )
 988         if open_lsqb is None:
 989             return False
 990
 991         subscript_start = open_lsqb.next_sibling
 992         if (
 993             isinstance(subscript_start, Node)
 994             and subscript_start.type == syms.subscriptlist
 995         ):
 996             subscript_start = child_towards(subscript_start, leaf)
 997         return subscript_start is not None and any(
 998             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
 999         )
1000
1001     def __str__(self) -> str:
1002         """Render the line."""
1003         if not self:
1004             return "\n"
1005
1006         indent = "    " * self.depth
1007         leaves = iter(self.leaves)
1008         first = next(leaves)
1009         res = f"{first.prefix}{indent}{first.value}"
1010         for leaf in leaves:
1011             res += str(leaf)
1012         for _, comment in self.comments:
1013             res += str(comment)
1014         return res + "\n"
1015
1016     def __bool__(self) -> bool:
1017         """Return True if the line has leaves or comments."""
1018         return bool(self.leaves or self.comments)
1019
1020
class UnformattedLines(Line):
    """A :class:`Line` variant that stores lines which aren't reformatted."""

    def append(self, leaf: Leaf, preformatted: bool = True) -> None:
        """Store `leaf` verbatim at the end of the lines.

        `preformatted` is accepted but ignored.

        The comments in the leaf's prefix are generated only to detect a
        `FormatOn` signal; when one is raised, the leaf built from the
        consumed part of the prefix is stored and the exception propagates.
        Otherwise the leaf is stored as is and `depth` is adjusted on
        INDENT/DEDENT, which is useful once the user says `# fmt: on`.
        """
        try:
            for _ in generate_comments(leaf):
                pass
        except FormatOn as format_on:
            self.leaves.append(format_on.leaf_from_consumed(leaf))
            raise

        self.leaves.append(leaf)
        kind = leaf.type
        if kind == token.INDENT:
            self.depth += 1
        elif kind == token.DEDENT:
            self.depth -= 1

    def __str__(self) -> str:
        """Render the stored leaves exactly as they were appended.

        `depth` is not used for indentation in this case.
        """
        if not self:
            return "\n"

        return "".join(str(leaf) for leaf in self.leaves)

    def append_comment(self, comment: Leaf) -> bool:
        """Unsupported here; always raises `NotImplementedError`."""
        raise NotImplementedError("Unformatted lines don't store comments separately.")

    def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
        """No-op: unformatted lines keep their commas. Always returns False."""
        return False

    def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
        """No-op for unformatted lines. Always returns False."""
        return False
1068
1069
@dataclass
class EmptyLineTracker:
    """Provides a stateful method that returns the number of potential extra
    empty lines needed before and after the currently processed line.

    Note: this tracker works on lines that haven't been split yet.  It assumes
    the prefix of the first leaf consists of optional newlines.  Those newlines
    are consumed by `maybe_empty_lines()` and included in the computation.
    """
    # The last line passed to `maybe_empty_lines()` (unformatted lines excluded).
    previous_line: Optional[Line] = None
    # The "after" count returned for `previous_line`.
    previous_after: int = 0
    # Stack of depths of the `def`/`class` lines that are still "open".
    previous_defs: List[int] = Factory(list)

    def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Return the number of extra empty lines before and after the `current_line`.

        This is for separating `def`, `async def` and `class` with extra empty
        lines (two on module-level), as well as providing an extra empty line
        after flow control keywords to make them more prominent.

        `UnformattedLines` always get `(0, 0)` and do not update the tracker's
        state.  For other lines, the "after" count already reported for the
        previous line is subtracted from this line's "before" count.
        """
        if isinstance(current_line, UnformattedLines):
            return 0, 0

        before, after = self._maybe_empty_lines(current_line)
        before -= self.previous_after
        self.previous_after = after
        self.previous_line = current_line
        return before, after

    def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
        """Compute the raw `(before, after)` counts for `current_line`.

        Side effects: clears the prefix of the line's first leaf and updates
        the `previous_defs` stack.
        """
        # At most two empty lines on module level, one when nested.
        max_allowed = 1
        if current_line.depth == 0:
            max_allowed = 2
        if current_line.leaves:
            # Consume the first leaf's extra newlines.
            first_leaf = current_line.leaves[0]
            before = first_leaf.prefix.count("\n")
            before = min(before, max_allowed)
            first_leaf.prefix = ""
        else:
            before = 0
        depth = current_line.depth
        # Leaving the body of one or more previous defs/classes: force the
        # separation to one empty line when nested, two on module level.
        while self.previous_defs and self.previous_defs[-1] >= depth:
            self.previous_defs.pop()
            before = 1 if depth else 2
        is_decorator = current_line.is_decorator
        if is_decorator or current_line.is_def or current_line.is_class:
            # Only the def/class itself is tracked, not its decorators.
            if not is_decorator:
                self.previous_defs.append(depth)
            if self.previous_line is None:
                # Don't insert empty lines before the first line in the file.
                return 0, 0

            # Don't insert empty lines between a decorator and what follows it.
            if self.previous_line.is_decorator:
                return 0, 0

            # Keep a comment glued to the definition it directly precedes.
            if (
                self.previous_line.is_comment
                and self.previous_line.depth == current_line.depth
                and before == 0
            ):
                return 0, 0

            newlines = 2
            if current_line.depth:
                newlines -= 1
            return newlines, 0

        # Ensure at least one empty line between imports and the first
        # non-import line on the same depth.
        if (
            self.previous_line
            and self.previous_line.is_import
            and not current_line.is_import
            and depth == self.previous_line.depth
        ):
            return (before or 1), 0

        return before, 0
1147
1148
@dataclass
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """
    # The line currently being assembled; replaced whenever `line()` emits one.
    current_line: Line = Factory(Line)

    def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
        """Generate a line.

        If the current line is empty, nothing is emitted; its depth is adjusted
        by `indent` in place (or it is replaced when a different `type` of line
        is requested).

        Otherwise the current line is yielded and a new, empty current_line of
        the given `type` is set up at the depth adjusted by `indent`.
        """
        if not self.current_line:
            if self.current_line.__class__ == type:
                self.current_line.depth += indent
            else:
                self.current_line = type(depth=self.current_line.depth + indent)
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        complete_line = self.current_line
        self.current_line = type(depth=complete_line.depth + indent)
        yield complete_line

    def visit(self, node: LN) -> Iterator[Line]:
        """Main method to visit `node` and its children.

        Yields :class:`Line` objects.

        While the current line is an `UnformattedLines` instance, dispatch is
        bypassed and every node goes through `visit_unformatted()`.
        """
        if isinstance(self.current_line, UnformattedLines):
            # File contained `# fmt: off`
            yield from self.visit_unformatted(node)

        else:
            yield from super().visit(node)

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`.

        For leaves, the comments found in the prefix are emitted first, then
        the leaf itself is appended to the current line (unless its type is in
        `WHITESPACE`).  `FormatOff`/`FormatOn` raised by `generate_comments()`
        switch formatting off or restart processing of the trimmed leaf.
        """
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            try:
                for comment in generate_comments(node):
                    if any_open_brackets:
                        # any comment within brackets is subject to splitting
                        self.current_line.append(comment)
                    elif comment.type == token.COMMENT:
                        # regular trailing comment
                        self.current_line.append(comment)
                        yield from self.line()

                    else:
                        # regular standalone comment
                        yield from self.line()

                        self.current_line.append(comment)
                        yield from self.line()

            except FormatOff as f_off:
                # Switch to collecting unformatted lines and re-visit the leaf
                # with the already processed part of its prefix trimmed.
                f_off.trim_prefix(node)
                yield from self.line(type=UnformattedLines)
                yield from self.visit(node)

            except FormatOn as f_on:
                # This only happens here if somebody says "fmt: on" multiple
                # times in a row.
                f_on.trim_prefix(node)
                yield from self.visit_default(node)

            else:
                normalize_prefix(node, inside_brackets=any_open_brackets)
                if node.type == token.STRING:
                    normalize_string_quotes(node)
                if node.type not in WHITESPACE:
                    self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_INDENT(self, node: Node) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Node) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, and `assert`.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This method puts those on a separate line.

        `parens` is a set of strings forwarded to `normalize_invisible_parens()`
        as `parens_after`; presumably invisible parentheses are put around the
        child that follows a leaf with one of those values (confirm against
        that helper, which is defined elsewhere).
        """
        normalize_invisible_parens(node, parens_after=parens)
        for child in node.children:
            # Start a new line right before each of the statement's keywords.
            if child.type == token.NAME and child.value in keywords:  # type: ignore
                yield from self.line()

            yield from self.visit(child)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        # A simple statement whose direct parent is a compound statement (type
        # in `STATEMENT`) is emitted one level deeper, like a suite would be.
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            yield from self.line(+1)
            yield from self.visit_default(node)
            yield from self.line(-1)

        else:
            yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        # Visit children up to and including the ASYNC token...
        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC:
                break

        # ...then visit the children of the wrapped statement directly, so the
        # wrapped statement's own visitor is not invoked and the keyword stays
        # on the same line as `async`.
        # NOTE(review): `next()` assumes a child follows ASYNC; if none did,
        # StopIteration would be raised inside this generator.
        internal_stmt = next(children)
        for child in internal_stmt.children:
            yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators, starting a new line before each child."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_import_from(self, node: Node) -> Iterator[Line]:
        """Visit import_from and maybe put invisible parentheses.

        This is separate from `visit_stmt` because import statements don't
        support arbitrary atoms and thus handling of parentheses is custom.
        """
        check_lpar = False
        for index, child in enumerate(node.children):
            # `check_lpar` is True only for the child right after `import`.
            if check_lpar:
                if child.type == token.LPAR:
                    # make parentheses invisible
                    child.value = ""  # type: ignore
                    node.children[-1].value = ""  # type: ignore
                else:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                break

            check_lpar = (
                child.type == token.NAME and child.value == "import"  # type: ignore
            )

        for child in node.children:
            yield from self.visit(child)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        # The semicolon leaf itself is never appended to a line.
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_unformatted(self, node: LN) -> Iterator[Line]:
        """Used when file contained a `# fmt: off`.

        Leaves are appended to the current (unformatted) line as they are.
        When appending raises `FormatOn`, the collected line is emitted and the
        trimmed leaf is visited again.
        """
        if isinstance(node, Node):
            for child in node.children:
                yield from self.visit(child)

        else:
            try:
                self.current_line.append(node)
            except FormatOn as f_on:
                f_on.trim_prefix(node)
                yield from self.line()
                yield from self.visit(node)

            if node.type == token.ENDMARKER:
                # somebody decided not to put a final `# fmt: on`
                yield from self.line()

    def __attrs_post_init__(self) -> None:
        """You are in a twisty little maze of passages.

        Binds the statement-specific `visit_*` attributes as partials of
        `visit_stmt` with the keywords and `parens` values for that statement.
        """
        v = self.visit_stmt
        # Shared empty set for statements that get no `parens` values.
        Ø: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ø
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
        self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
        self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
        # These node types reuse existing visitors.
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators
1374
1375
# Node types listed as implicit tuples.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps every opening bracket token type to its closing counterpart.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Token types for which `whitespace()` returns no prefix right away.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
1382
1383
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    The result is an empty string, a single space, a double space (for
    comments), or, for default values of typed arguments, the prefix of the
    preceding `=` sign.  The decision is made in two stages: first by looking
    at what precedes the leaf, then by the type of the leaf's parent node.
    Anything not ruled out by those checks gets a single space.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    # Colons only get whitespace treatment inside subscripts and slices.
    if (
        t == token.COLON
        and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
    ):
        return NO

    # Stage 1: decide based on what directly precedes the leaf.
    prev = leaf.prev_sibling
    if not prev:
        # First child of its parent: look at the leaf preceding the parent.
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist, syms.argument, syms.parameters, syms.varargslist
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument.  So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in STARS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            # Operand of a unary operator.
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    # Stage 2: decide based on the type of the leaf's parent node.
    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument.  So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | STARS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        # dict unpacking
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if (
                prevp.type == token.COLON
                and prevp_parent.type in {syms.subscript, syms.sliceop}
            ):
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t == token.NAME or t == token.NUMBER:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    # Note: a separate `p.type == syms.sliceop` branch used to follow here, but
    # it could never run because `sliceop` parents are already handled by the
    # "indexing" branch above.

    return SPACE
1608
1609
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Walks up through the parents of `node` until one of them has a previous
    sibling; the result is that sibling itself when it is a leaf, or its last
    leaf otherwise (None when it contains no leaves at all).
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if not sibling:
            # Nothing before us on this level, look one level up.
            current = current.parent
            continue

        if isinstance(sibling, Leaf):
            return sibling

        try:
            return list(sibling.leaves())[-1]

        except IndexError:
            return None

    return None
1626
1627
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    The walk starts at `descendant` itself and climbs through the parents;
    None is returned when `ancestor` is never reached.
    """
    current: Optional[LN] = descendant
    while current:
        if current.parent != ancestor:
            current = current.parent
        else:
            break

    return current
1634
1635
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    Only delimiters that cause a line break after themselves are considered
    here; currently that is the comma.  Any other leaf gets priority 0.
    `previous` is accepted but not used.

    Higher numbers are higher priority.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
1648
1649
def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    Only delimiters that cause a line break before themselves are considered
    here.  A leaf that is not such a delimiter gets priority 0.

    Higher numbers are higher priority.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    kind = leaf.type
    if (
        kind in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        # Binary use only: unary operators live under `factor`/`star_expr`.
        return MATH_PRIORITIES[kind]

    if kind in COMPARATORS:
        return COMPARATOR_PRIORITY

    if kind == token.STRING:
        # Implicit concatenation: a string directly following another string.
        if previous is not None and previous.type == token.STRING:
            return STRING_PRIORITY

        return 0

    # Everything below is a keyword acting as a delimiter; it needs a parent
    # to tell which construct it belongs to.
    if kind != token.NAME or not leaf.parent:
        return 0

    word = leaf.value
    parent_type = leaf.parent.type
    if word == "for" and parent_type in {syms.comp_for, syms.old_comp_for}:
        return COMPREHENSION_PRIORITY

    if word == "if" and parent_type in {syms.comp_if, syms.old_comp_if}:
        return COMPREHENSION_PRIORITY

    if word in {"if", "else"} and parent_type == syms.test:
        return TERNARY_PRIORITY

    if word in LOGIC_OPERATORS:
        return LOGIC_PRIORITY

    return 0
1708
1709
def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    lib2to3 stores comments inside the whitespace prefix of the token that
    follows them (see `pgen2/driver.py:Driver.parse_tokens()`).  That spares
    the grammar from describing every place a comment may appear, but it also
    means comments don't "belong" to any node.  The leaves generated here are
    therefore parentless.

    What matters to us is only whether a comment is inline or standalone (not
    sharing the line with any code).  Inline comments are emitted as regular
    token.COMMENT leaves, standalone ones with the fake STANDALONE_COMMENT
    token identifier.

    After yielding a `# fmt: on` / `# yapf: enable` comment, `FormatOn` is
    raised; after a standalone `# fmt: off` / `# yapf: disable` comment,
    `FormatOff` is raised.  Both carry the number of prefix characters
    consumed so far.
    """
    prefix = leaf.prefix
    if not prefix or "#" not in prefix:
        return

    consumed = 0
    blank_lines = 0
    for lineno, raw_line in enumerate(prefix.split("\n")):
        consumed += len(raw_line) + 1  # adding the length of the split '\n'
        stripped = raw_line.lstrip()
        if not stripped:
            blank_lines += 1
            continue

        if not stripped.startswith("#"):
            continue

        # Only a comment on the very first prefix line shares its line with
        # code, and never so before the end marker.
        is_trailing = lineno == 0 and leaf.type != token.ENDMARKER
        comment_type = token.COMMENT if is_trailing else STANDALONE_COMMENT
        comment = make_comment(stripped)
        yield Leaf(comment_type, comment, prefix="\n" * blank_lines)

        if comment in {"# fmt: on", "# yapf: enable"}:
            raise FormatOn(consumed)

        if comment in {"# fmt: off", "# yapf: disable"}:
            if comment_type == STANDALONE_COMMENT:
                raise FormatOff(consumed)

            before = preceding_leaf(leaf)
            if not before or before.type in WHITESPACE:
                # standalone comment in disguise
                raise FormatOff(consumed)

        blank_lines = 0
1765
1766
def make_comment(content: str) -> str:
    """Normalize `content` into a consistently formatted comment string.

    Trailing whitespace is dropped and the result always starts with a hash
    sign (one is added when `content` has none).  A single space is inserted
    after the hash unless the text following it already starts with a space,
    "!", ":" or "#", which keeps "#!", "#:" and "##" comments intact.
    """
    text = content.rstrip()
    if not text:
        return "#"

    body = text[1:] if text.startswith("#") else text
    needs_space = bool(body) and body[0] not in " !:#"
    return f"# {body}" if needs_space else f"#{body}"
1784
1785
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    The results should fit in the allotted `line_length` but might not be able
    to.  Lines that are unformatted, are comments, or already fit (and contain
    neither a multiline string nor standalone comments) are yielded untouched.

    `inner` signifies that there was a pair of brackets somewhere around the
    current `line`, possibly transitively.  It is passed as True to recursive
    calls; this function body does not otherwise read it.

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.

    The candidate split functions are tried in order.  The first one that
    completes without raising :exc:`CannotSplit` wins; if none does, the
    original `line` is yielded as is.
    """
    if isinstance(line, UnformattedLines) or line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    fits = (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
    if fits:
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    elif line.is_import:
        split_funcs = [explode_split]
    elif line.inside_brackets:
        split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
    else:
        split_funcs = [right_hand_split]

    for split_func in split_funcs:
        # Lines are accumulated instead of yielded directly because a failure
        # half-way through must discard everything this attempt produced.
        pieces: List[Line] = []
        try:
            for piece in split_func(line, py36):
                if str(piece).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                pieces.extend(
                    split_line(piece, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            continue

        yield from pieces
        return

    # Every split function gave up: emit the line unchanged.
    yield line
1843
1844
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Leaves up to and including the first opening bracket form the `head`, the
    content of that bracket pair forms the `body` (one indentation level
    deeper), and the matching closing bracket with everything after it forms
    the `tail`.  Empty parts are not yielded.

    `py36` is accepted to match the `SplitFunc` signature; it is not used here.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = head_leaves
    matching_bracket = None
    for leaf in line.leaves:
        closes_body = (
            bucket is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        )
        if closes_body:
            # With nothing collected between the brackets there is no body to
            # speak of: keep filling `head` and look for the next bracket.
            bucket = tail_leaves if body_leaves else head_leaves
        bucket.append(leaf)
        if bucket is head_leaves and leaf.type in OPENING_BRACKETS:
            matching_bracket = leaf
            bucket = body_leaves

    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)

    # Build the new lines.
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    parts = ((head, head_leaves), (body, body_leaves), (tail, tail_leaves))
    for result, leaves in parts:
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)

    bracket_split_succeeded_or_raise(head, body, tail)
    for result, _ in parts:
        if result:
            yield result
1885
1886
def right_hand_split(
    line: Line, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.

    `omit` holds the ids of closing brackets that must not be used as the split
    point.  `py36` is only forwarded to the recursive call.

    Yields the non-empty ones of `head`, `body` and `tail`, in that order.

    Raises :exc:`CannotSplit` when no bracket to split on is found, or when
    :func:`bracket_split_succeeded_or_raise` rejects the result.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Walk the leaves backwards.  Everything up to and including the first
    # usable closing bracket goes to `tail`, the bracket content to `body`,
    # and the opening bracket plus whatever precedes it to `head`.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                # An empty bracket pair stays in `tail` and the search for a
                # closing bracket to split on continues.
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    # The lists were filled in reverse order; restore the source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    elif not head_leaves:
        # No `head` and no `body` means the split failed. `tail` has all content.
        raise CannotSplit("No brackets found")

    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    assert opening_bracket and closing_bracket
    if (
        # the opening bracket is an optional paren
        opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # there are no delimiters or standalone comments in the body
        and not body.bracket_tracker.delimiters
        and not line.contains_standalone_comments(0)
        # and it's not an import (optional parens are the only thing we can split
        # on in this case; attempting a split without them is a waste of time)
        and not line.is_import
    ):
        # Retry with this closing bracket excluded so that an earlier bracket
        # pair gets used instead.  If that fails, fall through and split on the
        # optional parentheses after all.
        omit = {id(closing_bracket), *omit}
        try:
            yield from right_hand_split(line, py36=py36, omit=omit)
            return
        except CannotSplit:
            pass

    # The brackets end and start separate lines now, so they have to be
    # rendered even if they were invisible so far.
    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
1957
1958
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets.  `head` is the
    content before (and including) the opening bracket, `body` is the content
    inside the brackets, and `tail` is the content starting with and following
    the closing bracket.  Each is meant to end up on a separate line.

    The split is considered failed when `body` is empty and the stripped
    `tail` is either empty (the split reproduced the original line) or shorter
    than three characters (not worth a line of its own).
    """
    tail_len = len(str(tail).strip())
    if body:
        return

    if tail_len == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if tail_len < 3:
        raise CannotSplit(
            f"Splitting brackets on an empty body to save "
            f"{tail_len} characters is not worth it"
        )
1983
1984
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Decorate `split_func` so that every line it yields starts without prefix.

    The wrapper calls :func:`normalize_prefix` with ``inside_brackets=True`` on
    the first leaf of each yielded line and then passes the line on unchanged
    otherwise.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for split_result in split_func(line, py36):
            first_leaf = split_result.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield split_result

    return split_wrapper
1998
1999
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    A new line is started after every leaf whose delimiter priority equals the
    maximum priority found in `line` (the last leaf is excluded from that
    computation).  When splitting on commas, a trailing comma is added to the
    last line if it is safe to do so.

    If `py36` is True, the split will add trailing commas also in function
    signatures that contain `*` and `**`.

    Raises :exc:`CannotSplit` if `line` is empty or has no delimiters.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    tracker = line.bracket_tracker
    delimiters = tracker.delimiters
    try:
        delimiter_priority = tracker.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    def new_line() -> Line:
        """Return an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = new_line()
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = new_line()
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)
        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        at_lowest_depth = leaf.bracket_depth == lowest_depth
        if at_lowest_depth and is_vararg(leaf, within=VARARGS_PARENTS):
            # A comma after * or ** is only safe when targeting Python 3.6+.
            trailing_comma_safe = trailing_comma_safe and py36
        if delimiters.get(id(leaf)) == delimiter_priority:
            yield current_line

            current_line = new_line()

    if not current_line:
        return

    if (
        trailing_comma_safe
        and delimiter_priority == COMMA_PRIORITY
        and current_line.leaves[-1].type not in {token.COMMA, STANDALONE_COMMENT}
    ):
        current_line.append(Leaf(token.COMMA, ","))
    yield current_line
2061
2062
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split standalone comments from the rest of the line.

    Leaves and their trailing comments are appended to the current line for as
    long as `Line.append_safe` accepts them; a rejected leaf closes the current
    line and starts a new one.

    `py36` is accepted to match the `SplitFunc` signature; it is not used here.

    Raises :exc:`CannotSplit` if `line` contains no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def new_line() -> Line:
        """Return an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = new_line()

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = new_line()
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)
        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

    if current_line:
        yield current_line
2090
2091
def explode_split(
    line: Line, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split by rightmost bracket and immediately split contents by a delimiter.

    The line is first split with :func:`right_hand_split`.  Unless that gives
    exactly three lines, its result is yielded unchanged.  Otherwise the middle
    line is further split with :func:`delimiter_split`, falling back to the
    middle line itself when that raises :exc:`CannotSplit`.
    """
    pieces = list(right_hand_split(line, py36, omit))
    if len(pieces) != 3:
        yield from pieces
        return

    head, body, tail = pieces
    yield head

    try:
        yield from delimiter_split(body, py36)

    except CannotSplit:
        yield body

    yield tail
2110
2111
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement.

    That is the case for a NAME leaf "import" whose parent is an `import_name`
    node, or a NAME leaf "from" whose parent is an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    if not parent:
        return False

    if leaf.value == "import":
        return parent.type == syms.import_name

    if leaf.value == "from":
        return parent.type == syms.import_from

    return False
2124
2125
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Outside of brackets the prefix is reduced to the newlines found after its
    last comment (minus the one ending that comment), or to all of its newlines
    when it holds no comment.  If the part before the first "#" contains a
    backslash, the prefix is emptied instead, as it always is inside brackets.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    if inside_brackets:
        leaf.prefix = ""
        return

    parts = leaf.prefix.split("#")
    if "\\" in parts[0]:
        leaf.prefix = ""
        return

    newline_count = parts[-1].count("\n")
    if len(parts) > 1:
        # The first newline after a comment merely terminates that comment.
        newline_count -= 1
    leaf.prefix = "\n" * newline_count
2142
2143
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Strings that already use triple double quotes are left alone.  Raw strings
    are only converted when their body can stay exactly as it is.  Empty
    strings, including the empty triple-quoted ``''''''``, are handled.

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary quotes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary quotes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    # Slice instead of indexing: the body is empty for strings like ''''''
    # and `new_body[-1]` would raise IndexError there.
    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case: a body ending with a double quote would merge with the
        # closing triple quote, so that last quote has to be escaped.
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2203
2204
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    Only the child that directly follows a leaf whose value is in
    `parens_after` is considered.  If that child is an atom, its parentheses
    are made invisible where that is safe.  Otherwise the child is wrapped in
    a new atom: with visible parentheses for single-element tuples, with
    invisible ones for anything else.
    """
    follows_keyword = False
    for child in list(node.children):
        if follows_keyword:
            if child.type == syms.atom:
                maybe_make_parens_invisible_in_atom(child)
            else:
                # One-tuples get real parentheses, everything else gets
                # invisible (empty-valued) ones.
                visible = is_one_tuple(child)
                lpar = Leaf(token.LPAR, "(" if visible else "")
                rpar = Leaf(token.RPAR, ")" if visible else "")
                index = child.remove() or 0
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))

        follows_keyword = isinstance(child, Leaf) and child.value in parens_after
2230
2231
def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively.

    Nothing is changed and False is returned when `node` is not an atom, is an
    empty tuple or a one-tuple, contains a delimiter of at least comma
    priority, or is not enclosed in parentheses.  Otherwise both parentheses
    get an empty value, the same treatment is applied to the atom's second
    child, and True is returned.
    """
    if node.type != syms.atom:
        return False

    if is_empty_tuple(node) or is_one_tuple(node):
        return False

    if max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY:
        return False

    first, last = node.children[0], node.children[-1]
    if first.type != token.LPAR or last.type != token.RPAR:
        return False

    # make parentheses invisible
    first.value = ""  # type: ignore
    last.value = ""  # type: ignore
    if len(node.children) > 1:
        maybe_make_parens_invisible_in_atom(node.children[1])
    return True
2253
2254
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple.

    That is, `node` is an atom with exactly two children: a left parenthesis
    followed by a right parenthesis.
    """
    if node.type != syms.atom or len(node.children) != 2:
        return False

    lpar, rpar = node.children
    return lpar.type == token.LPAR and rpar.type == token.RPAR
2263
2264
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens.

    With parentheses, `node` is an atom of the form ``( testlist_gexp )`` whose
    `testlist_gexp` has exactly two children, the second being a comma.
    Without parentheses, `node` is one of the IMPLICIT_TUPLE node types with
    exactly two children, the second being a comma.
    """
    if node.type != syms.atom:
        return (
            node.type in IMPLICIT_TUPLE
            and len(node.children) == 2
            and node.children[1].type == token.COMMA
        )

    if len(node.children) != 3:
        return False

    lpar, gexp, rpar = node.children
    if (
        lpar.type != token.LPAR
        or gexp.type != syms.testlist_gexp
        or rpar.type != token.RPAR
    ):
        return False

    return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2286
2287
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes COLLECTION_LIBERALS_PARENTS, it applies to right
    hand-side extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).

    The check is made against the type of the leaf's parent, or of its
    grandparent when the parent is a `star_expr`.
    """
    if leaf.type not in STARS:
        return False

    parent = leaf.parent
    if not parent:
        return False

    if parent.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132).  See what its parent is instead.
        parent = parent.parent
        if not parent:
            return False

    return parent.type in within
2309
2310
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    Also returns 0 when the bracket tracker reports no delimiters at all.
    """
    if node.type != syms.atom:
        return 0

    first, last = node.children[0], node.children[-1]
    if first.type != token.LPAR or last.type != token.RPAR:
        return 0

    # Feed every leaf between the parentheses to a fresh tracker.
    bt = BracketTracker()
    for child in node.children[1:-1]:
        inner_leaves = [child] if isinstance(child, Leaf) else child.leaves()
        for leaf in inner_leaves:
            bt.mark(leaf)

    try:
        return bt.max_delimiter_priority()

    except ValueError:
        return 0
2337
2338
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    A left or right parenthesis leaf gets its literal character as value;
    leaves of any other type are left untouched.

    Parentheses could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    visible_value = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_value is not None:
        leaf.value = visible_value
2349
2350
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings, with any capitalization of the `f`, `rf` or `fr` prefix; and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # Lower-case the head so that mixed-case prefixes such as `Rf` or
            # `fR`, which are valid too, are recognized.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # The argument list ends with a comma; that is only valid before
            # Python 3.6 if no star argument is present.
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
2379
2380
# File suffixes that `gen_python_files_in_dir` treats as Python sources.
PYTHON_EXTENSIONS = {".py"}
# Names of directories that `gen_python_files_in_dir` never descends into.
BLACKLISTED_DIRECTORIES = {
    "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
}
2385
2386
def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
    and have one of the PYTHON_EXTENSIONS.

    Directories are walked recursively.  A directory is skipped when its own
    name is blacklisted.
    """
    for entry in path.iterdir():
        if not entry.is_dir():
            if entry.suffix in PYTHON_EXTENSIONS:
                yield entry
            continue

        if entry.name not in BLACKLISTED_DIRECTORIES:
            yield from gen_python_files_in_dir(entry)
2400
2401
@dataclass
class Report:
    """Provides a reformatting counter. Can be rendered with `str(report)`."""
    # Selects the "would ..." wording; with it set, changed files make
    # `return_code` 1.
    check: bool = False
    # Suppresses the per-file messages written by `done()`.
    quiet: bool = False
    # Number of files reported as changed, unchanged, and failed, respectively.
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: Changed) -> None:
        """Increment the counter for successful reformatting. Write out a message.

        `Changed.YES` counts as a change; any other value counts as unchanged.
        """
        if changed is Changed.YES:
            if not self.quiet:
                verb = "would reformat" if self.check else "reformatted"
                out(f"{verb} {src}")
            self.change_count += 1
            return

        if not self.quiet:
            if changed is Changed.NO:
                msg = f"{src} already well formatted, good job."
            else:
                msg = f"{src} wasn't modified on disk since last run."
            out(msg, bold=False)
        self.same_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Increment the counter for failed reformatting. Write out a message.

        The message is written regardless of `quiet`.
        """
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        This considers the current state of changed files and failures:
        - if there were any failures, return 123;
        - if any files were changed and --check is being used, return 1;
        - otherwise return 0.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special returncodes reserved by the shell.
        if self.failure_count:
            return 123

        if self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Counters that are zero are left out.  Use `click.unstyle` to remove
        colors.
        """

        def counted(count: int, outcome: str) -> str:
            """Return "<count> file(s) <outcome>" with the right plural."""
            plural = "s" if count > 1 else ""
            return f"{count} file{plural} {outcome}"

        if self.check:
            reformatted = "would be reformatted"
            unchanged = "would be left unchanged"
            failed = "would fail to reformat"
        else:
            reformatted = "reformatted"
            unchanged = "left unchanged"
            failed = "failed to reformat"

        parts = []
        if self.change_count:
            parts.append(click.style(counted(self.change_count, reformatted), bold=True))
        if self.same_count:
            parts.append(counted(self.same_count, unchanged))
        if self.failure_count:
            parts.append(click.style(counted(self.failure_count, failed), fg="red"))
        return ", ".join(parts) + "."
2479
2480
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with the builtin `ast` module and the resulting
    trees are compared through a textual rendering of every node and field.
    Plain values stored in list fields (for example the names of a `global`
    statement) take part in the comparison too.

    AssertionError is also raised if either string fails to parse.
    """

    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{'  ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{'  ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

                    else:
                        # Lists may hold plain values (e.g. `Global.names`);
                        # skipping them would hide real differences.
                        yield (
                            f"{'  ' * (depth+2)}{item!r},  "
                            f"# {item.__class__.__name__}"
                        )

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"

        yield f"{'  ' * depth})  # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        # Keep the traceback and the invalid output around for the bug report.
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues.  "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source.  "
            f"Please report a bug on https://github.com/ambv/black/issues.  "
            f"This diff might be helpful: {log}"
        ) from None
2542
2543
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Check that reformatting `dst` with the same `line_length` is a no-op.

    `dst` is run through :func:`format_str` once more.  If the result differs,
    the diffs "source -> first pass" and "first pass -> second pass" are dumped
    to a log file and AssertionError is raised, mentioning the path of that log.
    """
    second_pass = format_str(dst, line_length=line_length)
    if second_pass == dst:
        return

    first_pass_diff = diff(src, dst, "source", "first pass")
    second_pass_diff = diff(dst, second_pass, "first pass", "second pass")
    log = dump_to_file(first_pass_diff, second_pass_diff)
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter.  "
        f"Please report a bug on https://github.com/ambv/black/issues.  "
        f"This diff might be helpful: {log}"
    ) from None
2558
2559
def dump_to_file(*output: str) -> str:
    """Write each string in `output` to a new temporary log file.

    A newline is appended after every non-empty chunk that does not already end
    with one.  The file is kept on disk after closing; its path is returned.
    """
    import tempfile

    log_file = tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    )
    with log_file:
        for chunk in output:
            log_file.write(chunk)
            if chunk and not chunk.endswith("\n"):
                log_file.write("\n")
    return log_file.name
2572
2573
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Produce a unified diff (five lines of context) going from `a` to `b`.

    `a_name` and `b_name` are used as the "from" and "to" labels in the header.
    """
    import difflib

    def newline_terminated(text: str) -> list:
        # Split on "\n" and give every piece its newline back.
        return [f"{piece}\n" for piece in text.split("\n")]

    delta = difflib.unified_diff(
        newline_terminated(a),
        newline_terminated(b),
        fromfile=a_name,
        tofile=b_name,
        n=5,
    )
    return "".join(delta)
2583
2584
def cancel(tasks: List[asyncio.Task]) -> None:
    """Signal handler for asyncio: report "Aborted!" via `err`, cancel all `tasks`."""
    err("Aborted!")
    for pending in tasks:
        pending.cancel()
2590
2591
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed in every case: when there is nothing to cancel, after
    the cancelled tasks have finished, and when waiting for them raises.
    """
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.Task.all_tasks()` was removed in Python 3.9; the module-level
        # `asyncio.all_tasks()` exists since 3.7.  Use whichever is available.
        all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No `loop=` argument: it was removed from `gather()` in Python 3.10, and
        # `gather()` takes the loop from the (non-empty) list of tasks anyway.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
2612
2613
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Run `regex.sub(replacement, ...)` over `original` two times in a row.

    String normalization uses this so that matches overlapping with the ones
    replaced in the first pass get replaced by the second pass.
    """
    first_pass = regex.sub(replacement, original)
    second_pass = regex.sub(replacement, first_pass)
    return second_pass
2621
2622
# Directory that holds the cache files.  The path comes from appdirs'
# `user_cache_dir()` for the app name "black" and the running `__version__`.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2624
2625
def get_cache_file(line_length: int) -> Path:
    """Return the path of the pickle cache file kept for `line_length`."""
    file_name = f"cache.{line_length}.pickle"
    return CACHE_DIR / file_name
2628
2629
def read_cache(line_length: int) -> Cache:
    """Read the cache if it exists and is well formed.

    Returns an empty cache when the cache file for `line_length` does not exist
    or cannot be unpickled.  If it is not well formed, the call to write_cache
    later should resolve the issue.
    """
    cache_file = get_cache_file(line_length)
    if not cache_file.exists():
        return {}

    # NOTE(review): `pickle.load()` can execute arbitrary code.  This assumes the
    # file was produced by `write_cache()` and is not attacker-controlled.
    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except (
            # A truncated, empty or otherwise corrupted file does not always
            # surface as UnpicklingError; the pickle documentation lists these
            # other exceptions as possible during unpickling, too.
            pickle.UnpicklingError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            ValueError,
        ):
            return {}

    return cache
2646
2647
def get_cache_info(path: Path) -> CacheInfo:
    """Return the `(st_mtime, st_size)` pair of `path`.

    This pair is what gets compared to decide whether a file is already
    formatted or not.
    """
    file_stat = path.stat()
    modified_at, size = file_stat.st_mtime, file_stat.st_size
    return modified_at, size
2652
2653
def filter_cached(
    cache: Cache, sources: Iterable[Path]
) -> Tuple[List[Path], List[Path]]:
    """Partition `sources` into files that need work and files that do not.

    Every path is resolved first.  The first returned list contains the paths
    whose current cache info differs from what `cache` holds for them (this
    includes paths missing from the cache).  The second list contains the paths
    whose cache info matches, i.e. files that were not modified.
    """
    changed: List[Path] = []
    unchanged: List[Path] = []
    for source in sources:
        resolved = source.resolve()
        is_fresh = cache.get(resolved) == get_cache_info(resolved)
        bucket = unchanged if is_fresh else changed
        bucket.append(resolved)
    return changed, unchanged
2670
2671
def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
    """Update the cache file.

    The entries of `cache` are merged with fresh cache info for every path in
    `sources` (keyed by the resolved path) and the result is pickled to the
    cache file for `line_length`.  Any OSError raised while doing so is
    ignored, in which case the cache file is left as it was.
    """
    cache_file = get_cache_file(line_length)
    try:
        # `exist_ok=True` rather than an `exists()` check followed by `mkdir()`:
        # if another process creates the directory in between, the resulting
        # FileExistsError would be swallowed below and the cache never written.
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # Best effort: a cache that cannot be written is silently skipped.
        pass
2683
2684
# Call `main()` only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()