black.py

   1 #!/usr/bin/env python3
   2
   3 import asyncio
   4 import pickle
   5 from asyncio.base_events import BaseEventLoop
   6 from concurrent.futures import Executor, ProcessPoolExecutor
   7 from enum import Enum
   8 from functools import partial, wraps
   9 import keyword
  10 import logging
  11 from multiprocessing import Manager
  12 import os
  13 from pathlib import Path
  14 import re
  15 import tokenize
  16 import signal
  17 import sys
  18 from typing import (
  19     Any,
  20     Callable,
  21     Collection,
  22     Dict,
  23     Generic,
  24     Iterable,
  25     Iterator,
  26     List,
  27     Optional,
  28     Pattern,
  29     Set,
  30     Tuple,
  31     Type,
  32     TypeVar,
  33     Union,
  34 )
  35
  36 from appdirs import user_cache_dir
  37 from attr import dataclass, Factory
  38 import click
  39
  40 # lib2to3 fork
  41 from blib2to3.pytree import Node, Leaf, type_repr
  42 from blib2to3 import pygram, pytree
  43 from blib2to3.pgen2 import driver, token
  44 from blib2to3.pgen2.parse import ParseError
  45
  46 __version__ = "18.4a4"
  47 DEFAULT_LINE_LENGTH = 88
  48
  49 # types
  50 syms = pygram.python_symbols
  51 FileContent = str
  52 Encoding = str
  53 Depth = int
  54 NodeType = int
  55 LeafID = int
  56 Priority = int
  57 Index = int
  58 LN = Union[Leaf, Node]
  59 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
  60 Timestamp = float
  61 FileSize = int
  62 CacheInfo = Tuple[Timestamp, FileSize]
  63 Cache = Dict[Path, CacheInfo]
  64 out = partial(click.secho, bold=True, err=True)
  65 err = partial(click.secho, fg="red", err=True)
  66
  67
  68 class NothingChanged(UserWarning):
  69     """Raised by :func:`format_file` when reformatted code is the same as source."""
  70
  71
  72 class CannotSplit(Exception):
  73     """A readable split that fits the allotted line length is impossible.
  74
  75     Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
  76     :func:`delimiter_split`.
  77     """
  78
  79
  80 class FormatError(Exception):
  81     """Base exception for `# fmt: on` and `# fmt: off` handling.
  82
  83     It holds the number of bytes of the prefix consumed before the format
  84     control comment appeared.
  85     """
  86
  87     def __init__(self, consumed: int) -> None:
  88         super().__init__(consumed)
  89         self.consumed = consumed
  90
  91     def trim_prefix(self, leaf: Leaf) -> None:
  92         leaf.prefix = leaf.prefix[self.consumed :]
  93
  94     def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
  95         """Returns a new Leaf from the consumed part of the prefix."""
  96         unformatted_prefix = leaf.prefix[: self.consumed]
  97         return Leaf(token.NEWLINE, unformatted_prefix)
  98
  99
 100 class FormatOn(FormatError):
 101     """Found a comment like `# fmt: on` in the file."""
 102
 103
 104 class FormatOff(FormatError):
 105     """Found a comment like `# fmt: off` in the file."""
 106
 107
 108 class WriteBack(Enum):
 109     NO = 0
 110     YES = 1
 111     DIFF = 2
 112
 113
 114 class Changed(Enum):
 115     NO = 0
 116     CACHED = 1
 117     YES = 2
 118
 119
 120 @click.command()
 121 @click.option(
 122     "-l",
 123     "--line-length",
 124     type=int,
 125     default=DEFAULT_LINE_LENGTH,
 126     help="How many character per line to allow.",
 127     show_default=True,
 128 )
 129 @click.option(
 130     "--check",
 131     is_flag=True,
 132     help=(
 133         "Don't write the files back, just return the status.  Return code 0 "
 134         "means nothing would change.  Return code 1 means some files would be "
 135         "reformatted.  Return code 123 means there was an internal error."
 136     ),
 137 )
 138 @click.option(
 139     "--diff",
 140     is_flag=True,
 141     help="Don't write the files back, just output a diff for each file on stdout.",
 142 )
 143 @click.option(
 144     "--fast/--safe",
 145     is_flag=True,
 146     help="If --fast given, skip temporary sanity checks. [default: --safe]",
 147 )
 148 @click.option(
 149     "-q",
 150     "--quiet",
 151     is_flag=True,
 152     help=(
 153         "Don't emit non-error messages to stderr. Errors are still emitted, "
 154         "silence those with 2>/dev/null."
 155     ),
 156 )
 157 @click.version_option(version=__version__)
 158 @click.argument(
 159     "src",
 160     nargs=-1,
 161     type=click.Path(
 162         exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
 163     ),
 164 )
 165 @click.pass_context
 166 def main(
 167     ctx: click.Context,
 168     line_length: int,
 169     check: bool,
 170     diff: bool,
 171     fast: bool,
 172     quiet: bool,
 173     src: List[str],
 174 ) -> None:
 175     """The uncompromising code formatter."""
 176     sources: List[Path] = []
 177     for s in src:
 178         p = Path(s)
 179         if p.is_dir():
 180             sources.extend(gen_python_files_in_dir(p))
 181         elif p.is_file():
 182             # if a file was explicitly given, we don't care about its extension
 183             sources.append(p)
 184         elif s == "-":
 185             sources.append(Path("-"))
 186         else:
 187             err(f"invalid path: {s}")
 188
 189     if check and not diff:
 190         write_back = WriteBack.NO
 191     elif diff:
 192         write_back = WriteBack.DIFF
 193     else:
 194         write_back = WriteBack.YES
 195     report = Report(check=check, quiet=quiet)
 196     if len(sources) == 0:
 197         ctx.exit(0)
 198         return
 199
 200     elif len(sources) == 1:
 201         reformat_one(sources[0], line_length, fast, write_back, report)
 202     else:
 203         loop = asyncio.get_event_loop()
 204         executor = ProcessPoolExecutor(max_workers=os.cpu_count())
 205         try:
 206             loop.run_until_complete(
 207                 schedule_formatting(
 208                     sources, line_length, fast, write_back, report, loop, executor
 209                 )
 210             )
 211         finally:
 212             shutdown(loop)
 213         if not quiet:
 214             out("All done! ✨ 🍰 ✨")
 215             click.echo(str(report))
 216     ctx.exit(report.return_code)
 217
 218
 219 def reformat_one(
 220     src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
 221 ) -> None:
 222     """Reformat a single file under `src` without spawning child processes.
 223
 224     If `quiet` is True, non-error messages are not output. `line_length`,
 225     `write_back`, and `fast` options are passed to :func:`format_file_in_place`.
 226     """
 227     try:
 228         changed = Changed.NO
 229         if not src.is_file() and str(src) == "-":
 230             if format_stdin_to_stdout(
 231                 line_length=line_length, fast=fast, write_back=write_back
 232             ):
 233                 changed = Changed.YES
 234         else:
 235             cache: Cache = {}
 236             if write_back != WriteBack.DIFF:
 237                 cache = read_cache(line_length)
 238                 src = src.resolve()
 239                 if src in cache and cache[src] == get_cache_info(src):
 240                     changed = Changed.CACHED
 241             if (
 242                 changed is not Changed.CACHED
 243                 and format_file_in_place(
 244                     src, line_length=line_length, fast=fast, write_back=write_back
 245                 )
 246             ):
 247                 changed = Changed.YES
 248             if write_back == WriteBack.YES and changed is not Changed.NO:
 249                 write_cache(cache, [src], line_length)
 250         report.done(src, changed)
 251     except Exception as exc:
 252         report.failed(src, str(exc))
 253
 254
 255 async def schedule_formatting(
 256     sources: List[Path],
 257     line_length: int,
 258     fast: bool,
 259     write_back: WriteBack,
 260     report: "Report",
 261     loop: BaseEventLoop,
 262     executor: Executor,
 263 ) -> None:
 264     """Run formatting of `sources` in parallel using the provided `executor`.
 265
 266     (Use ProcessPoolExecutors for actual parallelism.)
 267
 268     `line_length`, `write_back`, and `fast` options are passed to
 269     :func:`format_file_in_place`.
 270     """
 271     cache: Cache = {}
 272     if write_back != WriteBack.DIFF:
 273         cache = read_cache(line_length)
 274         sources, cached = filter_cached(cache, sources)
 275         for src in cached:
 276             report.done(src, Changed.CACHED)
 277     cancelled = []
 278     formatted = []
 279     if sources:
 280         lock = None
 281         if write_back == WriteBack.DIFF:
 282             # For diff output, we need locks to ensure we don't interleave output
 283             # from different processes.
 284             manager = Manager()
 285             lock = manager.Lock()
 286         tasks = {
 287             src: loop.run_in_executor(
 288                 executor, format_file_in_place, src, line_length, fast, write_back, lock
 289             )
 290             for src in sources
 291         }
 292         _task_values = list(tasks.values())
 293         try:
 294             loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
 295             loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
 296         except NotImplementedError:
 297             # There are no good alternatives for these on Windows
 298             pass
 299         await asyncio.wait(_task_values)
 300         for src, task in tasks.items():
 301             if not task.done():
 302                 report.failed(src, "timed out, cancelling")
 303                 task.cancel()
 304                 cancelled.append(task)
 305             elif task.cancelled():
 306                 cancelled.append(task)
 307             elif task.exception():
 308                 report.failed(src, str(task.exception()))
 309             else:
 310                 formatted.append(src)
 311                 report.done(src, Changed.YES if task.result() else Changed.NO)
 312
 313     if cancelled:
 314         await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
 315     if write_back == WriteBack.YES and formatted:
 316         write_cache(cache, formatted, line_length)
 317
 318
 319 def format_file_in_place(
 320     src: Path,
 321     line_length: int,
 322     fast: bool,
 323     write_back: WriteBack = WriteBack.NO,
 324     lock: Any = None,  # multiprocessing.Manager().Lock() is some crazy proxy
 325 ) -> bool:
 326     """Format file under `src` path. Return True if changed.
 327
 328     If `write_back` is True, write reformatted code back to stdout.
 329     `line_length` and `fast` options are passed to :func:`format_file_contents`.
 330     """
 331
 332     with tokenize.open(src) as src_buffer:
 333         src_contents = src_buffer.read()
 334     try:
 335         dst_contents = format_file_contents(
 336             src_contents, line_length=line_length, fast=fast
 337         )
 338     except NothingChanged:
 339         return False
 340
 341     if write_back == write_back.YES:
 342         with open(src, "w", encoding=src_buffer.encoding) as f:
 343             f.write(dst_contents)
 344     elif write_back == write_back.DIFF:
 345         src_name = f"{src}  (original)"
 346         dst_name = f"{src}  (formatted)"
 347         diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
 348         if lock:
 349             lock.acquire()
 350         try:
 351             sys.stdout.write(diff_contents)
 352         finally:
 353             if lock:
 354                 lock.release()
 355     return True
 356
 357
 358 def format_stdin_to_stdout(
 359     line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
 360 ) -> bool:
 361     """Format file on stdin. Return True if changed.
 362
 363     If `write_back` is True, write reformatted code back to stdout.
 364     `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
 365     """
 366     src = sys.stdin.read()
 367     dst = src
 368     try:
 369         dst = format_file_contents(src, line_length=line_length, fast=fast)
 370         return True
 371
 372     except NothingChanged:
 373         return False
 374
 375     finally:
 376         if write_back == WriteBack.YES:
 377             sys.stdout.write(dst)
 378         elif write_back == WriteBack.DIFF:
 379             src_name = "<stdin>  (original)"
 380             dst_name = "<stdin>  (formatted)"
 381             sys.stdout.write(diff(src, dst, src_name, dst_name))
 382
 383
 384 def format_file_contents(
 385     src_contents: str, line_length: int, fast: bool
 386 ) -> FileContent:
 387     """Reformat contents a file and return new contents.
 388
 389     If `fast` is False, additionally confirm that the reformatted code is
 390     valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
 391     `line_length` is passed to :func:`format_str`.
 392     """
 393     if src_contents.strip() == "":
 394         raise NothingChanged
 395
 396     dst_contents = format_str(src_contents, line_length=line_length)
 397     if src_contents == dst_contents:
 398         raise NothingChanged
 399
 400     if not fast:
 401         assert_equivalent(src_contents, dst_contents)
 402         assert_stable(src_contents, dst_contents, line_length=line_length)
 403     return dst_contents
 404
 405
 406 def format_str(src_contents: str, line_length: int) -> FileContent:
 407     """Reformat a string and return new contents.
 408
 409     `line_length` determines how many characters per line are allowed.
 410     """
 411     src_node = lib2to3_parse(src_contents)
 412     dst_contents = ""
 413     lines = LineGenerator()
 414     elt = EmptyLineTracker()
 415     py36 = is_python36(src_node)
 416     empty_line = Line()
 417     after = 0
 418     for current_line in lines.visit(src_node):
 419         for _ in range(after):
 420             dst_contents += str(empty_line)
 421         before, after = elt.maybe_empty_lines(current_line)
 422         for _ in range(before):
 423             dst_contents += str(empty_line)
 424         for line in split_line(current_line, line_length=line_length, py36=py36):
 425             dst_contents += str(line)
 426     return dst_contents
 427
 428
 429 GRAMMARS = [
 430     pygram.python_grammar_no_print_statement_no_exec_statement,
 431     pygram.python_grammar_no_print_statement,
 432     pygram.python_grammar,
 433 ]
 434
 435
 436 def lib2to3_parse(src_txt: str) -> Node:
 437     """Given a string with source, return the lib2to3 Node."""
 438     grammar = pygram.python_grammar_no_print_statement
 439     if src_txt[-1] != "\n":
 440         nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
 441         src_txt += nl
 442     for grammar in GRAMMARS:
 443         drv = driver.Driver(grammar, pytree.convert)
 444         try:
 445             result = drv.parse_string(src_txt, True)
 446             break
 447
 448         except ParseError as pe:
 449             lineno, column = pe.context[1]
 450             lines = src_txt.splitlines()
 451             try:
 452                 faulty_line = lines[lineno - 1]
 453             except IndexError:
 454                 faulty_line = "<line number missing in source>"
 455             exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
 456     else:
 457         raise exc from None
 458
 459     if isinstance(result, Leaf):
 460         result = Node(syms.file_input, [result])
 461     return result
 462
 463
 464 def lib2to3_unparse(node: Node) -> str:
 465     """Given a lib2to3 node, return its string representation."""
 466     code = str(node)
 467     return code
 468
 469
 470 T = TypeVar("T")
 471
 472
 473 class Visitor(Generic[T]):
 474     """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
 475
 476     def visit(self, node: LN) -> Iterator[T]:
 477         """Main method to visit `node` and its children.
 478
 479         It tries to find a `visit_*()` method for the given `node.type`, like
 480         `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
 481         If no dedicated `visit_*()` method is found, chooses `visit_default()`
 482         instead.
 483
 484         Then yields objects of type `T` from the selected visitor.
 485         """
 486         if node.type < 256:
 487             name = token.tok_name[node.type]
 488         else:
 489             name = type_repr(node.type)
 490         yield from getattr(self, f"visit_{name}", self.visit_default)(node)
 491
 492     def visit_default(self, node: LN) -> Iterator[T]:
 493         """Default `visit_*()` implementation. Recurses to children of `node`."""
 494         if isinstance(node, Node):
 495             for child in node.children:
 496                 yield from self.visit(child)
 497
 498
 499 @dataclass
 500 class DebugVisitor(Visitor[T]):
 501     tree_depth: int = 0
 502
 503     def visit_default(self, node: LN) -> Iterator[T]:
 504         indent = " " * (2 * self.tree_depth)
 505         if isinstance(node, Node):
 506             _type = type_repr(node.type)
 507             out(f"{indent}{_type}", fg="yellow")
 508             self.tree_depth += 1
 509             for child in node.children:
 510                 yield from self.visit(child)
 511
 512             self.tree_depth -= 1
 513             out(f"{indent}/{_type}", fg="yellow", bold=False)
 514         else:
 515             _type = token.tok_name.get(node.type, str(node.type))
 516             out(f"{indent}{_type}", fg="blue", nl=False)
 517             if node.prefix:
 518                 # We don't have to handle prefixes for `Node` objects since
 519                 # that delegates to the first child anyway.
 520                 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
 521             out(f" {node.value!r}", fg="blue", bold=False)
 522
 523     @classmethod
 524     def show(cls, code: str) -> None:
 525         """Pretty-print the lib2to3 AST of a given string of `code`.
 526
 527         Convenience method for debugging.
 528         """
 529         v: DebugVisitor[None] = DebugVisitor()
 530         list(v.visit(lib2to3_parse(code)))
 531
 532
 533 KEYWORDS = set(keyword.kwlist)
 534 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
 535 FLOW_CONTROL = {"return", "raise", "break", "continue"}
 536 STATEMENT = {
 537     syms.if_stmt,
 538     syms.while_stmt,
 539     syms.for_stmt,
 540     syms.try_stmt,
 541     syms.except_clause,
 542     syms.with_stmt,
 543     syms.funcdef,
 544     syms.classdef,
 545 }
 546 STANDALONE_COMMENT = 153
 547 LOGIC_OPERATORS = {"and", "or"}
 548 COMPARATORS = {
 549     token.LESS,
 550     token.GREATER,
 551     token.EQEQUAL,
 552     token.NOTEQUAL,
 553     token.LESSEQUAL,
 554     token.GREATEREQUAL,
 555 }
 556 MATH_OPERATORS = {
 557     token.PLUS,
 558     token.MINUS,
 559     token.STAR,
 560     token.SLASH,
 561     token.VBAR,
 562     token.AMPER,
 563     token.PERCENT,
 564     token.CIRCUMFLEX,
 565     token.TILDE,
 566     token.LEFTSHIFT,
 567     token.RIGHTSHIFT,
 568     token.DOUBLESTAR,
 569     token.DOUBLESLASH,
 570 }
 571 STARS = {token.STAR, token.DOUBLESTAR}
 572 VARARGS_PARENTS = {
 573     syms.arglist,
 574     syms.argument,  # double star in arglist
 575     syms.trailer,  # single argument to call
 576     syms.typedargslist,
 577     syms.varargslist,  # lambdas
 578 }
 579 UNPACKING_PARENTS = {
 580     syms.atom,  # single element of a list or set literal
 581     syms.dictsetmaker,
 582     syms.listmaker,
 583     syms.testlist_gexp,
 584 }
 585 TEST_DESCENDANTS = {
 586     syms.test,
 587     syms.lambdef,
 588     syms.or_test,
 589     syms.and_test,
 590     syms.not_test,
 591     syms.comparison,
 592     syms.star_expr,
 593     syms.expr,
 594     syms.xor_expr,
 595     syms.and_expr,
 596     syms.shift_expr,
 597     syms.arith_expr,
 598     syms.trailer,
 599     syms.term,
 600     syms.power,
 601 }
 602 COMPREHENSION_PRIORITY = 20
 603 COMMA_PRIORITY = 10
 604 TERNARY_PRIORITY = 7
 605 LOGIC_PRIORITY = 5
 606 STRING_PRIORITY = 4
 607 COMPARATOR_PRIORITY = 3
 608 MATH_PRIORITY = 1
 609
 610
 611 @dataclass
 612 class BracketTracker:
 613     """Keeps track of brackets on a line."""
 614
 615     depth: int = 0
 616     bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
 617     delimiters: Dict[LeafID, Priority] = Factory(dict)
 618     previous: Optional[Leaf] = None
 619     _for_loop_variable: bool = False
 620     _lambda_arguments: bool = False
 621
 622     def mark(self, leaf: Leaf) -> None:
 623         """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
 624
 625         All leaves receive an int `bracket_depth` field that stores how deep
 626         within brackets a given leaf is. 0 means there are no enclosing brackets
 627         that started on this line.
 628
 629         If a leaf is itself a closing bracket, it receives an `opening_bracket`
 630         field that it forms a pair with. This is a one-directional link to
 631         avoid reference cycles.
 632
 633         If a leaf is a delimiter (a token on which Black can split the line if
 634         needed) and it's on depth 0, its `id()` is stored in the tracker's
 635         `delimiters` field.
 636         """
 637         if leaf.type == token.COMMENT:
 638             return
 639
 640         self.maybe_decrement_after_for_loop_variable(leaf)
 641         self.maybe_decrement_after_lambda_arguments(leaf)
 642         if leaf.type in CLOSING_BRACKETS:
 643             self.depth -= 1
 644             opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
 645             leaf.opening_bracket = opening_bracket
 646         leaf.bracket_depth = self.depth
 647         if self.depth == 0:
 648             delim = is_split_before_delimiter(leaf, self.previous)
 649             if delim and self.previous is not None:
 650                 self.delimiters[id(self.previous)] = delim
 651             else:
 652                 delim = is_split_after_delimiter(leaf, self.previous)
 653                 if delim:
 654                     self.delimiters[id(leaf)] = delim
 655         if leaf.type in OPENING_BRACKETS:
 656             self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
 657             self.depth += 1
 658         self.previous = leaf
 659         self.maybe_increment_lambda_arguments(leaf)
 660         self.maybe_increment_for_loop_variable(leaf)
 661
 662     def any_open_brackets(self) -> bool:
 663         """Return True if there is an yet unmatched open bracket on the line."""
 664         return bool(self.bracket_match)
 665
 666     def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
 667         """Return the highest priority of a delimiter found on the line.
 668
 669         Values are consistent with what `is_split_*_delimiter()` return.
 670         Raises ValueError on no delimiters.
 671         """
 672         return max(v for k, v in self.delimiters.items() if k not in exclude)
 673
 674     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
 675         """In a for loop, or comprehension, the variables are often unpacks.
 676
 677         To avoid splitting on the comma in this situation, increase the depth of
 678         tokens between `for` and `in`.
 679         """
 680         if leaf.type == token.NAME and leaf.value == "for":
 681             self.depth += 1
 682             self._for_loop_variable = True
 683             return True
 684
 685         return False
 686
 687     def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
 688         """See `maybe_increment_for_loop_variable` above for explanation."""
 689         if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
 690             self.depth -= 1
 691             self._for_loop_variable = False
 692             return True
 693
 694         return False
 695
 696     def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
 697         """In a lambda expression, there might be more than one argument.
 698
 699         To avoid splitting on the comma in this situation, increase the depth of
 700         tokens between `lambda` and `:`.
 701         """
 702         if leaf.type == token.NAME and leaf.value == "lambda":
 703             self.depth += 1
 704             self._lambda_arguments = True
 705             return True
 706
 707         return False
 708
 709     def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
 710         """See `maybe_increment_lambda_arguments` above for explanation."""
 711         if self._lambda_arguments and leaf.type == token.COLON:
 712             self.depth -= 1
 713             self._lambda_arguments = False
 714             return True
 715
 716         return False
 717
 718     def get_open_lsqb(self) -> Optional[Leaf]:
 719         """Return the most recent opening square bracket (if any)."""
 720         return self.bracket_match.get((self.depth - 1, token.RSQB))
 721
 722
 723 @dataclass
 724 class Line:
 725     """Holds leaves and comments. Can be printed with `str(line)`."""
 726
 727     depth: int = 0
 728     leaves: List[Leaf] = Factory(list)
 729     comments: List[Tuple[Index, Leaf]] = Factory(list)
 730     bracket_tracker: BracketTracker = Factory(BracketTracker)
 731     inside_brackets: bool = False
 732
 733     def append(self, leaf: Leaf, preformatted: bool = False) -> None:
 734         """Add a new `leaf` to the end of the line.
 735
 736         Unless `preformatted` is True, the `leaf` will receive a new consistent
 737         whitespace prefix and metadata applied by :class:`BracketTracker`.
 738         Trailing commas are maybe removed, unpacked for loop variables are
 739         demoted from being delimiters.
 740
 741         Inline comments are put aside.
 742         """
 743         has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
 744         if not has_value:
 745             return
 746
 747         if self.leaves and not preformatted:
 748             # Note: at this point leaf.prefix should be empty except for
 749             # imports, for which we only preserve newlines.
 750             leaf.prefix += whitespace(
 751                 leaf, complex_subscript=self.is_complex_subscript(leaf)
 752             )
 753         if self.inside_brackets or not preformatted:
 754             self.bracket_tracker.mark(leaf)
 755             self.maybe_remove_trailing_comma(leaf)
 756
 757         if not self.append_comment(leaf):
 758             self.leaves.append(leaf)
 759
 760     def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
 761         """Like :func:`append()` but disallow invalid standalone comment structure.
 762
 763         Raises ValueError when any `leaf` is appended after a standalone comment
 764         or when a standalone comment is not the first leaf on the line.
 765         """
 766         if self.bracket_tracker.depth == 0:
 767             if self.is_comment:
 768                 raise ValueError("cannot append to standalone comments")
 769
 770             if self.leaves and leaf.type == STANDALONE_COMMENT:
 771                 raise ValueError(
 772                     "cannot append standalone comments to a populated line"
 773                 )
 774
 775         self.append(leaf, preformatted=preformatted)
 776
 777     @property
 778     def is_comment(self) -> bool:
 779         """Is this line a standalone comment?"""
 780         return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
 781
 782     @property
 783     def is_decorator(self) -> bool:
 784         """Is this line a decorator?"""
 785         return bool(self) and self.leaves[0].type == token.AT
 786
 787     @property
 788     def is_import(self) -> bool:
 789         """Is this an import line?"""
 790         return bool(self) and is_import(self.leaves[0])
 791
 792     @property
 793     def is_class(self) -> bool:
 794         """Is this line a class definition?"""
 795         return (
 796             bool(self)
 797             and self.leaves[0].type == token.NAME
 798             and self.leaves[0].value == "class"
 799         )
 800
 801     @property
 802     def is_def(self) -> bool:
 803         """Is this a function definition? (Also returns True for async defs.)"""
 804         try:
 805             first_leaf = self.leaves[0]
 806         except IndexError:
 807             return False
 808
 809         try:
 810             second_leaf: Optional[Leaf] = self.leaves[1]
 811         except IndexError:
 812             second_leaf = None
 813         return (
 814             (first_leaf.type == token.NAME and first_leaf.value == "def")
 815             or (
 816                 first_leaf.type == token.ASYNC
 817                 and second_leaf is not None
 818                 and second_leaf.type == token.NAME
 819                 and second_leaf.value == "def"
 820             )
 821         )
 822
 823     @property
 824     def is_flow_control(self) -> bool:
 825         """Is this line a flow control statement?
 826
 827         Those are `return`, `raise`, `break`, and `continue`.
 828         """
 829         return (
 830             bool(self)
 831             and self.leaves[0].type == token.NAME
 832             and self.leaves[0].value in FLOW_CONTROL
 833         )
 834
 835     @property
 836     def is_yield(self) -> bool:
 837         """Is this line a yield statement?"""
 838         return (
 839             bool(self)
 840             and self.leaves[0].type == token.NAME
 841             and self.leaves[0].value == "yield"
 842         )
 843
 844     def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
 845         """If so, needs to be split before emitting."""
 846         for leaf in self.leaves:
 847             if leaf.type == STANDALONE_COMMENT:
 848                 if leaf.bracket_depth <= depth_limit:
 849                     return True
 850
 851         return False
 852
 853     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
 854         """Remove trailing comma if there is one and it's safe."""
 855         if not (
 856             self.leaves
 857             and self.leaves[-1].type == token.COMMA
 858             and closing.type in CLOSING_BRACKETS
 859         ):
 860             return False
 861
 862         if closing.type == token.RBRACE:
 863             self.remove_trailing_comma()
 864             return True
 865
 866         if closing.type == token.RSQB:
 867             comma = self.leaves[-1]
 868             if comma.parent and comma.parent.type == syms.listmaker:
 869                 self.remove_trailing_comma()
 870                 return True
 871
 872         # For parens let's check if it's safe to remove the comma.  If the
 873         # trailing one is the only one, we might mistakenly change a tuple
 874         # into a different type by removing the comma.
 875         depth = closing.bracket_depth + 1
 876         commas = 0
 877         opening = closing.opening_bracket
 878         for _opening_index, leaf in enumerate(self.leaves):
 879             if leaf is opening:
 880                 break
 881
 882         else:
 883             return False
 884
 885         for leaf in self.leaves[_opening_index + 1 :]:
 886             if leaf is closing:
 887                 break
 888
 889             bracket_depth = leaf.bracket_depth
 890             if bracket_depth == depth and leaf.type == token.COMMA:
 891                 commas += 1
 892                 if leaf.parent and leaf.parent.type == syms.arglist:
 893                     commas += 1
 894                     break
 895
 896         if commas > 1:
 897             self.remove_trailing_comma()
 898             return True
 899
 900         return False
 901
 902     def append_comment(self, comment: Leaf) -> bool:
 903         """Add an inline or standalone comment to the line."""
 904         if (
 905             comment.type == STANDALONE_COMMENT
 906             and self.bracket_tracker.any_open_brackets()
 907         ):
 908             comment.prefix = ""
 909             return False
 910
 911         if comment.type != token.COMMENT:
 912             return False
 913
 914         after = len(self.leaves) - 1
 915         if after == -1:
 916             comment.type = STANDALONE_COMMENT
 917             comment.prefix = ""
 918             return False
 919
 920         else:
 921             self.comments.append((after, comment))
 922             return True
 923
 924     def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
 925         """Generate comments that should appear directly after `leaf`."""
 926         for _leaf_index, _leaf in enumerate(self.leaves):
 927             if leaf is _leaf:
 928                 break
 929
 930         else:
 931             return
 932
 933         for index, comment_after in self.comments:
 934             if _leaf_index == index:
 935                 yield comment_after
 936
 937     def remove_trailing_comma(self) -> None:
 938         """Remove the trailing comma and moves the comments attached to it."""
 939         comma_index = len(self.leaves) - 1
 940         for i in range(len(self.comments)):
 941             comment_index, comment = self.comments[i]
 942             if comment_index == comma_index:
 943                 self.comments[i] = (comma_index - 1, comment)
 944         self.leaves.pop()
 945
 946     def is_complex_subscript(self, leaf: Leaf) -> bool:
 947         """Return True iff `leaf` is part of a slice with non-trivial exprs."""
 948         open_lsqb = (
 949             leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
 950         )
 951         if open_lsqb is None:
 952             return False
 953
 954         subscript_start = open_lsqb.next_sibling
 955         if (
 956             isinstance(subscript_start, Node)
 957             and subscript_start.type == syms.subscriptlist
 958         ):
 959             subscript_start = child_towards(subscript_start, leaf)
 960         return subscript_start is not None and any(
 961             n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
 962         )
 963
 964     def __str__(self) -> str:
 965         """Render the line."""
 966         if not self:
 967             return "\n"
 968
 969         indent = "    " * self.depth
 970         leaves = iter(self.leaves)
 971         first = next(leaves)
 972         res = f"{first.prefix}{indent}{first.value}"
 973         for leaf in leaves:
 974             res += str(leaf)
 975         for _, comment in self.comments:
 976             res += str(comment)
 977         return res + "\n"
 978
 979     def __bool__(self) -> bool:
 980         """Return True if the line has leaves or comments."""
 981         return bool(self.leaves or self.comments)
 982
 983
 984 class UnformattedLines(Line):
 985     """Just like :class:`Line` but stores lines which aren't reformatted."""
 986
 987     def append(self, leaf: Leaf, preformatted: bool = True) -> None:
 988         """Just add a new `leaf` to the end of the lines.
 989
 990         The `preformatted` argument is ignored.
 991
 992         Keeps track of indentation `depth`, which is useful when the user
 993         says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
 994         """
 995         try:
 996             list(generate_comments(leaf))
 997         except FormatOn as f_on:
 998             self.leaves.append(f_on.leaf_from_consumed(leaf))
 999             raise
1000
1001         self.leaves.append(leaf)
1002         if leaf.type == token.INDENT:
1003             self.depth += 1
1004         elif leaf.type == token.DEDENT:
1005             self.depth -= 1
1006
1007     def __str__(self) -> str:
1008         """Render unformatted lines from leaves which were added with `append()`.
1009
1010         `depth` is not used for indentation in this case.
1011         """
1012         if not self:
1013             return "\n"
1014
1015         res = ""
1016         for leaf in self.leaves:
1017             res += str(leaf)
1018         return res
1019
1020     def append_comment(self, comment: Leaf) -> bool:
1021         """Not implemented in this class. Raises `NotImplementedError`."""
1022         raise NotImplementedError("Unformatted lines don't store comments separately.")
1023
1024     def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1025         """Does nothing and returns False."""
1026         return False
1027
1028     def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1029         """Does nothing and returns False."""
1030         return False
1031
1032
1033 @dataclass
1034 class EmptyLineTracker:
1035     """Provides a stateful method that returns the number of potential extra
1036     empty lines needed before and after the currently processed line.
1037
1038     Note: this tracker works on lines that haven't been split yet.  It assumes
1039     the prefix of the first leaf consists of optional newlines.  Those newlines
1040     are consumed by `maybe_empty_lines()` and included in the computation.
1041     """
1042     previous_line: Optional[Line] = None
1043     previous_after: int = 0
1044     previous_defs: List[int] = Factory(list)
1045
1046     def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1047         """Return the number of extra empty lines before and after the `current_line`.
1048
1049         This is for separating `def`, `async def` and `class` with extra empty
1050         lines (two on module-level), as well as providing an extra empty line
1051         after flow control keywords to make them more prominent.
1052         """
1053         if isinstance(current_line, UnformattedLines):
1054             return 0, 0
1055
1056         before, after = self._maybe_empty_lines(current_line)
1057         before -= self.previous_after
1058         self.previous_after = after
1059         self.previous_line = current_line
1060         return before, after
1061
1062     def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1063         max_allowed = 1
1064         if current_line.depth == 0:
1065             max_allowed = 2
1066         if current_line.leaves:
1067             # Consume the first leaf's extra newlines.
1068             first_leaf = current_line.leaves[0]
1069             before = first_leaf.prefix.count("\n")
1070             before = min(before, max_allowed)
1071             first_leaf.prefix = ""
1072         else:
1073             before = 0
1074         depth = current_line.depth
1075         while self.previous_defs and self.previous_defs[-1] >= depth:
1076             self.previous_defs.pop()
1077             before = 1 if depth else 2
1078         is_decorator = current_line.is_decorator
1079         if is_decorator or current_line.is_def or current_line.is_class:
1080             if not is_decorator:
1081                 self.previous_defs.append(depth)
1082             if self.previous_line is None:
1083                 # Don't insert empty lines before the first line in the file.
1084                 return 0, 0
1085
1086             if self.previous_line.is_decorator:
1087                 return 0, 0
1088
1089             if (
1090                 self.previous_line.is_comment
1091                 and self.previous_line.depth == current_line.depth
1092                 and before == 0
1093             ):
1094                 return 0, 0
1095
1096             newlines = 2
1097             if current_line.depth:
1098                 newlines -= 1
1099             return newlines, 0
1100
1101         if (
1102             self.previous_line
1103             and self.previous_line.is_import
1104             and not current_line.is_import
1105             and depth == self.previous_line.depth
1106         ):
1107             return (before or 1), 0
1108
1109         return before, 0
1110
1111
1112 @dataclass
1113 class LineGenerator(Visitor[Line]):
1114     """Generates reformatted Line objects.  Empty lines are not emitted.
1115
1116     Note: destroys the tree it's visiting by mutating prefixes of its leaves
1117     in ways that will no longer stringify to valid Python code on the tree.
1118     """
1119     current_line: Line = Factory(Line)
1120
1121     def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1122         """Generate a line.
1123
1124         If the line is empty, only emit if it makes sense.
1125         If the line is too long, split it first and then generate.
1126
1127         If any lines were generated, set up a new current_line.
1128         """
1129         if not self.current_line:
1130             if self.current_line.__class__ == type:
1131                 self.current_line.depth += indent
1132             else:
1133                 self.current_line = type(depth=self.current_line.depth + indent)
1134             return  # Line is empty, don't emit. Creating a new one unnecessary.
1135
1136         complete_line = self.current_line
1137         self.current_line = type(depth=complete_line.depth + indent)
1138         yield complete_line
1139
1140     def visit(self, node: LN) -> Iterator[Line]:
1141         """Main method to visit `node` and its children.
1142
1143         Yields :class:`Line` objects.
1144         """
1145         if isinstance(self.current_line, UnformattedLines):
1146             # File contained `# fmt: off`
1147             yield from self.visit_unformatted(node)
1148
1149         else:
1150             yield from super().visit(node)
1151
1152     def visit_default(self, node: LN) -> Iterator[Line]:
1153         """Default `visit_*()` implementation. Recurses to children of `node`."""
1154         if isinstance(node, Leaf):
1155             any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1156             try:
1157                 for comment in generate_comments(node):
1158                     if any_open_brackets:
1159                         # any comment within brackets is subject to splitting
1160                         self.current_line.append(comment)
1161                     elif comment.type == token.COMMENT:
1162                         # regular trailing comment
1163                         self.current_line.append(comment)
1164                         yield from self.line()
1165
1166                     else:
1167                         # regular standalone comment
1168                         yield from self.line()
1169
1170                         self.current_line.append(comment)
1171                         yield from self.line()
1172
1173             except FormatOff as f_off:
1174                 f_off.trim_prefix(node)
1175                 yield from self.line(type=UnformattedLines)
1176                 yield from self.visit(node)
1177
1178             except FormatOn as f_on:
1179                 # This only happens here if somebody says "fmt: on" multiple
1180                 # times in a row.
1181                 f_on.trim_prefix(node)
1182                 yield from self.visit_default(node)
1183
1184             else:
1185                 normalize_prefix(node, inside_brackets=any_open_brackets)
1186                 if node.type == token.STRING:
1187                     normalize_string_quotes(node)
1188                 if node.type not in WHITESPACE:
1189                     self.current_line.append(node)
1190         yield from super().visit_default(node)
1191
1192     def visit_INDENT(self, node: Node) -> Iterator[Line]:
1193         """Increase indentation level, maybe yield a line."""
1194         # In blib2to3 INDENT never holds comments.
1195         yield from self.line(+1)
1196         yield from self.visit_default(node)
1197
1198     def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1199         """Decrease indentation level, maybe yield a line."""
1200         # The current line might still wait for trailing comments.  At DEDENT time
1201         # there won't be any (they would be prefixes on the preceding NEWLINE).
1202         # Emit the line then.
1203         yield from self.line()
1204
1205         # While DEDENT has no value, its prefix may contain standalone comments
1206         # that belong to the current indentation level.  Get 'em.
1207         yield from self.visit_default(node)
1208
1209         # Finally, emit the dedent.
1210         yield from self.line(-1)
1211
1212     def visit_stmt(
1213         self, node: Node, keywords: Set[str], parens: Set[str]
1214     ) -> Iterator[Line]:
1215         """Visit a statement.
1216
1217         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1218         `def`, `with`, `class`, and `assert`.
1219
1220         The relevant Python language `keywords` for a given statement will be
1221         NAME leaves within it. This methods puts those on a separate line.
1222
1223         `parens` holds pairs of nodes where invisible parentheses should be put.
1224         Keys hold nodes after which opening parentheses should be put, values
1225         hold nodes before which closing parentheses should be put.
1226         """
1227         normalize_invisible_parens(node, parens_after=parens)
1228         for child in node.children:
1229             if child.type == token.NAME and child.value in keywords:  # type: ignore
1230                 yield from self.line()
1231
1232             yield from self.visit(child)
1233
1234     def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1235         """Visit a statement without nested statements."""
1236         is_suite_like = node.parent and node.parent.type in STATEMENT
1237         if is_suite_like:
1238             yield from self.line(+1)
1239             yield from self.visit_default(node)
1240             yield from self.line(-1)
1241
1242         else:
1243             yield from self.line()
1244             yield from self.visit_default(node)
1245
1246     def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1247         """Visit `async def`, `async for`, `async with`."""
1248         yield from self.line()
1249
1250         children = iter(node.children)
1251         for child in children:
1252             yield from self.visit(child)
1253
1254             if child.type == token.ASYNC:
1255                 break
1256
1257         internal_stmt = next(children)
1258         for child in internal_stmt.children:
1259             yield from self.visit(child)
1260
1261     def visit_decorators(self, node: Node) -> Iterator[Line]:
1262         """Visit decorators."""
1263         for child in node.children:
1264             yield from self.line()
1265             yield from self.visit(child)
1266
1267     def visit_import_from(self, node: Node) -> Iterator[Line]:
1268         """Visit import_from and maybe put invisible parentheses.
1269
1270         This is separate from `visit_stmt` because import statements don't
1271         support arbitrary atoms and thus handling of parentheses is custom.
1272         """
1273         check_lpar = False
1274         for index, child in enumerate(node.children):
1275             if check_lpar:
1276                 if child.type == token.LPAR:
1277                     # make parentheses invisible
1278                     child.value = ""  # type: ignore
1279                     node.children[-1].value = ""  # type: ignore
1280                 else:
1281                     # insert invisible parentheses
1282                     node.insert_child(index, Leaf(token.LPAR, ""))
1283                     node.append_child(Leaf(token.RPAR, ""))
1284                 break
1285
1286             check_lpar = (
1287                 child.type == token.NAME and child.value == "import"  # type: ignore
1288             )
1289
1290         for child in node.children:
1291             yield from self.visit(child)
1292
1293     def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1294         """Remove a semicolon and put the other statement on a separate line."""
1295         yield from self.line()
1296
1297     def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1298         """End of file. Process outstanding comments and end with a newline."""
1299         yield from self.visit_default(leaf)
1300         yield from self.line()
1301
1302     def visit_unformatted(self, node: LN) -> Iterator[Line]:
1303         """Used when file contained a `# fmt: off`."""
1304         if isinstance(node, Node):
1305             for child in node.children:
1306                 yield from self.visit(child)
1307
1308         else:
1309             try:
1310                 self.current_line.append(node)
1311             except FormatOn as f_on:
1312                 f_on.trim_prefix(node)
1313                 yield from self.line()
1314                 yield from self.visit(node)
1315
1316             if node.type == token.ENDMARKER:
1317                 # somebody decided not to put a final `# fmt: on`
1318                 yield from self.line()
1319
1320     def __attrs_post_init__(self) -> None:
1321         """You are in a twisty little maze of passages."""
1322         v = self.visit_stmt
1323         Ø: Set[str] = set()
1324         self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1325         self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
1326         self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1327         self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1328         self.visit_try_stmt = partial(
1329             v, keywords={"try", "except", "else", "finally"}, parens=Ø
1330         )
1331         self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1332         self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1333         self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1334         self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1335         self.visit_async_funcdef = self.visit_async_stmt
1336         self.visit_decorated = self.visit_decorators
1337
1338
1339 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1340 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1341 OPENING_BRACKETS = set(BRACKET.keys())
1342 CLOSING_BRACKETS = set(BRACKET.values())
1343 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1344 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1345
1346
1347 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
1348     """Return whitespace prefix if needed for the given `leaf`.
1349
1350     `complex_subscript` signals whether the given leaf is part of a subscription
1351     which has non-trivial arguments, like arithmetic expressions or function calls.
1352     """
1353     NO = ""
1354     SPACE = " "
1355     DOUBLESPACE = "  "
1356     t = leaf.type
1357     p = leaf.parent
1358     v = leaf.value
1359     if t in ALWAYS_NO_SPACE:
1360         return NO
1361
1362     if t == token.COMMENT:
1363         return DOUBLESPACE
1364
1365     assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1366     if (
1367         t == token.COLON
1368         and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
1369     ):
1370         return NO
1371
1372     prev = leaf.prev_sibling
1373     if not prev:
1374         prevp = preceding_leaf(p)
1375         if not prevp or prevp.type in OPENING_BRACKETS:
1376             return NO
1377
1378         if t == token.COLON:
1379             if prevp.type == token.COLON:
1380                 return NO
1381
1382             elif prevp.type != token.COMMA and not complex_subscript:
1383                 return NO
1384
1385             return SPACE
1386
1387         if prevp.type == token.EQUAL:
1388             if prevp.parent:
1389                 if prevp.parent.type in {
1390                     syms.arglist, syms.argument, syms.parameters, syms.varargslist
1391                 }:
1392                     return NO
1393
1394                 elif prevp.parent.type == syms.typedargslist:
1395                     # A bit hacky: if the equal sign has whitespace, it means we
1396                     # previously found it's a typed argument.  So, we're using
1397                     # that, too.
1398                     return prevp.prefix
1399
1400         elif prevp.type in STARS:
1401             if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1402                 return NO
1403
1404         elif prevp.type == token.COLON:
1405             if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1406                 return SPACE if complex_subscript else NO
1407
1408         elif (
1409             prevp.parent
1410             and prevp.parent.type == syms.factor
1411             and prevp.type in MATH_OPERATORS
1412         ):
1413             return NO
1414
1415         elif (
1416             prevp.type == token.RIGHTSHIFT
1417             and prevp.parent
1418             and prevp.parent.type == syms.shift_expr
1419             and prevp.prev_sibling
1420             and prevp.prev_sibling.type == token.NAME
1421             and prevp.prev_sibling.value == "print"  # type: ignore
1422         ):
1423             # Python 2 print chevron
1424             return NO
1425
1426     elif prev.type in OPENING_BRACKETS:
1427         return NO
1428
1429     if p.type in {syms.parameters, syms.arglist}:
1430         # untyped function signatures or calls
1431         if not prev or prev.type != token.COMMA:
1432             return NO
1433
1434     elif p.type == syms.varargslist:
1435         # lambdas
1436         if prev and prev.type != token.COMMA:
1437             return NO
1438
1439     elif p.type == syms.typedargslist:
1440         # typed function signatures
1441         if not prev:
1442             return NO
1443
1444         if t == token.EQUAL:
1445             if prev.type != syms.tname:
1446                 return NO
1447
1448         elif prev.type == token.EQUAL:
1449             # A bit hacky: if the equal sign has whitespace, it means we
1450             # previously found it's a typed argument.  So, we're using that, too.
1451             return prev.prefix
1452
1453         elif prev.type != token.COMMA:
1454             return NO
1455
1456     elif p.type == syms.tname:
1457         # type names
1458         if not prev:
1459             prevp = preceding_leaf(p)
1460             if not prevp or prevp.type != token.COMMA:
1461                 return NO
1462
1463     elif p.type == syms.trailer:
1464         # attributes and calls
1465         if t == token.LPAR or t == token.RPAR:
1466             return NO
1467
1468         if not prev:
1469             if t == token.DOT:
1470                 prevp = preceding_leaf(p)
1471                 if not prevp or prevp.type != token.NUMBER:
1472                     return NO
1473
1474             elif t == token.LSQB:
1475                 return NO
1476
1477         elif prev.type != token.COMMA:
1478             return NO
1479
1480     elif p.type == syms.argument:
1481         # single argument
1482         if t == token.EQUAL:
1483             return NO
1484
1485         if not prev:
1486             prevp = preceding_leaf(p)
1487             if not prevp or prevp.type == token.LPAR:
1488                 return NO
1489
1490         elif prev.type in {token.EQUAL} | STARS:
1491             return NO
1492
1493     elif p.type == syms.decorator:
1494         # decorators
1495         return NO
1496
1497     elif p.type == syms.dotted_name:
1498         if prev:
1499             return NO
1500
1501         prevp = preceding_leaf(p)
1502         if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1503             return NO
1504
1505     elif p.type == syms.classdef:
1506         if t == token.LPAR:
1507             return NO
1508
1509         if prev and prev.type == token.LPAR:
1510             return NO
1511
1512     elif p.type in {syms.subscript, syms.sliceop}:
1513         # indexing
1514         if not prev:
1515             assert p.parent is not None, "subscripts are always parented"
1516             if p.parent.type == syms.subscriptlist:
1517                 return SPACE
1518
1519             return NO
1520
1521         elif not complex_subscript:
1522             return NO
1523
1524     elif p.type == syms.atom:
1525         if prev and t == token.DOT:
1526             # dots, but not the first one.
1527             return NO
1528
1529     elif p.type == syms.dictsetmaker:
1530         # dict unpacking
1531         if prev and prev.type == token.DOUBLESTAR:
1532             return NO
1533
1534     elif p.type in {syms.factor, syms.star_expr}:
1535         # unary ops
1536         if not prev:
1537             prevp = preceding_leaf(p)
1538             if not prevp or prevp.type in OPENING_BRACKETS:
1539                 return NO
1540
1541             prevp_parent = prevp.parent
1542             assert prevp_parent is not None
1543             if (
1544                 prevp.type == token.COLON
1545                 and prevp_parent.type in {syms.subscript, syms.sliceop}
1546             ):
1547                 return NO
1548
1549             elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1550                 return NO
1551
1552         elif t == token.NAME or t == token.NUMBER:
1553             return NO
1554
1555     elif p.type == syms.import_from:
1556         if t == token.DOT:
1557             if prev and prev.type == token.DOT:
1558                 return NO
1559
1560         elif t == token.NAME:
1561             if v == "import":
1562                 return SPACE
1563
1564             if prev and prev.type == token.DOT:
1565                 return NO
1566
1567     elif p.type == syms.sliceop:
1568         return NO
1569
1570     return SPACE
1571
1572
1573 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1574     """Return the first leaf that precedes `node`, if any."""
1575     while node:
1576         res = node.prev_sibling
1577         if res:
1578             if isinstance(res, Leaf):
1579                 return res
1580
1581             try:
1582                 return list(res.leaves())[-1]
1583
1584             except IndexError:
1585                 return None
1586
1587         node = node.parent
1588     return None
1589
1590
1591 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1592     """Return the child of `ancestor` that contains `descendant`."""
1593     node: Optional[LN] = descendant
1594     while node and node.parent != ancestor:
1595         node = node.parent
1596     return node
1597
1598
1599 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1600     """Return the priority of the `leaf` delimiter, given a line break after it.
1601
1602     The delimiter priorities returned here are from those delimiters that would
1603     cause a line break after themselves.
1604
1605     Higher numbers are higher priority.
1606     """
1607     if leaf.type == token.COMMA:
1608         return COMMA_PRIORITY
1609
1610     return 0
1611
1612
1613 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1614     """Return the priority of the `leaf` delimiter, given a line before after it.
1615
1616     The delimiter priorities returned here are from those delimiters that would
1617     cause a line break before themselves.
1618
1619     Higher numbers are higher priority.
1620     """
1621     if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1622         # * and ** might also be MATH_OPERATORS but in this case they are not.
1623         # Don't treat them as a delimiter.
1624         return 0
1625
1626     if (
1627         leaf.type in MATH_OPERATORS
1628         and leaf.parent
1629         and leaf.parent.type not in {syms.factor, syms.star_expr}
1630     ):
1631         return MATH_PRIORITY
1632
1633     if leaf.type in COMPARATORS:
1634         return COMPARATOR_PRIORITY
1635
1636     if (
1637         leaf.type == token.STRING
1638         and previous is not None
1639         and previous.type == token.STRING
1640     ):
1641         return STRING_PRIORITY
1642
1643     if (
1644         leaf.type == token.NAME
1645         and leaf.value == "for"
1646         and leaf.parent
1647         and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1648     ):
1649         return COMPREHENSION_PRIORITY
1650
1651     if (
1652         leaf.type == token.NAME
1653         and leaf.value == "if"
1654         and leaf.parent
1655         and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1656     ):
1657         return COMPREHENSION_PRIORITY
1658
1659     if (
1660         leaf.type == token.NAME
1661         and leaf.value in {"if", "else"}
1662         and leaf.parent
1663         and leaf.parent.type == syms.test
1664     ):
1665         return TERNARY_PRIORITY
1666
1667     if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
1668         return LOGIC_PRIORITY
1669
1670     return 0
1671
1672
1673 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1674     """Clean the prefix of the `leaf` and generate comments from it, if any.
1675
1676     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
1677     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
1678     move because it does away with modifying the grammar to include all the
1679     possible places in which comments can be placed.
1680
1681     The sad consequence for us though is that comments don't "belong" anywhere.
1682     This is why this function generates simple parentless Leaf objects for
1683     comments.  We simply don't know what the correct parent should be.
1684
1685     No matter though, we can live without this.  We really only need to
1686     differentiate between inline and standalone comments.  The latter don't
1687     share the line with any code.
1688
1689     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
1690     are emitted with a fake STANDALONE_COMMENT token identifier.
1691     """
1692     p = leaf.prefix
1693     if not p:
1694         return
1695
1696     if "#" not in p:
1697         return
1698
1699     consumed = 0
1700     nlines = 0
1701     for index, line in enumerate(p.split("\n")):
1702         consumed += len(line) + 1  # adding the length of the split '\n'
1703         line = line.lstrip()
1704         if not line:
1705             nlines += 1
1706         if not line.startswith("#"):
1707             continue
1708
1709         if index == 0 and leaf.type != token.ENDMARKER:
1710             comment_type = token.COMMENT  # simple trailing comment
1711         else:
1712             comment_type = STANDALONE_COMMENT
1713         comment = make_comment(line)
1714         yield Leaf(comment_type, comment, prefix="\n" * nlines)
1715
1716         if comment in {"# fmt: on", "# yapf: enable"}:
1717             raise FormatOn(consumed)
1718
1719         if comment in {"# fmt: off", "# yapf: disable"}:
1720             if comment_type == STANDALONE_COMMENT:
1721                 raise FormatOff(consumed)
1722
1723             prev = preceding_leaf(leaf)
1724             if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
1725                 raise FormatOff(consumed)
1726
1727         nlines = 0
1728
1729
1730 def make_comment(content: str) -> str:
1731     """Return a consistently formatted comment from the given `content` string.
1732
1733     All comments (except for "##", "#!", "#:") should have a single space between
1734     the hash sign and the content.
1735
1736     If `content` didn't start with a hash sign, one is provided.
1737     """
1738     content = content.rstrip()
1739     if not content:
1740         return "#"
1741
1742     if content[0] == "#":
1743         content = content[1:]
1744     if content and content[0] not in " !:#":
1745         content = " " + content
1746     return "#" + content
1747
1748
1749 def split_line(
1750     line: Line, line_length: int, inner: bool = False, py36: bool = False
1751 ) -> Iterator[Line]:
1752     """Split a `line` into potentially many lines.
1753
1754     They should fit in the allotted `line_length` but might not be able to.
1755     `inner` signifies that there were a pair of brackets somewhere around the
1756     current `line`, possibly transitively. This means we can fallback to splitting
1757     by delimiters if the LHS/RHS don't yield any results.
1758
1759     If `py36` is True, splitting may generate syntax that is only compatible
1760     with Python 3.6 and later.
1761     """
1762     if isinstance(line, UnformattedLines) or line.is_comment:
1763         yield line
1764         return
1765
1766     line_str = str(line).strip("\n")
1767     if (
1768         len(line_str) <= line_length
1769         and "\n" not in line_str  # multiline strings
1770         and not line.contains_standalone_comments()
1771     ):
1772         yield line
1773         return
1774
1775     split_funcs: List[SplitFunc]
1776     if line.is_def:
1777         split_funcs = [left_hand_split]
1778     elif line.is_import:
1779         split_funcs = [explode_split]
1780     elif line.inside_brackets:
1781         split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
1782     else:
1783         split_funcs = [right_hand_split]
1784     for split_func in split_funcs:
1785         # We are accumulating lines in `result` because we might want to abort
1786         # mission and return the original line in the end, or attempt a different
1787         # split altogether.
1788         result: List[Line] = []
1789         try:
1790             for l in split_func(line, py36):
1791                 if str(l).strip("\n") == line_str:
1792                     raise CannotSplit("Split function returned an unchanged result")
1793
1794                 result.extend(
1795                     split_line(l, line_length=line_length, inner=True, py36=py36)
1796                 )
1797         except CannotSplit as cs:
1798             continue
1799
1800         else:
1801             yield from result
1802             break
1803
1804     else:
1805         yield line
1806
1807
1808 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1809     """Split line into many lines, starting with the first matching bracket pair.
1810
1811     Note: this usually looks weird, only use this for function definitions.
1812     Prefer RHS otherwise.
1813     """
1814     head = Line(depth=line.depth)
1815     body = Line(depth=line.depth + 1, inside_brackets=True)
1816     tail = Line(depth=line.depth)
1817     tail_leaves: List[Leaf] = []
1818     body_leaves: List[Leaf] = []
1819     head_leaves: List[Leaf] = []
1820     current_leaves = head_leaves
1821     matching_bracket = None
1822     for leaf in line.leaves:
1823         if (
1824             current_leaves is body_leaves
1825             and leaf.type in CLOSING_BRACKETS
1826             and leaf.opening_bracket is matching_bracket
1827         ):
1828             current_leaves = tail_leaves if body_leaves else head_leaves
1829         current_leaves.append(leaf)
1830         if current_leaves is head_leaves:
1831             if leaf.type in OPENING_BRACKETS:
1832                 matching_bracket = leaf
1833                 current_leaves = body_leaves
1834     # Since body is a new indent level, remove spurious leading whitespace.
1835     if body_leaves:
1836         normalize_prefix(body_leaves[0], inside_brackets=True)
1837     # Build the new lines.
1838     for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1839         for leaf in leaves:
1840             result.append(leaf, preformatted=True)
1841             for comment_after in line.comments_after(leaf):
1842                 result.append(comment_after, preformatted=True)
1843     bracket_split_succeeded_or_raise(head, body, tail)
1844     for result in (head, body, tail):
1845         if result:
1846             yield result
1847
1848
1849 def right_hand_split(
1850     line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1851 ) -> Iterator[Line]:
1852     """Split line into many lines, starting with the last matching bracket pair."""
1853     head = Line(depth=line.depth)
1854     body = Line(depth=line.depth + 1, inside_brackets=True)
1855     tail = Line(depth=line.depth)
1856     tail_leaves: List[Leaf] = []
1857     body_leaves: List[Leaf] = []
1858     head_leaves: List[Leaf] = []
1859     current_leaves = tail_leaves
1860     opening_bracket = None
1861     closing_bracket = None
1862     for leaf in reversed(line.leaves):
1863         if current_leaves is body_leaves:
1864             if leaf is opening_bracket:
1865                 current_leaves = head_leaves if body_leaves else tail_leaves
1866         current_leaves.append(leaf)
1867         if current_leaves is tail_leaves:
1868             if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1869                 opening_bracket = leaf.opening_bracket
1870                 closing_bracket = leaf
1871                 current_leaves = body_leaves
1872     tail_leaves.reverse()
1873     body_leaves.reverse()
1874     head_leaves.reverse()
1875     # Since body is a new indent level, remove spurious leading whitespace.
1876     if body_leaves:
1877         normalize_prefix(body_leaves[0], inside_brackets=True)
1878     elif not head_leaves:
1879         # No `head` and no `body` means the split failed. `tail` has all content.
1880         raise CannotSplit("No brackets found")
1881
1882     # Build the new lines.
1883     for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1884         for leaf in leaves:
1885             result.append(leaf, preformatted=True)
1886             for comment_after in line.comments_after(leaf):
1887                 result.append(comment_after, preformatted=True)
1888     bracket_split_succeeded_or_raise(head, body, tail)
1889     assert opening_bracket and closing_bracket
1890     if (
1891         opening_bracket.type == token.LPAR
1892         and not opening_bracket.value
1893         and closing_bracket.type == token.RPAR
1894         and not closing_bracket.value
1895     ):
1896         # These parens were optional. If there aren't any delimiters or standalone
1897         # comments in the body, they were unnecessary and another split without
1898         # them should be attempted.
1899         if not (
1900             body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
1901         ):
1902             omit = {id(closing_bracket), *omit}
1903             yield from right_hand_split(line, py36=py36, omit=omit)
1904             return
1905
1906     ensure_visible(opening_bracket)
1907     ensure_visible(closing_bracket)
1908     for result in (head, body, tail):
1909         if result:
1910             yield result
1911
1912
1913 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1914     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
1915
1916     Do nothing otherwise.
1917
1918     A left- or right-hand split is based on a pair of brackets. Content before
1919     (and including) the opening bracket is left on one line, content inside the
1920     brackets is put on a separate line, and finally content starting with and
1921     following the closing bracket is put on a separate line.
1922
1923     Those are called `head`, `body`, and `tail`, respectively. If the split
1924     produced the same line (all content in `head`) or ended up with an empty `body`
1925     and the `tail` is just the closing bracket, then it's considered failed.
1926     """
1927     tail_len = len(str(tail).strip())
1928     if not body:
1929         if tail_len == 0:
1930             raise CannotSplit("Splitting brackets produced the same line")
1931
1932         elif tail_len < 3:
1933             raise CannotSplit(
1934                 f"Splitting brackets on an empty body to save "
1935                 f"{tail_len} characters is not worth it"
1936             )
1937
1938
1939 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
1940     """Normalize prefix of the first leaf in every line returned by `split_func`.
1941
1942     This is a decorator over relevant split functions.
1943     """
1944
1945     @wraps(split_func)
1946     def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
1947         for l in split_func(line, py36):
1948             normalize_prefix(l.leaves[0], inside_brackets=True)
1949             yield l
1950
1951     return split_wrapper
1952
1953
1954 @dont_increase_indentation
1955 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1956     """Split according to delimiters of the highest priority.
1957
1958     If `py36` is True, the split will add trailing commas also in function
1959     signatures that contain `*` and `**`.
1960     """
1961     try:
1962         last_leaf = line.leaves[-1]
1963     except IndexError:
1964         raise CannotSplit("Line empty")
1965
1966     delimiters = line.bracket_tracker.delimiters
1967     try:
1968         delimiter_priority = line.bracket_tracker.max_delimiter_priority(
1969             exclude={id(last_leaf)}
1970         )
1971     except ValueError:
1972         raise CannotSplit("No delimiters found")
1973
1974     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1975     lowest_depth = sys.maxsize
1976     trailing_comma_safe = True
1977
1978     def append_to_line(leaf: Leaf) -> Iterator[Line]:
1979         """Append `leaf` to current line or to new line if appending impossible."""
1980         nonlocal current_line
1981         try:
1982             current_line.append_safe(leaf, preformatted=True)
1983         except ValueError as ve:
1984             yield current_line
1985
1986             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1987             current_line.append(leaf)
1988
1989     for leaf in line.leaves:
1990         yield from append_to_line(leaf)
1991
1992         for comment_after in line.comments_after(leaf):
1993             yield from append_to_line(comment_after)
1994
1995         lowest_depth = min(lowest_depth, leaf.bracket_depth)
1996         if (
1997             leaf.bracket_depth == lowest_depth
1998             and is_vararg(leaf, within=VARARGS_PARENTS)
1999         ):
2000             trailing_comma_safe = trailing_comma_safe and py36
2001         leaf_priority = delimiters.get(id(leaf))
2002         if leaf_priority == delimiter_priority:
2003             yield current_line
2004
2005             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2006     if current_line:
2007         if (
2008             trailing_comma_safe
2009             and delimiter_priority == COMMA_PRIORITY
2010             and current_line.leaves[-1].type != token.COMMA
2011             and current_line.leaves[-1].type != STANDALONE_COMMENT
2012         ):
2013             current_line.append(Leaf(token.COMMA, ","))
2014         yield current_line
2015
2016
2017 @dont_increase_indentation
2018 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2019     """Split standalone comments from the rest of the line."""
2020     if not line.contains_standalone_comments(0):
2021         raise CannotSplit("Line does not have any standalone comments")
2022
2023     current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2024
2025     def append_to_line(leaf: Leaf) -> Iterator[Line]:
2026         """Append `leaf` to current line or to new line if appending impossible."""
2027         nonlocal current_line
2028         try:
2029             current_line.append_safe(leaf, preformatted=True)
2030         except ValueError as ve:
2031             yield current_line
2032
2033             current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2034             current_line.append(leaf)
2035
2036     for leaf in line.leaves:
2037         yield from append_to_line(leaf)
2038
2039         for comment_after in line.comments_after(leaf):
2040             yield from append_to_line(comment_after)
2041
2042     if current_line:
2043         yield current_line
2044
2045
2046 def explode_split(
2047     line: Line, py36: bool = False, omit: Collection[LeafID] = ()
2048 ) -> Iterator[Line]:
2049     """Split by rightmost bracket and immediately split contents by a delimiter."""
2050     new_lines = list(right_hand_split(line, py36, omit))
2051     if len(new_lines) != 3:
2052         yield from new_lines
2053         return
2054
2055     yield new_lines[0]
2056
2057     try:
2058         yield from delimiter_split(new_lines[1], py36)
2059
2060     except CannotSplit:
2061         yield new_lines[1]
2062
2063     yield new_lines[2]
2064
2065
2066 def is_import(leaf: Leaf) -> bool:
2067     """Return True if the given leaf starts an import statement."""
2068     p = leaf.parent
2069     t = leaf.type
2070     v = leaf.value
2071     return bool(
2072         t == token.NAME
2073         and (
2074             (v == "import" and p and p.type == syms.import_name)
2075             or (v == "from" and p and p.type == syms.import_from)
2076         )
2077     )
2078
2079
2080 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2081     """Leave existing extra newlines if not `inside_brackets`. Remove everything
2082     else.
2083
2084     Note: don't use backslashes for formatting or you'll lose your voting rights.
2085     """
2086     if not inside_brackets:
2087         spl = leaf.prefix.split("#")
2088         if "\\" not in spl[0]:
2089             nl_count = spl[-1].count("\n")
2090             if len(spl) > 1:
2091                 nl_count -= 1
2092             leaf.prefix = "\n" * nl_count
2093             return
2094
2095     leaf.prefix = ""
2096
2097
2098 def normalize_string_quotes(leaf: Leaf) -> None:
2099     """Prefer double quotes but only if it doesn't cause more escaping.
2100
2101     Adds or removes backslashes as appropriate. Doesn't parse and fix
2102     strings nested in f-strings (yet).
2103
2104     Note: Mutates its argument.
2105     """
2106     value = leaf.value.lstrip("furbFURB")
2107     if value[:3] == '"""':
2108         return
2109
2110     elif value[:3] == "'''":
2111         orig_quote = "'''"
2112         new_quote = '"""'
2113     elif value[0] == '"':
2114         orig_quote = '"'
2115         new_quote = "'"
2116     else:
2117         orig_quote = "'"
2118         new_quote = '"'
2119     first_quote_pos = leaf.value.find(orig_quote)
2120     if first_quote_pos == -1:
2121         return  # There's an internal error
2122
2123     prefix = leaf.value[:first_quote_pos]
2124     unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2125     escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2126     escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2127     body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2128     if "r" in prefix.casefold():
2129         if unescaped_new_quote.search(body):
2130             # There's at least one unescaped new_quote in this raw string
2131             # so converting is impossible
2132             return
2133
2134         # Do not introduce or remove backslashes in raw strings
2135         new_body = body
2136     else:
2137         # remove unnecessary quotes
2138         new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2139         if body != new_body:
2140             # Consider the string without unnecessary quotes as the original
2141             body = new_body
2142             leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2143         new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2144         new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2145     if new_quote == '"""' and new_body[-1] == '"':
2146         # edge case:
2147         new_body = new_body[:-1] + '\\"'
2148     orig_escape_count = body.count("\\")
2149     new_escape_count = new_body.count("\\")
2150     if new_escape_count > orig_escape_count:
2151         return  # Do not introduce more escaping
2152
2153     if new_escape_count == orig_escape_count and orig_quote == '"':
2154         return  # Prefer double quotes
2155
2156     leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2157
2158
2159 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2160     """Make existing optional parentheses invisible or create new ones.
2161
2162     Standardizes on visible parentheses for single-element tuples, and keeps
2163     existing visible parentheses for other tuples and generator expressions.
2164     """
2165     check_lpar = False
2166     for child in list(node.children):
2167         if check_lpar:
2168             if child.type == syms.atom:
2169                 if not (
2170                     is_empty_tuple(child)
2171                     or is_one_tuple(child)
2172                     or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
2173                 ):
2174                     first = child.children[0]
2175                     last = child.children[-1]
2176                     if first.type == token.LPAR and last.type == token.RPAR:
2177                         # make parentheses invisible
2178                         first.value = ""  # type: ignore
2179                         last.value = ""  # type: ignore
2180             elif is_one_tuple(child):
2181                 # wrap child in visible parentheses
2182                 lpar = Leaf(token.LPAR, "(")
2183                 rpar = Leaf(token.RPAR, ")")
2184                 index = child.remove() or 0
2185                 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2186             else:
2187                 # wrap child in invisible parentheses
2188                 lpar = Leaf(token.LPAR, "")
2189                 rpar = Leaf(token.RPAR, "")
2190                 index = child.remove() or 0
2191                 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2192
2193         check_lpar = isinstance(child, Leaf) and child.value in parens_after
2194
2195
2196 def is_empty_tuple(node: LN) -> bool:
2197     """Return True if `node` holds an empty tuple."""
2198     return (
2199         node.type == syms.atom
2200         and len(node.children) == 2
2201         and node.children[0].type == token.LPAR
2202         and node.children[1].type == token.RPAR
2203     )
2204
2205
2206 def is_one_tuple(node: LN) -> bool:
2207     """Return True if `node` holds a tuple with one element, with or without parens."""
2208     if node.type == syms.atom:
2209         if len(node.children) != 3:
2210             return False
2211
2212         lpar, gexp, rpar = node.children
2213         if not (
2214             lpar.type == token.LPAR
2215             and gexp.type == syms.testlist_gexp
2216             and rpar.type == token.RPAR
2217         ):
2218             return False
2219
2220         return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2221
2222     return (
2223         node.type in IMPLICIT_TUPLE
2224         and len(node.children) == 2
2225         and node.children[1].type == token.COMMA
2226     )
2227
2228
2229 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2230     """Return True if `leaf` is a star or double star in a vararg or kwarg.
2231
2232     If `within` includes VARARGS_PARENTS, this applies to function signatures.
2233     If `within` includes COLLECTION_LIBERALS_PARENTS, it applies to right
2234     hand-side extended iterable unpacking (PEP 3132) and additional unpacking
2235     generalizations (PEP 448).
2236     """
2237     if leaf.type not in STARS or not leaf.parent:
2238         return False
2239
2240     p = leaf.parent
2241     if p.type == syms.star_expr:
2242         # Star expressions are also used as assignment targets in extended
2243         # iterable unpacking (PEP 3132).  See what its parent is instead.
2244         if not p.parent:
2245             return False
2246
2247         p = p.parent
2248
2249     return p.type in within
2250
2251
2252 def max_delimiter_priority_in_atom(node: LN) -> int:
2253     """Return maximum delimiter priority inside `node`.
2254
2255     This is specific to atoms with contents contained in a pair of parentheses.
2256     If `node` isn't an atom or there are no enclosing parentheses, returns 0.
2257     """
2258     if node.type != syms.atom:
2259         return 0
2260
2261     first = node.children[0]
2262     last = node.children[-1]
2263     if not (first.type == token.LPAR and last.type == token.RPAR):
2264         return 0
2265
2266     bt = BracketTracker()
2267     for c in node.children[1:-1]:
2268         if isinstance(c, Leaf):
2269             bt.mark(c)
2270         else:
2271             for leaf in c.leaves():
2272                 bt.mark(leaf)
2273     try:
2274         return bt.max_delimiter_priority()
2275
2276     except ValueError:
2277         return 0
2278
2279
2280 def ensure_visible(leaf: Leaf) -> None:
2281     """Make sure parentheses are visible.
2282
2283     They could be invisible as part of some statements (see
2284     :func:`normalize_invible_parens` and :func:`visit_import_from`).
2285     """
2286     if leaf.type == token.LPAR:
2287         leaf.value = "("
2288     elif leaf.type == token.RPAR:
2289         leaf.value = ")"
2290
2291
2292 def is_python36(node: Node) -> bool:
2293     """Return True if the current file is using Python 3.6+ features.
2294
2295     Currently looking for:
2296     - f-strings; and
2297     - trailing commas after * or ** in function signatures.
2298     """
2299     for n in node.pre_order():
2300         if n.type == token.STRING:
2301             value_head = n.value[:2]  # type: ignore
2302             if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
2303                 return True
2304
2305         elif (
2306             n.type == syms.typedargslist
2307             and n.children
2308             and n.children[-1].type == token.COMMA
2309         ):
2310             for ch in n.children:
2311                 if ch.type in STARS:
2312                     return True
2313
2314     return False
2315
2316
2317 PYTHON_EXTENSIONS = {".py"}
2318 BLACKLISTED_DIRECTORIES = {
2319     "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
2320 }
2321
2322
2323 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
2324     """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
2325     and have one of the PYTHON_EXTENSIONS.
2326     """
2327     for child in path.iterdir():
2328         if child.is_dir():
2329             if child.name in BLACKLISTED_DIRECTORIES:
2330                 continue
2331
2332             yield from gen_python_files_in_dir(child)
2333
2334         elif child.suffix in PYTHON_EXTENSIONS:
2335             yield child
2336
2337
2338 @dataclass
2339 class Report:
2340     """Provides a reformatting counter. Can be rendered with `str(report)`."""
2341     check: bool = False
2342     quiet: bool = False
2343     change_count: int = 0
2344     same_count: int = 0
2345     failure_count: int = 0
2346
2347     def done(self, src: Path, changed: Changed) -> None:
2348         """Increment the counter for successful reformatting. Write out a message."""
2349         if changed is Changed.YES:
2350             reformatted = "would reformat" if self.check else "reformatted"
2351             if not self.quiet:
2352                 out(f"{reformatted} {src}")
2353             self.change_count += 1
2354         else:
2355             if not self.quiet:
2356                 if changed is Changed.NO:
2357                     msg = f"{src} already well formatted, good job."
2358                 else:
2359                     msg = f"{src} wasn't modified on disk since last run."
2360                 out(msg, bold=False)
2361             self.same_count += 1
2362
2363     def failed(self, src: Path, message: str) -> None:
2364         """Increment the counter for failed reformatting. Write out a message."""
2365         err(f"error: cannot format {src}: {message}")
2366         self.failure_count += 1
2367
2368     @property
2369     def return_code(self) -> int:
2370         """Return the exit code that the app should use.
2371
2372         This considers the current state of changed files and failures:
2373         - if there were any failures, return 123;
2374         - if any files were changed and --check is being used, return 1;
2375         - otherwise return 0.
2376         """
2377         # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2378         # 126 we have special returncodes reserved by the shell.
2379         if self.failure_count:
2380             return 123
2381
2382         elif self.change_count and self.check:
2383             return 1
2384
2385         return 0
2386
2387     def __str__(self) -> str:
2388         """Render a color report of the current state.
2389
2390         Use `click.unstyle` to remove colors.
2391         """
2392         if self.check:
2393             reformatted = "would be reformatted"
2394             unchanged = "would be left unchanged"
2395             failed = "would fail to reformat"
2396         else:
2397             reformatted = "reformatted"
2398             unchanged = "left unchanged"
2399             failed = "failed to reformat"
2400         report = []
2401         if self.change_count:
2402             s = "s" if self.change_count > 1 else ""
2403             report.append(
2404                 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2405             )
2406         if self.same_count:
2407             s = "s" if self.same_count > 1 else ""
2408             report.append(f"{self.same_count} file{s} {unchanged}")
2409         if self.failure_count:
2410             s = "s" if self.failure_count > 1 else ""
2411             report.append(
2412                 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2413             )
2414         return ", ".join(report) + "."
2415
2416
2417 def assert_equivalent(src: str, dst: str) -> None:
2418     """Raise AssertionError if `src` and `dst` aren't equivalent."""
2419
2420     import ast
2421     import traceback
2422
2423     def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2424         """Simple visitor generating strings to compare ASTs by content."""
2425         yield f"{'  ' * depth}{node.__class__.__name__}("
2426
2427         for field in sorted(node._fields):
2428             try:
2429                 value = getattr(node, field)
2430             except AttributeError:
2431                 continue
2432
2433             yield f"{'  ' * (depth+1)}{field}="
2434
2435             if isinstance(value, list):
2436                 for item in value:
2437                     if isinstance(item, ast.AST):
2438                         yield from _v(item, depth + 2)
2439
2440             elif isinstance(value, ast.AST):
2441                 yield from _v(value, depth + 2)
2442
2443             else:
2444                 yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"
2445
2446         yield f"{'  ' * depth})  # /{node.__class__.__name__}"
2447
2448     try:
2449         src_ast = ast.parse(src)
2450     except Exception as exc:
2451         major, minor = sys.version_info[:2]
2452         raise AssertionError(
2453             f"cannot use --safe with this file; failed to parse source file "
2454             f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2455             f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
2456         )
2457
2458     try:
2459         dst_ast = ast.parse(dst)
2460     except Exception as exc:
2461         log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2462         raise AssertionError(
2463             f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2464             f"Please report a bug on https://github.com/ambv/black/issues.  "
2465             f"This invalid output might be helpful: {log}"
2466         ) from None
2467
2468     src_ast_str = "\n".join(_v(src_ast))
2469     dst_ast_str = "\n".join(_v(dst_ast))
2470     if src_ast_str != dst_ast_str:
2471         log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2472         raise AssertionError(
2473             f"INTERNAL ERROR: Black produced code that is not equivalent to "
2474             f"the source.  "
2475             f"Please report a bug on https://github.com/ambv/black/issues.  "
2476             f"This diff might be helpful: {log}"
2477         ) from None
2478
2479
2480 def assert_stable(src: str, dst: str, line_length: int) -> None:
2481     """Raise AssertionError if `dst` reformats differently the second time."""
2482     newdst = format_str(dst, line_length=line_length)
2483     if dst != newdst:
2484         log = dump_to_file(
2485             diff(src, dst, "source", "first pass"),
2486             diff(dst, newdst, "first pass", "second pass"),
2487         )
2488         raise AssertionError(
2489             f"INTERNAL ERROR: Black produced different code on the second pass "
2490             f"of the formatter.  "
2491             f"Please report a bug on https://github.com/ambv/black/issues.  "
2492             f"This diff might be helpful: {log}"
2493         ) from None
2494
2495
2496 def dump_to_file(*output: str) -> str:
2497     """Dump `output` to a temporary file. Return path to the file."""
2498     import tempfile
2499
2500     with tempfile.NamedTemporaryFile(
2501         mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
2502     ) as f:
2503         for lines in output:
2504             f.write(lines)
2505             if lines and lines[-1] != "\n":
2506                 f.write("\n")
2507     return f.name
2508
2509
2510 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
2511     """Return a unified diff string between strings `a` and `b`."""
2512     import difflib
2513
2514     a_lines = [line + "\n" for line in a.split("\n")]
2515     b_lines = [line + "\n" for line in b.split("\n")]
2516     return "".join(
2517         difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
2518     )
2519
2520
2521 def cancel(tasks: List[asyncio.Task]) -> None:
2522     """asyncio signal handler that cancels all `tasks` and reports to stderr."""
2523     err("Aborted!")
2524     for task in tasks:
2525         task.cancel()
2526
2527
2528 def shutdown(loop: BaseEventLoop) -> None:
2529     """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
2530     try:
2531         # This part is borrowed from asyncio/runners.py in Python 3.7b2.
2532         to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
2533         if not to_cancel:
2534             return
2535
2536         for task in to_cancel:
2537             task.cancel()
2538         loop.run_until_complete(
2539             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
2540         )
2541     finally:
2542         # `concurrent.futures.Future` objects cannot be cancelled once they
2543         # are already running. There might be some when the `shutdown()` happened.
2544         # Silence their logger's spew about the event loop being closed.
2545         cf_logger = logging.getLogger("concurrent.futures")
2546         cf_logger.setLevel(logging.CRITICAL)
2547         loop.close()
2548
2549
2550 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
2551     """Replace `regex` with `replacement` twice on `original`.
2552
2553     This is used by string normalization to perform replaces on
2554     overlapping matches.
2555     """
2556     return regex.sub(replacement, regex.sub(replacement, original))
2557
2558
2559 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2560
2561
2562 def get_cache_file(line_length: int) -> Path:
2563     return CACHE_DIR / f"cache.{line_length}.pickle"
2564
2565
2566 def read_cache(line_length: int) -> Cache:
2567     """Read the cache if it exists and is well formed.
2568
2569     If it is not well formed, the call to write_cache later should resolve the issue.
2570     """
2571     cache_file = get_cache_file(line_length)
2572     if not cache_file.exists():
2573         return {}
2574
2575     with cache_file.open("rb") as fobj:
2576         try:
2577             cache: Cache = pickle.load(fobj)
2578         except pickle.UnpicklingError:
2579             return {}
2580
2581     return cache
2582
2583
2584 def get_cache_info(path: Path) -> CacheInfo:
2585     """Return the information used to check if a file is already formatted or not."""
2586     stat = path.stat()
2587     return stat.st_mtime, stat.st_size
2588
2589
2590 def filter_cached(
2591     cache: Cache, sources: Iterable[Path]
2592 ) -> Tuple[List[Path], List[Path]]:
2593     """Split a list of paths into two.
2594
2595     The first list contains paths of files that modified on disk or are not in the
2596     cache. The other list contains paths to non-modified files.
2597     """
2598     todo, done = [], []
2599     for src in sources:
2600         src = src.resolve()
2601         if cache.get(src) != get_cache_info(src):
2602             todo.append(src)
2603         else:
2604             done.append(src)
2605     return todo, done
2606
2607
2608 def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
2609     """Update the cache file."""
2610     cache_file = get_cache_file(line_length)
2611     try:
2612         if not CACHE_DIR.exists():
2613             CACHE_DIR.mkdir(parents=True)
2614         new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
2615         with cache_file.open("wb") as fobj:
2616             pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
2617     except OSError:
2618         pass
2619
2620
2621 if __name__ == "__main__":
2622     main()