3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial, wraps
9 from multiprocessing import Manager
11 from pathlib import Path
34 from appdirs import user_cache_dir
35 from attr import dataclass, Factory
39 from blib2to3.pytree import Node, Leaf, type_repr
40 from blib2to3 import pygram, pytree
41 from blib2to3.pgen2 import driver, token
42 from blib2to3.pgen2.parse import ParseError
44 __version__ = "18.4a4"
45 DEFAULT_LINE_LENGTH = 88
48 syms = pygram.python_symbols
56 LN = Union[Leaf, Node]
57 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
60 CacheInfo = Tuple[Timestamp, FileSize]
61 Cache = Dict[Path, CacheInfo]
62 out = partial(click.secho, bold=True, err=True)
63 err = partial(click.secho, fg="red", err=True)
66 class NothingChanged(UserWarning):
67 """Raised by :func:`format_file` when reformatted code is the same as source."""
70 class CannotSplit(Exception):
71 """A readable split that fits the allotted line length is impossible.
73 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
74 :func:`delimiter_split`.
78 class FormatError(Exception):
79 """Base exception for `# fmt: on` and `# fmt: off` handling.
81 It holds the number of bytes of the prefix consumed before the format
82 control comment appeared.
85 def __init__(self, consumed: int) -> None:
86 super().__init__(consumed)
87 self.consumed = consumed
89 def trim_prefix(self, leaf: Leaf) -> None:
90 leaf.prefix = leaf.prefix[self.consumed :]
92 def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
93 """Returns a new Leaf from the consumed part of the prefix."""
94 unformatted_prefix = leaf.prefix[: self.consumed]
95 return Leaf(token.NEWLINE, unformatted_prefix)
98 class FormatOn(FormatError):
99 """Found a comment like `# fmt: on` in the file."""
102 class FormatOff(FormatError):
103 """Found a comment like `# fmt: off` in the file."""
106 class WriteBack(Enum):
123 default=DEFAULT_LINE_LENGTH,
124 help="How many character per line to allow.",
131 "Don't write the files back, just return the status. Return code 0 "
132 "means nothing would change. Return code 1 means some files would be "
133 "reformatted. Return code 123 means there was an internal error."
139 help="Don't write the files back, just output a diff for each file on stdout.",
144 help="If --fast given, skip temporary sanity checks. [default: --safe]",
151 "Don't emit non-error messages to stderr. Errors are still emitted, "
152 "silence those with 2>/dev/null."
155 @click.version_option(version=__version__)
160 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
173 """The uncompromising code formatter."""
174 sources: List[Path] = []
178 sources.extend(gen_python_files_in_dir(p))
180 # if a file was explicitly given, we don't care about its extension
183 sources.append(Path("-"))
185 err(f"invalid path: {s}")
187 if check and not diff:
188 write_back = WriteBack.NO
190 write_back = WriteBack.DIFF
192 write_back = WriteBack.YES
193 report = Report(check=check, quiet=quiet)
194 if len(sources) == 0:
195 out("No paths given. Nothing to do 😴")
199 elif len(sources) == 1:
200 reformat_one(sources[0], line_length, fast, write_back, report)
202 loop = asyncio.get_event_loop()
203 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
205 loop.run_until_complete(
207 sources, line_length, fast, write_back, report, loop, executor
213 out("All done! ✨ 🍰 ✨")
214 click.echo(str(report))
215 ctx.exit(report.return_code)
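# --- Editor's sketch (not part of black) ---------------------------------------
# One way to exercise the CLI and the return codes documented in the --check
# help text. Assumptions: the click command defined above is named `main` (as
# in upstream black) and click's test runner wires up stdin for the "-" path.
def _editor_example_cli() -> int:
    from click.testing import CliRunner

    result = CliRunner().invoke(main, ["--check", "-"], input="x = 1\n")
    # 0: nothing would change, 1: files would be reformatted, 123: internal error
    return result.exit_code
# --------------------------------------------------------------------------------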
219 src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
221 """Reformat a single file under `src` without spawning child processes.
223 `line_length`, `write_back`, and `fast` options are passed to
224 :func:`format_file_in_place`.
228 if not src.is_file() and str(src) == "-":
229 if format_stdin_to_stdout(
230 line_length=line_length, fast=fast, write_back=write_back
232 changed = Changed.YES
235 if write_back != WriteBack.DIFF:
236 cache = read_cache(line_length)
238 if src in cache and cache[src] == get_cache_info(src):
239 changed = Changed.CACHED
241 changed is not Changed.CACHED
242 and format_file_in_place(
243 src, line_length=line_length, fast=fast, write_back=write_back
246 changed = Changed.YES
247 if write_back == WriteBack.YES and changed is not Changed.NO:
248 write_cache(cache, [src], line_length)
249 report.done(src, changed)
250 except Exception as exc:
251 report.failed(src, str(exc))
254 async def schedule_formatting(
258 write_back: WriteBack,
263 """Run formatting of `sources` in parallel using the provided `executor`.
265 (Use ProcessPoolExecutors for actual parallelism.)
267 `line_length`, `write_back`, and `fast` options are passed to
268 :func:`format_file_in_place`.
271 if write_back != WriteBack.DIFF:
272 cache = read_cache(line_length)
273 sources, cached = filter_cached(cache, sources)
275 report.done(src, Changed.CACHED)
280 if write_back == WriteBack.DIFF:
281 # For diff output, we need locks to ensure we don't interleave output
282 # from different processes.
284 lock = manager.Lock()
286 src: loop.run_in_executor(
287 executor, format_file_in_place, src, line_length, fast, write_back, lock
291 _task_values = list(tasks.values())
293 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
294 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
295 except NotImplementedError:
296 # There are no good alternatives for these on Windows
298 await asyncio.wait(_task_values)
299 for src, task in tasks.items():
301 report.failed(src, "timed out, cancelling")
303 cancelled.append(task)
304 elif task.cancelled():
305 cancelled.append(task)
306 elif task.exception():
307 report.failed(src, str(task.exception()))
309 formatted.append(src)
310 report.done(src, Changed.YES if task.result() else Changed.NO)
313 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
314 if write_back == WriteBack.YES and formatted:
315 write_cache(cache, formatted, line_length)
318 def format_file_in_place(
322 write_back: WriteBack = WriteBack.NO,
323 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
325 """Format file under `src` path. Return True if changed.
327 If `write_back` is YES, write reformatted code back to the file; if DIFF, write a diff to stdout.
328 `line_length` and `fast` options are passed to :func:`format_file_contents`.
331 with tokenize.open(src) as src_buffer:
332 src_contents = src_buffer.read()
334 dst_contents = format_file_contents(
335 src_contents, line_length=line_length, fast=fast
337 except NothingChanged:
340 if write_back == WriteBack.YES:
341 with open(src, "w", encoding=src_buffer.encoding) as f:
342 f.write(dst_contents)
343 elif write_back == WriteBack.DIFF:
344 src_name = f"{src} (original)"
345 dst_name = f"{src} (formatted)"
346 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
350 sys.stdout.write(diff_contents)
357 def format_stdin_to_stdout(
358 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
360 """Format file on stdin. Return True if changed.
362 If `write_back` is YES, write reformatted code to stdout; if DIFF, write a diff to stdout instead.
363 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
365 src = sys.stdin.read()
368 dst = format_file_contents(src, line_length=line_length, fast=fast)
371 except NothingChanged:
375 if write_back == WriteBack.YES:
376 sys.stdout.write(dst)
377 elif write_back == WriteBack.DIFF:
378 src_name = "<stdin> (original)"
379 dst_name = "<stdin> (formatted)"
380 sys.stdout.write(diff(src, dst, src_name, dst_name))
383 def format_file_contents(
384 src_contents: str, line_length: int, fast: bool
386 """Reformat contents a file and return new contents.
388 If `fast` is False, additionally confirm that the reformatted code is
389 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
390 `line_length` is passed to :func:`format_str`.
392 if src_contents.strip() == "":
395 dst_contents = format_str(src_contents, line_length=line_length)
396 if src_contents == dst_contents:
400 assert_equivalent(src_contents, dst_contents)
401 assert_stable(src_contents, dst_contents, line_length=line_length)
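# --- Editor's sketch (not part of black) ---------------------------------------
# Typical call pattern for format_file_contents(): NothingChanged signals that
# the input was already formatted, which callers treat as "keep the source",
# mirroring format_file_in_place() and format_stdin_to_stdout() above.
def _editor_example_format_contents(src: str) -> str:
    try:
        return format_file_contents(src, line_length=DEFAULT_LINE_LENGTH, fast=False)
    except NothingChanged:
        return src
# --------------------------------------------------------------------------------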
405 def format_str(src_contents: str, line_length: int) -> FileContent:
406 """Reformat a string and return new contents.
408 `line_length` determines how many characters per line are allowed.
410 src_node = lib2to3_parse(src_contents)
412 lines = LineGenerator()
413 elt = EmptyLineTracker()
414 py36 = is_python36(src_node)
417 for current_line in lines.visit(src_node):
418 for _ in range(after):
419 dst_contents += str(empty_line)
420 before, after = elt.maybe_empty_lines(current_line)
421 for _ in range(before):
422 dst_contents += str(empty_line)
423 for line in split_line(current_line, line_length=line_length, py36=py36):
424 dst_contents += str(line)
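# --- Editor's sketch (not part of black) ---------------------------------------
# format_str() is the pure string-to-string core used by the helpers above; the
# editor's expectation (not a recorded run) is that single quotes and spacing
# are normalized, e.g. "print( 'hi' )" becomes 'print("hi")'.
def _editor_example_format_str() -> str:
    return format_str("print( 'hi' )\n", line_length=DEFAULT_LINE_LENGTH)
# --------------------------------------------------------------------------------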
429 pygram.python_grammar_no_print_statement_no_exec_statement,
430 pygram.python_grammar_no_print_statement,
431 pygram.python_grammar,
435 def lib2to3_parse(src_txt: str) -> Node:
436 """Given a string with source, return the lib2to3 Node."""
437 grammar = pygram.python_grammar_no_print_statement
438 if src_txt[-1] != "\n":
439 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
441 for grammar in GRAMMARS:
442 drv = driver.Driver(grammar, pytree.convert)
444 result = drv.parse_string(src_txt, True)
447 except ParseError as pe:
448 lineno, column = pe.context[1]
449 lines = src_txt.splitlines()
451 faulty_line = lines[lineno - 1]
453 faulty_line = "<line number missing in source>"
454 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
458 if isinstance(result, Leaf):
459 result = Node(syms.file_input, [result])
463 def lib2to3_unparse(node: Node) -> str:
464 """Given a lib2to3 node, return its string representation."""
472 class Visitor(Generic[T]):
473 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
475 def visit(self, node: LN) -> Iterator[T]:
476 """Main method to visit `node` and its children.
478 It tries to find a `visit_*()` method for the given `node.type`, like
479 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
480 If no dedicated `visit_*()` method is found, chooses `visit_default()`
483 Then yields objects of type `T` from the selected visitor.
486 name = token.tok_name[node.type]
488 name = type_repr(node.type)
489 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
491 def visit_default(self, node: LN) -> Iterator[T]:
492 """Default `visit_*()` implementation. Recurses to children of `node`."""
493 if isinstance(node, Node):
494 for child in node.children:
495 yield from self.visit(child)
499 class DebugVisitor(Visitor[T]):
502 def visit_default(self, node: LN) -> Iterator[T]:
503 indent = " " * (2 * self.tree_depth)
504 if isinstance(node, Node):
505 _type = type_repr(node.type)
506 out(f"{indent}{_type}", fg="yellow")
508 for child in node.children:
509 yield from self.visit(child)
512 out(f"{indent}/{_type}", fg="yellow", bold=False)
514 _type = token.tok_name.get(node.type, str(node.type))
515 out(f"{indent}{_type}", fg="blue", nl=False)
517 # We don't have to handle prefixes for `Node` objects since
518 # that delegates to the first child anyway.
519 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
520 out(f" {node.value!r}", fg="blue", bold=False)
523 def show(cls, code: str) -> None:
524 """Pretty-print the lib2to3 AST of a given string of `code`.
526 Convenience method for debugging.
528 v: DebugVisitor[None] = DebugVisitor()
529 list(v.visit(lib2to3_parse(code)))
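# --- Editor's note (not part of black) ------------------------------------------
# Handy while hacking on the formatter: dump the concrete syntax tree black sees
# for a snippet.
def _editor_example_show_tree() -> None:
    DebugVisitor.show("x = [1, 2]\n")
# ---------------------------------------------------------------------------------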
532 KEYWORDS = set(keyword.kwlist)
533 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
534 FLOW_CONTROL = {"return", "raise", "break", "continue"}
545 STANDALONE_COMMENT = 153
546 LOGIC_OPERATORS = {"and", "or"}
570 STARS = {token.STAR, token.DOUBLESTAR}
573 syms.argument, # double star in arglist
574 syms.trailer, # single argument to call
576 syms.varargslist, # lambdas
578 UNPACKING_PARENTS = {
579 syms.atom, # single element of a list or set literal
601 COMPREHENSION_PRIORITY = 20
606 COMPARATOR_PRIORITY = 3
611 class BracketTracker:
612 """Keeps track of brackets on a line."""
615 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
616 delimiters: Dict[LeafID, Priority] = Factory(dict)
617 previous: Optional[Leaf] = None
618 _for_loop_variable: bool = False
619 _lambda_arguments: bool = False
621 def mark(self, leaf: Leaf) -> None:
622 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
624 All leaves receive an int `bracket_depth` field that stores how deep
625 within brackets a given leaf is. 0 means there are no enclosing brackets
626 that started on this line.
628 If a leaf is itself a closing bracket, it receives an `opening_bracket`
629 field that it forms a pair with. This is a one-directional link to
630 avoid reference cycles.
632 If a leaf is a delimiter (a token on which Black can split the line if
633 needed) and it's on depth 0, its `id()` is stored in the tracker's
636 if leaf.type == token.COMMENT:
639 self.maybe_decrement_after_for_loop_variable(leaf)
640 self.maybe_decrement_after_lambda_arguments(leaf)
641 if leaf.type in CLOSING_BRACKETS:
643 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
644 leaf.opening_bracket = opening_bracket
645 leaf.bracket_depth = self.depth
647 delim = is_split_before_delimiter(leaf, self.previous)
648 if delim and self.previous is not None:
649 self.delimiters[id(self.previous)] = delim
651 delim = is_split_after_delimiter(leaf, self.previous)
653 self.delimiters[id(leaf)] = delim
654 if leaf.type in OPENING_BRACKETS:
655 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
658 self.maybe_increment_lambda_arguments(leaf)
659 self.maybe_increment_for_loop_variable(leaf)
661 def any_open_brackets(self) -> bool:
662 """Return True if there is an yet unmatched open bracket on the line."""
663 return bool(self.bracket_match)
665 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
666 """Return the highest priority of a delimiter found on the line.
668 Values are consistent with what `is_split_*_delimiter()` return.
669 Raises ValueError on no delimiters.
671 return max(v for k, v in self.delimiters.items() if k not in exclude)
673 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
674 """In a for loop, or comprehension, the variables are often unpacks.
676 To avoid splitting on the comma in this situation, increase the depth of
677 tokens between `for` and `in`.
679 if leaf.type == token.NAME and leaf.value == "for":
681 self._for_loop_variable = True
686 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
687 """See `maybe_increment_for_loop_variable` above for explanation."""
688 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
690 self._for_loop_variable = False
695 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
696 """In a lambda expression, there might be more than one argument.
698 To avoid splitting on the comma in this situation, increase the depth of
699 tokens between `lambda` and `:`.
701 if leaf.type == token.NAME and leaf.value == "lambda":
703 self._lambda_arguments = True
708 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
709 """See `maybe_increment_lambda_arguments` above for explanation."""
710 if self._lambda_arguments and leaf.type == token.COLON:
712 self._lambda_arguments = False
717 def get_open_lsqb(self) -> Optional[Leaf]:
718 """Return the most recent opening square bracket (if any)."""
719 return self.bracket_match.get((self.depth - 1, token.RSQB))
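# --- Editor's illustration (not part of black) -----------------------------------
# Why the for-loop and lambda helpers above exist: the commas below are not
# treated as split candidates because the tracker raises the depth between
# `for` .. `in` and between `lambda` .. `:`.
def _editor_example_untouched_commas() -> None:
    pairs = [(1, 2), (3, 4)]
    for x, y in pairs:  # comma inside the unpacked loop target
        print(x + y)
    add = lambda a, b: a + b  # commas between lambda arguments
    print(add(1, 2))
# ----------------------------------------------------------------------------------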
724 """Holds leaves and comments. Can be printed with `str(line)`."""
727 leaves: List[Leaf] = Factory(list)
728 comments: List[Tuple[Index, Leaf]] = Factory(list)
729 bracket_tracker: BracketTracker = Factory(BracketTracker)
730 inside_brackets: bool = False
732 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
733 """Add a new `leaf` to the end of the line.
735 Unless `preformatted` is True, the `leaf` will receive a new consistent
736 whitespace prefix and metadata applied by :class:`BracketTracker`.
737 Trailing commas are maybe removed, unpacked for loop variables are
738 demoted from being delimiters.
740 Inline comments are put aside.
742 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
746 if self.leaves and not preformatted:
747 # Note: at this point leaf.prefix should be empty except for
748 # imports, for which we only preserve newlines.
749 leaf.prefix += whitespace(
750 leaf, complex_subscript=self.is_complex_subscript(leaf)
752 if self.inside_brackets or not preformatted:
753 self.bracket_tracker.mark(leaf)
754 self.maybe_remove_trailing_comma(leaf)
756 if not self.append_comment(leaf):
757 self.leaves.append(leaf)
759 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
760 """Like :func:`append()` but disallow invalid standalone comment structure.
762 Raises ValueError when any `leaf` is appended after a standalone comment
763 or when a standalone comment is not the first leaf on the line.
765 if self.bracket_tracker.depth == 0:
767 raise ValueError("cannot append to standalone comments")
769 if self.leaves and leaf.type == STANDALONE_COMMENT:
771 "cannot append standalone comments to a populated line"
774 self.append(leaf, preformatted=preformatted)
777 def is_comment(self) -> bool:
778 """Is this line a standalone comment?"""
779 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
782 def is_decorator(self) -> bool:
783 """Is this line a decorator?"""
784 return bool(self) and self.leaves[0].type == token.AT
787 def is_import(self) -> bool:
788 """Is this an import line?"""
789 return bool(self) and is_import(self.leaves[0])
792 def is_class(self) -> bool:
793 """Is this line a class definition?"""
796 and self.leaves[0].type == token.NAME
797 and self.leaves[0].value == "class"
801 def is_def(self) -> bool:
802 """Is this a function definition? (Also returns True for async defs.)"""
804 first_leaf = self.leaves[0]
809 second_leaf: Optional[Leaf] = self.leaves[1]
813 (first_leaf.type == token.NAME and first_leaf.value == "def")
815 first_leaf.type == token.ASYNC
816 and second_leaf is not None
817 and second_leaf.type == token.NAME
818 and second_leaf.value == "def"
823 def is_flow_control(self) -> bool:
824 """Is this line a flow control statement?
826 Those are `return`, `raise`, `break`, and `continue`.
830 and self.leaves[0].type == token.NAME
831 and self.leaves[0].value in FLOW_CONTROL
835 def is_yield(self) -> bool:
836 """Is this line a yield statement?"""
839 and self.leaves[0].type == token.NAME
840 and self.leaves[0].value == "yield"
843 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
844 """If so, needs to be split before emitting."""
845 for leaf in self.leaves:
846 if leaf.type == STANDALONE_COMMENT:
847 if leaf.bracket_depth <= depth_limit:
852 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
853 """Remove trailing comma if there is one and it's safe."""
856 and self.leaves[-1].type == token.COMMA
857 and closing.type in CLOSING_BRACKETS
861 if closing.type == token.RBRACE:
862 self.remove_trailing_comma()
865 if closing.type == token.RSQB:
866 comma = self.leaves[-1]
867 if comma.parent and comma.parent.type == syms.listmaker:
868 self.remove_trailing_comma()
871 # For parens let's check if it's safe to remove the comma. If the
872 # trailing one is the only one, we might mistakenly change a tuple
873 # into a different type by removing the comma.
874 depth = closing.bracket_depth + 1
876 opening = closing.opening_bracket
877 for _opening_index, leaf in enumerate(self.leaves):
884 for leaf in self.leaves[_opening_index + 1 :]:
888 bracket_depth = leaf.bracket_depth
889 if bracket_depth == depth and leaf.type == token.COMMA:
891 if leaf.parent and leaf.parent.type == syms.arglist:
896 self.remove_trailing_comma()
901 def append_comment(self, comment: Leaf) -> bool:
902 """Add an inline or standalone comment to the line."""
904 comment.type == STANDALONE_COMMENT
905 and self.bracket_tracker.any_open_brackets()
910 if comment.type != token.COMMENT:
913 after = len(self.leaves) - 1
915 comment.type = STANDALONE_COMMENT
920 self.comments.append((after, comment))
923 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
924 """Generate comments that should appear directly after `leaf`."""
925 for _leaf_index, _leaf in enumerate(self.leaves):
932 for index, comment_after in self.comments:
933 if _leaf_index == index:
936 def remove_trailing_comma(self) -> None:
937 """Remove the trailing comma and moves the comments attached to it."""
938 comma_index = len(self.leaves) - 1
939 for i in range(len(self.comments)):
940 comment_index, comment = self.comments[i]
941 if comment_index == comma_index:
942 self.comments[i] = (comma_index - 1, comment)
945 def is_complex_subscript(self, leaf: Leaf) -> bool:
946 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
948 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
950 if open_lsqb is None:
953 subscript_start = open_lsqb.next_sibling
955 isinstance(subscript_start, Node)
956 and subscript_start.type == syms.subscriptlist
958 subscript_start = child_towards(subscript_start, leaf)
959 return subscript_start is not None and any(
960 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
963 def __str__(self) -> str:
964 """Render the line."""
968 indent = " " * self.depth
969 leaves = iter(self.leaves)
971 res = f"{first.prefix}{indent}{first.value}"
974 for _, comment in self.comments:
978 def __bool__(self) -> bool:
979 """Return True if the line has leaves or comments."""
980 return bool(self.leaves or self.comments)
983 class UnformattedLines(Line):
984 """Just like :class:`Line` but stores lines which aren't reformatted."""
986 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
987 """Just add a new `leaf` to the end of the lines.
989 The `preformatted` argument is ignored.
991 Keeps track of indentation `depth`, which is useful when the user
992 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
995 list(generate_comments(leaf))
996 except FormatOn as f_on:
997 self.leaves.append(f_on.leaf_from_consumed(leaf))
1000 self.leaves.append(leaf)
1001 if leaf.type == token.INDENT:
1003 elif leaf.type == token.DEDENT:
1006 def __str__(self) -> str:
1007 """Render unformatted lines from leaves which were added with `append()`.
1009 `depth` is not used for indentation in this case.
1015 for leaf in self.leaves:
1019 def append_comment(self, comment: Leaf) -> bool:
1020 """Not implemented in this class. Raises `NotImplementedError`."""
1021 raise NotImplementedError("Unformatted lines don't store comments separately.")
1023 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1024 """Does nothing and returns False."""
1027 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1028 """Does nothing and returns False."""
1033 class EmptyLineTracker:
1034 """Provides a stateful method that returns the number of potential extra
1035 empty lines needed before and after the currently processed line.
1037 Note: this tracker works on lines that haven't been split yet. It assumes
1038 the prefix of the first leaf consists of optional newlines. Those newlines
1039 are consumed by `maybe_empty_lines()` and included in the computation.
1041 previous_line: Optional[Line] = None
1042 previous_after: int = 0
1043 previous_defs: List[int] = Factory(list)
1045 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1046 """Return the number of extra empty lines before and after the `current_line`.
1048 This is for separating `def`, `async def` and `class` with extra empty
1049 lines (two on module-level), as well as providing an extra empty line
1050 after flow control keywords to make them more prominent.
1052 if isinstance(current_line, UnformattedLines):
1055 before, after = self._maybe_empty_lines(current_line)
1056 before -= self.previous_after
1057 self.previous_after = after
1058 self.previous_line = current_line
1059 return before, after
1061 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1063 if current_line.depth == 0:
1065 if current_line.leaves:
1066 # Consume the first leaf's extra newlines.
1067 first_leaf = current_line.leaves[0]
1068 before = first_leaf.prefix.count("\n")
1069 before = min(before, max_allowed)
1070 first_leaf.prefix = ""
1073 depth = current_line.depth
1074 while self.previous_defs and self.previous_defs[-1] >= depth:
1075 self.previous_defs.pop()
1076 before = 1 if depth else 2
1077 is_decorator = current_line.is_decorator
1078 if is_decorator or current_line.is_def or current_line.is_class:
1079 if not is_decorator:
1080 self.previous_defs.append(depth)
1081 if self.previous_line is None:
1082 # Don't insert empty lines before the first line in the file.
1085 if self.previous_line.is_decorator:
1089 self.previous_line.is_comment
1090 and self.previous_line.depth == current_line.depth
1096 if current_line.depth:
1102 and self.previous_line.is_import
1103 and not current_line.is_import
1104 and depth == self.previous_line.depth
1106 return (before or 1), 0
1112 class LineGenerator(Visitor[Line]):
1113 """Generates reformatted Line objects. Empty lines are not emitted.
1115 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1116 in ways that will no longer stringify to valid Python code on the tree.
1118 current_line: Line = Factory(Line)
1120 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1123 If the line is empty, only emit if it makes sense.
1124 If the line is too long, split it first and then generate.
1126 If any lines were generated, set up a new current_line.
1128 if not self.current_line:
1129 if self.current_line.__class__ == type:
1130 self.current_line.depth += indent
1132 self.current_line = type(depth=self.current_line.depth + indent)
1133 return # Line is empty, don't emit. Creating a new one unnecessary.
1135 complete_line = self.current_line
1136 self.current_line = type(depth=complete_line.depth + indent)
1139 def visit(self, node: LN) -> Iterator[Line]:
1140 """Main method to visit `node` and its children.
1142 Yields :class:`Line` objects.
1144 if isinstance(self.current_line, UnformattedLines):
1145 # File contained `# fmt: off`
1146 yield from self.visit_unformatted(node)
1149 yield from super().visit(node)
1151 def visit_default(self, node: LN) -> Iterator[Line]:
1152 """Default `visit_*()` implementation. Recurses to children of `node`."""
1153 if isinstance(node, Leaf):
1154 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1156 for comment in generate_comments(node):
1157 if any_open_brackets:
1158 # any comment within brackets is subject to splitting
1159 self.current_line.append(comment)
1160 elif comment.type == token.COMMENT:
1161 # regular trailing comment
1162 self.current_line.append(comment)
1163 yield from self.line()
1166 # regular standalone comment
1167 yield from self.line()
1169 self.current_line.append(comment)
1170 yield from self.line()
1172 except FormatOff as f_off:
1173 f_off.trim_prefix(node)
1174 yield from self.line(type=UnformattedLines)
1175 yield from self.visit(node)
1177 except FormatOn as f_on:
1178 # This only happens here if somebody says "fmt: on" multiple
1180 f_on.trim_prefix(node)
1181 yield from self.visit_default(node)
1184 normalize_prefix(node, inside_brackets=any_open_brackets)
1185 if node.type == token.STRING:
1186 normalize_string_quotes(node)
1187 if node.type not in WHITESPACE:
1188 self.current_line.append(node)
1189 yield from super().visit_default(node)
1191 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1192 """Increase indentation level, maybe yield a line."""
1193 # In blib2to3 INDENT never holds comments.
1194 yield from self.line(+1)
1195 yield from self.visit_default(node)
1197 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1198 """Decrease indentation level, maybe yield a line."""
1199 # The current line might still wait for trailing comments. At DEDENT time
1200 # there won't be any (they would be prefixes on the preceding NEWLINE).
1201 # Emit the line then.
1202 yield from self.line()
1204 # While DEDENT has no value, its prefix may contain standalone comments
1205 # that belong to the current indentation level. Get 'em.
1206 yield from self.visit_default(node)
1208 # Finally, emit the dedent.
1209 yield from self.line(-1)
1212 self, node: Node, keywords: Set[str], parens: Set[str]
1213 ) -> Iterator[Line]:
1214 """Visit a statement.
1216 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1217 `def`, `with`, `class`, and `assert`.
1219 The relevant Python language `keywords` for a given statement will be
1220 NAME leaves within it. This method puts those on a separate line.
1222 `parens` holds a set of string leaf values; invisible parentheses are put
1223 around the child that follows any leaf whose value is in `parens` (see
1224 :func:`normalize_invisible_parens`).
1226 normalize_invisible_parens(node, parens_after=parens)
1227 for child in node.children:
1228 if child.type == token.NAME and child.value in keywords: # type: ignore
1229 yield from self.line()
1231 yield from self.visit(child)
1233 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1234 """Visit a statement without nested statements."""
1235 is_suite_like = node.parent and node.parent.type in STATEMENT
1237 yield from self.line(+1)
1238 yield from self.visit_default(node)
1239 yield from self.line(-1)
1242 yield from self.line()
1243 yield from self.visit_default(node)
1245 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1246 """Visit `async def`, `async for`, `async with`."""
1247 yield from self.line()
1249 children = iter(node.children)
1250 for child in children:
1251 yield from self.visit(child)
1253 if child.type == token.ASYNC:
1256 internal_stmt = next(children)
1257 for child in internal_stmt.children:
1258 yield from self.visit(child)
1260 def visit_decorators(self, node: Node) -> Iterator[Line]:
1261 """Visit decorators."""
1262 for child in node.children:
1263 yield from self.line()
1264 yield from self.visit(child)
1266 def visit_import_from(self, node: Node) -> Iterator[Line]:
1267 """Visit import_from and maybe put invisible parentheses.
1269 This is separate from `visit_stmt` because import statements don't
1270 support arbitrary atoms and thus handling of parentheses is custom.
1273 for index, child in enumerate(node.children):
1275 if child.type == token.LPAR:
1276 # make parentheses invisible
1277 child.value = "" # type: ignore
1278 node.children[-1].value = "" # type: ignore
1280 # insert invisible parentheses
1281 node.insert_child(index, Leaf(token.LPAR, ""))
1282 node.append_child(Leaf(token.RPAR, ""))
1286 child.type == token.NAME and child.value == "import" # type: ignore
1289 for child in node.children:
1290 yield from self.visit(child)
1292 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1293 """Remove a semicolon and put the other statement on a separate line."""
1294 yield from self.line()
1296 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1297 """End of file. Process outstanding comments and end with a newline."""
1298 yield from self.visit_default(leaf)
1299 yield from self.line()
1301 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1302 """Used when file contained a `# fmt: off`."""
1303 if isinstance(node, Node):
1304 for child in node.children:
1305 yield from self.visit(child)
1309 self.current_line.append(node)
1310 except FormatOn as f_on:
1311 f_on.trim_prefix(node)
1312 yield from self.line()
1313 yield from self.visit(node)
1315 if node.type == token.ENDMARKER:
1316 # somebody decided not to put a final `# fmt: on`
1317 yield from self.line()
1319 def __attrs_post_init__(self) -> None:
1320 """You are in a twisty little maze of passages."""
1323 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1324 self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
1325 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1326 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1327 self.visit_try_stmt = partial(
1328 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1330 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1331 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1332 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1333 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1334 self.visit_async_funcdef = self.visit_async_stmt
1335 self.visit_decorated = self.visit_decorators
1338 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1339 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1340 OPENING_BRACKETS = set(BRACKET.keys())
1341 CLOSING_BRACKETS = set(BRACKET.values())
1342 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1343 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1346 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901
1347 """Return whitespace prefix if needed for the given `leaf`.
1349 `complex_subscript` signals whether the given leaf is part of a subscription
1350 which has non-trivial arguments, like arithmetic expressions or function calls.
1358 if t in ALWAYS_NO_SPACE:
1361 if t == token.COMMENT:
1364 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1367 and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
1371 prev = leaf.prev_sibling
1373 prevp = preceding_leaf(p)
1374 if not prevp or prevp.type in OPENING_BRACKETS:
1377 if t == token.COLON:
1378 if prevp.type == token.COLON:
1381 elif prevp.type != token.COMMA and not complex_subscript:
1386 if prevp.type == token.EQUAL:
1388 if prevp.parent.type in {
1389 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1393 elif prevp.parent.type == syms.typedargslist:
1394 # A bit hacky: if the equal sign has whitespace, it means we
1395 # previously found it's a typed argument. So, we're using
1399 elif prevp.type in STARS:
1400 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1403 elif prevp.type == token.COLON:
1404 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1405 return SPACE if complex_subscript else NO
1409 and prevp.parent.type == syms.factor
1410 and prevp.type in MATH_OPERATORS
1415 prevp.type == token.RIGHTSHIFT
1417 and prevp.parent.type == syms.shift_expr
1418 and prevp.prev_sibling
1419 and prevp.prev_sibling.type == token.NAME
1420 and prevp.prev_sibling.value == "print" # type: ignore
1422 # Python 2 print chevron
1425 elif prev.type in OPENING_BRACKETS:
1428 if p.type in {syms.parameters, syms.arglist}:
1429 # untyped function signatures or calls
1430 if not prev or prev.type != token.COMMA:
1433 elif p.type == syms.varargslist:
1435 if prev and prev.type != token.COMMA:
1438 elif p.type == syms.typedargslist:
1439 # typed function signatures
1443 if t == token.EQUAL:
1444 if prev.type != syms.tname:
1447 elif prev.type == token.EQUAL:
1448 # A bit hacky: if the equal sign has whitespace, it means we
1449 # previously found it's a typed argument. So, we're using that, too.
1452 elif prev.type != token.COMMA:
1455 elif p.type == syms.tname:
1458 prevp = preceding_leaf(p)
1459 if not prevp or prevp.type != token.COMMA:
1462 elif p.type == syms.trailer:
1463 # attributes and calls
1464 if t == token.LPAR or t == token.RPAR:
1469 prevp = preceding_leaf(p)
1470 if not prevp or prevp.type != token.NUMBER:
1473 elif t == token.LSQB:
1476 elif prev.type != token.COMMA:
1479 elif p.type == syms.argument:
1481 if t == token.EQUAL:
1485 prevp = preceding_leaf(p)
1486 if not prevp or prevp.type == token.LPAR:
1489 elif prev.type in {token.EQUAL} | STARS:
1492 elif p.type == syms.decorator:
1496 elif p.type == syms.dotted_name:
1500 prevp = preceding_leaf(p)
1501 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1504 elif p.type == syms.classdef:
1508 if prev and prev.type == token.LPAR:
1511 elif p.type in {syms.subscript, syms.sliceop}:
1514 assert p.parent is not None, "subscripts are always parented"
1515 if p.parent.type == syms.subscriptlist:
1520 elif not complex_subscript:
1523 elif p.type == syms.atom:
1524 if prev and t == token.DOT:
1525 # dots, but not the first one.
1528 elif p.type == syms.dictsetmaker:
1530 if prev and prev.type == token.DOUBLESTAR:
1533 elif p.type in {syms.factor, syms.star_expr}:
1536 prevp = preceding_leaf(p)
1537 if not prevp or prevp.type in OPENING_BRACKETS:
1540 prevp_parent = prevp.parent
1541 assert prevp_parent is not None
1543 prevp.type == token.COLON
1544 and prevp_parent.type in {syms.subscript, syms.sliceop}
1548 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1551 elif t == token.NAME or t == token.NUMBER:
1554 elif p.type == syms.import_from:
1556 if prev and prev.type == token.DOT:
1559 elif t == token.NAME:
1563 if prev and prev.type == token.DOT:
1566 elif p.type == syms.sliceop:
1572 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1573 """Return the first leaf that precedes `node`, if any."""
1575 res = node.prev_sibling
1577 if isinstance(res, Leaf):
1581 return list(res.leaves())[-1]
1590 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1591 """Return the child of `ancestor` that contains `descendant`."""
1592 node: Optional[LN] = descendant
1593 while node and node.parent != ancestor:
1598 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1599 """Return the priority of the `leaf` delimiter, given a line break after it.
1601 The delimiter priorities returned here are from those delimiters that would
1602 cause a line break after themselves.
1604 Higher numbers are higher priority.
1606 if leaf.type == token.COMMA:
1607 return COMMA_PRIORITY
1612 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1613 """Return the priority of the `leaf` delimiter, given a line before after it.
1615 The delimiter priorities returned here are from those delimiters that would
1616 cause a line break before themselves.
1618 Higher numbers are higher priority.
1620 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1621 # * and ** might also be MATH_OPERATORS but in this case they are not.
1622 # Don't treat them as a delimiter.
1626 leaf.type in MATH_OPERATORS
1628 and leaf.parent.type not in {syms.factor, syms.star_expr}
1630 return MATH_PRIORITY
1632 if leaf.type in COMPARATORS:
1633 return COMPARATOR_PRIORITY
1636 leaf.type == token.STRING
1637 and previous is not None
1638 and previous.type == token.STRING
1640 return STRING_PRIORITY
1643 leaf.type == token.NAME
1644 and leaf.value == "for"
1646 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1648 return COMPREHENSION_PRIORITY
1651 leaf.type == token.NAME
1652 and leaf.value == "if"
1654 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1656 return COMPREHENSION_PRIORITY
1659 leaf.type == token.NAME
1660 and leaf.value in {"if", "else"}
1662 and leaf.parent.type == syms.test
1664 return TERNARY_PRIORITY
1666 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
1667 return LOGIC_PRIORITY
1672 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1673 """Clean the prefix of the `leaf` and generate comments from it, if any.
1675 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1676 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1677 move because it does away with modifying the grammar to include all the
1678 possible places in which comments can be placed.
1680 The sad consequence for us though is that comments don't "belong" anywhere.
1681 This is why this function generates simple parentless Leaf objects for
1682 comments. We simply don't know what the correct parent should be.
1684 No matter though, we can live without this. We really only need to
1685 differentiate between inline and standalone comments. The latter don't
1686 share the line with any code.
1688 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1689 are emitted with a fake STANDALONE_COMMENT token identifier.
1700 for index, line in enumerate(p.split("\n")):
1701 consumed += len(line) + 1 # adding the length of the split '\n'
1702 line = line.lstrip()
1705 if not line.startswith("#"):
1708 if index == 0 and leaf.type != token.ENDMARKER:
1709 comment_type = token.COMMENT # simple trailing comment
1711 comment_type = STANDALONE_COMMENT
1712 comment = make_comment(line)
1713 yield Leaf(comment_type, comment, prefix="\n" * nlines)
1715 if comment in {"# fmt: on", "# yapf: enable"}:
1716 raise FormatOn(consumed)
1718 if comment in {"# fmt: off", "# yapf: disable"}:
1719 if comment_type == STANDALONE_COMMENT:
1720 raise FormatOff(consumed)
1722 prev = preceding_leaf(leaf)
1723 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
1724 raise FormatOff(consumed)
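# --- Editor's sketch (not part of black) ------------------------------------------
# Recovering comments from leaf prefixes as described above: the trailing comment
# is yielded as token.COMMENT, the own-line one as STANDALONE_COMMENT.
def _editor_example_comments() -> None:
    node = lib2to3_parse("x = 1  # trailing\n\n# standalone\ny = 2\n")
    for leaf in node.leaves():
        for comment in generate_comments(leaf):
            print(comment.type, repr(comment.value))
# -----------------------------------------------------------------------------------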
1729 def make_comment(content: str) -> str:
1730 """Return a consistently formatted comment from the given `content` string.
1732 All comments (except for "##", "#!", "#:") should have a single space between
1733 the hash sign and the content.
1735 If `content` didn't start with a hash sign, one is provided.
1737 content = content.rstrip()
1741 if content[0] == "#":
1742 content = content[1:]
1743 if content and content[0] not in " !:#":
1744 content = " " + content
1745 return "#" + content
1749 line: Line, line_length: int, inner: bool = False, py36: bool = False
1750 ) -> Iterator[Line]:
1751 """Split a `line` into potentially many lines.
1753 They should fit in the allotted `line_length` but might not be able to.
1754 `inner` signifies that there was a pair of brackets somewhere around the
1755 current `line`, possibly transitively. This means we can fall back to splitting
1756 by delimiters if the LHS/RHS don't yield any results.
1758 If `py36` is True, splitting may generate syntax that is only compatible
1759 with Python 3.6 and later.
1761 if isinstance(line, UnformattedLines) or line.is_comment:
1765 line_str = str(line).strip("\n")
1767 len(line_str) <= line_length
1768 and "\n" not in line_str # multiline strings
1769 and not line.contains_standalone_comments()
1774 split_funcs: List[SplitFunc]
1776 split_funcs = [left_hand_split]
1777 elif line.is_import:
1778 split_funcs = [explode_split]
1779 elif line.inside_brackets:
1780 split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
1782 split_funcs = [right_hand_split]
1783 for split_func in split_funcs:
1784 # We are accumulating lines in `result` because we might want to abort
1785 # mission and return the original line in the end, or attempt a different
1787 result: List[Line] = []
1789 for l in split_func(line, py36):
1790 if str(l).strip("\n") == line_str:
1791 raise CannotSplit("Split function returned an unchanged result")
1794 split_line(l, line_length=line_length, inner=True, py36=py36)
1796 except CannotSplit as cs:
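# --- Editor's sketch (not part of black) -------------------------------------------
# End-to-end effect of split_line() and the splitters below, driven through
# format_str(): the editor's expectation is that an overlong call is wrapped
# inside its brackets rather than left on one long line.
def _editor_example_split() -> str:
    src = (
        "result = some_function(argument_one, argument_two,"
        " argument_three, argument_four, argument_five)\n"
    )
    return format_str(src, line_length=DEFAULT_LINE_LENGTH)
# ------------------------------------------------------------------------------------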
1807 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1808 """Split line into many lines, starting with the first matching bracket pair.
1810 Note: this usually looks weird; only use it for function definitions.
1811 Prefer RHS otherwise.
1813 head = Line(depth=line.depth)
1814 body = Line(depth=line.depth + 1, inside_brackets=True)
1815 tail = Line(depth=line.depth)
1816 tail_leaves: List[Leaf] = []
1817 body_leaves: List[Leaf] = []
1818 head_leaves: List[Leaf] = []
1819 current_leaves = head_leaves
1820 matching_bracket = None
1821 for leaf in line.leaves:
1823 current_leaves is body_leaves
1824 and leaf.type in CLOSING_BRACKETS
1825 and leaf.opening_bracket is matching_bracket
1827 current_leaves = tail_leaves if body_leaves else head_leaves
1828 current_leaves.append(leaf)
1829 if current_leaves is head_leaves:
1830 if leaf.type in OPENING_BRACKETS:
1831 matching_bracket = leaf
1832 current_leaves = body_leaves
1833 # Since body is a new indent level, remove spurious leading whitespace.
1835 normalize_prefix(body_leaves[0], inside_brackets=True)
1836 # Build the new lines.
1837 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1839 result.append(leaf, preformatted=True)
1840 for comment_after in line.comments_after(leaf):
1841 result.append(comment_after, preformatted=True)
1842 bracket_split_succeeded_or_raise(head, body, tail)
1843 for result in (head, body, tail):
1848 def right_hand_split(
1849 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1850 ) -> Iterator[Line]:
1851 """Split line into many lines, starting with the last matching bracket pair."""
1852 head = Line(depth=line.depth)
1853 body = Line(depth=line.depth + 1, inside_brackets=True)
1854 tail = Line(depth=line.depth)
1855 tail_leaves: List[Leaf] = []
1856 body_leaves: List[Leaf] = []
1857 head_leaves: List[Leaf] = []
1858 current_leaves = tail_leaves
1859 opening_bracket = None
1860 closing_bracket = None
1861 for leaf in reversed(line.leaves):
1862 if current_leaves is body_leaves:
1863 if leaf is opening_bracket:
1864 current_leaves = head_leaves if body_leaves else tail_leaves
1865 current_leaves.append(leaf)
1866 if current_leaves is tail_leaves:
1867 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1868 opening_bracket = leaf.opening_bracket
1869 closing_bracket = leaf
1870 current_leaves = body_leaves
1871 tail_leaves.reverse()
1872 body_leaves.reverse()
1873 head_leaves.reverse()
1874 # Since body is a new indent level, remove spurious leading whitespace.
1876 normalize_prefix(body_leaves[0], inside_brackets=True)
1877 elif not head_leaves:
1878 # No `head` and no `body` means the split failed. `tail` has all content.
1879 raise CannotSplit("No brackets found")
1881 # Build the new lines.
1882 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1884 result.append(leaf, preformatted=True)
1885 for comment_after in line.comments_after(leaf):
1886 result.append(comment_after, preformatted=True)
1887 bracket_split_succeeded_or_raise(head, body, tail)
1888 assert opening_bracket and closing_bracket
1890 opening_bracket.type == token.LPAR
1891 and not opening_bracket.value
1892 and closing_bracket.type == token.RPAR
1893 and not closing_bracket.value
1895 # These parens were optional. If there aren't any delimiters or standalone
1896 # comments in the body, they were unnecessary and another split without
1897 # them should be attempted.
1899 body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
1901 omit = {id(closing_bracket), *omit}
1902 yield from right_hand_split(line, py36=py36, omit=omit)
1905 ensure_visible(opening_bracket)
1906 ensure_visible(closing_bracket)
1907 for result in (head, body, tail):
1912 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1913 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
1915 Do nothing otherwise.
1917 A left- or right-hand split is based on a pair of brackets. Content before
1918 (and including) the opening bracket is left on one line, content inside the
1919 brackets is put on a separate line, and finally content starting with and
1920 following the closing bracket is put on a separate line.
1922 Those are called `head`, `body`, and `tail`, respectively. If the split
1923 produced the same line (all content in `head`) or ended up with an empty `body`
1924 and the `tail` is just the closing bracket, then it's considered failed.
1926 tail_len = len(str(tail).strip())
1929 raise CannotSplit("Splitting brackets produced the same line")
1933 f"Splitting brackets on an empty body to save "
1934 f"{tail_len} characters is not worth it"
1938 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
1939 """Normalize prefix of the first leaf in every line returned by `split_func`.
1941 This is a decorator over relevant split functions.
1945 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
1946 for l in split_func(line, py36):
1947 normalize_prefix(l.leaves[0], inside_brackets=True)
1950 return split_wrapper
1953 @dont_increase_indentation
1954 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1955 """Split according to delimiters of the highest priority.
1957 If `py36` is True, the split will add trailing commas also in function
1958 signatures that contain `*` and `**`.
1961 last_leaf = line.leaves[-1]
1963 raise CannotSplit("Line empty")
1965 delimiters = line.bracket_tracker.delimiters
1967 delimiter_priority = line.bracket_tracker.max_delimiter_priority(
1968 exclude={id(last_leaf)}
1971 raise CannotSplit("No delimiters found")
1973 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1974 lowest_depth = sys.maxsize
1975 trailing_comma_safe = True
1977 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1978 """Append `leaf` to current line or to new line if appending impossible."""
1979 nonlocal current_line
1981 current_line.append_safe(leaf, preformatted=True)
1982 except ValueError as ve:
1985 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1986 current_line.append(leaf)
1988 for leaf in line.leaves:
1989 yield from append_to_line(leaf)
1991 for comment_after in line.comments_after(leaf):
1992 yield from append_to_line(comment_after)
1994 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1996 leaf.bracket_depth == lowest_depth
1997 and is_vararg(leaf, within=VARARGS_PARENTS)
1999 trailing_comma_safe = trailing_comma_safe and py36
2000 leaf_priority = delimiters.get(id(leaf))
2001 if leaf_priority == delimiter_priority:
2004 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2008 and delimiter_priority == COMMA_PRIORITY
2009 and current_line.leaves[-1].type != token.COMMA
2010 and current_line.leaves[-1].type != STANDALONE_COMMENT
2012 current_line.append(Leaf(token.COMMA, ","))
2016 @dont_increase_indentation
2017 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2018 """Split standalone comments from the rest of the line."""
2019 if not line.contains_standalone_comments(0):
2020 raise CannotSplit("Line does not have any standalone comments")
2022 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2024 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2025 """Append `leaf` to current line or to new line if appending impossible."""
2026 nonlocal current_line
2028 current_line.append_safe(leaf, preformatted=True)
2029 except ValueError as ve:
2032 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2033 current_line.append(leaf)
2035 for leaf in line.leaves:
2036 yield from append_to_line(leaf)
2038 for comment_after in line.comments_after(leaf):
2039 yield from append_to_line(comment_after)
2046 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
2047 ) -> Iterator[Line]:
2048 """Split by rightmost bracket and immediately split contents by a delimiter."""
2049 new_lines = list(right_hand_split(line, py36, omit))
2050 if len(new_lines) != 3:
2051 yield from new_lines
2057 yield from delimiter_split(new_lines[1], py36)
2065 def is_import(leaf: Leaf) -> bool:
2066 """Return True if the given leaf starts an import statement."""
2073 (v == "import" and p and p.type == syms.import_name)
2074 or (v == "from" and p and p.type == syms.import_from)
2079 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2080 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2083 Note: don't use backslashes for formatting or you'll lose your voting rights.
2085 if not inside_brackets:
2086 spl = leaf.prefix.split("#")
2087 if "\\" not in spl[0]:
2088 nl_count = spl[-1].count("\n")
2091 leaf.prefix = "\n" * nl_count
2097 def normalize_string_quotes(leaf: Leaf) -> None:
2098 """Prefer double quotes but only if it doesn't cause more escaping.
2100 Adds or removes backslashes as appropriate. Doesn't parse and fix
2101 strings nested in f-strings (yet).
2103 Note: Mutates its argument.
2105 value = leaf.value.lstrip("furbFURB")
2106 if value[:3] == '"""':
2109 elif value[:3] == "'''":
2112 elif value[0] == '"':
2118 first_quote_pos = leaf.value.find(orig_quote)
2119 if first_quote_pos == -1:
2120 return # There's an internal error
2122 prefix = leaf.value[:first_quote_pos]
2123 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2124 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2125 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2126 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2127 if "r" in prefix.casefold():
2128 if unescaped_new_quote.search(body):
2129 # There's at least one unescaped new_quote in this raw string
2130 # so converting is impossible
2133 # Do not introduce or remove backslashes in raw strings
2136 # remove unnecessary quotes
2137 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2138 if body != new_body:
2139 # Consider the string without unnecessary quotes as the original
2141 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2142 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2143 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2144 if new_quote == '"""' and new_body[-1] == '"':
2146 new_body = new_body[:-1] + '\\"'
2147 orig_escape_count = body.count("\\")
2148 new_escape_count = new_body.count("\\")
2149 if new_escape_count > orig_escape_count:
2150 return # Do not introduce more escaping
2152 if new_escape_count == orig_escape_count and orig_quote == '"':
2153 return # Prefer double quotes
2155 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
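# --- Editor's sketch (not part of black) -------------------------------------------
# Mutating STRING leaves in place, per the rules above: prefer double quotes unless
# that would introduce new escapes (expected values are the editor's, not a
# recorded run).
def _editor_example_quotes() -> None:
    plain = Leaf(token.STRING, "'hello'")
    normalize_string_quotes(plain)
    print(plain.value)  # expected: "hello"

    would_escape = Leaf(token.STRING, "'say \"hi\"'")
    normalize_string_quotes(would_escape)
    print(would_escape.value)  # expected to stay single-quoted: 'say "hi"'
# ------------------------------------------------------------------------------------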
2158 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2159 """Make existing optional parentheses invisible or create new ones.
2161 Standardizes on visible parentheses for single-element tuples, and keeps
2162 existing visible parentheses for other tuples and generator expressions.
2165 for child in list(node.children):
2167 if child.type == syms.atom:
2169 is_empty_tuple(child)
2170 or is_one_tuple(child)
2171 or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
2173 first = child.children[0]
2174 last = child.children[-1]
2175 if first.type == token.LPAR and last.type == token.RPAR:
2176 # make parentheses invisible
2177 first.value = "" # type: ignore
2178 last.value = "" # type: ignore
2179 elif is_one_tuple(child):
2180 # wrap child in visible parentheses
2181 lpar = Leaf(token.LPAR, "(")
2182 rpar = Leaf(token.RPAR, ")")
2183 index = child.remove() or 0
2184 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2186 # wrap child in invisible parentheses
2187 lpar = Leaf(token.LPAR, "")
2188 rpar = Leaf(token.RPAR, "")
2189 index = child.remove() or 0
2190 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2192 check_lpar = isinstance(child, Leaf) and child.value in parens_after
2195 def is_empty_tuple(node: LN) -> bool:
2196 """Return True if `node` holds an empty tuple."""
2198 node.type == syms.atom
2199 and len(node.children) == 2
2200 and node.children[0].type == token.LPAR
2201 and node.children[1].type == token.RPAR
2205 def is_one_tuple(node: LN) -> bool:
2206 """Return True if `node` holds a tuple with one element, with or without parens."""
2207 if node.type == syms.atom:
2208 if len(node.children) != 3:
2211 lpar, gexp, rpar = node.children
2213 lpar.type == token.LPAR
2214 and gexp.type == syms.testlist_gexp
2215 and rpar.type == token.RPAR
2219 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2222 node.type in IMPLICIT_TUPLE
2223 and len(node.children) == 2
2224 and node.children[1].type == token.COMMA
2228 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2229 """Return True if `leaf` is a star or double star in a vararg or kwarg.
2231 If `within` includes VARARGS_PARENTS, this applies to function signatures.
2232 If `within` includes UNPACKING_PARENTS, it applies to right-hand
2233 side extended iterable unpacking (PEP 3132) and additional unpacking
2234 generalizations (PEP 448).
2236 if leaf.type not in STARS or not leaf.parent:
2240 if p.type == syms.star_expr:
2241 # Star expressions are also used as assignment targets in extended
2242 # iterable unpacking (PEP 3132). See what its parent is instead.
2248 return p.type in within
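The star contexts the docstring mentions can be seen with the stdlib ast module alone (blib2to3 node types are not needed just to look at the shapes); the sample sources below are made up:

    import ast

    print(ast.dump(ast.parse("def f(*args, **kwargs): pass").body[0].args))  # signature varargs
    print(ast.dump(ast.parse("head, *tail = items").body[0].targets[0]))     # PEP 3132 target
    print(ast.dump(ast.parse("merged = [*a, *b]").body[0].value))            # PEP 448 unpacking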
2251 def max_delimiter_priority_in_atom(node: LN) -> int:
2252 """Return maximum delimiter priority inside `node`.
2254 This is specific to atoms with contents contained in a pair of parentheses.
2255 If `node` isn't an atom or there are no enclosing parentheses, returns 0.
2257 if node.type != syms.atom:
2260 first = node.children[0]
2261 last = node.children[-1]
2262 if not (first.type == token.LPAR and last.type == token.RPAR):
2265 bt = BracketTracker()
2266 for c in node.children[1:-1]:
2267 if isinstance(c, Leaf):
2270 for leaf in c.leaves():
2273 return bt.max_delimiter_priority()
2279 def ensure_visible(leaf: Leaf) -> None:
2280 """Make sure parentheses are visible.
2282 They could be invisible as part of some statements (see
2283 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
2285 if leaf.type == token.LPAR:
2286 leaf.value = "("
2287 elif leaf.type == token.RPAR:
2288 leaf.value = ")"
2291 def is_python36(node: Node) -> bool:
2292 """Return True if the current file is using Python 3.6+ features.
2294 Currently looking for:
2295 - f-strings;
2296 - trailing commas after * or ** in function signatures.
2298 for n in node.pre_order():
2299 if n.type == token.STRING:
2300 value_head = n.value[:2] # type: ignore
2301 if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
2302 return True
2305 n.type == syms.typedargslist
2307 and n.children[-1].type == token.COMMA
2309 for ch in n.children:
2310 if ch.type in STARS:
2311 return True
2313 return False
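A rough self-check of the two triggers listed above. This assumes lib2to3_parse() (defined earlier in this module but not shown in this excerpt) is available; the sample sources are made up:

    for sample in ('x = f"{x}"\n', "def f(a, *args,): pass\n", "x = 1\n"):
        print(sample.rstrip(), "->", is_python36(lib2to3_parse(sample)))
    # the first two should report True (f-string; trailing comma after *args), the last False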
2316 PYTHON_EXTENSIONS = {".py"}
2317 BLACKLISTED_DIRECTORIES = {
2318 "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
2322 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
2323 """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
2324 and have one of the PYTHON_EXTENSIONS.
2326 for child in path.iterdir():
2328 if child.name in BLACKLISTED_DIRECTORIES:
2331 yield from gen_python_files_in_dir(child)
2333 elif child.suffix in PYTHON_EXTENSIONS:
2334 yield child
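Hypothetical usage of the generator above; "my_project" is a made-up directory name:

    root = Path("my_project")
    if root.is_dir():
        for source in gen_python_files_in_dir(root):
            print(source)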
2339 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2342 change_count: int = 0
2344 failure_count: int = 0
2346 def done(self, src: Path, changed: Changed) -> None:
2347 """Increment the counter for successful reformatting. Write out a message."""
2348 if changed is Changed.YES:
2349 reformatted = "would reformat" if self.check else "reformatted"
2351 out(f"{reformatted} {src}")
2352 self.change_count += 1
2355 if changed is Changed.NO:
2356 msg = f"{src} already well formatted, good job."
2357 else:
2358 msg = f"{src} wasn't modified on disk since last run."
2359 out(msg, bold=False)
2360 self.same_count += 1
2362 def failed(self, src: Path, message: str) -> None:
2363 """Increment the counter for failed reformatting. Write out a message."""
2364 err(f"error: cannot format {src}: {message}")
2365 self.failure_count += 1
2367 @property
2368 def return_code(self) -> int:
2369 """Return the exit code that the app should use.
2371 This considers the current state of changed files and failures:
2372 - if there were any failures, return 123;
2373 - if any files were changed and --check is being used, return 1;
2374 - otherwise return 0.
2376 # According to http://tldp.org/LDP/abs/html/exitcodes.html, return codes starting
2377 # with 126 are reserved by the shell.
2378 if self.failure_count:
2379 return 123
2381 elif self.change_count and self.check:
2382 return 1
2384 return 0
2386 def __str__(self) -> str:
2387 """Render a color report of the current state.
2389 Use `click.unstyle` to remove colors.
2391 if self.check:
2392 reformatted = "would be reformatted"
2393 unchanged = "would be left unchanged"
2394 failed = "would fail to reformat"
2395 else:
2396 reformatted = "reformatted"
2397 unchanged = "left unchanged"
2398 failed = "failed to reformat"
2399 report = []
2400 if self.change_count:
2401 s = "s" if self.change_count > 1 else ""
2402 report.append(
2403 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2404 )
2405 if self.same_count:
2406 s = "s" if self.same_count > 1 else ""
2407 report.append(f"{self.same_count} file{s} {unchanged}")
2408 if self.failure_count:
2409 s = "s" if self.failure_count > 1 else ""
2410 report.append(
2411 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2412 )
2413 return ", ".join(report) + "."
2416 def assert_equivalent(src: str, dst: str) -> None:
2417 """Raise AssertionError if `src` and `dst` aren't equivalent."""
2422 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2423 """Simple visitor generating strings to compare ASTs by content."""
2424 yield f"{' ' * depth}{node.__class__.__name__}("
2426 for field in sorted(node._fields):
2427 try:
2428 value = getattr(node, field)
2429 except AttributeError:
2430 continue
2432 yield f"{' ' * (depth+1)}{field}="
2434 if isinstance(value, list):
2435 for item in value:
2436 if isinstance(item, ast.AST):
2437 yield from _v(item, depth + 2)
2439 elif isinstance(value, ast.AST):
2440 yield from _v(value, depth + 2)
2442 else:
2443 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
2445 yield f"{' ' * depth}) # /{node.__class__.__name__}"
2447 try:
2448 src_ast = ast.parse(src)
2449 except Exception as exc:
2450 major, minor = sys.version_info[:2]
2451 raise AssertionError(
2452 f"cannot use --safe with this file; failed to parse source file "
2453 f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2454 f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
2457 try:
2458 dst_ast = ast.parse(dst)
2459 except Exception as exc:
2460 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2461 raise AssertionError(
2462 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2463 f"Please report a bug on https://github.com/ambv/black/issues. "
2464 f"This invalid output might be helpful: {log}"
2467 src_ast_str = "\n".join(_v(src_ast))
2468 dst_ast_str = "\n".join(_v(dst_ast))
2469 if src_ast_str != dst_ast_str:
2470 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2471 raise AssertionError(
2472 f"INTERNAL ERROR: Black produced code that is not equivalent to "
2474 f"Please report a bug on https://github.com/ambv/black/issues. "
2475 f"This diff might be helpful: {log}"
2479 def assert_stable(src: str, dst: str, line_length: int) -> None:
2480 """Raise AssertionError if `dst` reformats differently the second time."""
2481 newdst = format_str(dst, line_length=line_length)
2482 if dst != newdst:
2483 log = dump_to_file(
2484 diff(src, dst, "source", "first pass"),
2485 diff(dst, newdst, "first pass", "second pass"),
2486 )
2487 raise AssertionError(
2488 f"INTERNAL ERROR: Black produced different code on the second pass "
2489 f"of the formatter. "
2490 f"Please report a bug on https://github.com/ambv/black/issues. "
2491 f"This diff might be helpful: {log}"
2495 def dump_to_file(*output: str) -> str:
2496 """Dump `output` to a temporary file. Return path to the file."""
2499 with tempfile.NamedTemporaryFile(
2500 mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
2501 ) as f:
2502 for lines in output:
2503 f.write(lines)
2504 if lines and lines[-1] != "\n":
2505 f.write("\n")
2507 return f.name
2509 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
2510 """Return a unified diff string between strings `a` and `b`."""
2513 a_lines = [line + "\n" for line in a.split("\n")]
2514 b_lines = [line + "\n" for line in b.split("\n")]
2515 return "".join(
2516 difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
2517 )
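Example rendering of the helper above:

    print(diff("a\nb\nc\n", "a\nB\nc\n", "before", "after"))
    # prints a unified diff with "--- before" / "+++ after" headers and 5 lines of context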
2520 def cancel(tasks: List[asyncio.Task]) -> None:
2521 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
2527 def shutdown(loop: BaseEventLoop) -> None:
2528 """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
2530 # This part is borrowed from asyncio/runners.py in Python 3.7b2.
2531 to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
2535 for task in to_cancel:
2536 task.cancel()
2537 loop.run_until_complete(
2538 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
2541 # `concurrent.futures.Future` objects cannot be cancelled once they
2542 # are already running. Some might still be running when `shutdown()` is called.
2543 # Silence their logger's spew about the event loop being closed.
2544 cf_logger = logging.getLogger("concurrent.futures")
2545 cf_logger.setLevel(logging.CRITICAL)
2546 loop.close()
2549 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
2550 """Replace `regex` with `replacement` twice on `original`.
2552 This is used by string normalization to perform replacements on
2553 overlapping matches.
2555 return regex.sub(replacement, regex.sub(replacement, original))
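Why two passes matter: with patterns shaped like the quote-normalization ones above, adjacent matches overlap (the character consumed as the "preceding character" of one match may itself be a quote), so a single re.sub() misses every other occurrence:

    import re

    unescaped = re.compile(r"(([^\\]|^)(\\\\)*)'")
    print(unescaped.sub(r"\1X", "''''"))         # 'X'X, one pass leaves quotes behind
    print(sub_twice(unescaped, r"\1X", "''''"))  # XXXX, the second pass catches them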
2558 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2561 def get_cache_file(line_length: int) -> Path:
2562 return CACHE_DIR / f"cache.{line_length}.pickle"
2565 def read_cache(line_length: int) -> Cache:
2566 """Read the cache if it exists and is well formed.
2568 If it is not well formed, the call to write_cache later should resolve the issue.
2570 cache_file = get_cache_file(line_length)
2571 if not cache_file.exists():
2572 return {}
2574 with cache_file.open("rb") as fobj:
2575 try:
2576 cache: Cache = pickle.load(fobj)
2577 except pickle.UnpicklingError:
2578 return {}
2580 return cache
2583 def get_cache_info(path: Path) -> CacheInfo:
2584 """Return the information used to check if a file is already formatted or not."""
2585 stat = path.stat()
2586 return stat.st_mtime, stat.st_size
2590 cache: Cache, sources: Iterable[Path]
2591 ) -> Tuple[List[Path], List[Path]]:
2592 """Split a list of paths into two.
2594 The first list contains paths of files that were modified on disk or are not in the
2595 cache. The other list contains paths to non-modified files.
2600 if cache.get(src) != get_cache_info(src):
2607 def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
2608 """Update the cache file."""
2609 cache_file = get_cache_file(line_length)
2611 if not CACHE_DIR.exists():
2612 CACHE_DIR.mkdir(parents=True)
2613 new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
2614 with cache_file.open("wb") as fobj:
2615 pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
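A minimal round trip through the cache helpers above; "example.py" is a made-up file name and the pickle really is written to the user cache directory:

    src = Path("example.py")
    if src.exists():
        write_cache({}, [src], DEFAULT_LINE_LENGTH)
        cached = read_cache(DEFAULT_LINE_LENGTH)
        print(cached[src.resolve()] == get_cache_info(src))  # True until the file changes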
2620 if __name__ == "__main__":
2621 main()