4 from asyncio.base_events import BaseEventLoop
5 from concurrent.futures import Executor, ProcessPoolExecutor
7 from functools import partial, wraps
10 from multiprocessing import Manager
12 from pathlib import Path
35 from attr import dataclass, Factory
39 from blib2to3.pytree import Node, Leaf, type_repr
40 from blib2to3 import pygram, pytree
41 from blib2to3.pgen2 import driver, token
42 from blib2to3.pgen2.parse import ParseError
44 __version__ = "18.4a2"
45 DEFAULT_LINE_LENGTH = 88
47 syms = pygram.python_symbols
55 LN = Union[Leaf, Node]
56 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
57 out = partial(click.secho, bold=True, err=True)
58 err = partial(click.secho, fg="red", err=True)
61 class NothingChanged(UserWarning):
62 """Raised by :func:`format_file` when reformatted code is the same as source."""
65 class CannotSplit(Exception):
66 """A readable split that fits the allotted line length is impossible.
68 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
69 :func:`delimiter_split`.
73 class FormatError(Exception):
74 """Base exception for `# fmt: on` and `# fmt: off` handling.
76 It holds the number of bytes of the prefix consumed before the format
77 control comment appeared.
80 def __init__(self, consumed: int) -> None:
81 super().__init__(consumed)
82 self.consumed = consumed
84 def trim_prefix(self, leaf: Leaf) -> None:
85 leaf.prefix = leaf.prefix[self.consumed:]
87 def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
88 """Returns a new Leaf from the consumed part of the prefix."""
89 unformatted_prefix = leaf.prefix[:self.consumed]
90 return Leaf(token.NEWLINE, unformatted_prefix)
93 class FormatOn(FormatError):
94 """Found a comment like `# fmt: on` in the file."""
97 class FormatOff(FormatError):
98 """Found a comment like `# fmt: off` in the file."""
101 class WriteBack(Enum):
112 default=DEFAULT_LINE_LENGTH,
113 help="How many character per line to allow.",
120 "Don't write the files back, just return the status. Return code 0 "
121 "means nothing would change. Return code 1 means some files would be "
122 "reformatted. Return code 123 means there was an internal error."
128 help="Don't write the files back, just output a diff for each file on stdout.",
133 help="If --fast given, skip temporary sanity checks. [default: --safe]",
140 "Don't emit non-error messages to stderr. Errors are still emitted, "
141 "silence those with 2>/dev/null."
144 @click.version_option(version=__version__)
149 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
162 """The uncompromising code formatter."""
163 sources: List[Path] = []
167 sources.extend(gen_python_files_in_dir(p))
169 # if a file was explicitly given, we don't care about its extension
172 sources.append(Path("-"))
174 err(f"invalid path: {s}")
176 exc = click.ClickException("Options --check and --diff are mutually exclusive")
181 write_back = WriteBack.NO
183 write_back = WriteBack.DIFF
185 write_back = WriteBack.YES
186 if len(sources) == 0:
188 elif len(sources) == 1:
190 report = Report(check=check, quiet=quiet)
192 if not p.is_file() and str(p) == "-":
193 changed = format_stdin_to_stdout(
194 line_length=line_length, fast=fast, write_back=write_back
197 changed = format_file_in_place(
198 p, line_length=line_length, fast=fast, write_back=write_back
200 report.done(p, changed)
201 except Exception as exc:
202 report.failed(p, str(exc))
203 ctx.exit(report.return_code)
205 loop = asyncio.get_event_loop()
206 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
209 return_code = loop.run_until_complete(
211 sources, line_length, write_back, fast, quiet, loop, executor
216 ctx.exit(return_code)
219 async def schedule_formatting(
222 write_back: WriteBack,
228 """Run formatting of `sources` in parallel using the provided `executor`.
230 (Use ProcessPoolExecutors for actual parallelism.)
232 `line_length`, `write_back`, and `fast` options are passed to
233 :func:`format_file_in_place`.
236 if write_back == WriteBack.DIFF:
237 # For diff output, we need locks to ensure we don't interleave output
238 # from different processes.
240 lock = manager.Lock()
242 src: loop.run_in_executor(
243 executor, format_file_in_place, src, line_length, fast, write_back, lock
247 _task_values = list(tasks.values())
248 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
249 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
250 await asyncio.wait(tasks.values())
252 report = Report(check=write_back is WriteBack.NO, quiet=quiet)
253 for src, task in tasks.items():
255 report.failed(src, "timed out, cancelling")
257 cancelled.append(task)
258 elif task.cancelled():
259 cancelled.append(task)
260 elif task.exception():
261 report.failed(src, str(task.exception()))
263 report.done(src, task.result())
265 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
267 out("All done! ✨ 🍰 ✨")
269 click.echo(str(report))
270 return report.return_code
273 def format_file_in_place(
277 write_back: WriteBack = WriteBack.NO,
278 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
280 """Format file under `src` path. Return True if changed.
282 If `write_back` is YES, write reformatted code back to the file; if DIFF, write a unified diff to stdout.
283 `line_length` and `fast` options are passed to :func:`format_file_contents`.
285 with tokenize.open(src) as src_buffer:
286 src_contents = src_buffer.read()
288 dst_contents = format_file_contents(
289 src_contents, line_length=line_length, fast=fast
291 except NothingChanged:
294 if write_back == WriteBack.YES:
295 with open(src, "w", encoding=src_buffer.encoding) as f:
296 f.write(dst_contents)
297 elif write_back == WriteBack.DIFF:
298 src_name = f"{src.name} (original)"
299 dst_name = f"{src.name} (formatted)"
300 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
304 sys.stdout.write(diff_contents)
311 def format_stdin_to_stdout(
312 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
314 """Format file on stdin. Return True if changed.
316 If `write_back` is YES, write reformatted code to stdout; if DIFF, write a unified diff instead.
317 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
319 src = sys.stdin.read()
322 dst = format_file_contents(src, line_length=line_length, fast=fast)
325 except NothingChanged:
329 if write_back == WriteBack.YES:
330 sys.stdout.write(dst)
331 elif write_back == WriteBack.DIFF:
332 src_name = "<stdin> (original)"
333 dst_name = "<stdin> (formatted)"
334 sys.stdout.write(diff(src, dst, src_name, dst_name))
337 def format_file_contents(
338 src_contents: str, line_length: int, fast: bool
340 """Reformat contents a file and return new contents.
342 If `fast` is False, additionally confirm that the reformatted code is
343 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
344 `line_length` is passed to :func:`format_str`.
346 if src_contents.strip() == "":
349 dst_contents = format_str(src_contents, line_length=line_length)
350 if src_contents == dst_contents:
354 assert_equivalent(src_contents, dst_contents)
355 assert_stable(src_contents, dst_contents, line_length=line_length)
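# Illustrative sketch, not part of black.py: driving the helper above
# programmatically, assuming this module is importable as `black`.
# `format_file_contents` raises NothingChanged when the input is already
# formatted, so callers guard for it just like format_file_in_place above.
import black

source = "x = { 'a':37,'b':42,\n'c':927}\n"
try:
    formatted = black.format_file_contents(source, line_length=88, fast=False)
except black.NothingChanged:
    formatted = source
print(formatted)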
359 def format_str(src_contents: str, line_length: int) -> FileContent:
360 """Reformat a string and return new contents.
362 `line_length` determines how many characters per line are allowed.
364 src_node = lib2to3_parse(src_contents)
366 lines = LineGenerator()
367 elt = EmptyLineTracker()
368 py36 = is_python36(src_node)
371 for current_line in lines.visit(src_node):
372 for _ in range(after):
373 dst_contents += str(empty_line)
374 before, after = elt.maybe_empty_lines(current_line)
375 for _ in range(before):
376 dst_contents += str(empty_line)
377 for line in split_line(current_line, line_length=line_length, py36=py36):
378 dst_contents += str(line)
383 pygram.python_grammar_no_print_statement_no_exec_statement,
384 pygram.python_grammar_no_print_statement,
385 pygram.python_grammar_no_exec_statement,
386 pygram.python_grammar,
390 def lib2to3_parse(src_txt: str) -> Node:
391 """Given a string with source, return the lib2to3 Node."""
392 grammar = pygram.python_grammar_no_print_statement
393 if src_txt[-1:] != "\n":
394 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
396 for grammar in GRAMMARS:
397 drv = driver.Driver(grammar, pytree.convert)
399 result = drv.parse_string(src_txt, True)
402 except ParseError as pe:
403 lineno, column = pe.context[1]
404 lines = src_txt.splitlines()
406 faulty_line = lines[lineno - 1]
408 faulty_line = "<line number missing in source>"
409 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
413 if isinstance(result, Leaf):
414 result = Node(syms.file_input, [result])
418 def lib2to3_unparse(node: Node) -> str:
419 """Given a lib2to3 node, return its string representation."""
427 class Visitor(Generic[T]):
428 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
430 def visit(self, node: LN) -> Iterator[T]:
431 """Main method to visit `node` and its children.
433 It tries to find a `visit_*()` method for the given `node.type`, like
434 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
435 If no dedicated `visit_*()` method is found, chooses `visit_default()`
438 Then yields objects of type `T` from the selected visitor.
441 name = token.tok_name[node.type]
443 name = type_repr(node.type)
444 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
446 def visit_default(self, node: LN) -> Iterator[T]:
447 """Default `visit_*()` implementation. Recurses to children of `node`."""
448 if isinstance(node, Node):
449 for child in node.children:
450 yield from self.visit(child)
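# Illustrative sketch, not part of black.py: a minimal Visitor subclass that
# yields the value of every NAME leaf in a tree, assuming `black` is importable.
# It relies on the dispatch described above: `visit_NAME` handles NAME leaves,
# everything else falls through to `visit_default`, which just recurses.
from typing import Iterator

import black
from blib2to3.pytree import Leaf


class NameCollector(black.Visitor[str]):
    def visit_NAME(self, leaf: Leaf) -> Iterator[str]:
        yield leaf.value


tree = black.lib2to3_parse("total = price * quantity\n")
print(list(NameCollector().visit(tree)))  # ['total', 'price', 'quantity']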
454 class DebugVisitor(Visitor[T]):
457 def visit_default(self, node: LN) -> Iterator[T]:
458 indent = " " * (2 * self.tree_depth)
459 if isinstance(node, Node):
460 _type = type_repr(node.type)
461 out(f"{indent}{_type}", fg="yellow")
463 for child in node.children:
464 yield from self.visit(child)
467 out(f"{indent}/{_type}", fg="yellow", bold=False)
469 _type = token.tok_name.get(node.type, str(node.type))
470 out(f"{indent}{_type}", fg="blue", nl=False)
472 # We don't have to handle prefixes for `Node` objects since
473 # that delegates to the first child anyway.
474 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
475 out(f" {node.value!r}", fg="blue", bold=False)
478 def show(cls, code: str) -> None:
479 """Pretty-print the lib2to3 AST of a given string of `code`.
481 Convenience method for debugging.
483 v: DebugVisitor[None] = DebugVisitor()
484 list(v.visit(lib2to3_parse(code)))
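# Illustrative usage, not part of black.py: `DebugVisitor.show` pretty-prints
# the lib2to3 tree of a snippet to stderr, which is handy when exploring how
# black sees a particular construct (assuming `black` is importable).
import black

black.DebugVisitor.show("x = [1, 2, 3]\n")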
487 KEYWORDS = set(keyword.kwlist)
488 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
489 FLOW_CONTROL = {"return", "raise", "break", "continue"}
500 STANDALONE_COMMENT = 153
501 LOGIC_OPERATORS = {"and", "or"}
525 STARS = {token.STAR, token.DOUBLESTAR}
528 syms.argument, # double star in arglist
529 syms.trailer, # single argument to call
531 syms.varargslist, # lambdas
533 UNPACKING_PARENTS = {
534 syms.atom, # single element of a list or set literal
539 COMPREHENSION_PRIORITY = 20
543 COMPARATOR_PRIORITY = 3
548 class BracketTracker:
549 """Keeps track of brackets on a line."""
552 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
553 delimiters: Dict[LeafID, Priority] = Factory(dict)
554 previous: Optional[Leaf] = None
556 def mark(self, leaf: Leaf) -> None:
557 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
559 All leaves receive an int `bracket_depth` field that stores how deep
560 within brackets a given leaf is. 0 means there are no enclosing brackets
561 that started on this line.
563 If a leaf is itself a closing bracket, it receives an `opening_bracket`
564 field that it forms a pair with. This is a one-directional link to
565 avoid reference cycles.
567 If a leaf is a delimiter (a token on which Black can split the line if
568 needed) and it's on depth 0, its `id()` is stored in the tracker's
571 if leaf.type == token.COMMENT:
574 if leaf.type in CLOSING_BRACKETS:
576 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
577 leaf.opening_bracket = opening_bracket
578 leaf.bracket_depth = self.depth
580 delim = is_split_before_delimiter(leaf, self.previous)
581 if delim and self.previous is not None:
582 self.delimiters[id(self.previous)] = delim
584 delim = is_split_after_delimiter(leaf, self.previous)
586 self.delimiters[id(leaf)] = delim
587 if leaf.type in OPENING_BRACKETS:
588 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
592 def any_open_brackets(self) -> bool:
593 """Return True if there is an yet unmatched open bracket on the line."""
594 return bool(self.bracket_match)
596 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
597 """Return the highest priority of a delimiter found on the line.
599 Values are consistent with what `is_delimiter()` returns.
600 Raises ValueError on no delimiters.
602 return max(v for k, v in self.delimiters.items() if k not in exclude)
607 """Holds leaves and comments. Can be printed with `str(line)`."""
610 leaves: List[Leaf] = Factory(list)
611 comments: List[Tuple[Index, Leaf]] = Factory(list)
612 bracket_tracker: BracketTracker = Factory(BracketTracker)
613 inside_brackets: bool = False
614 has_for: bool = False
615 _for_loop_variable: bool = False
617 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
618 """Add a new `leaf` to the end of the line.
620 Unless `preformatted` is True, the `leaf` will receive a new consistent
621 whitespace prefix and metadata applied by :class:`BracketTracker`.
622 Trailing commas may be removed, and unpacked for-loop variables are
623 demoted from being delimiters.
625 Inline comments are put aside.
627 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
631 if self.leaves and not preformatted:
632 # Note: at this point leaf.prefix should be empty except for
633 # imports, for which we only preserve newlines.
634 leaf.prefix += whitespace(leaf)
635 if self.inside_brackets or not preformatted:
636 self.maybe_decrement_after_for_loop_variable(leaf)
637 self.bracket_tracker.mark(leaf)
638 self.maybe_remove_trailing_comma(leaf)
639 self.maybe_increment_for_loop_variable(leaf)
641 if not self.append_comment(leaf):
642 self.leaves.append(leaf)
644 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
645 """Like :func:`append()` but disallow invalid standalone comment structure.
647 Raises ValueError when any `leaf` is appended after a standalone comment
648 or when a standalone comment is not the first leaf on the line.
650 if self.bracket_tracker.depth == 0:
652 raise ValueError("cannot append to standalone comments")
654 if self.leaves and leaf.type == STANDALONE_COMMENT:
656 "cannot append standalone comments to a populated line"
659 self.append(leaf, preformatted=preformatted)
662 def is_comment(self) -> bool:
663 """Is this line a standalone comment?"""
664 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
667 def is_decorator(self) -> bool:
668 """Is this line a decorator?"""
669 return bool(self) and self.leaves[0].type == token.AT
672 def is_import(self) -> bool:
673 """Is this an import line?"""
674 return bool(self) and is_import(self.leaves[0])
677 def is_class(self) -> bool:
678 """Is this line a class definition?"""
681 and self.leaves[0].type == token.NAME
682 and self.leaves[0].value == "class"
686 def is_def(self) -> bool:
687 """Is this a function definition? (Also returns True for async defs.)"""
689 first_leaf = self.leaves[0]
694 second_leaf: Optional[Leaf] = self.leaves[1]
698 (first_leaf.type == token.NAME and first_leaf.value == "def")
700 first_leaf.type == token.ASYNC
701 and second_leaf is not None
702 and second_leaf.type == token.NAME
703 and second_leaf.value == "def"
708 def is_flow_control(self) -> bool:
709 """Is this line a flow control statement?
711 Those are `return`, `raise`, `break`, and `continue`.
715 and self.leaves[0].type == token.NAME
716 and self.leaves[0].value in FLOW_CONTROL
720 def is_yield(self) -> bool:
721 """Is this line a yield statement?"""
724 and self.leaves[0].type == token.NAME
725 and self.leaves[0].value == "yield"
728 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
729 """If so, needs to be split before emitting."""
730 for leaf in self.leaves:
731 if leaf.type == STANDALONE_COMMENT:
732 if leaf.bracket_depth <= depth_limit:
737 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
738 """Remove trailing comma if there is one and it's safe."""
741 and self.leaves[-1].type == token.COMMA
742 and closing.type in CLOSING_BRACKETS
746 if closing.type == token.RBRACE:
747 self.remove_trailing_comma()
750 if closing.type == token.RSQB:
751 comma = self.leaves[-1]
752 if comma.parent and comma.parent.type == syms.listmaker:
753 self.remove_trailing_comma()
756 # For parens let's check if it's safe to remove the comma. If the
757 # trailing one is the only one, we might mistakenly change a tuple
758 # into a different type by removing the comma.
759 depth = closing.bracket_depth + 1
761 opening = closing.opening_bracket
762 for _opening_index, leaf in enumerate(self.leaves):
769 for leaf in self.leaves[_opening_index + 1:]:
773 bracket_depth = leaf.bracket_depth
774 if bracket_depth == depth and leaf.type == token.COMMA:
776 if leaf.parent and leaf.parent.type == syms.arglist:
781 self.remove_trailing_comma()
786 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
787 """In a for loop, or comprehension, the variables are often unpacks.
789 To avoid splitting on the comma in this situation, increase the depth of
790 tokens between `for` and `in`.
792 if leaf.type == token.NAME and leaf.value == "for":
794 self.bracket_tracker.depth += 1
795 self._for_loop_variable = True
800 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
801 """See `maybe_increment_for_loop_variable` above for explanation."""
802 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
803 self.bracket_tracker.depth -= 1
804 self._for_loop_variable = False
809 def append_comment(self, comment: Leaf) -> bool:
810 """Add an inline or standalone comment to the line."""
812 comment.type == STANDALONE_COMMENT
813 and self.bracket_tracker.any_open_brackets()
818 if comment.type != token.COMMENT:
821 after = len(self.leaves) - 1
823 comment.type = STANDALONE_COMMENT
828 self.comments.append((after, comment))
831 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
832 """Generate comments that should appear directly after `leaf`."""
833 for _leaf_index, _leaf in enumerate(self.leaves):
840 for index, comment_after in self.comments:
841 if _leaf_index == index:
844 def remove_trailing_comma(self) -> None:
845 """Remove the trailing comma and moves the comments attached to it."""
846 comma_index = len(self.leaves) - 1
847 for i in range(len(self.comments)):
848 comment_index, comment = self.comments[i]
849 if comment_index == comma_index:
850 self.comments[i] = (comma_index - 1, comment)
853 def __str__(self) -> str:
854 """Render the line."""
858 indent = " " * self.depth
859 leaves = iter(self.leaves)
861 res = f"{first.prefix}{indent}{first.value}"
864 for _, comment in self.comments:
868 def __bool__(self) -> bool:
869 """Return True if the line has leaves or comments."""
870 return bool(self.leaves or self.comments)
873 class UnformattedLines(Line):
874 """Just like :class:`Line` but stores lines which aren't reformatted."""
876 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
877 """Just add a new `leaf` to the end of the lines.
879 The `preformatted` argument is ignored.
881 Keeps track of indentation `depth`, which is useful when the user
882 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
885 list(generate_comments(leaf))
886 except FormatOn as f_on:
887 self.leaves.append(f_on.leaf_from_consumed(leaf))
890 self.leaves.append(leaf)
891 if leaf.type == token.INDENT:
893 elif leaf.type == token.DEDENT:
896 def __str__(self) -> str:
897 """Render unformatted lines from leaves which were added with `append()`.
899 `depth` is not used for indentation in this case.
905 for leaf in self.leaves:
909 def append_comment(self, comment: Leaf) -> bool:
910 """Not implemented in this class. Raises `NotImplementedError`."""
911 raise NotImplementedError("Unformatted lines don't store comments separately.")
913 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
914 """Does nothing and returns False."""
917 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
918 """Does nothing and returns False."""
923 class EmptyLineTracker:
924 """Provides a stateful method that returns the number of potential extra
925 empty lines needed before and after the currently processed line.
927 Note: this tracker works on lines that haven't been split yet. It assumes
928 the prefix of the first leaf consists of optional newlines. Those newlines
929 are consumed by `maybe_empty_lines()` and included in the computation.
931 previous_line: Optional[Line] = None
932 previous_after: int = 0
933 previous_defs: List[int] = Factory(list)
935 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
936 """Return the number of extra empty lines before and after the `current_line`.
938 This is for separating `def`, `async def` and `class` with extra empty
939 lines (two on module-level), as well as providing an extra empty line
940 after flow control keywords to make them more prominent.
942 if isinstance(current_line, UnformattedLines):
945 before, after = self._maybe_empty_lines(current_line)
946 before -= self.previous_after
947 self.previous_after = after
948 self.previous_line = current_line
951 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
953 if current_line.depth == 0:
955 if current_line.leaves:
956 # Consume the first leaf's extra newlines.
957 first_leaf = current_line.leaves[0]
958 before = first_leaf.prefix.count("\n")
959 before = min(before, max_allowed)
960 first_leaf.prefix = ""
963 depth = current_line.depth
964 while self.previous_defs and self.previous_defs[-1] >= depth:
965 self.previous_defs.pop()
966 before = 1 if depth else 2
967 is_decorator = current_line.is_decorator
968 if is_decorator or current_line.is_def or current_line.is_class:
970 self.previous_defs.append(depth)
971 if self.previous_line is None:
972 # Don't insert empty lines before the first line in the file.
975 if self.previous_line and self.previous_line.is_decorator:
976 # Don't insert empty lines between decorators.
980 if current_line.depth:
984 if current_line.is_flow_control:
989 and self.previous_line.is_import
990 and not current_line.is_import
991 and depth == self.previous_line.depth
993 return (before or 1), 0
997 and self.previous_line.is_yield
998 and (not current_line.is_yield or depth != self.previous_line.depth)
1000 return (before or 1), 0
1006 class LineGenerator(Visitor[Line]):
1007 """Generates reformatted Line objects. Empty lines are not emitted.
1009 Note: destroys the tree it's visiting by mutating the prefixes of its leaves,
1010 so the tree will no longer stringify back to valid Python code.
1012 current_line: Line = Factory(Line)
1014 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1017 If the line is empty, only emit if it makes sense.
1018 If the line is too long, split it first and then generate.
1020 If any lines were generated, set up a new current_line.
1022 if not self.current_line:
1023 if self.current_line.__class__ == type:
1024 self.current_line.depth += indent
1026 self.current_line = type(depth=self.current_line.depth + indent)
1027 return # Line is empty, don't emit. Creating a new one is unnecessary.
1029 complete_line = self.current_line
1030 self.current_line = type(depth=complete_line.depth + indent)
1033 def visit(self, node: LN) -> Iterator[Line]:
1034 """Main method to visit `node` and its children.
1036 Yields :class:`Line` objects.
1038 if isinstance(self.current_line, UnformattedLines):
1039 # File contained `# fmt: off`
1040 yield from self.visit_unformatted(node)
1043 yield from super().visit(node)
1045 def visit_default(self, node: LN) -> Iterator[Line]:
1046 """Default `visit_*()` implementation. Recurses to children of `node`."""
1047 if isinstance(node, Leaf):
1048 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1050 for comment in generate_comments(node):
1051 if any_open_brackets:
1052 # any comment within brackets is subject to splitting
1053 self.current_line.append(comment)
1054 elif comment.type == token.COMMENT:
1055 # regular trailing comment
1056 self.current_line.append(comment)
1057 yield from self.line()
1060 # regular standalone comment
1061 yield from self.line()
1063 self.current_line.append(comment)
1064 yield from self.line()
1066 except FormatOff as f_off:
1067 f_off.trim_prefix(node)
1068 yield from self.line(type=UnformattedLines)
1069 yield from self.visit(node)
1071 except FormatOn as f_on:
1072 # This only happens here if somebody says "fmt: on" multiple
1074 f_on.trim_prefix(node)
1075 yield from self.visit_default(node)
1078 normalize_prefix(node, inside_brackets=any_open_brackets)
1079 if node.type == token.STRING:
1080 normalize_string_quotes(node)
1081 if node.type not in WHITESPACE:
1082 self.current_line.append(node)
1083 yield from super().visit_default(node)
1085 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1086 """Increase indentation level, maybe yield a line."""
1087 # In blib2to3 INDENT never holds comments.
1088 yield from self.line(+1)
1089 yield from self.visit_default(node)
1091 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1092 """Decrease indentation level, maybe yield a line."""
1093 # DEDENT has no value. Additionally, in blib2to3 it never holds comments.
1094 yield from self.line(-1)
1097 self, node: Node, keywords: Set[str], parens: Set[str]
1098 ) -> Iterator[Line]:
1099 """Visit a statement.
1101 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1102 `def`, `with`, `class`, and `assert`.
1104 The relevant Python language `keywords` for a given statement will be
1105 NAME leaves within it. This method puts those on a separate line.
1107 `parens` holds pairs of nodes where invisible parentheses should be put.
1108 Keys hold nodes after which opening parentheses should be put, values
1109 hold nodes before which closing parentheses should be put.
1111 normalize_invisible_parens(node, parens_after=parens)
1112 for child in node.children:
1113 if child.type == token.NAME and child.value in keywords: # type: ignore
1114 yield from self.line()
1116 yield from self.visit(child)
1118 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1119 """Visit a statement without nested statements."""
1120 is_suite_like = node.parent and node.parent.type in STATEMENT
1122 yield from self.line(+1)
1123 yield from self.visit_default(node)
1124 yield from self.line(-1)
1127 yield from self.line()
1128 yield from self.visit_default(node)
1130 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1131 """Visit `async def`, `async for`, `async with`."""
1132 yield from self.line()
1134 children = iter(node.children)
1135 for child in children:
1136 yield from self.visit(child)
1138 if child.type == token.ASYNC:
1141 internal_stmt = next(children)
1142 for child in internal_stmt.children:
1143 yield from self.visit(child)
1145 def visit_decorators(self, node: Node) -> Iterator[Line]:
1146 """Visit decorators."""
1147 for child in node.children:
1148 yield from self.line()
1149 yield from self.visit(child)
1151 def visit_import_from(self, node: Node) -> Iterator[Line]:
1152 """Visit import_from and maybe put invisible parentheses.
1154 This is separate from `visit_stmt` because import statements don't
1155 support arbitrary atoms and thus handling of parentheses is custom.
1158 for index, child in enumerate(node.children):
1160 if child.type == token.LPAR:
1161 # make parentheses invisible
1162 child.value = "" # type: ignore
1163 node.children[-1].value = "" # type: ignore
1165 # insert invisible parentheses
1166 node.insert_child(index, Leaf(token.LPAR, ""))
1167 node.append_child(Leaf(token.RPAR, ""))
1171 child.type == token.NAME and child.value == "import" # type: ignore
1174 for child in node.children:
1175 yield from self.visit(child)
1177 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1178 """Remove a semicolon and put the other statement on a separate line."""
1179 yield from self.line()
1181 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1182 """End of file. Process outstanding comments and end with a newline."""
1183 yield from self.visit_default(leaf)
1184 yield from self.line()
1186 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1187 """Used when file contained a `# fmt: off`."""
1188 if isinstance(node, Node):
1189 for child in node.children:
1190 yield from self.visit(child)
1194 self.current_line.append(node)
1195 except FormatOn as f_on:
1196 f_on.trim_prefix(node)
1197 yield from self.line()
1198 yield from self.visit(node)
1200 if node.type == token.ENDMARKER:
1201 # somebody decided not to put a final `# fmt: on`
1202 yield from self.line()
1204 def __attrs_post_init__(self) -> None:
1205 """You are in a twisty little maze of passages."""
1208 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1209 self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
1210 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1211 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1212 self.visit_try_stmt = partial(
1213 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1215 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1216 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1217 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1218 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1219 self.visit_async_funcdef = self.visit_async_stmt
1220 self.visit_decorated = self.visit_decorators
1223 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1224 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1225 OPENING_BRACKETS = set(BRACKET.keys())
1226 CLOSING_BRACKETS = set(BRACKET.values())
1227 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1228 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1231 def whitespace(leaf: Leaf) -> str: # noqa C901
1232 """Return whitespace prefix if needed for the given `leaf`."""
1239 if t in ALWAYS_NO_SPACE:
1242 if t == token.COMMENT:
1245 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1246 if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}:
1249 prev = leaf.prev_sibling
1251 prevp = preceding_leaf(p)
1252 if not prevp or prevp.type in OPENING_BRACKETS:
1255 if t == token.COLON:
1256 return SPACE if prevp.type == token.COMMA else NO
1258 if prevp.type == token.EQUAL:
1260 if prevp.parent.type in {
1261 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1265 elif prevp.parent.type == syms.typedargslist:
1266 # A bit hacky: if the equal sign has whitespace, it means we
1267 # previously found it's a typed argument. So, we're using
1271 elif prevp.type in STARS:
1272 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1275 elif prevp.type == token.COLON:
1276 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1281 and prevp.parent.type == syms.factor
1282 and prevp.type in MATH_OPERATORS
1287 prevp.type == token.RIGHTSHIFT
1289 and prevp.parent.type == syms.shift_expr
1290 and prevp.prev_sibling
1291 and prevp.prev_sibling.type == token.NAME
1292 and prevp.prev_sibling.value == "print" # type: ignore
1294 # Python 2 print chevron
1297 elif prev.type in OPENING_BRACKETS:
1300 if p.type in {syms.parameters, syms.arglist}:
1301 # untyped function signatures or calls
1305 if not prev or prev.type != token.COMMA:
1308 elif p.type == syms.varargslist:
1313 if prev and prev.type != token.COMMA:
1316 elif p.type == syms.typedargslist:
1317 # typed function signatures
1321 if t == token.EQUAL:
1322 if prev.type != syms.tname:
1325 elif prev.type == token.EQUAL:
1326 # A bit hacky: if the equal sign has whitespace, it means we
1327 # previously found it's a typed argument. So, we're using that, too.
1330 elif prev.type != token.COMMA:
1333 elif p.type == syms.tname:
1336 prevp = preceding_leaf(p)
1337 if not prevp or prevp.type != token.COMMA:
1340 elif p.type == syms.trailer:
1341 # attributes and calls
1342 if t == token.LPAR or t == token.RPAR:
1347 prevp = preceding_leaf(p)
1348 if not prevp or prevp.type != token.NUMBER:
1351 elif t == token.LSQB:
1354 elif prev.type != token.COMMA:
1357 elif p.type == syms.argument:
1359 if t == token.EQUAL:
1363 prevp = preceding_leaf(p)
1364 if not prevp or prevp.type == token.LPAR:
1367 elif prev.type in {token.EQUAL, token.STAR, token.DOUBLESTAR}:
1370 elif p.type == syms.decorator:
1374 elif p.type == syms.dotted_name:
1378 prevp = preceding_leaf(p)
1379 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1382 elif p.type == syms.classdef:
1386 if prev and prev.type == token.LPAR:
1389 elif p.type == syms.subscript:
1392 assert p.parent is not None, "subscripts are always parented"
1393 if p.parent.type == syms.subscriptlist:
1401 elif p.type == syms.atom:
1402 if prev and t == token.DOT:
1403 # dots, but not the first one.
1407 p.type == syms.listmaker
1408 or p.type == syms.testlist_gexp
1409 or p.type == syms.subscriptlist
1411 # list interior, including unpacking
1415 elif p.type == syms.dictsetmaker:
1416 # dict and set interior, including unpacking
1420 if prev.type == token.DOUBLESTAR:
1423 elif p.type in {syms.factor, syms.star_expr}:
1426 prevp = preceding_leaf(p)
1427 if not prevp or prevp.type in OPENING_BRACKETS:
1430 prevp_parent = prevp.parent
1431 assert prevp_parent is not None
1433 prevp.type == token.COLON
1434 and prevp_parent.type in {syms.subscript, syms.sliceop}
1438 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1441 elif t == token.NAME or t == token.NUMBER:
1444 elif p.type == syms.import_from:
1446 if prev and prev.type == token.DOT:
1449 elif t == token.NAME:
1453 if prev and prev.type == token.DOT:
1456 elif p.type == syms.sliceop:
1462 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1463 """Return the first leaf that precedes `node`, if any."""
1465 res = node.prev_sibling
1467 if isinstance(res, Leaf):
1471 return list(res.leaves())[-1]
1480 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1481 """Return the priority of the `leaf` delimiter, given a line break after it.
1483 The delimiter priorities returned here are from those delimiters that would
1484 cause a line break after themselves.
1486 Higher numbers are higher priority.
1488 if leaf.type == token.COMMA:
1489 return COMMA_PRIORITY
1494 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1495 """Return the priority of the `leaf` delimiter, given a line before after it.
1497 The delimiter priorities returned here are from those delimiters that would
1498 cause a line break before themselves.
1500 Higher numbers are higher priority.
1502 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1503 # * and ** might also be MATH_OPERATORS but in this case they are not.
1504 # Don't treat them as a delimiter.
1508 leaf.type in MATH_OPERATORS
1510 and leaf.parent.type not in {syms.factor, syms.star_expr}
1512 return MATH_PRIORITY
1514 if leaf.type in COMPARATORS:
1515 return COMPARATOR_PRIORITY
1518 leaf.type == token.STRING
1519 and previous is not None
1520 and previous.type == token.STRING
1522 return STRING_PRIORITY
1525 leaf.type == token.NAME
1526 and leaf.value == "for"
1528 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1530 return COMPREHENSION_PRIORITY
1533 leaf.type == token.NAME
1534 and leaf.value == "if"
1536 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1538 return COMPREHENSION_PRIORITY
1540 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
1541 return LOGIC_PRIORITY
1546 def is_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1547 """Return the priority of the `leaf` delimiter. Return 0 if not delimiter.
1549 Higher numbers are higher priority.
1552 is_split_before_delimiter(leaf, previous),
1553 is_split_after_delimiter(leaf, previous),
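# Illustrative sketch, not part of black.py: the three helpers above return a
# numeric priority, with 0 meaning "not a delimiter" (assuming `black` is
# importable). A comma always splits after itself; a plain name never splits.
import black
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf

comma = Leaf(token.COMMA, ",")
name = Leaf(token.NAME, "x")
print(black.is_split_after_delimiter(comma) > 0)  # True (COMMA_PRIORITY)
print(black.is_delimiter(name))                   # 0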
1557 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1558 """Clean the prefix of the `leaf` and generate comments from it, if any.
1560 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1561 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1562 move because it does away with modifying the grammar to include all the
1563 possible places in which comments can be placed.
1565 The sad consequence for us though is that comments don't "belong" anywhere.
1566 This is why this function generates simple parentless Leaf objects for
1567 comments. We simply don't know what the correct parent should be.
1569 No matter though, we can live without this. We really only need to
1570 differentiate between inline and standalone comments. The latter don't
1571 share the line with any code.
1573 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1574 are emitted with a fake STANDALONE_COMMENT token identifier.
1585 for index, line in enumerate(p.split("\n")):
1586 consumed += len(line) + 1 # adding the length of the split '\n'
1587 line = line.lstrip()
1590 if not line.startswith("#"):
1593 if index == 0 and leaf.type != token.ENDMARKER:
1594 comment_type = token.COMMENT # simple trailing comment
1596 comment_type = STANDALONE_COMMENT
1597 comment = make_comment(line)
1598 yield Leaf(comment_type, comment, prefix="\n" * nlines)
1600 if comment in {"# fmt: on", "# yapf: enable"}:
1601 raise FormatOn(consumed)
1603 if comment in {"# fmt: off", "# yapf: disable"}:
1604 if comment_type == STANDALONE_COMMENT:
1605 raise FormatOff(consumed)
1607 prev = preceding_leaf(leaf)
1608 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
1609 raise FormatOff(consumed)
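# Illustrative sketch, not part of black.py: what `generate_comments` yields
# for a leaf whose prefix carries a comment (assuming `black` is importable).
# The prefix below starts on its own line, so the generated Leaf is parentless
# and uses the fake STANDALONE_COMMENT token type described above.
import black
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf

leaf = Leaf(token.NAME, "x", prefix="\n# a standalone comment\n")
for comment in black.generate_comments(leaf):
    print(comment.type == black.STANDALONE_COMMENT, repr(comment.value))
# -> True '# a standalone comment'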
1614 def make_comment(content: str) -> str:
1615 """Return a consistently formatted comment from the given `content` string.
1617 All comments (except for "##", "#!", "#:") should have a single space between
1618 the hash sign and the content.
1620 If `content` didn't start with a hash sign, one is provided.
1622 content = content.rstrip()
1626 if content[0] == "#":
1627 content = content[1:]
1628 if content and content[0] not in " !:#":
1629 content = " " + content
1630 return "#" + content
1634 line: Line, line_length: int, inner: bool = False, py36: bool = False
1635 ) -> Iterator[Line]:
1636 """Split a `line` into potentially many lines.
1638 They should fit in the allotted `line_length` but might not be able to.
1639 `inner` signifies that there were a pair of brackets somewhere around the
1640 current `line`, possibly transitively. This means we can fall back to splitting
1641 by delimiters if the LHS/RHS don't yield any results.
1643 If `py36` is True, splitting may generate syntax that is only compatible
1644 with Python 3.6 and later.
1646 if isinstance(line, UnformattedLines) or line.is_comment:
1650 line_str = str(line).strip("\n")
1652 len(line_str) <= line_length
1653 and "\n" not in line_str # multiline strings
1654 and not line.contains_standalone_comments()
1659 split_funcs: List[SplitFunc]
1661 split_funcs = [left_hand_split]
1662 elif line.inside_brackets:
1663 split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
1665 split_funcs = [right_hand_split]
1666 for split_func in split_funcs:
1667 # We are accumulating lines in `result` because we might want to abort
1668 # mission and return the original line in the end, or attempt a different
1670 result: List[Line] = []
1672 for l in split_func(line, py36):
1673 if str(l).strip("\n") == line_str:
1674 raise CannotSplit("Split function returned an unchanged result")
1677 split_line(l, line_length=line_length, inner=True, py36=py36)
1679 except CannotSplit as cs:
1690 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1691 """Split line into many lines, starting with the first matching bracket pair.
1693 Note: this usually looks weird, only use this for function definitions.
1694 Prefer RHS otherwise.
1696 head = Line(depth=line.depth)
1697 body = Line(depth=line.depth + 1, inside_brackets=True)
1698 tail = Line(depth=line.depth)
1699 tail_leaves: List[Leaf] = []
1700 body_leaves: List[Leaf] = []
1701 head_leaves: List[Leaf] = []
1702 current_leaves = head_leaves
1703 matching_bracket = None
1704 for leaf in line.leaves:
1706 current_leaves is body_leaves
1707 and leaf.type in CLOSING_BRACKETS
1708 and leaf.opening_bracket is matching_bracket
1710 current_leaves = tail_leaves if body_leaves else head_leaves
1711 current_leaves.append(leaf)
1712 if current_leaves is head_leaves:
1713 if leaf.type in OPENING_BRACKETS:
1714 matching_bracket = leaf
1715 current_leaves = body_leaves
1716 # Since body is a new indent level, remove spurious leading whitespace.
1718 normalize_prefix(body_leaves[0], inside_brackets=True)
1719 # Build the new lines.
1720 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1722 result.append(leaf, preformatted=True)
1723 for comment_after in line.comments_after(leaf):
1724 result.append(comment_after, preformatted=True)
1725 bracket_split_succeeded_or_raise(head, body, tail)
1726 for result in (head, body, tail):
1731 def right_hand_split(
1732 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1733 ) -> Iterator[Line]:
1734 """Split line into many lines, starting with the last matching bracket pair."""
1735 head = Line(depth=line.depth)
1736 body = Line(depth=line.depth + 1, inside_brackets=True)
1737 tail = Line(depth=line.depth)
1738 tail_leaves: List[Leaf] = []
1739 body_leaves: List[Leaf] = []
1740 head_leaves: List[Leaf] = []
1741 current_leaves = tail_leaves
1742 opening_bracket = None
1743 closing_bracket = None
1744 for leaf in reversed(line.leaves):
1745 if current_leaves is body_leaves:
1746 if leaf is opening_bracket:
1747 current_leaves = head_leaves if body_leaves else tail_leaves
1748 current_leaves.append(leaf)
1749 if current_leaves is tail_leaves:
1750 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1751 opening_bracket = leaf.opening_bracket
1752 closing_bracket = leaf
1753 current_leaves = body_leaves
1754 tail_leaves.reverse()
1755 body_leaves.reverse()
1756 head_leaves.reverse()
1757 # Since body is a new indent level, remove spurious leading whitespace.
1759 normalize_prefix(body_leaves[0], inside_brackets=True)
1760 elif not head_leaves:
1761 # No `head` and no `body` means the split failed. `tail` has all content.
1762 raise CannotSplit("No brackets found")
1764 # Build the new lines.
1765 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1767 result.append(leaf, preformatted=True)
1768 for comment_after in line.comments_after(leaf):
1769 result.append(comment_after, preformatted=True)
1770 bracket_split_succeeded_or_raise(head, body, tail)
1771 assert opening_bracket and closing_bracket
1773 opening_bracket.type == token.LPAR
1774 and not opening_bracket.value
1775 and closing_bracket.type == token.RPAR
1776 and not closing_bracket.value
1778 # These parens were optional. If there aren't any delimiters or standalone
1779 # comments in the body, they were unnecessary and another split without
1780 # them should be attempted.
1782 body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
1784 omit = {id(closing_bracket), *omit}
1785 yield from right_hand_split(line, py36=py36, omit=omit)
1788 ensure_visible(opening_bracket)
1789 ensure_visible(closing_bracket)
1790 for result in (head, body, tail):
1795 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1796 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
1798 Do nothing otherwise.
1800 A left- or right-hand split is based on a pair of brackets. Content before
1801 (and including) the opening bracket is left on one line, content inside the
1802 brackets is put on a separate line, and finally content starting with and
1803 following the closing bracket is put on a separate line.
1805 Those are called `head`, `body`, and `tail`, respectively. If the split
1806 produced the same line (all content in `head`) or ended up with an empty `body`
1807 and the `tail` is just the closing bracket, then it's considered failed.
1809 tail_len = len(str(tail).strip())
1812 raise CannotSplit("Splitting brackets produced the same line")
1816 f"Splitting brackets on an empty body to save "
1817 f"{tail_len} characters is not worth it"
1821 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
1822 """Normalize prefix of the first leaf in every line returned by `split_func`.
1824 This is a decorator over relevant split functions.
1828 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
1829 for l in split_func(line, py36):
1830 normalize_prefix(l.leaves[0], inside_brackets=True)
1833 return split_wrapper
1836 @dont_increase_indentation
1837 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1838 """Split according to delimiters of the highest priority.
1840 If `py36` is True, the split will also add trailing commas to function
1841 signatures that contain `*` and `**`.
1844 last_leaf = line.leaves[-1]
1846 raise CannotSplit("Line empty")
1848 delimiters = line.bracket_tracker.delimiters
1850 delimiter_priority = line.bracket_tracker.max_delimiter_priority(
1851 exclude={id(last_leaf)}
1854 raise CannotSplit("No delimiters found")
1856 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1857 lowest_depth = sys.maxsize
1858 trailing_comma_safe = True
1860 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1861 """Append `leaf` to current line or to new line if appending impossible."""
1862 nonlocal current_line
1864 current_line.append_safe(leaf, preformatted=True)
1865 except ValueError as ve:
1868 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1869 current_line.append(leaf)
1871 for leaf in line.leaves:
1872 yield from append_to_line(leaf)
1874 for comment_after in line.comments_after(leaf):
1875 yield from append_to_line(comment_after)
1877 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1879 leaf.bracket_depth == lowest_depth
1880 and is_vararg(leaf, within=VARARGS_PARENTS)
1882 trailing_comma_safe = trailing_comma_safe and py36
1883 leaf_priority = delimiters.get(id(leaf))
1884 if leaf_priority == delimiter_priority:
1887 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1891 and delimiter_priority == COMMA_PRIORITY
1892 and current_line.leaves[-1].type != token.COMMA
1893 and current_line.leaves[-1].type != STANDALONE_COMMENT
1895 current_line.append(Leaf(token.COMMA, ","))
1899 @dont_increase_indentation
1900 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
1901 """Split standalone comments from the rest of the line."""
1902 if not line.contains_standalone_comments(0):
1903 raise CannotSplit("Line does not have any standalone comments")
1905 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1907 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1908 """Append `leaf` to current line or to new line if appending impossible."""
1909 nonlocal current_line
1911 current_line.append_safe(leaf, preformatted=True)
1912 except ValueError as ve:
1915 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1916 current_line.append(leaf)
1918 for leaf in line.leaves:
1919 yield from append_to_line(leaf)
1921 for comment_after in line.comments_after(leaf):
1922 yield from append_to_line(comment_after)
1928 def is_import(leaf: Leaf) -> bool:
1929 """Return True if the given leaf starts an import statement."""
1936 (v == "import" and p and p.type == syms.import_name)
1937 or (v == "from" and p and p.type == syms.import_from)
1942 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
1943 """Leave existing extra newlines if not `inside_brackets`. Remove everything
1946 Note: don't use backslashes for formatting or you'll lose your voting rights.
1948 if not inside_brackets:
1949 spl = leaf.prefix.split("#")
1950 if "\\" not in spl[0]:
1951 nl_count = spl[-1].count("\n")
1954 leaf.prefix = "\n" * nl_count
1960 def normalize_string_quotes(leaf: Leaf) -> None:
1961 """Prefer double quotes but only if it doesn't cause more escaping.
1963 Adds or removes backslashes as appropriate. Doesn't parse and fix
1964 strings nested in f-strings (yet).
1966 Note: Mutates its argument.
1968 value = leaf.value.lstrip("furbFURB")
1969 if value[:3] == '"""':
1972 elif value[:3] == "'''":
1975 elif value[0] == '"':
1981 first_quote_pos = leaf.value.find(orig_quote)
1982 if first_quote_pos == -1:
1983 return # There's an internal error
1985 prefix = leaf.value[:first_quote_pos]
1986 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
1987 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
1988 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
1989 body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
1990 if "r" in prefix.casefold():
1991 if unescaped_new_quote.search(body):
1992 # There's at least one unescaped new_quote in this raw string
1993 # so converting is impossible
1996 # Do not introduce or remove backslashes in raw strings
1999 # remove unnecessary quotes
2000 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2001 if body != new_body:
2002 # Consider the string without unnecessary quotes as the original
2004 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2005 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2006 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2007 if new_quote == '"""' and new_body[-1] == '"':
2009 new_body = new_body[:-1] + '\\"'
2010 orig_escape_count = body.count("\\")
2011 new_escape_count = new_body.count("\\")
2012 if new_escape_count > orig_escape_count:
2013 return # Do not introduce more escaping
2015 if new_escape_count == orig_escape_count and orig_quote == '"':
2016 return # Prefer double quotes
2018 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
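# Illustrative sketch, not part of black.py: `normalize_string_quotes` mutates
# the leaf in place and only switches to double quotes when that does not add
# escaping (assuming `black` is importable).
import black
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf

leaf = Leaf(token.STRING, "'hello'")
black.normalize_string_quotes(leaf)
print(leaf.value)  # "hello"

leaf = Leaf(token.STRING, "'say \"hi\"'")
black.normalize_string_quotes(leaf)
print(leaf.value)  # 'say "hi"' -- unchanged, double quotes would need escaping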
2021 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2022 """Make existing optional parentheses invisible or create new ones.
2024 Standardizes on visible parentheses for single-element tuples, and keeps
2025 existing visible parentheses for other tuples and generator expressions.
2028 for child in list(node.children):
2030 if child.type == syms.atom:
2032 is_empty_tuple(child)
2033 or is_one_tuple(child)
2034 or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
2036 first = child.children[0]
2037 last = child.children[-1]
2038 if first.type == token.LPAR and last.type == token.RPAR:
2039 # make parentheses invisible
2040 first.value = "" # type: ignore
2041 last.value = "" # type: ignore
2042 elif is_one_tuple(child):
2043 # wrap child in visible parentheses
2044 lpar = Leaf(token.LPAR, "(")
2045 rpar = Leaf(token.RPAR, ")")
2046 index = child.remove() or 0
2047 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2049 # wrap child in invisible parentheses
2050 lpar = Leaf(token.LPAR, "")
2051 rpar = Leaf(token.RPAR, "")
2052 index = child.remove() or 0
2053 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2055 check_lpar = isinstance(child, Leaf) and child.value in parens_after
2058 def is_empty_tuple(node: LN) -> bool:
2059 """Return True if `node` holds an empty tuple."""
2061 node.type == syms.atom
2062 and len(node.children) == 2
2063 and node.children[0].type == token.LPAR
2064 and node.children[1].type == token.RPAR
2068 def is_one_tuple(node: LN) -> bool:
2069 """Return True if `node` holds a tuple with one element, with or without parens."""
2070 if node.type == syms.atom:
2071 if len(node.children) != 3:
2074 lpar, gexp, rpar = node.children
2076 lpar.type == token.LPAR
2077 and gexp.type == syms.testlist_gexp
2078 and rpar.type == token.RPAR
2082 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2085 node.type in IMPLICIT_TUPLE
2086 and len(node.children) == 2
2087 and node.children[1].type == token.COMMA
2091 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2092 """Return True if `leaf` is a star or double star in a vararg or kwarg.
2094 If `within` includes VARARGS_PARENTS, this applies to function signatures.
2095 If `within` includes UNPACKING_PARENTS, it applies to right
2096 hand-side extended iterable unpacking (PEP 3132) and additional unpacking
2097 generalizations (PEP 448).
2099 if leaf.type not in STARS or not leaf.parent:
2103 if p.type == syms.star_expr:
2104 # Star expressions are also used as assignment targets in extended
2105 # iterable unpacking (PEP 3132). See what its parent is instead.
2111 return p.type in within
2114 def max_delimiter_priority_in_atom(node: LN) -> int:
2115 if node.type != syms.atom:
2118 first = node.children[0]
2119 last = node.children[-1]
2120 if not (first.type == token.LPAR and last.type == token.RPAR):
2123 bt = BracketTracker()
2124 for c in node.children[1:-1]:
2125 if isinstance(c, Leaf):
2128 for leaf in c.leaves():
2131 return bt.max_delimiter_priority()
2137 def ensure_visible(leaf: Leaf) -> None:
2138 """Make sure parentheses are visible.
2140 They could be invisible as part of some statements (see
2141 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
2143 if leaf.type == token.LPAR:
2145 elif leaf.type == token.RPAR:
2149 def is_python36(node: Node) -> bool:
2150 """Return True if the current file is using Python 3.6+ features.
2152 Currently looking for:
2154 - trailing commas after * or ** in function signatures.
2156 for n in node.pre_order():
2157 if n.type == token.STRING:
2158 value_head = n.value[:2] # type: ignore
2159 if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
2163 n.type == syms.typedargslist
2165 and n.children[-1].type == token.COMMA
2167 for ch in n.children:
2168 if ch.type == token.STAR or ch.type == token.DOUBLESTAR:
2174 PYTHON_EXTENSIONS = {".py"}
2175 BLACKLISTED_DIRECTORIES = {
2176 "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
2180 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
2181 """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
2182 and have one of the PYTHON_EXTENSIONS.
2184 for child in path.iterdir():
2186 if child.name in BLACKLISTED_DIRECTORIES:
2189 yield from gen_python_files_in_dir(child)
2191 elif child.suffix in PYTHON_EXTENSIONS:
2197 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2200 change_count: int = 0
2202 failure_count: int = 0
2204 def done(self, src: Path, changed: bool) -> None:
2205 """Increment the counter for successful reformatting. Write out a message."""
2207 reformatted = "would reformat" if self.check else "reformatted"
2209 out(f"{reformatted} {src}")
2210 self.change_count += 1
2213 out(f"{src} already well formatted, good job.", bold=False)
2214 self.same_count += 1
2216 def failed(self, src: Path, message: str) -> None:
2217 """Increment the counter for failed reformatting. Write out a message."""
2218 err(f"error: cannot format {src}: {message}")
2219 self.failure_count += 1
2222 def return_code(self) -> int:
2223 """Return the exit code that the app should use.
2225 This considers the current state of changed files and failures:
2226 - if there were any failures, return 123;
2227 - if any files were changed and --check is being used, return 1;
2228 - otherwise return 0.
2230 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2231 # 126 we have special return codes reserved by the shell.
2232 if self.failure_count:
2235 elif self.change_count and self.check:
2240 def __str__(self) -> str:
2241 """Render a color report of the current state.
2243 Use `click.unstyle` to remove colors.
2246 reformatted = "would be reformatted"
2247 unchanged = "would be left unchanged"
2248 failed = "would fail to reformat"
2250 reformatted = "reformatted"
2251 unchanged = "left unchanged"
2252 failed = "failed to reformat"
2254 if self.change_count:
2255 s = "s" if self.change_count > 1 else ""
2257 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2260 s = "s" if self.same_count > 1 else ""
2261 report.append(f"{self.same_count} file{s} {unchanged}")
2262 if self.failure_count:
2263 s = "s" if self.failure_count > 1 else ""
2265 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2267 return ", ".join(report) + "."
2270 def assert_equivalent(src: str, dst: str) -> None:
2271 """Raise AssertionError if `src` and `dst` aren't equivalent."""
2276 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2277 """Simple visitor generating strings to compare ASTs by content."""
2278 yield f"{' ' * depth}{node.__class__.__name__}("
2280 for field in sorted(node._fields):
2282 value = getattr(node, field)
2283 except AttributeError:
2286 yield f"{' ' * (depth+1)}{field}="
2288 if isinstance(value, list):
2290 if isinstance(item, ast.AST):
2291 yield from _v(item, depth + 2)
2293 elif isinstance(value, ast.AST):
2294 yield from _v(value, depth + 2)
2297 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
2299 yield f"{' ' * depth}) # /{node.__class__.__name__}"
2302 src_ast = ast.parse(src)
2303 except Exception as exc:
2304 major, minor = sys.version_info[:2]
2305 raise AssertionError(
2306 f"cannot use --safe with this file; failed to parse source file "
2307 f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2308 f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
2312 dst_ast = ast.parse(dst)
2313 except Exception as exc:
2314 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2315 raise AssertionError(
2316 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2317 f"Please report a bug on https://github.com/ambv/black/issues. "
2318 f"This invalid output might be helpful: {log}"
2321 src_ast_str = "\n".join(_v(src_ast))
2322 dst_ast_str = "\n".join(_v(dst_ast))
2323 if src_ast_str != dst_ast_str:
2324 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2325 raise AssertionError(
2326 f"INTERNAL ERROR: Black produced code that is not equivalent to "
2328 f"Please report a bug on https://github.com/ambv/black/issues. "
2329 f"This diff might be helpful: {log}"
2333 def assert_stable(src: str, dst: str, line_length: int) -> None:
2334 """Raise AssertionError if `dst` reformats differently the second time."""
2335 newdst = format_str(dst, line_length=line_length)
2338 diff(src, dst, "source", "first pass"),
2339 diff(dst, newdst, "first pass", "second pass"),
2341 raise AssertionError(
2342 f"INTERNAL ERROR: Black produced different code on the second pass "
2343 f"of the formatter. "
2344 f"Please report a bug on https://github.com/ambv/black/issues. "
2345 f"This diff might be helpful: {log}"
2349 def dump_to_file(*output: str) -> str:
2350 """Dump `output` to a temporary file. Return path to the file."""
2353 with tempfile.NamedTemporaryFile(
2354 mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
2356 for lines in output:
2358 if lines and lines[-1] != "\n":
2363 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
2364 """Return a unified diff string between strings `a` and `b`."""
2367 a_lines = [line + "\n" for line in a.split("\n")]
2368 b_lines = [line + "\n" for line in b.split("\n")]
2370 difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
2374 def cancel(tasks: List[asyncio.Task]) -> None:
2375 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
2381 def shutdown(loop: BaseEventLoop) -> None:
2382 """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
2384 # This part is borrowed from asyncio/runners.py in Python 3.7b2.
2385 to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
2389 for task in to_cancel:
2391 loop.run_until_complete(
2392 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
2395 # `concurrent.futures.Future` objects cannot be cancelled once they
2396 # are already running. There might be some when the `shutdown()` happened.
2397 # Silence their logger's spew about the event loop being closed.
2398 cf_logger = logging.getLogger("concurrent.futures")
2399 cf_logger.setLevel(logging.CRITICAL)
2403 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
2404 """Replace `regex` with `replacement` twice on `original`.
2406 This is used by string normalization to perform replacements on
2407 overlapping matches.
2409 return regex.sub(replacement, regex.sub(replacement, original))
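# Illustrative sketch, not part of black.py: why `sub_twice` exists. A single
# re.sub pass misses overlapping matches because scanning resumes after the end
# of each match; running the same substitution twice catches the leftovers.
# The pattern below is a simplified stand-in for the quote-escaping regexes
# used by normalize_string_quotes above.
import re

pattern = re.compile(r"([^\\]|^)'")  # an unescaped single quote
replacement = r"\1\\'"               # keep the preceding char, escape the quote

text = "''"  # two adjacent unescaped quotes
print(re.sub(pattern, replacement, text))                                # \'' -- second one missed
print(re.sub(pattern, replacement, re.sub(pattern, replacement, text)))  # \'\'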
2412 if __name__ == "__main__":