3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial, wraps
9 from multiprocessing import Manager
11 from pathlib import Path
34 from appdirs import user_cache_dir
35 from attr import dataclass, Factory
39 from blib2to3.pytree import Node, Leaf, type_repr
40 from blib2to3 import pygram, pytree
41 from blib2to3.pgen2 import driver, token
42 from blib2to3.pgen2.parse import ParseError
44 __version__ = "18.4a4"
45 DEFAULT_LINE_LENGTH = 88
48 syms = pygram.python_symbols
56 LN = Union[Leaf, Node]
57 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
60 CacheInfo = Tuple[Timestamp, FileSize]
61 Cache = Dict[Path, CacheInfo]
62 out = partial(click.secho, bold=True, err=True)
63 err = partial(click.secho, fg="red", err=True)
66 class NothingChanged(UserWarning):
67 """Raised by :func:`format_file` when reformatted code is the same as source."""
70 class CannotSplit(Exception):
71 """A readable split that fits the allotted line length is impossible.
73 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
74 :func:`delimiter_split`.
78 class FormatError(Exception):
79 """Base exception for `# fmt: on` and `# fmt: off` handling.
81 It holds the number of bytes of the prefix consumed before the format
82 control comment appeared.
85 def __init__(self, consumed: int) -> None:
86 super().__init__(consumed)
87 self.consumed = consumed
89 def trim_prefix(self, leaf: Leaf) -> None:
90 leaf.prefix = leaf.prefix[self.consumed :]
92 def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
93 """Returns a new Leaf from the consumed part of the prefix."""
94 unformatted_prefix = leaf.prefix[: self.consumed]
95 return Leaf(token.NEWLINE, unformatted_prefix)
98 class FormatOn(FormatError):
99 """Found a comment like `# fmt: on` in the file."""
102 class FormatOff(FormatError):
103 """Found a comment like `# fmt: off` in the file."""
106 class WriteBack(Enum):
123 default=DEFAULT_LINE_LENGTH,
124 help="How many character per line to allow.",
131 "Don't write the files back, just return the status. Return code 0 "
132 "means nothing would change. Return code 1 means some files would be "
133 "reformatted. Return code 123 means there was an internal error."
139 help="Don't write the files back, just output a diff for each file on stdout.",
144 help="If --fast given, skip temporary sanity checks. [default: --safe]",
151 "Don't emit non-error messages to stderr. Errors are still emitted, "
152 "silence those with 2>/dev/null."
155 @click.version_option(version=__version__)
160 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
173 """The uncompromising code formatter."""
174 sources: List[Path] = []
178 sources.extend(gen_python_files_in_dir(p))
180 # if a file was explicitly given, we don't care about its extension
183 sources.append(Path("-"))
185 err(f"invalid path: {s}")
187 if check and not diff:
188 write_back = WriteBack.NO
190 write_back = WriteBack.DIFF
192 write_back = WriteBack.YES
193 report = Report(check=check, quiet=quiet)
194 if len(sources) == 0:
195 out("No paths given. Nothing to do 😴")
199 elif len(sources) == 1:
200 reformat_one(sources[0], line_length, fast, write_back, report)
202 loop = asyncio.get_event_loop()
203 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
205 loop.run_until_complete(
207 sources, line_length, fast, write_back, report, loop, executor
213 out("All done! ✨ 🍰 ✨")
214 click.echo(str(report))
215 ctx.exit(report.return_code)
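# --- Editor's sketch (not part of black) ---------------------------------------
# One way to exercise the CLI and the return codes documented in the --check
# help text. Assumptions: the click command defined above is named `main` (as
# in upstream black) and click's test runner wires up stdin for the "-" path.
def _editor_example_cli() -> int:
    from click.testing import CliRunner

    result = CliRunner().invoke(main, ["--check", "-"], input="x = 1\n")
    # 0: nothing would change, 1: files would be reformatted, 123: internal error
    return result.exit_code
# --------------------------------------------------------------------------------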
219 src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
221 """Reformat a single file under `src` without spawning child processes.
223 `line_length`, `write_back`, and `fast` options are passed to
224 :func:`format_file_in_place`.
228 if not src.is_file() and str(src) == "-":
229 if format_stdin_to_stdout(
230 line_length=line_length, fast=fast, write_back=write_back
232 changed = Changed.YES
235 if write_back != WriteBack.DIFF:
236 cache = read_cache(line_length)
238 if src in cache and cache[src] == get_cache_info(src):
239 changed = Changed.CACHED
241 changed is not Changed.CACHED
242 and format_file_in_place(
243 src, line_length=line_length, fast=fast, write_back=write_back
246 changed = Changed.YES
247 if write_back == WriteBack.YES and changed is not Changed.NO:
248 write_cache(cache, [src], line_length)
249 report.done(src, changed)
250 except Exception as exc:
251 report.failed(src, str(exc))
254 async def schedule_formatting(
258 write_back: WriteBack,
263 """Run formatting of `sources` in parallel using the provided `executor`.
265 (Use ProcessPoolExecutors for actual parallelism.)
267 `line_length`, `write_back`, and `fast` options are passed to
268 :func:`format_file_in_place`.
271 if write_back != WriteBack.DIFF:
272 cache = read_cache(line_length)
273 sources, cached = filter_cached(cache, sources)
275 report.done(src, Changed.CACHED)
280 if write_back == WriteBack.DIFF:
281 # For diff output, we need locks to ensure we don't interleave output
282 # from different processes.
284 lock = manager.Lock()
286 src: loop.run_in_executor(
287 executor, format_file_in_place, src, line_length, fast, write_back, lock
291 _task_values = list(tasks.values())
293 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
294 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
295 except NotImplementedError:
296 # There are no good alternatives for these on Windows
298 await asyncio.wait(_task_values)
299 for src, task in tasks.items():
301 report.failed(src, "timed out, cancelling")
303 cancelled.append(task)
304 elif task.cancelled():
305 cancelled.append(task)
306 elif task.exception():
307 report.failed(src, str(task.exception()))
309 formatted.append(src)
310 report.done(src, Changed.YES if task.result() else Changed.NO)
313 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
314 if write_back == WriteBack.YES and formatted:
315 write_cache(cache, formatted, line_length)
318 def format_file_in_place(
322 write_back: WriteBack = WriteBack.NO,
323 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
325 """Format file under `src` path. Return True if changed.
327 If `write_back` is YES, write reformatted code back to the file; if DIFF, write a diff to stdout.
328 `line_length` and `fast` options are passed to :func:`format_file_contents`.
331 with tokenize.open(src) as src_buffer:
332 src_contents = src_buffer.read()
334 dst_contents = format_file_contents(
335 src_contents, line_length=line_length, fast=fast
337 except NothingChanged:
340 if write_back == WriteBack.YES:
341 with open(src, "w", encoding=src_buffer.encoding) as f:
342 f.write(dst_contents)
343 elif write_back == WriteBack.DIFF:
344 src_name = f"{src} (original)"
345 dst_name = f"{src} (formatted)"
346 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
350 sys.stdout.write(diff_contents)
357 def format_stdin_to_stdout(
358 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
360 """Format file on stdin. Return True if changed.
362 If `write_back` is YES, write reformatted code to stdout; if DIFF, write a diff to stdout instead.
363 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
365 src = sys.stdin.read()
368 dst = format_file_contents(src, line_length=line_length, fast=fast)
371 except NothingChanged:
375 if write_back == WriteBack.YES:
376 sys.stdout.write(dst)
377 elif write_back == WriteBack.DIFF:
378 src_name = "<stdin> (original)"
379 dst_name = "<stdin> (formatted)"
380 sys.stdout.write(diff(src, dst, src_name, dst_name))
383 def format_file_contents(
384 src_contents: str, line_length: int, fast: bool
386 """Reformat contents a file and return new contents.
388 If `fast` is False, additionally confirm that the reformatted code is
389 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
390 `line_length` is passed to :func:`format_str`.
392 if src_contents.strip() == "":
395 dst_contents = format_str(src_contents, line_length=line_length)
396 if src_contents == dst_contents:
400 assert_equivalent(src_contents, dst_contents)
401 assert_stable(src_contents, dst_contents, line_length=line_length)
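# --- Editor's sketch (not part of black) ---------------------------------------
# Typical call pattern for format_file_contents(): NothingChanged signals that
# the input was already formatted, which callers treat as "keep the source",
# mirroring format_file_in_place() and format_stdin_to_stdout() above.
def _editor_example_format_contents(src: str) -> str:
    try:
        return format_file_contents(src, line_length=DEFAULT_LINE_LENGTH, fast=False)
    except NothingChanged:
        return src
# --------------------------------------------------------------------------------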
405 def format_str(src_contents: str, line_length: int) -> FileContent:
406 """Reformat a string and return new contents.
408 `line_length` determines how many characters per line are allowed.
410 src_node = lib2to3_parse(src_contents)
412 lines = LineGenerator()
413 elt = EmptyLineTracker()
414 py36 = is_python36(src_node)
417 for current_line in lines.visit(src_node):
418 for _ in range(after):
419 dst_contents += str(empty_line)
420 before, after = elt.maybe_empty_lines(current_line)
421 for _ in range(before):
422 dst_contents += str(empty_line)
423 for line in split_line(current_line, line_length=line_length, py36=py36):
424 dst_contents += str(line)
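# --- Editor's sketch (not part of black) ---------------------------------------
# format_str() is the pure string-to-string core used by the helpers above; the
# editor's expectation (not a recorded run) is that single quotes and spacing
# are normalized, e.g. "print( 'hi' )" becomes 'print("hi")'.
def _editor_example_format_str() -> str:
    return format_str("print( 'hi' )\n", line_length=DEFAULT_LINE_LENGTH)
# --------------------------------------------------------------------------------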
429 pygram.python_grammar_no_print_statement_no_exec_statement,
430 pygram.python_grammar_no_print_statement,
431 pygram.python_grammar,
435 def lib2to3_parse(src_txt: str) -> Node:
436 """Given a string with source, return the lib2to3 Node."""
437 grammar = pygram.python_grammar_no_print_statement
438 if src_txt[-1] != "\n":
439 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
441 for grammar in GRAMMARS:
442 drv = driver.Driver(grammar, pytree.convert)
444 result = drv.parse_string(src_txt, True)
447 except ParseError as pe:
448 lineno, column = pe.context[1]
449 lines = src_txt.splitlines()
451 faulty_line = lines[lineno - 1]
453 faulty_line = "<line number missing in source>"
454 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
458 if isinstance(result, Leaf):
459 result = Node(syms.file_input, [result])
463 def lib2to3_unparse(node: Node) -> str:
464 """Given a lib2to3 node, return its string representation."""
472 class Visitor(Generic[T]):
473 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
475 def visit(self, node: LN) -> Iterator[T]:
476 """Main method to visit `node` and its children.
478 It tries to find a `visit_*()` method for the given `node.type`, like
479 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
480 If no dedicated `visit_*()` method is found, chooses `visit_default()`
483 Then yields objects of type `T` from the selected visitor.
486 name = token.tok_name[node.type]
488 name = type_repr(node.type)
489 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
491 def visit_default(self, node: LN) -> Iterator[T]:
492 """Default `visit_*()` implementation. Recurses to children of `node`."""
493 if isinstance(node, Node):
494 for child in node.children:
495 yield from self.visit(child)
499 class DebugVisitor(Visitor[T]):
502 def visit_default(self, node: LN) -> Iterator[T]:
503 indent = " " * (2 * self.tree_depth)
504 if isinstance(node, Node):
505 _type = type_repr(node.type)
506 out(f"{indent}{_type}", fg="yellow")
508 for child in node.children:
509 yield from self.visit(child)
512 out(f"{indent}/{_type}", fg="yellow", bold=False)
514 _type = token.tok_name.get(node.type, str(node.type))
515 out(f"{indent}{_type}", fg="blue", nl=False)
517 # We don't have to handle prefixes for `Node` objects since
518 # that delegates to the first child anyway.
519 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
520 out(f" {node.value!r}", fg="blue", bold=False)
523 def show(cls, code: str) -> None:
524 """Pretty-print the lib2to3 AST of a given string of `code`.
526 Convenience method for debugging.
528 v: DebugVisitor[None] = DebugVisitor()
529 list(v.visit(lib2to3_parse(code)))
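# --- Editor's note (not part of black) ------------------------------------------
# Handy while hacking on the formatter: dump the concrete syntax tree black sees
# for a snippet.
def _editor_example_show_tree() -> None:
    DebugVisitor.show("x = [1, 2]\n")
# ---------------------------------------------------------------------------------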
532 KEYWORDS = set(keyword.kwlist)
533 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
534 FLOW_CONTROL = {"return", "raise", "break", "continue"}
545 STANDALONE_COMMENT = 153
546 LOGIC_OPERATORS = {"and", "or"}
570 STARS = {token.STAR, token.DOUBLESTAR}
573 syms.argument, # double star in arglist
574 syms.trailer, # single argument to call
576 syms.varargslist, # lambdas
578 UNPACKING_PARENTS = {
579 syms.atom, # single element of a list or set literal
601 COMPREHENSION_PRIORITY = 20
606 COMPARATOR_PRIORITY = 3
611 class BracketTracker:
612 """Keeps track of brackets on a line."""
615 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
616 delimiters: Dict[LeafID, Priority] = Factory(dict)
617 previous: Optional[Leaf] = None
618 _for_loop_variable: bool = False
619 _lambda_arguments: bool = False
621 def mark(self, leaf: Leaf) -> None:
622 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
624 All leaves receive an int `bracket_depth` field that stores how deep
625 within brackets a given leaf is. 0 means there are no enclosing brackets
626 that started on this line.
628 If a leaf is itself a closing bracket, it receives an `opening_bracket`
629 field that it forms a pair with. This is a one-directional link to
630 avoid reference cycles.
632 If a leaf is a delimiter (a token on which Black can split the line if
633 needed) and it's on depth 0, its `id()` is stored in the tracker's
636 if leaf.type == token.COMMENT:
639 self.maybe_decrement_after_for_loop_variable(leaf)
640 self.maybe_decrement_after_lambda_arguments(leaf)
641 if leaf.type in CLOSING_BRACKETS:
643 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
644 leaf.opening_bracket = opening_bracket
645 leaf.bracket_depth = self.depth
647 delim = is_split_before_delimiter(leaf, self.previous)
648 if delim and self.previous is not None:
649 self.delimiters[id(self.previous)] = delim
651 delim = is_split_after_delimiter(leaf, self.previous)
653 self.delimiters[id(leaf)] = delim
654 if leaf.type in OPENING_BRACKETS:
655 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
658 self.maybe_increment_lambda_arguments(leaf)
659 self.maybe_increment_for_loop_variable(leaf)
661 def any_open_brackets(self) -> bool:
662 """Return True if there is an yet unmatched open bracket on the line."""
663 return bool(self.bracket_match)
665 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
666 """Return the highest priority of a delimiter found on the line.
668 Values are consistent with what `is_split_*_delimiter()` return.
669 Raises ValueError on no delimiters.
671 return max(v for k, v in self.delimiters.items() if k not in exclude)
673 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
674 """In a for loop, or comprehension, the variables are often unpacks.
676 To avoid splitting on the comma in this situation, increase the depth of
677 tokens between `for` and `in`.
679 if leaf.type == token.NAME and leaf.value == "for":
681 self._for_loop_variable = True
686 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
687 """See `maybe_increment_for_loop_variable` above for explanation."""
688 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
690 self._for_loop_variable = False
695 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
696 """In a lambda expression, there might be more than one argument.
698 To avoid splitting on the comma in this situation, increase the depth of
699 tokens between `lambda` and `:`.
701 if leaf.type == token.NAME and leaf.value == "lambda":
703 self._lambda_arguments = True
708 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
709 """See `maybe_increment_lambda_arguments` above for explanation."""
710 if self._lambda_arguments and leaf.type == token.COLON:
712 self._lambda_arguments = False
717 def get_open_lsqb(self) -> Optional[Leaf]:
718 """Return the most recent opening square bracket (if any)."""
719 return self.bracket_match.get((self.depth - 1, token.RSQB))
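# --- Editor's illustration (not part of black) -----------------------------------
# Why the for-loop and lambda helpers above exist: the commas below are not
# treated as split candidates because the tracker raises the depth between
# `for` .. `in` and between `lambda` .. `:`.
def _editor_example_untouched_commas() -> None:
    pairs = [(1, 2), (3, 4)]
    for x, y in pairs:  # comma inside the unpacked loop target
        print(x + y)
    add = lambda a, b: a + b  # commas between lambda arguments
    print(add(1, 2))
# ----------------------------------------------------------------------------------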
724 """Holds leaves and comments. Can be printed with `str(line)`."""
727 leaves: List[Leaf] = Factory(list)
728 comments: List[Tuple[Index, Leaf]] = Factory(list)
729 bracket_tracker: BracketTracker = Factory(BracketTracker)
730 inside_brackets: bool = False
732 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
733 """Add a new `leaf` to the end of the line.
735 Unless `preformatted` is True, the `leaf` will receive a new consistent
736 whitespace prefix and metadata applied by :class:`BracketTracker`.
737 Trailing commas are maybe removed, unpacked for loop variables are
738 demoted from being delimiters.
740 Inline comments are put aside.
742 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
746 if self.leaves and not preformatted:
747 # Note: at this point leaf.prefix should be empty except for
748 # imports, for which we only preserve newlines.
749 leaf.prefix += whitespace(
750 leaf, complex_subscript=self.is_complex_subscript(leaf)
752 if self.inside_brackets or not preformatted:
753 self.bracket_tracker.mark(leaf)
754 self.maybe_remove_trailing_comma(leaf)
756 if not self.append_comment(leaf):
757 self.leaves.append(leaf)
759 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
760 """Like :func:`append()` but disallow invalid standalone comment structure.
762 Raises ValueError when any `leaf` is appended after a standalone comment
763 or when a standalone comment is not the first leaf on the line.
765 if self.bracket_tracker.depth == 0:
767 raise ValueError("cannot append to standalone comments")
769 if self.leaves and leaf.type == STANDALONE_COMMENT:
771 "cannot append standalone comments to a populated line"
774 self.append(leaf, preformatted=preformatted)
777 def is_comment(self) -> bool:
778 """Is this line a standalone comment?"""
779 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
782 def is_decorator(self) -> bool:
783 """Is this line a decorator?"""
784 return bool(self) and self.leaves[0].type == token.AT
787 def is_import(self) -> bool:
788 """Is this an import line?"""
789 return bool(self) and is_import(self.leaves[0])
792 def is_class(self) -> bool:
793 """Is this line a class definition?"""
796 and self.leaves[0].type == token.NAME
797 and self.leaves[0].value == "class"
801 def is_def(self) -> bool:
802 """Is this a function definition? (Also returns True for async defs.)"""
804 first_leaf = self.leaves[0]
809 second_leaf: Optional[Leaf] = self.leaves[1]
813 (first_leaf.type == token.NAME and first_leaf.value == "def")
815 first_leaf.type == token.ASYNC
816 and second_leaf is not None
817 and second_leaf.type == token.NAME
818 and second_leaf.value == "def"
823 def is_flow_control(self) -> bool:
824 """Is this line a flow control statement?
826 Those are `return`, `raise`, `break`, and `continue`.
830 and self.leaves[0].type == token.NAME
831 and self.leaves[0].value in FLOW_CONTROL
835 def is_yield(self) -> bool:
836 """Is this line a yield statement?"""
839 and self.leaves[0].type == token.NAME
840 and self.leaves[0].value == "yield"
843 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
844 """If so, needs to be split before emitting."""
845 for leaf in self.leaves:
846 if leaf.type == STANDALONE_COMMENT:
847 if leaf.bracket_depth <= depth_limit:
852 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
853 """Remove trailing comma if there is one and it's safe."""
856 and self.leaves[-1].type == token.COMMA
857 and closing.type in CLOSING_BRACKETS
861 if closing.type == token.RBRACE:
862 self.remove_trailing_comma()
865 if closing.type == token.RSQB:
866 comma = self.leaves[-1]
867 if comma.parent and comma.parent.type == syms.listmaker:
868 self.remove_trailing_comma()
871 # For parens let's check if it's safe to remove the comma. If the
872 # trailing one is the only one, we might mistakenly change a tuple
873 # into a different type by removing the comma.
874 depth = closing.bracket_depth + 1
876 opening = closing.opening_bracket
877 for _opening_index, leaf in enumerate(self.leaves):
884 for leaf in self.leaves[_opening_index + 1 :]:
888 bracket_depth = leaf.bracket_depth
889 if bracket_depth == depth and leaf.type == token.COMMA:
891 if leaf.parent and leaf.parent.type == syms.arglist:
896 self.remove_trailing_comma()
901 def append_comment(self, comment: Leaf) -> bool:
902 """Add an inline or standalone comment to the line."""
904 comment.type == STANDALONE_COMMENT
905 and self.bracket_tracker.any_open_brackets()
910 if comment.type != token.COMMENT:
913 after = len(self.leaves) - 1
915 comment.type = STANDALONE_COMMENT
920 self.comments.append((after, comment))
923 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
924 """Generate comments that should appear directly after `leaf`."""
925 for _leaf_index, _leaf in enumerate(self.leaves):
932 for index, comment_after in self.comments:
933 if _leaf_index == index:
936 def remove_trailing_comma(self) -> None:
937 """Remove the trailing comma and moves the comments attached to it."""
938 comma_index = len(self.leaves) - 1
939 for i in range(len(self.comments)):
940 comment_index, comment = self.comments[i]
941 if comment_index == comma_index:
942 self.comments[i] = (comma_index - 1, comment)
945 def is_complex_subscript(self, leaf: Leaf) -> bool:
946 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
948 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
950 if open_lsqb is None:
953 subscript_start = open_lsqb.next_sibling
955 isinstance(subscript_start, Node)
956 and subscript_start.type == syms.subscriptlist
958 subscript_start = child_towards(subscript_start, leaf)
959 return subscript_start is not None and any(
960 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
963 def __str__(self) -> str:
964 """Render the line."""
968 indent = " " * self.depth
969 leaves = iter(self.leaves)
971 res = f"{first.prefix}{indent}{first.value}"
974 for _, comment in self.comments:
978 def __bool__(self) -> bool:
979 """Return True if the line has leaves or comments."""
980 return bool(self.leaves or self.comments)
983 class UnformattedLines(Line):
984 """Just like :class:`Line` but stores lines which aren't reformatted."""
986 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
987 """Just add a new `leaf` to the end of the lines.
989 The `preformatted` argument is ignored.
991 Keeps track of indentation `depth`, which is useful when the user
992 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
995 list(generate_comments(leaf))
996 except FormatOn as f_on:
997 self.leaves.append(f_on.leaf_from_consumed(leaf))
1000 self.leaves.append(leaf)
1001 if leaf.type == token.INDENT:
1003 elif leaf.type == token.DEDENT:
1006 def __str__(self) -> str:
1007 """Render unformatted lines from leaves which were added with `append()`.
1009 `depth` is not used for indentation in this case.
1015 for leaf in self.leaves:
1019 def append_comment(self, comment: Leaf) -> bool:
1020 """Not implemented in this class. Raises `NotImplementedError`."""
1021 raise NotImplementedError("Unformatted lines don't store comments separately.")
1023 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1024 """Does nothing and returns False."""
1027 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1028 """Does nothing and returns False."""
1033 class EmptyLineTracker:
1034 """Provides a stateful method that returns the number of potential extra
1035 empty lines needed before and after the currently processed line.
1037 Note: this tracker works on lines that haven't been split yet. It assumes
1038 the prefix of the first leaf consists of optional newlines. Those newlines
1039 are consumed by `maybe_empty_lines()` and included in the computation.
1041 previous_line: Optional[Line] = None
1042 previous_after: int = 0
1043 previous_defs: List[int] = Factory(list)
1045 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1046 """Return the number of extra empty lines before and after the `current_line`.
1048 This is for separating `def`, `async def` and `class` with extra empty
1049 lines (two on module-level), as well as providing an extra empty line
1050 after flow control keywords to make them more prominent.
1052 if isinstance(current_line, UnformattedLines):
1055 before, after = self._maybe_empty_lines(current_line)
1056 before -= self.previous_after
1057 self.previous_after = after
1058 self.previous_line = current_line
1059 return before, after
1061 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1063 if current_line.depth == 0:
1065 if current_line.leaves:
1066 # Consume the first leaf's extra newlines.
1067 first_leaf = current_line.leaves[0]
1068 before = first_leaf.prefix.count("\n")
1069 before = min(before, max_allowed)
1070 first_leaf.prefix = ""
1073 depth = current_line.depth
1074 while self.previous_defs and self.previous_defs[-1] >= depth:
1075 self.previous_defs.pop()
1076 before = 1 if depth else 2
1077 is_decorator = current_line.is_decorator
1078 if is_decorator or current_line.is_def or current_line.is_class:
1079 if not is_decorator:
1080 self.previous_defs.append(depth)
1081 if self.previous_line is None:
1082 # Don't insert empty lines before the first line in the file.
1085 if self.previous_line.is_decorator:
1089 self.previous_line.is_comment
1090 and self.previous_line.depth == current_line.depth
1096 if current_line.depth:
1102 and self.previous_line.is_import
1103 and not current_line.is_import
1104 and depth == self.previous_line.depth
1106 return (before or 1), 0
1112 class LineGenerator(Visitor[Line]):
1113 """Generates reformatted Line objects. Empty lines are not emitted.
1115 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1116 in ways that will no longer stringify to valid Python code on the tree.
1118 current_line: Line = Factory(Line)
1120 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1123 If the line is empty, only emit if it makes sense.
1124 If the line is too long, split it first and then generate.
1126 If any lines were generated, set up a new current_line.
1128 if not self.current_line:
1129 if self.current_line.__class__ == type:
1130 self.current_line.depth += indent
1132 self.current_line = type(depth=self.current_line.depth + indent)
1133 return # Line is empty, don't emit. Creating a new one unnecessary.
1135 complete_line = self.current_line
1136 self.current_line = type(depth=complete_line.depth + indent)
1139 def visit(self, node: LN) -> Iterator[Line]:
1140 """Main method to visit `node` and its children.
1142 Yields :class:`Line` objects.
1144 if isinstance(self.current_line, UnformattedLines):
1145 # File contained `# fmt: off`
1146 yield from self.visit_unformatted(node)
1149 yield from super().visit(node)
1151 def visit_default(self, node: LN) -> Iterator[Line]:
1152 """Default `visit_*()` implementation. Recurses to children of `node`."""
1153 if isinstance(node, Leaf):
1154 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1156 for comment in generate_comments(node):
1157 if any_open_brackets:
1158 # any comment within brackets is subject to splitting
1159 self.current_line.append(comment)
1160 elif comment.type == token.COMMENT:
1161 # regular trailing comment
1162 self.current_line.append(comment)
1163 yield from self.line()
1166 # regular standalone comment
1167 yield from self.line()
1169 self.current_line.append(comment)
1170 yield from self.line()
1172 except FormatOff as f_off:
1173 f_off.trim_prefix(node)
1174 yield from self.line(type=UnformattedLines)
1175 yield from self.visit(node)
1177 except FormatOn as f_on:
1178 # This only happens here if somebody says "fmt: on" multiple
1180 f_on.trim_prefix(node)
1181 yield from self.visit_default(node)
1184 normalize_prefix(node, inside_brackets=any_open_brackets)
1185 if node.type == token.STRING:
1186 normalize_string_quotes(node)
1187 if node.type not in WHITESPACE:
1188 self.current_line.append(node)
1189 yield from super().visit_default(node)
1191 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1192 """Increase indentation level, maybe yield a line."""
1193 # In blib2to3 INDENT never holds comments.
1194 yield from self.line(+1)
1195 yield from self.visit_default(node)
1197 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1198 """Decrease indentation level, maybe yield a line."""
1199 # The current line might still wait for trailing comments. At DEDENT time
1200 # there won't be any (they would be prefixes on the preceding NEWLINE).
1201 # Emit the line then.
1202 yield from self.line()
1204 # While DEDENT has no value, its prefix may contain standalone comments
1205 # that belong to the current indentation level. Get 'em.
1206 yield from self.visit_default(node)
1208 # Finally, emit the dedent.
1209 yield from self.line(-1)
1212 self, node: Node, keywords: Set[str], parens: Set[str]
1213 ) -> Iterator[Line]:
1214 """Visit a statement.
1216 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1217 `def`, `with`, `class`, and `assert`.
1219 The relevant Python language `keywords` for a given statement will be
1220 NAME leaves within it. This method puts those on a separate line.
1222 `parens` holds a set of string leaf values; invisible parentheses are put
1223 around the child that follows any leaf whose value is in `parens` (see
1224 :func:`normalize_invisible_parens`).
1226 normalize_invisible_parens(node, parens_after=parens)
1227 for child in node.children:
1228 if child.type == token.NAME and child.value in keywords: # type: ignore
1229 yield from self.line()
1231 yield from self.visit(child)
1233 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1234 """Visit a statement without nested statements."""
1235 is_suite_like = node.parent and node.parent.type in STATEMENT
1237 yield from self.line(+1)
1238 yield from self.visit_default(node)
1239 yield from self.line(-1)
1242 yield from self.line()
1243 yield from self.visit_default(node)
1245 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1246 """Visit `async def`, `async for`, `async with`."""
1247 yield from self.line()
1249 children = iter(node.children)
1250 for child in children:
1251 yield from self.visit(child)
1253 if child.type == token.ASYNC:
1256 internal_stmt = next(children)
1257 for child in internal_stmt.children:
1258 yield from self.visit(child)
1260 def visit_decorators(self, node: Node) -> Iterator[Line]:
1261 """Visit decorators."""
1262 for child in node.children:
1263 yield from self.line()
1264 yield from self.visit(child)
1266 def visit_import_from(self, node: Node) -> Iterator[Line]:
1267 """Visit import_from and maybe put invisible parentheses.
1269 This is separate from `visit_stmt` because import statements don't
1270 support arbitrary atoms and thus handling of parentheses is custom.
1273 for index, child in enumerate(node.children):
1275 if child.type == token.LPAR:
1276 # make parentheses invisible
1277 child.value = "" # type: ignore
1278 node.children[-1].value = "" # type: ignore
1280 # insert invisible parentheses
1281 node.insert_child(index, Leaf(token.LPAR, ""))
1282 node.append_child(Leaf(token.RPAR, ""))
1286 child.type == token.NAME and child.value == "import" # type: ignore
1289 for child in node.children:
1290 yield from self.visit(child)
1292 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1293 """Remove a semicolon and put the other statement on a separate line."""
1294 yield from self.line()
1296 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1297 """End of file. Process outstanding comments and end with a newline."""
1298 yield from self.visit_default(leaf)
1299 yield from self.line()
1301 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1302 """Used when file contained a `# fmt: off`."""
1303 if isinstance(node, Node):
1304 for child in node.children:
1305 yield from self.visit(child)
1309 self.current_line.append(node)
1310 except FormatOn as f_on:
1311 f_on.trim_prefix(node)
1312 yield from self.line()
1313 yield from self.visit(node)
1315 if node.type == token.ENDMARKER:
1316 # somebody decided not to put a final `# fmt: on`
1317 yield from self.line()
1319 def __attrs_post_init__(self) -> None:
1320 """You are in a twisty little maze of passages."""
1323 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1324 self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
1325 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1326 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1327 self.visit_try_stmt = partial(
1328 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1330 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1331 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1332 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1333 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1334 self.visit_async_funcdef = self.visit_async_stmt
1335 self.visit_decorated = self.visit_decorators
1338 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1339 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1340 OPENING_BRACKETS = set(BRACKET.keys())
1341 CLOSING_BRACKETS = set(BRACKET.values())
1342 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1343 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1346 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901
1347 """Return whitespace prefix if needed for the given `leaf`.
1349 `complex_subscript` signals whether the given leaf is part of a subscription
1350 which has non-trivial arguments, like arithmetic expressions or function calls.
1358 if t in ALWAYS_NO_SPACE:
1361 if t == token.COMMENT:
1364 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1367 and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
1371 prev = leaf.prev_sibling
1373 prevp = preceding_leaf(p)
1374 if not prevp or prevp.type in OPENING_BRACKETS:
1377 if t == token.COLON:
1378 if prevp.type == token.COLON:
1381 elif prevp.type != token.COMMA and not complex_subscript:
1386 if prevp.type == token.EQUAL:
1388 if prevp.parent.type in {
1389 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1393 elif prevp.parent.type == syms.typedargslist:
1394 # A bit hacky: if the equal sign has whitespace, it means we
1395 # previously found it's a typed argument. So, we're using
1399 elif prevp.type in STARS:
1400 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1403 elif prevp.type == token.COLON:
1404 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1405 return SPACE if complex_subscript else NO
1409 and prevp.parent.type == syms.factor
1410 and prevp.type in MATH_OPERATORS
1415 prevp.type == token.RIGHTSHIFT
1417 and prevp.parent.type == syms.shift_expr
1418 and prevp.prev_sibling
1419 and prevp.prev_sibling.type == token.NAME
1420 and prevp.prev_sibling.value == "print" # type: ignore
1422 # Python 2 print chevron
1425 elif prev.type in OPENING_BRACKETS:
1428 if p.type in {syms.parameters, syms.arglist}:
1429 # untyped function signatures or calls
1430 if not prev or prev.type != token.COMMA:
1433 elif p.type == syms.varargslist:
1435 if prev and prev.type != token.COMMA:
1438 elif p.type == syms.typedargslist:
1439 # typed function signatures
1443 if t == token.EQUAL:
1444 if prev.type != syms.tname:
1447 elif prev.type == token.EQUAL:
1448 # A bit hacky: if the equal sign has whitespace, it means we
1449 # previously found it's a typed argument. So, we're using that, too.
1452 elif prev.type != token.COMMA:
1455 elif p.type == syms.tname:
1458 prevp = preceding_leaf(p)
1459 if not prevp or prevp.type != token.COMMA:
1462 elif p.type == syms.trailer:
1463 # attributes and calls
1464 if t == token.LPAR or t == token.RPAR:
1469 prevp = preceding_leaf(p)
1470 if not prevp or prevp.type != token.NUMBER:
1473 elif t == token.LSQB:
1476 elif prev.type != token.COMMA:
1479 elif p.type == syms.argument:
1481 if t == token.EQUAL:
1485 prevp = preceding_leaf(p)
1486 if not prevp or prevp.type == token.LPAR:
1489 elif prev.type in {token.EQUAL} | STARS:
1492 elif p.type == syms.decorator:
1496 elif p.type == syms.dotted_name:
1500 prevp = preceding_leaf(p)
1501 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1504 elif p.type == syms.classdef:
1508 if prev and prev.type == token.LPAR:
1511 elif p.type in {syms.subscript, syms.sliceop}:
1514 assert p.parent is not None, "subscripts are always parented"
1515 if p.parent.type == syms.subscriptlist:
1520 elif not complex_subscript:
1523 elif p.type == syms.atom:
1524 if prev and t == token.DOT:
1525 # dots, but not the first one.
1528 elif p.type == syms.dictsetmaker:
1530 if prev and prev.type == token.DOUBLESTAR:
1533 elif p.type in {syms.factor, syms.star_expr}:
1536 prevp = preceding_leaf(p)
1537 if not prevp or prevp.type in OPENING_BRACKETS:
1540 prevp_parent = prevp.parent
1541 assert prevp_parent is not None
1543 prevp.type == token.COLON
1544 and prevp_parent.type in {syms.subscript, syms.sliceop}
1548 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1551 elif t == token.NAME or t == token.NUMBER:
1554 elif p.type == syms.import_from:
1556 if prev and prev.type == token.DOT:
1559 elif t == token.NAME:
1563 if prev and prev.type == token.DOT:
1566 elif p.type == syms.sliceop:
1572 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1573 """Return the first leaf that precedes `node`, if any."""
1575 res = node.prev_sibling
1577 if isinstance(res, Leaf):
1581 return list(res.leaves())[-1]
1590 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1591 """Return the child of `ancestor` that contains `descendant`."""
1592 node: Optional[LN] = descendant
1593 while node and node.parent != ancestor:
1598 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1599 """Return the priority of the `leaf` delimiter, given a line break after it.
1601 The delimiter priorities returned here are from those delimiters that would
1602 cause a line break after themselves.
1604 Higher numbers are higher priority.
1606 if leaf.type == token.COMMA:
1607 return COMMA_PRIORITY
1612 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1613 """Return the priority of the `leaf` delimiter, given a line before after it.
1615 The delimiter priorities returned here are from those delimiters that would
1616 cause a line break before themselves.
1618 Higher numbers are higher priority.
1620 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1621 # * and ** might also be MATH_OPERATORS but in this case they are not.
1622 # Don't treat them as a delimiter.
1626 leaf.type in MATH_OPERATORS
1628 and leaf.parent.type not in {syms.factor, syms.star_expr}
1630 return MATH_PRIORITY
1632 if leaf.type in COMPARATORS:
1633 return COMPARATOR_PRIORITY
1636 leaf.type == token.STRING
1637 and previous is not None
1638 and previous.type == token.STRING
1640 return STRING_PRIORITY
1643 leaf.type == token.NAME
1644 and leaf.value == "for"
1646 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1648 return COMPREHENSION_PRIORITY
1651 leaf.type == token.NAME
1652 and leaf.value == "if"
1654 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1656 return COMPREHENSION_PRIORITY
1659 leaf.type == token.NAME
1660 and leaf.value in {"if", "else"}
1662 and leaf.parent.type == syms.test
1664 return TERNARY_PRIORITY
1666 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
1667 return LOGIC_PRIORITY
1672 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1673 """Clean the prefix of the `leaf` and generate comments from it, if any.
1675 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1676 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1677 move because it does away with modifying the grammar to include all the
1678 possible places in which comments can be placed.
1680 The sad consequence for us though is that comments don't "belong" anywhere.
1681 This is why this function generates simple parentless Leaf objects for
1682 comments. We simply don't know what the correct parent should be.
1684 No matter though, we can live without this. We really only need to
1685 differentiate between inline and standalone comments. The latter don't
1686 share the line with any code.
1688 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1689 are emitted with a fake STANDALONE_COMMENT token identifier.
1700 for index, line in enumerate(p.split("\n")):
1701 consumed += len(line) + 1 # adding the length of the split '\n'
1702 line = line.lstrip()
1705 if not line.startswith("#"):
1708 if index == 0 and leaf.type != token.ENDMARKER:
1709 comment_type = token.COMMENT # simple trailing comment
1711 comment_type = STANDALONE_COMMENT
1712 comment = make_comment(line)
1713 yield Leaf(comment_type, comment, prefix="\n" * nlines)
1715 if comment in {"# fmt: on", "# yapf: enable"}:
1716 raise FormatOn(consumed)
1718 if comment in {"# fmt: off", "# yapf: disable"}:
1719 if comment_type == STANDALONE_COMMENT:
1720 raise FormatOff(consumed)
1722 prev = preceding_leaf(leaf)
1723 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
1724 raise FormatOff(consumed)
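# --- Editor's sketch (not part of black) ------------------------------------------
# Recovering comments from leaf prefixes as described above: the trailing comment
# is yielded as token.COMMENT, the own-line one as STANDALONE_COMMENT.
def _editor_example_comments() -> None:
    node = lib2to3_parse("x = 1  # trailing\n\n# standalone\ny = 2\n")
    for leaf in node.leaves():
        for comment in generate_comments(leaf):
            print(comment.type, repr(comment.value))
# -----------------------------------------------------------------------------------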
1729 def make_comment(content: str) -> str:
1730 """Return a consistently formatted comment from the given `content` string.
1732 All comments (except for "##", "#!", "#:") should have a single space between
1733 the hash sign and the content.
1735 If `content` didn't start with a hash sign, one is provided.
1737 content = content.rstrip()
1741 if content[0] == "#":
1742 content = content[1:]
1743 if content and content[0] not in " !:#":
1744 content = " " + content
1745 return "#" + content
1749 line: Line, line_length: int, inner: bool = False, py36: bool = False
1750 ) -> Iterator[Line]:
1751 """Split a `line` into potentially many lines.
1753 They should fit in the allotted `line_length` but might not be able to.
1754 `inner` signifies that there was a pair of brackets somewhere around the
1755 current `line`, possibly transitively. This means we can fall back to splitting
1756 by delimiters if the LHS/RHS don't yield any results.
1758 If `py36` is True, splitting may generate syntax that is only compatible
1759 with Python 3.6 and later.
1761 if isinstance(line, UnformattedLines) or line.is_comment:
1765 line_str = str(line).strip("\n")
1767 len(line_str) <= line_length
1768 and "\n" not in line_str # multiline strings
1769 and not line.contains_standalone_comments()
1774 split_funcs: List[SplitFunc]
1776 split_funcs = [left_hand_split]
1777 elif line.is_import:
1778 split_funcs = [explode_split]
1779 elif line.inside_brackets:
1780 split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
1782 split_funcs = [right_hand_split]
1783 for split_func in split_funcs:
1784 # We are accumulating lines in `result` because we might want to abort
1785 # mission and return the original line in the end, or attempt a different
1787 result: List[Line] = []
1789 for l in split_func(line, py36):
1790 if str(l).strip("\n") == line_str:
1791 raise CannotSplit("Split function returned an unchanged result")
1794 split_line(l, line_length=line_length, inner=True, py36=py36)
1796 except CannotSplit as cs:
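# --- Editor's sketch (not part of black) -------------------------------------------
# End-to-end effect of split_line() and the splitters below, driven through
# format_str(): the editor's expectation is that an overlong call is wrapped
# inside its brackets rather than left on one long line.
def _editor_example_split() -> str:
    src = (
        "result = some_function(argument_one, argument_two,"
        " argument_three, argument_four, argument_five)\n"
    )
    return format_str(src, line_length=DEFAULT_LINE_LENGTH)
# ------------------------------------------------------------------------------------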
1807 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1808 """Split line into many lines, starting with the first matching bracket pair.
1810 Note: this usually looks weird; only use it for function definitions.
1811 Prefer RHS otherwise.
1813 head = Line(depth=line.depth)
1814 body = Line(depth=line.depth + 1, inside_brackets=True)
1815 tail = Line(depth=line.depth)
1816 tail_leaves: List[Leaf] = []
1817 body_leaves: List[Leaf] = []
1818 head_leaves: List[Leaf] = []
1819 current_leaves = head_leaves
1820 matching_bracket = None
1821 for leaf in line.leaves:
1823 current_leaves is body_leaves
1824 and leaf.type in CLOSING_BRACKETS
1825 and leaf.opening_bracket is matching_bracket
1827 current_leaves = tail_leaves if body_leaves else head_leaves
1828 current_leaves.append(leaf)
1829 if current_leaves is head_leaves:
1830 if leaf.type in OPENING_BRACKETS:
1831 matching_bracket = leaf
1832 current_leaves = body_leaves
1833 # Since body is a new indent level, remove spurious leading whitespace.
1835 normalize_prefix(body_leaves[0], inside_brackets=True)
1836 # Build the new lines.
1837 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1839 result.append(leaf, preformatted=True)
1840 for comment_after in line.comments_after(leaf):
1841 result.append(comment_after, preformatted=True)
1842 bracket_split_succeeded_or_raise(head, body, tail)
1843 for result in (head, body, tail):
1848 def right_hand_split(
1849 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1850 ) -> Iterator[Line]:
1851 """Split line into many lines, starting with the last matching bracket pair."""
1852 head = Line(depth=line.depth)
1853 body = Line(depth=line.depth + 1, inside_brackets=True)
1854 tail = Line(depth=line.depth)
1855 tail_leaves: List[Leaf] = []
1856 body_leaves: List[Leaf] = []
1857 head_leaves: List[Leaf] = []
1858 current_leaves = tail_leaves
1859 opening_bracket = None
1860 closing_bracket = None
1861 for leaf in reversed(line.leaves):
1862 if current_leaves is body_leaves:
1863 if leaf is opening_bracket:
1864 current_leaves = head_leaves if body_leaves else tail_leaves
1865 current_leaves.append(leaf)
1866 if current_leaves is tail_leaves:
1867 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1868 opening_bracket = leaf.opening_bracket
1869 closing_bracket = leaf
1870 current_leaves = body_leaves
1871 tail_leaves.reverse()
1872 body_leaves.reverse()
1873 head_leaves.reverse()
1874 # Since body is a new indent level, remove spurious leading whitespace.
1876 normalize_prefix(body_leaves[0], inside_brackets=True)
1877 elif not head_leaves:
1878 # No `head` and no `body` means the split failed. `tail` has all content.
1879 raise CannotSplit("No brackets found")
1881 # Build the new lines.
1882 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1884 result.append(leaf, preformatted=True)
1885 for comment_after in line.comments_after(leaf):
1886 result.append(comment_after, preformatted=True)
1887 bracket_split_succeeded_or_raise(head, body, tail)
1888 assert opening_bracket and closing_bracket
1890 opening_bracket.type == token.LPAR
1891 and not opening_bracket.value
1892 and closing_bracket.type == token.RPAR
1893 and not closing_bracket.value
1895 # These parens were optional. If there aren't any delimiters or standalone
1896 # comments in the body, they were unnecessary and another split without
1897 # them should be attempted.
1899 body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
1901 omit = {id(closing_bracket), *omit}
1902 yield from right_hand_split(line, py36=py36, omit=omit)
1905 ensure_visible(opening_bracket)
1906 ensure_visible(closing_bracket)
1907 for result in (head, body, tail):
1912 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1913 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
1915 Do nothing otherwise.
1917 A left- or right-hand split is based on a pair of brackets. Content before
1918 (and including) the opening bracket is left on one line, content inside the
1919 brackets is put on a separate line, and finally content starting with and
1920 following the closing bracket is put on a separate line.
1922 Those are called `head`, `body`, and `tail`, respectively. If the split
1923 produced the same line (all content in `head`) or ended up with an empty `body`
1924 and the `tail` is just the closing bracket, then it's considered failed.
1926 tail_len = len(str(tail).strip())
1929 raise CannotSplit("Splitting brackets produced the same line")
1933 f"Splitting brackets on an empty body to save "
1934 f"{tail_len} characters is not worth it"
1938 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
1939 """Normalize prefix of the first leaf in every line returned by `split_func`.
1941 This is a decorator over relevant split functions.
1945 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
1946 for l in split_func(line, py36):
1947 normalize_prefix(l.leaves[0], inside_brackets=True)
1950 return split_wrapper
1953 @dont_increase_indentation
1954 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1955 """Split according to delimiters of the highest priority.
1957 If `py36` is True, the split will add trailing commas also in function
1958 signatures that contain `*` and `**`.
1961 last_leaf = line.leaves[-1]
1963 raise CannotSplit("Line empty")
1965 delimiters = line.bracket_tracker.delimiters
1967 delimiter_priority = line.bracket_tracker.max_delimiter_priority(
1968 exclude={id(last_leaf)}
1971 raise CannotSplit("No delimiters found")
1973 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1974 lowest_depth = sys.maxsize
1975 trailing_comma_safe = True
1977 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1978 """Append `leaf` to current line or to new line if appending impossible."""
1979 nonlocal current_line
1981 current_line.append_safe(leaf, preformatted=True)
1982 except ValueError as ve:
1985 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1986 current_line.append(leaf)
1988 for leaf in line.leaves:
1989 yield from append_to_line(leaf)
1991 for comment_after in line.comments_after(leaf):
1992 yield from append_to_line(comment_after)
1994 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1996 leaf.bracket_depth == lowest_depth
1997 and is_vararg(leaf, within=VARARGS_PARENTS)
1999 trailing_comma_safe = trailing_comma_safe and py36
2000 leaf_priority = delimiters.get(id(leaf))
2001 if leaf_priority == delimiter_priority:
2004 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2008 and delimiter_priority == COMMA_PRIORITY
2009 and current_line.leaves[-1].type != token.COMMA
2010 and current_line.leaves[-1].type != STANDALONE_COMMENT
2012 current_line.append(Leaf(token.COMMA, ","))
2016 @dont_increase_indentation
2017 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2018 """Split standalone comments from the rest of the line."""
2019 if not line.contains_standalone_comments(0):
2020 raise CannotSplit("Line does not have any standalone comments")
2022 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2024 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2025 """Append `leaf` to current line or to new line if appending impossible."""
2026 nonlocal current_line
2028 current_line.append_safe(leaf, preformatted=True)
2029 except ValueError as ve:
2032 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2033 current_line.append(leaf)
2035 for leaf in line.leaves:
2036 yield from append_to_line(leaf)
2038 for comment_after in line.comments_after(leaf):
2039 yield from append_to_line(comment_after)
2046 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
2047 ) -> Iterator[Line]:
2048 """Split by rightmost bracket and immediately split contents by a delimiter."""
2049 new_lines = list(right_hand_split(line, py36, omit))
2050 if len(new_lines) != 3:
2051 yield from new_lines
2057 yield from delimiter_split(new_lines[1], py36)
2065 def is_import(leaf: Leaf) -> bool:
2066 """Return True if the given leaf starts an import statement."""
2073 (v == "import" and p and p.type == syms.import_name)
2074 or (v == "from" and p and p.type == syms.import_from)
2079 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2080 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2083 Note: don't use backslashes for formatting or you'll lose your voting rights.
2085 if not inside_brackets:
2086 spl = leaf.prefix.split("#")
2087 if "\\" not in spl[0]:
2088 nl_count = spl[-1].count("\n")
2091 leaf.prefix = "\n" * nl_count
2097 def normalize_string_quotes(leaf: Leaf) -> None:
2098 """Prefer double quotes but only if it doesn't cause more escaping.
2100 Adds or removes backslashes as appropriate. Doesn't parse and fix
2101 strings nested in f-strings (yet).
2103 Note: Mutates its argument.
2105 value = leaf.value.lstrip("furbFURB")
2106 if value[:3] == '"""':
2109 elif value[:3] == "'''":
2112 elif value[0] == '"':
2118 first_quote_pos = leaf.value.find(orig_quote)
2119 if first_quote_pos == -1:
2120 return # There's an internal error
2122 prefix = leaf.value[:first_quote_pos]
2123 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2124 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2125 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2126 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2127 if "r" in prefix.casefold():
2128 if unescaped_new_quote.search(body):
2129 # There's at least one unescaped new_quote in this raw string
2130 # so converting is impossible
2133 # Do not introduce or remove backslashes in raw strings
2136 # remove unnecessary quotes
2137 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2138 if body != new_body:
2139 # Consider the string without unnecessary quotes as the original
2141 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2142 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2143 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2144 if new_quote == '"""' and new_body[-1] == '"':
2146 new_body = new_body[:-1] + '\\"'
2147 orig_escape_count = body.count("\\")
2148 new_escape_count = new_body.count("\\")
2149 if new_escape_count > orig_escape_count:
2150 return # Do not introduce more escaping
2152 if new_escape_count == orig_escape_count and orig_quote == '"':
2153 return # Prefer double quotes
2155 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
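# --- Editor's sketch (not part of black) -------------------------------------------
# Mutating STRING leaves in place, per the rules above: prefer double quotes unless
# that would introduce new escapes (expected values are the editor's, not a
# recorded run).
def _editor_example_quotes() -> None:
    plain = Leaf(token.STRING, "'hello'")
    normalize_string_quotes(plain)
    print(plain.value)  # expected: "hello"

    would_escape = Leaf(token.STRING, "'say \"hi\"'")
    normalize_string_quotes(would_escape)
    print(would_escape.value)  # expected to stay single-quoted: 'say "hi"'
# ------------------------------------------------------------------------------------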
2158 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2159 """Make existing optional parentheses invisible or create new ones.
2161 Standardizes on visible parentheses for single-element tuples, and keeps
2162 existing visible parentheses for other tuples and generator expressions.
2165 for child in list(node.children):
2167 if child.type == syms.atom:
2169 is_empty_tuple(child)
2170 or is_one_tuple(child)
2171 or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
2173 first = child.children[0]
2174 last = child.children[-1]
2175 if first.type == token.LPAR and last.type == token.RPAR:
2176 # make parentheses invisible
2177 first.value = "" # type: ignore
2178 last.value = "" # type: ignore
2179 elif is_one_tuple(child):
2180 # wrap child in visible parentheses
2181 lpar = Leaf(token.LPAR, "(")
2182 rpar = Leaf(token.RPAR, ")")
2183 index = child.remove() or 0
2184 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2186 # wrap child in invisible parentheses
2187 lpar = Leaf(token.LPAR, "")
2188 rpar = Leaf(token.RPAR, "")
2189 index = child.remove() or 0
2190 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2192 check_lpar = isinstance(child, Leaf) and child.value in parens_after
2195 def is_empty_tuple(node: LN) -> bool:
2196 """Return True if `node` holds an empty tuple."""
2198 node.type == syms.atom
2199 and len(node.children) == 2
2200 and node.children[0].type == token.LPAR
2201 and node.children[1].type == token.RPAR
2205 def is_one_tuple(node: LN) -> bool:
2206 """Return True if `node` holds a tuple with one element, with or without parens."""
2207 if node.type == syms.atom:
2208 if len(node.children) != 3:
2211 lpar, gexp, rpar = node.children
2213 lpar.type == token.LPAR
2214 and gexp.type == syms.testlist_gexp
2215 and rpar.type == token.RPAR
2219 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2222 node.type in IMPLICIT_TUPLE
2223 and len(node.children) == 2
2224 and node.children[1].type == token.COMMA
2228 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2229 """Return True if `leaf` is a star or double star in a vararg or kwarg.
2231 If `within` includes VARARGS_PARENTS, this applies to function signatures.
2232 If `within` includes UNPACKING_PARENTS, it applies to right-hand
2233 side extended iterable unpacking (PEP 3132) and additional unpacking
2234 generalizations (PEP 448).
2236 if leaf.type not in STARS or not leaf.parent:
2240 if p.type == syms.star_expr:
2241 # Star expressions are also used as assignment targets in extended
2242 # iterable unpacking (PEP 3132). See what its parent is instead.
2248 return p.type in within
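The star contexts the docstring mentions can be seen with the stdlib ast module alone (blib2to3 node types are not needed just to look at the shapes); the sample sources below are made up:

    import ast

    print(ast.dump(ast.parse("def f(*args, **kwargs): pass").body[0].args))  # signature varargs
    print(ast.dump(ast.parse("head, *tail = items").body[0].targets[0]))     # PEP 3132 target
    print(ast.dump(ast.parse("merged = [*a, *b]").body[0].value))            # PEP 448 unpacking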
2251 def max_delimiter_priority_in_atom(node: LN) -> int:
2252 """Return maximum delimiter priority inside `node`.
2254 This is specific to atoms with contents contained in a pair of parentheses.
2255 If `node` isn't an atom or there are no enclosing parentheses, returns 0.
2257 if node.type != syms.atom:
2260 first = node.children[0]
2261 last = node.children[-1]
2262 if not (first.type == token.LPAR and last.type == token.RPAR):
2265 bt = BracketTracker()
2266 for c in node.children[1:-1]:
2267 if isinstance(c, Leaf):
2270 for leaf in c.leaves():
2273 return bt.max_delimiter_priority()
2279 def ensure_visible(leaf: Leaf) -> None:
2280 """Make sure parentheses are visible.
2282 They could be invisible as part of some statements (see
2283 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
2285 if leaf.type == token.LPAR:
2286 leaf.value = "("
2287 elif leaf.type == token.RPAR:
2288 leaf.value = ")"
2291 def is_python36(node: Node) -> bool:
2292 """Return True if the current file is using Python 3.6+ features.
2294 Currently looking for:
2295 - f-strings;
2296 - trailing commas after * or ** in function signatures.
2298 for n in node.pre_order():
2299 if n.type == token.STRING:
2300 value_head = n.value[:2] # type: ignore
2301 if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
2302 return True
2305 n.type == syms.typedargslist
2307 and n.children[-1].type == token.COMMA
2309 for ch in n.children:
2310 if ch.type in STARS:
2311 return True
2313 return False
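A rough self-check of the two triggers listed above. This assumes lib2to3_parse() (defined earlier in this module but not shown in this excerpt) is available; the sample sources are made up:

    for sample in ('x = f"{x}"\n', "def f(a, *args,): pass\n", "x = 1\n"):
        print(sample.rstrip(), "->", is_python36(lib2to3_parse(sample)))
    # the first two should report True (f-string; trailing comma after *args), the last False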
2316 PYTHON_EXTENSIONS = {".py"}
2317 BLACKLISTED_DIRECTORIES = {
2318 "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
2322 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
2323 """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
2324 and have one of the PYTHON_EXTENSIONS.
2326 for child in path.iterdir():
2328 if child.name in BLACKLISTED_DIRECTORIES:
2331 yield from gen_python_files_in_dir(child)
2333 elif child.suffix in PYTHON_EXTENSIONS:
2334 yield child
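Hypothetical usage of the generator above; "my_project" is a made-up directory name:

    root = Path("my_project")
    if root.is_dir():
        for source in gen_python_files_in_dir(root):
            print(source)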
2339 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2342 change_count: int = 0
2344 failure_count: int = 0
2346 def done(self, src: Path, changed: Changed) -> None:
2347 """Increment the counter for successful reformatting. Write out a message."""
2348 if changed is Changed.YES:
2349 reformatted = "would reformat" if self.check else "reformatted"
2351 out(f"{reformatted} {src}")
2352 self.change_count += 1
2355 if changed is Changed.NO:
2356 msg = f"{src} already well formatted, good job."
2357 else:
2358 msg = f"{src} wasn't modified on disk since last run."
2359 out(msg, bold=False)
2360 self.same_count += 1
2362 def failed(self, src: Path, message: str) -> None:
2363 """Increment the counter for failed reformatting. Write out a message."""
2364 err(f"error: cannot format {src}: {message}")
2365 self.failure_count += 1
2367 @property
2368 def return_code(self) -> int:
2369 """Return the exit code that the app should use.
2371 This considers the current state of changed files and failures:
2372 - if there were any failures, return 123;
2373 - if any files were changed and --check is being used, return 1;
2374 - otherwise return 0.
2376 # According to http://tldp.org/LDP/abs/html/exitcodes.html, return codes starting
2377 # with 126 are reserved by the shell.
2378 if self.failure_count:
2379 return 123
2381 elif self.change_count and self.check:
2382 return 1
2384 return 0
2386 def __str__(self) -> str:
2387 """Render a color report of the current state.
2389 Use `click.unstyle` to remove colors.
2391 if self.check:
2392 reformatted = "would be reformatted"
2393 unchanged = "would be left unchanged"
2394 failed = "would fail to reformat"
2395 else:
2396 reformatted = "reformatted"
2397 unchanged = "left unchanged"
2398 failed = "failed to reformat"
2399 report = []
2400 if self.change_count:
2401 s = "s" if self.change_count > 1 else ""
2402 report.append(
2403 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2404 )
2405 if self.same_count:
2406 s = "s" if self.same_count > 1 else ""
2407 report.append(f"{self.same_count} file{s} {unchanged}")
2408 if self.failure_count:
2409 s = "s" if self.failure_count > 1 else ""
2410 report.append(
2411 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2412 )
2413 return ", ".join(report) + "."
2416 def assert_equivalent(src: str, dst: str) -> None:
2417 """Raise AssertionError if `src` and `dst` aren't equivalent."""
2422 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2423 """Simple visitor generating strings to compare ASTs by content."""
2424 yield f"{' ' * depth}{node.__class__.__name__}("
2426 for field in sorted(node._fields):
2427 try:
2428 value = getattr(node, field)
2429 except AttributeError:
2430 continue
2432 yield f"{' ' * (depth+1)}{field}="
2434 if isinstance(value, list):
2435 for item in value:
2436 if isinstance(item, ast.AST):
2437 yield from _v(item, depth + 2)
2439 elif isinstance(value, ast.AST):
2440 yield from _v(value, depth + 2)
2442 else:
2443 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
2445 yield f"{' ' * depth}) # /{node.__class__.__name__}"
2447 try:
2448 src_ast = ast.parse(src)
2449 except Exception as exc:
2450 major, minor = sys.version_info[:2]
2451 raise AssertionError(
2452 f"cannot use --safe with this file; failed to parse source file "
2453 f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2454 f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
2457 try:
2458 dst_ast = ast.parse(dst)
2459 except Exception as exc:
2460 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2461 raise AssertionError(
2462 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2463 f"Please report a bug on https://github.com/ambv/black/issues. "
2464 f"This invalid output might be helpful: {log}"
2467 src_ast_str = "\n".join(_v(src_ast))
2468 dst_ast_str = "\n".join(_v(dst_ast))
2469 if src_ast_str != dst_ast_str:
2470 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2471 raise AssertionError(
2472 f"INTERNAL ERROR: Black produced code that is not equivalent to "
2474 f"Please report a bug on https://github.com/ambv/black/issues. "
2475 f"This diff might be helpful: {log}"
2479 def assert_stable(src: str, dst: str, line_length: int) -> None:
2480 """Raise AssertionError if `dst` reformats differently the second time."""
2481 newdst = format_str(dst, line_length=line_length)
2482 if dst != newdst:
2483 log = dump_to_file(
2484 diff(src, dst, "source", "first pass"),
2485 diff(dst, newdst, "first pass", "second pass"),
2486 )
2487 raise AssertionError(
2488 f"INTERNAL ERROR: Black produced different code on the second pass "
2489 f"of the formatter. "
2490 f"Please report a bug on https://github.com/ambv/black/issues. "
2491 f"This diff might be helpful: {log}"
2495 def dump_to_file(*output: str) -> str:
2496 """Dump `output` to a temporary file. Return path to the file."""
2499 with tempfile.NamedTemporaryFile(
2500 mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
2501 ) as f:
2502 for lines in output:
2503 f.write(lines)
2504 if lines and lines[-1] != "\n":
2505 f.write("\n")
2507 return f.name
2509 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
2510 """Return a unified diff string between strings `a` and `b`."""
2513 a_lines = [line + "\n" for line in a.split("\n")]
2514 b_lines = [line + "\n" for line in b.split("\n")]
2515 return "".join(
2516 difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
2517 )
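Example rendering of the helper above:

    print(diff("a\nb\nc\n", "a\nB\nc\n", "before", "after"))
    # prints a unified diff with "--- before" / "+++ after" headers and 5 lines of context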
2520 def cancel(tasks: List[asyncio.Task]) -> None:
2521 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
2527 def shutdown(loop: BaseEventLoop) -> None:
2528 """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
2530 # This part is borrowed from asyncio/runners.py in Python 3.7b2.
2531 to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
2535 for task in to_cancel:
2536 task.cancel()
2537 loop.run_until_complete(
2538 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
2541 # `concurrent.futures.Future` objects cannot be cancelled once they
2542 # are already running. Some might still be running when `shutdown()` is called.
2543 # Silence their logger's spew about the event loop being closed.
2544 cf_logger = logging.getLogger("concurrent.futures")
2545 cf_logger.setLevel(logging.CRITICAL)
2546 loop.close()
2549 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
2550 """Replace `regex` with `replacement` twice on `original`.
2552 This is used by string normalization to perform replacements on
2553 overlapping matches.
2555 return regex.sub(replacement, regex.sub(replacement, original))
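Why two passes matter: with patterns shaped like the quote-normalization ones above, adjacent matches overlap (the character consumed as the "preceding character" of one match may itself be a quote), so a single re.sub() misses every other occurrence:

    import re

    unescaped = re.compile(r"(([^\\]|^)(\\\\)*)'")
    print(unescaped.sub(r"\1X", "''''"))         # 'X'X, one pass leaves quotes behind
    print(sub_twice(unescaped, r"\1X", "''''"))  # XXXX, the second pass catches them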
2558 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2561 def get_cache_file(line_length: int) -> Path:
2562 return CACHE_DIR / f"cache.{line_length}.pickle"
2565 def read_cache(line_length: int) -> Cache:
2566 """Read the cache if it exists and is well formed.
2568 If it is not well formed, the call to write_cache later should resolve the issue.
2570 cache_file = get_cache_file(line_length)
2571 if not cache_file.exists():
2572 return {}
2574 with cache_file.open("rb") as fobj:
2575 try:
2576 cache: Cache = pickle.load(fobj)
2577 except pickle.UnpicklingError:
2578 return {}
2580 return cache
2583 def get_cache_info(path: Path) -> CacheInfo:
2584 """Return the information used to check if a file is already formatted or not."""
2585 stat = path.stat()
2586 return stat.st_mtime, stat.st_size
2590 cache: Cache, sources: Iterable[Path]
2591 ) -> Tuple[List[Path], List[Path]]:
2592 """Split a list of paths into two.
2594 The first list contains paths of files that were modified on disk or are not in the
2595 cache. The other list contains paths to non-modified files.
2600 if cache.get(src) != get_cache_info(src):
2607 def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
2608 """Update the cache file."""
2609 cache_file = get_cache_file(line_length)
2611 if not CACHE_DIR.exists():
2612 CACHE_DIR.mkdir(parents=True)
2613 new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
2614 with cache_file.open("wb") as fobj:
2615 pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
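A minimal round trip through the cache helpers above; "example.py" is a made-up file name and the pickle really is written to the user cache directory:

    src = Path("example.py")
    if src.exists():
        write_cache({}, [src], DEFAULT_LINE_LENGTH)
        cached = read_cache(DEFAULT_LINE_LENGTH)
        print(cached[src.resolve()] == get_cache_info(src))  # True until the file changes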
2620 if __name__ == "__main__":
2621 main()