All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial, wraps
9 from multiprocessing import Manager
11 from pathlib import Path
34 from appdirs import user_cache_dir
35 from attr import dataclass, Factory
39 from blib2to3.pytree import Node, Leaf, type_repr
40 from blib2to3 import pygram, pytree
41 from blib2to3.pgen2 import driver, token
42 from blib2to3.pgen2.parse import ParseError
44 __version__ = "18.4a4"
45 DEFAULT_LINE_LENGTH = 88
48 syms = pygram.python_symbols
56 LN = Union[Leaf, Node]
57 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
60 CacheInfo = Tuple[Timestamp, FileSize]
61 Cache = Dict[Path, CacheInfo]
62 out = partial(click.secho, bold=True, err=True)
63 err = partial(click.secho, fg="red", err=True)
66 class NothingChanged(UserWarning):
67 """Raised by :func:`format_file` when reformatted code is the same as source."""
70 class CannotSplit(Exception):
71 """A readable split that fits the allotted line length is impossible.
73 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
74 :func:`delimiter_split`.
78 class FormatError(Exception):
79 """Base exception for `# fmt: on` and `# fmt: off` handling.
81 It holds the number of bytes of the prefix consumed before the format
82 control comment appeared.
85 def __init__(self, consumed: int) -> None:
86 super().__init__(consumed)
87 self.consumed = consumed
89 def trim_prefix(self, leaf: Leaf) -> None:
90 leaf.prefix = leaf.prefix[self.consumed :]
92 def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
93 """Returns a new Leaf from the consumed part of the prefix."""
94 unformatted_prefix = leaf.prefix[: self.consumed]
95 return Leaf(token.NEWLINE, unformatted_prefix)
98 class FormatOn(FormatError):
99 """Found a comment like `# fmt: on` in the file."""
102 class FormatOff(FormatError):
103 """Found a comment like `# fmt: off` in the file."""
106 class WriteBack(Enum):
123 default=DEFAULT_LINE_LENGTH,
124 help="How many character per line to allow.",
131 "Don't write the files back, just return the status. Return code 0 "
132 "means nothing would change. Return code 1 means some files would be "
133 "reformatted. Return code 123 means there was an internal error."
139 help="Don't write the files back, just output a diff for each file on stdout.",
144 help="If --fast given, skip temporary sanity checks. [default: --safe]",
151 "Don't emit non-error messages to stderr. Errors are still emitted, "
152 "silence those with 2>/dev/null."
155 @click.version_option(version=__version__)
160 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
173 """The uncompromising code formatter."""
174 sources: List[Path] = []
178 sources.extend(gen_python_files_in_dir(p))
180 # if a file was explicitly given, we don't care about its extension
183 sources.append(Path("-"))
185 err(f"invalid path: {s}")
187 if check and not diff:
188 write_back = WriteBack.NO
190 write_back = WriteBack.DIFF
192 write_back = WriteBack.YES
193 report = Report(check=check, quiet=quiet)
194 if len(sources) == 0:
195 out("No paths given. Nothing to do 😴")
199 elif len(sources) == 1:
200 reformat_one(sources[0], line_length, fast, write_back, report)
202 loop = asyncio.get_event_loop()
203 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
205 loop.run_until_complete(
207 sources, line_length, fast, write_back, report, loop, executor
213 out("All done! ✨ 🍰 ✨")
214 click.echo(str(report))
215 ctx.exit(report.return_code)
219 src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
221 """Reformat a single file under `src` without spawning child processes.
223 If `quiet` is True, non-error messages are not output. `line_length`,
224 `write_back`, and `fast` options are passed to :func:`format_file_in_place`.
228 if not src.is_file() and str(src) == "-":
229 if format_stdin_to_stdout(
230 line_length=line_length, fast=fast, write_back=write_back
232 changed = Changed.YES
235 if write_back != WriteBack.DIFF:
236 cache = read_cache(line_length)
238 if src in cache and cache[src] == get_cache_info(src):
239 changed = Changed.CACHED
241 changed is not Changed.CACHED
242 and format_file_in_place(
243 src, line_length=line_length, fast=fast, write_back=write_back
246 changed = Changed.YES
247 if write_back == WriteBack.YES and changed is not Changed.NO:
248 write_cache(cache, [src], line_length)
249 report.done(src, changed)
250 except Exception as exc:
251 report.failed(src, str(exc))
254 async def schedule_formatting(
258 write_back: WriteBack,
263 """Run formatting of `sources` in parallel using the provided `executor`.
265 (Use ProcessPoolExecutors for actual parallelism.)
267 `line_length`, `write_back`, and `fast` options are passed to
268 :func:`format_file_in_place`.
271 if write_back != WriteBack.DIFF:
272 cache = read_cache(line_length)
273 sources, cached = filter_cached(cache, sources)
275 report.done(src, Changed.CACHED)
280 if write_back == WriteBack.DIFF:
281 # For diff output, we need locks to ensure we don't interleave output
282 # from different processes.
284 lock = manager.Lock()
286 src: loop.run_in_executor(
287 executor, format_file_in_place, src, line_length, fast, write_back, lock
291 _task_values = list(tasks.values())
293 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
294 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
295 except NotImplementedError:
296 # There are no good alternatives for these on Windows
298 await asyncio.wait(_task_values)
299 for src, task in tasks.items():
301 report.failed(src, "timed out, cancelling")
303 cancelled.append(task)
304 elif task.cancelled():
305 cancelled.append(task)
306 elif task.exception():
307 report.failed(src, str(task.exception()))
309 formatted.append(src)
310 report.done(src, Changed.YES if task.result() else Changed.NO)
313 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
314 if write_back == WriteBack.YES and formatted:
315 write_cache(cache, formatted, line_length)
318 def format_file_in_place(
322 write_back: WriteBack = WriteBack.NO,
323 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
325 """Format file under `src` path. Return True if changed.
327 If `write_back` is YES, write reformatted code back to the file. If it is DIFF, write a diff to stdout.
328 `line_length` and `fast` options are passed to :func:`format_file_contents`.
331 with tokenize.open(src) as src_buffer:
332 src_contents = src_buffer.read()
334 dst_contents = format_file_contents(
335 src_contents, line_length=line_length, fast=fast
337 except NothingChanged:
340 if write_back == write_back.YES:
341 with open(src, "w", encoding=src_buffer.encoding) as f:
342 f.write(dst_contents)
343 elif write_back == write_back.DIFF:
344 src_name = f"{src} (original)"
345 dst_name = f"{src} (formatted)"
346 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
350 sys.stdout.write(diff_contents)
357 def format_stdin_to_stdout(
358 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
360 """Format file on stdin. Return True if changed.
362 If `write_back` is YES, write reformatted code back to stdout. If it is DIFF, write a diff to stdout.
363 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
365 src = sys.stdin.read()
368 dst = format_file_contents(src, line_length=line_length, fast=fast)
371 except NothingChanged:
375 if write_back == WriteBack.YES:
376 sys.stdout.write(dst)
377 elif write_back == WriteBack.DIFF:
378 src_name = "<stdin> (original)"
379 dst_name = "<stdin> (formatted)"
380 sys.stdout.write(diff(src, dst, src_name, dst_name))
383 def format_file_contents(
384 src_contents: str, line_length: int, fast: bool
386 """Reformat contents of a file and return new contents.
388 If `fast` is False, additionally confirm that the reformatted code is
389 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
390 `line_length` is passed to :func:`format_str`.
392 if src_contents.strip() == "":
395 dst_contents = format_str(src_contents, line_length=line_length)
396 if src_contents == dst_contents:
400 assert_equivalent(src_contents, dst_contents)
401 assert_stable(src_contents, dst_contents, line_length=line_length)
405 def format_str(src_contents: str, line_length: int) -> FileContent:
406 """Reformat a string and return new contents.
408 `line_length` determines how many characters per line are allowed.
410 src_node = lib2to3_parse(src_contents)
412 lines = LineGenerator()
413 elt = EmptyLineTracker()
414 py36 = is_python36(src_node)
417 for current_line in lines.visit(src_node):
418 for _ in range(after):
419 dst_contents += str(empty_line)
420 before, after = elt.maybe_empty_lines(current_line)
421 for _ in range(before):
422 dst_contents += str(empty_line)
423 for line in split_line(current_line, line_length=line_length, py36=py36):
424 dst_contents += str(line)
429 pygram.python_grammar_no_print_statement_no_exec_statement,
430 pygram.python_grammar_no_print_statement,
431 pygram.python_grammar,
435 def lib2to3_parse(src_txt: str) -> Node:
436 """Given a string with source, return the lib2to3 Node."""
437 grammar = pygram.python_grammar_no_print_statement
438 if src_txt[-1] != "\n":
439 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
441 for grammar in GRAMMARS:
442 drv = driver.Driver(grammar, pytree.convert)
444 result = drv.parse_string(src_txt, True)
447 except ParseError as pe:
448 lineno, column = pe.context[1]
449 lines = src_txt.splitlines()
451 faulty_line = lines[lineno - 1]
453 faulty_line = "<line number missing in source>"
454 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
458 if isinstance(result, Leaf):
459 result = Node(syms.file_input, [result])
463 def lib2to3_unparse(node: Node) -> str:
464 """Given a lib2to3 node, return its string representation."""
472 class Visitor(Generic[T]):
473 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
475 def visit(self, node: LN) -> Iterator[T]:
476 """Main method to visit `node` and its children.
478 It tries to find a `visit_*()` method for the given `node.type`, like
479 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
480 If no dedicated `visit_*()` method is found, chooses `visit_default()`
483 Then yields objects of type `T` from the selected visitor.
486 name = token.tok_name[node.type]
488 name = type_repr(node.type)
489 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
491 def visit_default(self, node: LN) -> Iterator[T]:
492 """Default `visit_*()` implementation. Recurses to children of `node`."""
493 if isinstance(node, Node):
494 for child in node.children:
495 yield from self.visit(child)
499 class DebugVisitor(Visitor[T]):
502 def visit_default(self, node: LN) -> Iterator[T]:
503 indent = " " * (2 * self.tree_depth)
504 if isinstance(node, Node):
505 _type = type_repr(node.type)
506 out(f"{indent}{_type}", fg="yellow")
508 for child in node.children:
509 yield from self.visit(child)
512 out(f"{indent}/{_type}", fg="yellow", bold=False)
514 _type = token.tok_name.get(node.type, str(node.type))
515 out(f"{indent}{_type}", fg="blue", nl=False)
517 # We don't have to handle prefixes for `Node` objects since
518 # that delegates to the first child anyway.
519 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
520 out(f" {node.value!r}", fg="blue", bold=False)
523 def show(cls, code: str) -> None:
524 """Pretty-print the lib2to3 AST of a given string of `code`.
526 Convenience method for debugging.
528 v: DebugVisitor[None] = DebugVisitor()
529 list(v.visit(lib2to3_parse(code)))
532 KEYWORDS = set(keyword.kwlist)
533 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
534 FLOW_CONTROL = {"return", "raise", "break", "continue"}
545 STANDALONE_COMMENT = 153
546 LOGIC_OPERATORS = {"and", "or"}
570 STARS = {token.STAR, token.DOUBLESTAR}
573 syms.argument, # double star in arglist
574 syms.trailer, # single argument to call
576 syms.varargslist, # lambdas
578 UNPACKING_PARENTS = {
579 syms.atom, # single element of a list or set literal
601 COMPREHENSION_PRIORITY = 20
606 COMPARATOR_PRIORITY = 3
611 class BracketTracker:
612 """Keeps track of brackets on a line."""
615 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
616 delimiters: Dict[LeafID, Priority] = Factory(dict)
617 previous: Optional[Leaf] = None
618 _for_loop_variable: bool = False
619 _lambda_arguments: bool = False
621 def mark(self, leaf: Leaf) -> None:
622 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
624 All leaves receive an int `bracket_depth` field that stores how deep
625 within brackets a given leaf is. 0 means there are no enclosing brackets
626 that started on this line.
628 If a leaf is itself a closing bracket, it receives an `opening_bracket`
629 field that it forms a pair with. This is a one-directional link to
630 avoid reference cycles.
632 If a leaf is a delimiter (a token on which Black can split the line if
633 needed) and it's on depth 0, its `id()` is stored in the tracker's
636 if leaf.type == token.COMMENT:
639 self.maybe_decrement_after_for_loop_variable(leaf)
640 self.maybe_decrement_after_lambda_arguments(leaf)
641 if leaf.type in CLOSING_BRACKETS:
643 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
644 leaf.opening_bracket = opening_bracket
645 leaf.bracket_depth = self.depth
647 delim = is_split_before_delimiter(leaf, self.previous)
648 if delim and self.previous is not None:
649 self.delimiters[id(self.previous)] = delim
651 delim = is_split_after_delimiter(leaf, self.previous)
653 self.delimiters[id(leaf)] = delim
654 if leaf.type in OPENING_BRACKETS:
655 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
658 self.maybe_increment_lambda_arguments(leaf)
659 self.maybe_increment_for_loop_variable(leaf)
661 def any_open_brackets(self) -> bool:
662 """Return True if there is a yet unmatched open bracket on the line."""
663 return bool(self.bracket_match)
665 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
666 """Return the highest priority of a delimiter found on the line.
668 Values are consistent with what `is_split_*_delimiter()` return.
669 Raises ValueError on no delimiters.
671 return max(v for k, v in self.delimiters.items() if k not in exclude)
673 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
674 """In a for loop, or comprehension, the variables are often unpacks.
676 To avoid splitting on the comma in this situation, increase the depth of
677 tokens between `for` and `in`.
679 if leaf.type == token.NAME and leaf.value == "for":
681 self._for_loop_variable = True
686 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
687 """See `maybe_increment_for_loop_variable` above for explanation."""
688 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
690 self._for_loop_variable = False
695 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
696 """In a lambda expression, there might be more than one argument.
698 To avoid splitting on the comma in this situation, increase the depth of
699 tokens between `lambda` and `:`.
701 if leaf.type == token.NAME and leaf.value == "lambda":
703 self._lambda_arguments = True
708 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
709 """See `maybe_increment_lambda_arguments` above for explanation."""
710 if self._lambda_arguments and leaf.type == token.COLON:
712 self._lambda_arguments = False
717 def get_open_lsqb(self) -> Optional[Leaf]:
718 """Return the most recent opening square bracket (if any)."""
719 return self.bracket_match.get((self.depth - 1, token.RSQB))
724 """Holds leaves and comments. Can be printed with `str(line)`."""
727 leaves: List[Leaf] = Factory(list)
728 comments: List[Tuple[Index, Leaf]] = Factory(list)
729 bracket_tracker: BracketTracker = Factory(BracketTracker)
730 inside_brackets: bool = False
732 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
733 """Add a new `leaf` to the end of the line.
735 Unless `preformatted` is True, the `leaf` will receive a new consistent
736 whitespace prefix and metadata applied by :class:`BracketTracker`.
737 Trailing commas are maybe removed, unpacked for loop variables are
738 demoted from being delimiters.
740 Inline comments are put aside.
742 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
746 if token.COLON == leaf.type and self.is_class_paren_empty:
748 if self.leaves and not preformatted:
749 # Note: at this point leaf.prefix should be empty except for
750 # imports, for which we only preserve newlines.
751 leaf.prefix += whitespace(
752 leaf, complex_subscript=self.is_complex_subscript(leaf)
754 if self.inside_brackets or not preformatted:
755 self.bracket_tracker.mark(leaf)
756 self.maybe_remove_trailing_comma(leaf)
757 if not self.append_comment(leaf):
758 self.leaves.append(leaf)
760 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
761 """Like :func:`append()` but disallow invalid standalone comment structure.
763 Raises ValueError when any `leaf` is appended after a standalone comment
764 or when a standalone comment is not the first leaf on the line.
766 if self.bracket_tracker.depth == 0:
768 raise ValueError("cannot append to standalone comments")
770 if self.leaves and leaf.type == STANDALONE_COMMENT:
772 "cannot append standalone comments to a populated line"
775 self.append(leaf, preformatted=preformatted)
778 def is_comment(self) -> bool:
779 """Is this line a standalone comment?"""
780 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
783 def is_decorator(self) -> bool:
784 """Is this line a decorator?"""
785 return bool(self) and self.leaves[0].type == token.AT
788 def is_import(self) -> bool:
789 """Is this an import line?"""
790 return bool(self) and is_import(self.leaves[0])
793 def is_class(self) -> bool:
794 """Is this line a class definition?"""
797 and self.leaves[0].type == token.NAME
798 and self.leaves[0].value == "class"
802 def is_def(self) -> bool:
803 """Is this a function definition? (Also returns True for async defs.)"""
805 first_leaf = self.leaves[0]
810 second_leaf: Optional[Leaf] = self.leaves[1]
814 (first_leaf.type == token.NAME and first_leaf.value == "def")
816 first_leaf.type == token.ASYNC
817 and second_leaf is not None
818 and second_leaf.type == token.NAME
819 and second_leaf.value == "def"
824 def is_flow_control(self) -> bool:
825 """Is this line a flow control statement?
827 Those are `return`, `raise`, `break`, and `continue`.
831 and self.leaves[0].type == token.NAME
832 and self.leaves[0].value in FLOW_CONTROL
836 def is_yield(self) -> bool:
837 """Is this line a yield statement?"""
840 and self.leaves[0].type == token.NAME
841 and self.leaves[0].value == "yield"
845 def is_class_paren_empty(self) -> bool:
846 """Is this a class with no base classes but using parentheses?
848 Those are unnecessary and should be removed.
852 and len(self.leaves) == 4
854 and self.leaves[2].type == token.LPAR
855 and self.leaves[2].value == "("
856 and self.leaves[3].type == token.RPAR
857 and self.leaves[3].value == ")"
860 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
861 """If so, needs to be split before emitting."""
862 for leaf in self.leaves:
863 if leaf.type == STANDALONE_COMMENT:
864 if leaf.bracket_depth <= depth_limit:
869 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
870 """Remove trailing comma if there is one and it's safe."""
873 and self.leaves[-1].type == token.COMMA
874 and closing.type in CLOSING_BRACKETS
878 if closing.type == token.RBRACE:
879 self.remove_trailing_comma()
882 if closing.type == token.RSQB:
883 comma = self.leaves[-1]
884 if comma.parent and comma.parent.type == syms.listmaker:
885 self.remove_trailing_comma()
888 # For parens let's check if it's safe to remove the comma. If the
889 # trailing one is the only one, we might mistakenly change a tuple
890 # into a different type by removing the comma.
891 depth = closing.bracket_depth + 1
893 opening = closing.opening_bracket
894 for _opening_index, leaf in enumerate(self.leaves):
901 for leaf in self.leaves[_opening_index + 1 :]:
905 bracket_depth = leaf.bracket_depth
906 if bracket_depth == depth and leaf.type == token.COMMA:
908 if leaf.parent and leaf.parent.type == syms.arglist:
913 self.remove_trailing_comma()
918 def append_comment(self, comment: Leaf) -> bool:
919 """Add an inline or standalone comment to the line."""
921 comment.type == STANDALONE_COMMENT
922 and self.bracket_tracker.any_open_brackets()
927 if comment.type != token.COMMENT:
930 after = len(self.leaves) - 1
932 comment.type = STANDALONE_COMMENT
937 self.comments.append((after, comment))
940 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
941 """Generate comments that should appear directly after `leaf`."""
942 for _leaf_index, _leaf in enumerate(self.leaves):
949 for index, comment_after in self.comments:
950 if _leaf_index == index:
953 def remove_trailing_comma(self) -> None:
954 """Remove the trailing comma and move the comments attached to it."""
955 comma_index = len(self.leaves) - 1
956 for i in range(len(self.comments)):
957 comment_index, comment = self.comments[i]
958 if comment_index == comma_index:
959 self.comments[i] = (comma_index - 1, comment)
962 def is_complex_subscript(self, leaf: Leaf) -> bool:
963 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
965 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
967 if open_lsqb is None:
970 subscript_start = open_lsqb.next_sibling
972 isinstance(subscript_start, Node)
973 and subscript_start.type == syms.subscriptlist
975 subscript_start = child_towards(subscript_start, leaf)
976 return subscript_start is not None and any(
977 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
980 def __str__(self) -> str:
981 """Render the line."""
985 indent = " " * self.depth
986 leaves = iter(self.leaves)
988 res = f"{first.prefix}{indent}{first.value}"
991 for _, comment in self.comments:
995 def __bool__(self) -> bool:
996 """Return True if the line has leaves or comments."""
997 return bool(self.leaves or self.comments)
1000 class UnformattedLines(Line):
1001 """Just like :class:`Line` but stores lines which aren't reformatted."""
1003 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
1004 """Just add a new `leaf` to the end of the lines.
1006 The `preformatted` argument is ignored.
1008 Keeps track of indentation `depth`, which is useful when the user
1009 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
1012 list(generate_comments(leaf))
1013 except FormatOn as f_on:
1014 self.leaves.append(f_on.leaf_from_consumed(leaf))
1017 self.leaves.append(leaf)
1018 if leaf.type == token.INDENT:
1020 elif leaf.type == token.DEDENT:
1023 def __str__(self) -> str:
1024 """Render unformatted lines from leaves which were added with `append()`.
1026 `depth` is not used for indentation in this case.
1032 for leaf in self.leaves:
1036 def append_comment(self, comment: Leaf) -> bool:
1037 """Not implemented in this class. Raises `NotImplementedError`."""
1038 raise NotImplementedError("Unformatted lines don't store comments separately.")
1040 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1041 """Does nothing and returns False."""
1044 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1045 """Does nothing and returns False."""
1050 class EmptyLineTracker:
1051 """Provides a stateful method that returns the number of potential extra
1052 empty lines needed before and after the currently processed line.
1054 Note: this tracker works on lines that haven't been split yet. It assumes
1055 the prefix of the first leaf consists of optional newlines. Those newlines
1056 are consumed by `maybe_empty_lines()` and included in the computation.
1058 previous_line: Optional[Line] = None
1059 previous_after: int = 0
1060 previous_defs: List[int] = Factory(list)
1062 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1063 """Return the number of extra empty lines before and after the `current_line`.
1065 This is for separating `def`, `async def` and `class` with extra empty
1066 lines (two on module-level), as well as providing an extra empty line
1067 after flow control keywords to make them more prominent.
1069 if isinstance(current_line, UnformattedLines):
1072 before, after = self._maybe_empty_lines(current_line)
1073 before -= self.previous_after
1074 self.previous_after = after
1075 self.previous_line = current_line
1076 return before, after
1078 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1080 if current_line.depth == 0:
1082 if current_line.leaves:
1083 # Consume the first leaf's extra newlines.
1084 first_leaf = current_line.leaves[0]
1085 before = first_leaf.prefix.count("\n")
1086 before = min(before, max_allowed)
1087 first_leaf.prefix = ""
1090 depth = current_line.depth
1091 while self.previous_defs and self.previous_defs[-1] >= depth:
1092 self.previous_defs.pop()
1093 before = 1 if depth else 2
1094 is_decorator = current_line.is_decorator
1095 if is_decorator or current_line.is_def or current_line.is_class:
1096 if not is_decorator:
1097 self.previous_defs.append(depth)
1098 if self.previous_line is None:
1099 # Don't insert empty lines before the first line in the file.
1102 if self.previous_line.is_decorator:
1106 self.previous_line.is_comment
1107 and self.previous_line.depth == current_line.depth
1113 if current_line.depth:
1119 and self.previous_line.is_import
1120 and not current_line.is_import
1121 and depth == self.previous_line.depth
1123 return (before or 1), 0
1129 class LineGenerator(Visitor[Line]):
1130 """Generates reformatted Line objects. Empty lines are not emitted.
1132 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1133 in ways that will no longer stringify to valid Python code on the tree.
1135 current_line: Line = Factory(Line)
1137 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1140 If the line is empty, only emit if it makes sense.
1141 If the line is too long, split it first and then generate.
1143 If any lines were generated, set up a new current_line.
1145 if not self.current_line:
1146 if self.current_line.__class__ == type:
1147 self.current_line.depth += indent
1149 self.current_line = type(depth=self.current_line.depth + indent)
1150 return # Line is empty, don't emit. Creating a new one unnecessary.
1152 complete_line = self.current_line
1153 self.current_line = type(depth=complete_line.depth + indent)
1156 def visit(self, node: LN) -> Iterator[Line]:
1157 """Main method to visit `node` and its children.
1159 Yields :class:`Line` objects.
1161 if isinstance(self.current_line, UnformattedLines):
1162 # File contained `# fmt: off`
1163 yield from self.visit_unformatted(node)
1166 yield from super().visit(node)
1168 def visit_default(self, node: LN) -> Iterator[Line]:
1169 """Default `visit_*()` implementation. Recurses to children of `node`."""
1170 if isinstance(node, Leaf):
1171 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1173 for comment in generate_comments(node):
1174 if any_open_brackets:
1175 # any comment within brackets is subject to splitting
1176 self.current_line.append(comment)
1177 elif comment.type == token.COMMENT:
1178 # regular trailing comment
1179 self.current_line.append(comment)
1180 yield from self.line()
1183 # regular standalone comment
1184 yield from self.line()
1186 self.current_line.append(comment)
1187 yield from self.line()
1189 except FormatOff as f_off:
1190 f_off.trim_prefix(node)
1191 yield from self.line(type=UnformattedLines)
1192 yield from self.visit(node)
1194 except FormatOn as f_on:
1195 # This only happens here if somebody says "fmt: on" multiple
1197 f_on.trim_prefix(node)
1198 yield from self.visit_default(node)
1201 normalize_prefix(node, inside_brackets=any_open_brackets)
1202 if node.type == token.STRING:
1203 normalize_string_quotes(node)
1204 if node.type not in WHITESPACE:
1205 self.current_line.append(node)
1206 yield from super().visit_default(node)
1208 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1209 """Increase indentation level, maybe yield a line."""
1210 # In blib2to3 INDENT never holds comments.
1211 yield from self.line(+1)
1212 yield from self.visit_default(node)
1214 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1215 """Decrease indentation level, maybe yield a line."""
1216 # The current line might still wait for trailing comments. At DEDENT time
1217 # there won't be any (they would be prefixes on the preceding NEWLINE).
1218 # Emit the line then.
1219 yield from self.line()
1221 # While DEDENT has no value, its prefix may contain standalone comments
1222 # that belong to the current indentation level. Get 'em.
1223 yield from self.visit_default(node)
1225 # Finally, emit the dedent.
1226 yield from self.line(-1)
1229 self, node: Node, keywords: Set[str], parens: Set[str]
1230 ) -> Iterator[Line]:
1231 """Visit a statement.
1233 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1234 `def`, `with`, `class`, and `assert`.
1236 The relevant Python language `keywords` for a given statement will be
1237 NAME leaves within it. This method puts those on a separate line.
1239 `parens` holds pairs of nodes where invisible parentheses should be put.
1240 Keys hold nodes after which opening parentheses should be put, values
1241 hold nodes before which closing parentheses should be put.
1243 normalize_invisible_parens(node, parens_after=parens)
1244 for child in node.children:
1245 if child.type == token.NAME and child.value in keywords: # type: ignore
1246 yield from self.line()
1248 yield from self.visit(child)
1250 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1251 """Visit a statement without nested statements."""
1252 is_suite_like = node.parent and node.parent.type in STATEMENT
1254 yield from self.line(+1)
1255 yield from self.visit_default(node)
1256 yield from self.line(-1)
1259 yield from self.line()
1260 yield from self.visit_default(node)
1262 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1263 """Visit `async def`, `async for`, `async with`."""
1264 yield from self.line()
1266 children = iter(node.children)
1267 for child in children:
1268 yield from self.visit(child)
1270 if child.type == token.ASYNC:
1273 internal_stmt = next(children)
1274 for child in internal_stmt.children:
1275 yield from self.visit(child)
1277 def visit_decorators(self, node: Node) -> Iterator[Line]:
1278 """Visit decorators."""
1279 for child in node.children:
1280 yield from self.line()
1281 yield from self.visit(child)
1283 def visit_import_from(self, node: Node) -> Iterator[Line]:
1284 """Visit import_from and maybe put invisible parentheses.
1286 This is separate from `visit_stmt` because import statements don't
1287 support arbitrary atoms and thus handling of parentheses is custom.
1290 for index, child in enumerate(node.children):
1292 if child.type == token.LPAR:
1293 # make parentheses invisible
1294 child.value = "" # type: ignore
1295 node.children[-1].value = "" # type: ignore
1297 # insert invisible parentheses
1298 node.insert_child(index, Leaf(token.LPAR, ""))
1299 node.append_child(Leaf(token.RPAR, ""))
1303 child.type == token.NAME and child.value == "import" # type: ignore
1306 for child in node.children:
1307 yield from self.visit(child)
def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
    """Drop the semicolon; whatever follows it starts on a separate line.

    The leaf itself is never appended anywhere, only `self.line()` is called.
    """
    yield from self.line()
def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
    """Handle the end of the file.

    The end marker goes through `visit_default` first (which processes
    outstanding comments), then a final `self.line()` call ends the output.
    """
    yield from self.visit_default(leaf)
    yield from self.line()
def visit_unformatted(self, node: LN) -> Iterator[Line]:
    """Visit `node` while a `# fmt: off` region is in effect.

    Nodes are simply descended into. Leaves are appended to the current line;
    if that raises `FormatOn`, the consumed part of the leaf's prefix is
    trimmed, `self.line()` is called and the leaf is visited again.
    """
    if isinstance(node, Node):
        for child in node.children:
            yield from self.visit(child)
        return

    try:
        self.current_line.append(node)
    except FormatOn as resume:
        resume.trim_prefix(node)
        yield from self.line()
        yield from self.visit(node)

    if node.type == token.ENDMARKER:
        # somebody decided not to put a final `# fmt: on`
        yield from self.line()
def __attrs_post_init__(self) -> None:
    """Bind the per-statement visitor methods.

    Most of them are `visit_stmt` with `keywords` and `parens` pre-filled;
    the last two are aliases of existing visitor methods.
    """
    stmt = self.visit_stmt
    no_parens: Set[str] = set()
    self.visit_assert_stmt = partial(
        stmt, keywords={"assert"}, parens={"assert", ","}
    )
    self.visit_if_stmt = partial(
        stmt, keywords={"if", "else", "elif"}, parens={"if"}
    )
    self.visit_while_stmt = partial(
        stmt, keywords={"while", "else"}, parens={"while"}
    )
    self.visit_for_stmt = partial(
        stmt, keywords={"for", "else"}, parens={"for", "in"}
    )
    self.visit_try_stmt = partial(
        stmt, keywords={"try", "except", "else", "finally"}, parens=no_parens
    )
    self.visit_except_clause = partial(stmt, keywords={"except"}, parens=no_parens)
    self.visit_with_stmt = partial(stmt, keywords={"with"}, parens=no_parens)
    self.visit_funcdef = partial(stmt, keywords={"def"}, parens=no_parens)
    self.visit_classdef = partial(stmt, keywords={"class"}, parens=no_parens)
    self.visit_async_funcdef = self.visit_async_stmt
    self.visit_decorated = self.visit_decorators
# Node types that hold a tuple written without enclosing parentheses.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps every opening bracket token to its closing counterpart.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Leaf types that `whitespace()` never prefixes with anything.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.

    The result is the empty string, a single space, two spaces (before
    comments), or, for typed default arguments, the prefix already stored on
    the neighbouring equal sign. Every branch that does not return explicitly
    falls through to the single-space default at the bottom.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    if (
        t == token.COLON
        and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
    ):
        return NO

    prev = leaf.prev_sibling
    if not prev:
        # First child of its parent: decide based on the leaf preceding the parent.
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist, syms.argument, syms.parameters, syms.varargslist
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument. So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in STARS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument. So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | STARS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        # dict unpacking
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if (
                prevp.type == token.COLON
                and prevp_parent.type in {syms.subscript, syms.sliceop}
            ):
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t == token.NAME or t == token.NUMBER:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    # NOTE: there used to be a trailing `elif p.type == syms.sliceop` branch
    # here. It could never run because `sliceop` parents are already claimed by
    # the "indexing" branch above, so it was removed.
    return SPACE
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Walks up through the parents of `node` until one of them has a previous
    sibling. That sibling is returned if it is a leaf, otherwise its last
    leaf is (or None when it has no leaves at all).
    """
    current = node
    while current:
        sibling = current.prev_sibling
        if sibling:
            if isinstance(sibling, Leaf):
                return sibling

            leaves = list(sibling.leaves())
            return leaves[-1] if leaves else None

        current = current.parent
    return None
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Climbs from `descendant` through `.parent` links until a node whose parent
    compares equal to `ancestor` is reached. Returns None when the chain of
    parents ends first.
    """
    current: Optional[LN] = descendant
    while current and current.parent != ancestor:
        current = current.parent
    return current
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    Higher numbers are higher priority. Only commas qualify; every other leaf
    gets 0. `previous` is accepted but not consulted.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    Higher numbers are higher priority. Returns 0 for leaves that are not such
    delimiters. `previous` is only used to detect two adjacent string leaves.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    kind = leaf.type
    parent = leaf.parent
    if (
        kind in MATH_OPERATORS
        and parent
        and parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITY

    if kind in COMPARATORS:
        return COMPARATOR_PRIORITY

    if kind == token.STRING and previous is not None and previous.type == token.STRING:
        return STRING_PRIORITY

    # Everything below is a keyword-like NAME leaf that has a parent.
    if kind != token.NAME or not parent:
        return 0

    word = leaf.value
    if word == "for" and parent.type in {syms.comp_for, syms.old_comp_for}:
        return COMPREHENSION_PRIORITY

    if word == "if" and parent.type in {syms.comp_if, syms.old_comp_if}:
        return COMPREHENSION_PRIORITY

    if word in {"if", "else"} and parent.type == syms.test:
        return TERNARY_PRIORITY

    if word in LOGIC_OPERATORS:
        return LOGIC_PRIORITY

    return 0
def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix. This happens
    in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments. We simply don't know what the correct parent should be.

    No matter though, we can live without this. We really only need to
    differentiate between inline and standalone comments. The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves. Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.

    Raises FormatOn / FormatOff right after yielding a format control
    comment, carrying the number of prefix characters consumed so far.
    """
    prefix = leaf.prefix
    if "#" not in prefix:
        # Also covers the empty prefix: there is nothing to generate.
        return

    consumed = 0
    blank_lines = 0
    for index, raw_line in enumerate(prefix.split("\n")):
        consumed += len(raw_line) + 1  # adding the length of the split '\n'
        text = raw_line.lstrip()
        if not text:
            blank_lines += 1
        if not text.startswith("#"):
            continue

        if index == 0 and leaf.type != token.ENDMARKER:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(text)
        yield Leaf(comment_type, comment, prefix="\n" * blank_lines)

        if comment in {"# fmt: on", "# yapf: enable"}:
            raise FormatOn(consumed)

        if comment in {"# fmt: off", "# yapf: disable"}:
            if comment_type == STANDALONE_COMMENT:
                raise FormatOff(consumed)

            prev = preceding_leaf(leaf)
            if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
                raise FormatOff(consumed)

        blank_lines = 0
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    Trailing whitespace is removed. A single space is inserted between the
    hash sign and the content unless the content already starts with a space,
    "!", ":" or "#" (so "#!", "#:" and "##" comments are left alone).

    If `content` didn't start with a hash sign, one is provided.
    """
    text = content.rstrip()
    if not text:
        return "#"

    if text.startswith("#"):
        text = text[1:]
    needs_space = bool(text) and not text.startswith((" ", "!", ":", "#"))
    if needs_space:
        text = " " + text
    return "#" + text
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively. This means we can fallback to splitting
    by delimiters if the LHS/RHS don't yield any results.

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.

    The line is yielded unchanged when it is unformatted, a comment, already
    fits, or when every applicable split function raised CannotSplit.
    """
    if isinstance(line, UnformattedLines) or line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    already_fits = (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
    if already_fits:
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    elif line.is_import:
        split_funcs = [explode_split]
    elif line.inside_brackets:
        split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
    else:
        split_funcs = [right_hand_split]

    for split_func in split_funcs:
        # Lines are accumulated in `result` instead of being yielded right away
        # because the attempt may still be aborted, in which case the next split
        # function is tried or the original line is returned in the end.
        result: List[Line] = []
        try:
            for piece in split_func(line, py36):
                if str(piece).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(piece, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            continue

        yield from result
        return

    yield line
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.

    Leaves up to and including the first opening bracket form the head, the
    leaves up to its matching closing bracket form the body, and the rest
    forms the tail. Empty parts are not yielded.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = head_leaves
    opener = None
    for leaf in line.leaves:
        closes_body = (
            bucket is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is opener
        )
        if closes_body:
            bucket = tail_leaves if body_leaves else head_leaves
        bucket.append(leaf)
        if bucket is head_leaves and leaf.type in OPENING_BRACKETS:
            opener = leaf
            bucket = body_leaves
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)

    # Build the new lines.
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    parts = ((head, head_leaves), (body, body_leaves), (tail, tail_leaves))
    for target, leaves in parts:
        for leaf in leaves:
            target.append(leaf, preformatted=True)
            for comment in line.comments_after(leaf):
                target.append(comment, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    for target in (head, body, tail):
        if target:
            yield target
def right_hand_split(
    line: Line, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    Closing brackets whose `id()` is listed in `omit` are not considered as
    the split point. When the chosen pair turns out to be invisible
    parentheses around a body without delimiters or standalone comments, the
    split is retried with that pair omitted as well.

    Raises CannotSplit when no bracket pair was found.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # The leaves are scanned right to left, so each bucket is filled backwards.
    for leaf in reversed(line.leaves):
        if bucket is body_leaves and leaf is opening_bracket:
            bucket = head_leaves if body_leaves else tail_leaves
        bucket.append(leaf)
        if (
            bucket is tail_leaves
            and leaf.type in CLOSING_BRACKETS
            and id(leaf) not in omit
        ):
            opening_bracket = leaf.opening_bracket
            closing_bracket = leaf
            bucket = body_leaves
    for leaves in (tail_leaves, body_leaves, head_leaves):
        leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    elif not head_leaves:
        # No `head` and no `body` means the split failed. `tail` has all content.
        raise CannotSplit("No brackets found")

    # Build the new lines.
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    parts = ((head, head_leaves), (body, body_leaves), (tail, tail_leaves))
    for target, leaves in parts:
        for leaf in leaves:
            target.append(leaf, preformatted=True)
            for comment in line.comments_after(leaf):
                target.append(comment, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    assert opening_bracket and closing_bracket
    parens_are_invisible = (
        opening_bracket.type == token.LPAR
        and not opening_bracket.value
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
    )
    if parens_are_invisible:
        # These parens were optional. If there aren't any delimiters or standalone
        # comments in the body, they were unnecessary and another split without
        # them should be attempted.
        body_needs_parens = (
            body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
        )
        if not body_needs_parens:
            omit = {id(closing_bracket), *omit}
            yield from right_hand_split(line, py36=py36, omit=omit)
            return

    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for target in (head, body, tail):
        if target:
            yield target
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets. Content before
    (and including) the opening bracket is left on one line, content inside the
    brackets is put on a separate line, and finally content starting with and
    following the closing bracket is put on a separate line.

    Those are called `head`, `body`, and `tail`, respectively. The split is
    considered failed when `body` is empty and the stripped `tail` is either
    empty (the same line was produced) or shorter than three characters.
    """
    tail_len = len(str(tail).strip())
    if body:
        return

    if tail_len == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if tail_len < 3:
        raise CannotSplit(
            f"Splitting brackets on an empty body to save "
            f"{tail_len} characters is not worth it"
        )
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions. The wrapper keeps the
    metadata of `split_func` and yields its lines one by one after the prefix
    of their first leaf was normalized with `inside_brackets=True`.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for produced in split_func(line, py36):
            first_leaf = produced.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield produced

    return split_wrapper
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If `py36` is True, the split will add trailing commas also in function
    signatures that contain `*` and `**`.

    Raises CannotSplit when the line has no leaves or no delimiters.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    tracker = line.bracket_tracker
    delimiters = tracker.delimiters
    try:
        delimiter_priority = tracker.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    def fresh_line() -> Line:
        """Return an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = fresh_line()
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = fresh_line()
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment in line.comments_after(leaf):
            yield from append_to_line(comment)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        at_lowest_depth = leaf.bracket_depth == lowest_depth
        if at_lowest_depth and is_vararg(leaf, within=VARARGS_PARENTS):
            # A trailing comma after * or ** is only added when `py36` is set.
            trailing_comma_safe = trailing_comma_safe and py36
        if delimiters.get(id(leaf)) == delimiter_priority:
            yield current_line

            current_line = fresh_line()

    if current_line:
        last_type = current_line.leaves[-1].type
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and last_type != token.COMMA
            and last_type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split standalone comments from the rest of the line.

    Leaves (and the comments after them) are appended to the current line
    until `append_safe` refuses one; the current line is then yielded and a
    new one is started with that leaf.

    Raises CannotSplit when the line has no standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def fresh_line() -> Line:
        """Return an empty line with the depth and bracket state of `line`."""
        return Line(depth=line.depth, inside_brackets=line.inside_brackets)

    current_line = fresh_line()

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = fresh_line()
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment in line.comments_after(leaf):
            yield from append_to_line(comment)

    if current_line:
        yield current_line
def explode_split(
    line: Line, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split by rightmost bracket and immediately split contents by a delimiter.

    If the right-hand split does not produce exactly three lines, its result
    is passed through untouched. Otherwise the middle line is additionally
    run through `delimiter_split`, falling back to the middle line itself
    when that raises CannotSplit.
    """
    new_lines = list(right_hand_split(line, py36, omit))
    if len(new_lines) != 3:
        yield from new_lines
        return

    head, body, tail = new_lines
    yield head

    try:
        yield from delimiter_split(body, py36)

    except CannotSplit:
        yield body

    yield tail
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement.

    That is the case for an `import` name whose parent is an import_name
    node, and for a `from` name whose parent is an import_from node.
    """
    parent = leaf.parent
    if leaf.type != token.NAME or not parent:
        return False

    keyword = leaf.value
    if keyword == "import":
        return bool(parent.type == syms.import_name)

    if keyword == "from":
        return bool(parent.type == syms.import_from)

    return False
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Only the newlines after the last "#" of the prefix are counted, minus one
    when the prefix contained a "#" at all. A prefix with a backslash before
    its first "#" is emptied like a prefix inside brackets.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    if inside_brackets:
        leaf.prefix = ""
        return

    parts = leaf.prefix.split("#")
    if "\\" in parts[0]:
        leaf.prefix = ""
        return

    newline_count = parts[-1].count("\n")
    if len(parts) > 1:
        newline_count -= 1
    leaf.prefix = "\n" * newline_count
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Strings that already use triple double quotes are left alone. Raw strings
    are only converted when that needs no change to their body.

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    # `str.find` is required here: `str.index` raises ValueError instead of
    # returning -1, which would make the guard below dead code.
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary quotes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary quotes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    # `endswith` instead of `new_body[-1]` so that an empty body (as in
    # `''''''`) doesn't raise IndexError.
    if new_quote == '"""' and new_body.endswith('"'):
        # edge case: a body ending in a double quote would otherwise merge
        # with the closing triple quote
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    Only children that directly follow a leaf whose value is in `parens_after`
    are touched.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.
    """

    def wrap(child: LN, opening: str, closing: str) -> None:
        """Replace `child` in `node` with an atom that parenthesizes it."""
        lpar = Leaf(token.LPAR, opening)
        rpar = Leaf(token.RPAR, closing)
        index = child.remove() or 0
        node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))

    follows_trigger = False
    # Iterate over a copy because wrapping modifies `node.children`.
    for child in list(node.children):
        if follows_trigger:
            if child.type == syms.atom:
                keep_as_is = (
                    is_empty_tuple(child)
                    or is_one_tuple(child)
                    or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
                )
                if not keep_as_is:
                    first = child.children[0]
                    last = child.children[-1]
                    if first.type == token.LPAR and last.type == token.RPAR:
                        # make parentheses invisible
                        first.value = ""  # type: ignore
                        last.value = ""  # type: ignore
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                wrap(child, "(", ")")
            else:
                # wrap child in invisible parentheses
                wrap(child, "", "")

        follows_trigger = isinstance(child, Leaf) and child.value in parens_after
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple.

    That is an atom with exactly two children: a left and a right parenthesis.
    """
    if node.type != syms.atom or len(node.children) != 2:
        return False

    opening, closing = node.children
    return opening.type == token.LPAR and closing.type == token.RPAR
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens.

    With parentheses it is an atom wrapping a testlist_gexp of two children,
    the second being a comma. Without them it is one of the IMPLICIT_TUPLE
    node types with two children, the second being a comma.
    """
    if node.type == syms.atom:
        if len(node.children) != 3:
            return False

        lpar, gexp, rpar = node.children
        if (
            lpar.type != token.LPAR
            or gexp.type != syms.testlist_gexp
            or rpar.type != token.RPAR
        ):
            return False

        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    if node.type not in IMPLICIT_TUPLE:
        return False

    return len(node.children) == 2 and node.children[1].type == token.COMMA
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in STARS or not leaf.parent:
        return False

    container = leaf.parent
    if container.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.
        container = container.parent
        if not container:
            return False

    return container.type in within
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    Also returns 0 when the bracket tracker raises ValueError while asked for
    the maximum priority.
    """
    if node.type != syms.atom:
        return 0

    first = node.children[0]
    last = node.children[-1]
    if first.type != token.LPAR or last.type != token.RPAR:
        return 0

    tracker = BracketTracker()
    for inner in node.children[1:-1]:
        # Feed every leaf between the parentheses to the tracker.
        leaves = [inner] if isinstance(inner, Leaf) else inner.leaves()
        for leaf in leaves:
            tracker.mark(leaf)
    try:
        return tracker.max_delimiter_priority()

    except ValueError:
        return 0
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).

    Leaves that are neither a left nor a right parenthesis are left untouched.
    """
    visible_value = {token.LPAR: "(", token.RPAR: ")"}.get(leaf.type)
    if visible_value is not None:
        leaf.value = visible_value
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures.
    """
    # First two characters of a string literal that mark it as an f-string,
    # in lower case. String prefixes are case-insensitive, so mixed-case
    # spellings such as `Rf"..."` or `fR'...'` count as well.
    f_string_heads = {'f"', "f'", "rf", "fr"}
    for n in node.pre_order():
        if n.type == token.STRING:
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in f_string_heads:
                return True

        elif (
            n.type == syms.typedargslist
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # A signature with a trailing comma: Python 3.6+ only when it
            # also contains * or **.
            if any(ch.type in STARS for ch in n.children):
                return True

    return False
# File suffixes that mark a file as Python source.
PYTHON_EXTENSIONS = {".py"}
# Directory names that are never descended into.
BLACKLISTED_DIRECTORIES = {
    "build",
    "buck-out",
    "dist",
    "_build",
    ".git",
    ".hg",
    ".mypy_cache",
    ".tox",
    ".venv",
}


def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
    and have one of the PYTHON_EXTENSIONS.

    Directories are walked recursively, in the order `Path.iterdir()` returns
    their entries.
    """
    for entry in path.iterdir():
        if not entry.is_dir():
            if entry.suffix in PYTHON_EXTENSIONS:
                yield entry
            continue

        if entry.name not in BLACKLISTED_DIRECTORIES:
            yield from gen_python_files_in_dir(entry)
2356 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2359 change_count: int = 0
2361 failure_count: int = 0
def done(self, src: Path, changed: Changed) -> None:
    """Increment the counter for successful reformatting. Write out a message.

    `Changed.YES` bumps `change_count`; any other value bumps `same_count`.
    Nothing is written when `self.quiet` is set.
    """
    if changed is Changed.YES:
        if not self.quiet:
            verb = "would reformat" if self.check else "reformatted"
            out(f"{verb} {src}")
        self.change_count += 1
        return

    if not self.quiet:
        if changed is Changed.NO:
            msg = f"{src} already well formatted, good job."
        else:
            msg = f"{src} wasn't modified on disk since last run."
        out(msg, bold=False)
    self.same_count += 1
def failed(self, src: Path, message: str) -> None:
    """Increment the counter for failed reformatting. Write out a message.

    The message goes through `err` even when `self.quiet` is set.
    """
    report_line = f"error: cannot format {src}: {message}"
    err(report_line)
    self.failure_count += 1
@property
def return_code(self) -> int:
    """Return the exit code that the app should use.

    This considers the current state of changed files and failures:
    - if there were any failures, return 123;
    - if any files were changed and --check is being used, return 1;
    - otherwise return 0.
    """
    # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
    # 126 we have special returncodes reserved by the shell.
    if self.failure_count:
        return 123

    would_change = self.change_count and self.check
    return 1 if would_change else 0
def __str__(self) -> str:
    """Render a color report of the current state.

    Counters that are zero are left out; the remaining parts are joined with
    ", " and followed by a period.

    Use `click.unstyle` to remove colors.
    """
    if self.check:
        reformatted = "would be reformatted"
        unchanged = "would be left unchanged"
        failed = "would fail to reformat"
    else:
        reformatted = "reformatted"
        unchanged = "left unchanged"
        failed = "failed to reformat"

    def describe(count: int, outcome: str) -> str:
        """Return e.g. "1 file reformatted" or "2 files reformatted"."""
        plural = "s" if count > 1 else ""
        return f"{count} file{plural} {outcome}"

    parts = []
    if self.change_count:
        parts.append(click.style(describe(self.change_count, reformatted), bold=True))
    if self.same_count:
        parts.append(describe(self.same_count, unchanged))
    if self.failure_count:
        parts.append(click.style(describe(self.failure_count, failed), fg="red"))
    return ", ".join(parts) + "."
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with the builtin `ast` module and compared by a
    textual rendering of their trees. An AssertionError is also raised when
    either of them cannot be parsed.
    """

    import ast
    import traceback

    def _render(tree: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        type_name = tree.__class__.__name__
        yield f"{' ' * depth}{type_name}("

        for field in sorted(tree._fields):
            try:
                value = getattr(tree, field)
            except AttributeError:
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _render(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _render(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{type_name}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        # Keep the traceback and the offending output around for the bug report.
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_render(src_ast))
    dst_ast_str = "\n".join(_render(dst_ast))
    if src_ast_str == dst_ast_str:
        return

    log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
    raise AssertionError(
        f"INTERNAL ERROR: Black produced code that is not equivalent to "
        f"the source. "
        f"Please report a bug on https://github.com/ambv/black/issues. "
        f"This diff might be helpful: {log}"
    ) from None
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Raise AssertionError if `dst` reformats differently the second time.

    `dst` is formatted once more with the same `line_length`. If the result
    differs, the diffs of both passes are dumped to a file whose path is
    included in the error message.
    """
    second_pass = format_str(dst, line_length=line_length)
    if dst == second_pass:
        return

    log = dump_to_file(
        diff(src, dst, "source", "first pass"),
        diff(dst, second_pass, "first pass", "second pass"),
    )
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter. "
        f"Please report a bug on https://github.com/ambv/black/issues. "
        f"This diff might be helpful: {log}"
    ) from None
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file.

    Each item of `output` is written in order; a newline is appended after
    any non-empty item that does not already end with one.  The file is
    created with `delete=False`, so it stays on disk after this call.
    """
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            if lines and lines[-1] != "\n":
                f.write("\n")
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header.  Five
    lines of context are shown around each change.  An empty string is
    returned when `a` and `b` are equal.
    """
    import difflib

    # Re-attach the newline to every line so the joined diff is line-separated.
    a_lines = [line + "\n" for line in a.split("\n")]
    b_lines = [line + "\n" for line in b.split("\n")]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
2537 def cancel(tasks: List[asyncio.Task]) -> None:
2538 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even when there is nothing to cancel or when waiting
    for the cancelled tasks raises.
    """
    # `asyncio.Task.all_tasks()` was deprecated in Python 3.7 and removed in 3.9;
    # use the module-level `asyncio.all_tasks()` whenever it is available.
    all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No explicit `loop=` argument: it was removed from `gather()` in
        # Python 3.10, and `gather()` picks up the loop from the tasks it is
        # given (`to_cancel` is never empty here).
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    # A single pass cannot rewrite a match that overlaps the previous one;
    # the second pass picks those up.
    return regex.sub(replacement, regex.sub(replacement, original))
# Per-user cache directory for "black", keyed by `__version__`.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))


def get_cache_file(line_length: int) -> Path:
    """Return the path of the cache file used for the given `line_length`.

    The file lives in `CACHE_DIR`; there is one file per line length.
    """
    return CACHE_DIR / f"cache.{line_length}.pickle"
def read_cache(line_length: int) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.

    An empty cache is returned when the cache file does not exist or cannot
    be unpickled.
    """
    cache_file = get_cache_file(line_length)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            # NOTE(review): unpickling executes whatever the file encodes; this
            # assumes the cache directory is only writable by the current user.
            cache: Cache = pickle.load(fobj)
        except (
            pickle.UnpicklingError,
            # An empty, truncated or otherwise corrupted file does not always
            # raise UnpicklingError; the pickle documentation lists these too.
            AttributeError,
            EOFError,
            ImportError,
            IndexError,
            ValueError,
        ):
            return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not.

    The result is the pair (modification time, size in bytes) taken from a
    single `path.stat()` call.
    """
    stat = path.stat()
    return stat.st_mtime, stat.st_size
2607 cache: Cache, sources: Iterable[Path]
2608 ) -> Tuple[List[Path], List[Path]]:
2609 """Split a list of paths into two.
2611 The first list contains paths of files that were modified on disk or are not in the
2612 cache. The other list contains paths to non-modified files.
2617 if cache.get(src) != get_cache_info(src):
def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
    """Update the cache file.

    The entries of `cache` are merged with fresh cache info for every path in
    `sources` (keyed by the resolved path) and pickled to the cache file for
    `line_length`.  Writing is best effort: an `OSError` is swallowed.
    """
    cache_file = get_cache_file(line_length)
    try:
        # `exist_ok=True` avoids the race between an existence check and
        # `mkdir()` when several processes create the directory at once.
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # The cache is only an optimization; failing to persist it must not
        # fail the run.
        pass
2637 if __name__ == "__main__":