3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial, wraps
9 from multiprocessing import Manager
11 from pathlib import Path
34 from appdirs import user_cache_dir
35 from attr import dataclass, Factory
39 from blib2to3.pytree import Node, Leaf, type_repr
40 from blib2to3 import pygram, pytree
41 from blib2to3.pgen2 import driver, token
42 from blib2to3.pgen2.parse import ParseError
44 __version__ = "18.4a4"
45 DEFAULT_LINE_LENGTH = 88
48 syms = pygram.python_symbols
56 LN = Union[Leaf, Node]
57 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
60 CacheInfo = Tuple[Timestamp, FileSize]
61 Cache = Dict[Path, CacheInfo]
62 out = partial(click.secho, bold=True, err=True)
63 err = partial(click.secho, fg="red", err=True)
66 class NothingChanged(UserWarning):
67 """Raised by :func:`format_file` when reformatted code is the same as source."""
70 class CannotSplit(Exception):
71 """A readable split that fits the allotted line length is impossible.
73 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
74 :func:`delimiter_split`.
78 class FormatError(Exception):
79 """Base exception for `# fmt: on` and `# fmt: off` handling.
81 It holds the number of bytes of the prefix consumed before the format
82 control comment appeared.
85 def __init__(self, consumed: int) -> None:
86 super().__init__(consumed)
87 self.consumed = consumed
89 def trim_prefix(self, leaf: Leaf) -> None:
90 leaf.prefix = leaf.prefix[self.consumed :]
92 def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
93 """Returns a new Leaf from the consumed part of the prefix."""
94 unformatted_prefix = leaf.prefix[: self.consumed]
95 return Leaf(token.NEWLINE, unformatted_prefix)
98 class FormatOn(FormatError):
99 """Found a comment like `# fmt: on` in the file."""
102 class FormatOff(FormatError):
103 """Found a comment like `# fmt: off` in the file."""
106 class WriteBack(Enum):
123 default=DEFAULT_LINE_LENGTH,
124 help="How many characters per line to allow.",
131 "Don't write the files back, just return the status. Return code 0 "
132 "means nothing would change. Return code 1 means some files would be "
133 "reformatted. Return code 123 means there was an internal error."
139 help="Don't write the files back, just output a diff for each file on stdout.",
144 help="If --fast given, skip temporary sanity checks. [default: --safe]",
151 "Don't emit non-error messages to stderr. Errors are still emitted, "
152 "silence those with 2>/dev/null."
155 @click.version_option(version=__version__)
160 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
173 """The uncompromising code formatter."""
174 sources: List[Path] = []
178 sources.extend(gen_python_files_in_dir(p))
180 # if a file was explicitly given, we don't care about its extension
183 sources.append(Path("-"))
185 err(f"invalid path: {s}")
187 if check and not diff:
188 write_back = WriteBack.NO
190 write_back = WriteBack.DIFF
192 write_back = WriteBack.YES
193 report = Report(check=check, quiet=quiet)
194 if len(sources) == 0:
198 elif len(sources) == 1:
199 reformat_one(sources[0], line_length, fast, write_back, report)
201 loop = asyncio.get_event_loop()
202 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
204 loop.run_until_complete(
206 sources, line_length, fast, write_back, report, loop, executor
212 out("All done! ✨ 🍰 ✨")
213 click.echo(str(report))
214 ctx.exit(report.return_code)
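
# Usage sketch (not part of black.py). Assuming the installed console script
# is named `black`, the options wired up above behave as their help strings
# describe:
#
#   black src/              # reformat every .py file under src/ in place
#   black --check src/      # exit 1 if anything would change, 123 on error
#   black --diff file.py    # print a diff to stdout, leave the file untouched
#   black -                 # read source from stdin, write the result to stdout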
218 src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
220 """Reformat a single file under `src` without spawning child processes.
222 `line_length`, `write_back`, and `fast` options are passed to
223 :func:`format_file_in_place` or :func:`format_stdin_to_stdout`.
227 if not src.is_file() and str(src) == "-":
228 if format_stdin_to_stdout(
229 line_length=line_length, fast=fast, write_back=write_back
231 changed = Changed.YES
234 if write_back != WriteBack.DIFF:
235 cache = read_cache(line_length)
237 if src in cache and cache[src] == get_cache_info(src):
238 changed = Changed.CACHED
240 changed is not Changed.CACHED
241 and format_file_in_place(
242 src, line_length=line_length, fast=fast, write_back=write_back
245 changed = Changed.YES
246 if write_back == WriteBack.YES and changed is not Changed.NO:
247 write_cache(cache, [src], line_length)
248 report.done(src, changed)
249 except Exception as exc:
250 report.failed(src, str(exc))
253 async def schedule_formatting(
257 write_back: WriteBack,
262 """Run formatting of `sources` in parallel using the provided `executor`.
264 (Use ProcessPoolExecutors for actual parallelism.)
266 `line_length`, `write_back`, and `fast` options are passed to
267 :func:`format_file_in_place`.
270 if write_back != WriteBack.DIFF:
271 cache = read_cache(line_length)
272 sources, cached = filter_cached(cache, sources)
274 report.done(src, Changed.CACHED)
279 if write_back == WriteBack.DIFF:
280 # For diff output, we need locks to ensure we don't interleave output
281 # from different processes.
283 lock = manager.Lock()
285 src: loop.run_in_executor(
286 executor, format_file_in_place, src, line_length, fast, write_back, lock
290 _task_values = list(tasks.values())
292 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
293 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
294 except NotImplementedError:
295 # There are no good alternatives for these on Windows
297 await asyncio.wait(_task_values)
298 for src, task in tasks.items():
300 report.failed(src, "timed out, cancelling")
302 cancelled.append(task)
303 elif task.cancelled():
304 cancelled.append(task)
305 elif task.exception():
306 report.failed(src, str(task.exception()))
308 formatted.append(src)
309 report.done(src, Changed.YES if task.result() else Changed.NO)
312 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
313 if write_back == WriteBack.YES and formatted:
314 write_cache(cache, formatted, line_length)
317 def format_file_in_place(
321 write_back: WriteBack = WriteBack.NO,
322 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
324 """Format file under `src` path. Return True if changed.
326 If `write_back` is YES, write reformatted code back to the file; if DIFF, write a diff to stdout.
327 `line_length` and `fast` options are passed to :func:`format_file_contents`.
330 with tokenize.open(src) as src_buffer:
331 src_contents = src_buffer.read()
333 dst_contents = format_file_contents(
334 src_contents, line_length=line_length, fast=fast
336 except NothingChanged:
339 if write_back == write_back.YES:
340 with open(src, "w", encoding=src_buffer.encoding) as f:
341 f.write(dst_contents)
342 elif write_back == write_back.DIFF:
343 src_name = f"{src} (original)"
344 dst_name = f"{src} (formatted)"
345 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
349 sys.stdout.write(diff_contents)
356 def format_stdin_to_stdout(
357 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
359 """Format file on stdin. Return True if changed.
361 If `write_back` is YES, write reformatted code to stdout; if DIFF, write a diff to stdout instead.
362 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
364 src = sys.stdin.read()
367 dst = format_file_contents(src, line_length=line_length, fast=fast)
370 except NothingChanged:
374 if write_back == WriteBack.YES:
375 sys.stdout.write(dst)
376 elif write_back == WriteBack.DIFF:
377 src_name = "<stdin> (original)"
378 dst_name = "<stdin> (formatted)"
379 sys.stdout.write(diff(src, dst, src_name, dst_name))
382 def format_file_contents(
383 src_contents: str, line_length: int, fast: bool
385 """Reformat contents a file and return new contents.
387 If `fast` is False, additionally confirm that the reformatted code is
388 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
389 `line_length` is passed to :func:`format_str`.
391 if src_contents.strip() == "":
394 dst_contents = format_str(src_contents, line_length=line_length)
395 if src_contents == dst_contents:
399 assert_equivalent(src_contents, dst_contents)
400 assert_stable(src_contents, dst_contents, line_length=line_length)
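
# Illustrative sketch (not part of black.py): when the input is already
# formatted, the fast/safe distinction never matters, because NothingChanged
# is raised before the safety checks run.
def _already_formatted_example() -> bool:
    try:
        format_file_contents('x = "hello"\n', line_length=88, fast=False)
    except NothingChanged:
        return True  # input was already formatted the way Black would emit it
    return False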
404 def format_str(src_contents: str, line_length: int) -> FileContent:
405 """Reformat a string and return new contents.
407 `line_length` determines how many characters per line are allowed.
409 src_node = lib2to3_parse(src_contents)
411 lines = LineGenerator()
412 elt = EmptyLineTracker()
413 py36 = is_python36(src_node)
416 for current_line in lines.visit(src_node):
417 for _ in range(after):
418 dst_contents += str(empty_line)
419 before, after = elt.maybe_empty_lines(current_line)
420 for _ in range(before):
421 dst_contents += str(empty_line)
422 for line in split_line(current_line, line_length=line_length, py36=py36):
423 dst_contents += str(line)
428 pygram.python_grammar_no_print_statement_no_exec_statement,
429 pygram.python_grammar_no_print_statement,
430 pygram.python_grammar,
434 def lib2to3_parse(src_txt: str) -> Node:
435 """Given a string with source, return the lib2to3 Node."""
436 grammar = pygram.python_grammar_no_print_statement
437 if src_txt[-1] != "\n":
438 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
440 for grammar in GRAMMARS:
441 drv = driver.Driver(grammar, pytree.convert)
443 result = drv.parse_string(src_txt, True)
446 except ParseError as pe:
447 lineno, column = pe.context[1]
448 lines = src_txt.splitlines()
450 faulty_line = lines[lineno - 1]
452 faulty_line = "<line number missing in source>"
453 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
457 if isinstance(result, Leaf):
458 result = Node(syms.file_input, [result])
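
# Illustrative sketch (not part of black.py), assuming the GRAMMARS fallback
# above tries each grammar in turn: sources using the Python 2 print
# statement still parse because pygram.python_grammar (the last entry)
# accepts them.
def _parse_print_statement_example() -> Node:
    return lib2to3_parse("print 'hello'\n")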
462 def lib2to3_unparse(node: Node) -> str:
463 """Given a lib2to3 node, return its string representation."""
471 class Visitor(Generic[T]):
472 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
474 def visit(self, node: LN) -> Iterator[T]:
475 """Main method to visit `node` and its children.
477 It tries to find a `visit_*()` method for the given `node.type`, like
478 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
479 If no dedicated `visit_*()` method is found, chooses `visit_default()`
482 Then yields objects of type `T` from the selected visitor.
485 name = token.tok_name[node.type]
487 name = type_repr(node.type)
488 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
490 def visit_default(self, node: LN) -> Iterator[T]:
491 """Default `visit_*()` implementation. Recurses to children of `node`."""
492 if isinstance(node, Node):
493 for child in node.children:
494 yield from self.visit(child)
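
# Illustrative sketch (not part of black.py): thanks to the name-based
# dispatch above, a subclass only defines visit_<TYPE> methods for the node
# types it cares about; everything else recurses through visit_default().
class _NameCollector(Visitor[Leaf]):
    """Yield every NAME leaf in a tree (illustration only)."""

    def visit_NAME(self, node: Leaf) -> Iterator[Leaf]:
        yield node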
498 class DebugVisitor(Visitor[T]):
501 def visit_default(self, node: LN) -> Iterator[T]:
502 indent = " " * (2 * self.tree_depth)
503 if isinstance(node, Node):
504 _type = type_repr(node.type)
505 out(f"{indent}{_type}", fg="yellow")
507 for child in node.children:
508 yield from self.visit(child)
511 out(f"{indent}/{_type}", fg="yellow", bold=False)
513 _type = token.tok_name.get(node.type, str(node.type))
514 out(f"{indent}{_type}", fg="blue", nl=False)
516 # We don't have to handle prefixes for `Node` objects since
517 # that delegates to the first child anyway.
518 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
519 out(f" {node.value!r}", fg="blue", bold=False)
522 def show(cls, code: str) -> None:
523 """Pretty-print the lib2to3 AST of a given string of `code`.
525 Convenience method for debugging.
527 v: DebugVisitor[None] = DebugVisitor()
528 list(v.visit(lib2to3_parse(code)))
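
# Illustrative sketch (not part of black.py): the classmethod above is the
# quickest way to inspect what blib2to3 produces for a snippet; output goes
# to stderr via out(), i.e. click.secho(err=True).
def _show_tree_example() -> None:
    DebugVisitor.show("x = [1, 2]\n")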
531 KEYWORDS = set(keyword.kwlist)
532 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
533 FLOW_CONTROL = {"return", "raise", "break", "continue"}
544 STANDALONE_COMMENT = 153
545 LOGIC_OPERATORS = {"and", "or"}
569 STARS = {token.STAR, token.DOUBLESTAR}
572 syms.argument, # double star in arglist
573 syms.trailer, # single argument to call
575 syms.varargslist, # lambdas
577 UNPACKING_PARENTS = {
578 syms.atom, # single element of a list or set literal
600 COMPREHENSION_PRIORITY = 20
605 COMPARATOR_PRIORITY = 3
610 class BracketTracker:
611 """Keeps track of brackets on a line."""
614 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
615 delimiters: Dict[LeafID, Priority] = Factory(dict)
616 previous: Optional[Leaf] = None
617 _for_loop_variable: bool = False
618 _lambda_arguments: bool = False
620 def mark(self, leaf: Leaf) -> None:
621 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
623 All leaves receive an int `bracket_depth` field that stores how deep
624 within brackets a given leaf is. 0 means there are no enclosing brackets
625 that started on this line.
627 If a leaf is itself a closing bracket, it receives an `opening_bracket`
628 field that it forms a pair with. This is a one-directional link to
629 avoid reference cycles.
631 If a leaf is a delimiter (a token on which Black can split the line if
632 needed) and it's on depth 0, its `id()` is stored in the tracker's
635 if leaf.type == token.COMMENT:
638 self.maybe_decrement_after_for_loop_variable(leaf)
639 self.maybe_decrement_after_lambda_arguments(leaf)
640 if leaf.type in CLOSING_BRACKETS:
642 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
643 leaf.opening_bracket = opening_bracket
644 leaf.bracket_depth = self.depth
646 delim = is_split_before_delimiter(leaf, self.previous)
647 if delim and self.previous is not None:
648 self.delimiters[id(self.previous)] = delim
650 delim = is_split_after_delimiter(leaf, self.previous)
652 self.delimiters[id(leaf)] = delim
653 if leaf.type in OPENING_BRACKETS:
654 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
657 self.maybe_increment_lambda_arguments(leaf)
658 self.maybe_increment_for_loop_variable(leaf)
660 def any_open_brackets(self) -> bool:
661 """Return True if there is an yet unmatched open bracket on the line."""
662 return bool(self.bracket_match)
664 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
665 """Return the highest priority of a delimiter found on the line.
667 Values are consistent with what `is_split_*_delimiter()` return.
668 Raises ValueError on no delimiters.
670 return max(v for k, v in self.delimiters.items() if k not in exclude)
672 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
673 """In a for loop, or comprehension, the variables are often unpacks.
675 To avoid splitting on the comma in this situation, increase the depth of
676 tokens between `for` and `in`.
678 if leaf.type == token.NAME and leaf.value == "for":
680 self._for_loop_variable = True
685 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
686 """See `maybe_increment_for_loop_variable` above for explanation."""
687 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
689 self._for_loop_variable = False
694 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
695 """In a lambda expression, there might be more than one argument.
697 To avoid splitting on the comma in this situation, increase the depth of
698 tokens between `lambda` and `:`.
700 if leaf.type == token.NAME and leaf.value == "lambda":
702 self._lambda_arguments = True
707 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
708 """See `maybe_increment_lambda_arguments` above for explanation."""
709 if self._lambda_arguments and leaf.type == token.COLON:
711 self._lambda_arguments = False
716 def get_open_lsqb(self) -> Optional[Leaf]:
717 """Return the most recent opening square bracket (if any)."""
718 return self.bracket_match.get((self.depth - 1, token.RSQB))
723 """Holds leaves and comments. Can be printed with `str(line)`."""
726 leaves: List[Leaf] = Factory(list)
727 comments: List[Tuple[Index, Leaf]] = Factory(list)
728 bracket_tracker: BracketTracker = Factory(BracketTracker)
729 inside_brackets: bool = False
731 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
732 """Add a new `leaf` to the end of the line.
734 Unless `preformatted` is True, the `leaf` will receive a new consistent
735 whitespace prefix and metadata applied by :class:`BracketTracker`.
736 Trailing commas are maybe removed, unpacked for loop variables are
737 demoted from being delimiters.
739 Inline comments are put aside.
741 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
745 if self.leaves and not preformatted:
746 # Note: at this point leaf.prefix should be empty except for
747 # imports, for which we only preserve newlines.
748 leaf.prefix += whitespace(
749 leaf, complex_subscript=self.is_complex_subscript(leaf)
751 if self.inside_brackets or not preformatted:
752 self.bracket_tracker.mark(leaf)
753 self.maybe_remove_trailing_comma(leaf)
755 if not self.append_comment(leaf):
756 self.leaves.append(leaf)
758 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
759 """Like :func:`append()` but disallow invalid standalone comment structure.
761 Raises ValueError when any `leaf` is appended after a standalone comment
762 or when a standalone comment is not the first leaf on the line.
764 if self.bracket_tracker.depth == 0:
766 raise ValueError("cannot append to standalone comments")
768 if self.leaves and leaf.type == STANDALONE_COMMENT:
770 "cannot append standalone comments to a populated line"
773 self.append(leaf, preformatted=preformatted)
776 def is_comment(self) -> bool:
777 """Is this line a standalone comment?"""
778 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
781 def is_decorator(self) -> bool:
782 """Is this line a decorator?"""
783 return bool(self) and self.leaves[0].type == token.AT
786 def is_import(self) -> bool:
787 """Is this an import line?"""
788 return bool(self) and is_import(self.leaves[0])
791 def is_class(self) -> bool:
792 """Is this line a class definition?"""
795 and self.leaves[0].type == token.NAME
796 and self.leaves[0].value == "class"
800 def is_def(self) -> bool:
801 """Is this a function definition? (Also returns True for async defs.)"""
803 first_leaf = self.leaves[0]
808 second_leaf: Optional[Leaf] = self.leaves[1]
812 (first_leaf.type == token.NAME and first_leaf.value == "def")
814 first_leaf.type == token.ASYNC
815 and second_leaf is not None
816 and second_leaf.type == token.NAME
817 and second_leaf.value == "def"
822 def is_flow_control(self) -> bool:
823 """Is this line a flow control statement?
825 Those are `return`, `raise`, `break`, and `continue`.
829 and self.leaves[0].type == token.NAME
830 and self.leaves[0].value in FLOW_CONTROL
834 def is_yield(self) -> bool:
835 """Is this line a yield statement?"""
838 and self.leaves[0].type == token.NAME
839 and self.leaves[0].value == "yield"
842 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
843 """If so, needs to be split before emitting."""
844 for leaf in self.leaves:
845 if leaf.type == STANDALONE_COMMENT:
846 if leaf.bracket_depth <= depth_limit:
851 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
852 """Remove trailing comma if there is one and it's safe."""
855 and self.leaves[-1].type == token.COMMA
856 and closing.type in CLOSING_BRACKETS
860 if closing.type == token.RBRACE:
861 self.remove_trailing_comma()
864 if closing.type == token.RSQB:
865 comma = self.leaves[-1]
866 if comma.parent and comma.parent.type == syms.listmaker:
867 self.remove_trailing_comma()
870 # For parens let's check if it's safe to remove the comma. If the
871 # trailing one is the only one, we might mistakenly change a tuple
872 # into a different type by removing the comma.
873 depth = closing.bracket_depth + 1
875 opening = closing.opening_bracket
876 for _opening_index, leaf in enumerate(self.leaves):
883 for leaf in self.leaves[_opening_index + 1 :]:
887 bracket_depth = leaf.bracket_depth
888 if bracket_depth == depth and leaf.type == token.COMMA:
890 if leaf.parent and leaf.parent.type == syms.arglist:
895 self.remove_trailing_comma()
900 def append_comment(self, comment: Leaf) -> bool:
901 """Add an inline or standalone comment to the line."""
903 comment.type == STANDALONE_COMMENT
904 and self.bracket_tracker.any_open_brackets()
909 if comment.type != token.COMMENT:
912 after = len(self.leaves) - 1
914 comment.type = STANDALONE_COMMENT
919 self.comments.append((after, comment))
922 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
923 """Generate comments that should appear directly after `leaf`."""
924 for _leaf_index, _leaf in enumerate(self.leaves):
931 for index, comment_after in self.comments:
932 if _leaf_index == index:
935 def remove_trailing_comma(self) -> None:
936 """Remove the trailing comma and moves the comments attached to it."""
937 comma_index = len(self.leaves) - 1
938 for i in range(len(self.comments)):
939 comment_index, comment = self.comments[i]
940 if comment_index == comma_index:
941 self.comments[i] = (comma_index - 1, comment)
944 def is_complex_subscript(self, leaf: Leaf) -> bool:
945 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
947 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
949 if open_lsqb is None:
952 subscript_start = open_lsqb.next_sibling
954 isinstance(subscript_start, Node)
955 and subscript_start.type == syms.subscriptlist
957 subscript_start = child_towards(subscript_start, leaf)
958 return subscript_start is not None and any(
959 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
962 def __str__(self) -> str:
963 """Render the line."""
967 indent = " " * self.depth
968 leaves = iter(self.leaves)
970 res = f"{first.prefix}{indent}{first.value}"
973 for _, comment in self.comments:
977 def __bool__(self) -> bool:
978 """Return True if the line has leaves or comments."""
979 return bool(self.leaves or self.comments)
982 class UnformattedLines(Line):
983 """Just like :class:`Line` but stores lines which aren't reformatted."""
985 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
986 """Just add a new `leaf` to the end of the lines.
988 The `preformatted` argument is ignored.
990 Keeps track of indentation `depth`, which is useful when the user
991 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
994 list(generate_comments(leaf))
995 except FormatOn as f_on:
996 self.leaves.append(f_on.leaf_from_consumed(leaf))
999 self.leaves.append(leaf)
1000 if leaf.type == token.INDENT:
1002 elif leaf.type == token.DEDENT:
1005 def __str__(self) -> str:
1006 """Render unformatted lines from leaves which were added with `append()`.
1008 `depth` is not used for indentation in this case.
1014 for leaf in self.leaves:
1018 def append_comment(self, comment: Leaf) -> bool:
1019 """Not implemented in this class. Raises `NotImplementedError`."""
1020 raise NotImplementedError("Unformatted lines don't store comments separately.")
1022 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1023 """Does nothing and returns False."""
1026 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1027 """Does nothing and returns False."""
1032 class EmptyLineTracker:
1033 """Provides a stateful method that returns the number of potential extra
1034 empty lines needed before and after the currently processed line.
1036 Note: this tracker works on lines that haven't been split yet. It assumes
1037 the prefix of the first leaf consists of optional newlines. Those newlines
1038 are consumed by `maybe_empty_lines()` and included in the computation.
1040 previous_line: Optional[Line] = None
1041 previous_after: int = 0
1042 previous_defs: List[int] = Factory(list)
1044 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1045 """Return the number of extra empty lines before and after the `current_line`.
1047 This is for separating `def`, `async def` and `class` with extra empty
1048 lines (two on module-level), as well as providing an extra empty line
1049 after flow control keywords to make them more prominent.
1051 if isinstance(current_line, UnformattedLines):
1054 before, after = self._maybe_empty_lines(current_line)
1055 before -= self.previous_after
1056 self.previous_after = after
1057 self.previous_line = current_line
1058 return before, after
1060 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1062 if current_line.depth == 0:
1064 if current_line.leaves:
1065 # Consume the first leaf's extra newlines.
1066 first_leaf = current_line.leaves[0]
1067 before = first_leaf.prefix.count("\n")
1068 before = min(before, max_allowed)
1069 first_leaf.prefix = ""
1072 depth = current_line.depth
1073 while self.previous_defs and self.previous_defs[-1] >= depth:
1074 self.previous_defs.pop()
1075 before = 1 if depth else 2
1076 is_decorator = current_line.is_decorator
1077 if is_decorator or current_line.is_def or current_line.is_class:
1078 if not is_decorator:
1079 self.previous_defs.append(depth)
1080 if self.previous_line is None:
1081 # Don't insert empty lines before the first line in the file.
1084 if self.previous_line.is_decorator:
1088 self.previous_line.is_comment
1089 and self.previous_line.depth == current_line.depth
1095 if current_line.depth:
1101 and self.previous_line.is_import
1102 and not current_line.is_import
1103 and depth == self.previous_line.depth
1105 return (before or 1), 0
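
# Illustrative sketch (not part of black.py): the spacing policy implemented
# by EmptyLineTracker above produces output shaped like this, with two blank
# lines between top-level definitions and one between methods.
_EMPTY_LINE_EXAMPLE = '''\
def f():
    pass


class C:
    def g(self):
        pass

    def h(self):
        pass
'''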
1111 class LineGenerator(Visitor[Line]):
1112 """Generates reformatted Line objects. Empty lines are not emitted.
1114 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1115 in ways that will no longer stringify to valid Python code on the tree.
1117 current_line: Line = Factory(Line)
1119 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1122 If the line is empty, only emit if it makes sense.
1123 If the line is too long, split it first and then generate.
1125 If any lines were generated, set up a new current_line.
1127 if not self.current_line:
1128 if self.current_line.__class__ == type:
1129 self.current_line.depth += indent
1131 self.current_line = type(depth=self.current_line.depth + indent)
1132 return # Line is empty, don't emit. Creating a new one unnecessary.
1134 complete_line = self.current_line
1135 self.current_line = type(depth=complete_line.depth + indent)
1138 def visit(self, node: LN) -> Iterator[Line]:
1139 """Main method to visit `node` and its children.
1141 Yields :class:`Line` objects.
1143 if isinstance(self.current_line, UnformattedLines):
1144 # File contained `# fmt: off`
1145 yield from self.visit_unformatted(node)
1148 yield from super().visit(node)
1150 def visit_default(self, node: LN) -> Iterator[Line]:
1151 """Default `visit_*()` implementation. Recurses to children of `node`."""
1152 if isinstance(node, Leaf):
1153 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1155 for comment in generate_comments(node):
1156 if any_open_brackets:
1157 # any comment within brackets is subject to splitting
1158 self.current_line.append(comment)
1159 elif comment.type == token.COMMENT:
1160 # regular trailing comment
1161 self.current_line.append(comment)
1162 yield from self.line()
1165 # regular standalone comment
1166 yield from self.line()
1168 self.current_line.append(comment)
1169 yield from self.line()
1171 except FormatOff as f_off:
1172 f_off.trim_prefix(node)
1173 yield from self.line(type=UnformattedLines)
1174 yield from self.visit(node)
1176 except FormatOn as f_on:
1177 # This only happens here if somebody says "fmt: on" multiple times in a row.
1179 f_on.trim_prefix(node)
1180 yield from self.visit_default(node)
1183 normalize_prefix(node, inside_brackets=any_open_brackets)
1184 if node.type == token.STRING:
1185 normalize_string_quotes(node)
1186 if node.type not in WHITESPACE:
1187 self.current_line.append(node)
1188 yield from super().visit_default(node)
1190 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1191 """Increase indentation level, maybe yield a line."""
1192 # In blib2to3 INDENT never holds comments.
1193 yield from self.line(+1)
1194 yield from self.visit_default(node)
1196 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1197 """Decrease indentation level, maybe yield a line."""
1198 # The current line might still wait for trailing comments. At DEDENT time
1199 # there won't be any (they would be prefixes on the preceding NEWLINE).
1200 # Emit the line then.
1201 yield from self.line()
1203 # While DEDENT has no value, its prefix may contain standalone comments
1204 # that belong to the current indentation level. Get 'em.
1205 yield from self.visit_default(node)
1207 # Finally, emit the dedent.
1208 yield from self.line(-1)
1211 self, node: Node, keywords: Set[str], parens: Set[str]
1212 ) -> Iterator[Line]:
1213 """Visit a statement.
1215 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1216 `def`, `with`, `class`, and `assert`.
1218 The relevant Python language `keywords` for a given statement will be
1219 NAME leaves within it. This method puts those on a separate line.
1221 `parens` holds pairs of nodes where invisible parentheses should be put.
1222 Keys hold nodes after which opening parentheses should be put, values
1223 hold nodes before which closing parentheses should be put.
1225 normalize_invisible_parens(node, parens_after=parens)
1226 for child in node.children:
1227 if child.type == token.NAME and child.value in keywords: # type: ignore
1228 yield from self.line()
1230 yield from self.visit(child)
1232 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1233 """Visit a statement without nested statements."""
1234 is_suite_like = node.parent and node.parent.type in STATEMENT
1236 yield from self.line(+1)
1237 yield from self.visit_default(node)
1238 yield from self.line(-1)
1241 yield from self.line()
1242 yield from self.visit_default(node)
1244 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1245 """Visit `async def`, `async for`, `async with`."""
1246 yield from self.line()
1248 children = iter(node.children)
1249 for child in children:
1250 yield from self.visit(child)
1252 if child.type == token.ASYNC:
1255 internal_stmt = next(children)
1256 for child in internal_stmt.children:
1257 yield from self.visit(child)
1259 def visit_decorators(self, node: Node) -> Iterator[Line]:
1260 """Visit decorators."""
1261 for child in node.children:
1262 yield from self.line()
1263 yield from self.visit(child)
1265 def visit_import_from(self, node: Node) -> Iterator[Line]:
1266 """Visit import_from and maybe put invisible parentheses.
1268 This is separate from `visit_stmt` because import statements don't
1269 support arbitrary atoms and thus handling of parentheses is custom.
1272 for index, child in enumerate(node.children):
1274 if child.type == token.LPAR:
1275 # make parentheses invisible
1276 child.value = "" # type: ignore
1277 node.children[-1].value = "" # type: ignore
1279 # insert invisible parentheses
1280 node.insert_child(index, Leaf(token.LPAR, ""))
1281 node.append_child(Leaf(token.RPAR, ""))
1285 child.type == token.NAME and child.value == "import" # type: ignore
1288 for child in node.children:
1289 yield from self.visit(child)
1291 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1292 """Remove a semicolon and put the other statement on a separate line."""
1293 yield from self.line()
1295 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1296 """End of file. Process outstanding comments and end with a newline."""
1297 yield from self.visit_default(leaf)
1298 yield from self.line()
1300 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1301 """Used when file contained a `# fmt: off`."""
1302 if isinstance(node, Node):
1303 for child in node.children:
1304 yield from self.visit(child)
1308 self.current_line.append(node)
1309 except FormatOn as f_on:
1310 f_on.trim_prefix(node)
1311 yield from self.line()
1312 yield from self.visit(node)
1314 if node.type == token.ENDMARKER:
1315 # somebody decided not to put a final `# fmt: on`
1316 yield from self.line()
1318 def __attrs_post_init__(self) -> None:
1319 """You are in a twisty little maze of passages."""
1322 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1323 self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
1324 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1325 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1326 self.visit_try_stmt = partial(
1327 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1329 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1330 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1331 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1332 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1333 self.visit_async_funcdef = self.visit_async_stmt
1334 self.visit_decorated = self.visit_decorators
1337 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1338 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1339 OPENING_BRACKETS = set(BRACKET.keys())
1340 CLOSING_BRACKETS = set(BRACKET.values())
1341 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1342 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1345 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901
1346 """Return whitespace prefix if needed for the given `leaf`.
1348 `complex_subscript` signals whether the given leaf is part of a subscription
1349 which has non-trivial arguments, like arithmetic expressions or function calls.
1357 if t in ALWAYS_NO_SPACE:
1360 if t == token.COMMENT:
1363 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1366 and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
1370 prev = leaf.prev_sibling
1372 prevp = preceding_leaf(p)
1373 if not prevp or prevp.type in OPENING_BRACKETS:
1376 if t == token.COLON:
1377 if prevp.type == token.COLON:
1380 elif prevp.type != token.COMMA and not complex_subscript:
1385 if prevp.type == token.EQUAL:
1387 if prevp.parent.type in {
1388 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1392 elif prevp.parent.type == syms.typedargslist:
1393 # A bit hacky: if the equal sign has whitespace, it means we
1394 # previously found it's a typed argument. So, we're using
1398 elif prevp.type in STARS:
1399 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1402 elif prevp.type == token.COLON:
1403 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1404 return SPACE if complex_subscript else NO
1408 and prevp.parent.type == syms.factor
1409 and prevp.type in MATH_OPERATORS
1414 prevp.type == token.RIGHTSHIFT
1416 and prevp.parent.type == syms.shift_expr
1417 and prevp.prev_sibling
1418 and prevp.prev_sibling.type == token.NAME
1419 and prevp.prev_sibling.value == "print" # type: ignore
1421 # Python 2 print chevron
1424 elif prev.type in OPENING_BRACKETS:
1427 if p.type in {syms.parameters, syms.arglist}:
1428 # untyped function signatures or calls
1429 if not prev or prev.type != token.COMMA:
1432 elif p.type == syms.varargslist:
1434 if prev and prev.type != token.COMMA:
1437 elif p.type == syms.typedargslist:
1438 # typed function signatures
1442 if t == token.EQUAL:
1443 if prev.type != syms.tname:
1446 elif prev.type == token.EQUAL:
1447 # A bit hacky: if the equal sign has whitespace, it means we
1448 # previously found it's a typed argument. So, we're using that, too.
1451 elif prev.type != token.COMMA:
1454 elif p.type == syms.tname:
1457 prevp = preceding_leaf(p)
1458 if not prevp or prevp.type != token.COMMA:
1461 elif p.type == syms.trailer:
1462 # attributes and calls
1463 if t == token.LPAR or t == token.RPAR:
1468 prevp = preceding_leaf(p)
1469 if not prevp or prevp.type != token.NUMBER:
1472 elif t == token.LSQB:
1475 elif prev.type != token.COMMA:
1478 elif p.type == syms.argument:
1480 if t == token.EQUAL:
1484 prevp = preceding_leaf(p)
1485 if not prevp or prevp.type == token.LPAR:
1488 elif prev.type in {token.EQUAL} | STARS:
1491 elif p.type == syms.decorator:
1495 elif p.type == syms.dotted_name:
1499 prevp = preceding_leaf(p)
1500 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1503 elif p.type == syms.classdef:
1507 if prev and prev.type == token.LPAR:
1510 elif p.type in {syms.subscript, syms.sliceop}:
1513 assert p.parent is not None, "subscripts are always parented"
1514 if p.parent.type == syms.subscriptlist:
1519 elif not complex_subscript:
1522 elif p.type == syms.atom:
1523 if prev and t == token.DOT:
1524 # dots, but not the first one.
1527 elif p.type == syms.dictsetmaker:
1529 if prev and prev.type == token.DOUBLESTAR:
1532 elif p.type in {syms.factor, syms.star_expr}:
1535 prevp = preceding_leaf(p)
1536 if not prevp or prevp.type in OPENING_BRACKETS:
1539 prevp_parent = prevp.parent
1540 assert prevp_parent is not None
1542 prevp.type == token.COLON
1543 and prevp_parent.type in {syms.subscript, syms.sliceop}
1547 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1550 elif t == token.NAME or t == token.NUMBER:
1553 elif p.type == syms.import_from:
1555 if prev and prev.type == token.DOT:
1558 elif t == token.NAME:
1562 if prev and prev.type == token.DOT:
1565 elif p.type == syms.sliceop:
1571 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1572 """Return the first leaf that precedes `node`, if any."""
1574 res = node.prev_sibling
1576 if isinstance(res, Leaf):
1580 return list(res.leaves())[-1]
1589 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1590 """Return the child of `ancestor` that contains `descendant`."""
1591 node: Optional[LN] = descendant
1592 while node and node.parent != ancestor:
1597 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1598 """Return the priority of the `leaf` delimiter, given a line break after it.
1600 The delimiter priorities returned here are from those delimiters that would
1601 cause a line break after themselves.
1603 Higher numbers are higher priority.
1605 if leaf.type == token.COMMA:
1606 return COMMA_PRIORITY
1611 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1612 """Return the priority of the `leaf` delimiter, given a line before after it.
1614 The delimiter priorities returned here are from those delimiters that would
1615 cause a line break before themselves.
1617 Higher numbers are higher priority.
1619 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1620 # * and ** might also be MATH_OPERATORS but in this case they are not.
1621 # Don't treat them as a delimiter.
1625 leaf.type in MATH_OPERATORS
1627 and leaf.parent.type not in {syms.factor, syms.star_expr}
1629 return MATH_PRIORITY
1631 if leaf.type in COMPARATORS:
1632 return COMPARATOR_PRIORITY
1635 leaf.type == token.STRING
1636 and previous is not None
1637 and previous.type == token.STRING
1639 return STRING_PRIORITY
1642 leaf.type == token.NAME
1643 and leaf.value == "for"
1645 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1647 return COMPREHENSION_PRIORITY
1650 leaf.type == token.NAME
1651 and leaf.value == "if"
1653 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1655 return COMPREHENSION_PRIORITY
1658 leaf.type == token.NAME
1659 and leaf.value in {"if", "else"}
1661 and leaf.parent.type == syms.test
1663 return TERNARY_PRIORITY
1665 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
1666 return LOGIC_PRIORITY
1671 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1672 """Clean the prefix of the `leaf` and generate comments from it, if any.
1674 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1675 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1676 move because it does away with modifying the grammar to include all the
1677 possible places in which comments can be placed.
1679 The sad consequence for us though is that comments don't "belong" anywhere.
1680 This is why this function generates simple parentless Leaf objects for
1681 comments. We simply don't know what the correct parent should be.
1683 No matter though, we can live without this. We really only need to
1684 differentiate between inline and standalone comments. The latter don't
1685 share the line with any code.
1687 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1688 are emitted with a fake STANDALONE_COMMENT token identifier.
1699 for index, line in enumerate(p.split("\n")):
1700 consumed += len(line) + 1 # adding the length of the split '\n'
1701 line = line.lstrip()
1704 if not line.startswith("#"):
1707 if index == 0 and leaf.type != token.ENDMARKER:
1708 comment_type = token.COMMENT # simple trailing comment
1710 comment_type = STANDALONE_COMMENT
1711 comment = make_comment(line)
1712 yield Leaf(comment_type, comment, prefix="\n" * nlines)
1714 if comment in {"# fmt: on", "# yapf: enable"}:
1715 raise FormatOn(consumed)
1717 if comment in {"# fmt: off", "# yapf: disable"}:
1718 if comment_type == STANDALONE_COMMENT:
1719 raise FormatOff(consumed)
1721 prev = preceding_leaf(leaf)
1722 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
1723 raise FormatOff(consumed)
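
# Illustrative sketch (not part of black.py): comments arrive glued to the
# prefix of the following leaf, and only the first one can be a trailing
# (inline) comment; the rest become STANDALONE_COMMENT leaves.
def _comment_kinds_example() -> List[int]:
    leaf = Leaf(token.NAME, "x", prefix="  # trailing comment\n# standalone\n")
    return [c.type for c in generate_comments(leaf)]
    # -> [token.COMMENT, STANDALONE_COMMENT]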
1728 def make_comment(content: str) -> str:
1729 """Return a consistently formatted comment from the given `content` string.
1731 All comments (except for "##", "#!", "#:") should have a single space between
1732 the hash sign and the content.
1734 If `content` didn't start with a hash sign, one is provided.
1736 content = content.rstrip()
1740 if content[0] == "#":
1741 content = content[1:]
1742 if content and content[0] not in " !:#":
1743 content = " " + content
1744 return "#" + content
1748 line: Line, line_length: int, inner: bool = False, py36: bool = False
1749 ) -> Iterator[Line]:
1750 """Split a `line` into potentially many lines.
1752 They should fit in the allotted `line_length` but might not be able to.
1753 `inner` signifies that there were a pair of brackets somewhere around the
1754 current `line`, possibly transitively. This means we can fall back to splitting
1755 by delimiters if the LHS/RHS don't yield any results.
1757 If `py36` is True, splitting may generate syntax that is only compatible
1758 with Python 3.6 and later.
1760 if isinstance(line, UnformattedLines) or line.is_comment:
1764 line_str = str(line).strip("\n")
1766 len(line_str) <= line_length
1767 and "\n" not in line_str # multiline strings
1768 and not line.contains_standalone_comments()
1773 split_funcs: List[SplitFunc]
1775 split_funcs = [left_hand_split]
1776 elif line.is_import:
1777 split_funcs = [explode_split]
1778 elif line.inside_brackets:
1779 split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
1781 split_funcs = [right_hand_split]
1782 for split_func in split_funcs:
1783 # We are accumulating lines in `result` because we might want to abort
1784 # mission and return the original line in the end, or attempt a different
1786 result: List[Line] = []
1788 for l in split_func(line, py36):
1789 if str(l).strip("\n") == line_str:
1790 raise CannotSplit("Split function returned an unchanged result")
1793 split_line(l, line_length=line_length, inner=True, py36=py36)
1795 except CannotSplit as cs:
1806 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1807 """Split line into many lines, starting with the first matching bracket pair.
1809 Note: this usually looks weird, only use this for function definitions.
1810 Prefer RHS otherwise.
1812 head = Line(depth=line.depth)
1813 body = Line(depth=line.depth + 1, inside_brackets=True)
1814 tail = Line(depth=line.depth)
1815 tail_leaves: List[Leaf] = []
1816 body_leaves: List[Leaf] = []
1817 head_leaves: List[Leaf] = []
1818 current_leaves = head_leaves
1819 matching_bracket = None
1820 for leaf in line.leaves:
1822 current_leaves is body_leaves
1823 and leaf.type in CLOSING_BRACKETS
1824 and leaf.opening_bracket is matching_bracket
1826 current_leaves = tail_leaves if body_leaves else head_leaves
1827 current_leaves.append(leaf)
1828 if current_leaves is head_leaves:
1829 if leaf.type in OPENING_BRACKETS:
1830 matching_bracket = leaf
1831 current_leaves = body_leaves
1832 # Since body is a new indent level, remove spurious leading whitespace.
1834 normalize_prefix(body_leaves[0], inside_brackets=True)
1835 # Build the new lines.
1836 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1838 result.append(leaf, preformatted=True)
1839 for comment_after in line.comments_after(leaf):
1840 result.append(comment_after, preformatted=True)
1841 bracket_split_succeeded_or_raise(head, body, tail)
1842 for result in (head, body, tail):
1847 def right_hand_split(
1848 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1849 ) -> Iterator[Line]:
1850 """Split line into many lines, starting with the last matching bracket pair."""
1851 head = Line(depth=line.depth)
1852 body = Line(depth=line.depth + 1, inside_brackets=True)
1853 tail = Line(depth=line.depth)
1854 tail_leaves: List[Leaf] = []
1855 body_leaves: List[Leaf] = []
1856 head_leaves: List[Leaf] = []
1857 current_leaves = tail_leaves
1858 opening_bracket = None
1859 closing_bracket = None
1860 for leaf in reversed(line.leaves):
1861 if current_leaves is body_leaves:
1862 if leaf is opening_bracket:
1863 current_leaves = head_leaves if body_leaves else tail_leaves
1864 current_leaves.append(leaf)
1865 if current_leaves is tail_leaves:
1866 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1867 opening_bracket = leaf.opening_bracket
1868 closing_bracket = leaf
1869 current_leaves = body_leaves
1870 tail_leaves.reverse()
1871 body_leaves.reverse()
1872 head_leaves.reverse()
1873 # Since body is a new indent level, remove spurious leading whitespace.
1875 normalize_prefix(body_leaves[0], inside_brackets=True)
1876 elif not head_leaves:
1877 # No `head` and no `body` means the split failed. `tail` has all content.
1878 raise CannotSplit("No brackets found")
1880 # Build the new lines.
1881 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1883 result.append(leaf, preformatted=True)
1884 for comment_after in line.comments_after(leaf):
1885 result.append(comment_after, preformatted=True)
1886 bracket_split_succeeded_or_raise(head, body, tail)
1887 assert opening_bracket and closing_bracket
1889 opening_bracket.type == token.LPAR
1890 and not opening_bracket.value
1891 and closing_bracket.type == token.RPAR
1892 and not closing_bracket.value
1894 # These parens were optional. If there aren't any delimiters or standalone
1895 # comments in the body, they were unnecessary and another split without
1896 # them should be attempted.
1898 body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
1900 omit = {id(closing_bracket), *omit}
1901 yield from right_hand_split(line, py36=py36, omit=omit)
1904 ensure_visible(opening_bracket)
1905 ensure_visible(closing_bracket)
1906 for result in (head, body, tail):
1911 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1912 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
1914 Do nothing otherwise.
1916 A left- or right-hand split is based on a pair of brackets. Content before
1917 (and including) the opening bracket is left on one line, content inside the
1918 brackets is put on a separate line, and finally content starting with and
1919 following the closing bracket is put on a separate line.
1921 Those are called `head`, `body`, and `tail`, respectively. If the split
1922 produced the same line (all content in `head`) or ended up with an empty `body`
1923 and the `tail` is just the closing bracket, then it's considered failed.
1925 tail_len = len(str(tail).strip())
1928 raise CannotSplit("Splitting brackets produced the same line")
1932 f"Splitting brackets on an empty body to save "
1933 f"{tail_len} characters is not worth it"
1937 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
1938 """Normalize prefix of the first leaf in every line returned by `split_func`.
1940 This is a decorator over relevant split functions.
1944 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
1945 for l in split_func(line, py36):
1946 normalize_prefix(l.leaves[0], inside_brackets=True)
1949 return split_wrapper
1952 @dont_increase_indentation
1953 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1954 """Split according to delimiters of the highest priority.
1956 If `py36` is True, the split will add trailing commas also in function
1957 signatures that contain `*` and `**`.
1960 last_leaf = line.leaves[-1]
1962 raise CannotSplit("Line empty")
1964 delimiters = line.bracket_tracker.delimiters
1966 delimiter_priority = line.bracket_tracker.max_delimiter_priority(
1967 exclude={id(last_leaf)}
1970 raise CannotSplit("No delimiters found")
1972 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1973 lowest_depth = sys.maxsize
1974 trailing_comma_safe = True
1976 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1977 """Append `leaf` to current line or to new line if appending impossible."""
1978 nonlocal current_line
1980 current_line.append_safe(leaf, preformatted=True)
1981 except ValueError as ve:
1984 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1985 current_line.append(leaf)
1987 for leaf in line.leaves:
1988 yield from append_to_line(leaf)
1990 for comment_after in line.comments_after(leaf):
1991 yield from append_to_line(comment_after)
1993 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1995 leaf.bracket_depth == lowest_depth
1996 and is_vararg(leaf, within=VARARGS_PARENTS)
1998 trailing_comma_safe = trailing_comma_safe and py36
1999 leaf_priority = delimiters.get(id(leaf))
2000 if leaf_priority == delimiter_priority:
2003 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2007 and delimiter_priority == COMMA_PRIORITY
2008 and current_line.leaves[-1].type != token.COMMA
2009 and current_line.leaves[-1].type != STANDALONE_COMMENT
2011 current_line.append(Leaf(token.COMMA, ","))
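
# Illustration (not part of black.py) of the `py36` flag described above:
# splitting a signature that contains * or ** only gets the magic trailing
# comma when py36 is True, because the result
#
#     def f(
#         *args,
#         **kwargs,
#     ): ...
#
# is a syntax error on interpreters older than Python 3.6.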
2015 @dont_increase_indentation
2016 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2017 """Split standalone comments from the rest of the line."""
2018 if not line.contains_standalone_comments(0):
2019 raise CannotSplit("Line does not have any standalone comments")
2021 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2023 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2024 """Append `leaf` to current line or to new line if appending impossible."""
2025 nonlocal current_line
2027 current_line.append_safe(leaf, preformatted=True)
2028 except ValueError as ve:
2031 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2032 current_line.append(leaf)
2034 for leaf in line.leaves:
2035 yield from append_to_line(leaf)
2037 for comment_after in line.comments_after(leaf):
2038 yield from append_to_line(comment_after)
2045 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
2046 ) -> Iterator[Line]:
2047 """Split by rightmost bracket and immediately split contents by a delimiter."""
2048 new_lines = list(right_hand_split(line, py36, omit))
2049 if len(new_lines) != 3:
2050 yield from new_lines
2056 yield from delimiter_split(new_lines[1], py36)
2064 def is_import(leaf: Leaf) -> bool:
2065 """Return True if the given leaf starts an import statement."""
2072 (v == "import" and p and p.type == syms.import_name)
2073 or (v == "from" and p and p.type == syms.import_from)
2078 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2079 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2082 Note: don't use backslashes for formatting or you'll lose your voting rights.
2084 if not inside_brackets:
2085 spl = leaf.prefix.split("#")
2086 if "\\" not in spl[0]:
2087 nl_count = spl[-1].count("\n")
2090 leaf.prefix = "\n" * nl_count
2096 def normalize_string_quotes(leaf: Leaf) -> None:
2097 """Prefer double quotes but only if it doesn't cause more escaping.
2099 Adds or removes backslashes as appropriate. Doesn't parse and fix
2100 strings nested in f-strings (yet).
2102 Note: Mutates its argument.
2104 value = leaf.value.lstrip("furbFURB")
2105 if value[:3] == '"""':
2108 elif value[:3] == "'''":
2111 elif value[0] == '"':
2117 first_quote_pos = leaf.value.find(orig_quote)
2118 if first_quote_pos == -1:
2119 return # There's an internal error
2121 prefix = leaf.value[:first_quote_pos]
2122 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2123 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2124 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2125 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2126 if "r" in prefix.casefold():
2127 if unescaped_new_quote.search(body):
2128 # There's at least one unescaped new_quote in this raw string
2129 # so converting is impossible
2132 # Do not introduce or remove backslashes in raw strings
2135 # remove unnecessary quotes
2136 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2137 if body != new_body:
2138 # Consider the string without unnecessary quotes as the original
2140 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2141 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2142 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2143 if new_quote == '"""' and new_body[-1] == '"':
2145 new_body = new_body[:-1] + '\\"'
2146 orig_escape_count = body.count("\\")
2147 new_escape_count = new_body.count("\\")
2148 if new_escape_count > orig_escape_count:
2149 return # Do not introduce more escaping
2151 if new_escape_count == orig_escape_count and orig_quote == '"':
2152 return # Prefer double quotes
2154 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
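
# Illustrative sketch (not part of black.py) of the preference above: switch
# to double quotes, but never at the cost of extra backslashes.
def _string_quote_examples() -> None:
    s = Leaf(token.STRING, "'hello'")
    normalize_string_quotes(s)
    assert s.value == '"hello"'  # plain single quotes become double quotes
    s = Leaf(token.STRING, "'say \"hi\"'")
    normalize_string_quotes(s)
    assert s.value == "'say \"hi\"'"  # unchanged: switching would add escapes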
2157 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2158 """Make existing optional parentheses invisible or create new ones.
2160 Standardizes on visible parentheses for single-element tuples, and keeps
2161 existing visible parentheses for other tuples and generator expressions.
2164 for child in list(node.children):
2166 if child.type == syms.atom:
2168 is_empty_tuple(child)
2169 or is_one_tuple(child)
2170 or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
2172 first = child.children[0]
2173 last = child.children[-1]
2174 if first.type == token.LPAR and last.type == token.RPAR:
2175 # make parentheses invisible
2176 first.value = "" # type: ignore
2177 last.value = "" # type: ignore
2178 elif is_one_tuple(child):
2179 # wrap child in visible parentheses
2180 lpar = Leaf(token.LPAR, "(")
2181 rpar = Leaf(token.RPAR, ")")
2182 index = child.remove() or 0
2183 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2185 # wrap child in invisible parentheses
2186 lpar = Leaf(token.LPAR, "")
2187 rpar = Leaf(token.RPAR, "")
2188 index = child.remove() or 0
2189 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2191 check_lpar = isinstance(child, Leaf) and child.value in parens_after
2194 def is_empty_tuple(node: LN) -> bool:
2195 """Return True if `node` holds an empty tuple."""
2197 node.type == syms.atom
2198 and len(node.children) == 2
2199 and node.children[0].type == token.LPAR
2200 and node.children[1].type == token.RPAR
2204 def is_one_tuple(node: LN) -> bool:
2205 """Return True if `node` holds a tuple with one element, with or without parens."""
2206 if node.type == syms.atom:
2207 if len(node.children) != 3:
2210 lpar, gexp, rpar = node.children
2212 lpar.type == token.LPAR
2213 and gexp.type == syms.testlist_gexp
2214 and rpar.type == token.RPAR
2218 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2221 node.type in IMPLICIT_TUPLE
2222 and len(node.children) == 2
2223 and node.children[1].type == token.COMMA
2227 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2228 """Return True if `leaf` is a star or double star in a vararg or kwarg.
2230 If `within` includes VARARGS_PARENTS, this applies to function signatures.
2231 If `within` includes UNPACKING_PARENTS, it applies to right
2232 hand-side extended iterable unpacking (PEP 3132) and additional unpacking
2233 generalizations (PEP 448).
2235 if leaf.type not in STARS or not leaf.parent:
2236 return False
2238 p = leaf.parent
2239 if p.type == syms.star_expr:
2240 # Star expressions are also used as assignment targets in extended
2241 # iterable unpacking (PEP 3132). See what its parent is instead.
2247 return p.type in within
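# Illustrative examples (not part of black.py) of the star contexts that
# is_vararg() distinguishes by looking at the star's parent node.
def _demo(*args, **kwargs):              # stars in a function signature (varargs)
    return args, kwargs

first, *rest = [1, 2, 3]                 # PEP 3132 extended iterable unpacking
merged = [*[1, 2], *[3, 4]]              # PEP 448 unpacking generalizations
assert _demo(1, x=2) == ((1,), {"x": 2})
assert (first, rest, merged) == (1, [2, 3], [1, 2, 3, 4])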
2250 def max_delimiter_priority_in_atom(node: LN) -> int:
2251 """Return maximum delimiter priority inside `node`.
2253 This is specific to atoms with contents contained in a pair of parentheses.
2254 If `node` isn't an atom or there are no enclosing parentheses, returns 0.
2256 if node.type != syms.atom:
2257 return 0
2259 first = node.children[0]
2260 last = node.children[-1]
2261 if not (first.type == token.LPAR and last.type == token.RPAR):
2262 return 0
2264 bt = BracketTracker()
2265 for c in node.children[1:-1]:
2266 if isinstance(c, Leaf):
2269 for leaf in c.leaves():
2272 return bt.max_delimiter_priority()
2278 def ensure_visible(leaf: Leaf) -> None:
2279 """Make sure parentheses are visible.
2281 They could be invisible as part of some statements (see
2282 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
2284 if leaf.type == token.LPAR:
2285 leaf.value = "("
2286 elif leaf.type == token.RPAR:
2287 leaf.value = ")"
2290 def is_python36(node: Node) -> bool:
2291 """Return True if the current file is using Python 3.6+ features.
2293 Currently looking for:
2294 - f-strings;
2295 - trailing commas after * or ** in function signatures.
2297 for n in node.pre_order():
2298 if n.type == token.STRING:
2299 value_head = n.value[:2] # type: ignore
2300 if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
2301 return True
2303 elif (
2304 n.type == syms.typedargslist
2306 and n.children[-1].type == token.COMMA
2307 ):
2308 for ch in n.children:
2309 if ch.type in STARS:
2310 return True
2312 return False
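# Illustrative examples (not part of black.py) of the 3.6+ constructs the
# detector above looks for; this snippet itself only parses on Python 3.6+.
name = "world"
greeting = f"hello {name}"               # f-string literal

def _takes_kwargs(**kwargs,):            # trailing comma after ** in a signature
    return kwargs

assert greeting == "hello world"
assert _takes_kwargs(a=1) == {"a": 1}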
2315 PYTHON_EXTENSIONS = {".py"}
2316 BLACKLISTED_DIRECTORIES = {
2317 "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
2321 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
2322 """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
2323 and have one of the PYTHON_EXTENSIONS.
2325 for child in path.iterdir():
2326 if child.is_dir():
2327 if child.name in BLACKLISTED_DIRECTORIES:
2328 continue
2330 yield from gen_python_files_in_dir(child)
2332 elif child.suffix in PYTHON_EXTENSIONS:
2333 yield child
2338 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2341 change_count: int = 0
2343 failure_count: int = 0
2345 def done(self, src: Path, changed: Changed) -> None:
2346 """Increment the counter for successful reformatting. Write out a message."""
2347 if changed is Changed.YES:
2348 reformatted = "would reformat" if self.check else "reformatted"
2350 out(f"{reformatted} {src}")
2351 self.change_count += 1
2354 if changed is Changed.NO:
2355 msg = f"{src} already well formatted, good job."
2357 msg = f"{src} wasn't modified on disk since last run."
2358 out(msg, bold=False)
2359 self.same_count += 1
2361 def failed(self, src: Path, message: str) -> None:
2362 """Increment the counter for failed reformatting. Write out a message."""
2363 err(f"error: cannot format {src}: {message}")
2364 self.failure_count += 1
2367 def return_code(self) -> int:
2368 """Return the exit code that the app should use.
2370 This considers the current state of changed files and failures:
2371 - if there were any failures, return 123;
2372 - if any files were changed and --check is being used, return 1;
2373 - otherwise return 0.
2375 # According to http://tldp.org/LDP/abs/html/exitcodes.html, return codes
2376 # from 126 and up are reserved by the shell.
2377 if self.failure_count:
2378 return 123
2380 elif self.change_count and self.check:
2381 return 1
2383 return 0
2385 def __str__(self) -> str:
2386 """Render a color report of the current state.
2388 Use `click.unstyle` to remove colors.
2391 reformatted = "would be reformatted"
2392 unchanged = "would be left unchanged"
2393 failed = "would fail to reformat"
2395 reformatted = "reformatted"
2396 unchanged = "left unchanged"
2397 failed = "failed to reformat"
2398 report = []
2399 if self.change_count:
2400 s = "s" if self.change_count > 1 else ""
2401 report.append(
2402 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2403 )
2404 if self.same_count:
2405 s = "s" if self.same_count > 1 else ""
2406 report.append(f"{self.same_count} file{s} {unchanged}")
2407 if self.failure_count:
2408 s = "s" if self.failure_count > 1 else ""
2409 report.append(
2410 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2411 )
2412 return ", ".join(report) + "."
2415 def assert_equivalent(src: str, dst: str) -> None:
2416 """Raise AssertionError if `src` and `dst` aren't equivalent."""
2421 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2422 """Simple visitor generating strings to compare ASTs by content."""
2423 yield f"{' ' * depth}{node.__class__.__name__}("
2425 for field in sorted(node._fields):
2426 try:
2427 value = getattr(node, field)
2428 except AttributeError:
2429 continue
2431 yield f"{' ' * (depth+1)}{field}="
2433 if isinstance(value, list):
2434 for item in value:
2435 if isinstance(item, ast.AST):
2436 yield from _v(item, depth + 2)
2438 elif isinstance(value, ast.AST):
2439 yield from _v(value, depth + 2)
2442 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
2444 yield f"{' ' * depth}) # /{node.__class__.__name__}"
2446 try:
2447 src_ast = ast.parse(src)
2448 except Exception as exc:
2449 major, minor = sys.version_info[:2]
2450 raise AssertionError(
2451 f"cannot use --safe with this file; failed to parse source file "
2452 f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2453 f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
2456 try:
2457 dst_ast = ast.parse(dst)
2458 except Exception as exc:
2459 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2460 raise AssertionError(
2461 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2462 f"Please report a bug on https://github.com/ambv/black/issues. "
2463 f"This invalid output might be helpful: {log}"
2466 src_ast_str = "\n".join(_v(src_ast))
2467 dst_ast_str = "\n".join(_v(dst_ast))
2468 if src_ast_str != dst_ast_str:
2469 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2470 raise AssertionError(
2471 f"INTERNAL ERROR: Black produced code that is not equivalent to "
2473 f"Please report a bug on https://github.com/ambv/black/issues. "
2474 f"This diff might be helpful: {log}"
2478 def assert_stable(src: str, dst: str, line_length: int) -> None:
2479 """Raise AssertionError if `dst` reformats differently the second time."""
2480 newdst = format_str(dst, line_length=line_length)
2481 if dst != newdst:
2482 log = dump_to_file(
2483 diff(src, dst, "source", "first pass"),
2484 diff(dst, newdst, "first pass", "second pass"),
2485 )
2486 raise AssertionError(
2487 f"INTERNAL ERROR: Black produced different code on the second pass "
2488 f"of the formatter. "
2489 f"Please report a bug on https://github.com/ambv/black/issues. "
2490 f"This diff might be helpful: {log}"
2494 def dump_to_file(*output: str) -> str:
2495 """Dump `output` to a temporary file. Return path to the file."""
2496 import tempfile
2498 with tempfile.NamedTemporaryFile(
2499 mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
2500 ) as f:
2501 for lines in output:
2502 f.write(lines)
2503 if lines and lines[-1] != "\n":
2504 f.write("\n")
2506 return f.name
2508 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
2509 """Return a unified diff string between strings `a` and `b`."""
2510 import difflib
2512 a_lines = [line + "\n" for line in a.split("\n")]
2513 b_lines = [line + "\n" for line in b.split("\n")]
2514 return "".join(
2515 difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
2516 )
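# Illustrative sketch (not part of black.py): the same unified-diff call as
# above, on a two-line example, with n=5 lines of context.
import difflib

a_lines = ["a\n", "b\n"]
b_lines = ["a\n", "c\n"]
print("".join(difflib.unified_diff(a_lines, b_lines, fromfile="before", tofile="after", n=5)))
# expected output starts with:
# --- before
# +++ after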
2519 def cancel(tasks: List[asyncio.Task]) -> None:
2520 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
2526 def shutdown(loop: BaseEventLoop) -> None:
2527 """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
2529 # This part is borrowed from asyncio/runners.py in Python 3.7b2.
2530 to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
2534 for task in to_cancel:
2535 task.cancel()
2536 loop.run_until_complete(
2537 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
2538 )
2540 # `concurrent.futures.Future` objects cannot be cancelled once they
2541 # are already running. There might be some when the `shutdown()` happened.
2542 # Silence their logger's spew about the event loop being closed.
2543 cf_logger = logging.getLogger("concurrent.futures")
2544 cf_logger.setLevel(logging.CRITICAL)
2548 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
2549 """Replace `regex` with `replacement` twice on `original`.
2551 This is used by string normalization to perform replacements on
2552 overlapping matches.
2554 return regex.sub(replacement, regex.sub(replacement, original))
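# Illustrative sketch (not part of black.py): why a single re.sub is not
# enough for overlapping matches. The pattern below is a simplified stand-in
# for the quote-escaping regexes used by normalize_string_quotes().
import re

pat = re.compile(r"(^|[^\\])'")            # an unescaped single quote
repl = r"\1\\'"                            # escape it, keeping the preceding char

assert pat.sub(repl, "''") == "\\''"                    # one pass misses the 2nd quote
assert pat.sub(repl, pat.sub(repl, "''")) == "\\'\\'"   # a second pass catches it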
2557 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2560 def get_cache_file(line_length: int) -> Path:
2561 return CACHE_DIR / f"cache.{line_length}.pickle"
2564 def read_cache(line_length: int) -> Cache:
2565 """Read the cache if it exists and is well formed.
2567 If it is not well formed, the call to write_cache later should resolve the issue.
2569 cache_file = get_cache_file(line_length)
2570 if not cache_file.exists():
2571 return {}
2573 with cache_file.open("rb") as fobj:
2574 try:
2575 cache: Cache = pickle.load(fobj)
2576 except pickle.UnpicklingError:
2577 return {}
2579 return cache
2582 def get_cache_info(path: Path) -> CacheInfo:
2583 """Return the information used to check if a file is already formatted or not."""
2585 return stat.st_mtime, stat.st_size
2589 cache: Cache, sources: Iterable[Path]
2590 ) -> Tuple[List[Path], List[Path]]:
2591 """Split a list of paths into two.
2593 The first list contains paths of files that were modified on disk or are not in the
2594 cache. The other list contains paths to non-modified files.
2599 if cache.get(src) != get_cache_info(src):
2606 def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
2607 """Update the cache file."""
2608 cache_file = get_cache_file(line_length)
2610 if not CACHE_DIR.exists():
2611 CACHE_DIR.mkdir(parents=True)
2612 new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
2613 with cache_file.open("wb") as fobj:
2614 pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
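# Illustrative usage sketch (not part of black.py): the cache round-trip the
# helpers above implement. Assumes this module is importable as `black` and
# that "example.py" exists; both names are placeholders.
import black
from pathlib import Path

src = Path("example.py")
cache = black.read_cache(black.DEFAULT_LINE_LENGTH)
if cache.get(src) != black.get_cache_info(src):
    # The file is new or changed since the last run: it would be reformatted
    # here, then recorded so the next invocation can skip it untouched.
    black.write_cache(cache, [src], black.DEFAULT_LINE_LENGTH)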
2619 if __name__ == "__main__":