All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
5 from asyncio.base_events import BaseEventLoop
6 from concurrent.futures import Executor, ProcessPoolExecutor
8 from functools import partial, wraps
11 from multiprocessing import Manager
13 from pathlib import Path
36 from appdirs import user_cache_dir
37 from attr import dataclass, Factory
41 from blib2to3.pytree import Node, Leaf, type_repr
42 from blib2to3 import pygram, pytree
43 from blib2to3.pgen2 import driver, token
44 from blib2to3.pgen2.parse import ParseError
46 __version__ = "18.4a2"
47 DEFAULT_LINE_LENGTH = 88
49 syms = pygram.python_symbols
57 LN = Union[Leaf, Node]
58 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
61 CacheInfo = Tuple[Timestamp, FileSize]
62 Cache = Dict[Path, CacheInfo]
63 out = partial(click.secho, bold=True, err=True)
64 err = partial(click.secho, fg="red", err=True)
67 class NothingChanged(UserWarning):
68 """Raised by :func:`format_file` when reformatted code is the same as source."""
71 class CannotSplit(Exception):
72 """A readable split that fits the allotted line length is impossible.
74 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
75 :func:`delimiter_split`.
79 class FormatError(Exception):
80 """Base exception for `# fmt: on` and `# fmt: off` handling.
82 It holds the number of bytes of the prefix consumed before the format
83 control comment appeared.
86 def __init__(self, consumed: int) -> None:
87 super().__init__(consumed)
88 self.consumed = consumed
90 def trim_prefix(self, leaf: Leaf) -> None:
91 leaf.prefix = leaf.prefix[self.consumed:]
93 def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
94 """Returns a new Leaf from the consumed part of the prefix."""
95 unformatted_prefix = leaf.prefix[:self.consumed]
96 return Leaf(token.NEWLINE, unformatted_prefix)
99 class FormatOn(FormatError):
100 """Found a comment like `# fmt: on` in the file."""
103 class FormatOff(FormatError):
104 """Found a comment like `# fmt: off` in the file."""
107 class WriteBack(Enum):
124 default=DEFAULT_LINE_LENGTH,
125 help="How many character per line to allow.",
132 "Don't write the files back, just return the status. Return code 0 "
133 "means nothing would change. Return code 1 means some files would be "
134 "reformatted. Return code 123 means there was an internal error."
140 help="Don't write the files back, just output a diff for each file on stdout.",
145 help="If --fast given, skip temporary sanity checks. [default: --safe]",
152 "Don't emit non-error messages to stderr. Errors are still emitted, "
153 "silence those with 2>/dev/null."
156 @click.version_option(version=__version__)
161 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
174 """The uncompromising code formatter."""
175 sources: List[Path] = []
179 sources.extend(gen_python_files_in_dir(p))
181 # if a file was explicitly given, we don't care about its extension
184 sources.append(Path("-"))
186 err(f"invalid path: {s}")
188 exc = click.ClickException("Options --check and --diff are mutually exclusive")
193 write_back = WriteBack.NO
195 write_back = WriteBack.DIFF
197 write_back = WriteBack.YES
198 if len(sources) == 0:
202 elif len(sources) == 1:
203 return_code = run_single_file_mode(
204 line_length, check, fast, quiet, write_back, sources[0]
207 return_code = run_multi_file_mode(line_length, fast, quiet, write_back, sources)
208 ctx.exit(return_code)
211 def run_single_file_mode(
216 write_back: WriteBack,
219 report = Report(check=check, quiet=quiet)
222 if not src.is_file() and str(src) == "-":
223 if format_stdin_to_stdout(
224 line_length=line_length, fast=fast, write_back=write_back
226 changed = Changed.YES
229 if write_back != WriteBack.DIFF:
232 if src in cache and cache[src] == get_cache_info(src):
233 changed = Changed.CACHED
235 changed is not Changed.CACHED
236 and format_file_in_place(
237 src, line_length=line_length, fast=fast, write_back=write_back
240 changed = Changed.YES
241 if write_back != WriteBack.DIFF and changed is not Changed.NO:
242 write_cache(cache, [src])
243 report.done(src, changed)
244 except Exception as exc:
245 report.failed(src, str(exc))
246 return report.return_code
249 def run_multi_file_mode(
253 write_back: WriteBack,
256 loop = asyncio.get_event_loop()
257 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
260 return_code = loop.run_until_complete(
262 sources, line_length, write_back, fast, quiet, loop, executor
270 async def schedule_formatting(
273 write_back: WriteBack,
279 """Run formatting of `sources` in parallel using the provided `executor`.
281 (Use ProcessPoolExecutors for actual parallelism.)
283 `line_length`, `write_back`, and `fast` options are passed to
284 :func:`format_file_in_place`.
286 report = Report(check=write_back is WriteBack.NO, quiet=quiet)
288 if write_back != WriteBack.DIFF:
290 sources, cached = filter_cached(cache, sources)
292 report.done(src, Changed.CACHED)
297 if write_back == WriteBack.DIFF:
298 # For diff output, we need locks to ensure we don't interleave output
299 # from different processes.
301 lock = manager.Lock()
303 src: loop.run_in_executor(
304 executor, format_file_in_place, src, line_length, fast, write_back, lock
308 _task_values = list(tasks.values())
309 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
310 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
311 await asyncio.wait(_task_values)
312 for src, task in tasks.items():
314 report.failed(src, "timed out, cancelling")
316 cancelled.append(task)
317 elif task.cancelled():
318 cancelled.append(task)
319 elif task.exception():
320 report.failed(src, str(task.exception()))
322 formatted.append(src)
323 report.done(src, Changed.YES if task.result() else Changed.NO)
326 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
328 out("All done! ✨ 🍰 ✨")
330 click.echo(str(report))
332 if write_back != WriteBack.DIFF and formatted:
333 write_cache(cache, formatted)
335 return report.return_code
338 def format_file_in_place(
342 write_back: WriteBack = WriteBack.NO,
343 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
345 """Format file under `src` path. Return True if changed.
347 If `write_back` is YES, write reformatted code back to the file; if it is DIFF, write a diff to stdout.
348 `line_length` and `fast` options are passed to :func:`format_file_contents`.
351 with tokenize.open(src) as src_buffer:
352 src_contents = src_buffer.read()
354 dst_contents = format_file_contents(
355 src_contents, line_length=line_length, fast=fast
357 except NothingChanged:
360 if write_back == write_back.YES:
361 with open(src, "w", encoding=src_buffer.encoding) as f:
362 f.write(dst_contents)
363 elif write_back == write_back.DIFF:
364 src_name = f"{src.name} (original)"
365 dst_name = f"{src.name} (formatted)"
366 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
370 sys.stdout.write(diff_contents)
377 def format_stdin_to_stdout(
378 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
380 """Format file on stdin. Return True if changed.
382 If `write_back` is YES, write reformatted code to stdout; if it is DIFF, write a diff to stdout.
383 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
385 src = sys.stdin.read()
388 dst = format_file_contents(src, line_length=line_length, fast=fast)
391 except NothingChanged:
395 if write_back == WriteBack.YES:
396 sys.stdout.write(dst)
397 elif write_back == WriteBack.DIFF:
398 src_name = "<stdin> (original)"
399 dst_name = "<stdin> (formatted)"
400 sys.stdout.write(diff(src, dst, src_name, dst_name))
403 def format_file_contents(
404 src_contents: str, line_length: int, fast: bool
406 """Reformat contents of a file and return new contents.
408 If `fast` is False, additionally confirm that the reformatted code is
409 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
410 `line_length` is passed to :func:`format_str`.
412 if src_contents.strip() == "":
415 dst_contents = format_str(src_contents, line_length=line_length)
416 if src_contents == dst_contents:
420 assert_equivalent(src_contents, dst_contents)
421 assert_stable(src_contents, dst_contents, line_length=line_length)
425 def format_str(src_contents: str, line_length: int) -> FileContent:
426 """Reformat a string and return new contents.
428 `line_length` determines how many characters per line are allowed.
430 src_node = lib2to3_parse(src_contents)
432 lines = LineGenerator()
433 elt = EmptyLineTracker()
434 py36 = is_python36(src_node)
437 for current_line in lines.visit(src_node):
438 for _ in range(after):
439 dst_contents += str(empty_line)
440 before, after = elt.maybe_empty_lines(current_line)
441 for _ in range(before):
442 dst_contents += str(empty_line)
443 for line in split_line(current_line, line_length=line_length, py36=py36):
444 dst_contents += str(line)
449 pygram.python_grammar_no_print_statement_no_exec_statement,
450 pygram.python_grammar_no_print_statement,
451 pygram.python_grammar_no_exec_statement,
452 pygram.python_grammar,
456 def lib2to3_parse(src_txt: str) -> Node:
457 """Given a string with source, return the lib2to3 Node."""
458 grammar = pygram.python_grammar_no_print_statement
459 if src_txt[-1] != "\n":
460 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
462 for grammar in GRAMMARS:
463 drv = driver.Driver(grammar, pytree.convert)
465 result = drv.parse_string(src_txt, True)
468 except ParseError as pe:
469 lineno, column = pe.context[1]
470 lines = src_txt.splitlines()
472 faulty_line = lines[lineno - 1]
474 faulty_line = "<line number missing in source>"
475 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
479 if isinstance(result, Leaf):
480 result = Node(syms.file_input, [result])
484 def lib2to3_unparse(node: Node) -> str:
485 """Given a lib2to3 node, return its string representation."""
493 class Visitor(Generic[T]):
494 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
496 def visit(self, node: LN) -> Iterator[T]:
497 """Main method to visit `node` and its children.
499 It tries to find a `visit_*()` method for the given `node.type`, like
500 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
501 If no dedicated `visit_*()` method is found, chooses `visit_default()`
504 Then yields objects of type `T` from the selected visitor.
507 name = token.tok_name[node.type]
509 name = type_repr(node.type)
510 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
512 def visit_default(self, node: LN) -> Iterator[T]:
513 """Default `visit_*()` implementation. Recurses to children of `node`."""
514 if isinstance(node, Node):
515 for child in node.children:
516 yield from self.visit(child)
520 class DebugVisitor(Visitor[T]):
523 def visit_default(self, node: LN) -> Iterator[T]:
524 indent = " " * (2 * self.tree_depth)
525 if isinstance(node, Node):
526 _type = type_repr(node.type)
527 out(f"{indent}{_type}", fg="yellow")
529 for child in node.children:
530 yield from self.visit(child)
533 out(f"{indent}/{_type}", fg="yellow", bold=False)
535 _type = token.tok_name.get(node.type, str(node.type))
536 out(f"{indent}{_type}", fg="blue", nl=False)
538 # We don't have to handle prefixes for `Node` objects since
539 # that delegates to the first child anyway.
540 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
541 out(f" {node.value!r}", fg="blue", bold=False)
544 def show(cls, code: str) -> None:
545 """Pretty-print the lib2to3 AST of a given string of `code`.
547 Convenience method for debugging.
549 v: DebugVisitor[None] = DebugVisitor()
550 list(v.visit(lib2to3_parse(code)))
553 KEYWORDS = set(keyword.kwlist)
554 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
555 FLOW_CONTROL = {"return", "raise", "break", "continue"}
566 STANDALONE_COMMENT = 153
567 LOGIC_OPERATORS = {"and", "or"}
591 STARS = {token.STAR, token.DOUBLESTAR}
594 syms.argument, # double star in arglist
595 syms.trailer, # single argument to call
597 syms.varargslist, # lambdas
599 UNPACKING_PARENTS = {
600 syms.atom, # single element of a list or set literal
605 COMPREHENSION_PRIORITY = 20
609 COMPARATOR_PRIORITY = 3
614 class BracketTracker:
615 """Keeps track of brackets on a line."""
618 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
619 delimiters: Dict[LeafID, Priority] = Factory(dict)
620 previous: Optional[Leaf] = None
622 def mark(self, leaf: Leaf) -> None:
623 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
625 All leaves receive an int `bracket_depth` field that stores how deep
626 within brackets a given leaf is. 0 means there are no enclosing brackets
627 that started on this line.
629 If a leaf is itself a closing bracket, it receives an `opening_bracket`
630 field that it forms a pair with. This is a one-directional link to
631 avoid reference cycles.
633 If a leaf is a delimiter (a token on which Black can split the line if
634 needed) and it's on depth 0, its `id()` is stored in the tracker's
637 if leaf.type == token.COMMENT:
640 if leaf.type in CLOSING_BRACKETS:
642 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
643 leaf.opening_bracket = opening_bracket
644 leaf.bracket_depth = self.depth
646 delim = is_split_before_delimiter(leaf, self.previous)
647 if delim and self.previous is not None:
648 self.delimiters[id(self.previous)] = delim
650 delim = is_split_after_delimiter(leaf, self.previous)
652 self.delimiters[id(leaf)] = delim
653 if leaf.type in OPENING_BRACKETS:
654 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
658 def any_open_brackets(self) -> bool:
659 """Return True if there is a yet unmatched open bracket on the line."""
660 return bool(self.bracket_match)
662 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
663 """Return the highest priority of a delimiter found on the line.
665 Values are consistent with what `is_delimiter()` returns.
666 Raises ValueError on no delimiters.
668 return max(v for k, v in self.delimiters.items() if k not in exclude)
673 """Holds leaves and comments. Can be printed with `str(line)`."""
676 leaves: List[Leaf] = Factory(list)
677 comments: List[Tuple[Index, Leaf]] = Factory(list)
678 bracket_tracker: BracketTracker = Factory(BracketTracker)
679 inside_brackets: bool = False
680 has_for: bool = False
681 _for_loop_variable: bool = False
683 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
684 """Add a new `leaf` to the end of the line.
686 Unless `preformatted` is True, the `leaf` will receive a new consistent
687 whitespace prefix and metadata applied by :class:`BracketTracker`.
688 Trailing commas are maybe removed, unpacked for loop variables are
689 demoted from being delimiters.
691 Inline comments are put aside.
693 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
697 if self.leaves and not preformatted:
698 # Note: at this point leaf.prefix should be empty except for
699 # imports, for which we only preserve newlines.
700 leaf.prefix += whitespace(leaf)
701 if self.inside_brackets or not preformatted:
702 self.maybe_decrement_after_for_loop_variable(leaf)
703 self.bracket_tracker.mark(leaf)
704 self.maybe_remove_trailing_comma(leaf)
705 self.maybe_increment_for_loop_variable(leaf)
707 if not self.append_comment(leaf):
708 self.leaves.append(leaf)
710 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
711 """Like :func:`append()` but disallow invalid standalone comment structure.
713 Raises ValueError when any `leaf` is appended after a standalone comment
714 or when a standalone comment is not the first leaf on the line.
716 if self.bracket_tracker.depth == 0:
718 raise ValueError("cannot append to standalone comments")
720 if self.leaves and leaf.type == STANDALONE_COMMENT:
722 "cannot append standalone comments to a populated line"
725 self.append(leaf, preformatted=preformatted)
728 def is_comment(self) -> bool:
729 """Is this line a standalone comment?"""
730 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
733 def is_decorator(self) -> bool:
734 """Is this line a decorator?"""
735 return bool(self) and self.leaves[0].type == token.AT
738 def is_import(self) -> bool:
739 """Is this an import line?"""
740 return bool(self) and is_import(self.leaves[0])
743 def is_class(self) -> bool:
744 """Is this line a class definition?"""
747 and self.leaves[0].type == token.NAME
748 and self.leaves[0].value == "class"
752 def is_def(self) -> bool:
753 """Is this a function definition? (Also returns True for async defs.)"""
755 first_leaf = self.leaves[0]
760 second_leaf: Optional[Leaf] = self.leaves[1]
764 (first_leaf.type == token.NAME and first_leaf.value == "def")
766 first_leaf.type == token.ASYNC
767 and second_leaf is not None
768 and second_leaf.type == token.NAME
769 and second_leaf.value == "def"
774 def is_flow_control(self) -> bool:
775 """Is this line a flow control statement?
777 Those are `return`, `raise`, `break`, and `continue`.
781 and self.leaves[0].type == token.NAME
782 and self.leaves[0].value in FLOW_CONTROL
786 def is_yield(self) -> bool:
787 """Is this line a yield statement?"""
790 and self.leaves[0].type == token.NAME
791 and self.leaves[0].value == "yield"
794 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
795 """If so, needs to be split before emitting."""
796 for leaf in self.leaves:
797 if leaf.type == STANDALONE_COMMENT:
798 if leaf.bracket_depth <= depth_limit:
803 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
804 """Remove trailing comma if there is one and it's safe."""
807 and self.leaves[-1].type == token.COMMA
808 and closing.type in CLOSING_BRACKETS
812 if closing.type == token.RBRACE:
813 self.remove_trailing_comma()
816 if closing.type == token.RSQB:
817 comma = self.leaves[-1]
818 if comma.parent and comma.parent.type == syms.listmaker:
819 self.remove_trailing_comma()
822 # For parens let's check if it's safe to remove the comma. If the
823 # trailing one is the only one, we might mistakenly change a tuple
824 # into a different type by removing the comma.
825 depth = closing.bracket_depth + 1
827 opening = closing.opening_bracket
828 for _opening_index, leaf in enumerate(self.leaves):
835 for leaf in self.leaves[_opening_index + 1:]:
839 bracket_depth = leaf.bracket_depth
840 if bracket_depth == depth and leaf.type == token.COMMA:
842 if leaf.parent and leaf.parent.type == syms.arglist:
847 self.remove_trailing_comma()
852 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
853 """In a for loop, or comprehension, the variables are often unpacks.
855 To avoid splitting on the comma in this situation, increase the depth of
856 tokens between `for` and `in`.
858 if leaf.type == token.NAME and leaf.value == "for":
860 self.bracket_tracker.depth += 1
861 self._for_loop_variable = True
866 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
867 """See `maybe_increment_for_loop_variable` above for explanation."""
868 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
869 self.bracket_tracker.depth -= 1
870 self._for_loop_variable = False
875 def append_comment(self, comment: Leaf) -> bool:
876 """Add an inline or standalone comment to the line."""
878 comment.type == STANDALONE_COMMENT
879 and self.bracket_tracker.any_open_brackets()
884 if comment.type != token.COMMENT:
887 after = len(self.leaves) - 1
889 comment.type = STANDALONE_COMMENT
894 self.comments.append((after, comment))
897 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
898 """Generate comments that should appear directly after `leaf`."""
899 for _leaf_index, _leaf in enumerate(self.leaves):
906 for index, comment_after in self.comments:
907 if _leaf_index == index:
910 def remove_trailing_comma(self) -> None:
911 """Remove the trailing comma and move the comments attached to it."""
912 comma_index = len(self.leaves) - 1
913 for i in range(len(self.comments)):
914 comment_index, comment = self.comments[i]
915 if comment_index == comma_index:
916 self.comments[i] = (comma_index - 1, comment)
919 def __str__(self) -> str:
920 """Render the line."""
924 indent = " " * self.depth
925 leaves = iter(self.leaves)
927 res = f"{first.prefix}{indent}{first.value}"
930 for _, comment in self.comments:
934 def __bool__(self) -> bool:
935 """Return True if the line has leaves or comments."""
936 return bool(self.leaves or self.comments)
939 class UnformattedLines(Line):
940 """Just like :class:`Line` but stores lines which aren't reformatted."""
942 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
943 """Just add a new `leaf` to the end of the lines.
945 The `preformatted` argument is ignored.
947 Keeps track of indentation `depth`, which is useful when the user
948 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
951 list(generate_comments(leaf))
952 except FormatOn as f_on:
953 self.leaves.append(f_on.leaf_from_consumed(leaf))
956 self.leaves.append(leaf)
957 if leaf.type == token.INDENT:
959 elif leaf.type == token.DEDENT:
962 def __str__(self) -> str:
963 """Render unformatted lines from leaves which were added with `append()`.
965 `depth` is not used for indentation in this case.
971 for leaf in self.leaves:
975 def append_comment(self, comment: Leaf) -> bool:
976 """Not implemented in this class. Raises `NotImplementedError`."""
977 raise NotImplementedError("Unformatted lines don't store comments separately.")
979 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
980 """Does nothing and returns False."""
983 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
984 """Does nothing and returns False."""
989 class EmptyLineTracker:
990 """Provides a stateful method that returns the number of potential extra
991 empty lines needed before and after the currently processed line.
993 Note: this tracker works on lines that haven't been split yet. It assumes
994 the prefix of the first leaf consists of optional newlines. Those newlines
995 are consumed by `maybe_empty_lines()` and included in the computation.
997 previous_line: Optional[Line] = None
998 previous_after: int = 0
999 previous_defs: List[int] = Factory(list)
1001 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1002 """Return the number of extra empty lines before and after the `current_line`.
1004 This is for separating `def`, `async def` and `class` with extra empty
1005 lines (two on module-level), as well as providing an extra empty line
1006 after flow control keywords to make them more prominent.
1008 if isinstance(current_line, UnformattedLines):
1011 before, after = self._maybe_empty_lines(current_line)
1012 before -= self.previous_after
1013 self.previous_after = after
1014 self.previous_line = current_line
1015 return before, after
1017 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1019 if current_line.depth == 0:
1021 if current_line.leaves:
1022 # Consume the first leaf's extra newlines.
1023 first_leaf = current_line.leaves[0]
1024 before = first_leaf.prefix.count("\n")
1025 before = min(before, max_allowed)
1026 first_leaf.prefix = ""
1029 depth = current_line.depth
1030 while self.previous_defs and self.previous_defs[-1] >= depth:
1031 self.previous_defs.pop()
1032 before = 1 if depth else 2
1033 is_decorator = current_line.is_decorator
1034 if is_decorator or current_line.is_def or current_line.is_class:
1035 if not is_decorator:
1036 self.previous_defs.append(depth)
1037 if self.previous_line is None:
1038 # Don't insert empty lines before the first line in the file.
1041 if self.previous_line and self.previous_line.is_decorator:
1042 # Don't insert empty lines between decorators.
1046 if current_line.depth:
1050 if current_line.is_flow_control:
1055 and self.previous_line.is_import
1056 and not current_line.is_import
1057 and depth == self.previous_line.depth
1059 return (before or 1), 0
1063 and self.previous_line.is_yield
1064 and (not current_line.is_yield or depth != self.previous_line.depth)
1066 return (before or 1), 0
1072 class LineGenerator(Visitor[Line]):
1073 """Generates reformatted Line objects. Empty lines are not emitted.
1075 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1076 in ways that will no longer stringify to valid Python code on the tree.
1078 current_line: Line = Factory(Line)
1080 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1083 If the line is empty, only emit if it makes sense.
1084 If the line is too long, split it first and then generate.
1086 If any lines were generated, set up a new current_line.
1088 if not self.current_line:
1089 if self.current_line.__class__ == type:
1090 self.current_line.depth += indent
1092 self.current_line = type(depth=self.current_line.depth + indent)
1093 return # Line is empty, don't emit. Creating a new one unnecessary.
1095 complete_line = self.current_line
1096 self.current_line = type(depth=complete_line.depth + indent)
1099 def visit(self, node: LN) -> Iterator[Line]:
1100 """Main method to visit `node` and its children.
1102 Yields :class:`Line` objects.
1104 if isinstance(self.current_line, UnformattedLines):
1105 # File contained `# fmt: off`
1106 yield from self.visit_unformatted(node)
1109 yield from super().visit(node)
1111 def visit_default(self, node: LN) -> Iterator[Line]:
1112 """Default `visit_*()` implementation. Recurses to children of `node`."""
1113 if isinstance(node, Leaf):
1114 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1116 for comment in generate_comments(node):
1117 if any_open_brackets:
1118 # any comment within brackets is subject to splitting
1119 self.current_line.append(comment)
1120 elif comment.type == token.COMMENT:
1121 # regular trailing comment
1122 self.current_line.append(comment)
1123 yield from self.line()
1126 # regular standalone comment
1127 yield from self.line()
1129 self.current_line.append(comment)
1130 yield from self.line()
1132 except FormatOff as f_off:
1133 f_off.trim_prefix(node)
1134 yield from self.line(type=UnformattedLines)
1135 yield from self.visit(node)
1137 except FormatOn as f_on:
1138 # This only happens here if somebody says "fmt: on" multiple
1140 f_on.trim_prefix(node)
1141 yield from self.visit_default(node)
1144 normalize_prefix(node, inside_brackets=any_open_brackets)
1145 if node.type == token.STRING:
1146 normalize_string_quotes(node)
1147 if node.type not in WHITESPACE:
1148 self.current_line.append(node)
1149 yield from super().visit_default(node)
1151 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1152 """Increase indentation level, maybe yield a line."""
1153 # In blib2to3 INDENT never holds comments.
1154 yield from self.line(+1)
1155 yield from self.visit_default(node)
1157 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1158 """Decrease indentation level, maybe yield a line."""
1159 # DEDENT has no value. Additionally, in blib2to3 it never holds comments.
1160 yield from self.line(-1)
1163 self, node: Node, keywords: Set[str], parens: Set[str]
1164 ) -> Iterator[Line]:
1165 """Visit a statement.
1167 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1168 `def`, `with`, `class`, and `assert`.
1170 The relevant Python language `keywords` for a given statement will be
1171 NAME leaves within it. This method puts those on a separate line.
1173 `parens` holds pairs of nodes where invisible parentheses should be put.
1174 Keys hold nodes after which opening parentheses should be put, values
1175 hold nodes before which closing parentheses should be put.
1177 normalize_invisible_parens(node, parens_after=parens)
1178 for child in node.children:
1179 if child.type == token.NAME and child.value in keywords: # type: ignore
1180 yield from self.line()
1182 yield from self.visit(child)
1184 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1185 """Visit a statement without nested statements."""
1186 is_suite_like = node.parent and node.parent.type in STATEMENT
1188 yield from self.line(+1)
1189 yield from self.visit_default(node)
1190 yield from self.line(-1)
1193 yield from self.line()
1194 yield from self.visit_default(node)
1196 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1197 """Visit `async def`, `async for`, `async with`."""
1198 yield from self.line()
1200 children = iter(node.children)
1201 for child in children:
1202 yield from self.visit(child)
1204 if child.type == token.ASYNC:
1207 internal_stmt = next(children)
1208 for child in internal_stmt.children:
1209 yield from self.visit(child)
1211 def visit_decorators(self, node: Node) -> Iterator[Line]:
1212 """Visit decorators."""
1213 for child in node.children:
1214 yield from self.line()
1215 yield from self.visit(child)
1217 def visit_import_from(self, node: Node) -> Iterator[Line]:
1218 """Visit import_from and maybe put invisible parentheses.
1220 This is separate from `visit_stmt` because import statements don't
1221 support arbitrary atoms and thus handling of parentheses is custom.
1224 for index, child in enumerate(node.children):
1226 if child.type == token.LPAR:
1227 # make parentheses invisible
1228 child.value = "" # type: ignore
1229 node.children[-1].value = "" # type: ignore
1231 # insert invisible parentheses
1232 node.insert_child(index, Leaf(token.LPAR, ""))
1233 node.append_child(Leaf(token.RPAR, ""))
1237 child.type == token.NAME and child.value == "import" # type: ignore
1240 for child in node.children:
1241 yield from self.visit(child)
1243 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1244 """Remove a semicolon and put the other statement on a separate line."""
1245 yield from self.line()
1247 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1248 """End of file. Process outstanding comments and end with a newline."""
1249 yield from self.visit_default(leaf)
1250 yield from self.line()
1252 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1253 """Used when file contained a `# fmt: off`."""
1254 if isinstance(node, Node):
1255 for child in node.children:
1256 yield from self.visit(child)
1260 self.current_line.append(node)
1261 except FormatOn as f_on:
1262 f_on.trim_prefix(node)
1263 yield from self.line()
1264 yield from self.visit(node)
1266 if node.type == token.ENDMARKER:
1267 # somebody decided not to put a final `# fmt: on`
1268 yield from self.line()
1270 def __attrs_post_init__(self) -> None:
1271 """You are in a twisty little maze of passages."""
1274 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1275 self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
1276 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1277 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1278 self.visit_try_stmt = partial(
1279 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1281 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1282 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1283 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1284 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1285 self.visit_async_funcdef = self.visit_async_stmt
1286 self.visit_decorated = self.visit_decorators
1289 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1290 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1291 OPENING_BRACKETS = set(BRACKET.keys())
1292 CLOSING_BRACKETS = set(BRACKET.values())
1293 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1294 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1297 def whitespace(leaf: Leaf) -> str: # noqa C901
1298 """Return whitespace prefix if needed for the given `leaf`."""
1305 if t in ALWAYS_NO_SPACE:
1308 if t == token.COMMENT:
1311 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1312 if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}:
1315 prev = leaf.prev_sibling
1317 prevp = preceding_leaf(p)
1318 if not prevp or prevp.type in OPENING_BRACKETS:
1321 if t == token.COLON:
1322 return SPACE if prevp.type == token.COMMA else NO
1324 if prevp.type == token.EQUAL:
1326 if prevp.parent.type in {
1327 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1331 elif prevp.parent.type == syms.typedargslist:
1332 # A bit hacky: if the equal sign has whitespace, it means we
1333 # previously found it's a typed argument. So, we're using
1337 elif prevp.type in STARS:
1338 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1341 elif prevp.type == token.COLON:
1342 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1347 and prevp.parent.type == syms.factor
1348 and prevp.type in MATH_OPERATORS
1353 prevp.type == token.RIGHTSHIFT
1355 and prevp.parent.type == syms.shift_expr
1356 and prevp.prev_sibling
1357 and prevp.prev_sibling.type == token.NAME
1358 and prevp.prev_sibling.value == "print" # type: ignore
1360 # Python 2 print chevron
1363 elif prev.type in OPENING_BRACKETS:
1366 if p.type in {syms.parameters, syms.arglist}:
1367 # untyped function signatures or calls
1371 if not prev or prev.type != token.COMMA:
1374 elif p.type == syms.varargslist:
1379 if prev and prev.type != token.COMMA:
1382 elif p.type == syms.typedargslist:
1383 # typed function signatures
1387 if t == token.EQUAL:
1388 if prev.type != syms.tname:
1391 elif prev.type == token.EQUAL:
1392 # A bit hacky: if the equal sign has whitespace, it means we
1393 # previously found it's a typed argument. So, we're using that, too.
1396 elif prev.type != token.COMMA:
1399 elif p.type == syms.tname:
1402 prevp = preceding_leaf(p)
1403 if not prevp or prevp.type != token.COMMA:
1406 elif p.type == syms.trailer:
1407 # attributes and calls
1408 if t == token.LPAR or t == token.RPAR:
1413 prevp = preceding_leaf(p)
1414 if not prevp or prevp.type != token.NUMBER:
1417 elif t == token.LSQB:
1420 elif prev.type != token.COMMA:
1423 elif p.type == syms.argument:
1425 if t == token.EQUAL:
1429 prevp = preceding_leaf(p)
1430 if not prevp or prevp.type == token.LPAR:
1433 elif prev.type in {token.EQUAL} | STARS:
1436 elif p.type == syms.decorator:
1440 elif p.type == syms.dotted_name:
1444 prevp = preceding_leaf(p)
1445 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1448 elif p.type == syms.classdef:
1452 if prev and prev.type == token.LPAR:
1455 elif p.type == syms.subscript:
1458 assert p.parent is not None, "subscripts are always parented"
1459 if p.parent.type == syms.subscriptlist:
1467 elif p.type == syms.atom:
1468 if prev and t == token.DOT:
1469 # dots, but not the first one.
1473 p.type == syms.listmaker
1474 or p.type == syms.testlist_gexp
1475 or p.type == syms.subscriptlist
1477 # list interior, including unpacking
1481 elif p.type == syms.dictsetmaker:
1482 # dict and set interior, including unpacking
1486 if prev.type == token.DOUBLESTAR:
1489 elif p.type in {syms.factor, syms.star_expr}:
1492 prevp = preceding_leaf(p)
1493 if not prevp or prevp.type in OPENING_BRACKETS:
1496 prevp_parent = prevp.parent
1497 assert prevp_parent is not None
1499 prevp.type == token.COLON
1500 and prevp_parent.type in {syms.subscript, syms.sliceop}
1504 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1507 elif t == token.NAME or t == token.NUMBER:
1510 elif p.type == syms.import_from:
1512 if prev and prev.type == token.DOT:
1515 elif t == token.NAME:
1519 if prev and prev.type == token.DOT:
1522 elif p.type == syms.sliceop:
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Climbs from `node` towards the root.  At the first level that has a
    previous sibling the answer is that sibling itself when it is a leaf,
    otherwise the last leaf contained in it.  Returns None when no ancestor
    has a previous sibling, or when that sibling contains no leaves.
    """
    while node:
        res = node.prev_sibling
        if res:
            if isinstance(res, Leaf):
                return res

            try:
                return list(res.leaves())[-1]

            except IndexError:
                # The previous sibling is a node without any leaves.
                return None

        node = node.parent
    return None
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    Higher numbers are higher priority.  Returns 0 when `leaf` is not such a
    delimiter.  `previous` is accepted for signature parity with
    :func:`is_split_before_delimiter` and is not inspected here.
    """
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    return 0
def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    Higher numbers are higher priority.  Returns 0 when `leaf` is not such a
    delimiter.  `previous` is the leaf that comes right before `leaf` on the
    line (if known); it is only used to detect adjacent string literals.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITY

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        # Two string literals in a row (implicit concatenation).
        return STRING_PRIORITY

    if (
        leaf.type == token.NAME
        and leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
    ):
        return COMPREHENSION_PRIORITY

    if (
        leaf.type == token.NAME
        and leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
def is_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of the `leaf` delimiter. Return 0 if not delimiter.

    The result is the larger of the "split before" and "split after"
    priorities of `leaf`.

    Higher numbers are higher priority.
    """
    return max(
        is_split_before_delimiter(leaf, previous),
        is_split_after_delimiter(leaf, previous),
    )
1623 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1624 """Clean the prefix of the `leaf` and generate comments from it, if any.
1626 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1627 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1628 move because it does away with modifying the grammar to include all the
1629 possible places in which comments can be placed.
1631 The sad consequence for us though is that comments don't "belong" anywhere.
1632 This is why this function generates simple parentless Leaf objects for
1633 comments. We simply don't know what the correct parent should be.
1635 No matter though, we can live without this. We really only need to
1636 differentiate between inline and standalone comments. The latter don't
1637 share the line with any code.
1639 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1640 are emitted with a fake STANDALONE_COMMENT token identifier.
1651 for index, line in enumerate(p.split("\n")):
1652 consumed += len(line) + 1 # adding the length of the split '\n'
1653 line = line.lstrip()
1656 if not line.startswith("#"):
1659 if index == 0 and leaf.type != token.ENDMARKER:
1660 comment_type = token.COMMENT # simple trailing comment
1662 comment_type = STANDALONE_COMMENT
1663 comment = make_comment(line)
1664 yield Leaf(comment_type, comment, prefix="\n" * nlines)
1666 if comment in {"# fmt: on", "# yapf: enable"}:
1667 raise FormatOn(consumed)
1669 if comment in {"# fmt: off", "# yapf: disable"}:
1670 if comment_type == STANDALONE_COMMENT:
1671 raise FormatOff(consumed)
1673 prev = preceding_leaf(leaf)
1674 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
1675 raise FormatOff(consumed)
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:") should have a single space between
    the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.  Trailing
    whitespace is dropped; empty or whitespace-only `content` yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        # Nothing left to format; also keeps the indexing below safe.
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Keep "#!", "#:", "##" and an already present space untouched.
    if content and content[0] not in " !:#":
        content = " " + content
    return "#" + content
1700 line: Line, line_length: int, inner: bool = False, py36: bool = False
1701 ) -> Iterator[Line]:
1702 """Split a `line` into potentially many lines.
1704 They should fit in the allotted `line_length` but might not be able to.
1705 `inner` signifies that there were a pair of brackets somewhere around the
1706 current `line`, possibly transitively. This means we can fallback to splitting
1707 by delimiters if the LHS/RHS don't yield any results.
1709 If `py36` is True, splitting may generate syntax that is only compatible
1710 with Python 3.6 and later.
1712 if isinstance(line, UnformattedLines) or line.is_comment:
1716 line_str = str(line).strip("\n")
1718 len(line_str) <= line_length
1719 and "\n" not in line_str # multiline strings
1720 and not line.contains_standalone_comments()
1725 split_funcs: List[SplitFunc]
1727 split_funcs = [left_hand_split]
1728 elif line.inside_brackets:
1729 split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
1731 split_funcs = [right_hand_split]
1732 for split_func in split_funcs:
1733 # We are accumulating lines in `result` because we might want to abort
1734 # mission and return the original line in the end, or attempt a different
1736 result: List[Line] = []
1738 for l in split_func(line, py36):
1739 if str(l).strip("\n") == line_str:
1740 raise CannotSplit("Split function returned an unchanged result")
1743 split_line(l, line_length=line_length, inner=True, py36=py36)
1745 except CannotSplit as cs:
1756 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1757 """Split line into many lines, starting with the first matching bracket pair.
1759 Note: this usually looks weird, only use this for function definitions.
1760 Prefer RHS otherwise.
1762 head = Line(depth=line.depth)
1763 body = Line(depth=line.depth + 1, inside_brackets=True)
1764 tail = Line(depth=line.depth)
1765 tail_leaves: List[Leaf] = []
1766 body_leaves: List[Leaf] = []
1767 head_leaves: List[Leaf] = []
1768 current_leaves = head_leaves
1769 matching_bracket = None
1770 for leaf in line.leaves:
1772 current_leaves is body_leaves
1773 and leaf.type in CLOSING_BRACKETS
1774 and leaf.opening_bracket is matching_bracket
1776 current_leaves = tail_leaves if body_leaves else head_leaves
1777 current_leaves.append(leaf)
1778 if current_leaves is head_leaves:
1779 if leaf.type in OPENING_BRACKETS:
1780 matching_bracket = leaf
1781 current_leaves = body_leaves
1782 # Since body is a new indent level, remove spurious leading whitespace.
1784 normalize_prefix(body_leaves[0], inside_brackets=True)
1785 # Build the new lines.
1786 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1788 result.append(leaf, preformatted=True)
1789 for comment_after in line.comments_after(leaf):
1790 result.append(comment_after, preformatted=True)
1791 bracket_split_succeeded_or_raise(head, body, tail)
1792 for result in (head, body, tail):
1797 def right_hand_split(
1798 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1799 ) -> Iterator[Line]:
1800 """Split line into many lines, starting with the last matching bracket pair."""
1801 head = Line(depth=line.depth)
1802 body = Line(depth=line.depth + 1, inside_brackets=True)
1803 tail = Line(depth=line.depth)
1804 tail_leaves: List[Leaf] = []
1805 body_leaves: List[Leaf] = []
1806 head_leaves: List[Leaf] = []
1807 current_leaves = tail_leaves
1808 opening_bracket = None
1809 closing_bracket = None
1810 for leaf in reversed(line.leaves):
1811 if current_leaves is body_leaves:
1812 if leaf is opening_bracket:
1813 current_leaves = head_leaves if body_leaves else tail_leaves
1814 current_leaves.append(leaf)
1815 if current_leaves is tail_leaves:
1816 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1817 opening_bracket = leaf.opening_bracket
1818 closing_bracket = leaf
1819 current_leaves = body_leaves
1820 tail_leaves.reverse()
1821 body_leaves.reverse()
1822 head_leaves.reverse()
1823 # Since body is a new indent level, remove spurious leading whitespace.
1825 normalize_prefix(body_leaves[0], inside_brackets=True)
1826 elif not head_leaves:
1827 # No `head` and no `body` means the split failed. `tail` has all content.
1828 raise CannotSplit("No brackets found")
1830 # Build the new lines.
1831 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1833 result.append(leaf, preformatted=True)
1834 for comment_after in line.comments_after(leaf):
1835 result.append(comment_after, preformatted=True)
1836 bracket_split_succeeded_or_raise(head, body, tail)
1837 assert opening_bracket and closing_bracket
1839 opening_bracket.type == token.LPAR
1840 and not opening_bracket.value
1841 and closing_bracket.type == token.RPAR
1842 and not closing_bracket.value
1844 # These parens were optional. If there aren't any delimiters or standalone
1845 # comments in the body, they were unnecessary and another split without
1846 # them should be attempted.
1848 body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
1850 omit = {id(closing_bracket), *omit}
1851 yield from right_hand_split(line, py36=py36, omit=omit)
1854 ensure_visible(opening_bracket)
1855 ensure_visible(closing_bracket)
1856 for result in (head, body, tail):
1861 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1862 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
1864 Do nothing otherwise.
1866 A left- or right-hand split is based on a pair of brackets. Content before
1867 (and including) the opening bracket is left on one line, content inside the
1868 brackets is put on a separate line, and finally content starting with and
1869 following the closing bracket is put on a separate line.
1871 Those are called `head`, `body`, and `tail`, respectively. If the split
1872 produced the same line (all content in `head`) or ended up with an empty `body`
1873 and the `tail` is just the closing bracket, then it's considered failed.
1875 tail_len = len(str(tail).strip())
1878 raise CannotSplit("Splitting brackets produced the same line")
1882 f"Splitting brackets on an empty body to save "
1883 f"{tail_len} characters is not worth it"
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.  Each line produced by
    the wrapped function has the prefix of its first leaf normalized as if it
    were inside brackets before the line is passed on to the caller.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for split in split_func(line, py36):
            normalize_prefix(split.leaves[0], inside_brackets=True)
            yield split

    return split_wrapper
1902 @dont_increase_indentation
1903 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1904 """Split according to delimiters of the highest priority.
1906 If `py36` is True, the split will add trailing commas also in function
1907 signatures that contain `*` and `**`.
1910 last_leaf = line.leaves[-1]
1912 raise CannotSplit("Line empty")
1914 delimiters = line.bracket_tracker.delimiters
1916 delimiter_priority = line.bracket_tracker.max_delimiter_priority(
1917 exclude={id(last_leaf)}
1920 raise CannotSplit("No delimiters found")
1922 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1923 lowest_depth = sys.maxsize
1924 trailing_comma_safe = True
1926 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1927 """Append `leaf` to current line or to new line if appending impossible."""
1928 nonlocal current_line
1930 current_line.append_safe(leaf, preformatted=True)
1931 except ValueError as ve:
1934 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1935 current_line.append(leaf)
1937 for leaf in line.leaves:
1938 yield from append_to_line(leaf)
1940 for comment_after in line.comments_after(leaf):
1941 yield from append_to_line(comment_after)
1943 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1945 leaf.bracket_depth == lowest_depth
1946 and is_vararg(leaf, within=VARARGS_PARENTS)
1948 trailing_comma_safe = trailing_comma_safe and py36
1949 leaf_priority = delimiters.get(id(leaf))
1950 if leaf_priority == delimiter_priority:
1953 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1957 and delimiter_priority == COMMA_PRIORITY
1958 and current_line.leaves[-1].type != token.COMMA
1959 and current_line.leaves[-1].type != STANDALONE_COMMENT
1961 current_line.append(Leaf(token.COMMA, ","))
1965 @dont_increase_indentation
1966 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
1967 """Split standalone comments from the rest of the line."""
1968 if not line.contains_standalone_comments(0):
1969 raise CannotSplit("Line does not have any standalone comments")
1971 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1973 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1974 """Append `leaf` to current line or to new line if appending impossible."""
1975 nonlocal current_line
1977 current_line.append_safe(leaf, preformatted=True)
1978 except ValueError as ve:
1981 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1982 current_line.append(leaf)
1984 for leaf in line.leaves:
1985 yield from append_to_line(leaf)
1987 for comment_after in line.comments_after(leaf):
1988 yield from append_to_line(comment_after)
1994 def is_import(leaf: Leaf) -> bool:
1995 """Return True if the given leaf starts an import statement."""
2002 (v == "import" and p and p.type == syms.import_name)
2003 or (v == "from" and p and p.type == syms.import_from)
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Note: don't use backslashes for formatting or you'll lose your voting rights.

    Mutates `leaf.prefix` in place.
    """
    if not inside_brackets:
        spl = leaf.prefix.split("#")
        # A backslash before the first comment means a line continuation;
        # in that case the prefix is wiped like inside brackets.
        if "\\" not in spl[0]:
            nl_count = spl[-1].count("\n")
            if len(spl) > 1:
                # NOTE(review): presumably discounts the newline that ends the
                # last comment in the prefix - confirm.
                nl_count -= 1
            leaf.prefix = "\n" * nl_count
            return

    leaf.prefix = ""
2026 def normalize_string_quotes(leaf: Leaf) -> None:
2027 """Prefer double quotes but only if it doesn't cause more escaping.
2029 Adds or removes backslashes as appropriate. Doesn't parse and fix
2030 strings nested in f-strings (yet).
2032 Note: Mutates its argument.
2034 value = leaf.value.lstrip("furbFURB")
2035 if value[:3] == '"""':
2038 elif value[:3] == "'''":
2041 elif value[0] == '"':
2047 first_quote_pos = leaf.value.find(orig_quote)
2048 if first_quote_pos == -1:
2049 return # There's an internal error
2051 prefix = leaf.value[:first_quote_pos]
2052 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2053 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2054 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2055 body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
2056 if "r" in prefix.casefold():
2057 if unescaped_new_quote.search(body):
2058 # There's at least one unescaped new_quote in this raw string
2059 # so converting is impossible
2062 # Do not introduce or remove backslashes in raw strings
2065 # remove unnecessary quotes
2066 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2067 if body != new_body:
2068 # Consider the string without unnecessary quotes as the original
2070 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2071 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2072 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2073 if new_quote == '"""' and new_body[-1] == '"':
2075 new_body = new_body[:-1] + '\\"'
2076 orig_escape_count = body.count("\\")
2077 new_escape_count = new_body.count("\\")
2078 if new_escape_count > orig_escape_count:
2079 return # Do not introduce more escaping
2081 if new_escape_count == orig_escape_count and orig_quote == '"':
2082 return # Prefer double quotes
2084 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2087 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2088 """Make existing optional parentheses invisible or create new ones.
2090 Standardizes on visible parentheses for single-element tuples, and keeps
2091 existing visible parentheses for other tuples and generator expressions.
2094 for child in list(node.children):
2096 if child.type == syms.atom:
2098 is_empty_tuple(child)
2099 or is_one_tuple(child)
2100 or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
2102 first = child.children[0]
2103 last = child.children[-1]
2104 if first.type == token.LPAR and last.type == token.RPAR:
2105 # make parentheses invisible
2106 first.value = "" # type: ignore
2107 last.value = "" # type: ignore
2108 elif is_one_tuple(child):
2109 # wrap child in visible parentheses
2110 lpar = Leaf(token.LPAR, "(")
2111 rpar = Leaf(token.RPAR, ")")
2112 index = child.remove() or 0
2113 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2115 # wrap child in invisible parentheses
2116 lpar = Leaf(token.LPAR, "")
2117 rpar = Leaf(token.RPAR, "")
2118 index = child.remove() or 0
2119 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2121 check_lpar = isinstance(child, Leaf) and child.value in parens_after
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple.

    That is: an atom with exactly two children, an opening and a closing
    parenthesis.
    """
    return (
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
    )
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type == syms.atom:
        # Parenthesized form: "(" testlist_gexp ")" where the inner node is
        # exactly "element" followed by a comma.
        if len(node.children) != 3:
            return False

        lpar, gexp, rpar = node.children
        if not (
            lpar.type == token.LPAR
            and gexp.type == syms.testlist_gexp
            and rpar.type == token.RPAR
        ):
            return False

        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    # Bare form: an implicit tuple made of one element and a trailing comma.
    return (
        node.type in IMPLICIT_TUPLE
        and len(node.children) == 2
        and node.children[1].type == token.COMMA
    )
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in STARS or not leaf.parent:
        return False

    p = leaf.parent
    if p.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.
        if not p.parent:
            return False

        p = p.parent

    return p.type in within
2180 def max_delimiter_priority_in_atom(node: LN) -> int:
2181 if node.type != syms.atom:
2184 first = node.children[0]
2185 last = node.children[-1]
2186 if not (first.type == token.LPAR and last.type == token.RPAR):
2189 bt = BracketTracker()
2190 for c in node.children[1:-1]:
2191 if isinstance(c, Leaf):
2194 for leaf in c.leaves():
2197 return bt.max_delimiter_priority()
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).

    Invisible parentheses are leaves with an empty value; this gives them
    their textual value back.  Leaves of any other type are left alone.
    """
    if leaf.type == token.LPAR:
        leaf.value = "("
    elif leaf.type == token.RPAR:
        leaf.value = ")"
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # Compare case-insensitively so that mixed-case prefixes such as
            # rF"..." or Fr'...' are recognized as f-strings, too.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                return True

        elif (
            n.type == syms.typedargslist
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # A signature ending with a comma: only 3.6+ when it has * or **.
            if any(ch.type in STARS for ch in n.children):
                return True

    return False
2240 PYTHON_EXTENSIONS = {".py"}
2241 BLACKLISTED_DIRECTORIES = {
2242 "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
    and have one of the PYTHON_EXTENSIONS.

    Directories are walked recursively; a directory whose name is blacklisted
    is skipped together with everything below it.
    """
    for child in path.iterdir():
        if child.is_dir():
            if child.name in BLACKLISTED_DIRECTORIES:
                continue

            yield from gen_python_files_in_dir(child)

        elif child.suffix in PYTHON_EXTENSIONS:
            yield child
2263 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2266 change_count: int = 0
2268 failure_count: int = 0
2270 def done(self, src: Path, changed: Changed) -> None:
2271 """Increment the counter for successful reformatting. Write out a message."""
2272 if changed is Changed.YES:
2273 reformatted = "would reformat" if self.check else "reformatted"
2275 out(f"{reformatted} {src}")
2276 self.change_count += 1
2279 if changed is Changed.NO:
2280 msg = f"{src} already well formatted, good job."
2282 msg = f"{src} wasn't modified on disk since last run."
2283 out(msg, bold=False)
2284 self.same_count += 1
2286 def failed(self, src: Path, message: str) -> None:
2287 """Increment the counter for failed reformatting. Write out a message."""
2288 err(f"error: cannot format {src}: {message}")
2289 self.failure_count += 1
2292 def return_code(self) -> int:
2293 """Return the exit code that the app should use.
2295 This considers the current state of changed files and failures:
2296 - if there were any failures, return 123;
2297 - if any files were changed and --check is being used, return 1;
2298 - otherwise return 0.
2300 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2301 # 126 we have special returncodes reserved by the shell.
2302 if self.failure_count:
2305 elif self.change_count and self.check:
2310 def __str__(self) -> str:
2311 """Render a color report of the current state.
2313 Use `click.unstyle` to remove colors.
2316 reformatted = "would be reformatted"
2317 unchanged = "would be left unchanged"
2318 failed = "would fail to reformat"
2320 reformatted = "reformatted"
2321 unchanged = "left unchanged"
2322 failed = "failed to reformat"
2324 if self.change_count:
2325 s = "s" if self.change_count > 1 else ""
2327 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2330 s = "s" if self.same_count > 1 else ""
2331 report.append(f"{self.same_count} file{s} {unchanged}")
2332 if self.failure_count:
2333 s = "s" if self.failure_count > 1 else ""
2335 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2337 return ", ".join(report) + "."
2340 def assert_equivalent(src: str, dst: str) -> None:
2341 """Raise AssertionError if `src` and `dst` aren't equivalent."""
2346 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2347 """Simple visitor generating strings to compare ASTs by content."""
2348 yield f"{' ' * depth}{node.__class__.__name__}("
2350 for field in sorted(node._fields):
2352 value = getattr(node, field)
2353 except AttributeError:
2356 yield f"{' ' * (depth+1)}{field}="
2358 if isinstance(value, list):
2360 if isinstance(item, ast.AST):
2361 yield from _v(item, depth + 2)
2363 elif isinstance(value, ast.AST):
2364 yield from _v(value, depth + 2)
2367 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
2369 yield f"{' ' * depth}) # /{node.__class__.__name__}"
2372 src_ast = ast.parse(src)
2373 except Exception as exc:
2374 major, minor = sys.version_info[:2]
2375 raise AssertionError(
2376 f"cannot use --safe with this file; failed to parse source file "
2377 f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2378 f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
2382 dst_ast = ast.parse(dst)
2383 except Exception as exc:
2384 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2385 raise AssertionError(
2386 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2387 f"Please report a bug on https://github.com/ambv/black/issues. "
2388 f"This invalid output might be helpful: {log}"
2391 src_ast_str = "\n".join(_v(src_ast))
2392 dst_ast_str = "\n".join(_v(dst_ast))
2393 if src_ast_str != dst_ast_str:
2394 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2395 raise AssertionError(
2396 f"INTERNAL ERROR: Black produced code that is not equivalent to "
2398 f"Please report a bug on https://github.com/ambv/black/issues. "
2399 f"This diff might be helpful: {log}"
2403 def assert_stable(src: str, dst: str, line_length: int) -> None:
2404 """Raise AssertionError if `dst` reformats differently the second time."""
2405 newdst = format_str(dst, line_length=line_length)
2408 diff(src, dst, "source", "first pass"),
2409 diff(dst, newdst, "first pass", "second pass"),
2411 raise AssertionError(
2412 f"INTERNAL ERROR: Black produced different code on the second pass "
2413 f"of the formatter. "
2414 f"Please report a bug on https://github.com/ambv/black/issues. "
2415 f"This diff might be helpful: {log}"
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file.

    Every item of `output` is written in order and is terminated with a
    newline if it doesn't end with one already (empty items add nothing).
    The file is created with `delete=False`, so it is left on disk for the
    caller to inspect.
    """
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            if lines and lines[-1] != "\n":
                f.write("\n")
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header.  Five lines
    of context are kept around every change.  Identical inputs give an empty
    string.
    """
    import difflib

    # unified_diff() wants lists of lines that keep their line endings.
    a_lines = [line + "\n" for line in a.split("\n")]
    b_lines = [line + "\n" for line in b.split("\n")]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
2444 def cancel(tasks: List[asyncio.Task]) -> None:
2445 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.Task.all_tasks()` is gone in newer Python versions; prefer
        # the module-level `asyncio.all_tasks()` whenever it is available.
        all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No explicit `loop=` here: newer Python versions reject it, and
        # gather() takes the loop from the tasks it is given.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    # One pass cannot see a match that overlaps the previous one, because the
    # shared characters were already consumed; the second pass picks those up.
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
2482 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2483 CACHE_FILE = CACHE_DIR / "cache.pickle"
def read_cache() -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.

    Returns an empty cache when CACHE_FILE is missing or cannot be unpickled.
    """
    if not CACHE_FILE.exists():
        return {}

    # NOTE(review): pickle.load() can execute arbitrary code; this is only
    # acceptable as long as CACHE_FILE is writable by the current user alone.
    with CACHE_FILE.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except (
            pickle.UnpicklingError,
            # A truncated or empty file, or data from an incompatible pickle,
            # shows up as one of these rather than as UnpicklingError.
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
            ValueError,
        ):
            return {}

    return cache
def get_cache_info(path: Path) -> CacheInfo:
    """Return the information used to check if a file is already formatted or not.

    The result is the pair (modification time, size in bytes) taken from a
    single `path.stat()` call.
    """
    st = path.stat()
    return st.st_mtime, st.st_size
2510 cache: Cache, sources: Iterable[Path]
2511 ) -> Tuple[List[Path], List[Path]]:
2512 """Split a list of paths into two.
2514 The first list contains paths of files that modified on disk or are not in the
2515 cache. The other list contains paths to non-modified files.
2520 if cache.get(src) != get_cache_info(src):
def write_cache(cache: Cache, sources: List[Path]) -> None:
    """Update the cache file.

    The entries of `cache` are kept and the current cache info of every path
    in `sources` (keyed by the resolved path) is added or refreshed.
    """
    try:
        # exist_ok avoids failing when another process creates the directory
        # between an existence check and the mkdir() call.
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with CACHE_FILE.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # The cache is best effort: not being able to store it must not break
        # formatting.
        pass
2539 if __name__ == "__main__":