All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you have read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
5 from asyncio.base_events import BaseEventLoop
6 from concurrent.futures import Executor, ProcessPoolExecutor
8 from functools import partial, wraps
11 from multiprocessing import Manager
13 from pathlib import Path
36 from appdirs import user_cache_dir
37 from attr import dataclass, Factory
41 from blib2to3.pytree import Node, Leaf, type_repr
42 from blib2to3 import pygram, pytree
43 from blib2to3.pgen2 import driver, token
44 from blib2to3.pgen2.parse import ParseError
46 __version__ = "18.4a2"
47 DEFAULT_LINE_LENGTH = 88
49 syms = pygram.python_symbols
57 LN = Union[Leaf, Node]
58 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
61 CacheInfo = Tuple[Timestamp, FileSize]
62 Cache = Dict[Path, CacheInfo]
63 out = partial(click.secho, bold=True, err=True)
64 err = partial(click.secho, fg="red", err=True)
class NothingChanged(UserWarning):
    """Signals that reformatting produced code identical to the source.

    Raised by :func:`format_file`.
    """
class CannotSplit(Exception):
    """No readable split that fits the allotted line length could be produced.

    :func:`left_hand_split`, :func:`right_hand_split`, and
    :func:`delimiter_split` raise this.
    """
class FormatError(Exception):
    """Common parent of the `# fmt: on` and `# fmt: off` control exceptions.

    An instance records how much of a leaf's prefix had been consumed before
    the format control comment appeared.
    """

    def __init__(self, consumed: int) -> None:
        # `consumed` is also forwarded to Exception so it shows up in `args`.
        super().__init__(consumed)
        self.consumed = consumed

    def trim_prefix(self, leaf: Leaf) -> None:
        """Strip the already consumed part off the front of `leaf.prefix`."""
        offset = self.consumed
        leaf.prefix = leaf.prefix[offset:]

    def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
        """Build a new NEWLINE Leaf whose value is the consumed part of the prefix."""
        offset = self.consumed
        consumed_text = leaf.prefix[:offset]
        return Leaf(token.NEWLINE, consumed_text)
class FormatOn(FormatError):
    """Raised when a comment like `# fmt: on` is encountered in the file."""
class FormatOff(FormatError):
    """Raised when a comment like `# fmt: off` is encountered in the file."""
107 class WriteBack(Enum):
124 default=DEFAULT_LINE_LENGTH,
125 help="How many character per line to allow.",
132 "Don't write the files back, just return the status. Return code 0 "
133 "means nothing would change. Return code 1 means some files would be "
134 "reformatted. Return code 123 means there was an internal error."
140 help="Don't write the files back, just output a diff for each file on stdout.",
145 help="If --fast given, skip temporary sanity checks. [default: --safe]",
152 "Don't emit non-error messages to stderr. Errors are still emitted, "
153 "silence those with 2>/dev/null."
156 @click.version_option(version=__version__)
161 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
174 """The uncompromising code formatter."""
175 sources: List[Path] = []
179 sources.extend(gen_python_files_in_dir(p))
181 # if a file was explicitly given, we don't care about its extension
184 sources.append(Path("-"))
186 err(f"invalid path: {s}")
188 exc = click.ClickException("Options --check and --diff are mutually exclusive")
193 write_back = WriteBack.NO
195 write_back = WriteBack.DIFF
197 write_back = WriteBack.YES
198 if len(sources) == 0:
202 elif len(sources) == 1:
203 return_code = run_single_file_mode(
204 line_length, check, fast, quiet, write_back, sources[0]
207 return_code = run_multi_file_mode(line_length, fast, quiet, write_back, sources)
208 ctx.exit(return_code)
211 def run_single_file_mode(
216 write_back: WriteBack,
219 report = Report(check=check, quiet=quiet)
221 if not src.is_file() and str(src) == "-":
222 changed = format_stdin_to_stdout(
223 line_length=line_length, fast=fast, write_back=write_back
228 if write_back != WriteBack.DIFF:
231 if src in cache and cache[src] == get_cache_info(src):
232 changed = Changed.CACHED
233 if changed is not Changed.CACHED:
234 changed = format_file_in_place(
235 src, line_length=line_length, fast=fast, write_back=write_back
237 if write_back != WriteBack.DIFF and changed is not Changed.NO:
238 write_cache(cache, [src])
239 report.done(src, changed)
240 except Exception as exc:
241 report.failed(src, str(exc))
242 return report.return_code
245 def run_multi_file_mode(
249 write_back: WriteBack,
252 loop = asyncio.get_event_loop()
253 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
256 return_code = loop.run_until_complete(
258 sources, line_length, write_back, fast, quiet, loop, executor
266 async def schedule_formatting(
269 write_back: WriteBack,
275 """Run formatting of `sources` in parallel using the provided `executor`.
277 (Use ProcessPoolExecutors for actual parallelism.)
279 `line_length`, `write_back`, and `fast` options are passed to
280 :func:`format_file_in_place`.
282 report = Report(check=write_back is WriteBack.NO, quiet=quiet)
284 if write_back != WriteBack.DIFF:
286 sources, cached = filter_cached(cache, sources)
288 report.done(src, Changed.CACHED)
293 if write_back == WriteBack.DIFF:
294 # For diff output, we need locks to ensure we don't interleave output
295 # from different processes.
297 lock = manager.Lock()
299 src: loop.run_in_executor(
300 executor, format_file_in_place, src, line_length, fast, write_back, lock
304 _task_values = list(tasks.values())
305 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
306 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
307 await asyncio.wait(_task_values)
308 for src, task in tasks.items():
310 report.failed(src, "timed out, cancelling")
312 cancelled.append(task)
313 elif task.cancelled():
314 cancelled.append(task)
315 elif task.exception():
316 report.failed(src, str(task.exception()))
318 formatted.append(src)
319 report.done(src, task.result())
322 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
324 out("All done! ✨ 🍰 ✨")
326 click.echo(str(report))
328 if write_back != WriteBack.DIFF and formatted:
329 write_cache(cache, formatted)
331 return report.return_code
334 def format_file_in_place(
338 write_back: WriteBack = WriteBack.NO,
339 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
341 """Format file under `src` path. Return True if changed.
343 If `write_back` is True, write reformatted code back to stdout.
344 `line_length` and `fast` options are passed to :func:`format_file_contents`.
347 with tokenize.open(src) as src_buffer:
348 src_contents = src_buffer.read()
350 dst_contents = format_file_contents(
351 src_contents, line_length=line_length, fast=fast
353 except NothingChanged:
356 if write_back == write_back.YES:
357 with open(src, "w", encoding=src_buffer.encoding) as f:
358 f.write(dst_contents)
359 elif write_back == write_back.DIFF:
360 src_name = f"{src.name} (original)"
361 dst_name = f"{src.name} (formatted)"
362 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
366 sys.stdout.write(diff_contents)
373 def format_stdin_to_stdout(
374 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
376 """Format file on stdin. Return True if changed.
378 If `write_back` is True, write reformatted code back to stdout.
379 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
381 src = sys.stdin.read()
384 dst = format_file_contents(src, line_length=line_length, fast=fast)
387 except NothingChanged:
391 if write_back == WriteBack.YES:
392 sys.stdout.write(dst)
393 elif write_back == WriteBack.DIFF:
394 src_name = "<stdin> (original)"
395 dst_name = "<stdin> (formatted)"
396 sys.stdout.write(diff(src, dst, src_name, dst_name))
399 def format_file_contents(
400 src_contents: str, line_length: int, fast: bool
402 """Reformat contents a file and return new contents.
404 If `fast` is False, additionally confirm that the reformatted code is
405 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
406 `line_length` is passed to :func:`format_str`.
408 if src_contents.strip() == "":
411 dst_contents = format_str(src_contents, line_length=line_length)
412 if src_contents == dst_contents:
416 assert_equivalent(src_contents, dst_contents)
417 assert_stable(src_contents, dst_contents, line_length=line_length)
421 def format_str(src_contents: str, line_length: int) -> FileContent:
422 """Reformat a string and return new contents.
424 `line_length` determines how many characters per line are allowed.
426 src_node = lib2to3_parse(src_contents)
428 lines = LineGenerator()
429 elt = EmptyLineTracker()
430 py36 = is_python36(src_node)
433 for current_line in lines.visit(src_node):
434 for _ in range(after):
435 dst_contents += str(empty_line)
436 before, after = elt.maybe_empty_lines(current_line)
437 for _ in range(before):
438 dst_contents += str(empty_line)
439 for line in split_line(current_line, line_length=line_length, py36=py36):
440 dst_contents += str(line)
445 pygram.python_grammar_no_print_statement_no_exec_statement,
446 pygram.python_grammar_no_print_statement,
447 pygram.python_grammar_no_exec_statement,
448 pygram.python_grammar,
452 def lib2to3_parse(src_txt: str) -> Node:
453 """Given a string with source, return the lib2to3 Node."""
454 grammar = pygram.python_grammar_no_print_statement
455 if src_txt[-1] != "\n":
456 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
458 for grammar in GRAMMARS:
459 drv = driver.Driver(grammar, pytree.convert)
461 result = drv.parse_string(src_txt, True)
464 except ParseError as pe:
465 lineno, column = pe.context[1]
466 lines = src_txt.splitlines()
468 faulty_line = lines[lineno - 1]
470 faulty_line = "<line number missing in source>"
471 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
475 if isinstance(result, Leaf):
476 result = Node(syms.file_input, [result])
480 def lib2to3_unparse(node: Node) -> str:
481 """Given a lib2to3 node, return its string representation."""
489 class Visitor(Generic[T]):
490 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
492 def visit(self, node: LN) -> Iterator[T]:
493 """Main method to visit `node` and its children.
495 It tries to find a `visit_*()` method for the given `node.type`, like
496 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
497 If no dedicated `visit_*()` method is found, chooses `visit_default()`
500 Then yields objects of type `T` from the selected visitor.
503 name = token.tok_name[node.type]
505 name = type_repr(node.type)
506 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
def visit_default(self, node: LN) -> Iterator[T]:
    """Fallback `visit_*()` handler: descend into every child of `node`.

    Only `Node` objects are recursed into; for anything else nothing is
    yielded.
    """
    if not isinstance(node, Node):
        return

    for descendant in node.children:
        yield from self.visit(descendant)
516 class DebugVisitor(Visitor[T]):
519 def visit_default(self, node: LN) -> Iterator[T]:
520 indent = " " * (2 * self.tree_depth)
521 if isinstance(node, Node):
522 _type = type_repr(node.type)
523 out(f"{indent}{_type}", fg="yellow")
525 for child in node.children:
526 yield from self.visit(child)
529 out(f"{indent}/{_type}", fg="yellow", bold=False)
531 _type = token.tok_name.get(node.type, str(node.type))
532 out(f"{indent}{_type}", fg="blue", nl=False)
534 # We don't have to handle prefixes for `Node` objects since
535 # that delegates to the first child anyway.
536 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
537 out(f" {node.value!r}", fg="blue", bold=False)
def show(cls, code: str) -> None:
    """Pretty-print the lib2to3 AST parsed from the string `code`.

    A debugging convenience: the visitor is driven to exhaustion purely for
    the output it emits along the way.
    """
    printer: DebugVisitor[None] = DebugVisitor()
    tree = lib2to3_parse(code)
    for _ in printer.visit(tree):
        pass
549 KEYWORDS = set(keyword.kwlist)
550 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
551 FLOW_CONTROL = {"return", "raise", "break", "continue"}
562 STANDALONE_COMMENT = 153
563 LOGIC_OPERATORS = {"and", "or"}
587 STARS = {token.STAR, token.DOUBLESTAR}
590 syms.argument, # double star in arglist
591 syms.trailer, # single argument to call
593 syms.varargslist, # lambdas
595 UNPACKING_PARENTS = {
596 syms.atom, # single element of a list or set literal
601 COMPREHENSION_PRIORITY = 20
605 COMPARATOR_PRIORITY = 3
610 class BracketTracker:
611 """Keeps track of brackets on a line."""
614 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
615 delimiters: Dict[LeafID, Priority] = Factory(dict)
616 previous: Optional[Leaf] = None
618 def mark(self, leaf: Leaf) -> None:
619 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
621 All leaves receive an int `bracket_depth` field that stores how deep
622 within brackets a given leaf is. 0 means there are no enclosing brackets
623 that started on this line.
625 If a leaf is itself a closing bracket, it receives an `opening_bracket`
626 field that it forms a pair with. This is a one-directional link to
627 avoid reference cycles.
629 If a leaf is a delimiter (a token on which Black can split the line if
630 needed) and it's on depth 0, its `id()` is stored in the tracker's
633 if leaf.type == token.COMMENT:
636 if leaf.type in CLOSING_BRACKETS:
638 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
639 leaf.opening_bracket = opening_bracket
640 leaf.bracket_depth = self.depth
642 delim = is_split_before_delimiter(leaf, self.previous)
643 if delim and self.previous is not None:
644 self.delimiters[id(self.previous)] = delim
646 delim = is_split_after_delimiter(leaf, self.previous)
648 self.delimiters[id(leaf)] = delim
649 if leaf.type in OPENING_BRACKETS:
650 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
def any_open_brackets(self) -> bool:
    """Tell whether some opening bracket on the line is still unmatched."""
    if self.bracket_match:
        return True
    return False
def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
    """Return the highest delimiter priority recorded for the line.

    Delimiters whose leaf id appears in `exclude` are ignored.
    Values are consistent with what `is_delimiter()` returns.
    Raises ValueError when no delimiter remains to choose from.
    """
    candidates = (
        priority
        for leaf_id, priority in self.delimiters.items()
        if leaf_id not in exclude
    )
    return max(candidates)
669 """Holds leaves and comments. Can be printed with `str(line)`."""
672 leaves: List[Leaf] = Factory(list)
673 comments: List[Tuple[Index, Leaf]] = Factory(list)
674 bracket_tracker: BracketTracker = Factory(BracketTracker)
675 inside_brackets: bool = False
676 has_for: bool = False
677 _for_loop_variable: bool = False
679 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
680 """Add a new `leaf` to the end of the line.
682 Unless `preformatted` is True, the `leaf` will receive a new consistent
683 whitespace prefix and metadata applied by :class:`BracketTracker`.
684 Trailing commas are maybe removed, unpacked for loop variables are
685 demoted from being delimiters.
687 Inline comments are put aside.
689 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
693 if self.leaves and not preformatted:
694 # Note: at this point leaf.prefix should be empty except for
695 # imports, for which we only preserve newlines.
696 leaf.prefix += whitespace(leaf)
697 if self.inside_brackets or not preformatted:
698 self.maybe_decrement_after_for_loop_variable(leaf)
699 self.bracket_tracker.mark(leaf)
700 self.maybe_remove_trailing_comma(leaf)
701 self.maybe_increment_for_loop_variable(leaf)
703 if not self.append_comment(leaf):
704 self.leaves.append(leaf)
706 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
707 """Like :func:`append()` but disallow invalid standalone comment structure.
709 Raises ValueError when any `leaf` is appended after a standalone comment
710 or when a standalone comment is not the first leaf on the line.
712 if self.bracket_tracker.depth == 0:
714 raise ValueError("cannot append to standalone comments")
716 if self.leaves and leaf.type == STANDALONE_COMMENT:
718 "cannot append standalone comments to a populated line"
721 self.append(leaf, preformatted=preformatted)
def is_comment(self) -> bool:
    """Does this line consist of exactly one standalone comment leaf?"""
    if len(self.leaves) != 1:
        return False
    return self.leaves[0].type == STANDALONE_COMMENT
def is_decorator(self) -> bool:
    """Does this non-empty line start with an `@` token?"""
    if not self:
        return False
    return self.leaves[0].type == token.AT
def is_import(self) -> bool:
    """Does the first leaf of this non-empty line belong to an import?"""
    if not self:
        return False
    # Inside this body the name resolves to the module-level `is_import()`.
    return is_import(self.leaves[0])
739 def is_class(self) -> bool:
740 """Is this line a class definition?"""
743 and self.leaves[0].type == token.NAME
744 and self.leaves[0].value == "class"
748 def is_def(self) -> bool:
749 """Is this a function definition? (Also returns True for async defs.)"""
751 first_leaf = self.leaves[0]
756 second_leaf: Optional[Leaf] = self.leaves[1]
760 (first_leaf.type == token.NAME and first_leaf.value == "def")
762 first_leaf.type == token.ASYNC
763 and second_leaf is not None
764 and second_leaf.type == token.NAME
765 and second_leaf.value == "def"
770 def is_flow_control(self) -> bool:
771 """Is this line a flow control statement?
773 Those are `return`, `raise`, `break`, and `continue`.
777 and self.leaves[0].type == token.NAME
778 and self.leaves[0].value in FLOW_CONTROL
782 def is_yield(self) -> bool:
783 """Is this line a yield statement?"""
786 and self.leaves[0].type == token.NAME
787 and self.leaves[0].value == "yield"
790 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
791 """If so, needs to be split before emitting."""
792 for leaf in self.leaves:
793 if leaf.type == STANDALONE_COMMENT:
794 if leaf.bracket_depth <= depth_limit:
799 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
800 """Remove trailing comma if there is one and it's safe."""
803 and self.leaves[-1].type == token.COMMA
804 and closing.type in CLOSING_BRACKETS
808 if closing.type == token.RBRACE:
809 self.remove_trailing_comma()
812 if closing.type == token.RSQB:
813 comma = self.leaves[-1]
814 if comma.parent and comma.parent.type == syms.listmaker:
815 self.remove_trailing_comma()
818 # For parens let's check if it's safe to remove the comma. If the
819 # trailing one is the only one, we might mistakenly change a tuple
820 # into a different type by removing the comma.
821 depth = closing.bracket_depth + 1
823 opening = closing.opening_bracket
824 for _opening_index, leaf in enumerate(self.leaves):
831 for leaf in self.leaves[_opening_index + 1:]:
835 bracket_depth = leaf.bracket_depth
836 if bracket_depth == depth and leaf.type == token.COMMA:
838 if leaf.parent and leaf.parent.type == syms.arglist:
843 self.remove_trailing_comma()
848 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
849 """In a for loop, or comprehension, the variables are often unpacks.
851 To avoid splitting on the comma in this situation, increase the depth of
852 tokens between `for` and `in`.
854 if leaf.type == token.NAME and leaf.value == "for":
856 self.bracket_tracker.depth += 1
857 self._for_loop_variable = True
862 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
863 """See `maybe_increment_for_loop_variable` above for explanation."""
864 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
865 self.bracket_tracker.depth -= 1
866 self._for_loop_variable = False
871 def append_comment(self, comment: Leaf) -> bool:
872 """Add an inline or standalone comment to the line."""
874 comment.type == STANDALONE_COMMENT
875 and self.bracket_tracker.any_open_brackets()
880 if comment.type != token.COMMENT:
883 after = len(self.leaves) - 1
885 comment.type = STANDALONE_COMMENT
890 self.comments.append((after, comment))
893 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
894 """Generate comments that should appear directly after `leaf`."""
895 for _leaf_index, _leaf in enumerate(self.leaves):
902 for index, comment_after in self.comments:
903 if _leaf_index == index:
906 def remove_trailing_comma(self) -> None:
907 """Remove the trailing comma and moves the comments attached to it."""
908 comma_index = len(self.leaves) - 1
909 for i in range(len(self.comments)):
910 comment_index, comment = self.comments[i]
911 if comment_index == comma_index:
912 self.comments[i] = (comma_index - 1, comment)
915 def __str__(self) -> str:
916 """Render the line."""
920 indent = " " * self.depth
921 leaves = iter(self.leaves)
923 res = f"{first.prefix}{indent}{first.value}"
926 for _, comment in self.comments:
def __bool__(self) -> bool:
    """A line is truthy when it holds at least one leaf or one comment."""
    return bool(self.leaves) or bool(self.comments)
935 class UnformattedLines(Line):
936 """Just like :class:`Line` but stores lines which aren't reformatted."""
938 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
939 """Just add a new `leaf` to the end of the lines.
941 The `preformatted` argument is ignored.
943 Keeps track of indentation `depth`, which is useful when the user
944 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
947 list(generate_comments(leaf))
948 except FormatOn as f_on:
949 self.leaves.append(f_on.leaf_from_consumed(leaf))
952 self.leaves.append(leaf)
953 if leaf.type == token.INDENT:
955 elif leaf.type == token.DEDENT:
958 def __str__(self) -> str:
959 """Render unformatted lines from leaves which were added with `append()`.
961 `depth` is not used for indentation in this case.
967 for leaf in self.leaves:
def append_comment(self, comment: Leaf) -> bool:
    """Unsupported in this class; always raises `NotImplementedError`."""
    reason = "Unformatted lines don't store comments separately."
    raise NotImplementedError(reason)
975 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
976 """Does nothing and returns False."""
979 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
980 """Does nothing and returns False."""
985 class EmptyLineTracker:
986 """Provides a stateful method that returns the number of potential extra
987 empty lines needed before and after the currently processed line.
989 Note: this tracker works on lines that haven't been split yet. It assumes
990 the prefix of the first leaf consists of optional newlines. Those newlines
991 are consumed by `maybe_empty_lines()` and included in the computation.
993 previous_line: Optional[Line] = None
994 previous_after: int = 0
995 previous_defs: List[int] = Factory(list)
997 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
998 """Return the number of extra empty lines before and after the `current_line`.
1000 This is for separating `def`, `async def` and `class` with extra empty
1001 lines (two on module-level), as well as providing an extra empty line
1002 after flow control keywords to make them more prominent.
1004 if isinstance(current_line, UnformattedLines):
1007 before, after = self._maybe_empty_lines(current_line)
1008 before -= self.previous_after
1009 self.previous_after = after
1010 self.previous_line = current_line
1011 return before, after
1013 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1015 if current_line.depth == 0:
1017 if current_line.leaves:
1018 # Consume the first leaf's extra newlines.
1019 first_leaf = current_line.leaves[0]
1020 before = first_leaf.prefix.count("\n")
1021 before = min(before, max_allowed)
1022 first_leaf.prefix = ""
1025 depth = current_line.depth
1026 while self.previous_defs and self.previous_defs[-1] >= depth:
1027 self.previous_defs.pop()
1028 before = 1 if depth else 2
1029 is_decorator = current_line.is_decorator
1030 if is_decorator or current_line.is_def or current_line.is_class:
1031 if not is_decorator:
1032 self.previous_defs.append(depth)
1033 if self.previous_line is None:
1034 # Don't insert empty lines before the first line in the file.
1037 if self.previous_line and self.previous_line.is_decorator:
1038 # Don't insert empty lines between decorators.
1042 if current_line.depth:
1046 if current_line.is_flow_control:
1051 and self.previous_line.is_import
1052 and not current_line.is_import
1053 and depth == self.previous_line.depth
1055 return (before or 1), 0
1059 and self.previous_line.is_yield
1060 and (not current_line.is_yield or depth != self.previous_line.depth)
1062 return (before or 1), 0
1068 class LineGenerator(Visitor[Line]):
1069 """Generates reformatted Line objects. Empty lines are not emitted.
1071 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1072 in ways that will no longer stringify to valid Python code on the tree.
1074 current_line: Line = Factory(Line)
1076 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1079 If the line is empty, only emit if it makes sense.
1080 If the line is too long, split it first and then generate.
1082 If any lines were generated, set up a new current_line.
1084 if not self.current_line:
1085 if self.current_line.__class__ == type:
1086 self.current_line.depth += indent
1088 self.current_line = type(depth=self.current_line.depth + indent)
1089 return # Line is empty, don't emit. Creating a new one unnecessary.
1091 complete_line = self.current_line
1092 self.current_line = type(depth=complete_line.depth + indent)
1095 def visit(self, node: LN) -> Iterator[Line]:
1096 """Main method to visit `node` and its children.
1098 Yields :class:`Line` objects.
1100 if isinstance(self.current_line, UnformattedLines):
1101 # File contained `# fmt: off`
1102 yield from self.visit_unformatted(node)
1105 yield from super().visit(node)
1107 def visit_default(self, node: LN) -> Iterator[Line]:
1108 """Default `visit_*()` implementation. Recurses to children of `node`."""
1109 if isinstance(node, Leaf):
1110 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1112 for comment in generate_comments(node):
1113 if any_open_brackets:
1114 # any comment within brackets is subject to splitting
1115 self.current_line.append(comment)
1116 elif comment.type == token.COMMENT:
1117 # regular trailing comment
1118 self.current_line.append(comment)
1119 yield from self.line()
1122 # regular standalone comment
1123 yield from self.line()
1125 self.current_line.append(comment)
1126 yield from self.line()
1128 except FormatOff as f_off:
1129 f_off.trim_prefix(node)
1130 yield from self.line(type=UnformattedLines)
1131 yield from self.visit(node)
1133 except FormatOn as f_on:
1134 # This only happens here if somebody says "fmt: on" multiple
1136 f_on.trim_prefix(node)
1137 yield from self.visit_default(node)
1140 normalize_prefix(node, inside_brackets=any_open_brackets)
1141 if node.type == token.STRING:
1142 normalize_string_quotes(node)
1143 if node.type not in WHITESPACE:
1144 self.current_line.append(node)
1145 yield from super().visit_default(node)
def visit_INDENT(self, node: Node) -> Iterator[Line]:
    """Go one indentation level deeper, possibly yielding a line first."""
    # blib2to3 never attaches comments to INDENT.
    yield from self.line(indent=1)
    yield from self.visit_default(node)
def visit_DEDENT(self, node: Node) -> Iterator[Line]:
    """Go one indentation level shallower, possibly yielding a line."""
    # A DEDENT carries no value, and in blib2to3 it never holds comments
    # either, so the node itself is not visited.
    yield from self.line(indent=-1)
1159 self, node: Node, keywords: Set[str], parens: Set[str]
1160 ) -> Iterator[Line]:
1161 """Visit a statement.
1163 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1164 `def`, `with`, `class`, and `assert`.
1166 The relevant Python language `keywords` for a given statement will be
1167 NAME leaves within it. This methods puts those on a separate line.
1169 `parens` holds pairs of nodes where invisible parentheses should be put.
1170 Keys hold nodes after which opening parentheses should be put, values
1171 hold nodes before which closing parentheses should be put.
1173 normalize_invisible_parens(node, parens_after=parens)
1174 for child in node.children:
1175 if child.type == token.NAME and child.value in keywords: # type: ignore
1176 yield from self.line()
1178 yield from self.visit(child)
1180 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1181 """Visit a statement without nested statements."""
1182 is_suite_like = node.parent and node.parent.type in STATEMENT
1184 yield from self.line(+1)
1185 yield from self.visit_default(node)
1186 yield from self.line(-1)
1189 yield from self.line()
1190 yield from self.visit_default(node)
1192 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1193 """Visit `async def`, `async for`, `async with`."""
1194 yield from self.line()
1196 children = iter(node.children)
1197 for child in children:
1198 yield from self.visit(child)
1200 if child.type == token.ASYNC:
1203 internal_stmt = next(children)
1204 for child in internal_stmt.children:
1205 yield from self.visit(child)
def visit_decorators(self, node: Node) -> Iterator[Line]:
    """Visit decorators, closing the current line before each child."""
    for part in node.children:
        yield from self.line()
        yield from self.visit(part)
1213 def visit_import_from(self, node: Node) -> Iterator[Line]:
1214 """Visit import_from and maybe put invisible parentheses.
1216 This is separate from `visit_stmt` because import statements don't
1217 support arbitrary atoms and thus handling of parentheses is custom.
1220 for index, child in enumerate(node.children):
1222 if child.type == token.LPAR:
1223 # make parentheses invisible
1224 child.value = "" # type: ignore
1225 node.children[-1].value = "" # type: ignore
1227 # insert invisible parentheses
1228 node.insert_child(index, Leaf(token.LPAR, ""))
1229 node.append_child(Leaf(token.RPAR, ""))
1233 child.type == token.NAME and child.value == "import" # type: ignore
1236 for child in node.children:
1237 yield from self.visit(child)
def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
    """Drop the semicolon; the statement after it starts on a new line."""
    # The leaf itself is never appended, only the pending line is closed.
    yield from self.line()
def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
    """Handle end of file: process outstanding comments, end with a newline."""
    # Default handling first, then close whatever line is still pending.
    yield from self.visit_default(leaf)
    yield from self.line()
1248 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1249 """Used when file contained a `# fmt: off`."""
1250 if isinstance(node, Node):
1251 for child in node.children:
1252 yield from self.visit(child)
1256 self.current_line.append(node)
1257 except FormatOn as f_on:
1258 f_on.trim_prefix(node)
1259 yield from self.line()
1260 yield from self.visit(node)
1262 if node.type == token.ENDMARKER:
1263 # somebody decided not to put a final `# fmt: on`
1264 yield from self.line()
1266 def __attrs_post_init__(self) -> None:
1267 """You are in a twisty little maze of passages."""
1270 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1271 self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"}, parens={"if"})
1272 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1273 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1274 self.visit_try_stmt = partial(
1275 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1277 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1278 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1279 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1280 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1281 self.visit_async_funcdef = self.visit_async_stmt
1282 self.visit_decorated = self.visit_decorators
1285 IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
1286 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1287 OPENING_BRACKETS = set(BRACKET.keys())
1288 CLOSING_BRACKETS = set(BRACKET.values())
1289 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1290 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1293 def whitespace(leaf: Leaf) -> str: # noqa C901
1294 """Return whitespace prefix if needed for the given `leaf`."""
1301 if t in ALWAYS_NO_SPACE:
1304 if t == token.COMMENT:
1307 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1308 if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}:
1311 prev = leaf.prev_sibling
1313 prevp = preceding_leaf(p)
1314 if not prevp or prevp.type in OPENING_BRACKETS:
1317 if t == token.COLON:
1318 return SPACE if prevp.type == token.COMMA else NO
1320 if prevp.type == token.EQUAL:
1322 if prevp.parent.type in {
1323 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1327 elif prevp.parent.type == syms.typedargslist:
1328 # A bit hacky: if the equal sign has whitespace, it means we
1329 # previously found it's a typed argument. So, we're using
1333 elif prevp.type in STARS:
1334 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1337 elif prevp.type == token.COLON:
1338 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1343 and prevp.parent.type == syms.factor
1344 and prevp.type in MATH_OPERATORS
1349 prevp.type == token.RIGHTSHIFT
1351 and prevp.parent.type == syms.shift_expr
1352 and prevp.prev_sibling
1353 and prevp.prev_sibling.type == token.NAME
1354 and prevp.prev_sibling.value == "print" # type: ignore
1356 # Python 2 print chevron
1359 elif prev.type in OPENING_BRACKETS:
1362 if p.type in {syms.parameters, syms.arglist}:
1363 # untyped function signatures or calls
1367 if not prev or prev.type != token.COMMA:
1370 elif p.type == syms.varargslist:
1375 if prev and prev.type != token.COMMA:
1378 elif p.type == syms.typedargslist:
1379 # typed function signatures
1383 if t == token.EQUAL:
1384 if prev.type != syms.tname:
1387 elif prev.type == token.EQUAL:
1388 # A bit hacky: if the equal sign has whitespace, it means we
1389 # previously found it's a typed argument. So, we're using that, too.
1392 elif prev.type != token.COMMA:
1395 elif p.type == syms.tname:
1398 prevp = preceding_leaf(p)
1399 if not prevp or prevp.type != token.COMMA:
1402 elif p.type == syms.trailer:
1403 # attributes and calls
1404 if t == token.LPAR or t == token.RPAR:
1409 prevp = preceding_leaf(p)
1410 if not prevp or prevp.type != token.NUMBER:
1413 elif t == token.LSQB:
1416 elif prev.type != token.COMMA:
1419 elif p.type == syms.argument:
1421 if t == token.EQUAL:
1425 prevp = preceding_leaf(p)
1426 if not prevp or prevp.type == token.LPAR:
1429 elif prev.type in {token.EQUAL} | STARS:
1432 elif p.type == syms.decorator:
1436 elif p.type == syms.dotted_name:
1440 prevp = preceding_leaf(p)
1441 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1444 elif p.type == syms.classdef:
1448 if prev and prev.type == token.LPAR:
1451 elif p.type == syms.subscript:
1454 assert p.parent is not None, "subscripts are always parented"
1455 if p.parent.type == syms.subscriptlist:
1463 elif p.type == syms.atom:
1464 if prev and t == token.DOT:
1465 # dots, but not the first one.
1469 p.type == syms.listmaker
1470 or p.type == syms.testlist_gexp
1471 or p.type == syms.subscriptlist
1473 # list interior, including unpacking
1477 elif p.type == syms.dictsetmaker:
1478 # dict and set interior, including unpacking
1482 if prev.type == token.DOUBLESTAR:
1485 elif p.type in {syms.factor, syms.star_expr}:
1488 prevp = preceding_leaf(p)
1489 if not prevp or prevp.type in OPENING_BRACKETS:
1492 prevp_parent = prevp.parent
1493 assert prevp_parent is not None
1495 prevp.type == token.COLON
1496 and prevp_parent.type in {syms.subscript, syms.sliceop}
1500 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1503 elif t == token.NAME or t == token.NUMBER:
1506 elif p.type == syms.import_from:
1508 if prev and prev.type == token.DOT:
1511 elif t == token.NAME:
1515 if prev and prev.type == token.DOT:
1518 elif p.type == syms.sliceop:
1524 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1525 """Return the first leaf that precedes `node`, if any."""
1527 res = node.prev_sibling
1529 if isinstance(res, Leaf):
1533 return list(res.leaves())[-1]
def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the priority of `leaf` as a delimiter that a line is broken after.

    Only commas are such delimiters; every other leaf yields 0. `previous` is
    not consulted here.

    Higher numbers are higher priority.
    """
    return COMMA_PRIORITY if leaf.type == token.COMMA else 0
1556 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1557 """Return the priority of the `leaf` delimiter, given a line break before it.
1559 The delimiter priorities returned here are from those delimiters that would
1560 cause a line break before themselves.
1562 Higher numbers are higher priority.
1564 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1565 # * and ** might also be MATH_OPERATORS but in this case they are not.
1566 # Don't treat them as a delimiter.
1570 leaf.type in MATH_OPERATORS
1572 and leaf.parent.type not in {syms.factor, syms.star_expr}
1574 return MATH_PRIORITY
1576 if leaf.type in COMPARATORS:
1577 return COMPARATOR_PRIORITY
1580 leaf.type == token.STRING
1581 and previous is not None
1582 and previous.type == token.STRING
1584 return STRING_PRIORITY
1587 leaf.type == token.NAME
1588 and leaf.value == "for"
1590 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1592 return COMPREHENSION_PRIORITY
1595 leaf.type == token.NAME
1596 and leaf.value == "if"
1598 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1600 return COMPREHENSION_PRIORITY
1602 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
1603 return LOGIC_PRIORITY
def is_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
    """Return the delimiter priority of `leaf`, or 0 when it is not a delimiter.

    The result is the larger of the split-before and split-after priorities.

    Higher numbers are higher priority.
    """
    before = is_split_before_delimiter(leaf, previous)
    after = is_split_after_delimiter(leaf, previous)
    return max(before, after)
1619 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1620 """Clean the prefix of the `leaf` and generate comments from it, if any.
1622 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1623 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1624 move because it does away with modifying the grammar to include all the
1625 possible places in which comments can be placed.
1627 The sad consequence for us though is that comments don't "belong" anywhere.
1628 This is why this function generates simple parentless Leaf objects for
1629 comments. We simply don't know what the correct parent should be.
1631 No matter though, we can live without this. We really only need to
1632 differentiate between inline and standalone comments. The latter don't
1633 share the line with any code.
1635 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1636 are emitted with a fake STANDALONE_COMMENT token identifier.
1647 for index, line in enumerate(p.split("\n")):
1648 consumed += len(line) + 1 # adding the length of the split '\n'
1649 line = line.lstrip()
1652 if not line.startswith("#"):
1655 if index == 0 and leaf.type != token.ENDMARKER:
1656 comment_type = token.COMMENT # simple trailing comment
1658 comment_type = STANDALONE_COMMENT
1659 comment = make_comment(line)
1660 yield Leaf(comment_type, comment, prefix="\n" * nlines)
1662 if comment in {"# fmt: on", "# yapf: enable"}:
1663 raise FormatOn(consumed)
1665 if comment in {"# fmt: off", "# yapf: disable"}:
1666 if comment_type == STANDALONE_COMMENT:
1667 raise FormatOff(consumed)
1669 prev = preceding_leaf(leaf)
1670 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
1671 raise FormatOff(consumed)
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:") should have a single space between
    the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided. Content that is
    empty once trailing whitespace is stripped becomes a bare "#".
    """
    content = content.rstrip()
    if not content:
        # Nothing left to index into; `content[0]` below would raise IndexError.
        return "#"

    body = content[1:] if content[0] == "#" else content
    if body and body[0] not in " !:#":
        body = " " + body
    return "#" + body
1696 line: Line, line_length: int, inner: bool = False, py36: bool = False
1697 ) -> Iterator[Line]:
1698 """Split a `line` into potentially many lines.
1700 They should fit in the allotted `line_length` but might not be able to.
1701 `inner` signifies that there were a pair of brackets somewhere around the
1702 current `line`, possibly transitively. This means we can fallback to splitting
1703 by delimiters if the LHS/RHS don't yield any results.
1705 If `py36` is True, splitting may generate syntax that is only compatible
1706 with Python 3.6 and later.
1708 if isinstance(line, UnformattedLines) or line.is_comment:
1712 line_str = str(line).strip("\n")
1714 len(line_str) <= line_length
1715 and "\n" not in line_str # multiline strings
1716 and not line.contains_standalone_comments()
1721 split_funcs: List[SplitFunc]
1723 split_funcs = [left_hand_split]
1724 elif line.inside_brackets:
1725 split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
1727 split_funcs = [right_hand_split]
1728 for split_func in split_funcs:
1729 # We are accumulating lines in `result` because we might want to abort
1730 # mission and return the original line in the end, or attempt a different
1732 result: List[Line] = []
1734 for l in split_func(line, py36):
1735 if str(l).strip("\n") == line_str:
1736 raise CannotSplit("Split function returned an unchanged result")
1739 split_line(l, line_length=line_length, inner=True, py36=py36)
1741 except CannotSplit as cs:
1752 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1753 """Split line into many lines, starting with the first matching bracket pair.
1755 Note: this usually looks weird, only use this for function definitions.
1756 Prefer RHS otherwise.
1758 head = Line(depth=line.depth)
1759 body = Line(depth=line.depth + 1, inside_brackets=True)
1760 tail = Line(depth=line.depth)
1761 tail_leaves: List[Leaf] = []
1762 body_leaves: List[Leaf] = []
1763 head_leaves: List[Leaf] = []
1764 current_leaves = head_leaves
1765 matching_bracket = None
1766 for leaf in line.leaves:
1768 current_leaves is body_leaves
1769 and leaf.type in CLOSING_BRACKETS
1770 and leaf.opening_bracket is matching_bracket
1772 current_leaves = tail_leaves if body_leaves else head_leaves
1773 current_leaves.append(leaf)
1774 if current_leaves is head_leaves:
1775 if leaf.type in OPENING_BRACKETS:
1776 matching_bracket = leaf
1777 current_leaves = body_leaves
1778 # Since body is a new indent level, remove spurious leading whitespace.
1780 normalize_prefix(body_leaves[0], inside_brackets=True)
1781 # Build the new lines.
1782 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1784 result.append(leaf, preformatted=True)
1785 for comment_after in line.comments_after(leaf):
1786 result.append(comment_after, preformatted=True)
1787 bracket_split_succeeded_or_raise(head, body, tail)
1788 for result in (head, body, tail):
1793 def right_hand_split(
1794 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1795 ) -> Iterator[Line]:
1796 """Split line into many lines, starting with the last matching bracket pair."""
1797 head = Line(depth=line.depth)
1798 body = Line(depth=line.depth + 1, inside_brackets=True)
1799 tail = Line(depth=line.depth)
1800 tail_leaves: List[Leaf] = []
1801 body_leaves: List[Leaf] = []
1802 head_leaves: List[Leaf] = []
1803 current_leaves = tail_leaves
1804 opening_bracket = None
1805 closing_bracket = None
1806 for leaf in reversed(line.leaves):
1807 if current_leaves is body_leaves:
1808 if leaf is opening_bracket:
1809 current_leaves = head_leaves if body_leaves else tail_leaves
1810 current_leaves.append(leaf)
1811 if current_leaves is tail_leaves:
1812 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1813 opening_bracket = leaf.opening_bracket
1814 closing_bracket = leaf
1815 current_leaves = body_leaves
1816 tail_leaves.reverse()
1817 body_leaves.reverse()
1818 head_leaves.reverse()
1819 # Since body is a new indent level, remove spurious leading whitespace.
1821 normalize_prefix(body_leaves[0], inside_brackets=True)
1822 elif not head_leaves:
1823 # No `head` and no `body` means the split failed. `tail` has all content.
1824 raise CannotSplit("No brackets found")
1826 # Build the new lines.
1827 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1829 result.append(leaf, preformatted=True)
1830 for comment_after in line.comments_after(leaf):
1831 result.append(comment_after, preformatted=True)
1832 bracket_split_succeeded_or_raise(head, body, tail)
1833 assert opening_bracket and closing_bracket
1835 opening_bracket.type == token.LPAR
1836 and not opening_bracket.value
1837 and closing_bracket.type == token.RPAR
1838 and not closing_bracket.value
1840 # These parens were optional. If there aren't any delimiters or standalone
1841 # comments in the body, they were unnecessary and another split without
1842 # them should be attempted.
1844 body.bracket_tracker.delimiters or line.contains_standalone_comments(0)
1846 omit = {id(closing_bracket), *omit}
1847 yield from right_hand_split(line, py36=py36, omit=omit)
1850 ensure_visible(opening_bracket)
1851 ensure_visible(closing_bracket)
1852 for result in (head, body, tail):
1857 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1858 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
1860 Do nothing otherwise.
1862 A left- or right-hand split is based on a pair of brackets. Content before
1863 (and including) the opening bracket is left on one line, content inside the
1864 brackets is put on a separate line, and finally content starting with and
1865 following the closing bracket is put on a separate line.
1867 Those are called `head`, `body`, and `tail`, respectively. If the split
1868 produced the same line (all content in `head`) or ended up with an empty `body`
1869 and the `tail` is just the closing bracket, then it's considered failed.
1871 tail_len = len(str(tail).strip())
1874 raise CannotSplit("Splitting brackets produced the same line")
1878 f"Splitting brackets on an empty body to save "
1879 f"{tail_len} characters is not worth it"
1883 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
1884 """Normalize prefix of the first leaf in every line returned by `split_func`.
1886 This is a decorator over relevant split functions.
1890 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
1891 for l in split_func(line, py36):
1892 normalize_prefix(l.leaves[0], inside_brackets=True)
1895 return split_wrapper
1898 @dont_increase_indentation
1899 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1900 """Split according to delimiters of the highest priority.
1902 If `py36` is True, the split will add trailing commas also in function
1903 signatures that contain `*` and `**`.
1906 last_leaf = line.leaves[-1]
1908 raise CannotSplit("Line empty")
1910 delimiters = line.bracket_tracker.delimiters
1912 delimiter_priority = line.bracket_tracker.max_delimiter_priority(
1913 exclude={id(last_leaf)}
1916 raise CannotSplit("No delimiters found")
1918 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1919 lowest_depth = sys.maxsize
1920 trailing_comma_safe = True
1922 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1923 """Append `leaf` to current line or to new line if appending impossible."""
1924 nonlocal current_line
1926 current_line.append_safe(leaf, preformatted=True)
1927 except ValueError as ve:
1930 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1931 current_line.append(leaf)
1933 for leaf in line.leaves:
1934 yield from append_to_line(leaf)
1936 for comment_after in line.comments_after(leaf):
1937 yield from append_to_line(comment_after)
1939 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1941 leaf.bracket_depth == lowest_depth
1942 and is_vararg(leaf, within=VARARGS_PARENTS)
1944 trailing_comma_safe = trailing_comma_safe and py36
1945 leaf_priority = delimiters.get(id(leaf))
1946 if leaf_priority == delimiter_priority:
1949 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1953 and delimiter_priority == COMMA_PRIORITY
1954 and current_line.leaves[-1].type != token.COMMA
1955 and current_line.leaves[-1].type != STANDALONE_COMMENT
1957 current_line.append(Leaf(token.COMMA, ","))
1961 @dont_increase_indentation
1962 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
1963 """Split standalone comments from the rest of the line."""
1964 if not line.contains_standalone_comments(0):
1965 raise CannotSplit("Line does not have any standalone comments")
1967 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1969 def append_to_line(leaf: Leaf) -> Iterator[Line]:
1970 """Append `leaf` to current line or to new line if appending impossible."""
1971 nonlocal current_line
1973 current_line.append_safe(leaf, preformatted=True)
1974 except ValueError as ve:
1977 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1978 current_line.append(leaf)
1980 for leaf in line.leaves:
1981 yield from append_to_line(leaf)
1983 for comment_after in line.comments_after(leaf):
1984 yield from append_to_line(comment_after)
1990 def is_import(leaf: Leaf) -> bool:
1991 """Return True if the given leaf starts an import statement."""
1998 (v == "import" and p and p.type == syms.import_name)
1999 or (v == "from" and p and p.type == syms.import_from)
2004 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2005 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2008 Note: don't use backslashes for formatting or you'll lose your voting rights.
2010 if not inside_brackets:
2011 spl = leaf.prefix.split("#")
2012 if "\\" not in spl[0]:
2013 nl_count = spl[-1].count("\n")
2016 leaf.prefix = "\n" * nl_count
2022 def normalize_string_quotes(leaf: Leaf) -> None:
2023 """Prefer double quotes but only if it doesn't cause more escaping.
2025 Adds or removes backslashes as appropriate. Doesn't parse and fix
2026 strings nested in f-strings (yet).
2028 Note: Mutates its argument.
2030 value = leaf.value.lstrip("furbFURB")
2031 if value[:3] == '"""':
2034 elif value[:3] == "'''":
2037 elif value[0] == '"':
2043 first_quote_pos = leaf.value.find(orig_quote)
2044 if first_quote_pos == -1:
2045 return # There's an internal error
2047 prefix = leaf.value[:first_quote_pos]
2048 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2049 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2050 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2051 body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
2052 if "r" in prefix.casefold():
2053 if unescaped_new_quote.search(body):
2054 # There's at least one unescaped new_quote in this raw string
2055 # so converting is impossible
2058 # Do not introduce or remove backslashes in raw strings
2061 # remove unnecessary quotes
2062 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2063 if body != new_body:
2064 # Consider the string without unnecessary quotes as the original
2066 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2067 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2068 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2069 if new_quote == '"""' and new_body[-1] == '"':
2071 new_body = new_body[:-1] + '\\"'
2072 orig_escape_count = body.count("\\")
2073 new_escape_count = new_body.count("\\")
2074 if new_escape_count > orig_escape_count:
2075 return # Do not introduce more escaping
2077 if new_escape_count == orig_escape_count and orig_quote == '"':
2078 return # Prefer double quotes
2080 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2083 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2084 """Make existing optional parentheses invisible or create new ones.
2086 Standardizes on visible parentheses for single-element tuples, and keeps
2087 existing visible parentheses for other tuples and generator expressions.
2090 for child in list(node.children):
2092 if child.type == syms.atom:
2094 is_empty_tuple(child)
2095 or is_one_tuple(child)
2096 or max_delimiter_priority_in_atom(child) >= COMMA_PRIORITY
2098 first = child.children[0]
2099 last = child.children[-1]
2100 if first.type == token.LPAR and last.type == token.RPAR:
2101 # make parentheses invisible
2102 first.value = "" # type: ignore
2103 last.value = "" # type: ignore
2104 elif is_one_tuple(child):
2105 # wrap child in visible parentheses
2106 lpar = Leaf(token.LPAR, "(")
2107 rpar = Leaf(token.RPAR, ")")
2108 index = child.remove() or 0
2109 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2111 # wrap child in invisible parentheses
2112 lpar = Leaf(token.LPAR, "")
2113 rpar = Leaf(token.RPAR, "")
2114 index = child.remove() or 0
2115 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2117 check_lpar = isinstance(child, Leaf) and child.value in parens_after
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` is an atom made of just `(` followed by `)`."""
    if node.type != syms.atom:
        return False

    if len(node.children) != 2:
        return False

    opening, closing = node.children
    return opening.type == token.LPAR and closing.type == token.RPAR
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` is a tuple of exactly one element.

    Two spellings are recognized: a parenthesized atom whose content is a
    `testlist_gexp` with two children, the second being a comma; and a bare
    node of one of the `IMPLICIT_TUPLE` types with the same shape.
    """
    if node.type != syms.atom:
        return (
            node.type in IMPLICIT_TUPLE
            and len(node.children) == 2
            and node.children[1].type == token.COMMA
        )

    if len(node.children) != 3:
        return False

    opening, inner, closing = node.children
    is_parenthesized_gexp = (
        opening.type == token.LPAR
        and inner.type == syms.testlist_gexp
        and closing.type == token.RPAR
    )
    if not is_parenthesized_gexp:
        return False

    elements = inner.children
    return len(elements) == 2 and elements[1].type == token.COMMA
2153 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2154 """Return True if `leaf` is a star or double star in a vararg or kwarg.
2156 If `within` includes VARARGS_PARENTS, this applies to function signatures.
2157 If `within` includes UNPACKING_PARENTS, it applies to right
2158 hand-side extended iterable unpacking (PEP 3132) and additional unpacking
2159 generalizations (PEP 448).
2161 if leaf.type not in STARS or not leaf.parent:
2165 if p.type == syms.star_expr:
2166 # Star expressions are also used as assignment targets in extended
2167 # iterable unpacking (PEP 3132). See what its parent is instead.
2173 return p.type in within
2176 def max_delimiter_priority_in_atom(node: LN) -> int:
2177 if node.type != syms.atom:
2180 first = node.children[0]
2181 last = node.children[-1]
2182 if not (first.type == token.LPAR and last.type == token.RPAR):
2185 bt = BracketTracker()
2186 for c in node.children[1:-1]:
2187 if isinstance(c, Leaf):
2190 for leaf in c.leaves():
2193 return bt.max_delimiter_priority()
2199 def ensure_visible(leaf: Leaf) -> None:
2200 """Make sure parentheses are visible.
2202 They could be invisible as part of some statements (see
2203 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
2205 if leaf.type == token.LPAR:
2207 elif leaf.type == token.RPAR:
2211 def is_python36(node: Node) -> bool:
2212 """Return True if the current file is using Python 3.6+ features.
2214 Currently looking for:
2216 - trailing commas after * or ** in function signatures.
2218 for n in node.pre_order():
2219 if n.type == token.STRING:
2220 value_head = n.value[:2] # type: ignore
2221 if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
2225 n.type == syms.typedargslist
2227 and n.children[-1].type == token.COMMA
2229 for ch in n.children:
2230 if ch.type in STARS:
# File suffixes that `gen_python_files_in_dir()` accepts.
PYTHON_EXTENSIONS = {".py"}
# Entry names that `gen_python_files_in_dir()` checks each child against.
BLACKLISTED_DIRECTORIES = {
    ".git",
    ".hg",
    ".mypy_cache",
    ".tox",
    ".venv",
    "_build",
    "buck-out",
    "build",
    "dist",
}
2242 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
2243 """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
2244 and have one of the PYTHON_EXTENSIONS.
2246 for child in path.iterdir():
2248 if child.name in BLACKLISTED_DIRECTORIES:
2251 yield from gen_python_files_in_dir(child)
2253 elif child.suffix in PYTHON_EXTENSIONS:
2259 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2262 change_count: int = 0
2264 failure_count: int = 0
2266 def done(self, src: Path, changed: Changed) -> None:
2267 """Increment the counter for successful reformatting. Write out a message."""
2268 if changed is Changed.YES:
2269 reformatted = "would reformat" if self.check else "reformatted"
2271 out(f"{reformatted} {src}")
2272 self.change_count += 1
2275 if changed is Changed.NO:
2276 msg = f"{src} already well formatted, good job."
2278 msg = f"{src} wasn't modified on disk since last run."
2279 out(msg, bold=False)
2280 self.same_count += 1
2282 def failed(self, src: Path, message: str) -> None:
2283 """Increment the counter for failed reformatting. Write out a message."""
2284 err(f"error: cannot format {src}: {message}")
2285 self.failure_count += 1
2288 def return_code(self) -> int:
2289 """Return the exit code that the app should use.
2291 This considers the current state of changed files and failures:
2292 - if there were any failures, return 123;
2293 - if any files were changed and --check is being used, return 1;
2294 - otherwise return 0.
2296 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2297 # 126 we have special returncodes reserved by the shell.
2298 if self.failure_count:
2301 elif self.change_count and self.check:
2306 def __str__(self) -> str:
2307 """Render a color report of the current state.
2309 Use `click.unstyle` to remove colors.
2312 reformatted = "would be reformatted"
2313 unchanged = "would be left unchanged"
2314 failed = "would fail to reformat"
2316 reformatted = "reformatted"
2317 unchanged = "left unchanged"
2318 failed = "failed to reformat"
2320 if self.change_count:
2321 s = "s" if self.change_count > 1 else ""
2323 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2326 s = "s" if self.same_count > 1 else ""
2327 report.append(f"{self.same_count} file{s} {unchanged}")
2328 if self.failure_count:
2329 s = "s" if self.failure_count > 1 else ""
2331 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2333 return ", ".join(report) + "."
2336 def assert_equivalent(src: str, dst: str) -> None:
2337 """Raise AssertionError if `src` and `dst` aren't equivalent."""
2342 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2343 """Simple visitor generating strings to compare ASTs by content."""
2344 yield f"{' ' * depth}{node.__class__.__name__}("
2346 for field in sorted(node._fields):
2348 value = getattr(node, field)
2349 except AttributeError:
2352 yield f"{' ' * (depth+1)}{field}="
2354 if isinstance(value, list):
2356 if isinstance(item, ast.AST):
2357 yield from _v(item, depth + 2)
2359 elif isinstance(value, ast.AST):
2360 yield from _v(value, depth + 2)
2363 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
2365 yield f"{' ' * depth}) # /{node.__class__.__name__}"
2368 src_ast = ast.parse(src)
2369 except Exception as exc:
2370 major, minor = sys.version_info[:2]
2371 raise AssertionError(
2372 f"cannot use --safe with this file; failed to parse source file "
2373 f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2374 f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
2378 dst_ast = ast.parse(dst)
2379 except Exception as exc:
2380 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2381 raise AssertionError(
2382 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2383 f"Please report a bug on https://github.com/ambv/black/issues. "
2384 f"This invalid output might be helpful: {log}"
2387 src_ast_str = "\n".join(_v(src_ast))
2388 dst_ast_str = "\n".join(_v(dst_ast))
2389 if src_ast_str != dst_ast_str:
2390 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2391 raise AssertionError(
2392 f"INTERNAL ERROR: Black produced code that is not equivalent to "
2394 f"Please report a bug on https://github.com/ambv/black/issues. "
2395 f"This diff might be helpful: {log}"
2399 def assert_stable(src: str, dst: str, line_length: int) -> None:
2400 """Raise AssertionError if `dst` reformats differently the second time."""
2401 newdst = format_str(dst, line_length=line_length)
2404 diff(src, dst, "source", "first pass"),
2405 diff(dst, newdst, "first pass", "second pass"),
2407 raise AssertionError(
2408 f"INTERNAL ERROR: Black produced different code on the second pass "
2409 f"of the formatter. "
2410 f"Please report a bug on https://github.com/ambv/black/issues. "
2411 f"This diff might be helpful: {log}"
2415 def dump_to_file(*output: str) -> str:
2416 """Dump `output` to a temporary file. Return path to the file."""
2419 with tempfile.NamedTemporaryFile(
2420 mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
2422 for lines in output:
2424 if lines and lines[-1] != "\n":
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Render the difference between texts `a` and `b` as one unified diff string.

    `a_name` and `b_name` label the two sides in the diff header. Five lines
    of context are requested around every change.
    """
    import difflib

    def terminated(text: str) -> list:
        # Split on "\n" and give every piece its newline back.
        return [f"{row}\n" for row in text.split("\n")]

    delta = difflib.unified_diff(
        terminated(a), terminated(b), fromfile=a_name, tofile=b_name, n=5
    )
    return "".join(delta)
2440 def cancel(tasks: List[asyncio.Task]) -> None:
2441 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even when there was nothing to cancel or when waiting
    for the cancelled tasks fails.
    """
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.Task.all_tasks()` was removed in Python 3.9. Prefer the
        # module-level `asyncio.all_tasks()` (3.7+) and only fall back to the
        # old spelling on interpreters that predate it.
        all_tasks = getattr(asyncio, "all_tasks", None) or asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No `loop=` argument: it was removed in Python 3.10, and `gather()`
        # takes the loop from the tasks it is handed.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    One `sub()` pass only replaces non-overlapping matches, so a second pass
    over the result picks up matches that overlapped the ones already replaced.
    String normalization relies on this.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
2478 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2479 CACHE_FILE = CACHE_DIR / "cache.pickle"
2482 def read_cache() -> Cache:
2483 """Read the cache if it exists and is well formed.
2485 If it is not well formed, the call to write_cache later should resolve the issue.
2487 if not CACHE_FILE.exists():
2490 with CACHE_FILE.open("rb") as fobj:
2492 cache: Cache = pickle.load(fobj)
2493 except pickle.UnpicklingError:
def get_cache_info(path: Path) -> CacheInfo:
    """Return the `(modification time, size)` pair that identifies the state of `path`.

    The cache compares this pair against the stored one to tell whether a file
    is already formatted or not.
    """
    file_stat = path.stat()
    return file_stat.st_mtime, file_stat.st_size
2506 cache: Cache, sources: Iterable[Path]
2507 ) -> Tuple[List[Path], List[Path]]:
2508 """Split a list of paths into two.
2510 The first list contains paths of files that were modified on disk or are not in
2511 the cache. The other list contains paths to non-modified files.
2516 if cache.get(src) != get_cache_info(src):
2523 def write_cache(cache: Cache, sources: List[Path]) -> None:
2524 """Update the cache file."""
2526 if not CACHE_DIR.exists():
2527 CACHE_DIR.mkdir(parents=True)
2528 new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
2529 with CACHE_FILE.open("wb") as fobj:
2530 pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
2535 if __name__ == "__main__":