All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial, wraps
9 from multiprocessing import Manager
11 from pathlib import Path
35 from appdirs import user_cache_dir
36 from attr import dataclass, Factory
40 from blib2to3.pytree import Node, Leaf, type_repr
41 from blib2to3 import pygram, pytree
42 from blib2to3.pgen2 import driver, token
43 from blib2to3.pgen2.parse import ParseError
# NOTE(review): this excerpt is a scraped listing — the leading numbers on each
# line are the original file's line numbers, and many source lines are elided.
46 __version__ = "18.4a6"
# Default maximum line length Black formats to (PEP 8 + 10%).
47 DEFAULT_LINE_LENGTH = 88
48 
# Shorthand for the blib2to3 grammar symbol table.
50 syms = pygram.python_symbols
# Type aliases: a lib2to3 tree element is either a Leaf or a Node.
58 LN = Union[Leaf, Node]
59 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
# Cache maps a file path to its (mtime, size) as seen at last format.
62 CacheInfo = Tuple[Timestamp, FileSize]
63 Cache = Dict[Path, CacheInfo]
# Pre-configured click echo helpers: `out` for normal output, `err` in red;
# both write to stderr so stdout stays clean for --diff output.
64 out = partial(click.secho, bold=True, err=True)
65 err = partial(click.secho, fg="red", err=True)
class NothingChanged(UserWarning):
    """Raised by :func:`format_file_contents` when reformatted code is the
    same as the source.

    Callers (see `format_file_in_place` / `format_stdin_to_stdout`) catch this
    to mean "no changes needed" — it is control flow, not an error.
    """
class CannotSplit(Exception):
    """A readable split that fits the allotted line length is impossible.

    Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
    :func:`delimiter_split`.
    """
class FormatError(Exception):
    """Base exception for `# fmt: on` and `# fmt: off` handling.

    It holds the number of bytes of the prefix consumed before the format
    control comment appeared.
    """

    def __init__(self, consumed: int) -> None:
        super().__init__(consumed)
        # Number of prefix bytes consumed before the control comment.
        self.consumed = consumed

    def trim_prefix(self, leaf: Leaf) -> None:
        """Drop the consumed part of `leaf`'s prefix in place."""
        leaf.prefix = leaf.prefix[self.consumed :]

    def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
        """Return a new NEWLINE Leaf holding the consumed part of the prefix."""
        unformatted_prefix = leaf.prefix[: self.consumed]
        return Leaf(token.NEWLINE, unformatted_prefix)
class FormatOn(FormatError):
    """Signals that a `# fmt: on` comment was found in the file."""
class FormatOff(FormatError):
    """Signals that a `# fmt: off` comment was found in the file."""
# WriteBack controls what happens to results: NO (check only), YES (write
# files), DIFF (print diffs). Members are elided in this excerpt.
108 class WriteBack(Enum):
# --line-length option for the CLI entry point.
125 default=DEFAULT_LINE_LENGTH,
# NOTE(review): help text reads "How many character per line" — should be
# "characters"; runtime string, left unchanged here.
126 help="How many character per line to allow.",
# --check option help text (multi-part string literal).
133 "Don't write the files back, just return the status. Return code 0 "
134 "means nothing would change. Return code 1 means some files would be "
135 "reformatted. Return code 123 means there was an internal error."
# --diff option.
141 help="Don't write the files back, just output a diff for each file on stdout.",
# --fast/--safe option.
146 help="If --fast given, skip temporary sanity checks. [default: --safe]",
# --quiet option help text.
153 "Don't emit non-error messages to stderr. Errors are still emitted, "
154 "silence those with 2>/dev/null."
157 @click.version_option(version=__version__)
# click.Path settings for the SRC argument; allow_dash enables "-" for stdin.
162 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
175 """The uncompromising code formatter."""
# Collect the set of files to reformat: directories are walked recursively,
# explicit files taken as-is, "-" means stdin.
176 sources: List[Path] = []
180 sources.extend(gen_python_files_in_dir(p))
182 # if a file was explicitly given, we don't care about its extension
185 sources.append(Path("-"))
187 err(f"invalid path: {s}")
# Map the --check/--diff flags to a WriteBack mode.
189 if check and not diff:
190 write_back = WriteBack.NO
192 write_back = WriteBack.DIFF
194 write_back = WriteBack.YES
195 report = Report(check=check, quiet=quiet)
# NOTE(review): `if len(sources) == 0` / `elif len(sources) == 1` would be
# more idiomatic as `if not sources` / `elif len(sources) == 1`.
196 if len(sources) == 0:
197 out("No paths given. Nothing to do 😴")
# Single file: format in-process. Multiple files: fan out to a process pool
# driven by the asyncio event loop (see schedule_formatting).
201 elif len(sources) == 1:
202 reformat_one(sources[0], line_length, fast, write_back, report)
204 loop = asyncio.get_event_loop()
205 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
207 loop.run_until_complete(
209 sources, line_length, fast, write_back, report, loop, executor
# Summary + exit code: 0 clean, 1 would reformat, 123 internal error
# (per the --check help text above).
215 out("All done! ✨ 🍰 ✨")
216 click.echo(str(report))
217 ctx.exit(report.return_code)
# Reformat a single path in-process (no executor); records outcome on `report`.
221 src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
223 """Reformat a single file under `src` without spawning child processes.
225 If `quiet` is True, non-error messages are not output. `line_length`,
226 `write_back`, and `fast` options are passed to :func:`format_file_in_place`.
# "-" (and not a real file) means stdin -> stdout.
230 if not src.is_file() and str(src) == "-":
231 if format_stdin_to_stdout(
232 line_length=line_length, fast=fast, write_back=write_back
234 changed = Changed.YES
# The (mtime, size) cache is consulted unless we are producing diffs; a cache
# hit means the file was already formatted at this line length.
237 if write_back != WriteBack.DIFF:
238 cache = read_cache(line_length)
240 if src in cache and cache[src] == get_cache_info(src):
241 changed = Changed.CACHED
242 if changed is not Changed.CACHED and format_file_in_place(
243 src, line_length=line_length, fast=fast, write_back=write_back
245 changed = Changed.YES
# Only refresh the cache when we actually wrote files back.
246 if write_back == WriteBack.YES and changed is not Changed.NO:
247 write_cache(cache, [src], line_length)
248 report.done(src, changed)
# Any failure is recorded per-file rather than aborting the whole run.
249 except Exception as exc:
250 report.failed(src, str(exc))
253 async def schedule_formatting(
257 write_back: WriteBack,
262 """Run formatting of `sources` in parallel using the provided `executor`.
264 (Use ProcessPoolExecutors for actual parallelism.)
266 `line_length`, `write_back`, and `fast` options are passed to
267 :func:`format_file_in_place`.
# Skip files whose cached (mtime, size) still matches — they're reported as
# CACHED without doing any work.
270 if write_back != WriteBack.DIFF:
271 cache = read_cache(line_length)
272 sources, cached = filter_cached(cache, sources)
274 report.done(src, Changed.CACHED)
279 if write_back == WriteBack.DIFF:
280 # For diff output, we need locks to ensure we don't interleave output
281 # from different processes.
283 lock = manager.Lock()
# One executor task per file, keyed by path so results map back to sources.
285 src: loop.run_in_executor(
286 executor, format_file_in_place, src, line_length, fast, write_back, lock
290 _task_values = list(tasks.values())
# Cancel outstanding tasks on SIGINT/SIGTERM where the platform supports it.
292 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
293 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
294 except NotImplementedError:
295 # There are no good alternatives for these on Windows
297 await asyncio.wait(_task_values)
# Classify each task: timed out / cancelled / raised / completed.
298 for src, task in tasks.items():
300 report.failed(src, "timed out, cancelling")
302 cancelled.append(task)
303 elif task.cancelled():
304 cancelled.append(task)
305 elif task.exception():
306 report.failed(src, str(task.exception()))
308 formatted.append(src)
309 report.done(src, Changed.YES if task.result() else Changed.NO)
# NOTE(review): the `loop=` argument to asyncio.gather was deprecated in
# Python 3.8 and removed in 3.10 — this call breaks on modern Pythons.
312 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
313 if write_back == WriteBack.YES and formatted:
314 write_cache(cache, formatted, line_length)
317 def format_file_in_place(
321 write_back: WriteBack = WriteBack.NO,
322 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
324 """Format file under `src` path. Return True if changed.
326 If `write_back` is True, write reformatted code back to stdout.
327 `line_length` and `fast` options are passed to :func:`format_file_contents`.
# .pyi stub files get special formatting rules downstream.
329 is_pyi = src.suffix == ".pyi"
# tokenize.open() honours the PEP 263 coding cookie when decoding the file.
331 with tokenize.open(src) as src_buffer:
332 src_contents = src_buffer.read()
334 dst_contents = format_file_contents(
335 src_contents, line_length=line_length, fast=fast, is_pyi=is_pyi
337 except NothingChanged:
# NOTE(review): `write_back.YES` / `write_back.DIFF` access enum members via
# an instance; the idiomatic (and later-fixed upstream) spelling is
# `WriteBack.YES` / `WriteBack.DIFF`.
340 if write_back == write_back.YES:
# Re-use the encoding tokenize detected so the round-trip is lossless.
341 with open(src, "w", encoding=src_buffer.encoding) as f:
342 f.write(dst_contents)
343 elif write_back == write_back.DIFF:
344 src_name = f"{src} (original)"
345 dst_name = f"{src} (formatted)"
346 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
# The `lock` (when given) serializes diff output across worker processes.
350 sys.stdout.write(diff_contents)
357 def format_stdin_to_stdout(
358 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
360 """Format file on stdin. Return True if changed.
362 If `write_back` is True, write reformatted code back to stdout.
363 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
365 src = sys.stdin.read()
368 dst = format_file_contents(src, line_length=line_length, fast=fast)
# NothingChanged means the input was already formatted — not an error.
371 except NothingChanged:
# Unlike format_file_in_place, this compares against the WriteBack class
# members directly (the consistent spelling).
375 if write_back == WriteBack.YES:
376 sys.stdout.write(dst)
377 elif write_back == WriteBack.DIFF:
378 src_name = "<stdin> (original)"
379 dst_name = "<stdin> (formatted)"
380 sys.stdout.write(diff(src, dst, src_name, dst_name))
383 def format_file_contents(
384 src_contents: str, *, line_length: int, fast: bool, is_pyi: bool = False
386 """Reformat contents of a file and return new contents.
388 If `fast` is False, additionally confirm that the reformatted code is
389 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
390 `line_length` is passed to :func:`format_str`.
# Whitespace-only input: nothing to format (raises NothingChanged upstream).
392 if src_contents.strip() == "":
395 dst_contents = format_str(src_contents, line_length=line_length, is_pyi=is_pyi)
# Identical output also signals NothingChanged to the caller.
396 if src_contents == dst_contents:
# Safety checks (skipped with --fast): output must be AST-equivalent to the
# input and stable under a second formatting pass.
400 assert_equivalent(src_contents, dst_contents)
402 src_contents, dst_contents, line_length=line_length, is_pyi=is_pyi
408 src_contents: str, line_length: int, *, is_pyi: bool = False
410 """Reformat a string and return new contents.
412 `line_length` determines how many characters per line are allowed.
414 src_node = lib2to3_parse(src_contents)
# __future__ imports influence string-prefix normalization below.
416 future_imports = get_future_imports(src_node)
417 elt = EmptyLineTracker(is_pyi=is_pyi)
# Python 3.6+ sources allow trailing commas after *args/**kwargs splits.
418 py36 = is_python36(src_node)
419 lines = LineGenerator(
420 remove_u_prefix=py36 or "unicode_literals" in future_imports, is_pyi=is_pyi
# Interleave tracker-computed blank lines around each generated line, then
# split lines that exceed `line_length`.
# NOTE(review): repeated `dst_contents +=` string concatenation is
# quadratic in the worst case; a list + "".join would be linear.
424 for current_line in lines.visit(src_node):
425 for _ in range(after):
426 dst_contents += str(empty_line)
427 before, after = elt.maybe_empty_lines(current_line)
428 for _ in range(before):
429 dst_contents += str(empty_line)
430 for line in split_line(current_line, line_length=line_length, py36=py36):
431 dst_contents += str(line)
# Grammar candidates tried in order by lib2to3_parse: most restrictive
# (no print/exec statements, i.e. Python 3) first, plain Python 2 last.
436 pygram.python_grammar_no_print_statement_no_exec_statement,
437 pygram.python_grammar_no_print_statement,
438 pygram.python_grammar,
442 def lib2to3_parse(src_txt: str) -> Node:
443 """Given a string with source, return the lib2to3 Node."""
444 grammar = pygram.python_grammar_no_print_statement
# Ensure the source ends with a newline (the parser requires it), matching
# the file's dominant line-ending style.
# NOTE(review): `src_txt[-1]` raises IndexError for empty input; later
# upstream versions guard with `if src_txt and src_txt[-1] != "\n"`.
445 if src_txt[-1] != "\n":
446 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
# Try each grammar in GRAMMARS order until one parses the source.
448 for grammar in GRAMMARS:
449 drv = driver.Driver(grammar, pytree.convert)
451 result = drv.parse_string(src_txt, True)
454 except ParseError as pe:
# Convert the low-level ParseError into a readable ValueError that quotes
# the offending line.
455 lineno, column = pe.context[1]
456 lines = src_txt.splitlines()
458 faulty_line = lines[lineno - 1]
460 faulty_line = "<line number missing in source>"
461 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
# A bare Leaf (e.g. single-token file) is wrapped so callers always get a Node.
465 if isinstance(result, Leaf):
466 result = Node(syms.file_input, [result])
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation.

    `str()` on a lib2to3 tree concatenates every leaf's prefix and value,
    reproducing the source text.
    """
    return str(node)
479 class Visitor(Generic[T]):
480 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
482 def visit(self, node: LN) -> Iterator[T]:
483 """Main method to visit `node` and its children.
485 It tries to find a `visit_*()` method for the given `node.type`, like
486 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
487 If no dedicated `visit_*()` method is found, chooses `visit_default()`
490 Then yields objects of type `T` from the selected visitor.
# Leaf types resolve via token.tok_name (e.g. INDENT); Node types via
# type_repr (e.g. simple_stmt).
493 name = token.tok_name[node.type]
495 name = type_repr(node.type)
# Dynamic dispatch: visit_<name> if defined, else visit_default.
496 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
498 def visit_default(self, node: LN) -> Iterator[T]:
499 """Default `visit_*()` implementation. Recurses to children of `node`."""
500 if isinstance(node, Node):
501 for child in node.children:
502 yield from self.visit(child)
# Pretty-prints the lib2to3 tree with colour-coded, depth-indented output.
506 class DebugVisitor(Visitor[T]):
509 def visit_default(self, node: LN) -> Iterator[T]:
510 indent = " " * (2 * self.tree_depth)
511 if isinstance(node, Node):
# Nodes print their symbolic type, then recurse, then a closing marker.
512 _type = type_repr(node.type)
513 out(f"{indent}{_type}", fg="yellow")
515 for child in node.children:
516 yield from self.visit(child)
519 out(f"{indent}/{_type}", fg="yellow", bold=False)
# Leaves print their token name, prefix, and value on one line.
521 _type = token.tok_name.get(node.type, str(node.type))
522 out(f"{indent}{_type}", fg="blue", nl=False)
524 # We don't have to handle prefixes for `Node` objects since
525 # that delegates to the first child anyway.
526 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
527 out(f" {node.value!r}", fg="blue", bold=False)
# Classmethod convenience wrapper: parse `code` and dump its AST.
530 def show(cls, code: str) -> None:
531 """Pretty-print the lib2to3 AST of a given string of `code`.
533 Convenience method for debugging.
535 v: DebugVisitor[None] = DebugVisitor()
536 list(v.visit(lib2to3_parse(code)))
# Token/keyword classification tables used by the splitting logic below.
539 KEYWORDS = set(keyword.kwlist)
540 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
541 FLOW_CONTROL = {"return", "raise", "break", "continue"}
# Custom token type for comments promoted to their own line; chosen above
# lib2to3's real token range so it cannot collide.
552 STANDALONE_COMMENT = 153
553 LOGIC_OPERATORS = {"and", "or"}
578 STARS = {token.STAR, token.DOUBLESTAR}
# Parent symbols under which a star means varargs/unpacking (elided set
# literals in this excerpt).
581 syms.argument, # double star in arglist
582 syms.trailer, # single argument to call
584 syms.varargslist, # lambdas
586 UNPACKING_PARENTS = {
587 syms.atom, # single element of a list or set literal
# Split-delimiter priorities: higher numbers split first.
625 COMPREHENSION_PRIORITY = 20
627 TERNARY_PRIORITY = 16
630 COMPARATOR_PRIORITY = 10
641 token.DOUBLESLASH: 3,
650 class BracketTracker:
651 """Keeps track of brackets on a line."""
# State: open brackets awaiting their match (keyed by depth + closing type),
# split-delimiter priorities by leaf id, the previously seen leaf, and
# nesting counters for `for ... in` targets and lambda argument lists.
654 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
655 delimiters: Dict[LeafID, Priority] = Factory(dict)
656 previous: Optional[Leaf] = None
657 _for_loop_variable: int = 0
658 _lambda_arguments: int = 0
660 def mark(self, leaf: Leaf) -> None:
661 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
663 All leaves receive an int `bracket_depth` field that stores how deep
664 within brackets a given leaf is. 0 means there are no enclosing brackets
665 that started on this line.
667 If a leaf is itself a closing bracket, it receives an `opening_bracket`
668 field that it forms a pair with. This is a one-directional link to
669 avoid reference cycles.
671 If a leaf is a delimiter (a token on which Black can split the line if
672 needed) and it's on depth 0, its `id()` is stored in the tracker's
# Comments never affect bracket state.
675 if leaf.type == token.COMMENT:
678 self.maybe_decrement_after_for_loop_variable(leaf)
679 self.maybe_decrement_after_lambda_arguments(leaf)
# A closing bracket pops its opener from bracket_match and links to it.
680 if leaf.type in CLOSING_BRACKETS:
682 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
683 leaf.opening_bracket = opening_bracket
684 leaf.bracket_depth = self.depth
# Record split priorities: before-delimiters attach to the previous leaf,
# after-delimiters to this leaf.
686 delim = is_split_before_delimiter(leaf, self.previous)
687 if delim and self.previous is not None:
688 self.delimiters[id(self.previous)] = delim
690 delim = is_split_after_delimiter(leaf, self.previous)
692 self.delimiters[id(leaf)] = delim
# Opening brackets are indexed under the *closing* type they expect.
693 if leaf.type in OPENING_BRACKETS:
694 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
697 self.maybe_increment_lambda_arguments(leaf)
698 self.maybe_increment_for_loop_variable(leaf)
700 def any_open_brackets(self) -> bool:
701 """Return True if there is a yet-unmatched open bracket on the line."""
702 return bool(self.bracket_match)
704 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
705 """Return the highest priority of a delimiter found on the line.
707 Values are consistent with what `is_split_*_delimiter()` return.
708 Raises ValueError on no delimiters.
710 return max(v for k, v in self.delimiters.items() if k not in exclude)
712 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
713 """In a for loop, or comprehension, the variables are often unpacks.
715 To avoid splitting on the comma in this situation, increase the depth of
716 tokens between `for` and `in`.
718 if leaf.type == token.NAME and leaf.value == "for":
720 self._for_loop_variable += 1
725 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
726 """See `maybe_increment_for_loop_variable` above for explanation."""
727 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
729 self._for_loop_variable -= 1
734 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
735 """In a lambda expression, there might be more than one argument.
737 To avoid splitting on the comma in this situation, increase the depth of
738 tokens between `lambda` and `:`.
740 if leaf.type == token.NAME and leaf.value == "lambda":
742 self._lambda_arguments += 1
747 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
748 """See `maybe_increment_lambda_arguments` above for explanation."""
749 if self._lambda_arguments and leaf.type == token.COLON:
751 self._lambda_arguments -= 1
756 def get_open_lsqb(self) -> Optional[Leaf]:
757 """Return the most recent opening square bracket (if any)."""
758 return self.bracket_match.get((self.depth - 1, token.RSQB))
763 """Holds leaves and comments. Can be printed with `str(line)`."""
# The code tokens on this line, in order.
766 leaves: List[Leaf] = Factory(list)
# Inline comments, stored as (index-of-leaf-they-follow, comment-leaf).
767 comments: List[Tuple[Index, Leaf]] = Factory(list)
768 bracket_tracker: BracketTracker = Factory(BracketTracker)
# True while this line lives inside an unclosed bracket of an enclosing line.
769 inside_brackets: bool = False
771 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
772 """Add a new `leaf` to the end of the line.
774 Unless `preformatted` is True, the `leaf` will receive a new consistent
775 whitespace prefix and metadata applied by :class:`BracketTracker`.
776 Trailing commas are maybe removed, unpacked for loop variables are
777 demoted from being delimiters.
779 Inline comments are put aside.
# Brackets count as content even when their value is empty (invisible parens).
781 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
# Drop the `:` of `class C():` once the empty paren pair was removed.
785 if token.COLON == leaf.type and self.is_class_paren_empty:
787 if self.leaves and not preformatted:
788 # Note: at this point leaf.prefix should be empty except for
789 # imports, for which we only preserve newlines.
790 leaf.prefix += whitespace(
791 leaf, complex_subscript=self.is_complex_subscript(leaf)
793 if self.inside_brackets or not preformatted:
794 self.bracket_tracker.mark(leaf)
795 self.maybe_remove_trailing_comma(leaf)
# Comments are diverted into self.comments; everything else joins leaves.
796 if not self.append_comment(leaf):
797 self.leaves.append(leaf)
799 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
800 """Like :func:`append()` but disallow invalid standalone comment structure.
802 Raises ValueError when any `leaf` is appended after a standalone comment
803 or when a standalone comment is not the first leaf on the line.
# These invariants only apply at bracket depth 0 — inside brackets,
# standalone comments may legitimately interleave with code.
805 if self.bracket_tracker.depth == 0:
807 raise ValueError("cannot append to standalone comments")
809 if self.leaves and leaf.type == STANDALONE_COMMENT:
811 "cannot append standalone comments to a populated line"
# Delegate the actual insertion to the regular append().
814 self.append(leaf, preformatted=preformatted)
def is_comment(self) -> bool:
    """True when this line consists of exactly one standalone comment leaf."""
    leaves = self.leaves
    return len(leaves) == 1 and leaves[0].type == STANDALONE_COMMENT
def is_decorator(self) -> bool:
    """True when this line starts with an `@` token (a decorator)."""
    if not self:
        return False

    return self.leaves[0].type == token.AT
827 def is_import(self) -> bool:
828 """Is this an import line?"""
# Delegates to the module-level is_import() helper (same name, not recursion).
829 return bool(self) and is_import(self.leaves[0])
832 def is_class(self) -> bool:
833 """Is this line a class definition?"""
836 and self.leaves[0].type == token.NAME
837 and self.leaves[0].value == "class"
841 def is_stub_class(self) -> bool:
842 """Is this line a class definition with a body consisting only of "..."?"""
# "..." is tokenized as three consecutive DOT leaves.
845 and self.leaves[-3:] == [Leaf(token.DOT, ".") for _ in range(3)]
849 def is_def(self) -> bool:
850 """Is this a function definition? (Also returns True for async defs.)"""
852 first_leaf = self.leaves[0]
857 second_leaf: Optional[Leaf] = self.leaves[1]
# Either a plain `def`, or ASYNC immediately followed by `def`.
860 return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
861 first_leaf.type == token.ASYNC
862 and second_leaf is not None
863 and second_leaf.type == token.NAME
864 and second_leaf.value == "def"
868 def is_flow_control(self) -> bool:
869 """Is this line a flow control statement?
871 Those are `return`, `raise`, `break`, and `continue`.
875 and self.leaves[0].type == token.NAME
876 and self.leaves[0].value in FLOW_CONTROL
880 def is_yield(self) -> bool:
881 """Is this line a yield statement?"""
884 and self.leaves[0].type == token.NAME
885 and self.leaves[0].value == "yield"
889 def is_class_paren_empty(self) -> bool:
890 """Is this a class with no base classes but using parentheses?
892 Those are unnecessary and should be removed.
# Shape: `class NAME ( )` — exactly four leaves with adjacent LPAR/RPAR.
896 and len(self.leaves) == 4
898 and self.leaves[2].type == token.LPAR
899 and self.leaves[2].value == "("
900 and self.leaves[3].type == token.RPAR
901 and self.leaves[3].value == ")"
904 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
905 """If so, needs to be split before emitting."""
906 for leaf in self.leaves:
907 if leaf.type == STANDALONE_COMMENT:
908 if leaf.bracket_depth <= depth_limit:
913 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
914 """Remove trailing comma if there is one and it's safe."""
# Only applies when the last leaf is a comma and `closing` is a bracket.
917 and self.leaves[-1].type == token.COMMA
918 and closing.type in CLOSING_BRACKETS
# `}` — trailing commas in dict/set literals are always safe to drop.
922 if closing.type == token.RBRACE:
923 self.remove_trailing_comma()
# `]` — safe only inside a plain list literal (listmaker).
926 if closing.type == token.RSQB:
927 comma = self.leaves[-1]
928 if comma.parent and comma.parent.type == syms.listmaker:
929 self.remove_trailing_comma()
932 # For parens let's check if it's safe to remove the comma.
933 # Imports are always safe.
935 self.remove_trailing_comma()
938 # Otherwise, if the trailing one is the only one, we might mistakenly
939 # change a tuple into a different type by removing the comma.
940 depth = closing.bracket_depth + 1
942 opening = closing.opening_bracket
# Locate the opening bracket, then scan for other commas at the same depth;
# a single comma could be a 1-tuple, which must be preserved.
943 for _opening_index, leaf in enumerate(self.leaves):
950 for leaf in self.leaves[_opening_index + 1 :]:
954 bracket_depth = leaf.bracket_depth
955 if bracket_depth == depth and leaf.type == token.COMMA:
# Commas inside an arglist are call arguments, not tuple constructors.
957 if leaf.parent and leaf.parent.type == syms.arglist:
962 self.remove_trailing_comma()
967 def append_comment(self, comment: Leaf) -> bool:
968 """Add an inline or standalone comment to the line."""
# Standalone comments inside open brackets are kept as regular leaves so
# bracket-splitting can position them; this branch handles that case.
970 comment.type == STANDALONE_COMMENT
971 and self.bracket_tracker.any_open_brackets()
# Non-comment leaves are not handled here (caller appends them to leaves).
976 if comment.type != token.COMMENT:
# Attach the inline comment to the last leaf currently on the line.
979 after = len(self.leaves) - 1
# An inline comment with nothing to attach to becomes standalone.
981 comment.type = STANDALONE_COMMENT
986 self.comments.append((after, comment))
989 def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
990 """Generate comments that should appear directly after `leaf`.
992 Provide a non-negative leaf `_index` to speed up the function.
# When no index was supplied, find the leaf's position by scanning.
995 for _index, _leaf in enumerate(self.leaves):
# Yield every stored comment attached to that leaf index.
1002 for index, comment_after in self.comments:
1006 def remove_trailing_comma(self) -> None:
1007 """Remove the trailing comma and move the comments attached to it."""
1008 comma_index = len(self.leaves) - 1
# Re-point comments that were attached to the comma at its predecessor.
1009 for i in range(len(self.comments)):
1010 comment_index, comment = self.comments[i]
1011 if comment_index == comma_index:
1012 self.comments[i] = (comma_index - 1, comment)
# NOTE(review): the actual `self.leaves.pop()` removing the comma is elided
# from this excerpt.
1015 def is_complex_subscript(self, leaf: Leaf) -> bool:
1016 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
# Use `leaf` itself when it is the `[`, else the most recent open `[`.
1018 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
1020 if open_lsqb is None:
1023 subscript_start = open_lsqb.next_sibling
# For `a[x, y]` descend from the subscriptlist to the branch holding `leaf`.
1025 isinstance(subscript_start, Node)
1026 and subscript_start.type == syms.subscriptlist
1028 subscript_start = child_towards(subscript_start, leaf)
# "Complex" means the subscript contains any expression-level construct.
1029 return subscript_start is not None and any(
1030 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1033 def __str__(self) -> str:
1034 """Render the line."""
# Indentation is four spaces per depth level; the first leaf's prefix
# (which may carry newlines) precedes it.
1038 indent = "    " * self.depth
1039 leaves = iter(self.leaves)
1040 first = next(leaves)
1041 res = f"{first.prefix}{indent}{first.value}"
# Inline comments are appended after the code leaves.
1044 for _, comment in self.comments:
def __bool__(self) -> bool:
    """A line is truthy when it holds any leaves or any comments."""
    return bool(self.leaves) or bool(self.comments)
1053 class UnformattedLines(Line):
1054 """Just like :class:`Line` but stores lines which aren't reformatted."""
1056 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
1057 """Just add a new `leaf` to the end of the lines.
1059 The `preformatted` argument is ignored.
1061 Keeps track of indentation `depth`, which is useful when the user
1062 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
# generate_comments() raises FormatOn when a `# fmt: on` comment appears;
# the consumed prefix is preserved as a literal leaf.
1065 list(generate_comments(leaf))
1066 except FormatOn as f_on:
1067 self.leaves.append(f_on.leaf_from_consumed(leaf))
1070 self.leaves.append(leaf)
# Track indentation so depth is right when formatting resumes.
1071 if leaf.type == token.INDENT:
1073 elif leaf.type == token.DEDENT:
1076 def __str__(self) -> str:
1077 """Render unformatted lines from leaves which were added with `append()`.
1079 `depth` is not used for indentation in this case.
# Leaves are emitted verbatim (prefix + value), preserving the user's layout.
1085 for leaf in self.leaves:
1089 def append_comment(self, comment: Leaf) -> bool:
1090 """Not implemented in this class. Raises `NotImplementedError`."""
1091 raise NotImplementedError("Unformatted lines don't store comments separately.")
# The remaining overrides neutralize Line behaviors that would rewrite code.
1093 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1094 """Does nothing and returns False."""
1097 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1098 """Does nothing and returns False."""
1103 class EmptyLineTracker:
1104 """Provides a stateful method that returns the number of potential extra
1105 empty lines needed before and after the currently processed line.
1107 Note: this tracker works on lines that haven't been split yet. It assumes
1108 the prefix of the first leaf consists of optional newlines. Those newlines
1109 are consumed by `maybe_empty_lines()` and included in the computation.
# State carried between consecutive maybe_empty_lines() calls.
1111 is_pyi: bool = False
1112 previous_line: Optional[Line] = None
1113 previous_after: int = 0
1114 previous_defs: List[int] = Factory(list)
1116 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1117 """Return the number of extra empty lines before and after the `current_line`.
1119 This is for separating `def`, `async def` and `class` with extra empty
1120 lines (two on module-level), as well as providing an extra empty line
1121 after flow control keywords to make them more prominent.
# `# fmt: off` regions get no synthetic blank lines.
1123 if isinstance(current_line, UnformattedLines):
# Subtract lines already emitted *after* the previous line so blanks
# between two lines are not double counted.
1126 before, after = self._maybe_empty_lines(current_line)
1127 before -= self.previous_after
1128 self.previous_after = after
1129 self.previous_line = current_line
1130 return before, after
1132 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
# Module level allows up to 2 blanks (1 in .pyi stubs).
1134 if current_line.depth == 0:
1135 max_allowed = 1 if self.is_pyi else 2
1136 if current_line.leaves:
1137 # Consume the first leaf's extra newlines.
1138 first_leaf = current_line.leaves[0]
1139 before = first_leaf.prefix.count("\n")
1140 before = min(before, max_allowed)
1141 first_leaf.prefix = ""
# Leaving the body of a def re-applies def spacing at the shallower depth.
1144 depth = current_line.depth
1145 while self.previous_defs and self.previous_defs[-1] >= depth:
1146 self.previous_defs.pop()
1148 before = 0 if depth else 1
1150 before = 1 if depth else 2
1151 is_decorator = current_line.is_decorator
1152 if is_decorator or current_line.is_def or current_line.is_class:
1153 if not is_decorator:
1154 self.previous_defs.append(depth)
1155 if self.previous_line is None:
1156 # Don't insert empty lines before the first line in the file.
# No blank between a decorator and the def/class it decorates.
1159 if self.previous_line.is_decorator:
1163 self.previous_line.is_comment
1164 and self.previous_line.depth == current_line.depth
1170 if self.previous_line.depth > current_line.depth:
1172 elif current_line.is_class or self.previous_line.is_class:
# Adjacent stub-class bodies (`class C: ...`) need no separation.
1173 if current_line.is_stub_class and self.previous_line.is_stub_class:
1181 if current_line.depth and newlines:
# A blank line is forced after a run of imports ends.
1187 and self.previous_line.is_import
1188 and not current_line.is_import
1189 and depth == self.previous_line.depth
1191 return (before or 1), 0
1197 class LineGenerator(Visitor[Line]):
1198 """Generates reformatted Line objects. Empty lines are not emitted.
1200 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1201 in ways that will no longer stringify to valid Python code on the tree.
# Configuration plus the line currently being accumulated.
1203 is_pyi: bool = False
1204 current_line: Line = Factory(Line)
1205 remove_u_prefix: bool = False
# NOTE(review): parameter `type` shadows the builtin; `line_class` would be
# clearer (interface kept as-is here).
1207 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1210 If the line is empty, only emit if it makes sense.
1211 If the line is too long, split it first and then generate.
1213 If any lines were generated, set up a new current_line.
# An empty current line is never emitted: just adjust its depth/class.
1215 if not self.current_line:
1216 if self.current_line.__class__ == type:
1217 self.current_line.depth += indent
1219 self.current_line = type(depth=self.current_line.depth + indent)
1220 return # Line is empty, don't emit. Creating a new one unnecessary.
# Swap in a fresh line before yielding the completed one (yield elided in
# this excerpt).
1222 complete_line = self.current_line
1223 self.current_line = type(depth=complete_line.depth + indent)
1226 def visit(self, node: LN) -> Iterator[Line]:
1227 """Main method to visit `node` and its children.
1229 Yields :class:`Line` objects.
# While inside a `# fmt: off` region, bypass normal dispatch entirely.
1231 if isinstance(self.current_line, UnformattedLines):
1232 # File contained `# fmt: off`
1233 yield from self.visit_unformatted(node)
# Otherwise fall back to Visitor's visit_* dispatch.
1236 yield from super().visit(node)
1238 def visit_default(self, node: LN) -> Iterator[Line]:
1239 """Default `visit_*()` implementation. Recurses to children of `node`."""
1240 if isinstance(node, Leaf):
1241 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
# Comments hidden in the leaf's prefix are surfaced first.
1243 for comment in generate_comments(node):
1244 if any_open_brackets:
1245 # any comment within brackets is subject to splitting
1246 self.current_line.append(comment)
1247 elif comment.type == token.COMMENT:
1248 # regular trailing comment
1249 self.current_line.append(comment)
1250 yield from self.line()
1253 # regular standalone comment
1254 yield from self.line()
1256 self.current_line.append(comment)
1257 yield from self.line()
# `# fmt: off` switches the generator into UnformattedLines mode and
# re-visits the node under the new mode.
1259 except FormatOff as f_off:
1260 f_off.trim_prefix(node)
1261 yield from self.line(type=UnformattedLines)
1262 yield from self.visit(node)
1264 except FormatOn as f_on:
1265 # This only happens here if somebody says "fmt: on" multiple
1267 f_on.trim_prefix(node)
1268 yield from self.visit_default(node)
# Normal path: normalize the prefix and (for strings) prefix/quotes, then
# append all non-whitespace leaves to the current line.
1271 normalize_prefix(node, inside_brackets=any_open_brackets)
1272 if node.type == token.STRING:
1273 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1274 normalize_string_quotes(node)
1275 if node.type not in WHITESPACE:
1276 self.current_line.append(node)
1277 yield from super().visit_default(node)
def visit_INDENT(self, node: Node) -> Iterator[Line]:
    """Bump the indentation level by one, flushing the pending line first."""
    # blib2to3 INDENT leaves never carry comments in their prefix, so no
    # comment handling is needed before adjusting depth.
    yield from self.line(+1)
    yield from self.visit_default(node)
def visit_DEDENT(self, node: Node) -> Iterator[Line]:
    """Drop the indentation level by one, emitting pending output first."""
    # Any trailing comments for the current line would already have arrived
    # as prefixes on the preceding NEWLINE, so the line can be flushed now.
    yield from self.line()

    # A DEDENT leaf has no value, but its prefix may still hold standalone
    # comments that belong to the current indentation level — collect them.
    yield from self.visit_default(node)

    # Only then reduce the depth.
    yield from self.line(-1)
# (Signature line `def visit_stmt(` elided in this excerpt.)
1300 self, node: Node, keywords: Set[str], parens: Set[str]
1301 ) -> Iterator[Line]:
1302 """Visit a statement.
1304 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1305 `def`, `with`, `class`, `assert` and assignments.
1307 The relevant Python language `keywords` for a given statement will be
1308 NAME leaves within it. This method puts those on a separate line.
1310 `parens` holds a set of string leaf values immediately after which
1311 invisible parens should be put.
1313 normalize_invisible_parens(node, parens_after=parens)
1314 for child in node.children:
# A statement keyword (e.g. `else`, `except`) starts a fresh line.
1315 if child.type == token.NAME and child.value in keywords: # type: ignore
1316 yield from self.line()
1318 yield from self.visit(child)
1320 def visit_suite(self, node: Node) -> Iterator[Line]:
1321 """Visit a suite."""
# In .pyi stubs, a suite whose body is just `...` collapses onto one line;
# children[2] is the suite body past NEWLINE and INDENT.
1322 if self.is_pyi and is_stub_suite(node):
1323 yield from self.visit(node.children[2])
1325 yield from self.visit_default(node)
1327 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1328 """Visit a statement without nested statements."""
# A simple statement used directly as a compound-statement body
# (e.g. `if x: y = 1`) must be indented like a suite.
1329 is_suite_like = node.parent and node.parent.type in STATEMENT
1331 if self.is_pyi and is_stub_body(node):
1332 yield from self.visit_default(node)
1334 yield from self.line(+1)
1335 yield from self.visit_default(node)
1336 yield from self.line(-1)
# Stub-suite bodies in .pyi files stay on the same line as their header.
1339 if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1340 yield from self.line()
1341 yield from self.visit_default(node)
1343 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1344 """Visit `async def`, `async for`, `async with`."""
1345 yield from self.line()
# Visit up to and including the ASYNC token, then stop.
1347 children = iter(node.children)
1348 for child in children:
1349 yield from self.visit(child)
1351 if child.type == token.ASYNC:
# The statement following ASYNC is visited child-by-child so the `async`
# keyword stays on the same line as its `def`/`for`/`with`.
1354 internal_stmt = next(children)
1355 for child in internal_stmt.children:
1356 yield from self.visit(child)
1358 def visit_decorators(self, node: Node) -> Iterator[Line]:
1359 """Visit decorators."""
1360 for child in node.children:
1361 yield from self.line()
1362 yield from self.visit(child)
1364 def visit_import_from(self, node: Node) -> Iterator[Line]:
1365 """Visit import_from and maybe put invisible parentheses.
1367 This is separate from `visit_stmt` because import statements don't
1368 support arbitrary atoms and thus handling of parentheses is custom.
1371 for index, child in enumerate(node.children):
1373 if child.type == token.LPAR:
1374 # make parentheses invisible
1375 child.value = "" # type: ignore
1376 node.children[-1].value = "" # type: ignore
1378 # insert invisible parentheses
1379 node.insert_child(index, Leaf(token.LPAR, ""))
1380 node.append_child(Leaf(token.RPAR, ""))
1384 child.type == token.NAME and child.value == "import" # type: ignore
1387 for child in node.children:
1388 yield from self.visit(child)
    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        # The semicolon leaf is never appended anywhere; finishing the current
        # line forces the statement after `;` onto its own line.
        yield from self.line()
    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        # visit_default flushes any comments held in the ENDMARKER's prefix.
        yield from self.visit_default(leaf)
        # Emit whatever remains on the current line so the file ends cleanly.
        yield from self.line()
1399 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1400 """Used when file contained a `# fmt: off`."""
1401 if isinstance(node, Node):
1402 for child in node.children:
1403 yield from self.visit(child)
1407 self.current_line.append(node)
1408 except FormatOn as f_on:
1409 f_on.trim_prefix(node)
1410 yield from self.line()
1411 yield from self.visit(node)
1413 if node.type == token.ENDMARKER:
1414 # somebody decided not to put a final `# fmt: on`
1415 yield from self.line()
1417 def __attrs_post_init__(self) -> None:
1418 """You are in a twisty little maze of passages."""
1421 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1422 self.visit_if_stmt = partial(
1423 v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1425 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1426 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1427 self.visit_try_stmt = partial(
1428 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1430 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1431 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1432 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1433 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1434 self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1435 self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1436 self.visit_async_funcdef = self.visit_async_stmt
1437 self.visit_decorated = self.visit_decorators
# Node types that represent a tuple without enclosing parentheses.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Maps each opening bracket token type to its matching closing bracket type.
BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
OPENING_BRACKETS = set(BRACKET.keys())
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
# Token types that must never be preceded by whitespace.
ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
    """Return whitespace prefix if needed for the given `leaf`.

    `complex_subscript` signals whether the given leaf is part of a subscription
    which has non-trivial arguments, like arithmetic expressions or function calls.
    """
    NO = ""
    SPACE = " "
    DOUBLESPACE = "  "
    t = leaf.type
    p = leaf.parent
    v = leaf.value
    if t in ALWAYS_NO_SPACE:
        return NO

    if t == token.COMMENT:
        # Trailing comments get two spaces before the hash sign.
        return DOUBLESPACE

    assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
    if (
        t == token.COLON
        and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
    ):
        return NO

    prev = leaf.prev_sibling
    if not prev:
        prevp = preceding_leaf(p)
        if not prevp or prevp.type in OPENING_BRACKETS:
            return NO

        if t == token.COLON:
            if prevp.type == token.COLON:
                return NO

            elif prevp.type != token.COMMA and not complex_subscript:
                return NO

            return SPACE

        if prevp.type == token.EQUAL:
            if prevp.parent:
                if prevp.parent.type in {
                    syms.arglist, syms.argument, syms.parameters, syms.varargslist
                }:
                    return NO

                elif prevp.parent.type == syms.typedargslist:
                    # A bit hacky: if the equal sign has whitespace, it means we
                    # previously found it's a typed argument. So, we're using
                    # that, too.
                    return prevp.prefix

        elif prevp.type in STARS:
            if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
                return NO

        elif prevp.type == token.COLON:
            if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
                return SPACE if complex_subscript else NO

        elif (
            prevp.parent
            and prevp.parent.type == syms.factor
            and prevp.type in MATH_OPERATORS
        ):
            # Unary operator before this leaf: no space after the operator.
            return NO

        elif (
            prevp.type == token.RIGHTSHIFT
            and prevp.parent
            and prevp.parent.type == syms.shift_expr
            and prevp.prev_sibling
            and prevp.prev_sibling.type == token.NAME
            and prevp.prev_sibling.value == "print"  # type: ignore
        ):
            # Python 2 print chevron
            return NO

    elif prev.type in OPENING_BRACKETS:
        return NO

    if p.type in {syms.parameters, syms.arglist}:
        # untyped function signatures or calls
        if not prev or prev.type != token.COMMA:
            return NO

    elif p.type == syms.varargslist:
        # lambdas
        if prev and prev.type != token.COMMA:
            return NO

    elif p.type == syms.typedargslist:
        # typed function signatures
        if not prev:
            return NO

        if t == token.EQUAL:
            if prev.type != syms.tname:
                return NO

        elif prev.type == token.EQUAL:
            # A bit hacky: if the equal sign has whitespace, it means we
            # previously found it's a typed argument. So, we're using that, too.
            return prev.prefix

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.tname:
        # type names
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type != token.COMMA:
                return NO

    elif p.type == syms.trailer:
        # attributes and calls
        if t == token.LPAR or t == token.RPAR:
            return NO

        if not prev:
            if t == token.DOT:
                prevp = preceding_leaf(p)
                if not prevp or prevp.type != token.NUMBER:
                    return NO

            elif t == token.LSQB:
                return NO

        elif prev.type != token.COMMA:
            return NO

    elif p.type == syms.argument:
        # single argument
        if t == token.EQUAL:
            return NO

        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type == token.LPAR:
                return NO

        elif prev.type in {token.EQUAL} | STARS:
            return NO

    elif p.type == syms.decorator:
        # decorators
        return NO

    elif p.type == syms.dotted_name:
        if prev:
            return NO

        prevp = preceding_leaf(p)
        if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
            return NO

    elif p.type == syms.classdef:
        if t == token.LPAR:
            return NO

        if prev and prev.type == token.LPAR:
            return NO

    elif p.type in {syms.subscript, syms.sliceop}:
        # indexing
        if not prev:
            assert p.parent is not None, "subscripts are always parented"
            if p.parent.type == syms.subscriptlist:
                return SPACE

            return NO

        elif not complex_subscript:
            return NO

    elif p.type == syms.atom:
        if prev and t == token.DOT:
            # dots, but not the first one.
            return NO

    elif p.type == syms.dictsetmaker:
        if prev and prev.type == token.DOUBLESTAR:
            return NO

    elif p.type in {syms.factor, syms.star_expr}:
        # unary ops
        if not prev:
            prevp = preceding_leaf(p)
            if not prevp or prevp.type in OPENING_BRACKETS:
                return NO

            prevp_parent = prevp.parent
            assert prevp_parent is not None
            if (
                prevp.type == token.COLON
                and prevp_parent.type in {syms.subscript, syms.sliceop}
            ):
                return NO

            elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                return NO

        elif t == token.NAME or t == token.NUMBER:
            return NO

    elif p.type == syms.import_from:
        if t == token.DOT:
            if prev and prev.type == token.DOT:
                return NO

        elif t == token.NAME:
            if v == "import":
                return SPACE

            if prev and prev.type == token.DOT:
                return NO

    elif p.type == syms.sliceop:
        return NO

    return SPACE
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any."""
    while node:
        res = node.prev_sibling
        if res:
            if isinstance(res, Leaf):
                return res

            try:
                # The preceding sibling is an interior node; its last leaf is
                # the leaf immediately before `node`.
                return list(res.leaves())[-1]

            except IndexError:
                return None

        node = node.parent
    return None
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`."""
    node: Optional[LN] = descendant
    # Walk up until the parent is `ancestor`; `node` is then the direct child.
    while node and node.parent != ancestor:
        node = node.parent
    return node
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    Higher numbers are higher priority.
    """
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    # Not a delimiter that splits after itself.
    return 0
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    Higher numbers are higher priority.
    """
    if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITIES[leaf.type]

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        # Implicit string concatenation can be split between the strings.
        return STRING_PRIORITY

    if (
        leaf.type == token.NAME
        and leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
    ):
        return COMPREHENSION_PRIORITY

    if (
        leaf.type == token.NAME
        and leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if (
        leaf.type == token.NAME
        and leaf.value in {"if", "else"}
        and leaf.parent
        and leaf.parent.type == syms.test
    ):
        return TERNARY_PRIORITY

    if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix. This happens
    in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments. We simply don't know what the correct parent should be.

    No matter though, we can live without this. We really only need to
    differentiate between inline and standalone comments. The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves. Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    p = leaf.prefix
    if not p:
        return

    if "#" not in p:
        return

    consumed = 0
    nlines = 0
    for index, line in enumerate(p.split("\n")):
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            continue

        if index == 0 and leaf.type != token.ENDMARKER:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        yield Leaf(comment_type, comment, prefix="\n" * nlines)

        if comment in {"# fmt: on", "# yapf: enable"}:
            raise FormatOn(consumed)

        if comment in {"# fmt: off", "# yapf: disable"}:
            if comment_type == STANDALONE_COMMENT:
                raise FormatOff(consumed)

            prev = preceding_leaf(leaf)
            if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
                raise FormatOff(consumed)

        nlines = 0
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:") should have a single space between
    the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.
    """
    content = content.rstrip()
    if not content:
        # An empty comment renders as a lone hash sign.
        return "#"

    if content[0] == "#":
        content = content[1:]
    if content and content[0] not in " !:#":
        # Insert the single space; "##", "#!", "#:" keep their spacing.
        content = " " + content
    return "#" + content
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively. This means we can fallback to splitting
    by delimiters if the LHS/RHS don't yield any results.

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.
    """
    if isinstance(line, UnformattedLines) or line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    if is_line_short_enough(line, line_length=line_length, line_str=line_str):
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    elif line.is_import:
        split_funcs = [explode_split]
    else:

        def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
            # Try progressively larger sets of omitted trailers until the first
            # emitted line fits.
            for omit in generate_trailers_to_omit(line, line_length):
                lines = list(right_hand_split(line, py36, omit=omit))
                if is_line_short_enough(lines[0], line_length=line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            yield from right_hand_split(line, py36)

        if line.inside_brackets:
            split_funcs = [delimiter_split, standalone_comment_split, rhs]
        else:
            split_funcs = [rhs]
    for split_func in split_funcs:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        result: List[Line] = []
        try:
            for l in split_func(line, py36):
                if str(l).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(l, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            continue

        else:
            yield from result
            break

    else:
        # Every split function failed; emit the line unchanged.
        yield line
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise. This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = head_leaves
    matching_bracket = None
    for leaf in line.leaves:
        if (
            current_leaves is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        ):
            # Closing the first bracket pair: the rest belongs to the tail.
            current_leaves = tail_leaves if body_leaves else head_leaves
        current_leaves.append(leaf)
        if current_leaves is head_leaves:
            if leaf.type in OPENING_BRACKETS:
                matching_bracket = leaf
                current_leaves = body_leaves
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    for result in (head, body, tail):
        if result:
            yield result
def right_hand_split(
    line: Line, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    If the split was by optional parentheses, attempt splitting without them, too.
    `omit` is a collection of closing bracket IDs that shouldn't be considered for
    this split.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    closing_bracket = None
    # Walk backwards so the *last* bracket pair is found first.
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves:
            if leaf is opening_bracket:
                current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves:
            if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
                current_leaves = body_leaves
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    if not head_leaves:
        # No `head` means the split failed. Either `tail` has all content or
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # Build the new lines.
    for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    assert opening_bracket and closing_bracket
    if (
        # the opening bracket is an optional paren
        opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
        and not closing_bracket.value
        # there are no standalone comments in the body
        and not line.contains_standalone_comments(0)
        # and it's not an import (optional parens are the only thing we can split
        # on in this case; attempting a split without them is a waste of time)
        and not line.is_import
    ):
        omit = {id(closing_bracket), *omit}
        delimiter_count = len(body.bracket_tracker.delimiters)
        if (
            delimiter_count == 0
            or delimiter_count == 1
            and (
                body.leaves[0].type in OPENING_BRACKETS
                or body.leaves[-1].type in CLOSING_BRACKETS
            )
        ):
            try:
                yield from right_hand_split(line, py36=py36, omit=omit)
                return

            except CannotSplit:
                pass

    ensure_visible(opening_bracket)
    ensure_visible(closing_bracket)
    for result in (head, body, tail):
        if result:
            yield result
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets. Content before
    (and including) the opening bracket is left on one line, content inside the
    brackets is put on a separate line, and finally content starting with and
    following the closing bracket is put on a separate line.

    Those are called `head`, `body`, and `tail`, respectively. If the split
    produced the same line (all content in `head`) or ended up with an empty `body`
    and the `tail` is just the closing bracket, then it's considered failed.
    """
    tail_len = len(str(tail).strip())
    if not body:
        if tail_len == 0:
            raise CannotSplit("Splitting brackets produced the same line")

        elif tail_len < 3:
            raise CannotSplit(
                f"Splitting brackets on an empty body to save "
                f"{tail_len} characters is not worth it"
            )
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for l in split_func(line, py36):
            normalize_prefix(l.leaves[0], inside_brackets=True)
            yield l

    return split_wrapper
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If `py36` is True, the split will add trailing commas also in function
    signatures that contain `*` and `**`.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    delimiters = line.bracket_tracker.delimiters
    try:
        delimiter_priority = line.bracket_tracker.max_delimiter_priority(
            exclude={id(last_leaf)}
        )
    except ValueError:
        raise CannotSplit("No delimiters found")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf, index):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        if (
            leaf.bracket_depth == lowest_depth
            and is_vararg(leaf, within=VARARGS_PARENTS)
        ):
            # Trailing commas after *args/**kwargs are Python 3.6+ only.
            trailing_comma_safe = trailing_comma_safe and py36
        leaf_priority = delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split standalone comments from the rest of the line."""
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for index, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf, index):
            yield from append_to_line(comment_after)

    if current_line:
        yield current_line
def explode_split(
    line: Line, py36: bool = False, omit: Collection[LeafID] = ()
) -> Iterator[Line]:
    """Split by rightmost bracket and immediately split contents by a delimiter."""
    new_lines = list(right_hand_split(line, py36, omit))
    if len(new_lines) != 3:
        # The RHS produced fewer lines than head/body/tail; nothing to explode.
        yield from new_lines
        return

    yield new_lines[0]

    try:
        yield from delimiter_split(new_lines[1], py36)

    except CannotSplit:
        yield new_lines[1]

    yield new_lines[2]
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement."""
    p = leaf.parent
    t = leaf.type
    v = leaf.value
    return bool(
        t == token.NAME
        and (
            (v == "import" and p and p.type == syms.import_name)
            or (v == "from" and p and p.type == syms.import_from)
        )
    )
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    if not inside_brackets:
        spl = leaf.prefix.split("#")
        if "\\" not in spl[0]:
            nl_count = spl[-1].count("\n")
            if len(spl) > 1:
                # A comment in the prefix owns one of the newlines.
                nl_count -= 1
            leaf.prefix = "\n" * nl_count
            return

    leaf.prefix = ""
def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
    """Make all string prefixes lowercase.

    If remove_u_prefix is given, also removes any u prefix from the string.

    Note: Mutates its argument.
    """
    match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
    assert match is not None, f"failed to match string {leaf.value!r}"
    orig_prefix = match.group(1)
    new_prefix = orig_prefix.lower()
    if remove_u_prefix:
        new_prefix = new_prefix.replace("u", "")
    leaf.value = f"{new_prefix}{match.group(2)}"
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings (yet).

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
    body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary quotes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary quotes as the original
            body = new_body
            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if new_quote == '"""' and new_body[-1] == '"':
        # edge case: a trailing '"' would close the triple quote early
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.
    """
    check_lpar = False
    for child in list(node.children):
        if check_lpar:
            if child.type == syms.atom:
                maybe_make_parens_invisible_in_atom(child)
            elif is_one_tuple(child):
                # wrap child in visible parentheses
                lpar = Leaf(token.LPAR, "(")
                rpar = Leaf(token.RPAR, ")")
                index = child.remove() or 0
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
            else:
                # wrap child in invisible parentheses
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                index = child.remove() or 0
                node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))

        check_lpar = isinstance(child, Leaf) and child.value in parens_after
def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively."""
    if (
        node.type != syms.atom
        or is_empty_tuple(node)
        or is_one_tuple(node)
        or is_yield(node)
        or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
    ):
        return False

    first = node.children[0]
    last = node.children[-1]
    if first.type == token.LPAR and last.type == token.RPAR:
        # make parentheses invisible
        first.value = ""  # type: ignore
        last.value = ""  # type: ignore
        if len(node.children) > 1:
            maybe_make_parens_invisible_in_atom(node.children[1])
        return True

    return False
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple."""
    # An empty tuple atom is exactly `(` followed by `)`.
    return (
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
    )
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type == syms.atom:
        if len(node.children) != 3:
            return False

        lpar, gexp, rpar = node.children
        if not (
            lpar.type == token.LPAR
            and gexp.type == syms.testlist_gexp
            and rpar.type == token.RPAR
        ):
            return False

        # `(x,)` — exactly one element plus the trailing comma.
        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    # Unparenthesized one-tuples, e.g. `x,` in assignments.
    return (
        node.type in IMPLICIT_TUPLE
        and len(node.children) == 2
        and node.children[1].type == token.COMMA
    )
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression."""
    if node.type == syms.yield_expr:
        return True

    if node.type == token.NAME and node.value == "yield":  # type: ignore
        return True

    if node.type != syms.atom:
        return False

    if len(node.children) != 3:
        return False

    # A parenthesized yield, e.g. `(yield x)`, is still a yield.
    lpar, expr, rpar = node.children
    if lpar.type == token.LPAR and rpar.type == token.RPAR:
        return is_yield(expr)

    return False
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in STARS or not leaf.parent:
        return False

    p = leaf.parent
    if p.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132). See what its parent is instead.
        if not p.parent:
            return False

        p = p.parent

    return p.type in within
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body."""
    # A suite is NEWLINE INDENT <body> DEDENT; anything else isn't stub-shaped.
    if (
        len(node.children) != 4
        or node.children[0].type != token.NEWLINE
        or node.children[1].type != token.INDENT
        or node.children[3].type != token.DEDENT
    ):
        return False

    return is_stub_body(node.children[2])
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis."""
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    child = node.children[0]

    # `...` parses as an atom of three DOT leaves.
    return (
        child.type == syms.atom
        and len(child.children) == 3
        and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
    )
def max_delimiter_priority_in_atom(node: LN) -> int:
    """Return maximum delimiter priority inside `node`.

    This is specific to atoms with contents contained in a pair of parentheses.
    If `node` isn't an atom or there are no enclosing parentheses, returns 0.
    """
    if node.type != syms.atom:
        return 0

    first = node.children[0]
    last = node.children[-1]
    if not (first.type == token.LPAR and last.type == token.RPAR):
        return 0

    bt = BracketTracker()
    # Mark every leaf between the parentheses to collect delimiters.
    for c in node.children[1:-1]:
        if isinstance(c, Leaf):
            bt.mark(c)
        else:
            for leaf in c.leaves():
                bt.mark(leaf)
    try:
        return bt.max_delimiter_priority()

    except ValueError:
        return 0
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).
    """
    if leaf.type == token.LPAR:
        leaf.value = "("
    elif leaf.type == token.RPAR:
        leaf.value = ")"
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures and calls.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                return True

        elif (
            n.type in {syms.typedargslist, syms.arglist}
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            for ch in n.children:
                if ch.type in STARS:
                    return True

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            return True

    return False
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too). First
    set is empty.
    """
    omit: Set[LeafID] = set()
    yield omit

    length = 4 * line.depth
    opening_bracket = None
    closing_bracket = None
    optional_brackets: Set[LeafID] = set()
    inner_brackets: Set[LeafID] = set()
    # Walk backwards: trailing trailers are the candidates for omission.
    for index, leaf in enumerate_reversed(line.leaves):
        length += len(leaf.prefix) + len(leaf.value)
        if length > line_length:
            break

        comment: Optional[Leaf]
        for comment in line.comments_after(leaf, index):
            if "\n" in comment.prefix:
                break  # Oops, standalone comment!

            length += len(comment.value)
        else:
            comment = None
        if comment is not None:
            break  # There was a standalone comment, we can't continue.

        optional_brackets.discard(id(leaf))
        if opening_bracket:
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            if not leaf.value:
                optional_brackets.add(id(opening_bracket))
                continue

            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
                inner_brackets.add(id(leaf))
                continue

            opening_bracket = leaf.opening_bracket
            if closing_bracket:
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit

            closing_bracket = leaf
def get_future_imports(node: Node) -> Set[str]:
    """Return a set of __future__ imports in the file."""
    imports: Set[str] = set()
    for child in node.children:
        # __future__ imports must be the first statements (after an optional
        # docstring), so stop scanning at the first non-simple statement.
        if child.type != syms.simple_stmt:
            break

        first_child = child.children[0]
        if isinstance(first_child, Leaf):
            # Continue looking if we see a docstring; otherwise stop.
            if (
                len(child.children) == 2
                and first_child.type == token.STRING
                and child.children[1].type == token.NEWLINE
            ):
                continue

            else:
                break

        elif first_child.type == syms.import_from:
            module_name = first_child.children[1]
            if not isinstance(module_name, Leaf) or module_name.value != "__future__":
                break

            # children[3:] skips "from __future__ import".
            for import_from_child in first_child.children[3:]:
                if isinstance(import_from_child, Leaf):
                    if import_from_child.type == token.NAME:
                        imports.add(import_from_child.value)
                else:
                    assert import_from_child.type == syms.import_as_names
                    for leaf in import_from_child.children:
                        if isinstance(leaf, Leaf) and leaf.type == token.NAME:
                            imports.add(leaf.value)
        else:
            break

    return imports
PYTHON_EXTENSIONS = {".py", ".pyi"}
BLACKLISTED_DIRECTORIES = {
    "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
}


def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
    and have one of the PYTHON_EXTENSIONS.
    """
    for child in path.iterdir():
        if child.is_dir():
            if child.name in BLACKLISTED_DIRECTORIES:
                # Never descend into build artifacts, VCS metadata, caches…
                continue

            yield from gen_python_files_in_dir(child)

        elif child.is_file() and child.suffix in PYTHON_EXTENSIONS:
            yield child
2655 """Provides a reformatting counter. Can be rendered with `str(report)`."""
2658 change_count: int = 0
2660 failure_count: int = 0
2662 def done(self, src: Path, changed: Changed) -> None:
2663 """Increment the counter for successful reformatting. Write out a message."""
2664 if changed is Changed.YES:
2665 reformatted = "would reformat" if self.check else "reformatted"
2667 out(f"{reformatted} {src}")
2668 self.change_count += 1
2671 if changed is Changed.NO:
2672 msg = f"{src} already well formatted, good job."
2674 msg = f"{src} wasn't modified on disk since last run."
2675 out(msg, bold=False)
2676 self.same_count += 1
2678 def failed(self, src: Path, message: str) -> None:
2679 """Increment the counter for failed reformatting. Write out a message."""
2680 err(f"error: cannot format {src}: {message}")
2681 self.failure_count += 1
2684 def return_code(self) -> int:
2685 """Return the exit code that the app should use.
2687 This considers the current state of changed files and failures:
2688 - if there were any failures, return 123;
2689 - if any files were changed and --check is being used, return 1;
2690 - otherwise return 0.
2692 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2693 # 126 we have special returncodes reserved by the shell.
2694 if self.failure_count:
2697 elif self.change_count and self.check:
2702 def __str__(self) -> str:
2703 """Render a color report of the current state.
2705 Use `click.unstyle` to remove colors.
2708 reformatted = "would be reformatted"
2709 unchanged = "would be left unchanged"
2710 failed = "would fail to reformat"
2712 reformatted = "reformatted"
2713 unchanged = "left unchanged"
2714 failed = "failed to reformat"
2716 if self.change_count:
2717 s = "s" if self.change_count > 1 else ""
2719 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2722 s = "s" if self.same_count > 1 else ""
2723 report.append(f"{self.same_count} file{s} {unchanged}")
2724 if self.failure_count:
2725 s = "s" if self.failure_count > 1 else ""
2727 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2729 return ", ".join(report) + "."
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent."""
    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        # Dump the traceback and the offending output for bug reports.
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(src: str, dst: str, line_length: int, is_pyi: bool = False) -> None:
    """Raise AssertionError if `dst` reformats differently the second time."""
    newdst = format_str(dst, line_length=line_length, is_pyi=is_pyi)
    if dst != newdst:
        # Keep both diffs around so the bug report shows the full story.
        log = dump_to_file(
            diff(src, dst, "source", "first pass"),
            diff(dst, newdst, "first pass", "second pass"),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file."""
    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            # Keep successive chunks from running together on one line.
            if lines and lines[-1] != "\n":
                f.write("\n")
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`."""
    # unified_diff expects per-line input with trailing newlines.
    a_lines = [line + "\n" for line in a.split("\n")]
    b_lines = [line + "\n" for line in b.split("\n")]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
def cancel(tasks: List[asyncio.Task]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    # NOTE(review): the body of this function was elided in the listing this
    # file was recovered from; the reconstruction below matches the docstring
    # (report, then cancel every task) — confirm against upstream history.
    err("Aborted!")
    for task in tasks:
        task.cancel()
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # NOTE(review): `asyncio.Task.all_tasks` and the `loop=` argument to
        # `gather` are 3.6-era APIs removed in modern CPython — revisit if the
        # supported interpreter range changes.
        to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        loop.run_until_complete(
            asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
        )
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.
    """
    # A single `re.sub` pass skips matches that overlap an earlier
    # replacement; a second pass catches those.
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
def enumerate_reversed(sequence: "Sequence[T]") -> "Iterator[Tuple[Index, T]]":
    """Like `reversed(enumerate(sequence))` if that were possible."""
    index = len(sequence) - 1
    for element in reversed(sequence):
        yield (index, element)
        # Walk the index down in lockstep with the reversed scan; without
        # this every element was paired with the last index.
        index -= 1
def is_line_short_enough(line: "Line", *, line_length: int, line_str: str = "") -> bool:
    """Return True if `line` is no longer than `line_length`.

    Uses the provided `line_str` rendering, if any, otherwise computes a new one.
    """
    if not line_str:
        line_str = str(line).strip("\n")
    return (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments()
    )
# Per-user cache directory; keyed on the Black version so caches written by
# different releases never mix.
CACHE_DIR = Path(user_cache_dir("black", version=__version__))


def get_cache_file(line_length: int) -> Path:
    """Return the cache file path used for the given `line_length` setting."""
    return CACHE_DIR / f"cache.{line_length}.pickle"
def read_cache(line_length: int) -> Cache:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write_cache later should resolve the issue.
    """
    cache_file = get_cache_file(line_length)
    if not cache_file.exists():
        return {}

    with cache_file.open("rb") as fobj:
        try:
            cache: Cache = pickle.load(fobj)
        except pickle.UnpicklingError:
            # Corrupt or incompatible cache; treat as empty rather than crash.
            return {}

    return cache
def get_cache_info(path: Path) -> "CacheInfo":
    """Return the information used to check if a file is already formatted or not."""
    # Stat once and report (mtime, size); either changing invalidates the entry.
    stat = path.stat()
    return stat.st_mtime, stat.st_size
def filter_cached(
    cache: Cache, sources: Iterable[Path]
) -> Tuple[List[Path], List[Path]]:
    """Split a list of paths into two.

    The first list contains paths of files that modified on disk or are not in the
    cache. The other list contains paths to non-modified files.
    """
    todo: List[Path] = []
    done: List[Path] = []
    for src in sources:
        # Cache keys are resolved paths (see write_cache).
        src = src.resolve()
        if cache.get(src) != get_cache_info(src):
            todo.append(src)
        else:
            done.append(src)
    return todo, done
def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
    """Update the cache file."""
    cache_file = get_cache_file(line_length)
    try:
        if not CACHE_DIR.exists():
            CACHE_DIR.mkdir(parents=True)
        new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
        with cache_file.open("wb") as fobj:
            pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    except OSError:
        # Caching is best-effort; a read-only or full disk must not break
        # formatting itself.
        pass
2958 if __name__ == "__main__":