All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial, wraps
9 from multiprocessing import Manager
11 from pathlib import Path
35 from appdirs import user_cache_dir
36 from attr import dataclass, Factory
40 from blib2to3.pytree import Node, Leaf, type_repr
41 from blib2to3 import pygram, pytree
42 from blib2to3.pgen2 import driver, token
43 from blib2to3.pgen2.parse import ParseError
46 __version__ = "18.4a6"
47 DEFAULT_LINE_LENGTH = 88
50 syms = pygram.python_symbols
58 LN = Union[Leaf, Node]
59 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
62 CacheInfo = Tuple[Timestamp, FileSize]
63 Cache = Dict[Path, CacheInfo]
64 out = partial(click.secho, bold=True, err=True)
65 err = partial(click.secho, fg="red", err=True)
68 class NothingChanged(UserWarning):
69 """Raised by :func:`format_file` when reformatted code is the same as source."""
72 class CannotSplit(Exception):
73 """A readable split that fits the allotted line length is impossible.
75 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
76 :func:`delimiter_split`.
80 class FormatError(Exception):
81 """Base exception for `# fmt: on` and `# fmt: off` handling.
83 It holds the number of bytes of the prefix consumed before the format
84 control comment appeared.
87 def __init__(self, consumed: int) -> None:
88 super().__init__(consumed)
89 self.consumed = consumed
91 def trim_prefix(self, leaf: Leaf) -> None:
92 leaf.prefix = leaf.prefix[self.consumed :]
def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
    """Return a new NEWLINE leaf holding the consumed part of `leaf`'s prefix."""
    consumed_part = leaf.prefix[: self.consumed]
    return Leaf(token.NEWLINE, consumed_part)
100 class FormatOn(FormatError):
101 """Found a comment like `# fmt: on` in the file."""
104 class FormatOff(FormatError):
105 """Found a comment like `# fmt: off` in the file."""
108 class WriteBack(Enum):
125 default=DEFAULT_LINE_LENGTH,
126 help="How many character per line to allow.",
133 "Don't write the files back, just return the status. Return code 0 "
134 "means nothing would change. Return code 1 means some files would be "
135 "reformatted. Return code 123 means there was an internal error."
141 help="Don't write the files back, just output a diff for each file on stdout.",
146 help="If --fast given, skip temporary sanity checks. [default: --safe]",
153 "Don't emit non-error messages to stderr. Errors are still emitted, "
154 "silence those with 2>/dev/null."
157 @click.version_option(version=__version__)
162 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
175 """The uncompromising code formatter."""
176 sources: List[Path] = []
180 sources.extend(gen_python_files_in_dir(p))
182 # if a file was explicitly given, we don't care about its extension
185 sources.append(Path("-"))
187 err(f"invalid path: {s}")
189 if check and not diff:
190 write_back = WriteBack.NO
192 write_back = WriteBack.DIFF
194 write_back = WriteBack.YES
195 report = Report(check=check, quiet=quiet)
196 if len(sources) == 0:
197 out("No paths given. Nothing to do 😴")
201 elif len(sources) == 1:
202 reformat_one(sources[0], line_length, fast, write_back, report)
204 loop = asyncio.get_event_loop()
205 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
207 loop.run_until_complete(
209 sources, line_length, fast, write_back, report, loop, executor
215 out("All done! ✨ 🍰 ✨")
216 click.echo(str(report))
217 ctx.exit(report.return_code)
221 src: Path, line_length: int, fast: bool, write_back: WriteBack, report: "Report"
223 """Reformat a single file under `src` without spawning child processes.
225 If `quiet` is True, non-error messages are not output. `line_length`,
226 `write_back`, and `fast` options are passed to :func:`format_file_in_place`.
230 if not src.is_file() and str(src) == "-":
231 if format_stdin_to_stdout(
232 line_length=line_length, fast=fast, write_back=write_back
234 changed = Changed.YES
237 if write_back != WriteBack.DIFF:
238 cache = read_cache(line_length)
240 if src in cache and cache[src] == get_cache_info(src):
241 changed = Changed.CACHED
242 if changed is not Changed.CACHED and format_file_in_place(
243 src, line_length=line_length, fast=fast, write_back=write_back
245 changed = Changed.YES
246 if write_back == WriteBack.YES and changed is not Changed.NO:
247 write_cache(cache, [src], line_length)
248 report.done(src, changed)
249 except Exception as exc:
250 report.failed(src, str(exc))
253 async def schedule_formatting(
257 write_back: WriteBack,
262 """Run formatting of `sources` in parallel using the provided `executor`.
264 (Use ProcessPoolExecutors for actual parallelism.)
266 `line_length`, `write_back`, and `fast` options are passed to
267 :func:`format_file_in_place`.
270 if write_back != WriteBack.DIFF:
271 cache = read_cache(line_length)
272 sources, cached = filter_cached(cache, sources)
274 report.done(src, Changed.CACHED)
279 if write_back == WriteBack.DIFF:
280 # For diff output, we need locks to ensure we don't interleave output
281 # from different processes.
283 lock = manager.Lock()
285 loop.run_in_executor(
286 executor, format_file_in_place, src, line_length, fast, write_back, lock
288 for src in sorted(sources)
290 pending: Iterable[asyncio.Task] = tasks.keys()
292 loop.add_signal_handler(signal.SIGINT, cancel, pending)
293 loop.add_signal_handler(signal.SIGTERM, cancel, pending)
294 except NotImplementedError:
295 # There are no good alternatives for these on Windows
298 done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
300 src = tasks.pop(task)
302 cancelled.append(task)
303 elif task.exception():
304 report.failed(src, str(task.exception()))
306 formatted.append(src)
307 report.done(src, Changed.YES if task.result() else Changed.NO)
309 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
310 if write_back == WriteBack.YES and formatted:
311 write_cache(cache, formatted, line_length)
314 def format_file_in_place(
318 write_back: WriteBack = WriteBack.NO,
319 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
321 """Format file under `src` path. Return True if changed.
323 If `write_back` is True, write reformatted code back to stdout.
324 `line_length` and `fast` options are passed to :func:`format_file_contents`.
326 is_pyi = src.suffix == ".pyi"
328 with tokenize.open(src) as src_buffer:
329 src_contents = src_buffer.read()
331 dst_contents = format_file_contents(
332 src_contents, line_length=line_length, fast=fast, is_pyi=is_pyi
334 except NothingChanged:
337 if write_back == write_back.YES:
338 with open(src, "w", encoding=src_buffer.encoding) as f:
339 f.write(dst_contents)
340 elif write_back == write_back.DIFF:
341 src_name = f"{src} (original)"
342 dst_name = f"{src} (formatted)"
343 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
347 sys.stdout.write(diff_contents)
354 def format_stdin_to_stdout(
355 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
357 """Format file on stdin. Return True if changed.
359 If `write_back` is True, write reformatted code back to stdout.
360 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
362 src = sys.stdin.read()
365 dst = format_file_contents(src, line_length=line_length, fast=fast)
368 except NothingChanged:
372 if write_back == WriteBack.YES:
373 sys.stdout.write(dst)
374 elif write_back == WriteBack.DIFF:
375 src_name = "<stdin> (original)"
376 dst_name = "<stdin> (formatted)"
377 sys.stdout.write(diff(src, dst, src_name, dst_name))
380 def format_file_contents(
381 src_contents: str, *, line_length: int, fast: bool, is_pyi: bool = False
383 """Reformat contents a file and return new contents.
385 If `fast` is False, additionally confirm that the reformatted code is
386 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
387 `line_length` is passed to :func:`format_str`.
389 if src_contents.strip() == "":
392 dst_contents = format_str(src_contents, line_length=line_length, is_pyi=is_pyi)
393 if src_contents == dst_contents:
397 assert_equivalent(src_contents, dst_contents)
399 src_contents, dst_contents, line_length=line_length, is_pyi=is_pyi
405 src_contents: str, line_length: int, *, is_pyi: bool = False
407 """Reformat a string and return new contents.
409 `line_length` determines how many characters per line are allowed.
411 src_node = lib2to3_parse(src_contents)
413 future_imports = get_future_imports(src_node)
414 elt = EmptyLineTracker(is_pyi=is_pyi)
415 py36 = is_python36(src_node)
416 lines = LineGenerator(
417 remove_u_prefix=py36 or "unicode_literals" in future_imports, is_pyi=is_pyi
421 for current_line in lines.visit(src_node):
422 for _ in range(after):
423 dst_contents += str(empty_line)
424 before, after = elt.maybe_empty_lines(current_line)
425 for _ in range(before):
426 dst_contents += str(empty_line)
427 for line in split_line(current_line, line_length=line_length, py36=py36):
428 dst_contents += str(line)
433 pygram.python_grammar_no_print_statement_no_exec_statement,
434 pygram.python_grammar_no_print_statement,
435 pygram.python_grammar,
439 def lib2to3_parse(src_txt: str) -> Node:
440 """Given a string with source, return the lib2to3 Node."""
441 grammar = pygram.python_grammar_no_print_statement
442 if src_txt[-1] != "\n":
443 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
445 for grammar in GRAMMARS:
446 drv = driver.Driver(grammar, pytree.convert)
448 result = drv.parse_string(src_txt, True)
451 except ParseError as pe:
452 lineno, column = pe.context[1]
453 lines = src_txt.splitlines()
455 faulty_line = lines[lineno - 1]
457 faulty_line = "<line number missing in source>"
458 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
462 if isinstance(result, Leaf):
463 result = Node(syms.file_input, [result])
467 def lib2to3_unparse(node: Node) -> str:
468 """Given a lib2to3 node, return its string representation."""
476 class Visitor(Generic[T]):
477 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
479 def visit(self, node: LN) -> Iterator[T]:
480 """Main method to visit `node` and its children.
482 It tries to find a `visit_*()` method for the given `node.type`, like
483 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
484 If no dedicated `visit_*()` method is found, chooses `visit_default()`
487 Then yields objects of type `T` from the selected visitor.
490 name = token.tok_name[node.type]
492 name = type_repr(node.type)
493 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
def visit_default(self, node: LN) -> Iterator[T]:
    """Default `visit_*()` implementation. Recurses to children of `node`."""
    # Leaves have no children, so only Node instances recurse.
    if not isinstance(node, Node):
        return
    for child in node.children:
        yield from self.visit(child)
503 class DebugVisitor(Visitor[T]):
506 def visit_default(self, node: LN) -> Iterator[T]:
507 indent = " " * (2 * self.tree_depth)
508 if isinstance(node, Node):
509 _type = type_repr(node.type)
510 out(f"{indent}{_type}", fg="yellow")
512 for child in node.children:
513 yield from self.visit(child)
516 out(f"{indent}/{_type}", fg="yellow", bold=False)
518 _type = token.tok_name.get(node.type, str(node.type))
519 out(f"{indent}{_type}", fg="blue", nl=False)
521 # We don't have to handle prefixes for `Node` objects since
522 # that delegates to the first child anyway.
523 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
524 out(f" {node.value!r}", fg="blue", bold=False)
527 def show(cls, code: str) -> None:
528 """Pretty-print the lib2to3 AST of a given string of `code`.
530 Convenience method for debugging.
532 v: DebugVisitor[None] = DebugVisitor()
533 list(v.visit(lib2to3_parse(code)))
536 KEYWORDS = set(keyword.kwlist)
537 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
538 FLOW_CONTROL = {"return", "raise", "break", "continue"}
549 STANDALONE_COMMENT = 153
550 LOGIC_OPERATORS = {"and", "or"}
575 STARS = {token.STAR, token.DOUBLESTAR}
578 syms.argument, # double star in arglist
579 syms.trailer, # single argument to call
581 syms.varargslist, # lambdas
583 UNPACKING_PARENTS = {
584 syms.atom, # single element of a list or set literal
622 COMPREHENSION_PRIORITY = 20
624 TERNARY_PRIORITY = 16
627 COMPARATOR_PRIORITY = 10
638 token.DOUBLESLASH: 3,
647 class BracketTracker:
648 """Keeps track of brackets on a line."""
651 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
652 delimiters: Dict[LeafID, Priority] = Factory(dict)
653 previous: Optional[Leaf] = None
654 _for_loop_variable: int = 0
655 _lambda_arguments: int = 0
657 def mark(self, leaf: Leaf) -> None:
658 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
660 All leaves receive an int `bracket_depth` field that stores how deep
661 within brackets a given leaf is. 0 means there are no enclosing brackets
662 that started on this line.
664 If a leaf is itself a closing bracket, it receives an `opening_bracket`
665 field that it forms a pair with. This is a one-directional link to
666 avoid reference cycles.
668 If a leaf is a delimiter (a token on which Black can split the line if
669 needed) and it's on depth 0, its `id()` is stored in the tracker's
672 if leaf.type == token.COMMENT:
675 self.maybe_decrement_after_for_loop_variable(leaf)
676 self.maybe_decrement_after_lambda_arguments(leaf)
677 if leaf.type in CLOSING_BRACKETS:
679 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
680 leaf.opening_bracket = opening_bracket
681 leaf.bracket_depth = self.depth
683 delim = is_split_before_delimiter(leaf, self.previous)
684 if delim and self.previous is not None:
685 self.delimiters[id(self.previous)] = delim
687 delim = is_split_after_delimiter(leaf, self.previous)
689 self.delimiters[id(leaf)] = delim
690 if leaf.type in OPENING_BRACKETS:
691 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
694 self.maybe_increment_lambda_arguments(leaf)
695 self.maybe_increment_for_loop_variable(leaf)
697 def any_open_brackets(self) -> bool:
698 """Return True if there is an yet unmatched open bracket on the line."""
699 return bool(self.bracket_match)
701 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
702 """Return the highest priority of a delimiter found on the line.
704 Values are consistent with what `is_split_*_delimiter()` return.
705 Raises ValueError on no delimiters.
707 return max(v for k, v in self.delimiters.items() if k not in exclude)
709 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
710 """In a for loop, or comprehension, the variables are often unpacks.
712 To avoid splitting on the comma in this situation, increase the depth of
713 tokens between `for` and `in`.
715 if leaf.type == token.NAME and leaf.value == "for":
717 self._for_loop_variable += 1
722 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
723 """See `maybe_increment_for_loop_variable` above for explanation."""
724 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
726 self._for_loop_variable -= 1
731 def maybe_increment_lambda_arguments(self, leaf: Leaf) -> bool:
732 """In a lambda expression, there might be more than one argument.
734 To avoid splitting on the comma in this situation, increase the depth of
735 tokens between `lambda` and `:`.
737 if leaf.type == token.NAME and leaf.value == "lambda":
739 self._lambda_arguments += 1
744 def maybe_decrement_after_lambda_arguments(self, leaf: Leaf) -> bool:
745 """See `maybe_increment_lambda_arguments` above for explanation."""
746 if self._lambda_arguments and leaf.type == token.COLON:
748 self._lambda_arguments -= 1
753 def get_open_lsqb(self) -> Optional[Leaf]:
754 """Return the most recent opening square bracket (if any)."""
755 return self.bracket_match.get((self.depth - 1, token.RSQB))
760 """Holds leaves and comments. Can be printed with `str(line)`."""
763 leaves: List[Leaf] = Factory(list)
764 comments: List[Tuple[Index, Leaf]] = Factory(list)
765 bracket_tracker: BracketTracker = Factory(BracketTracker)
766 inside_brackets: bool = False
768 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
769 """Add a new `leaf` to the end of the line.
771 Unless `preformatted` is True, the `leaf` will receive a new consistent
772 whitespace prefix and metadata applied by :class:`BracketTracker`.
773 Trailing commas are maybe removed, unpacked for loop variables are
774 demoted from being delimiters.
776 Inline comments are put aside.
778 has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
782 if token.COLON == leaf.type and self.is_class_paren_empty:
784 if self.leaves and not preformatted:
785 # Note: at this point leaf.prefix should be empty except for
786 # imports, for which we only preserve newlines.
787 leaf.prefix += whitespace(
788 leaf, complex_subscript=self.is_complex_subscript(leaf)
790 if self.inside_brackets or not preformatted:
791 self.bracket_tracker.mark(leaf)
792 self.maybe_remove_trailing_comma(leaf)
793 if not self.append_comment(leaf):
794 self.leaves.append(leaf)
796 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
797 """Like :func:`append()` but disallow invalid standalone comment structure.
799 Raises ValueError when any `leaf` is appended after a standalone comment
800 or when a standalone comment is not the first leaf on the line.
802 if self.bracket_tracker.depth == 0:
804 raise ValueError("cannot append to standalone comments")
806 if self.leaves and leaf.type == STANDALONE_COMMENT:
808 "cannot append standalone comments to a populated line"
811 self.append(leaf, preformatted=preformatted)
814 def is_comment(self) -> bool:
815 """Is this line a standalone comment?"""
816 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
819 def is_decorator(self) -> bool:
820 """Is this line a decorator?"""
821 return bool(self) and self.leaves[0].type == token.AT
824 def is_import(self) -> bool:
825 """Is this an import line?"""
826 return bool(self) and is_import(self.leaves[0])
829 def is_class(self) -> bool:
830 """Is this line a class definition?"""
833 and self.leaves[0].type == token.NAME
834 and self.leaves[0].value == "class"
838 def is_stub_class(self) -> bool:
839 """Is this line a class definition with a body consisting only of "..."?"""
842 and self.leaves[-3:] == [Leaf(token.DOT, ".") for _ in range(3)]
846 def is_def(self) -> bool:
847 """Is this a function definition? (Also returns True for async defs.)"""
849 first_leaf = self.leaves[0]
854 second_leaf: Optional[Leaf] = self.leaves[1]
857 return (first_leaf.type == token.NAME and first_leaf.value == "def") or (
858 first_leaf.type == token.ASYNC
859 and second_leaf is not None
860 and second_leaf.type == token.NAME
861 and second_leaf.value == "def"
865 def is_flow_control(self) -> bool:
866 """Is this line a flow control statement?
868 Those are `return`, `raise`, `break`, and `continue`.
872 and self.leaves[0].type == token.NAME
873 and self.leaves[0].value in FLOW_CONTROL
877 def is_yield(self) -> bool:
878 """Is this line a yield statement?"""
881 and self.leaves[0].type == token.NAME
882 and self.leaves[0].value == "yield"
886 def is_class_paren_empty(self) -> bool:
887 """Is this a class with no base classes but using parentheses?
889 Those are unnecessary and should be removed.
893 and len(self.leaves) == 4
895 and self.leaves[2].type == token.LPAR
896 and self.leaves[2].value == "("
897 and self.leaves[3].type == token.RPAR
898 and self.leaves[3].value == ")"
901 def contains_standalone_comments(self, depth_limit: int = sys.maxsize) -> bool:
902 """If so, needs to be split before emitting."""
903 for leaf in self.leaves:
904 if leaf.type == STANDALONE_COMMENT:
905 if leaf.bracket_depth <= depth_limit:
910 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
911 """Remove trailing comma if there is one and it's safe."""
914 and self.leaves[-1].type == token.COMMA
915 and closing.type in CLOSING_BRACKETS
919 if closing.type == token.RBRACE:
920 self.remove_trailing_comma()
923 if closing.type == token.RSQB:
924 comma = self.leaves[-1]
925 if comma.parent and comma.parent.type == syms.listmaker:
926 self.remove_trailing_comma()
929 # For parens let's check if it's safe to remove the comma.
930 # Imports are always safe.
932 self.remove_trailing_comma()
935 # Otherwise, if the trailing one is the only one, we might mistakenly
936 # change a tuple into a different type by removing the comma.
937 depth = closing.bracket_depth + 1
939 opening = closing.opening_bracket
940 for _opening_index, leaf in enumerate(self.leaves):
947 for leaf in self.leaves[_opening_index + 1 :]:
951 bracket_depth = leaf.bracket_depth
952 if bracket_depth == depth and leaf.type == token.COMMA:
954 if leaf.parent and leaf.parent.type == syms.arglist:
959 self.remove_trailing_comma()
964 def append_comment(self, comment: Leaf) -> bool:
965 """Add an inline or standalone comment to the line."""
967 comment.type == STANDALONE_COMMENT
968 and self.bracket_tracker.any_open_brackets()
973 if comment.type != token.COMMENT:
976 after = len(self.leaves) - 1
978 comment.type = STANDALONE_COMMENT
983 self.comments.append((after, comment))
986 def comments_after(self, leaf: Leaf, _index: int = -1) -> Iterator[Leaf]:
987 """Generate comments that should appear directly after `leaf`.
989 Provide a non-negative leaf `_index` to speed up the function.
992 for _index, _leaf in enumerate(self.leaves):
999 for index, comment_after in self.comments:
1003 def remove_trailing_comma(self) -> None:
1004 """Remove the trailing comma and moves the comments attached to it."""
1005 comma_index = len(self.leaves) - 1
1006 for i in range(len(self.comments)):
1007 comment_index, comment = self.comments[i]
1008 if comment_index == comma_index:
1009 self.comments[i] = (comma_index - 1, comment)
1012 def is_complex_subscript(self, leaf: Leaf) -> bool:
1013 """Return True iff `leaf` is part of a slice with non-trivial exprs."""
1015 leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
1017 if open_lsqb is None:
1020 subscript_start = open_lsqb.next_sibling
1022 isinstance(subscript_start, Node)
1023 and subscript_start.type == syms.subscriptlist
1025 subscript_start = child_towards(subscript_start, leaf)
1026 return subscript_start is not None and any(
1027 n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
1030 def __str__(self) -> str:
1031 """Render the line."""
1035 indent = " " * self.depth
1036 leaves = iter(self.leaves)
1037 first = next(leaves)
1038 res = f"{first.prefix}{indent}{first.value}"
1041 for _, comment in self.comments:
1045 def __bool__(self) -> bool:
1046 """Return True if the line has leaves or comments."""
1047 return bool(self.leaves or self.comments)
1050 class UnformattedLines(Line):
1051 """Just like :class:`Line` but stores lines which aren't reformatted."""
1053 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
1054 """Just add a new `leaf` to the end of the lines.
1056 The `preformatted` argument is ignored.
1058 Keeps track of indentation `depth`, which is useful when the user
1059 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
1062 list(generate_comments(leaf))
1063 except FormatOn as f_on:
1064 self.leaves.append(f_on.leaf_from_consumed(leaf))
1067 self.leaves.append(leaf)
1068 if leaf.type == token.INDENT:
1070 elif leaf.type == token.DEDENT:
1073 def __str__(self) -> str:
1074 """Render unformatted lines from leaves which were added with `append()`.
1076 `depth` is not used for indentation in this case.
1082 for leaf in self.leaves:
1086 def append_comment(self, comment: Leaf) -> bool:
1087 """Not implemented in this class. Raises `NotImplementedError`."""
1088 raise NotImplementedError("Unformatted lines don't store comments separately.")
1090 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
1091 """Does nothing and returns False."""
1094 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
1095 """Does nothing and returns False."""
1100 class EmptyLineTracker:
1101 """Provides a stateful method that returns the number of potential extra
1102 empty lines needed before and after the currently processed line.
1104 Note: this tracker works on lines that haven't been split yet. It assumes
1105 the prefix of the first leaf consists of optional newlines. Those newlines
1106 are consumed by `maybe_empty_lines()` and included in the computation.
1108 is_pyi: bool = False
1109 previous_line: Optional[Line] = None
1110 previous_after: int = 0
1111 previous_defs: List[int] = Factory(list)
1113 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1114 """Return the number of extra empty lines before and after the `current_line`.
1116 This is for separating `def`, `async def` and `class` with extra empty
1117 lines (two on module-level), as well as providing an extra empty line
1118 after flow control keywords to make them more prominent.
1120 if isinstance(current_line, UnformattedLines):
1123 before, after = self._maybe_empty_lines(current_line)
1124 before -= self.previous_after
1125 self.previous_after = after
1126 self.previous_line = current_line
1127 return before, after
1129 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
1131 if current_line.depth == 0:
1132 max_allowed = 1 if self.is_pyi else 2
1133 if current_line.leaves:
1134 # Consume the first leaf's extra newlines.
1135 first_leaf = current_line.leaves[0]
1136 before = first_leaf.prefix.count("\n")
1137 before = min(before, max_allowed)
1138 first_leaf.prefix = ""
1141 depth = current_line.depth
1142 while self.previous_defs and self.previous_defs[-1] >= depth:
1143 self.previous_defs.pop()
1145 before = 0 if depth else 1
1147 before = 1 if depth else 2
1148 is_decorator = current_line.is_decorator
1149 if is_decorator or current_line.is_def or current_line.is_class:
1150 if not is_decorator:
1151 self.previous_defs.append(depth)
1152 if self.previous_line is None:
1153 # Don't insert empty lines before the first line in the file.
1156 if self.previous_line.is_decorator:
1160 self.previous_line.is_comment
1161 and self.previous_line.depth == current_line.depth
1167 if self.previous_line.depth > current_line.depth:
1169 elif current_line.is_class or self.previous_line.is_class:
1170 if current_line.is_stub_class and self.previous_line.is_stub_class:
1178 if current_line.depth and newlines:
1184 and self.previous_line.is_import
1185 and not current_line.is_import
1186 and depth == self.previous_line.depth
1188 return (before or 1), 0
1194 class LineGenerator(Visitor[Line]):
1195 """Generates reformatted Line objects. Empty lines are not emitted.
1197 Note: destroys the tree it's visiting by mutating prefixes of its leaves
1198 in ways that will no longer stringify to valid Python code on the tree.
1200 is_pyi: bool = False
1201 current_line: Line = Factory(Line)
1202 remove_u_prefix: bool = False
1204 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1207 If the line is empty, only emit if it makes sense.
1208 If the line is too long, split it first and then generate.
1210 If any lines were generated, set up a new current_line.
1212 if not self.current_line:
1213 if self.current_line.__class__ == type:
1214 self.current_line.depth += indent
1216 self.current_line = type(depth=self.current_line.depth + indent)
1217 return # Line is empty, don't emit. Creating a new one unnecessary.
1219 complete_line = self.current_line
1220 self.current_line = type(depth=complete_line.depth + indent)
1223 def visit(self, node: LN) -> Iterator[Line]:
1224 """Main method to visit `node` and its children.
1226 Yields :class:`Line` objects.
1228 if isinstance(self.current_line, UnformattedLines):
1229 # File contained `# fmt: off`
1230 yield from self.visit_unformatted(node)
1233 yield from super().visit(node)
1235 def visit_default(self, node: LN) -> Iterator[Line]:
1236 """Default `visit_*()` implementation. Recurses to children of `node`."""
1237 if isinstance(node, Leaf):
1238 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1240 for comment in generate_comments(node):
1241 if any_open_brackets:
1242 # any comment within brackets is subject to splitting
1243 self.current_line.append(comment)
1244 elif comment.type == token.COMMENT:
1245 # regular trailing comment
1246 self.current_line.append(comment)
1247 yield from self.line()
1250 # regular standalone comment
1251 yield from self.line()
1253 self.current_line.append(comment)
1254 yield from self.line()
1256 except FormatOff as f_off:
1257 f_off.trim_prefix(node)
1258 yield from self.line(type=UnformattedLines)
1259 yield from self.visit(node)
1261 except FormatOn as f_on:
1262 # This only happens here if somebody says "fmt: on" multiple
1264 f_on.trim_prefix(node)
1265 yield from self.visit_default(node)
1268 normalize_prefix(node, inside_brackets=any_open_brackets)
1269 if node.type == token.STRING:
1270 normalize_string_prefix(node, remove_u_prefix=self.remove_u_prefix)
1271 normalize_string_quotes(node)
1272 if node.type not in WHITESPACE:
1273 self.current_line.append(node)
1274 yield from super().visit_default(node)
1276 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1277 """Increase indentation level, maybe yield a line."""
1278 # In blib2to3 INDENT never holds comments.
1279 yield from self.line(+1)
1280 yield from self.visit_default(node)
1282 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1283 """Decrease indentation level, maybe yield a line."""
1284 # The current line might still wait for trailing comments. At DEDENT time
1285 # there won't be any (they would be prefixes on the preceding NEWLINE).
1286 # Emit the line then.
1287 yield from self.line()
1289 # While DEDENT has no value, its prefix may contain standalone comments
1290 # that belong to the current indentation level. Get 'em.
1291 yield from self.visit_default(node)
1293 # Finally, emit the dedent.
1294 yield from self.line(-1)
1297 self, node: Node, keywords: Set[str], parens: Set[str]
1298 ) -> Iterator[Line]:
1299 """Visit a statement.
1301 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1302 `def`, `with`, `class`, `assert` and assignments.
1304 The relevant Python language `keywords` for a given statement will be
1305 NAME leaves within it. This methods puts those on a separate line.
1307 `parens` holds a set of string leaf values immediately after which
1308 invisible parens should be put.
1310 normalize_invisible_parens(node, parens_after=parens)
1311 for child in node.children:
1312 if child.type == token.NAME and child.value in keywords: # type: ignore
1313 yield from self.line()
1315 yield from self.visit(child)
1317 def visit_suite(self, node: Node) -> Iterator[Line]:
1318 """Visit a suite."""
1319 if self.is_pyi and is_stub_suite(node):
1320 yield from self.visit(node.children[2])
1322 yield from self.visit_default(node)
1324 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1325 """Visit a statement without nested statements."""
1326 is_suite_like = node.parent and node.parent.type in STATEMENT
1328 if self.is_pyi and is_stub_body(node):
1329 yield from self.visit_default(node)
1331 yield from self.line(+1)
1332 yield from self.visit_default(node)
1333 yield from self.line(-1)
1336 if not self.is_pyi or not node.parent or not is_stub_suite(node.parent):
1337 yield from self.line()
1338 yield from self.visit_default(node)
1340 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1341 """Visit `async def`, `async for`, `async with`."""
1342 yield from self.line()
1344 children = iter(node.children)
1345 for child in children:
1346 yield from self.visit(child)
1348 if child.type == token.ASYNC:
1351 internal_stmt = next(children)
1352 for child in internal_stmt.children:
1353 yield from self.visit(child)
def visit_decorators(self, node: Node) -> Iterator[Line]:
    """Visit decorators, placing each one on its own line."""
    for decorator in node.children:
        # Flush whatever is pending so the decorator starts a fresh line.
        yield from self.line()
        yield from self.visit(decorator)
1361 def visit_import_from(self, node: Node) -> Iterator[Line]:
1362 """Visit import_from and maybe put invisible parentheses.
1364 This is separate from `visit_stmt` because import statements don't
1365 support arbitrary atoms and thus handling of parentheses is custom.
1368 for index, child in enumerate(node.children):
1370 if child.type == token.LPAR:
1371 # make parentheses invisible
1372 child.value = "" # type: ignore
1373 node.children[-1].value = "" # type: ignore
1375 # insert invisible parentheses
1376 node.insert_child(index, Leaf(token.LPAR, ""))
1377 node.append_child(Leaf(token.RPAR, ""))
1381 child.type == token.NAME and child.value == "import" # type: ignore
1384 for child in node.children:
1385 yield from self.visit(child)
def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
    """Drop the semicolon; the statement after it goes on its own line.

    Emitting the pending line here (without appending the `;` leaf)
    effectively removes the semicolon from the output.
    """
    yield from self.line()
def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
    """Handle end of file.

    Run the default visit first so any outstanding comments carried in the
    ENDMARKER's prefix are processed, then flush the final pending line so
    the output ends with a newline.
    """
    yield from self.visit_default(leaf)
    yield from self.line()
1396 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1397 """Used when file contained a `# fmt: off`."""
1398 if isinstance(node, Node):
1399 for child in node.children:
1400 yield from self.visit(child)
1404 self.current_line.append(node)
1405 except FormatOn as f_on:
1406 f_on.trim_prefix(node)
1407 yield from self.line()
1408 yield from self.visit(node)
1410 if node.type == token.ENDMARKER:
1411 # somebody decided not to put a final `# fmt: on`
1412 yield from self.line()
1414 def __attrs_post_init__(self) -> None:
1415 """You are in a twisty little maze of passages."""
1418 self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
1419 self.visit_if_stmt = partial(
1420 v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
1422 self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
1423 self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
1424 self.visit_try_stmt = partial(
1425 v, keywords={"try", "except", "else", "finally"}, parens=Ø
1427 self.visit_except_clause = partial(v, keywords={"except"}, parens=Ø)
1428 self.visit_with_stmt = partial(v, keywords={"with"}, parens=Ø)
1429 self.visit_funcdef = partial(v, keywords={"def"}, parens=Ø)
1430 self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
1431 self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
1432 self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
1433 self.visit_async_funcdef = self.visit_async_stmt
1434 self.visit_decorated = self.visit_decorators
# Node types that form a tuple without explicit parentheses.
IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
# Map from each opening bracket token to its matching closing bracket token.
BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
OPENING_BRACKETS = set(BRACKET.keys())
CLOSING_BRACKETS = set(BRACKET.values())
# All bracket tokens, opening and closing alike.
BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
# Token types that are never preceded by whitespace.
ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1445 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa C901
1446 """Return whitespace prefix if needed for the given `leaf`.
1448 `complex_subscript` signals whether the given leaf is part of a subscription
1449 which has non-trivial arguments, like arithmetic expressions or function calls.
1457 if t in ALWAYS_NO_SPACE:
1460 if t == token.COMMENT:
1463 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1466 and p.type not in {syms.subscript, syms.subscriptlist, syms.sliceop}
1470 prev = leaf.prev_sibling
1472 prevp = preceding_leaf(p)
1473 if not prevp or prevp.type in OPENING_BRACKETS:
1476 if t == token.COLON:
1477 if prevp.type == token.COLON:
1480 elif prevp.type != token.COMMA and not complex_subscript:
1485 if prevp.type == token.EQUAL:
1487 if prevp.parent.type in {
1488 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1492 elif prevp.parent.type == syms.typedargslist:
1493 # A bit hacky: if the equal sign has whitespace, it means we
1494 # previously found it's a typed argument. So, we're using
1498 elif prevp.type in STARS:
1499 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1502 elif prevp.type == token.COLON:
1503 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1504 return SPACE if complex_subscript else NO
1508 and prevp.parent.type == syms.factor
1509 and prevp.type in MATH_OPERATORS
1514 prevp.type == token.RIGHTSHIFT
1516 and prevp.parent.type == syms.shift_expr
1517 and prevp.prev_sibling
1518 and prevp.prev_sibling.type == token.NAME
1519 and prevp.prev_sibling.value == "print" # type: ignore
1521 # Python 2 print chevron
1524 elif prev.type in OPENING_BRACKETS:
1527 if p.type in {syms.parameters, syms.arglist}:
1528 # untyped function signatures or calls
1529 if not prev or prev.type != token.COMMA:
1532 elif p.type == syms.varargslist:
1534 if prev and prev.type != token.COMMA:
1537 elif p.type == syms.typedargslist:
1538 # typed function signatures
1542 if t == token.EQUAL:
1543 if prev.type != syms.tname:
1546 elif prev.type == token.EQUAL:
1547 # A bit hacky: if the equal sign has whitespace, it means we
1548 # previously found it's a typed argument. So, we're using that, too.
1551 elif prev.type != token.COMMA:
1554 elif p.type == syms.tname:
1557 prevp = preceding_leaf(p)
1558 if not prevp or prevp.type != token.COMMA:
1561 elif p.type == syms.trailer:
1562 # attributes and calls
1563 if t == token.LPAR or t == token.RPAR:
1568 prevp = preceding_leaf(p)
1569 if not prevp or prevp.type != token.NUMBER:
1572 elif t == token.LSQB:
1575 elif prev.type != token.COMMA:
1578 elif p.type == syms.argument:
1580 if t == token.EQUAL:
1584 prevp = preceding_leaf(p)
1585 if not prevp or prevp.type == token.LPAR:
1588 elif prev.type in {token.EQUAL} | STARS:
1591 elif p.type == syms.decorator:
1595 elif p.type == syms.dotted_name:
1599 prevp = preceding_leaf(p)
1600 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1603 elif p.type == syms.classdef:
1607 if prev and prev.type == token.LPAR:
1610 elif p.type in {syms.subscript, syms.sliceop}:
1613 assert p.parent is not None, "subscripts are always parented"
1614 if p.parent.type == syms.subscriptlist:
1619 elif not complex_subscript:
1622 elif p.type == syms.atom:
1623 if prev and t == token.DOT:
1624 # dots, but not the first one.
1627 elif p.type == syms.dictsetmaker:
1629 if prev and prev.type == token.DOUBLESTAR:
1632 elif p.type in {syms.factor, syms.star_expr}:
1635 prevp = preceding_leaf(p)
1636 if not prevp or prevp.type in OPENING_BRACKETS:
1639 prevp_parent = prevp.parent
1640 assert prevp_parent is not None
1642 prevp.type == token.COLON
1643 and prevp_parent.type in {syms.subscript, syms.sliceop}
1647 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1650 elif t == token.NAME or t == token.NUMBER:
1653 elif p.type == syms.import_from:
1655 if prev and prev.type == token.DOT:
1658 elif t == token.NAME:
1662 if prev and prev.type == token.DOT:
1665 elif p.type == syms.sliceop:
1671 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1672 """Return the first leaf that precedes `node`, if any."""
1674 res = node.prev_sibling
1676 if isinstance(res, Leaf):
1680 return list(res.leaves())[-1]
1689 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
1690 """Return the child of `ancestor` that contains `descendant`."""
1691 node: Optional[LN] = descendant
1692 while node and node.parent != ancestor:
1697 def is_split_after_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1698 """Return the priority of the `leaf` delimiter, given a line break after it.
1700 The delimiter priorities returned here are from those delimiters that would
1701 cause a line break after themselves.
1703 Higher numbers are higher priority.
1705 if leaf.type == token.COMMA:
1706 return COMMA_PRIORITY
1711 def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
1712 """Return the priority of the `leaf` delimiter, given a line before after it.
1714 The delimiter priorities returned here are from those delimiters that would
1715 cause a line break before themselves.
1717 Higher numbers are higher priority.
1719 if is_vararg(leaf, within=VARARGS_PARENTS | UNPACKING_PARENTS):
1720 # * and ** might also be MATH_OPERATORS but in this case they are not.
1721 # Don't treat them as a delimiter.
1725 leaf.type in MATH_OPERATORS
1727 and leaf.parent.type not in {syms.factor, syms.star_expr}
1729 return MATH_PRIORITIES[leaf.type]
1731 if leaf.type in COMPARATORS:
1732 return COMPARATOR_PRIORITY
1735 leaf.type == token.STRING
1736 and previous is not None
1737 and previous.type == token.STRING
1739 return STRING_PRIORITY
1742 leaf.type == token.NAME
1743 and leaf.value == "for"
1745 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
1747 return COMPREHENSION_PRIORITY
1750 leaf.type == token.NAME
1751 and leaf.value == "if"
1753 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
1755 return COMPREHENSION_PRIORITY
1758 leaf.type == token.NAME
1759 and leaf.value in {"if", "else"}
1761 and leaf.parent.type == syms.test
1763 return TERNARY_PRIORITY
1765 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
1766 return LOGIC_PRIORITY
1771 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1772 """Clean the prefix of the `leaf` and generate comments from it, if any.
1774 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1775 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1776 move because it does away with modifying the grammar to include all the
1777 possible places in which comments can be placed.
1779 The sad consequence for us though is that comments don't "belong" anywhere.
1780 This is why this function generates simple parentless Leaf objects for
1781 comments. We simply don't know what the correct parent should be.
1783 No matter though, we can live without this. We really only need to
1784 differentiate between inline and standalone comments. The latter don't
1785 share the line with any code.
1787 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1788 are emitted with a fake STANDALONE_COMMENT token identifier.
1799 for index, line in enumerate(p.split("\n")):
1800 consumed += len(line) + 1 # adding the length of the split '\n'
1801 line = line.lstrip()
1804 if not line.startswith("#"):
1807 if index == 0 and leaf.type != token.ENDMARKER:
1808 comment_type = token.COMMENT # simple trailing comment
1810 comment_type = STANDALONE_COMMENT
1811 comment = make_comment(line)
1812 yield Leaf(comment_type, comment, prefix="\n" * nlines)
1814 if comment in {"# fmt: on", "# yapf: enable"}:
1815 raise FormatOn(consumed)
1817 if comment in {"# fmt: off", "# yapf: disable"}:
1818 if comment_type == STANDALONE_COMMENT:
1819 raise FormatOff(consumed)
1821 prev = preceding_leaf(leaf)
1822 if not prev or prev.type in WHITESPACE: # standalone comment in disguise
1823 raise FormatOff(consumed)
1828 def make_comment(content: str) -> str:
1829 """Return a consistently formatted comment from the given `content` string.
1831 All comments (except for "##", "#!", "#:") should have a single space between
1832 the hash sign and the content.
1834 If `content` didn't start with a hash sign, one is provided.
1836 content = content.rstrip()
1840 if content[0] == "#":
1841 content = content[1:]
1842 if content and content[0] not in " !:#":
1843 content = " " + content
1844 return "#" + content
1848 line: Line, line_length: int, inner: bool = False, py36: bool = False
1849 ) -> Iterator[Line]:
1850 """Split a `line` into potentially many lines.
1852 They should fit in the allotted `line_length` but might not be able to.
1853 `inner` signifies that there were a pair of brackets somewhere around the
1854 current `line`, possibly transitively. This means we can fallback to splitting
1855 by delimiters if the LHS/RHS don't yield any results.
1857 If `py36` is True, splitting may generate syntax that is only compatible
1858 with Python 3.6 and later.
1860 if isinstance(line, UnformattedLines) or line.is_comment:
1864 line_str = str(line).strip("\n")
1865 if is_line_short_enough(line, line_length=line_length, line_str=line_str):
1869 split_funcs: List[SplitFunc]
1871 split_funcs = [left_hand_split]
1872 elif line.is_import:
1873 split_funcs = [explode_split]
1876 def rhs(line: Line, py36: bool = False) -> Iterator[Line]:
1877 for omit in generate_trailers_to_omit(line, line_length):
1878 lines = list(right_hand_split(line, py36, omit=omit))
1879 if is_line_short_enough(lines[0], line_length=line_length):
1883 # All splits failed, best effort split with no omits.
1884 yield from right_hand_split(line, py36)
1886 if line.inside_brackets:
1887 split_funcs = [delimiter_split, standalone_comment_split, rhs]
1890 for split_func in split_funcs:
1891 # We are accumulating lines in `result` because we might want to abort
1892 # mission and return the original line in the end, or attempt a different
1894 result: List[Line] = []
1896 for l in split_func(line, py36):
1897 if str(l).strip("\n") == line_str:
1898 raise CannotSplit("Split function returned an unchanged result")
1901 split_line(l, line_length=line_length, inner=True, py36=py36)
1903 except CannotSplit as cs:
1914 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1915 """Split line into many lines, starting with the first matching bracket pair.
1917 Note: this usually looks weird, only use this for function definitions.
1918 Prefer RHS otherwise. This is why this function is not symmetrical with
1919 :func:`right_hand_split` which also handles optional parentheses.
1921 head = Line(depth=line.depth)
1922 body = Line(depth=line.depth + 1, inside_brackets=True)
1923 tail = Line(depth=line.depth)
1924 tail_leaves: List[Leaf] = []
1925 body_leaves: List[Leaf] = []
1926 head_leaves: List[Leaf] = []
1927 current_leaves = head_leaves
1928 matching_bracket = None
1929 for leaf in line.leaves:
1931 current_leaves is body_leaves
1932 and leaf.type in CLOSING_BRACKETS
1933 and leaf.opening_bracket is matching_bracket
1935 current_leaves = tail_leaves if body_leaves else head_leaves
1936 current_leaves.append(leaf)
1937 if current_leaves is head_leaves:
1938 if leaf.type in OPENING_BRACKETS:
1939 matching_bracket = leaf
1940 current_leaves = body_leaves
1941 # Since body is a new indent level, remove spurious leading whitespace.
1943 normalize_prefix(body_leaves[0], inside_brackets=True)
1944 # Build the new lines.
1945 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
1947 result.append(leaf, preformatted=True)
1948 for comment_after in line.comments_after(leaf):
1949 result.append(comment_after, preformatted=True)
1950 bracket_split_succeeded_or_raise(head, body, tail)
1951 for result in (head, body, tail):
1956 def right_hand_split(
1957 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
1958 ) -> Iterator[Line]:
1959 """Split line into many lines, starting with the last matching bracket pair.
1961 If the split was by optional parentheses, attempt splitting without them, too.
1962 `omit` is a collection of closing bracket IDs that shouldn't be considered for
1965 Note: running this function modifies `bracket_depth` on the leaves of `line`.
1967 head = Line(depth=line.depth)
1968 body = Line(depth=line.depth + 1, inside_brackets=True)
1969 tail = Line(depth=line.depth)
1970 tail_leaves: List[Leaf] = []
1971 body_leaves: List[Leaf] = []
1972 head_leaves: List[Leaf] = []
1973 current_leaves = tail_leaves
1974 opening_bracket = None
1975 closing_bracket = None
1976 for leaf in reversed(line.leaves):
1977 if current_leaves is body_leaves:
1978 if leaf is opening_bracket:
1979 current_leaves = head_leaves if body_leaves else tail_leaves
1980 current_leaves.append(leaf)
1981 if current_leaves is tail_leaves:
1982 if leaf.type in CLOSING_BRACKETS and id(leaf) not in omit:
1983 opening_bracket = leaf.opening_bracket
1984 closing_bracket = leaf
1985 current_leaves = body_leaves
1986 tail_leaves.reverse()
1987 body_leaves.reverse()
1988 head_leaves.reverse()
1989 # Since body is a new indent level, remove spurious leading whitespace.
1991 normalize_prefix(body_leaves[0], inside_brackets=True)
1993 # No `head` means the split failed. Either `tail` has all content or
1994 # the matching `opening_bracket` wasn't available on `line` anymore.
1995 raise CannotSplit("No brackets found")
1997 # Build the new lines.
1998 for result, leaves in (head, head_leaves), (body, body_leaves), (tail, tail_leaves):
2000 result.append(leaf, preformatted=True)
2001 for comment_after in line.comments_after(leaf):
2002 result.append(comment_after, preformatted=True)
2003 bracket_split_succeeded_or_raise(head, body, tail)
2004 assert opening_bracket and closing_bracket
2006 # the opening bracket is an optional paren
2007 opening_bracket.type == token.LPAR
2008 and not opening_bracket.value
2009 # the closing bracket is an optional paren
2010 and closing_bracket.type == token.RPAR
2011 and not closing_bracket.value
2012 # there are no standalone comments in the body
2013 and not line.contains_standalone_comments(0)
2014 # and it's not an import (optional parens are the only thing we can split
2015 # on in this case; attempting a split without them is a waste of time)
2016 and not line.is_import
2018 omit = {id(closing_bracket), *omit}
2019 delimiter_count = len(body.bracket_tracker.delimiters)
2021 delimiter_count == 0
2022 or delimiter_count == 1
2024 body.leaves[0].type in OPENING_BRACKETS
2025 or body.leaves[-1].type in CLOSING_BRACKETS
2029 yield from right_hand_split(line, py36=py36, omit=omit)
2034 ensure_visible(opening_bracket)
2035 ensure_visible(closing_bracket)
2036 for result in (head, body, tail):
2041 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
2042 """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
2044 Do nothing otherwise.
2046 A left- or right-hand split is based on a pair of brackets. Content before
2047 (and including) the opening bracket is left on one line, content inside the
2048 brackets is put on a separate line, and finally content starting with and
2049 following the closing bracket is put on a separate line.
2051 Those are called `head`, `body`, and `tail`, respectively. If the split
2052 produced the same line (all content in `head`) or ended up with an empty `body`
2053 and the `tail` is just the closing bracket, then it's considered failed.
2055 tail_len = len(str(tail).strip())
2058 raise CannotSplit("Splitting brackets produced the same line")
2062 f"Splitting brackets on an empty body to save "
2063 f"{tail_len} characters is not worth it"
2067 def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
2068 """Normalize prefix of the first leaf in every line returned by `split_func`.
2070 This is a decorator over relevant split functions.
2074 def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
2075 for l in split_func(line, py36):
2076 normalize_prefix(l.leaves[0], inside_brackets=True)
2079 return split_wrapper
2082 @dont_increase_indentation
2083 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
2084 """Split according to delimiters of the highest priority.
2086 If `py36` is True, the split will add trailing commas also in function
2087 signatures that contain `*` and `**`.
2090 last_leaf = line.leaves[-1]
2092 raise CannotSplit("Line empty")
2094 delimiters = line.bracket_tracker.delimiters
2096 delimiter_priority = line.bracket_tracker.max_delimiter_priority(
2097 exclude={id(last_leaf)}
2100 raise CannotSplit("No delimiters found")
2102 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2103 lowest_depth = sys.maxsize
2104 trailing_comma_safe = True
2106 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2107 """Append `leaf` to current line or to new line if appending impossible."""
2108 nonlocal current_line
2110 current_line.append_safe(leaf, preformatted=True)
2111 except ValueError as ve:
2114 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2115 current_line.append(leaf)
2117 for index, leaf in enumerate(line.leaves):
2118 yield from append_to_line(leaf)
2120 for comment_after in line.comments_after(leaf, index):
2121 yield from append_to_line(comment_after)
2123 lowest_depth = min(lowest_depth, leaf.bracket_depth)
2125 leaf.bracket_depth == lowest_depth
2126 and is_vararg(leaf, within=VARARGS_PARENTS)
2128 trailing_comma_safe = trailing_comma_safe and py36
2129 leaf_priority = delimiters.get(id(leaf))
2130 if leaf_priority == delimiter_priority:
2133 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2137 and delimiter_priority == COMMA_PRIORITY
2138 and current_line.leaves[-1].type != token.COMMA
2139 and current_line.leaves[-1].type != STANDALONE_COMMENT
2141 current_line.append(Leaf(token.COMMA, ","))
2145 @dont_increase_indentation
2146 def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
2147 """Split standalone comments from the rest of the line."""
2148 if not line.contains_standalone_comments(0):
2149 raise CannotSplit("Line does not have any standalone comments")
2151 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2153 def append_to_line(leaf: Leaf) -> Iterator[Line]:
2154 """Append `leaf` to current line or to new line if appending impossible."""
2155 nonlocal current_line
2157 current_line.append_safe(leaf, preformatted=True)
2158 except ValueError as ve:
2161 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
2162 current_line.append(leaf)
2164 for index, leaf in enumerate(line.leaves):
2165 yield from append_to_line(leaf)
2167 for comment_after in line.comments_after(leaf, index):
2168 yield from append_to_line(comment_after)
2175 line: Line, py36: bool = False, omit: Collection[LeafID] = ()
2176 ) -> Iterator[Line]:
2177 """Split by rightmost bracket and immediately split contents by a delimiter."""
2178 new_lines = list(right_hand_split(line, py36, omit))
2179 if len(new_lines) != 3:
2180 yield from new_lines
2186 yield from delimiter_split(new_lines[1], py36)
2194 def is_import(leaf: Leaf) -> bool:
2195 """Return True if the given leaf starts an import statement."""
2202 (v == "import" and p and p.type == syms.import_name)
2203 or (v == "from" and p and p.type == syms.import_from)
2208 def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
2209 """Leave existing extra newlines if not `inside_brackets`. Remove everything
2212 Note: don't use backslashes for formatting or you'll lose your voting rights.
2214 if not inside_brackets:
2215 spl = leaf.prefix.split("#")
2216 if "\\" not in spl[0]:
2217 nl_count = spl[-1].count("\n")
2220 leaf.prefix = "\n" * nl_count
2226 def normalize_string_prefix(leaf: Leaf, remove_u_prefix: bool = False) -> None:
2227 """Make all string prefixes lowercase.
2229 If remove_u_prefix is given, also removes any u prefix from the string.
2231 Note: Mutates its argument.
2233 match = re.match(r"^([furbFURB]*)(.*)$", leaf.value, re.DOTALL)
2234 assert match is not None, f"failed to match string {leaf.value!r}"
2235 orig_prefix = match.group(1)
2236 new_prefix = orig_prefix.lower()
2238 new_prefix = new_prefix.replace("u", "")
2239 leaf.value = f"{new_prefix}{match.group(2)}"
2242 def normalize_string_quotes(leaf: Leaf) -> None:
2243 """Prefer double quotes but only if it doesn't cause more escaping.
2245 Adds or removes backslashes as appropriate. Doesn't parse and fix
2246 strings nested in f-strings (yet).
2248 Note: Mutates its argument.
2250 value = leaf.value.lstrip("furbFURB")
2251 if value[:3] == '"""':
2254 elif value[:3] == "'''":
2257 elif value[0] == '"':
2263 first_quote_pos = leaf.value.find(orig_quote)
2264 if first_quote_pos == -1:
2265 return # There's an internal error
2267 prefix = leaf.value[:first_quote_pos]
2268 unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
2269 escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
2270 escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
2271 body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
2272 if "r" in prefix.casefold():
2273 if unescaped_new_quote.search(body):
2274 # There's at least one unescaped new_quote in this raw string
2275 # so converting is impossible
2278 # Do not introduce or remove backslashes in raw strings
2281 # remove unnecessary quotes
2282 new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
2283 if body != new_body:
2284 # Consider the string without unnecessary quotes as the original
2286 leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
2287 new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
2288 new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
2289 if new_quote == '"""' and new_body[-1] == '"':
2291 new_body = new_body[:-1] + '\\"'
2292 orig_escape_count = body.count("\\")
2293 new_escape_count = new_body.count("\\")
2294 if new_escape_count > orig_escape_count:
2295 return # Do not introduce more escaping
2297 if new_escape_count == orig_escape_count and orig_quote == '"':
2298 return # Prefer double quotes
2300 leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
2303 def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
2304 """Make existing optional parentheses invisible or create new ones.
2306 `parens_after` is a set of string leaf values immeditely after which parens
2309 Standardizes on visible parentheses for single-element tuples, and keeps
2310 existing visible parentheses for other tuples and generator expressions.
2313 for child in list(node.children):
2315 if child.type == syms.atom:
2316 maybe_make_parens_invisible_in_atom(child)
2317 elif is_one_tuple(child):
2318 # wrap child in visible parentheses
2319 lpar = Leaf(token.LPAR, "(")
2320 rpar = Leaf(token.RPAR, ")")
2321 index = child.remove() or 0
2322 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2324 # wrap child in invisible parentheses
2325 lpar = Leaf(token.LPAR, "")
2326 rpar = Leaf(token.RPAR, "")
2327 index = child.remove() or 0
2328 node.insert_child(index, Node(syms.atom, [lpar, child, rpar]))
2330 check_lpar = isinstance(child, Leaf) and child.value in parens_after
2333 def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
2334 """If it's safe, make the parens in the atom `node` invisible, recusively."""
2336 node.type != syms.atom
2337 or is_empty_tuple(node)
2338 or is_one_tuple(node)
2340 or max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
2344 first = node.children[0]
2345 last = node.children[-1]
2346 if first.type == token.LPAR and last.type == token.RPAR:
2347 # make parentheses invisible
2348 first.value = "" # type: ignore
2349 last.value = "" # type: ignore
2350 if len(node.children) > 1:
2351 maybe_make_parens_invisible_in_atom(node.children[1])
2357 def is_empty_tuple(node: LN) -> bool:
2358 """Return True if `node` holds an empty tuple."""
2360 node.type == syms.atom
2361 and len(node.children) == 2
2362 and node.children[0].type == token.LPAR
2363 and node.children[1].type == token.RPAR
2367 def is_one_tuple(node: LN) -> bool:
2368 """Return True if `node` holds a tuple with one element, with or without parens."""
2369 if node.type == syms.atom:
2370 if len(node.children) != 3:
2373 lpar, gexp, rpar = node.children
2375 lpar.type == token.LPAR
2376 and gexp.type == syms.testlist_gexp
2377 and rpar.type == token.RPAR
2381 return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
2384 node.type in IMPLICIT_TUPLE
2385 and len(node.children) == 2
2386 and node.children[1].type == token.COMMA
2390 def is_yield(node: LN) -> bool:
2391 """Return True if `node` holds a `yield` or `yield from` expression."""
2392 if node.type == syms.yield_expr:
2395 if node.type == token.NAME and node.value == "yield": # type: ignore
2398 if node.type != syms.atom:
2401 if len(node.children) != 3:
2404 lpar, expr, rpar = node.children
2405 if lpar.type == token.LPAR and rpar.type == token.RPAR:
2406 return is_yield(expr)
2411 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
2412 """Return True if `leaf` is a star or double star in a vararg or kwarg.
2414 If `within` includes VARARGS_PARENTS, this applies to function signatures.
2415 If `within` includes UNPACKING_PARENTS, it applies to right hand-side
2416 extended iterable unpacking (PEP 3132) and additional unpacking
2417 generalizations (PEP 448).
2419 if leaf.type not in STARS or not leaf.parent:
2423 if p.type == syms.star_expr:
2424 # Star expressions are also used as assignment targets in extended
2425 # iterable unpacking (PEP 3132). See what its parent is instead.
2431 return p.type in within
2434 def is_stub_suite(node: Node) -> bool:
2435 """Return True if `node` is a suite with a stub body."""
2437 len(node.children) != 4
2438 or node.children[0].type != token.NEWLINE
2439 or node.children[1].type != token.INDENT
2440 or node.children[3].type != token.DEDENT
2444 return is_stub_body(node.children[2])
2447 def is_stub_body(node: LN) -> bool:
2448 """Return True if `node` is a simple statement containing an ellipsis."""
2449 if not isinstance(node, Node) or node.type != syms.simple_stmt:
2452 if len(node.children) != 2:
2455 child = node.children[0]
2457 child.type == syms.atom
2458 and len(child.children) == 3
2459 and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
2463 def max_delimiter_priority_in_atom(node: LN) -> int:
2464 """Return maximum delimiter priority inside `node`.
2466 This is specific to atoms with contents contained in a pair of parentheses.
2467 If `node` isn't an atom or there are no enclosing parentheses, returns 0.
2469 if node.type != syms.atom:
2472 first = node.children[0]
2473 last = node.children[-1]
2474 if not (first.type == token.LPAR and last.type == token.RPAR):
2477 bt = BracketTracker()
2478 for c in node.children[1:-1]:
2479 if isinstance(c, Leaf):
2482 for leaf in c.leaves():
2485 return bt.max_delimiter_priority()
2491 def ensure_visible(leaf: Leaf) -> None:
2492 """Make sure parentheses are visible.
2494 They could be invisible as part of some statements (see
2495 :func:`normalize_invible_parens` and :func:`visit_import_from`).
2497 if leaf.type == token.LPAR:
2499 elif leaf.type == token.RPAR:
2503 def is_python36(node: Node) -> bool:
2504 """Return True if the current file is using Python 3.6+ features.
2506 Currently looking for:
2508 - trailing commas after * or ** in function signatures and calls.
2510 for n in node.pre_order():
2511 if n.type == token.STRING:
2512 value_head = n.value[:2] # type: ignore
2513 if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
2517 n.type in {syms.typedargslist, syms.arglist}
2519 and n.children[-1].type == token.COMMA
2521 for ch in n.children:
2522 if ch.type in STARS:
2525 if ch.type == syms.argument:
2526 for argch in ch.children:
2527 if argch.type in STARS:
# NOTE(review): elided listing — several original lines (e.g. 2535, 2538,
# 2540-2542, 2544-2545, 2554-2555, 2560, 2562-2563, 2566, 2568, 2574,
# 2576-2577, 2583-2584, 2586, 2590) are missing, and the flattened indentation
# makes the two `elif leaf.type in CLOSING_BRACKETS:` lines (2571 and 2573)
# appear duplicated — in the full source they are presumably at different
# nesting levels. Confirm against the complete file.
2533 def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
2534 """Generate sets of closing bracket IDs that should be omitted in a RHS.
2536 Brackets can be omitted if the entire trailer up to and including
2537 a preceding closing bracket fits in one line.
2539 Yielded sets are cumulative (contain results of previous yields, too). First
2543 omit: Set[LeafID] = set()
# Running rendered length, starting from the line's indentation.
2546 length = 4 * line.depth
2547 opening_bracket = None
2548 closing_bracket = None
2549 optional_brackets: Set[LeafID] = set()
2550 inner_brackets: Set[LeafID] = set()
# Walk the leaves right-to-left, accumulating length as we go.
2551 for index, leaf in enumerate_reversed(line.leaves):
2552 length += len(leaf.prefix) + len(leaf.value)
2553 if length > line_length:
2556 comment: Optional[Leaf]
2557 for comment in line.comments_after(leaf, index):
2558 if "\n" in comment.prefix:
2559 break # Oops, standalone comment!
2561 length += len(comment.value)
2564 if comment is not None:
2565 break # There was a standalone comment, we can't continue.
2567 optional_brackets.discard(id(leaf))
2569 if leaf is opening_bracket:
2570 opening_bracket = None
2571 elif leaf.type in CLOSING_BRACKETS:
2572 inner_brackets.add(id(leaf))
2573 elif leaf.type in CLOSING_BRACKETS:
2575 optional_brackets.add(id(opening_bracket))
2578 if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
2579 # Empty brackets would fail a split so treat them as "inner"
2580 # brackets (e.g. only add them to the `omit` set if another
2581 # pair of brackets was good enough.
2582 inner_brackets.add(id(leaf))
2585 opening_bracket = leaf.opening_bracket
# Yielded sets are cumulative, so previously found inner brackets are
# folded into `omit` before it is (presumably) yielded again.
2587 omit.add(id(closing_bracket))
2588 omit.update(inner_brackets)
2589 inner_brackets.clear()
2591 closing_bracket = leaf
# NOTE(review): elided listing — the `imports` set initialization (2596), the
# early `break`/`continue` statements (e.g. 2599, 2603, 2607-2610, 2614, 2619,
# 2624-2626) and the final `return imports` are not shown; confirm against the
# complete source.
2594 def get_future_imports(node: Node) -> Set[str]:
2595 """Return a set of __future__ imports in the file."""
# Only top-level simple statements can be __future__ imports, and they
# must precede any other code (apart from a docstring).
2597 for child in node.children:
2598 if child.type != syms.simple_stmt:
2600 first_child = child.children[0]
2601 if isinstance(first_child, Leaf):
2602 # Continue looking if we see a docstring; otherwise stop.
2604 len(child.children) == 2
2605 and first_child.type == token.STRING
2606 and child.children[1].type == token.NEWLINE
2611 elif first_child.type == syms.import_from:
2612 module_name = first_child.children[1]
2613 if not isinstance(module_name, Leaf) or module_name.value != "__future__":
# Children [3:] are the imported names (skipping "from x import").
2615 for import_from_child in first_child.children[3:]:
2616 if isinstance(import_from_child, Leaf):
2617 if import_from_child.type == token.NAME:
2618 imports.add(import_from_child.value)
2620 assert import_from_child.type == syms.import_as_names
2621 for leaf in import_from_child.children:
2622 if isinstance(leaf, Leaf) and leaf.type == token.NAME:
2623 imports.add(leaf.value)
# File extensions treated as Python sources (regular modules and stubs).
2629 PYTHON_EXTENSIONS = {".py", ".pyi"}
# Directory names skipped entirely during file discovery (build artifacts,
# VCS metadata, tool caches, virtualenvs).
# NOTE(review): the closing brace of this set (original line ~2632) is elided
# from this listing.
2630 BLACKLISTED_DIRECTORIES = {
2631 "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
# NOTE(review): elided listing — the docstring close (2638), the `continue`
# after the blacklist check (~2642), the `child.is_dir()` test (~2643) and the
# `yield child` for matching files (~2647) are not shown; confirm against the
# complete source.
2635 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
2636 """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
2637 and have one of the PYTHON_EXTENSIONS.
2639 for child in path.iterdir():
2641 if child.name in BLACKLISTED_DIRECTORIES:
# Recurse into non-blacklisted subdirectories.
2644 yield from gen_python_files_in_dir(child)
2646 elif child.is_file() and child.suffix in PYTHON_EXTENSIONS:
# NOTE(review): the class statement itself (~line 2650-2651, presumably a
# @dataclass `Report`) and some fields (the methods reference `self.check` and
# `self.same_count`, declared on elided lines 2653-2656) are missing from this
# listing; confirm against the complete source.
2652 """Provides a reformatting counter. Can be rendered with `str(report)`."""
# Number of files that were (or would be) reformatted.
2655 change_count: int = 0
# Number of files that could not be reformatted due to an error.
2657 failure_count: int = 0
# NOTE(review): elided listing — lines 2663, 2666-2667 and 2670 are missing
# (presumably quiet-mode checks and the `elif`/`else` joining the two visible
# branches); confirm against the complete source.
2659 def done(self, src: Path, changed: Changed) -> None:
2660 """Increment the counter for successful reformatting. Write out a message."""
2661 if changed is Changed.YES:
# In --check mode nothing is written back, so phrase it conditionally.
2662 reformatted = "would reformat" if self.check else "reformatted"
2664 out(f"{reformatted} {src}")
2665 self.change_count += 1
2668 if changed is Changed.NO:
2669 msg = f"{src} already well formatted, good job."
# Changed.CACHED case: the file was skipped thanks to the cache.
2671 msg = f"{src} wasn't modified on disk since last run."
2672 out(msg, bold=False)
2673 self.same_count += 1
2675 def failed(self, src: Path, message: str) -> None:
2676 """Increment the counter for failed reformatting. Write out a message."""
# Failures go to stderr in red via the module-level `err` helper.
2677 err(f"error: cannot format {src}: {message}")
2678 self.failure_count += 1
# NOTE(review): elided listing — the decorator line (2680, presumably
# @property), the docstring close, and the actual `return 123` / `return 1` /
# `return 0` statements (2692-2697) are not shown; the docstring documents the
# intended values. Confirm against the complete source.
2681 def return_code(self) -> int:
2682 """Return the exit code that the app should use.
2684 This considers the current state of changed files and failures:
2685 - if there were any failures, return 123;
2686 - if any files were changed and --check is being used, return 1;
2687 - otherwise return 0.
2689 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
2690 # 126 we have special returncodes reserved by the shell.
2691 if self.failure_count:
2694 elif self.change_count and self.check:
# NOTE(review): elided listing — the `if self.check:` / `else:` lines selecting
# between the two verb sets (2704, 2708), the `report` list initialization
# (2712), the `report.append(...)` wrappers around the click.style calls
# (2715, 2723) and the `if self.same_count:` guard (~2718) are missing;
# confirm against the complete source.
2699 def __str__(self) -> str:
2700 """Render a color report of the current state.
2702 Use `click.unstyle` to remove colors.
# --check mode reports in the conditional mood; normal mode in past tense.
2705 reformatted = "would be reformatted"
2706 unchanged = "would be left unchanged"
2707 failed = "would fail to reformat"
2709 reformatted = "reformatted"
2710 unchanged = "left unchanged"
2711 failed = "failed to reformat"
2713 if self.change_count:
# Pluralize "file" only when the count warrants it.
2714 s = "s" if self.change_count > 1 else ""
2716 click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
2719 s = "s" if self.same_count > 1 else ""
2720 report.append(f"{self.same_count} file{s} {unchanged}")
2721 if self.failure_count:
2722 s = "s" if self.failure_count > 1 else ""
2724 click.style(f"{self.failure_count} file{s} {failed}", fg="red")
2726 return ", ".join(report) + "."
# NOTE(review): elided listing — the `import ast`/`import traceback` lines
# (2731-2733), the `try:` lines preceding both parses (2760, 2770), several
# loop/else lines inside `_v` (2738, 2740, 2743-2744, 2746, 2748, 2751,
# 2754-2755, 2757) and the closing `) from None` of the raise statements are
# missing; confirm against the complete source.
2729 def assert_equivalent(src: str, dst: str) -> None:
2730 """Raise AssertionError if `src` and `dst` aren't equivalent."""
2735 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
2736 """Simple visitor generating strings to compare ASTs by content."""
# Emit an indented, deterministic textual rendering of the AST; fields
# are sorted so the comparison is order-independent.
2737 yield f"{' ' * depth}{node.__class__.__name__}("
2739 for field in sorted(node._fields):
2741 value = getattr(node, field)
2742 except AttributeError:
2745 yield f"{' ' * (depth+1)}{field}="
2747 if isinstance(value, list):
2749 if isinstance(item, ast.AST):
2750 yield from _v(item, depth + 2)
2752 elif isinstance(value, ast.AST):
2753 yield from _v(value, depth + 2)
# Leaf values rendered with repr() plus their type for disambiguation.
2756 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
2758 yield f"{' ' * depth}) # /{node.__class__.__name__}"
# Parse failure on the *source* means the input is not valid Python 3:
# refuse to run in --safe mode rather than risk a bad comparison.
2761 src_ast = ast.parse(src)
2762 except Exception as exc:
2763 major, minor = sys.version_info[:2]
2764 raise AssertionError(
2765 f"cannot use --safe with this file; failed to parse source file "
2766 f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
2767 f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
# Parse failure on the *destination* is a Black bug: dump the output for
# the bug report.
2771 dst_ast = ast.parse(dst)
2772 except Exception as exc:
2773 log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
2774 raise AssertionError(
2775 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
2776 f"Please report a bug on https://github.com/ambv/black/issues. "
2777 f"This invalid output might be helpful: {log}"
# Compare the two ASTs by their rendered text; any difference means the
# reformatting changed semantics.
2780 src_ast_str = "\n".join(_v(src_ast))
2781 dst_ast_str = "\n".join(_v(dst_ast))
2782 if src_ast_str != dst_ast_str:
2783 log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
2784 raise AssertionError(
2785 f"INTERNAL ERROR: Black produced code that is not equivalent to "
2787 f"Please report a bug on https://github.com/ambv/black/issues. "
2788 f"This diff might be helpful: {log}"
# NOTE(review): elided listing — the `if dst != newdst:` guard and the
# `log = dump_to_file(` opener (2795-2796) plus the closing parentheses are
# missing; confirm against the complete source.
2792 def assert_stable(src: str, dst: str, line_length: int, is_pyi: bool = False) -> None:
2793 """Raise AssertionError if `dst` reformats differently the second time."""
# Idempotence check: formatting the already-formatted output must be a
# no-op, otherwise Black is unstable on this input.
2794 newdst = format_str(dst, line_length=line_length, is_pyi=is_pyi)
2797 diff(src, dst, "source", "first pass"),
2798 diff(dst, newdst, "first pass", "second pass"),
2800 raise AssertionError(
2801 f"INTERNAL ERROR: Black produced different code on the second pass "
2802 f"of the formatter. "
2803 f"Please report a bug on https://github.com/ambv/black/issues. "
2804 f"This diff might be helpful: {log}"
# NOTE(review): elided listing — the `import tempfile` (2810-2811), the
# `) as f:` closing the context manager (2814), the `f.write(...)` calls and
# the final `return f.name` are not shown; confirm against the complete
# source.
2808 def dump_to_file(*output: str) -> str:
2809 """Dump `output` to a temporary file. Return path to the file."""
# delete=False so the file survives for the user to inspect/report.
2812 with tempfile.NamedTemporaryFile(
2813 mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
2815 for lines in output:
# Presumably ensures each chunk ends with a newline separator.
2817 if lines and lines[-1] != "\n":
# NOTE(review): elided listing — the `import difflib` (2824-2825) and the
# `return "".join(` wrapper around the unified_diff call (2828, 2830-2831)
# are not shown; confirm against the complete source.
2822 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
2823 """Return a unified diff string between strings `a` and `b`."""
# Re-append "\n" because str.split drops the separators and
# difflib.unified_diff expects lines that keep their terminators.
2826 a_lines = [line + "\n" for line in a.split("\n")]
2827 b_lines = [line + "\n" for line in b.split("\n")]
# n=5 lines of context on each side of every hunk.
2829 difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
# NOTE(review): elided listing — the entire body (~2835-2837, presumably an
# err(...) message and a loop calling task.cancel()) is missing; confirm
# against the complete source.
2833 def cancel(tasks: Iterable[asyncio.Task]) -> None:
2834 """asyncio signal handler that cancels all `tasks` and reports to stderr."""
# NOTE(review): elided listing — the `try:` opener (2842), the empty-list
# early return (~2845-2846), the `task.cancel()` body (2849), the closing of
# the gather call (2852) and the `finally:` block performing `loop.close()`
# are not shown; confirm against the complete source.
2840 def shutdown(loop: BaseEventLoop) -> None:
2841 """Cancel all pending tasks on `loop`, wait for them, and close the loop."""
2843 # This part is borrowed from asyncio/runners.py in Python 3.7b2.
2844 to_cancel = [task for task in asyncio.Task.all_tasks(loop) if not task.done()]
2848 for task in to_cancel:
# Wait for the cancellations to propagate; return_exceptions=True so a
# failing task doesn't abort the gather.
2850 loop.run_until_complete(
2851 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)
2854 # `concurrent.futures.Future` objects cannot be cancelled once they
2855 # are already running. There might be some when the `shutdown()` happened.
2856 # Silence their logger's spew about the event loop being closed.
2857 cf_logger = logging.getLogger("concurrent.futures")
2858 cf_logger.setLevel(logging.CRITICAL)
2862 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
2863 """Replace `regex` with `replacement` twice on `original`.
2865 This is used by string normalization to perform replaces on
2866 overlapping matches.
# A single re.sub pass cannot rewrite matches that overlap a previous
# replacement site, hence the double application.
2868 return regex.sub(replacement, regex.sub(replacement, original))
# NOTE(review): elided listing — the line following the yield (original 2876,
# presumably `index -= 1`) is missing from this view; as shown the index would
# never decrease, so confirm against the complete source.
2871 def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
2872 """Like `reversed(enumerate(sequence))` if that were possible."""
# Start from the last valid index and walk the sequence back-to-front.
2873 index = len(sequence) - 1
2874 for element in reversed(sequence):
2875 yield (index, element)
# NOTE(review): elided listing — the docstring close (2881-2883), the
# `if not line_str:` guard before the rendering (2884) and the `return (`
# opener (2886) are missing; confirm against the complete source.
2879 def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
2880 """Return True if `line` is no longer than `line_length`.
2882 Uses the provided `line_str` rendering, if any, otherwise computes a new one.
2885 line_str = str(line).strip("\n")
2887 len(line_str) <= line_length
2888 and "\n" not in line_str # multiline strings
2889 and not line.contains_standalone_comments()
# Per-user, per-Black-version cache directory (via appdirs), so caches from
# different Black releases never collide.
2893 CACHE_DIR = Path(user_cache_dir("black", version=__version__))
2896 def get_cache_file(line_length: int) -> Path:
# One cache file per line-length setting: results formatted at a
# different width must not be reused.
2897 return CACHE_DIR / f"cache.{line_length}.pickle"
# NOTE(review): elided listing — the docstring close (2904), the `return {}`
# for a missing file (~2907), the `try:` opener (2910), the `return {}` on an
# unpickling error (~2913) and the final `return cache` are not shown; confirm
# against the complete source.
2900 def read_cache(line_length: int) -> Cache:
2901 """Read the cache if it exists and is well formed.
2903 If it is not well formed, the call to write_cache later should resolve the issue.
2905 cache_file = get_cache_file(line_length)
2906 if not cache_file.exists():
2909 with cache_file.open("rb") as fobj:
2911 cache: Cache = pickle.load(fobj)
# A corrupt cache is treated as absent, not as a fatal error.
2912 except pickle.UnpicklingError:
# NOTE(review): elided listing — the `stat = path.stat()` line (original 2920)
# providing the `stat` local is missing; confirm against the complete source.
2918 def get_cache_info(path: Path) -> CacheInfo:
2919 """Return the information used to check if a file is already formatted or not."""
# mtime + size is a cheap change-detection fingerprint (no content hash).
2921 return stat.st_mtime, stat.st_size
# NOTE(review): elided listing — the `def` line itself (original 2924; the
# function name is not visible here, presumably `filter_cached`), the
# docstring close, the result-list initializations, the `for src in sources:`
# loop header, the appends to the two lists and the final return are all
# missing; confirm against the complete source.
2925 cache: Cache, sources: Iterable[Path]
2926 ) -> Tuple[List[Path], List[Path]]:
2927 """Split a list of paths into two.
2929 The first list contains paths of files that were modified on disk or are not in
2930 the cache. The other list contains paths to non-modified files.
# A path is "todo" when its recorded (mtime, size) differs from disk.
2935 if cache.get(src) != get_cache_info(src):
# NOTE(review): elided listing — original line 2945 (presumably a `try:`
# wrapping the filesystem work) and the lines after 2950 (presumably an
# `except OSError: pass` making cache writes best-effort) are missing; confirm
# against the complete source.
2942 def write_cache(cache: Cache, sources: List[Path], line_length: int) -> None:
2943 """Update the cache file."""
2944 cache_file = get_cache_file(line_length)
2946 if not CACHE_DIR.exists():
2947 CACHE_DIR.mkdir(parents=True)
# Merge new entries over the old cache; resolve() normalizes the keys.
2948 new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
2949 with cache_file.open("wb") as fobj:
2950 pickle.dump(new_cache, fobj, protocol=pickle.HIGHEST_PROTOCOL)
2955 if __name__ == "__main__":