All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
4 from asyncio.base_events import BaseEventLoop
5 from concurrent.futures import Executor, ProcessPoolExecutor
7 from functools import partial, wraps
10 from multiprocessing import Manager
12 from pathlib import Path
33 from attr import dataclass, Factory
37 from blib2to3.pytree import Node, Leaf, type_repr
38 from blib2to3 import pygram, pytree
39 from blib2to3.pgen2 import driver, token
40 from blib2to3.pgen2.parse import ParseError
42 __version__ = "18.4a0"
43 DEFAULT_LINE_LENGTH = 88
45 syms = pygram.python_symbols
53 LN = Union[Leaf, Node]
54 SplitFunc = Callable[["Line", bool], Iterator["Line"]]
55 out = partial(click.secho, bold=True, err=True)
56 err = partial(click.secho, fg="red", err=True)
59 class NothingChanged(UserWarning):
60 """Raised by :func:`format_file` when reformatted code is the same as source."""
63 class CannotSplit(Exception):
64 """A readable split that fits the allotted line length is impossible.
66 Raised by :func:`left_hand_split`, :func:`right_hand_split`, and
67 :func:`delimiter_split`.
71 class FormatError(Exception):
72 """Base exception for `# fmt: on` and `# fmt: off` handling.
74 It holds the number of bytes of the prefix consumed before the format
75 control comment appeared.
78 def __init__(self, consumed: int) -> None:
79 super().__init__(consumed)
80 self.consumed = consumed
82 def trim_prefix(self, leaf: Leaf) -> None:
83 leaf.prefix = leaf.prefix[self.consumed:]
85 def leaf_from_consumed(self, leaf: Leaf) -> Leaf:
86 """Returns a new Leaf from the consumed part of the prefix."""
87 unformatted_prefix = leaf.prefix[:self.consumed]
88 return Leaf(token.NEWLINE, unformatted_prefix)
91 class FormatOn(FormatError):
92 """Found a comment like `# fmt: on` in the file."""
95 class FormatOff(FormatError):
96 """Found a comment like `# fmt: off` in the file."""
99 class WriteBack(Enum):
110 default=DEFAULT_LINE_LENGTH,
111 help="How many character per line to allow.",
118 "Don't write the files back, just return the status. Return code 0 "
119 "means nothing would change. Return code 1 means some files would be "
120 "reformatted. Return code 123 means there was an internal error."
126 help="Don't write the files back, just output a diff for each file on stdout.",
131 help="If --fast given, skip temporary sanity checks. [default: --safe]",
138 "Don't emit non-error messages to stderr. Errors are still emitted, "
139 "silence those with 2>/dev/null."
142 @click.version_option(version=__version__)
147 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
160 """The uncompromising code formatter."""
161 sources: List[Path] = []
165 sources.extend(gen_python_files_in_dir(p))
167 # if a file was explicitly given, we don't care about its extension
170 sources.append(Path("-"))
172 err(f"invalid path: {s}")
174 exc = click.ClickException("Options --check and --diff are mutually exclusive")
179 write_back = WriteBack.NO
181 write_back = WriteBack.DIFF
183 write_back = WriteBack.YES
184 if len(sources) == 0:
186 elif len(sources) == 1:
188 report = Report(check=check, quiet=quiet)
190 if not p.is_file() and str(p) == "-":
191 changed = format_stdin_to_stdout(
192 line_length=line_length, fast=fast, write_back=write_back
195 changed = format_file_in_place(
196 p, line_length=line_length, fast=fast, write_back=write_back
198 report.done(p, changed)
199 except Exception as exc:
200 report.failed(p, str(exc))
201 ctx.exit(report.return_code)
203 loop = asyncio.get_event_loop()
204 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
207 return_code = loop.run_until_complete(
209 sources, line_length, write_back, fast, quiet, loop, executor
214 ctx.exit(return_code)
217 async def schedule_formatting(
220 write_back: WriteBack,
226 """Run formatting of `sources` in parallel using the provided `executor`.
228 (Use ProcessPoolExecutors for actual parallelism.)
230 `line_length`, `write_back`, and `fast` options are passed to
231 :func:`format_file_in_place`.
234 if write_back == WriteBack.DIFF:
235 # For diff output, we need locks to ensure we don't interleave output
236 # from different processes.
238 lock = manager.Lock()
240 src: loop.run_in_executor(
241 executor, format_file_in_place, src, line_length, fast, write_back, lock
245 _task_values = list(tasks.values())
246 loop.add_signal_handler(signal.SIGINT, cancel, _task_values)
247 loop.add_signal_handler(signal.SIGTERM, cancel, _task_values)
248 await asyncio.wait(tasks.values())
250 report = Report(check=write_back is WriteBack.NO, quiet=quiet)
251 for src, task in tasks.items():
253 report.failed(src, "timed out, cancelling")
255 cancelled.append(task)
256 elif task.cancelled():
257 cancelled.append(task)
258 elif task.exception():
259 report.failed(src, str(task.exception()))
261 report.done(src, task.result())
263 await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
265 out("All done! ✨ 🍰 ✨")
267 click.echo(str(report))
268 return report.return_code
271 def format_file_in_place(
275 write_back: WriteBack = WriteBack.NO,
276 lock: Any = None, # multiprocessing.Manager().Lock() is some crazy proxy
278 """Format file under `src` path. Return True if changed.
280 If `write_back` is YES, write reformatted code back to the file; if it is DIFF, write a diff to stdout.
281 `line_length` and `fast` options are passed to :func:`format_file_contents`.
283 with tokenize.open(src) as src_buffer:
284 src_contents = src_buffer.read()
286 dst_contents = format_file_contents(
287 src_contents, line_length=line_length, fast=fast
289 except NothingChanged:
292 if write_back == write_back.YES:
293 with open(src, "w", encoding=src_buffer.encoding) as f:
294 f.write(dst_contents)
295 elif write_back == write_back.DIFF:
296 src_name = f"{src.name} (original)"
297 dst_name = f"{src.name} (formatted)"
298 diff_contents = diff(src_contents, dst_contents, src_name, dst_name)
302 sys.stdout.write(diff_contents)
309 def format_stdin_to_stdout(
310 line_length: int, fast: bool, write_back: WriteBack = WriteBack.NO
312 """Format file on stdin. Return True if changed.
314 If `write_back` is YES, write reformatted code back to stdout; if it is DIFF, write a diff to stdout instead.
315 `line_length` and `fast` arguments are passed to :func:`format_file_contents`.
317 src = sys.stdin.read()
319 dst = format_file_contents(src, line_length=line_length, fast=fast)
322 except NothingChanged:
327 if write_back == WriteBack.YES:
328 sys.stdout.write(dst)
329 elif write_back == WriteBack.DIFF:
330 src_name = "<stdin> (original)"
331 dst_name = "<stdin> (formatted)"
332 sys.stdout.write(diff(src, dst, src_name, dst_name))
335 def format_file_contents(
336 src_contents: str, line_length: int, fast: bool
338 """Reformat contents of a file and return new contents.
340 If `fast` is False, additionally confirm that the reformatted code is
341 valid by calling :func:`assert_equivalent` and :func:`assert_stable` on it.
342 `line_length` is passed to :func:`format_str`.
344 if src_contents.strip() == "":
347 dst_contents = format_str(src_contents, line_length=line_length)
348 if src_contents == dst_contents:
352 assert_equivalent(src_contents, dst_contents)
353 assert_stable(src_contents, dst_contents, line_length=line_length)
357 def format_str(src_contents: str, line_length: int) -> FileContent:
358 """Reformat a string and return new contents.
360 `line_length` determines how many characters per line are allowed.
362 src_node = lib2to3_parse(src_contents)
364 lines = LineGenerator()
365 elt = EmptyLineTracker()
366 py36 = is_python36(src_node)
369 for current_line in lines.visit(src_node):
370 for _ in range(after):
371 dst_contents += str(empty_line)
372 before, after = elt.maybe_empty_lines(current_line)
373 for _ in range(before):
374 dst_contents += str(empty_line)
375 for line in split_line(current_line, line_length=line_length, py36=py36):
376 dst_contents += str(line)
381 pygram.python_grammar_no_print_statement_no_exec_statement,
382 pygram.python_grammar_no_print_statement,
383 pygram.python_grammar_no_exec_statement,
384 pygram.python_grammar,
388 def lib2to3_parse(src_txt: str) -> Node:
389 """Given a string with source, return the lib2to3 Node."""
390 grammar = pygram.python_grammar_no_print_statement
391 if src_txt[-1] != "\n":
392 nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
394 for grammar in GRAMMARS:
395 drv = driver.Driver(grammar, pytree.convert)
397 result = drv.parse_string(src_txt, True)
400 except ParseError as pe:
401 lineno, column = pe.context[1]
402 lines = src_txt.splitlines()
404 faulty_line = lines[lineno - 1]
406 faulty_line = "<line number missing in source>"
407 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
411 if isinstance(result, Leaf):
412 result = Node(syms.file_input, [result])
416 def lib2to3_unparse(node: Node) -> str:
417 """Given a lib2to3 node, return its string representation."""
425 class Visitor(Generic[T]):
426 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
428 def visit(self, node: LN) -> Iterator[T]:
429 """Main method to visit `node` and its children.
431 It tries to find a `visit_*()` method for the given `node.type`, like
432 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
433 If no dedicated `visit_*()` method is found, chooses `visit_default()`
436 Then yields objects of type `T` from the selected visitor.
439 name = token.tok_name[node.type]
441 name = type_repr(node.type)
442 yield from getattr(self, f"visit_{name}", self.visit_default)(node)
444 def visit_default(self, node: LN) -> Iterator[T]:
445 """Default `visit_*()` implementation. Recurses to children of `node`."""
446 if isinstance(node, Node):
447 for child in node.children:
448 yield from self.visit(child)
452 class DebugVisitor(Visitor[T]):
455 def visit_default(self, node: LN) -> Iterator[T]:
456 indent = " " * (2 * self.tree_depth)
457 if isinstance(node, Node):
458 _type = type_repr(node.type)
459 out(f"{indent}{_type}", fg="yellow")
461 for child in node.children:
462 yield from self.visit(child)
465 out(f"{indent}/{_type}", fg="yellow", bold=False)
467 _type = token.tok_name.get(node.type, str(node.type))
468 out(f"{indent}{_type}", fg="blue", nl=False)
470 # We don't have to handle prefixes for `Node` objects since
471 # that delegates to the first child anyway.
472 out(f" {node.prefix!r}", fg="green", bold=False, nl=False)
473 out(f" {node.value!r}", fg="blue", bold=False)
476 def show(cls, code: str) -> None:
477 """Pretty-print the lib2to3 AST of a given string of `code`.
479 Convenience method for debugging.
481 v: DebugVisitor[None] = DebugVisitor()
482 list(v.visit(lib2to3_parse(code)))
485 KEYWORDS = set(keyword.kwlist)
486 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
487 FLOW_CONTROL = {"return", "raise", "break", "continue"}
498 STANDALONE_COMMENT = 153
499 LOGIC_OPERATORS = {"and", "or"}
523 VARARGS = {token.STAR, token.DOUBLESTAR}
524 COMPREHENSION_PRIORITY = 20
528 COMPARATOR_PRIORITY = 3
533 class BracketTracker:
534 """Keeps track of brackets on a line."""
537 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
538 delimiters: Dict[LeafID, Priority] = Factory(dict)
539 previous: Optional[Leaf] = None
541 def mark(self, leaf: Leaf) -> None:
542 """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
544 All leaves receive an int `bracket_depth` field that stores how deep
545 within brackets a given leaf is. 0 means there are no enclosing brackets
546 that started on this line.
548 If a leaf is itself a closing bracket, it receives an `opening_bracket`
549 field that it forms a pair with. This is a one-directional link to
550 avoid reference cycles.
552 If a leaf is a delimiter (a token on which Black can split the line if
553 needed) and it's on depth 0, its `id()` is stored in the tracker's
556 if leaf.type == token.COMMENT:
559 if leaf.type in CLOSING_BRACKETS:
561 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
562 leaf.opening_bracket = opening_bracket
563 leaf.bracket_depth = self.depth
565 delim = is_split_before_delimiter(leaf, self.previous)
566 if delim and self.previous is not None:
567 self.delimiters[id(self.previous)] = delim
569 delim = is_split_after_delimiter(leaf, self.previous)
571 self.delimiters[id(leaf)] = delim
572 if leaf.type in OPENING_BRACKETS:
573 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
577 def any_open_brackets(self) -> bool:
578 """Return True if there is a yet unmatched open bracket on the line."""
579 return bool(self.bracket_match)
581 def max_delimiter_priority(self, exclude: Iterable[LeafID] = ()) -> int:
582 """Return the highest priority of a delimiter found on the line.
584 Values are consistent with what `is_split_before_delimiter()` and `is_split_after_delimiter()` return.
586 return max(v for k, v in self.delimiters.items() if k not in exclude)
591 """Holds leaves and comments. Can be printed with `str(line)`."""
594 leaves: List[Leaf] = Factory(list)
595 comments: List[Tuple[Index, Leaf]] = Factory(list)
596 bracket_tracker: BracketTracker = Factory(BracketTracker)
597 inside_brackets: bool = False
598 has_for: bool = False
599 _for_loop_variable: bool = False
601 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
602 """Add a new `leaf` to the end of the line.
604 Unless `preformatted` is True, the `leaf` will receive a new consistent
605 whitespace prefix and metadata applied by :class:`BracketTracker`.
606 Trailing commas are maybe removed, unpacked for loop variables are
607 demoted from being delimiters.
609 Inline comments are put aside.
611 has_value = leaf.value.strip()
615 if self.leaves and not preformatted:
616 # Note: at this point leaf.prefix should be empty except for
617 # imports, for which we only preserve newlines.
618 leaf.prefix += whitespace(leaf)
619 if self.inside_brackets or not preformatted:
620 self.maybe_decrement_after_for_loop_variable(leaf)
621 self.bracket_tracker.mark(leaf)
622 self.maybe_remove_trailing_comma(leaf)
623 self.maybe_increment_for_loop_variable(leaf)
625 if not self.append_comment(leaf):
626 self.leaves.append(leaf)
628 def append_safe(self, leaf: Leaf, preformatted: bool = False) -> None:
629 """Like :func:`append()` but disallow invalid standalone comment structure.
631 Raises ValueError when any `leaf` is appended after a standalone comment
632 or when a standalone comment is not the first leaf on the line.
634 if self.bracket_tracker.depth == 0:
636 raise ValueError("cannot append to standalone comments")
638 if self.leaves and leaf.type == STANDALONE_COMMENT:
640 "cannot append standalone comments to a populated line"
643 self.append(leaf, preformatted=preformatted)
646 def is_comment(self) -> bool:
647 """Is this line a standalone comment?"""
648 return len(self.leaves) == 1 and self.leaves[0].type == STANDALONE_COMMENT
651 def is_decorator(self) -> bool:
652 """Is this line a decorator?"""
653 return bool(self) and self.leaves[0].type == token.AT
656 def is_import(self) -> bool:
657 """Is this an import line?"""
658 return bool(self) and is_import(self.leaves[0])
661 def is_class(self) -> bool:
662 """Is this line a class definition?"""
665 and self.leaves[0].type == token.NAME
666 and self.leaves[0].value == "class"
670 def is_def(self) -> bool:
671 """Is this a function definition? (Also returns True for async defs.)"""
673 first_leaf = self.leaves[0]
678 second_leaf: Optional[Leaf] = self.leaves[1]
682 (first_leaf.type == token.NAME and first_leaf.value == "def")
684 first_leaf.type == token.ASYNC
685 and second_leaf is not None
686 and second_leaf.type == token.NAME
687 and second_leaf.value == "def"
692 def is_flow_control(self) -> bool:
693 """Is this line a flow control statement?
695 Those are `return`, `raise`, `break`, and `continue`.
699 and self.leaves[0].type == token.NAME
700 and self.leaves[0].value in FLOW_CONTROL
704 def is_yield(self) -> bool:
705 """Is this line a yield statement?"""
708 and self.leaves[0].type == token.NAME
709 and self.leaves[0].value == "yield"
713 def contains_standalone_comments(self) -> bool:
714 """If so, needs to be split before emitting."""
715 for leaf in self.leaves:
716 if leaf.type == STANDALONE_COMMENT:
721 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
722 """Remove trailing comma if there is one and it's safe."""
725 and self.leaves[-1].type == token.COMMA
726 and closing.type in CLOSING_BRACKETS
730 if closing.type == token.RBRACE:
731 self.remove_trailing_comma()
734 if closing.type == token.RSQB:
735 comma = self.leaves[-1]
736 if comma.parent and comma.parent.type == syms.listmaker:
737 self.remove_trailing_comma()
740 # For parens let's check if it's safe to remove the comma. If the
741 # trailing one is the only one, we might mistakenly change a tuple
742 # into a different type by removing the comma.
743 depth = closing.bracket_depth + 1
745 opening = closing.opening_bracket
746 for _opening_index, leaf in enumerate(self.leaves):
753 for leaf in self.leaves[_opening_index + 1:]:
757 bracket_depth = leaf.bracket_depth
758 if bracket_depth == depth and leaf.type == token.COMMA:
760 if leaf.parent and leaf.parent.type == syms.arglist:
765 self.remove_trailing_comma()
770 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
771 """In a for loop, or comprehension, the variables are often unpacks.
773 To avoid splitting on the comma in this situation, increase the depth of
774 tokens between `for` and `in`.
776 if leaf.type == token.NAME and leaf.value == "for":
778 self.bracket_tracker.depth += 1
779 self._for_loop_variable = True
784 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
785 """See `maybe_increment_for_loop_variable` above for explanation."""
786 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == "in":
787 self.bracket_tracker.depth -= 1
788 self._for_loop_variable = False
793 def append_comment(self, comment: Leaf) -> bool:
794 """Add an inline or standalone comment to the line."""
796 comment.type == STANDALONE_COMMENT
797 and self.bracket_tracker.any_open_brackets()
802 if comment.type != token.COMMENT:
805 after = len(self.leaves) - 1
807 comment.type = STANDALONE_COMMENT
812 self.comments.append((after, comment))
815 def comments_after(self, leaf: Leaf) -> Iterator[Leaf]:
816 """Generate comments that should appear directly after `leaf`."""
817 for _leaf_index, _leaf in enumerate(self.leaves):
824 for index, comment_after in self.comments:
825 if _leaf_index == index:
828 def remove_trailing_comma(self) -> None:
829 """Remove the trailing comma and move the comments attached to it."""
830 comma_index = len(self.leaves) - 1
831 for i in range(len(self.comments)):
832 comment_index, comment = self.comments[i]
833 if comment_index == comma_index:
834 self.comments[i] = (comma_index - 1, comment)
837 def __str__(self) -> str:
838 """Render the line."""
842 indent = " " * self.depth
843 leaves = iter(self.leaves)
845 res = f"{first.prefix}{indent}{first.value}"
848 for _, comment in self.comments:
852 def __bool__(self) -> bool:
853 """Return True if the line has leaves or comments."""
854 return bool(self.leaves or self.comments)
857 class UnformattedLines(Line):
858 """Just like :class:`Line` but stores lines which aren't reformatted."""
860 def append(self, leaf: Leaf, preformatted: bool = True) -> None:
861 """Just add a new `leaf` to the end of the lines.
863 The `preformatted` argument is ignored.
865 Keeps track of indentation `depth`, which is useful when the user
866 says `# fmt: on`. Otherwise, doesn't do anything with the `leaf`.
869 list(generate_comments(leaf))
870 except FormatOn as f_on:
871 self.leaves.append(f_on.leaf_from_consumed(leaf))
874 self.leaves.append(leaf)
875 if leaf.type == token.INDENT:
877 elif leaf.type == token.DEDENT:
880 def __str__(self) -> str:
881 """Render unformatted lines from leaves which were added with `append()`.
883 `depth` is not used for indentation in this case.
889 for leaf in self.leaves:
893 def append_comment(self, comment: Leaf) -> bool:
894 """Not implemented in this class. Raises `NotImplementedError`."""
895 raise NotImplementedError("Unformatted lines don't store comments separately.")
897 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
898 """Does nothing and returns False."""
901 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
902 """Does nothing and returns False."""
907 class EmptyLineTracker:
908 """Provides a stateful method that returns the number of potential extra
909 empty lines needed before and after the currently processed line.
911 Note: this tracker works on lines that haven't been split yet. It assumes
912 the prefix of the first leaf consists of optional newlines. Those newlines
913 are consumed by `maybe_empty_lines()` and included in the computation.
915 previous_line: Optional[Line] = None
916 previous_after: int = 0
917 previous_defs: List[int] = Factory(list)
919 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
920 """Return the number of extra empty lines before and after the `current_line`.
922 This is for separating `def`, `async def` and `class` with extra empty
923 lines (two on module-level), as well as providing an extra empty line
924 after flow control keywords to make them more prominent.
926 if isinstance(current_line, UnformattedLines):
929 before, after = self._maybe_empty_lines(current_line)
930 before -= self.previous_after
931 self.previous_after = after
932 self.previous_line = current_line
935 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
937 if current_line.depth == 0:
939 if current_line.leaves:
940 # Consume the first leaf's extra newlines.
941 first_leaf = current_line.leaves[0]
942 before = first_leaf.prefix.count("\n")
943 before = min(before, max_allowed)
944 first_leaf.prefix = ""
947 depth = current_line.depth
948 while self.previous_defs and self.previous_defs[-1] >= depth:
949 self.previous_defs.pop()
950 before = 1 if depth else 2
951 is_decorator = current_line.is_decorator
952 if is_decorator or current_line.is_def or current_line.is_class:
954 self.previous_defs.append(depth)
955 if self.previous_line is None:
956 # Don't insert empty lines before the first line in the file.
959 if self.previous_line and self.previous_line.is_decorator:
960 # Don't insert empty lines between decorators.
964 if current_line.depth:
968 if current_line.is_flow_control:
973 and self.previous_line.is_import
974 and not current_line.is_import
975 and depth == self.previous_line.depth
977 return (before or 1), 0
981 and self.previous_line.is_yield
982 and (not current_line.is_yield or depth != self.previous_line.depth)
984 return (before or 1), 0
990 class LineGenerator(Visitor[Line]):
991 """Generates reformatted Line objects. Empty lines are not emitted.
993 Note: destroys the tree it's visiting by mutating prefixes of its leaves
994 in ways that will no longer stringify to valid Python code on the tree.
996 current_line: Line = Factory(Line)
998 def line(self, indent: int = 0, type: Type[Line] = Line) -> Iterator[Line]:
1001 If the line is empty, only emit if it makes sense.
1002 If the line is too long, split it first and then generate.
1004 If any lines were generated, set up a new current_line.
1006 if not self.current_line:
1007 if self.current_line.__class__ == type:
1008 self.current_line.depth += indent
1010 self.current_line = type(depth=self.current_line.depth + indent)
1011 return # Line is empty, don't emit. Creating a new one unnecessary.
1013 complete_line = self.current_line
1014 self.current_line = type(depth=complete_line.depth + indent)
1017 def visit(self, node: LN) -> Iterator[Line]:
1018 """Main method to visit `node` and its children.
1020 Yields :class:`Line` objects.
1022 if isinstance(self.current_line, UnformattedLines):
1023 # File contained `# fmt: off`
1024 yield from self.visit_unformatted(node)
1027 yield from super().visit(node)
1029 def visit_default(self, node: LN) -> Iterator[Line]:
1030 """Default `visit_*()` implementation. Recurses to children of `node`."""
1031 if isinstance(node, Leaf):
1032 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
1034 for comment in generate_comments(node):
1035 if any_open_brackets:
1036 # any comment within brackets is subject to splitting
1037 self.current_line.append(comment)
1038 elif comment.type == token.COMMENT:
1039 # regular trailing comment
1040 self.current_line.append(comment)
1041 yield from self.line()
1044 # regular standalone comment
1045 yield from self.line()
1047 self.current_line.append(comment)
1048 yield from self.line()
1050 except FormatOff as f_off:
1051 f_off.trim_prefix(node)
1052 yield from self.line(type=UnformattedLines)
1053 yield from self.visit(node)
1055 except FormatOn as f_on:
1056 # This only happens here if somebody says "fmt: on" multiple
1058 f_on.trim_prefix(node)
1059 yield from self.visit_default(node)
1062 normalize_prefix(node, inside_brackets=any_open_brackets)
1063 if node.type == token.STRING:
1064 normalize_string_quotes(node)
1065 if node.type not in WHITESPACE:
1066 self.current_line.append(node)
1067 yield from super().visit_default(node)
1069 def visit_INDENT(self, node: Node) -> Iterator[Line]:
1070 """Increase indentation level, maybe yield a line."""
1071 # In blib2to3 INDENT never holds comments.
1072 yield from self.line(+1)
1073 yield from self.visit_default(node)
1075 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
1076 """Decrease indentation level, maybe yield a line."""
1077 # DEDENT has no value. Additionally, in blib2to3 it never holds comments.
1078 yield from self.line(-1)
1080 def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
1081 """Visit a statement.
1083 This implementation is shared for `if`, `while`, `for`, `try`, `except`,
1084 `def`, `with`, and `class`.
1086 The relevant Python language `keywords` for a given statement will be NAME
1087 leaves within it. This method puts those on a separate line.
1089 for child in node.children:
1090 if child.type == token.NAME and child.value in keywords: # type: ignore
1091 yield from self.line()
1093 yield from self.visit(child)
1095 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
1096 """Visit a statement without nested statements."""
1097 is_suite_like = node.parent and node.parent.type in STATEMENT
1099 yield from self.line(+1)
1100 yield from self.visit_default(node)
1101 yield from self.line(-1)
1104 yield from self.line()
1105 yield from self.visit_default(node)
1107 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
1108 """Visit `async def`, `async for`, `async with`."""
1109 yield from self.line()
1111 children = iter(node.children)
1112 for child in children:
1113 yield from self.visit(child)
1115 if child.type == token.ASYNC:
1118 internal_stmt = next(children)
1119 for child in internal_stmt.children:
1120 yield from self.visit(child)
1122 def visit_decorators(self, node: Node) -> Iterator[Line]:
1123 """Visit decorators."""
1124 for child in node.children:
1125 yield from self.line()
1126 yield from self.visit(child)
1128 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
1129 """Remove a semicolon and put the other statement on a separate line."""
1130 yield from self.line()
1132 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
1133 """End of file. Process outstanding comments and end with a newline."""
1134 yield from self.visit_default(leaf)
1135 yield from self.line()
1137 def visit_unformatted(self, node: LN) -> Iterator[Line]:
1138 """Used when file contained a `# fmt: off`."""
1139 if isinstance(node, Node):
1140 for child in node.children:
1141 yield from self.visit(child)
1145 self.current_line.append(node)
1146 except FormatOn as f_on:
1147 f_on.trim_prefix(node)
1148 yield from self.line()
1149 yield from self.visit(node)
1151 if node.type == token.ENDMARKER:
1152 # somebody decided not to put a final `# fmt: on`
1153 yield from self.line()
1155 def __attrs_post_init__(self) -> None:
1156 """You are in a twisty little maze of passages."""
1158 self.visit_if_stmt = partial(v, keywords={"if", "else", "elif"})
1159 self.visit_while_stmt = partial(v, keywords={"while", "else"})
1160 self.visit_for_stmt = partial(v, keywords={"for", "else"})
1161 self.visit_try_stmt = partial(v, keywords={"try", "except", "else", "finally"})
1162 self.visit_except_clause = partial(v, keywords={"except"})
1163 self.visit_funcdef = partial(v, keywords={"def"})
1164 self.visit_with_stmt = partial(v, keywords={"with"})
1165 self.visit_classdef = partial(v, keywords={"class"})
1166 self.visit_async_funcdef = self.visit_async_stmt
1167 self.visit_decorated = self.visit_decorators
1170 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
1171 OPENING_BRACKETS = set(BRACKET.keys())
1172 CLOSING_BRACKETS = set(BRACKET.values())
1173 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
1174 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
1177 def whitespace(leaf: Leaf) -> str: # noqa C901
1178 """Return whitespace prefix if needed for the given `leaf`."""
1185 if t in ALWAYS_NO_SPACE:
1188 if t == token.COMMENT:
1191 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
1192 if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}:
1195 prev = leaf.prev_sibling
1197 prevp = preceding_leaf(p)
1198 if not prevp or prevp.type in OPENING_BRACKETS:
1201 if t == token.COLON:
1202 return SPACE if prevp.type == token.COMMA else NO
1204 if prevp.type == token.EQUAL:
1206 if prevp.parent.type in {
1207 syms.arglist, syms.argument, syms.parameters, syms.varargslist
1211 elif prevp.parent.type == syms.typedargslist:
1212 # A bit hacky: if the equal sign has whitespace, it means we
1213 # previously found it's a typed argument. So, we're using
1217 elif prevp.type == token.DOUBLESTAR:
1218 if prevp.parent and prevp.parent.type in {
1228 elif prevp.type == token.COLON:
1229 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
1234 and prevp.parent.type in {syms.factor, syms.star_expr}
1235 and prevp.type in MATH_OPERATORS
1240 prevp.type == token.RIGHTSHIFT
1242 and prevp.parent.type == syms.shift_expr
1243 and prevp.prev_sibling
1244 and prevp.prev_sibling.type == token.NAME
1245 and prevp.prev_sibling.value == "print" # type: ignore
1247 # Python 2 print chevron
1250 elif prev.type in OPENING_BRACKETS:
1253 if p.type in {syms.parameters, syms.arglist}:
1254 # untyped function signatures or calls
1258 if not prev or prev.type != token.COMMA:
1261 elif p.type == syms.varargslist:
1266 if prev and prev.type != token.COMMA:
1269 elif p.type == syms.typedargslist:
1270 # typed function signatures
1274 if t == token.EQUAL:
1275 if prev.type != syms.tname:
1278 elif prev.type == token.EQUAL:
1279 # A bit hacky: if the equal sign has whitespace, it means we
1280 # previously found it's a typed argument. So, we're using that, too.
1283 elif prev.type != token.COMMA:
1286 elif p.type == syms.tname:
1289 prevp = preceding_leaf(p)
1290 if not prevp or prevp.type != token.COMMA:
1293 elif p.type == syms.trailer:
1294 # attributes and calls
1295 if t == token.LPAR or t == token.RPAR:
1300 prevp = preceding_leaf(p)
1301 if not prevp or prevp.type != token.NUMBER:
1304 elif t == token.LSQB:
1307 elif prev.type != token.COMMA:
1310 elif p.type == syms.argument:
1312 if t == token.EQUAL:
1316 prevp = preceding_leaf(p)
1317 if not prevp or prevp.type == token.LPAR:
1320 elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
1323 elif p.type == syms.decorator:
1327 elif p.type == syms.dotted_name:
1331 prevp = preceding_leaf(p)
1332 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1335 elif p.type == syms.classdef:
1339 if prev and prev.type == token.LPAR:
1342 elif p.type == syms.subscript:
1345 assert p.parent is not None, "subscripts are always parented"
1346 if p.parent.type == syms.subscriptlist:
1354 elif p.type == syms.atom:
1355 if prev and t == token.DOT:
1356 # dots, but not the first one.
1360 p.type == syms.listmaker
1361 or p.type == syms.testlist_gexp
1362 or p.type == syms.subscriptlist
1364 # list interior, including unpacking
1368 elif p.type == syms.dictsetmaker:
1369 # dict and set interior, including unpacking
1373 if prev.type == token.DOUBLESTAR:
1376 elif p.type in {syms.factor, syms.star_expr}:
1379 prevp = preceding_leaf(p)
1380 if not prevp or prevp.type in OPENING_BRACKETS:
1383 prevp_parent = prevp.parent
1384 assert prevp_parent is not None
1385 if prevp.type == token.COLON and prevp_parent.type in {
1386 syms.subscript, syms.sliceop
1390 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1393 elif t == token.NAME or t == token.NUMBER:
1396 elif p.type == syms.import_from:
1398 if prev and prev.type == token.DOT:
1401 elif t == token.NAME:
1405 if prev and prev.type == token.DOT:
1408 elif p.type == syms.sliceop:
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
    """Return the first leaf that precedes `node`, if any.

    Walks up from `node` through its parents until one of them has a previous
    sibling.  If that sibling is a leaf, it is returned directly; otherwise the
    last leaf inside the sibling is returned, or None when it contains none.
    Returns None as well when no ancestor has a previous sibling.
    """
    while node:
        sibling = node.prev_sibling
        if sibling:
            if isinstance(sibling, Leaf):
                return sibling

            # Only the final leaf is needed, so walk the iterable instead of
            # materializing every leaf in a list.
            last = None
            for last in sibling.leaves():
                pass
            return last

        node = node.parent
    return None
def is_split_after_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break after it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break after themselves.

    `previous` is accepted but not consulted here.

    Higher numbers are higher priority.  Return 0 if `leaf` is not such a
    delimiter.
    """
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    return 0
def is_split_before_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
    """Return the priority of the `leaf` delimiter, given a line break before it.

    The delimiter priorities returned here are from those delimiters that would
    cause a line break before themselves.

    `previous` is the leaf that comes right before `leaf`, if known; it is only
    used to detect two adjacent string literals.

    Higher numbers are higher priority.  Return 0 if `leaf` is not such a
    delimiter.
    """
    if (
        leaf.type in VARARGS
        and leaf.parent
        and leaf.parent.type in {syms.argument, syms.typedargslist}
    ):
        # * and ** might also be MATH_OPERATORS but in this case they are not.
        # Don't treat them as a delimiter.
        return 0

    if (
        leaf.type in MATH_OPERATORS
        and leaf.parent
        and leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITY

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        leaf.type == token.STRING
        and previous is not None
        and previous.type == token.STRING
    ):
        # Two string literals in a row.
        return STRING_PRIORITY

    if (
        leaf.type == token.NAME
        and leaf.value == "for"
        and leaf.parent
        and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
    ):
        return COMPREHENSION_PRIORITY

    if (
        leaf.type == token.NAME
        and leaf.value == "if"
        and leaf.parent
        and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
    ):
        return COMPREHENSION_PRIORITY

    if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS and leaf.parent:
        return LOGIC_PRIORITY

    return 0
def is_delimiter(leaf: Leaf, previous: Optional[Leaf] = None) -> int:
    """Return the priority of the `leaf` delimiter. Return 0 if not delimiter.

    The result is the larger of the "split before" and "split after"
    priorities computed for `leaf`.

    Higher numbers are higher priority.
    """
    return max(
        is_split_before_delimiter(leaf, previous),
        is_split_after_delimiter(leaf, previous),
    )
def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.

    Raises :exc:`FormatOn` or :exc:`FormatOff` right after yielding a format
    control comment; the exception carries the number of prefix characters
    consumed up to and including that comment's line.
    """
    prefix = leaf.prefix
    if "#" not in prefix:
        # Covers the empty prefix too: no comments to generate.
        return

    consumed = 0
    nlines = 0
    for index, line in enumerate(prefix.split("\n")):
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            continue

        if index == 0 and leaf.type != token.ENDMARKER:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        # Blank lines seen since the previous comment become the new prefix.
        yield Leaf(comment_type, comment, prefix="\n" * nlines)

        if comment in {"# fmt: on", "# yapf: enable"}:
            raise FormatOn(consumed)

        if comment in {"# fmt: off", "# yapf: disable"}:
            if comment_type == STANDALONE_COMMENT:
                raise FormatOff(consumed)

            prev = preceding_leaf(leaf)
            if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
                raise FormatOff(consumed)

        nlines = 0
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:") should have a single space between
    the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.  Trailing
    whitespace is removed; empty or whitespace-only `content` yields "#".
    """
    content = content.rstrip()
    if not content:
        # Nothing left to index into below; an empty comment is just the hash.
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Keep "#!", "#:", "##" and an already present space untouched.
    if content and content[0] not in " !:#":
        content = " " + content
    return "#" + content
def split_line(
    line: Line, line_length: int, inner: bool = False, py36: bool = False
) -> Iterator[Line]:
    """Split a `line` into potentially many lines.

    They should fit in the allotted `line_length` but might not be able to.
    `inner` signifies that there were a pair of brackets somewhere around the
    current `line`, possibly transitively.  This means we can fallback to splitting
    by delimiters if the LHS/RHS don't yield any results.

    If `py36` is True, splitting may generate syntax that is only compatible
    with Python 3.6 and later.

    If no split function succeeds, `line` is yielded unchanged.
    """
    if isinstance(line, UnformattedLines) or line.is_comment:
        yield line
        return

    line_str = str(line).strip("\n")
    if (
        len(line_str) <= line_length
        and "\n" not in line_str  # multiline strings
        and not line.contains_standalone_comments
    ):
        yield line
        return

    split_funcs: List[SplitFunc]
    if line.is_def:
        split_funcs = [left_hand_split]
    elif line.inside_brackets:
        split_funcs = [delimiter_split, standalone_comment_split, right_hand_split]
    else:
        split_funcs = [right_hand_split]
    for split_func in split_funcs:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
        result: List[Line] = []
        try:
            for split in split_func(line, py36):
                if str(split).strip("\n") == line_str:
                    raise CannotSplit("Split function returned an unchanged result")

                result.extend(
                    split_line(split, line_length=line_length, inner=True, py36=py36)
                )
        except CannotSplit:
            # This strategy failed; try the next one.
            continue

        else:
            yield from result
            break

    else:
        # Every strategy raised CannotSplit: emit the line as it was.
        yield line
def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split line into many lines, starting with the first matching bracket pair.

    Leaves up to and including the first opening bracket go to the head, the
    leaves up to its matching closing bracket go to the body (one level deeper),
    and the closing bracket with everything after it goes to the tail.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.

    `py36` is unused here; it is part of the common split function signature.
    Raises :exc:`CannotSplit` via :func:`bracket_split_succeeded_or_raise`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = head_leaves
    matching_bracket = None
    for leaf in line.leaves:
        if (
            current_leaves is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
        ):
            # With nothing collected for the body, keep extending the head.
            current_leaves = tail_leaves if body_leaves else head_leaves
        current_leaves.append(leaf)
        if current_leaves is head_leaves and leaf.type in OPENING_BRACKETS:
            matching_bracket = leaf
            current_leaves = body_leaves
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    # Build the new lines.
    for result, leaves in (
        (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
    ):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    for result in (head, body, tail):
        if result:
            yield result
def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split line into many lines, starting with the last matching bracket pair.

    The leaves are scanned right to left: everything from the end back to the
    last closing bracket forms the tail, the leaves back to its opening bracket
    form the body (one level deeper), and the rest forms the head.

    `py36` is unused here; it is part of the common split function signature.
    Raises :exc:`CannotSplit` via :func:`bracket_split_succeeded_or_raise`.
    """
    head = Line(depth=line.depth)
    body = Line(depth=line.depth + 1, inside_brackets=True)
    tail = Line(depth=line.depth)
    tail_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    head_leaves: List[Leaf] = []
    current_leaves = tail_leaves
    opening_bracket = None
    for leaf in reversed(line.leaves):
        if current_leaves is body_leaves and leaf is opening_bracket:
            # With nothing collected for the body, keep extending the tail.
            current_leaves = head_leaves if body_leaves else tail_leaves
        current_leaves.append(leaf)
        if current_leaves is tail_leaves and leaf.type in CLOSING_BRACKETS:
            opening_bracket = leaf.opening_bracket
            current_leaves = body_leaves
    # The lists were filled back to front; restore source order.
    tail_leaves.reverse()
    body_leaves.reverse()
    head_leaves.reverse()
    # Since body is a new indent level, remove spurious leading whitespace.
    if body_leaves:
        normalize_prefix(body_leaves[0], inside_brackets=True)
    # Build the new lines.
    for result, leaves in (
        (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
    ):
        for leaf in leaves:
            result.append(leaf, preformatted=True)
            for comment_after in line.comments_after(leaf):
                result.append(comment_after, preformatted=True)
    bracket_split_succeeded_or_raise(head, body, tail)
    for result in (head, body, tail):
        if result:
            yield result
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Do nothing otherwise.

    A left- or right-hand split is based on a pair of brackets.  Content before
    (and including) the opening bracket is left on one line, content inside the
    brackets is put on a separate line, and finally content starting with and
    following the closing bracket is put on a separate line.

    Those are called `head`, `body`, and `tail`, respectively.  If the split
    produced the same line (all content in `head`) or ended up with an empty `body`
    and the `tail` is just the closing bracket, then it's considered failed.
    """
    tail_len = len(str(tail).strip())
    if not body:
        if tail_len == 0:
            raise CannotSplit("Splitting brackets produced the same line")

        elif tail_len < 3:
            raise CannotSplit(
                f"Splitting brackets on an empty body to save "
                f"{tail_len} characters is not worth it"
            )
def dont_increase_indentation(split_func: SplitFunc) -> SplitFunc:
    """Normalize prefix of the first leaf in every line returned by `split_func`.

    This is a decorator over relevant split functions.  The returned wrapper
    has the same signature as `split_func` and yields the same lines after
    normalizing each one's first leaf as if it were inside brackets.
    """

    @wraps(split_func)
    def split_wrapper(line: Line, py36: bool = False) -> Iterator[Line]:
        for new_line in split_func(line, py36):
            normalize_prefix(new_line.leaves[0], inside_brackets=True)
            yield new_line

    return split_wrapper
@dont_increase_indentation
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If `py36` is True, the split will add trailing commas also in function
    signatures that contain `*` and `**`.

    Raises :exc:`CannotSplit` when `line` has no leaves or no delimiters.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty") from None

    delimiters = line.bracket_tracker.delimiters
    try:
        delimiter_priority = line.bracket_tracker.max_delimiter_priority(
            exclude={id(last_leaf)}
        )
    except ValueError:
        raise CannotSplit("No delimiters found") from None

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        # The depth check must guard both `*` and `**`.  Written without
        # parentheses, `and` binds tighter than `or`, so a `**` at any depth
        # (e.g. a nested power operator) would disable the trailing comma.
        if leaf.bracket_depth == lowest_depth and (
            leaf.type == token.STAR or leaf.type == token.DOUBLESTAR
        ):
            trailing_comma_safe = trailing_comma_safe and py36
        leaf_priority = delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        if (
            trailing_comma_safe
            and delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and current_line.leaves[-1].type != STANDALONE_COMMENT
        ):
            current_line.append(Leaf(token.COMMA, ","))
        yield current_line
@dont_increase_indentation
def standalone_comment_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split standalone comments from the rest of the line.

    Raises :exc:`CannotSplit` when `line` has no standalone comment at bracket
    depth 0.  `py36` is unused here; it is part of the common split function
    signature.
    """
    for leaf in line.leaves:
        if leaf.type == STANDALONE_COMMENT and leaf.bracket_depth == 0:
            break

    else:
        raise CannotSplit("Line does not have any standalone comments")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
            current_line.append(leaf)

    for leaf in line.leaves:
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

    if current_line:
        yield current_line
def is_import(leaf: Leaf) -> bool:
    """Return True if the given leaf starts an import statement.

    That is, `leaf` is the NAME `import` whose parent is an `import_name` node,
    or the NAME `from` whose parent is an `import_from` node.
    """
    p = leaf.parent
    t = leaf.type
    v = leaf.value
    return bool(
        t == token.NAME
        and (
            (v == "import" and p and p.type == syms.import_name)
            or (v == "from" and p and p.type == syms.import_from)
        )
    )
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`. Remove everything
    else.

    Note: don't use backslashes for formatting or you'll lose your voting rights.

    Mutates `leaf.prefix` in place.
    """
    if not inside_brackets:
        spl = leaf.prefix.split("#")
        if "\\" not in spl[0]:
            # Only newlines after the last comment (if any) are kept.
            nl_count = spl[-1].count("\n")
            if len(spl) > 1:
                # Presumably discounts the newline that ends the comment itself.
                nl_count -= 1
            leaf.prefix = "\n" * nl_count
            return

    leaf.prefix = ""
def normalize_string_quotes(leaf: Leaf) -> None:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate.  Doesn't parse and fix
    strings nested in f-strings (yet).

    Note: Mutates its argument.
    """
    value = leaf.value.lstrip("furbFURB")
    if value[:3] == '"""':
        return

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = leaf.value.find(orig_quote)
    if first_quote_pos == -1:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
    body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
    unescaped_new_quote = re.compile(r"(([^\\]|^)(\\\\)*)" + new_quote)
    escaped_orig_quote = re.compile(r"\\(\\\\)*" + orig_quote)
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        new_body = escaped_orig_quote.sub(f"\\1{orig_quote}", body)
        new_body = unescaped_new_quote.sub(f"\\1\\\\{new_quote}", new_body)
    # `endswith` instead of `[-1]`: the body of an empty string such as
    # `''''''` is "", and indexing it would raise IndexError.
    if new_quote == '"""' and new_body.endswith('"'):
        # edge case: a trailing `"` would merge with the closing `"""`.
        new_body = new_body[:-1] + '\\"'
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return  # Prefer double quotes

    leaf.value = f"{prefix}{new_quote}{new_body}{new_quote}"
def is_python36(node: Node) -> bool:
    """Return True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # Prefix letters may be in any case and order ("Rf", "fR", ...),
            # so compare the lowercased first two characters.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", "rf", "fr"}:
                return True

        elif (
            n.type == syms.typedargslist
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            for ch in n.children:
                if ch.type == token.STAR or ch.type == token.DOUBLESTAR:
                    return True

    return False
# File suffixes treated as Python sources when walking a directory.
PYTHON_EXTENSIONS = {".py"}
# Directory names that are never descended into.
BLACKLISTED_DIRECTORIES = {
    "build", "buck-out", "dist", "_build", ".git", ".hg", ".mypy_cache", ".tox", ".venv"
}


def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Generate all files under `path` which aren't under BLACKLISTED_DIRECTORIES
    and have one of the PYTHON_EXTENSIONS.

    Directories are walked recursively.  Only regular files are yielded, so
    entries such as dangling symlinks that merely carry a matching suffix are
    skipped.
    """
    for child in path.iterdir():
        if child.is_dir():
            if child.name in BLACKLISTED_DIRECTORIES:
                continue

            yield from gen_python_files_in_dir(child)

        elif child.is_file() and child.suffix in PYTHON_EXTENSIONS:
            yield child
@dataclass
class Report:
    """Provides a reformatting counter. Can be rendered with `str(report)`."""
    # When set, messages are phrased as what *would* happen and changed files
    # affect the return code.
    check: bool = False
    # When set, per-file success messages are suppressed.
    quiet: bool = False
    change_count: int = 0
    same_count: int = 0
    failure_count: int = 0

    def done(self, src: Path, changed: bool) -> None:
        """Increment the counter for successful reformatting. Write out a message."""
        if changed:
            reformatted = "would reformat" if self.check else "reformatted"
            if not self.quiet:
                out(f"{reformatted} {src}")
            self.change_count += 1
        else:
            if not self.quiet:
                out(f"{src} already well formatted, good job.", bold=False)
            self.same_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Increment the counter for failed reformatting. Write out a message."""
        err(f"error: cannot format {src}: {message}")
        self.failure_count += 1

    @property
    def return_code(self) -> int:
        """Return the exit code that the app should use.

        This considers the current state of changed files and failures:
        - if there were any failures, return 123;
        - if any files were changed and --check is being used, return 1;
        - otherwise return 0.
        """
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special returncodes reserved by the shell.
        if self.failure_count:
            return 123

        elif self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """Render a color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        if self.check:
            reformatted = "would be reformatted"
            unchanged = "would be left unchanged"
            failed = "would fail to reformat"
        else:
            reformatted = "reformatted"
            unchanged = "left unchanged"
            failed = "failed to reformat"
        report = []
        if self.change_count:
            s = "s" if self.change_count > 1 else ""
            report.append(
                click.style(f"{self.change_count} file{s} {reformatted}", bold=True)
            )
        if self.same_count:
            s = "s" if self.same_count > 1 else ""
            report.append(f"{self.same_count} file{s} {unchanged}")
        if self.failure_count:
            s = "s" if self.failure_count > 1 else ""
            report.append(
                click.style(f"{self.failure_count} file{s} {failed}", fg="red")
            )
        return ", ".join(report) + "."
def assert_equivalent(src: str, dst: str) -> None:
    """Raise AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with the builtin `ast` module and their trees are
    compared through a textual rendering.  AssertionError is also raised when
    either string fails to parse.
    """

    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        # The parser's message is embedded below; its traceback adds nothing.
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        ) from None

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = "\n".join(_v(src_ast))
    dst_ast_str = "\n".join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Raise AssertionError if `dst` reformats differently the second time.

    `dst` is formatted again with the same `line_length`.  When the result
    differs, the diffs source -> first pass and first pass -> second pass are
    dumped to a log file whose path is included in the error message.
    """
    newdst = format_str(dst, line_length=line_length)
    if dst != newdst:
        log = dump_to_file(
            diff(src, dst, "source", "first pass"),
            diff(dst, newdst, "first pass", "second pass"),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def dump_to_file(*output: str) -> str:
    """Dump `output` to a temporary file. Return path to the file.

    Each string is written in order and terminated with a newline if it does
    not already end with one.  The file is not deleted on close, so the caller
    can point the user at it.
    """
    import tempfile

    # Write UTF-8 explicitly: the dumped source may contain characters the
    # locale's default encoding cannot represent.
    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as f:
        for lines in output:
            f.write(lines)
            if lines and not lines.endswith("\n"):
                f.write("\n")
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Return a unified diff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header.  Five lines
    of context are shown around each change.  The result is an empty string
    when `a` and `b` are equal.
    """
    import difflib

    a_lines = [line + "\n" for line in a.split("\n")]
    b_lines = [line + "\n" for line in b.split("\n")]
    return "".join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
def cancel(tasks: List[asyncio.Task]) -> None:
    """asyncio signal handler that cancels all `tasks` and reports to stderr."""
    err("Aborted!")
    for task in tasks:
        task.cancel()
def shutdown(loop: BaseEventLoop) -> None:
    """Cancel all pending tasks on `loop`, wait for them, and close the loop.

    The loop is closed even if waiting for the cancelled tasks raises.
    """
    try:
        # This part is borrowed from asyncio/runners.py in Python 3.7b2.
        # `asyncio.all_tasks()` replaced `asyncio.Task.all_tasks()`, which was
        # later removed; use whichever this interpreter provides.
        all_tasks = getattr(asyncio, "all_tasks", None)
        if all_tasks is None:
            all_tasks = asyncio.Task.all_tasks
        to_cancel = [task for task in all_tasks(loop) if not task.done()]
        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()
        # No `loop=` argument: it is no longer accepted by `gather()`, and the
        # tasks passed in already belong to `loop`.
        loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))
    finally:
        # `concurrent.futures.Future` objects cannot be cancelled once they
        # are already running. There might be some when the `shutdown()` happened.
        # Silence their logger's spew about the event loop being closed.
        cf_logger = logging.getLogger("concurrent.futures")
        cf_logger.setLevel(logging.CRITICAL)
        loop.close()
2207 if __name__ == "__main__":