All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you could read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
5 from functools import partial
8 from pathlib import Path
12 Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
15 from attr import dataclass, Factory
19 from blib2to3.pytree import Node, Leaf, type_repr
20 from blib2to3 import pygram, pytree
21 from blib2to3.pgen2 import driver, token
22 from blib2to3.pgen2.parse import ParseError
24 __version__ = "18.3a2"
25 DEFAULT_LINE_LENGTH = 88
27 syms = pygram.python_symbols
34 LN = Union[Leaf, Node]
35 out = partial(click.secho, bold=True, err=True)
36 err = partial(click.secho, fg='red', err=True)
39 class NothingChanged(UserWarning):
40 """Raised by `format_file` when the reformatted code is the same as source."""
43 class CannotSplit(Exception):
44 """A readable split that fits the allotted line length is impossible.
46 Raised by `left_hand_split()` and `right_hand_split()`.
55 default=DEFAULT_LINE_LENGTH,
56 help='How many character per line to allow.',
63 "Don't write back the files, just return the status. Return code 0 "
64 "means nothing changed. Return code 1 means some files were "
65 "reformatted. Return code 123 means there was an internal error."
71 help='If --fast given, skip temporary sanity checks. [default: --safe]',
73 @click.version_option(version=__version__)
77 type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True),
81 ctx: click.Context, line_length: int, check: bool, fast: bool, src: List[str]
83 """The uncompromising code formatter."""
84 sources: List[Path] = []
88 sources.extend(gen_python_files_in_dir(p))
90 # if a file was explicitly given, we don't care about its extension
93 err(f'invalid path: {s}')
96 elif len(sources) == 1:
100 changed = format_file_in_place(
101 p, line_length=line_length, fast=fast, write_back=not check
103 report.done(p, changed)
104 except Exception as exc:
105 report.failed(p, str(exc))
106 ctx.exit(report.return_code)
108 loop = asyncio.get_event_loop()
109 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
112 return_code = loop.run_until_complete(
114 sources, line_length, not check, fast, loop, executor
119 ctx.exit(return_code)
122 async def schedule_formatting(
131 src: loop.run_in_executor(
132 executor, format_file_in_place, src, line_length, fast, write_back
136 await asyncio.wait(tasks.values())
139 for src, task in tasks.items():
141 report.failed(src, 'timed out, cancelling')
143 cancelled.append(task)
144 elif task.exception():
145 report.failed(src, str(task.exception()))
147 report.done(src, task.result())
149 await asyncio.wait(cancelled, timeout=2)
150 out('All done! ✨ 🍰 ✨')
151 click.echo(str(report))
152 return report.return_code
155 def format_file_in_place(
156 src: Path, line_length: int, fast: bool, write_back: bool = False
158 """Format the file and rewrite if changed. Return True if changed."""
160 contents, encoding = format_file(src, line_length=line_length, fast=fast)
161 except NothingChanged:
165 with open(src, "w", encoding=encoding) as f:
171 src: Path, line_length: int, fast: bool
172 ) -> Tuple[FileContent, Encoding]:
173 """Reformats a file and returns its contents and encoding."""
174 with tokenize.open(src) as src_buffer:
175 src_contents = src_buffer.read()
176 if src_contents.strip() == '':
177 raise NothingChanged(src)
179 dst_contents = format_str(src_contents, line_length=line_length)
180 if src_contents == dst_contents:
181 raise NothingChanged(src)
184 assert_equivalent(src_contents, dst_contents)
185 assert_stable(src_contents, dst_contents, line_length=line_length)
186 return dst_contents, src_buffer.encoding
189 def format_str(src_contents: str, line_length: int) -> FileContent:
190 """Reformats a string and returns new contents."""
191 src_node = lib2to3_parse(src_contents)
193 comments: List[Line] = []
194 lines = LineGenerator()
195 elt = EmptyLineTracker()
196 py36 = is_python36(src_node)
199 for current_line in lines.visit(src_node):
200 for _ in range(after):
201 dst_contents += str(empty_line)
202 before, after = elt.maybe_empty_lines(current_line)
203 for _ in range(before):
204 dst_contents += str(empty_line)
205 if not current_line.is_comment:
206 for comment in comments:
207 dst_contents += str(comment)
209 for line in split_line(current_line, line_length=line_length, py36=py36):
210 dst_contents += str(line)
212 comments.append(current_line)
214 if elt.previous_defs:
215 # Separate postscriptum comments from the last module-level def.
216 dst_contents += str(empty_line)
217 dst_contents += str(empty_line)
218 for comment in comments:
219 dst_contents += str(comment)
223 def lib2to3_parse(src_txt: str) -> Node:
224 """Given a string with source, return the lib2to3 Node."""
225 grammar = pygram.python_grammar_no_print_statement
226 drv = driver.Driver(grammar, pytree.convert)
227 if src_txt[-1] != '\n':
228 nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
231 result = drv.parse_string(src_txt, True)
232 except ParseError as pe:
233 lineno, column = pe.context[1]
234 lines = src_txt.splitlines()
236 faulty_line = lines[lineno - 1]
238 faulty_line = "<line number missing in source>"
239 raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None
241 if isinstance(result, Leaf):
242 result = Node(syms.file_input, [result])
246 def lib2to3_unparse(node: Node) -> str:
247 """Given a lib2to3 node, return its string representation."""
255 class Visitor(Generic[T]):
256 """Basic lib2to3 visitor that yields things on visiting."""
258 def visit(self, node: LN) -> Iterator[T]:
260 name = token.tok_name[node.type]
262 name = type_repr(node.type)
263 yield from getattr(self, f'visit_{name}', self.visit_default)(node)
def visit_default(self, node: LN) -> Iterator[T]:
    """Default visitor: recurse into an internal node's children.

    Leaves have no children, so they yield nothing here.
    """
    if not isinstance(node, Node):
        return

    for child in node.children:
        yield from self.visit(child)
272 class DebugVisitor(Visitor[T]):
275 def visit_default(self, node: LN) -> Iterator[T]:
276 indent = ' ' * (2 * self.tree_depth)
277 if isinstance(node, Node):
278 _type = type_repr(node.type)
279 out(f'{indent}{_type}', fg='yellow')
281 for child in node.children:
282 yield from self.visit(child)
285 out(f'{indent}/{_type}', fg='yellow', bold=False)
287 _type = token.tok_name.get(node.type, str(node.type))
288 out(f'{indent}{_type}', fg='blue', nl=False)
290 # We don't have to handle prefixes for `Node` objects since
291 # that delegates to the first child anyway.
292 out(f' {node.prefix!r}', fg='green', bold=False, nl=False)
293 out(f' {node.value!r}', fg='blue', bold=False)
296 KEYWORDS = set(keyword.kwlist)
297 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
298 FLOW_CONTROL = {'return', 'raise', 'break', 'continue'}
309 STANDALONE_COMMENT = 153
310 LOGIC_OPERATORS = {'and', 'or'}
333 COMPREHENSION_PRIORITY = 20
337 COMPARATOR_PRIORITY = 3
342 class BracketTracker:
344 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
345 delimiters: Dict[LeafID, Priority] = Factory(dict)
346 previous: Optional[Leaf] = None
348 def mark(self, leaf: Leaf) -> None:
349 if leaf.type == token.COMMENT:
352 if leaf.type in CLOSING_BRACKETS:
354 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
355 leaf.opening_bracket = opening_bracket
356 leaf.bracket_depth = self.depth
358 delim = is_delimiter(leaf)
360 self.delimiters[id(leaf)] = delim
361 elif self.previous is not None:
362 if leaf.type == token.STRING and self.previous.type == token.STRING:
363 self.delimiters[id(self.previous)] = STRING_PRIORITY
365 leaf.type == token.NAME
366 and leaf.value == 'for'
368 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
370 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
372 leaf.type == token.NAME
373 and leaf.value == 'if'
375 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
377 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
379 leaf.type == token.NAME
380 and leaf.value in LOGIC_OPERATORS
383 self.delimiters[id(self.previous)] = LOGIC_PRIORITY
384 if leaf.type in OPENING_BRACKETS:
385 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
def any_open_brackets(self) -> bool:
    """Return True if at least one bracket on the line is still unmatched."""
    return len(self.bracket_match) > 0
def max_priority(self, exclude: Iterable[LeafID] = ()) -> int:
    """Return the highest priority of a delimiter found on the line.

    Values are consistent with what `is_delimiter()` returns.
    Raises ValueError (from `max()`) when no non-excluded delimiters exist.
    """
    candidates = (
        priority
        for leaf_id, priority in self.delimiters.items()
        if leaf_id not in exclude
    )
    return max(candidates)
404 leaves: List[Leaf] = Factory(list)
405 comments: Dict[LeafID, Leaf] = Factory(dict)
406 bracket_tracker: BracketTracker = Factory(BracketTracker)
407 inside_brackets: bool = False
408 has_for: bool = False
409 _for_loop_variable: bool = False
411 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
412 has_value = leaf.value.strip()
416 if self.leaves and not preformatted:
417 # Note: at this point leaf.prefix should be empty except for
418 # imports, for which we only preserve newlines.
419 leaf.prefix += whitespace(leaf)
420 if self.inside_brackets or not preformatted:
421 self.maybe_decrement_after_for_loop_variable(leaf)
422 self.bracket_tracker.mark(leaf)
423 self.maybe_remove_trailing_comma(leaf)
424 self.maybe_increment_for_loop_variable(leaf)
425 if self.maybe_adapt_standalone_comment(leaf):
428 if not self.append_comment(leaf):
429 self.leaves.append(leaf)
def is_comment(self) -> bool:
    """True iff this line consists of a standalone comment."""
    if not self:
        return False

    return self.leaves[0].type == STANDALONE_COMMENT
def is_decorator(self) -> bool:
    """True iff this line starts with an `@` (decorator) token."""
    if not self:
        return False

    return self.leaves[0].type == token.AT
def is_import(self) -> bool:
    """True iff this line's first leaf opens an import statement."""
    if not self:
        return False

    # Delegates to the module-level `is_import()` helper for the actual check.
    return is_import(self.leaves[0])
444 def is_class(self) -> bool:
447 and self.leaves[0].type == token.NAME
448 and self.leaves[0].value == 'class'
452 def is_def(self) -> bool:
453 """Also returns True for async defs."""
455 first_leaf = self.leaves[0]
460 second_leaf: Optional[Leaf] = self.leaves[1]
464 (first_leaf.type == token.NAME and first_leaf.value == 'def')
466 first_leaf.type == token.NAME
467 and first_leaf.value == 'async'
468 and second_leaf is not None
469 and second_leaf.type == token.NAME
470 and second_leaf.value == 'def'
475 def is_flow_control(self) -> bool:
478 and self.leaves[0].type == token.NAME
479 and self.leaves[0].value in FLOW_CONTROL
483 def is_yield(self) -> bool:
486 and self.leaves[0].type == token.NAME
487 and self.leaves[0].value == 'yield'
490 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
493 and self.leaves[-1].type == token.COMMA
494 and closing.type in CLOSING_BRACKETS
498 if closing.type == token.RSQB or closing.type == token.RBRACE:
502 # For parens let's check if it's safe to remove the comma. If the
503 # trailing one is the only one, we might mistakenly change a tuple
504 # into a different type by removing the comma.
505 depth = closing.bracket_depth + 1
507 opening = closing.opening_bracket
508 for _opening_index, leaf in enumerate(self.leaves):
515 for leaf in self.leaves[_opening_index + 1:]:
519 bracket_depth = leaf.bracket_depth
520 if bracket_depth == depth and leaf.type == token.COMMA:
522 if leaf.parent and leaf.parent.type == syms.arglist:
532 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
533 """In a for loop, or comprehension, the variables are often unpacks.
535 To avoid splitting on the comma in this situation, we will increase
536 the depth of tokens between `for` and `in`.
538 if leaf.type == token.NAME and leaf.value == 'for':
540 self.bracket_tracker.depth += 1
541 self._for_loop_variable = True
546 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
547 # See `maybe_increment_for_loop_variable` above for explanation.
548 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == 'in':
549 self.bracket_tracker.depth -= 1
550 self._for_loop_variable = False
555 def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
556 """Hack a standalone comment to act as a trailing comment for line splitting.
558 If this line has brackets and a standalone `comment`, we need to adapt
559 it to be able to still reformat the line.
561 This is not perfect, the line to which the standalone comment gets
562 appended will appear "too long" when splitting.
565 comment.type == STANDALONE_COMMENT
566 and self.bracket_tracker.any_open_brackets()
570 comment.type = token.COMMENT
571 comment.prefix = '\n' + ' ' * (self.depth + 1)
572 return self.append_comment(comment)
574 def append_comment(self, comment: Leaf) -> bool:
575 if comment.type != token.COMMENT:
579 after = id(self.last_non_delimiter())
581 comment.type = STANDALONE_COMMENT
586 if after in self.comments:
587 self.comments[after].value += str(comment)
589 self.comments[after] = comment
592 def last_non_delimiter(self) -> Leaf:
593 for i in range(len(self.leaves)):
594 last = self.leaves[-i - 1]
595 if not is_delimiter(last):
598 raise LookupError("No non-delimiters found")
600 def __str__(self) -> str:
604 indent = ' ' * self.depth
605 leaves = iter(self.leaves)
607 res = f'{first.prefix}{indent}{first.value}'
610 for comment in self.comments.values():
def __bool__(self) -> bool:
    """A line is truthy when it carries any leaves or any comments."""
    return bool(self.leaves) or bool(self.comments)
619 class EmptyLineTracker:
620 """Provides a stateful method that returns the number of potential extra
621 empty lines needed before and after the currently processed line.
623 Note: this tracker works on lines that haven't been split yet. It assumes
624 the prefix of the first leaf consists of optional newlines. Those newlines
625 are consumed by `maybe_empty_lines()` and included in the computation.
627 previous_line: Optional[Line] = None
628 previous_after: int = 0
629 previous_defs: List[int] = Factory(list)
631 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
632 """Returns the number of extra empty lines before and after the `current_line`.
634 This is for separating `def`, `async def` and `class` with extra empty lines
635 (two on module-level), as well as providing an extra empty line after flow
636 control keywords to make them more prominent.
638 if current_line.is_comment:
639 # Don't count standalone comments towards previous empty lines.
642 before, after = self._maybe_empty_lines(current_line)
643 before -= self.previous_after
644 self.previous_after = after
645 self.previous_line = current_line
648 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
649 if current_line.leaves:
650 # Consume the first leaf's extra newlines.
651 first_leaf = current_line.leaves[0]
652 before = int('\n' in first_leaf.prefix)
653 first_leaf.prefix = ''
656 depth = current_line.depth
657 while self.previous_defs and self.previous_defs[-1] >= depth:
658 self.previous_defs.pop()
659 before = 1 if depth else 2
660 is_decorator = current_line.is_decorator
661 if is_decorator or current_line.is_def or current_line.is_class:
663 self.previous_defs.append(depth)
664 if self.previous_line is None:
665 # Don't insert empty lines before the first line in the file.
668 if self.previous_line and self.previous_line.is_decorator:
669 # Don't insert empty lines between decorators.
673 if current_line.depth:
677 if current_line.is_flow_control:
682 and self.previous_line.is_import
683 and not current_line.is_import
684 and depth == self.previous_line.depth
686 return (before or 1), 0
690 and self.previous_line.is_yield
691 and (not current_line.is_yield or depth != self.previous_line.depth)
693 return (before or 1), 0
699 class LineGenerator(Visitor[Line]):
700 """Generates reformatted Line objects. Empty lines are not emitted.
702 Note: destroys the tree it's visiting by mutating prefixes of its leaves
703 in ways that will no longer stringify to valid Python code on the tree.
705 current_line: Line = Factory(Line)
706 standalone_comments: List[Leaf] = Factory(list)
708 def line(self, indent: int = 0) -> Iterator[Line]:
711 If the line is empty, only emit if it makes sense.
712 If the line is too long, split it first and then generate.
714 If any lines were generated, set up a new current_line.
716 if not self.current_line:
717 self.current_line.depth += indent
718 return # Line is empty, don't emit. Creating a new one unnecessary.
720 complete_line = self.current_line
721 self.current_line = Line(depth=complete_line.depth + indent)
724 def visit_default(self, node: LN) -> Iterator[Line]:
725 if isinstance(node, Leaf):
726 for comment in generate_comments(node):
727 if self.current_line.bracket_tracker.any_open_brackets():
728 # any comment within brackets is subject to splitting
729 self.current_line.append(comment)
730 elif comment.type == token.COMMENT:
731 # regular trailing comment
732 self.current_line.append(comment)
733 yield from self.line()
736 # regular standalone comment, to be processed later (see
737 # docstring in `generate_comments()`
738 self.standalone_comments.append(comment)
739 normalize_prefix(node)
740 if node.type not in WHITESPACE:
741 for comment in self.standalone_comments:
742 yield from self.line()
744 self.current_line.append(comment)
745 yield from self.line()
747 self.standalone_comments = []
748 self.current_line.append(node)
749 yield from super().visit_default(node)
def visit_suite(self, node: Node) -> Iterator[Line]:
    """Visit the body of a statement that follows a colon."""
    children = iter(node.children)
    # Handle the NEWLINE token before indenting: it may carry an inline
    # comment that belongs right after the colon.
    yield from self.visit(next(children))
    yield from self.line(+1)

    for child in children:
        yield from self.visit(child)

    yield from self.line(-1)
def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
    """Visit a statement.

    The relevant Python language keywords for this statement are the NAME
    leaves found within it.
    """
    for child in node.children:
        starts_line = child.type == token.NAME and child.value in keywords  # type: ignore
        if starts_line:
            yield from self.line()

        yield from self.visit(child)
777 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
778 """A statement without nested statements."""
779 is_suite_like = node.parent and node.parent.type in STATEMENT
781 yield from self.line(+1)
782 yield from self.visit_default(node)
783 yield from self.line(-1)
786 yield from self.line()
787 yield from self.visit_default(node)
789 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
790 yield from self.line()
792 children = iter(node.children)
793 for child in children:
794 yield from self.visit(child)
796 if child.type == token.NAME and child.value == 'async': # type: ignore
799 internal_stmt = next(children)
800 for child in internal_stmt.children:
801 yield from self.visit(child)
def visit_decorators(self, node: Node) -> Iterator[Line]:
    """Emit every decorator on a line of its own."""
    for decorator in node.children:
        yield from self.line()
        yield from self.visit(decorator)
def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
    """A semicolon terminates the current logical line."""
    yield from self.line()
def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
    """Visit the end-of-file marker, then flush the last pending line."""
    yield from self.visit_default(leaf)
    yield from self.line()
815 def __attrs_post_init__(self) -> None:
816 """You are in a twisty little maze of passages."""
818 self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'})
819 self.visit_while_stmt = partial(v, keywords={'while', 'else'})
820 self.visit_for_stmt = partial(v, keywords={'for', 'else'})
821 self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'})
822 self.visit_except_clause = partial(v, keywords={'except'})
823 self.visit_funcdef = partial(v, keywords={'def'})
824 self.visit_with_stmt = partial(v, keywords={'with'})
825 self.visit_classdef = partial(v, keywords={'class'})
826 self.visit_async_funcdef = self.visit_async_stmt
827 self.visit_decorated = self.visit_decorators
830 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
831 OPENING_BRACKETS = set(BRACKET.keys())
832 CLOSING_BRACKETS = set(BRACKET.values())
833 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
834 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
837 def whitespace(leaf: Leaf) -> str: # noqa C901
838 """Return whitespace prefix if needed for the given `leaf`."""
845 if t in ALWAYS_NO_SPACE:
848 if t == token.COMMENT:
851 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
852 if t == token.COLON and p.type != syms.subscript:
855 prev = leaf.prev_sibling
857 prevp = preceding_leaf(p)
858 if not prevp or prevp.type in OPENING_BRACKETS:
862 return SPACE if prevp.type == token.COMMA else NO
864 if prevp.type == token.EQUAL:
865 if prevp.parent and prevp.parent.type in {
874 elif prevp.type == token.DOUBLESTAR:
875 if prevp.parent and prevp.parent.type in {
884 elif prevp.type == token.COLON:
885 if prevp.parent and prevp.parent.type == syms.subscript:
888 elif prevp.parent and prevp.parent.type in {syms.factor, syms.star_expr}:
891 elif prev.type in OPENING_BRACKETS:
894 if p.type in {syms.parameters, syms.arglist}:
895 # untyped function signatures or calls
899 if not prev or prev.type != token.COMMA:
902 if p.type == syms.varargslist:
907 if prev and prev.type != token.COMMA:
910 elif p.type == syms.typedargslist:
911 # typed function signatures
916 if prev.type != syms.tname:
919 elif prev.type == token.EQUAL:
920 # A bit hacky: if the equal sign has whitespace, it means we
921 # previously found it's a typed argument. So, we're using that, too.
924 elif prev.type != token.COMMA:
927 elif p.type == syms.tname:
930 prevp = preceding_leaf(p)
931 if not prevp or prevp.type != token.COMMA:
934 elif p.type == syms.trailer:
935 # attributes and calls
936 if t == token.LPAR or t == token.RPAR:
941 prevp = preceding_leaf(p)
942 if not prevp or prevp.type != token.NUMBER:
945 elif t == token.LSQB:
948 elif prev.type != token.COMMA:
951 elif p.type == syms.argument:
957 prevp = preceding_leaf(p)
958 if not prevp or prevp.type == token.LPAR:
961 elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
964 elif p.type == syms.decorator:
968 elif p.type == syms.dotted_name:
972 prevp = preceding_leaf(p)
973 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
976 elif p.type == syms.classdef:
980 if prev and prev.type == token.LPAR:
983 elif p.type == syms.subscript:
986 assert p.parent is not None, "subscripts are always parented"
987 if p.parent.type == syms.subscriptlist:
995 elif p.type == syms.atom:
996 if prev and t == token.DOT:
997 # dots, but not the first one.
1001 p.type == syms.listmaker
1002 or p.type == syms.testlist_gexp
1003 or p.type == syms.subscriptlist
1005 # list interior, including unpacking
1009 elif p.type == syms.dictsetmaker:
1010 # dict and set interior, including unpacking
1014 if prev.type == token.DOUBLESTAR:
1017 elif p.type in {syms.factor, syms.star_expr}:
1020 prevp = preceding_leaf(p)
1021 if not prevp or prevp.type in OPENING_BRACKETS:
1024 prevp_parent = prevp.parent
1025 assert prevp_parent is not None
1026 if prevp.type == token.COLON and prevp_parent.type in {
1027 syms.subscript, syms.sliceop
1031 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1034 elif t == token.NAME or t == token.NUMBER:
1037 elif p.type == syms.import_from:
1039 if prev and prev.type == token.DOT:
1042 elif t == token.NAME:
1046 if prev and prev.type == token.DOT:
1049 elif p.type == syms.sliceop:
1055 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1056 """Returns the first leaf that precedes `node`, if any."""
1058 res = node.prev_sibling
1060 if isinstance(res, Leaf):
1064 return list(res.leaves())[-1]
1073 def is_delimiter(leaf: Leaf) -> int:
1074 """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter.
1076 Higher numbers are higher priority.
1078 if leaf.type == token.COMMA:
1079 return COMMA_PRIORITY
1081 if leaf.type in COMPARATORS:
1082 return COMPARATOR_PRIORITY
1085 leaf.type in MATH_OPERATORS
1087 and leaf.parent.type not in {syms.factor, syms.star_expr}
1089 return MATH_PRIORITY
1094 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1095 """Cleans the prefix of the `leaf` and generates comments from it, if any.
1097 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1098 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1099 move because it does away with modifying the grammar to include all the
1100 possible places in which comments can be placed.
1102 The sad consequence for us though is that comments don't "belong" anywhere.
1103 This is why this function generates simple parentless Leaf objects for
1104 comments. We simply don't know what the correct parent should be.
1106 No matter though, we can live without this. We really only need to
1107 differentiate between inline and standalone comments. The latter don't
1108 share the line with any code.
1110 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1111 are emitted with a fake STANDALONE_COMMENT token identifier.
1116 if '#' not in leaf.prefix:
1119 before_comment, content = leaf.prefix.split('#', 1)
1120 content = content.rstrip()
1121 if content and (content[0] not in {' ', '!', '#'}):
1122 content = ' ' + content
1123 is_standalone_comment = (
1124 '\n' in before_comment or '\n' in content or leaf.type == token.ENDMARKER
1126 if not is_standalone_comment:
1127 # simple trailing comment
1128 yield Leaf(token.COMMENT, value='#' + content)
1131 for line in ('#' + content).split('\n'):
1132 line = line.lstrip()
1133 if not line.startswith('#'):
1136 yield Leaf(STANDALONE_COMMENT, line)
1140 line: Line, line_length: int, inner: bool = False, py36: bool = False
1141 ) -> Iterator[Line]:
1142 """Splits a `line` into potentially many lines.
1144 They should fit in the allotted `line_length` but might not be able to.
1145 `inner` signifies that there were a pair of brackets somewhere around the
1146 current `line`, possibly transitively. This means we can fallback to splitting
1147 by delimiters if the LHS/RHS don't yield any results.
1149 If `py36` is True, splitting may generate syntax that is only compatible
1150 with Python 3.6 and later.
1152 line_str = str(line).strip('\n')
1153 if len(line_str) <= line_length and '\n' not in line_str:
1158 split_funcs = [left_hand_split]
1159 elif line.inside_brackets:
1160 split_funcs = [delimiter_split]
1161 if '\n' not in line_str:
1162 # Only attempt RHS if we don't have multiline strings or comments
1164 split_funcs.append(right_hand_split)
1166 split_funcs = [right_hand_split]
1167 for split_func in split_funcs:
1168 # We are accumulating lines in `result` because we might want to abort
1169 # mission and return the original line in the end, or attempt a different
1171 result: List[Line] = []
1173 for l in split_func(line, py36=py36):
1174 if str(l).strip('\n') == line_str:
1175 raise CannotSplit("Split function returned an unchanged result")
1178 split_line(l, line_length=line_length, inner=True, py36=py36)
1180 except CannotSplit as cs:
1191 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1192 """Split line into many lines, starting with the first matching bracket pair.
1194 Note: this usually looks weird, only use this for function definitions.
1195 Prefer RHS otherwise.
1197 head = Line(depth=line.depth)
1198 body = Line(depth=line.depth + 1, inside_brackets=True)
1199 tail = Line(depth=line.depth)
1200 tail_leaves: List[Leaf] = []
1201 body_leaves: List[Leaf] = []
1202 head_leaves: List[Leaf] = []
1203 current_leaves = head_leaves
1204 matching_bracket = None
1205 for leaf in line.leaves:
1207 current_leaves is body_leaves
1208 and leaf.type in CLOSING_BRACKETS
1209 and leaf.opening_bracket is matching_bracket
1211 current_leaves = tail_leaves if body_leaves else head_leaves
1212 current_leaves.append(leaf)
1213 if current_leaves is head_leaves:
1214 if leaf.type in OPENING_BRACKETS:
1215 matching_bracket = leaf
1216 current_leaves = body_leaves
1217 # Since body is a new indent level, remove spurious leading whitespace.
1219 normalize_prefix(body_leaves[0])
1220 # Build the new lines.
1221 for result, leaves in (
1222 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1225 result.append(leaf, preformatted=True)
1226 comment_after = line.comments.get(id(leaf))
1228 result.append(comment_after, preformatted=True)
1229 split_succeeded_or_raise(head, body, tail)
1230 for result in (head, body, tail):
1235 def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1236 """Split line into many lines, starting with the last matching bracket pair."""
1237 head = Line(depth=line.depth)
1238 body = Line(depth=line.depth + 1, inside_brackets=True)
1239 tail = Line(depth=line.depth)
1240 tail_leaves: List[Leaf] = []
1241 body_leaves: List[Leaf] = []
1242 head_leaves: List[Leaf] = []
1243 current_leaves = tail_leaves
1244 opening_bracket = None
1245 for leaf in reversed(line.leaves):
1246 if current_leaves is body_leaves:
1247 if leaf is opening_bracket:
1248 current_leaves = head_leaves if body_leaves else tail_leaves
1249 current_leaves.append(leaf)
1250 if current_leaves is tail_leaves:
1251 if leaf.type in CLOSING_BRACKETS:
1252 opening_bracket = leaf.opening_bracket
1253 current_leaves = body_leaves
1254 tail_leaves.reverse()
1255 body_leaves.reverse()
1256 head_leaves.reverse()
1257 # Since body is a new indent level, remove spurious leading whitespace.
1259 normalize_prefix(body_leaves[0])
1260 # Build the new lines.
1261 for result, leaves in (
1262 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1265 result.append(leaf, preformatted=True)
1266 comment_after = line.comments.get(id(leaf))
1268 result.append(comment_after, preformatted=True)
1269 split_succeeded_or_raise(head, body, tail)
1270 for result in (head, body, tail):
1275 def split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1276 tail_len = len(str(tail).strip())
1279 raise CannotSplit("Splitting brackets produced the same line")
1283 f"Splitting brackets on an empty body to save "
1284 f"{tail_len} characters is not worth it"
1288 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1289 """Split according to delimiters of the highest priority.
1291 This kind of split doesn't increase indentation.
1292 If `py36` is True, the split will add trailing commas also in function
1293 signatures that contain * and **.
1296 last_leaf = line.leaves[-1]
1298 raise CannotSplit("Line empty")
1300 delimiters = line.bracket_tracker.delimiters
1302 delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)})
1304 raise CannotSplit("No delimiters found")
1306 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1307 lowest_depth = sys.maxsize
1308 trailing_comma_safe = True
1309 for leaf in line.leaves:
1310 current_line.append(leaf, preformatted=True)
1311 comment_after = line.comments.get(id(leaf))
1313 current_line.append(comment_after, preformatted=True)
1314 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1316 leaf.bracket_depth == lowest_depth
1317 and leaf.type == token.STAR
1318 or leaf.type == token.DOUBLESTAR
1320 trailing_comma_safe = trailing_comma_safe and py36
1321 leaf_priority = delimiters.get(id(leaf))
1322 if leaf_priority == delimiter_priority:
1323 normalize_prefix(current_line.leaves[0])
1326 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1329 delimiter_priority == COMMA_PRIORITY
1330 and current_line.leaves[-1].type != token.COMMA
1331 and trailing_comma_safe
1333 current_line.append(Leaf(token.COMMA, ','))
1334 normalize_prefix(current_line.leaves[0])
def is_import(leaf: Leaf) -> bool:
    """Returns True if the given leaf starts an import statement."""
    parent = leaf.parent
    if leaf.type != token.NAME or not parent:
        return False

    if leaf.value == 'import':
        return parent.type == syms.import_name

    if leaf.value == 'from':
        return parent.type == syms.import_from

    return False
def normalize_prefix(leaf: Leaf) -> None:
    """Leave existing extra newlines for imports. Remove everything else."""
    if not is_import(leaf):
        leaf.prefix = ''
        return

    # For imports, keep only the blank lines that preceded the statement:
    # count '\n' characters before the first '#' (comments don't count).
    before_comment = leaf.prefix.split('#', 1)[0]
    leaf.prefix = '\n' * before_comment.count('\n')
def is_python36(node: Node) -> bool:
    """Returns True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # An (optionally raw) f-string prefix marks the file as 3.6+.
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", 'rf', 'fr', 'RF', 'FR'}:
                return True

        elif (
            n.type == syms.typedargslist
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # A trailing comma after * or ** in a signature is 3.6+ only.
            for ch in n.children:
                if ch.type in {token.STAR, token.DOUBLESTAR}:
                    return True

    return False
# File-discovery configuration used by gen_python_files_in_dir().
1388 PYTHON_EXTENSIONS = {'.py'}
# Directory names that are never descended into when collecting Python files
# (build artifacts, VCS metadata, caches, virtualenvs).
# NOTE(review): the closing '}' of this set literal (original line 1391) is
# missing from this extraction.
1389 BLACKLISTED_DIRECTORIES = {
1390 'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv'
def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Recursively yield files under `path` with a PYTHON_EXTENSIONS suffix.

    Directories whose name is in BLACKLISTED_DIRECTORIES are not descended
    into.
    """
    for child in path.iterdir():
        if child.is_dir():
            if child.name not in BLACKLISTED_DIRECTORIES:
                yield from gen_python_files_in_dir(child)

        elif child.suffix in PYTHON_EXTENSIONS:
            yield child
1408 """Provides a reformatting counter."""
# Counters incremented by done()/failed() below; used by return_code and
# __str__ to summarize a run.
# NOTE(review): a `same_count` field (original line 1410) is missing from
# this extraction — done() below increments it.
1409 change_count: int = 0
1411 failure_count: int = 0
1413 def done(self, src: Path, changed: bool) -> None:
1414 """Increment the counter for successful reformatting. Write out a message."""
1416 out(f'reformatted {src}')
1417 self.change_count += 1
1419 out(f'{src} already well formatted, good job.', bold=False)
1420 self.same_count += 1
1422 def failed(self, src: Path, message: str) -> None:
1423 """Increment the counter for failed reformatting. Write out a message."""
1424 err(f'error: cannot format {src}: {message}')
1425 self.failure_count += 1
1428 def return_code(self) -> int:
1429 """Which return code should the app use considering the current state."""
1430 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
1431 # 126 we have special returncodes reserved by the shell.
1432 if self.failure_count:
1435 elif self.change_count:
1440 def __str__(self) -> str:
1441 """A color report of the current state.
1443 Use `click.unstyle` to remove colors.
1446 if self.change_count:
1447 s = 's' if self.change_count > 1 else ''
1449 click.style(f'{self.change_count} file{s} reformatted', bold=True)
1452 s = 's' if self.same_count > 1 else ''
1453 report.append(f'{self.same_count} file{s} left unchanged')
1454 if self.failure_count:
1455 s = 's' if self.failure_count > 1 else ''
1458 f'{self.failure_count} file{s} failed to reformat', fg='red'
1461 return ', '.join(report) + '.'
def assert_equivalent(src: str, dst: str) -> None:
    """Raises AssertionError if `src` and `dst` aren't equivalent.

    This is a temporary sanity check until Black becomes stable.
    """
    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        raise AssertionError(f"cannot parse source: {exc}") from None

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = '\n'.join(_v(src_ast))
    dst_ast_str = '\n'.join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst'))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Raises AssertionError if `dst` reformats differently the second time.

    This is a temporary sanity check until Black becomes stable.
    """
    newdst = format_str(dst, line_length=line_length)
    if dst == newdst:
        return

    log = dump_to_file(
        diff(src, dst, 'source', 'first pass'),
        diff(dst, newdst, 'first pass', 'second pass'),
    )
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter. "
        f"Please report a bug on https://github.com/ambv/black/issues. "
        f"This diff might be helpful: {log}"
    ) from None
def dump_to_file(*output: str) -> str:
    """Dumps `output` to a temporary file. Returns path to the file."""
    import tempfile

    with tempfile.NamedTemporaryFile(
        mode='w', prefix='blk_', suffix='.log', delete=False
    ) as f:
        # Each chunk of output is written followed by a newline separator.
        f.writelines(chunk + '\n' for chunk in output)
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Returns a udiff string between strings `a` and `b`."""
    import difflib

    def as_lines(text: str) -> List[str]:
        # unified_diff expects every line to keep its newline terminator.
        return [line + '\n' for line in text.split('\n')]

    udiff = difflib.unified_diff(
        as_lines(a), as_lines(b), fromfile=a_name, tofile=b_name, n=5
    )
    return ''.join(udiff)
1568 if __name__ == '__main__':