All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
5 from functools import partial
8 from pathlib import Path
12 Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
15 from attr import attrib, dataclass, Factory
19 from blib2to3.pytree import Node, Leaf, type_repr
20 from blib2to3 import pygram, pytree
21 from blib2to3.pgen2 import driver, token
22 from blib2to3.pgen2.parse import ParseError
24 __version__ = "18.3a1"
25 DEFAULT_LINE_LENGTH = 88
27 syms = pygram.python_symbols
34 LN = Union[Leaf, Node]
35 out = partial(click.secho, bold=True, err=True)
36 err = partial(click.secho, fg='red', err=True)
39 class NothingChanged(UserWarning):
40 """Raised by `format_file` when the reformatted code is the same as source."""
43 class CannotSplit(Exception):
44 """A readable split that fits the allotted line length is impossible.
46 Raised by `left_hand_split()` and `right_hand_split()`.
55 default=DEFAULT_LINE_LENGTH,
56 help='How many character per line to allow.',
63 "Don't write back the files, just return the status. Return code 0 "
64 "means nothing changed. Return code 1 means some files were "
65 "reformatted. Return code 123 means there was an internal error."
71 help='If --fast given, skip temporary sanity checks. [default: --safe]',
73 @click.version_option(version=__version__)
77 type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True),
81 ctx: click.Context, line_length: int, check: bool, fast: bool, src: List[str]
83 """The uncompromising code formatter."""
84 sources: List[Path] = []
88 sources.extend(gen_python_files_in_dir(p))
90 # if a file was explicitly given, we don't care about its extension
93 err(f'invalid path: {s}')
96 elif len(sources) == 1:
100 changed = format_file_in_place(
101 p, line_length=line_length, fast=fast, write_back=not check
103 report.done(p, changed)
104 except Exception as exc:
105 report.failed(p, str(exc))
106 ctx.exit(report.return_code)
108 loop = asyncio.get_event_loop()
109 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
112 return_code = loop.run_until_complete(
114 sources, line_length, not check, fast, loop, executor
119 ctx.exit(return_code)
122 async def schedule_formatting(
131 src: loop.run_in_executor(
132 executor, format_file_in_place, src, line_length, fast, write_back
136 await asyncio.wait(tasks.values())
139 for src, task in tasks.items():
141 report.failed(src, 'timed out, cancelling')
143 cancelled.append(task)
144 elif task.exception():
145 report.failed(src, str(task.exception()))
147 report.done(src, task.result())
149 await asyncio.wait(cancelled, timeout=2)
150 out('All done! ✨ 🍰 ✨')
151 click.echo(str(report))
152 return report.return_code
155 def format_file_in_place(
156 src: Path, line_length: int, fast: bool, write_back: bool = False
158 """Format the file and rewrite if changed. Return True if changed."""
160 contents, encoding = format_file(src, line_length=line_length, fast=fast)
161 except NothingChanged:
165 with open(src, "w", encoding=encoding) as f:
171 src: Path, line_length: int, fast: bool
172 ) -> Tuple[FileContent, Encoding]:
173 """Reformats a file and returns its contents and encoding."""
174 with tokenize.open(src) as src_buffer:
175 src_contents = src_buffer.read()
176 if src_contents.strip() == '':
177 raise NothingChanged(src)
179 dst_contents = format_str(src_contents, line_length=line_length)
180 if src_contents == dst_contents:
181 raise NothingChanged(src)
184 assert_equivalent(src_contents, dst_contents)
185 assert_stable(src_contents, dst_contents, line_length=line_length)
186 return dst_contents, src_buffer.encoding
189 def format_str(src_contents: str, line_length: int) -> FileContent:
190 """Reformats a string and returns new contents."""
191 src_node = lib2to3_parse(src_contents)
193 comments: List[Line] = []
194 lines = LineGenerator()
195 elt = EmptyLineTracker()
196 py36 = is_python36(src_node)
199 for current_line in lines.visit(src_node):
200 for _ in range(after):
201 dst_contents += str(empty_line)
202 before, after = elt.maybe_empty_lines(current_line)
203 for _ in range(before):
204 dst_contents += str(empty_line)
205 if not current_line.is_comment:
206 for comment in comments:
207 dst_contents += str(comment)
209 for line in split_line(current_line, line_length=line_length, py36=py36):
210 dst_contents += str(line)
212 comments.append(current_line)
213 for comment in comments:
214 dst_contents += str(comment)
218 def lib2to3_parse(src_txt: str) -> Node:
219 """Given a string with source, return the lib2to3 Node."""
220 grammar = pygram.python_grammar_no_print_statement
221 drv = driver.Driver(grammar, pytree.convert)
222 if src_txt[-1] != '\n':
223 nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
226 result = drv.parse_string(src_txt, True)
227 except ParseError as pe:
228 lineno, column = pe.context[1]
229 lines = src_txt.splitlines()
231 faulty_line = lines[lineno - 1]
233 faulty_line = "<line number missing in source>"
234 raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None
236 if isinstance(result, Leaf):
237 result = Node(syms.file_input, [result])
241 def lib2to3_unparse(node: Node) -> str:
242 """Given a lib2to3 node, return its string representation."""
250 class Visitor(Generic[T]):
251 """Basic lib2to3 visitor that yields things on visiting."""
253 def visit(self, node: LN) -> Iterator[T]:
255 name = token.tok_name[node.type]
257 name = type_repr(node.type)
258 yield from getattr(self, f'visit_{name}', self.visit_default)(node)
260 def visit_default(self, node: LN) -> Iterator[T]:
261 if isinstance(node, Node):
262 for child in node.children:
263 yield from self.visit(child)
267 class DebugVisitor(Visitor[T]):
268 tree_depth: int = attrib(default=0)
270 def visit_default(self, node: LN) -> Iterator[T]:
271 indent = ' ' * (2 * self.tree_depth)
272 if isinstance(node, Node):
273 _type = type_repr(node.type)
274 out(f'{indent}{_type}', fg='yellow')
276 for child in node.children:
277 yield from self.visit(child)
280 out(f'{indent}/{_type}', fg='yellow', bold=False)
282 _type = token.tok_name.get(node.type, str(node.type))
283 out(f'{indent}{_type}', fg='blue', nl=False)
285 # We don't have to handle prefixes for `Node` objects since
286 # that delegates to the first child anyway.
287 out(f' {node.prefix!r}', fg='green', bold=False, nl=False)
288 out(f' {node.value!r}', fg='blue', bold=False)
291 KEYWORDS = set(keyword.kwlist)
292 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
293 FLOW_CONTROL = {'return', 'raise', 'break', 'continue'}
304 STANDALONE_COMMENT = 153
305 LOGIC_OPERATORS = {'and', 'or'}
328 COMPREHENSION_PRIORITY = 20
332 COMPARATOR_PRIORITY = 3
337 class BracketTracker:
338 depth: int = attrib(default=0)
339 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = attrib(default=Factory(dict))
340 delimiters: Dict[LeafID, Priority] = attrib(default=Factory(dict))
341 previous: Optional[Leaf] = attrib(default=None)
343 def mark(self, leaf: Leaf) -> None:
344 if leaf.type == token.COMMENT:
347 if leaf.type in CLOSING_BRACKETS:
349 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
350 leaf.opening_bracket = opening_bracket
351 leaf.bracket_depth = self.depth
353 delim = is_delimiter(leaf)
355 self.delimiters[id(leaf)] = delim
356 elif self.previous is not None:
357 if leaf.type == token.STRING and self.previous.type == token.STRING:
358 self.delimiters[id(self.previous)] = STRING_PRIORITY
360 leaf.type == token.NAME and
361 leaf.value == 'for' and
363 leaf.parent.type in {syms.comp_for, syms.old_comp_for}
365 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
367 leaf.type == token.NAME and
368 leaf.value == 'if' and
370 leaf.parent.type in {syms.comp_if, syms.old_comp_if}
372 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
373 if leaf.type in OPENING_BRACKETS:
374 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
378 def any_open_brackets(self) -> bool:
379 """Returns True if there is an yet unmatched open bracket on the line."""
380 return bool(self.bracket_match)
382 def max_priority(self, exclude: Iterable[LeafID] =()) -> int:
383 """Returns the highest priority of a delimiter found on the line.
385 Values are consistent with what `is_delimiter()` returns.
387 return max(v for k, v in self.delimiters.items() if k not in exclude)
392 depth: int = attrib(default=0)
393 leaves: List[Leaf] = attrib(default=Factory(list))
394 comments: Dict[LeafID, Leaf] = attrib(default=Factory(dict))
395 bracket_tracker: BracketTracker = attrib(default=Factory(BracketTracker))
396 inside_brackets: bool = attrib(default=False)
397 has_for: bool = attrib(default=False)
398 _for_loop_variable: bool = attrib(default=False, init=False)
400 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
401 has_value = leaf.value.strip()
405 if self.leaves and not preformatted:
406 # Note: at this point leaf.prefix should be empty except for
407 # imports, for which we only preserve newlines.
408 leaf.prefix += whitespace(leaf)
409 if self.inside_brackets or not preformatted:
410 self.maybe_decrement_after_for_loop_variable(leaf)
411 self.bracket_tracker.mark(leaf)
412 self.maybe_remove_trailing_comma(leaf)
413 self.maybe_increment_for_loop_variable(leaf)
414 if self.maybe_adapt_standalone_comment(leaf):
417 if not self.append_comment(leaf):
418 self.leaves.append(leaf)
421 def is_comment(self) -> bool:
422 return bool(self) and self.leaves[0].type == STANDALONE_COMMENT
425 def is_decorator(self) -> bool:
426 return bool(self) and self.leaves[0].type == token.AT
429 def is_import(self) -> bool:
430 return bool(self) and is_import(self.leaves[0])
433 def is_class(self) -> bool:
436 self.leaves[0].type == token.NAME and
437 self.leaves[0].value == 'class'
441 def is_def(self) -> bool:
442 """Also returns True for async defs."""
444 first_leaf = self.leaves[0]
449 second_leaf: Optional[Leaf] = self.leaves[1]
453 (first_leaf.type == token.NAME and first_leaf.value == 'def') or
455 first_leaf.type == token.NAME and
456 first_leaf.value == 'async' and
457 second_leaf is not None and
458 second_leaf.type == token.NAME and
459 second_leaf.value == 'def'
464 def is_flow_control(self) -> bool:
467 self.leaves[0].type == token.NAME and
468 self.leaves[0].value in FLOW_CONTROL
472 def is_yield(self) -> bool:
475 self.leaves[0].type == token.NAME and
476 self.leaves[0].value == 'yield'
479 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
482 self.leaves[-1].type == token.COMMA and
483 closing.type in CLOSING_BRACKETS
487 if closing.type == token.RSQB or closing.type == token.RBRACE:
491 # For parens let's check if it's safe to remove the comma. If the
492 # trailing one is the only one, we might mistakenly change a tuple
493 # into a different type by removing the comma.
494 depth = closing.bracket_depth + 1
496 opening = closing.opening_bracket
497 for _opening_index, leaf in enumerate(self.leaves):
504 for leaf in self.leaves[_opening_index + 1:]:
508 bracket_depth = leaf.bracket_depth
509 if bracket_depth == depth and leaf.type == token.COMMA:
517 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
518 """In a for loop, or comprehension, the variables are often unpacks.
520 To avoid splitting on the comma in this situation, we will increase
521 the depth of tokens between `for` and `in`.
523 if leaf.type == token.NAME and leaf.value == 'for':
525 self.bracket_tracker.depth += 1
526 self._for_loop_variable = True
531 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
532 # See `maybe_increment_for_loop_variable` above for explanation.
533 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == 'in':
534 self.bracket_tracker.depth -= 1
535 self._for_loop_variable = False
540 def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
541 """Hack a standalone comment to act as a trailing comment for line splitting.
543 If this line has brackets and a standalone `comment`, we need to adapt
544 it to be able to still reformat the line.
546 This is not perfect, the line to which the standalone comment gets
547 appended will appear "too long" when splitting.
550 comment.type == STANDALONE_COMMENT and
551 self.bracket_tracker.any_open_brackets()
555 comment.type = token.COMMENT
556 comment.prefix = '\n' + ' ' * (self.depth + 1)
557 return self.append_comment(comment)
559 def append_comment(self, comment: Leaf) -> bool:
560 if comment.type != token.COMMENT:
564 after = id(self.last_non_delimiter())
566 comment.type = STANDALONE_COMMENT
571 if after in self.comments:
572 self.comments[after].value += str(comment)
574 self.comments[after] = comment
577 def last_non_delimiter(self) -> Leaf:
578 for i in range(len(self.leaves)):
579 last = self.leaves[-i - 1]
580 if not is_delimiter(last):
583 raise LookupError("No non-delimiters found")
585 def __str__(self) -> str:
589 indent = ' ' * self.depth
590 leaves = iter(self.leaves)
592 res = f'{first.prefix}{indent}{first.value}'
595 for comment in self.comments.values():
599 def __bool__(self) -> bool:
600 return bool(self.leaves or self.comments)
604 class EmptyLineTracker:
605 """Provides a stateful method that returns the number of potential extra
606 empty lines needed before and after the currently processed line.
608 Note: this tracker works on lines that haven't been split yet.
610 previous_line: Optional[Line] = attrib(default=None)
611 previous_after: int = attrib(default=0)
612 previous_defs: List[int] = attrib(default=Factory(list))
614 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
615 """Returns the number of extra empty lines before and after the `current_line`.
617 This is for separating `def`, `async def` and `class` with extra empty lines
618 (two on module-level), as well as providing an extra empty line after flow
619 control keywords to make them more prominent.
621 before, after = self._maybe_empty_lines(current_line)
622 self.previous_after = after
623 self.previous_line = current_line
626 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
628 depth = current_line.depth
629 while self.previous_defs and self.previous_defs[-1] >= depth:
630 self.previous_defs.pop()
631 before = (1 if depth else 2) - self.previous_after
632 is_decorator = current_line.is_decorator
633 if is_decorator or current_line.is_def or current_line.is_class:
635 self.previous_defs.append(depth)
636 if self.previous_line is None:
637 # Don't insert empty lines before the first line in the file.
640 if self.previous_line and self.previous_line.is_decorator:
641 # Don't insert empty lines between decorators.
645 if current_line.depth:
647 newlines -= self.previous_after
650 if current_line.is_flow_control:
654 self.previous_line and
655 self.previous_line.is_import and
656 not current_line.is_import and
657 depth == self.previous_line.depth
659 return (before or 1), 0
662 self.previous_line and
663 self.previous_line.is_yield and
664 (not current_line.is_yield or depth != self.previous_line.depth)
666 return (before or 1), 0
672 class LineGenerator(Visitor[Line]):
673 """Generates reformatted Line objects. Empty lines are not emitted.
675 Note: destroys the tree it's visiting by mutating prefixes of its leaves
676 in ways that will no longer stringify to valid Python code on the tree.
678 current_line: Line = attrib(default=Factory(Line))
679 standalone_comments: List[Leaf] = attrib(default=Factory(list))
681 def line(self, indent: int = 0) -> Iterator[Line]:
684 If the line is empty, only emit if it makes sense.
685 If the line is too long, split it first and then generate.
687 If any lines were generated, set up a new current_line.
689 if not self.current_line:
690 self.current_line.depth += indent
691 return # Line is empty, don't emit. Creating a new one unnecessary.
693 complete_line = self.current_line
694 self.current_line = Line(depth=complete_line.depth + indent)
697 def visit_default(self, node: LN) -> Iterator[Line]:
698 if isinstance(node, Leaf):
699 for comment in generate_comments(node):
700 if self.current_line.bracket_tracker.any_open_brackets():
701 # any comment within brackets is subject to splitting
702 self.current_line.append(comment)
703 elif comment.type == token.COMMENT:
704 # regular trailing comment
705 self.current_line.append(comment)
706 yield from self.line()
709 # regular standalone comment, to be processed later (see
710 # docstring in `generate_comments()`
711 self.standalone_comments.append(comment)
712 normalize_prefix(node)
713 if node.type not in WHITESPACE:
714 for comment in self.standalone_comments:
715 yield from self.line()
717 self.current_line.append(comment)
718 yield from self.line()
720 self.standalone_comments = []
721 self.current_line.append(node)
722 yield from super().visit_default(node)
724 def visit_suite(self, node: Node) -> Iterator[Line]:
725 """Body of a statement after a colon."""
726 children = iter(node.children)
727 # Process newline before indenting. It might contain an inline
728 # comment that should go right after the colon.
729 newline = next(children)
730 yield from self.visit(newline)
731 yield from self.line(+1)
733 for child in children:
734 yield from self.visit(child)
736 yield from self.line(-1)
738 def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
739 """Visit a statement.
741 The relevant Python language keywords for this statement are NAME leaves
744 for child in node.children:
745 if child.type == token.NAME and child.value in keywords: # type: ignore
746 yield from self.line()
748 yield from self.visit(child)
750 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
751 """A statement without nested statements."""
752 is_suite_like = node.parent and node.parent.type in STATEMENT
754 yield from self.line(+1)
755 yield from self.visit_default(node)
756 yield from self.line(-1)
759 yield from self.line()
760 yield from self.visit_default(node)
762 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
763 yield from self.line()
765 children = iter(node.children)
766 for child in children:
767 yield from self.visit(child)
769 if child.type == token.NAME and child.value == 'async': # type: ignore
772 internal_stmt = next(children)
773 for child in internal_stmt.children:
774 yield from self.visit(child)
776 def visit_decorators(self, node: Node) -> Iterator[Line]:
777 for child in node.children:
778 yield from self.line()
779 yield from self.visit(child)
781 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
782 yield from self.line()
784 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
785 yield from self.visit_default(leaf)
786 yield from self.line()
788 def __attrs_post_init__(self) -> None:
789 """You are in a twisty little maze of passages."""
791 self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'})
792 self.visit_while_stmt = partial(v, keywords={'while', 'else'})
793 self.visit_for_stmt = partial(v, keywords={'for', 'else'})
794 self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'})
795 self.visit_except_clause = partial(v, keywords={'except'})
796 self.visit_funcdef = partial(v, keywords={'def'})
797 self.visit_with_stmt = partial(v, keywords={'with'})
798 self.visit_classdef = partial(v, keywords={'class'})
799 self.visit_async_funcdef = self.visit_async_stmt
800 self.visit_decorated = self.visit_decorators
803 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
804 OPENING_BRACKETS = set(BRACKET.keys())
805 CLOSING_BRACKETS = set(BRACKET.values())
806 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
809 def whitespace(leaf: Leaf) -> str:
810 """Return whitespace prefix if needed for the given `leaf`."""
826 if t == token.COMMENT:
829 if t == STANDALONE_COMMENT:
832 if t in CLOSING_BRACKETS:
835 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
836 prev = leaf.prev_sibling
838 prevp = preceding_leaf(p)
839 if not prevp or prevp.type in OPENING_BRACKETS:
842 if prevp.type == token.EQUAL:
843 if prevp.parent and prevp.parent.type in {
852 elif prevp.type == token.DOUBLESTAR:
853 if prevp.parent and prevp.parent.type in {
862 elif prevp.type == token.COLON:
863 if prevp.parent and prevp.parent.type == syms.subscript:
866 elif prevp.parent and prevp.parent.type == syms.factor:
869 elif prev.type in OPENING_BRACKETS:
872 if p.type in {syms.parameters, syms.arglist}:
873 # untyped function signatures or calls
877 if not prev or prev.type != token.COMMA:
880 if p.type == syms.varargslist:
885 if prev and prev.type != token.COMMA:
888 elif p.type == syms.typedargslist:
889 # typed function signatures
894 if prev.type != syms.tname:
897 elif prev.type == token.EQUAL:
898 # A bit hacky: if the equal sign has whitespace, it means we
899 # previously found it's a typed argument. So, we're using that, too.
902 elif prev.type != token.COMMA:
905 elif p.type == syms.tname:
908 prevp = preceding_leaf(p)
909 if not prevp or prevp.type != token.COMMA:
912 elif p.type == syms.trailer:
913 # attributes and calls
914 if t == token.LPAR or t == token.RPAR:
919 prevp = preceding_leaf(p)
920 if not prevp or prevp.type != token.NUMBER:
923 elif t == token.LSQB:
926 elif prev.type != token.COMMA:
929 elif p.type == syms.argument:
935 prevp = preceding_leaf(p)
936 if not prevp or prevp.type == token.LPAR:
939 elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
942 elif p.type == syms.decorator:
946 elif p.type == syms.dotted_name:
950 prevp = preceding_leaf(p)
951 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
954 elif p.type == syms.classdef:
958 if prev and prev.type == token.LPAR:
961 elif p.type == syms.subscript:
964 assert p.parent is not None, "subscripts are always parented"
965 if p.parent.type == syms.subscriptlist:
970 elif prev.type == token.COLON:
973 elif p.type == syms.atom:
974 if prev and t == token.DOT:
975 # dots, but not the first one.
979 p.type == syms.listmaker or
980 p.type == syms.testlist_gexp or
981 p.type == syms.subscriptlist
983 # list interior, including unpacking
987 elif p.type == syms.dictsetmaker:
988 # dict and set interior, including unpacking
992 if prev.type == token.DOUBLESTAR:
995 elif p.type == syms.factor or p.type == syms.star_expr:
998 prevp = preceding_leaf(p)
999 if not prevp or prevp.type in OPENING_BRACKETS:
1002 prevp_parent = prevp.parent
1003 assert prevp_parent is not None
1004 if prevp.type == token.COLON and prevp_parent.type in {
1005 syms.subscript, syms.sliceop
1009 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1012 elif t == token.NAME or t == token.NUMBER:
1015 elif p.type == syms.import_from:
1017 if prev and prev.type == token.DOT:
1020 elif t == token.NAME:
1024 if prev and prev.type == token.DOT:
1027 elif p.type == syms.sliceop:
1033 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1034 """Returns the first leaf that precedes `node`, if any."""
1036 res = node.prev_sibling
1038 if isinstance(res, Leaf):
1042 return list(res.leaves())[-1]
1051 def is_delimiter(leaf: Leaf) -> int:
1052 """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter.
1054 Higher numbers are higher priority.
1056 if leaf.type == token.COMMA:
1057 return COMMA_PRIORITY
1059 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS:
1060 return LOGIC_PRIORITY
1062 if leaf.type in COMPARATORS:
1063 return COMPARATOR_PRIORITY
1066 leaf.type in MATH_OPERATORS and
1068 leaf.parent.type not in {syms.factor, syms.star_expr}
1070 return MATH_PRIORITY
1075 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1076 """Cleans the prefix of the `leaf` and generates comments from it, if any.
1078 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1079 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1080 move because it does away with modifying the grammar to include all the
1081 possible places in which comments can be placed.
1083 The sad consequence for us though is that comments don't "belong" anywhere.
1084 This is why this function generates simple parentless Leaf objects for
1085 comments. We simply don't know what the correct parent should be.
1087 No matter though, we can live without this. We really only need to
1088 differentiate between inline and standalone comments. The latter don't
1089 share the line with any code.
1091 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1092 are emitted with a fake STANDALONE_COMMENT token identifier.
1097 if '#' not in leaf.prefix:
1100 before_comment, content = leaf.prefix.split('#', 1)
1101 content = content.rstrip()
1102 if content and (content[0] not in {' ', '!', '#'}):
1103 content = ' ' + content
1104 is_standalone_comment = (
1105 '\n' in before_comment or '\n' in content or leaf.type == token.DEDENT
1107 if not is_standalone_comment:
1108 # simple trailing comment
1109 yield Leaf(token.COMMENT, value='#' + content)
1112 for line in ('#' + content).split('\n'):
1113 line = line.lstrip()
1114 if not line.startswith('#'):
1117 yield Leaf(STANDALONE_COMMENT, line)
1121 line: Line, line_length: int, inner: bool = False, py36: bool = False
1122 ) -> Iterator[Line]:
1123 """Splits a `line` into potentially many lines.
1125 They should fit in the allotted `line_length` but might not be able to.
1126 `inner` signifies that there were a pair of brackets somewhere around the
1127 current `line`, possibly transitively. This means we can fallback to splitting
1128 by delimiters if the LHS/RHS don't yield any results.
1130 If `py36` is True, splitting may generate syntax that is only compatible
1131 with Python 3.6 and later.
1133 line_str = str(line).strip('\n')
1134 if len(line_str) <= line_length and '\n' not in line_str:
1139 split_funcs = [left_hand_split]
1140 elif line.inside_brackets:
1141 split_funcs = [delimiter_split]
1142 if '\n' not in line_str:
1143 # Only attempt RHS if we don't have multiline strings or comments
1145 split_funcs.append(right_hand_split)
1147 split_funcs = [right_hand_split]
1148 for split_func in split_funcs:
1149 # We are accumulating lines in `result` because we might want to abort
1150 # mission and return the original line in the end, or attempt a different
1152 result: List[Line] = []
1154 for l in split_func(line, py36=py36):
1155 if str(l).strip('\n') == line_str:
1156 raise CannotSplit("Split function returned an unchanged result")
1159 split_line(l, line_length=line_length, inner=True, py36=py36)
1161 except CannotSplit as cs:
1172 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1173 """Split line into many lines, starting with the first matching bracket pair.
1175 Note: this usually looks weird, only use this for function definitions.
1176 Prefer RHS otherwise.
1178 head = Line(depth=line.depth)
1179 body = Line(depth=line.depth + 1, inside_brackets=True)
1180 tail = Line(depth=line.depth)
1181 tail_leaves: List[Leaf] = []
1182 body_leaves: List[Leaf] = []
1183 head_leaves: List[Leaf] = []
1184 current_leaves = head_leaves
1185 matching_bracket = None
1186 for leaf in line.leaves:
1188 current_leaves is body_leaves and
1189 leaf.type in CLOSING_BRACKETS and
1190 leaf.opening_bracket is matching_bracket
1192 current_leaves = tail_leaves
1193 current_leaves.append(leaf)
1194 if current_leaves is head_leaves:
1195 if leaf.type in OPENING_BRACKETS:
1196 matching_bracket = leaf
1197 current_leaves = body_leaves
1198 # Since body is a new indent level, remove spurious leading whitespace.
1200 normalize_prefix(body_leaves[0])
1201 # Build the new lines.
1202 for result, leaves in (
1203 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1206 result.append(leaf, preformatted=True)
1207 comment_after = line.comments.get(id(leaf))
1209 result.append(comment_after, preformatted=True)
1210 # Check if the split succeeded.
1211 tail_len = len(str(tail))
1214 raise CannotSplit("Splitting brackets produced the same line")
1218 f"Splitting brackets on an empty body to save "
1219 f"{tail_len} characters is not worth it"
1222 for result in (head, body, tail):
1227 def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1228 """Split line into many lines, starting with the last matching bracket pair."""
1229 head = Line(depth=line.depth)
1230 body = Line(depth=line.depth + 1, inside_brackets=True)
1231 tail = Line(depth=line.depth)
1232 tail_leaves: List[Leaf] = []
1233 body_leaves: List[Leaf] = []
1234 head_leaves: List[Leaf] = []
1235 current_leaves = tail_leaves
1236 opening_bracket = None
1237 for leaf in reversed(line.leaves):
1238 if current_leaves is body_leaves:
1239 if leaf is opening_bracket:
1240 current_leaves = head_leaves
1241 current_leaves.append(leaf)
1242 if current_leaves is tail_leaves:
1243 if leaf.type in CLOSING_BRACKETS:
1244 opening_bracket = leaf.opening_bracket
1245 current_leaves = body_leaves
1246 tail_leaves.reverse()
1247 body_leaves.reverse()
1248 head_leaves.reverse()
1249 # Since body is a new indent level, remove spurious leading whitespace.
1251 normalize_prefix(body_leaves[0])
1252 # Build the new lines.
1253 for result, leaves in (
1254 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1257 result.append(leaf, preformatted=True)
1258 comment_after = line.comments.get(id(leaf))
1260 result.append(comment_after, preformatted=True)
1261 # Check if the split succeeded.
1262 tail_len = len(str(tail).strip('\n'))
1265 raise CannotSplit("Splitting brackets produced the same line")
1269 f"Splitting brackets on an empty body to save "
1270 f"{tail_len} characters is not worth it"
1273 for result in (head, body, tail):
1278 def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
1279 """Split according to delimiters of the highest priority.
1281 This kind of split doesn't increase indentation.
1282 If `py36` is True, the split will add trailing commas also in function
1283 signatures that contain * and **.
1286 last_leaf = line.leaves[-1]
1288 raise CannotSplit("Line empty")
1290 delimiters = line.bracket_tracker.delimiters
1292 delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)})
1294 raise CannotSplit("No delimiters found")
1296 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1297 lowest_depth = sys.maxsize
1298 trailing_comma_safe = True
1299 for leaf in line.leaves:
1300 current_line.append(leaf, preformatted=True)
1301 comment_after = line.comments.get(id(leaf))
1303 current_line.append(comment_after, preformatted=True)
1304 lowest_depth = min(lowest_depth, leaf.bracket_depth)
1306 leaf.bracket_depth == lowest_depth and
1307 leaf.type == token.STAR or
1308 leaf.type == token.DOUBLESTAR
1310 trailing_comma_safe = trailing_comma_safe and py36
1311 leaf_priority = delimiters.get(id(leaf))
1312 if leaf_priority == delimiter_priority:
1313 normalize_prefix(current_line.leaves[0])
1316 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1319 delimiter_priority == COMMA_PRIORITY and
1320 current_line.leaves[-1].type != token.COMMA and
1323 current_line.append(Leaf(token.COMMA, ','))
1324 normalize_prefix(current_line.leaves[0])
1328 def is_import(leaf: Leaf) -> bool:
1329 """Returns True if the given leaf starts an import statement."""
1336 (v == 'import' and p and p.type == syms.import_name) or
1337 (v == 'from' and p and p.type == syms.import_from)
def normalize_prefix(leaf: Leaf) -> None:
    """Leave existing extra newlines for imports. Remove everything else."""
    if is_import(leaf):
        # Only the prefix text before any comment counts toward blank lines.
        spl = leaf.prefix.split('#', 1)
        nl_count = spl[0].count('\n')
        if len(spl) > 1:
            # Skip one newline since it was for a standalone comment.
            nl_count -= 1
        leaf.prefix = '\n' * nl_count
        return

    # Non-import leaves get their whitespace prefix wiped entirely.
    leaf.prefix = ''
def is_python36(node: Node) -> bool:
    """Returns True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # The first two characters are enough to recognize every
            # f-string prefix spelling (f", F', rf, FR, ...).
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", 'rf', 'fr', 'RF', 'FR'}:
                return True

        elif (
            n.type == syms.typedargslist and
            n.children and
            n.children[-1].type == token.COMMA
        ):
            # A trailing comma in a signature that also has * or ** is only
            # legal syntax from Python 3.6 on.
            for ch in n.children:
                if ch.type == token.STAR or ch.type == token.DOUBLESTAR:
                    return True

    return False
# File suffixes Black will reformat.
PYTHON_EXTENSIONS = {'.py'}
# Directory names that are never descended into when collecting files:
# build artifacts, VCS metadata, tool caches, and virtualenvs.
BLACKLISTED_DIRECTORIES = {
    'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv'
}
def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Yields all files under `path` with a PYTHON_EXTENSIONS suffix.

    Recurses into subdirectories, skipping any whose name appears in
    BLACKLISTED_DIRECTORIES.
    """
    for child in path.iterdir():
        if child.is_dir():
            if child.name in BLACKLISTED_DIRECTORIES:
                continue

            yield from gen_python_files_in_dir(child)

        elif child.suffix in PYTHON_EXTENSIONS:
            yield child
1401 """Provides a reformatting counter."""
1402 change_count: int = attrib(default=0)
1403 same_count: int = attrib(default=0)
1404 failure_count: int = attrib(default=0)
1406 def done(self, src: Path, changed: bool) -> None:
1407 """Increment the counter for successful reformatting. Write out a message."""
1409 out(f'reformatted {src}')
1410 self.change_count += 1
1412 out(f'{src} already well formatted, good job.', bold=False)
1413 self.same_count += 1
1415 def failed(self, src: Path, message: str) -> None:
1416 """Increment the counter for failed reformatting. Write out a message."""
1417 err(f'error: cannot format {src}: {message}')
1418 self.failure_count += 1
1421 def return_code(self) -> int:
1422 """Which return code should the app use considering the current state."""
1423 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
1424 # 126 we have special returncodes reserved by the shell.
1425 if self.failure_count:
1428 elif self.change_count:
1433 def __str__(self) -> str:
1434 """A color report of the current state.
1436 Use `click.unstyle` to remove colors.
1439 if self.change_count:
1440 s = 's' if self.change_count > 1 else ''
1442 click.style(f'{self.change_count} file{s} reformatted', bold=True)
1445 s = 's' if self.same_count > 1 else ''
1446 report.append(f'{self.same_count} file{s} left unchanged')
1447 if self.failure_count:
1448 s = 's' if self.failure_count > 1 else ''
1451 f'{self.failure_count} file{s} failed to reformat', fg='red'
1454 return ', '.join(report) + '.'
def assert_equivalent(src: str, dst: str) -> None:
    """Raises AssertionError if `src` and `dst` aren't equivalent.

    This is a temporary sanity check until Black becomes stable.
    """

    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        # Sorting _fields makes the rendering independent of field order.
        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        # The input itself is broken; this is the caller's problem, not ours.
        raise AssertionError(f"cannot parse source: {exc}") from None

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}",
        ) from None

    src_ast_str = '\n'.join(_v(src_ast))
    dst_ast_str = '\n'.join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst'))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}",
        ) from None
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Raises AssertionError if `dst` reformats differently the second time.

    This is a temporary sanity check until Black becomes stable.
    """
    newdst = format_str(dst, line_length=line_length)
    if dst != newdst:
        # Dump both diffs so the bug report shows the full drift.
        log = dump_to_file(
            diff(src, dst, 'source', 'first pass'),
            diff(dst, newdst, 'first pass', 'second pass'),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}",
        ) from None
def dump_to_file(*output: str) -> str:
    """Dumps `output` to a temporary file. Returns path to the file."""
    import tempfile

    # delete=False: the file must outlive this call so the user can inspect it.
    with tempfile.NamedTemporaryFile(
        mode='w', prefix='blk_', suffix='.log', delete=False
    ) as f:
        for lines in output:
            f.write(lines)
            # Blank line between dumped chunks for readability.
            f.write('\n' * 2)
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Returns a udiff string between strings `a` and `b`."""
    import difflib

    a_lines = [line + '\n' for line in a.split('\n')]
    b_lines = [line + '\n' for line in b.split('\n')]
    # n=5 lines of context makes the dumps easier to locate in big files.
    return ''.join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
1561 if __name__ == '__main__':