All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
4 from asyncio.base_events import BaseEventLoop
5 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial
9 from pathlib import Path
13 Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
16 from attr import dataclass, Factory
20 from blib2to3.pytree import Node, Leaf, type_repr
21 from blib2to3 import pygram, pytree
22 from blib2to3.pgen2 import driver, token
23 from blib2to3.pgen2.parse import ParseError
25 __version__ = "18.3a3"
26 DEFAULT_LINE_LENGTH = 88
28 syms = pygram.python_symbols
35 LN = Union[Leaf, Node]
36 out = partial(click.secho, bold=True, err=True)
37 err = partial(click.secho, fg='red', err=True)
class NothingChanged(UserWarning):
    """Signal that reformatting left the source exactly as it was.

    Both `format_file_in_place()` and `format_stdin_to_stdout()` have an
    `except NothingChanged` handler following their call to
    `format_file_contents()`.
    """
class CannotSplit(Exception):
    """No readable split fitting the allotted line length could be produced.

    The splitting helpers `left_hand_split()`, `right_hand_split()`, and
    `delimiter_split()` raise this to tell `split_line()` to give up on the
    current strategy.
    """
56 default=DEFAULT_LINE_LENGTH,
57 help='How many character per line to allow.',
64 "Don't write back the files, just return the status. Return code 0 "
65 "means nothing would change. Return code 1 means some files would be "
66 "reformatted. Return code 123 means there was an internal error."
72 help='If --fast given, skip temporary sanity checks. [default: --safe]',
74 @click.version_option(version=__version__)
79 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
84 ctx: click.Context, line_length: int, check: bool, fast: bool, src: List[str]
86 """The uncompromising code formatter."""
87 sources: List[Path] = []
91 sources.extend(gen_python_files_in_dir(p))
93 # if a file was explicitly given, we don't care about its extension
96 sources.append(Path('-'))
98 err(f'invalid path: {s}')
101 elif len(sources) == 1:
103 report = Report(check=check)
105 if not p.is_file() and str(p) == '-':
106 changed = format_stdin_to_stdout(
107 line_length=line_length, fast=fast, write_back=not check
110 changed = format_file_in_place(
111 p, line_length=line_length, fast=fast, write_back=not check
113 report.done(p, changed)
114 except Exception as exc:
115 report.failed(p, str(exc))
116 ctx.exit(report.return_code)
118 loop = asyncio.get_event_loop()
119 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
122 return_code = loop.run_until_complete(
124 sources, line_length, not check, fast, loop, executor
129 ctx.exit(return_code)
132 async def schedule_formatting(
141 src: loop.run_in_executor(
142 executor, format_file_in_place, src, line_length, fast, write_back
146 await asyncio.wait(tasks.values())
149 for src, task in tasks.items():
151 report.failed(src, 'timed out, cancelling')
153 cancelled.append(task)
154 elif task.exception():
155 report.failed(src, str(task.exception()))
157 report.done(src, task.result())
159 await asyncio.wait(cancelled, timeout=2)
160 out('All done! ✨ 🍰 ✨')
161 click.echo(str(report))
162 return report.return_code
165 def format_file_in_place(
166 src: Path, line_length: int, fast: bool, write_back: bool = False
168 """Format the file and rewrite if changed. Return True if changed."""
169 with tokenize.open(src) as src_buffer:
170 src_contents = src_buffer.read()
172 contents = format_file_contents(
173 src_contents, line_length=line_length, fast=fast
175 except NothingChanged:
179 with open(src, "w", encoding=src_buffer.encoding) as f:
184 def format_stdin_to_stdout(
185 line_length: int, fast: bool, write_back: bool = False
187 """Format file on stdin and pipe output to stdout. Return True if changed."""
188 contents = sys.stdin.read()
190 contents = format_file_contents(contents, line_length=line_length, fast=fast)
193 except NothingChanged:
198 sys.stdout.write(contents)
201 def format_file_contents(
202 src_contents: str, line_length: int, fast: bool
204 """Reformats a file and returns its contents and encoding."""
205 if src_contents.strip() == '':
208 dst_contents = format_str(src_contents, line_length=line_length)
209 if src_contents == dst_contents:
213 assert_equivalent(src_contents, dst_contents)
214 assert_stable(src_contents, dst_contents, line_length=line_length)
218 def format_str(src_contents: str, line_length: int) -> FileContent:
219 """Reformats a string and returns new contents."""
220 src_node = lib2to3_parse(src_contents)
222 lines = LineGenerator()
223 elt = EmptyLineTracker()
224 py36 = is_python36(src_node)
227 for current_line in lines.visit(src_node):
228 for _ in range(after):
229 dst_contents += str(empty_line)
230 before, after = elt.maybe_empty_lines(current_line)
231 for _ in range(before):
232 dst_contents += str(empty_line)
233 for line in split_line(current_line, line_length=line_length, py36=py36):
234 dst_contents += str(line)
239 pygram.python_grammar_no_print_statement_no_exec_statement,
240 pygram.python_grammar_no_print_statement,
241 pygram.python_grammar_no_exec_statement,
242 pygram.python_grammar,
246 def lib2to3_parse(src_txt: str) -> Node:
247 """Given a string with source, return the lib2to3 Node."""
248 grammar = pygram.python_grammar_no_print_statement
249 if src_txt[-1] != '\n':
250 nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
252 for grammar in GRAMMARS:
253 drv = driver.Driver(grammar, pytree.convert)
255 result = drv.parse_string(src_txt, True)
258 except ParseError as pe:
259 lineno, column = pe.context[1]
260 lines = src_txt.splitlines()
262 faulty_line = lines[lineno - 1]
264 faulty_line = "<line number missing in source>"
265 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
269 if isinstance(result, Leaf):
270 result = Node(syms.file_input, [result])
274 def lib2to3_unparse(node: Node) -> str:
275 """Given a lib2to3 node, return its string representation."""
283 class Visitor(Generic[T]):
284 """Basic lib2to3 visitor that yields things on visiting."""
286 def visit(self, node: LN) -> Iterator[T]:
288 name = token.tok_name[node.type]
290 name = type_repr(node.type)
291 yield from getattr(self, f'visit_{name}', self.visit_default)(node)
def visit_default(self, node: LN) -> Iterator[T]:
    """Fallback handler used when no `visit_<name>` method exists for `node`.

    A leaf produces nothing. An inner `Node` produces, in order, everything
    that visiting each of its children produces.
    """
    if not isinstance(node, Node):
        return

    for kid in node.children:
        yield from self.visit(kid)
300 class DebugVisitor(Visitor[T]):
303 def visit_default(self, node: LN) -> Iterator[T]:
304 indent = ' ' * (2 * self.tree_depth)
305 if isinstance(node, Node):
306 _type = type_repr(node.type)
307 out(f'{indent}{_type}', fg='yellow')
309 for child in node.children:
310 yield from self.visit(child)
313 out(f'{indent}/{_type}', fg='yellow', bold=False)
315 _type = token.tok_name.get(node.type, str(node.type))
316 out(f'{indent}{_type}', fg='blue', nl=False)
318 # We don't have to handle prefixes for `Node` objects since
319 # that delegates to the first child anyway.
320 out(f' {node.prefix!r}', fg='green', bold=False, nl=False)
321 out(f' {node.value!r}', fg='blue', bold=False)
324 KEYWORDS = set(keyword.kwlist)
325 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
326 FLOW_CONTROL = {'return', 'raise', 'break', 'continue'}
337 STANDALONE_COMMENT = 153
338 LOGIC_OPERATORS = {'and', 'or'}
362 COMPREHENSION_PRIORITY = 20
366 COMPARATOR_PRIORITY = 3
371 class BracketTracker:
373 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
374 delimiters: Dict[LeafID, Priority] = Factory(dict)
375 previous: Optional[Leaf] = None
377 def mark(self, leaf: Leaf) -> None:
378 if leaf.type == token.COMMENT:
381 if leaf.type in CLOSING_BRACKETS:
383 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
384 leaf.opening_bracket = opening_bracket
385 leaf.bracket_depth = self.depth
387 delim = is_delimiter(leaf)
389 self.delimiters[id(leaf)] = delim
390 elif self.previous is not None:
391 if leaf.type == token.STRING and self.previous.type == token.STRING:
392 self.delimiters[id(self.previous)] = STRING_PRIORITY
394 leaf.type == token.NAME
395 and leaf.value == 'for'
397 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
399 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
401 leaf.type == token.NAME
402 and leaf.value == 'if'
404 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
406 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
408 leaf.type == token.NAME
409 and leaf.value in LOGIC_OPERATORS
412 self.delimiters[id(self.previous)] = LOGIC_PRIORITY
413 if leaf.type in OPENING_BRACKETS:
414 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
def any_open_brackets(self) -> bool:
    """Return True while at least one opening bracket is still unmatched.

    `mark()` stores every opening bracket in `bracket_match` and pops the
    entry when the matching closing bracket arrives, so a non-empty mapping
    means some bracket has not been closed yet.
    """
    return len(self.bracket_match) > 0
def max_priority(self, exclude: Iterable[LeafID] = ()) -> int:
    """Return the greatest priority among the delimiters recorded so far.

    Entries whose leaf id is contained in `exclude` are left out of the
    comparison. The result is on the same scale as the values returned by
    `is_delimiter()`.

    Raises ValueError if no delimiter is left to compare, since `max()` is
    then handed an empty sequence.
    """
    candidates = [
        priority
        for leaf_id, priority in self.delimiters.items()
        if leaf_id not in exclude
    ]
    return max(candidates)
433 leaves: List[Leaf] = Factory(list)
434 comments: Dict[LeafID, Leaf] = Factory(dict)
435 bracket_tracker: BracketTracker = Factory(BracketTracker)
436 inside_brackets: bool = False
437 has_for: bool = False
438 _for_loop_variable: bool = False
440 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
441 has_value = leaf.value.strip()
445 if self.leaves and not preformatted:
446 # Note: at this point leaf.prefix should be empty except for
447 # imports, for which we only preserve newlines.
448 leaf.prefix += whitespace(leaf)
449 if self.inside_brackets or not preformatted:
450 self.maybe_decrement_after_for_loop_variable(leaf)
451 self.bracket_tracker.mark(leaf)
452 self.maybe_remove_trailing_comma(leaf)
453 self.maybe_increment_for_loop_variable(leaf)
454 if self.maybe_adapt_standalone_comment(leaf):
457 if not self.append_comment(leaf):
458 self.leaves.append(leaf)
def is_comment(self) -> bool:
    """Tell whether this line begins with a standalone comment leaf."""
    # NOTE(review): other code reads this without call parentheses
    # (`current_line.is_comment`), so a `@property` decorator presumably sits
    # on the line above -- it is not visible in this view.
    if not self:
        return False

    return self.leaves[0].type == STANDALONE_COMMENT
def is_decorator(self) -> bool:
    """Tell whether this line begins with an `@` token."""
    # NOTE(review): other code reads this without call parentheses
    # (`current_line.is_decorator`), so a `@property` decorator presumably
    # sits on the line above -- it is not visible in this view.
    if not self:
        return False

    return self.leaves[0].type == token.AT
def is_import(self) -> bool:
    """Tell whether this line begins an import statement.

    The decision is delegated to the module-level `is_import()` helper,
    applied to the first leaf of the line.
    """
    # NOTE(review): other code reads this without call parentheses
    # (`current_line.is_import`), so a `@property` decorator presumably sits
    # on the line above -- it is not visible in this view.
    if not self:
        return False

    return is_import(self.leaves[0])
473 def is_class(self) -> bool:
476 and self.leaves[0].type == token.NAME
477 and self.leaves[0].value == 'class'
481 def is_def(self) -> bool:
482 """Also returns True for async defs."""
484 first_leaf = self.leaves[0]
489 second_leaf: Optional[Leaf] = self.leaves[1]
493 (first_leaf.type == token.NAME and first_leaf.value == 'def')
495 first_leaf.type == token.ASYNC
496 and second_leaf is not None
497 and second_leaf.type == token.NAME
498 and second_leaf.value == 'def'
503 def is_flow_control(self) -> bool:
506 and self.leaves[0].type == token.NAME
507 and self.leaves[0].value in FLOW_CONTROL
511 def is_yield(self) -> bool:
514 and self.leaves[0].type == token.NAME
515 and self.leaves[0].value == 'yield'
518 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
521 and self.leaves[-1].type == token.COMMA
522 and closing.type in CLOSING_BRACKETS
526 if closing.type == token.RBRACE:
530 if closing.type == token.RSQB:
531 comma = self.leaves[-1]
532 if comma.parent and comma.parent.type == syms.listmaker:
536 # For parens let's check if it's safe to remove the comma. If the
537 # trailing one is the only one, we might mistakenly change a tuple
538 # into a different type by removing the comma.
539 depth = closing.bracket_depth + 1
541 opening = closing.opening_bracket
542 for _opening_index, leaf in enumerate(self.leaves):
549 for leaf in self.leaves[_opening_index + 1:]:
553 bracket_depth = leaf.bracket_depth
554 if bracket_depth == depth and leaf.type == token.COMMA:
556 if leaf.parent and leaf.parent.type == syms.arglist:
566 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
567 """In a for loop, or comprehension, the variables are often unpacks.
569 To avoid splitting on the comma in this situation, we will increase
570 the depth of tokens between `for` and `in`.
572 if leaf.type == token.NAME and leaf.value == 'for':
574 self.bracket_tracker.depth += 1
575 self._for_loop_variable = True
580 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
581 # See `maybe_increment_for_loop_variable` above for explanation.
582 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == 'in':
583 self.bracket_tracker.depth -= 1
584 self._for_loop_variable = False
589 def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
590 """Hack a standalone comment to act as a trailing comment for line splitting.
592 If this line has brackets and a standalone `comment`, we need to adapt
593 it to be able to still reformat the line.
595 This is not perfect, the line to which the standalone comment gets
596 appended will appear "too long" when splitting.
599 comment.type == STANDALONE_COMMENT
600 and self.bracket_tracker.any_open_brackets()
604 comment.type = token.COMMENT
605 comment.prefix = '\n' + ' ' * (self.depth + 1)
606 return self.append_comment(comment)
608 def append_comment(self, comment: Leaf) -> bool:
609 if comment.type != token.COMMENT:
613 after = id(self.last_non_delimiter())
615 comment.type = STANDALONE_COMMENT
620 if after in self.comments:
621 self.comments[after].value += str(comment)
623 self.comments[after] = comment
626 def last_non_delimiter(self) -> Leaf:
627 for i in range(len(self.leaves)):
628 last = self.leaves[-i - 1]
629 if not is_delimiter(last):
632 raise LookupError("No non-delimiters found")
634 def __str__(self) -> str:
638 indent = ' ' * self.depth
639 leaves = iter(self.leaves)
641 res = f'{first.prefix}{indent}{first.value}'
644 for comment in self.comments.values():
def __bool__(self) -> bool:
    """A line is truthy when it holds at least one leaf or one comment."""
    if self.leaves:
        return True

    return bool(self.comments)
653 class EmptyLineTracker:
654 """Provides a stateful method that returns the number of potential extra
655 empty lines needed before and after the currently processed line.
657 Note: this tracker works on lines that haven't been split yet. It assumes
658 the prefix of the first leaf consists of optional newlines. Those newlines
659 are consumed by `maybe_empty_lines()` and included in the computation.
661 previous_line: Optional[Line] = None
662 previous_after: int = 0
663 previous_defs: List[int] = Factory(list)
665 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
666 """Returns the number of extra empty lines before and after the `current_line`.
668 This is for separating `def`, `async def` and `class` with extra empty lines
669 (two on module-level), as well as providing an extra empty line after flow
670 control keywords to make them more prominent.
672 before, after = self._maybe_empty_lines(current_line)
673 before -= self.previous_after
674 self.previous_after = after
675 self.previous_line = current_line
678 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
680 if current_line.is_comment and current_line.depth == 0:
682 if current_line.leaves:
683 # Consume the first leaf's extra newlines.
684 first_leaf = current_line.leaves[0]
685 before = first_leaf.prefix.count('\n')
686 before = min(before, max(before, max_allowed))
687 first_leaf.prefix = ''
690 depth = current_line.depth
691 while self.previous_defs and self.previous_defs[-1] >= depth:
692 self.previous_defs.pop()
693 before = 1 if depth else 2
694 is_decorator = current_line.is_decorator
695 if is_decorator or current_line.is_def or current_line.is_class:
697 self.previous_defs.append(depth)
698 if self.previous_line is None:
699 # Don't insert empty lines before the first line in the file.
702 if self.previous_line and self.previous_line.is_decorator:
703 # Don't insert empty lines between decorators.
707 if current_line.depth:
711 if current_line.is_flow_control:
716 and self.previous_line.is_import
717 and not current_line.is_import
718 and depth == self.previous_line.depth
720 return (before or 1), 0
724 and self.previous_line.is_yield
725 and (not current_line.is_yield or depth != self.previous_line.depth)
727 return (before or 1), 0
733 class LineGenerator(Visitor[Line]):
734 """Generates reformatted Line objects. Empty lines are not emitted.
736 Note: destroys the tree it's visiting by mutating prefixes of its leaves
737 in ways that will no longer stringify to valid Python code on the tree.
739 current_line: Line = Factory(Line)
741 def line(self, indent: int = 0) -> Iterator[Line]:
744 If the line is empty, only emit if it makes sense.
745 If the line is too long, split it first and then generate.
747 If any lines were generated, set up a new current_line.
749 if not self.current_line:
750 self.current_line.depth += indent
751 return # Line is empty, don't emit. Creating a new one unnecessary.
753 complete_line = self.current_line
754 self.current_line = Line(depth=complete_line.depth + indent)
757 def visit_default(self, node: LN) -> Iterator[Line]:
758 if isinstance(node, Leaf):
759 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
760 for comment in generate_comments(node):
761 if any_open_brackets:
762 # any comment within brackets is subject to splitting
763 self.current_line.append(comment)
764 elif comment.type == token.COMMENT:
765 # regular trailing comment
766 self.current_line.append(comment)
767 yield from self.line()
770 # regular standalone comment
771 yield from self.line()
773 self.current_line.append(comment)
774 yield from self.line()
776 normalize_prefix(node, inside_brackets=any_open_brackets)
777 if node.type not in WHITESPACE:
778 self.current_line.append(node)
779 yield from super().visit_default(node)
def visit_INDENT(self, node: Node) -> Iterator[Line]:
    """Handle an INDENT token by moving one indentation level deeper.

    `line(indent=1)` is asked to close the current line with the depth raised
    by one; afterwards the token goes through the default visitor.
    """
    yield from self.line(indent=1)
    yield from self.visit_default(node)
def visit_DEDENT(self, node: Node) -> Iterator[Line]:
    """Handle a DEDENT token by moving one indentation level back out.

    Only `line(indent=-1)` is invoked; the token itself is not forwarded to
    the default visitor.
    """
    yield from self.line(indent=-1)
788 def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
789 """Visit a statement.
791 The relevant Python language keywords for this statement are NAME leaves
794 for child in node.children:
795 if child.type == token.NAME and child.value in keywords: # type: ignore
796 yield from self.line()
798 yield from self.visit(child)
800 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
801 """A statement without nested statements."""
802 is_suite_like = node.parent and node.parent.type in STATEMENT
804 yield from self.line(+1)
805 yield from self.visit_default(node)
806 yield from self.line(-1)
809 yield from self.line()
810 yield from self.visit_default(node)
812 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
813 yield from self.line()
815 children = iter(node.children)
816 for child in children:
817 yield from self.visit(child)
819 if child.type == token.ASYNC:
822 internal_stmt = next(children)
823 for child in internal_stmt.children:
824 yield from self.visit(child)
def visit_decorators(self, node: Node) -> Iterator[Line]:
    """Visit every child of `node`, starting each one on a line of its own.

    Before a child is visited, `line()` is called so that whatever has been
    collected so far is closed off first.
    """
    for part in node.children:
        yield from self.line()
        yield from self.visit(part)
def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
    """Treat a semicolon as the end of the current line.

    The semicolon leaf is never appended to a line; the only effect is the
    call to `line()`.
    """
    yield from self.line()
def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
    """Finish the input: process the end marker, then close the last line."""
    # The default visitor goes first so that anything it produces for the
    # marker is emitted before the final `line()` call.
    yield from self.visit_default(leaf)
    yield from self.line()
838 def __attrs_post_init__(self) -> None:
839 """You are in a twisty little maze of passages."""
841 self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'})
842 self.visit_while_stmt = partial(v, keywords={'while', 'else'})
843 self.visit_for_stmt = partial(v, keywords={'for', 'else'})
844 self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'})
845 self.visit_except_clause = partial(v, keywords={'except'})
846 self.visit_funcdef = partial(v, keywords={'def'})
847 self.visit_with_stmt = partial(v, keywords={'with'})
848 self.visit_classdef = partial(v, keywords={'class'})
849 self.visit_async_funcdef = self.visit_async_stmt
850 self.visit_decorated = self.visit_decorators
# Each opening bracket token type mapped to the closing token type that pairs
# with it.
BRACKET = {
    token.LPAR: token.RPAR,
    token.LSQB: token.RSQB,
    token.LBRACE: token.RBRACE,
}
# Convenience sets derived from the table above.
OPENING_BRACKETS = set(BRACKET)
CLOSING_BRACKETS = set(BRACKET.values())
BRACKETS = OPENING_BRACKETS.union(CLOSING_BRACKETS)
# Token types that `whitespace()` tests for before any other rule.
ALWAYS_NO_SPACE = CLOSING_BRACKETS.union({token.COMMA, STANDALONE_COMMENT})
860 def whitespace(leaf: Leaf) -> str: # noqa C901
861 """Return whitespace prefix if needed for the given `leaf`."""
868 if t in ALWAYS_NO_SPACE:
871 if t == token.COMMENT:
874 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
875 if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}:
878 prev = leaf.prev_sibling
880 prevp = preceding_leaf(p)
881 if not prevp or prevp.type in OPENING_BRACKETS:
885 return SPACE if prevp.type == token.COMMA else NO
887 if prevp.type == token.EQUAL:
889 if prevp.parent.type in {
890 syms.arglist, syms.argument, syms.parameters, syms.varargslist
894 elif prevp.parent.type == syms.typedargslist:
895 # A bit hacky: if the equal sign has whitespace, it means we
896 # previously found it's a typed argument. So, we're using
900 elif prevp.type == token.DOUBLESTAR:
901 if prevp.parent and prevp.parent.type in {
911 elif prevp.type == token.COLON:
912 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
917 and prevp.parent.type in {syms.factor, syms.star_expr}
918 and prevp.type in MATH_OPERATORS
923 prevp.type == token.RIGHTSHIFT
925 and prevp.parent.type == syms.shift_expr
926 and prevp.prev_sibling
927 and prevp.prev_sibling.type == token.NAME
928 and prevp.prev_sibling.value == 'print' # type: ignore
930 # Python 2 print chevron
933 elif prev.type in OPENING_BRACKETS:
936 if p.type in {syms.parameters, syms.arglist}:
937 # untyped function signatures or calls
941 if not prev or prev.type != token.COMMA:
944 elif p.type == syms.varargslist:
949 if prev and prev.type != token.COMMA:
952 elif p.type == syms.typedargslist:
953 # typed function signatures
958 if prev.type != syms.tname:
961 elif prev.type == token.EQUAL:
962 # A bit hacky: if the equal sign has whitespace, it means we
963 # previously found it's a typed argument. So, we're using that, too.
966 elif prev.type != token.COMMA:
969 elif p.type == syms.tname:
972 prevp = preceding_leaf(p)
973 if not prevp or prevp.type != token.COMMA:
976 elif p.type == syms.trailer:
977 # attributes and calls
978 if t == token.LPAR or t == token.RPAR:
983 prevp = preceding_leaf(p)
984 if not prevp or prevp.type != token.NUMBER:
987 elif t == token.LSQB:
990 elif prev.type != token.COMMA:
993 elif p.type == syms.argument:
999 prevp = preceding_leaf(p)
1000 if not prevp or prevp.type == token.LPAR:
1003 elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
1006 elif p.type == syms.decorator:
1010 elif p.type == syms.dotted_name:
1014 prevp = preceding_leaf(p)
1015 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1018 elif p.type == syms.classdef:
1022 if prev and prev.type == token.LPAR:
1025 elif p.type == syms.subscript:
1028 assert p.parent is not None, "subscripts are always parented"
1029 if p.parent.type == syms.subscriptlist:
1037 elif p.type == syms.atom:
1038 if prev and t == token.DOT:
1039 # dots, but not the first one.
1043 p.type == syms.listmaker
1044 or p.type == syms.testlist_gexp
1045 or p.type == syms.subscriptlist
1047 # list interior, including unpacking
1051 elif p.type == syms.dictsetmaker:
1052 # dict and set interior, including unpacking
1056 if prev.type == token.DOUBLESTAR:
1059 elif p.type in {syms.factor, syms.star_expr}:
1062 prevp = preceding_leaf(p)
1063 if not prevp or prevp.type in OPENING_BRACKETS:
1066 prevp_parent = prevp.parent
1067 assert prevp_parent is not None
1068 if prevp.type == token.COLON and prevp_parent.type in {
1069 syms.subscript, syms.sliceop
1073 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1076 elif t == token.NAME or t == token.NUMBER:
1079 elif p.type == syms.import_from:
1081 if prev and prev.type == token.DOT:
1084 elif t == token.NAME:
1088 if prev and prev.type == token.DOT:
1091 elif p.type == syms.sliceop:
1097 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1098 """Returns the first leaf that precedes `node`, if any."""
1100 res = node.prev_sibling
1102 if isinstance(res, Leaf):
1106 return list(res.leaves())[-1]
1115 def is_delimiter(leaf: Leaf) -> int:
1116 """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter.
1118 Higher numbers are higher priority.
1120 if leaf.type == token.COMMA:
1121 return COMMA_PRIORITY
1123 if leaf.type in COMPARATORS:
1124 return COMPARATOR_PRIORITY
1127 leaf.type in MATH_OPERATORS
1129 and leaf.parent.type not in {syms.factor, syms.star_expr}
1131 return MATH_PRIORITY
1136 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1137 """Cleans the prefix of the `leaf` and generates comments from it, if any.
1139 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1140 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1141 move because it does away with modifying the grammar to include all the
1142 possible places in which comments can be placed.
1144 The sad consequence for us though is that comments don't "belong" anywhere.
1145 This is why this function generates simple parentless Leaf objects for
1146 comments. We simply don't know what the correct parent should be.
1148 No matter though, we can live without this. We really only need to
1149 differentiate between inline and standalone comments. The latter don't
1150 share the line with any code.
1152 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1153 are emitted with a fake STANDALONE_COMMENT token identifier.
1163 for index, line in enumerate(p.split('\n')):
1164 line = line.lstrip()
1167 if not line.startswith('#'):
1170 if index == 0 and leaf.type != token.ENDMARKER:
1171 comment_type = token.COMMENT # simple trailing comment
1173 comment_type = STANDALONE_COMMENT
1174 yield Leaf(comment_type, make_comment(line), prefix='\n' * nlines)
1179 def make_comment(content: str) -> str:
1180 content = content.rstrip()
1184 if content[0] == '#':
1185 content = content[1:]
1186 if content and content[0] not in ' !:#':
1187 content = ' ' + content
1188 return '#' + content
1192 line: Line, line_length: int, inner: bool = False, py36: bool = False
1193 ) -> Iterator[Line]:
1194 """Splits a `line` into potentially many lines.
1196 They should fit in the allotted `line_length` but might not be able to.
1197 `inner` signifies that there were a pair of brackets somewhere around the
1198 current `line`, possibly transitively. This means we can fallback to splitting
1199 by delimiters if the LHS/RHS don't yield any results.
1201 If `py36` is True, splitting may generate syntax that is only compatible
1202 with Python 3.6 and later.
1204 line_str = str(line).strip('\n')
1205 if len(line_str) <= line_length and '\n' not in line_str:
1210 split_funcs = [left_hand_split]
1211 elif line.inside_brackets:
1212 split_funcs = [delimiter_split]
1213 if '\n' not in line_str:
1214 # Only attempt RHS if we don't have multiline strings or comments
1216 split_funcs.append(right_hand_split)
1218 split_funcs = [right_hand_split]
1219 for split_func in split_funcs:
1220 # We are accumulating lines in `result` because we might want to abort
1221 # mission and return the original line in the end, or attempt a different
1223 result: List[Line] = []
1225 for l in split_func(line, py36=py36):
1226 if str(l).strip('\n') == line_str:
1227 raise CannotSplit("Split function returned an unchanged result")
1230 split_line(l, line_length=line_length, inner=True, py36=py36)
1232 except CannotSplit as cs:
1243 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1244 """Split line into many lines, starting with the first matching bracket pair.
1246 Note: this usually looks weird, only use this for function definitions.
1247 Prefer RHS otherwise.
1249 head = Line(depth=line.depth)
1250 body = Line(depth=line.depth + 1, inside_brackets=True)
1251 tail = Line(depth=line.depth)
1252 tail_leaves: List[Leaf] = []
1253 body_leaves: List[Leaf] = []
1254 head_leaves: List[Leaf] = []
1255 current_leaves = head_leaves
1256 matching_bracket = None
1257 for leaf in line.leaves:
1259 current_leaves is body_leaves
1260 and leaf.type in CLOSING_BRACKETS
1261 and leaf.opening_bracket is matching_bracket
1263 current_leaves = tail_leaves if body_leaves else head_leaves
1264 current_leaves.append(leaf)
1265 if current_leaves is head_leaves:
1266 if leaf.type in OPENING_BRACKETS:
1267 matching_bracket = leaf
1268 current_leaves = body_leaves
1269 # Since body is a new indent level, remove spurious leading whitespace.
1271 normalize_prefix(body_leaves[0], inside_brackets=True)
1272 # Build the new lines.
1273 for result, leaves in (
1274 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1277 result.append(leaf, preformatted=True)
1278 comment_after = line.comments.get(id(leaf))
1280 result.append(comment_after, preformatted=True)
1281 split_succeeded_or_raise(head, body, tail)
1282 for result in (head, body, tail):
1287 def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1288 """Split line into many lines, starting with the last matching bracket pair."""
1289 head = Line(depth=line.depth)
1290 body = Line(depth=line.depth + 1, inside_brackets=True)
1291 tail = Line(depth=line.depth)
1292 tail_leaves: List[Leaf] = []
1293 body_leaves: List[Leaf] = []
1294 head_leaves: List[Leaf] = []
1295 current_leaves = tail_leaves
1296 opening_bracket = None
1297 for leaf in reversed(line.leaves):
1298 if current_leaves is body_leaves:
1299 if leaf is opening_bracket:
1300 current_leaves = head_leaves if body_leaves else tail_leaves
1301 current_leaves.append(leaf)
1302 if current_leaves is tail_leaves:
1303 if leaf.type in CLOSING_BRACKETS:
1304 opening_bracket = leaf.opening_bracket
1305 current_leaves = body_leaves
1306 tail_leaves.reverse()
1307 body_leaves.reverse()
1308 head_leaves.reverse()
1309 # Since body is a new indent level, remove spurious leading whitespace.
1311 normalize_prefix(body_leaves[0], inside_brackets=True)
1312 # Build the new lines.
1313 for result, leaves in (
1314 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1317 result.append(leaf, preformatted=True)
1318 comment_after = line.comments.get(id(leaf))
1320 result.append(comment_after, preformatted=True)
1321 split_succeeded_or_raise(head, body, tail)
1322 for result in (head, body, tail):
1327 def split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1328 tail_len = len(str(tail).strip())
1331 raise CannotSplit("Splitting brackets produced the same line")
1335 f"Splitting brackets on an empty body to save "
1336 f"{tail_len} characters is not worth it"
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    This kind of split doesn't increase indentation.
    If `py36` is True, the split will add trailing commas also in function
    signatures that contain * and **.

    Yields one `Line` per delimited chunk of `line`.  Raises CannotSplit if
    `line` has no leaves or contains no delimiters to split on.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    delimiters = line.bracket_tracker.delimiters
    try:
        # The last leaf is excluded when looking for the priority to split on.
        delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    trailing_comma_safe = True
    for leaf in line.leaves:
        current_line.append(leaf, preformatted=True)
        comment_after = line.comments.get(id(leaf))
        if comment_after:
            current_line.append(comment_after, preformatted=True)
        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        # Only a `*` or `**` at the outermost depth seen so far makes a trailing
        # comma unsafe.  The depth check must guard BOTH token types: without
        # the grouping, `and` binds tighter than `or` and any `**` (even one
        # nested in inner brackets) would disable the trailing comma.
        if leaf.bracket_depth == lowest_depth and (
            leaf.type == token.STAR or leaf.type == token.DOUBLESTAR
        ):
            trailing_comma_safe = trailing_comma_safe and py36
        leaf_priority = delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            normalize_prefix(current_line.leaves[0], inside_brackets=True)
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        # Emit whatever is left after the last delimiter, adding a trailing
        # comma when splitting on commas and it is safe to do so.
        if (
            delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and trailing_comma_safe
        ):
            current_line.append(Leaf(token.COMMA, ','))
        normalize_prefix(current_line.leaves[0], inside_brackets=True)
        yield current_line
def is_import(leaf: Leaf) -> bool:
    """Returns True if the given leaf starts an import statement.

    That is the case for a NAME leaf spelled `import` whose parent is an
    `import_name` node, or spelled `from` whose parent is an `import_from` node.
    """
    if leaf.type != token.NAME:
        return False

    parent = leaf.parent
    keyword = leaf.value
    if keyword == 'import':
        return bool(parent and parent.type == syms.import_name)

    if keyword == 'from':
        return bool(parent and parent.type == syms.import_from)

    return False
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`.

    Remove everything else. Note: don't use backslashes for formatting or
    you'll lose your voting rights.

    The prefix of `leaf` is rewritten in place to consist of newlines only
    (possibly none).
    """
    kept_newlines = 0
    if not inside_brackets:
        chunks = leaf.prefix.split('#')
        # A backslash before any comment means the prefix is dropped entirely.
        if '\\' not in chunks[0]:
            kept_newlines = chunks[-1].count('\n')
            if len(chunks) > 1:
                # The prefix contained a comment: one newline after the last
                # '#' is not counted as an extra blank line.
                kept_newlines -= 1
    leaf.prefix = '\n' * kept_newlines
def is_python36(node: Node) -> bool:
    """Returns True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings (with any capitalization of the `f`, `rf` or `fr` prefix); and
    - trailing commas after * or ** in function signatures.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # String prefix letters are case-insensitive, so mixed-case forms
            # such as `Rf"..."` or `fR'...'` are f-strings too.  Compare the
            # lowercased head so none of the spellings is missed.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", 'rf', 'fr'}:
                return True

        elif (
            n.type == syms.typedargslist
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # An argument list ending in a comma: only 3.6+ if it unpacks.
            for ch in n.children:
                if ch.type == token.STAR or ch.type == token.DOUBLESTAR:
                    return True

    return False
# File suffixes that mark a file as Python source when walking a directory.
PYTHON_EXTENSIONS = {'.py'}
# Directory names that are skipped (never descended into) when walking a directory.
BLACKLISTED_DIRECTORIES = {
    '.git',
    '.hg',
    '.mypy_cache',
    '.tox',
    '.venv',
    '_build',
    'buck-out',
    'build',
    'dist',
}
def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Yield the Python source files found under directory `path`, recursively.

    Files qualify when their suffix is in PYTHON_EXTENSIONS.  Subdirectories
    whose name is in BLACKLISTED_DIRECTORIES are skipped entirely.
    """
    for entry in path.iterdir():
        if not entry.is_dir():
            if entry.suffix in PYTHON_EXTENSIONS:
                yield entry
            continue

        if entry.name not in BLACKLISTED_DIRECTORIES:
            yield from gen_python_files_in_dir(entry)
@dataclass
class Report:
    """Provides a reformatting counter."""
    # When set, messages use the "would ..." wording and changed files make
    # `return_code` report 1.
    check: bool = False
    # Number of files reported through `done()` as changed.
    change_count: int = 0
    # Number of files reported through `done()` as unchanged.
    same_count: int = 0
    # Number of files reported through `failed()`.
    failure_count: int = 0

    def done(self, src: Path, changed: bool) -> None:
        """Increment the counter for successful reformatting. Write out a message."""
        if not changed:
            out(f'{src} already well formatted, good job.', bold=False)
            self.same_count += 1
            return

        verb = 'would reformat' if self.check else 'reformatted'
        out(f'{verb} {src}')
        self.change_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Increment the counter for failed reformatting. Write out a message."""
        err(f'error: cannot format {src}: {message}')
        self.failure_count += 1

    @property
    def return_code(self) -> int:
        """Which return code should the app use considering the current state."""
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special returncodes reserved by the shell.
        if self.failure_count:
            return 123

        if self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """A color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        if self.check:
            reformatted = "would be reformatted"
            unchanged = "would be left unchanged"
            failed = "would fail to reformat"
        else:
            reformatted = "reformatted"
            unchanged = "left unchanged"
            failed = "failed to reformat"

        def describe(count: int, outcome: str) -> str:
            # Pluralize "file" only for counts above one.
            s = 's' if count > 1 else ''
            return f'{count} file{s} {outcome}'

        # Counters that are zero are left out of the summary.
        pieces = []
        if self.change_count:
            changed_text = describe(self.change_count, reformatted)
            pieces.append(click.style(changed_text, bold=True))
        if self.same_count:
            pieces.append(describe(self.same_count, unchanged))
        if self.failure_count:
            failed_text = describe(self.failure_count, failed)
            pieces.append(click.style(failed_text, fg='red'))
        return ', '.join(pieces) + '.'
def assert_equivalent(src: str, dst: str) -> None:
    """Raises AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with the builtin `ast` module and their trees are
    rendered to text; the check fails if `src` cannot be parsed, if `dst`
    cannot be parsed, or if the two renderings differ.

    This is a temporary sanity check until Black becomes stable.
    """

    import ast
    import traceback

    missing = object()

    def _render(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        type_name = node.__class__.__name__
        yield f"{' ' * depth}{type_name}("

        for field in sorted(node._fields):
            value = getattr(node, field, missing)
            if value is missing:
                # Field declared by the node class but absent on this instance.
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, ast.AST):
                yield from _render(value, depth + 2)

            elif isinstance(value, list):
                # Only AST children are rendered; other list items are ignored.
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _render(item, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{type_name}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        # Keep the traceback and the invalid output around for the bug report.
        log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        )

    src_ast_str = '\n'.join(_render(src_ast))
    dst_ast_str = '\n'.join(_render(dst_ast))
    if src_ast_str == dst_ast_str:
        return

    log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst'))
    raise AssertionError(
        f"INTERNAL ERROR: Black produced code that is not equivalent to "
        f"the source. "
        f"Please report a bug on https://github.com/ambv/black/issues. "
        f"This diff might be helpful: {log}"
    )
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Raises AssertionError if `dst` reformats differently the second time.

    `dst` is run through `format_str` again with the same `line_length`; if
    the result differs, the source-to-first-pass and first-pass-to-second-pass
    diffs are dumped to a file whose path is included in the error.

    This is a temporary sanity check until Black becomes stable.
    """
    second_pass = format_str(dst, line_length=line_length)
    if second_pass == dst:
        return

    first_diff = diff(src, dst, 'source', 'first pass')
    second_diff = diff(dst, second_pass, 'first pass', 'second pass')
    log = dump_to_file(first_diff, second_diff)
    raise AssertionError(
        f"INTERNAL ERROR: Black produced different code on the second pass "
        f"of the formatter. "
        f"Please report a bug on https://github.com/ambv/black/issues. "
        f"This diff might be helpful: {log}"
    )
def dump_to_file(*output: str) -> str:
    """Dumps `output` to a temporary file. Returns path to the file.

    Each item of `output` is written followed by a newline.  The file is named
    `blk_*.log` and is not deleted on close, so the returned path stays usable.
    """
    import tempfile

    # Write as UTF-8 explicitly: the dumped text is arbitrary source code, and
    # the platform default encoding may not be able to represent it, which
    # would replace the error being reported with a UnicodeEncodeError.
    with tempfile.NamedTemporaryFile(
        mode='w', prefix='blk_', suffix='.log', delete=False, encoding='utf8'
    ) as f:
        for lines in output:
            f.write(lines)
            f.write('\n')
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Returns a udiff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header.  Five lines
    of context are shown around each change; identical inputs give ''.
    """
    import difflib

    def with_newlines(text: str) -> list:
        # Split on '\n' only, then give every piece its line terminator back.
        return [piece + '\n' for piece in text.split('\n')]

    hunks = difflib.unified_diff(
        with_newlines(a), with_newlines(b), fromfile=a_name, tofile=b_name, n=5
    )
    return ''.join(hunks)
1640 if __name__ == '__main__':