All patches and comments are welcome. Please squash your changes into logical
commits before using `git format-patch` and `git send-email` to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
4 from asyncio.base_events import BaseEventLoop
5 from concurrent.futures import Executor, ProcessPoolExecutor
6 from functools import partial
9 from pathlib import Path
13 Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
16 from attr import dataclass, Factory
20 from blib2to3.pytree import Node, Leaf, type_repr
21 from blib2to3 import pygram, pytree
22 from blib2to3.pgen2 import driver, token
23 from blib2to3.pgen2.parse import ParseError
25 __version__ = "18.3a3"
26 DEFAULT_LINE_LENGTH = 88
28 syms = pygram.python_symbols
35 LN = Union[Leaf, Node]
36 out = partial(click.secho, bold=True, err=True)
37 err = partial(click.secho, fg='red', err=True)
40 class NothingChanged(UserWarning):
41 """Raised by `format_file` when the reformatted code is the same as source."""
44 class CannotSplit(Exception):
45 """A readable split that fits the allotted line length is impossible.
47 Raised by `left_hand_split()`, `right_hand_split()`, and `delimiter_split()`.
56 default=DEFAULT_LINE_LENGTH,
57 help='How many character per line to allow.',
64 "Don't write back the files, just return the status. Return code 0 "
65 "means nothing would change. Return code 1 means some files would be "
66 "reformatted. Return code 123 means there was an internal error."
72 help='If --fast given, skip temporary sanity checks. [default: --safe]',
74 @click.version_option(version=__version__)
79 exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
84 ctx: click.Context, line_length: int, check: bool, fast: bool, src: List[str]
86 """The uncompromising code formatter."""
87 sources: List[Path] = []
91 sources.extend(gen_python_files_in_dir(p))
93 # if a file was explicitly given, we don't care about its extension
96 sources.append(Path('-'))
98 err(f'invalid path: {s}')
101 elif len(sources) == 1:
103 report = Report(check=check)
105 if not p.is_file() and str(p) == '-':
106 changed = format_stdin_to_stdout(
107 line_length=line_length, fast=fast, write_back=not check
110 changed = format_file_in_place(
111 p, line_length=line_length, fast=fast, write_back=not check
113 report.done(p, changed)
114 except Exception as exc:
115 report.failed(p, str(exc))
116 ctx.exit(report.return_code)
118 loop = asyncio.get_event_loop()
119 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
122 return_code = loop.run_until_complete(
124 sources, line_length, not check, fast, loop, executor
129 ctx.exit(return_code)
132 async def schedule_formatting(
141 src: loop.run_in_executor(
142 executor, format_file_in_place, src, line_length, fast, write_back
146 await asyncio.wait(tasks.values())
149 for src, task in tasks.items():
151 report.failed(src, 'timed out, cancelling')
153 cancelled.append(task)
154 elif task.exception():
155 report.failed(src, str(task.exception()))
157 report.done(src, task.result())
159 await asyncio.wait(cancelled, timeout=2)
160 out('All done! ✨ 🍰 ✨')
161 click.echo(str(report))
162 return report.return_code
165 def format_file_in_place(
166 src: Path, line_length: int, fast: bool, write_back: bool = False
168 """Format the file and rewrite if changed. Return True if changed."""
169 with tokenize.open(src) as src_buffer:
170 src_contents = src_buffer.read()
172 contents = format_file_contents(
173 src_contents, line_length=line_length, fast=fast
175 except NothingChanged:
179 with open(src, "w", encoding=src_buffer.encoding) as f:
184 def format_stdin_to_stdout(
185 line_length: int, fast: bool, write_back: bool = False
187 """Format file on stdin and pipe output to stdout. Return True if changed."""
188 contents = sys.stdin.read()
190 contents = format_file_contents(contents, line_length=line_length, fast=fast)
193 except NothingChanged:
198 sys.stdout.write(contents)
201 def format_file_contents(
202 src_contents: str, line_length: int, fast: bool
204 """Reformats a file and returns its contents and encoding."""
205 if src_contents.strip() == '':
208 dst_contents = format_str(src_contents, line_length=line_length)
209 if src_contents == dst_contents:
213 assert_equivalent(src_contents, dst_contents)
214 assert_stable(src_contents, dst_contents, line_length=line_length)
218 def format_str(src_contents: str, line_length: int) -> FileContent:
219 """Reformats a string and returns new contents."""
220 src_node = lib2to3_parse(src_contents)
222 lines = LineGenerator()
223 elt = EmptyLineTracker()
224 py36 = is_python36(src_node)
227 for current_line in lines.visit(src_node):
228 for _ in range(after):
229 dst_contents += str(empty_line)
230 before, after = elt.maybe_empty_lines(current_line)
231 for _ in range(before):
232 dst_contents += str(empty_line)
233 for line in split_line(current_line, line_length=line_length, py36=py36):
234 dst_contents += str(line)
239 pygram.python_grammar_no_print_statement_no_exec_statement,
240 pygram.python_grammar_no_print_statement,
241 pygram.python_grammar_no_exec_statement,
242 pygram.python_grammar,
246 def lib2to3_parse(src_txt: str) -> Node:
247 """Given a string with source, return the lib2to3 Node."""
248 grammar = pygram.python_grammar_no_print_statement
249 if src_txt[-1] != '\n':
250 nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
252 for grammar in GRAMMARS:
253 drv = driver.Driver(grammar, pytree.convert)
255 result = drv.parse_string(src_txt, True)
258 except ParseError as pe:
259 lineno, column = pe.context[1]
260 lines = src_txt.splitlines()
262 faulty_line = lines[lineno - 1]
264 faulty_line = "<line number missing in source>"
265 exc = ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}")
269 if isinstance(result, Leaf):
270 result = Node(syms.file_input, [result])
274 def lib2to3_unparse(node: Node) -> str:
275 """Given a lib2to3 node, return its string representation."""
283 class Visitor(Generic[T]):
284 """Basic lib2to3 visitor that yields things on visiting."""
286 def visit(self, node: LN) -> Iterator[T]:
288 name = token.tok_name[node.type]
290 name = type_repr(node.type)
291 yield from getattr(self, f'visit_{name}', self.visit_default)(node)
293 def visit_default(self, node: LN) -> Iterator[T]:
294 if isinstance(node, Node):
295 for child in node.children:
296 yield from self.visit(child)
300 class DebugVisitor(Visitor[T]):
303 def visit_default(self, node: LN) -> Iterator[T]:
304 indent = ' ' * (2 * self.tree_depth)
305 if isinstance(node, Node):
306 _type = type_repr(node.type)
307 out(f'{indent}{_type}', fg='yellow')
309 for child in node.children:
310 yield from self.visit(child)
313 out(f'{indent}/{_type}', fg='yellow', bold=False)
315 _type = token.tok_name.get(node.type, str(node.type))
316 out(f'{indent}{_type}', fg='blue', nl=False)
318 # We don't have to handle prefixes for `Node` objects since
319 # that delegates to the first child anyway.
320 out(f' {node.prefix!r}', fg='green', bold=False, nl=False)
321 out(f' {node.value!r}', fg='blue', bold=False)
324 KEYWORDS = set(keyword.kwlist)
325 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
326 FLOW_CONTROL = {'return', 'raise', 'break', 'continue'}
337 STANDALONE_COMMENT = 153
338 LOGIC_OPERATORS = {'and', 'or'}
362 COMPREHENSION_PRIORITY = 20
366 COMPARATOR_PRIORITY = 3
371 class BracketTracker:
373 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = Factory(dict)
374 delimiters: Dict[LeafID, Priority] = Factory(dict)
375 previous: Optional[Leaf] = None
377 def mark(self, leaf: Leaf) -> None:
378 if leaf.type == token.COMMENT:
381 if leaf.type in CLOSING_BRACKETS:
383 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
384 leaf.opening_bracket = opening_bracket
385 leaf.bracket_depth = self.depth
387 delim = is_delimiter(leaf)
389 self.delimiters[id(leaf)] = delim
390 elif self.previous is not None:
391 if leaf.type == token.STRING and self.previous.type == token.STRING:
392 self.delimiters[id(self.previous)] = STRING_PRIORITY
394 leaf.type == token.NAME
395 and leaf.value == 'for'
397 and leaf.parent.type in {syms.comp_for, syms.old_comp_for}
399 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
401 leaf.type == token.NAME
402 and leaf.value == 'if'
404 and leaf.parent.type in {syms.comp_if, syms.old_comp_if}
406 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
408 leaf.type == token.NAME
409 and leaf.value in LOGIC_OPERATORS
412 self.delimiters[id(self.previous)] = LOGIC_PRIORITY
413 if leaf.type in OPENING_BRACKETS:
414 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
418 def any_open_brackets(self) -> bool:
419 """Returns True if there is a yet-unmatched open bracket on the line."""
420 return bool(self.bracket_match)
422 def max_priority(self, exclude: Iterable[LeafID] =()) -> int:
423 """Returns the highest priority of a delimiter found on the line.
425 Values are consistent with what `is_delimiter()` returns.
427 return max(v for k, v in self.delimiters.items() if k not in exclude)
433 leaves: List[Leaf] = Factory(list)
434 comments: Dict[LeafID, Leaf] = Factory(dict)
435 bracket_tracker: BracketTracker = Factory(BracketTracker)
436 inside_brackets: bool = False
437 has_for: bool = False
438 _for_loop_variable: bool = False
440 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
441 has_value = leaf.value.strip()
445 if self.leaves and not preformatted:
446 # Note: at this point leaf.prefix should be empty except for
447 # imports, for which we only preserve newlines.
448 leaf.prefix += whitespace(leaf)
449 if self.inside_brackets or not preformatted:
450 self.maybe_decrement_after_for_loop_variable(leaf)
451 self.bracket_tracker.mark(leaf)
452 self.maybe_remove_trailing_comma(leaf)
453 self.maybe_increment_for_loop_variable(leaf)
454 if self.maybe_adapt_standalone_comment(leaf):
457 if not self.append_comment(leaf):
458 self.leaves.append(leaf)
461 def is_comment(self) -> bool:
462 return bool(self) and self.leaves[0].type == STANDALONE_COMMENT
465 def is_decorator(self) -> bool:
466 return bool(self) and self.leaves[0].type == token.AT
469 def is_import(self) -> bool:
470 return bool(self) and is_import(self.leaves[0])
473 def is_class(self) -> bool:
476 and self.leaves[0].type == token.NAME
477 and self.leaves[0].value == 'class'
481 def is_def(self) -> bool:
482 """Also returns True for async defs."""
484 first_leaf = self.leaves[0]
489 second_leaf: Optional[Leaf] = self.leaves[1]
493 (first_leaf.type == token.NAME and first_leaf.value == 'def')
495 first_leaf.type == token.ASYNC
496 and second_leaf is not None
497 and second_leaf.type == token.NAME
498 and second_leaf.value == 'def'
503 def is_flow_control(self) -> bool:
506 and self.leaves[0].type == token.NAME
507 and self.leaves[0].value in FLOW_CONTROL
511 def is_yield(self) -> bool:
514 and self.leaves[0].type == token.NAME
515 and self.leaves[0].value == 'yield'
518 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
521 and self.leaves[-1].type == token.COMMA
522 and closing.type in CLOSING_BRACKETS
526 if closing.type == token.RBRACE:
530 if closing.type == token.RSQB:
531 comma = self.leaves[-1]
532 if comma.parent and comma.parent.type == syms.listmaker:
536 # For parens let's check if it's safe to remove the comma. If the
537 # trailing one is the only one, we might mistakenly change a tuple
538 # into a different type by removing the comma.
539 depth = closing.bracket_depth + 1
541 opening = closing.opening_bracket
542 for _opening_index, leaf in enumerate(self.leaves):
549 for leaf in self.leaves[_opening_index + 1:]:
553 bracket_depth = leaf.bracket_depth
554 if bracket_depth == depth and leaf.type == token.COMMA:
556 if leaf.parent and leaf.parent.type == syms.arglist:
566 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
567 """In a for loop, or comprehension, the variables are often unpacks.
569 To avoid splitting on the comma in this situation, we will increase
570 the depth of tokens between `for` and `in`.
572 if leaf.type == token.NAME and leaf.value == 'for':
574 self.bracket_tracker.depth += 1
575 self._for_loop_variable = True
580 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
581 # See `maybe_increment_for_loop_variable` above for explanation.
582 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == 'in':
583 self.bracket_tracker.depth -= 1
584 self._for_loop_variable = False
589 def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
590 """Hack a standalone comment to act as a trailing comment for line splitting.
592 If this line has brackets and a standalone `comment`, we need to adapt
593 it to be able to still reformat the line.
595 This is not perfect, the line to which the standalone comment gets
596 appended will appear "too long" when splitting.
599 comment.type == STANDALONE_COMMENT
600 and self.bracket_tracker.any_open_brackets()
604 comment.type = token.COMMENT
605 comment.prefix = '\n' + ' ' * (self.depth + 1)
606 return self.append_comment(comment)
608 def append_comment(self, comment: Leaf) -> bool:
609 if comment.type != token.COMMENT:
613 after = id(self.last_non_delimiter())
615 comment.type = STANDALONE_COMMENT
620 if after in self.comments:
621 self.comments[after].value += str(comment)
623 self.comments[after] = comment
626 def last_non_delimiter(self) -> Leaf:
627 for i in range(len(self.leaves)):
628 last = self.leaves[-i - 1]
629 if not is_delimiter(last):
632 raise LookupError("No non-delimiters found")
634 def __str__(self) -> str:
638 indent = ' ' * self.depth
639 leaves = iter(self.leaves)
641 res = f'{first.prefix}{indent}{first.value}'
644 for comment in self.comments.values():
648 def __bool__(self) -> bool:
649 return bool(self.leaves or self.comments)
653 class EmptyLineTracker:
654 """Provides a stateful method that returns the number of potential extra
655 empty lines needed before and after the currently processed line.
657 Note: this tracker works on lines that haven't been split yet. It assumes
658 the prefix of the first leaf consists of optional newlines. Those newlines
659 are consumed by `maybe_empty_lines()` and included in the computation.
661 previous_line: Optional[Line] = None
662 previous_after: int = 0
663 previous_defs: List[int] = Factory(list)
665 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
666 """Returns the number of extra empty lines before and after the `current_line`.
668 This is for separating `def`, `async def` and `class` with extra empty lines
669 (two on module-level), as well as providing an extra empty line after flow
670 control keywords to make them more prominent.
672 before, after = self._maybe_empty_lines(current_line)
673 before -= self.previous_after
674 self.previous_after = after
675 self.previous_line = current_line
678 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
680 if current_line.is_comment and current_line.depth == 0:
682 if current_line.leaves:
683 # Consume the first leaf's extra newlines.
684 first_leaf = current_line.leaves[0]
685 before = first_leaf.prefix.count('\n')
686 before = min(before, max(before, max_allowed))
687 first_leaf.prefix = ''
690 depth = current_line.depth
691 while self.previous_defs and self.previous_defs[-1] >= depth:
692 self.previous_defs.pop()
693 before = 1 if depth else 2
694 is_decorator = current_line.is_decorator
695 if is_decorator or current_line.is_def or current_line.is_class:
697 self.previous_defs.append(depth)
698 if self.previous_line is None:
699 # Don't insert empty lines before the first line in the file.
702 if self.previous_line and self.previous_line.is_decorator:
703 # Don't insert empty lines between decorators.
707 if current_line.depth:
711 if current_line.is_flow_control:
716 and self.previous_line.is_import
717 and not current_line.is_import
718 and depth == self.previous_line.depth
720 return (before or 1), 0
724 and self.previous_line.is_yield
725 and (not current_line.is_yield or depth != self.previous_line.depth)
727 return (before or 1), 0
733 class LineGenerator(Visitor[Line]):
734 """Generates reformatted Line objects. Empty lines are not emitted.
736 Note: destroys the tree it's visiting by mutating prefixes of its leaves
737 in ways that will no longer stringify to valid Python code on the tree.
739 current_line: Line = Factory(Line)
741 def line(self, indent: int = 0) -> Iterator[Line]:
744 If the line is empty, only emit if it makes sense.
745 If the line is too long, split it first and then generate.
747 If any lines were generated, set up a new current_line.
749 if not self.current_line:
750 self.current_line.depth += indent
751 return # Line is empty, don't emit. Creating a new one unnecessary.
753 complete_line = self.current_line
754 self.current_line = Line(depth=complete_line.depth + indent)
757 def visit_default(self, node: LN) -> Iterator[Line]:
758 if isinstance(node, Leaf):
759 any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
760 for comment in generate_comments(node):
761 if any_open_brackets:
762 # any comment within brackets is subject to splitting
763 self.current_line.append(comment)
764 elif comment.type == token.COMMENT:
765 # regular trailing comment
766 self.current_line.append(comment)
767 yield from self.line()
770 # regular standalone comment
771 yield from self.line()
773 self.current_line.append(comment)
774 yield from self.line()
776 normalize_prefix(node, inside_brackets=any_open_brackets)
777 if node.type not in WHITESPACE:
778 self.current_line.append(node)
779 yield from super().visit_default(node)
781 def visit_INDENT(self, node: Node) -> Iterator[Line]:
782 yield from self.line(+1)
783 yield from self.visit_default(node)
785 def visit_DEDENT(self, node: Node) -> Iterator[Line]:
786 yield from self.line(-1)
788 def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
789 """Visit a statement.
791 The relevant Python language keywords for this statement are NAME leaves
794 for child in node.children:
795 if child.type == token.NAME and child.value in keywords: # type: ignore
796 yield from self.line()
798 yield from self.visit(child)
800 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
801 """A statement without nested statements."""
802 is_suite_like = node.parent and node.parent.type in STATEMENT
804 yield from self.line(+1)
805 yield from self.visit_default(node)
806 yield from self.line(-1)
809 yield from self.line()
810 yield from self.visit_default(node)
812 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
813 yield from self.line()
815 children = iter(node.children)
816 for child in children:
817 yield from self.visit(child)
819 if child.type == token.ASYNC:
822 internal_stmt = next(children)
823 for child in internal_stmt.children:
824 yield from self.visit(child)
826 def visit_decorators(self, node: Node) -> Iterator[Line]:
827 for child in node.children:
828 yield from self.line()
829 yield from self.visit(child)
831 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
832 yield from self.line()
834 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
835 yield from self.visit_default(leaf)
836 yield from self.line()
838 def __attrs_post_init__(self) -> None:
839 """You are in a twisty little maze of passages."""
841 self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'})
842 self.visit_while_stmt = partial(v, keywords={'while', 'else'})
843 self.visit_for_stmt = partial(v, keywords={'for', 'else'})
844 self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'})
845 self.visit_except_clause = partial(v, keywords={'except'})
846 self.visit_funcdef = partial(v, keywords={'def'})
847 self.visit_with_stmt = partial(v, keywords={'with'})
848 self.visit_classdef = partial(v, keywords={'class'})
849 self.visit_async_funcdef = self.visit_async_stmt
850 self.visit_decorated = self.visit_decorators
853 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
854 OPENING_BRACKETS = set(BRACKET.keys())
855 CLOSING_BRACKETS = set(BRACKET.values())
856 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
857 ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
860 def whitespace(leaf: Leaf) -> str: # noqa C901
861 """Return whitespace prefix if needed for the given `leaf`."""
868 if t in ALWAYS_NO_SPACE:
871 if t == token.COMMENT:
874 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
875 if t == token.COLON and p.type not in {syms.subscript, syms.subscriptlist}:
878 prev = leaf.prev_sibling
880 prevp = preceding_leaf(p)
881 if not prevp or prevp.type in OPENING_BRACKETS:
885 return SPACE if prevp.type == token.COMMA else NO
887 if prevp.type == token.EQUAL:
888 if prevp.parent and prevp.parent.type in {
897 elif prevp.type == token.DOUBLESTAR:
898 if prevp.parent and prevp.parent.type in {
908 elif prevp.type == token.COLON:
909 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
914 and prevp.parent.type in {syms.factor, syms.star_expr}
915 and prevp.type in MATH_OPERATORS
920 prevp.type == token.RIGHTSHIFT
922 and prevp.parent.type == syms.shift_expr
923 and prevp.prev_sibling
924 and prevp.prev_sibling.type == token.NAME
925 and prevp.prev_sibling.value == 'print' # type: ignore
927 # Python 2 print chevron
930 elif prev.type in OPENING_BRACKETS:
933 if p.type in {syms.parameters, syms.arglist}:
934 # untyped function signatures or calls
938 if not prev or prev.type != token.COMMA:
941 if p.type == syms.varargslist:
946 if prev and prev.type != token.COMMA:
949 elif p.type == syms.typedargslist:
950 # typed function signatures
955 if prev.type != syms.tname:
958 elif prev.type == token.EQUAL:
959 # A bit hacky: if the equal sign has whitespace, it means we
960 # previously found it's a typed argument. So, we're using that, too.
963 elif prev.type != token.COMMA:
966 elif p.type == syms.tname:
969 prevp = preceding_leaf(p)
970 if not prevp or prevp.type != token.COMMA:
973 elif p.type == syms.trailer:
974 # attributes and calls
975 if t == token.LPAR or t == token.RPAR:
980 prevp = preceding_leaf(p)
981 if not prevp or prevp.type != token.NUMBER:
984 elif t == token.LSQB:
987 elif prev.type != token.COMMA:
990 elif p.type == syms.argument:
996 prevp = preceding_leaf(p)
997 if not prevp or prevp.type == token.LPAR:
1000 elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
1003 elif p.type == syms.decorator:
1007 elif p.type == syms.dotted_name:
1011 prevp = preceding_leaf(p)
1012 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
1015 elif p.type == syms.classdef:
1019 if prev and prev.type == token.LPAR:
1022 elif p.type == syms.subscript:
1025 assert p.parent is not None, "subscripts are always parented"
1026 if p.parent.type == syms.subscriptlist:
1034 elif p.type == syms.atom:
1035 if prev and t == token.DOT:
1036 # dots, but not the first one.
1040 p.type == syms.listmaker
1041 or p.type == syms.testlist_gexp
1042 or p.type == syms.subscriptlist
1044 # list interior, including unpacking
1048 elif p.type == syms.dictsetmaker:
1049 # dict and set interior, including unpacking
1053 if prev.type == token.DOUBLESTAR:
1056 elif p.type in {syms.factor, syms.star_expr}:
1059 prevp = preceding_leaf(p)
1060 if not prevp or prevp.type in OPENING_BRACKETS:
1063 prevp_parent = prevp.parent
1064 assert prevp_parent is not None
1065 if prevp.type == token.COLON and prevp_parent.type in {
1066 syms.subscript, syms.sliceop
1070 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1073 elif t == token.NAME or t == token.NUMBER:
1076 elif p.type == syms.import_from:
1078 if prev and prev.type == token.DOT:
1081 elif t == token.NAME:
1085 if prev and prev.type == token.DOT:
1088 elif p.type == syms.sliceop:
1094 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1095 """Returns the first leaf that precedes `node`, if any."""
1097 res = node.prev_sibling
1099 if isinstance(res, Leaf):
1103 return list(res.leaves())[-1]
1112 def is_delimiter(leaf: Leaf) -> int:
1113 """Returns the priority of the `leaf` delimiter. Returns 0 if not a delimiter.
1115 Higher numbers are higher priority.
1117 if leaf.type == token.COMMA:
1118 return COMMA_PRIORITY
1120 if leaf.type in COMPARATORS:
1121 return COMPARATOR_PRIORITY
1124 leaf.type in MATH_OPERATORS
1126 and leaf.parent.type not in {syms.factor, syms.star_expr}
1128 return MATH_PRIORITY
1133 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1134 """Cleans the prefix of the `leaf` and generates comments from it, if any.
1136 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1137 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1138 move because it does away with modifying the grammar to include all the
1139 possible places in which comments can be placed.
1141 The sad consequence for us though is that comments don't "belong" anywhere.
1142 This is why this function generates simple parentless Leaf objects for
1143 comments. We simply don't know what the correct parent should be.
1145 No matter though, we can live without this. We really only need to
1146 differentiate between inline and standalone comments. The latter don't
1147 share the line with any code.
1149 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1150 comments are emitted with a fake STANDALONE_COMMENT token identifier.
1160 for index, line in enumerate(p.split('\n')):
1161 line = line.lstrip()
1164 if not line.startswith('#'):
1167 if index == 0 and leaf.type != token.ENDMARKER:
1168 comment_type = token.COMMENT # simple trailing comment
1170 comment_type = STANDALONE_COMMENT
1171 yield Leaf(comment_type, make_comment(line), prefix='\n' * nlines)
1176 def make_comment(content: str) -> str:
1177 content = content.rstrip()
1181 if content[0] == '#':
1182 content = content[1:]
1183 if content and content[0] not in {' ', '!', '#'}:
1184 content = ' ' + content
1185 return '#' + content
1189 line: Line, line_length: int, inner: bool = False, py36: bool = False
1190 ) -> Iterator[Line]:
1191 """Splits a `line` into potentially many lines.
1193 They should fit in the allotted `line_length` but might not be able to.
1194 `inner` signifies that there was a pair of brackets somewhere around the
1195 current `line`, possibly transitively. This means we can fall back to splitting
1196 by delimiters if the LHS/RHS don't yield any results.
1198 If `py36` is True, splitting may generate syntax that is only compatible
1199 with Python 3.6 and later.
1201 line_str = str(line).strip('\n')
1202 if len(line_str) <= line_length and '\n' not in line_str:
1207 split_funcs = [left_hand_split]
1208 elif line.inside_brackets:
1209 split_funcs = [delimiter_split]
1210 if '\n' not in line_str:
1211 # Only attempt RHS if we don't have multiline strings or comments
1213 split_funcs.append(right_hand_split)
1215 split_funcs = [right_hand_split]
1216 for split_func in split_funcs:
1217 # We are accumulating lines in `result` because we might want to abort
1218 # mission and return the original line in the end, or attempt a different
1220 result: List[Line] = []
1222 for l in split_func(line, py36=py36):
1223 if str(l).strip('\n') == line_str:
1224 raise CannotSplit("Split function returned an unchanged result")
1227 split_line(l, line_length=line_length, inner=True, py36=py36)
1229 except CannotSplit as cs:
1240 def left_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1241 """Split line into many lines, starting with the first matching bracket pair.
1243 Note: this usually looks weird, only use this for function definitions.
1244 Prefer RHS otherwise.
1246 head = Line(depth=line.depth)
1247 body = Line(depth=line.depth + 1, inside_brackets=True)
1248 tail = Line(depth=line.depth)
1249 tail_leaves: List[Leaf] = []
1250 body_leaves: List[Leaf] = []
1251 head_leaves: List[Leaf] = []
1252 current_leaves = head_leaves
1253 matching_bracket = None
1254 for leaf in line.leaves:
1256 current_leaves is body_leaves
1257 and leaf.type in CLOSING_BRACKETS
1258 and leaf.opening_bracket is matching_bracket
1260 current_leaves = tail_leaves if body_leaves else head_leaves
1261 current_leaves.append(leaf)
1262 if current_leaves is head_leaves:
1263 if leaf.type in OPENING_BRACKETS:
1264 matching_bracket = leaf
1265 current_leaves = body_leaves
1266 # Since body is a new indent level, remove spurious leading whitespace.
1268 normalize_prefix(body_leaves[0], inside_brackets=True)
1269 # Build the new lines.
1270 for result, leaves in (
1271 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1274 result.append(leaf, preformatted=True)
1275 comment_after = line.comments.get(id(leaf))
1277 result.append(comment_after, preformatted=True)
1278 split_succeeded_or_raise(head, body, tail)
1279 for result in (head, body, tail):
1284 def right_hand_split(line: Line, py36: bool = False) -> Iterator[Line]:
1285 """Split line into many lines, starting with the last matching bracket pair."""
1286 head = Line(depth=line.depth)
1287 body = Line(depth=line.depth + 1, inside_brackets=True)
1288 tail = Line(depth=line.depth)
1289 tail_leaves: List[Leaf] = []
1290 body_leaves: List[Leaf] = []
1291 head_leaves: List[Leaf] = []
1292 current_leaves = tail_leaves
1293 opening_bracket = None
1294 for leaf in reversed(line.leaves):
1295 if current_leaves is body_leaves:
1296 if leaf is opening_bracket:
1297 current_leaves = head_leaves if body_leaves else tail_leaves
1298 current_leaves.append(leaf)
1299 if current_leaves is tail_leaves:
1300 if leaf.type in CLOSING_BRACKETS:
1301 opening_bracket = leaf.opening_bracket
1302 current_leaves = body_leaves
1303 tail_leaves.reverse()
1304 body_leaves.reverse()
1305 head_leaves.reverse()
1306 # Since body is a new indent level, remove spurious leading whitespace.
1308 normalize_prefix(body_leaves[0], inside_brackets=True)
1309 # Build the new lines.
1310 for result, leaves in (
1311 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1314 result.append(leaf, preformatted=True)
1315 comment_after = line.comments.get(id(leaf))
1317 result.append(comment_after, preformatted=True)
1318 split_succeeded_or_raise(head, body, tail)
1319 for result in (head, body, tail):
1324 def split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
1325 tail_len = len(str(tail).strip())
1328 raise CannotSplit("Splitting brackets produced the same line")
1332 f"Splitting brackets on an empty body to save "
1333 f"{tail_len} characters is not worth it"
def delimiter_split(line: Line, py36: bool = False) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    This kind of split doesn't increase indentation.
    If `py36` is True, the split will add trailing commas also in function
    signatures that contain * and **.

    Yields one `Line` per delimited chunk.  Raises `CannotSplit` if `line` has
    no leaves or its bracket tracker reports no delimiters.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty")

    delimiters = line.bracket_tracker.delimiters
    try:
        # The last leaf is excluded: a delimiter there has nothing after it.
        delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found")

    current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    lowest_depth = sys.maxsize
    trailing_comma_safe = True
    for leaf in line.leaves:
        current_line.append(leaf, preformatted=True)
        comment_after = line.comments.get(id(leaf))
        if comment_after:
            current_line.append(comment_after, preformatted=True)
        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        # Only a star at the outermost depth seen so far affects whether a
        # trailing comma may be added.  The depth test must guard both `*`
        # and `**`; written as `a and b or c` it guarded `*` only, so a `**`
        # nested at any depth switched the trailing comma off.
        if (
            leaf.bracket_depth == lowest_depth
            and leaf.type in {token.STAR, token.DOUBLESTAR}
        ):
            trailing_comma_safe = trailing_comma_safe and py36
        leaf_priority = delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            normalize_prefix(current_line.leaves[0], inside_brackets=True)
            yield current_line

            current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
    if current_line:
        if (
            delimiter_priority == COMMA_PRIORITY
            and current_line.leaves[-1].type != token.COMMA
            and trailing_comma_safe
        ):
            current_line.append(Leaf(token.COMMA, ','))
        normalize_prefix(current_line.leaves[0], inside_brackets=True)
        yield current_line
def is_import(leaf: Leaf) -> bool:
    """Returns True if the given leaf starts an import statement.

    The leaf must be a NAME token spelled `import` whose parent is an
    `import_name` node, or spelled `from` whose parent is an `import_from`
    node.
    """
    p = leaf.parent
    t = leaf.type
    v = leaf.value
    # bool() because the `and` chain may otherwise evaluate to a falsy parent
    # (e.g. None) instead of False.
    return bool(
        t == token.NAME
        and (
            (v == 'import' and p and p.type == syms.import_name)
            or (v == 'from' and p and p.type == syms.import_from)
        )
    )
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Leave existing extra newlines if not `inside_brackets`.

    Remove everything else. Note: don't use backslashes for formatting or
    you'll lose your voting rights.

    Mutates `leaf.prefix` in place.
    """
    if not inside_brackets:
        spl = leaf.prefix.split('#')
        # A backslash before any comment is not preserved; in that case the
        # prefix is cleared below.
        if '\\' not in spl[0]:
            # Count the newlines that follow the last comment (or the whole
            # prefix when there is no comment).
            nl_count = spl[-1].count('\n')
            # NOTE(review): the adjustment below is restored from upstream
            # Black -- presumably it discounts the newline that terminates the
            # comment itself; confirm against the authoritative file.
            if len(spl) > 1:
                nl_count -= 1
            leaf.prefix = '\n' * nl_count
            return

    leaf.prefix = ''
def is_python36(node: Node) -> bool:
    """Returns True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures.
    """
    for n in node.pre_order():
        if n.type == token.STRING:
            # String prefixes are case-insensitive and may mix cases (`rF`,
            # `Fr`, ...), so compare the lowercased head.
            value_head = n.value[:2].lower()  # type: ignore
            if value_head in {'f"', "f'", 'rf', 'fr'}:
                return True

        elif (
            n.type == syms.typedargslist
            and n.children
            and n.children[-1].type == token.COMMA
        ):
            # The signature ends with a trailing comma; that only signals 3.6+
            # when the signature also contains * or **.
            for ch in n.children:
                if ch.type in {token.STAR, token.DOUBLESTAR}:
                    return True

    return False
# File suffixes treated as Python sources when walking a directory.
PYTHON_EXTENSIONS = {'.py'}
# Directory names that are never descended into when walking a directory.
BLACKLISTED_DIRECTORIES = {
    'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv'
}


def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Yield every Python file found under `path`, recursively.

    Directories named in `BLACKLISTED_DIRECTORIES` are skipped together with
    everything below them.  Any other entry is yielded when its suffix is in
    `PYTHON_EXTENSIONS`.
    """
    for child in path.iterdir():
        if child.is_dir():
            if child.name in BLACKLISTED_DIRECTORIES:
                continue

            yield from gen_python_files_in_dir(child)

        elif child.suffix in PYTHON_EXTENSIONS:
            yield child
@dataclass
class Report:
    """Provides a reformatting counter."""

    # True when files are only checked; selects the "would ..." wording and
    # makes pending changes count towards the return code.
    check: bool = False
    # Number of files reported as changed through `done()`.
    change_count: int = 0
    # Number of files reported as unchanged through `done()`.
    same_count: int = 0
    # Number of files reported through `failed()`.
    failure_count: int = 0

    def done(self, src: Path, changed: bool) -> None:
        """Increment the counter for successful reformatting. Write out a message."""
        if changed:
            reformatted = 'would reformat' if self.check else 'reformatted'
            out(f'{reformatted} {src}')
            self.change_count += 1
        else:
            out(f'{src} already well formatted, good job.', bold=False)
            self.same_count += 1

    def failed(self, src: Path, message: str) -> None:
        """Increment the counter for failed reformatting. Write out a message."""
        err(f'error: cannot format {src}: {message}')
        self.failure_count += 1

    @property
    def return_code(self) -> int:
        """Which return code should the app use considering the current state."""
        # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
        # 126 we have special returncodes reserved by the shell.
        if self.failure_count:
            return 123

        elif self.change_count and self.check:
            return 1

        return 0

    def __str__(self) -> str:
        """A color report of the current state.

        Use `click.unstyle` to remove colors.
        """
        if self.check:
            reformatted = "would be reformatted"
            unchanged = "would be left unchanged"
            failed = "would fail to reformat"
        else:
            reformatted = "reformatted"
            unchanged = "left unchanged"
            failed = "failed to reformat"
        report = []
        # Each non-zero counter contributes one comma-separated fragment.
        if self.change_count:
            s = 's' if self.change_count > 1 else ''
            report.append(
                click.style(f'{self.change_count} file{s} {reformatted}', bold=True)
            )
        if self.same_count:
            s = 's' if self.same_count > 1 else ''
            report.append(f'{self.same_count} file{s} {unchanged}')
        if self.failure_count:
            s = 's' if self.failure_count > 1 else ''
            report.append(
                click.style(f'{self.failure_count} file{s} {failed}', fg='red')
            )
        return ', '.join(report) + '.'
def assert_equivalent(src: str, dst: str) -> None:
    """Raises AssertionError if `src` and `dst` aren't equivalent.

    Both strings are parsed with the running interpreter's `ast` module and
    the resulting trees are compared through a line-based textual rendering.

    This is a temporary sanity check until Black becomes stable.
    """

    import ast
    import traceback

    def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
        """Simple visitor generating strings to compare ASTs by content."""
        yield f"{' ' * depth}{node.__class__.__name__}("

        for field in sorted(node._fields):
            try:
                value = getattr(node, field)
            except AttributeError:
                # A declared field may be absent on a node; nothing to render.
                continue

            yield f"{' ' * (depth+1)}{field}="

            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        yield from _v(item, depth + 2)

                    else:
                        # Plain values inside lists (e.g. the names of a
                        # `global` statement) are part of the program too and
                        # must take part in the comparison.
                        yield f"{' ' * (depth+2)}{item!r}, # {item.__class__.__name__}"

            elif isinstance(value, ast.AST):
                yield from _v(value, depth + 2)

            else:
                yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"

        yield f"{' ' * depth}) # /{node.__class__.__name__}"

    try:
        src_ast = ast.parse(src)
    except Exception as exc:
        major, minor = sys.version_info[:2]
        raise AssertionError(
            f"cannot use --safe with this file; failed to parse source file "
            f"with Python {major}.{minor}'s builtin AST. Re-run with --fast "
            f"or stop using deprecated Python 2 syntax. AST error message: {exc}"
        )

    try:
        dst_ast = ast.parse(dst)
    except Exception as exc:
        # Keep the traceback and the offending output in a log file so the
        # error message itself stays short.
        log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst)
        raise AssertionError(
            f"INTERNAL ERROR: Black produced invalid code: {exc}. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This invalid output might be helpful: {log}"
        ) from None

    src_ast_str = '\n'.join(_v(src_ast))
    dst_ast_str = '\n'.join(_v(dst_ast))
    if src_ast_str != dst_ast_str:
        log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst'))
        raise AssertionError(
            f"INTERNAL ERROR: Black produced code that is not equivalent to "
            f"the source. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Raises AssertionError if `dst` reformats differently the second time.

    `dst` is formatted again with `format_str` at the same `line_length`.  If
    the result differs, both diffs (source to first pass, first pass to second
    pass) are dumped to a log file whose path is included in the error.

    This is a temporary sanity check until Black becomes stable.
    """
    newdst = format_str(dst, line_length=line_length)
    if dst != newdst:
        log = dump_to_file(
            diff(src, dst, 'source', 'first pass'),
            diff(dst, newdst, 'first pass', 'second pass'),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}"
        ) from None
def dump_to_file(*output: str) -> str:
    """Dumps `output` to a temporary file. Returns path to the file.

    Each item is written followed by a newline.  The file is named
    `blk_*.log` and is not deleted on close, so the caller can point the user
    at it.
    """
    import tempfile

    # Explicit UTF-8: the dumped text is arbitrary source code, and relying on
    # the locale's encoding could raise UnicodeEncodeError while an error is
    # being reported.
    with tempfile.NamedTemporaryFile(
        mode='w', prefix='blk_', suffix='.log', delete=False, encoding='utf8'
    ) as f:
        for lines in output:
            f.write(lines)
            f.write('\n')
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Returns a udiff string between strings `a` and `b`.

    `a_name` and `b_name` label the two sides in the diff header.  Hunks carry
    5 lines of context.  An empty string is returned when `a == b`.
    """
    import difflib

    # unified_diff() works on lists of newline-terminated lines.
    a_lines = [line + '\n' for line in a.split('\n')]
    b_lines = [line + '\n' for line in b.split('\n')]
    return ''.join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
1637 if __name__ == '__main__':