All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them
first, I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
5 from functools import partial
8 from pathlib import Path
11 Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
14 from attr import attrib, dataclass, Factory
18 from blib2to3.pytree import Node, Leaf, type_repr
19 from blib2to3 import pygram, pytree
20 from blib2to3.pgen2 import driver, token
21 from blib2to3.pgen2.parse import ParseError
# Module-level constants and helpers (paste is elided; stray "NNN " prefixes
# are original line numbers from the source file).
23 __version__ = "18.3a1"
24 DEFAULT_LINE_LENGTH = 88
# Grammar symbol table used throughout for node-type comparisons.
26 syms = pygram.python_symbols
# LN: any lib2to3 tree element (leaf token or internal node).
33 LN = Union[Leaf, Node]
# Stderr printers: `out` bold (status), `err` red (errors).
34 out = partial(click.secho, bold=True, err=True)
35 err = partial(click.secho, fg='red', err=True)
class NothingChanged(UserWarning):
    """Raised by `format_file` when reformatting leaves the source unmodified."""
class CannotSplit(Exception):
    """Signals that no readable split fitting the allotted line length exists.

    Raised by `left_hand_split()` and `right_hand_split()`.
    """
# Click CLI entry point. Decorator/option lines are partially elided in this
# paste; --line-length defaults to DEFAULT_LINE_LENGTH (88) and --fast skips
# the assert_equivalent/assert_stable sanity checks.
54 default=DEFAULT_LINE_LENGTH,
# NOTE(review): user-facing help text has a typo — "character" should be
# "characters". Left unchanged here because it is a runtime string.
55 help='How many character per line to allow.',
61 help='If --fast given, skip temporary sanity checks. [default: --safe]',
63 @click.version_option(version=__version__)
67 type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True),
70 def main(ctx: click.Context, line_length: int, fast: bool, src: List[str]) -> None:
71 """The uncompromising code formatter."""
72 sources: List[Path] = []
# Directories are walked recursively for Python files; explicit file paths
# are accepted regardless of extension (see comment below).
76 sources.extend(gen_python_files_in_dir(p))
78 # if a file was explicitly given, we don't care about its extension
81 err(f'invalid path: {s}')
# Exactly one source: format synchronously in-process and exit with the
# report's return code.
84 elif len(sources) == 1:
88 changed = format_file_in_place(p, line_length=line_length, fast=fast)
89 report.done(p, changed)
90 except Exception as exc:
91 report.failed(p, str(exc))
92 ctx.exit(report.return_code)
# Multiple sources: fan work out across CPU cores via a process pool driven
# by the asyncio event loop (see schedule_formatting).
94 loop = asyncio.get_event_loop()
95 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
98 return_code = loop.run_until_complete(
99 schedule_formatting(sources, line_length, fast, loop, executor)
103 ctx.exit(return_code)
# Runs format_file_in_place for every source in the process-pool executor,
# awaits all tasks, then folds results/failures into the report. Parts of the
# signature and the timeout branch are elided in this paste.
106 async def schedule_formatting(
114 src: loop.run_in_executor(
115 executor, format_file_in_place, src, line_length, fast
119 await asyncio.wait(tasks.values())
122 for src, task in tasks.items():
# A task that did not complete is reported as timed out and collected for
# cancellation; a task with an exception is reported failed; otherwise its
# boolean result ("changed") is recorded as done.
124 report.failed(src, 'timed out, cancelling')
126 cancelled.append(task)
127 elif task.exception():
128 report.failed(src, str(task.exception()))
130 report.done(src, task.result())
# Give cancelled tasks a short grace period before returning.
132 await asyncio.wait(cancelled, timeout=2)
133 out('All done! ✨ 🍰 ✨')
134 click.echo(str(report))
135 return report.return_code
def format_file_in_place(src: Path, line_length: int, fast: bool) -> bool:
    """Format the file and rewrite if changed. Return True if changed.

    `fast` is forwarded to `format_file` and controls whether the temporary
    sanity checks (AST equivalence / stability) run.
    """
    try:
        contents, encoding = format_file(src, line_length=line_length, fast=fast)
    except NothingChanged:
        # Reformatting produced identical output; leave the file untouched.
        return False

    # Preserve the encoding detected by tokenize.open() when writing back.
    with open(src, "w", encoding=encoding) as f:
        f.write(contents)
    return True
def format_file(
    src: Path, line_length: int, fast: bool
) -> Tuple[FileContent, Encoding]:
    """Reformats a file and returns its contents and encoding.

    Raises NothingChanged when the file is empty/whitespace-only or when
    reformatting produced identical output.
    """
    # tokenize.open() honors PEP 263 coding cookies and the BOM.
    with tokenize.open(src) as src_buffer:
        src_contents = src_buffer.read()
    if src_contents.strip() == '':
        raise NothingChanged(src)

    dst_contents = format_str(src_contents, line_length=line_length)
    if src_contents == dst_contents:
        raise NothingChanged(src)

    if not fast:
        # Temporary sanity checks until Black becomes stable.
        assert_equivalent(src_contents, dst_contents)
        assert_stable(src_contents, dst_contents, line_length=line_length)
    return dst_contents, src_buffer.encoding
def format_str(src_contents: str, line_length: int) -> FileContent:
    """Reformats a string and returns new contents.

    Walks the lib2to3 tree with LineGenerator, inserting blank lines as
    dictated by EmptyLineTracker and splitting long lines via split_line.
    """
    src_node = lib2to3_parse(src_contents)
    dst_contents = ""
    # Standalone-comment Lines are buffered and flushed before the next
    # non-comment line (or at EOF).
    comments: List[Line] = []
    lines = LineGenerator()
    elt = EmptyLineTracker()
    empty_line = Line()
    after = 0
    for current_line in lines.visit(src_node):
        # `after` carries over from the previous iteration's tracker result.
        for _ in range(after):
            dst_contents += str(empty_line)
        before, after = elt.maybe_empty_lines(current_line)
        for _ in range(before):
            dst_contents += str(empty_line)
        if not current_line.is_comment:
            for comment in comments:
                dst_contents += str(comment)
            comments = []
            for line in split_line(current_line, line_length=line_length):
                dst_contents += str(line)
        else:
            comments.append(current_line)
    # Flush any trailing standalone comments.
    for comment in comments:
        dst_contents += str(comment)
    return dst_contents
def lib2to3_parse(src_txt: str) -> Node:
    """Given a string with source, return the lib2to3 Node."""
    grammar = pygram.python_grammar_no_print_statement
    drv = driver.Driver(grammar, pytree.convert)
    # The parser requires a trailing newline. Using endswith() instead of
    # `src_txt[-1] != '\n'` also covers the empty-source case, which would
    # otherwise raise IndexError before parsing even starts.
    if not src_txt.endswith('\n'):
        # Match the file's dominant newline style (sniffed from the head).
        nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
        src_txt += nl
    try:
        result = drv.parse_string(src_txt, True)
    except ParseError as pe:
        lineno, column = pe.context[1]
        lines = src_txt.splitlines()
        try:
            faulty_line = lines[lineno - 1]
        except IndexError:
            faulty_line = "<line number missing in source>"
        raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None

    if isinstance(result, Leaf):
        # A bare leaf (e.g. a lone ENDMARKER) is wrapped so callers always
        # receive a Node.
        result = Node(syms.file_input, [result])
    return result
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    code = str(node)
    return code
class Visitor(Generic[T]):
    """Basic lib2to3 visitor that yields things on visiting."""

    def visit(self, node: LN) -> Iterator[T]:
        """Dispatch to `visit_<name>` when such a method exists, else default.

        Token types below 256 are terminals named via token.tok_name; higher
        values are grammar symbols named via type_repr().
        """
        if node.type < 256:
            name = token.tok_name[node.type]
        else:
            name = type_repr(node.type)
        yield from getattr(self, f'visit_{name}', self.visit_default)(node)

    def visit_default(self, node: LN) -> Iterator[T]:
        """Fallback: recurse into all children of internal nodes."""
        if isinstance(node, Node):
            for child in node.children:
                yield from self.visit(child)
# Debug helper: prints an indented, colorized dump of the tree while
# delegating traversal to Visitor. Some branch lines are elided in this paste.
246 class DebugVisitor(Visitor[T]):
247 tree_depth: int = attrib(default=0)
249 def visit_default(self, node: LN) -> Iterator[T]:
250 indent = ' ' * (2 * self.tree_depth)
251 if isinstance(node, Node):
# Internal node: print open tag, recurse, print close tag.
252 _type = type_repr(node.type)
253 out(f'{indent}{_type}', fg='yellow')
255 for child in node.children:
256 yield from self.visit(child)
259 out(f'{indent}/{_type}', fg='yellow', bold=False)
# Leaf: print token name, prefix, and value on one line.
261 _type = token.tok_name.get(node.type, str(node.type))
262 out(f'{indent}{_type}', fg='blue', nl=False)
264 # We don't have to handle prefixes for `Node` objects since
265 # that delegates to the first child anyway.
266 out(f' {node.prefix!r}', fg='green', bold=False, nl=False)
267 out(f' {node.value!r}', fg='blue', bold=False)
# Token/keyword sets and delimiter priorities (several priority constants are
# elided in this paste; higher number = higher split priority).
270 KEYWORDS = set(keyword.kwlist)
271 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
272 FLOW_CONTROL = {'return', 'raise', 'break', 'continue'}
# Fake token id for comments that occupy their own line (see
# generate_comments); chosen outside the real token range.
283 STANDALONE_COMMENT = 153
284 LOGIC_OPERATORS = {'and', 'or'}
307 COMPREHENSION_PRIORITY = 20
311 COMPARATOR_PRIORITY = 3
# Tracks bracket nesting depth, matches closing brackets to their openers,
# and records split-delimiter priorities per leaf id. Several condition
# lines are elided in this paste.
316 class BracketTracker:
317 depth: int = attrib(default=0)
318 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = attrib(default=Factory(dict))
319 delimiters: Dict[LeafID, Priority] = attrib(default=Factory(dict))
320 previous: Optional[Leaf] = attrib(default=None)
322 def mark(self, leaf: Leaf) -> None:
# Comments are ignored for bracket/delimiter bookkeeping.
323 if leaf.type == token.COMMENT:
326 if leaf.type in CLOSING_BRACKETS:
# Pop the matching opener recorded at this depth and annotate the leaf
# with it (dynamic attributes, hence the type: ignore comments).
328 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
329 leaf.opening_bracket = opening_bracket # type: ignore
330 leaf.bracket_depth = self.depth # type: ignore
332 delim = is_delimiter(leaf)
334 self.delimiters[id(leaf)] = delim
335 elif self.previous is not None:
# Implicit string concatenation and comprehension keywords assign a
# priority to the PREVIOUS leaf — the split happens before them.
336 if leaf.type == token.STRING and self.previous.type == token.STRING:
337 self.delimiters[id(self.previous)] = STRING_PRIORITY
339 leaf.type == token.NAME and
340 leaf.value == 'for' and
342 leaf.parent.type in {syms.comp_for, syms.old_comp_for}
344 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
346 leaf.type == token.NAME and
347 leaf.value == 'if' and
349 leaf.parent.type in {syms.comp_if, syms.old_comp_if}
351 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
352 if leaf.type in OPENING_BRACKETS:
# Record the opener keyed by (depth, expected closing token type).
353 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
357 def any_open_brackets(self) -> bool:
358 """Returns True if there is a yet-unmatched open bracket on the line."""
359 return bool(self.bracket_match)
361 def max_priority(self, exclude: Iterable[LeafID] = ()) -> int:
362 """Returns the highest priority of a delimiter found on the line.
364 Values are consistent with what `is_delimiter()` returns.
366 return max(v for k, v in self.delimiters.items() if k not in exclude)
# Interior of the `Line` dataclass (the class header itself is elided in this
# paste). A Line holds the leaves of one logical output line plus trailing
# comments keyed by the id() of the leaf they follow.
371 depth: int = attrib(default=0)
372 leaves: List[Leaf] = attrib(default=Factory(list))
373 comments: Dict[LeafID, Leaf] = attrib(default=Factory(dict))
374 bracket_tracker: BracketTracker = attrib(default=Factory(BracketTracker))
375 inside_brackets: bool = attrib(default=False)
377 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
378 has_value = leaf.value.strip()
382 if self.leaves and not preformatted:
383 # Note: at this point leaf.prefix should be empty except for
384 # imports, for which we only preserve newlines.
385 leaf.prefix += whitespace(leaf)
386 if self.inside_brackets or not preformatted:
387 self.bracket_tracker.mark(leaf)
388 self.maybe_remove_trailing_comma(leaf)
389 if self.maybe_adapt_standalone_comment(leaf):
# Comments are diverted into self.comments; everything else becomes a leaf.
392 if not self.append_comment(leaf):
393 self.leaves.append(leaf)
# Predicates below classify the line by its first leaf (or first two for
# `async def`).
396 def is_comment(self) -> bool:
397 return bool(self) and self.leaves[0].type == STANDALONE_COMMENT
400 def is_decorator(self) -> bool:
401 return bool(self) and self.leaves[0].type == token.AT
404 def is_import(self) -> bool:
405 return bool(self) and is_import(self.leaves[0])
408 def is_class(self) -> bool:
411 self.leaves[0].type == token.NAME and
412 self.leaves[0].value == 'class'
416 def is_def(self) -> bool:
417 """Also returns True for async defs."""
419 first_leaf = self.leaves[0]
424 second_leaf: Optional[Leaf] = self.leaves[1]
428 (first_leaf.type == token.NAME and first_leaf.value == 'def') or
430 first_leaf.type == token.NAME and
431 first_leaf.value == 'async' and
432 second_leaf is not None and
433 second_leaf.type == token.NAME and
434 second_leaf.value == 'def'
439 def is_flow_control(self) -> bool:
442 self.leaves[0].type == token.NAME and
443 self.leaves[0].value in FLOW_CONTROL
447 def is_yield(self) -> bool:
450 self.leaves[0].type == token.NAME and
451 self.leaves[0].value == 'yield'
454 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
457 self.leaves[-1].type == token.COMMA and
458 closing.type in CLOSING_BRACKETS
# ] and } can always drop a trailing comma safely.
462 if closing.type == token.RSQB or closing.type == token.RBRACE:
466 # For parens let's check if it's safe to remove the comma. If the
467 # trailing one is the only one, we might mistakenly change a tuple
468 # into a different type by removing the comma.
469 depth = closing.bracket_depth + 1 # type: ignore
471 opening = closing.opening_bracket # type: ignore
472 for _opening_index, leaf in enumerate(self.leaves):
479 for leaf in self.leaves[_opening_index + 1:]:
483 bracket_depth = leaf.bracket_depth # type: ignore
484 if bracket_depth == depth and leaf.type == token.COMMA:
492 def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
493 """Hack a standalone comment to act as a trailing comment for line splitting.
495 If this line has brackets and a standalone `comment`, we need to adapt
496 it to be able to still reformat the line.
498 This is not perfect, the line to which the standalone comment gets
499 appended will appear "too long" when splitting.
502 comment.type == STANDALONE_COMMENT and
503 self.bracket_tracker.any_open_brackets()
507 comment.type = token.COMMENT
508 comment.prefix = '\n' + ' ' * (self.depth + 1)
509 return self.append_comment(comment)
511 def append_comment(self, comment: Leaf) -> bool:
512 if comment.type != token.COMMENT:
# Comments attach to the last non-delimiter leaf; consecutive comments on
# the same anchor are concatenated.
516 after = id(self.last_non_delimiter())
518 comment.type = STANDALONE_COMMENT
523 if after in self.comments:
524 self.comments[after].value += str(comment)
526 self.comments[after] = comment
529 def last_non_delimiter(self) -> Leaf:
# Scan from the end for the first leaf is_delimiter() scores as 0.
530 for i in range(len(self.leaves)):
531 last = self.leaves[-i - 1]
532 if not is_delimiter(last):
535 raise LookupError("No non-delimiters found")
537 def __str__(self) -> str:
541 indent = ' ' * self.depth
542 leaves = iter(self.leaves)
544 res = f'{first.prefix}{indent}{first.value}'
547 for comment in self.comments.values():
551 def __bool__(self) -> bool:
552 return bool(self.leaves or self.comments)
556 class EmptyLineTracker:
557 """Provides a stateful method that returns the number of potential extra
558 empty lines needed before and after the currently processed line.
560 Note: this tracker works on lines that haven't been split yet.
# previous_defs: stack of depths of enclosing def/class lines, popped when
# dedenting back past them.
562 previous_line: Optional[Line] = attrib(default=None)
563 previous_after: int = attrib(default=0)
564 previous_defs: List[int] = attrib(default=Factory(list))
566 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
567 """Returns the number of extra empty lines before and after the `current_line`.
569 This is for separating `def`, `async def` and `class` with extra empty lines
570 (two on module-level), as well as providing an extra empty line after flow
571 control keywords to make them more prominent.
573 before, after = self._maybe_empty_lines(current_line)
# Remember results so the next call can subtract lines already emitted.
574 self.previous_after = after
575 self.previous_line = current_line
578 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
580 depth = current_line.depth
581 while self.previous_defs and self.previous_defs[-1] >= depth:
582 self.previous_defs.pop()
# Two blank lines at module level, one when nested; minus lines already
# emitted after the previous line.
583 before = (1 if depth else 2) - self.previous_after
584 is_decorator = current_line.is_decorator
585 if is_decorator or current_line.is_def or current_line.is_class:
587 self.previous_defs.append(depth)
588 if self.previous_line is None:
589 # Don't insert empty lines before the first line in the file.
592 if self.previous_line and self.previous_line.is_decorator:
593 # Don't insert empty lines between decorators.
597 if current_line.depth:
599 newlines -= self.previous_after
602 if current_line.is_flow_control:
# One blank line after an import block when followed by non-import code at
# the same depth.
606 self.previous_line and
607 self.previous_line.is_import and
608 not current_line.is_import and
609 depth == self.previous_line.depth
611 return (before or 1), 0
614 self.previous_line and
615 self.previous_line.is_yield and
616 (not current_line.is_yield or depth != self.previous_line.depth)
618 return (before or 1), 0
624 class LineGenerator(Visitor[Line]):
625 """Generates reformatted Line objects. Empty lines are not emitted.
627 Note: destroys the tree it's visiting by mutating prefixes of its leaves
628 in ways that will no longer stringify to valid Python code on the tree.
630 current_line: Line = attrib(default=Factory(Line))
631 standalone_comments: List[Leaf] = attrib(default=Factory(list))
# line(): finish the current Line (if non-empty) and start a new one at
# depth adjusted by `indent` (+1/-1 around suites).
633 def line(self, indent: int = 0) -> Iterator[Line]:
636 If the line is empty, only emit if it makes sense.
637 If the line is too long, split it first and then generate.
639 If any lines were generated, set up a new current_line.
641 if not self.current_line:
642 self.current_line.depth += indent
643 return # Line is empty, don't emit. Creating a new one unnecessary.
645 complete_line = self.current_line
646 self.current_line = Line(depth=complete_line.depth + indent)
649 def visit_default(self, node: LN) -> Iterator[Line]:
650 if isinstance(node, Leaf):
# Comments hidden in the leaf's prefix are materialized first.
651 for comment in generate_comments(node):
652 if self.current_line.bracket_tracker.any_open_brackets():
653 # any comment within brackets is subject to splitting
654 self.current_line.append(comment)
655 elif comment.type == token.COMMENT:
656 # regular trailing comment
657 self.current_line.append(comment)
658 yield from self.line()
661 # regular standalone comment, to be processed later (see
662 # docstring in `generate_comments()`
663 self.standalone_comments.append(comment)
664 normalize_prefix(node)
# Buffered standalone comments are flushed before the next significant
# (non-whitespace) token, each on its own line.
665 if node.type not in WHITESPACE:
666 for comment in self.standalone_comments:
667 yield from self.line()
669 self.current_line.append(comment)
670 yield from self.line()
672 self.standalone_comments = []
673 self.current_line.append(node)
674 yield from super().visit_default(node)
676 def visit_suite(self, node: Node) -> Iterator[Line]:
677 """Body of a statement after a colon."""
678 children = iter(node.children)
679 # Process newline before indenting. It might contain an inline
680 # comment that should go right after the colon.
681 newline = next(children)
682 yield from self.visit(newline)
683 yield from self.line(+1)
685 for child in children:
686 yield from self.visit(child)
688 yield from self.line(-1)
690 def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
691 """Visit a statement.
693 The relevant Python language keywords for this statement are NAME leaves
# Each keyword (if/elif/else/except/...) starts a fresh output line.
696 for child in node.children:
697 if child.type == token.NAME and child.value in keywords: # type: ignore
698 yield from self.line()
700 yield from self.visit(child)
702 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
703 """A statement without nested statements."""
704 is_suite_like = node.parent and node.parent.type in STATEMENT
706 yield from self.line(+1)
707 yield from self.visit_default(node)
708 yield from self.line(-1)
711 yield from self.line()
712 yield from self.visit_default(node)
714 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
715 yield from self.line()
# Visit children up to and including the `async` keyword, then hand the
# wrapped statement's children over directly.
717 children = iter(node.children)
718 for child in children:
719 yield from self.visit(child)
721 if child.type == token.NAME and child.value == 'async': # type: ignore
724 internal_stmt = next(children)
725 for child in internal_stmt.children:
726 yield from self.visit(child)
728 def visit_decorators(self, node: Node) -> Iterator[Line]:
# One output line per decorator.
729 for child in node.children:
730 yield from self.line()
731 yield from self.visit(child)
733 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
# Semicolons become line breaks.
734 yield from self.line()
736 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
737 yield from self.visit_default(leaf)
738 yield from self.line()
740 def __attrs_post_init__(self) -> None:
741 """You are in a twisty little maze of passages."""
# Bind per-statement visitors as partials of visit_stmt with the keyword
# set relevant to each grammar production.
743 self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'})
744 self.visit_while_stmt = partial(v, keywords={'while', 'else'})
745 self.visit_for_stmt = partial(v, keywords={'for', 'else'})
746 self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'})
747 self.visit_except_clause = partial(v, keywords={'except'})
748 self.visit_funcdef = partial(v, keywords={'def'})
749 self.visit_with_stmt = partial(v, keywords={'with'})
750 self.visit_classdef = partial(v, keywords={'class'})
751 self.visit_async_funcdef = self.visit_async_stmt
752 self.visit_decorated = self.visit_decorators
# Opening-to-closing bracket token mapping and derived sets.
755 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
756 OPENING_BRACKETS = set(BRACKET.keys())
757 CLOSING_BRACKETS = set(BRACKET.values())
758 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
# Large decision table mapping a leaf's token type (t) and its parent's
# grammar symbol (p.type) to the whitespace that should precede it. Many
# branch bodies and return lines are elided in this paste — the visible lines
# are the branch conditions; the exact returned strings are not recoverable
# from this view.
761 def whitespace(leaf: Leaf) -> str:
762 """Return whitespace prefix if needed for the given `leaf`."""
778 if t == token.COMMENT:
781 if t == STANDALONE_COMMENT:
784 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
785 if p.type in {syms.parameters, syms.arglist}:
786 # untyped function signatures or calls
790 prev = leaf.prev_sibling
791 if not prev or prev.type != token.COMMA:
794 if p.type == syms.varargslist:
799 prev = leaf.prev_sibling
800 if prev and prev.type != token.COMMA:
803 elif p.type == syms.typedargslist:
804 # typed function signatures
805 prev = leaf.prev_sibling
810 if prev.type != syms.tname:
813 elif prev.type == token.EQUAL:
814 # A bit hacky: if the equal sign has whitespace, it means we
815 # previously found it's a typed argument. So, we're using that, too.
818 elif prev.type != token.COMMA:
821 elif p.type == syms.tname:
823 prev = leaf.prev_sibling
825 prevp = preceding_leaf(p)
826 if not prevp or prevp.type != token.COMMA:
829 elif p.type == syms.trailer:
830 # attributes and calls
831 if t == token.LPAR or t == token.RPAR:
834 prev = leaf.prev_sibling
837 prevp = preceding_leaf(p)
838 if not prevp or prevp.type != token.NUMBER:
841 elif t == token.LSQB:
844 elif prev.type != token.COMMA:
847 elif p.type == syms.argument:
852 prev = leaf.prev_sibling
854 prevp = preceding_leaf(p)
855 if not prevp or prevp.type == token.LPAR:
858 elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
861 elif p.type == syms.decorator:
865 elif p.type == syms.dotted_name:
866 prev = leaf.prev_sibling
870 prevp = preceding_leaf(p)
871 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
874 elif p.type == syms.classdef:
878 prev = leaf.prev_sibling
879 if prev and prev.type == token.LPAR:
882 elif p.type == syms.subscript:
887 prev = leaf.prev_sibling
888 if not prev or prev.type == token.COLON:
905 # various arithmetic and logic expressions
906 prev = leaf.prev_sibling
908 prevp = preceding_leaf(p)
909 if not prevp or prevp.type in OPENING_BRACKETS:
912 if prevp.type == token.EQUAL:
913 if prevp.parent and prevp.parent.type in {
914 syms.varargslist, syms.parameters, syms.arglist, syms.argument
920 elif p.type == syms.atom:
921 if t in CLOSING_BRACKETS:
924 prev = leaf.prev_sibling
926 prevp = preceding_leaf(p)
930 if prevp.type in OPENING_BRACKETS:
933 if prevp.type == token.EQUAL:
934 if prevp.parent and prevp.parent.type in {
935 syms.varargslist, syms.parameters, syms.arglist, syms.argument
939 if prevp.type == token.DOUBLESTAR:
940 if prevp.parent and prevp.parent.type in {
941 syms.varargslist, syms.parameters, syms.arglist, syms.dictsetmaker
945 elif prev.type in OPENING_BRACKETS:
949 # dots, but not the first one.
953 p.type == syms.listmaker or
954 p.type == syms.testlist_gexp or
955 p.type == syms.subscriptlist
957 # list interior, including unpacking
958 prev = leaf.prev_sibling
962 elif p.type == syms.dictsetmaker:
963 # dict and set interior, including unpacking
964 prev = leaf.prev_sibling
968 if prev.type == token.DOUBLESTAR:
971 elif p.type == syms.factor or p.type == syms.star_expr:
973 prev = leaf.prev_sibling
975 prevp = preceding_leaf(p)
976 if not prevp or prevp.type in OPENING_BRACKETS:
979 prevp_parent = prevp.parent
980 assert prevp_parent is not None
981 if prevp.type == token.COLON and prevp_parent.type in {
982 syms.subscript, syms.sliceop
986 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
989 elif t == token.NAME or t == token.NUMBER:
992 elif p.type == syms.import_from:
994 prev = leaf.prev_sibling
995 if prev and prev.type == token.DOT:
998 elif t == token.NAME:
1002 prev = leaf.prev_sibling
1003 if prev and prev.type == token.DOT:
1006 elif p.type == syms.sliceop:
# Walks backwards in document order. The surrounding loop/guard lines are
# elided in this paste; the visible logic: take the previous sibling and, if
# it is an internal node, descend to its last leaf.
1012 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1013 """Returns the first leaf that precedes `node`, if any."""
1015 res = node.prev_sibling
1017 if isinstance(res, Leaf):
1021 return list(res.leaves())[-1]
def is_delimiter(leaf: Leaf) -> int:
    """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter.

    Higher numbers are higher priority.
    """
    if leaf.type == token.COMMA:
        return COMMA_PRIORITY

    if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS:
        return LOGIC_PRIORITY

    if leaf.type in COMPARATORS:
        return COMPARATOR_PRIORITY

    if (
        leaf.type in MATH_OPERATORS and
        leaf.parent and
        # Unary operators (factor) and starred expressions use the same
        # tokens as binary math but must not be treated as split points.
        leaf.parent.type not in {syms.factor, syms.star_expr}
    ):
        return MATH_PRIORITY

    # Not a delimiter at all.
    return 0
def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
    """Cleans the prefix of the `leaf` and generates comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    if '#' not in leaf.prefix:
        return

    before_comment, content = leaf.prefix.split('#', 1)
    content = content.rstrip()
    # Normalize "#comment" to "# comment", but leave shebangs (#!) and
    # doubled hashes (##) untouched.
    if content and (content[0] not in {' ', '!', '#'}):
        content = ' ' + content
    is_standalone_comment = (
        '\n' in before_comment or '\n' in content or leaf.type == token.DEDENT
    )
    if not is_standalone_comment:
        # simple trailing comment
        yield Leaf(token.COMMENT, value='#' + content)
        return

    for line in ('#' + content).split('\n'):
        line = line.lstrip()
        if not line.startswith('#'):
            continue

        yield Leaf(STANDALONE_COMMENT, line)
1099 def split_line(line: Line, line_length: int, inner: bool = False) -> Iterator[Line]:
1100 """Splits a `line` into potentially many lines.
1102 They should fit in the allotted `line_length` but might not be able to.
1103 `inner` signifies that there were a pair of brackets somewhere around the
1104 current `line`, possibly transitively. This means we can fallback to splitting
1105 by delimiters if the LHS/RHS don't yield any results.
# Fast path (elided): a line that already fits is yielded unchanged.
1107 line_str = str(line).strip('\n')
1108 if len(line_str) <= line_length and '\n' not in line_str:
# Split-strategy selection: LHS for definitions, delimiter (+ optional RHS)
# inside brackets, RHS otherwise.
1113 split_funcs = [left_hand_split]
1114 elif line.inside_brackets:
1115 split_funcs = [delimiter_split]
1116 if '\n' not in line_str:
1117 # Only attempt RHS if we don't have multiline strings or comments
1119 split_funcs.append(right_hand_split)
1121 split_funcs = [right_hand_split]
1122 for split_func in split_funcs:
1123 # We are accumulating lines in `result` because we might want to abort
1124 # mission and return the original line in the end, or attempt a different
1126 result: List[Line] = []
1128 for l in split_func(line):
# Guard against infinite recursion: a split that changed nothing is an error.
1129 if str(l).strip('\n') == line_str:
1130 raise CannotSplit("Split function returned an unchanged result")
1132 result.extend(split_line(l, line_length=line_length, inner=True))
1133 except CannotSplit as cs:
1144 def left_hand_split(line: Line) -> Iterator[Line]:
1145 """Split line into many lines, starting with the first matching bracket pair.
1147 Note: this usually looks weird, only use this for function definitions.
1148 Prefer RHS otherwise.
# Partition the leaves into head (up to and incl. the first opening bracket),
# body (bracket interior, indented one level), and tail (from the matching
# closing bracket on).
1150 head = Line(depth=line.depth)
1151 body = Line(depth=line.depth + 1, inside_brackets=True)
1152 tail = Line(depth=line.depth)
1153 tail_leaves: List[Leaf] = []
1154 body_leaves: List[Leaf] = []
1155 head_leaves: List[Leaf] = []
1156 current_leaves = head_leaves
1157 matching_bracket = None
1158 for leaf in line.leaves:
1160 current_leaves is body_leaves and
1161 leaf.type in CLOSING_BRACKETS and
1162 leaf.opening_bracket is matching_bracket # type: ignore
1164 current_leaves = tail_leaves
1165 current_leaves.append(leaf)
1166 if current_leaves is head_leaves:
1167 if leaf.type in OPENING_BRACKETS:
1168 matching_bracket = leaf
1169 current_leaves = body_leaves
1170 # Since body is a new indent level, remove spurious leading whitespace.
1172 normalize_prefix(body_leaves[0])
1173 # Build the new lines.
1174 for result, leaves in (
1175 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
# preformatted=True: whitespace was already decided on the original line.
1178 result.append(leaf, preformatted=True)
1179 comment_after = line.comments.get(id(leaf))
1181 result.append(comment_after, preformatted=True)
1182 # Check if the split succeeded.
1183 tail_len = len(str(tail))
1186 raise CannotSplit("Splitting brackets produced the same line")
1190 f"Splitting brackets on an empty body to save "
1191 f"{tail_len} characters is not worth it"
1194 for result in (head, body, tail):
1199 def right_hand_split(line: Line) -> Iterator[Line]:
1200 """Split line into many lines, starting with the last matching bracket pair."""
# Mirror image of left_hand_split: walk the leaves in reverse so the LAST
# bracket pair defines the body, then un-reverse the three leaf lists.
1201 head = Line(depth=line.depth)
1202 body = Line(depth=line.depth + 1, inside_brackets=True)
1203 tail = Line(depth=line.depth)
1204 tail_leaves: List[Leaf] = []
1205 body_leaves: List[Leaf] = []
1206 head_leaves: List[Leaf] = []
1207 current_leaves = tail_leaves
1208 opening_bracket = None
1209 for leaf in reversed(line.leaves):
1210 if current_leaves is body_leaves:
1211 if leaf is opening_bracket:
1212 current_leaves = head_leaves
1213 current_leaves.append(leaf)
1214 if current_leaves is tail_leaves:
1215 if leaf.type in CLOSING_BRACKETS:
1216 opening_bracket = leaf.opening_bracket # type: ignore
1217 current_leaves = body_leaves
1218 tail_leaves.reverse()
1219 body_leaves.reverse()
1220 head_leaves.reverse()
1221 # Since body is a new indent level, remove spurious leading whitespace.
1223 normalize_prefix(body_leaves[0])
1224 # Build the new lines.
1225 for result, leaves in (
1226 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1229 result.append(leaf, preformatted=True)
1230 comment_after = line.comments.get(id(leaf))
1232 result.append(comment_after, preformatted=True)
1233 # Check if the split succeeded.
1234 tail_len = len(str(tail).strip('\n'))
1237 raise CannotSplit("Splitting brackets produced the same line")
1241 f"Splitting brackets on an empty body to save "
1242 f"{tail_len} characters is not worth it"
1245 for result in (head, body, tail):
1250 def delimiter_split(line: Line) -> Iterator[Line]:
1251 """Split according to delimiters of the highest priority.
1253 This kind of split doesn't increase indentation.
1256 last_leaf = line.leaves[-1]
1258 raise CannotSplit("Line empty")
1260 delimiters = line.bracket_tracker.delimiters
# The trailing leaf is excluded so a line never splits right before its end.
1262 delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)})
1264 raise CannotSplit("No delimiters found")
1266 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1267 for leaf in line.leaves:
1268 current_line.append(leaf, preformatted=True)
1269 comment_after = line.comments.get(id(leaf))
1271 current_line.append(comment_after, preformatted=True)
# Emit the accumulated line each time a delimiter of the chosen priority
# is reached, then start a fresh one.
1272 leaf_priority = delimiters.get(id(leaf))
1273 if leaf_priority == delimiter_priority:
1274 normalize_prefix(current_line.leaves[0])
1277 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
# When splitting on commas, ensure the final fragment ends with a trailing
# comma (magic trailing comma behavior).
1280 delimiter_priority == COMMA_PRIORITY and
1281 current_line.leaves[-1].type != token.COMMA
1283 current_line.append(Leaf(token.COMMA, ','))
1284 normalize_prefix(current_line.leaves[0])
def is_import(leaf: Leaf) -> bool:
    """Returns True if the given leaf starts an import statement."""
    p = leaf.parent
    t = leaf.type
    v = leaf.value
    # `import x` parses as import_name; `from x import y` as import_from.
    return bool(
        t == token.NAME and
        (
            (v == 'import' and p and p.type == syms.import_name) or
            (v == 'from' and p and p.type == syms.import_from)
        )
    )
def normalize_prefix(leaf: Leaf) -> None:
    """Leave existing extra newlines for imports. Remove everything else."""
    if is_import(leaf):
        # Only the newlines before any comment in the prefix are kept.
        spl = leaf.prefix.split('#', 1)
        nl_count = spl[0].count('\n')
        if len(spl) > 1:
            # Skip one newline since it was for a standalone comment.
            nl_count -= 1
        leaf.prefix = '\n' * nl_count
        return

    leaf.prefix = ''
# File-discovery configuration for gen_python_files_in_dir().
1316 PYTHON_EXTENSIONS = {'.py'}
# Directory names never descended into (VCS metadata, build artifacts,
# tool caches, virtualenvs).
1317 BLACKLISTED_DIRECTORIES = {
1318 'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv'
def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
    """Recursively yields paths of Python files under `path`.

    Directories listed in BLACKLISTED_DIRECTORIES are skipped entirely;
    files are yielded when their suffix is in PYTHON_EXTENSIONS.
    """
    for child in path.iterdir():
        if child.is_dir():
            if child.name in BLACKLISTED_DIRECTORIES:
                continue

            yield from gen_python_files_in_dir(child)
        elif child.suffix in PYTHON_EXTENSIONS:
            yield child
# Interior of the `Report` dataclass (the class header is elided in this
# paste). Counters are incremented by done()/failed() below.
1336 """Provides a reformatting counter."""
1337 change_count: int = attrib(default=0)
1338 same_count: int = attrib(default=0)
1339 failure_count: int = attrib(default=0)
def done(self, src: Path, changed: bool) -> None:
    """Increment the counter for successful reformatting. Write out a message."""
    if changed:
        out(f'reformatted {src}')
        self.change_count += 1
    else:
        out(f'{src} already well formatted, good job.', bold=False)
        self.same_count += 1
def failed(self, src: Path, message: str) -> None:
    """Record one failed reformatting attempt and report it on stderr."""
    self.failure_count += 1
    err(f'error: cannot format {src}: {message}')
def return_code(self) -> int:
    """Which return code should the app use considering the current state."""
    # Any failure makes the whole run non-zero.
    if self.failure_count:
        return 1

    return 0
1360 def __str__(self) -> str:
1361 """A color report of the current state.
1363 Use `click.unstyle` to remove colors.
# Builds a comma-joined summary, pluralizing each counter's noun and
# coloring reformatted (bold) and failed (red) fragments.
1366 if self.change_count:
1367 s = 's' if self.change_count > 1 else ''
1369 click.style(f'{self.change_count} file{s} reformatted', bold=True)
1372 s = 's' if self.same_count > 1 else ''
1373 report.append(f'{self.same_count} file{s} left unchanged')
1374 if self.failure_count:
1375 s = 's' if self.failure_count > 1 else ''
1378 f'{self.failure_count} file{s} failed to reformat', fg='red'
1381 return ', '.join(report) + '.'
1384 def assert_equivalent(src: str, dst: str) -> None:
1385 """Raises AssertionError if `src` and `dst` aren't equivalent.
1387 This is a temporary sanity check until Black becomes stable.
# _v serializes an AST into a canonical indented text form so two trees can
# be compared as strings.
1393 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
1394 """Simple visitor generating strings to compare ASTs by content."""
1395 yield f"{' ' * depth}{node.__class__.__name__}("
# Fields are visited in sorted order for a deterministic dump.
1397 for field in sorted(node._fields):
1399 value = getattr(node, field)
1400 except AttributeError:
1403 yield f"{' ' * (depth+1)}{field}="
1405 if isinstance(value, list):
1407 if isinstance(item, ast.AST):
1408 yield from _v(item, depth + 2)
1410 elif isinstance(value, ast.AST):
1411 yield from _v(value, depth + 2)
1414 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
1416 yield f"{' ' * depth}) # /{node.__class__.__name__}"
# Unparsable ORIGINAL source is the caller's problem...
1419 src_ast = ast.parse(src)
1420 except Exception as exc:
1421 raise AssertionError(f"cannot parse source: {exc}") from None
# ...but unparsable OUTPUT is an internal Black bug: dump and report.
1424 dst_ast = ast.parse(dst)
1425 except Exception as exc:
1426 log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst)
1427 raise AssertionError(
1428 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
1429 f"Please report a bug on https://github.com/ambv/black/issues. "
1430 f"This invalid output might be helpful: {log}",
1433 src_ast_str = '\n'.join(_v(src_ast))
1434 dst_ast_str = '\n'.join(_v(dst_ast))
1435 if src_ast_str != dst_ast_str:
1436 log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst'))
1437 raise AssertionError(
1438 f"INTERNAL ERROR: Black produced code that is not equivalent to "
1440 f"Please report a bug on https://github.com/ambv/black/issues. "
1441 f"This diff might be helpful: {log}",
def assert_stable(src: str, dst: str, line_length: int) -> None:
    """Raises AssertionError if `dst` reformats differently the second time.

    This is a temporary sanity check until Black becomes stable.
    """
    newdst = format_str(dst, line_length=line_length)
    if dst != newdst:
        # Dump both diffs to a temp file so the bug report has full context.
        log = dump_to_file(
            diff(src, dst, 'source', 'first pass'),
            diff(dst, newdst, 'first pass', 'second pass'),
        )
        raise AssertionError(
            f"INTERNAL ERROR: Black produced different code on the second pass "
            f"of the formatter. "
            f"Please report a bug on https://github.com/ambv/black/issues. "
            f"This diff might be helpful: {log}",
        ) from None
def dump_to_file(*output: str) -> str:
    """Dumps `output` to a temporary file. Returns path to the file.

    Each argument is written followed by a blank line. delete=False keeps
    the file around so its path can be shown to the user.
    """
    with tempfile.NamedTemporaryFile(
        mode='w', prefix='blk_', suffix='.log', delete=False, encoding='utf8'
    ) as f:
        for lines in output:
            f.write(lines)
            f.write('\n' * 2)
    return f.name
def diff(a: str, b: str, a_name: str, b_name: str) -> str:
    """Returns a udiff string between strings `a` and `b`."""
    # Re-append newlines so unified_diff sees proper line records even for
    # input that lacks a trailing newline.
    a_lines = [line + '\n' for line in a.split('\n')]
    b_lines = [line + '\n' for line in b.split('\n')]
    return ''.join(
        difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
    )
1488 if __name__ == '__main__':