All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you could read over the Git project's submission guidelines and adhere to
them, I'd be especially grateful.
3 from asyncio.base_events import BaseEventLoop
4 from concurrent.futures import Executor, ProcessPoolExecutor
5 from functools import partial
8 from pathlib import Path
11 Dict, Generic, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union
14 from attr import attrib, dataclass, Factory
18 from blib2to3.pytree import Node, Leaf, type_repr
19 from blib2to3 import pygram, pytree
20 from blib2to3.pgen2 import driver, token
21 from blib2to3.pgen2.parse import ParseError
# NOTE(review): this file is a line-numbered source listing (each line keeps
# its original line number) with many lines elided; this pass only adds comments.
# Module-level constants and console helpers:
23 __version__ = "18.3a1"
24 DEFAULT_LINE_LENGTH = 88
# Shorthand handle to the lib2to3 Python grammar's symbol table.
26 syms = pygram.python_symbols
# LN: any lib2to3 tree element — interior Node or terminal Leaf.
33 LN = Union[Leaf, Node]
# Console writers; both go to stderr via click.secho (err=True).
34 out = partial(click.secho, bold=True, err=True)
35 err = partial(click.secho, fg='red', err=True)
class NothingChanged(UserWarning):
    """Raised by `format_file` when the reformatted code is the same as source."""
class CannotSplit(Exception):
    """A readable split that fits the allotted line length is impossible.

    Raised by `left_hand_split()` and `right_hand_split()`.
    """
# main(): the click CLI entry point. The listing elides most decorator lines
# (original 48-79), so only fragments of the @click.option declarations show.
54 default=DEFAULT_LINE_LENGTH,
# TYPO (needs a code change, not done in this doc-only pass): the help string
# should read 'characters', not 'character'.
55 help='How many character per line to allow.',
62 "Don't write back the files, just return the status. Return code 0 "
63 "means nothing changed. Return code 1 means some files were "
64 "reformatted. Return code 123 means there was an internal error."
70 help='If --fast given, skip temporary sanity checks. [default: --safe]',
72 @click.version_option(version=__version__)
76 type=click.Path(exists=True, file_okay=True, dir_okay=True, readable=True),
80 ctx: click.Context, line_length: int, check: bool, fast: bool, src: List[str]
82 """The uncompromising code formatter."""
83 sources: List[Path] = []
# Directories are walked recursively for .py files; explicit files are taken
# as-is regardless of extension; anything else is reported as invalid.
87 sources.extend(gen_python_files_in_dir(p))
89 # if a file was explicitly given, we don't care about its extension
92 err(f'invalid path: {s}')
# Single file: format synchronously in-process (write back unless --check).
95 elif len(sources) == 1:
99 changed = format_file_in_place(
100 p, line_length=line_length, fast=fast, write_back=not check
102 report.done(p, changed)
103 except Exception as exc:
104 report.failed(p, str(exc))
105 ctx.exit(report.return_code)
# Multiple files: fan out across CPU cores via an asyncio loop driving a
# ProcessPoolExecutor (see schedule_formatting below).
107 loop = asyncio.get_event_loop()
108 executor = ProcessPoolExecutor(max_workers=os.cpu_count())
111 return_code = loop.run_until_complete(
113 sources, line_length, not check, fast, loop, executor
118 ctx.exit(return_code)
# Async coordinator: runs format_file_in_place for every source file in the
# process-pool executor, then turns each task's outcome into a Report entry.
121 async def schedule_formatting(
130 src: loop.run_in_executor(
131 executor, format_file_in_place, src, line_length, fast, write_back
135 await asyncio.wait(tasks.values())
138 for src, task in tasks.items():
# presumably guarded by a timeout/cancelled check on elided lines — confirm.
140 report.failed(src, 'timed out, cancelling')
142 cancelled.append(task)
143 elif task.exception():
144 report.failed(src, str(task.exception()))
146 report.done(src, task.result())
# Give cancelled tasks up to 2 seconds to wind down before reporting.
148 await asyncio.wait(cancelled, timeout=2)
149 out('All done! ✨ 🍰 ✨')
150 click.echo(str(report))
151 return report.return_code
# Formats one file on disk; returns True when the file changed. NothingChanged
# raised by format_file means "leave the file alone" (handler body elided).
154 def format_file_in_place(
155 src: Path, line_length: int, fast: bool, write_back: bool = False
157 """Format the file and rewrite if changed. Return True if changed."""
159 contents, encoding = format_file(src, line_length=line_length, fast=fast)
160 except NothingChanged:
# Write back preserving the encoding detected while reading.
164 with open(src, "w", encoding=encoding) as f:
# format_file(): reads `src` (tokenize.open autodetects the PEP 263 encoding),
# reformats, and returns (new_contents, encoding). The `def` line itself
# (original 169) is elided from this listing.
170 src: Path, line_length: int, fast: bool
171 ) -> Tuple[FileContent, Encoding]:
172 """Reformats a file and returns its contents and encoding."""
173 with tokenize.open(src) as src_buffer:
174 src_contents = src_buffer.read()
# Empty (whitespace-only) files and no-op reformats raise NothingChanged so
# callers can skip the write-back.
175 if src_contents.strip() == '':
176 raise NothingChanged(src)
178 dst_contents = format_str(src_contents, line_length=line_length)
179 if src_contents == dst_contents:
180 raise NothingChanged(src)
# Temporary sanity checks (skipped under --fast per the elided conditional):
# output must parse to an equivalent AST and reformat to itself (stable).
183 assert_equivalent(src_contents, dst_contents)
184 assert_stable(src_contents, dst_contents, line_length=line_length)
185 return dst_contents, src_buffer.encoding
# format_str(): the core string-to-string reformatter. Walks the parsed tree
# with LineGenerator, interleaving empty-line management (EmptyLineTracker),
# buffered standalone comments, and per-line splitting.
188 def format_str(src_contents: str, line_length: int) -> FileContent:
189 """Reformats a string and returns new contents."""
190 src_node = lib2to3_parse(src_contents)
192 comments: List[Line] = []
193 lines = LineGenerator()
194 elt = EmptyLineTracker()
197 for current_line in lines.visit(src_node):
# `after` empty lines belong to the previously emitted line.
198 for _ in range(after):
199 dst_contents += str(empty_line)
200 before, after = elt.maybe_empty_lines(current_line)
201 for _ in range(before):
202 dst_contents += str(empty_line)
# Buffered comment lines are flushed just before the next non-comment line;
# comment lines themselves are buffered (line 210 below).
203 if not current_line.is_comment:
204 for comment in comments:
205 dst_contents += str(comment)
207 for line in split_line(current_line, line_length=line_length):
208 dst_contents += str(line)
210 comments.append(current_line)
# Any comments still buffered at EOF are flushed here.
211 for comment in comments:
212 dst_contents += str(comment)
# Parses source text with blib2to3 (grammar without the print statement) and
# normalizes the result to a Node; ParseError becomes ValueError with location.
216 def lib2to3_parse(src_txt: str) -> Node:
217 """Given a string with source, return the lib2to3 Node."""
218 grammar = pygram.python_grammar_no_print_statement
219 drv = driver.Driver(grammar, pytree.convert)
# The grammar needs a trailing newline; match the file's own line-ending style
# (sniffed from the first 1024 characters).
220 if src_txt[-1] != '\n':
221 nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
224 result = drv.parse_string(src_txt, True)
225 except ParseError as pe:
226 lineno, column = pe.context[1]
227 lines = src_txt.splitlines()
229 faulty_line = lines[lineno - 1]
# fallback when the reported line number is past the end (guard elided).
231 faulty_line = "<line number missing in source>"
232 raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None
# A bare Leaf result (single-token input) is wrapped in a file_input Node.
234 if isinstance(result, Leaf):
235 result = Node(syms.file_input, [result])
def lib2to3_unparse(node: "Node") -> str:
    """Given a lib2to3 node, return its string representation.

    The listing elided the original body; lib2to3 nodes stringify to their
    exact source text (prefix + value, recursively), so unparsing is str().
    """
    return str(node)
# Generic tree visitor: dispatches to visit_<NAME> methods keyed on the
# token or symbol name, defaulting to a recursive walk over children.
248 class Visitor(Generic[T]):
249 """Basic lib2to3 visitor that yields things on visiting."""
251 def visit(self, node: LN) -> Iterator[T]:
# Token types resolve via token.tok_name; grammar symbols via type_repr
# (the branching lines are elided in this listing).
253 name = token.tok_name[node.type]
255 name = type_repr(node.type)
256 yield from getattr(self, f'visit_{name}', self.visit_default)(node)
258 def visit_default(self, node: LN) -> Iterator[T]:
259 if isinstance(node, Node):
260 for child in node.children:
261 yield from self.visit(child)
# Debugging visitor: pretty-prints the tree with indentation and colors
# (yellow for interior nodes, blue/green for leaves).
265 class DebugVisitor(Visitor[T]):
266 tree_depth: int = attrib(default=0)
268 def visit_default(self, node: LN) -> Iterator[T]:
269 indent = ' ' * (2 * self.tree_depth)
270 if isinstance(node, Node):
271 _type = type_repr(node.type)
272 out(f'{indent}{_type}', fg='yellow')
274 for child in node.children:
275 yield from self.visit(child)
# Closing marker printed after the subtree, mirroring the opening line.
278 out(f'{indent}/{_type}', fg='yellow', bold=False)
# Leaf branch: print type, prefix and value on one line.
280 _type = token.tok_name.get(node.type, str(node.type))
281 out(f'{indent}{_type}', fg='blue', nl=False)
283 # We don't have to handle prefixes for `Node` objects since
284 # that delegates to the first child anyway.
285 out(f' {node.prefix!r}', fg='green', bold=False, nl=False)
286 out(f' {node.value!r}', fg='blue', bold=False)
# Token/keyword classification tables used throughout (many entries elided
# from this listing).
289 KEYWORDS = set(keyword.kwlist)
290 WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE}
291 FLOW_CONTROL = {'return', 'raise', 'break', 'continue'}
# Fake token id for comments that occupy a line of their own.
302 STANDALONE_COMMENT = 153
303 LOGIC_OPERATORS = {'and', 'or'}
# Delimiter split priorities — higher number means a stronger split point.
326 COMPREHENSION_PRIORITY = 20
330 COMPARATOR_PRIORITY = 3
# Tracks bracket nesting depth and records delimiter split priorities while
# leaves are appended to a Line.
335 class BracketTracker:
336 depth: int = attrib(default=0)
337 bracket_match: Dict[Tuple[Depth, NodeType], Leaf] = attrib(default=Factory(dict))
338 delimiters: Dict[LeafID, Priority] = attrib(default=Factory(dict))
339 previous: Optional[Leaf] = attrib(default=None)
341 def mark(self, leaf: Leaf) -> None:
# Comments never affect bracket state (early return elided).
342 if leaf.type == token.COMMENT:
345 if leaf.type in CLOSING_BRACKETS:
# Link the closing bracket back to its opener and record its depth as
# dynamic attributes on the leaf (hence the type: ignore).
347 opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
348 leaf.opening_bracket = opening_bracket # type: ignore
349 leaf.bracket_depth = self.depth # type: ignore
351 delim = is_delimiter(leaf)
353 self.delimiters[id(leaf)] = delim
354 elif self.previous is not None:
# Implicit string concatenation and comprehension 'for'/'if' mark the
# PREVIOUS leaf as the split point.
355 if leaf.type == token.STRING and self.previous.type == token.STRING:
356 self.delimiters[id(self.previous)] = STRING_PRIORITY
358 leaf.type == token.NAME and
359 leaf.value == 'for' and
361 leaf.parent.type in {syms.comp_for, syms.old_comp_for}
363 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
365 leaf.type == token.NAME and
366 leaf.value == 'if' and
368 leaf.parent.type in {syms.comp_if, syms.old_comp_if}
370 self.delimiters[id(self.previous)] = COMPREHENSION_PRIORITY
371 if leaf.type in OPENING_BRACKETS:
372 self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
376 def any_open_brackets(self) -> bool:
377 """Returns True if there is an yet unmatched open bracket on the line."""
378 return bool(self.bracket_match)
380 def max_priority(self, exclude: Iterable[LeafID] =()) -> int:
381 """Returns the highest priority of a delimiter found on the line.
383 Values are consistent with what `is_delimiter()` returns.
# NOTE(review): max() raises ValueError on an empty sequence — presumably
# callers only ask when delimiters exist; confirm against call sites.
385 return max(v for k, v in self.delimiters.items() if k not in exclude)
# Body of the Line class (the `class Line:` header line, original 389, is
# elided from this listing). Holds one logical line of output as a list of
# leaves plus trailing comments, with bracket/for-loop bookkeeping.
390 depth: int = attrib(default=0)
391 leaves: List[Leaf] = attrib(default=Factory(list))
392 comments: Dict[LeafID, Leaf] = attrib(default=Factory(dict))
393 bracket_tracker: BracketTracker = attrib(default=Factory(BracketTracker))
394 inside_brackets: bool = attrib(default=False)
395 has_for: bool = attrib(default=False)
396 _for_loop_variable: bool = attrib(default=False, init=False)
398 def append(self, leaf: Leaf, preformatted: bool = False) -> None:
399 has_value = leaf.value.strip()
403 if self.leaves and not preformatted:
404 # Note: at this point leaf.prefix should be empty except for
405 # imports, for which we only preserve newlines.
406 leaf.prefix += whitespace(leaf)
407 if self.inside_brackets or not preformatted:
408 self.maybe_decrement_after_for_loop_variable(leaf)
409 self.bracket_tracker.mark(leaf)
410 self.maybe_remove_trailing_comma(leaf)
411 self.maybe_increment_for_loop_variable(leaf)
412 if self.maybe_adapt_standalone_comment(leaf):
# Comments fold into self.comments; everything else joins the leaf list.
415 if not self.append_comment(leaf):
416 self.leaves.append(leaf)
# The following predicates classify the line by its first leaf; all are
# properties in the original (decorator lines elided).
419 def is_comment(self) -> bool:
420 return bool(self) and self.leaves[0].type == STANDALONE_COMMENT
423 def is_decorator(self) -> bool:
424 return bool(self) and self.leaves[0].type == token.AT
427 def is_import(self) -> bool:
428 return bool(self) and is_import(self.leaves[0])
431 def is_class(self) -> bool:
434 self.leaves[0].type == token.NAME and
435 self.leaves[0].value == 'class'
439 def is_def(self) -> bool:
440 """Also returns True for async defs."""
442 first_leaf = self.leaves[0]
447 second_leaf: Optional[Leaf] = self.leaves[1]
451 (first_leaf.type == token.NAME and first_leaf.value == 'def') or
453 first_leaf.type == token.NAME and
454 first_leaf.value == 'async' and
455 second_leaf is not None and
456 second_leaf.type == token.NAME and
457 second_leaf.value == 'def'
462 def is_flow_control(self) -> bool:
465 self.leaves[0].type == token.NAME and
466 self.leaves[0].value in FLOW_CONTROL
470 def is_yield(self) -> bool:
473 self.leaves[0].type == token.NAME and
474 self.leaves[0].value == 'yield'
477 def maybe_remove_trailing_comma(self, closing: Leaf) -> bool:
480 self.leaves[-1].type == token.COMMA and
481 closing.type in CLOSING_BRACKETS
# Trailing commas before ] or } are always safe to drop.
485 if closing.type == token.RSQB or closing.type == token.RBRACE:
489 # For parens let's check if it's safe to remove the comma. If the
490 # trailing one is the only one, we might mistakenly change a tuple
491 # into a different type by removing the comma.
492 depth = closing.bracket_depth + 1 # type: ignore
494 opening = closing.opening_bracket # type: ignore
495 for _opening_index, leaf in enumerate(self.leaves):
502 for leaf in self.leaves[_opening_index + 1:]:
506 bracket_depth = leaf.bracket_depth # type: ignore
507 if bracket_depth == depth and leaf.type == token.COMMA:
515 def maybe_increment_for_loop_variable(self, leaf: Leaf) -> bool:
516 """In a for loop, or comprehension, the variables are often unpacks.
518 To avoid splitting on the comma in this situation, we will increase
519 the depth of tokens between `for` and `in`.
521 if leaf.type == token.NAME and leaf.value == 'for':
523 self.bracket_tracker.depth += 1
524 self._for_loop_variable = True
529 def maybe_decrement_after_for_loop_variable(self, leaf: Leaf) -> bool:
530 # See `maybe_increment_for_loop_variable` above for explanation.
531 if self._for_loop_variable and leaf.type == token.NAME and leaf.value == 'in':
532 self.bracket_tracker.depth -= 1
533 self._for_loop_variable = False
538 def maybe_adapt_standalone_comment(self, comment: Leaf) -> bool:
539 """Hack a standalone comment to act as a trailing comment for line splitting.
541 If this line has brackets and a standalone `comment`, we need to adapt
542 it to be able to still reformat the line.
544 This is not perfect, the line to which the standalone comment gets
545 appended will appear "too long" when splitting.
548 comment.type == STANDALONE_COMMENT and
549 self.bracket_tracker.any_open_brackets()
553 comment.type = token.COMMENT
554 comment.prefix = '\n' + ' ' * (self.depth + 1)
555 return self.append_comment(comment)
557 def append_comment(self, comment: Leaf) -> bool:
558 if comment.type != token.COMMENT:
# Comments attach to the last non-delimiter leaf; multiple comments on the
# same leaf are concatenated into one (line 570).
562 after = id(self.last_non_delimiter())
564 comment.type = STANDALONE_COMMENT
569 if after in self.comments:
570 self.comments[after].value += str(comment)
572 self.comments[after] = comment
575 def last_non_delimiter(self) -> Leaf:
# Scan from the end for the first leaf that is not a delimiter.
576 for i in range(len(self.leaves)):
577 last = self.leaves[-i - 1]
578 if not is_delimiter(last):
581 raise LookupError("No non-delimiters found")
583 def __str__(self) -> str:
# Renders the line: first leaf keeps its prefix, then the indent, then
# the remaining leaves and attached comments (lines elided).
587 indent = ' ' * self.depth
588 leaves = iter(self.leaves)
590 res = f'{first.prefix}{indent}{first.value}'
593 for comment in self.comments.values():
597 def __bool__(self) -> bool:
598 return bool(self.leaves or self.comments)
# Stateful policy object deciding how many blank lines surround each line.
602 class EmptyLineTracker:
603 """Provides a stateful method that returns the number of potential extra
604 empty lines needed before and after the currently processed line.
606 Note: this tracker works on lines that haven't been split yet.
608 previous_line: Optional[Line] = attrib(default=None)
609 previous_after: int = attrib(default=0)
610 previous_defs: List[int] = attrib(default=Factory(list))
612 def maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
613 """Returns the number of extra empty lines before and after the `current_line`.
615 This is for separating `def`, `async def` and `class` with extra empty lines
616 (two on module-level), as well as providing an extra empty line after flow
617 control keywords to make them more prominent.
619 before, after = self._maybe_empty_lines(current_line)
620 self.previous_after = after
621 self.previous_line = current_line
624 def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]:
626 depth = current_line.depth
# Pop tracked defs we have dedented out of.
627 while self.previous_defs and self.previous_defs[-1] >= depth:
628 self.previous_defs.pop()
# Two blank lines at module level, one when nested; minus what the
# previous line already emitted after itself.
629 before = (1 if depth else 2) - self.previous_after
630 is_decorator = current_line.is_decorator
631 if is_decorator or current_line.is_def or current_line.is_class:
633 self.previous_defs.append(depth)
634 if self.previous_line is None:
635 # Don't insert empty lines before the first line in the file.
638 if self.previous_line and self.previous_line.is_decorator:
639 # Don't insert empty lines between decorators.
643 if current_line.depth:
645 newlines -= self.previous_after
648 if current_line.is_flow_control:
# One blank line after an import block when the next same-depth line
# is not an import.
652 self.previous_line and
653 self.previous_line.is_import and
654 not current_line.is_import and
655 depth == self.previous_line.depth
657 return (before or 1), 0
660 self.previous_line and
661 self.previous_line.is_yield and
662 (not current_line.is_yield or depth != self.previous_line.depth)
664 return (before or 1), 0
# Visitor that turns the lib2to3 tree into a stream of reformatted Lines.
670 class LineGenerator(Visitor[Line]):
671 """Generates reformatted Line objects. Empty lines are not emitted.
673 Note: destroys the tree it's visiting by mutating prefixes of its leaves
674 in ways that will no longer stringify to valid Python code on the tree.
676 current_line: Line = attrib(default=Factory(Line))
677 standalone_comments: List[Leaf] = attrib(default=Factory(list))
679 def line(self, indent: int = 0) -> Iterator[Line]:
# Emits the accumulated current_line (if any) and starts a fresh one at
# depth shifted by `indent`.
682 If the line is empty, only emit if it makes sense.
683 If the line is too long, split it first and then generate.
685 If any lines were generated, set up a new current_line.
687 if not self.current_line:
688 self.current_line.depth += indent
689 return # Line is empty, don't emit. Creating a new one unnecessary.
691 complete_line = self.current_line
692 self.current_line = Line(depth=complete_line.depth + indent)
695 def visit_default(self, node: LN) -> Iterator[Line]:
696 if isinstance(node, Leaf):
697 for comment in generate_comments(node):
698 if self.current_line.bracket_tracker.any_open_brackets():
699 # any comment within brackets is subject to splitting
700 self.current_line.append(comment)
701 elif comment.type == token.COMMENT:
702 # regular trailing comment
703 self.current_line.append(comment)
704 yield from self.line()
707 # regular standalone comment, to be processed later (see
708 # docstring in `generate_comments()`
709 self.standalone_comments.append(comment)
710 normalize_prefix(node)
# Flush buffered standalone comments (each on its own line) before the
# next significant leaf.
711 if node.type not in WHITESPACE:
712 for comment in self.standalone_comments:
713 yield from self.line()
715 self.current_line.append(comment)
716 yield from self.line()
718 self.standalone_comments = []
719 self.current_line.append(node)
720 yield from super().visit_default(node)
722 def visit_suite(self, node: Node) -> Iterator[Line]:
723 """Body of a statement after a colon."""
724 children = iter(node.children)
725 # Process newline before indenting. It might contain an inline
726 # comment that should go right after the colon.
727 newline = next(children)
728 yield from self.visit(newline)
729 yield from self.line(+1)
731 for child in children:
732 yield from self.visit(child)
734 yield from self.line(-1)
736 def visit_stmt(self, node: Node, keywords: Set[str]) -> Iterator[Line]:
737 """Visit a statement.
739 The relevant Python language keywords for this statement are NAME leaves
# Starts a fresh line at each keyword of the statement (if/elif/else etc.).
742 for child in node.children:
743 if child.type == token.NAME and child.value in keywords: # type: ignore
744 yield from self.line()
746 yield from self.visit(child)
748 def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
749 """A statement without nested statements."""
750 is_suite_like = node.parent and node.parent.type in STATEMENT
# A simple statement used as a suite body gets its own indented line.
752 yield from self.line(+1)
753 yield from self.visit_default(node)
754 yield from self.line(-1)
757 yield from self.line()
758 yield from self.visit_default(node)
760 def visit_async_stmt(self, node: Node) -> Iterator[Line]:
761 yield from self.line()
763 children = iter(node.children)
764 for child in children:
765 yield from self.visit(child)
# Stop after the 'async' keyword; the wrapped statement is visited below.
767 if child.type == token.NAME and child.value == 'async': # type: ignore
770 internal_stmt = next(children)
771 for child in internal_stmt.children:
772 yield from self.visit(child)
774 def visit_decorators(self, node: Node) -> Iterator[Line]:
# Each decorator goes on its own line.
775 for child in node.children:
776 yield from self.line()
777 yield from self.visit(child)
779 def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
# Semicolons split into separate lines; the ';' itself is dropped.
780 yield from self.line()
782 def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
783 yield from self.visit_default(leaf)
784 yield from self.line()
786 def __attrs_post_init__(self) -> None:
787 """You are in a twisty little maze of passages."""
# Wires statement-specific visitors as partials of visit_stmt with each
# statement's relevant keywords.
789 self.visit_if_stmt = partial(v, keywords={'if', 'else', 'elif'})
790 self.visit_while_stmt = partial(v, keywords={'while', 'else'})
791 self.visit_for_stmt = partial(v, keywords={'for', 'else'})
792 self.visit_try_stmt = partial(v, keywords={'try', 'except', 'else', 'finally'})
793 self.visit_except_clause = partial(v, keywords={'except'})
794 self.visit_funcdef = partial(v, keywords={'def'})
795 self.visit_with_stmt = partial(v, keywords={'with'})
796 self.visit_classdef = partial(v, keywords={'class'})
797 self.visit_async_funcdef = self.visit_async_stmt
798 self.visit_decorated = self.visit_decorators
# Opening-to-closing bracket map and the sets derived from it.
801 BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE}
802 OPENING_BRACKETS = set(BRACKET.keys())
803 CLOSING_BRACKETS = set(BRACKET.values())
804 BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS
# Decides what whitespace should precede `leaf`, by exhaustive case analysis
# on the leaf's type (t), parent (p), previous sibling (prev) and the leaf
# textually preceding the parent (prevp). Most return lines are elided; only
# the condition lines survive in this listing.
807 def whitespace(leaf: Leaf) -> str:
808 """Return whitespace prefix if needed for the given `leaf`."""
824 if t == token.COMMENT:
827 if t == STANDALONE_COMMENT:
830 if t in CLOSING_BRACKETS:
833 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
834 prev = leaf.prev_sibling
# With no previous sibling, inspect the leaf preceding the parent node.
836 prevp = preceding_leaf(p)
837 if not prevp or prevp.type in OPENING_BRACKETS:
840 if prevp.type == token.EQUAL:
841 if prevp.parent and prevp.parent.type in {
850 elif prevp.type == token.DOUBLESTAR:
851 if prevp.parent and prevp.parent.type in {
860 elif prevp.type == token.COLON:
861 if prevp.parent and prevp.parent.type == syms.subscript:
864 elif prevp.parent and prevp.parent.type == syms.factor:
867 elif prev.type in OPENING_BRACKETS:
870 if p.type in {syms.parameters, syms.arglist}:
871 # untyped function signatures or calls
875 if not prev or prev.type != token.COMMA:
878 if p.type == syms.varargslist:
883 if prev and prev.type != token.COMMA:
886 elif p.type == syms.typedargslist:
887 # typed function signatures
892 if prev.type != syms.tname:
895 elif prev.type == token.EQUAL:
896 # A bit hacky: if the equal sign has whitespace, it means we
897 # previously found it's a typed argument. So, we're using that, too.
900 elif prev.type != token.COMMA:
903 elif p.type == syms.tname:
906 prevp = preceding_leaf(p)
907 if not prevp or prevp.type != token.COMMA:
910 elif p.type == syms.trailer:
911 # attributes and calls
912 if t == token.LPAR or t == token.RPAR:
917 prevp = preceding_leaf(p)
918 if not prevp or prevp.type != token.NUMBER:
921 elif t == token.LSQB:
924 elif prev.type != token.COMMA:
927 elif p.type == syms.argument:
# keyword arguments: no space around '='.
933 prevp = preceding_leaf(p)
934 if not prevp or prevp.type == token.LPAR:
937 elif prev.type == token.EQUAL or prev.type == token.DOUBLESTAR:
940 elif p.type == syms.decorator:
944 elif p.type == syms.dotted_name:
948 prevp = preceding_leaf(p)
949 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
952 elif p.type == syms.classdef:
956 if prev and prev.type == token.LPAR:
959 elif p.type == syms.subscript:
961 if not prev or prev.type == token.COLON:
964 elif p.type == syms.atom:
965 if prev and t == token.DOT:
966 # dots, but not the first one.
970 p.type == syms.listmaker or
971 p.type == syms.testlist_gexp or
972 p.type == syms.subscriptlist
974 # list interior, including unpacking
978 elif p.type == syms.dictsetmaker:
979 # dict and set interior, including unpacking
983 if prev.type == token.DOUBLESTAR:
986 elif p.type == syms.factor or p.type == syms.star_expr:
# unary operators and starred expressions
989 prevp = preceding_leaf(p)
990 if not prevp or prevp.type in OPENING_BRACKETS:
993 prevp_parent = prevp.parent
994 assert prevp_parent is not None
995 if prevp.type == token.COLON and prevp_parent.type in {
996 syms.subscript, syms.sliceop
1000 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
1003 elif t == token.NAME or t == token.NUMBER:
1006 elif p.type == syms.import_from:
1008 if prev and prev.type == token.DOT:
1011 elif t == token.NAME:
1015 if prev and prev.type == token.DOT:
1018 elif p.type == syms.sliceop:
# Finds the leaf textually before `node` by walking prev_sibling/parent links
# (the loop and fallback lines are elided in this listing).
1024 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
1025 """Returns the first leaf that precedes `node`, if any."""
1027 res = node.prev_sibling
1029 if isinstance(res, Leaf):
# For an interior-Node sibling, the preceding leaf is its last descendant.
1033 return list(res.leaves())[-1]
# Maps a leaf to its split priority; used by BracketTracker.mark() and
# Line.last_non_delimiter() (0 means "not a delimiter").
1042 def is_delimiter(leaf: Leaf) -> int:
1043 """Returns the priority of the `leaf` delimiter. Returns 0 if not delimiter.
1045 Higher numbers are higher priority.
1047 if leaf.type == token.COMMA:
1048 return COMMA_PRIORITY
1050 if leaf.type == token.NAME and leaf.value in LOGIC_OPERATORS:
1051 return LOGIC_PRIORITY
1053 if leaf.type in COMPARATORS:
1054 return COMPARATOR_PRIORITY
# Math operators count only when binary: unary contexts (factor/star_expr)
# are excluded.
1057 leaf.type in MATH_OPERATORS and
1059 leaf.parent.type not in {syms.factor, syms.star_expr}
1061 return MATH_PRIORITY
# Extracts comments hidden in a leaf's whitespace prefix and yields them as
# parentless COMMENT or STANDALONE_COMMENT leaves.
1066 def generate_comments(leaf: Leaf) -> Iterator[Leaf]:
1067 """Cleans the prefix of the `leaf` and generates comments from it, if any.
1069 Comments in lib2to3 are shoved into the whitespace prefix. This happens
1070 in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
1071 move because it does away with modifying the grammar to include all the
1072 possible places in which comments can be placed.
1074 The sad consequence for us though is that comments don't "belong" anywhere.
1075 This is why this function generates simple parentless Leaf objects for
1076 comments. We simply don't know what the correct parent should be.
1078 No matter though, we can live without this. We really only need to
1079 differentiate between inline and standalone comments. The latter don't
1080 share the line with any code.
1082 Inline comments are emitted as regular token.COMMENT leaves. Standalone
1083 are emitted with a fake STANDALONE_COMMENT token identifier.
1088 if '#' not in leaf.prefix:
1091 before_comment, content = leaf.prefix.split('#', 1)
1092 content = content.rstrip()
# Normalize '# text' spacing but leave shebangs (!) and '##'-style intact.
1093 if content and (content[0] not in {' ', '!', '#'}):
1094 content = ' ' + content
# A newline on either side of the '#' means the comment had a line to itself.
1095 is_standalone_comment = (
1096 '\n' in before_comment or '\n' in content or leaf.type == token.DEDENT
1098 if not is_standalone_comment:
1099 # simple trailing comment
1100 yield Leaf(token.COMMENT, value='#' + content)
# Standalone case: emit one leaf per '#'-prefixed line.
1103 for line in ('#' + content).split('\n'):
1104 line = line.lstrip()
1105 if not line.startswith('#'):
1108 yield Leaf(STANDALONE_COMMENT, line)
# Recursive driver for line splitting: tries LHS/RHS/delimiter strategies and
# falls back to yielding the line unchanged (elided) when none applies.
1111 def split_line(line: Line, line_length: int, inner: bool = False) -> Iterator[Line]:
1112 """Splits a `line` into potentially many lines.
1114 They should fit in the allotted `line_length` but might not be able to.
1115 `inner` signifies that there were a pair of brackets somewhere around the
1116 current `line`, possibly transitively. This means we can fallback to splitting
1117 by delimiters if the LHS/RHS don't yield any results.
# Short single-line results need no splitting (early return elided).
1119 line_str = str(line).strip('\n')
1120 if len(line_str) <= line_length and '\n' not in line_str:
# Strategy selection: LHS for defs (condition elided), delimiter split
# inside brackets, RHS otherwise.
1125 split_funcs = [left_hand_split]
1126 elif line.inside_brackets:
1127 split_funcs = [delimiter_split]
1128 if '\n' not in line_str:
1129 # Only attempt RHS if we don't have multiline strings or comments
1131 split_funcs.append(right_hand_split)
1133 split_funcs = [right_hand_split]
1134 for split_func in split_funcs:
1135 # We are accumulating lines in `result` because we might want to abort
1136 # mission and return the original line in the end, or attempt a different
1138 result: List[Line] = []
1140 for l in split_func(line):
# Guard against infinite recursion on a no-op split.
1141 if str(l).strip('\n') == line_str:
1142 raise CannotSplit("Split function returned an unchanged result")
1144 result.extend(split_line(l, line_length=line_length, inner=True))
1145 except CannotSplit as cs:
# Splits at the FIRST bracket pair: head(opening) / indented body / tail.
1156 def left_hand_split(line: Line) -> Iterator[Line]:
1157 """Split line into many lines, starting with the first matching bracket pair.
1159 Note: this usually looks weird, only use this for function definitions.
1160 Prefer RHS otherwise.
1162 head = Line(depth=line.depth)
1163 body = Line(depth=line.depth + 1, inside_brackets=True)
1164 tail = Line(depth=line.depth)
1165 tail_leaves: List[Leaf] = []
1166 body_leaves: List[Leaf] = []
1167 head_leaves: List[Leaf] = []
1168 current_leaves = head_leaves
1169 matching_bracket = None
# Single forward pass: leaves accumulate into head until the first opening
# bracket, then body until its matching close, then tail.
1170 for leaf in line.leaves:
1172 current_leaves is body_leaves and
1173 leaf.type in CLOSING_BRACKETS and
1174 leaf.opening_bracket is matching_bracket # type: ignore
1176 current_leaves = tail_leaves
1177 current_leaves.append(leaf)
1178 if current_leaves is head_leaves:
1179 if leaf.type in OPENING_BRACKETS:
1180 matching_bracket = leaf
1181 current_leaves = body_leaves
1182 # Since body is a new indent level, remove spurious leading whitespace.
1184 normalize_prefix(body_leaves[0])
1185 # Build the new lines.
1186 for result, leaves in (
1187 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1190 result.append(leaf, preformatted=True)
# Carry each leaf's trailing comment along to its new line.
1191 comment_after = line.comments.get(id(leaf))
1193 result.append(comment_after, preformatted=True)
1194 # Check if the split succeeded.
1195 tail_len = len(str(tail))
1198 raise CannotSplit("Splitting brackets produced the same line")
1202 f"Splitting brackets on an empty body to save "
1203 f"{tail_len} characters is not worth it"
1206 for result in (head, body, tail):
# Splits at the LAST bracket pair; mirrors left_hand_split but scans the
# leaves in reverse and un-reverses the three buckets afterwards.
1211 def right_hand_split(line: Line) -> Iterator[Line]:
1212 """Split line into many lines, starting with the last matching bracket pair."""
1213 head = Line(depth=line.depth)
1214 body = Line(depth=line.depth + 1, inside_brackets=True)
1215 tail = Line(depth=line.depth)
1216 tail_leaves: List[Leaf] = []
1217 body_leaves: List[Leaf] = []
1218 head_leaves: List[Leaf] = []
1219 current_leaves = tail_leaves
1220 opening_bracket = None
1221 for leaf in reversed(line.leaves):
1222 if current_leaves is body_leaves:
1223 if leaf is opening_bracket:
1224 current_leaves = head_leaves
1225 current_leaves.append(leaf)
1226 if current_leaves is tail_leaves:
1227 if leaf.type in CLOSING_BRACKETS:
1228 opening_bracket = leaf.opening_bracket # type: ignore
1229 current_leaves = body_leaves
1230 tail_leaves.reverse()
1231 body_leaves.reverse()
1232 head_leaves.reverse()
1233 # Since body is a new indent level, remove spurious leading whitespace.
1235 normalize_prefix(body_leaves[0])
1236 # Build the new lines.
1237 for result, leaves in (
1238 (head, head_leaves), (body, body_leaves), (tail, tail_leaves)
1241 result.append(leaf, preformatted=True)
1242 comment_after = line.comments.get(id(leaf))
1244 result.append(comment_after, preformatted=True)
1245 # Check if the split succeeded.
1246 tail_len = len(str(tail).strip('\n'))
1249 raise CannotSplit("Splitting brackets produced the same line")
1253 f"Splitting brackets on an empty body to save "
1254 f"{tail_len} characters is not worth it"
1257 for result in (head, body, tail):
# Splits a line at every delimiter of the highest recorded priority, without
# adding indentation; appends a magic trailing comma for comma-splits.
1262 def delimiter_split(line: Line) -> Iterator[Line]:
1263 """Split according to delimiters of the highest priority.
1265 This kind of split doesn't increase indentation.
1268 last_leaf = line.leaves[-1]
1270 raise CannotSplit("Line empty")
1272 delimiters = line.bracket_tracker.delimiters
# The last leaf is excluded so a trailing delimiter doesn't count.
1274 delimiter_priority = line.bracket_tracker.max_priority(exclude={id(last_leaf)})
1276 raise CannotSplit("No delimiters found")
1278 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
1279 for leaf in line.leaves:
1280 current_line.append(leaf, preformatted=True)
1281 comment_after = line.comments.get(id(leaf))
1283 current_line.append(comment_after, preformatted=True)
1284 leaf_priority = delimiters.get(id(leaf))
# Emit the accumulated chunk whenever a top-priority delimiter is hit.
1285 if leaf_priority == delimiter_priority:
1286 normalize_prefix(current_line.leaves[0])
1289 current_line = Line(depth=line.depth, inside_brackets=line.inside_brackets)
# Comma splits get a trailing comma if the last chunk lacks one.
1292 delimiter_priority == COMMA_PRIORITY and
1293 current_line.leaves[-1].type != token.COMMA
1295 current_line.append(Leaf(token.COMMA, ','))
1296 normalize_prefix(current_line.leaves[0])
# True when `leaf` is the keyword opening an import statement ('import' under
# import_name or 'from' under import_from).
1300 def is_import(leaf: Leaf) -> bool:
1301 """Returns True if the given leaf starts an import statement."""
1308 (v == 'import' and p and p.type == syms.import_name) or
1309 (v == 'from' and p and p.type == syms.import_from)
# Rewrites a leaf's prefix to newlines only (count taken from before any
# comment text), discarding other whitespace.
1314 def normalize_prefix(leaf: Leaf) -> None:
1315 """Leave existing extra newlines for imports. Remove everything else."""
1317 spl = leaf.prefix.split('#', 1)
1318 nl_count = spl[0].count('\n')
1320 # Skip one newline since it was for a standalone comment.
1322 leaf.prefix = '\n' * nl_count
# File-discovery configuration: which suffixes to format and which directory
# names to prune while walking.
1328 PYTHON_EXTENSIONS = {'.py'}
1329 BLACKLISTED_DIRECTORIES = {
1330 'build', 'buck-out', 'dist', '_build', '.git', '.hg', '.mypy_cache', '.tox', '.venv'
# Recursively yields .py files under `path`, pruning blacklisted directories.
1334 def gen_python_files_in_dir(path: Path) -> Iterator[Path]:
1335 for child in path.iterdir():
1337 if child.name in BLACKLISTED_DIRECTORIES:
1340 yield from gen_python_files_in_dir(child)
1342 elif child.suffix in PYTHON_EXTENSIONS:
1348 """Provides a reformatting counter."""
1349 change_count: int = attrib(default=0)
1350 same_count: int = attrib(default=0)
1351 failure_count: int = attrib(default=0)
1353 def done(self, src: Path, changed: bool) -> None:
1354 """Increment the counter for successful reformatting. Write out a message."""
# NOTE(review): lines 1355 and 1358 are elided — presumably the
# `if changed:` / `else:` pair selecting between the two branches below.
# Changed file: report loudly and bump the reformatted counter.
1356 out(f'reformatted {src}')
1357 self.change_count += 1
# Unchanged file: report quietly (bold=False) and bump the same counter.
1359 out(f'{src} already well formatted, good job.', bold=False)
1360 self.same_count += 1
1362 def failed(self, src: Path, message: str) -> None:
1363 """Increment the counter for failed reformatting. Write out a message."""
# `err` writes to stderr in red (see module-level partial of click.secho).
1364 err(f'error: cannot format {src}: {message}')
1365 self.failure_count += 1
# NOTE(review): the decorator line (1367, presumably @property) is elided
# in this excerpt.
1368 def return_code(self) -> int:
1369 """Which return code should the app use considering the current state."""
1370 # According to http://tldp.org/LDP/abs/html/exitcodes.html starting with
1371 # 126 we have special returncodes reserved by the shell.
# NOTE(review): the actual `return` statements (lines 1373-1374, 1376-1379)
# are elided — per the CLI help text in this file, failures map to an
# internal-error code (123), changes to 1, and no changes to 0; confirm
# against the full source.
1372 if self.failure_count:
1375 elif self.change_count:
1380 def __str__(self) -> str:
1381 """A color report of the current state.
1383 Use `click.unstyle` to remove colors.
# NOTE(review): several lines of this method (e.g. the initialization of
# the `report` list, the `report.append(...)` wrappers around lines 1389
# and 1398, and the same_count/failure_count guards at 1390-1391) are
# elided in this excerpt; confirm against the full source.
# Each branch appends one summary segment; `s` pluralizes "file" when the
# respective count exceeds one.
1386 if self.change_count:
1387 s = 's' if self.change_count > 1 else ''
# Reformatted-files segment is styled bold to stand out.
1389 click.style(f'{self.change_count} file{s} reformatted', bold=True)
1392 s = 's' if self.same_count > 1 else ''
1393 report.append(f'{self.same_count} file{s} left unchanged')
1394 if self.failure_count:
1395 s = 's' if self.failure_count > 1 else ''
# Failure segment is styled red.
1398 f'{self.failure_count} file{s} failed to reformat', fg='red'
# Segments are joined into a single comma-separated sentence.
1401 return ', '.join(report) + '.'
1404 def assert_equivalent(src: str, dst: str) -> None:
1405 """Raises AssertionError if `src` and `dst` aren't equivalent.
1407 This is a temporary sanity check until Black becomes stable.
# Nested helper: renders an AST as an indented, line-per-node textual dump
# so two trees can be compared by string equality below.
1413 def _v(node: ast.AST, depth: int = 0) -> Iterator[str]:
1414 """Simple visitor generating strings to compare ASTs by content."""
1415 yield f"{' ' * depth}{node.__class__.__name__}("
# Fields are visited in sorted order so the dump is deterministic
# regardless of the `_fields` declaration order.
1417 for field in sorted(node._fields):
1419 value = getattr(node, field)
# Some declared fields may be absent on a given node instance —
# skip them rather than fail.
1421 except AttributeError:
1423 yield f"{' ' * (depth+1)}{field}="
# Recurse into child nodes, whether held directly or inside a list.
1425 if isinstance(value, list):
1427 if isinstance(item, ast.AST):
1428 yield from _v(item, depth + 2)
1430 elif isinstance(value, ast.AST):
1431 yield from _v(value, depth + 2)
# Leaf values (strings, numbers, ...) are dumped with their repr and
# type name for unambiguous comparison.
1434 yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
1436 yield f"{' ' * depth}) # /{node.__class__.__name__}"
# If the *source* doesn't parse, that is the caller's problem, not a
# Black bug — raise without chaining the parse error's traceback.
1439 src_ast = ast.parse(src)
1440 except Exception as exc:
1441 raise AssertionError(f"cannot parse source: {exc}") from None
# If the *output* doesn't parse, Black produced invalid code: dump the
# traceback and the bad output to a log file and report it as a bug.
1444 dst_ast = ast.parse(dst)
1445 except Exception as exc:
1446 log = dump_to_file(''.join(traceback.format_tb(exc.__traceback__)), dst)
1447 raise AssertionError(
1448 f"INTERNAL ERROR: Black produced invalid code: {exc}. "
1449 f"Please report a bug on https://github.com/ambv/black/issues. "
1450 f"This invalid output might be helpful: {log}",
# Compare the textual dumps of both trees; any difference means the
# reformatting changed the code's meaning.
1453 src_ast_str = '\n'.join(_v(src_ast))
1454 dst_ast_str = '\n'.join(_v(dst_ast))
1455 if src_ast_str != dst_ast_str:
1456 log = dump_to_file(diff(src_ast_str, dst_ast_str, 'src', 'dst'))
1457 raise AssertionError(
1458 f"INTERNAL ERROR: Black produced code that is not equivalent to "
1460 f"Please report a bug on https://github.com/ambv/black/issues. "
1461 f"This diff might be helpful: {log}",
1465 def assert_stable(src: str, dst: str, line_length: int) -> None:
1466 """Raises AssertionError if `dst` reformats differently the second time.
1468 This is a temporary sanity check until Black becomes stable.
# Re-run the formatter on its own output; a stable formatter is a fixpoint,
# i.e. newdst must equal dst.
1470 newdst = format_str(dst, line_length=line_length)
# NOTE(review): the guard comparing dst to newdst and the `log =
# dump_to_file(` call wrapping these diffs (lines 1471-1472, 1475) are
# elided in this excerpt; confirm against the full source.
1473 diff(src, dst, 'source', 'first pass'),
1474 diff(dst, newdst, 'first pass', 'second pass'),
1476 raise AssertionError(
1477 f"INTERNAL ERROR: Black produced different code on the second pass "
1478 f"of the formatter. "
1479 f"Please report a bug on https://github.com/ambv/black/issues. "
1480 f"This diff might be helpful: {log}",
1484 def dump_to_file(*output: str) -> str:
1485 """Dumps `output` to a temporary file. Returns path to the file."""
# delete=False so the file survives for the user to inspect; the path is
# returned and embedded in the AssertionError messages above.
1488 with tempfile.NamedTemporaryFile(
1489 mode='w', prefix='blk_', suffix='.log', delete=False
# NOTE(review): the `as f:` clause, the write calls inside this loop, and
# the final `return f.name` are elided in this excerpt; confirm against
# the full source.
1491 for lines in output:
1497 def diff(a: str, b: str, a_name: str, b_name: str) -> str:
1498 """Returns a udiff string between strings `a` and `b`."""
# unified_diff expects sequences of lines that keep their trailing
# newlines, so re-attach '\n' to every split line.
1501 a_lines = [line + '\n' for line in a.split('\n')]
1502 b_lines = [line + '\n' for line in b.split('\n')]
# n=5 lines of context around each hunk.
# NOTE(review): the `return ''.join(` wrapper around this call (and the
# local `import difflib`, if any) is elided in this excerpt; confirm
# against the full source.
1504 difflib.unified_diff(a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5)
1508 if __name__ == '__main__':