All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
2 blib2to3 Node/Leaf transformation-related utility functions.
6 from typing import Final, Generic, Iterator, List, Optional, Set, Tuple, TypeVar, Union
8 if sys.version_info >= (3, 10):
9 from typing import TypeGuard
11 from typing_extensions import TypeGuard
13 from mypy_extensions import mypyc_attr
15 from black.cache import CACHE_DIR
16 from black.mode import Mode, Preview
17 from black.strings import has_triple_quotes
18 from blib2to3 import pygram
19 from blib2to3.pgen2 import token
20 from blib2to3.pytree import NL, Leaf, Node, type_repr
22 pygram.initialize(CACHE_DIR)
23 syms: Final = pygram.python_symbols
28 LN = Union[Leaf, Node]
33 WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
46 STANDALONE_COMMENT: Final = 153
47 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
48 LOGIC_OPERATORS: Final = {"and", "or"}
49 COMPARATORS: Final = {
57 MATH_OPERATORS: Final = {
73 STARS: Final = {token.STAR, token.DOUBLESTAR}
74 VARARGS_SPECIALS: Final = STARS | {token.SLASH}
75 VARARGS_PARENTS: Final = {
77 syms.argument, # double star in arglist
78 syms.trailer, # single argument to call
80 syms.varargslist, # lambdas
82 UNPACKING_PARENTS: Final = {
83 syms.atom, # single element of a list or set literal
87 syms.testlist_star_expr,
91 TEST_DESCENDANTS: Final = {
108 TYPED_NAMES: Final = {syms.tname, syms.tname_star}
109 ASSIGNMENTS: Final = {
126 IMPLICIT_TUPLE: Final = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
128 token.LPAR: token.RPAR,
129 token.LSQB: token.RSQB,
130 token.LBRACE: token.RBRACE,
132 OPENING_BRACKETS: Final = set(BRACKET.keys())
133 CLOSING_BRACKETS: Final = set(BRACKET.values())
134 BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS
135 ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
140 @mypyc_attr(allow_interpreted_subclasses=True)
141 class Visitor(Generic[T]):
142 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
144 def visit(self, node: LN) -> Iterator[T]:
145 """Main method to visit `node` and its children.
147 It tries to find a `visit_*()` method for the given `node.type`, like
148 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
149 If no dedicated `visit_*()` method is found, chooses `visit_default()`
152 Then yields objects of type `T` from the selected visitor.
155 name = token.tok_name[node.type]
157 name = str(type_repr(node.type))
158 # We explicitly branch on whether a visitor exists (instead of
159 # using self.visit_default as the default arg to getattr) in order
160 # to save needing to create a bound method object and so mypyc can
161 # generate a native call to visit_default.
162 visitf = getattr(self, f"visit_{name}", None)
164 yield from visitf(node)
166 yield from self.visit_default(node)
168 def visit_default(self, node: LN) -> Iterator[T]:
169 """Default `visit_*()` implementation. Recurses to children of `node`."""
170 if isinstance(node, Node):
171 for child in node.children:
172 yield from self.visit(child)
175 def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # noqa: C901
176 """Return whitespace prefix if needed for the given `leaf`.
178 `complex_subscript` signals whether the given leaf is part of a subscription
179 which has non-trivial arguments, like arithmetic expressions or function calls.
182 SPACE: Final[str] = " "
183 DOUBLESPACE: Final[str] = " "
187 if t in ALWAYS_NO_SPACE:
190 if t == token.COMMENT:
193 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
194 if t == token.COLON and p.type not in {
201 prev = leaf.prev_sibling
203 prevp = preceding_leaf(p)
204 if not prevp or prevp.type in OPENING_BRACKETS:
208 if prevp.type == token.COLON:
211 elif prevp.type != token.COMMA and not complex_subscript:
216 if prevp.type == token.EQUAL:
218 if prevp.parent.type in {
226 elif prevp.parent.type == syms.typedargslist:
227 # A bit hacky: if the equal sign has whitespace, it means we
228 # previously found it's a typed argument. So, we're using
233 prevp.type == token.STAR
234 and parent_type(prevp) == syms.star_expr
235 and parent_type(prevp.parent) == syms.subscriptlist
237 # No space between typevar tuples.
240 elif prevp.type in VARARGS_SPECIALS:
241 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
244 elif prevp.type == token.COLON:
245 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
246 return SPACE if complex_subscript else NO
250 and prevp.parent.type == syms.factor
251 and prevp.type in MATH_OPERATORS
255 elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator:
256 # no space in decorators
259 elif prev.type in OPENING_BRACKETS:
262 if p.type in {syms.parameters, syms.arglist}:
263 # untyped function signatures or calls
264 if not prev or prev.type != token.COMMA:
267 elif p.type == syms.varargslist:
269 if prev and prev.type != token.COMMA:
272 elif p.type == syms.typedargslist:
273 # typed function signatures
278 if prev.type not in TYPED_NAMES:
281 elif prev.type == token.EQUAL:
282 # A bit hacky: if the equal sign has whitespace, it means we
283 # previously found it's a typed argument. So, we're using that, too.
286 elif prev.type != token.COMMA:
289 elif p.type in TYPED_NAMES:
292 prevp = preceding_leaf(p)
293 if not prevp or prevp.type != token.COMMA:
296 elif p.type == syms.trailer:
297 # attributes and calls
298 if t == token.LPAR or t == token.RPAR:
302 if t == token.DOT or t == token.LSQB:
305 elif prev.type != token.COMMA:
308 elif p.type == syms.argument:
314 prevp = preceding_leaf(p)
315 if not prevp or prevp.type == token.LPAR:
318 elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
321 elif p.type == syms.decorator:
325 elif p.type == syms.dotted_name:
329 prevp = preceding_leaf(p)
330 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
333 elif p.type == syms.classdef:
337 if prev and prev.type == token.LPAR:
340 elif p.type in {syms.subscript, syms.sliceop}:
343 assert p.parent is not None, "subscripts are always parented"
344 if p.parent.type == syms.subscriptlist:
349 elif Preview.walrus_subscript in mode and (
350 t == token.COLONEQUAL or prev.type == token.COLONEQUAL
354 elif not complex_subscript:
357 elif p.type == syms.atom:
358 if prev and t == token.DOT:
359 # dots, but not the first one.
362 elif p.type == syms.dictsetmaker:
364 if prev and prev.type == token.DOUBLESTAR:
367 elif p.type in {syms.factor, syms.star_expr}:
370 prevp = preceding_leaf(p)
371 if not prevp or prevp.type in OPENING_BRACKETS:
374 prevp_parent = prevp.parent
375 assert prevp_parent is not None
376 if prevp.type == token.COLON and prevp_parent.type in {
382 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
385 elif t in {token.NAME, token.NUMBER, token.STRING}:
388 elif p.type == syms.import_from:
390 if prev and prev.type == token.DOT:
393 elif t == token.NAME:
397 if prev and prev.type == token.DOT:
400 elif p.type == syms.sliceop:
403 elif p.type == syms.except_clause:
410 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
411 """Return the first leaf that precedes `node`, if any."""
413 res = node.prev_sibling
415 if isinstance(res, Leaf):
419 return list(res.leaves())[-1]
428 def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
429 """Return if the `node` and its previous siblings match types against the provided
430 list of tokens; the provided `node`has its type matched against the last element in
431 the list. `None` can be used as the first element to declare that the start of the
432 list is anchored at the start of its parent's children."""
435 if tokens[-1] is None:
439 if node.type != tokens[-1]:
441 return prev_siblings_are(node.prev_sibling, tokens[:-1])
def parent_type(node: Optional[LN]) -> Optional[NodeType]:
    """Return the type of `node`'s parent, if it has one.

    Returns:
        `node.parent.type` if `node` is not None and has a parent;
        None otherwise.
    """
    if node is None or node.parent is None:
        return None

    return node.parent.type
457 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
458 """Return the child of `ancestor` that contains `descendant`."""
459 node: Optional[LN] = descendant
460 while node and node.parent != ancestor:
465 def replace_child(old_child: LN, new_child: LN) -> None:
468 * If @old_child.parent is set, replace @old_child with @new_child in
469 @old_child's underlying Node structure.
471 * Otherwise, this function does nothing.
473 parent = old_child.parent
477 child_idx = old_child.remove()
478 if child_idx is not None:
479 parent.insert_child(child_idx, new_child)
482 def container_of(leaf: Leaf) -> LN:
483 """Return `leaf` or one of its ancestors that is the topmost container of it.
485 By "container" we mean a node where `leaf` is the very first child.
487 same_prefix = leaf.prefix
490 parent = container.parent
494 if parent.children[0].prefix != same_prefix:
497 if parent.type == syms.file_input:
500 if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
507 def first_leaf_of(node: LN) -> Optional[Leaf]:
508 """Returns the first leaf of the node tree."""
509 if isinstance(node, Leaf):
512 return first_leaf_of(node.children[0])
517 def is_arith_like(node: LN) -> bool:
518 """Whether node is an arithmetic or a binary arithmetic expression"""
519 return node.type in {
527 def is_docstring(leaf: Leaf) -> bool:
528 if prev_siblings_are(
529 leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
533 # Multiline docstring on the same line as the `def`.
534 if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
535 # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
536 # grammar. We're safe to return True without further checks.
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple.

    That is: an atom whose only two children are a left and a right
    parenthesis.
    """
    return (
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
    )
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type == syms.atom:
        # Parenthesized form: look inside the parentheses for `elem,`.
        gexp = unwrap_singleton_parenthesis(node)
        if gexp is None or gexp.type != syms.testlist_gexp:
            return False

        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    # Bare form: an implicit tuple made of one element and a trailing comma.
    return (
        node.type in IMPLICIT_TUPLE
        and len(node.children) == 2
        and node.children[1].type == token.COMMA
    )
def is_tuple_containing_walrus(node: LN) -> bool:
    """Return True if `node` holds a tuple that contains a walrus operator."""
    if node.type != syms.atom:
        return False

    gexp = unwrap_singleton_parenthesis(node)
    if gexp is None or gexp.type != syms.testlist_gexp:
        return False

    # Only direct children of the parenthesized expression list are checked.
    return any(child.type == syms.namedexpr_test for child in gexp.children)
579 def is_one_sequence_between(
583 brackets: Tuple[int, int] = (token.LPAR, token.RPAR),
585 """Return True if content between `opening` and `closing` is a one-sequence."""
586 if (opening.type, closing.type) != brackets:
589 depth = closing.bracket_depth + 1
590 for _opening_index, leaf in enumerate(leaves):
595 raise LookupError("Opening paren not found in `leaves`")
599 for leaf in leaves[_opening_index:]:
603 bracket_depth = leaf.bracket_depth
604 if bracket_depth == depth and leaf.type == token.COMMA:
606 if leaf.parent and leaf.parent.type in {
def is_walrus_assignment(node: LN) -> bool:
    """Return True iff `node` is of the shape ( test := test )"""
    inner = unwrap_singleton_parenthesis(node)
    return inner is not None and inner.type == syms.namedexpr_test
622 def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool:
623 """Return True iff `node` is a trailer valid in a simple decorator"""
624 return node.type == syms.trailer and (
626 len(node.children) == 2
627 and node.children[0].type == token.DOT
628 and node.children[1].type == token.NAME
630 # last trailer can be an argument-less parentheses pair
633 and len(node.children) == 2
634 and node.children[0].type == token.LPAR
635 and node.children[1].type == token.RPAR
637 # last trailer can be arguments
640 and len(node.children) == 3
641 and node.children[0].type == token.LPAR
642 # and node.children[1].type == syms.argument
643 and node.children[2].type == token.RPAR
648 def is_simple_decorator_expression(node: LN) -> bool:
649 """Return True iff `node` could be a 'dotted name' decorator
651 This function takes the node of the 'namedexpr_test' of the new decorator
652 grammar and test if it would be valid under the old decorator grammar.
654 The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
655 The new grammar is : decorator: @ namedexpr_test NEWLINE
657 if node.type == token.NAME:
659 if node.type == syms.power:
662 node.children[0].type == token.NAME
663 and all(map(is_simple_decorator_trailer, node.children[1:-1]))
665 len(node.children) < 2
666 or is_simple_decorator_trailer(node.children[-1], last=True)
672 def is_yield(node: LN) -> bool:
673 """Return True if `node` holds a `yield` or `yield from` expression."""
674 if node.type == syms.yield_expr:
677 if is_name_token(node) and node.value == "yield":
680 if node.type != syms.atom:
683 if len(node.children) != 3:
686 lpar, expr, rpar = node.children
687 if lpar.type == token.LPAR and rpar.type == token.RPAR:
688 return is_yield(expr)
693 def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
694 """Return True if `leaf` is a star or double star in a vararg or kwarg.
696 If `within` includes VARARGS_PARENTS, this applies to function signatures.
697 If `within` includes UNPACKING_PARENTS, it applies to right hand-side
698 extended iterable unpacking (PEP 3132) and additional unpacking
699 generalizations (PEP 448).
701 if leaf.type not in VARARGS_SPECIALS or not leaf.parent:
705 if p.type == syms.star_expr:
706 # Star expressions are also used as assignment targets in extended
707 # iterable unpacking (PEP 3132). See what its parent is instead.
713 return p.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines."""
    # Triple quotes alone are not enough: the value must contain a newline.
    return has_triple_quotes(leaf.value) and "\n" in leaf.value
721 def is_stub_suite(node: Node) -> bool:
722 """Return True if `node` is a suite with a stub body."""
724 # If there is a comment, we want to keep it.
725 if node.prefix.strip():
729 len(node.children) != 4
730 or node.children[0].type != token.NEWLINE
731 or node.children[1].type != token.INDENT
732 or node.children[3].type != token.DEDENT
736 if node.children[3].prefix.strip():
739 return is_stub_body(node.children[2])
742 def is_stub_body(node: LN) -> bool:
743 """Return True if `node` is a simple statement containing an ellipsis."""
744 if not isinstance(node, Node) or node.type != syms.simple_stmt:
747 if len(node.children) != 2:
750 child = node.children[0]
752 not child.prefix.strip()
753 and child.type == syms.atom
754 and len(child.children) == 3
755 and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
759 def is_atom_with_invisible_parens(node: LN) -> bool:
760 """Given a `LN`, determines whether it's an atom `node` with invisible
761 parens. Useful in dedupe-ing and normalizing parens.
763 if isinstance(node, Leaf) or node.type != syms.atom:
766 first, last = node.children[0], node.children[-1]
768 isinstance(first, Leaf)
769 and first.type == token.LPAR
770 and first.value == ""
771 and isinstance(last, Leaf)
772 and last.type == token.RPAR
def is_empty_par(leaf: Leaf) -> bool:
    """Return True if `leaf` is a left or right parenthesis with an empty value."""
    return is_empty_lpar(leaf) or is_empty_rpar(leaf)
def is_empty_lpar(leaf: Leaf) -> bool:
    """Return True if `leaf` is a left parenthesis with an empty value."""
    return leaf.type == token.LPAR and leaf.value == ""
def is_empty_rpar(leaf: Leaf) -> bool:
    """Return True if `leaf` is a right parenthesis with an empty value."""
    return leaf.type == token.RPAR and leaf.value == ""
789 def is_import(leaf: Leaf) -> bool:
790 """Return True if the given leaf starts an import statement."""
797 (v == "import" and p and p.type == syms.import_name)
798 or (v == "from" and p and p.type == syms.import_from)
803 def is_with_or_async_with_stmt(leaf: Leaf) -> bool:
804 """Return True if the given leaf starts a with or async with statement."""
806 leaf.type == token.NAME
807 and leaf.value == "with"
809 and leaf.parent.type == syms.with_stmt
811 leaf.type == token.ASYNC
812 and leaf.next_sibling
813 and leaf.next_sibling.type == syms.with_stmt
817 def is_async_stmt_or_funcdef(leaf: Leaf) -> bool:
818 """Return True if the given leaf starts an async def/for/with statement.
820 Note that `async def` can be either an `async_stmt` or `async_funcdef`,
821 the latter is used when it has decorators.
824 leaf.type == token.ASYNC
826 and leaf.parent.type in {syms.async_stmt, syms.async_funcdef}
def is_type_comment(leaf: Leaf) -> bool:
    """Return True if the given leaf is a type comment.

    This function should only be used for general type comments (excluding
    ignore annotations, which should use `is_type_ignore_comment`). Note that
    general type comments are no longer used in modern versions of Python, so
    this function may be deprecated in the future.
    """
    return leaf.type in {
        token.COMMENT,
        STANDALONE_COMMENT,
    } and leaf.value.startswith("# type:")
def is_type_ignore_comment(leaf: Leaf) -> bool:
    """Return True if the given leaf is a type comment with ignore annotation."""
    return leaf.type in {
        token.COMMENT,
        STANDALONE_COMMENT,
    } and is_type_ignore_comment_string(leaf.value)
def is_type_ignore_comment_string(value: str) -> bool:
    """Return True if the given string is a type comment with ignore annotation.

    Only the prefix is checked, so anything following `# type: ignore`
    is accepted as well.
    """
    return value.startswith("# type: ignore")
853 def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
854 """Wrap `child` in parentheses.
856 This replaces `child` with an atom holding the parentheses and the old
857 child. That requires moving the prefix.
859 If `visible` is False, the leaves will be valueless (and thus invisible).
861 lpar = Leaf(token.LPAR, "(" if visible else "")
862 rpar = Leaf(token.RPAR, ")" if visible else "")
863 prefix = child.prefix
865 index = child.remove() or 0
866 new_child = Node(syms.atom, [lpar, child, rpar])
867 new_child.prefix = prefix
868 parent.insert_child(index, new_child)
871 def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
872 """Returns `wrapped` if `node` is of the shape ( wrapped ).
874 Parenthesis can be optional. Returns None otherwise"""
875 if len(node.children) != 3:
878 lpar, wrapped, rpar = node.children
879 if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
885 def ensure_visible(leaf: Leaf) -> None:
886 """Make sure parentheses are visible.
888 They could be invisible as part of some statements (see
889 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
891 if leaf.type == token.LPAR:
893 elif leaf.type == token.RPAR:
def is_name_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` is a NAME token; narrows `nl` to `Leaf` for type checkers."""
    return nl.type == token.NAME
def is_lpar_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` is an LPAR token; narrows `nl` to `Leaf` for type checkers."""
    return nl.type == token.LPAR
def is_rpar_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` is an RPAR token; narrows `nl` to `Leaf` for type checkers."""
    return nl.type == token.RPAR
def is_string_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` is a STRING token; narrows `nl` to `Leaf` for type checkers."""
    return nl.type == token.STRING
def is_number_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` is a NUMBER token; narrows `nl` to `Leaf` for type checkers."""
    return nl.type == token.NUMBER
917 def is_part_of_annotation(leaf: Leaf) -> bool:
918 """Returns whether this leaf is part of type annotations."""
919 ancestor = leaf.parent
920 while ancestor is not None:
921 if ancestor.prev_sibling and ancestor.prev_sibling.type == token.RARROW:
923 if ancestor.parent and ancestor.parent.type == syms.tname:
925 ancestor = ancestor.parent