All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
2 blib2to3 Node/Leaf transformation-related utility functions.
18 if sys.version_info >= (3, 8):
19 from typing import Final
21 from typing_extensions import Final
22 if sys.version_info >= (3, 10):
23 from typing import TypeGuard
25 from typing_extensions import TypeGuard
27 from mypy_extensions import mypyc_attr
30 from blib2to3.pytree import Node, Leaf, type_repr, NL
31 from blib2to3 import pygram
32 from blib2to3.pgen2 import token
34 from black.cache import CACHE_DIR
35 from black.strings import has_triple_quotes
38 pygram.initialize(CACHE_DIR)
39 syms: Final = pygram.python_symbols
44 LN = Union[Leaf, Node]
49 WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
62 STANDALONE_COMMENT: Final = 153
63 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
64 LOGIC_OPERATORS: Final = {"and", "or"}
65 COMPARATORS: Final = {
73 MATH_OPERATORS: Final = {
89 STARS: Final = {token.STAR, token.DOUBLESTAR}
90 VARARGS_SPECIALS: Final = STARS | {token.SLASH}
91 VARARGS_PARENTS: Final = {
93 syms.argument, # double star in arglist
94 syms.trailer, # single argument to call
96 syms.varargslist, # lambdas
98 UNPACKING_PARENTS: Final = {
99 syms.atom, # single element of a list or set literal
103 syms.testlist_star_expr,
107 TEST_DESCENDANTS: Final = {
124 ASSIGNMENTS: Final = {
141 IMPLICIT_TUPLE: Final = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
143 token.LPAR: token.RPAR,
144 token.LSQB: token.RSQB,
145 token.LBRACE: token.RBRACE,
147 OPENING_BRACKETS: Final = set(BRACKET.keys())
148 CLOSING_BRACKETS: Final = set(BRACKET.values())
149 BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS
150 ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
@mypyc_attr(allow_interpreted_subclasses=True)
class Visitor(Generic[T]):
    """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""

    def visit(self, node: LN) -> Iterator[T]:
        """Main method to visit `node` and its children.

        It tries to find a `visit_*()` method for the given `node.type`, like
        `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
        If no dedicated `visit_*()` method is found, chooses `visit_default()`
        instead.

        Then yields objects of type `T` from the selected visitor.
        """
        # NOTE(review): lib2to3 numbers token (leaf) types below 256 and
        # grammar symbols from 256 upwards; confirm this is the intended split
        # between the token-name table and `type_repr()`.
        if node.type < 256:
            name = token.tok_name[node.type]
        else:
            name = str(type_repr(node.type))
        # We explicitly branch on whether a visitor exists (instead of
        # using self.visit_default as the default arg to getattr) in order
        # to save needing to create a bound method object and so mypyc can
        # generate a native call to visit_default.
        visitf = getattr(self, f"visit_{name}", None)
        if visitf:
            yield from visitf(node)
        else:
            yield from self.visit_default(node)

    def visit_default(self, node: LN) -> Iterator[T]:
        """Default `visit_*()` implementation. Recurses to children of `node`."""
        # Only Node objects are descended into; anything else yields nothing.
        if isinstance(node, Node):
            for child in node.children:
                yield from self.visit(child)
190 def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901
191 """Return whitespace prefix if needed for the given `leaf`.
193 `complex_subscript` signals whether the given leaf is part of a subscription
194 which has non-trivial arguments, like arithmetic expressions or function calls.
198 DOUBLESPACE: Final = " "
202 if t in ALWAYS_NO_SPACE:
205 if t == token.COMMENT:
208 assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
209 if t == token.COLON and p.type not in {
216 prev = leaf.prev_sibling
218 prevp = preceding_leaf(p)
219 if not prevp or prevp.type in OPENING_BRACKETS:
223 if prevp.type == token.COLON:
226 elif prevp.type != token.COMMA and not complex_subscript:
231 if prevp.type == token.EQUAL:
233 if prevp.parent.type in {
241 elif prevp.parent.type == syms.typedargslist:
242 # A bit hacky: if the equal sign has whitespace, it means we
243 # previously found it's a typed argument. So, we're using
247 elif prevp.type in VARARGS_SPECIALS:
248 if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
251 elif prevp.type == token.COLON:
252 if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
253 return SPACE if complex_subscript else NO
257 and prevp.parent.type == syms.factor
258 and prevp.type in MATH_OPERATORS
263 prevp.type == token.RIGHTSHIFT
265 and prevp.parent.type == syms.shift_expr
266 and prevp.prev_sibling
267 and is_name_token(prevp.prev_sibling)
268 and prevp.prev_sibling.value == "print"
270 # Python 2 print chevron
272 elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator:
273 # no space in decorators
276 elif prev.type in OPENING_BRACKETS:
279 if p.type in {syms.parameters, syms.arglist}:
280 # untyped function signatures or calls
281 if not prev or prev.type != token.COMMA:
284 elif p.type == syms.varargslist:
286 if prev and prev.type != token.COMMA:
289 elif p.type == syms.typedargslist:
290 # typed function signatures
295 if prev.type != syms.tname:
298 elif prev.type == token.EQUAL:
299 # A bit hacky: if the equal sign has whitespace, it means we
300 # previously found it's a typed argument. So, we're using that, too.
303 elif prev.type != token.COMMA:
306 elif p.type == syms.tname:
309 prevp = preceding_leaf(p)
310 if not prevp or prevp.type != token.COMMA:
313 elif p.type == syms.trailer:
314 # attributes and calls
315 if t == token.LPAR or t == token.RPAR:
320 prevp = preceding_leaf(p)
321 if not prevp or prevp.type != token.NUMBER:
324 elif t == token.LSQB:
327 elif prev.type != token.COMMA:
330 elif p.type == syms.argument:
336 prevp = preceding_leaf(p)
337 if not prevp or prevp.type == token.LPAR:
340 elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
343 elif p.type == syms.decorator:
347 elif p.type == syms.dotted_name:
351 prevp = preceding_leaf(p)
352 if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
355 elif p.type == syms.classdef:
359 if prev and prev.type == token.LPAR:
362 elif p.type in {syms.subscript, syms.sliceop}:
365 assert p.parent is not None, "subscripts are always parented"
366 if p.parent.type == syms.subscriptlist:
371 elif not complex_subscript:
374 elif p.type == syms.atom:
375 if prev and t == token.DOT:
376 # dots, but not the first one.
379 elif p.type == syms.dictsetmaker:
381 if prev and prev.type == token.DOUBLESTAR:
384 elif p.type in {syms.factor, syms.star_expr}:
387 prevp = preceding_leaf(p)
388 if not prevp or prevp.type in OPENING_BRACKETS:
391 prevp_parent = prevp.parent
392 assert prevp_parent is not None
393 if prevp.type == token.COLON and prevp_parent.type in {
399 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
402 elif t in {token.NAME, token.NUMBER, token.STRING}:
405 elif p.type == syms.import_from:
407 if prev and prev.type == token.DOT:
410 elif t == token.NAME:
414 if prev and prev.type == token.DOT:
417 elif p.type == syms.sliceop:
423 def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
424 """Return the first leaf that precedes `node`, if any."""
426 res = node.prev_sibling
428 if isinstance(res, Leaf):
432 return list(res.leaves())[-1]
def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
    """Return whether `node` and its previous siblings match the types in `tokens`.

    The provided `node` has its type matched against the last element of
    `tokens`, its previous sibling against the element before that, and so on.
    `None` can be used as the first element to declare that the start of the
    list is anchored at the start of its parent's children.

    An empty `tokens` list matches anything.
    """
    current = node
    # Walk the expected types right-to-left while stepping leftwards through
    # the siblings.
    for expected in reversed(tokens):
        if expected is None:
            # The anchor matches only when there are no siblings left.
            return current is None
        if not current or current.type != expected:
            return False
        current = current.prev_sibling
    return True
457 def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]:
458 """Return (penultimate, last) leaves skipping brackets in `omit` and contents."""
459 stop_after: Optional[Leaf] = None
460 last: Optional[Leaf] = None
461 for leaf in reversed(leaves):
463 if leaf is stop_after:
471 stop_after = leaf.opening_bracket
475 raise LookupError("Last two leaves were also skipped")
def parent_type(node: Optional[LN]) -> Optional[NodeType]:
    """Return the type of `node`'s parent, if there is one.

    Returns:
        @node.parent.type, if @node is not None and has a parent.
            OR
        None, otherwise.
    """
    if node is None or node.parent is None:
        return None

    return node.parent.type
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
    """Return the child of `ancestor` that contains `descendant`.

    Climbs the parent chain from `descendant` until reaching a node whose
    parent is `ancestor`. Returns None when the chain ends without meeting
    `ancestor`.
    """
    node: Optional[LN] = descendant
    while node and node.parent != ancestor:
        node = node.parent
    return node
def replace_child(old_child: LN, new_child: LN) -> None:
    """Swap `old_child` for `new_child` in the tree, in place.

    Side Effects:
        * If @old_child.parent is set, replace @old_child with @new_child in
          @old_child's underlying Node structure.
            OR
        * Otherwise, this function does nothing.
    """
    parent = old_child.parent
    if not parent:
        return

    # `remove()` reports the position the child occupied; reuse it so the
    # replacement lands in exactly the same slot.
    child_idx = old_child.remove()
    if child_idx is not None:
        parent.insert_child(child_idx, new_child)
516 def container_of(leaf: Leaf) -> LN:
517 """Return `leaf` or one of its ancestors that is the topmost container of it.
519 By "container" we mean a node where `leaf` is the very first child.
521 same_prefix = leaf.prefix
524 parent = container.parent
528 if parent.children[0].prefix != same_prefix:
531 if parent.type == syms.file_input:
534 if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
def first_leaf_column(node: Node) -> Optional[int]:
    """Return the column of the first direct child of `node` that is a Leaf.

    Only immediate children are inspected. Returns None when none of them is
    a Leaf.
    """
    for child in node.children:
        if isinstance(child, Leaf):
            return child.column
    return None
549 def first_child_is_arith(node: Node) -> bool:
550 """Whether first child is an arithmetic or a binary arithmetic expression"""
557 return bool(node.children and node.children[0].type in expr_types)
560 def is_docstring(leaf: Leaf) -> bool:
561 if prev_siblings_are(
562 leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
566 # Multiline docstring on the same line as the `def`.
567 if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
568 # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
569 # grammar. We're safe to return True without further checks.
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple.

    That is: an atom with exactly two children, a left parenthesis followed
    by a right parenthesis.
    """
    return (
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
    )
def is_one_tuple(node: LN) -> bool:
    """Return True if `node` holds a tuple with one element, with or without parens."""
    if node.type == syms.atom:
        # Parenthesized form: look inside the parentheses for `element ,`.
        gexp = unwrap_singleton_parenthesis(node)
        if gexp is None or gexp.type != syms.testlist_gexp:
            return False

        return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA

    # Bare form: an implicit tuple node consisting of `element ,`.
    return (
        node.type in IMPLICIT_TUPLE
        and len(node.children) == 2
        and node.children[1].type == token.COMMA
    )
601 def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
602 """Return True if content between `opening` and `closing` looks like a one-tuple."""
603 if opening.type != token.LPAR and closing.type != token.RPAR:
606 depth = closing.bracket_depth + 1
607 for _opening_index, leaf in enumerate(leaves):
612 raise LookupError("Opening paren not found in `leaves`")
616 for leaf in leaves[_opening_index:]:
620 bracket_depth = leaf.bracket_depth
621 if bracket_depth == depth and leaf.type == token.COMMA:
623 if leaf.parent and leaf.parent.type in {
def is_walrus_assignment(node: LN) -> bool:
    """Return True iff `node` is of the shape ( test := test )"""
    # Anything that is not a parenthesized singleton unwraps to None.
    inner = unwrap_singleton_parenthesis(node)
    return inner is not None and inner.type == syms.namedexpr_test
def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool:
    """Return True iff `node` is a trailer valid in a simple decorator.

    A `.NAME` attribute access is always accepted. When `last` is true, the
    trailer may also be a call: either an empty `()` pair or parentheses
    around a single child.
    """
    if node.type != syms.trailer:
        return False

    children = node.children
    if (
        len(children) == 2
        and children[0].type == token.DOT
        and children[1].type == token.NAME
    ):
        return True

    # Call trailers are only acceptable in the final position.
    if not last:
        return False

    # last trailer can be an argument-less parentheses pair
    if (
        len(children) == 2
        and children[0].type == token.LPAR
        and children[1].type == token.RPAR
    ):
        return True

    # last trailer can be arguments; the type of the middle child is
    # deliberately not restricted to `syms.argument`.
    return (
        len(children) == 3
        and children[0].type == token.LPAR
        and children[2].type == token.RPAR
    )
665 def is_simple_decorator_expression(node: LN) -> bool:
666 """Return True iff `node` could be a 'dotted name' decorator
668 This function takes the node of the 'namedexpr_test' of the new decorator
669 grammar and test if it would be valid under the old decorator grammar.
671 The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
672 The new grammar is : decorator: @ namedexpr_test NEWLINE
674 if node.type == token.NAME:
676 if node.type == syms.power:
679 node.children[0].type == token.NAME
680 and all(map(is_simple_decorator_trailer, node.children[1:-1]))
682 len(node.children) < 2
683 or is_simple_decorator_trailer(node.children[-1], last=True)
def is_yield(node: LN) -> bool:
    """Return True if `node` holds a `yield` or `yield from` expression."""
    if node.type == syms.yield_expr:
        return True

    # A bare `yield` keyword with no value.
    if is_name_token(node) and node.value == "yield":
        return True

    if node.type != syms.atom:
        return False

    if len(node.children) != 3:
        return False

    # A parenthesized expression: look through the parentheses.
    lpar, expr, rpar = node.children
    if lpar.type == token.LPAR and rpar.type == token.RPAR:
        return is_yield(expr)

    return False
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
    """Return True if `leaf` is a star or double star in a vararg or kwarg.

    If `within` includes VARARGS_PARENTS, this applies to function signatures.
    If `within` includes UNPACKING_PARENTS, it applies to right hand-side
    extended iterable unpacking (PEP 3132) and additional unpacking
    generalizations (PEP 448).
    """
    if leaf.type not in VARARGS_SPECIALS or not leaf.parent:
        return False

    p = leaf.parent
    if p.type == syms.star_expr:
        # Star expressions are also used as assignment targets in extended
        # iterable unpacking (PEP 3132).  See what its parent is instead.
        if not p.parent:
            return False

        p = p.parent

    return p.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines."""
    # Triple quotes alone are not enough: the value must contain a newline.
    return has_triple_quotes(leaf.value) and "\n" in leaf.value
def is_stub_suite(node: Node) -> bool:
    """Return True if `node` is a suite with a stub body.

    The suite must consist of exactly NEWLINE, INDENT, one statement and
    DEDENT, and that single statement must satisfy `is_stub_body()`.
    """
    if (
        len(node.children) != 4
        or node.children[0].type != token.NEWLINE
        or node.children[1].type != token.INDENT
        or node.children[3].type != token.DEDENT
    ):
        return False

    return is_stub_body(node.children[2])
def is_stub_body(node: LN) -> bool:
    """Return True if `node` is a simple statement containing an ellipsis."""
    if not isinstance(node, Node) or node.type != syms.simple_stmt:
        return False

    if len(node.children) != 2:
        return False

    # The ellipsis is an atom made of exactly three DOT leaves.
    child = node.children[0]
    return (
        child.type == syms.atom
        and len(child.children) == 3
        and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
    )
def is_atom_with_invisible_parens(node: LN) -> bool:
    """Given a `LN`, determines whether it's an atom `node` with invisible
    parens. Useful in dedupe-ing and normalizing parens.
    """
    if isinstance(node, Leaf) or node.type != syms.atom:
        return False

    # "Invisible" parentheses are LPAR/RPAR leaves with an empty value.
    first, last = node.children[0], node.children[-1]
    return (
        isinstance(first, Leaf)
        and first.type == token.LPAR
        and first.value == ""
        and isinstance(last, Leaf)
        and last.type == token.RPAR
        # NOTE(review): mirrors the check on `first`; confirm the closing
        # paren is also required to be valueless.
        and last.value == ""
    )
def is_empty_par(leaf: Leaf) -> bool:
    """Return True if `leaf` is a left or right parenthesis with an empty value."""
    return is_empty_lpar(leaf) or is_empty_rpar(leaf)
def is_empty_lpar(leaf: Leaf) -> bool:
    """Return True if `leaf` is a left parenthesis with an empty value."""
    return leaf.type == token.LPAR and leaf.value == ""
def is_empty_rpar(leaf: Leaf) -> bool:
    """Return True if `leaf` is a right parenthesis with an empty value."""
    return leaf.type == token.RPAR and leaf.value == ""
797 def is_import(leaf: Leaf) -> bool:
798 """Return True if the given leaf starts an import statement."""
805 (v == "import" and p and p.type == syms.import_name)
806 or (v == "from" and p and p.type == syms.import_from)
def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
    """Return True if the given leaf is a special comment.

    Only returns true for type comments for now: the leaf must be a comment
    (regular or standalone) whose text starts with "# type:" followed by
    `suffix`.
    """
    t = leaf.type
    v = leaf.value
    return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix)
def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
    """Wrap `child` in parentheses.

    This replaces `child` with an atom holding the parentheses and the old
    child.  That requires moving the prefix.

    If `visible` is False, the leaves will be valueless (and thus invisible).
    """
    lpar = Leaf(token.LPAR, "(" if visible else "")
    rpar = Leaf(token.RPAR, ")" if visible else "")
    # Move the prefix from the child onto the new atom.
    # NOTE(review): clearing the child's own prefix is inferred from "moving
    # the prefix" in the docstring; confirm.
    prefix = child.prefix
    child.prefix = ""
    # `remove()` gives back the child's old position; fall back to 0 when
    # it reports none.
    index = child.remove() or 0
    new_child = Node(syms.atom, [lpar, child, rpar])
    new_child.prefix = prefix
    parent.insert_child(index, new_child)
def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
    """Returns `wrapped` if `node` is of the shape ( wrapped ).

    Parenthesis can be optional.  Returns None otherwise"""
    if len(node.children) != 3:
        return None

    lpar, wrapped, rpar = node.children
    if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
        return None

    return wrapped
def ensure_visible(leaf: Leaf) -> None:
    """Make sure parentheses are visible.

    They could be invisible as part of some statements (see
    :func:`normalize_invisible_parens` and :func:`visit_import_from`).

    Leaves that are not parentheses are left untouched.
    """
    if leaf.type == token.LPAR:
        leaf.value = "("
    elif leaf.type == token.RPAR:
        leaf.value = ")"
def is_name_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has the NAME token type (narrowing it to a Leaf)."""
    return nl.type == token.NAME
def is_lpar_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has the LPAR token type (narrowing it to a Leaf)."""
    return nl.type == token.LPAR
def is_rpar_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has the RPAR token type (narrowing it to a Leaf)."""
    return nl.type == token.RPAR
def is_string_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has the STRING token type (narrowing it to a Leaf)."""
    return nl.type == token.STRING