All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs, allow-incomplete-defs
28 from blib2to3.pgen2.grammar import Grammar
30 __author__ = "Guido van Rossum <guido@python.org>"
33 from io import StringIO
35 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
37 _type_reprs: Dict[int, Union[str, int]] = {}
40 def type_repr(type_num: int) -> Union[str, int]:
43 from .pygram import python_symbols
45 # printing tokens is possible but not as useful
46 # from .pgen2 import token // token.__dict__.items():
47 for name in dir(python_symbols):
48 val = getattr(python_symbols, name)
50 _type_reprs[val] = name
51 return _type_reprs.setdefault(type_num, type_num)
54 _P = TypeVar("_P", bound="Base")
56 NL = Union["Node", "Leaf"]
57 Context = Tuple[str, Tuple[int, int]]
58 RawNode = Tuple[int, Optional[str], Optional[Context], Optional[List[NL]]]
63 Abstract base class for Node and Leaf.
65 This provides some default functionality and boilerplate using the
68 A node may be a subnode of at most one parent.
71 # Default values for instance variables
72 type: int # int: token number (< 256) or symbol number (>= 256)
73 parent: Optional["Node"] = None # Parent node pointer, or None
74 children: List[NL] # List of subnodes
75 was_changed: bool = False
76 was_checked: bool = False
78 def __new__(cls, *args, **kwds):
79 """Constructor that prevents Base from being instantiated."""
80 assert cls is not Base, "Cannot instantiate Base"
81 return object.__new__(cls)
83 def __eq__(self, other: Any) -> bool:
85 Compare two nodes for equality.
87 This calls the method _eq().
89 if self.__class__ is not other.__class__:
91 return self._eq(other)
94 def prefix(self) -> str:
95 raise NotImplementedError
97 def _eq(self: _P, other: _P) -> bool:
99 Compare two nodes for equality.
101 This is called by __eq__ and __ne__. It is only called if the two nodes
102 have the same type. This must be implemented by the concrete subclass.
103 Nodes should be considered equal if they have the same structure,
104 ignoring the prefix string and other context information.
106 raise NotImplementedError
108 def __deepcopy__(self: _P, memo: Any) -> _P:
111 def clone(self: _P) -> _P:
113 Return a cloned (deep) copy of self.
115 This must be implemented by the concrete subclass.
117 raise NotImplementedError
119 def post_order(self) -> Iterator[NL]:
121 Return a post-order iterator for the tree.
123 This must be implemented by the concrete subclass.
125 raise NotImplementedError
127 def pre_order(self) -> Iterator[NL]:
129 Return a pre-order iterator for the tree.
131 This must be implemented by the concrete subclass.
133 raise NotImplementedError
135 def replace(self, new: Union[NL, List[NL]]) -> None:
136 """Replace this node with a new one in the parent."""
137 assert self.parent is not None, str(self)
138 assert new is not None
139 if not isinstance(new, list):
143 for ch in self.parent.children:
145 assert not found, (self.parent.children, self, new)
147 l_children.extend(new)
150 l_children.append(ch)
151 assert found, (self.children, self, new)
152 self.parent.children = l_children
153 self.parent.changed()
154 self.parent.invalidate_sibling_maps()
156 x.parent = self.parent
159 def get_lineno(self) -> Optional[int]:
160 """Return the line number which generated the invocant node."""
162 while not isinstance(node, Leaf):
163 if not node.children:
165 node = node.children[0]
168 def changed(self) -> None:
172 self.parent.changed()
173 self.was_changed = True
175 def remove(self) -> Optional[int]:
177 Remove the node from the tree. Returns the position of the node in its
178 parent's children before it was removed.
181 for i, node in enumerate(self.parent.children):
183 del self.parent.children[i]
184 self.parent.changed()
185 self.parent.invalidate_sibling_maps()
191 def next_sibling(self) -> Optional[NL]:
193 The node immediately following the invocant in their parent's children
194 list. If the invocant does not have a next sibling, it is None
196 if self.parent is None:
199 if self.parent.next_sibling_map is None:
200 self.parent.update_sibling_maps()
201 assert self.parent.next_sibling_map is not None
202 return self.parent.next_sibling_map[id(self)]
205 def prev_sibling(self) -> Optional[NL]:
207 The node immediately preceding the invocant in their parent's children
208 list. If the invocant does not have a previous sibling, it is None.
210 if self.parent is None:
213 if self.parent.prev_sibling_map is None:
214 self.parent.update_sibling_maps()
215 assert self.parent.prev_sibling_map is not None
216 return self.parent.prev_sibling_map[id(self)]
218 def leaves(self) -> Iterator["Leaf"]:
219 for child in self.children:
220 yield from child.leaves()
222 def depth(self) -> int:
223 if self.parent is None:
225 return 1 + self.parent.depth()
227 def get_suffix(self) -> str:
229 Return the string immediately following the invocant node. This is
230 effectively equivalent to node.next_sibling.prefix
232 next_sib = self.next_sibling
235 prefix = next_sib.prefix
240 """Concrete implementation for interior nodes."""
242 fixers_applied: Optional[List[Any]]
243 used_names: Optional[Set[str]]
249 context: Optional[Any] = None,
250 prefix: Optional[str] = None,
251 fixers_applied: Optional[List[Any]] = None,
256 Takes a type constant (a symbol number >= 256), a sequence of
257 child nodes, and an optional context keyword argument.
259 As a side effect, the parent pointers of the children are updated.
261 assert type >= 256, type
263 self.children = list(children)
264 for ch in self.children:
265 assert ch.parent is None, repr(ch)
267 self.invalidate_sibling_maps()
268 if prefix is not None:
271 self.fixers_applied = fixers_applied[:]
273 self.fixers_applied = None
275 def __repr__(self) -> str:
276 """Return a canonical string representation."""
277 assert self.type is not None
278 return "{}({}, {!r})".format(
279 self.__class__.__name__,
280 type_repr(self.type),
284 def __str__(self) -> str:
286 Return a pretty string representation.
288 This reproduces the input source exactly.
290 return "".join(map(str, self.children))
292 def _eq(self, other: Base) -> bool:
293 """Compare two nodes for equality."""
294 return (self.type, self.children) == (other.type, other.children)
296 def clone(self) -> "Node":
297 assert self.type is not None
298 """Return a cloned (deep) copy of self."""
301 [ch.clone() for ch in self.children],
302 fixers_applied=self.fixers_applied,
305 def post_order(self) -> Iterator[NL]:
306 """Return a post-order iterator for the tree."""
307 for child in self.children:
308 yield from child.post_order()
311 def pre_order(self) -> Iterator[NL]:
312 """Return a pre-order iterator for the tree."""
314 for child in self.children:
315 yield from child.pre_order()
318 def prefix(self) -> str:
320 The whitespace and comments preceding this node in the input.
322 if not self.children:
324 return self.children[0].prefix
327 def prefix(self, prefix: str) -> None:
329 self.children[0].prefix = prefix
331 def set_child(self, i: int, child: NL) -> None:
333 Equivalent to 'node.children[i] = child'. This method also sets the
334 child's parent attribute appropriately.
337 self.children[i].parent = None
338 self.children[i] = child
340 self.invalidate_sibling_maps()
342 def insert_child(self, i: int, child: NL) -> None:
344 Equivalent to 'node.children.insert(i, child)'. This method also sets
345 the child's parent attribute appropriately.
348 self.children.insert(i, child)
350 self.invalidate_sibling_maps()
352 def append_child(self, child: NL) -> None:
354 Equivalent to 'node.children.append(child)'. This method also sets the
355 child's parent attribute appropriately.
358 self.children.append(child)
360 self.invalidate_sibling_maps()
362 def invalidate_sibling_maps(self) -> None:
363 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
364 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
366 def update_sibling_maps(self) -> None:
367 _prev: Dict[int, Optional[NL]] = {}
368 _next: Dict[int, Optional[NL]] = {}
369 self.prev_sibling_map = _prev
370 self.next_sibling_map = _next
371 previous: Optional[NL] = None
372 for current in self.children:
373 _prev[id(current)] = previous
374 _next[id(previous)] = current
376 _next[id(current)] = None
380 """Concrete implementation for leaf nodes."""
382 # Default values for instance variables
384 fixers_applied: List[Any]
386 # Changed later in brackets.py
387 opening_bracket: Optional["Leaf"] = None
388 used_names: Optional[Set[str]]
389 _prefix = "" # Whitespace and comments preceding this token in the input
390 lineno: int = 0 # Line where this token starts in the input
391 column: int = 0 # Column where this token starts in the input
392 # If not None, this Leaf is created by converting a block of fmt off/skip
393 # code, and `fmt_pass_converted_first_leaf` points to the first Leaf in the
395 fmt_pass_converted_first_leaf: Optional["Leaf"] = None
401 context: Optional[Context] = None,
402 prefix: Optional[str] = None,
403 fixers_applied: List[Any] = [],
404 opening_bracket: Optional["Leaf"] = None,
405 fmt_pass_converted_first_leaf: Optional["Leaf"] = None,
410 Takes a type constant (a token number < 256), a string value, and an
411 optional context keyword argument.
414 assert 0 <= type < 256, type
415 if context is not None:
416 self._prefix, (self.lineno, self.column) = context
419 if prefix is not None:
420 self._prefix = prefix
421 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
423 self.opening_bracket = opening_bracket
424 self.fmt_pass_converted_first_leaf = fmt_pass_converted_first_leaf
426 def __repr__(self) -> str:
427 """Return a canonical string representation."""
428 from .pgen2.token import tok_name
430 assert self.type is not None
431 return "{}({}, {!r})".format(
432 self.__class__.__name__,
433 tok_name.get(self.type, self.type),
437 def __str__(self) -> str:
439 Return a pretty string representation.
441 This reproduces the input source exactly.
443 return self._prefix + str(self.value)
445 def _eq(self, other: "Leaf") -> bool:
446 """Compare two nodes for equality."""
447 return (self.type, self.value) == (other.type, other.value)
449 def clone(self) -> "Leaf":
450 assert self.type is not None
451 """Return a cloned (deep) copy of self."""
455 (self.prefix, (self.lineno, self.column)),
456 fixers_applied=self.fixers_applied,
459 def leaves(self) -> Iterator["Leaf"]:
462 def post_order(self) -> Iterator["Leaf"]:
463 """Return a post-order iterator for the tree."""
466 def pre_order(self) -> Iterator["Leaf"]:
467 """Return a pre-order iterator for the tree."""
471 def prefix(self) -> str:
473 The whitespace and comments preceding this token in the input.
478 def prefix(self, prefix: str) -> None:
480 self._prefix = prefix
483 def convert(gr: Grammar, raw_node: RawNode) -> NL:
485 Convert raw node information to a Node or Leaf instance.
487 This is passed to the parser driver which calls it whenever a reduction of a
488 grammar rule produces a new complete node, so that the tree is build
491 type, value, context, children = raw_node
492 if children or type in gr.number2symbol:
493 # If there's exactly one child, return that child instead of
494 # creating a new node.
495 assert children is not None
496 if len(children) == 1:
498 return Node(type, children, context=context)
500 return Leaf(type, value or "", context=context)
503 _Results = Dict[str, NL]
508 A pattern is a tree matching pattern.
510 It looks for a specific node type (token or symbol), and
511 optionally for a specific content.
513 This is an abstract base class. There are three concrete
516 - LeafPattern matches a single leaf node;
517 - NodePattern matches a single node (usually non-leaf);
518 - WildcardPattern matches a sequence of nodes of variable length.
521 # Defaults for instance variables
523 type = None # Node type (token if < 256, symbol if >= 256)
524 content: Any = None # Optional content matching pattern
525 name: Optional[str] = None # Optional name used to store match in results dict
527 def __new__(cls, *args, **kwds):
528 """Constructor that prevents BasePattern from being instantiated."""
529 assert cls is not BasePattern, "Cannot instantiate BasePattern"
530 return object.__new__(cls)
532 def __repr__(self) -> str:
533 assert self.type is not None
534 args = [type_repr(self.type), self.content, self.name]
535 while args and args[-1] is None:
537 return "{}({})".format(self.__class__.__name__, ", ".join(map(repr, args)))
539 def _submatch(self, node, results=None) -> bool:
540 raise NotImplementedError
542 def optimize(self) -> "BasePattern":
544 A subclass can define this as a hook for optimizations.
546 Returns either self or another node with the same effect.
550 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
552 Does this pattern exactly match a node?
554 Returns True if it matches, False if not.
556 If results is not None, it must be a dict which will be
557 updated with the nodes matching named subpatterns.
559 Default implementation for non-wildcard patterns.
561 if self.type is not None and node.type != self.type:
563 if self.content is not None:
564 r: Optional[_Results] = None
565 if results is not None:
567 if not self._submatch(node, r):
570 assert results is not None
572 if results is not None and self.name:
573 results[self.name] = node
576 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
578 Does this pattern exactly match a sequence of nodes?
580 Default implementation for non-wildcard patterns.
584 return self.match(nodes[0], results)
586 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
588 Generator yielding all matches for this pattern.
590 Default implementation for non-wildcard patterns.
593 if nodes and self.match(nodes[0], r):
597 class LeafPattern(BasePattern):
600 type: Optional[int] = None,
601 content: Optional[str] = None,
602 name: Optional[str] = None,
605 Initializer. Takes optional type, content, and name.
607 The type, if given must be a token type (< 256). If not given,
608 this matches any *leaf* node; the content may still be required.
610 The content, if given, must be a string.
612 If a name is given, the matching node is stored in the results
616 assert 0 <= type < 256, type
617 if content is not None:
618 assert isinstance(content, str), repr(content)
620 self.content = content
623 def match(self, node: NL, results=None) -> bool:
624 """Override match() to insist on a leaf node."""
625 if not isinstance(node, Leaf):
627 return BasePattern.match(self, node, results)
629 def _submatch(self, node, results=None):
631 Match the pattern's content to the node's children.
633 This assumes the node type matches and self.content is not None.
635 Returns True if it matches, False if not.
637 If results is not None, it must be a dict which will be
638 updated with the nodes matching named subpatterns.
640 When returning False, the results dict may still be updated.
642 return self.content == node.value
645 class NodePattern(BasePattern):
646 wildcards: bool = False
650 type: Optional[int] = None,
651 content: Optional[Iterable[str]] = None,
652 name: Optional[str] = None,
655 Initializer. Takes optional type, content, and name.
657 The type, if given, must be a symbol type (>= 256). If the
658 type is None this matches *any* single node (leaf or not),
659 except if content is not None, in which it only matches
660 non-leaf nodes that also match the content pattern.
662 The content, if not None, must be a sequence of Patterns that
663 must match the node's children exactly. If the content is
664 given, the type must not be None.
666 If a name is given, the matching node is stored in the results
670 assert type >= 256, type
671 if content is not None:
672 assert not isinstance(content, str), repr(content)
673 newcontent = list(content)
674 for i, item in enumerate(newcontent):
675 assert isinstance(item, BasePattern), (i, item)
676 # I don't even think this code is used anywhere, but it does cause
677 # unreachable errors from mypy. This function's signature does look
678 # odd though *shrug*.
679 if isinstance(item, WildcardPattern): # type: ignore[unreachable]
680 self.wildcards = True # type: ignore[unreachable]
682 self.content = newcontent # TODO: this is unbound when content is None
685 def _submatch(self, node, results=None) -> bool:
687 Match the pattern's content to the node's children.
689 This assumes the node type matches and self.content is not None.
691 Returns True if it matches, False if not.
693 If results is not None, it must be a dict which will be
694 updated with the nodes matching named subpatterns.
696 When returning False, the results dict may still be updated.
699 for c, r in generate_matches(self.content, node.children):
700 if c == len(node.children):
701 if results is not None:
705 if len(self.content) != len(node.children):
707 for subpattern, child in zip(self.content, node.children):
708 if not subpattern.match(child, results):
713 class WildcardPattern(BasePattern):
715 A wildcard pattern can match zero or more nodes.
717 This has all the flexibility needed to implement patterns like:
721 (...)* (...)+ (...)? (...){m,n}
723 except it always uses non-greedy matching.
731 content: Optional[str] = None,
734 name: Optional[str] = None,
740 content: optional sequence of subsequences of patterns;
741 if absent, matches one node;
742 if present, each subsequence is an alternative [*]
743 min: optional minimum number of times to match, default 0
744 max: optional maximum number of times to match, default HUGE
745 name: optional name assigned to this match
747 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
748 equivalent to (a b c | d e | f g h); if content is None,
749 this is equivalent to '.' in regular expression terms.
750 The min and max parameters work as follows:
751 min=0, max=maxint: .*
752 min=1, max=maxint: .+
755 If content is not None, replace the dot with the parenthesized
756 list of alternatives, e.g. (a b c | d e | f g h)*
758 assert 0 <= min <= max <= HUGE, (min, max)
759 if content is not None:
760 f = lambda s: tuple(s)
761 wrapped_content = tuple(map(f, content)) # Protect against alterations
762 # Check sanity of alternatives
763 assert len(wrapped_content), repr(
765 ) # Can't have zero alternatives
766 for alt in wrapped_content:
767 assert len(alt), repr(alt) # Can have empty alternatives
768 self.content = wrapped_content
773 def optimize(self) -> Any:
774 """Optimize certain stacked wildcard patterns."""
777 self.content is not None
778 and len(self.content) == 1
779 and len(self.content[0]) == 1
781 subpattern = self.content[0][0]
782 if self.min == 1 and self.max == 1:
783 if self.content is None:
784 return NodePattern(name=self.name)
785 if subpattern is not None and self.name == subpattern.name:
786 return subpattern.optimize()
789 and isinstance(subpattern, WildcardPattern)
790 and subpattern.min <= 1
791 and self.name == subpattern.name
793 return WildcardPattern(
795 self.min * subpattern.min,
796 self.max * subpattern.max,
801 def match(self, node, results=None) -> bool:
802 """Does this pattern exactly match a node?"""
803 return self.match_seq([node], results)
805 def match_seq(self, nodes, results=None) -> bool:
806 """Does this pattern exactly match a sequence of nodes?"""
807 for c, r in self.generate_matches(nodes):
809 if results is not None:
812 results[self.name] = list(nodes)
816 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
818 Generator yielding matches for a sequence of nodes.
821 nodes: sequence of nodes
824 (count, results) tuples where:
825 count: the match comprises nodes[:count];
826 results: dict containing named submatches.
828 if self.content is None:
829 # Shortcut for special case (see __init__.__doc__)
830 for count in range(self.min, 1 + min(len(nodes), self.max)):
833 r[self.name] = nodes[:count]
835 elif self.name == "bare_name":
836 yield self._bare_name_matches(nodes)
838 # The reason for this is that hitting the recursion limit usually
839 # results in some ugly messages about how RuntimeErrors are being
840 # ignored. We only have to do this on CPython, though, because other
841 # implementations don't have this nasty bug in the first place.
842 if hasattr(sys, "getrefcount"):
843 save_stderr = sys.stderr
844 sys.stderr = StringIO()
846 for count, r in self._recursive_matches(nodes, 0):
848 r[self.name] = nodes[:count]
851 # We fall back to the iterative pattern matching scheme if the recursive
852 # scheme hits the recursion limit.
853 for count, r in self._iterative_matches(nodes):
855 r[self.name] = nodes[:count]
858 if hasattr(sys, "getrefcount"):
859 sys.stderr = save_stderr
861 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
862 """Helper to iteratively yield the matches."""
868 # generate matches that use just one alt from self.content
869 for alt in self.content:
870 for c, r in generate_matches(alt, nodes):
872 results.append((c, r))
874 # for each match, iterate down the nodes
877 for c0, r0 in results:
878 # stop if the entire set of nodes has been matched
879 if c0 < nodelen and c0 <= self.max:
880 for alt in self.content:
881 for c1, r1 in generate_matches(alt, nodes[c0:]):
887 new_results.append((c0 + c1, r))
888 results = new_results
890 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
891 """Special optimized matcher for bare_name."""
893 r = {} # type: _Results
896 while not done and count < max:
898 for leaf in self.content:
899 if leaf[0].match(nodes[count], r):
903 assert self.name is not None
904 r[self.name] = nodes[:count]
907 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
908 """Helper to recursively yield the matches."""
909 assert self.content is not None
910 if count >= self.min:
913 for alt in self.content:
914 for c0, r0 in generate_matches(alt, nodes):
915 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
922 class NegatedPattern(BasePattern):
923 def __init__(self, content: Optional[BasePattern] = None) -> None:
927 The argument is either a pattern or None. If it is None, this
928 only matches an empty sequence (effectively '$' in regex
929 lingo). If it is not None, this matches whenever the argument
930 pattern doesn't have any matches.
932 if content is not None:
933 assert isinstance(content, BasePattern), repr(content)
934 self.content = content
936 def match(self, node, results=None) -> bool:
937 # We never match a node in its entirety
940 def match_seq(self, nodes, results=None) -> bool:
941 # We only match an empty sequence of nodes in its entirety
942 return len(nodes) == 0
944 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
945 if self.content is None:
946 # Return a match if there is an empty sequence
950 # Return a match if the argument pattern has no matches
951 for c, r in self.content.generate_matches(nodes):
956 def generate_matches(
957 patterns: List[BasePattern], nodes: List[NL]
958 ) -> Iterator[Tuple[int, _Results]]:
960 Generator yielding matches for a sequence of patterns and nodes.
963 patterns: a sequence of patterns
964 nodes: a sequence of nodes
967 (count, results) tuples where:
968 count: the entire sequence of patterns matches nodes[:count];
969 results: dict containing named submatches.
974 p, rest = patterns[0], patterns[1:]
975 for c0, r0 in p.generate_matches(nodes):
979 for c1, r1 in generate_matches(rest, nodes[c0:]):