All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs
28 from blib2to3.pgen2.grammar import Grammar
30 __author__ = "Guido van Rossum <guido@python.org>"
33 from io import StringIO
35 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
37 _type_reprs: Dict[int, Union[Text, int]] = {}
40 def type_repr(type_num: int) -> Union[Text, int]:
43 from .pygram import python_symbols
45 # printing tokens is possible but not as useful
46 # from .pgen2 import token // token.__dict__.items():
47 for name in dir(python_symbols):
48 val = getattr(python_symbols, name)
50 _type_reprs[val] = name
51 return _type_reprs.setdefault(type_num, type_num)
54 _P = TypeVar("_P", bound="Base")
56 NL = Union["Node", "Leaf"]
57 Context = Tuple[Text, Tuple[int, int]]
58 RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
64 Abstract base class for Node and Leaf.
66 This provides some default functionality and boilerplate using the
69 A node may be a subnode of at most one parent.
72 # Default values for instance variables
73 type: int # int: token number (< 256) or symbol number (>= 256)
74 parent: Optional["Node"] = None # Parent node pointer, or None
75 children: List[NL] # List of subnodes
76 was_changed: bool = False
77 was_checked: bool = False
79 def __new__(cls, *args, **kwds):
80 """Constructor that prevents Base from being instantiated."""
81 assert cls is not Base, "Cannot instantiate Base"
82 return object.__new__(cls)
84 def __eq__(self, other: Any) -> bool:
86 Compare two nodes for equality.
88 This calls the method _eq().
90 if self.__class__ is not other.__class__:
92 return self._eq(other)
95 def prefix(self) -> Text:
96 raise NotImplementedError
98 def _eq(self: _P, other: _P) -> bool:
100 Compare two nodes for equality.
102 This is called by __eq__ and __ne__. It is only called if the two nodes
103 have the same type. This must be implemented by the concrete subclass.
104 Nodes should be considered equal if they have the same structure,
105 ignoring the prefix string and other context information.
107 raise NotImplementedError
109 def __deepcopy__(self: _P, memo: Any) -> _P:
112 def clone(self: _P) -> _P:
114 Return a cloned (deep) copy of self.
116 This must be implemented by the concrete subclass.
118 raise NotImplementedError
120 def post_order(self) -> Iterator[NL]:
122 Return a post-order iterator for the tree.
124 This must be implemented by the concrete subclass.
126 raise NotImplementedError
128 def pre_order(self) -> Iterator[NL]:
130 Return a pre-order iterator for the tree.
132 This must be implemented by the concrete subclass.
134 raise NotImplementedError
136 def replace(self, new: Union[NL, List[NL]]) -> None:
137 """Replace this node with a new one in the parent."""
138 assert self.parent is not None, str(self)
139 assert new is not None
140 if not isinstance(new, list):
144 for ch in self.parent.children:
146 assert not found, (self.parent.children, self, new)
148 l_children.extend(new)
151 l_children.append(ch)
152 assert found, (self.children, self, new)
153 self.parent.children = l_children
154 self.parent.changed()
155 self.parent.invalidate_sibling_maps()
157 x.parent = self.parent
160 def get_lineno(self) -> Optional[int]:
161 """Return the line number which generated the invocant node."""
163 while not isinstance(node, Leaf):
164 if not node.children:
166 node = node.children[0]
169 def changed(self) -> None:
173 self.parent.changed()
174 self.was_changed = True
176 def remove(self) -> Optional[int]:
178 Remove the node from the tree. Returns the position of the node in its
179 parent's children before it was removed.
182 for i, node in enumerate(self.parent.children):
184 del self.parent.children[i]
185 self.parent.changed()
186 self.parent.invalidate_sibling_maps()
192 def next_sibling(self) -> Optional[NL]:
194 The node immediately following the invocant in their parent's children
195 list. If the invocant does not have a next sibling, it is None
197 if self.parent is None:
200 if self.parent.next_sibling_map is None:
201 self.parent.update_sibling_maps()
202 assert self.parent.next_sibling_map is not None
203 return self.parent.next_sibling_map[id(self)]
206 def prev_sibling(self) -> Optional[NL]:
208 The node immediately preceding the invocant in their parent's children
209 list. If the invocant does not have a previous sibling, it is None.
211 if self.parent is None:
214 if self.parent.prev_sibling_map is None:
215 self.parent.update_sibling_maps()
216 assert self.parent.prev_sibling_map is not None
217 return self.parent.prev_sibling_map[id(self)]
219 def leaves(self) -> Iterator["Leaf"]:
220 for child in self.children:
221 yield from child.leaves()
223 def depth(self) -> int:
224 if self.parent is None:
226 return 1 + self.parent.depth()
228 def get_suffix(self) -> Text:
230 Return the string immediately following the invocant node. This is
231 effectively equivalent to node.next_sibling.prefix
233 next_sib = self.next_sibling
236 prefix = next_sib.prefix
242 """Concrete implementation for interior nodes."""
244 fixers_applied: Optional[List[Any]]
245 used_names: Optional[Set[Text]]
251 context: Optional[Any] = None,
252 prefix: Optional[Text] = None,
253 fixers_applied: Optional[List[Any]] = None,
258 Takes a type constant (a symbol number >= 256), a sequence of
259 child nodes, and an optional context keyword argument.
261 As a side effect, the parent pointers of the children are updated.
263 assert type >= 256, type
265 self.children = list(children)
266 for ch in self.children:
267 assert ch.parent is None, repr(ch)
269 self.invalidate_sibling_maps()
270 if prefix is not None:
273 self.fixers_applied = fixers_applied[:]
275 self.fixers_applied = None
277 def __repr__(self) -> Text:
278 """Return a canonical string representation."""
279 assert self.type is not None
280 return "%s(%s, %r)" % (
281 self.__class__.__name__,
282 type_repr(self.type),
286 def __str__(self) -> Text:
288 Return a pretty string representation.
290 This reproduces the input source exactly.
292 return "".join(map(str, self.children))
294 def _eq(self, other) -> bool:
295 """Compare two nodes for equality."""
296 return (self.type, self.children) == (other.type, other.children)
298 def clone(self) -> "Node":
299 assert self.type is not None
300 """Return a cloned (deep) copy of self."""
303 [ch.clone() for ch in self.children],
304 fixers_applied=self.fixers_applied,
307 def post_order(self) -> Iterator[NL]:
308 """Return a post-order iterator for the tree."""
309 for child in self.children:
310 yield from child.post_order()
313 def pre_order(self) -> Iterator[NL]:
314 """Return a pre-order iterator for the tree."""
316 for child in self.children:
317 yield from child.pre_order()
320 def prefix(self) -> Text:
322 The whitespace and comments preceding this node in the input.
324 if not self.children:
326 return self.children[0].prefix
329 def prefix(self, prefix) -> None:
331 self.children[0].prefix = prefix
333 def set_child(self, i: int, child: NL) -> None:
335 Equivalent to 'node.children[i] = child'. This method also sets the
336 child's parent attribute appropriately.
339 self.children[i].parent = None
340 self.children[i] = child
342 self.invalidate_sibling_maps()
344 def insert_child(self, i: int, child: NL) -> None:
346 Equivalent to 'node.children.insert(i, child)'. This method also sets
347 the child's parent attribute appropriately.
350 self.children.insert(i, child)
352 self.invalidate_sibling_maps()
354 def append_child(self, child: NL) -> None:
356 Equivalent to 'node.children.append(child)'. This method also sets the
357 child's parent attribute appropriately.
360 self.children.append(child)
362 self.invalidate_sibling_maps()
364 def invalidate_sibling_maps(self) -> None:
365 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
366 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
368 def update_sibling_maps(self) -> None:
369 _prev: Dict[int, Optional[NL]] = {}
370 _next: Dict[int, Optional[NL]] = {}
371 self.prev_sibling_map = _prev
372 self.next_sibling_map = _next
373 previous: Optional[NL] = None
374 for current in self.children:
375 _prev[id(current)] = previous
376 _next[id(previous)] = current
378 _next[id(current)] = None
383 """Concrete implementation for leaf nodes."""
385 # Default values for instance variables
387 fixers_applied: List[Any]
389 opening_bracket: "Leaf"
390 used_names: Optional[Set[Text]]
391 _prefix = "" # Whitespace and comments preceding this token in the input
392 lineno: int = 0 # Line where this token starts in the input
393 column: int = 0 # Column where this token starts in the input
399 context: Optional[Context] = None,
400 prefix: Optional[Text] = None,
401 fixers_applied: List[Any] = [],
406 Takes a type constant (a token number < 256), a string value, and an
407 optional context keyword argument.
410 assert 0 <= type < 256, type
411 if context is not None:
412 self._prefix, (self.lineno, self.column) = context
415 if prefix is not None:
416 self._prefix = prefix
417 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
420 def __repr__(self) -> str:
421 """Return a canonical string representation."""
422 from .pgen2.token import tok_name
424 assert self.type is not None
425 return "%s(%s, %r)" % (
426 self.__class__.__name__,
427 tok_name.get(self.type, self.type),
431 def __str__(self) -> Text:
433 Return a pretty string representation.
435 This reproduces the input source exactly.
437 return self._prefix + str(self.value)
439 def _eq(self, other) -> bool:
440 """Compare two nodes for equality."""
441 return (self.type, self.value) == (other.type, other.value)
443 def clone(self) -> "Leaf":
444 assert self.type is not None
445 """Return a cloned (deep) copy of self."""
449 (self.prefix, (self.lineno, self.column)),
450 fixers_applied=self.fixers_applied,
453 def leaves(self) -> Iterator["Leaf"]:
456 def post_order(self) -> Iterator["Leaf"]:
457 """Return a post-order iterator for the tree."""
460 def pre_order(self) -> Iterator["Leaf"]:
461 """Return a pre-order iterator for the tree."""
465 def prefix(self) -> Text:
467 The whitespace and comments preceding this token in the input.
472 def prefix(self, prefix) -> None:
474 self._prefix = prefix
477 def convert(gr: Grammar, raw_node: RawNode) -> NL:
479 Convert raw node information to a Node or Leaf instance.
481 This is passed to the parser driver which calls it whenever a reduction of a
482 grammar rule produces a new complete node, so that the tree is build
485 type, value, context, children = raw_node
486 if children or type in gr.number2symbol:
487 # If there's exactly one child, return that child instead of
488 # creating a new node.
489 assert children is not None
490 if len(children) == 1:
492 return Node(type, children, context=context)
494 return Leaf(type, value or "", context=context)
497 _Results = Dict[Text, NL]
500 class BasePattern(object):
503 A pattern is a tree matching pattern.
505 It looks for a specific node type (token or symbol), and
506 optionally for a specific content.
508 This is an abstract base class. There are three concrete
511 - LeafPattern matches a single leaf node;
512 - NodePattern matches a single node (usually non-leaf);
513 - WildcardPattern matches a sequence of nodes of variable length.
516 # Defaults for instance variables
518 type = None # Node type (token if < 256, symbol if >= 256)
519 content: Any = None # Optional content matching pattern
520 name: Optional[Text] = None # Optional name used to store match in results dict
522 def __new__(cls, *args, **kwds):
523 """Constructor that prevents BasePattern from being instantiated."""
524 assert cls is not BasePattern, "Cannot instantiate BasePattern"
525 return object.__new__(cls)
527 def __repr__(self) -> Text:
528 assert self.type is not None
529 args = [type_repr(self.type), self.content, self.name]
530 while args and args[-1] is None:
532 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
534 def _submatch(self, node, results=None) -> bool:
535 raise NotImplementedError
537 def optimize(self) -> "BasePattern":
539 A subclass can define this as a hook for optimizations.
541 Returns either self or another node with the same effect.
545 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
547 Does this pattern exactly match a node?
549 Returns True if it matches, False if not.
551 If results is not None, it must be a dict which will be
552 updated with the nodes matching named subpatterns.
554 Default implementation for non-wildcard patterns.
556 if self.type is not None and node.type != self.type:
558 if self.content is not None:
559 r: Optional[_Results] = None
560 if results is not None:
562 if not self._submatch(node, r):
565 assert results is not None
567 if results is not None and self.name:
568 results[self.name] = node
571 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
573 Does this pattern exactly match a sequence of nodes?
575 Default implementation for non-wildcard patterns.
579 return self.match(nodes[0], results)
581 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
583 Generator yielding all matches for this pattern.
585 Default implementation for non-wildcard patterns.
588 if nodes and self.match(nodes[0], r):
592 class LeafPattern(BasePattern):
595 type: Optional[int] = None,
596 content: Optional[Text] = None,
597 name: Optional[Text] = None,
600 Initializer. Takes optional type, content, and name.
602 The type, if given must be a token type (< 256). If not given,
603 this matches any *leaf* node; the content may still be required.
605 The content, if given, must be a string.
607 If a name is given, the matching node is stored in the results
611 assert 0 <= type < 256, type
612 if content is not None:
613 assert isinstance(content, str), repr(content)
615 self.content = content
618 def match(self, node: NL, results=None):
619 """Override match() to insist on a leaf node."""
620 if not isinstance(node, Leaf):
622 return BasePattern.match(self, node, results)
624 def _submatch(self, node, results=None):
626 Match the pattern's content to the node's children.
628 This assumes the node type matches and self.content is not None.
630 Returns True if it matches, False if not.
632 If results is not None, it must be a dict which will be
633 updated with the nodes matching named subpatterns.
635 When returning False, the results dict may still be updated.
637 return self.content == node.value
640 class NodePattern(BasePattern):
642 wildcards: bool = False
646 type: Optional[int] = None,
647 content: Optional[Iterable[Text]] = None,
648 name: Optional[Text] = None,
651 Initializer. Takes optional type, content, and name.
653 The type, if given, must be a symbol type (>= 256). If the
654 type is None this matches *any* single node (leaf or not),
655 except if content is not None, in which it only matches
656 non-leaf nodes that also match the content pattern.
658 The content, if not None, must be a sequence of Patterns that
659 must match the node's children exactly. If the content is
660 given, the type must not be None.
662 If a name is given, the matching node is stored in the results
666 assert type >= 256, type
667 if content is not None:
668 assert not isinstance(content, str), repr(content)
669 newcontent = list(content)
670 for i, item in enumerate(newcontent):
671 assert isinstance(item, BasePattern), (i, item)
672 # I don't even think this code is used anywhere, but it does cause
673 # unreachable errors from mypy. This function's signature does look
674 # odd though *shrug*.
675 if isinstance(item, WildcardPattern): # type: ignore[unreachable]
676 self.wildcards = True # type: ignore[unreachable]
678 self.content = newcontent
681 def _submatch(self, node, results=None) -> bool:
683 Match the pattern's content to the node's children.
685 This assumes the node type matches and self.content is not None.
687 Returns True if it matches, False if not.
689 If results is not None, it must be a dict which will be
690 updated with the nodes matching named subpatterns.
692 When returning False, the results dict may still be updated.
695 for c, r in generate_matches(self.content, node.children):
696 if c == len(node.children):
697 if results is not None:
701 if len(self.content) != len(node.children):
703 for subpattern, child in zip(self.content, node.children):
704 if not subpattern.match(child, results):
709 class WildcardPattern(BasePattern):
712 A wildcard pattern can match zero or more nodes.
714 This has all the flexibility needed to implement patterns like:
718 (...)* (...)+ (...)? (...){m,n}
720 except it always uses non-greedy matching.
728 content: Optional[Text] = None,
731 name: Optional[Text] = None,
737 content: optional sequence of subsequences of patterns;
738 if absent, matches one node;
739 if present, each subsequence is an alternative [*]
740 min: optional minimum number of times to match, default 0
741 max: optional maximum number of times to match, default HUGE
742 name: optional name assigned to this match
744 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
745 equivalent to (a b c | d e | f g h); if content is None,
746 this is equivalent to '.' in regular expression terms.
747 The min and max parameters work as follows:
748 min=0, max=maxint: .*
749 min=1, max=maxint: .+
752 If content is not None, replace the dot with the parenthesized
753 list of alternatives, e.g. (a b c | d e | f g h)*
755 assert 0 <= min <= max <= HUGE, (min, max)
756 if content is not None:
757 f = lambda s: tuple(s)
758 wrapped_content = tuple(map(f, content)) # Protect against alterations
759 # Check sanity of alternatives
760 assert len(wrapped_content), repr(
762 ) # Can't have zero alternatives
763 for alt in wrapped_content:
764 assert len(alt), repr(alt) # Can have empty alternatives
765 self.content = wrapped_content
770 def optimize(self) -> Any:
771 """Optimize certain stacked wildcard patterns."""
774 self.content is not None
775 and len(self.content) == 1
776 and len(self.content[0]) == 1
778 subpattern = self.content[0][0]
779 if self.min == 1 and self.max == 1:
780 if self.content is None:
781 return NodePattern(name=self.name)
782 if subpattern is not None and self.name == subpattern.name:
783 return subpattern.optimize()
786 and isinstance(subpattern, WildcardPattern)
787 and subpattern.min <= 1
788 and self.name == subpattern.name
790 return WildcardPattern(
792 self.min * subpattern.min,
793 self.max * subpattern.max,
798 def match(self, node, results=None) -> bool:
799 """Does this pattern exactly match a node?"""
800 return self.match_seq([node], results)
802 def match_seq(self, nodes, results=None) -> bool:
803 """Does this pattern exactly match a sequence of nodes?"""
804 for c, r in self.generate_matches(nodes):
806 if results is not None:
809 results[self.name] = list(nodes)
813 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
815 Generator yielding matches for a sequence of nodes.
818 nodes: sequence of nodes
821 (count, results) tuples where:
822 count: the match comprises nodes[:count];
823 results: dict containing named submatches.
825 if self.content is None:
826 # Shortcut for special case (see __init__.__doc__)
827 for count in range(self.min, 1 + min(len(nodes), self.max)):
830 r[self.name] = nodes[:count]
832 elif self.name == "bare_name":
833 yield self._bare_name_matches(nodes)
835 # The reason for this is that hitting the recursion limit usually
836 # results in some ugly messages about how RuntimeErrors are being
837 # ignored. We only have to do this on CPython, though, because other
838 # implementations don't have this nasty bug in the first place.
839 if hasattr(sys, "getrefcount"):
840 save_stderr = sys.stderr
841 sys.stderr = StringIO()
843 for count, r in self._recursive_matches(nodes, 0):
845 r[self.name] = nodes[:count]
848 # We fall back to the iterative pattern matching scheme if the recursive
849 # scheme hits the recursion limit.
850 for count, r in self._iterative_matches(nodes):
852 r[self.name] = nodes[:count]
855 if hasattr(sys, "getrefcount"):
856 sys.stderr = save_stderr
858 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
859 """Helper to iteratively yield the matches."""
865 # generate matches that use just one alt from self.content
866 for alt in self.content:
867 for c, r in generate_matches(alt, nodes):
869 results.append((c, r))
871 # for each match, iterate down the nodes
874 for c0, r0 in results:
875 # stop if the entire set of nodes has been matched
876 if c0 < nodelen and c0 <= self.max:
877 for alt in self.content:
878 for c1, r1 in generate_matches(alt, nodes[c0:]):
884 new_results.append((c0 + c1, r))
885 results = new_results
887 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
888 """Special optimized matcher for bare_name."""
890 r = {} # type: _Results
893 while not done and count < max:
895 for leaf in self.content:
896 if leaf[0].match(nodes[count], r):
900 assert self.name is not None
901 r[self.name] = nodes[:count]
904 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
905 """Helper to recursively yield the matches."""
906 assert self.content is not None
907 if count >= self.min:
910 for alt in self.content:
911 for c0, r0 in generate_matches(alt, nodes):
912 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
919 class NegatedPattern(BasePattern):
920 def __init__(self, content: Optional[Any] = None) -> None:
924 The argument is either a pattern or None. If it is None, this
925 only matches an empty sequence (effectively '$' in regex
926 lingo). If it is not None, this matches whenever the argument
927 pattern doesn't have any matches.
929 if content is not None:
930 assert isinstance(content, BasePattern), repr(content)
931 self.content = content
933 def match(self, node, results=None) -> bool:
934 # We never match a node in its entirety
937 def match_seq(self, nodes, results=None) -> bool:
938 # We only match an empty sequence of nodes in its entirety
939 return len(nodes) == 0
941 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
942 if self.content is None:
943 # Return a match if there is an empty sequence
947 # Return a match if the argument pattern has no matches
948 for c, r in self.content.generate_matches(nodes):
953 def generate_matches(
954 patterns: List[BasePattern], nodes: List[NL]
955 ) -> Iterator[Tuple[int, _Results]]:
957 Generator yielding matches for a sequence of patterns and nodes.
960 patterns: a sequence of patterns
961 nodes: a sequence of nodes
964 (count, results) tuples where:
965 count: the entire sequence of patterns matches nodes[:count];
966 results: dict containing named submatches.
971 p, rest = patterns[0], patterns[1:]
972 for c0, r0 in p.generate_matches(nodes):
976 for c1, r1 in generate_matches(rest, nodes[c0:]):