All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs
29 from blib2to3.pgen2.grammar import Grammar
31 __author__ = "Guido van Rossum <guido@python.org>"
34 from io import StringIO
36 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
38 _type_reprs: Dict[int, Union[Text, int]] = {}
41 def type_repr(type_num: int) -> Union[Text, int]:
44 from .pygram import python_symbols
46 # printing tokens is possible but not as useful
47 # from .pgen2 import token // token.__dict__.items():
48 for name in dir(python_symbols):
49 val = getattr(python_symbols, name)
51 _type_reprs[val] = name
52 return _type_reprs.setdefault(type_num, type_num)
57 NL = Union["Node", "Leaf"]
58 Context = Tuple[Text, Tuple[int, int]]
59 RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
65 Abstract base class for Node and Leaf.
67 This provides some default functionality and boilerplate using the
70 A node may be a subnode of at most one parent.
73 # Default values for instance variables
74 type: int # int: token number (< 256) or symbol number (>= 256)
75 parent: Optional["Node"] = None # Parent node pointer, or None
76 children: List[NL] # List of subnodes
77 was_changed: bool = False
78 was_checked: bool = False
80 def __new__(cls, *args, **kwds):
81 """Constructor that prevents Base from being instantiated."""
82 assert cls is not Base, "Cannot instantiate Base"
83 return object.__new__(cls)
85 def __eq__(self, other: Any) -> bool:
87 Compare two nodes for equality.
89 This calls the method _eq().
91 if self.__class__ is not other.__class__:
93 return self._eq(other)
95 __hash__ = None # type: Any # For Py3 compatibility.
98 def prefix(self) -> Text:
99 raise NotImplementedError
101 def _eq(self: _P, other: _P) -> bool:
103 Compare two nodes for equality.
105 This is called by __eq__ and __ne__. It is only called if the two nodes
106 have the same type. This must be implemented by the concrete subclass.
107 Nodes should be considered equal if they have the same structure,
108 ignoring the prefix string and other context information.
110 raise NotImplementedError
112 def clone(self: _P) -> _P:
114 Return a cloned (deep) copy of self.
116 This must be implemented by the concrete subclass.
118 raise NotImplementedError
120 def post_order(self) -> Iterator[NL]:
122 Return a post-order iterator for the tree.
124 This must be implemented by the concrete subclass.
126 raise NotImplementedError
128 def pre_order(self) -> Iterator[NL]:
130 Return a pre-order iterator for the tree.
132 This must be implemented by the concrete subclass.
134 raise NotImplementedError
136 def replace(self, new: Union[NL, List[NL]]) -> None:
137 """Replace this node with a new one in the parent."""
138 assert self.parent is not None, str(self)
139 assert new is not None
140 if not isinstance(new, list):
144 for ch in self.parent.children:
146 assert not found, (self.parent.children, self, new)
148 l_children.extend(new)
151 l_children.append(ch)
152 assert found, (self.children, self, new)
153 self.parent.children = l_children
154 self.parent.changed()
155 self.parent.invalidate_sibling_maps()
157 x.parent = self.parent
160 def get_lineno(self) -> Optional[int]:
161 """Return the line number which generated the invocant node."""
163 while not isinstance(node, Leaf):
164 if not node.children:
166 node = node.children[0]
169 def changed(self) -> None:
173 self.parent.changed()
174 self.was_changed = True
176 def remove(self) -> Optional[int]:
178 Remove the node from the tree. Returns the position of the node in its
179 parent's children before it was removed.
182 for i, node in enumerate(self.parent.children):
184 del self.parent.children[i]
185 self.parent.changed()
186 self.parent.invalidate_sibling_maps()
192 def next_sibling(self) -> Optional[NL]:
194 The node immediately following the invocant in their parent's children
195 list. If the invocant does not have a next sibling, it is None
197 if self.parent is None:
200 if self.parent.next_sibling_map is None:
201 self.parent.update_sibling_maps()
202 assert self.parent.next_sibling_map is not None
203 return self.parent.next_sibling_map[id(self)]
206 def prev_sibling(self) -> Optional[NL]:
208 The node immediately preceding the invocant in their parent's children
209 list. If the invocant does not have a previous sibling, it is None.
211 if self.parent is None:
214 if self.parent.prev_sibling_map is None:
215 self.parent.update_sibling_maps()
216 assert self.parent.prev_sibling_map is not None
217 return self.parent.prev_sibling_map[id(self)]
219 def leaves(self) -> Iterator["Leaf"]:
220 for child in self.children:
221 yield from child.leaves()
223 def depth(self) -> int:
224 if self.parent is None:
226 return 1 + self.parent.depth()
228 def get_suffix(self) -> Text:
230 Return the string immediately following the invocant node. This is
231 effectively equivalent to node.next_sibling.prefix
233 next_sib = self.next_sibling
236 prefix = next_sib.prefix
242 """Concrete implementation for interior nodes."""
244 fixers_applied: Optional[List[Any]]
245 used_names: Optional[Set[Text]]
251 context: Optional[Any] = None,
252 prefix: Optional[Text] = None,
253 fixers_applied: Optional[List[Any]] = None,
258 Takes a type constant (a symbol number >= 256), a sequence of
259 child nodes, and an optional context keyword argument.
261 As a side effect, the parent pointers of the children are updated.
263 assert type >= 256, type
265 self.children = list(children)
266 for ch in self.children:
267 assert ch.parent is None, repr(ch)
269 self.invalidate_sibling_maps()
270 if prefix is not None:
273 self.fixers_applied = fixers_applied[:]
275 self.fixers_applied = None
277 def __repr__(self) -> Text:
278 """Return a canonical string representation."""
279 assert self.type is not None
280 return "%s(%s, %r)" % (
281 self.__class__.__name__,
282 type_repr(self.type),
286 def __str__(self) -> Text:
288 Return a pretty string representation.
290 This reproduces the input source exactly.
292 return "".join(map(str, self.children))
294 def _eq(self, other) -> bool:
295 """Compare two nodes for equality."""
296 return (self.type, self.children) == (other.type, other.children)
298 def clone(self) -> "Node":
299 assert self.type is not None
300 """Return a cloned (deep) copy of self."""
303 [ch.clone() for ch in self.children],
304 fixers_applied=self.fixers_applied,
307 def post_order(self) -> Iterator[NL]:
308 """Return a post-order iterator for the tree."""
309 for child in self.children:
310 yield from child.post_order()
313 def pre_order(self) -> Iterator[NL]:
314 """Return a pre-order iterator for the tree."""
316 for child in self.children:
317 yield from child.pre_order()
320 def prefix(self) -> Text:
322 The whitespace and comments preceding this node in the input.
324 if not self.children:
326 return self.children[0].prefix
329 def prefix(self, prefix) -> None:
331 self.children[0].prefix = prefix
333 def set_child(self, i: int, child: NL) -> None:
335 Equivalent to 'node.children[i] = child'. This method also sets the
336 child's parent attribute appropriately.
339 self.children[i].parent = None
340 self.children[i] = child
342 self.invalidate_sibling_maps()
344 def insert_child(self, i: int, child: NL) -> None:
346 Equivalent to 'node.children.insert(i, child)'. This method also sets
347 the child's parent attribute appropriately.
350 self.children.insert(i, child)
352 self.invalidate_sibling_maps()
354 def append_child(self, child: NL) -> None:
356 Equivalent to 'node.children.append(child)'. This method also sets the
357 child's parent attribute appropriately.
360 self.children.append(child)
362 self.invalidate_sibling_maps()
364 def invalidate_sibling_maps(self) -> None:
365 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
366 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
368 def update_sibling_maps(self) -> None:
369 _prev: Dict[int, Optional[NL]] = {}
370 _next: Dict[int, Optional[NL]] = {}
371 self.prev_sibling_map = _prev
372 self.next_sibling_map = _next
373 previous: Optional[NL] = None
374 for current in self.children:
375 _prev[id(current)] = previous
376 _next[id(previous)] = current
378 _next[id(current)] = None
383 """Concrete implementation for leaf nodes."""
385 # Default values for instance variables
387 fixers_applied: List[Any]
389 opening_bracket: "Leaf"
390 used_names: Optional[Set[Text]]
391 _prefix = "" # Whitespace and comments preceding this token in the input
392 lineno: int = 0 # Line where this token starts in the input
393 column: int = 0 # Column where this token starts in the input
399 context: Optional[Context] = None,
400 prefix: Optional[Text] = None,
401 fixers_applied: List[Any] = [],
406 Takes a type constant (a token number < 256), a string value, and an
407 optional context keyword argument.
410 assert 0 <= type < 256, type
411 if context is not None:
412 self._prefix, (self.lineno, self.column) = context
415 if prefix is not None:
416 self._prefix = prefix
417 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
420 def __repr__(self) -> str:
421 """Return a canonical string representation."""
422 from .pgen2.token import tok_name
424 assert self.type is not None
425 return "%s(%s, %r)" % (
426 self.__class__.__name__,
427 tok_name.get(self.type, self.type),
431 def __str__(self) -> Text:
433 Return a pretty string representation.
435 This reproduces the input source exactly.
437 return self.prefix + str(self.value)
439 def _eq(self, other) -> bool:
440 """Compare two nodes for equality."""
441 return (self.type, self.value) == (other.type, other.value)
443 def clone(self) -> "Leaf":
444 assert self.type is not None
445 """Return a cloned (deep) copy of self."""
449 (self.prefix, (self.lineno, self.column)),
450 fixers_applied=self.fixers_applied,
453 def leaves(self) -> Iterator["Leaf"]:
456 def post_order(self) -> Iterator["Leaf"]:
457 """Return a post-order iterator for the tree."""
460 def pre_order(self) -> Iterator["Leaf"]:
461 """Return a pre-order iterator for the tree."""
465 def prefix(self) -> Text:
467 The whitespace and comments preceding this token in the input.
472 def prefix(self, prefix) -> None:
474 self._prefix = prefix
477 def convert(gr: Grammar, raw_node: RawNode) -> NL:
479 Convert raw node information to a Node or Leaf instance.
481 This is passed to the parser driver which calls it whenever a reduction of a
482 grammar rule produces a new complete node, so that the tree is build
485 type, value, context, children = raw_node
486 if children or type in gr.number2symbol:
487 # If there's exactly one child, return that child instead of
488 # creating a new node.
489 assert children is not None
490 if len(children) == 1:
492 return Node(type, children, context=context)
494 return Leaf(type, value or "", context=context)
497 _Results = Dict[Text, NL]
500 class BasePattern(object):
503 A pattern is a tree matching pattern.
505 It looks for a specific node type (token or symbol), and
506 optionally for a specific content.
508 This is an abstract base class. There are three concrete
511 - LeafPattern matches a single leaf node;
512 - NodePattern matches a single node (usually non-leaf);
513 - WildcardPattern matches a sequence of nodes of variable length.
516 # Defaults for instance variables
518 type = None # Node type (token if < 256, symbol if >= 256)
519 content: Any = None # Optional content matching pattern
520 name: Optional[Text] = None # Optional name used to store match in results dict
522 def __new__(cls, *args, **kwds):
523 """Constructor that prevents BasePattern from being instantiated."""
524 assert cls is not BasePattern, "Cannot instantiate BasePattern"
525 return object.__new__(cls)
527 def __repr__(self) -> Text:
528 assert self.type is not None
529 args = [type_repr(self.type), self.content, self.name]
530 while args and args[-1] is None:
532 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
534 def _submatch(self, node, results=None) -> bool:
535 raise NotImplementedError
537 def optimize(self) -> "BasePattern":
539 A subclass can define this as a hook for optimizations.
541 Returns either self or another node with the same effect.
545 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
547 Does this pattern exactly match a node?
549 Returns True if it matches, False if not.
551 If results is not None, it must be a dict which will be
552 updated with the nodes matching named subpatterns.
554 Default implementation for non-wildcard patterns.
556 if self.type is not None and node.type != self.type:
558 if self.content is not None:
559 r: Optional[_Results] = None
560 if results is not None:
562 if not self._submatch(node, r):
565 assert results is not None
567 if results is not None and self.name:
568 results[self.name] = node
571 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
573 Does this pattern exactly match a sequence of nodes?
575 Default implementation for non-wildcard patterns.
579 return self.match(nodes[0], results)
581 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
583 Generator yielding all matches for this pattern.
585 Default implementation for non-wildcard patterns.
588 if nodes and self.match(nodes[0], r):
592 class LeafPattern(BasePattern):
595 type: Optional[int] = None,
596 content: Optional[Text] = None,
597 name: Optional[Text] = None,
600 Initializer. Takes optional type, content, and name.
602 The type, if given must be a token type (< 256). If not given,
603 this matches any *leaf* node; the content may still be required.
605 The content, if given, must be a string.
607 If a name is given, the matching node is stored in the results
611 assert 0 <= type < 256, type
612 if content is not None:
613 assert isinstance(content, str), repr(content)
615 self.content = content
618 def match(self, node: NL, results=None):
619 """Override match() to insist on a leaf node."""
620 if not isinstance(node, Leaf):
622 return BasePattern.match(self, node, results)
624 def _submatch(self, node, results=None):
626 Match the pattern's content to the node's children.
628 This assumes the node type matches and self.content is not None.
630 Returns True if it matches, False if not.
632 If results is not None, it must be a dict which will be
633 updated with the nodes matching named subpatterns.
635 When returning False, the results dict may still be updated.
637 return self.content == node.value
640 class NodePattern(BasePattern):
642 wildcards: bool = False
646 type: Optional[int] = None,
647 content: Optional[Iterable[Text]] = None,
648 name: Optional[Text] = None,
651 Initializer. Takes optional type, content, and name.
653 The type, if given, must be a symbol type (>= 256). If the
654 type is None this matches *any* single node (leaf or not),
655 except if content is not None, in which it only matches
656 non-leaf nodes that also match the content pattern.
658 The content, if not None, must be a sequence of Patterns that
659 must match the node's children exactly. If the content is
660 given, the type must not be None.
662 If a name is given, the matching node is stored in the results
666 assert type >= 256, type
667 if content is not None:
668 assert not isinstance(content, str), repr(content)
669 newcontent = list(content)
670 for i, item in enumerate(newcontent):
671 assert isinstance(item, BasePattern), (i, item)
672 if isinstance(item, WildcardPattern):
673 self.wildcards = True
675 self.content = newcontent
678 def _submatch(self, node, results=None) -> bool:
680 Match the pattern's content to the node's children.
682 This assumes the node type matches and self.content is not None.
684 Returns True if it matches, False if not.
686 If results is not None, it must be a dict which will be
687 updated with the nodes matching named subpatterns.
689 When returning False, the results dict may still be updated.
692 for c, r in generate_matches(self.content, node.children):
693 if c == len(node.children):
694 if results is not None:
698 if len(self.content) != len(node.children):
700 for subpattern, child in zip(self.content, node.children):
701 if not subpattern.match(child, results):
706 class WildcardPattern(BasePattern):
709 A wildcard pattern can match zero or more nodes.
711 This has all the flexibility needed to implement patterns like:
715 (...)* (...)+ (...)? (...){m,n}
717 except it always uses non-greedy matching.
725 content: Optional[Text] = None,
728 name: Optional[Text] = None,
734 content: optional sequence of subsequences of patterns;
735 if absent, matches one node;
736 if present, each subsequence is an alternative [*]
737 min: optional minimum number of times to match, default 0
738 max: optional maximum number of times to match, default HUGE
739 name: optional name assigned to this match
741 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
742 equivalent to (a b c | d e | f g h); if content is None,
743 this is equivalent to '.' in regular expression terms.
744 The min and max parameters work as follows:
745 min=0, max=maxint: .*
746 min=1, max=maxint: .+
749 If content is not None, replace the dot with the parenthesized
750 list of alternatives, e.g. (a b c | d e | f g h)*
752 assert 0 <= min <= max <= HUGE, (min, max)
753 if content is not None:
754 f = lambda s: tuple(s)
755 wrapped_content = tuple(map(f, content)) # Protect against alterations
756 # Check sanity of alternatives
757 assert len(wrapped_content), repr(
759 ) # Can't have zero alternatives
760 for alt in wrapped_content:
761 assert len(alt), repr(alt) # Can have empty alternatives
762 self.content = wrapped_content
767 def optimize(self) -> Any:
768 """Optimize certain stacked wildcard patterns."""
771 self.content is not None
772 and len(self.content) == 1
773 and len(self.content[0]) == 1
775 subpattern = self.content[0][0]
776 if self.min == 1 and self.max == 1:
777 if self.content is None:
778 return NodePattern(name=self.name)
779 if subpattern is not None and self.name == subpattern.name:
780 return subpattern.optimize()
783 and isinstance(subpattern, WildcardPattern)
784 and subpattern.min <= 1
785 and self.name == subpattern.name
787 return WildcardPattern(
789 self.min * subpattern.min,
790 self.max * subpattern.max,
795 def match(self, node, results=None) -> bool:
796 """Does this pattern exactly match a node?"""
797 return self.match_seq([node], results)
799 def match_seq(self, nodes, results=None) -> bool:
800 """Does this pattern exactly match a sequence of nodes?"""
801 for c, r in self.generate_matches(nodes):
803 if results is not None:
806 results[self.name] = list(nodes)
810 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
812 Generator yielding matches for a sequence of nodes.
815 nodes: sequence of nodes
818 (count, results) tuples where:
819 count: the match comprises nodes[:count];
820 results: dict containing named submatches.
822 if self.content is None:
823 # Shortcut for special case (see __init__.__doc__)
824 for count in range(self.min, 1 + min(len(nodes), self.max)):
827 r[self.name] = nodes[:count]
829 elif self.name == "bare_name":
830 yield self._bare_name_matches(nodes)
832 # The reason for this is that hitting the recursion limit usually
833 # results in some ugly messages about how RuntimeErrors are being
834 # ignored. We only have to do this on CPython, though, because other
835 # implementations don't have this nasty bug in the first place.
836 if hasattr(sys, "getrefcount"):
837 save_stderr = sys.stderr
838 sys.stderr = StringIO()
840 for count, r in self._recursive_matches(nodes, 0):
842 r[self.name] = nodes[:count]
845 # We fall back to the iterative pattern matching scheme if the recursive
846 # scheme hits the recursion limit.
847 for count, r in self._iterative_matches(nodes):
849 r[self.name] = nodes[:count]
852 if hasattr(sys, "getrefcount"):
853 sys.stderr = save_stderr
855 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
856 """Helper to iteratively yield the matches."""
862 # generate matches that use just one alt from self.content
863 for alt in self.content:
864 for c, r in generate_matches(alt, nodes):
866 results.append((c, r))
868 # for each match, iterate down the nodes
871 for c0, r0 in results:
872 # stop if the entire set of nodes has been matched
873 if c0 < nodelen and c0 <= self.max:
874 for alt in self.content:
875 for c1, r1 in generate_matches(alt, nodes[c0:]):
881 new_results.append((c0 + c1, r))
882 results = new_results
884 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
885 """Special optimized matcher for bare_name."""
887 r = {} # type: _Results
890 while not done and count < max:
892 for leaf in self.content:
893 if leaf[0].match(nodes[count], r):
897 assert self.name is not None
898 r[self.name] = nodes[:count]
901 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
902 """Helper to recursively yield the matches."""
903 assert self.content is not None
904 if count >= self.min:
907 for alt in self.content:
908 for c0, r0 in generate_matches(alt, nodes):
909 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
916 class NegatedPattern(BasePattern):
917 def __init__(self, content: Optional[Any] = None) -> None:
921 The argument is either a pattern or None. If it is None, this
922 only matches an empty sequence (effectively '$' in regex
923 lingo). If it is not None, this matches whenever the argument
924 pattern doesn't have any matches.
926 if content is not None:
927 assert isinstance(content, BasePattern), repr(content)
928 self.content = content
930 def match(self, node, results=None) -> bool:
931 # We never match a node in its entirety
934 def match_seq(self, nodes, results=None) -> bool:
935 # We only match an empty sequence of nodes in its entirety
936 return len(nodes) == 0
938 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
939 if self.content is None:
940 # Return a match if there is an empty sequence
944 # Return a match if the argument pattern has no matches
945 for c, r in self.content.generate_matches(nodes):
950 def generate_matches(
951 patterns: List[BasePattern], nodes: List[NL]
952 ) -> Iterator[Tuple[int, _Results]]:
954 Generator yielding matches for a sequence of patterns and nodes.
957 patterns: a sequence of patterns
958 nodes: a sequence of nodes
961 (count, results) tuples where:
962 count: the entire sequence of patterns matches nodes[:count];
963 results: dict containing named submatches.
968 p, rest = patterns[0], patterns[1:]
969 for c0, r0 in p.generate_matches(nodes):
973 for c1, r1 in generate_matches(rest, nodes[c0:]):
980 _Convert = Callable[[Grammar, RawNode], Any]