All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs
30 from blib2to3.pgen2.grammar import Grammar
32 __author__ = "Guido van Rossum <guido@python.org>"
35 from io import StringIO
37 HUGE: int = 0x7fffffff # maximum repeat count, default max
39 _type_reprs: Dict[int, Union[Text, int]] = {}
42 def type_repr(type_num: int) -> Union[Text, int]:
45 from .pygram import python_symbols
47 # printing tokens is possible but not as useful
48 # from .pgen2 import token // token.__dict__.items():
49 for name in dir(python_symbols):
50 val = getattr(python_symbols, name)
52 _type_reprs[val] = name
53 return _type_reprs.setdefault(type_num, type_num)
58 NL = Union["Node", "Leaf"]
59 Context = Tuple[Text, Tuple[int, int]]
60 RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
66 Abstract base class for Node and Leaf.
68 This provides some default functionality and boilerplate using the
71 A node may be a subnode of at most one parent.
74 # Default values for instance variables
75 type: int # int: token number (< 256) or symbol number (>= 256)
76 parent: Optional["Node"] = None # Parent node pointer, or None
77 children: List[NL] # List of subnodes
78 was_changed: bool = False
79 was_checked: bool = False
81 def __new__(cls, *args, **kwds):
82 """Constructor that prevents Base from being instantiated."""
83 assert cls is not Base, "Cannot instantiate Base"
84 return object.__new__(cls)
86 def __eq__(self, other: Any) -> bool:
88 Compare two nodes for equality.
90 This calls the method _eq().
92 if self.__class__ is not other.__class__:
94 return self._eq(other)
96 __hash__ = None # type: Any # For Py3 compatibility.
99 def prefix(self) -> Text:
100 raise NotImplementedError
102 def _eq(self: _P, other: _P) -> bool:
104 Compare two nodes for equality.
106 This is called by __eq__ and __ne__. It is only called if the two nodes
107 have the same type. This must be implemented by the concrete subclass.
108 Nodes should be considered equal if they have the same structure,
109 ignoring the prefix string and other context information.
111 raise NotImplementedError
113 def clone(self: _P) -> _P:
115 Return a cloned (deep) copy of self.
117 This must be implemented by the concrete subclass.
119 raise NotImplementedError
121 def post_order(self) -> Iterator[NL]:
123 Return a post-order iterator for the tree.
125 This must be implemented by the concrete subclass.
127 raise NotImplementedError
129 def pre_order(self) -> Iterator[NL]:
131 Return a pre-order iterator for the tree.
133 This must be implemented by the concrete subclass.
135 raise NotImplementedError
137 def replace(self, new: Union[NL, List[NL]]) -> None:
138 """Replace this node with a new one in the parent."""
139 assert self.parent is not None, str(self)
140 assert new is not None
141 if not isinstance(new, list):
145 for ch in self.parent.children:
147 assert not found, (self.parent.children, self, new)
149 l_children.extend(new)
152 l_children.append(ch)
153 assert found, (self.children, self, new)
154 self.parent.children = l_children
155 self.parent.changed()
156 self.parent.invalidate_sibling_maps()
158 x.parent = self.parent
161 def get_lineno(self) -> Optional[int]:
162 """Return the line number which generated the invocant node."""
164 while not isinstance(node, Leaf):
165 if not node.children:
167 node = node.children[0]
170 def changed(self) -> None:
174 self.parent.changed()
175 self.was_changed = True
177 def remove(self) -> Optional[int]:
179 Remove the node from the tree. Returns the position of the node in its
180 parent's children before it was removed.
183 for i, node in enumerate(self.parent.children):
185 del self.parent.children[i]
186 self.parent.changed()
187 self.parent.invalidate_sibling_maps()
193 def next_sibling(self) -> Optional[NL]:
195 The node immediately following the invocant in their parent's children
196 list. If the invocant does not have a next sibling, it is None
198 if self.parent is None:
201 if self.parent.next_sibling_map is None:
202 self.parent.update_sibling_maps()
203 assert self.parent.next_sibling_map is not None
204 return self.parent.next_sibling_map[id(self)]
207 def prev_sibling(self) -> Optional[NL]:
209 The node immediately preceding the invocant in their parent's children
210 list. If the invocant does not have a previous sibling, it is None.
212 if self.parent is None:
215 if self.parent.prev_sibling_map is None:
216 self.parent.update_sibling_maps()
217 assert self.parent.prev_sibling_map is not None
218 return self.parent.prev_sibling_map[id(self)]
220 def leaves(self) -> Iterator["Leaf"]:
221 for child in self.children:
222 yield from child.leaves()
224 def depth(self) -> int:
225 if self.parent is None:
227 return 1 + self.parent.depth()
229 def get_suffix(self) -> Text:
231 Return the string immediately following the invocant node. This is
232 effectively equivalent to node.next_sibling.prefix
234 next_sib = self.next_sibling
237 prefix = next_sib.prefix
243 """Concrete implementation for interior nodes."""
245 fixers_applied: Optional[List[Any]]
246 used_names: Optional[Set[Text]]
252 context: Optional[Any] = None,
253 prefix: Optional[Text] = None,
254 fixers_applied: Optional[List[Any]] = None,
259 Takes a type constant (a symbol number >= 256), a sequence of
260 child nodes, and an optional context keyword argument.
262 As a side effect, the parent pointers of the children are updated.
264 assert type >= 256, type
266 self.children = list(children)
267 for ch in self.children:
268 assert ch.parent is None, repr(ch)
270 self.invalidate_sibling_maps()
271 if prefix is not None:
274 self.fixers_applied = fixers_applied[:]
276 self.fixers_applied = None
278 def __repr__(self) -> Text:
279 """Return a canonical string representation."""
280 assert self.type is not None
281 return "%s(%s, %r)" % (
282 self.__class__.__name__,
283 type_repr(self.type),
287 def __str__(self) -> Text:
289 Return a pretty string representation.
291 This reproduces the input source exactly.
293 return "".join(map(str, self.children))
295 def _eq(self, other) -> bool:
296 """Compare two nodes for equality."""
297 return (self.type, self.children) == (other.type, other.children)
299 def clone(self) -> "Node":
300 assert self.type is not None
301 """Return a cloned (deep) copy of self."""
304 [ch.clone() for ch in self.children],
305 fixers_applied=self.fixers_applied,
308 def post_order(self) -> Iterator[NL]:
309 """Return a post-order iterator for the tree."""
310 for child in self.children:
311 yield from child.post_order()
314 def pre_order(self) -> Iterator[NL]:
315 """Return a pre-order iterator for the tree."""
317 for child in self.children:
318 yield from child.pre_order()
321 def prefix(self) -> Text:
323 The whitespace and comments preceding this node in the input.
325 if not self.children:
327 return self.children[0].prefix
330 def prefix(self, prefix) -> None:
332 self.children[0].prefix = prefix
334 def set_child(self, i: int, child: NL) -> None:
336 Equivalent to 'node.children[i] = child'. This method also sets the
337 child's parent attribute appropriately.
340 self.children[i].parent = None
341 self.children[i] = child
343 self.invalidate_sibling_maps()
345 def insert_child(self, i: int, child: NL) -> None:
347 Equivalent to 'node.children.insert(i, child)'. This method also sets
348 the child's parent attribute appropriately.
351 self.children.insert(i, child)
353 self.invalidate_sibling_maps()
355 def append_child(self, child: NL) -> None:
357 Equivalent to 'node.children.append(child)'. This method also sets the
358 child's parent attribute appropriately.
361 self.children.append(child)
363 self.invalidate_sibling_maps()
365 def invalidate_sibling_maps(self) -> None:
366 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
367 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
369 def update_sibling_maps(self) -> None:
370 _prev: Dict[int, Optional[NL]] = {}
371 _next: Dict[int, Optional[NL]] = {}
372 self.prev_sibling_map = _prev
373 self.next_sibling_map = _next
374 previous: Optional[NL] = None
375 for current in self.children:
376 _prev[id(current)] = previous
377 _next[id(previous)] = current
379 _next[id(current)] = None
384 """Concrete implementation for leaf nodes."""
386 # Default values for instance variables
388 fixers_applied: List[Any]
390 opening_bracket: "Leaf"
391 used_names: Optional[Set[Text]]
392 _prefix = "" # Whitespace and comments preceding this token in the input
393 lineno: int = 0 # Line where this token starts in the input
394 column: int = 0 # Column where this token starts in the input
400 context: Optional[Context] = None,
401 prefix: Optional[Text] = None,
402 fixers_applied: List[Any] = [],
407 Takes a type constant (a token number < 256), a string value, and an
408 optional context keyword argument.
411 assert 0 <= type < 256, type
412 if context is not None:
413 self._prefix, (self.lineno, self.column) = context
416 if prefix is not None:
417 self._prefix = prefix
418 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
421 def __repr__(self) -> str:
422 """Return a canonical string representation."""
423 from .pgen2.token import tok_name
425 assert self.type is not None
426 return "%s(%s, %r)" % (
427 self.__class__.__name__,
428 tok_name.get(self.type, self.type),
432 def __str__(self) -> Text:
434 Return a pretty string representation.
436 This reproduces the input source exactly.
438 return self.prefix + str(self.value)
440 def _eq(self, other) -> bool:
441 """Compare two nodes for equality."""
442 return (self.type, self.value) == (other.type, other.value)
444 def clone(self) -> "Leaf":
445 assert self.type is not None
446 """Return a cloned (deep) copy of self."""
450 (self.prefix, (self.lineno, self.column)),
451 fixers_applied=self.fixers_applied,
454 def leaves(self) -> Iterator["Leaf"]:
457 def post_order(self) -> Iterator["Leaf"]:
458 """Return a post-order iterator for the tree."""
461 def pre_order(self) -> Iterator["Leaf"]:
462 """Return a pre-order iterator for the tree."""
466 def prefix(self) -> Text:
468 The whitespace and comments preceding this token in the input.
473 def prefix(self, prefix) -> None:
475 self._prefix = prefix
478 def convert(gr: Grammar, raw_node: RawNode) -> NL:
480 Convert raw node information to a Node or Leaf instance.
482 This is passed to the parser driver which calls it whenever a reduction of a
483 grammar rule produces a new complete node, so that the tree is build
486 type, value, context, children = raw_node
487 if children or type in gr.number2symbol:
488 # If there's exactly one child, return that child instead of
489 # creating a new node.
490 assert children is not None
491 if len(children) == 1:
493 return Node(type, children, context=context)
495 return Leaf(type, value or "", context=context)
498 _Results = Dict[Text, NL]
501 class BasePattern(object):
504 A pattern is a tree matching pattern.
506 It looks for a specific node type (token or symbol), and
507 optionally for a specific content.
509 This is an abstract base class. There are three concrete
512 - LeafPattern matches a single leaf node;
513 - NodePattern matches a single node (usually non-leaf);
514 - WildcardPattern matches a sequence of nodes of variable length.
517 # Defaults for instance variables
519 type = None # Node type (token if < 256, symbol if >= 256)
520 content: Any = None # Optional content matching pattern
521 name: Optional[Text] = None # Optional name used to store match in results dict
523 def __new__(cls, *args, **kwds):
524 """Constructor that prevents BasePattern from being instantiated."""
525 assert cls is not BasePattern, "Cannot instantiate BasePattern"
526 return object.__new__(cls)
528 def __repr__(self) -> Text:
529 assert self.type is not None
530 args = [type_repr(self.type), self.content, self.name]
531 while args and args[-1] is None:
533 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
535 def _submatch(self, node, results=None) -> bool:
536 raise NotImplementedError
538 def optimize(self) -> "BasePattern":
540 A subclass can define this as a hook for optimizations.
542 Returns either self or another node with the same effect.
546 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
548 Does this pattern exactly match a node?
550 Returns True if it matches, False if not.
552 If results is not None, it must be a dict which will be
553 updated with the nodes matching named subpatterns.
555 Default implementation for non-wildcard patterns.
557 if self.type is not None and node.type != self.type:
559 if self.content is not None:
560 r: Optional[_Results] = None
561 if results is not None:
563 if not self._submatch(node, r):
566 assert results is not None
568 if results is not None and self.name:
569 results[self.name] = node
572 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
574 Does this pattern exactly match a sequence of nodes?
576 Default implementation for non-wildcard patterns.
580 return self.match(nodes[0], results)
582 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
584 Generator yielding all matches for this pattern.
586 Default implementation for non-wildcard patterns.
589 if nodes and self.match(nodes[0], r):
593 class LeafPattern(BasePattern):
596 type: Optional[int] = None,
597 content: Optional[Text] = None,
598 name: Optional[Text] = None,
601 Initializer. Takes optional type, content, and name.
603 The type, if given must be a token type (< 256). If not given,
604 this matches any *leaf* node; the content may still be required.
606 The content, if given, must be a string.
608 If a name is given, the matching node is stored in the results
612 assert 0 <= type < 256, type
613 if content is not None:
614 assert isinstance(content, str), repr(content)
616 self.content = content
619 def match(self, node: NL, results=None):
620 """Override match() to insist on a leaf node."""
621 if not isinstance(node, Leaf):
623 return BasePattern.match(self, node, results)
625 def _submatch(self, node, results=None):
627 Match the pattern's content to the node's children.
629 This assumes the node type matches and self.content is not None.
631 Returns True if it matches, False if not.
633 If results is not None, it must be a dict which will be
634 updated with the nodes matching named subpatterns.
636 When returning False, the results dict may still be updated.
638 return self.content == node.value
641 class NodePattern(BasePattern):
643 wildcards: bool = False
647 type: Optional[int] = None,
648 content: Optional[Iterable[Text]] = None,
649 name: Optional[Text] = None,
652 Initializer. Takes optional type, content, and name.
654 The type, if given, must be a symbol type (>= 256). If the
655 type is None this matches *any* single node (leaf or not),
656 except if content is not None, in which it only matches
657 non-leaf nodes that also match the content pattern.
659 The content, if not None, must be a sequence of Patterns that
660 must match the node's children exactly. If the content is
661 given, the type must not be None.
663 If a name is given, the matching node is stored in the results
667 assert type >= 256, type
668 if content is not None:
669 assert not isinstance(content, str), repr(content)
670 newcontent = list(content)
671 for i, item in enumerate(newcontent):
672 assert isinstance(item, BasePattern), (i, item)
673 if isinstance(item, WildcardPattern):
674 self.wildcards = True
676 self.content = newcontent
679 def _submatch(self, node, results=None) -> bool:
681 Match the pattern's content to the node's children.
683 This assumes the node type matches and self.content is not None.
685 Returns True if it matches, False if not.
687 If results is not None, it must be a dict which will be
688 updated with the nodes matching named subpatterns.
690 When returning False, the results dict may still be updated.
693 for c, r in generate_matches(self.content, node.children):
694 if c == len(node.children):
695 if results is not None:
699 if len(self.content) != len(node.children):
701 for subpattern, child in zip(self.content, node.children):
702 if not subpattern.match(child, results):
707 class WildcardPattern(BasePattern):
710 A wildcard pattern can match zero or more nodes.
712 This has all the flexibility needed to implement patterns like:
716 (...)* (...)+ (...)? (...){m,n}
718 except it always uses non-greedy matching.
726 content: Optional[Text] = None,
729 name: Optional[Text] = None,
735 content: optional sequence of subsequences of patterns;
736 if absent, matches one node;
737 if present, each subsequence is an alternative [*]
738 min: optional minimum number of times to match, default 0
739 max: optional maximum number of times to match, default HUGE
740 name: optional name assigned to this match
742 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
743 equivalent to (a b c | d e | f g h); if content is None,
744 this is equivalent to '.' in regular expression terms.
745 The min and max parameters work as follows:
746 min=0, max=maxint: .*
747 min=1, max=maxint: .+
750 If content is not None, replace the dot with the parenthesized
751 list of alternatives, e.g. (a b c | d e | f g h)*
753 assert 0 <= min <= max <= HUGE, (min, max)
754 if content is not None:
755 f = lambda s: tuple(s)
756 wrapped_content = tuple(map(f, content)) # Protect against alterations
757 # Check sanity of alternatives
758 assert len(wrapped_content), repr(
760 ) # Can't have zero alternatives
761 for alt in wrapped_content:
762 assert len(alt), repr(alt) # Can have empty alternatives
763 self.content = wrapped_content
768 def optimize(self) -> Any:
769 """Optimize certain stacked wildcard patterns."""
772 self.content is not None
773 and len(self.content) == 1
774 and len(self.content[0]) == 1
776 subpattern = self.content[0][0]
777 if self.min == 1 and self.max == 1:
778 if self.content is None:
779 return NodePattern(name=self.name)
780 if subpattern is not None and self.name == subpattern.name:
781 return subpattern.optimize()
784 and isinstance(subpattern, WildcardPattern)
785 and subpattern.min <= 1
786 and self.name == subpattern.name
788 return WildcardPattern(
790 self.min * subpattern.min,
791 self.max * subpattern.max,
796 def match(self, node, results=None) -> bool:
797 """Does this pattern exactly match a node?"""
798 return self.match_seq([node], results)
800 def match_seq(self, nodes, results=None) -> bool:
801 """Does this pattern exactly match a sequence of nodes?"""
802 for c, r in self.generate_matches(nodes):
804 if results is not None:
807 results[self.name] = list(nodes)
811 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
813 Generator yielding matches for a sequence of nodes.
816 nodes: sequence of nodes
819 (count, results) tuples where:
820 count: the match comprises nodes[:count];
821 results: dict containing named submatches.
823 if self.content is None:
824 # Shortcut for special case (see __init__.__doc__)
825 for count in range(self.min, 1 + min(len(nodes), self.max)):
828 r[self.name] = nodes[:count]
830 elif self.name == "bare_name":
831 yield self._bare_name_matches(nodes)
833 # The reason for this is that hitting the recursion limit usually
834 # results in some ugly messages about how RuntimeErrors are being
835 # ignored. We only have to do this on CPython, though, because other
836 # implementations don't have this nasty bug in the first place.
837 if hasattr(sys, "getrefcount"):
838 save_stderr = sys.stderr
839 sys.stderr = StringIO()
841 for count, r in self._recursive_matches(nodes, 0):
843 r[self.name] = nodes[:count]
846 # We fall back to the iterative pattern matching scheme if the recursive
847 # scheme hits the recursion limit.
848 for count, r in self._iterative_matches(nodes):
850 r[self.name] = nodes[:count]
853 if hasattr(sys, "getrefcount"):
854 sys.stderr = save_stderr
856 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
857 """Helper to iteratively yield the matches."""
863 # generate matches that use just one alt from self.content
864 for alt in self.content:
865 for c, r in generate_matches(alt, nodes):
867 results.append((c, r))
869 # for each match, iterate down the nodes
872 for c0, r0 in results:
873 # stop if the entire set of nodes has been matched
874 if c0 < nodelen and c0 <= self.max:
875 for alt in self.content:
876 for c1, r1 in generate_matches(alt, nodes[c0:]):
882 new_results.append((c0 + c1, r))
883 results = new_results
885 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
886 """Special optimized matcher for bare_name."""
888 r = {} # type: _Results
891 while not done and count < max:
893 for leaf in self.content:
894 if leaf[0].match(nodes[count], r):
898 assert self.name is not None
899 r[self.name] = nodes[:count]
902 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
903 """Helper to recursively yield the matches."""
904 assert self.content is not None
905 if count >= self.min:
908 for alt in self.content:
909 for c0, r0 in generate_matches(alt, nodes):
910 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
917 class NegatedPattern(BasePattern):
918 def __init__(self, content: Optional[Any] = None) -> None:
922 The argument is either a pattern or None. If it is None, this
923 only matches an empty sequence (effectively '$' in regex
924 lingo). If it is not None, this matches whenever the argument
925 pattern doesn't have any matches.
927 if content is not None:
928 assert isinstance(content, BasePattern), repr(content)
929 self.content = content
931 def match(self, node, results=None) -> bool:
932 # We never match a node in its entirety
935 def match_seq(self, nodes, results=None) -> bool:
936 # We only match an empty sequence of nodes in its entirety
937 return len(nodes) == 0
939 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
940 if self.content is None:
941 # Return a match if there is an empty sequence
945 # Return a match if the argument pattern has no matches
946 for c, r in self.content.generate_matches(nodes):
951 def generate_matches(
952 patterns: List[BasePattern], nodes: List[NL]
953 ) -> Iterator[Tuple[int, _Results]]:
955 Generator yielding matches for a sequence of patterns and nodes.
958 patterns: a sequence of patterns
959 nodes: a sequence of nodes
962 (count, results) tuples where:
963 count: the entire sequence of patterns matches nodes[:count];
964 results: dict containing named submatches.
969 p, rest = patterns[0], patterns[1:]
970 for c0, r0 in p.generate_matches(nodes):
974 for c1, r1 in generate_matches(rest, nodes[c0:]):
981 _Convert = Callable[[Grammar, RawNode], Any]