All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs
28 from blib2to3.pgen2.grammar import Grammar
30 __author__ = "Guido van Rossum <guido@python.org>"
33 from io import StringIO
35 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
37 _type_reprs: Dict[int, Union[Text, int]] = {}
40 def type_repr(type_num: int) -> Union[Text, int]:
43 from .pygram import python_symbols
45 # printing tokens is possible but not as useful
46 # from .pgen2 import token // token.__dict__.items():
47 for name in dir(python_symbols):
48 val = getattr(python_symbols, name)
50 _type_reprs[val] = name
51 return _type_reprs.setdefault(type_num, type_num)
54 _P = TypeVar("_P", bound="Base")
56 NL = Union["Node", "Leaf"]
57 Context = Tuple[Text, Tuple[int, int]]
58 RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
64 Abstract base class for Node and Leaf.
66 This provides some default functionality and boilerplate using the
69 A node may be a subnode of at most one parent.
72 # Default values for instance variables
73 type: int # int: token number (< 256) or symbol number (>= 256)
74 parent: Optional["Node"] = None # Parent node pointer, or None
75 children: List[NL] # List of subnodes
76 was_changed: bool = False
77 was_checked: bool = False
79 def __new__(cls, *args, **kwds):
80 """Constructor that prevents Base from being instantiated."""
81 assert cls is not Base, "Cannot instantiate Base"
82 return object.__new__(cls)
84 def __eq__(self, other: Any) -> bool:
86 Compare two nodes for equality.
88 This calls the method _eq().
90 if self.__class__ is not other.__class__:
92 return self._eq(other)
95 def prefix(self) -> Text:
96 raise NotImplementedError
98 def _eq(self: _P, other: _P) -> bool:
100 Compare two nodes for equality.
102 This is called by __eq__ and __ne__. It is only called if the two nodes
103 have the same type. This must be implemented by the concrete subclass.
104 Nodes should be considered equal if they have the same structure,
105 ignoring the prefix string and other context information.
107 raise NotImplementedError
109 def __deepcopy__(self: _P, memo: Any) -> _P:
112 def clone(self: _P) -> _P:
114 Return a cloned (deep) copy of self.
116 This must be implemented by the concrete subclass.
118 raise NotImplementedError
120 def post_order(self) -> Iterator[NL]:
122 Return a post-order iterator for the tree.
124 This must be implemented by the concrete subclass.
126 raise NotImplementedError
128 def pre_order(self) -> Iterator[NL]:
130 Return a pre-order iterator for the tree.
132 This must be implemented by the concrete subclass.
134 raise NotImplementedError
136 def replace(self, new: Union[NL, List[NL]]) -> None:
137 """Replace this node with a new one in the parent."""
138 assert self.parent is not None, str(self)
139 assert new is not None
140 if not isinstance(new, list):
144 for ch in self.parent.children:
146 assert not found, (self.parent.children, self, new)
148 l_children.extend(new)
151 l_children.append(ch)
152 assert found, (self.children, self, new)
153 self.parent.children = l_children
154 self.parent.changed()
155 self.parent.invalidate_sibling_maps()
157 x.parent = self.parent
160 def get_lineno(self) -> Optional[int]:
161 """Return the line number which generated the invocant node."""
163 while not isinstance(node, Leaf):
164 if not node.children:
166 node = node.children[0]
169 def changed(self) -> None:
173 self.parent.changed()
174 self.was_changed = True
176 def remove(self) -> Optional[int]:
178 Remove the node from the tree. Returns the position of the node in its
179 parent's children before it was removed.
182 for i, node in enumerate(self.parent.children):
184 del self.parent.children[i]
185 self.parent.changed()
186 self.parent.invalidate_sibling_maps()
192 def next_sibling(self) -> Optional[NL]:
194 The node immediately following the invocant in their parent's children
195 list. If the invocant does not have a next sibling, it is None
197 if self.parent is None:
200 if self.parent.next_sibling_map is None:
201 self.parent.update_sibling_maps()
202 assert self.parent.next_sibling_map is not None
203 return self.parent.next_sibling_map[id(self)]
206 def prev_sibling(self) -> Optional[NL]:
208 The node immediately preceding the invocant in their parent's children
209 list. If the invocant does not have a previous sibling, it is None.
211 if self.parent is None:
214 if self.parent.prev_sibling_map is None:
215 self.parent.update_sibling_maps()
216 assert self.parent.prev_sibling_map is not None
217 return self.parent.prev_sibling_map[id(self)]
219 def leaves(self) -> Iterator["Leaf"]:
220 for child in self.children:
221 yield from child.leaves()
223 def depth(self) -> int:
224 if self.parent is None:
226 return 1 + self.parent.depth()
228 def get_suffix(self) -> Text:
230 Return the string immediately following the invocant node. This is
231 effectively equivalent to node.next_sibling.prefix
233 next_sib = self.next_sibling
236 prefix = next_sib.prefix
242 """Concrete implementation for interior nodes."""
244 fixers_applied: Optional[List[Any]]
245 used_names: Optional[Set[Text]]
251 context: Optional[Any] = None,
252 prefix: Optional[Text] = None,
253 fixers_applied: Optional[List[Any]] = None,
258 Takes a type constant (a symbol number >= 256), a sequence of
259 child nodes, and an optional context keyword argument.
261 As a side effect, the parent pointers of the children are updated.
263 assert type >= 256, type
265 self.children = list(children)
266 for ch in self.children:
267 assert ch.parent is None, repr(ch)
269 self.invalidate_sibling_maps()
270 if prefix is not None:
273 self.fixers_applied = fixers_applied[:]
275 self.fixers_applied = None
277 def __repr__(self) -> Text:
278 """Return a canonical string representation."""
279 assert self.type is not None
280 return "%s(%s, %r)" % (
281 self.__class__.__name__,
282 type_repr(self.type),
286 def __str__(self) -> Text:
288 Return a pretty string representation.
290 This reproduces the input source exactly.
292 return "".join(map(str, self.children))
294 def _eq(self, other) -> bool:
295 """Compare two nodes for equality."""
296 return (self.type, self.children) == (other.type, other.children)
298 def clone(self) -> "Node":
299 assert self.type is not None
300 """Return a cloned (deep) copy of self."""
303 [ch.clone() for ch in self.children],
304 fixers_applied=self.fixers_applied,
307 def post_order(self) -> Iterator[NL]:
308 """Return a post-order iterator for the tree."""
309 for child in self.children:
310 yield from child.post_order()
313 def pre_order(self) -> Iterator[NL]:
314 """Return a pre-order iterator for the tree."""
316 for child in self.children:
317 yield from child.pre_order()
320 def prefix(self) -> Text:
322 The whitespace and comments preceding this node in the input.
324 if not self.children:
326 return self.children[0].prefix
329 def prefix(self, prefix) -> None:
331 self.children[0].prefix = prefix
333 def set_child(self, i: int, child: NL) -> None:
335 Equivalent to 'node.children[i] = child'. This method also sets the
336 child's parent attribute appropriately.
339 self.children[i].parent = None
340 self.children[i] = child
342 self.invalidate_sibling_maps()
344 def insert_child(self, i: int, child: NL) -> None:
346 Equivalent to 'node.children.insert(i, child)'. This method also sets
347 the child's parent attribute appropriately.
350 self.children.insert(i, child)
352 self.invalidate_sibling_maps()
354 def append_child(self, child: NL) -> None:
356 Equivalent to 'node.children.append(child)'. This method also sets the
357 child's parent attribute appropriately.
360 self.children.append(child)
362 self.invalidate_sibling_maps()
364 def invalidate_sibling_maps(self) -> None:
365 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
366 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
368 def update_sibling_maps(self) -> None:
369 _prev: Dict[int, Optional[NL]] = {}
370 _next: Dict[int, Optional[NL]] = {}
371 self.prev_sibling_map = _prev
372 self.next_sibling_map = _next
373 previous: Optional[NL] = None
374 for current in self.children:
375 _prev[id(current)] = previous
376 _next[id(previous)] = current
378 _next[id(current)] = None
383 """Concrete implementation for leaf nodes."""
385 # Default values for instance variables
387 fixers_applied: List[Any]
389 # Changed later in brackets.py
390 opening_bracket: Optional["Leaf"] = None
391 used_names: Optional[Set[Text]]
392 _prefix = "" # Whitespace and comments preceding this token in the input
393 lineno: int = 0 # Line where this token starts in the input
394 column: int = 0 # Column where this token starts in the input
400 context: Optional[Context] = None,
401 prefix: Optional[Text] = None,
402 fixers_applied: List[Any] = [],
403 opening_bracket: Optional["Leaf"] = None,
408 Takes a type constant (a token number < 256), a string value, and an
409 optional context keyword argument.
412 assert 0 <= type < 256, type
413 if context is not None:
414 self._prefix, (self.lineno, self.column) = context
417 if prefix is not None:
418 self._prefix = prefix
419 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
421 self.opening_bracket = opening_bracket
423 def __repr__(self) -> str:
424 """Return a canonical string representation."""
425 from .pgen2.token import tok_name
427 assert self.type is not None
428 return "%s(%s, %r)" % (
429 self.__class__.__name__,
430 tok_name.get(self.type, self.type),
434 def __str__(self) -> Text:
436 Return a pretty string representation.
438 This reproduces the input source exactly.
440 return self._prefix + str(self.value)
442 def _eq(self, other) -> bool:
443 """Compare two nodes for equality."""
444 return (self.type, self.value) == (other.type, other.value)
446 def clone(self) -> "Leaf":
447 assert self.type is not None
448 """Return a cloned (deep) copy of self."""
452 (self.prefix, (self.lineno, self.column)),
453 fixers_applied=self.fixers_applied,
454 opening_bracket=self.opening_bracket,
457 def leaves(self) -> Iterator["Leaf"]:
460 def post_order(self) -> Iterator["Leaf"]:
461 """Return a post-order iterator for the tree."""
464 def pre_order(self) -> Iterator["Leaf"]:
465 """Return a pre-order iterator for the tree."""
469 def prefix(self) -> Text:
471 The whitespace and comments preceding this token in the input.
476 def prefix(self, prefix) -> None:
478 self._prefix = prefix
481 def convert(gr: Grammar, raw_node: RawNode) -> NL:
483 Convert raw node information to a Node or Leaf instance.
485 This is passed to the parser driver which calls it whenever a reduction of a
486 grammar rule produces a new complete node, so that the tree is build
489 type, value, context, children = raw_node
490 if children or type in gr.number2symbol:
491 # If there's exactly one child, return that child instead of
492 # creating a new node.
493 assert children is not None
494 if len(children) == 1:
496 return Node(type, children, context=context)
498 return Leaf(type, value or "", context=context)
501 _Results = Dict[Text, NL]
504 class BasePattern(object):
507 A pattern is a tree matching pattern.
509 It looks for a specific node type (token or symbol), and
510 optionally for a specific content.
512 This is an abstract base class. There are three concrete
515 - LeafPattern matches a single leaf node;
516 - NodePattern matches a single node (usually non-leaf);
517 - WildcardPattern matches a sequence of nodes of variable length.
520 # Defaults for instance variables
522 type = None # Node type (token if < 256, symbol if >= 256)
523 content: Any = None # Optional content matching pattern
524 name: Optional[Text] = None # Optional name used to store match in results dict
526 def __new__(cls, *args, **kwds):
527 """Constructor that prevents BasePattern from being instantiated."""
528 assert cls is not BasePattern, "Cannot instantiate BasePattern"
529 return object.__new__(cls)
531 def __repr__(self) -> Text:
532 assert self.type is not None
533 args = [type_repr(self.type), self.content, self.name]
534 while args and args[-1] is None:
536 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
538 def _submatch(self, node, results=None) -> bool:
539 raise NotImplementedError
541 def optimize(self) -> "BasePattern":
543 A subclass can define this as a hook for optimizations.
545 Returns either self or another node with the same effect.
549 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
551 Does this pattern exactly match a node?
553 Returns True if it matches, False if not.
555 If results is not None, it must be a dict which will be
556 updated with the nodes matching named subpatterns.
558 Default implementation for non-wildcard patterns.
560 if self.type is not None and node.type != self.type:
562 if self.content is not None:
563 r: Optional[_Results] = None
564 if results is not None:
566 if not self._submatch(node, r):
569 assert results is not None
571 if results is not None and self.name:
572 results[self.name] = node
575 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
577 Does this pattern exactly match a sequence of nodes?
579 Default implementation for non-wildcard patterns.
583 return self.match(nodes[0], results)
585 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
587 Generator yielding all matches for this pattern.
589 Default implementation for non-wildcard patterns.
592 if nodes and self.match(nodes[0], r):
596 class LeafPattern(BasePattern):
599 type: Optional[int] = None,
600 content: Optional[Text] = None,
601 name: Optional[Text] = None,
604 Initializer. Takes optional type, content, and name.
606 The type, if given must be a token type (< 256). If not given,
607 this matches any *leaf* node; the content may still be required.
609 The content, if given, must be a string.
611 If a name is given, the matching node is stored in the results
615 assert 0 <= type < 256, type
616 if content is not None:
617 assert isinstance(content, str), repr(content)
619 self.content = content
622 def match(self, node: NL, results=None):
623 """Override match() to insist on a leaf node."""
624 if not isinstance(node, Leaf):
626 return BasePattern.match(self, node, results)
628 def _submatch(self, node, results=None):
630 Match the pattern's content to the node's children.
632 This assumes the node type matches and self.content is not None.
634 Returns True if it matches, False if not.
636 If results is not None, it must be a dict which will be
637 updated with the nodes matching named subpatterns.
639 When returning False, the results dict may still be updated.
641 return self.content == node.value
644 class NodePattern(BasePattern):
646 wildcards: bool = False
650 type: Optional[int] = None,
651 content: Optional[Iterable[Text]] = None,
652 name: Optional[Text] = None,
655 Initializer. Takes optional type, content, and name.
657 The type, if given, must be a symbol type (>= 256). If the
658 type is None this matches *any* single node (leaf or not),
659 except if content is not None, in which it only matches
660 non-leaf nodes that also match the content pattern.
662 The content, if not None, must be a sequence of Patterns that
663 must match the node's children exactly. If the content is
664 given, the type must not be None.
666 If a name is given, the matching node is stored in the results
670 assert type >= 256, type
671 if content is not None:
672 assert not isinstance(content, str), repr(content)
673 newcontent = list(content)
674 for i, item in enumerate(newcontent):
675 assert isinstance(item, BasePattern), (i, item)
676 # I don't even think this code is used anywhere, but it does cause
677 # unreachable errors from mypy. This function's signature does look
678 # odd though *shrug*.
679 if isinstance(item, WildcardPattern): # type: ignore[unreachable]
680 self.wildcards = True # type: ignore[unreachable]
682 self.content = newcontent
685 def _submatch(self, node, results=None) -> bool:
687 Match the pattern's content to the node's children.
689 This assumes the node type matches and self.content is not None.
691 Returns True if it matches, False if not.
693 If results is not None, it must be a dict which will be
694 updated with the nodes matching named subpatterns.
696 When returning False, the results dict may still be updated.
699 for c, r in generate_matches(self.content, node.children):
700 if c == len(node.children):
701 if results is not None:
705 if len(self.content) != len(node.children):
707 for subpattern, child in zip(self.content, node.children):
708 if not subpattern.match(child, results):
713 class WildcardPattern(BasePattern):
716 A wildcard pattern can match zero or more nodes.
718 This has all the flexibility needed to implement patterns like:
722 (...)* (...)+ (...)? (...){m,n}
724 except it always uses non-greedy matching.
732 content: Optional[Text] = None,
735 name: Optional[Text] = None,
741 content: optional sequence of subsequences of patterns;
742 if absent, matches one node;
743 if present, each subsequence is an alternative [*]
744 min: optional minimum number of times to match, default 0
745 max: optional maximum number of times to match, default HUGE
746 name: optional name assigned to this match
748 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
749 equivalent to (a b c | d e | f g h); if content is None,
750 this is equivalent to '.' in regular expression terms.
751 The min and max parameters work as follows:
752 min=0, max=maxint: .*
753 min=1, max=maxint: .+
756 If content is not None, replace the dot with the parenthesized
757 list of alternatives, e.g. (a b c | d e | f g h)*
759 assert 0 <= min <= max <= HUGE, (min, max)
760 if content is not None:
761 f = lambda s: tuple(s)
762 wrapped_content = tuple(map(f, content)) # Protect against alterations
763 # Check sanity of alternatives
764 assert len(wrapped_content), repr(
766 ) # Can't have zero alternatives
767 for alt in wrapped_content:
768 assert len(alt), repr(alt) # Can have empty alternatives
769 self.content = wrapped_content
774 def optimize(self) -> Any:
775 """Optimize certain stacked wildcard patterns."""
778 self.content is not None
779 and len(self.content) == 1
780 and len(self.content[0]) == 1
782 subpattern = self.content[0][0]
783 if self.min == 1 and self.max == 1:
784 if self.content is None:
785 return NodePattern(name=self.name)
786 if subpattern is not None and self.name == subpattern.name:
787 return subpattern.optimize()
790 and isinstance(subpattern, WildcardPattern)
791 and subpattern.min <= 1
792 and self.name == subpattern.name
794 return WildcardPattern(
796 self.min * subpattern.min,
797 self.max * subpattern.max,
802 def match(self, node, results=None) -> bool:
803 """Does this pattern exactly match a node?"""
804 return self.match_seq([node], results)
806 def match_seq(self, nodes, results=None) -> bool:
807 """Does this pattern exactly match a sequence of nodes?"""
808 for c, r in self.generate_matches(nodes):
810 if results is not None:
813 results[self.name] = list(nodes)
817 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
819 Generator yielding matches for a sequence of nodes.
822 nodes: sequence of nodes
825 (count, results) tuples where:
826 count: the match comprises nodes[:count];
827 results: dict containing named submatches.
829 if self.content is None:
830 # Shortcut for special case (see __init__.__doc__)
831 for count in range(self.min, 1 + min(len(nodes), self.max)):
834 r[self.name] = nodes[:count]
836 elif self.name == "bare_name":
837 yield self._bare_name_matches(nodes)
839 # The reason for this is that hitting the recursion limit usually
840 # results in some ugly messages about how RuntimeErrors are being
841 # ignored. We only have to do this on CPython, though, because other
842 # implementations don't have this nasty bug in the first place.
843 if hasattr(sys, "getrefcount"):
844 save_stderr = sys.stderr
845 sys.stderr = StringIO()
847 for count, r in self._recursive_matches(nodes, 0):
849 r[self.name] = nodes[:count]
852 # We fall back to the iterative pattern matching scheme if the recursive
853 # scheme hits the recursion limit.
854 for count, r in self._iterative_matches(nodes):
856 r[self.name] = nodes[:count]
859 if hasattr(sys, "getrefcount"):
860 sys.stderr = save_stderr
862 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
863 """Helper to iteratively yield the matches."""
869 # generate matches that use just one alt from self.content
870 for alt in self.content:
871 for c, r in generate_matches(alt, nodes):
873 results.append((c, r))
875 # for each match, iterate down the nodes
878 for c0, r0 in results:
879 # stop if the entire set of nodes has been matched
880 if c0 < nodelen and c0 <= self.max:
881 for alt in self.content:
882 for c1, r1 in generate_matches(alt, nodes[c0:]):
888 new_results.append((c0 + c1, r))
889 results = new_results
891 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
892 """Special optimized matcher for bare_name."""
894 r = {} # type: _Results
897 while not done and count < max:
899 for leaf in self.content:
900 if leaf[0].match(nodes[count], r):
904 assert self.name is not None
905 r[self.name] = nodes[:count]
908 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
909 """Helper to recursively yield the matches."""
910 assert self.content is not None
911 if count >= self.min:
914 for alt in self.content:
915 for c0, r0 in generate_matches(alt, nodes):
916 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
923 class NegatedPattern(BasePattern):
924 def __init__(self, content: Optional[Any] = None) -> None:
928 The argument is either a pattern or None. If it is None, this
929 only matches an empty sequence (effectively '$' in regex
930 lingo). If it is not None, this matches whenever the argument
931 pattern doesn't have any matches.
933 if content is not None:
934 assert isinstance(content, BasePattern), repr(content)
935 self.content = content
937 def match(self, node, results=None) -> bool:
938 # We never match a node in its entirety
941 def match_seq(self, nodes, results=None) -> bool:
942 # We only match an empty sequence of nodes in its entirety
943 return len(nodes) == 0
945 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
946 if self.content is None:
947 # Return a match if there is an empty sequence
951 # Return a match if the argument pattern has no matches
952 for c, r in self.content.generate_matches(nodes):
957 def generate_matches(
958 patterns: List[BasePattern], nodes: List[NL]
959 ) -> Iterator[Tuple[int, _Results]]:
961 Generator yielding matches for a sequence of patterns and nodes.
964 patterns: a sequence of patterns
965 nodes: a sequence of nodes
968 (count, results) tuples where:
969 count: the entire sequence of patterns matches nodes[:count];
970 results: dict containing named submatches.
975 p, rest = patterns[0], patterns[1:]
976 for c0, r0 in p.generate_matches(nodes):
980 for c1, r1 in generate_matches(rest, nodes[c0:]):