All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs
29 from blib2to3.pgen2.grammar import Grammar
31 __author__ = "Guido van Rossum <guido@python.org>"
34 from io import StringIO
36 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
38 _type_reprs: Dict[int, Union[Text, int]] = {}
41 def type_repr(type_num: int) -> Union[Text, int]:
44 from .pygram import python_symbols
46 # printing tokens is possible but not as useful
47 # from .pgen2 import token // token.__dict__.items():
48 for name in dir(python_symbols):
49 val = getattr(python_symbols, name)
51 _type_reprs[val] = name
52 return _type_reprs.setdefault(type_num, type_num)
55 _P = TypeVar("_P", bound="Base")
57 NL = Union["Node", "Leaf"]
58 Context = Tuple[Text, Tuple[int, int]]
59 RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
65 Abstract base class for Node and Leaf.
67 This provides some default functionality and boilerplate using the
70 A node may be a subnode of at most one parent.
73 # Default values for instance variables
74 type: int # int: token number (< 256) or symbol number (>= 256)
75 parent: Optional["Node"] = None # Parent node pointer, or None
76 children: List[NL] # List of subnodes
77 was_changed: bool = False
78 was_checked: bool = False
80 def __new__(cls, *args, **kwds):
81 """Constructor that prevents Base from being instantiated."""
82 assert cls is not Base, "Cannot instantiate Base"
83 return object.__new__(cls)
85 def __eq__(self, other: Any) -> bool:
87 Compare two nodes for equality.
89 This calls the method _eq().
91 if self.__class__ is not other.__class__:
93 return self._eq(other)
95 __hash__ = None # type: Any # For Py3 compatibility.
98 def prefix(self) -> Text:
99 raise NotImplementedError
101 def _eq(self: _P, other: _P) -> bool:
103 Compare two nodes for equality.
105 This is called by __eq__ and __ne__. It is only called if the two nodes
106 have the same type. This must be implemented by the concrete subclass.
107 Nodes should be considered equal if they have the same structure,
108 ignoring the prefix string and other context information.
110 raise NotImplementedError
112 def __deepcopy__(self: _P, memo: Any) -> _P:
115 def clone(self: _P) -> _P:
117 Return a cloned (deep) copy of self.
119 This must be implemented by the concrete subclass.
121 raise NotImplementedError
123 def post_order(self) -> Iterator[NL]:
125 Return a post-order iterator for the tree.
127 This must be implemented by the concrete subclass.
129 raise NotImplementedError
131 def pre_order(self) -> Iterator[NL]:
133 Return a pre-order iterator for the tree.
135 This must be implemented by the concrete subclass.
137 raise NotImplementedError
139 def replace(self, new: Union[NL, List[NL]]) -> None:
140 """Replace this node with a new one in the parent."""
141 assert self.parent is not None, str(self)
142 assert new is not None
143 if not isinstance(new, list):
147 for ch in self.parent.children:
149 assert not found, (self.parent.children, self, new)
151 l_children.extend(new)
154 l_children.append(ch)
155 assert found, (self.children, self, new)
156 self.parent.children = l_children
157 self.parent.changed()
158 self.parent.invalidate_sibling_maps()
160 x.parent = self.parent
163 def get_lineno(self) -> Optional[int]:
164 """Return the line number which generated the invocant node."""
166 while not isinstance(node, Leaf):
167 if not node.children:
169 node = node.children[0]
172 def changed(self) -> None:
176 self.parent.changed()
177 self.was_changed = True
179 def remove(self) -> Optional[int]:
181 Remove the node from the tree. Returns the position of the node in its
182 parent's children before it was removed.
185 for i, node in enumerate(self.parent.children):
187 del self.parent.children[i]
188 self.parent.changed()
189 self.parent.invalidate_sibling_maps()
195 def next_sibling(self) -> Optional[NL]:
197 The node immediately following the invocant in their parent's children
198 list. If the invocant does not have a next sibling, it is None
200 if self.parent is None:
203 if self.parent.next_sibling_map is None:
204 self.parent.update_sibling_maps()
205 assert self.parent.next_sibling_map is not None
206 return self.parent.next_sibling_map[id(self)]
209 def prev_sibling(self) -> Optional[NL]:
211 The node immediately preceding the invocant in their parent's children
212 list. If the invocant does not have a previous sibling, it is None.
214 if self.parent is None:
217 if self.parent.prev_sibling_map is None:
218 self.parent.update_sibling_maps()
219 assert self.parent.prev_sibling_map is not None
220 return self.parent.prev_sibling_map[id(self)]
222 def leaves(self) -> Iterator["Leaf"]:
223 for child in self.children:
224 yield from child.leaves()
226 def depth(self) -> int:
227 if self.parent is None:
229 return 1 + self.parent.depth()
231 def get_suffix(self) -> Text:
233 Return the string immediately following the invocant node. This is
234 effectively equivalent to node.next_sibling.prefix
236 next_sib = self.next_sibling
239 prefix = next_sib.prefix
245 """Concrete implementation for interior nodes."""
247 fixers_applied: Optional[List[Any]]
248 used_names: Optional[Set[Text]]
254 context: Optional[Any] = None,
255 prefix: Optional[Text] = None,
256 fixers_applied: Optional[List[Any]] = None,
261 Takes a type constant (a symbol number >= 256), a sequence of
262 child nodes, and an optional context keyword argument.
264 As a side effect, the parent pointers of the children are updated.
266 assert type >= 256, type
268 self.children = list(children)
269 for ch in self.children:
270 assert ch.parent is None, repr(ch)
272 self.invalidate_sibling_maps()
273 if prefix is not None:
276 self.fixers_applied = fixers_applied[:]
278 self.fixers_applied = None
280 def __repr__(self) -> Text:
281 """Return a canonical string representation."""
282 assert self.type is not None
283 return "%s(%s, %r)" % (
284 self.__class__.__name__,
285 type_repr(self.type),
289 def __str__(self) -> Text:
291 Return a pretty string representation.
293 This reproduces the input source exactly.
295 return "".join(map(str, self.children))
297 def _eq(self, other) -> bool:
298 """Compare two nodes for equality."""
299 return (self.type, self.children) == (other.type, other.children)
301 def clone(self) -> "Node":
302 assert self.type is not None
303 """Return a cloned (deep) copy of self."""
306 [ch.clone() for ch in self.children],
307 fixers_applied=self.fixers_applied,
310 def post_order(self) -> Iterator[NL]:
311 """Return a post-order iterator for the tree."""
312 for child in self.children:
313 yield from child.post_order()
316 def pre_order(self) -> Iterator[NL]:
317 """Return a pre-order iterator for the tree."""
319 for child in self.children:
320 yield from child.pre_order()
323 def prefix(self) -> Text:
325 The whitespace and comments preceding this node in the input.
327 if not self.children:
329 return self.children[0].prefix
332 def prefix(self, prefix) -> None:
334 self.children[0].prefix = prefix
336 def set_child(self, i: int, child: NL) -> None:
338 Equivalent to 'node.children[i] = child'. This method also sets the
339 child's parent attribute appropriately.
342 self.children[i].parent = None
343 self.children[i] = child
345 self.invalidate_sibling_maps()
347 def insert_child(self, i: int, child: NL) -> None:
349 Equivalent to 'node.children.insert(i, child)'. This method also sets
350 the child's parent attribute appropriately.
353 self.children.insert(i, child)
355 self.invalidate_sibling_maps()
357 def append_child(self, child: NL) -> None:
359 Equivalent to 'node.children.append(child)'. This method also sets the
360 child's parent attribute appropriately.
363 self.children.append(child)
365 self.invalidate_sibling_maps()
367 def invalidate_sibling_maps(self) -> None:
368 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
369 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
371 def update_sibling_maps(self) -> None:
372 _prev: Dict[int, Optional[NL]] = {}
373 _next: Dict[int, Optional[NL]] = {}
374 self.prev_sibling_map = _prev
375 self.next_sibling_map = _next
376 previous: Optional[NL] = None
377 for current in self.children:
378 _prev[id(current)] = previous
379 _next[id(previous)] = current
381 _next[id(current)] = None
386 """Concrete implementation for leaf nodes."""
388 # Default values for instance variables
390 fixers_applied: List[Any]
392 opening_bracket: "Leaf"
393 used_names: Optional[Set[Text]]
394 _prefix = "" # Whitespace and comments preceding this token in the input
395 lineno: int = 0 # Line where this token starts in the input
396 column: int = 0 # Column where this token starts in the input
402 context: Optional[Context] = None,
403 prefix: Optional[Text] = None,
404 fixers_applied: List[Any] = [],
409 Takes a type constant (a token number < 256), a string value, and an
410 optional context keyword argument.
413 assert 0 <= type < 256, type
414 if context is not None:
415 self._prefix, (self.lineno, self.column) = context
418 if prefix is not None:
419 self._prefix = prefix
420 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
423 def __repr__(self) -> str:
424 """Return a canonical string representation."""
425 from .pgen2.token import tok_name
427 assert self.type is not None
428 return "%s(%s, %r)" % (
429 self.__class__.__name__,
430 tok_name.get(self.type, self.type),
434 def __str__(self) -> Text:
436 Return a pretty string representation.
438 This reproduces the input source exactly.
440 return self.prefix + str(self.value)
442 def _eq(self, other) -> bool:
443 """Compare two nodes for equality."""
444 return (self.type, self.value) == (other.type, other.value)
446 def clone(self) -> "Leaf":
447 assert self.type is not None
448 """Return a cloned (deep) copy of self."""
452 (self.prefix, (self.lineno, self.column)),
453 fixers_applied=self.fixers_applied,
456 def leaves(self) -> Iterator["Leaf"]:
459 def post_order(self) -> Iterator["Leaf"]:
460 """Return a post-order iterator for the tree."""
463 def pre_order(self) -> Iterator["Leaf"]:
464 """Return a pre-order iterator for the tree."""
468 def prefix(self) -> Text:
470 The whitespace and comments preceding this token in the input.
475 def prefix(self, prefix) -> None:
477 self._prefix = prefix
480 def convert(gr: Grammar, raw_node: RawNode) -> NL:
482 Convert raw node information to a Node or Leaf instance.
484 This is passed to the parser driver which calls it whenever a reduction of a
485 grammar rule produces a new complete node, so that the tree is build
488 type, value, context, children = raw_node
489 if children or type in gr.number2symbol:
490 # If there's exactly one child, return that child instead of
491 # creating a new node.
492 assert children is not None
493 if len(children) == 1:
495 return Node(type, children, context=context)
497 return Leaf(type, value or "", context=context)
500 _Results = Dict[Text, NL]
503 class BasePattern(object):
506 A pattern is a tree matching pattern.
508 It looks for a specific node type (token or symbol), and
509 optionally for a specific content.
511 This is an abstract base class. There are three concrete
514 - LeafPattern matches a single leaf node;
515 - NodePattern matches a single node (usually non-leaf);
516 - WildcardPattern matches a sequence of nodes of variable length.
519 # Defaults for instance variables
521 type = None # Node type (token if < 256, symbol if >= 256)
522 content: Any = None # Optional content matching pattern
523 name: Optional[Text] = None # Optional name used to store match in results dict
525 def __new__(cls, *args, **kwds):
526 """Constructor that prevents BasePattern from being instantiated."""
527 assert cls is not BasePattern, "Cannot instantiate BasePattern"
528 return object.__new__(cls)
530 def __repr__(self) -> Text:
531 assert self.type is not None
532 args = [type_repr(self.type), self.content, self.name]
533 while args and args[-1] is None:
535 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
537 def _submatch(self, node, results=None) -> bool:
538 raise NotImplementedError
540 def optimize(self) -> "BasePattern":
542 A subclass can define this as a hook for optimizations.
544 Returns either self or another node with the same effect.
548 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
550 Does this pattern exactly match a node?
552 Returns True if it matches, False if not.
554 If results is not None, it must be a dict which will be
555 updated with the nodes matching named subpatterns.
557 Default implementation for non-wildcard patterns.
559 if self.type is not None and node.type != self.type:
561 if self.content is not None:
562 r: Optional[_Results] = None
563 if results is not None:
565 if not self._submatch(node, r):
568 assert results is not None
570 if results is not None and self.name:
571 results[self.name] = node
574 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
576 Does this pattern exactly match a sequence of nodes?
578 Default implementation for non-wildcard patterns.
582 return self.match(nodes[0], results)
584 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
586 Generator yielding all matches for this pattern.
588 Default implementation for non-wildcard patterns.
591 if nodes and self.match(nodes[0], r):
595 class LeafPattern(BasePattern):
598 type: Optional[int] = None,
599 content: Optional[Text] = None,
600 name: Optional[Text] = None,
603 Initializer. Takes optional type, content, and name.
605 The type, if given must be a token type (< 256). If not given,
606 this matches any *leaf* node; the content may still be required.
608 The content, if given, must be a string.
610 If a name is given, the matching node is stored in the results
614 assert 0 <= type < 256, type
615 if content is not None:
616 assert isinstance(content, str), repr(content)
618 self.content = content
621 def match(self, node: NL, results=None):
622 """Override match() to insist on a leaf node."""
623 if not isinstance(node, Leaf):
625 return BasePattern.match(self, node, results)
627 def _submatch(self, node, results=None):
629 Match the pattern's content to the node's children.
631 This assumes the node type matches and self.content is not None.
633 Returns True if it matches, False if not.
635 If results is not None, it must be a dict which will be
636 updated with the nodes matching named subpatterns.
638 When returning False, the results dict may still be updated.
640 return self.content == node.value
643 class NodePattern(BasePattern):
645 wildcards: bool = False
649 type: Optional[int] = None,
650 content: Optional[Iterable[Text]] = None,
651 name: Optional[Text] = None,
654 Initializer. Takes optional type, content, and name.
656 The type, if given, must be a symbol type (>= 256). If the
657 type is None this matches *any* single node (leaf or not),
658 except if content is not None, in which it only matches
659 non-leaf nodes that also match the content pattern.
661 The content, if not None, must be a sequence of Patterns that
662 must match the node's children exactly. If the content is
663 given, the type must not be None.
665 If a name is given, the matching node is stored in the results
669 assert type >= 256, type
670 if content is not None:
671 assert not isinstance(content, str), repr(content)
672 newcontent = list(content)
673 for i, item in enumerate(newcontent):
674 assert isinstance(item, BasePattern), (i, item)
675 if isinstance(item, WildcardPattern):
676 self.wildcards = True
678 self.content = newcontent
681 def _submatch(self, node, results=None) -> bool:
683 Match the pattern's content to the node's children.
685 This assumes the node type matches and self.content is not None.
687 Returns True if it matches, False if not.
689 If results is not None, it must be a dict which will be
690 updated with the nodes matching named subpatterns.
692 When returning False, the results dict may still be updated.
695 for c, r in generate_matches(self.content, node.children):
696 if c == len(node.children):
697 if results is not None:
701 if len(self.content) != len(node.children):
703 for subpattern, child in zip(self.content, node.children):
704 if not subpattern.match(child, results):
709 class WildcardPattern(BasePattern):
712 A wildcard pattern can match zero or more nodes.
714 This has all the flexibility needed to implement patterns like:
718 (...)* (...)+ (...)? (...){m,n}
720 except it always uses non-greedy matching.
728 content: Optional[Text] = None,
731 name: Optional[Text] = None,
737 content: optional sequence of subsequences of patterns;
738 if absent, matches one node;
739 if present, each subsequence is an alternative [*]
740 min: optional minimum number of times to match, default 0
741 max: optional maximum number of times to match, default HUGE
742 name: optional name assigned to this match
744 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
745 equivalent to (a b c | d e | f g h); if content is None,
746 this is equivalent to '.' in regular expression terms.
747 The min and max parameters work as follows:
748 min=0, max=maxint: .*
749 min=1, max=maxint: .+
752 If content is not None, replace the dot with the parenthesized
753 list of alternatives, e.g. (a b c | d e | f g h)*
755 assert 0 <= min <= max <= HUGE, (min, max)
756 if content is not None:
757 f = lambda s: tuple(s)
758 wrapped_content = tuple(map(f, content)) # Protect against alterations
759 # Check sanity of alternatives
760 assert len(wrapped_content), repr(
762 ) # Can't have zero alternatives
763 for alt in wrapped_content:
764 assert len(alt), repr(alt) # Can have empty alternatives
765 self.content = wrapped_content
770 def optimize(self) -> Any:
771 """Optimize certain stacked wildcard patterns."""
774 self.content is not None
775 and len(self.content) == 1
776 and len(self.content[0]) == 1
778 subpattern = self.content[0][0]
779 if self.min == 1 and self.max == 1:
780 if self.content is None:
781 return NodePattern(name=self.name)
782 if subpattern is not None and self.name == subpattern.name:
783 return subpattern.optimize()
786 and isinstance(subpattern, WildcardPattern)
787 and subpattern.min <= 1
788 and self.name == subpattern.name
790 return WildcardPattern(
792 self.min * subpattern.min,
793 self.max * subpattern.max,
798 def match(self, node, results=None) -> bool:
799 """Does this pattern exactly match a node?"""
800 return self.match_seq([node], results)
802 def match_seq(self, nodes, results=None) -> bool:
803 """Does this pattern exactly match a sequence of nodes?"""
804 for c, r in self.generate_matches(nodes):
806 if results is not None:
809 results[self.name] = list(nodes)
813 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
815 Generator yielding matches for a sequence of nodes.
818 nodes: sequence of nodes
821 (count, results) tuples where:
822 count: the match comprises nodes[:count];
823 results: dict containing named submatches.
825 if self.content is None:
826 # Shortcut for special case (see __init__.__doc__)
827 for count in range(self.min, 1 + min(len(nodes), self.max)):
830 r[self.name] = nodes[:count]
832 elif self.name == "bare_name":
833 yield self._bare_name_matches(nodes)
835 # The reason for this is that hitting the recursion limit usually
836 # results in some ugly messages about how RuntimeErrors are being
837 # ignored. We only have to do this on CPython, though, because other
838 # implementations don't have this nasty bug in the first place.
839 if hasattr(sys, "getrefcount"):
840 save_stderr = sys.stderr
841 sys.stderr = StringIO()
843 for count, r in self._recursive_matches(nodes, 0):
845 r[self.name] = nodes[:count]
848 # We fall back to the iterative pattern matching scheme if the recursive
849 # scheme hits the recursion limit.
850 for count, r in self._iterative_matches(nodes):
852 r[self.name] = nodes[:count]
855 if hasattr(sys, "getrefcount"):
856 sys.stderr = save_stderr
858 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
859 """Helper to iteratively yield the matches."""
865 # generate matches that use just one alt from self.content
866 for alt in self.content:
867 for c, r in generate_matches(alt, nodes):
869 results.append((c, r))
871 # for each match, iterate down the nodes
874 for c0, r0 in results:
875 # stop if the entire set of nodes has been matched
876 if c0 < nodelen and c0 <= self.max:
877 for alt in self.content:
878 for c1, r1 in generate_matches(alt, nodes[c0:]):
884 new_results.append((c0 + c1, r))
885 results = new_results
887 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
888 """Special optimized matcher for bare_name."""
890 r = {} # type: _Results
893 while not done and count < max:
895 for leaf in self.content:
896 if leaf[0].match(nodes[count], r):
900 assert self.name is not None
901 r[self.name] = nodes[:count]
904 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
905 """Helper to recursively yield the matches."""
906 assert self.content is not None
907 if count >= self.min:
910 for alt in self.content:
911 for c0, r0 in generate_matches(alt, nodes):
912 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
919 class NegatedPattern(BasePattern):
920 def __init__(self, content: Optional[Any] = None) -> None:
924 The argument is either a pattern or None. If it is None, this
925 only matches an empty sequence (effectively '$' in regex
926 lingo). If it is not None, this matches whenever the argument
927 pattern doesn't have any matches.
929 if content is not None:
930 assert isinstance(content, BasePattern), repr(content)
931 self.content = content
933 def match(self, node, results=None) -> bool:
934 # We never match a node in its entirety
937 def match_seq(self, nodes, results=None) -> bool:
938 # We only match an empty sequence of nodes in its entirety
939 return len(nodes) == 0
941 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
942 if self.content is None:
943 # Return a match if there is an empty sequence
947 # Return a match if the argument pattern has no matches
948 for c, r in self.content.generate_matches(nodes):
953 def generate_matches(
954 patterns: List[BasePattern], nodes: List[NL]
955 ) -> Iterator[Tuple[int, _Results]]:
957 Generator yielding matches for a sequence of patterns and nodes.
960 patterns: a sequence of patterns
961 nodes: a sequence of nodes
964 (count, results) tuples where:
965 count: the entire sequence of patterns matches nodes[:count];
966 results: dict containing named submatches.
971 p, rest = patterns[0], patterns[1:]
972 for c0, r0 in p.generate_matches(nodes):
976 for c1, r1 in generate_matches(rest, nodes[c0:]):
983 _Convert = Callable[[Grammar, RawNode], Any]