All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs, allow-incomplete-defs
28 from blib2to3.pgen2.grammar import Grammar
30 __author__ = "Guido van Rossum <guido@python.org>"
33 from io import StringIO
35 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
37 _type_reprs: Dict[int, Union[Text, int]] = {}
40 def type_repr(type_num: int) -> Union[Text, int]:
43 from .pygram import python_symbols
45 # printing tokens is possible but not as useful
46 # from .pgen2 import token // token.__dict__.items():
47 for name in dir(python_symbols):
48 val = getattr(python_symbols, name)
50 _type_reprs[val] = name
51 return _type_reprs.setdefault(type_num, type_num)
54 _P = TypeVar("_P", bound="Base")
56 NL = Union["Node", "Leaf"]
57 Context = Tuple[Text, Tuple[int, int]]
58 RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
64 Abstract base class for Node and Leaf.
66 This provides some default functionality and boilerplate using the
69 A node may be a subnode of at most one parent.
72 # Default values for instance variables
73 type: int # int: token number (< 256) or symbol number (>= 256)
74 parent: Optional["Node"] = None # Parent node pointer, or None
75 children: List[NL] # List of subnodes
76 was_changed: bool = False
77 was_checked: bool = False
79 def __new__(cls, *args, **kwds):
80 """Constructor that prevents Base from being instantiated."""
81 assert cls is not Base, "Cannot instantiate Base"
82 return object.__new__(cls)
84 def __eq__(self, other: Any) -> bool:
86 Compare two nodes for equality.
88 This calls the method _eq().
90 if self.__class__ is not other.__class__:
92 return self._eq(other)
95 def prefix(self) -> Text:
96 raise NotImplementedError
98 def _eq(self: _P, other: _P) -> bool:
100 Compare two nodes for equality.
102 This is called by __eq__ and __ne__. It is only called if the two nodes
103 have the same type. This must be implemented by the concrete subclass.
104 Nodes should be considered equal if they have the same structure,
105 ignoring the prefix string and other context information.
107 raise NotImplementedError
109 def __deepcopy__(self: _P, memo: Any) -> _P:
112 def clone(self: _P) -> _P:
114 Return a cloned (deep) copy of self.
116 This must be implemented by the concrete subclass.
118 raise NotImplementedError
120 def post_order(self) -> Iterator[NL]:
122 Return a post-order iterator for the tree.
124 This must be implemented by the concrete subclass.
126 raise NotImplementedError
128 def pre_order(self) -> Iterator[NL]:
130 Return a pre-order iterator for the tree.
132 This must be implemented by the concrete subclass.
134 raise NotImplementedError
136 def replace(self, new: Union[NL, List[NL]]) -> None:
137 """Replace this node with a new one in the parent."""
138 assert self.parent is not None, str(self)
139 assert new is not None
140 if not isinstance(new, list):
144 for ch in self.parent.children:
146 assert not found, (self.parent.children, self, new)
148 l_children.extend(new)
151 l_children.append(ch)
152 assert found, (self.children, self, new)
153 self.parent.children = l_children
154 self.parent.changed()
155 self.parent.invalidate_sibling_maps()
157 x.parent = self.parent
160 def get_lineno(self) -> Optional[int]:
161 """Return the line number which generated the invocant node."""
163 while not isinstance(node, Leaf):
164 if not node.children:
166 node = node.children[0]
169 def changed(self) -> None:
173 self.parent.changed()
174 self.was_changed = True
176 def remove(self) -> Optional[int]:
178 Remove the node from the tree. Returns the position of the node in its
179 parent's children before it was removed.
182 for i, node in enumerate(self.parent.children):
184 del self.parent.children[i]
185 self.parent.changed()
186 self.parent.invalidate_sibling_maps()
192 def next_sibling(self) -> Optional[NL]:
194 The node immediately following the invocant in their parent's children
195 list. If the invocant does not have a next sibling, it is None
197 if self.parent is None:
200 if self.parent.next_sibling_map is None:
201 self.parent.update_sibling_maps()
202 assert self.parent.next_sibling_map is not None
203 return self.parent.next_sibling_map[id(self)]
206 def prev_sibling(self) -> Optional[NL]:
208 The node immediately preceding the invocant in their parent's children
209 list. If the invocant does not have a previous sibling, it is None.
211 if self.parent is None:
214 if self.parent.prev_sibling_map is None:
215 self.parent.update_sibling_maps()
216 assert self.parent.prev_sibling_map is not None
217 return self.parent.prev_sibling_map[id(self)]
219 def leaves(self) -> Iterator["Leaf"]:
220 for child in self.children:
221 yield from child.leaves()
223 def depth(self) -> int:
224 if self.parent is None:
226 return 1 + self.parent.depth()
228 def get_suffix(self) -> Text:
230 Return the string immediately following the invocant node. This is
231 effectively equivalent to node.next_sibling.prefix
233 next_sib = self.next_sibling
236 prefix = next_sib.prefix
242 """Concrete implementation for interior nodes."""
244 fixers_applied: Optional[List[Any]]
245 used_names: Optional[Set[Text]]
251 context: Optional[Any] = None,
252 prefix: Optional[Text] = None,
253 fixers_applied: Optional[List[Any]] = None,
258 Takes a type constant (a symbol number >= 256), a sequence of
259 child nodes, and an optional context keyword argument.
261 As a side effect, the parent pointers of the children are updated.
263 assert type >= 256, type
265 self.children = list(children)
266 for ch in self.children:
267 assert ch.parent is None, repr(ch)
269 self.invalidate_sibling_maps()
270 if prefix is not None:
273 self.fixers_applied = fixers_applied[:]
275 self.fixers_applied = None
277 def __repr__(self) -> Text:
278 """Return a canonical string representation."""
279 assert self.type is not None
280 return "%s(%s, %r)" % (
281 self.__class__.__name__,
282 type_repr(self.type),
286 def __str__(self) -> Text:
288 Return a pretty string representation.
290 This reproduces the input source exactly.
292 return "".join(map(str, self.children))
294 def _eq(self, other: Base) -> bool:
295 """Compare two nodes for equality."""
296 return (self.type, self.children) == (other.type, other.children)
298 def clone(self) -> "Node":
299 assert self.type is not None
300 """Return a cloned (deep) copy of self."""
303 [ch.clone() for ch in self.children],
304 fixers_applied=self.fixers_applied,
307 def post_order(self) -> Iterator[NL]:
308 """Return a post-order iterator for the tree."""
309 for child in self.children:
310 yield from child.post_order()
313 def pre_order(self) -> Iterator[NL]:
314 """Return a pre-order iterator for the tree."""
316 for child in self.children:
317 yield from child.pre_order()
320 def prefix(self) -> Text:
322 The whitespace and comments preceding this node in the input.
324 if not self.children:
326 return self.children[0].prefix
329 def prefix(self, prefix: Text) -> None:
331 self.children[0].prefix = prefix
333 def set_child(self, i: int, child: NL) -> None:
335 Equivalent to 'node.children[i] = child'. This method also sets the
336 child's parent attribute appropriately.
339 self.children[i].parent = None
340 self.children[i] = child
342 self.invalidate_sibling_maps()
344 def insert_child(self, i: int, child: NL) -> None:
346 Equivalent to 'node.children.insert(i, child)'. This method also sets
347 the child's parent attribute appropriately.
350 self.children.insert(i, child)
352 self.invalidate_sibling_maps()
354 def append_child(self, child: NL) -> None:
356 Equivalent to 'node.children.append(child)'. This method also sets the
357 child's parent attribute appropriately.
360 self.children.append(child)
362 self.invalidate_sibling_maps()
364 def invalidate_sibling_maps(self) -> None:
365 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
366 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
368 def update_sibling_maps(self) -> None:
369 _prev: Dict[int, Optional[NL]] = {}
370 _next: Dict[int, Optional[NL]] = {}
371 self.prev_sibling_map = _prev
372 self.next_sibling_map = _next
373 previous: Optional[NL] = None
374 for current in self.children:
375 _prev[id(current)] = previous
376 _next[id(previous)] = current
378 _next[id(current)] = None
383 """Concrete implementation for leaf nodes."""
385 # Default values for instance variables
387 fixers_applied: List[Any]
389 # Changed later in brackets.py
390 opening_bracket: Optional["Leaf"] = None
391 used_names: Optional[Set[Text]]
392 _prefix = "" # Whitespace and comments preceding this token in the input
393 lineno: int = 0 # Line where this token starts in the input
394 column: int = 0 # Column where this token starts in the input
395 # If not None, this Leaf is created by converting a block of fmt off/skip
396 # code, and `fmt_pass_converted_first_leaf` points to the first Leaf in the
398 fmt_pass_converted_first_leaf: Optional["Leaf"] = None
404 context: Optional[Context] = None,
405 prefix: Optional[Text] = None,
406 fixers_applied: List[Any] = [],
407 opening_bracket: Optional["Leaf"] = None,
408 fmt_pass_converted_first_leaf: Optional["Leaf"] = None,
413 Takes a type constant (a token number < 256), a string value, and an
414 optional context keyword argument.
417 assert 0 <= type < 256, type
418 if context is not None:
419 self._prefix, (self.lineno, self.column) = context
422 if prefix is not None:
423 self._prefix = prefix
424 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
426 self.opening_bracket = opening_bracket
427 self.fmt_pass_converted_first_leaf = fmt_pass_converted_first_leaf
429 def __repr__(self) -> str:
430 """Return a canonical string representation."""
431 from .pgen2.token import tok_name
433 assert self.type is not None
434 return "%s(%s, %r)" % (
435 self.__class__.__name__,
436 tok_name.get(self.type, self.type),
440 def __str__(self) -> Text:
442 Return a pretty string representation.
444 This reproduces the input source exactly.
446 return self._prefix + str(self.value)
448 def _eq(self, other: "Leaf") -> bool:
449 """Compare two nodes for equality."""
450 return (self.type, self.value) == (other.type, other.value)
452 def clone(self) -> "Leaf":
453 assert self.type is not None
454 """Return a cloned (deep) copy of self."""
458 (self.prefix, (self.lineno, self.column)),
459 fixers_applied=self.fixers_applied,
462 def leaves(self) -> Iterator["Leaf"]:
465 def post_order(self) -> Iterator["Leaf"]:
466 """Return a post-order iterator for the tree."""
469 def pre_order(self) -> Iterator["Leaf"]:
470 """Return a pre-order iterator for the tree."""
474 def prefix(self) -> Text:
476 The whitespace and comments preceding this token in the input.
481 def prefix(self, prefix: Text) -> None:
483 self._prefix = prefix
486 def convert(gr: Grammar, raw_node: RawNode) -> NL:
488 Convert raw node information to a Node or Leaf instance.
490 This is passed to the parser driver which calls it whenever a reduction of a
491 grammar rule produces a new complete node, so that the tree is build
494 type, value, context, children = raw_node
495 if children or type in gr.number2symbol:
496 # If there's exactly one child, return that child instead of
497 # creating a new node.
498 assert children is not None
499 if len(children) == 1:
501 return Node(type, children, context=context)
503 return Leaf(type, value or "", context=context)
506 _Results = Dict[Text, NL]
509 class BasePattern(object):
512 A pattern is a tree matching pattern.
514 It looks for a specific node type (token or symbol), and
515 optionally for a specific content.
517 This is an abstract base class. There are three concrete
520 - LeafPattern matches a single leaf node;
521 - NodePattern matches a single node (usually non-leaf);
522 - WildcardPattern matches a sequence of nodes of variable length.
525 # Defaults for instance variables
527 type = None # Node type (token if < 256, symbol if >= 256)
528 content: Any = None # Optional content matching pattern
529 name: Optional[Text] = None # Optional name used to store match in results dict
531 def __new__(cls, *args, **kwds):
532 """Constructor that prevents BasePattern from being instantiated."""
533 assert cls is not BasePattern, "Cannot instantiate BasePattern"
534 return object.__new__(cls)
536 def __repr__(self) -> Text:
537 assert self.type is not None
538 args = [type_repr(self.type), self.content, self.name]
539 while args and args[-1] is None:
541 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
543 def _submatch(self, node, results=None) -> bool:
544 raise NotImplementedError
546 def optimize(self) -> "BasePattern":
548 A subclass can define this as a hook for optimizations.
550 Returns either self or another node with the same effect.
554 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
556 Does this pattern exactly match a node?
558 Returns True if it matches, False if not.
560 If results is not None, it must be a dict which will be
561 updated with the nodes matching named subpatterns.
563 Default implementation for non-wildcard patterns.
565 if self.type is not None and node.type != self.type:
567 if self.content is not None:
568 r: Optional[_Results] = None
569 if results is not None:
571 if not self._submatch(node, r):
574 assert results is not None
576 if results is not None and self.name:
577 results[self.name] = node
580 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
582 Does this pattern exactly match a sequence of nodes?
584 Default implementation for non-wildcard patterns.
588 return self.match(nodes[0], results)
590 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
592 Generator yielding all matches for this pattern.
594 Default implementation for non-wildcard patterns.
597 if nodes and self.match(nodes[0], r):
601 class LeafPattern(BasePattern):
604 type: Optional[int] = None,
605 content: Optional[Text] = None,
606 name: Optional[Text] = None,
609 Initializer. Takes optional type, content, and name.
611 The type, if given must be a token type (< 256). If not given,
612 this matches any *leaf* node; the content may still be required.
614 The content, if given, must be a string.
616 If a name is given, the matching node is stored in the results
620 assert 0 <= type < 256, type
621 if content is not None:
622 assert isinstance(content, str), repr(content)
624 self.content = content
627 def match(self, node: NL, results=None) -> bool:
628 """Override match() to insist on a leaf node."""
629 if not isinstance(node, Leaf):
631 return BasePattern.match(self, node, results)
633 def _submatch(self, node, results=None):
635 Match the pattern's content to the node's children.
637 This assumes the node type matches and self.content is not None.
639 Returns True if it matches, False if not.
641 If results is not None, it must be a dict which will be
642 updated with the nodes matching named subpatterns.
644 When returning False, the results dict may still be updated.
646 return self.content == node.value
649 class NodePattern(BasePattern):
651 wildcards: bool = False
655 type: Optional[int] = None,
656 content: Optional[Iterable[Text]] = None,
657 name: Optional[Text] = None,
660 Initializer. Takes optional type, content, and name.
662 The type, if given, must be a symbol type (>= 256). If the
663 type is None this matches *any* single node (leaf or not),
664 except if content is not None, in which it only matches
665 non-leaf nodes that also match the content pattern.
667 The content, if not None, must be a sequence of Patterns that
668 must match the node's children exactly. If the content is
669 given, the type must not be None.
671 If a name is given, the matching node is stored in the results
675 assert type >= 256, type
676 if content is not None:
677 assert not isinstance(content, str), repr(content)
678 newcontent = list(content)
679 for i, item in enumerate(newcontent):
680 assert isinstance(item, BasePattern), (i, item)
681 # I don't even think this code is used anywhere, but it does cause
682 # unreachable errors from mypy. This function's signature does look
683 # odd though *shrug*.
684 if isinstance(item, WildcardPattern): # type: ignore[unreachable]
685 self.wildcards = True # type: ignore[unreachable]
687 self.content = newcontent # TODO: this is unbound when content is None
690 def _submatch(self, node, results=None) -> bool:
692 Match the pattern's content to the node's children.
694 This assumes the node type matches and self.content is not None.
696 Returns True if it matches, False if not.
698 If results is not None, it must be a dict which will be
699 updated with the nodes matching named subpatterns.
701 When returning False, the results dict may still be updated.
704 for c, r in generate_matches(self.content, node.children):
705 if c == len(node.children):
706 if results is not None:
710 if len(self.content) != len(node.children):
712 for subpattern, child in zip(self.content, node.children):
713 if not subpattern.match(child, results):
718 class WildcardPattern(BasePattern):
721 A wildcard pattern can match zero or more nodes.
723 This has all the flexibility needed to implement patterns like:
727 (...)* (...)+ (...)? (...){m,n}
729 except it always uses non-greedy matching.
737 content: Optional[Text] = None,
740 name: Optional[Text] = None,
746 content: optional sequence of subsequences of patterns;
747 if absent, matches one node;
748 if present, each subsequence is an alternative [*]
749 min: optional minimum number of times to match, default 0
750 max: optional maximum number of times to match, default HUGE
751 name: optional name assigned to this match
753 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
754 equivalent to (a b c | d e | f g h); if content is None,
755 this is equivalent to '.' in regular expression terms.
756 The min and max parameters work as follows:
757 min=0, max=maxint: .*
758 min=1, max=maxint: .+
761 If content is not None, replace the dot with the parenthesized
762 list of alternatives, e.g. (a b c | d e | f g h)*
764 assert 0 <= min <= max <= HUGE, (min, max)
765 if content is not None:
766 f = lambda s: tuple(s)
767 wrapped_content = tuple(map(f, content)) # Protect against alterations
768 # Check sanity of alternatives
769 assert len(wrapped_content), repr(
771 ) # Can't have zero alternatives
772 for alt in wrapped_content:
773 assert len(alt), repr(alt) # Can have empty alternatives
774 self.content = wrapped_content
779 def optimize(self) -> Any:
780 """Optimize certain stacked wildcard patterns."""
783 self.content is not None
784 and len(self.content) == 1
785 and len(self.content[0]) == 1
787 subpattern = self.content[0][0]
788 if self.min == 1 and self.max == 1:
789 if self.content is None:
790 return NodePattern(name=self.name)
791 if subpattern is not None and self.name == subpattern.name:
792 return subpattern.optimize()
795 and isinstance(subpattern, WildcardPattern)
796 and subpattern.min <= 1
797 and self.name == subpattern.name
799 return WildcardPattern(
801 self.min * subpattern.min,
802 self.max * subpattern.max,
807 def match(self, node, results=None) -> bool:
808 """Does this pattern exactly match a node?"""
809 return self.match_seq([node], results)
811 def match_seq(self, nodes, results=None) -> bool:
812 """Does this pattern exactly match a sequence of nodes?"""
813 for c, r in self.generate_matches(nodes):
815 if results is not None:
818 results[self.name] = list(nodes)
822 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
824 Generator yielding matches for a sequence of nodes.
827 nodes: sequence of nodes
830 (count, results) tuples where:
831 count: the match comprises nodes[:count];
832 results: dict containing named submatches.
834 if self.content is None:
835 # Shortcut for special case (see __init__.__doc__)
836 for count in range(self.min, 1 + min(len(nodes), self.max)):
839 r[self.name] = nodes[:count]
841 elif self.name == "bare_name":
842 yield self._bare_name_matches(nodes)
844 # The reason for this is that hitting the recursion limit usually
845 # results in some ugly messages about how RuntimeErrors are being
846 # ignored. We only have to do this on CPython, though, because other
847 # implementations don't have this nasty bug in the first place.
848 if hasattr(sys, "getrefcount"):
849 save_stderr = sys.stderr
850 sys.stderr = StringIO()
852 for count, r in self._recursive_matches(nodes, 0):
854 r[self.name] = nodes[:count]
857 # We fall back to the iterative pattern matching scheme if the recursive
858 # scheme hits the recursion limit.
859 for count, r in self._iterative_matches(nodes):
861 r[self.name] = nodes[:count]
864 if hasattr(sys, "getrefcount"):
865 sys.stderr = save_stderr
867 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
868 """Helper to iteratively yield the matches."""
874 # generate matches that use just one alt from self.content
875 for alt in self.content:
876 for c, r in generate_matches(alt, nodes):
878 results.append((c, r))
880 # for each match, iterate down the nodes
883 for c0, r0 in results:
884 # stop if the entire set of nodes has been matched
885 if c0 < nodelen and c0 <= self.max:
886 for alt in self.content:
887 for c1, r1 in generate_matches(alt, nodes[c0:]):
893 new_results.append((c0 + c1, r))
894 results = new_results
896 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
897 """Special optimized matcher for bare_name."""
899 r = {} # type: _Results
902 while not done and count < max:
904 for leaf in self.content:
905 if leaf[0].match(nodes[count], r):
909 assert self.name is not None
910 r[self.name] = nodes[:count]
913 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
914 """Helper to recursively yield the matches."""
915 assert self.content is not None
916 if count >= self.min:
919 for alt in self.content:
920 for c0, r0 in generate_matches(alt, nodes):
921 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
928 class NegatedPattern(BasePattern):
929 def __init__(self, content: Optional[BasePattern] = None) -> None:
933 The argument is either a pattern or None. If it is None, this
934 only matches an empty sequence (effectively '$' in regex
935 lingo). If it is not None, this matches whenever the argument
936 pattern doesn't have any matches.
938 if content is not None:
939 assert isinstance(content, BasePattern), repr(content)
940 self.content = content
942 def match(self, node, results=None) -> bool:
943 # We never match a node in its entirety
946 def match_seq(self, nodes, results=None) -> bool:
947 # We only match an empty sequence of nodes in its entirety
948 return len(nodes) == 0
950 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
951 if self.content is None:
952 # Return a match if there is an empty sequence
956 # Return a match if the argument pattern has no matches
957 for c, r in self.content.generate_matches(nodes):
962 def generate_matches(
963 patterns: List[BasePattern], nodes: List[NL]
964 ) -> Iterator[Tuple[int, _Results]]:
966 Generator yielding matches for a sequence of patterns and nodes.
969 patterns: a sequence of patterns
970 nodes: a sequence of nodes
973 (count, results) tuples where:
974 count: the entire sequence of patterns matches nodes[:count];
975 results: dict containing named submatches.
980 p, rest = patterns[0], patterns[1:]
981 for c0, r0 in p.generate_matches(nodes):
985 for c1, r1 in generate_matches(rest, nodes[c0:]):