All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs
28 from blib2to3.pgen2.grammar import Grammar
30 __author__ = "Guido van Rossum <guido@python.org>"
33 from io import StringIO
35 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
37 _type_reprs: Dict[int, Union[Text, int]] = {}
40 def type_repr(type_num: int) -> Union[Text, int]:
43 from .pygram import python_symbols
45 # printing tokens is possible but not as useful
46 # from .pgen2 import token // token.__dict__.items():
47 for name in dir(python_symbols):
48 val = getattr(python_symbols, name)
50 _type_reprs[val] = name
51 return _type_reprs.setdefault(type_num, type_num)
54 _P = TypeVar("_P", bound="Base")
56 NL = Union["Node", "Leaf"]
57 Context = Tuple[Text, Tuple[int, int]]
58 RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
64 Abstract base class for Node and Leaf.
66 This provides some default functionality and boilerplate using the
69 A node may be a subnode of at most one parent.
72 # Default values for instance variables
73 type: int # int: token number (< 256) or symbol number (>= 256)
74 parent: Optional["Node"] = None # Parent node pointer, or None
75 children: List[NL] # List of subnodes
76 was_changed: bool = False
77 was_checked: bool = False
79 def __new__(cls, *args, **kwds):
80 """Constructor that prevents Base from being instantiated."""
81 assert cls is not Base, "Cannot instantiate Base"
82 return object.__new__(cls)
84 def __eq__(self, other: Any) -> bool:
86 Compare two nodes for equality.
88 This calls the method _eq().
90 if self.__class__ is not other.__class__:
92 return self._eq(other)
95 def prefix(self) -> Text:
96 raise NotImplementedError
98 def _eq(self: _P, other: _P) -> bool:
100 Compare two nodes for equality.
102 This is called by __eq__ and __ne__. It is only called if the two nodes
103 have the same type. This must be implemented by the concrete subclass.
104 Nodes should be considered equal if they have the same structure,
105 ignoring the prefix string and other context information.
107 raise NotImplementedError
109 def __deepcopy__(self: _P, memo: Any) -> _P:
112 def clone(self: _P) -> _P:
114 Return a cloned (deep) copy of self.
116 This must be implemented by the concrete subclass.
118 raise NotImplementedError
120 def post_order(self) -> Iterator[NL]:
122 Return a post-order iterator for the tree.
124 This must be implemented by the concrete subclass.
126 raise NotImplementedError
128 def pre_order(self) -> Iterator[NL]:
130 Return a pre-order iterator for the tree.
132 This must be implemented by the concrete subclass.
134 raise NotImplementedError
136 def replace(self, new: Union[NL, List[NL]]) -> None:
137 """Replace this node with a new one in the parent."""
138 assert self.parent is not None, str(self)
139 assert new is not None
140 if not isinstance(new, list):
144 for ch in self.parent.children:
146 assert not found, (self.parent.children, self, new)
148 l_children.extend(new)
151 l_children.append(ch)
152 assert found, (self.children, self, new)
153 self.parent.children = l_children
154 self.parent.changed()
155 self.parent.invalidate_sibling_maps()
157 x.parent = self.parent
160 def get_lineno(self) -> Optional[int]:
161 """Return the line number which generated the invocant node."""
163 while not isinstance(node, Leaf):
164 if not node.children:
166 node = node.children[0]
169 def changed(self) -> None:
173 self.parent.changed()
174 self.was_changed = True
176 def remove(self) -> Optional[int]:
178 Remove the node from the tree. Returns the position of the node in its
179 parent's children before it was removed.
182 for i, node in enumerate(self.parent.children):
184 del self.parent.children[i]
185 self.parent.changed()
186 self.parent.invalidate_sibling_maps()
192 def next_sibling(self) -> Optional[NL]:
194 The node immediately following the invocant in their parent's children
195 list. If the invocant does not have a next sibling, it is None
197 if self.parent is None:
200 if self.parent.next_sibling_map is None:
201 self.parent.update_sibling_maps()
202 assert self.parent.next_sibling_map is not None
203 return self.parent.next_sibling_map[id(self)]
206 def prev_sibling(self) -> Optional[NL]:
208 The node immediately preceding the invocant in their parent's children
209 list. If the invocant does not have a previous sibling, it is None.
211 if self.parent is None:
214 if self.parent.prev_sibling_map is None:
215 self.parent.update_sibling_maps()
216 assert self.parent.prev_sibling_map is not None
217 return self.parent.prev_sibling_map[id(self)]
219 def leaves(self) -> Iterator["Leaf"]:
220 for child in self.children:
221 yield from child.leaves()
223 def depth(self) -> int:
224 if self.parent is None:
226 return 1 + self.parent.depth()
228 def get_suffix(self) -> Text:
230 Return the string immediately following the invocant node. This is
231 effectively equivalent to node.next_sibling.prefix
233 next_sib = self.next_sibling
236 prefix = next_sib.prefix
242 """Concrete implementation for interior nodes."""
244 fixers_applied: Optional[List[Any]]
245 used_names: Optional[Set[Text]]
251 context: Optional[Any] = None,
252 prefix: Optional[Text] = None,
253 fixers_applied: Optional[List[Any]] = None,
258 Takes a type constant (a symbol number >= 256), a sequence of
259 child nodes, and an optional context keyword argument.
261 As a side effect, the parent pointers of the children are updated.
263 assert type >= 256, type
265 self.children = list(children)
266 for ch in self.children:
267 assert ch.parent is None, repr(ch)
269 self.invalidate_sibling_maps()
270 if prefix is not None:
273 self.fixers_applied = fixers_applied[:]
275 self.fixers_applied = None
277 def __repr__(self) -> Text:
278 """Return a canonical string representation."""
279 assert self.type is not None
280 return "%s(%s, %r)" % (
281 self.__class__.__name__,
282 type_repr(self.type),
286 def __str__(self) -> Text:
288 Return a pretty string representation.
290 This reproduces the input source exactly.
292 return "".join(map(str, self.children))
294 def _eq(self, other) -> bool:
295 """Compare two nodes for equality."""
296 return (self.type, self.children) == (other.type, other.children)
298 def clone(self) -> "Node":
299 assert self.type is not None
300 """Return a cloned (deep) copy of self."""
303 [ch.clone() for ch in self.children],
304 fixers_applied=self.fixers_applied,
307 def post_order(self) -> Iterator[NL]:
308 """Return a post-order iterator for the tree."""
309 for child in self.children:
310 yield from child.post_order()
313 def pre_order(self) -> Iterator[NL]:
314 """Return a pre-order iterator for the tree."""
316 for child in self.children:
317 yield from child.pre_order()
320 def prefix(self) -> Text:
322 The whitespace and comments preceding this node in the input.
324 if not self.children:
326 return self.children[0].prefix
329 def prefix(self, prefix) -> None:
331 self.children[0].prefix = prefix
333 def set_child(self, i: int, child: NL) -> None:
335 Equivalent to 'node.children[i] = child'. This method also sets the
336 child's parent attribute appropriately.
339 self.children[i].parent = None
340 self.children[i] = child
342 self.invalidate_sibling_maps()
344 def insert_child(self, i: int, child: NL) -> None:
346 Equivalent to 'node.children.insert(i, child)'. This method also sets
347 the child's parent attribute appropriately.
350 self.children.insert(i, child)
352 self.invalidate_sibling_maps()
354 def append_child(self, child: NL) -> None:
356 Equivalent to 'node.children.append(child)'. This method also sets the
357 child's parent attribute appropriately.
360 self.children.append(child)
362 self.invalidate_sibling_maps()
364 def invalidate_sibling_maps(self) -> None:
365 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
366 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
368 def update_sibling_maps(self) -> None:
369 _prev: Dict[int, Optional[NL]] = {}
370 _next: Dict[int, Optional[NL]] = {}
371 self.prev_sibling_map = _prev
372 self.next_sibling_map = _next
373 previous: Optional[NL] = None
374 for current in self.children:
375 _prev[id(current)] = previous
376 _next[id(previous)] = current
378 _next[id(current)] = None
383 """Concrete implementation for leaf nodes."""
385 # Default values for instance variables
387 fixers_applied: List[Any]
389 # Changed later in brackets.py
390 opening_bracket: Optional["Leaf"] = None
391 used_names: Optional[Set[Text]]
392 _prefix = "" # Whitespace and comments preceding this token in the input
393 lineno: int = 0 # Line where this token starts in the input
394 column: int = 0 # Column where this token starts in the input
400 context: Optional[Context] = None,
401 prefix: Optional[Text] = None,
402 fixers_applied: List[Any] = [],
403 opening_bracket: Optional["Leaf"] = None,
408 Takes a type constant (a token number < 256), a string value, and an
409 optional context keyword argument.
412 assert 0 <= type < 256, type
413 if context is not None:
414 self._prefix, (self.lineno, self.column) = context
417 if prefix is not None:
418 self._prefix = prefix
419 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
421 self.opening_bracket = opening_bracket
423 def __repr__(self) -> str:
424 """Return a canonical string representation."""
425 from .pgen2.token import tok_name
427 assert self.type is not None
428 return "%s(%s, %r)" % (
429 self.__class__.__name__,
430 tok_name.get(self.type, self.type),
434 def __str__(self) -> Text:
436 Return a pretty string representation.
438 This reproduces the input source exactly.
440 return self._prefix + str(self.value)
442 def _eq(self, other) -> bool:
443 """Compare two nodes for equality."""
444 return (self.type, self.value) == (other.type, other.value)
446 def clone(self) -> "Leaf":
447 assert self.type is not None
448 """Return a cloned (deep) copy of self."""
452 (self.prefix, (self.lineno, self.column)),
453 fixers_applied=self.fixers_applied,
456 def leaves(self) -> Iterator["Leaf"]:
459 def post_order(self) -> Iterator["Leaf"]:
460 """Return a post-order iterator for the tree."""
463 def pre_order(self) -> Iterator["Leaf"]:
464 """Return a pre-order iterator for the tree."""
468 def prefix(self) -> Text:
470 The whitespace and comments preceding this token in the input.
475 def prefix(self, prefix) -> None:
477 self._prefix = prefix
480 def convert(gr: Grammar, raw_node: RawNode) -> NL:
482 Convert raw node information to a Node or Leaf instance.
484 This is passed to the parser driver which calls it whenever a reduction of a
485 grammar rule produces a new complete node, so that the tree is build
488 type, value, context, children = raw_node
489 if children or type in gr.number2symbol:
490 # If there's exactly one child, return that child instead of
491 # creating a new node.
492 assert children is not None
493 if len(children) == 1:
495 return Node(type, children, context=context)
497 return Leaf(type, value or "", context=context)
500 _Results = Dict[Text, NL]
503 class BasePattern(object):
506 A pattern is a tree matching pattern.
508 It looks for a specific node type (token or symbol), and
509 optionally for a specific content.
511 This is an abstract base class. There are three concrete
514 - LeafPattern matches a single leaf node;
515 - NodePattern matches a single node (usually non-leaf);
516 - WildcardPattern matches a sequence of nodes of variable length.
519 # Defaults for instance variables
521 type = None # Node type (token if < 256, symbol if >= 256)
522 content: Any = None # Optional content matching pattern
523 name: Optional[Text] = None # Optional name used to store match in results dict
525 def __new__(cls, *args, **kwds):
526 """Constructor that prevents BasePattern from being instantiated."""
527 assert cls is not BasePattern, "Cannot instantiate BasePattern"
528 return object.__new__(cls)
530 def __repr__(self) -> Text:
531 assert self.type is not None
532 args = [type_repr(self.type), self.content, self.name]
533 while args and args[-1] is None:
535 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
537 def _submatch(self, node, results=None) -> bool:
538 raise NotImplementedError
540 def optimize(self) -> "BasePattern":
542 A subclass can define this as a hook for optimizations.
544 Returns either self or another node with the same effect.
548 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
550 Does this pattern exactly match a node?
552 Returns True if it matches, False if not.
554 If results is not None, it must be a dict which will be
555 updated with the nodes matching named subpatterns.
557 Default implementation for non-wildcard patterns.
559 if self.type is not None and node.type != self.type:
561 if self.content is not None:
562 r: Optional[_Results] = None
563 if results is not None:
565 if not self._submatch(node, r):
568 assert results is not None
570 if results is not None and self.name:
571 results[self.name] = node
574 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
576 Does this pattern exactly match a sequence of nodes?
578 Default implementation for non-wildcard patterns.
582 return self.match(nodes[0], results)
584 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
586 Generator yielding all matches for this pattern.
588 Default implementation for non-wildcard patterns.
591 if nodes and self.match(nodes[0], r):
595 class LeafPattern(BasePattern):
598 type: Optional[int] = None,
599 content: Optional[Text] = None,
600 name: Optional[Text] = None,
603 Initializer. Takes optional type, content, and name.
605 The type, if given must be a token type (< 256). If not given,
606 this matches any *leaf* node; the content may still be required.
608 The content, if given, must be a string.
610 If a name is given, the matching node is stored in the results
614 assert 0 <= type < 256, type
615 if content is not None:
616 assert isinstance(content, str), repr(content)
618 self.content = content
621 def match(self, node: NL, results=None):
622 """Override match() to insist on a leaf node."""
623 if not isinstance(node, Leaf):
625 return BasePattern.match(self, node, results)
627 def _submatch(self, node, results=None):
629 Match the pattern's content to the node's children.
631 This assumes the node type matches and self.content is not None.
633 Returns True if it matches, False if not.
635 If results is not None, it must be a dict which will be
636 updated with the nodes matching named subpatterns.
638 When returning False, the results dict may still be updated.
640 return self.content == node.value
643 class NodePattern(BasePattern):
645 wildcards: bool = False
649 type: Optional[int] = None,
650 content: Optional[Iterable[Text]] = None,
651 name: Optional[Text] = None,
654 Initializer. Takes optional type, content, and name.
656 The type, if given, must be a symbol type (>= 256). If the
657 type is None this matches *any* single node (leaf or not),
658 except if content is not None, in which it only matches
659 non-leaf nodes that also match the content pattern.
661 The content, if not None, must be a sequence of Patterns that
662 must match the node's children exactly. If the content is
663 given, the type must not be None.
665 If a name is given, the matching node is stored in the results
669 assert type >= 256, type
670 if content is not None:
671 assert not isinstance(content, str), repr(content)
672 newcontent = list(content)
673 for i, item in enumerate(newcontent):
674 assert isinstance(item, BasePattern), (i, item)
675 # I don't even think this code is used anywhere, but it does cause
676 # unreachable errors from mypy. This function's signature does look
677 # odd though *shrug*.
678 if isinstance(item, WildcardPattern): # type: ignore[unreachable]
679 self.wildcards = True # type: ignore[unreachable]
681 self.content = newcontent
684 def _submatch(self, node, results=None) -> bool:
686 Match the pattern's content to the node's children.
688 This assumes the node type matches and self.content is not None.
690 Returns True if it matches, False if not.
692 If results is not None, it must be a dict which will be
693 updated with the nodes matching named subpatterns.
695 When returning False, the results dict may still be updated.
698 for c, r in generate_matches(self.content, node.children):
699 if c == len(node.children):
700 if results is not None:
704 if len(self.content) != len(node.children):
706 for subpattern, child in zip(self.content, node.children):
707 if not subpattern.match(child, results):
712 class WildcardPattern(BasePattern):
715 A wildcard pattern can match zero or more nodes.
717 This has all the flexibility needed to implement patterns like:
721 (...)* (...)+ (...)? (...){m,n}
723 except it always uses non-greedy matching.
731 content: Optional[Text] = None,
734 name: Optional[Text] = None,
740 content: optional sequence of subsequences of patterns;
741 if absent, matches one node;
742 if present, each subsequence is an alternative [*]
743 min: optional minimum number of times to match, default 0
744 max: optional maximum number of times to match, default HUGE
745 name: optional name assigned to this match
747 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
748 equivalent to (a b c | d e | f g h); if content is None,
749 this is equivalent to '.' in regular expression terms.
750 The min and max parameters work as follows:
751 min=0, max=maxint: .*
752 min=1, max=maxint: .+
755 If content is not None, replace the dot with the parenthesized
756 list of alternatives, e.g. (a b c | d e | f g h)*
758 assert 0 <= min <= max <= HUGE, (min, max)
759 if content is not None:
760 f = lambda s: tuple(s)
761 wrapped_content = tuple(map(f, content)) # Protect against alterations
762 # Check sanity of alternatives
763 assert len(wrapped_content), repr(
765 ) # Can't have zero alternatives
766 for alt in wrapped_content:
767 assert len(alt), repr(alt) # Can have empty alternatives
768 self.content = wrapped_content
773 def optimize(self) -> Any:
774 """Optimize certain stacked wildcard patterns."""
777 self.content is not None
778 and len(self.content) == 1
779 and len(self.content[0]) == 1
781 subpattern = self.content[0][0]
782 if self.min == 1 and self.max == 1:
783 if self.content is None:
784 return NodePattern(name=self.name)
785 if subpattern is not None and self.name == subpattern.name:
786 return subpattern.optimize()
789 and isinstance(subpattern, WildcardPattern)
790 and subpattern.min <= 1
791 and self.name == subpattern.name
793 return WildcardPattern(
795 self.min * subpattern.min,
796 self.max * subpattern.max,
801 def match(self, node, results=None) -> bool:
802 """Does this pattern exactly match a node?"""
803 return self.match_seq([node], results)
805 def match_seq(self, nodes, results=None) -> bool:
806 """Does this pattern exactly match a sequence of nodes?"""
807 for c, r in self.generate_matches(nodes):
809 if results is not None:
812 results[self.name] = list(nodes)
816 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
818 Generator yielding matches for a sequence of nodes.
821 nodes: sequence of nodes
824 (count, results) tuples where:
825 count: the match comprises nodes[:count];
826 results: dict containing named submatches.
828 if self.content is None:
829 # Shortcut for special case (see __init__.__doc__)
830 for count in range(self.min, 1 + min(len(nodes), self.max)):
833 r[self.name] = nodes[:count]
835 elif self.name == "bare_name":
836 yield self._bare_name_matches(nodes)
838 # The reason for this is that hitting the recursion limit usually
839 # results in some ugly messages about how RuntimeErrors are being
840 # ignored. We only have to do this on CPython, though, because other
841 # implementations don't have this nasty bug in the first place.
842 if hasattr(sys, "getrefcount"):
843 save_stderr = sys.stderr
844 sys.stderr = StringIO()
846 for count, r in self._recursive_matches(nodes, 0):
848 r[self.name] = nodes[:count]
851 # We fall back to the iterative pattern matching scheme if the recursive
852 # scheme hits the recursion limit.
853 for count, r in self._iterative_matches(nodes):
855 r[self.name] = nodes[:count]
858 if hasattr(sys, "getrefcount"):
859 sys.stderr = save_stderr
861 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
862 """Helper to iteratively yield the matches."""
868 # generate matches that use just one alt from self.content
869 for alt in self.content:
870 for c, r in generate_matches(alt, nodes):
872 results.append((c, r))
874 # for each match, iterate down the nodes
877 for c0, r0 in results:
878 # stop if the entire set of nodes has been matched
879 if c0 < nodelen and c0 <= self.max:
880 for alt in self.content:
881 for c1, r1 in generate_matches(alt, nodes[c0:]):
887 new_results.append((c0 + c1, r))
888 results = new_results
890 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
891 """Special optimized matcher for bare_name."""
893 r = {} # type: _Results
896 while not done and count < max:
898 for leaf in self.content:
899 if leaf[0].match(nodes[count], r):
903 assert self.name is not None
904 r[self.name] = nodes[:count]
907 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
908 """Helper to recursively yield the matches."""
909 assert self.content is not None
910 if count >= self.min:
913 for alt in self.content:
914 for c0, r0 in generate_matches(alt, nodes):
915 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
922 class NegatedPattern(BasePattern):
923 def __init__(self, content: Optional[Any] = None) -> None:
927 The argument is either a pattern or None. If it is None, this
928 only matches an empty sequence (effectively '$' in regex
929 lingo). If it is not None, this matches whenever the argument
930 pattern doesn't have any matches.
932 if content is not None:
933 assert isinstance(content, BasePattern), repr(content)
934 self.content = content
936 def match(self, node, results=None) -> bool:
937 # We never match a node in its entirety
940 def match_seq(self, nodes, results=None) -> bool:
941 # We only match an empty sequence of nodes in its entirety
942 return len(nodes) == 0
944 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
945 if self.content is None:
946 # Return a match if there is an empty sequence
950 # Return a match if the argument pattern has no matches
951 for c, r in self.content.generate_matches(nodes):
956 def generate_matches(
957 patterns: List[BasePattern], nodes: List[NL]
958 ) -> Iterator[Tuple[int, _Results]]:
960 Generator yielding matches for a sequence of patterns and nodes.
963 patterns: a sequence of patterns
964 nodes: a sequence of nodes
967 (count, results) tuples where:
968 count: the entire sequence of patterns matches nodes[:count];
969 results: dict containing named submatches.
974 p, rest = patterns[0], patterns[1:]
975 for c0, r0 in p.generate_matches(nodes):
979 for c1, r1 in generate_matches(rest, nodes[c0:]):