All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 # mypy: allow-untyped-defs, allow-incomplete-defs
27 from blib2to3.pgen2.grammar import Grammar
29 __author__ = "Guido van Rossum <guido@python.org>"
32 from io import StringIO
34 HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
36 _type_reprs: Dict[int, Union[str, int]] = {}
39 def type_repr(type_num: int) -> Union[str, int]:
42 from .pygram import python_symbols
44 # printing tokens is possible but not as useful
45 # from .pgen2 import token // token.__dict__.items():
46 for name in dir(python_symbols):
47 val = getattr(python_symbols, name)
49 _type_reprs[val] = name
50 return _type_reprs.setdefault(type_num, type_num)
53 _P = TypeVar("_P", bound="Base")
55 NL = Union["Node", "Leaf"]
56 Context = Tuple[str, Tuple[int, int]]
57 RawNode = Tuple[int, Optional[str], Optional[Context], Optional[List[NL]]]
63 Abstract base class for Node and Leaf.
65 This provides some default functionality and boilerplate using the
68 A node may be a subnode of at most one parent.
71 # Default values for instance variables
72 type: int # int: token number (< 256) or symbol number (>= 256)
73 parent: Optional["Node"] = None # Parent node pointer, or None
74 children: List[NL] # List of subnodes
75 was_changed: bool = False
76 was_checked: bool = False
78 def __new__(cls, *args, **kwds):
79 """Constructor that prevents Base from being instantiated."""
80 assert cls is not Base, "Cannot instantiate Base"
81 return object.__new__(cls)
83 def __eq__(self, other: Any) -> bool:
85 Compare two nodes for equality.
87 This calls the method _eq().
89 if self.__class__ is not other.__class__:
91 return self._eq(other)
94 def prefix(self) -> str:
95 raise NotImplementedError
97 def _eq(self: _P, other: _P) -> bool:
99 Compare two nodes for equality.
101 This is called by __eq__ and __ne__. It is only called if the two nodes
102 have the same type. This must be implemented by the concrete subclass.
103 Nodes should be considered equal if they have the same structure,
104 ignoring the prefix string and other context information.
106 raise NotImplementedError
108 def __deepcopy__(self: _P, memo: Any) -> _P:
111 def clone(self: _P) -> _P:
113 Return a cloned (deep) copy of self.
115 This must be implemented by the concrete subclass.
117 raise NotImplementedError
119 def post_order(self) -> Iterator[NL]:
121 Return a post-order iterator for the tree.
123 This must be implemented by the concrete subclass.
125 raise NotImplementedError
127 def pre_order(self) -> Iterator[NL]:
129 Return a pre-order iterator for the tree.
131 This must be implemented by the concrete subclass.
133 raise NotImplementedError
135 def replace(self, new: Union[NL, List[NL]]) -> None:
136 """Replace this node with a new one in the parent."""
137 assert self.parent is not None, str(self)
138 assert new is not None
139 if not isinstance(new, list):
143 for ch in self.parent.children:
145 assert not found, (self.parent.children, self, new)
147 l_children.extend(new)
150 l_children.append(ch)
151 assert found, (self.children, self, new)
152 self.parent.children = l_children
153 self.parent.changed()
154 self.parent.invalidate_sibling_maps()
156 x.parent = self.parent
159 def get_lineno(self) -> Optional[int]:
160 """Return the line number which generated the invocant node."""
162 while not isinstance(node, Leaf):
163 if not node.children:
165 node = node.children[0]
168 def changed(self) -> None:
172 self.parent.changed()
173 self.was_changed = True
175 def remove(self) -> Optional[int]:
177 Remove the node from the tree. Returns the position of the node in its
178 parent's children before it was removed.
181 for i, node in enumerate(self.parent.children):
183 del self.parent.children[i]
184 self.parent.changed()
185 self.parent.invalidate_sibling_maps()
191 def next_sibling(self) -> Optional[NL]:
193 The node immediately following the invocant in their parent's children
194 list. If the invocant does not have a next sibling, it is None
196 if self.parent is None:
199 if self.parent.next_sibling_map is None:
200 self.parent.update_sibling_maps()
201 assert self.parent.next_sibling_map is not None
202 return self.parent.next_sibling_map[id(self)]
205 def prev_sibling(self) -> Optional[NL]:
207 The node immediately preceding the invocant in their parent's children
208 list. If the invocant does not have a previous sibling, it is None.
210 if self.parent is None:
213 if self.parent.prev_sibling_map is None:
214 self.parent.update_sibling_maps()
215 assert self.parent.prev_sibling_map is not None
216 return self.parent.prev_sibling_map[id(self)]
218 def leaves(self) -> Iterator["Leaf"]:
219 for child in self.children:
220 yield from child.leaves()
222 def depth(self) -> int:
223 if self.parent is None:
225 return 1 + self.parent.depth()
227 def get_suffix(self) -> str:
229 Return the string immediately following the invocant node. This is
230 effectively equivalent to node.next_sibling.prefix
232 next_sib = self.next_sibling
235 prefix = next_sib.prefix
241 """Concrete implementation for interior nodes."""
243 fixers_applied: Optional[List[Any]]
244 used_names: Optional[Set[str]]
250 context: Optional[Any] = None,
251 prefix: Optional[str] = None,
252 fixers_applied: Optional[List[Any]] = None,
257 Takes a type constant (a symbol number >= 256), a sequence of
258 child nodes, and an optional context keyword argument.
260 As a side effect, the parent pointers of the children are updated.
262 assert type >= 256, type
264 self.children = list(children)
265 for ch in self.children:
266 assert ch.parent is None, repr(ch)
268 self.invalidate_sibling_maps()
269 if prefix is not None:
272 self.fixers_applied = fixers_applied[:]
274 self.fixers_applied = None
276 def __repr__(self) -> str:
277 """Return a canonical string representation."""
278 assert self.type is not None
279 return "{}({}, {!r})".format(
280 self.__class__.__name__,
281 type_repr(self.type),
285 def __str__(self) -> str:
287 Return a pretty string representation.
289 This reproduces the input source exactly.
291 return "".join(map(str, self.children))
293 def _eq(self, other: Base) -> bool:
294 """Compare two nodes for equality."""
295 return (self.type, self.children) == (other.type, other.children)
297 def clone(self) -> "Node":
298 assert self.type is not None
299 """Return a cloned (deep) copy of self."""
302 [ch.clone() for ch in self.children],
303 fixers_applied=self.fixers_applied,
306 def post_order(self) -> Iterator[NL]:
307 """Return a post-order iterator for the tree."""
308 for child in self.children:
309 yield from child.post_order()
312 def pre_order(self) -> Iterator[NL]:
313 """Return a pre-order iterator for the tree."""
315 for child in self.children:
316 yield from child.pre_order()
319 def prefix(self) -> str:
321 The whitespace and comments preceding this node in the input.
323 if not self.children:
325 return self.children[0].prefix
328 def prefix(self, prefix: str) -> None:
330 self.children[0].prefix = prefix
332 def set_child(self, i: int, child: NL) -> None:
334 Equivalent to 'node.children[i] = child'. This method also sets the
335 child's parent attribute appropriately.
338 self.children[i].parent = None
339 self.children[i] = child
341 self.invalidate_sibling_maps()
343 def insert_child(self, i: int, child: NL) -> None:
345 Equivalent to 'node.children.insert(i, child)'. This method also sets
346 the child's parent attribute appropriately.
349 self.children.insert(i, child)
351 self.invalidate_sibling_maps()
353 def append_child(self, child: NL) -> None:
355 Equivalent to 'node.children.append(child)'. This method also sets the
356 child's parent attribute appropriately.
359 self.children.append(child)
361 self.invalidate_sibling_maps()
363 def invalidate_sibling_maps(self) -> None:
364 self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None
365 self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None
367 def update_sibling_maps(self) -> None:
368 _prev: Dict[int, Optional[NL]] = {}
369 _next: Dict[int, Optional[NL]] = {}
370 self.prev_sibling_map = _prev
371 self.next_sibling_map = _next
372 previous: Optional[NL] = None
373 for current in self.children:
374 _prev[id(current)] = previous
375 _next[id(previous)] = current
377 _next[id(current)] = None
382 """Concrete implementation for leaf nodes."""
384 # Default values for instance variables
386 fixers_applied: List[Any]
388 # Changed later in brackets.py
389 opening_bracket: Optional["Leaf"] = None
390 used_names: Optional[Set[str]]
391 _prefix = "" # Whitespace and comments preceding this token in the input
392 lineno: int = 0 # Line where this token starts in the input
393 column: int = 0 # Column where this token starts in the input
394 # If not None, this Leaf is created by converting a block of fmt off/skip
395 # code, and `fmt_pass_converted_first_leaf` points to the first Leaf in the
397 fmt_pass_converted_first_leaf: Optional["Leaf"] = None
403 context: Optional[Context] = None,
404 prefix: Optional[str] = None,
405 fixers_applied: List[Any] = [],
406 opening_bracket: Optional["Leaf"] = None,
407 fmt_pass_converted_first_leaf: Optional["Leaf"] = None,
412 Takes a type constant (a token number < 256), a string value, and an
413 optional context keyword argument.
416 assert 0 <= type < 256, type
417 if context is not None:
418 self._prefix, (self.lineno, self.column) = context
421 if prefix is not None:
422 self._prefix = prefix
423 self.fixers_applied: Optional[List[Any]] = fixers_applied[:]
425 self.opening_bracket = opening_bracket
426 self.fmt_pass_converted_first_leaf = fmt_pass_converted_first_leaf
428 def __repr__(self) -> str:
429 """Return a canonical string representation."""
430 from .pgen2.token import tok_name
432 assert self.type is not None
433 return "{}({}, {!r})".format(
434 self.__class__.__name__,
435 tok_name.get(self.type, self.type),
439 def __str__(self) -> str:
441 Return a pretty string representation.
443 This reproduces the input source exactly.
445 return self._prefix + str(self.value)
447 def _eq(self, other: "Leaf") -> bool:
448 """Compare two nodes for equality."""
449 return (self.type, self.value) == (other.type, other.value)
451 def clone(self) -> "Leaf":
452 assert self.type is not None
453 """Return a cloned (deep) copy of self."""
457 (self.prefix, (self.lineno, self.column)),
458 fixers_applied=self.fixers_applied,
461 def leaves(self) -> Iterator["Leaf"]:
464 def post_order(self) -> Iterator["Leaf"]:
465 """Return a post-order iterator for the tree."""
468 def pre_order(self) -> Iterator["Leaf"]:
469 """Return a pre-order iterator for the tree."""
473 def prefix(self) -> str:
475 The whitespace and comments preceding this token in the input.
480 def prefix(self, prefix: str) -> None:
482 self._prefix = prefix
485 def convert(gr: Grammar, raw_node: RawNode) -> NL:
487 Convert raw node information to a Node or Leaf instance.
489 This is passed to the parser driver which calls it whenever a reduction of a
490 grammar rule produces a new complete node, so that the tree is build
493 type, value, context, children = raw_node
494 if children or type in gr.number2symbol:
495 # If there's exactly one child, return that child instead of
496 # creating a new node.
497 assert children is not None
498 if len(children) == 1:
500 return Node(type, children, context=context)
502 return Leaf(type, value or "", context=context)
505 _Results = Dict[str, NL]
511 A pattern is a tree matching pattern.
513 It looks for a specific node type (token or symbol), and
514 optionally for a specific content.
516 This is an abstract base class. There are three concrete
519 - LeafPattern matches a single leaf node;
520 - NodePattern matches a single node (usually non-leaf);
521 - WildcardPattern matches a sequence of nodes of variable length.
524 # Defaults for instance variables
526 type = None # Node type (token if < 256, symbol if >= 256)
527 content: Any = None # Optional content matching pattern
528 name: Optional[str] = None # Optional name used to store match in results dict
530 def __new__(cls, *args, **kwds):
531 """Constructor that prevents BasePattern from being instantiated."""
532 assert cls is not BasePattern, "Cannot instantiate BasePattern"
533 return object.__new__(cls)
535 def __repr__(self) -> str:
536 assert self.type is not None
537 args = [type_repr(self.type), self.content, self.name]
538 while args and args[-1] is None:
540 return "{}({})".format(self.__class__.__name__, ", ".join(map(repr, args)))
542 def _submatch(self, node, results=None) -> bool:
543 raise NotImplementedError
545 def optimize(self) -> "BasePattern":
547 A subclass can define this as a hook for optimizations.
549 Returns either self or another node with the same effect.
553 def match(self, node: NL, results: Optional[_Results] = None) -> bool:
555 Does this pattern exactly match a node?
557 Returns True if it matches, False if not.
559 If results is not None, it must be a dict which will be
560 updated with the nodes matching named subpatterns.
562 Default implementation for non-wildcard patterns.
564 if self.type is not None and node.type != self.type:
566 if self.content is not None:
567 r: Optional[_Results] = None
568 if results is not None:
570 if not self._submatch(node, r):
573 assert results is not None
575 if results is not None and self.name:
576 results[self.name] = node
579 def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool:
581 Does this pattern exactly match a sequence of nodes?
583 Default implementation for non-wildcard patterns.
587 return self.match(nodes[0], results)
589 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
591 Generator yielding all matches for this pattern.
593 Default implementation for non-wildcard patterns.
596 if nodes and self.match(nodes[0], r):
600 class LeafPattern(BasePattern):
603 type: Optional[int] = None,
604 content: Optional[str] = None,
605 name: Optional[str] = None,
608 Initializer. Takes optional type, content, and name.
610 The type, if given must be a token type (< 256). If not given,
611 this matches any *leaf* node; the content may still be required.
613 The content, if given, must be a string.
615 If a name is given, the matching node is stored in the results
619 assert 0 <= type < 256, type
620 if content is not None:
621 assert isinstance(content, str), repr(content)
623 self.content = content
626 def match(self, node: NL, results=None) -> bool:
627 """Override match() to insist on a leaf node."""
628 if not isinstance(node, Leaf):
630 return BasePattern.match(self, node, results)
632 def _submatch(self, node, results=None):
634 Match the pattern's content to the node's children.
636 This assumes the node type matches and self.content is not None.
638 Returns True if it matches, False if not.
640 If results is not None, it must be a dict which will be
641 updated with the nodes matching named subpatterns.
643 When returning False, the results dict may still be updated.
645 return self.content == node.value
648 class NodePattern(BasePattern):
650 wildcards: bool = False
654 type: Optional[int] = None,
655 content: Optional[Iterable[str]] = None,
656 name: Optional[str] = None,
659 Initializer. Takes optional type, content, and name.
661 The type, if given, must be a symbol type (>= 256). If the
662 type is None this matches *any* single node (leaf or not),
663 except if content is not None, in which it only matches
664 non-leaf nodes that also match the content pattern.
666 The content, if not None, must be a sequence of Patterns that
667 must match the node's children exactly. If the content is
668 given, the type must not be None.
670 If a name is given, the matching node is stored in the results
674 assert type >= 256, type
675 if content is not None:
676 assert not isinstance(content, str), repr(content)
677 newcontent = list(content)
678 for i, item in enumerate(newcontent):
679 assert isinstance(item, BasePattern), (i, item)
680 # I don't even think this code is used anywhere, but it does cause
681 # unreachable errors from mypy. This function's signature does look
682 # odd though *shrug*.
683 if isinstance(item, WildcardPattern): # type: ignore[unreachable]
684 self.wildcards = True # type: ignore[unreachable]
686 self.content = newcontent # TODO: this is unbound when content is None
689 def _submatch(self, node, results=None) -> bool:
691 Match the pattern's content to the node's children.
693 This assumes the node type matches and self.content is not None.
695 Returns True if it matches, False if not.
697 If results is not None, it must be a dict which will be
698 updated with the nodes matching named subpatterns.
700 When returning False, the results dict may still be updated.
703 for c, r in generate_matches(self.content, node.children):
704 if c == len(node.children):
705 if results is not None:
709 if len(self.content) != len(node.children):
711 for subpattern, child in zip(self.content, node.children):
712 if not subpattern.match(child, results):
717 class WildcardPattern(BasePattern):
720 A wildcard pattern can match zero or more nodes.
722 This has all the flexibility needed to implement patterns like:
726 (...)* (...)+ (...)? (...){m,n}
728 except it always uses non-greedy matching.
736 content: Optional[str] = None,
739 name: Optional[str] = None,
745 content: optional sequence of subsequences of patterns;
746 if absent, matches one node;
747 if present, each subsequence is an alternative [*]
748 min: optional minimum number of times to match, default 0
749 max: optional maximum number of times to match, default HUGE
750 name: optional name assigned to this match
752 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
753 equivalent to (a b c | d e | f g h); if content is None,
754 this is equivalent to '.' in regular expression terms.
755 The min and max parameters work as follows:
756 min=0, max=maxint: .*
757 min=1, max=maxint: .+
760 If content is not None, replace the dot with the parenthesized
761 list of alternatives, e.g. (a b c | d e | f g h)*
763 assert 0 <= min <= max <= HUGE, (min, max)
764 if content is not None:
765 f = lambda s: tuple(s)
766 wrapped_content = tuple(map(f, content)) # Protect against alterations
767 # Check sanity of alternatives
768 assert len(wrapped_content), repr(
770 ) # Can't have zero alternatives
771 for alt in wrapped_content:
772 assert len(alt), repr(alt) # Can have empty alternatives
773 self.content = wrapped_content
778 def optimize(self) -> Any:
779 """Optimize certain stacked wildcard patterns."""
782 self.content is not None
783 and len(self.content) == 1
784 and len(self.content[0]) == 1
786 subpattern = self.content[0][0]
787 if self.min == 1 and self.max == 1:
788 if self.content is None:
789 return NodePattern(name=self.name)
790 if subpattern is not None and self.name == subpattern.name:
791 return subpattern.optimize()
794 and isinstance(subpattern, WildcardPattern)
795 and subpattern.min <= 1
796 and self.name == subpattern.name
798 return WildcardPattern(
800 self.min * subpattern.min,
801 self.max * subpattern.max,
806 def match(self, node, results=None) -> bool:
807 """Does this pattern exactly match a node?"""
808 return self.match_seq([node], results)
810 def match_seq(self, nodes, results=None) -> bool:
811 """Does this pattern exactly match a sequence of nodes?"""
812 for c, r in self.generate_matches(nodes):
814 if results is not None:
817 results[self.name] = list(nodes)
821 def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
823 Generator yielding matches for a sequence of nodes.
826 nodes: sequence of nodes
829 (count, results) tuples where:
830 count: the match comprises nodes[:count];
831 results: dict containing named submatches.
833 if self.content is None:
834 # Shortcut for special case (see __init__.__doc__)
835 for count in range(self.min, 1 + min(len(nodes), self.max)):
838 r[self.name] = nodes[:count]
840 elif self.name == "bare_name":
841 yield self._bare_name_matches(nodes)
843 # The reason for this is that hitting the recursion limit usually
844 # results in some ugly messages about how RuntimeErrors are being
845 # ignored. We only have to do this on CPython, though, because other
846 # implementations don't have this nasty bug in the first place.
847 if hasattr(sys, "getrefcount"):
848 save_stderr = sys.stderr
849 sys.stderr = StringIO()
851 for count, r in self._recursive_matches(nodes, 0):
853 r[self.name] = nodes[:count]
856 # We fall back to the iterative pattern matching scheme if the recursive
857 # scheme hits the recursion limit.
858 for count, r in self._iterative_matches(nodes):
860 r[self.name] = nodes[:count]
863 if hasattr(sys, "getrefcount"):
864 sys.stderr = save_stderr
866 def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]:
867 """Helper to iteratively yield the matches."""
873 # generate matches that use just one alt from self.content
874 for alt in self.content:
875 for c, r in generate_matches(alt, nodes):
877 results.append((c, r))
879 # for each match, iterate down the nodes
882 for c0, r0 in results:
883 # stop if the entire set of nodes has been matched
884 if c0 < nodelen and c0 <= self.max:
885 for alt in self.content:
886 for c1, r1 in generate_matches(alt, nodes[c0:]):
892 new_results.append((c0 + c1, r))
893 results = new_results
895 def _bare_name_matches(self, nodes) -> Tuple[int, _Results]:
896 """Special optimized matcher for bare_name."""
898 r = {} # type: _Results
901 while not done and count < max:
903 for leaf in self.content:
904 if leaf[0].match(nodes[count], r):
908 assert self.name is not None
909 r[self.name] = nodes[:count]
912 def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]:
913 """Helper to recursively yield the matches."""
914 assert self.content is not None
915 if count >= self.min:
918 for alt in self.content:
919 for c0, r0 in generate_matches(alt, nodes):
920 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
927 class NegatedPattern(BasePattern):
928 def __init__(self, content: Optional[BasePattern] = None) -> None:
932 The argument is either a pattern or None. If it is None, this
933 only matches an empty sequence (effectively '$' in regex
934 lingo). If it is not None, this matches whenever the argument
935 pattern doesn't have any matches.
937 if content is not None:
938 assert isinstance(content, BasePattern), repr(content)
939 self.content = content
941 def match(self, node, results=None) -> bool:
942 # We never match a node in its entirety
945 def match_seq(self, nodes, results=None) -> bool:
946 # We only match an empty sequence of nodes in its entirety
947 return len(nodes) == 0
949 def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]:
950 if self.content is None:
951 # Return a match if there is an empty sequence
955 # Return a match if the argument pattern has no matches
956 for c, r in self.content.generate_matches(nodes):
961 def generate_matches(
962 patterns: List[BasePattern], nodes: List[NL]
963 ) -> Iterator[Tuple[int, _Results]]:
965 Generator yielding matches for a sequence of patterns and nodes.
968 patterns: a sequence of patterns
969 nodes: a sequence of nodes
972 (count, results) tuples where:
973 count: the entire sequence of patterns matches nodes[:count];
974 results: dict containing named submatches.
979 p, rest = patterns[0], patterns[1:]
980 for c0, r0 in p.generate_matches(nodes):
984 for c1, r1 in generate_matches(rest, nodes[c0:]):