All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 __author__ = "Guido van Rossum <guido@python.org>"
16 from io import StringIO
18 HUGE = 0x7FFFFFFF # maximum repeat count, default max
23 def type_repr(type_num):
26 from .pygram import python_symbols
28 # printing tokens is possible but not as useful
29 # from .pgen2 import token // token.__dict__.items():
30 for name, val in python_symbols.__dict__.items():
32 _type_reprs[val] = name
33 return _type_reprs.setdefault(type_num, type_num)
39 Abstract base class for Node and Leaf.
41 This provides some default functionality and boilerplate using the
44 A node may be a subnode of at most one parent.
47 # Default values for instance variables
48 type = None # int: token number (< 256) or symbol number (>= 256)
49 parent = None # Parent node pointer, or None
50 children = () # Tuple of subnodes
54 def __new__(cls, *args, **kwds):
55 """Constructor that prevents Base from being instantiated."""
56 assert cls is not Base, "Cannot instantiate Base"
57 return object.__new__(cls)
59 def __eq__(self, other):
61 Compare two nodes for equality.
63 This calls the method _eq().
65 if self.__class__ is not other.__class__:
67 return self._eq(other)
69 __hash__ = None # For Py3 compatibility.
73 Compare two nodes for equality.
75 This is called by __eq__ and __ne__. It is only called if the two nodes
76 have the same type. This must be implemented by the concrete subclass.
77 Nodes should be considered equal if they have the same structure,
78 ignoring the prefix string and other context information.
80 raise NotImplementedError
84 Return a cloned (deep) copy of self.
86 This must be implemented by the concrete subclass.
88 raise NotImplementedError
92 Return a post-order iterator for the tree.
94 This must be implemented by the concrete subclass.
96 raise NotImplementedError
100 Return a pre-order iterator for the tree.
102 This must be implemented by the concrete subclass.
104 raise NotImplementedError
106 def replace(self, new):
107 """Replace this node with a new one in the parent."""
108 assert self.parent is not None, str(self)
109 assert new is not None
110 if not isinstance(new, list):
114 for ch in self.parent.children:
116 assert not found, (self.parent.children, self, new)
118 l_children.extend(new)
121 l_children.append(ch)
122 assert found, (self.children, self, new)
123 self.parent.children = l_children
124 self.parent.changed()
125 self.parent.invalidate_sibling_maps()
127 x.parent = self.parent
130 def get_lineno(self):
131 """Return the line number which generated the invocant node."""
133 while not isinstance(node, Leaf):
134 if not node.children:
136 node = node.children[0]
143 self.parent.changed()
144 self.was_changed = True
148 Remove the node from the tree. Returns the position of the node in its
149 parent's children before it was removed.
152 for i, node in enumerate(self.parent.children):
154 del self.parent.children[i]
155 self.parent.changed()
156 self.parent.invalidate_sibling_maps()
161 def next_sibling(self):
163 The node immediately following the invocant in their parent's children
164 list. If the invocant does not have a next sibling, it is None
166 if self.parent is None:
169 if self.parent.next_sibling_map is None:
170 self.parent.update_sibling_maps()
171 return self.parent.next_sibling_map[id(self)]
174 def prev_sibling(self):
176 The node immediately preceding the invocant in their parent's children
177 list. If the invocant does not have a previous sibling, it is None.
179 if self.parent is None:
182 if self.parent.prev_sibling_map is None:
183 self.parent.update_sibling_maps()
184 return self.parent.prev_sibling_map[id(self)]
187 for child in self.children:
188 yield from child.leaves()
191 if self.parent is None:
193 return 1 + self.parent.depth()
195 def get_suffix(self):
197 Return the string immediately following the invocant node. This is
198 effectively equivalent to node.next_sibling.prefix
200 next_sib = self.next_sibling
203 return next_sib.prefix
205 if sys.version_info < (3, 0):
208 return str(self).encode("ascii")
213 """Concrete implementation for interior nodes."""
215 def __init__(self, type, children, context=None, prefix=None, fixers_applied=None):
219 Takes a type constant (a symbol number >= 256), a sequence of
220 child nodes, and an optional context keyword argument.
222 As a side effect, the parent pointers of the children are updated.
224 assert type >= 256, type
226 self.children = list(children)
227 for ch in self.children:
228 assert ch.parent is None, repr(ch)
230 self.invalidate_sibling_maps()
231 if prefix is not None:
234 self.fixers_applied = fixers_applied[:]
236 self.fixers_applied = None
239 """Return a canonical string representation."""
240 return "%s(%s, %r)" % (
241 self.__class__.__name__,
242 type_repr(self.type),
246 def __unicode__(self):
248 Return a pretty string representation.
250 This reproduces the input source exactly.
252 return "".join(map(str, self.children))
254 if sys.version_info > (3, 0):
255 __str__ = __unicode__
257 def _eq(self, other):
258 """Compare two nodes for equality."""
259 return (self.type, self.children) == (other.type, other.children)
262 """Return a cloned (deep) copy of self."""
265 [ch.clone() for ch in self.children],
266 fixers_applied=self.fixers_applied,
269 def post_order(self):
270 """Return a post-order iterator for the tree."""
271 for child in self.children:
272 yield from child.post_order()
276 """Return a pre-order iterator for the tree."""
278 for child in self.children:
279 yield from child.pre_order()
284 The whitespace and comments preceding this node in the input.
286 if not self.children:
288 return self.children[0].prefix
291 def prefix(self, prefix):
293 self.children[0].prefix = prefix
295 def set_child(self, i, child):
297 Equivalent to 'node.children[i] = child'. This method also sets the
298 child's parent attribute appropriately.
301 self.children[i].parent = None
302 self.children[i] = child
304 self.invalidate_sibling_maps()
306 def insert_child(self, i, child):
308 Equivalent to 'node.children.insert(i, child)'. This method also sets
309 the child's parent attribute appropriately.
312 self.children.insert(i, child)
314 self.invalidate_sibling_maps()
316 def append_child(self, child):
318 Equivalent to 'node.children.append(child)'. This method also sets the
319 child's parent attribute appropriately.
322 self.children.append(child)
324 self.invalidate_sibling_maps()
326 def invalidate_sibling_maps(self):
327 self.prev_sibling_map = None
328 self.next_sibling_map = None
330 def update_sibling_maps(self):
331 self.prev_sibling_map = _prev = {}
332 self.next_sibling_map = _next = {}
334 for current in self.children:
335 _prev[id(current)] = previous
336 _next[id(previous)] = current
338 _next[id(current)] = None
343 """Concrete implementation for leaf nodes."""
345 # Default values for instance variables
346 _prefix = "" # Whitespace and comments preceding this token in the input
347 lineno = 0 # Line where this token starts in the input
348 column = 0 # Column where this token starts in the input
350 def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]):
354 Takes a type constant (a token number < 256), a string value, and an
355 optional context keyword argument.
357 assert 0 <= type < 256, type
358 if context is not None:
359 self._prefix, (self.lineno, self.column) = context
362 if prefix is not None:
363 self._prefix = prefix
364 self.fixers_applied = fixers_applied[:]
367 """Return a canonical string representation."""
368 from .pgen2.token import tok_name
370 return "%s(%s, %r)" % (
371 self.__class__.__name__,
372 tok_name.get(self.type, self.type),
376 def __unicode__(self):
378 Return a pretty string representation.
380 This reproduces the input source exactly.
382 return self.prefix + str(self.value)
384 if sys.version_info > (3, 0):
385 __str__ = __unicode__
387 def _eq(self, other):
388 """Compare two nodes for equality."""
389 return (self.type, self.value) == (other.type, other.value)
392 """Return a cloned (deep) copy of self."""
396 (self.prefix, (self.lineno, self.column)),
397 fixers_applied=self.fixers_applied,
403 def post_order(self):
404 """Return a post-order iterator for the tree."""
408 """Return a pre-order iterator for the tree."""
414 The whitespace and comments preceding this token in the input.
419 def prefix(self, prefix):
421 self._prefix = prefix
424 def convert(gr, raw_node):
426 Convert raw node information to a Node or Leaf instance.
428 This is passed to the parser driver which calls it whenever a reduction of a
429 grammar rule produces a new complete node, so that the tree is build
432 type, value, context, children = raw_node
433 if children or type in gr.number2symbol:
434 # If there's exactly one child, return that child instead of
435 # creating a new node.
436 if len(children) == 1:
438 return Node(type, children, context=context)
440 return Leaf(type, value, context=context)
443 class BasePattern(object):
446 A pattern is a tree matching pattern.
448 It looks for a specific node type (token or symbol), and
449 optionally for a specific content.
451 This is an abstract base class. There are three concrete
454 - LeafPattern matches a single leaf node;
455 - NodePattern matches a single node (usually non-leaf);
456 - WildcardPattern matches a sequence of nodes of variable length.
459 # Defaults for instance variables
460 type = None # Node type (token if < 256, symbol if >= 256)
461 content = None # Optional content matching pattern
462 name = None # Optional name used to store match in results dict
464 def __new__(cls, *args, **kwds):
465 """Constructor that prevents BasePattern from being instantiated."""
466 assert cls is not BasePattern, "Cannot instantiate BasePattern"
467 return object.__new__(cls)
470 args = [type_repr(self.type), self.content, self.name]
471 while args and args[-1] is None:
473 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
477 A subclass can define this as a hook for optimizations.
479 Returns either self or another node with the same effect.
483 def match(self, node, results=None):
485 Does this pattern exactly match a node?
487 Returns True if it matches, False if not.
489 If results is not None, it must be a dict which will be
490 updated with the nodes matching named subpatterns.
492 Default implementation for non-wildcard patterns.
494 if self.type is not None and node.type != self.type:
496 if self.content is not None:
498 if results is not None:
500 if not self._submatch(node, r):
504 if results is not None and self.name:
505 results[self.name] = node
508 def match_seq(self, nodes, results=None):
510 Does this pattern exactly match a sequence of nodes?
512 Default implementation for non-wildcard patterns.
516 return self.match(nodes[0], results)
518 def generate_matches(self, nodes):
520 Generator yielding all matches for this pattern.
522 Default implementation for non-wildcard patterns.
525 if nodes and self.match(nodes[0], r):
529 class LeafPattern(BasePattern):
530 def __init__(self, type=None, content=None, name=None):
532 Initializer. Takes optional type, content, and name.
534 The type, if given must be a token type (< 256). If not given,
535 this matches any *leaf* node; the content may still be required.
537 The content, if given, must be a string.
539 If a name is given, the matching node is stored in the results
543 assert 0 <= type < 256, type
544 if content is not None:
545 assert isinstance(content, str), repr(content)
547 self.content = content
550 def match(self, node, results=None):
551 """Override match() to insist on a leaf node."""
552 if not isinstance(node, Leaf):
554 return BasePattern.match(self, node, results)
556 def _submatch(self, node, results=None):
558 Match the pattern's content to the node's children.
560 This assumes the node type matches and self.content is not None.
562 Returns True if it matches, False if not.
564 If results is not None, it must be a dict which will be
565 updated with the nodes matching named subpatterns.
567 When returning False, the results dict may still be updated.
569 return self.content == node.value
572 class NodePattern(BasePattern):
576 def __init__(self, type=None, content=None, name=None):
578 Initializer. Takes optional type, content, and name.
580 The type, if given, must be a symbol type (>= 256). If the
581 type is None this matches *any* single node (leaf or not),
582 except if content is not None, in which it only matches
583 non-leaf nodes that also match the content pattern.
585 The content, if not None, must be a sequence of Patterns that
586 must match the node's children exactly. If the content is
587 given, the type must not be None.
589 If a name is given, the matching node is stored in the results
593 assert type >= 256, type
594 if content is not None:
595 assert not isinstance(content, str), repr(content)
596 content = list(content)
597 for i, item in enumerate(content):
598 assert isinstance(item, BasePattern), (i, item)
599 if isinstance(item, WildcardPattern):
600 self.wildcards = True
602 self.content = content
605 def _submatch(self, node, results=None):
607 Match the pattern's content to the node's children.
609 This assumes the node type matches and self.content is not None.
611 Returns True if it matches, False if not.
613 If results is not None, it must be a dict which will be
614 updated with the nodes matching named subpatterns.
616 When returning False, the results dict may still be updated.
619 for c, r in generate_matches(self.content, node.children):
620 if c == len(node.children):
621 if results is not None:
625 if len(self.content) != len(node.children):
627 for subpattern, child in zip(self.content, node.children):
628 if not subpattern.match(child, results):
633 class WildcardPattern(BasePattern):
636 A wildcard pattern can match zero or more nodes.
638 This has all the flexibility needed to implement patterns like:
642 (...)* (...)+ (...)? (...){m,n}
644 except it always uses non-greedy matching.
647 def __init__(self, content=None, min=0, max=HUGE, name=None):
652 content: optional sequence of subsequences of patterns;
653 if absent, matches one node;
654 if present, each subsequence is an alternative [*]
655 min: optional minimum number of times to match, default 0
656 max: optional maximum number of times to match, default HUGE
657 name: optional name assigned to this match
659 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
660 equivalent to (a b c | d e | f g h); if content is None,
661 this is equivalent to '.' in regular expression terms.
662 The min and max parameters work as follows:
663 min=0, max=maxint: .*
664 min=1, max=maxint: .+
667 If content is not None, replace the dot with the parenthesized
668 list of alternatives, e.g. (a b c | d e | f g h)*
670 assert 0 <= min <= max <= HUGE, (min, max)
671 if content is not None:
672 content = tuple(map(tuple, content)) # Protect against alterations
673 # Check sanity of alternatives
674 assert len(content), repr(content) # Can't have zero alternatives
676 assert len(alt), repr(alt) # Can have empty alternatives
677 self.content = content
683 """Optimize certain stacked wildcard patterns."""
686 self.content is not None
687 and len(self.content) == 1
688 and len(self.content[0]) == 1
690 subpattern = self.content[0][0]
691 if self.min == 1 and self.max == 1:
692 if self.content is None:
693 return NodePattern(name=self.name)
694 if subpattern is not None and self.name == subpattern.name:
695 return subpattern.optimize()
698 and isinstance(subpattern, WildcardPattern)
699 and subpattern.min <= 1
700 and self.name == subpattern.name
702 return WildcardPattern(
704 self.min * subpattern.min,
705 self.max * subpattern.max,
710 def match(self, node, results=None):
711 """Does this pattern exactly match a node?"""
712 return self.match_seq([node], results)
714 def match_seq(self, nodes, results=None):
715 """Does this pattern exactly match a sequence of nodes?"""
716 for c, r in self.generate_matches(nodes):
718 if results is not None:
721 results[self.name] = list(nodes)
725 def generate_matches(self, nodes):
727 Generator yielding matches for a sequence of nodes.
730 nodes: sequence of nodes
733 (count, results) tuples where:
734 count: the match comprises nodes[:count];
735 results: dict containing named submatches.
737 if self.content is None:
738 # Shortcut for special case (see __init__.__doc__)
739 for count in range(self.min, 1 + min(len(nodes), self.max)):
742 r[self.name] = nodes[:count]
744 elif self.name == "bare_name":
745 yield self._bare_name_matches(nodes)
747 # The reason for this is that hitting the recursion limit usually
748 # results in some ugly messages about how RuntimeErrors are being
749 # ignored. We only have to do this on CPython, though, because other
750 # implementations don't have this nasty bug in the first place.
751 if hasattr(sys, "getrefcount"):
752 save_stderr = sys.stderr
753 sys.stderr = StringIO()
755 for count, r in self._recursive_matches(nodes, 0):
757 r[self.name] = nodes[:count]
760 # We fall back to the iterative pattern matching scheme if the recursive
761 # scheme hits the recursion limit.
762 for count, r in self._iterative_matches(nodes):
764 r[self.name] = nodes[:count]
767 if hasattr(sys, "getrefcount"):
768 sys.stderr = save_stderr
770 def _iterative_matches(self, nodes):
771 """Helper to iteratively yield the matches."""
777 # generate matches that use just one alt from self.content
778 for alt in self.content:
779 for c, r in generate_matches(alt, nodes):
781 results.append((c, r))
783 # for each match, iterate down the nodes
786 for c0, r0 in results:
787 # stop if the entire set of nodes has been matched
788 if c0 < nodelen and c0 <= self.max:
789 for alt in self.content:
790 for c1, r1 in generate_matches(alt, nodes[c0:]):
796 new_results.append((c0 + c1, r))
797 results = new_results
799 def _bare_name_matches(self, nodes):
800 """Special optimized matcher for bare_name."""
805 while not done and count < max:
807 for leaf in self.content:
808 if leaf[0].match(nodes[count], r):
812 r[self.name] = nodes[:count]
815 def _recursive_matches(self, nodes, count):
816 """Helper to recursively yield the matches."""
817 assert self.content is not None
818 if count >= self.min:
821 for alt in self.content:
822 for c0, r0 in generate_matches(alt, nodes):
823 for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
830 class NegatedPattern(BasePattern):
831 def __init__(self, content=None):
835 The argument is either a pattern or None. If it is None, this
836 only matches an empty sequence (effectively '$' in regex
837 lingo). If it is not None, this matches whenever the argument
838 pattern doesn't have any matches.
840 if content is not None:
841 assert isinstance(content, BasePattern), repr(content)
842 self.content = content
844 def match(self, node):
845 # We never match a node in its entirety
848 def match_seq(self, nodes):
849 # We only match an empty sequence of nodes in its entirety
850 return len(nodes) == 0
852 def generate_matches(self, nodes):
853 if self.content is None:
854 # Return a match if there is an empty sequence
858 # Return a match if the argument pattern has no matches
859 for c, r in self.content.generate_matches(nodes):
864 def generate_matches(patterns, nodes):
866 Generator yielding matches for a sequence of patterns and nodes.
869 patterns: a sequence of patterns
870 nodes: a sequence of nodes
873 (count, results) tuples where:
874 count: the entire sequence of patterns matches nodes[:count];
875 results: dict containing named submatches.
880 p, rest = patterns[0], patterns[1:]
881 for c0, r0 in p.generate_matches(nodes):
885 for c1, r1 in generate_matches(rest, nodes[c0:]):