All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 __author__ = "Guido van Rossum <guido@python.org>"
16 from io import StringIO
18 HUGE = 0x7FFFFFFF # maximum repeat count, default max
21 def type_repr(type_num):
24 from .pygram import python_symbols
25 # printing tokens is possible but not as useful
26 # from .pgen2 import token // token.__dict__.items():
27 for name, val in python_symbols.__dict__.items():
28 if type(val) == int: _type_reprs[val] = name
29 return _type_reprs.setdefault(type_num, type_num)
34 Abstract base class for Node and Leaf.
36 This provides some default functionality and boilerplate using the
39 A node may be a subnode of at most one parent.
42 # Default values for instance variables
43 type = None # int: token number (< 256) or symbol number (>= 256)
44 parent = None # Parent node pointer, or None
45 children = () # Tuple of subnodes
49 def __new__(cls, *args, **kwds):
50 """Constructor that prevents Base from being instantiated."""
51 assert cls is not Base, "Cannot instantiate Base"
52 return object.__new__(cls)
54 def __eq__(self, other):
56 Compare two nodes for equality.
58 This calls the method _eq().
60 if self.__class__ is not other.__class__:
62 return self._eq(other)
64 __hash__ = None # For Py3 compatibility.
68 Compare two nodes for equality.
70 This is called by __eq__ and __ne__. It is only called if the two nodes
71 have the same type. This must be implemented by the concrete subclass.
72 Nodes should be considered equal if they have the same structure,
73 ignoring the prefix string and other context information.
75 raise NotImplementedError
79 Return a cloned (deep) copy of self.
81 This must be implemented by the concrete subclass.
83 raise NotImplementedError
87 Return a post-order iterator for the tree.
89 This must be implemented by the concrete subclass.
91 raise NotImplementedError
95 Return a pre-order iterator for the tree.
97 This must be implemented by the concrete subclass.
99 raise NotImplementedError
101 def replace(self, new):
102 """Replace this node with a new one in the parent."""
103 assert self.parent is not None, str(self)
104 assert new is not None
105 if not isinstance(new, list):
109 for ch in self.parent.children:
111 assert not found, (self.parent.children, self, new)
113 l_children.extend(new)
116 l_children.append(ch)
117 assert found, (self.children, self, new)
118 self.parent.children = l_children
119 self.parent.changed()
120 self.parent.invalidate_sibling_maps()
122 x.parent = self.parent
125 def get_lineno(self):
126 """Return the line number which generated the invocant node."""
128 while not isinstance(node, Leaf):
129 if not node.children:
131 node = node.children[0]
136 self.parent.changed()
137 self.was_changed = True
141 Remove the node from the tree. Returns the position of the node in its
142 parent's children before it was removed.
145 for i, node in enumerate(self.parent.children):
147 del self.parent.children[i]
148 self.parent.changed()
149 self.parent.invalidate_sibling_maps()
154 def next_sibling(self):
156 The node immediately following the invocant in their parent's children
157 list. If the invocant does not have a next sibling, it is None
159 if self.parent is None:
162 if self.parent.next_sibling_map is None:
163 self.parent.update_sibling_maps()
164 return self.parent.next_sibling_map[id(self)]
167 def prev_sibling(self):
169 The node immediately preceding the invocant in their parent's children
170 list. If the invocant does not have a previous sibling, it is None.
172 if self.parent is None:
175 if self.parent.prev_sibling_map is None:
176 self.parent.update_sibling_maps()
177 return self.parent.prev_sibling_map[id(self)]
180 for child in self.children:
181 yield from child.leaves()
184 if self.parent is None:
186 return 1 + self.parent.depth()
188 def get_suffix(self):
190 Return the string immediately following the invocant node. This is
191 effectively equivalent to node.next_sibling.prefix
193 next_sib = self.next_sibling
196 return next_sib.prefix
198 if sys.version_info < (3, 0):
200 return str(self).encode("ascii")
204 """Concrete implementation for interior nodes."""
206 def __init__(self,type, children,
209 fixers_applied=None):
213 Takes a type constant (a symbol number >= 256), a sequence of
214 child nodes, and an optional context keyword argument.
216 As a side effect, the parent pointers of the children are updated.
218 assert type >= 256, type
220 self.children = list(children)
221 for ch in self.children:
222 assert ch.parent is None, repr(ch)
224 self.invalidate_sibling_maps()
225 if prefix is not None:
228 self.fixers_applied = fixers_applied[:]
230 self.fixers_applied = None
233 """Return a canonical string representation."""
234 return "%s(%s, %r)" % (self.__class__.__name__,
235 type_repr(self.type),
238 def __unicode__(self):
240 Return a pretty string representation.
242 This reproduces the input source exactly.
244 return "".join(map(str, self.children))
246 if sys.version_info > (3, 0):
247 __str__ = __unicode__
249 def _eq(self, other):
250 """Compare two nodes for equality."""
251 return (self.type, self.children) == (other.type, other.children)
254 """Return a cloned (deep) copy of self."""
255 return Node(self.type, [ch.clone() for ch in self.children],
256 fixers_applied=self.fixers_applied)
258 def post_order(self):
259 """Return a post-order iterator for the tree."""
260 for child in self.children:
261 yield from child.post_order()
265 """Return a pre-order iterator for the tree."""
267 for child in self.children:
268 yield from child.pre_order()
273 The whitespace and comments preceding this node in the input.
275 if not self.children:
277 return self.children[0].prefix
280 def prefix(self, prefix):
282 self.children[0].prefix = prefix
284 def set_child(self, i, child):
286 Equivalent to 'node.children[i] = child'. This method also sets the
287 child's parent attribute appropriately.
290 self.children[i].parent = None
291 self.children[i] = child
293 self.invalidate_sibling_maps()
295 def insert_child(self, i, child):
297 Equivalent to 'node.children.insert(i, child)'. This method also sets
298 the child's parent attribute appropriately.
301 self.children.insert(i, child)
303 self.invalidate_sibling_maps()
305 def append_child(self, child):
307 Equivalent to 'node.children.append(child)'. This method also sets the
308 child's parent attribute appropriately.
311 self.children.append(child)
313 self.invalidate_sibling_maps()
315 def invalidate_sibling_maps(self):
316 self.prev_sibling_map = None
317 self.next_sibling_map = None
319 def update_sibling_maps(self):
320 self.prev_sibling_map = _prev = {}
321 self.next_sibling_map = _next = {}
323 for current in self.children:
324 _prev[id(current)] = previous
325 _next[id(previous)] = current
327 _next[id(current)] = None
331 """Concrete implementation for leaf nodes."""
333 # Default values for instance variables
334 _prefix = "" # Whitespace and comments preceding this token in the input
335 lineno = 0 # Line where this token starts in the input
336 column = 0 # Column where this token tarts in the input
338 def __init__(self, type, value,
345 Takes a type constant (a token number < 256), a string value, and an
346 optional context keyword argument.
348 assert 0 <= type < 256, type
349 if context is not None:
350 self._prefix, (self.lineno, self.column) = context
353 if prefix is not None:
354 self._prefix = prefix
355 self.fixers_applied = fixers_applied[:]
358 """Return a canonical string representation."""
359 from .pgen2.token import tok_name
360 return "%s(%s, %r)" % (self.__class__.__name__,
361 tok_name.get(self.type, self.type),
364 def __unicode__(self):
366 Return a pretty string representation.
368 This reproduces the input source exactly.
370 return self.prefix + str(self.value)
372 if sys.version_info > (3, 0):
373 __str__ = __unicode__
375 def _eq(self, other):
376 """Compare two nodes for equality."""
377 return (self.type, self.value) == (other.type, other.value)
380 """Return a cloned (deep) copy of self."""
381 return Leaf(self.type, self.value,
382 (self.prefix, (self.lineno, self.column)),
383 fixers_applied=self.fixers_applied)
388 def post_order(self):
389 """Return a post-order iterator for the tree."""
393 """Return a pre-order iterator for the tree."""
399 The whitespace and comments preceding this token in the input.
404 def prefix(self, prefix):
406 self._prefix = prefix
408 def convert(gr, raw_node):
410 Convert raw node information to a Node or Leaf instance.
412 This is passed to the parser driver which calls it whenever a reduction of a
413 grammar rule produces a new complete node, so that the tree is build
416 type, value, context, children = raw_node
417 if children or type in gr.number2symbol:
418 # If there's exactly one child, return that child instead of
419 # creating a new node.
420 if len(children) == 1:
422 return Node(type, children, context=context)
424 return Leaf(type, value, context=context)
427 class BasePattern(object):
430 A pattern is a tree matching pattern.
432 It looks for a specific node type (token or symbol), and
433 optionally for a specific content.
435 This is an abstract base class. There are three concrete
438 - LeafPattern matches a single leaf node;
439 - NodePattern matches a single node (usually non-leaf);
440 - WildcardPattern matches a sequence of nodes of variable length.
443 # Defaults for instance variables
444 type = None # Node type (token if < 256, symbol if >= 256)
445 content = None # Optional content matching pattern
446 name = None # Optional name used to store match in results dict
448 def __new__(cls, *args, **kwds):
449 """Constructor that prevents BasePattern from being instantiated."""
450 assert cls is not BasePattern, "Cannot instantiate BasePattern"
451 return object.__new__(cls)
454 args = [type_repr(self.type), self.content, self.name]
455 while args and args[-1] is None:
457 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
461 A subclass can define this as a hook for optimizations.
463 Returns either self or another node with the same effect.
467 def match(self, node, results=None):
469 Does this pattern exactly match a node?
471 Returns True if it matches, False if not.
473 If results is not None, it must be a dict which will be
474 updated with the nodes matching named subpatterns.
476 Default implementation for non-wildcard patterns.
478 if self.type is not None and node.type != self.type:
480 if self.content is not None:
482 if results is not None:
484 if not self._submatch(node, r):
488 if results is not None and self.name:
489 results[self.name] = node
492 def match_seq(self, nodes, results=None):
494 Does this pattern exactly match a sequence of nodes?
496 Default implementation for non-wildcard patterns.
500 return self.match(nodes[0], results)
502 def generate_matches(self, nodes):
504 Generator yielding all matches for this pattern.
506 Default implementation for non-wildcard patterns.
509 if nodes and self.match(nodes[0], r):
513 class LeafPattern(BasePattern):
515 def __init__(self, type=None, content=None, name=None):
517 Initializer. Takes optional type, content, and name.
519 The type, if given must be a token type (< 256). If not given,
520 this matches any *leaf* node; the content may still be required.
522 The content, if given, must be a string.
524 If a name is given, the matching node is stored in the results
528 assert 0 <= type < 256, type
529 if content is not None:
530 assert isinstance(content, str), repr(content)
532 self.content = content
535 def match(self, node, results=None):
536 """Override match() to insist on a leaf node."""
537 if not isinstance(node, Leaf):
539 return BasePattern.match(self, node, results)
541 def _submatch(self, node, results=None):
543 Match the pattern's content to the node's children.
545 This assumes the node type matches and self.content is not None.
547 Returns True if it matches, False if not.
549 If results is not None, it must be a dict which will be
550 updated with the nodes matching named subpatterns.
552 When returning False, the results dict may still be updated.
554 return self.content == node.value
557 class NodePattern(BasePattern):
561 def __init__(self, type=None, content=None, name=None):
563 Initializer. Takes optional type, content, and name.
565 The type, if given, must be a symbol type (>= 256). If the
566 type is None this matches *any* single node (leaf or not),
567 except if content is not None, in which it only matches
568 non-leaf nodes that also match the content pattern.
570 The content, if not None, must be a sequence of Patterns that
571 must match the node's children exactly. If the content is
572 given, the type must not be None.
574 If a name is given, the matching node is stored in the results
578 assert type >= 256, type
579 if content is not None:
580 assert not isinstance(content, str), repr(content)
581 content = list(content)
582 for i, item in enumerate(content):
583 assert isinstance(item, BasePattern), (i, item)
584 if isinstance(item, WildcardPattern):
585 self.wildcards = True
587 self.content = content
590 def _submatch(self, node, results=None):
592 Match the pattern's content to the node's children.
594 This assumes the node type matches and self.content is not None.
596 Returns True if it matches, False if not.
598 If results is not None, it must be a dict which will be
599 updated with the nodes matching named subpatterns.
601 When returning False, the results dict may still be updated.
604 for c, r in generate_matches(self.content, node.children):
605 if c == len(node.children):
606 if results is not None:
610 if len(self.content) != len(node.children):
612 for subpattern, child in zip(self.content, node.children):
613 if not subpattern.match(child, results):
618 class WildcardPattern(BasePattern):
621 A wildcard pattern can match zero or more nodes.
623 This has all the flexibility needed to implement patterns like:
627 (...)* (...)+ (...)? (...){m,n}
629 except it always uses non-greedy matching.
632 def __init__(self, content=None, min=0, max=HUGE, name=None):
637 content: optional sequence of subsequences of patterns;
638 if absent, matches one node;
639 if present, each subsequence is an alternative [*]
640 min: optional minimum number of times to match, default 0
641 max: optional maximum number of times to match, default HUGE
642 name: optional name assigned to this match
644 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
645 equivalent to (a b c | d e | f g h); if content is None,
646 this is equivalent to '.' in regular expression terms.
647 The min and max parameters work as follows:
648 min=0, max=maxint: .*
649 min=1, max=maxint: .+
652 If content is not None, replace the dot with the parenthesized
653 list of alternatives, e.g. (a b c | d e | f g h)*
655 assert 0 <= min <= max <= HUGE, (min, max)
656 if content is not None:
657 content = tuple(map(tuple, content)) # Protect against alterations
658 # Check sanity of alternatives
659 assert len(content), repr(content) # Can't have zero alternatives
661 assert len(alt), repr(alt) # Can have empty alternatives
662 self.content = content
668 """Optimize certain stacked wildcard patterns."""
670 if (self.content is not None and
671 len(self.content) == 1 and len(self.content[0]) == 1):
672 subpattern = self.content[0][0]
673 if self.min == 1 and self.max == 1:
674 if self.content is None:
675 return NodePattern(name=self.name)
676 if subpattern is not None and self.name == subpattern.name:
677 return subpattern.optimize()
678 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
679 subpattern.min <= 1 and self.name == subpattern.name):
680 return WildcardPattern(subpattern.content,
681 self.min*subpattern.min,
682 self.max*subpattern.max,
686 def match(self, node, results=None):
687 """Does this pattern exactly match a node?"""
688 return self.match_seq([node], results)
690 def match_seq(self, nodes, results=None):
691 """Does this pattern exactly match a sequence of nodes?"""
692 for c, r in self.generate_matches(nodes):
694 if results is not None:
697 results[self.name] = list(nodes)
701 def generate_matches(self, nodes):
703 Generator yielding matches for a sequence of nodes.
706 nodes: sequence of nodes
709 (count, results) tuples where:
710 count: the match comprises nodes[:count];
711 results: dict containing named submatches.
713 if self.content is None:
714 # Shortcut for special case (see __init__.__doc__)
715 for count in range(self.min, 1 + min(len(nodes), self.max)):
718 r[self.name] = nodes[:count]
720 elif self.name == "bare_name":
721 yield self._bare_name_matches(nodes)
723 # The reason for this is that hitting the recursion limit usually
724 # results in some ugly messages about how RuntimeErrors are being
725 # ignored. We only have to do this on CPython, though, because other
726 # implementations don't have this nasty bug in the first place.
727 if hasattr(sys, "getrefcount"):
728 save_stderr = sys.stderr
729 sys.stderr = StringIO()
731 for count, r in self._recursive_matches(nodes, 0):
733 r[self.name] = nodes[:count]
736 # We fall back to the iterative pattern matching scheme if the recursive
737 # scheme hits the recursion limit.
738 for count, r in self._iterative_matches(nodes):
740 r[self.name] = nodes[:count]
743 if hasattr(sys, "getrefcount"):
744 sys.stderr = save_stderr
746 def _iterative_matches(self, nodes):
747 """Helper to iteratively yield the matches."""
753 # generate matches that use just one alt from self.content
754 for alt in self.content:
755 for c, r in generate_matches(alt, nodes):
757 results.append((c, r))
759 # for each match, iterate down the nodes
762 for c0, r0 in results:
763 # stop if the entire set of nodes has been matched
764 if c0 < nodelen and c0 <= self.max:
765 for alt in self.content:
766 for c1, r1 in generate_matches(alt, nodes[c0:]):
772 new_results.append((c0 + c1, r))
773 results = new_results
775 def _bare_name_matches(self, nodes):
776 """Special optimized matcher for bare_name."""
781 while not done and count < max:
783 for leaf in self.content:
784 if leaf[0].match(nodes[count], r):
788 r[self.name] = nodes[:count]
791 def _recursive_matches(self, nodes, count):
792 """Helper to recursively yield the matches."""
793 assert self.content is not None
794 if count >= self.min:
797 for alt in self.content:
798 for c0, r0 in generate_matches(alt, nodes):
799 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
806 class NegatedPattern(BasePattern):
808 def __init__(self, content=None):
812 The argument is either a pattern or None. If it is None, this
813 only matches an empty sequence (effectively '$' in regex
814 lingo). If it is not None, this matches whenever the argument
815 pattern doesn't have any matches.
817 if content is not None:
818 assert isinstance(content, BasePattern), repr(content)
819 self.content = content
821 def match(self, node):
822 # We never match a node in its entirety
825 def match_seq(self, nodes):
826 # We only match an empty sequence of nodes in its entirety
827 return len(nodes) == 0
829 def generate_matches(self, nodes):
830 if self.content is None:
831 # Return a match if there is an empty sequence
835 # Return a match if the argument pattern has no matches
836 for c, r in self.content.generate_matches(nodes):
841 def generate_matches(patterns, nodes):
843 Generator yielding matches for a sequence of patterns and nodes.
846 patterns: a sequence of patterns
847 nodes: a sequence of nodes
850 (count, results) tuples where:
851 count: the entire sequence of patterns matches nodes[:count];
852 results: dict containing named submatches.
857 p, rest = patterns[0], patterns[1:]
858 for c0, r0 in p.generate_matches(nodes):
862 for c1, r1 in generate_matches(rest, nodes[c0:]):