All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 __author__ = "Guido van Rossum <guido@python.org>"
16 from io import StringIO
18 HUGE = 0x7FFFFFFF # maximum repeat count, default max
21 def type_repr(type_num):
24 from .pygram import python_symbols
25 # printing tokens is possible but not as useful
26 # from .pgen2 import token // token.__dict__.items():
27 for name, val in python_symbols.__dict__.items():
28 if type(val) == int: _type_reprs[val] = name
29 return _type_reprs.setdefault(type_num, type_num)
34 Abstract base class for Node and Leaf.
36 This provides some default functionality and boilerplate using the
39 A node may be a subnode of at most one parent.
42 # Default values for instance variables
43 type = None # int: token number (< 256) or symbol number (>= 256)
44 parent = None # Parent node pointer, or None
45 children = () # Tuple of subnodes
49 def __new__(cls, *args, **kwds):
50 """Constructor that prevents Base from being instantiated."""
51 assert cls is not Base, "Cannot instantiate Base"
52 return object.__new__(cls)
54 def __eq__(self, other):
56 Compare two nodes for equality.
58 This calls the method _eq().
60 if self.__class__ is not other.__class__:
62 return self._eq(other)
64 __hash__ = None # For Py3 compatibility.
68 Compare two nodes for equality.
70 This is called by __eq__ and __ne__. It is only called if the two nodes
71 have the same type. This must be implemented by the concrete subclass.
72 Nodes should be considered equal if they have the same structure,
73 ignoring the prefix string and other context information.
75 raise NotImplementedError
79 Return a cloned (deep) copy of self.
81 This must be implemented by the concrete subclass.
83 raise NotImplementedError
87 Return a post-order iterator for the tree.
89 This must be implemented by the concrete subclass.
91 raise NotImplementedError
95 Return a pre-order iterator for the tree.
97 This must be implemented by the concrete subclass.
99 raise NotImplementedError
101 def replace(self, new):
102 """Replace this node with a new one in the parent."""
103 assert self.parent is not None, str(self)
104 assert new is not None
105 if not isinstance(new, list):
109 for ch in self.parent.children:
111 assert not found, (self.parent.children, self, new)
113 l_children.extend(new)
116 l_children.append(ch)
117 assert found, (self.children, self, new)
118 self.parent.children = l_children
119 self.parent.changed()
120 self.parent.invalidate_sibling_maps()
122 x.parent = self.parent
125 def get_lineno(self):
126 """Return the line number which generated the invocant node."""
128 while not isinstance(node, Leaf):
129 if not node.children:
131 node = node.children[0]
138 self.parent.changed()
139 self.was_changed = True
143 Remove the node from the tree. Returns the position of the node in its
144 parent's children before it was removed.
147 for i, node in enumerate(self.parent.children):
149 del self.parent.children[i]
150 self.parent.changed()
151 self.parent.invalidate_sibling_maps()
156 def next_sibling(self):
158 The node immediately following the invocant in their parent's children
159 list. If the invocant does not have a next sibling, it is None
161 if self.parent is None:
164 if self.parent.next_sibling_map is None:
165 self.parent.update_sibling_maps()
166 return self.parent.next_sibling_map[id(self)]
169 def prev_sibling(self):
171 The node immediately preceding the invocant in their parent's children
172 list. If the invocant does not have a previous sibling, it is None.
174 if self.parent is None:
177 if self.parent.prev_sibling_map is None:
178 self.parent.update_sibling_maps()
179 return self.parent.prev_sibling_map[id(self)]
182 for child in self.children:
183 yield from child.leaves()
186 if self.parent is None:
188 return 1 + self.parent.depth()
190 def get_suffix(self):
192 Return the string immediately following the invocant node. This is
193 effectively equivalent to node.next_sibling.prefix
195 next_sib = self.next_sibling
198 return next_sib.prefix
200 if sys.version_info < (3, 0):
202 return str(self).encode("ascii")
206 """Concrete implementation for interior nodes."""
208 def __init__(self,type, children,
211 fixers_applied=None):
215 Takes a type constant (a symbol number >= 256), a sequence of
216 child nodes, and an optional context keyword argument.
218 As a side effect, the parent pointers of the children are updated.
220 assert type >= 256, type
222 self.children = list(children)
223 for ch in self.children:
224 assert ch.parent is None, repr(ch)
226 self.invalidate_sibling_maps()
227 if prefix is not None:
230 self.fixers_applied = fixers_applied[:]
232 self.fixers_applied = None
235 """Return a canonical string representation."""
236 return "%s(%s, %r)" % (self.__class__.__name__,
237 type_repr(self.type),
240 def __unicode__(self):
242 Return a pretty string representation.
244 This reproduces the input source exactly.
246 return "".join(map(str, self.children))
248 if sys.version_info > (3, 0):
249 __str__ = __unicode__
251 def _eq(self, other):
252 """Compare two nodes for equality."""
253 return (self.type, self.children) == (other.type, other.children)
256 """Return a cloned (deep) copy of self."""
257 return Node(self.type, [ch.clone() for ch in self.children],
258 fixers_applied=self.fixers_applied)
260 def post_order(self):
261 """Return a post-order iterator for the tree."""
262 for child in self.children:
263 yield from child.post_order()
267 """Return a pre-order iterator for the tree."""
269 for child in self.children:
270 yield from child.pre_order()
275 The whitespace and comments preceding this node in the input.
277 if not self.children:
279 return self.children[0].prefix
282 def prefix(self, prefix):
284 self.children[0].prefix = prefix
286 def set_child(self, i, child):
288 Equivalent to 'node.children[i] = child'. This method also sets the
289 child's parent attribute appropriately.
292 self.children[i].parent = None
293 self.children[i] = child
295 self.invalidate_sibling_maps()
297 def insert_child(self, i, child):
299 Equivalent to 'node.children.insert(i, child)'. This method also sets
300 the child's parent attribute appropriately.
303 self.children.insert(i, child)
305 self.invalidate_sibling_maps()
307 def append_child(self, child):
309 Equivalent to 'node.children.append(child)'. This method also sets the
310 child's parent attribute appropriately.
313 self.children.append(child)
315 self.invalidate_sibling_maps()
317 def invalidate_sibling_maps(self):
318 self.prev_sibling_map = None
319 self.next_sibling_map = None
321 def update_sibling_maps(self):
322 self.prev_sibling_map = _prev = {}
323 self.next_sibling_map = _next = {}
325 for current in self.children:
326 _prev[id(current)] = previous
327 _next[id(previous)] = current
329 _next[id(current)] = None
333 """Concrete implementation for leaf nodes."""
335 # Default values for instance variables
336 _prefix = "" # Whitespace and comments preceding this token in the input
337 lineno = 0 # Line where this token starts in the input
338 column = 0 # Column where this token starts in the input
340 def __init__(self, type, value,
347 Takes a type constant (a token number < 256), a string value, and an
348 optional context keyword argument.
350 assert 0 <= type < 256, type
351 if context is not None:
352 self._prefix, (self.lineno, self.column) = context
355 if prefix is not None:
356 self._prefix = prefix
357 self.fixers_applied = fixers_applied[:]
360 """Return a canonical string representation."""
361 from .pgen2.token import tok_name
362 return "%s(%s, %r)" % (self.__class__.__name__,
363 tok_name.get(self.type, self.type),
366 def __unicode__(self):
368 Return a pretty string representation.
370 This reproduces the input source exactly.
372 return self.prefix + str(self.value)
374 if sys.version_info > (3, 0):
375 __str__ = __unicode__
377 def _eq(self, other):
378 """Compare two nodes for equality."""
379 return (self.type, self.value) == (other.type, other.value)
382 """Return a cloned (deep) copy of self."""
383 return Leaf(self.type, self.value,
384 (self.prefix, (self.lineno, self.column)),
385 fixers_applied=self.fixers_applied)
390 def post_order(self):
391 """Return a post-order iterator for the tree."""
395 """Return a pre-order iterator for the tree."""
401 The whitespace and comments preceding this token in the input.
406 def prefix(self, prefix):
408 self._prefix = prefix
410 def convert(gr, raw_node):
412 Convert raw node information to a Node or Leaf instance.
414 This is passed to the parser driver which calls it whenever a reduction of a
415 grammar rule produces a new complete node, so that the tree is build
418 type, value, context, children = raw_node
419 if children or type in gr.number2symbol:
420 # If there's exactly one child, return that child instead of
421 # creating a new node.
422 if len(children) == 1:
424 return Node(type, children, context=context)
426 return Leaf(type, value, context=context)
429 class BasePattern(object):
432 A pattern is a tree matching pattern.
434 It looks for a specific node type (token or symbol), and
435 optionally for a specific content.
437 This is an abstract base class. There are three concrete
440 - LeafPattern matches a single leaf node;
441 - NodePattern matches a single node (usually non-leaf);
442 - WildcardPattern matches a sequence of nodes of variable length.
445 # Defaults for instance variables
446 type = None # Node type (token if < 256, symbol if >= 256)
447 content = None # Optional content matching pattern
448 name = None # Optional name used to store match in results dict
450 def __new__(cls, *args, **kwds):
451 """Constructor that prevents BasePattern from being instantiated."""
452 assert cls is not BasePattern, "Cannot instantiate BasePattern"
453 return object.__new__(cls)
456 args = [type_repr(self.type), self.content, self.name]
457 while args and args[-1] is None:
459 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
463 A subclass can define this as a hook for optimizations.
465 Returns either self or another node with the same effect.
469 def match(self, node, results=None):
471 Does this pattern exactly match a node?
473 Returns True if it matches, False if not.
475 If results is not None, it must be a dict which will be
476 updated with the nodes matching named subpatterns.
478 Default implementation for non-wildcard patterns.
480 if self.type is not None and node.type != self.type:
482 if self.content is not None:
484 if results is not None:
486 if not self._submatch(node, r):
490 if results is not None and self.name:
491 results[self.name] = node
494 def match_seq(self, nodes, results=None):
496 Does this pattern exactly match a sequence of nodes?
498 Default implementation for non-wildcard patterns.
502 return self.match(nodes[0], results)
504 def generate_matches(self, nodes):
506 Generator yielding all matches for this pattern.
508 Default implementation for non-wildcard patterns.
511 if nodes and self.match(nodes[0], r):
515 class LeafPattern(BasePattern):
517 def __init__(self, type=None, content=None, name=None):
519 Initializer. Takes optional type, content, and name.
521 The type, if given must be a token type (< 256). If not given,
522 this matches any *leaf* node; the content may still be required.
524 The content, if given, must be a string.
526 If a name is given, the matching node is stored in the results
530 assert 0 <= type < 256, type
531 if content is not None:
532 assert isinstance(content, str), repr(content)
534 self.content = content
537 def match(self, node, results=None):
538 """Override match() to insist on a leaf node."""
539 if not isinstance(node, Leaf):
541 return BasePattern.match(self, node, results)
543 def _submatch(self, node, results=None):
545 Match the pattern's content to the node's children.
547 This assumes the node type matches and self.content is not None.
549 Returns True if it matches, False if not.
551 If results is not None, it must be a dict which will be
552 updated with the nodes matching named subpatterns.
554 When returning False, the results dict may still be updated.
556 return self.content == node.value
559 class NodePattern(BasePattern):
563 def __init__(self, type=None, content=None, name=None):
565 Initializer. Takes optional type, content, and name.
567 The type, if given, must be a symbol type (>= 256). If the
568 type is None this matches *any* single node (leaf or not),
569 except if content is not None, in which it only matches
570 non-leaf nodes that also match the content pattern.
572 The content, if not None, must be a sequence of Patterns that
573 must match the node's children exactly. If the content is
574 given, the type must not be None.
576 If a name is given, the matching node is stored in the results
580 assert type >= 256, type
581 if content is not None:
582 assert not isinstance(content, str), repr(content)
583 content = list(content)
584 for i, item in enumerate(content):
585 assert isinstance(item, BasePattern), (i, item)
586 if isinstance(item, WildcardPattern):
587 self.wildcards = True
589 self.content = content
592 def _submatch(self, node, results=None):
594 Match the pattern's content to the node's children.
596 This assumes the node type matches and self.content is not None.
598 Returns True if it matches, False if not.
600 If results is not None, it must be a dict which will be
601 updated with the nodes matching named subpatterns.
603 When returning False, the results dict may still be updated.
606 for c, r in generate_matches(self.content, node.children):
607 if c == len(node.children):
608 if results is not None:
612 if len(self.content) != len(node.children):
614 for subpattern, child in zip(self.content, node.children):
615 if not subpattern.match(child, results):
620 class WildcardPattern(BasePattern):
623 A wildcard pattern can match zero or more nodes.
625 This has all the flexibility needed to implement patterns like:
629 (...)* (...)+ (...)? (...){m,n}
631 except it always uses non-greedy matching.
634 def __init__(self, content=None, min=0, max=HUGE, name=None):
639 content: optional sequence of subsequences of patterns;
640 if absent, matches one node;
641 if present, each subsequence is an alternative [*]
642 min: optional minimum number of times to match, default 0
643 max: optional maximum number of times to match, default HUGE
644 name: optional name assigned to this match
646 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
647 equivalent to (a b c | d e | f g h); if content is None,
648 this is equivalent to '.' in regular expression terms.
649 The min and max parameters work as follows:
650 min=0, max=maxint: .*
651 min=1, max=maxint: .+
654 If content is not None, replace the dot with the parenthesized
655 list of alternatives, e.g. (a b c | d e | f g h)*
657 assert 0 <= min <= max <= HUGE, (min, max)
658 if content is not None:
659 content = tuple(map(tuple, content)) # Protect against alterations
660 # Check sanity of alternatives
661 assert len(content), repr(content) # Can't have zero alternatives
663 assert len(alt), repr(alt) # Can have empty alternatives
664 self.content = content
670 """Optimize certain stacked wildcard patterns."""
672 if (self.content is not None and
673 len(self.content) == 1 and len(self.content[0]) == 1):
674 subpattern = self.content[0][0]
675 if self.min == 1 and self.max == 1:
676 if self.content is None:
677 return NodePattern(name=self.name)
678 if subpattern is not None and self.name == subpattern.name:
679 return subpattern.optimize()
680 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
681 subpattern.min <= 1 and self.name == subpattern.name):
682 return WildcardPattern(subpattern.content,
683 self.min*subpattern.min,
684 self.max*subpattern.max,
688 def match(self, node, results=None):
689 """Does this pattern exactly match a node?"""
690 return self.match_seq([node], results)
692 def match_seq(self, nodes, results=None):
693 """Does this pattern exactly match a sequence of nodes?"""
694 for c, r in self.generate_matches(nodes):
696 if results is not None:
699 results[self.name] = list(nodes)
703 def generate_matches(self, nodes):
705 Generator yielding matches for a sequence of nodes.
708 nodes: sequence of nodes
711 (count, results) tuples where:
712 count: the match comprises nodes[:count];
713 results: dict containing named submatches.
715 if self.content is None:
716 # Shortcut for special case (see __init__.__doc__)
717 for count in range(self.min, 1 + min(len(nodes), self.max)):
720 r[self.name] = nodes[:count]
722 elif self.name == "bare_name":
723 yield self._bare_name_matches(nodes)
725 # The reason for this is that hitting the recursion limit usually
726 # results in some ugly messages about how RuntimeErrors are being
727 # ignored. We only have to do this on CPython, though, because other
728 # implementations don't have this nasty bug in the first place.
729 if hasattr(sys, "getrefcount"):
730 save_stderr = sys.stderr
731 sys.stderr = StringIO()
733 for count, r in self._recursive_matches(nodes, 0):
735 r[self.name] = nodes[:count]
738 # We fall back to the iterative pattern matching scheme if the recursive
739 # scheme hits the recursion limit.
740 for count, r in self._iterative_matches(nodes):
742 r[self.name] = nodes[:count]
745 if hasattr(sys, "getrefcount"):
746 sys.stderr = save_stderr
748 def _iterative_matches(self, nodes):
749 """Helper to iteratively yield the matches."""
755 # generate matches that use just one alt from self.content
756 for alt in self.content:
757 for c, r in generate_matches(alt, nodes):
759 results.append((c, r))
761 # for each match, iterate down the nodes
764 for c0, r0 in results:
765 # stop if the entire set of nodes has been matched
766 if c0 < nodelen and c0 <= self.max:
767 for alt in self.content:
768 for c1, r1 in generate_matches(alt, nodes[c0:]):
774 new_results.append((c0 + c1, r))
775 results = new_results
777 def _bare_name_matches(self, nodes):
778 """Special optimized matcher for bare_name."""
783 while not done and count < max:
785 for leaf in self.content:
786 if leaf[0].match(nodes[count], r):
790 r[self.name] = nodes[:count]
793 def _recursive_matches(self, nodes, count):
794 """Helper to recursively yield the matches."""
795 assert self.content is not None
796 if count >= self.min:
799 for alt in self.content:
800 for c0, r0 in generate_matches(alt, nodes):
801 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
808 class NegatedPattern(BasePattern):
810 def __init__(self, content=None):
814 The argument is either a pattern or None. If it is None, this
815 only matches an empty sequence (effectively '$' in regex
816 lingo). If it is not None, this matches whenever the argument
817 pattern doesn't have any matches.
819 if content is not None:
820 assert isinstance(content, BasePattern), repr(content)
821 self.content = content
823 def match(self, node):
824 # We never match a node in its entirety
827 def match_seq(self, nodes):
828 # We only match an empty sequence of nodes in its entirety
829 return len(nodes) == 0
831 def generate_matches(self, nodes):
832 if self.content is None:
833 # Return a match if there is an empty sequence
837 # Return a match if the argument pattern has no matches
838 for c, r in self.content.generate_matches(nodes):
843 def generate_matches(patterns, nodes):
845 Generator yielding matches for a sequence of patterns and nodes.
848 patterns: a sequence of patterns
849 nodes: a sequence of nodes
852 (count, results) tuples where:
853 count: the entire sequence of patterns matches nodes[:count];
854 results: dict containing named submatches.
859 p, rest = patterns[0], patterns[1:]
860 for c0, r0 in p.generate_matches(nodes):
864 for c1, r1 in generate_matches(rest, nodes[c0:]):