All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 __author__ = "Guido van Rossum <guido@python.org>"
16 from io import StringIO
18 HUGE = 0x7FFFFFFF # maximum repeat count, default max
21 def type_repr(type_num):
24 from .pygram import python_symbols
25 # printing tokens is possible but not as useful
26 # from .pgen2 import token // token.__dict__.items():
27 for name, val in python_symbols.__dict__.items():
28 if type(val) == int: _type_reprs[val] = name
29 return _type_reprs.setdefault(type_num, type_num)
34 Abstract base class for Node and Leaf.
36 This provides some default functionality and boilerplate using the
39 A node may be a subnode of at most one parent.
42 # Default values for instance variables
43 type = None # int: token number (< 256) or symbol number (>= 256)
44 parent = None # Parent node pointer, or None
45 children = () # Tuple of subnodes
49 def __new__(cls, *args, **kwds):
50 """Constructor that prevents Base from being instantiated."""
51 assert cls is not Base, "Cannot instantiate Base"
52 return object.__new__(cls)
54 def __eq__(self, other):
56 Compare two nodes for equality.
58 This calls the method _eq().
60 if self.__class__ is not other.__class__:
62 return self._eq(other)
64 __hash__ = None # For Py3 compatibility.
68 Compare two nodes for equality.
70 This is called by __eq__ and __ne__. It is only called if the two nodes
71 have the same type. This must be implemented by the concrete subclass.
72 Nodes should be considered equal if they have the same structure,
73 ignoring the prefix string and other context information.
75 raise NotImplementedError
79 Return a cloned (deep) copy of self.
81 This must be implemented by the concrete subclass.
83 raise NotImplementedError
87 Return a post-order iterator for the tree.
89 This must be implemented by the concrete subclass.
91 raise NotImplementedError
95 Return a pre-order iterator for the tree.
97 This must be implemented by the concrete subclass.
99 raise NotImplementedError
101 def replace(self, new):
102 """Replace this node with a new one in the parent."""
103 assert self.parent is not None, str(self)
104 assert new is not None
105 if not isinstance(new, list):
109 for ch in self.parent.children:
111 assert not found, (self.parent.children, self, new)
113 l_children.extend(new)
116 l_children.append(ch)
117 assert found, (self.children, self, new)
118 self.parent.changed()
119 self.parent.children = l_children
121 x.parent = self.parent
124 def get_lineno(self):
125 """Return the line number which generated the invocant node."""
127 while not isinstance(node, Leaf):
128 if not node.children:
130 node = node.children[0]
135 self.parent.changed()
136 self.was_changed = True
140 Remove the node from the tree. Returns the position of the node in its
141 parent's children before it was removed.
144 for i, node in enumerate(self.parent.children):
146 self.parent.changed()
147 del self.parent.children[i]
152 def next_sibling(self):
154 The node immediately following the invocant in their parent's children
155 list. If the invocant does not have a next sibling, it is None
157 if self.parent is None:
160 # Can't use index(); we need to test by identity
161 for i, child in enumerate(self.parent.children):
164 return self.parent.children[i+1]
169 def prev_sibling(self):
171 The node immediately preceding the invocant in their parent's children
172 list. If the invocant does not have a previous sibling, it is None.
174 if self.parent is None:
177 # Can't use index(); we need to test by identity
178 for i, child in enumerate(self.parent.children):
182 return self.parent.children[i-1]
185 for child in self.children:
186 yield from child.leaves()
189 if self.parent is None:
191 return 1 + self.parent.depth()
193 def get_suffix(self):
195 Return the string immediately following the invocant node. This is
196 effectively equivalent to node.next_sibling.prefix
198 next_sib = self.next_sibling
201 return next_sib.prefix
203 if sys.version_info < (3, 0):
205 return str(self).encode("ascii")
209 """Concrete implementation for interior nodes."""
211 def __init__(self,type, children,
214 fixers_applied=None):
218 Takes a type constant (a symbol number >= 256), a sequence of
219 child nodes, and an optional context keyword argument.
221 As a side effect, the parent pointers of the children are updated.
223 assert type >= 256, type
225 self.children = list(children)
226 for ch in self.children:
227 assert ch.parent is None, repr(ch)
229 if prefix is not None:
232 self.fixers_applied = fixers_applied[:]
234 self.fixers_applied = None
237 """Return a canonical string representation."""
238 return "%s(%s, %r)" % (self.__class__.__name__,
239 type_repr(self.type),
242 def __unicode__(self):
244 Return a pretty string representation.
246 This reproduces the input source exactly.
248 return "".join(map(str, self.children))
250 if sys.version_info > (3, 0):
251 __str__ = __unicode__
253 def _eq(self, other):
254 """Compare two nodes for equality."""
255 return (self.type, self.children) == (other.type, other.children)
258 """Return a cloned (deep) copy of self."""
259 return Node(self.type, [ch.clone() for ch in self.children],
260 fixers_applied=self.fixers_applied)
262 def post_order(self):
263 """Return a post-order iterator for the tree."""
264 for child in self.children:
265 yield from child.post_order()
269 """Return a pre-order iterator for the tree."""
271 for child in self.children:
272 yield from child.pre_order()
277 The whitespace and comments preceding this node in the input.
279 if not self.children:
281 return self.children[0].prefix
284 def prefix(self, prefix):
286 self.children[0].prefix = prefix
288 def set_child(self, i, child):
290 Equivalent to 'node.children[i] = child'. This method also sets the
291 child's parent attribute appropriately.
294 self.children[i].parent = None
295 self.children[i] = child
298 def insert_child(self, i, child):
300 Equivalent to 'node.children.insert(i, child)'. This method also sets
301 the child's parent attribute appropriately.
304 self.children.insert(i, child)
307 def append_child(self, child):
309 Equivalent to 'node.children.append(child)'. This method also sets the
310 child's parent attribute appropriately.
313 self.children.append(child)
319 """Concrete implementation for leaf nodes."""
321 # Default values for instance variables
322 _prefix = "" # Whitespace and comments preceding this token in the input
323 lineno = 0 # Line where this token starts in the input
324 column = 0 # Column where this token tarts in the input
326 def __init__(self, type, value,
333 Takes a type constant (a token number < 256), a string value, and an
334 optional context keyword argument.
336 assert 0 <= type < 256, type
337 if context is not None:
338 self._prefix, (self.lineno, self.column) = context
341 if prefix is not None:
342 self._prefix = prefix
343 self.fixers_applied = fixers_applied[:]
346 """Return a canonical string representation."""
347 from .pgen2.token import tok_name
348 return "%s(%s, %r)" % (self.__class__.__name__,
349 tok_name.get(self.type, self.type),
352 def __unicode__(self):
354 Return a pretty string representation.
356 This reproduces the input source exactly.
358 return self.prefix + str(self.value)
360 if sys.version_info > (3, 0):
361 __str__ = __unicode__
363 def _eq(self, other):
364 """Compare two nodes for equality."""
365 return (self.type, self.value) == (other.type, other.value)
368 """Return a cloned (deep) copy of self."""
369 return Leaf(self.type, self.value,
370 (self.prefix, (self.lineno, self.column)),
371 fixers_applied=self.fixers_applied)
376 def post_order(self):
377 """Return a post-order iterator for the tree."""
381 """Return a pre-order iterator for the tree."""
387 The whitespace and comments preceding this token in the input.
392 def prefix(self, prefix):
394 self._prefix = prefix
396 def convert(gr, raw_node):
398 Convert raw node information to a Node or Leaf instance.
400 This is passed to the parser driver which calls it whenever a reduction of a
401 grammar rule produces a new complete node, so that the tree is build
404 type, value, context, children = raw_node
405 if children or type in gr.number2symbol:
406 # If there's exactly one child, return that child instead of
407 # creating a new node.
408 if len(children) == 1:
410 return Node(type, children, context=context)
412 return Leaf(type, value, context=context)
415 class BasePattern(object):
418 A pattern is a tree matching pattern.
420 It looks for a specific node type (token or symbol), and
421 optionally for a specific content.
423 This is an abstract base class. There are three concrete
426 - LeafPattern matches a single leaf node;
427 - NodePattern matches a single node (usually non-leaf);
428 - WildcardPattern matches a sequence of nodes of variable length.
431 # Defaults for instance variables
432 type = None # Node type (token if < 256, symbol if >= 256)
433 content = None # Optional content matching pattern
434 name = None # Optional name used to store match in results dict
436 def __new__(cls, *args, **kwds):
437 """Constructor that prevents BasePattern from being instantiated."""
438 assert cls is not BasePattern, "Cannot instantiate BasePattern"
439 return object.__new__(cls)
442 args = [type_repr(self.type), self.content, self.name]
443 while args and args[-1] is None:
445 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
449 A subclass can define this as a hook for optimizations.
451 Returns either self or another node with the same effect.
455 def match(self, node, results=None):
457 Does this pattern exactly match a node?
459 Returns True if it matches, False if not.
461 If results is not None, it must be a dict which will be
462 updated with the nodes matching named subpatterns.
464 Default implementation for non-wildcard patterns.
466 if self.type is not None and node.type != self.type:
468 if self.content is not None:
470 if results is not None:
472 if not self._submatch(node, r):
476 if results is not None and self.name:
477 results[self.name] = node
480 def match_seq(self, nodes, results=None):
482 Does this pattern exactly match a sequence of nodes?
484 Default implementation for non-wildcard patterns.
488 return self.match(nodes[0], results)
490 def generate_matches(self, nodes):
492 Generator yielding all matches for this pattern.
494 Default implementation for non-wildcard patterns.
497 if nodes and self.match(nodes[0], r):
501 class LeafPattern(BasePattern):
503 def __init__(self, type=None, content=None, name=None):
505 Initializer. Takes optional type, content, and name.
507 The type, if given must be a token type (< 256). If not given,
508 this matches any *leaf* node; the content may still be required.
510 The content, if given, must be a string.
512 If a name is given, the matching node is stored in the results
516 assert 0 <= type < 256, type
517 if content is not None:
518 assert isinstance(content, str), repr(content)
520 self.content = content
523 def match(self, node, results=None):
524 """Override match() to insist on a leaf node."""
525 if not isinstance(node, Leaf):
527 return BasePattern.match(self, node, results)
529 def _submatch(self, node, results=None):
531 Match the pattern's content to the node's children.
533 This assumes the node type matches and self.content is not None.
535 Returns True if it matches, False if not.
537 If results is not None, it must be a dict which will be
538 updated with the nodes matching named subpatterns.
540 When returning False, the results dict may still be updated.
542 return self.content == node.value
545 class NodePattern(BasePattern):
549 def __init__(self, type=None, content=None, name=None):
551 Initializer. Takes optional type, content, and name.
553 The type, if given, must be a symbol type (>= 256). If the
554 type is None this matches *any* single node (leaf or not),
555 except if content is not None, in which it only matches
556 non-leaf nodes that also match the content pattern.
558 The content, if not None, must be a sequence of Patterns that
559 must match the node's children exactly. If the content is
560 given, the type must not be None.
562 If a name is given, the matching node is stored in the results
566 assert type >= 256, type
567 if content is not None:
568 assert not isinstance(content, str), repr(content)
569 content = list(content)
570 for i, item in enumerate(content):
571 assert isinstance(item, BasePattern), (i, item)
572 if isinstance(item, WildcardPattern):
573 self.wildcards = True
575 self.content = content
578 def _submatch(self, node, results=None):
580 Match the pattern's content to the node's children.
582 This assumes the node type matches and self.content is not None.
584 Returns True if it matches, False if not.
586 If results is not None, it must be a dict which will be
587 updated with the nodes matching named subpatterns.
589 When returning False, the results dict may still be updated.
592 for c, r in generate_matches(self.content, node.children):
593 if c == len(node.children):
594 if results is not None:
598 if len(self.content) != len(node.children):
600 for subpattern, child in zip(self.content, node.children):
601 if not subpattern.match(child, results):
606 class WildcardPattern(BasePattern):
609 A wildcard pattern can match zero or more nodes.
611 This has all the flexibility needed to implement patterns like:
615 (...)* (...)+ (...)? (...){m,n}
617 except it always uses non-greedy matching.
620 def __init__(self, content=None, min=0, max=HUGE, name=None):
625 content: optional sequence of subsequences of patterns;
626 if absent, matches one node;
627 if present, each subsequence is an alternative [*]
628 min: optional minimum number of times to match, default 0
629 max: optional maximum number of times to match, default HUGE
630 name: optional name assigned to this match
632 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
633 equivalent to (a b c | d e | f g h); if content is None,
634 this is equivalent to '.' in regular expression terms.
635 The min and max parameters work as follows:
636 min=0, max=maxint: .*
637 min=1, max=maxint: .+
640 If content is not None, replace the dot with the parenthesized
641 list of alternatives, e.g. (a b c | d e | f g h)*
643 assert 0 <= min <= max <= HUGE, (min, max)
644 if content is not None:
645 content = tuple(map(tuple, content)) # Protect against alterations
646 # Check sanity of alternatives
647 assert len(content), repr(content) # Can't have zero alternatives
649 assert len(alt), repr(alt) # Can have empty alternatives
650 self.content = content
656 """Optimize certain stacked wildcard patterns."""
658 if (self.content is not None and
659 len(self.content) == 1 and len(self.content[0]) == 1):
660 subpattern = self.content[0][0]
661 if self.min == 1 and self.max == 1:
662 if self.content is None:
663 return NodePattern(name=self.name)
664 if subpattern is not None and self.name == subpattern.name:
665 return subpattern.optimize()
666 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
667 subpattern.min <= 1 and self.name == subpattern.name):
668 return WildcardPattern(subpattern.content,
669 self.min*subpattern.min,
670 self.max*subpattern.max,
674 def match(self, node, results=None):
675 """Does this pattern exactly match a node?"""
676 return self.match_seq([node], results)
678 def match_seq(self, nodes, results=None):
679 """Does this pattern exactly match a sequence of nodes?"""
680 for c, r in self.generate_matches(nodes):
682 if results is not None:
685 results[self.name] = list(nodes)
689 def generate_matches(self, nodes):
691 Generator yielding matches for a sequence of nodes.
694 nodes: sequence of nodes
697 (count, results) tuples where:
698 count: the match comprises nodes[:count];
699 results: dict containing named submatches.
701 if self.content is None:
702 # Shortcut for special case (see __init__.__doc__)
703 for count in range(self.min, 1 + min(len(nodes), self.max)):
706 r[self.name] = nodes[:count]
708 elif self.name == "bare_name":
709 yield self._bare_name_matches(nodes)
711 # The reason for this is that hitting the recursion limit usually
712 # results in some ugly messages about how RuntimeErrors are being
713 # ignored. We only have to do this on CPython, though, because other
714 # implementations don't have this nasty bug in the first place.
715 if hasattr(sys, "getrefcount"):
716 save_stderr = sys.stderr
717 sys.stderr = StringIO()
719 for count, r in self._recursive_matches(nodes, 0):
721 r[self.name] = nodes[:count]
724 # We fall back to the iterative pattern matching scheme if the recursive
725 # scheme hits the recursion limit.
726 for count, r in self._iterative_matches(nodes):
728 r[self.name] = nodes[:count]
731 if hasattr(sys, "getrefcount"):
732 sys.stderr = save_stderr
734 def _iterative_matches(self, nodes):
735 """Helper to iteratively yield the matches."""
741 # generate matches that use just one alt from self.content
742 for alt in self.content:
743 for c, r in generate_matches(alt, nodes):
745 results.append((c, r))
747 # for each match, iterate down the nodes
750 for c0, r0 in results:
751 # stop if the entire set of nodes has been matched
752 if c0 < nodelen and c0 <= self.max:
753 for alt in self.content:
754 for c1, r1 in generate_matches(alt, nodes[c0:]):
760 new_results.append((c0 + c1, r))
761 results = new_results
763 def _bare_name_matches(self, nodes):
764 """Special optimized matcher for bare_name."""
769 while not done and count < max:
771 for leaf in self.content:
772 if leaf[0].match(nodes[count], r):
776 r[self.name] = nodes[:count]
779 def _recursive_matches(self, nodes, count):
780 """Helper to recursively yield the matches."""
781 assert self.content is not None
782 if count >= self.min:
785 for alt in self.content:
786 for c0, r0 in generate_matches(alt, nodes):
787 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
794 class NegatedPattern(BasePattern):
796 def __init__(self, content=None):
800 The argument is either a pattern or None. If it is None, this
801 only matches an empty sequence (effectively '$' in regex
802 lingo). If it is not None, this matches whenever the argument
803 pattern doesn't have any matches.
805 if content is not None:
806 assert isinstance(content, BasePattern), repr(content)
807 self.content = content
809 def match(self, node):
810 # We never match a node in its entirety
813 def match_seq(self, nodes):
814 # We only match an empty sequence of nodes in its entirety
815 return len(nodes) == 0
817 def generate_matches(self, nodes):
818 if self.content is None:
819 # Return a match if there is an empty sequence
823 # Return a match if the argument pattern has no matches
824 for c, r in self.content.generate_matches(nodes):
829 def generate_matches(patterns, nodes):
831 Generator yielding matches for a sequence of patterns and nodes.
834 patterns: a sequence of patterns
835 nodes: a sequence of nodes
838 (count, results) tuples where:
839 count: the entire sequence of patterns matches nodes[:count];
840 results: dict containing named submatches.
845 p, rest = patterns[0], patterns[1:]
846 for c0, r0 in p.generate_matches(nodes):
850 for c1, r1 in generate_matches(rest, nodes[c0:]):