# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""

# Local imports
from blib2to3.pgen2 import token
from typing import Callable, Dict, List, Optional, Set, Text, Tuple, Union

from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import NL, Context, RawNode, Leaf, Node


Results = Dict[Text, NL]
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
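
# Illustration only (not part of the original module): a sketch, with
# hypothetical values, of how pgen encodes a symbol's tables under the
# aliases above.  Each DFA state is a list of (label, next_state) arcs,
# and the companion dict is the symbol's first set (label -> 1); an arc
# (0, state) pointing back at the state itself marks an accepting state.
#
#     _example_states: DFA = [[(1, 1)], [(0, 1)]]   # state 0 -> state 1 on label 1; state 1 accepts
#     _example_dfas: DFAS = (_example_states, {1: 1})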


def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    # Default converter: wrap a raw (type, value, context, children) tuple
    # in a pytree Node.
    assert node[3] is not None
    return Node(type=node[0], children=node[3], context=node[2])


class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(
        self, msg: Text, type: Optional[int], value: Optional[Text], context: Context
    ) -> None:
        Exception.__init__(
            self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
        )
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context


class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token; may raise ParseError
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        self.convert = convert or lam_sub
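
    # Illustration only (not in the original source): a convert callable has
    # the shape described above -- it receives the Grammar and a RawNode and
    # returns a pytree node.  lam_sub plays this role by default; a custom
    # converter could, hypothetically, also build Leaf objects for tokens:
    #
    #     def example_convert(gr: Grammar, raw: RawNode) -> NL:
    #         type_, value, context, children = raw
    #         if children is not None:
    #             return Node(type=type_, children=children, context=context)
    #         assert value is not None
    #         return Leaf(type_, value, context=context)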

    def setup(self, start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()

    def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break  # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: Optional[Text], context: Context) -> int:
        """Turn a token into a label.  (Internal)"""
        if type == token.NAME:
            # Keep a listing of all used names
            assert value is not None
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(
        self, type: int, value: Optional[Text], newstate: int, context: Context
    ) -> None:
        """Shift a token.  (Internal)"""
        dfa, state, node = self.stack[-1]
        assert value is not None
        assert context is not None
        rawnode: RawNode = (type, value, context, None)
        newnode = self.convert(self.grammar, rawnode)
        if newnode is not None:
            assert node[-1] is not None
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
        """Push a nonterminal.  (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode: RawNode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self) -> None:
        """Pop a nonterminal.  (Internal)"""
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                # Attach the finished nonterminal to its parent node's children.
                dfa, state, node = self.stack[-1]
                assert node[-1] is not None
                node[-1].append(newnode)
            else:
                # The start symbol was popped: this is the root of the tree.
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names
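
# Illustration only (not part of the original module): the usage sequence from
# the Parser docstring, spelled out.  This sketch assumes `grammar` is a loaded
# blib2to3.pgen2.grammar.Grammar and that `tokens` yields (type, value, context)
# triples obtained as described in driver.py; the names are hypothetical.
#
#     p = Parser(grammar)            # no converter: build a concrete syntax tree
#     p.setup()                      # reset to the grammar's start symbol
#     for type_, value, context in tokens:
#         if p.addtoken(type_, value, context):
#             break                  # start symbol accepted; parsing is complete
#     root = p.rootnode              # root of the resulting tree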