# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""

# Local imports
from . import token
from typing import Callable, Dict, List, Optional, Set, Text, Tuple, Union
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import NL, Context, RawNode, Leaf, Node


Results = Dict[Text, NL]
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
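# Shape of the tables above, as this module uses them: a DFA is a list of
# states, and each state is a list of (label, next_state) arcs, where the
# label is an index into grammar.labels.  A DFAS pairs such a state list with
# a dict whose keys are the labels in the construct's first set; addtoken()
# only ever tests membership in it.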


def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    assert node[3] is not None
    return Node(type=node[0], children=node[3], context=node[2])
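

# Illustrative sketch (a hypothetical helper; nothing in this module calls
# it): a Convert callable receives the grammar and a raw (type, value,
# context, children) tuple, as described in Parser.__init__ below.  lam_sub
# above only builds symbol Nodes; this variant also wraps tokens in Leaf
# objects from blib2to3.pytree.
def _example_convert(grammar: Grammar, raw: RawNode) -> NL:
    type_, value, context, children = raw
    if children is None:
        # Token: value holds the matched string.
        assert value is not None
        return Leaf(type_, value, context=context)
    # Symbol: children are the already-converted child nodes (bottom-up).
    return Node(type=type_, children=children, context=context)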


class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(
        self, msg: Text, type: Optional[int], value: Optional[Text], context: Context
    ) -> None:
        Exception.__init__(
            self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context)
        )
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context


class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token; may raise ParseError
            break
    root = p.rootnode                 # root of abstract syntax tree

    An illustrative sketch of this loop, _parse_tokens_example, appears at
    the end of this module.

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.  The _example_convert sketch near lam_sub at the top of
        this module shows a function of this shape.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        self.convert = convert or lam_sub

    def setup(self, start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()

    def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break  # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: Optional[Text], context: Context) -> int:
        """Turn a token into a label.  (Internal)"""
        if type == token.NAME:
            # Keep a listing of all used names
            assert value is not None
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(
        self, type: int, value: Optional[Text], newstate: int, context: Context
    ) -> None:
        """Shift a token.  (Internal)"""
        dfa, state, node = self.stack[-1]
        assert value is not None
        assert context is not None
        rawnode: RawNode = (type, value, context, None)
        newnode = self.convert(self.grammar, rawnode)
        if newnode is not None:
            assert node[-1] is not None
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
        """Push a nonterminal.  (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode: RawNode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self) -> None:
        """Pop a nonterminal.  (Internal)"""
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                dfa, state, node = self.stack[-1]
                assert node[-1] is not None
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names
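

# Illustrative sketch (a hypothetical helper; nothing in this module calls
# it): the driving loop described in the Parser docstring.  It assumes
# `tokens` is a sequence of (type, value, context) triples of the kind that
# driver.py prepares from the tokenizer; see driver.py for how those are
# produced in practice.
def _parse_tokens_example(
    grammar: Grammar,
    tokens: List[Tuple[int, Text, Context]],
    convert: Optional[Convert] = None,
) -> NL:
    """Feed pre-classified tokens to a fresh Parser and return the root node."""
    p = Parser(grammar, convert)
    p.setup()
    for type_, value, context in tokens:
        if p.addtoken(type_, value, context):
            # addtoken() returned True: the start symbol has been completed,
            # normally when the ENDMARKER token arrives.
            break
    assert p.rootnode is not None, "token stream ended before parsing completed"
    return p.rootnode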