All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
4 """Convert graminit.[ch] spit out by pgen to Python code.
6 Pgen is the Python parser generator. It is useful to quickly create a
7 parser from a grammar file in Python's grammar notation. But I don't
8 want my parsers to be written in C (yet), so I'm translating the
9 parsing tables to Python data structures and writing a Python parse
12 Note that the token numbers are constants determined by the standard
13 Python tokenizer. The standard token module defines these numbers and
14 their names (the names are not used much). The token numbers are
15 hardcoded into the Python tokenizer and into pgen. A Python
16 implementation of the Python tokenizer is also available, in the
17 standard tokenize module.
19 On the other hand, symbol numbers (representing the grammar's
20 non-terminals) are assigned by pgen based on the actual grammar
23 Note: this module is pretty much obsolete; the pgen module generates
24 equivalent grammar tables directly from the Grammar.txt input file
25 without having to invoke the Python pgen C program.
33 from pgen2 import grammar, token
36 class Converter(grammar.Grammar):
37 """Grammar subclass that reads classic pgen output files.
39 The run() method reads the tables as produced by the pgen parser
40 generator, typically contained in two C files, graminit.h and
41 graminit.c. The other methods are for internal use only.
43 See the base class for more documentation.
47 def run(self, graminit_h, graminit_c):
48 """Load the grammar tables from the text files written by pgen."""
49 self.parse_graminit_h(graminit_h)
50 self.parse_graminit_c(graminit_c)
53 def parse_graminit_h(self, filename):
54 """Parse the .h file written by pgen. (Internal)
56 This file is a sequence of #define statements defining the
57 nonterminals of the grammar as numbers. We build two tables
58 mapping the numbers to names and back.
63 except OSError as err:
64 print("Can't open %s: %s" % (filename, err))
66 self.symbol2number = {}
67 self.number2symbol = {}
71 mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
72 if not mo and line.strip():
73 print("%s(%s): can't parse %s" % (filename, lineno, line.strip()))
75 symbol, number = mo.groups()
77 assert symbol not in self.symbol2number
78 assert number not in self.number2symbol
79 self.symbol2number[symbol] = number
80 self.number2symbol[number] = symbol
83 def parse_graminit_c(self, filename):
84 """Parse the .c file written by pgen. (Internal)
86 The file looks as follows. The first two lines are always this:
88 #include "pgenheaders.h"
91 After that come four blocks:
93 1) one or more state definitions
94 2) a table defining dfas
95 3) a table defining labels
96 4) a struct defining the grammar
98 A state definition has the following form:
99 - one or more arc arrays, each of the form:
100 static arc arcs_<n>_<m>[<k>] = {
104 - followed by a state array, of the form:
105 static state states_<s>[<t>] = {
113 except OSError as err:
114 print("Can't open %s: %s" % (filename, err))
116 # The code below essentially uses f's iterator-ness!
119 # Expect the two #include lines
120 lineno, line = lineno + 1, next(f)
121 assert line == '#include "pgenheaders.h"\n', (lineno, line)
122 lineno, line = lineno + 1, next(f)
123 assert line == '#include "grammar.h"\n', (lineno, line)
125 # Parse the state definitions
126 lineno, line = lineno + 1, next(f)
129 while line.startswith("static arc "):
130 while line.startswith("static arc "):
131 mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
132 assert mo, (lineno, line)
133 n, m, k = list(map(int, mo.groups()))
136 lineno, line = lineno + 1, next(f)
137 mo = re.match(r"\s+{(\d+), (\d+)},$", line)
138 assert mo, (lineno, line)
139 i, j = list(map(int, mo.groups()))
141 lineno, line = lineno + 1, next(f)
142 assert line == "};\n", (lineno, line)
143 allarcs[(n, m)] = arcs
144 lineno, line = lineno + 1, next(f)
145 mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
146 assert mo, (lineno, line)
147 s, t = list(map(int, mo.groups()))
148 assert s == len(states), (lineno, line)
151 lineno, line = lineno + 1, next(f)
152 mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
153 assert mo, (lineno, line)
154 k, n, m = list(map(int, mo.groups()))
156 assert k == len(arcs), (lineno, line)
159 lineno, line = lineno + 1, next(f)
160 assert line == "};\n", (lineno, line)
161 lineno, line = lineno + 1, next(f)
166 mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
167 assert mo, (lineno, line)
168 ndfas = int(mo.group(1))
169 for i in range(ndfas):
170 lineno, line = lineno + 1, next(f)
171 mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', line)
172 assert mo, (lineno, line)
174 number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
175 assert self.symbol2number[symbol] == number, (lineno, line)
176 assert self.number2symbol[number] == symbol, (lineno, line)
177 assert x == 0, (lineno, line)
179 assert y == len(state), (lineno, line)
180 lineno, line = lineno + 1, next(f)
181 mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
182 assert mo, (lineno, line)
184 rawbitset = eval(mo.group(1))
185 for i, c in enumerate(rawbitset):
190 dfas[number] = (state, first)
191 lineno, line = lineno + 1, next(f)
192 assert line == "};\n", (lineno, line)
197 lineno, line = lineno + 1, next(f)
198 mo = re.match(r"static label labels\[(\d+)\] = {$", line)
199 assert mo, (lineno, line)
200 nlabels = int(mo.group(1))
201 for i in range(nlabels):
202 lineno, line = lineno + 1, next(f)
203 mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
204 assert mo, (lineno, line)
211 labels.append((x, y))
212 lineno, line = lineno + 1, next(f)
213 assert line == "};\n", (lineno, line)
216 # Parse the grammar struct
217 lineno, line = lineno + 1, next(f)
218 assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
219 lineno, line = lineno + 1, next(f)
220 mo = re.match(r"\s+(\d+),$", line)
221 assert mo, (lineno, line)
222 ndfas = int(mo.group(1))
223 assert ndfas == len(self.dfas)
224 lineno, line = lineno + 1, next(f)
225 assert line == "\tdfas,\n", (lineno, line)
226 lineno, line = lineno + 1, next(f)
227 mo = re.match(r"\s+{(\d+), labels},$", line)
228 assert mo, (lineno, line)
229 nlabels = int(mo.group(1))
230 assert nlabels == len(self.labels), (lineno, line)
231 lineno, line = lineno + 1, next(f)
232 mo = re.match(r"\s+(\d+)$", line)
233 assert mo, (lineno, line)
234 start = int(mo.group(1))
235 assert start in self.number2symbol, (lineno, line)
237 lineno, line = lineno + 1, next(f)
238 assert line == "};\n", (lineno, line)
240 lineno, line = lineno + 1, next(f)
241 except StopIteration:
244 assert 0, (lineno, line)
246 def finish_off(self):
247 """Create additional useful structures. (Internal)."""
248 self.keywords = {} # map from keyword strings to arc labels
249 self.tokens = {} # map from numeric token values to arc labels
250 for ilabel, (type, value) in enumerate(self.labels):
251 if type == token.NAME and value is not None:
252 self.keywords[value] = ilabel
254 self.tokens[type] = ilabel