git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

committers += jelle
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver(object):
    """Feed a token stream to the pgen2 parser and return the syntax tree.

    Besides driving ``parse.Parser``, the driver reconstructs the "prefix"
    (whitespace, comments and blank lines) that precedes every significant
    token and passes it to the parser together with the token.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Remember the parser configuration.

        grammar -- grammar object passed to ``parse.Parser``.
        convert -- optional second argument passed to ``parse.Parser``.
        logger  -- logger used for debug output; the root logger when None.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        *tokens* is an iterable of 5-tuples
        ``(type, value, start, end, line_text)``.  When *debug* is true every
        token handed to the parser is logged at DEBUG level.

        Raises ``parse.ParseError`` ("incomplete input") when the tokens run
        out before the parser reports that it is done.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) is the position just past the last token seen.
        lineno = 1
        column = 0
        # Stack of indent widths, measured in characters (len of the INDENT
        # token's text), one entry per currently open indented block.
        indent_columns = []
        tok_type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            tok_type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # There is a gap between the previous token and this one;
                # rebuild the skipped text and add it to the prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines never reach the parser;
                # they become part of the next significant token's prefix.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if tok_type == token.OP:
                # ``grammar`` here is the imported module, not self.grammar.
                tok_type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[tok_type], value, prefix)
            if tok_type == token.INDENT:
                indent_columns.append(len(value))
                # The INDENT token itself is passed with empty prefix and
                # value; its text is carried over to the following token.
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif tok_type == token.DEDENT:
                _indent_col = indent_columns.pop()
                # Lines still indented at least as deep as the closed block
                # go to the DEDENT token; the rest waits for the next token.
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(tok_type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if tok_type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   tok_type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        The stream is tokenized line by line through ``stream.readline``.
        """
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened in text mode with the given *encoding*.
        """
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)

    def _partially_consume_prefix(self, prefix, column):
        """Split *prefix* at the first line indented less than *column*.

        Returns ``(consumed, rest)``.  ``consumed`` holds the leading
        complete lines whose indentation is at least *column* (lines made
        only of whitespace are consumed as well); ``rest`` starts at the
        first non-blank line indented less than *column*.  If no such line
        exists, ``rest`` is the trailing text after the last newline.

        Indentation is measured in characters, so a tab counts as one
        column.  This matches ``parse_tokens``, which records indent widths
        as ``len(value)`` of the INDENT token.
        """
        lines = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == '\n':
                    if current_line.strip() and current_column < column:
                        res = ''.join(lines)
                        return res, prefix[len(res):]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in ' \t':
                # One column per indent character, tab included, so the
                # result is comparable with the recorded indent widths.
                current_column += 1
            elif char == '\n':
                # unexpected empty line
                current_column = 0
            else:
                # indent is finished
                wait_for_nl = True
        return ''.join(lines), current_line
145
146
def _generate_pickle_name(gt, cache_dir=None):
    """Return the pickle file name that belongs to grammar file *gt*.

    A ``.txt`` extension is dropped (any other extension is kept), then the
    dot-joined ``sys.version_info`` and ``.pickle`` are appended.  When
    *cache_dir* is truthy, only the base name is kept and it is placed
    inside that directory.
    """
    stem, extension = os.path.splitext(gt)
    if extension != ".txt":
        stem += extension
    version_tag = ".".join(str(part) for part in sys.version_info)
    pickle_name = stem + version_tag + ".pickle"
    if not cache_dir:
        return pickle_name
    return os.path.join(cache_dir, os.path.basename(pickle_name))
156
157
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Return the grammar for *gt*, reusing pickle *gp* when it is current.

    The tables are regenerated from *gt* when *force* is true or when the
    pickle is missing or older than *gt*; otherwise they are loaded from the
    pickle.  Freshly generated tables are written back to *gp* if *save* is
    true; an ``OSError`` while writing is logged and otherwise ignored.
    *gp* defaults to the name derived from *gt*, *logger* to the root logger.
    """
    log = logging.getLogger() if logger is None else logger
    if gp is None:
        gp = _generate_pickle_name(gt)

    # Fast path: an up-to-date pickle exists and regeneration is not forced.
    if not force and _newer(gp, gt):
        tables = grammar.Grammar()
        tables.load(gp)
        return tables

    log.info("Generating grammar tables from %s", gt)
    tables = pgen.generate_grammar(gt)
    if save:
        log.info("Writing grammar tables to %s", gp)
        try:
            tables.dump(gp)
        except OSError as e:
            # Best effort only: the in-memory grammar is still usable.
            log.info("Writing failed: %s", e)
    return tables
177
178
def _newer(a, b):
    """Tell whether file *a* is at least as recent as file *b*.

    A missing *a* is never newer; an existing *a* is always newer than a
    missing *b*.  Otherwise modification times are compared, and equal
    times count as newer.
    """
    if os.path.exists(a):
        if os.path.exists(b):
            stamp_a = os.path.getmtime(a)
            stamp_b = os.path.getmtime(b)
            return stamp_a >= stamp_b
        return True
    return False
186
187
def load_packaged_grammar(package, grammar_source, cache_dir=None):
    """Load a grammar from disk if possible, else from package data.

    If *grammar_source* is an existing file, load_grammar(grammar_source) is
    used, so the usual automatic regeneration applies; with a truthy
    *cache_dir* the pickle is kept in that directory.

    Otherwise the pickled grammar is fetched with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is derived from the base name of
    *grammar_source* by adding the Python version and a ``.pickle``
    extension.

    """
    if not os.path.isfile(grammar_source):
        resource = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
        payload = pkgutil.get_data(package, resource)
        packaged = grammar.Grammar()
        packaged.loads(payload)
        return packaged

    gp = None
    if cache_dir:
        gp = _generate_pickle_name(grammar_source, cache_dir)
    return load_grammar(grammar_source, gp=gp)
207
208
def main(*args):
    """Script entry point: (re)build the grammar pickle files.

    Every argument is the path of a grammar text file; when none are given,
    the command-line arguments are used.  Each file is passed to
    load_grammar with regeneration forced and saving enabled.  Always
    returns True.
    """
    grammar_files = args if args else sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for grammar_file in grammar_files:
        load_grammar(grammar_file, save=True, force=True)
    return True
221
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    succeeded = main()
    sys.exit(int(not succeeded))