git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Move tokenizer config onto grammar, rename flag
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver(object):
    """Front end that tokenizes input and feeds it to the pgen2 parser.

    A driver is bound to one grammar.  Besides forwarding tokens to
    ``parse.Parser`` it reconstructs the "prefix" of every token: the
    whitespace, comments and blank lines that precede it in the source.
    """

    def __init__(
        self,
        grammar,
        convert=None,
        logger=None,
    ):
        """Create a driver for *grammar*.

        grammar: grammar object passed to the tokenizer and to parse.Parser.
        convert: optional value forwarded unchanged to parse.Parser.
        logger: logger used for debug output; when None, the logger named
            after this module is used.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        tokens: iterable of 5-tuples ``(type, value, start, end, line_text)``
            where *start* and *end* are ``(lineno, column)`` pairs.
        debug: when true, every token handed to the parser is logged at
            DEBUG level.

        Text skipped between tokens, together with COMMENT and NL tokens, is
        accumulated into a prefix string that is passed to the parser along
        with the next real token.

        Raises parse.ParseError("incomplete input") when the tokens are
        exhausted before the parser signals completion.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) is the position just past the last token seen.
        lineno = 1
        column = 0
        # Width of every currently open INDENT, innermost last.
        indent_columns = []
        tok_type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            tok_type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # There is a gap between the previous token and this one:
                # add the skipped newlines and in-line text to the prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines never reach the parser;
                # they become part of the next token's prefix.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if tok_type == token.OP:
                # Replace the generic OP type with the operator-specific one.
                tok_type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[tok_type], value, prefix)
            if tok_type == token.INDENT:
                # The INDENT token itself is sent with empty prefix and
                # value; its text and the pending prefix are carried over
                # to the token that follows it.
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif tok_type == token.DEDENT:
                # Split the pending prefix: the first part goes with the
                # DEDENT, the remainder is carried over to the next token.
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(tok_type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if tok_type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   tok_type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        *stream* only needs a ``readline`` method; it is tokenized with this
        driver's grammar.
        """
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened in text mode; *encoding* is passed straight to
        ``io.open``.
        """
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        # Same tokenizer setup as for any other stream, so reuse it.
        return self.parse_stream_raw(io.StringIO(text), debug)

    def _partially_consume_prefix(self, prefix, column):
        """Split *prefix* at the first line indented less than *column*.

        Returns a ``(consumed, remainder)`` pair whose concatenation is
        *prefix*.  Each leading space or tab of a line counts as one column.
        The split happens before the first newline-terminated, non-blank
        line whose indentation is narrower than *column*; blank lines
        directly above that line go into the remainder with it.  If there is
        no such line, *consumed* holds every complete line and *remainder*
        the trailing text that has no newline yet.
        """
        lines = []
        current_line = ""
        current_column = 0
        # False while still counting a line's indentation, True once its
        # first non-whitespace character has been seen.
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == '\n':
                    if current_line.strip() and current_column < column:
                        res = ''.join(lines)
                        return res, prefix[len(res):]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in ' \t':
                current_column += 1
            elif char == '\n':
                # unexpected empty line
                current_column = 0
            else:
                # indent is finished
                wait_for_nl = True
        return ''.join(lines), current_line
151
152
153 def _generate_pickle_name(gt, cache_dir=None):
154     head, tail = os.path.splitext(gt)
155     if tail == ".txt":
156         tail = ""
157     name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
158     if cache_dir:
159         return os.path.join(cache_dir, os.path.basename(name))
160     else:
161         return name
162
163
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Return the grammar for *gt*, using its pickle when that is current.

    gt: path of the grammar text file.
    gp: path of the pickle; derived from *gt* when None.
    save: write freshly generated tables to *gp* (an OSError while writing
        is logged and otherwise ignored).
    force: regenerate even if the pickle is not older than *gt*.
    logger: logger for progress messages; defaults to this module's logger.
    """
    log = logger if logger is not None else logging.getLogger(__name__)
    pickle_path = gp if gp is not None else _generate_pickle_name(gt)

    # Fast path: the pickle is usable, just load it.
    if not force and _newer(pickle_path, gt):
        cached = grammar.Grammar()
        cached.load(pickle_path)
        return cached

    log.info("Generating grammar tables from %s", gt)
    generated = pgen.generate_grammar(gt)
    if not save:
        return generated

    log.info("Writing grammar tables to %s", pickle_path)
    try:
        generated.dump(pickle_path)
    except OSError as exc:
        # Caching is best effort; the generated grammar is still returned.
        log.info("Writing failed: %s", exc)
    return generated
183
184
185 def _newer(a, b):
186     """Inquire whether file a was written since file b."""
187     if not os.path.exists(a):
188         return False
189     if not os.path.exists(b):
190         return True
191     return os.path.getmtime(a) >= os.path.getmtime(b)
192
193
def load_packaged_grammar(package, grammar_source, cache_dir=None):
    """Load a grammar from package data, or from source if it is on disk.

    When *grammar_source* names an existing file, the work is delegated to
    load_grammar(grammar_source), which keeps its automatic regeneration
    behavior; a truthy *cache_dir* decides where the pickle is kept.

    Otherwise the pickled grammar is fetched with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from the base name of
    *grammar_source* by adding the Python version and a ``.pickle``
    extension.

    """
    if not os.path.isfile(grammar_source):
        resource = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
        payload = pkgutil.get_data(package, resource)
        packaged = grammar.Grammar()
        packaged.loads(payload)
        return packaged

    if cache_dir:
        pickle_path = _generate_pickle_name(grammar_source, cache_dir)
    else:
        # Let load_grammar pick the default location next to the source.
        pickle_path = None
    return load_grammar(grammar_source, gp=pickle_path)
213
214
def main(*args):
    """Script entry point: (re)build the grammar pickle for each argument.

    Every argument is the path of a grammar text file and is passed to
    load_grammar with saving and regeneration forced on.  Without arguments
    the paths are taken from ``sys.argv[1:]``.  Always returns True.
    """
    grammar_files = args or sys.argv[1:]
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(message)s",
    )
    for grammar_file in grammar_files:
        load_grammar(grammar_file, save=True, force=True)
    return True
227
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    succeeded = main()
    sys.exit(0 if succeeded else 1)