git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add failing test data
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver(object):
    """Feed a token stream to the pgen parser and return the resulting tree.

    An instance holds the grammar, an optional ``convert`` callable that is
    handed to ``parse.Parser``, and the logger used for debug output.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Remember the grammar, the converter and the logger.

        When *logger* is None, the logger returned by ``logging.getLogger()``
        is used.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        *tokens* is an iterable of 5-tuples
        ``(type, value, start, end, line_text)``.  The text that precedes
        each significant token (skipped columns, blank lines, comments and
        NL tokens) is accumulated into a "prefix" and passed to the parser
        together with the token's start position.

        With *debug* true, every token handed to the parser is logged at
        debug level.

        Raises ``parse.ParseError`` ("incomplete input") when the tokens run
        out before the parser reports that it is finished.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) is the position just past the previous token.
        lineno = 1
        column = 0
        # Width (in characters) of every currently open indentation level.
        indent_columns = []
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # There is a gap between the previous token and this one;
                # reconstruct it and add it to the prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines never reach the parser;
                # they only become part of the next token's prefix.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                # Map the generic OP token to the grammar's specific type.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if type in {token.INDENT, token.DEDENT}:
                # Hold the prefix back: it is re-attached (fully or in part)
                # to the token that follows the INDENT/DEDENT.
                _prefix = prefix
                prefix = ""
            if type == token.DEDENT:
                # Lines still indented at least as deep as the block being
                # closed go with the DEDENT; the rest is carried forward.
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(_prefix, _indent_col)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type == token.INDENT:
                indent_columns.append(len(value))
                if _prefix.startswith(value):
                    # Don't double-indent.  Since we're delaying the prefix that
                    # would normally belong to INDENT, we need to put the value
                    # at the end versus at the beginning.
                    _prefix = _prefix[len(value):] + value
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        The stream is tokenized through its ``readline`` method.
        """
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Currently a plain alias for ``parse_stream_raw``.
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened in text mode with the given *encoding* and is
        closed again once parsing finishes or fails.
        """
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)

    def _partially_consume_prefix(self, prefix, column):
        """Split *prefix* at the first line indented less than *column*.

        Returns a ``(consumed, rest)`` pair of strings.  Complete lines are
        consumed from the start of *prefix* until a line with content is
        found whose leading whitespace is narrower than *column*; that line
        and everything after it form ``rest``.  If no such line exists,
        ``consumed`` holds all complete lines and ``rest`` the trailing
        unterminated text.

        Indentation is measured in characters (a tab counts as one), which
        matches how ``parse_tokens`` records indentation levels via
        ``len(value)`` of the INDENT token.
        """
        lines = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == '\n':
                    if current_line.strip() and current_column < column:
                        res = ''.join(lines)
                        return res, prefix[len(res):]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char == ' ':
                current_column += 1
            elif char == '\t':
                # One character == one column, consistent with the
                # len()-based widths stored in indent_columns.
                current_column += 1
            elif char == '\n':
                # unexpected empty line
                current_column = 0
            else:
                # indent is finished
                wait_for_nl = True
        return ''.join(lines), current_line
151
152 def _generate_pickle_name(gt, cache_dir=None):
153     head, tail = os.path.splitext(gt)
154     if tail == ".txt":
155         tail = ""
156     name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
157     if cache_dir:
158         return os.path.join(cache_dir, os.path.basename(name))
159     else:
160         return name
161
162
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Return the grammar for *gt*, using the pickle *gp* when it is fresh.

    *gp* defaults to the name derived from *gt* by _generate_pickle_name().
    The pickle is loaded unless *force* is true or the pickle is not newer
    than the grammar text; in those cases the tables are regenerated from
    *gt* and, if *save* is true, written to *gp*.  A failed write
    (OSError) is only logged.
    """
    log = logging.getLogger() if logger is None else logger
    pickle_path = gp if gp is not None else _generate_pickle_name(gt)

    if not force and _newer(pickle_path, gt):
        # The cached pickle is up to date: just load it.
        g = grammar.Grammar()
        g.load(pickle_path)
        return g

    log.info("Generating grammar tables from %s", gt)
    g = pgen.generate_grammar(gt)
    if not save:
        return g
    log.info("Writing grammar tables to %s", pickle_path)
    try:
        g.dump(pickle_path)
    except OSError as e:
        log.info("Writing failed: %s", e)
    return g
182
183
184 def _newer(a, b):
185     """Inquire whether file a was written since file b."""
186     if not os.path.exists(a):
187         return False
188     if not os.path.exists(b):
189         return True
190     return os.path.getmtime(a) >= os.path.getmtime(b)
191
192
def load_packaged_grammar(package, grammar_source, cache_dir=None):
    """Load a grammar from a source file if present, else from package data.

    If *grammar_source* is an existing file, load_grammar(grammar_source) is
    called, so its automatic regeneration behavior is preserved; with a
    truthy *cache_dir* the pickle path is placed in that directory.

    Otherwise the pickled grammar is read with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from the base name of
    *grammar_source* by adding the Python version and a ``.pickle``
    extension.

    """
    if not os.path.isfile(grammar_source):
        source_basename = os.path.basename(grammar_source)
        resource_name = _generate_pickle_name(source_basename, cache_dir)
        payload = pkgutil.get_data(package, resource_name)
        packaged = grammar.Grammar()
        packaged.loads(payload)
        return packaged

    pickle_path = None
    if cache_dir:
        pickle_path = _generate_pickle_name(grammar_source, cache_dir)
    return load_grammar(grammar_source, gp=pickle_path)
212
213
def main(*args):
    """Script entry point: regenerate the grammar pickle files.

    Each argument is a path to a grammar text file; without arguments the
    paths come from ``sys.argv[1:]``.  Every grammar is rebuilt with
    load_grammar(force=True) and saved.  Always returns True.
    """
    grammar_paths = args or sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for grammar_path in grammar_paths:
        load_grammar(grammar_path, save=True, force=True)
    return True
226
if __name__ == "__main__":
    # A truthy result from main() becomes exit status 0, a falsy one 1.
    succeeded = main()
    sys.exit(0 if succeeded else 1)