git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

fix handling of comments in from imports (#829)
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver(object):
    """Drive the pgen2 parser from a token stream, a file or a string.

    The driver reconstructs the "prefix" of every token (the whitespace,
    newlines and comments found between the previous significant token and
    this one) and passes it to the parser together with the token.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Store the grammar, the optional converter and the logger.

        When *logger* is None, the logger named after this module is used.
        """
        self.grammar = grammar
        self.logger = logging.getLogger(__name__) if logger is None else logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse an iterable of 5-tuples (type, value, start, end, line).

        Returns the parser's root node once the parser reports that the
        input is complete.  Raises parse.ParseError("incomplete input", ...)
        when the tokens run out before that happens.  With *debug* set,
        every token passed to the parser is logged at debug level.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        parser = parse.Parser(self.grammar, self.convert)
        parser.setup()
        # Position (line, column) just past the last token accounted for.
        cur_line, cur_col = 1, 0
        # Width of every INDENT currently open, innermost last.
        indent_widths = []
        tok_type = tok_value = tok_start = tok_end = source_line = None
        prefix = ""
        for tok_type, tok_value, tok_start, tok_end, source_line in tokens:
            here = (cur_line, cur_col)
            if tok_start != here:
                # There is a gap between the previous token and this one:
                # rebuild the skipped text and add it to the prefix.
                assert here <= tok_start, (here, tok_start)
                start_line, start_col = tok_start
                if cur_line < start_line:
                    prefix += "\n" * (start_line - cur_line)
                    cur_line, cur_col = start_line, 0
                if cur_col < start_col:
                    prefix += source_line[cur_col:start_col]
                    cur_col = start_col
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines never reach the parser;
                # they are folded into the prefix of the next real token.
                prefix += tok_value
                cur_line, cur_col = tok_end
                if tok_value.endswith("\n"):
                    cur_line, cur_col = cur_line + 1, 0
                continue
            if tok_type == token.OP:
                # Generic operator tokens are mapped to their specific type.
                tok_type = grammar.opmap[tok_value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[tok_type], tok_value, prefix)
            # Prefix text to carry over to the token after this one; only
            # INDENT and DEDENT tokens hand anything over.
            carried_prefix = ""
            if tok_type == token.INDENT:
                indent_widths.append(len(tok_value))
                carried_prefix = prefix + tok_value
                prefix = ""
                tok_value = ""
            elif tok_type == token.DEDENT:
                prefix, carried_prefix = self._partially_consume_prefix(
                    prefix, indent_widths.pop()
                )
            if parser.addtoken(tok_type, tok_value, (prefix, tok_start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = carried_prefix
            cur_line, cur_col = tok_end
            if tok_value.endswith("\n"):
                cur_line, cur_col = cur_line + 1, 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   tok_type, tok_value, (prefix, tok_start))
        return parser.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Tokenize *stream* through its readline method and parse the result."""
        readline = stream.readline
        return self.parse_tokens(tokenize.generate_tokens(readline), debug)

    def parse_stream(self, stream, debug=False):
        """Parse *stream*; simply delegates to parse_stream_raw()."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Open *filename* in text mode with *encoding* and parse its contents."""
        with io.open(filename, "r", encoding=encoding) as source:
            tree = self.parse_stream(source, debug)
        return tree

    def parse_string(self, text, debug=False):
        """Parse source code given as the string *text*."""
        readline = io.StringIO(text).readline
        return self.parse_tokens(tokenize.generate_tokens(readline), debug)

    def _partially_consume_prefix(self, prefix, column):
        """Split *prefix* in two at the first line indented less than *column*.

        The prefix is scanned line by line.  Leading spaces and tabs each
        count as one column.  Scanning stops at the first completed line
        that is not blank and whose content starts left of *column*: the
        text before that line is returned as the first element and the
        rest of the prefix as the second.  If no such line exists, the
        first element holds every completed line and the second whatever
        unterminated text is left at the end.
        """
        finished = []
        pending = ""
        width = 0
        in_content = False
        for ch in prefix:
            pending += ch
            if in_content:
                if ch != '\n':
                    continue
                if pending.strip() and width < column:
                    consumed = ''.join(finished)
                    return consumed, prefix[len(consumed):]

                finished.append(pending)
                pending = ""
                width = 0
                in_content = False
            elif ch == ' ' or ch == '\t':
                width += 1
            elif ch == '\n':
                # unexpected empty line
                width = 0
            else:
                # indent is finished
                in_content = True
        return ''.join(finished), pending
143
144
145 def _generate_pickle_name(gt, cache_dir=None):
146     head, tail = os.path.splitext(gt)
147     if tail == ".txt":
148         tail = ""
149     name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
150     if cache_dir:
151         return os.path.join(cache_dir, os.path.basename(name))
152     else:
153         return name
154
155
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Return the grammar for *gt*, using the pickle *gp* when it is current.

    *gp* defaults to the name computed by _generate_pickle_name(gt).  The
    pickle is loaded when *force* is false and the pickle is at least as
    recent as the grammar file.  Otherwise the tables are generated from
    *gt* and, if *save* is true, written to *gp*; an OSError while writing
    is logged and otherwise ignored.
    """
    if logger is None:
        logger = logging.getLogger(__name__)
    if gp is None:
        gp = _generate_pickle_name(gt)

    if not force and _newer(gp, gt):
        # The pickle is up to date: just load it.
        g = grammar.Grammar()
        g.load(gp)
        return g

    logger.info("Generating grammar tables from %s", gt)
    g = pgen.generate_grammar(gt)
    if not save:
        return g
    logger.info("Writing grammar tables to %s", gp)
    try:
        g.dump(gp)
    except OSError as e:
        # Best effort only: the freshly generated grammar is still returned.
        logger.info("Writing failed: %s", e)
    return g
175
176
177 def _newer(a, b):
178     """Inquire whether file a was written since file b."""
179     if not os.path.exists(a):
180         return False
181     if not os.path.exists(b):
182         return True
183     return os.path.getmtime(a) >= os.path.getmtime(b)
184
185
def load_packaged_grammar(package, grammar_source, cache_dir=None):
    """Load a grammar from a file on disk or from pickled package data.

    If *grammar_source* names an existing file, load_grammar() is called on
    it, which keeps that function's automatic regeneration behavior; the
    pickle is placed in *cache_dir* when one is given.

    Otherwise the pickle name is derived from the base name of
    *grammar_source* (Python version plus a ``.pickle`` extension, inside
    *cache_dir* if given) and its contents are fetched with
        pkgutil.get_data(package, pickled_name)
    and loaded into a fresh grammar object.

    """
    if not os.path.isfile(grammar_source):
        pickled_name = _generate_pickle_name(
            os.path.basename(grammar_source), cache_dir
        )
        data = pkgutil.get_data(package, pickled_name)
        g = grammar.Grammar()
        g.loads(data)
        return g

    gp = None
    if cache_dir:
        gp = _generate_pickle_name(grammar_source, cache_dir)
    return load_grammar(grammar_source, gp=gp)
205
206
def main(*args):
    """Script entry point: regenerate the grammar pickle files.

    Every argument is the path of a grammar text file; without arguments
    the paths are taken from the command line.  load_grammar() is called
    for each path with regeneration forced and saving enabled.  Progress
    messages are logged to standard output.  Always returns True.
    """
    grammar_paths = args if args else sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for path in grammar_paths:
        load_grammar(path, save=True, force=True)
    return True


if __name__ == "__main__":
    # Exit status 0 on success (main() returned a true value), 1 otherwise.
    sys.exit(0 if main() else 1)