blib2to3/pgen2/driver.py
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import codecs
import io
import os
import logging
import pkgutil
import sys

# Pgen imports
from . import grammar, parse, token, tokenize, pgen


class Driver(object):

    def __init__(
        self,
        grammar,
        convert=None,
        logger=None,
        tokenizer_config=tokenize.TokenizerConfig(),
    ):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        self.convert = convert
        self.tokenizer_config = tokenizer_config

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
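        # Note: a token's "prefix" is the source text (whitespace and
        # comments) that precedes it; blib2to3 attaches the prefix to the
        # following leaf so the tree can reproduce the input exactly.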
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        indent_columns = []
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
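            # INDENT/DEDENT bookkeeping: the indentation text is moved out
            # of the token value and into the prefix stream (_prefix), so
            # it ends up attached to a neighboring token's prefix instead.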
            if type == token.INDENT:
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif type == token.DEDENT:
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, config=self.tokenizer_config)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline,
            config=self.tokenizer_config,
        )
        return self.parse_tokens(tokens, debug)

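    # Splits `prefix` at a dedent: complete lines indented by at least
    # `column` are consumed and returned first; the remainder, starting at
    # the first less-indented nonblank line, is returned second.
    # Illustrative example (for column=4):
    #   _partially_consume_prefix("    # a\n  # b\n", 4)
    #   == ("    # a\n", "  # b\n")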
    def _partially_consume_prefix(self, prefix, column):
        lines = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == '\n':
                    if current_line.strip() and current_column < column:
                        res = ''.join(lines)
                        return res, prefix[len(res):]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in ' \t':
                current_column += 1
            elif char == '\n':
                # unexpected empty line
                current_column = 0
            else:
                # indent is finished
                wait_for_nl = True
        return ''.join(lines), current_line


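# For example (illustrative; the exact suffix depends on sys.version_info):
# on CPython 3.8.0 final, "Grammar.txt" maps to "Grammar3.8.0.final.0.pickle".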
def _generate_pickle_name(gt, cache_dir=None):
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if cache_dir:
        return os.path.join(cache_dir, os.path.basename(name))
    else:
        return name


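# Note: the pickle written next to the grammar acts as a cache; _newer()
# below decides whether it is fresh, and force=True skips the check and
# always regenerates the tables from the grammar text.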
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger(__name__)
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def load_packaged_grammar(package, grammar_source, cache_dir=None):
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    g = grammar.Grammar()
    g.loads(data)
    return g


def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True


if __name__ == "__main__":
    sys.exit(int(not main()))