git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add `explode_split` to documentation
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver(object):
    """High-level front end that feeds tokens to the parser.

    A Driver holds a grammar, an optional *convert* callable and a logger.
    Its ``parse_*`` methods all end up in :meth:`parse_tokens`, which
    computes the whitespace/comment prefix of every token and passes the
    tokens to a ``parse.Parser``.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Remember the parsing configuration.

        grammar -- the grammar object handed to ``parse.Parser``.
        convert -- passed through unchanged to ``parse.Parser``.
        logger  -- logger used for debug output; defaults to the root logger.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        tokens -- iterable of 5-tuples ``(type, value, start, end, line_text)``
                  where *start* and *end* are ``(lineno, column)`` pairs.
        debug  -- when true, log every token passed to the parser.

        Returns the parser's ``rootnode``.  Raises ``parse.ParseError`` with
        the message "incomplete input" when the tokens run out before the
        parser reports that it is done.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        # Stack with the width of every currently open INDENT.
        indent_columns = []
        tok_type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            tok_type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # The token does not start where the previous one ended:
                # whatever lies in between becomes part of the prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are never given to the
                # parser; they are accumulated into the next token's prefix.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if tok_type == token.OP:
                tok_type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[tok_type], value, prefix)
            if tok_type in {token.INDENT, token.DEDENT}:
                # Hold the accumulated prefix back; it is handed to the
                # token that follows the INDENT/DEDENT instead.
                _prefix = prefix
                prefix = ""
            if tok_type == token.DEDENT:
                # Give the DEDENT only the leading part of the prefix that is
                # indented at least as deep as the block being closed.
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(_prefix, _indent_col)
            if p.addtoken(tok_type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if tok_type == token.INDENT:
                indent_columns.append(len(value))
                if _prefix.startswith(value):
                    # Don't double-indent.  Since we're delaying the prefix that
                    # would normally belong to INDENT, we need to put the value
                    # at the end versus at the beginning.
                    _prefix = _prefix[len(value):] + value
            if tok_type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   tok_type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Tokenize *stream* via its ``readline`` and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Currently a plain delegation to :meth:`parse_stream_raw`.
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Open *filename* as text with *encoding* and return its syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse the string *text* and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)

    def _partially_consume_prefix(self, prefix, column):
        """Split *prefix* in front of the first line indented less than *column*.

        The prefix is scanned line by line.  Complete lines are consumed until
        a line with non-blank content starts at an indentation smaller than
        *column*; that line and everything after it are left over.

        Returns a ``(consumed, rest)`` pair of strings.  When no line
        qualifies, *consumed* holds every complete line and *rest* the
        trailing text that has not been terminated by a newline.
        """
        lines = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == '\n':
                    if current_line.strip() and current_column < column:
                        res = ''.join(lines)
                        return res, prefix[len(res):]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in ' \t':
                # A tab counts as ONE column: parse_tokens records indent
                # widths as len(value), i.e. one per character, so the two
                # measurements must use the same unit to be comparable.
                current_column += 1
            elif char == '\n':
                # unexpected empty line
                current_column = 0
            else:
                # indent is finished
                wait_for_nl = True
        return ''.join(lines), current_line
150
151
def _generate_pickle_name(gt):
    """Return the pickle file name that belongs to grammar file *gt*.

    A ``.txt`` extension is dropped, any other extension is kept.  The
    dot-joined fields of ``sys.version_info`` and ``.pickle`` are appended.
    """
    stem, extension = os.path.splitext(gt)
    base = stem if extension == ".txt" else stem + extension
    version_tag = ".".join(str(field) for field in sys.version_info)
    return base + version_tag + ".pickle"
157
158
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    gt     -- path of the grammar text file.
    gp     -- path of the pickle; derived from *gt* when None.
    save   -- write the pickle after the tables have been generated.
    force  -- regenerate the tables even if the pickle is up to date.
    logger -- logger for progress messages; defaults to the root logger.

    The pickle is loaded when it is at least as new as *gt* and *force* is
    false; otherwise the tables are generated from *gt*.  A failure to write
    the pickle (OSError) is logged and otherwise ignored.
    """
    log = logging.getLogger() if logger is None else logger
    pickle_path = gp if gp is not None else _generate_pickle_name(gt)

    if not force and _newer(pickle_path, gt):
        # The pickle is current: take the fast path.
        g = grammar.Grammar()
        g.load(pickle_path)
        return g

    log.info("Generating grammar tables from %s", gt)
    g = pgen.generate_grammar(gt)
    if not save:
        return g

    log.info("Writing grammar tables to %s", pickle_path)
    try:
        g.dump(pickle_path)
    except OSError as e:
        log.info("Writing failed: %s", e)
    return g
178
179
def _newer(a, b):
    """Inquire whether file a was written since file b.

    Returns False when *a* cannot be stat'ed, True when *a* can be but *b*
    cannot, and otherwise whether a's modification time is greater than or
    equal to b's.
    """
    # Stat each file exactly once and treat failure as "does not exist".
    # A separate exists() check followed by getmtime() can raise if the file
    # disappears in between.  ValueError is caught too because
    # os.path.exists() reports False for paths the OS cannot represent.
    try:
        a_mtime = os.path.getmtime(a)
    except (OSError, ValueError):
        return False
    try:
        b_mtime = os.path.getmtime(b)
    except (OSError, ValueError):
        return True
    return a_mtime >= b_mtime
187
188
def load_packaged_grammar(package, grammar_source):
    """Load a grammar from package data, or from *grammar_source* if it exists.

    When *grammar_source* is an existing file, the result of
    load_grammar(grammar_source) is returned, which keeps load_grammar's
    automatic regeneration behavior.

    Otherwise the pickled grammar is fetched with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from the base name of
    *grammar_source* by adding the Python version and a ``.pickle``
    extension.

    """
    if not os.path.isfile(grammar_source):
        source_basename = os.path.basename(grammar_source)
        payload = pkgutil.get_data(package,
                                   _generate_pickle_name(source_basename))
        packaged = grammar.Grammar()
        packaged.loads(payload)
        return packaged
    return load_grammar(grammar_source)
207
208
def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Every argument is the path of a grammar text file; without arguments
    the paths are taken from ``sys.argv[1:]``.  load_grammar is called for
    each path with saving and regeneration forced on.  Always returns True.
    """
    grammar_paths = args if args else sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for grammar_path in grammar_paths:
        load_grammar(grammar_path, save=True, force=True)
    return True
221
if __name__ == "__main__":
    # Exit status 0 when main() returns a truthy value, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)