git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Switch from attrs to dataclasses (#1116)
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver:
    """Feed a token stream to ``parse.Parser`` and return the resulting tree.

    While iterating over the tokens the driver accumulates a *prefix* (the
    whitespace, comments and blank lines found between significant tokens)
    and passes it to the parser together with each token's start position.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Remember the grammar, the optional *convert* callable and a logger.

        When *logger* is None, ``logging.getLogger(__name__)`` is used.
        """
        self.grammar = grammar
        self.convert = convert
        self.logger = logging.getLogger(__name__) if logger is None else logger

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        *tokens* yields 5-tuples ``(type, value, start, end, line_text)``.
        With *debug* true, every token handed to the parser is logged.
        Raises ``parse.ParseError`` if the tokens run out before the parser
        reports that it is done.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        parser = parse.Parser(self.grammar, self.convert)
        parser.setup()
        # Position (row, col) just past the last piece of text accounted for.
        row, col = 1, 0
        # Width of each currently open INDENT, innermost last.
        indent_widths = []
        # Pre-bound so the "incomplete input" error works for an empty stream.
        tok_type = tok_value = tok_start = tok_end = src_line = None
        pending = ""
        for tok_type, tok_value, tok_start, tok_end, src_line in tokens:
            if tok_start != (row, col):
                assert (row, col) <= tok_start, ((row, col), tok_start)
                start_row, start_col = tok_start
                if row < start_row:
                    # Skipped whole lines are represented as bare newlines.
                    pending += "\n" * (start_row - row)
                    row, col = start_row, 0
                if col < start_col:
                    # Same-line gap: copy the skipped text from the source line.
                    pending += src_line[col:start_col]
                    col = start_col
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines only ever extend the prefix.
                pending += tok_value
                row, col = tok_end
                if tok_value.endswith("\n"):
                    row, col = row + 1, 0
                continue
            if tok_type == token.OP:
                # Generic OP tokens are mapped to their specific operator type.
                tok_type = grammar.opmap[tok_value]
            if debug:
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[tok_type], tok_value, pending
                )
            # Prefix carried over to the token *after* an INDENT/DEDENT.
            carried = ""
            if tok_type == token.INDENT:
                indent_widths.append(len(tok_value))
                carried = pending + tok_value
                pending = ""
                tok_value = ""
            elif tok_type == token.DEDENT:
                closed_width = indent_widths.pop()
                pending, carried = self._partially_consume_prefix(
                    pending, closed_width
                )
            if parser.addtoken(tok_type, tok_value, (pending, tok_start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            pending = carried
            row, col = tok_end
            if tok_value.endswith("\n"):
                row, col = row + 1, 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError(
                "incomplete input", tok_type, tok_value, (pending, tok_start)
            )
        return parser.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Tokenize *stream* via its ``readline`` and return the syntax tree."""
        token_iter = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(token_iter, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Thin wrapper that delegates to ``parse_stream_raw``.
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Open *filename* in text mode with *encoding* and parse its contents."""
        with io.open(filename, "r", encoding=encoding) as source:
            return self.parse_stream(source, debug)

    def parse_string(self, text, debug=False):
        """Parse the source held in the string *text* and return the syntax tree."""
        reader = io.StringIO(text)
        token_iter = tokenize.generate_tokens(reader.readline, grammar=self.grammar)
        return self.parse_tokens(token_iter, debug)

    def _partially_consume_prefix(self, prefix, column):
        """Split *prefix* at the first non-blank line indented less than *column*.

        Returns ``(consumed, rest)``. *consumed* holds the leading complete
        lines; *rest* starts at the first newline-terminated, non-blank line
        whose leading spaces/tabs number fewer than *column*. If no such line
        exists, *rest* is whatever trailing text has not yet been closed by a
        newline.
        """
        finished = []
        pending_line = ""
        indent = 0
        in_content = False
        for ch in prefix:
            pending_line += ch
            if in_content:
                if ch != "\n":
                    continue
                if pending_line.strip() and indent < column:
                    consumed = "".join(finished)
                    return consumed, prefix[len(consumed) :]
                finished.append(pending_line)
                pending_line = ""
                indent = 0
                in_content = False
            elif ch == " " or ch == "\t":
                indent += 1
            elif ch == "\n":
                # unexpected empty line
                indent = 0
            else:
                # indent is finished
                in_content = True
        return "".join(finished), pending_line
144
145
def _generate_pickle_name(gt, cache_dir=None):
    """Derive the pickle file name that corresponds to grammar file *gt*.

    A ``.txt`` extension is dropped (any other extension is kept), then the
    dot-joined ``sys.version_info`` and ``.pickle`` are appended. When
    *cache_dir* is truthy, only the base name is kept and placed under it.
    """
    stem, ext = os.path.splitext(gt)
    if ext != ".txt":
        stem += ext
    version_tag = ".".join(str(part) for part in sys.version_info)
    pickle_name = stem + version_tag + ".pickle"
    if not cache_dir:
        return pickle_name
    return os.path.join(cache_dir, os.path.basename(pickle_name))
155
156
def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    *gt* is the grammar text file and *gp* the pickle path (derived from *gt*
    when None). The tables are regenerated from *gt* when *force* is true or
    the pickle is not at least as new as *gt*; otherwise the pickle is
    loaded. With *save* true a regenerated grammar is written back to *gp*;
    an ``OSError`` during that write is logged and otherwise ignored.
    """
    log = logger if logger is not None else logging.getLogger(__name__)
    pickle_path = gp if gp is not None else _generate_pickle_name(gt)

    if not force and _newer(pickle_path, gt):
        # The pickle is up to date: skip regeneration entirely.
        g = grammar.Grammar()
        g.load(pickle_path)
        return g

    log.info("Generating grammar tables from %s", gt)
    g = pgen.generate_grammar(gt)
    if save:
        log.info("Writing grammar tables to %s", pickle_path)
        try:
            g.dump(pickle_path)
        except OSError as e:
            # Best effort only: the freshly generated grammar is still returned.
            log.info("Writing failed: %s", e)
    return g
175
176
def _newer(a, b):
    """Inquire whether file a was written since file b.

    Returns False when *a* does not exist, True when *a* exists but *b* does
    not, and otherwise whether *a*'s mtime is at least *b*'s.
    """
    if not os.path.exists(a):
        return False
    return not os.path.exists(b) or os.path.getmtime(a) >= os.path.getmtime(b)
184
185
def load_packaged_grammar(package, grammar_source, cache_dir=None):
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    Raises FileNotFoundError when ``pkgutil.get_data`` returns no data for
    the pickled grammar, instead of handing ``None`` to the grammar loader.

    """
    if os.path.isfile(grammar_source):
        # Only redirect the pickle into cache_dir when one was requested;
        # otherwise let load_grammar derive its default pickle path.
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    if data is None:
        # pkgutil.get_data signals "package not found / loader cannot serve
        # data" by returning None rather than raising.
        raise FileNotFoundError(
            "pickled grammar %r could not be loaded from package %r"
            % (pickled_name, package)
        )
    g = grammar.Grammar()
    g.loads(data)
    return g
205
206
def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    When called without arguments, ``sys.argv[1:]`` is used. Always returns
    True.
    """
    grammar_files = args or sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for grammar_file in grammar_files:
        # force=True: regenerate even if an up-to-date pickle already exists.
        load_grammar(grammar_file, save=True, force=True)
    return True
218
219
if __name__ == "__main__":
    # Exit status 0 when main() reports success (truthy), 1 otherwise.
    succeeded = main()
    sys.exit(0 if succeeded else 1)