git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Added caching (#136)
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
# Author credited for this module.
__author__ = "Guido van Rossum <guido@python.org>"

# Names exported by a star import of this module.  Other module-level
# functions defined below (e.g. load_packaged_grammar, main) are not listed
# here and must be imported explicitly.
__all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver(object):
    """Front end that feeds tokens to a parse.Parser and returns its tree.

    A Driver holds a grammar, an optional ``convert`` argument and a logger,
    and offers helpers to parse a token iterable, a text stream, a file or a
    string.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Store the parsing configuration.

        grammar -- grammar object handed to parse.Parser for every parse.
        convert -- optional second argument for parse.Parser; it is passed
                   through unchanged.
        logger  -- logger used for debug output; when None, the logger
                   returned by logging.getLogger() (the root logger) is used.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        tokens -- iterable of 5-tuples (type, value, start, end, line_text),
                  where start and end are (lineno, column) pairs.
        debug  -- when true, log every token that is handed to the parser.

        Text lying between tokens, together with COMMENT and NL tokens, is
        not given to the parser as tokens of its own; it is collected into a
        "prefix" string that accompanies the next token passed to
        p.addtoken().

        Returns p.rootnode once p.addtoken() returns a true value.  Raises
        parse.ParseError("incomplete input", ...) when the tokens run out
        before that happens.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) is the position just past the last token seen.
        lineno = 1
        column = 0
        # Pre-bind the loop variables so the ParseError raised in the else
        # clause below has values to report even if `tokens` is empty.
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # There is a gap between the previous token and this one;
                # rebuild the skipped text and append it to the prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    # One newline per line skipped.
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    # Copy the intervening characters from the source line.
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and NL tokens are folded into the prefix and are
                # never passed to the parser.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                # Replace the generic OP type with the operator-specific one.
                # Note: `grammar` here is the module imported at file level,
                # not self.grammar.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if type in {token.INDENT, token.DEDENT}:
                # Hold the accumulated prefix back: INDENT/DEDENT are added
                # with an empty prefix, and the saved text is restored below
                # so that it becomes the prefix of the following token.
                _prefix = prefix
                prefix = ""
            if p.addtoken(type, value, (prefix, start)):
                # A true return value ends the parse.
                if debug:
                    self.logger.debug("Stop.")
                break
            # The prefix has been attached to the token just added.
            prefix = ""
            if type == token.INDENT:
                if _prefix.startswith(value):
                    # Don't double-indent.  Since we're delaying the prefix that
                    # would normally belong to INDENT, we need to put the value
                    # at the end versus at the beginning.
                    _prefix = _prefix[len(value):] + value
            if type in {token.INDENT, token.DEDENT}:
                # Restore the held-back text for the next token.
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        The stream's readline method is passed to tokenize.generate_tokens()
        and the resulting tokens are handed to parse_tokens().
        """
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Delegates directly to parse_stream_raw().
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened in text mode with the given encoding (None is
        passed through to io.open) and closed when parsing finishes.
        """
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree.

        The text is wrapped in an io.StringIO so it can be tokenized line by
        line.
        """
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)
116
117
def _generate_pickle_name(gt):
    """Return the pickle file name that corresponds to grammar file *gt*.

    A trailing ``.txt`` extension is dropped; any other extension is kept.
    The dot-joined fields of sys.version_info and a ``.pickle`` suffix are
    then appended.
    """
    stem, ext = os.path.splitext(gt)
    # Only ".txt" is stripped; everything else stays part of the base name.
    base = stem if ext == ".txt" else stem + ext
    version_tag = ".".join(str(field) for field in sys.version_info)
    return base + version_tag + ".pickle"
123
124
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar, reusing a pickle when it is up to date.

    gt     -- path of the grammar text file.
    gp     -- path of the pickle; derived from *gt* when None.
    save   -- when true, write freshly generated tables to *gp*.
    force  -- when true, always regenerate from *gt*.
    logger -- logger for progress messages; defaults to logging.getLogger().

    Returns the grammar object, either generated by pgen or loaded from the
    pickle.  An OSError while writing the pickle is logged, not raised.
    """
    if logger is None:
        logger = logging.getLogger()
    if gp is None:
        gp = _generate_pickle_name(gt)

    # Fast path: the pickle is at least as recent as the grammar text.
    # `force` is tested first so _newer() is skipped when regeneration
    # is demanded.
    if not force and _newer(gp, gt):
        tables = grammar.Grammar()
        tables.load(gp)
        return tables

    logger.info("Generating grammar tables from %s", gt)
    tables = pgen.generate_grammar(gt)
    if not save:
        return tables

    logger.info("Writing grammar tables to %s", gp)
    try:
        tables.dump(gp)
    except OSError as e:
        # Best effort: the generated tables are still returned.
        logger.info("Writing failed: %s", e)
    return tables
144
145
def _newer(a, b):
    """Inquire whether file a was written since file b.

    Returns False when *a* does not exist, True when *a* exists but *b*
    does not, and otherwise whether *a*'s modification time is greater than
    or equal to *b*'s.
    """
    a_present = os.path.exists(a)
    if a_present and os.path.exists(b):
        # Both files exist: compare modification times.
        return os.path.getmtime(a) >= os.path.getmtime(b)
    # Exactly the "a missing" (False) or "b missing" (True) outcomes remain.
    return a_present
153
154
def load_packaged_grammar(package, grammar_source):
    """Load a grammar, preferring the source file over packaged pickle data.

    If *grammar_source* names an existing file, load_grammar(grammar_source)
    is called, which keeps load_grammar's automatic regeneration behavior.

    Otherwise the pickle name is computed from the base name of
    *grammar_source* (Python version appended, ``.pickle`` extension) and
    the data is fetched with pkgutil.get_data(package, pickled_name) and
    loaded into a fresh grammar object.
    """
    if not os.path.isfile(grammar_source):
        # No source file on disk: fall back to the pickle shipped as
        # package data.
        resource_name = _generate_pickle_name(
            os.path.basename(grammar_source))
        payload = pkgutil.get_data(package, resource_name)
        packaged = grammar.Grammar()
        packaged.loads(payload)
        return packaged
    return load_grammar(grammar_source)
173
174
def main(*args):
    """Script entry point: produce grammar pickle files.

    Each argument is a path to a grammar text file; when no arguments are
    given, the command-line arguments (sys.argv[1:]) are used instead.
    load_grammar is called for every path with regeneration forced and
    saving enabled.  Always returns True.
    """
    grammar_paths = args or sys.argv[1:]
    logging.basicConfig(format='%(message)s', stream=sys.stdout,
                        level=logging.INFO)
    for grammar_path in grammar_paths:
        load_grammar(grammar_path, save=True, force=True)
    return True
187
if __name__ == "__main__":
    # Exit status 0 when main() returns a true value, 1 otherwise.
    sys.exit(0 if main() else 1)