blib2to3/pgen2/driver.py

# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import codecs
import io
import os
import logging
import pkgutil
import sys

# Pgen imports
from . import grammar, parse, token, tokenize, pgen

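# Example usage (a sketch, not part of the original module): load the grammar
# tables and parse a small piece of source code.  ``Grammar.txt`` is an
# assumed path here; point it at the grammar file shipped with your
# installation.
#
#     from blib2to3.pgen2 import driver
#
#     g = driver.load_grammar("Grammar.txt")
#     d = driver.Driver(g)
#     tree = d.parse_string("x = 1\n")  # returns the root of the parse tree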
class Driver(object):

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
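        # ``prefix`` accumulates the whitespace, comments, and NL tokens seen
        # since the last significant token; it is handed to the parser along
        # with the next significant token, so no source text is lost.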
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out of the loop: the token stream ran out before
            # the parser accepted a complete input.
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)


def _generate_pickle_name(gt):
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
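    # E.g. on CPython 3.8.2, "Grammar.txt" becomes
    # "Grammar3.8.2.final.0.pickle" (sys.version_info joined with dots, plus
    # a ".pickle" suffix), so each interpreter gets its own cache file.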
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"


def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger()
    gp = _generate_pickle_name(gt) if gp is None else gp
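    # Regenerate when forced or when the pickle is missing or older than the
    # grammar text; otherwise load the cached tables from the pickle.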
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Return whether file a was written no earlier than file b."""
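    # A missing *a* is never newer (this forces regeneration); if only *b* is
    # missing there is nothing to compare against, so *a* counts as newer.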
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def load_packaged_grammar(package, grammar_source):
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        return load_grammar(grammar_source)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
    data = pkgutil.get_data(package, pickled_name)
    g = grammar.Grammar()
    g.loads(data)
    return g
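
# Usage sketch (hypothetical names; assumes the enclosing package is
# importable as ``blib2to3`` and ships the version-specific pickle as
# package data):
#
#     g = load_packaged_grammar("blib2to3", "Grammar.txt")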


def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True


if __name__ == "__main__":
    sys.exit(int(not main()))
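
# Invocation sketch (assumes the package is importable as ``blib2to3``):
#
#     python -m blib2to3.pgen2.driver Grammar.txt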