git.madduck.net Git - etc/vim.git/blob - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

blib2to3: Never put prefixes on DEDENT leaves
[etc/vim.git] / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25
26 # Pgen imports
27 from . import grammar, parse, token, tokenize, pgen
28
29
class Driver(object):
    """Feed a token stream to the pgen parser and return the resulting tree.

    The constructor stores its arguments as attributes: ``grammar`` and
    ``convert`` are handed to ``parse.Parser`` for every parse, and
    ``logger`` receives the debug output (the root logger when omitted).
    """

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        self.convert = convert
        self.logger = logging.getLogger() if logger is None else logger

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        *tokens* yields 5-tuples ``(type, value, start, end, line_text)``.
        Text lying between significant tokens (gaps in the line, skipped
        lines, comments and NL tokens) is accumulated into a prefix that is
        passed to the parser together with the next significant token.

        Raises ``parse.ParseError`` if the tokens run out before the parser
        reports that it is finished.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        parser = parse.Parser(self.grammar, self.convert)
        parser.setup()
        lineno, column = 1, 0
        tok_type = value = start = end = line_text = None
        prefix = ""
        for tok_type, value, start, end, line_text in tokens:
            here = (lineno, column)
            if start != here:
                # The token begins after our current position: the gap
                # becomes part of the prefix.
                assert here <= start, (here, start)
                start_line, start_col = start
                if lineno < start_line:
                    prefix += "\n" * (start_line - lineno)
                    lineno, column = start_line, 0
                if column < start_col:
                    prefix += line_text[column:start_col]
                    column = start_col
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines never reach the parser;
                # they are folded into the prefix of the next token.
                prefix += value
            else:
                if tok_type == token.OP:
                    tok_type = grammar.opmap[value]
                if debug:
                    self.logger.debug("%s %r (prefix=%r)",
                                      token.tok_name[tok_type], value, prefix)
                if tok_type == token.DEDENT:
                    # A DEDENT is given an empty prefix; what has been
                    # collected so far is carried over to the next token.
                    leaf_prefix, carried = "", prefix
                else:
                    leaf_prefix, carried = prefix, ""
                if parser.addtoken(tok_type, value, (leaf_prefix, start)):
                    if debug:
                        self.logger.debug("Stop.")
                    return parser.rootnode
                prefix = carried
            # Advance the current position past the token just handled.
            lineno, column = end
            if value.endswith("\n"):
                lineno, column = lineno + 1, 0
        # The tokens ran out before the parser accepted the input --
        # EOF is too soon (how can this happen???)
        raise parse.ParseError("incomplete input",
                               tok_type, value, (prefix, start))

    def parse_stream_raw(self, stream, debug=False):
        """Tokenize *stream* via its ``readline`` and return the syntax tree."""
        token_iter = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(token_iter, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree.

        Delegates directly to ``parse_stream_raw``.
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Open *filename* as text (with *encoding*) and return its syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            tree = self.parse_stream(stream, debug)
        return tree

    def parse_string(self, text, debug=False):
        """Parse the source held in the string *text* and return the syntax tree."""
        source = io.StringIO(text)
        token_iter = tokenize.generate_tokens(source.readline)
        return self.parse_tokens(token_iter, debug)
111
def _generate_pickle_name(gt):
    """Return the pickle file name that corresponds to grammar file *gt*.

    A ``.txt`` extension is dropped (any other extension is kept), then the
    dot-joined fields of ``sys.version_info`` and ``.pickle`` are appended.
    """
    stem, ext = os.path.splitext(gt)
    base = stem if ext == ".txt" else stem + ext
    version_tag = ".".join(str(field) for field in sys.version_info)
    return base + version_tag + ".pickle"
117
118
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    *gt* is the grammar text file and *gp* the pickle file; when *gp* is
    None its name is derived from *gt*. The pickle is loaded unless *force*
    is true or the pickle is not newer than *gt*; in that case the tables
    are regenerated from *gt* and, if *save* is true, written to *gp*.
    An ``OSError`` while writing is logged and otherwise ignored.
    """
    if logger is None:
        logger = logging.getLogger()
    if gp is None:
        gp = _generate_pickle_name(gt)

    # Fast path: an up-to-date pickle exists and regeneration was not forced.
    if not force and _newer(gp, gt):
        g = grammar.Grammar()
        g.load(gp)
        return g

    logger.info("Generating grammar tables from %s", gt)
    g = pgen.generate_grammar(gt)
    if not save:
        return g

    logger.info("Writing grammar tables to %s", gp)
    try:
        g.dump(gp)
    except OSError as e:
        # Saving the cache is best-effort; the generated grammar is still returned.
        logger.info("Writing failed: %s", e)
    return g
138
139
def _newer(a, b):
    """Inquire whether file a was written since file b.

    Returns False if *a* cannot be stat'ed (e.g. it does not exist), True
    if *a* can be stat'ed but *b* cannot, and otherwise whether the
    modification time of *a* is greater than or equal to that of *b*
    (equal timestamps count as "newer").
    """
    # Stat each file exactly once instead of exists() followed by
    # getmtime(): a file that disappears between the two calls would
    # otherwise raise instead of being treated as missing.
    # ValueError is caught as well because os.path.exists() reports False
    # for unrepresentable paths rather than raising.
    try:
        a_mtime = os.path.getmtime(a)
    except (OSError, ValueError):
        return False
    try:
        b_mtime = os.path.getmtime(b)
    except (OSError, ValueError):
        return True
    return a_mtime >= b_mtime
147
148
def load_packaged_grammar(package, grammar_source):
    """Load a grammar from package data, or from *grammar_source* if present.

    If *grammar_source* is an existing file, the result of
    load_grammar(grammar_source) is returned, which keeps load_grammar's
    automatic regeneration behavior.

    Otherwise the pickled grammar is fetched with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from the base name of
    *grammar_source* by adding the Python version and a ``.pickle``
    extension.

    """
    if os.path.isfile(grammar_source):
        return load_grammar(grammar_source)
    source_name = os.path.basename(grammar_source)
    pickle_bytes = pkgutil.get_data(package, _generate_pickle_name(source_name))
    packaged = grammar.Grammar()
    packaged.loads(pickle_bytes)
    return packaged
167
168
def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Each argument is a path to a grammar text file; with no arguments the
    paths are taken from ``sys.argv[1:]``. load_grammar is called for each
    path with saving and regeneration forced on. Always returns True.
    """
    grammar_paths = args or sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for grammar_path in grammar_paths:
        load_grammar(grammar_path, save=True, force=True)
    return True
181
if __name__ == "__main__":
    # A true result from main() maps to exit status 0, a false one to 1.
    succeeded = main()
    sys.exit(0 if succeeded else 1)