madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

fix magic comma and experimental string cache flags (#2131)
[etc/vim.git] / src / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
14 __author__ = "Guido van Rossum <guido@python.org>"
15
16 __all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import io
20 import os
21 import logging
22 import pkgutil
23 import sys
24 from typing import (
25     Any,
26     IO,
27     Iterable,
28     List,
29     Optional,
30     Text,
31     Tuple,
32     Union,
33 )
34
35 # Pgen imports
36 from . import grammar, parse, token, tokenize, pgen
37 from logging import Logger
38 from blib2to3.pytree import _Convert, NL
39 from blib2to3.pgen2.grammar import Grammar
40
# Type alias for path-like arguments: a plain string or an os.PathLike over str.
Path = Union[str, "os.PathLike[str]"]
42
43
class Driver:
    """High-level front end that turns source text or tokens into a syntax tree.

    Attributes:
        grammar: the grammar handed to the tokenizer and the parser.
        logger: logger used for debug output; defaults to this module's logger.
        convert: optional conversion callback forwarded to ``parse.Parser``.
    """

    def __init__(
        self,
        grammar: Grammar,
        convert: Optional[_Convert] = None,
        logger: Optional[Logger] = None,
    ) -> None:
        """Store the grammar, the optional converter and the logger."""
        self.grammar = grammar
        self.logger = logging.getLogger(__name__) if logger is None else logger
        self.convert = convert

    def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree.

        Each item of *tokens* is unpacked as a 5-tuple
        ``(type, value, start, end, line_text)``.  Comments, NL tokens and any
        text lying between the tracked position and a token's start are not
        given to the parser on their own; they are collected into a prefix
        string that accompanies the next token passed to ``addtoken``.

        When *debug* is true, every token handed to the parser is logged.

        Raises:
            parse.ParseError: *tokens* ran out before the parser signalled
                that it was done.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        parser = parse.Parser(self.grammar, self.convert)
        parser.setup()
        # Position where the next token is expected to begin.
        cur_line, cur_col = 1, 0
        # Widths of the INDENT tokens that are currently open.
        indent_stack = []
        tok_type = tok_value = tok_start = tok_end = tok_line = None
        prefix = ""
        for tok_type, tok_value, tok_start, tok_end, tok_line in tokens:
            expected = (cur_line, cur_col)
            if tok_start != expected:
                assert expected <= tok_start, (expected, tok_start)
                start_line, start_col = tok_start
                if cur_line < start_line:
                    # Lines skipped between tokens become newlines in the prefix.
                    prefix += "\n" * (start_line - cur_line)
                    cur_line, cur_col = start_line, 0
                if cur_col < start_col:
                    # Text skipped on the same line is copied from the source line.
                    prefix += tok_line[cur_col:start_col]
                    cur_col = start_col
            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Never shown to the parser: folded into the prefix instead.
                prefix += tok_value
                cur_line, cur_col = tok_end
                if tok_value.endswith("\n"):
                    cur_line, cur_col = cur_line + 1, 0
                continue
            if tok_type == token.OP:
                # Replace the generic OP type with the operator-specific one.
                tok_type = grammar.opmap[tok_value]
            if debug:
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[tok_type], tok_value, prefix
                )
            if tok_type == token.INDENT:
                indent_stack.append(len(tok_value))
                # The INDENT token itself is passed with an empty prefix and
                # value; its text is carried over to the following token.
                carried = prefix + tok_value
                prefix = tok_value = ""
            elif tok_type == token.DEDENT:
                # Split the prefix at the first content line indented less than
                # the block being closed; the remainder is carried over.
                prefix, carried = self._partially_consume_prefix(
                    prefix, indent_stack.pop()
                )
            if parser.addtoken(tok_type, tok_value, (prefix, tok_start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = carried if tok_type in {token.INDENT, token.DEDENT} else ""
            cur_line, cur_col = tok_end
            if tok_value.endswith("\n"):
                cur_line, cur_col = cur_line + 1, 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert tok_start is not None
            raise parse.ParseError(
                "incomplete input", tok_type, tok_value, (prefix, tok_start)
            )
        assert parser.rootnode is not None
        return parser.rootnode

    def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
        """Tokenize *stream* line by line and return the parsed syntax tree."""
        token_stream = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(token_stream, debug)

    def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree.

        Delegates to :meth:`parse_stream_raw`.
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(
        self, filename: Path, encoding: Optional[Text] = None, debug: bool = False
    ) -> NL:
        """Open *filename* for reading as text and return its syntax tree."""
        with io.open(filename, encoding=encoding) as source:
            return self.parse_stream(source, debug)

    def parse_string(self, text: Text, debug: bool = False) -> NL:
        """Parse the source held in *text* and return the syntax tree."""
        readline = io.StringIO(text).readline
        token_stream = tokenize.generate_tokens(readline, grammar=self.grammar)
        return self.parse_tokens(token_stream, debug)

    def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
        """Split *prefix* into ``(consumed, remainder)``.

        Complete lines are consumed from the front of *prefix* until a
        newline-terminated line with non-whitespace content is found whose
        leading run of spaces/tabs is shorter than *column*; that line and
        everything after it form the remainder.  If no such line exists, all
        complete lines are consumed and the trailing unterminated text (possibly
        empty) is the remainder.
        """
        finished: List[str] = []
        # Characters seen since the last line was stored in ``finished``.
        pending: List[str] = []
        indent_width = 0
        in_content = False
        for ch in prefix:
            pending.append(ch)
            if in_content:
                if ch != "\n":
                    continue
                line = "".join(pending)
                if line.strip() and indent_width < column:
                    consumed = "".join(finished)
                    return consumed, prefix[len(consumed) :]
                finished.append(line)
                pending = []
                indent_width = 0
                in_content = False
            elif ch == " " or ch == "\t":
                indent_width += 1
            elif ch == "\n":
                # unexpected empty line
                indent_width = 0
            else:
                # indent is finished
                in_content = True
        return "".join(finished), "".join(pending)
167
168
169 def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
170     head, tail = os.path.splitext(gt)
171     if tail == ".txt":
172         tail = ""
173     name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
174     if cache_dir:
175         return os.path.join(cache_dir, os.path.basename(name))
176     else:
177         return name
178
179
def load_grammar(
    gt: Text = "Grammar.txt",
    gp: Optional[Text] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle).

    Args:
        gt: path of the grammar text file.
        gp: path of the pickle file; derived from *gt* when ``None``.
        save: write freshly generated tables to *gp*.
        force: regenerate the tables even if the pickle looks up to date.
        logger: logger for progress messages; defaults to this module's logger.

    A failure to write the pickle (``OSError``) is logged and otherwise ignored.
    """
    log = logging.getLogger(__name__) if logger is None else logger
    if gp is None:
        gp = _generate_pickle_name(gt)

    if not force and _newer(gp, gt):
        # The pickle is at least as recent as the grammar text: reuse it.
        cached = grammar.Grammar()
        cached.load(gp)
        return cached

    log.info("Generating grammar tables from %s", gt)
    generated: grammar.Grammar = pgen.generate_grammar(gt)
    if save:
        log.info("Writing grammar tables to %s", gp)
        try:
            generated.dump(gp)
        except OSError as e:
            log.info("Writing failed: %s", e)
    return generated
204
205
206 def _newer(a: Text, b: Text) -> bool:
207     """Inquire whether file a was written since file b."""
208     if not os.path.exists(a):
209         return False
210     if not os.path.exists(b):
211         return True
212     return os.path.getmtime(a) >= os.path.getmtime(b)
213
214
def load_packaged_grammar(
    package: str, grammar_source: Text, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Load a grammar, preferring the source file when it is present.

    If *grammar_source* is an existing file, ``load_grammar(grammar_source)``
    is used, so the usual automatic regeneration applies; the pickle is placed
    under *cache_dir* when one is given.

    Otherwise the pickled grammar is fetched with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from the base name of *grammar_source*
    by adding the Python version and a ``.pickle`` extension.

    """
    if not os.path.isfile(grammar_source):
        pickle_path = _generate_pickle_name(
            os.path.basename(grammar_source), cache_dir
        )
        raw = pkgutil.get_data(package, pickle_path)
        assert raw is not None
        packaged = grammar.Grammar()
        packaged.loads(raw)
        return packaged

    gp = None
    if cache_dir:
        gp = _generate_pickle_name(grammar_source, cache_dir)
    return load_grammar(grammar_source, gp=gp)
237
238
def main(*args: Text) -> bool:
    """Script entry point: (re)build the grammar pickle for each grammar file.

    The grammar text files are taken from *args*, or from ``sys.argv[1:]``
    when no arguments are passed.  Each one is regenerated and saved through
    load_grammar.  Always returns ``True``.
    """
    grammar_files = args or tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for grammar_file in grammar_files:
        load_grammar(grammar_file, save=True, force=True)
    return True
250
251
if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    succeeded = main()
    sys.exit(0 if succeeded else 1)