black/parser: partial support for pattern matching (#2586)
src/blib2to3/pgen2/driver.py
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

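# Typical usage (an illustrative sketch, not part of the original module;
# "Grammar.txt" is an assumed path -- see load_grammar() below):
#
#     grammar = load_grammar("Grammar.txt")
#     driver = Driver(grammar)
#     tree = driver.parse_string("x = 1\n")
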
# Python imports
import io
import os
import logging
import pkgutil
import sys
from typing import (
    Any,
    IO,
    Iterable,
    List,
    Optional,
    Text,
    Iterator,
    Tuple,
    TypeVar,
    Generic,
    Union,
)
from dataclasses import dataclass, field

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar
from contextlib import contextmanager

Path = Union[str, "os.PathLike[str]"]


# A contiguous range of token positions cached during one lookahead;
# lock() pins the end of the range once the lookahead block exits.
@dataclass
class ReleaseRange:
    start: int
    end: Optional[int] = None
    tokens: List[Any] = field(default_factory=list)

    def lock(self) -> None:
        total_eaten = len(self.tokens)
        self.end = self.start + total_eaten


class TokenProxy:
    def __init__(self, generator: Any) -> None:
        self._tokens = generator
        self._counter = 0
        self._release_ranges: List[ReleaseRange] = []

    @contextmanager
    def release(self) -> Iterator["TokenProxy"]:
        release_range = ReleaseRange(self._counter)
        self._release_ranges.append(release_range)
        try:
            yield self
        finally:
            # Lock the last release range to the final position that
            # has been eaten.
            release_range.lock()

    def eat(self, point: int) -> Any:
        eaten_tokens = self._release_ranges[-1].tokens
        if point < len(eaten_tokens):
            return eaten_tokens[point]
        else:
            while point >= len(eaten_tokens):
                token = next(self._tokens)
                eaten_tokens.append(token)
            return token

    def __iter__(self) -> "TokenProxy":
        return self

    def __next__(self) -> Any:
        # If the current position falls inside a recorded release range
        # (i.e. the token was already eaten during a lookahead), return
        # the cached token; otherwise pull the next one from the
        # underlying token producer.
        for release_range in self._release_ranges:
            assert release_range.end is not None

            start, end = release_range.start, release_range.end
            if start <= self._counter < end:
                token = release_range.tokens[self._counter - start]
                break
        else:
            token = next(self._tokens)
        self._counter += 1
        return token

    def can_advance(self, to: int) -> bool:
        # Try to eat, fail if it can't.  The eat operation is cached, so
        # there won't be any additional cost of eating here.
        try:
            self.eat(to)
        except StopIteration:
            return False
        else:
            return True

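# Illustrative sketch of the lookahead protocol above (not part of the
# original module): inside a release() block a consumer may peek at
# upcoming tokens with eat()/can_advance(); the peeked tokens are cached
# in a ReleaseRange and replayed by __next__() after the block exits.
# `tokens` below stands for any token generator, e.g. the result of
# tokenize.generate_tokens().
#
#     proxy = TokenProxy(tokens)
#     with proxy.release():
#         if proxy.can_advance(2):   # caches up to three tokens
#             peeked = proxy.eat(2)  # third upcoming token, not consumed
#     first = next(proxy)            # replays the cached tokens first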

class Driver(object):
    def __init__(
        self,
        grammar: Grammar,
        convert: Optional[_Convert] = None,
        logger: Optional[Logger] = None,
    ) -> None:
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        proxy = TokenProxy(tokens)

        p = parse.Parser(self.grammar, self.convert)
        p.setup(proxy=proxy)

        lineno = 1
        column = 0
        indent_columns = []
        type = value = start = end = line_text = None
        # `prefix` accumulates the whitespace, comments and non-logical
        # newlines seen before the next significant token; it is handed to
        # the parser together with that token and then reset.
        prefix = ""

        for quintuple in proxy:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
                )
            if type == token.INDENT:
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif type == token.DEDENT:
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError("incomplete input", type, value, (prefix, start))
        assert p.rootnode is not None
        return p.rootnode

    def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(
        self, filename: Path, encoding: Optional[Text] = None, debug: bool = False
    ) -> NL:
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text: Text, debug: bool = False) -> NL:
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline, grammar=self.grammar
        )
        return self.parse_tokens(tokens, debug)

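    # A sketch of what the helper below does (illustrative, not from the
    # original sources): it splits `prefix` right before the first
    # non-blank line indented by fewer than `column` whitespace characters
    # and returns both halves.  For example,
    #
    #     self._partially_consume_prefix("    # deep\n# top\n", 2)
    #
    # would return ("    # deep\n", "# top\n"), keeping the deeply indented
    # comment in the first half and handing the flush-left comment back to
    # the caller as the remainder.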
    def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
        lines: List[str] = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == "\n":
                    if current_line.strip() and current_column < column:
                        res = "".join(lines)
                        return res, prefix[len(res) :]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in " \t":
                current_column += 1
            elif char == "\n":
                # unexpected empty line
                current_column = 0
            else:
                # indent is finished
                wait_for_nl = True
        return "".join(lines), current_line


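# Note on the cache-file naming below (an illustrative example, not from
# the original sources): for a grammar file "Grammar.txt" running under
# CPython 3.11.4, _generate_pickle_name() would produce something like
# "Grammar3.11.4.final.0.pickle", since the full sys.version_info tuple
# is joined into the name.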
def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if cache_dir:
        return os.path.join(cache_dir, os.path.basename(name))
    else:
        return name


def load_grammar(
    gt: Text = "Grammar.txt",
    gp: Optional[Text] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger(__name__)
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g: grammar.Grammar = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a: Text, b: Text) -> bool:
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


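# Illustrative sketch of the lookup performed below (the package name and
# grammar path are assumptions, not taken from the original sources):
#
#     load_packaged_grammar("blib2to3", "Grammar.txt")
#
# would ask pkgutil.get_data() for a resource named along the lines of
# "Grammar3.11.4.final.0.pickle" inside the "blib2to3" package, unless
# "Grammar.txt" exists on disk, in which case load_grammar() is used
# instead and may regenerate the tables.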
def load_packaged_grammar(
    package: str, grammar_source: Text, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    assert data is not None
    g = grammar.Grammar()
    g.loads(data)
    return g


def main(*args: Text) -> bool:
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True


if __name__ == "__main__":
    sys.exit(int(not main()))