src/blib2to3/pgen2/driver.py
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import io
import os
import logging
import pkgutil
import sys
from typing import (
    Any,
    cast,
    IO,
    Iterable,
    List,
    Optional,
    Iterator,
    Tuple,
    Union,
)
from contextlib import contextmanager
from dataclasses import dataclass, field

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import NL
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.tokenize import GoodTokenInfo

Path = Union[str, "os.PathLike[str]"]


@dataclass
class ReleaseRange:
    start: int
    end: Optional[int] = None
    tokens: List[Any] = field(default_factory=list)

    def lock(self) -> None:
        total_eaten = len(self.tokens)
        self.end = self.start + total_eaten


class TokenProxy:
    def __init__(self, generator: Any) -> None:
        self._tokens = generator
        self._counter = 0
        self._release_ranges: List[ReleaseRange] = []

    @contextmanager
    def release(self) -> Iterator["TokenProxy"]:
        release_range = ReleaseRange(self._counter)
        self._release_ranges.append(release_range)
        try:
            yield self
        finally:
            # Lock the last release range to the final position that
            # has been eaten.
            release_range.lock()

    def eat(self, point: int) -> Any:
        eaten_tokens = self._release_ranges[-1].tokens
        if point < len(eaten_tokens):
            return eaten_tokens[point]
        else:
            while point >= len(eaten_tokens):
                token = next(self._tokens)
                eaten_tokens.append(token)
            return token

    def __iter__(self) -> "TokenProxy":
        return self

    def __next__(self) -> Any:
        # If the current position has already been looked ahead (it falls
        # inside a recorded release range), return the cached token;
        # otherwise pull the next token from the underlying producer.
        for release_range in self._release_ranges:
            assert release_range.end is not None

            start, end = release_range.start, release_range.end
            if start <= self._counter < end:
                token = release_range.tokens[self._counter - start]
                break
        else:
            token = next(self._tokens)
        self._counter += 1
        return token

    def can_advance(self, to: int) -> bool:
        # Try to eat, fail if it can't. The eat operation is cached
        # so there won't be any additional cost of eating here
        try:
            self.eat(to)
        except StopIteration:
            return False
        else:
            return True

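# A minimal usage sketch (not part of the original module; token values are
# illustrative). Inside a release() block the parser can look ahead with
# eat()/can_advance() without consuming tokens: everything peeked is cached
# in the release range and replayed on the next normal iteration.
#
#     proxy = TokenProxy(iter(["tok_a", "tok_b", "tok_c"]))
#     with proxy.release():
#         assert proxy.eat(0) == "tok_a"  # look ahead; cached in the range
#     assert next(proxy) == "tok_a"       # replayed from the cached range
#     assert next(proxy) == "tok_b"       # served fresh from the generator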

class Driver:
    def __init__(self, grammar: Grammar, logger: Optional[Logger] = None) -> None:
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger

    def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        proxy = TokenProxy(tokens)

        p = parse.Parser(self.grammar)
        p.setup(proxy=proxy)

        lineno = 1
        column = 0
        indent_columns: List[int] = []
        type = value = start = end = line_text = None
        prefix = ""

        for quintuple in proxy:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                assert type is not None
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
                )
            if type == token.INDENT:
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif type == token.DEDENT:
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(cast(int, type), value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError("incomplete input", type, value, (prefix, start))
        assert p.rootnode is not None
        return p.rootnode

    def parse_stream_raw(self, stream: IO[str], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream: IO[str], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(
        self, filename: Path, encoding: Optional[str] = None, debug: bool = False
    ) -> NL:
        """Parse a file and return the syntax tree."""
        with open(filename, encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text: str, debug: bool = False) -> NL:
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline, grammar=self.grammar
        )
        return self.parse_tokens(tokens, debug)

    def _partially_consume_prefix(self, prefix: str, column: int) -> Tuple[str, str]:
        lines: List[str] = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == "\n":
                    if current_line.strip() and current_column < column:
                        res = "".join(lines)
                        return res, prefix[len(res) :]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in " \t":
                current_column += 1
            elif char == "\n":
                # unexpected empty line
                current_column = 0
            else:
                # indent is finished
                wait_for_nl = True
        return "".join(lines), current_line

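# A usage sketch (not part of the original module): given a loaded Grammar,
# for example the result of load_grammar() below (assuming its grammar text
# file is available), a Driver can parse source from a string, stream or file.
#
#     drv = Driver(load_grammar("Grammar.txt"))
#     tree = drv.parse_string("x = 1\n")  # returns a blib2to3.pytree node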

def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> str:
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if cache_dir:
        return os.path.join(cache_dir, os.path.basename(name))
    else:
        return name

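# For illustration (not part of the original module): the helper above embeds
# the full interpreter version in the pickle name, so caches produced by
# different Python versions never collide.
#
#     _generate_pickle_name("Grammar.txt")
#     # -> "Grammar3.11.4.final.0.pickle" on CPython 3.11.4
#     _generate_pickle_name("Grammar.txt", cache_dir="/tmp/cache")
#     # -> "/tmp/cache/Grammar3.11.4.final.0.pickle"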

def load_grammar(
    gt: str = "Grammar.txt",
    gp: Optional[str] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger(__name__)
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        g: grammar.Grammar = pgen.generate_grammar(gt)
        if save:
            try:
                g.dump(gp)
            except OSError:
                # Ignore error, caching is not vital.
                pass
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g

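# A usage sketch (not part of the original module): load_grammar() re-runs
# pgen only when the pickle is missing or older than the grammar text file;
# force=True always regenerates, and save=False skips writing the pickle.
#
#     g = load_grammar("Grammar.txt")                           # use/update the cache
#     g = load_grammar("Grammar.txt", force=True, save=False)   # always regenerate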

def _newer(a: str, b: str) -> bool:
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def load_packaged_grammar(
    package: str, grammar_source: str, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    assert data is not None
    g = grammar.Grammar()
    g.loads(data)
    return g

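# A usage sketch (not part of the original module; the package and file names
# are illustrative): when the grammar text file is not present on disk, the
# version-specific pickle is read from the package's data files instead.
#
#     g = load_packaged_grammar("blib2to3", "Grammar.txt")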

def main(*args: str) -> bool:
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True

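# Invocation sketch (not part of the original module): running this module as
# a script force-regenerates a pickle next to each grammar text file given on
# the command line, e.g.
#
#     python -m blib2to3.pgen2.driver Grammar.txt PatternGrammar.txt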
if __name__ == "__main__":
    sys.exit(int(not main()))