# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.
"""

__author__ = "Guido van Rossum <guido@python.org>"
__all__ = ["Driver", "load_grammar"]

import io
import logging
import os
import pkgutil
import sys
from contextlib import contextmanager
from dataclasses import dataclass, field
from logging import Logger
from typing import IO, Any, Iterable, Iterator, List, Optional, Tuple, Union, cast

from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.tokenize import GoodTokenInfo
from blib2to3.pytree import NL

# Pgen imports
from . import grammar, parse, pgen, token, tokenize

Path = Union[str, "os.PathLike[str]"]

@dataclass
class ReleaseRange:
    start: int
    end: Optional[int] = None
    tokens: List[Any] = field(default_factory=list)

    def lock(self) -> None:
        total_eaten = len(self.tokens)
        self.end = self.start + total_eaten

class TokenProxy:
    def __init__(self, generator: Any) -> None:
        self._tokens = generator
        self._counter = 0
        self._release_ranges: List[ReleaseRange] = []

    @contextmanager
    def release(self) -> Iterator["TokenProxy"]:
        release_range = ReleaseRange(self._counter)
        self._release_ranges.append(release_range)
        try:
            yield self
        finally:
            # Lock the last release range to the final position that
            # got eaten.
            release_range.lock()

    def eat(self, point: int) -> Any:
        eaten_tokens = self._release_ranges[-1].tokens
        if point < len(eaten_tokens):
            return eaten_tokens[point]
        else:
            while point >= len(eaten_tokens):
                token = next(self._tokens)
                eaten_tokens.append(token)
            return token

    def __iter__(self) -> "TokenProxy":
        return self

    def __next__(self) -> Any:
        # If the current position is already compromised (looked up)
        # return the eaten token, if not just go further on the given
        # token producer.
        for release_range in self._release_ranges:
            assert release_range.end is not None
            start, end = release_range.start, release_range.end
            if start <= self._counter < end:
                token = release_range.tokens[self._counter - start]
                break
        else:
            token = next(self._tokens)
        self._counter += 1
        return token

    def can_advance(self, to: int) -> bool:
        # Try to eat, fail if it can't. The eat operation is cached
        # so there won't be any additional cost of eating here.
        try:
            self.eat(to)
        except StopIteration:
            return False
        else:
            return True
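
# Illustration (not part of the original module): a hypothetical helper showing
# the intended TokenProxy pattern -- open a release() window, probe the stream
# with can_advance()/eat() without consuming it, and let normal iteration
# replay the cached tokens after the window closes.
def _peek_next_token(proxy: "TokenProxy") -> Optional[Any]:
    with proxy.release():
        if proxy.can_advance(0):  # pulls and caches one token if available
            return proxy.eat(0)  # the cached token is replayed later by __next__
    return None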

class Driver:
    def __init__(self, grammar: Grammar, logger: Optional[Logger] = None) -> None:
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger

    def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        proxy = TokenProxy(tokens)

        p = parse.Parser(self.grammar)
        p.setup(proxy=proxy)

        lineno = 1
        column = 0
        indent_columns: List[int] = []
        type = value = start = end = line_text = None
        prefix = ""

        for quintuple in proxy:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                assert type is not None
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
                )
            if type == token.INDENT:
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif type == token.DEDENT:
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(cast(int, type), value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError("incomplete input", type, value, (prefix, start))
        assert p.rootnode is not None
        return p.rootnode

    def parse_stream_raw(self, stream: IO[str], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream: IO[str], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(
        self, filename: Path, encoding: Optional[str] = None, debug: bool = False
    ) -> NL:
        """Parse a file and return the syntax tree."""
        with open(filename, encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text: str, debug: bool = False) -> NL:
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline, grammar=self.grammar
        )
        return self.parse_tokens(tokens, debug)

    def _partially_consume_prefix(self, prefix: str, column: int) -> Tuple[str, str]:
        lines: List[str] = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == "\n":
                    if current_line.strip() and current_column < column:
                        res = "".join(lines)
                        return res, prefix[len(res) :]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in " \t":
                current_column += 1
            elif char == "\n":
                # unexpected empty line
                current_column = 0
            else:
                # indent is broken
                wait_for_nl = True
        return "".join(lines), current_line

def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> str:
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if cache_dir:
        return os.path.join(cache_dir, os.path.basename(name))
    else:
        return name
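# For example (illustrative and version-dependent): on CPython 3.12.0 the call
# _generate_pickle_name("Grammar.txt") yields "Grammar3.12.0.final.0.pickle",
# relocated into *cache_dir* when one is given.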

def load_grammar(
    gt: str = "Grammar.txt",
    gp: Optional[str] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger(__name__)
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        g: grammar.Grammar = pgen.generate_grammar(gt)
        if save:
            try:
                g.dump(gp)
            except OSError:
                # Ignore error, caching is not vital.
                pass
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g

def _newer(a: str, b: str) -> bool:
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)

def load_packaged_grammar(
    package: str, grammar_source: str, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.
    """
    if os.path.isfile(grammar_source):
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    assert data is not None
    g = grammar.Grammar()
    g.loads(data)
    return g

def main(*args: str) -> bool:
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True


if __name__ == "__main__":
    sys.exit(int(not main()))
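
# Usage sketch (not part of the original module; the function below is
# hypothetical and only illustrates how the pieces fit together). It assumes
# "Grammar.txt" points at a pgen grammar file such as the one shipped with
# blib2to3: load_grammar() builds the grammar (or loads its pickle cache),
# Driver wraps it, and parse_string() returns the root node of the syntax tree.
def _example_parse(source: str = "x = 1\n") -> NL:
    g = load_grammar("Grammar.txt")  # generate the grammar, or reuse the cached pickle
    d = Driver(g)
    return d.parse_string(source)  # root node of the parsed syntax tree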