# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.
"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

import io
import logging
import os
import pkgutil
import sys
from contextlib import contextmanager
from dataclasses import dataclass, field
from logging import Logger
from typing import IO, Any, Iterable, Iterator, List, Optional, Tuple, Union, cast

from . import grammar, parse, token, tokenize, pgen
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.tokenize import GoodTokenInfo
from blib2to3.pytree import NL

Path = Union[str, "os.PathLike[str]"]
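# Illustrative sketch of the typical entry points (not executed here; the
# grammar file name is whatever load_grammar is given, "Grammar.txt" being
# only its default):
#
#     g = load_grammar("Grammar.txt")
#     driver = Driver(g)
#     tree = driver.parse_string("x = 1\n")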
@dataclass
class ReleaseRange:
    start: int
    end: Optional[int] = None
    tokens: List[Any] = field(default_factory=list)

    def lock(self) -> None:
        total_eaten = len(self.tokens)
        self.end = self.start + total_eaten
class TokenProxy:
    def __init__(self, generator: Any) -> None:
        self._tokens = generator
        self._counter = 0
        self._release_ranges: List[ReleaseRange] = []

    @contextmanager
    def release(self) -> Iterator["TokenProxy"]:
        release_range = ReleaseRange(self._counter)
        self._release_ranges.append(release_range)
        try:
            yield self
        finally:
            # Lock the last release range to the final position that
            # got eaten.
            release_range.lock()

    def eat(self, point: int) -> Any:
        eaten_tokens = self._release_ranges[-1].tokens
        if point < len(eaten_tokens):
            return eaten_tokens[point]
        else:
            while point >= len(eaten_tokens):
                token = next(self._tokens)
                eaten_tokens.append(token)
            return token

    def __iter__(self) -> "TokenProxy":
        return self

    def __next__(self) -> Any:
        # If the current position is already compromised (looked up),
        # return the eaten token; if not, just go further on the given
        # token producer.
        for release_range in self._release_ranges:
            assert release_range.end is not None
            start, end = release_range.start, release_range.end
            if start <= self._counter < end:
                token = release_range.tokens[self._counter - start]
                break
        else:
            token = next(self._tokens)
        self._counter += 1
        return token

    def can_advance(self, to: int) -> bool:
        # Try to eat; fail if we can't. The eat operation is cached,
        # so there won't be any additional cost of eating here.
        try:
            self.eat(to)
        except StopIteration:
            return False
        else:
            return True
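# Illustrative sketch of the lookahead pattern TokenProxy enables (assumed
# usage, not part of this module): open a release window, peek at upcoming
# tokens without consuming them for good, and let normal iteration replay
# the cached tokens once the window is closed.
#
#     proxy = TokenProxy(token_generator)
#     with proxy.release():
#         if proxy.can_advance(1):
#             peeked = proxy.eat(1)  # cached, not lost
#     next(proxy)  # replays the cached tokens before pulling new ones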
class Driver(object):
    def __init__(self, grammar: Grammar, logger: Optional[Logger] = None) -> None:
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger

    def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        proxy = TokenProxy(tokens)

        p = parse.Parser(self.grammar)
        p.setup(proxy=proxy)

        lineno = 1
        column = 0
        indent_columns: List[int] = []
        type = value = start = end = line_text = None
        prefix = ""

        for quintuple in proxy:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                assert type is not None
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
                )
            if type == token.INDENT:
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif type == token.DEDENT:
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(cast(int, type), value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError("incomplete input", type, value, (prefix, start))
        assert p.rootnode is not None
        return p.rootnode

    def parse_stream_raw(self, stream: IO[str], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream: IO[str], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(
        self, filename: Path, encoding: Optional[str] = None, debug: bool = False
    ) -> NL:
        """Parse a file and return the syntax tree."""
        with open(filename, encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text: str, debug: bool = False) -> NL:
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline, grammar=self.grammar
        )
        return self.parse_tokens(tokens, debug)

    def _partially_consume_prefix(self, prefix: str, column: int) -> Tuple[str, str]:
        lines: List[str] = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == "\n":
                    if current_line.strip() and current_column < column:
                        res = "".join(lines)
                        return res, prefix[len(res) :]
                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in " \t":
                current_column += 1
            elif char == "\n":
                # unexpected empty line
                current_column = 0
            else:
                # indent is broken
                wait_for_nl = True
        return "".join(lines), current_line
def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> str:
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if cache_dir:
        return os.path.join(cache_dir, os.path.basename(name))
    else:
        return name
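# For example (assuming CPython 3.11.4 and a POSIX path separator; the exact
# version segment follows sys.version_info on the running interpreter):
#
#     _generate_pickle_name("Grammar.txt")           -> "Grammar3.11.4.final.0.pickle"
#     _generate_pickle_name("Grammar.txt", "cache")  -> "cache/Grammar3.11.4.final.0.pickle"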
252 gt: str = "Grammar.txt",
253 gp: Optional[str] = None,
256 logger: Optional[Logger] = None,
258 """Load the grammar (maybe from a pickle)."""
260 logger = logging.getLogger(__name__)
261 gp = _generate_pickle_name(gt) if gp is None else gp
262 if force or not _newer(gp, gt):
263 g: grammar.Grammar = pgen.generate_grammar(gt)
268 # Ignore error, caching is not vital.
271 g = grammar.Grammar()
def _newer(a: str, b: str) -> bool:
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)
def load_packaged_grammar(
    package: str, grammar_source: str, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.
    """
    if os.path.isfile(grammar_source):
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    assert data is not None
    g = grammar.Grammar()
    g.loads(data)
    return g
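# Illustrative call (the package name and the cache directory are assumptions,
# not taken from this module): load the pickled grammar shipped inside an
# installed package, or regenerate it when the grammar source file exists on
# disk.
#
#     g = load_packaged_grammar("blib2to3", grammar_source, cache_dir)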
def main(*args: str) -> bool:
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True


if __name__ == "__main__":
    sys.exit(int(not main()))
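# When run as a script (for example `python -m blib2to3.pgen2.driver Grammar.txt`;
# the module path is inferred from the imports above, so adjust it to your
# install), main() regenerates and pickles each grammar file named on the
# command line.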