# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]
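# Usage sketch (illustrative, not part of the original module); assumes a
# pgen-style "Grammar.txt" is available in the working directory:
#
#     from blib2to3.pgen2 import driver
#
#     g = driver.load_grammar("Grammar.txt")
#     d = driver.Driver(g)
#     tree = d.parse_string("x = 1\n")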
# Python imports
import io
import logging
import os
import pkgutil
import sys
from typing import IO, Any, Iterable, List, Optional, Text, Tuple, Union

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar

Path = Union[str, "os.PathLike[str]"]

class Driver(object):
    def __init__(
        self,
        grammar: Grammar,
        convert: Optional[_Convert] = None,
        logger: Optional[Logger] = None,
    ) -> None:
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        self.convert = convert
    def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        indent_columns: List[int] = []
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are accumulated into the
                # prefix of the next significant token.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == tokenize.OP:
                # Map a generic OP token to its specific operator token type.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
                )
            if type == token.INDENT:
                # The indentation is carried over as the prefix of the first
                # token in the indented block.
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif type == token.DEDENT:
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError("incomplete input", type, value, (prefix, start))
        assert p.rootnode is not None
        return p.rootnode
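    # Illustration (not from the original source): for the input
    # "# header\nx = 1\n", the COMMENT and NL tokens never reach the parser;
    # they are folded into the prefix, so the NAME leaf "x" carries the
    # prefix "# header\n".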
    def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)
    def parse_file(
        self, filename: Path, encoding: Optional[Text] = None, debug: bool = False,
    ) -> NL:
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)
    def parse_string(self, text: Text, debug: bool = False) -> NL:
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline, grammar=self.grammar
        )
        return self.parse_tokens(tokens, debug)
    def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
        lines: List[str] = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == "\n":
                    if current_line.strip() and current_column < column:
                        res = "".join(lines)
                        return res, prefix[len(res) :]
                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in " \t":
                current_column += 1
            elif char == "\n":
                # unexpected empty line
                current_column = 0
            else:
                # indent is broken -- stop counting at the first content char
                wait_for_nl = True
        return "".join(lines), current_line
def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if cache_dir:
        return os.path.join(cache_dir, os.path.basename(name))
    else:
        return name
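# Illustration (not from the original source): on CPython 3.8.5, where
# sys.version_info is (3, 8, 5, "final", 0),
# _generate_pickle_name("Grammar.txt") returns "Grammar3.8.5.final.0.pickle";
# passing cache_dir places the same base name inside that directory.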
def load_grammar(
    gt: Text = "Grammar.txt",
    gp: Optional[Text] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger(__name__)
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g: grammar.Grammar = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g
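# Usage sketch (illustrative, assumed paths): the first call generates the
# tables from the grammar text and pickles them; a later call finds the
# pickle up to date via _newer() and takes the fast path of loading it.
#
#     g = load_grammar("Grammar.txt")  # generates and writes the pickle
#     g = load_grammar("Grammar.txt")  # loads the existing pickle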
def _newer(a: Text, b: Text) -> bool:
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)
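# Note (illustrative): _newer("tables.pickle", "Grammar.txt") is True only if
# tables.pickle exists and is at least as recent as Grammar.txt; if
# Grammar.txt is missing entirely, an existing pickle counts as current.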
def load_packaged_grammar(
    package: str, grammar_source: Text, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    assert data is not None
    g = grammar.Grammar()
    g.loads(data)
    return g
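# Usage sketch (illustrative; the package and file names are assumptions):
#
#     g = load_packaged_grammar("blib2to3", "Grammar.txt")
#
# If "Grammar.txt" exists on disk this regenerates via load_grammar();
# otherwise the pickled tables are read from the package data.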
def main(*args: Text) -> bool:
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True
if __name__ == "__main__":
    sys.exit(int(not main()))
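# Usage sketch (illustrative, assumed file names): run as a script to
# pre-generate pickle files, e.g.:
#
#     python driver.py Grammar.txt PatternGrammar.txt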