# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import io
import logging
import os
import pkgutil
import sys
from typing import IO, Any, Iterable, List, Optional, Text, Tuple, Union

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar

Path = Union[str, "os.PathLike[str]"]


class Driver(object):
    def __init__(
        self,
        grammar: Grammar,
        convert: Optional[_Convert] = None,
        logger: Optional[Logger] = None,
    ) -> None:
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        indent_columns: List[int] = []
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
                )
            if type == token.INDENT:
                indent_columns.append(len(value))
                _prefix = prefix + value
                prefix = ""
                value = ""
            elif type == token.DEDENT:
                _indent_col = indent_columns.pop()
                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            if type in {token.INDENT, token.DEDENT}:
                prefix = _prefix
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError("incomplete input", type, value, (prefix, start))
        assert p.rootnode is not None
        return p.rootnode

    def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(
        self, filename: Path, encoding: Optional[Text] = None, debug: bool = False
    ) -> NL:
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text: Text, debug: bool = False) -> NL:
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline, grammar=self.grammar
        )
        return self.parse_tokens(tokens, debug)
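
    # Usage sketch (illustrative, not part of the upstream API; the grammar
    # path below is an assumption):
    #
    #     g = load_grammar("Grammar.txt")
    #     d = Driver(g)
    #     tree = d.parse_string("x = 1\n")
    #     assert str(tree) == "x = 1\n"  # the tree round-trips its source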

    def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
        lines: List[str] = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
        for char in prefix:
            current_line += char
            if wait_for_nl:
                if char == "\n":
                    if current_line.strip() and current_column < column:
                        res = "".join(lines)
                        return res, prefix[len(res) :]

                    lines.append(current_line)
                    current_line = ""
                    current_column = 0
                    wait_for_nl = False
            elif char in " \t":
                current_column += 1
            elif char == "\n":
                # unexpected empty line
                current_column = 0
            else:
                # indent is broken
                wait_for_nl = True
        return "".join(lines), current_line
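
    # Worked example (a sketch, not upstream code): the helper splits a
    # DEDENT's whitespace-and-comment prefix at the first line indented less
    # than *column*, so trailing comments keep the indentation level they were
    # written at.
    #
    #     >>> d._partially_consume_prefix("    # deep\n  # shallow\n", 4)
    #     ('    # deep\n', '  # shallow\n')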


def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if cache_dir:
        return os.path.join(cache_dir, os.path.basename(name))
    else:
        return name
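
# Example (illustrative; the exact name depends on the running interpreter):
# on CPython 3.8.10, _generate_pickle_name("Grammar.txt") returns
# "Grammar3.8.10.final.0.pickle", because sys.version_info contributes all
# five fields (major, minor, micro, releaselevel, serial).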


def load_grammar(
    gt: Text = "Grammar.txt",
    gp: Optional[Text] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger(__name__)
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g: grammar.Grammar = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g
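
# Caching behavior sketch (paths are assumptions): the first call generates
# the tables from the grammar text and pickles them; later calls load the
# pickle as long as it is at least as new as the text file.
#
#     g = load_grammar("Grammar.txt")              # generates and writes the pickle
#     g = load_grammar("Grammar.txt")              # fast path: loads the pickle
#     g = load_grammar("Grammar.txt", force=True)  # regenerate regardless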


def _newer(a: Text, b: Text) -> bool:
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def load_packaged_grammar(
    package: str, grammar_source: Text, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    assert data is not None
    g = grammar.Grammar()
    g.loads(data)
    return g
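
# Packaged-data sketch (the package and resource names are hypothetical): when
# the grammar text is not an actual file on disk, the pickled tables are read
# from package data via pkgutil instead of the filesystem.
#
#     g = load_packaged_grammar("blib2to3", "Grammar.txt")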


def main(*args: Text) -> bool:
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True


if __name__ == "__main__":
    sys.exit(int(not main()))
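
# End-to-end sketch (illustrative only; file names are assumptions). A typical
# embedding pairs load_grammar with a Driver:
#
#     from blib2to3.pgen2 import driver
#
#     g = driver.load_grammar("Grammar.txt")
#     d = driver.Driver(g)
#     tree = d.parse_file("example.py")
#     print(tree)  # printing the tree reproduces the source text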