git.madduck.net Git - etc/vim.git/blob - src/blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Correctly handle inline tabs in docstrings (#1810)
[etc/vim.git] / src / blib2to3 / pgen2 / driver.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright 2006 Google, Inc. All Rights Reserved.
6 # Licensed to PSF under a Contributor Agreement.
7
8 """Parser driver.
9
10 This provides a high-level interface to parse a file into a syntax tree.
11
12 """
13
__author__ = "Guido van Rossum <guido@python.org>"

# Names exported by a star-import of this module.
__all__ = ["Driver", "load_grammar"]
17
18 # Python imports
19 import codecs
20 import io
21 import os
22 import logging
23 import pkgutil
24 import sys
25 from typing import (
26     Any,
27     Callable,
28     IO,
29     Iterable,
30     List,
31     Optional,
32     Text,
33     Tuple,
34     Union,
35     Sequence,
36 )
37
38 # Pgen imports
39 from . import grammar, parse, token, tokenize, pgen
40 from logging import Logger
41 from blib2to3.pytree import _Convert, NL
42 from blib2to3.pgen2.grammar import Grammar
43
# Type alias for path arguments in this module: a plain string or an
# ``os.PathLike`` that yields strings.
Path = Union[str, "os.PathLike[str]"]
45
46
class Driver:
    """Feed tokens to a pgen2 parser and hand back the resulting tree.

    A driver keeps a grammar, an optional *convert* object that is passed
    through to ``parse.Parser``, and a logger used for debug output.  The
    ``parse_*`` methods accept tokens, text streams, files or strings.
    """

    def __init__(
        self,
        grammar: Grammar,
        convert: Optional[_Convert] = None,
        logger: Optional[Logger] = None,
    ) -> None:
        """Remember the grammar, the converter and the logger.

        When *logger* is None, the logger named after this module is used.
        """
        self.grammar = grammar
        self.convert = convert
        self.logger = logging.getLogger(__name__) if logger is None else logger

    def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree.

        Each item of *tokens* is unpacked as
        ``(type, value, start, end, line_text)``.  Text that lies between
        tokens, as well as COMMENT and NL tokens, is collected into a
        *prefix* string which is passed to the parser together with the next
        token that is actually added.

        With *debug* true, every token added (and the final stop) is logged
        at debug level.

        Raises ``parse.ParseError("incomplete input", ...)`` when *tokens* is
        exhausted before the parser signals that it is done.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        parser = parse.Parser(self.grammar, self.convert)
        parser.setup()
        lineno, column = 1, 0
        # One entry per open INDENT: the length of that INDENT token's text.
        indent_widths: List[int] = []
        tok_type = tok_value = start = end = line_text = None
        prefix = ""
        for tok_type, tok_value, start, end, line_text in tokens:
            here = (lineno, column)
            if start != here:
                # The token starts later than where the previous one ended:
                # add the skipped newlines / same-line text to the prefix.
                assert here <= start, (here, start)
                start_line, start_col = start
                if start_line > lineno:
                    prefix += "\n" * (start_line - lineno)
                    lineno, column = start_line, 0
                if start_col > column:
                    prefix += line_text[column:start_col]
                    column = start_col

            if tok_type in (tokenize.COMMENT, tokenize.NL):
                # Never handed to the parser; they only extend the prefix.
                prefix += tok_value
                lineno, column = end
                if tok_value.endswith("\n"):
                    lineno, column = lineno + 1, 0
                continue

            if tok_type == token.OP:
                # Replace the generic OP type by the operator-specific one.
                tok_type = grammar.opmap[tok_value]
            if debug:
                self.logger.debug(
                    "%s %r (prefix=%r)", token.tok_name[tok_type], tok_value, prefix
                )

            if tok_type == token.INDENT:
                # The INDENT itself is added with empty prefix and value; the
                # pending prefix plus the indentation text is held back for
                # the token that follows.
                indent_widths.append(len(tok_value))
                carried = prefix + tok_value
                prefix = tok_value = ""
            elif tok_type == token.DEDENT:
                # Split the pending prefix at the indentation recorded for
                # the block being closed; the remainder goes to the next
                # token.
                prefix, carried = self._partially_consume_prefix(
                    prefix, indent_widths.pop()
                )

            if parser.addtoken(tok_type, tok_value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break

            if tok_type in (token.INDENT, token.DEDENT):
                prefix = carried
            else:
                prefix = ""
            lineno, column = end
            if tok_value.endswith("\n"):
                lineno, column = lineno + 1, 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError(
                "incomplete input", tok_type, tok_value, (prefix, start)
            )
        assert parser.rootnode is not None
        return parser.rootnode

    def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
        """Tokenize *stream* line by line and return the parsed syntax tree."""
        token_source = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(token_source, debug)

    def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree.

        Simply forwards to :meth:`parse_stream_raw`.
        """
        return self.parse_stream_raw(stream, debug)

    def parse_file(
        self, filename: Path, encoding: Optional[Text] = None, debug: bool = False
    ) -> NL:
        """Open *filename* for reading and return its syntax tree.

        *encoding* is passed unchanged to ``io.open``.
        """
        with io.open(filename, "r", encoding=encoding) as source:
            return self.parse_stream(source, debug)

    def parse_string(self, text: Text, debug: bool = False) -> NL:
        """Parse the source held in *text* and return the syntax tree."""
        readline = io.StringIO(text).readline
        token_source = tokenize.generate_tokens(readline, grammar=self.grammar)
        return self.parse_tokens(token_source, debug)

    def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
        """Split *prefix* in two at the first line indented less than *column*.

        The prefix is scanned character by character.  Leading spaces and
        tabs each count as one column; a newline met before any other
        character resets the count without starting a new chunk.  Once some
        other character has been seen, the chunk runs to the next newline.
        Scanning stops at the first such chunk that is not blank after
        ``strip()`` and whose indentation is smaller than *column*.

        Returns ``(consumed, remainder)``; the two always concatenate back
        to *prefix*.  Text after the last newline is always part of the
        remainder.
        """
        chunk_start = 0  # index in ``prefix`` where the current chunk begins
        indent_width = 0
        seen_content = False
        for pos, char in enumerate(prefix):
            if seen_content:
                if char != "\n":
                    continue
                chunk = prefix[chunk_start : pos + 1]
                if indent_width < column and chunk.strip():
                    break
                # Chunk accepted: the next one starts after this newline.
                chunk_start = pos + 1
                indent_width = 0
                seen_content = False
            elif char == " " or char == "\t":
                indent_width += 1
            elif char == "\n":
                # unexpected empty line
                indent_width = 0
            else:
                # indent is finished
                seen_content = True
        return prefix[:chunk_start], prefix[chunk_start:]
170
171
def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
    """Return the pickle file name that goes with grammar file *gt*.

    A ``.txt`` extension is dropped (any other extension is kept); the
    dot-joined fields of ``sys.version_info`` and ``.pickle`` are then
    appended.  When *cache_dir* is truthy, only the base name of that result
    is kept and it is joined onto *cache_dir*.
    """
    stem, ext = os.path.splitext(gt)
    if ext != ".txt":
        stem += ext
    version_tag = ".".join(str(part) for part in sys.version_info)
    pickle_name = stem + version_tag + ".pickle"
    if not cache_dir:
        return pickle_name
    return os.path.join(cache_dir, os.path.basename(pickle_name))
181
182
def load_grammar(
    gt: Text = "Grammar.txt",
    gp: Optional[Text] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle).

    *gt* is the grammar text file and *gp* the pickle file; when *gp* is None
    its name is derived from *gt* with ``_generate_pickle_name``.  The pickle
    is loaded only when *force* is false and ``_newer(gp, gt)`` holds.
    Otherwise the tables are regenerated from *gt* and, if *save* is true,
    written to *gp*; an ``OSError`` while writing is logged, not raised.
    When *logger* is None, the logger named after this module is used.
    """
    log = logging.getLogger(__name__) if logger is None else logger
    pickle_path = _generate_pickle_name(gt) if gp is None else gp

    if not force and _newer(pickle_path, gt):
        cached = grammar.Grammar()
        cached.load(pickle_path)
        return cached

    log.info("Generating grammar tables from %s", gt)
    tables: grammar.Grammar = pgen.generate_grammar(gt)
    if save:
        log.info("Writing grammar tables to %s", pickle_path)
        try:
            tables.dump(pickle_path)
        except OSError as exc:
            # Saving is best effort: the freshly built tables are still returned.
            log.info("Writing failed: %s", exc)
    return tables
207
208
def _newer(a: Text, b: Text) -> bool:
    """Inquire whether file a was written since file b.

    False when *a* does not exist; True when *a* exists but *b* does not;
    otherwise True exactly when *a*'s modification time is not earlier than
    *b*'s.
    """
    return os.path.exists(a) and (
        not os.path.exists(b) or os.path.getmtime(a) >= os.path.getmtime(b)
    )
216
217
def load_packaged_grammar(
    package: str, grammar_source: Text, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Load a grammar from a file on disk or from pickled package data.

    If *grammar_source* is an existing file, ``load_grammar(grammar_source)``
    is called, so the usual regeneration logic applies; when *cache_dir* is
    truthy the pickle path passed along is placed inside it.

    Otherwise the pickled grammar is read with
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from the base name of
    *grammar_source* (and *cache_dir*) by ``_generate_pickle_name``, i.e. by
    adding the Python version and a ``.pickle`` extension.

    """
    if not os.path.isfile(grammar_source):
        resource = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
        payload = pkgutil.get_data(package, resource)
        assert payload is not None
        packaged = grammar.Grammar()
        packaged.loads(payload)
        return packaged

    pickle_path = (
        _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
    )
    return load_grammar(grammar_source, gp=pickle_path)
240
241
def main(*args: Text) -> bool:
    """Main program, when run as a script: produce grammar pickle files.

    Each argument is a path to a grammar text file; with no arguments the
    paths are taken from ``sys.argv[1:]``.  Logging is configured to print
    INFO messages to stdout, then ``load_grammar`` is called for every path
    with regeneration and saving forced on.  Always returns True.
    """
    grammar_paths = args or tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for grammar_path in grammar_paths:
        load_grammar(grammar_path, save=True, force=True)
    return True
253
254
if __name__ == "__main__":
    # Exit status 0 when main() returns a true value, 1 otherwise.
    sys.exit(0 if main() else 1)