git.madduck.net Git - etc/vim.git/blobdiff - blib2to3/pgen2/driver.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Skip the broken version of regex (#1209)
[etc/vim.git] / blib2to3 / pgen2 / driver.py
index 2265c26443d2f45280112653f5f25eda59360b07..052c94883cf5ae6eca9c8676d95ebc50586cd1c6 100644 (file)
@@ -22,21 +22,42 @@ import os
 import logging
 import pkgutil
 import sys
+from typing import (
+    Any,
+    Callable,
+    IO,
+    Iterable,
+    List,
+    Optional,
+    Text,
+    Tuple,
+    Union,
+    Sequence,
+)
 
 # Pgen imports
 from . import grammar, parse, token, tokenize, pgen
+from logging import Logger
+from blib2to3.pytree import _Convert, NL
+from blib2to3.pgen2.grammar import Grammar
 
+Path = Union[str, "os.PathLike[str]"]
 
-class Driver(object):
 
-    def __init__(self, grammar, convert=None, logger=None):
+class Driver(object):
+    def __init__(
+        self,
+        grammar: Grammar,
+        convert: Optional[_Convert] = None,
+        logger: Optional[Logger] = None,
+    ) -> None:
         self.grammar = grammar
         if logger is None:
-            logger = logging.getLogger()
+            logger = logging.getLogger(__name__)
         self.logger = logger
         self.convert = convert
 
-    def parse_tokens(self, tokens, debug=False):
+    def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
         """Parse a series of tokens and return the syntax tree."""
         # XXX Move the prefix computation into a wrapper around tokenize.
         p = parse.Parser(self.grammar, self.convert)
@@ -68,26 +89,22 @@ class Driver(object):
             if type == token.OP:
                 type = grammar.opmap[value]
             if debug:
-                self.logger.debug("%s %r (prefix=%r)",
-                                  token.tok_name[type], value, prefix)
-            if type in {token.INDENT, token.DEDENT}:
-                _prefix = prefix
+                self.logger.debug(
+                    "%s %r (prefix=%r)", token.tok_name[type], value, prefix
+                )
+            if type == token.INDENT:
+                indent_columns.append(len(value))
+                _prefix = prefix + value
                 prefix = ""
-            if type == token.DEDENT:
+                value = ""
+            elif type == token.DEDENT:
                 _indent_col = indent_columns.pop()
-                prefix, _prefix = self._partially_consume_prefix(_prefix, _indent_col)
+                prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
             if p.addtoken(type, value, (prefix, start)):
                 if debug:
                     self.logger.debug("Stop.")
                 break
             prefix = ""
-            if type == token.INDENT:
-                indent_columns.append(len(value))
-                if _prefix.startswith(value):
-                    # Don't double-indent.  Since we're delaying the prefix that
-                    # would normally belong to INDENT, we need to put the value
-                    # at the end versus at the beginning.
-                    _prefix = _prefix[len(value):] + value
             if type in {token.INDENT, token.DEDENT}:
                 prefix = _prefix
             lineno, column = end
@@ -96,75 +113,87 @@ class Driver(object):
                 column = 0
         else:
             # We never broke out -- EOF is too soon (how can this happen???)
-            raise parse.ParseError("incomplete input",
-                                   type, value, (prefix, start))
+            assert start is not None
+            raise parse.ParseError("incomplete input", type, value, (prefix, start))
+        assert p.rootnode is not None
         return p.rootnode
 
-    def parse_stream_raw(self, stream, debug=False):
+    def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
         """Parse a stream and return the syntax tree."""
-        tokens = tokenize.generate_tokens(stream.readline)
+        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
         return self.parse_tokens(tokens, debug)
 
-    def parse_stream(self, stream, debug=False):
+    def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
         """Parse a stream and return the syntax tree."""
         return self.parse_stream_raw(stream, debug)
 
-    def parse_file(self, filename, encoding=None, debug=False):
+    def parse_file(
+        self, filename: Path, encoding: Optional[Text] = None, debug: bool = False,
+    ) -> NL:
         """Parse a file and return the syntax tree."""
         with io.open(filename, "r", encoding=encoding) as stream:
             return self.parse_stream(stream, debug)
 
-    def parse_string(self, text, debug=False):
+    def parse_string(self, text: Text, debug: bool = False) -> NL:
         """Parse a string and return the syntax tree."""
-        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
+        tokens = tokenize.generate_tokens(
+            io.StringIO(text).readline, grammar=self.grammar
+        )
         return self.parse_tokens(tokens, debug)
 
-    def _partially_consume_prefix(self, prefix, column):
-        lines = []
+    def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
+        lines: List[str] = []
         current_line = ""
         current_column = 0
         wait_for_nl = False
         for char in prefix:
             current_line += char
             if wait_for_nl:
-                if char == '\n':
+                if char == "\n":
                     if current_line.strip() and current_column < column:
-                        res = ''.join(lines)
-                        return res, prefix[len(res):]
+                        res = "".join(lines)
+                        return res, prefix[len(res) :]
 
                     lines.append(current_line)
                     current_line = ""
                     current_column = 0
                     wait_for_nl = False
-            elif char == ' ':
+            elif char in " \t":
                 current_column += 1
-            elif char == '\t':
-                current_column += 4
-            elif char == '\n':
-                # enexpected empty line
+            elif char == "\n":
+                # unexpected empty line
                 current_column = 0
             else:
                 # indent is finished
                 wait_for_nl = True
-        return ''.join(lines), current_line
+        return "".join(lines), current_line
 
 
-def _generate_pickle_name(gt):
+def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
     head, tail = os.path.splitext(gt)
     if tail == ".txt":
         tail = ""
-    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    if cache_dir:
+        return os.path.join(cache_dir, os.path.basename(name))
+    else:
+        return name
 
 
-def load_grammar(gt="Grammar.txt", gp=None,
-                 save=True, force=False, logger=None):
+def load_grammar(
+    gt: Text = "Grammar.txt",
+    gp: Optional[Text] = None,
+    save: bool = True,
+    force: bool = False,
+    logger: Optional[Logger] = None,
+) -> Grammar:
     """Load the grammar (maybe from a pickle)."""
     if logger is None:
-        logger = logging.getLogger()
+        logger = logging.getLogger(__name__)
     gp = _generate_pickle_name(gt) if gp is None else gp
     if force or not _newer(gp, gt):
         logger.info("Generating grammar tables from %s", gt)
-        g = pgen.generate_grammar(gt)
+        g: grammar.Grammar = pgen.generate_grammar(gt)
         if save:
             logger.info("Writing grammar tables to %s", gp)
             try:
@@ -177,7 +206,7 @@ def load_grammar(gt="Grammar.txt", gp=None,
     return g
 
 
-def _newer(a, b):
+def _newer(a: Text, b: Text) -> bool:
     """Inquire whether file a was written since file b."""
     if not os.path.exists(a):
         return False
@@ -186,7 +215,9 @@ def _newer(a, b):
     return os.path.getmtime(a) >= os.path.getmtime(b)
 
 
-def load_packaged_grammar(package, grammar_source):
+def load_packaged_grammar(
+    package: str, grammar_source: Text, cache_dir: Optional[Path] = None
+) -> grammar.Grammar:
     """Normally, loads a pickled grammar by doing
         pkgutil.get_data(package, pickled_grammar)
     where *pickled_grammar* is computed from *grammar_source* by adding the
@@ -198,26 +229,28 @@ def load_packaged_grammar(package, grammar_source):
 
     """
     if os.path.isfile(grammar_source):
-        return load_grammar(grammar_source)
-    pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
+        gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
+        return load_grammar(grammar_source, gp=gp)
+    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
     data = pkgutil.get_data(package, pickled_name)
+    assert data is not None
     g = grammar.Grammar()
     g.loads(data)
     return g
 
 
-def main(*args):
+def main(*args: Text) -> bool:
     """Main program, when run as a script: produce grammar pickle files.
 
     Calls load_grammar for each argument, a path to a grammar text file.
     """
     if not args:
-        args = sys.argv[1:]
-    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
-                        format='%(message)s')
+        args = tuple(sys.argv[1:])
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
     for gt in args:
         load_grammar(gt, save=True, force=True)
     return True
 
+
 if __name__ == "__main__":
     sys.exit(int(not main()))