]> git.madduck.net Git - etc/vim.git/blobdiff - src/blib2to3/pgen2/parse.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

vim: Parse skip_magic_trailing_comma from pyproject.toml (#2613)
[etc/vim.git] / src / blib2to3 / pgen2 / parse.py
index 47c8f02b4f5c8ee081ed70f4ba26f351483095a0..dc405264bad454a92469cb613b6dd962c394e837 100644 (file)
@@ -9,22 +9,31 @@ See Parser/parser.c in the Python distribution for additional info on
 how this parsing engine works.
 
 """
+import copy
+from contextlib import contextmanager
 
 # Local imports
-from . import token
+from . import grammar, token, tokenize
 from typing import (
+    cast,
+    Any,
     Optional,
     Text,
     Union,
     Tuple,
     Dict,
     List,
+    Iterator,
     Callable,
     Set,
+    TYPE_CHECKING,
 )
 from blib2to3.pgen2.grammar import Grammar
 from blib2to3.pytree import NL, Context, RawNode, Leaf, Node
 
+if TYPE_CHECKING:
+    from blib2to3.driver import TokenProxy
+
 
 Results = Dict[Text, NL]
 Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
@@ -37,6 +46,61 @@ def lam_sub(grammar: Grammar, node: RawNode) -> NL:
     return Node(type=node[0], children=node[3], context=node[2])
 
 
+class Recorder:
+    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
+        self.parser = parser
+        self._ilabels = ilabels
+        self.context = context  # not really matter
+
+        self._dead_ilabels: Set[int] = set()
+        self._start_point = copy.deepcopy(self.parser.stack)
+        self._points = {ilabel: copy.deepcopy(self._start_point) for ilabel in ilabels}
+
+    @property
+    def ilabels(self) -> Set[int]:
+        return self._dead_ilabels.symmetric_difference(self._ilabels)
+
+    @contextmanager
+    def switch_to(self, ilabel: int) -> Iterator[None]:
+        self.parser.stack = self._points[ilabel]
+        try:
+            yield
+        except ParseError:
+            self._dead_ilabels.add(ilabel)
+        finally:
+            self.parser.stack = self._start_point
+
+    def add_token(
+        self, tok_type: int, tok_val: Optional[Text], raw: bool = False
+    ) -> None:
+        func: Callable[..., Any]
+        if raw:
+            func = self.parser._addtoken
+        else:
+            func = self.parser.addtoken
+
+        for ilabel in self.ilabels:
+            with self.switch_to(ilabel):
+                args = [tok_type, tok_val, self.context]
+                if raw:
+                    args.insert(0, ilabel)
+                func(*args)
+
+    def determine_route(
+        self, value: Optional[Text] = None, force: bool = False
+    ) -> Optional[int]:
+        alive_ilabels = self.ilabels
+        if len(alive_ilabels) == 0:
+            *_, most_successful_ilabel = self._dead_ilabels
+            raise ParseError("bad input", most_successful_ilabel, value, self.context)
+
+        ilabel, *rest = alive_ilabels
+        if force or not rest:
+            return ilabel
+        else:
+            return None
+
+
 class ParseError(Exception):
     """Exception to signal the parser is stuck."""
 
@@ -114,7 +178,7 @@ class Parser(object):
         self.grammar = grammar
         self.convert = convert or lam_sub
 
-    def setup(self, start: Optional[int] = None) -> None:
+    def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
         """Prepare for parsing.
 
         This *must* be called before starting to parse.
@@ -137,11 +201,55 @@ class Parser(object):
         self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
         self.rootnode: Optional[NL] = None
         self.used_names: Set[str] = set()
+        self.proxy = proxy
 
     def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool:
         """Add a token; return True iff this is the end of the program."""
         # Map from token to label
-        ilabel = self.classify(type, value, context)
+        ilabels = self.classify(type, value, context)
+        assert len(ilabels) >= 1
+
+        # If we have only one state to advance, we'll directly
+        # take it as is.
+        if len(ilabels) == 1:
+            [ilabel] = ilabels
+            return self._addtoken(ilabel, type, value, context)
+
+        # If there are multiple states which we can advance (only
+        # happen under soft-keywords), then we will try all of them
+        # in parallel and as soon as one state can reach further than
+        # the rest, we'll choose that one. This is a pretty hacky
+        # and hopefully temporary algorithm.
+        #
+        # For a more detailed explanation, check out this post:
+        # https://tree.science/what-the-backtracking.html
+
+        with self.proxy.release() as proxy:
+            counter, force = 0, False
+            recorder = Recorder(self, ilabels, context)
+            recorder.add_token(type, value, raw=True)
+
+            next_token_value = value
+            while recorder.determine_route(next_token_value) is None:
+                if not proxy.can_advance(counter):
+                    force = True
+                    break
+
+                next_token_type, next_token_value, *_ = proxy.eat(counter)
+                if next_token_type == tokenize.OP:
+                    next_token_type = grammar.opmap[cast(str, next_token_value)]
+
+                recorder.add_token(next_token_type, next_token_value)
+                counter += 1
+
+            ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
+            assert ilabel is not None
+
+        return self._addtoken(ilabel, type, value, context)
+
+    def _addtoken(
+        self, ilabel: int, type: int, value: Optional[Text], context: Context
+    ) -> bool:
         # Loop until the token is shifted; may raise exceptions
         while True:
             dfa, state, node = self.stack[-1]
@@ -185,20 +293,29 @@ class Parser(object):
                     # No success finding a transition
                     raise ParseError("bad input", type, value, context)
 
-    def classify(self, type: int, value: Optional[Text], context: Context) -> int:
-        """Turn a token into a label.  (Internal)"""
+    def classify(self, type: int, value: Optional[Text], context: Context) -> List[int]:
+        """Turn a token into a label.  (Internal)
+
+        Depending on whether the value is a soft-keyword or not,
+        this function may return multiple labels to choose from."""
         if type == token.NAME:
             # Keep a listing of all used names
             assert value is not None
             self.used_names.add(value)
             # Check for reserved words
-            ilabel = self.grammar.keywords.get(value)
-            if ilabel is not None:
-                return ilabel
+            if value in self.grammar.keywords:
+                return [self.grammar.keywords[value]]
+            elif value in self.grammar.soft_keywords:
+                assert type in self.grammar.tokens
+                return [
+                    self.grammar.soft_keywords[value],
+                    self.grammar.tokens[type],
+                ]
+
         ilabel = self.grammar.tokens.get(type)
         if ilabel is None:
             raise ParseError("bad token", type, value, context)
-        return ilabel
+        return [ilabel]
 
     def shift(
         self, type: int, value: Optional[Text], newstate: int, context: Context