]> git.madduck.net Git - etc/vim.git/blobdiff - blib2to3/pgen2/pgen.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add GitHub Actions badge to README.md (#1134)
[etc/vim.git] / blib2to3 / pgen2 / pgen.py
index 1da6925e5466d5022679fbfdac15965e7ee967fc..774cffc5e1cd991271d6e0be9b5e887a30e3d2fa 100644 (file)
@@ -4,13 +4,40 @@
 # Pgen imports
 from . import grammar, token, tokenize
 
+from typing import (
+    Any,
+    Dict,
+    IO,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Text,
+    Tuple,
+    Union,
+    Sequence,
+    NoReturn,
+)
+from blib2to3.pgen2 import grammar
+from blib2to3.pgen2.tokenize import GoodTokenInfo
+import os
+
+
+Path = Union[str, "os.PathLike[str]"]
+
 
 class PgenGrammar(grammar.Grammar):
     pass
 
 
 class ParserGenerator(object):
-    def __init__(self, filename, stream=None):
+
+    filename: Path
+    stream: IO[Text]
+    generator: Iterator[GoodTokenInfo]
+    first: Dict[Text, Optional[Dict[Text, int]]]
+
+    def __init__(self, filename: Path, stream: Optional[IO[Text]] = None) -> None:
         close_stream = None
         if stream is None:
             stream = open(filename)
@@ -25,7 +52,7 @@ class ParserGenerator(object):
         self.first = {}  # map from symbol name to set of tokens
         self.addfirstsets()
 
-    def make_grammar(self):
+    def make_grammar(self) -> PgenGrammar:
         c = PgenGrammar()
         names = list(self.dfas.keys())
         names.sort()
@@ -50,8 +77,9 @@ class ParserGenerator(object):
         c.start = c.symbol2number[self.startsymbol]
         return c
 
-    def make_first(self, c, name):
+    def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]:
         rawfirst = self.first[name]
+        assert rawfirst is not None
         first = {}
         for label in sorted(rawfirst):
             ilabel = self.make_label(c, label)
@@ -59,7 +87,7 @@ class ParserGenerator(object):
             first[ilabel] = 1
         return first
 
-    def make_label(self, c, label):
+    def make_label(self, c: PgenGrammar, label: Text) -> int:
         # XXX Maybe this should be a method on a subclass of converter?
         ilabel = len(c.labels)
         if label[0].isalpha():
@@ -105,7 +133,7 @@ class ParserGenerator(object):
                     c.tokens[itoken] = ilabel
                     return ilabel
 
-    def addfirstsets(self):
+    def addfirstsets(self) -> None:
         names = list(self.dfas.keys())
         names.sort()
         for name in names:
@@ -113,11 +141,11 @@ class ParserGenerator(object):
                 self.calcfirst(name)
             # print name, self.first[name].keys()
 
-    def calcfirst(self, name):
+    def calcfirst(self, name: Text) -> None:
         dfa = self.dfas[name]
         self.first[name] = None  # dummy to detect left recursion
         state = dfa[0]
-        totalset = {}
+        totalset: Dict[str, int] = {}
         overlapcheck = {}
         for label, next in state.arcs.items():
             if label in self.dfas:
@@ -128,12 +156,13 @@ class ParserGenerator(object):
                 else:
                     self.calcfirst(label)
                     fset = self.first[label]
+                    assert fset is not None
                 totalset.update(fset)
                 overlapcheck[label] = fset
             else:
                 totalset[label] = 1
                 overlapcheck[label] = {label: 1}
-        inverse = {}
+        inverse: Dict[str, str] = {}
         for label, itsfirst in overlapcheck.items():
             for symbol in itsfirst:
                 if symbol in inverse:
@@ -145,9 +174,9 @@ class ParserGenerator(object):
                 inverse[symbol] = label
         self.first[name] = totalset
 
-    def parse(self):
+    def parse(self) -> Tuple[Dict[Text, List["DFAState"]], Text]:
         dfas = {}
-        startsymbol = None
+        startsymbol: Optional[str] = None
         # MSTART: (NEWLINE | RULE)* ENDMARKER
         while self.type != token.ENDMARKER:
             while self.type == token.NEWLINE:
@@ -167,9 +196,10 @@ class ParserGenerator(object):
             # print name, oldlen, newlen
             if startsymbol is None:
                 startsymbol = name
+        assert startsymbol is not None
         return dfas, startsymbol
 
-    def make_dfa(self, start, finish):
+    def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]:
         # To turn an NFA into a DFA, we define the states of the DFA
         # to correspond to *sets* of states of the NFA.  Then do some
         # state reduction.  Let's represent sets as dicts with 1 for
@@ -177,12 +207,12 @@ class ParserGenerator(object):
         assert isinstance(start, NFAState)
         assert isinstance(finish, NFAState)
 
-        def closure(state):
-            base = {}
+        def closure(state: NFAState) -> Dict[NFAState, int]:
+            base: Dict[NFAState, int] = {}
             addclosure(state, base)
             return base
 
-        def addclosure(state, base):
+        def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None:
             assert isinstance(state, NFAState)
             if state in base:
                 return
@@ -193,7 +223,7 @@ class ParserGenerator(object):
 
         states = [DFAState(closure(start), finish)]
         for state in states:  # NB states grows while we're iterating
-            arcs = {}
+            arcs: Dict[str, Dict[NFAState, int]] = {}
             for nfastate in state.nfaset:
                 for label, next in nfastate.arcs:
                     if label is not None:
@@ -208,7 +238,7 @@ class ParserGenerator(object):
                 state.addarc(st, label)
         return states  # List of DFAState instances; first one is start
 
-    def dump_nfa(self, name, start, finish):
+    def dump_nfa(self, name: Text, start: "NFAState", finish: "NFAState") -> None:
         print("Dump of NFA for", name)
         todo = [start]
         for i, state in enumerate(todo):
@@ -224,14 +254,14 @@ class ParserGenerator(object):
                 else:
                     print("    %s -> %d" % (label, j))
 
-    def dump_dfa(self, name, dfa):
+    def dump_dfa(self, name: Text, dfa: Sequence["DFAState"]) -> None:
         print("Dump of DFA for", name)
         for i, state in enumerate(dfa):
             print("  State", i, state.isfinal and "(final)" or "")
             for label, next in sorted(state.arcs.items()):
                 print("    %s -> %d" % (label, dfa.index(next)))
 
-    def simplify_dfa(self, dfa):
+    def simplify_dfa(self, dfa: List["DFAState"]) -> None:
         # This is not theoretically optimal, but works well enough.
         # Algorithm: repeatedly look for two states that have the same
         # set of arcs (same labels pointing to the same nodes) and
@@ -252,7 +282,7 @@ class ParserGenerator(object):
                         changes = True
                         break
 
-    def parse_rhs(self):
+    def parse_rhs(self) -> Tuple["NFAState", "NFAState"]:
         # RHS: ALT ('|' ALT)*
         a, z = self.parse_alt()
         if self.value != "|":
@@ -269,7 +299,7 @@ class ParserGenerator(object):
                 z.addarc(zz)
             return aa, zz
 
-    def parse_alt(self):
+    def parse_alt(self) -> Tuple["NFAState", "NFAState"]:
         # ALT: ITEM+
         a, b = self.parse_item()
         while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
@@ -278,7 +308,7 @@ class ParserGenerator(object):
             b = d
         return a, b
 
-    def parse_item(self):
+    def parse_item(self) -> Tuple["NFAState", "NFAState"]:
         # ITEM: '[' RHS ']' | ATOM ['+' | '*']
         if self.value == "[":
             self.gettoken()
@@ -298,7 +328,7 @@ class ParserGenerator(object):
             else:
                 return a, a
 
-    def parse_atom(self):
+    def parse_atom(self) -> Tuple["NFAState", "NFAState"]:
         # ATOM: '(' RHS ')' | NAME | STRING
         if self.value == "(":
             self.gettoken()
@@ -315,8 +345,9 @@ class ParserGenerator(object):
             self.raise_error(
                 "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
             )
+            assert False
 
-    def expect(self, type, value=None):
+    def expect(self, type: int, value: Optional[Any] = None) -> Text:
         if self.type != type or (value is not None and self.value != value):
             self.raise_error(
                 "expected %s/%s, got %s/%s", type, value, self.type, self.value
@@ -325,14 +356,14 @@ class ParserGenerator(object):
         self.gettoken()
         return value
 
-    def gettoken(self):
+    def gettoken(self) -> None:
         tup = next(self.generator)
         while tup[0] in (tokenize.COMMENT, tokenize.NL):
             tup = next(self.generator)
         self.type, self.value, self.begin, self.end, self.line = tup
         # print token.tok_name[self.type], repr(self.value)
 
-    def raise_error(self, msg, *args):
+    def raise_error(self, msg: str, *args: Any) -> NoReturn:
         if args:
             try:
                 msg = msg % args
@@ -342,17 +373,23 @@ class ParserGenerator(object):
 
 
 class NFAState(object):
-    def __init__(self):
+    arcs: List[Tuple[Optional[Text], "NFAState"]]
+
+    def __init__(self) -> None:
         self.arcs = []  # list of (label, NFAState) pairs
 
-    def addarc(self, next, label=None):
+    def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None:
         assert label is None or isinstance(label, str)
         assert isinstance(next, NFAState)
         self.arcs.append((label, next))
 
 
 class DFAState(object):
-    def __init__(self, nfaset, final):
+    nfaset: Dict[NFAState, Any]
+    isfinal: bool
+    arcs: Dict[Text, "DFAState"]
+
+    def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None:
         assert isinstance(nfaset, dict)
         assert isinstance(next(iter(nfaset)), NFAState)
         assert isinstance(final, NFAState)
@@ -360,18 +397,18 @@ class DFAState(object):
         self.isfinal = final in nfaset
         self.arcs = {}  # map from label to DFAState
 
-    def addarc(self, next, label):
+    def addarc(self, next: "DFAState", label: Text) -> None:
         assert isinstance(label, str)
         assert label not in self.arcs
         assert isinstance(next, DFAState)
         self.arcs[label] = next
 
-    def unifystate(self, old, new):
+    def unifystate(self, old: "DFAState", new: "DFAState") -> None:
         for label, next in self.arcs.items():
             if next is old:
                 self.arcs[label] = new
 
-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
         # Equality test -- ignore the nfaset instance variable
         assert isinstance(other, DFAState)
         if self.isfinal != other.isfinal:
@@ -385,9 +422,9 @@ class DFAState(object):
                 return False
         return True
 
-    __hash__ = None  # For Py3 compatibility.
+    __hash__: Any = None  # For Py3 compatibility.
 
 
-def generate_grammar(filename="Grammar.txt"):
+def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
     p = ParserGenerator(filename)
     return p.make_grammar()