]> git.madduck.net Git - etc/vim.git/blobdiff - blib2to3/pgen2/pgen.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Improve String Handling (#1132)
[etc/vim.git] / blib2to3 / pgen2 / pgen.py
index b0cbd16c4dad4fbf8b7afc8c776786dc7ad997ec..13ec51d1878f51bd73d9d09778dc5a5a4726ea6c 100644 (file)
@@ -4,12 +4,40 @@
 # Pgen imports
 from . import grammar, token, tokenize
 
+from typing import (
+    Any,
+    Dict,
+    IO,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Text,
+    Tuple,
+    Union,
+    Sequence,
+    NoReturn,
+)
+from blib2to3.pgen2 import grammar
+from blib2to3.pgen2.tokenize import GoodTokenInfo
+import os
+
+
+Path = Union[str, "os.PathLike[str]"]
+
+
 class PgenGrammar(grammar.Grammar):
     pass
 
+
 class ParserGenerator(object):
 
-    def __init__(self, filename, stream=None):
+    filename: Path
+    stream: IO[Text]
+    generator: Iterator[GoodTokenInfo]
+    first: Dict[Text, Optional[Dict[Text, int]]]
+
+    def __init__(self, filename: Path, stream: Optional[IO[Text]] = None) -> None:
         close_stream = None
         if stream is None:
             stream = open(filename)
@@ -17,14 +45,14 @@ class ParserGenerator(object):
         self.filename = filename
         self.stream = stream
         self.generator = tokenize.generate_tokens(stream.readline)
-        self.gettoken() # Initialize lookahead
+        self.gettoken()  # Initialize lookahead
         self.dfas, self.startsymbol = self.parse()
         if close_stream is not None:
             close_stream()
-        self.first = {} # map from symbol name to set of tokens
+        self.first = {}  # map from symbol name to set of tokens
         self.addfirstsets()
 
-    def make_grammar(self):
+    def make_grammar(self) -> PgenGrammar:
         c = PgenGrammar()
         names = list(self.dfas.keys())
         names.sort()
@@ -49,8 +77,9 @@ class ParserGenerator(object):
         c.start = c.symbol2number[self.startsymbol]
         return c
 
-    def make_first(self, c, name):
+    def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]:
         rawfirst = self.first[name]
+        assert rawfirst is not None
         first = {}
         for label in sorted(rawfirst):
             ilabel = self.make_label(c, label)
@@ -58,7 +87,7 @@ class ParserGenerator(object):
             first[ilabel] = 1
         return first
 
-    def make_label(self, c, label):
+    def make_label(self, c: PgenGrammar, label: Text) -> int:
         # XXX Maybe this should be a method on a subclass of converter?
         ilabel = len(c.labels)
         if label[0].isalpha():
@@ -96,7 +125,7 @@ class ParserGenerator(object):
                     return ilabel
             else:
                 # An operator (any non-numeric token)
-                itoken = grammar.opmap[value] # Fails if unknown token
+                itoken = grammar.opmap[value]  # Fails if unknown token
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -104,19 +133,19 @@ class ParserGenerator(object):
                     c.tokens[itoken] = ilabel
                     return ilabel
 
-    def addfirstsets(self):
+    def addfirstsets(self) -> None:
         names = list(self.dfas.keys())
         names.sort()
         for name in names:
             if name not in self.first:
                 self.calcfirst(name)
-            #print name, self.first[name].keys()
+            # print name, self.first[name].keys()
 
-    def calcfirst(self, name):
+    def calcfirst(self, name: Text) -> None:
         dfa = self.dfas[name]
-        self.first[name] = None # dummy to detect left recursion
+        self.first[name] = None  # dummy to detect left recursion
         state = dfa[0]
-        totalset = {}
+        totalset: Dict[str, int] = {}
         overlapcheck = {}
         for label, next in state.arcs.items():
             if label in self.dfas:
@@ -127,24 +156,27 @@ class ParserGenerator(object):
                 else:
                     self.calcfirst(label)
                     fset = self.first[label]
+                    assert fset is not None
                 totalset.update(fset)
                 overlapcheck[label] = fset
             else:
                 totalset[label] = 1
                 overlapcheck[label] = {label: 1}
-        inverse = {}
+        inverse: Dict[str, str] = {}
         for label, itsfirst in overlapcheck.items():
             for symbol in itsfirst:
                 if symbol in inverse:
-                    raise ValueError("rule %s is ambiguous; %s is in the"
-                                     " first sets of %s as well as %s" %
-                                     (name, symbol, label, inverse[symbol]))
+                    raise ValueError(
+                        "rule %s is ambiguous; %s is in the first sets of %s as well"
+                        " as %s"
+                        % (name, symbol, label, inverse[symbol])
+                    )
                 inverse[symbol] = label
         self.first[name] = totalset
 
-    def parse(self):
+    def parse(self) -> Tuple[Dict[Text, List["DFAState"]], Text]:
         dfas = {}
-        startsymbol = None
+        startsymbol: Optional[str] = None
         # MSTART: (NEWLINE | RULE)* ENDMARKER
         while self.type != token.ENDMARKER:
             while self.type == token.NEWLINE:
@@ -154,30 +186,33 @@ class ParserGenerator(object):
             self.expect(token.OP, ":")
             a, z = self.parse_rhs()
             self.expect(token.NEWLINE)
-            #self.dump_nfa(name, a, z)
+            # self.dump_nfa(name, a, z)
             dfa = self.make_dfa(a, z)
-            #self.dump_dfa(name, dfa)
+            # self.dump_dfa(name, dfa)
             oldlen = len(dfa)
             self.simplify_dfa(dfa)
             newlen = len(dfa)
             dfas[name] = dfa
-            #print name, oldlen, newlen
+            # print name, oldlen, newlen
             if startsymbol is None:
                 startsymbol = name
+        assert startsymbol is not None
         return dfas, startsymbol
 
-    def make_dfa(self, start, finish):
+    def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]:
         # To turn an NFA into a DFA, we define the states of the DFA
         # to correspond to *sets* of states of the NFA.  Then do some
         # state reduction.  Let's represent sets as dicts with 1 for
         # values.
         assert isinstance(start, NFAState)
         assert isinstance(finish, NFAState)
-        def closure(state):
-            base = {}
+
+        def closure(state: NFAState) -> Dict[NFAState, int]:
+            base: Dict[NFAState, int] = {}
             addclosure(state, base)
             return base
-        def addclosure(state, base):
+
+        def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None:
             assert isinstance(state, NFAState)
             if state in base:
                 return
@@ -185,9 +220,10 @@ class ParserGenerator(object):
             for label, next in state.arcs:
                 if label is None:
                     addclosure(next, base)
+
         states = [DFAState(closure(start), finish)]
-        for state in states: # NB states grows while we're iterating
-            arcs = {}
+        for state in states:  # NB states grows while we're iterating
+            arcs: Dict[str, Dict[NFAState, int]] = {}
             for nfastate in state.nfaset:
                 for label, next in nfastate.arcs:
                     if label is not None:
@@ -200,9 +236,9 @@ class ParserGenerator(object):
                     st = DFAState(nfaset, finish)
                     states.append(st)
                 state.addarc(st, label)
-        return states # List of DFAState instances; first one is start
+        return states  # List of DFAState instances; first one is start
 
-    def dump_nfa(self, name, start, finish):
+    def dump_nfa(self, name: Text, start: "NFAState", finish: "NFAState") -> None:
         print("Dump of NFA for", name)
         todo = [start]
         for i, state in enumerate(todo):
@@ -218,14 +254,14 @@ class ParserGenerator(object):
                 else:
                     print("    %s -> %d" % (label, j))
 
-    def dump_dfa(self, name, dfa):
+    def dump_dfa(self, name: Text, dfa: Sequence["DFAState"]) -> None:
         print("Dump of DFA for", name)
         for i, state in enumerate(dfa):
             print("  State", i, state.isfinal and "(final)" or "")
             for label, next in sorted(state.arcs.items()):
                 print("    %s -> %d" % (label, dfa.index(next)))
 
-    def simplify_dfa(self, dfa):
+    def simplify_dfa(self, dfa: List["DFAState"]) -> None:
         # This is not theoretically optimal, but works well enough.
         # Algorithm: repeatedly look for two states that have the same
         # set of arcs (same labels pointing to the same nodes) and
@@ -236,17 +272,17 @@ class ParserGenerator(object):
         while changes:
             changes = False
             for i, state_i in enumerate(dfa):
-                for j in range(i+1, len(dfa)):
+                for j in range(i + 1, len(dfa)):
                     state_j = dfa[j]
                     if state_i == state_j:
-                        #print "  unify", i, j
+                        # print "  unify", i, j
                         del dfa[j]
                         for state in dfa:
                             state.unifystate(state_j, state_i)
                         changes = True
                         break
 
-    def parse_rhs(self):
+    def parse_rhs(self) -> Tuple["NFAState", "NFAState"]:
         # RHS: ALT ('|' ALT)*
         a, z = self.parse_alt()
         if self.value != "|":
@@ -263,17 +299,16 @@ class ParserGenerator(object):
                 z.addarc(zz)
             return aa, zz
 
-    def parse_alt(self):
+    def parse_alt(self) -> Tuple["NFAState", "NFAState"]:
         # ALT: ITEM+
         a, b = self.parse_item()
-        while (self.value in ("(", "[") or
-               self.type in (token.NAME, token.STRING)):
+        while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
             c, d = self.parse_item()
             b.addarc(c)
             b = d
         return a, b
 
-    def parse_item(self):
+    def parse_item(self) -> Tuple["NFAState", "NFAState"]:
         # ITEM: '[' RHS ']' | ATOM ['+' | '*']
         if self.value == "[":
             self.gettoken()
@@ -293,7 +328,7 @@ class ParserGenerator(object):
             else:
                 return a, a
 
-    def parse_atom(self):
+    def parse_atom(self) -> Tuple["NFAState", "NFAState"]:
         # ATOM: '(' RHS ')' | NAME | STRING
         if self.value == "(":
             self.gettoken()
@@ -307,65 +342,73 @@ class ParserGenerator(object):
             self.gettoken()
             return a, z
         else:
-            self.raise_error("expected (...) or NAME or STRING, got %s/%s",
-                             self.type, self.value)
+            self.raise_error(
+                "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
+            )
+            assert False
 
-    def expect(self, type, value=None):
+    def expect(self, type: int, value: Optional[Any] = None) -> Text:
         if self.type != type or (value is not None and self.value != value):
-            self.raise_error("expected %s/%s, got %s/%s",
-                             type, value, self.type, self.value)
+            self.raise_error(
+                "expected %s/%s, got %s/%s", type, value, self.type, self.value
+            )
         value = self.value
         self.gettoken()
         return value
 
-    def gettoken(self):
+    def gettoken(self) -> None:
         tup = next(self.generator)
         while tup[0] in (tokenize.COMMENT, tokenize.NL):
             tup = next(self.generator)
         self.type, self.value, self.begin, self.end, self.line = tup
-        #print token.tok_name[self.type], repr(self.value)
+        # print token.tok_name[self.type], repr(self.value)
 
-    def raise_error(self, msg, *args):
+    def raise_error(self, msg: str, *args: Any) -> NoReturn:
         if args:
             try:
                 msg = msg % args
             except:
                 msg = " ".join([msg] + list(map(str, args)))
-        raise SyntaxError(msg, (self.filename, self.end[0],
-                                self.end[1], self.line))
+        raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))
+
 
 class NFAState(object):
+    arcs: List[Tuple[Optional[Text], "NFAState"]]
 
-    def __init__(self):
-        self.arcs = [] # list of (label, NFAState) pairs
+    def __init__(self) -> None:
+        self.arcs = []  # list of (label, NFAState) pairs
 
-    def addarc(self, next, label=None):
+    def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None:
         assert label is None or isinstance(label, str)
         assert isinstance(next, NFAState)
         self.arcs.append((label, next))
 
+
 class DFAState(object):
+    nfaset: Dict[NFAState, Any]
+    isfinal: bool
+    arcs: Dict[Text, "DFAState"]
 
-    def __init__(self, nfaset, final):
+    def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None:
         assert isinstance(nfaset, dict)
         assert isinstance(next(iter(nfaset)), NFAState)
         assert isinstance(final, NFAState)
         self.nfaset = nfaset
         self.isfinal = final in nfaset
-        self.arcs = {} # map from label to DFAState
+        self.arcs = {}  # map from label to DFAState
 
-    def addarc(self, next, label):
+    def addarc(self, next: "DFAState", label: Text) -> None:
         assert isinstance(label, str)
         assert label not in self.arcs
         assert isinstance(next, DFAState)
         self.arcs[label] = next
 
-    def unifystate(self, old, new):
+    def unifystate(self, old: "DFAState", new: "DFAState") -> None:
         for label, next in self.arcs.items():
             if next is old:
                 self.arcs[label] = new
 
-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
         # Equality test -- ignore the nfaset instance variable
         assert isinstance(other, DFAState)
         if self.isfinal != other.isfinal:
@@ -379,8 +422,9 @@ class DFAState(object):
                 return False
         return True
 
-    __hash__ = None # For Py3 compatibility.
+    __hash__: Any = None  # For Py3 compatibility.
+
 
-def generate_grammar(filename="Grammar.txt"):
+def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
     p = ParserGenerator(filename)
     return p.make_grammar()