X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/0ff718e1e2b434477bca134e6c8aa0f67c898cbc..729f2d8cafd1b8e44d7c0a6bd841453ffac01c8e:/blib2to3/pgen2/pgen.py diff --git a/blib2to3/pgen2/pgen.py b/blib2to3/pgen2/pgen.py index 1da6925..774cffc 100644 --- a/blib2to3/pgen2/pgen.py +++ b/blib2to3/pgen2/pgen.py @@ -4,13 +4,40 @@ # Pgen imports from . import grammar, token, tokenize +from typing import ( + Any, + Dict, + IO, + Iterable, + Iterator, + List, + Optional, + Text, + Tuple, + Union, + Sequence, + NoReturn, +) +from blib2to3.pgen2 import grammar +from blib2to3.pgen2.tokenize import GoodTokenInfo +import os + + +Path = Union[str, "os.PathLike[str]"] + class PgenGrammar(grammar.Grammar): pass class ParserGenerator(object): - def __init__(self, filename, stream=None): + + filename: Path + stream: IO[Text] + generator: Iterator[GoodTokenInfo] + first: Dict[Text, Optional[Dict[Text, int]]] + + def __init__(self, filename: Path, stream: Optional[IO[Text]] = None) -> None: close_stream = None if stream is None: stream = open(filename) @@ -25,7 +52,7 @@ class ParserGenerator(object): self.first = {} # map from symbol name to set of tokens self.addfirstsets() - def make_grammar(self): + def make_grammar(self) -> PgenGrammar: c = PgenGrammar() names = list(self.dfas.keys()) names.sort() @@ -50,8 +77,9 @@ class ParserGenerator(object): c.start = c.symbol2number[self.startsymbol] return c - def make_first(self, c, name): + def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: rawfirst = self.first[name] + assert rawfirst is not None first = {} for label in sorted(rawfirst): ilabel = self.make_label(c, label) @@ -59,7 +87,7 @@ class ParserGenerator(object): first[ilabel] = 1 return first - def make_label(self, c, label): + def make_label(self, c: PgenGrammar, label: Text) -> int: # XXX Maybe this should be a method on a subclass of converter? ilabel = len(c.labels) if label[0].isalpha(): @@ -105,7 +133,7 @@ class ParserGenerator(object): c.tokens[itoken] = ilabel return ilabel - def addfirstsets(self): + def addfirstsets(self) -> None: names = list(self.dfas.keys()) names.sort() for name in names: @@ -113,11 +141,11 @@ class ParserGenerator(object): self.calcfirst(name) # print name, self.first[name].keys() - def calcfirst(self, name): + def calcfirst(self, name: Text) -> None: dfa = self.dfas[name] self.first[name] = None # dummy to detect left recursion state = dfa[0] - totalset = {} + totalset: Dict[str, int] = {} overlapcheck = {} for label, next in state.arcs.items(): if label in self.dfas: @@ -128,12 +156,13 @@ class ParserGenerator(object): else: self.calcfirst(label) fset = self.first[label] + assert fset is not None totalset.update(fset) overlapcheck[label] = fset else: totalset[label] = 1 overlapcheck[label] = {label: 1} - inverse = {} + inverse: Dict[str, str] = {} for label, itsfirst in overlapcheck.items(): for symbol in itsfirst: if symbol in inverse: @@ -145,9 +174,9 @@ class ParserGenerator(object): inverse[symbol] = label self.first[name] = totalset - def parse(self): + def parse(self) -> Tuple[Dict[Text, List["DFAState"]], Text]: dfas = {} - startsymbol = None + startsymbol: Optional[str] = None # MSTART: (NEWLINE | RULE)* ENDMARKER while self.type != token.ENDMARKER: while self.type == token.NEWLINE: @@ -167,9 +196,10 @@ class ParserGenerator(object): # print name, oldlen, newlen if startsymbol is None: startsymbol = name + assert startsymbol is not None return dfas, startsymbol - def make_dfa(self, start, finish): + def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]: # To turn an NFA into a DFA, we define the states of the DFA # to correspond to *sets* of states of the NFA. Then do some # state reduction. Let's represent sets as dicts with 1 for @@ -177,12 +207,12 @@ class ParserGenerator(object): assert isinstance(start, NFAState) assert isinstance(finish, NFAState) - def closure(state): - base = {} + def closure(state: NFAState) -> Dict[NFAState, int]: + base: Dict[NFAState, int] = {} addclosure(state, base) return base - def addclosure(state, base): + def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None: assert isinstance(state, NFAState) if state in base: return @@ -193,7 +223,7 @@ class ParserGenerator(object): states = [DFAState(closure(start), finish)] for state in states: # NB states grows while we're iterating - arcs = {} + arcs: Dict[str, Dict[NFAState, int]] = {} for nfastate in state.nfaset: for label, next in nfastate.arcs: if label is not None: @@ -208,7 +238,7 @@ class ParserGenerator(object): state.addarc(st, label) return states # List of DFAState instances; first one is start - def dump_nfa(self, name, start, finish): + def dump_nfa(self, name: Text, start: "NFAState", finish: "NFAState") -> None: print("Dump of NFA for", name) todo = [start] for i, state in enumerate(todo): @@ -224,14 +254,14 @@ class ParserGenerator(object): else: print(" %s -> %d" % (label, j)) - def dump_dfa(self, name, dfa): + def dump_dfa(self, name: Text, dfa: Sequence["DFAState"]) -> None: print("Dump of DFA for", name) for i, state in enumerate(dfa): print(" State", i, state.isfinal and "(final)" or "") for label, next in sorted(state.arcs.items()): print(" %s -> %d" % (label, dfa.index(next))) - def simplify_dfa(self, dfa): + def simplify_dfa(self, dfa: List["DFAState"]) -> None: # This is not theoretically optimal, but works well enough. # Algorithm: repeatedly look for two states that have the same # set of arcs (same labels pointing to the same nodes) and @@ -252,7 +282,7 @@ class ParserGenerator(object): changes = True break - def parse_rhs(self): + def parse_rhs(self) -> Tuple["NFAState", "NFAState"]: # RHS: ALT ('|' ALT)* a, z = self.parse_alt() if self.value != "|": @@ -269,7 +299,7 @@ class ParserGenerator(object): z.addarc(zz) return aa, zz - def parse_alt(self): + def parse_alt(self) -> Tuple["NFAState", "NFAState"]: # ALT: ITEM+ a, b = self.parse_item() while self.value in ("(", "[") or self.type in (token.NAME, token.STRING): @@ -278,7 +308,7 @@ class ParserGenerator(object): b = d return a, b - def parse_item(self): + def parse_item(self) -> Tuple["NFAState", "NFAState"]: # ITEM: '[' RHS ']' | ATOM ['+' | '*'] if self.value == "[": self.gettoken() @@ -298,7 +328,7 @@ class ParserGenerator(object): else: return a, a - def parse_atom(self): + def parse_atom(self) -> Tuple["NFAState", "NFAState"]: # ATOM: '(' RHS ')' | NAME | STRING if self.value == "(": self.gettoken() @@ -315,8 +345,9 @@ class ParserGenerator(object): self.raise_error( "expected (...) or NAME or STRING, got %s/%s", self.type, self.value ) + assert False - def expect(self, type, value=None): + def expect(self, type: int, value: Optional[Any] = None) -> Text: if self.type != type or (value is not None and self.value != value): self.raise_error( "expected %s/%s, got %s/%s", type, value, self.type, self.value @@ -325,14 +356,14 @@ class ParserGenerator(object): self.gettoken() return value - def gettoken(self): + def gettoken(self) -> None: tup = next(self.generator) while tup[0] in (tokenize.COMMENT, tokenize.NL): tup = next(self.generator) self.type, self.value, self.begin, self.end, self.line = tup # print token.tok_name[self.type], repr(self.value) - def raise_error(self, msg, *args): + def raise_error(self, msg: str, *args: Any) -> NoReturn: if args: try: msg = msg % args @@ -342,17 +373,23 @@ class ParserGenerator(object): class NFAState(object): - def __init__(self): + arcs: List[Tuple[Optional[Text], "NFAState"]] + + def __init__(self) -> None: self.arcs = [] # list of (label, NFAState) pairs - def addarc(self, next, label=None): + def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None: assert label is None or isinstance(label, str) assert isinstance(next, NFAState) self.arcs.append((label, next)) class DFAState(object): - def __init__(self, nfaset, final): + nfaset: Dict[NFAState, Any] + isfinal: bool + arcs: Dict[Text, "DFAState"] + + def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: assert isinstance(nfaset, dict) assert isinstance(next(iter(nfaset)), NFAState) assert isinstance(final, NFAState) @@ -360,18 +397,18 @@ class DFAState(object): self.isfinal = final in nfaset self.arcs = {} # map from label to DFAState - def addarc(self, next, label): + def addarc(self, next: "DFAState", label: Text) -> None: assert isinstance(label, str) assert label not in self.arcs assert isinstance(next, DFAState) self.arcs[label] = next - def unifystate(self, old, new): + def unifystate(self, old: "DFAState", new: "DFAState") -> None: for label, next in self.arcs.items(): if next is old: self.arcs[label] = new - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: # Equality test -- ignore the nfaset instance variable assert isinstance(other, DFAState) if self.isfinal != other.isfinal: @@ -385,9 +422,9 @@ class DFAState(object): return False return True - __hash__ = None # For Py3 compatibility. + __hash__: Any = None # For Py3 compatibility. -def generate_grammar(filename="Grammar.txt"): +def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar: p = ParserGenerator(filename) return p.make_grammar()