X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/e74117f172e29e8a980e2c9de929ad50d3769150..0721f11731b64b1b134f3cb7c227a6477044e8fa:/blib2to3/pgen2/pgen.py diff --git a/blib2to3/pgen2/pgen.py b/blib2to3/pgen2/pgen.py index b0cbd16..13ec51d 100644 --- a/blib2to3/pgen2/pgen.py +++ b/blib2to3/pgen2/pgen.py @@ -4,12 +4,40 @@ # Pgen imports from . import grammar, token, tokenize +from typing import ( + Any, + Dict, + IO, + Iterable, + Iterator, + List, + Optional, + Text, + Tuple, + Union, + Sequence, + NoReturn, +) +from blib2to3.pgen2 import grammar +from blib2to3.pgen2.tokenize import GoodTokenInfo +import os + + +Path = Union[str, "os.PathLike[str]"] + + class PgenGrammar(grammar.Grammar): pass + class ParserGenerator(object): - def __init__(self, filename, stream=None): + filename: Path + stream: IO[Text] + generator: Iterator[GoodTokenInfo] + first: Dict[Text, Optional[Dict[Text, int]]] + + def __init__(self, filename: Path, stream: Optional[IO[Text]] = None) -> None: close_stream = None if stream is None: stream = open(filename) @@ -17,14 +45,14 @@ class ParserGenerator(object): self.filename = filename self.stream = stream self.generator = tokenize.generate_tokens(stream.readline) - self.gettoken() # Initialize lookahead + self.gettoken() # Initialize lookahead self.dfas, self.startsymbol = self.parse() if close_stream is not None: close_stream() - self.first = {} # map from symbol name to set of tokens + self.first = {} # map from symbol name to set of tokens self.addfirstsets() - def make_grammar(self): + def make_grammar(self) -> PgenGrammar: c = PgenGrammar() names = list(self.dfas.keys()) names.sort() @@ -49,8 +77,9 @@ class ParserGenerator(object): c.start = c.symbol2number[self.startsymbol] return c - def make_first(self, c, name): + def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: rawfirst = self.first[name] + assert rawfirst is not None first = {} for label in sorted(rawfirst): ilabel = self.make_label(c, label) @@ -58,7 +87,7 @@ class ParserGenerator(object): first[ilabel] = 1 return first - def make_label(self, c, label): + def make_label(self, c: PgenGrammar, label: Text) -> int: # XXX Maybe this should be a method on a subclass of converter? ilabel = len(c.labels) if label[0].isalpha(): @@ -96,7 +125,7 @@ class ParserGenerator(object): return ilabel else: # An operator (any non-numeric token) - itoken = grammar.opmap[value] # Fails if unknown token + itoken = grammar.opmap[value] # Fails if unknown token if itoken in c.tokens: return c.tokens[itoken] else: @@ -104,19 +133,19 @@ class ParserGenerator(object): c.tokens[itoken] = ilabel return ilabel - def addfirstsets(self): + def addfirstsets(self) -> None: names = list(self.dfas.keys()) names.sort() for name in names: if name not in self.first: self.calcfirst(name) - #print name, self.first[name].keys() + # print name, self.first[name].keys() - def calcfirst(self, name): + def calcfirst(self, name: Text) -> None: dfa = self.dfas[name] - self.first[name] = None # dummy to detect left recursion + self.first[name] = None # dummy to detect left recursion state = dfa[0] - totalset = {} + totalset: Dict[str, int] = {} overlapcheck = {} for label, next in state.arcs.items(): if label in self.dfas: @@ -127,24 +156,27 @@ class ParserGenerator(object): else: self.calcfirst(label) fset = self.first[label] + assert fset is not None totalset.update(fset) overlapcheck[label] = fset else: totalset[label] = 1 overlapcheck[label] = {label: 1} - inverse = {} + inverse: Dict[str, str] = {} for label, itsfirst in overlapcheck.items(): for symbol in itsfirst: if symbol in inverse: - raise ValueError("rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" % - (name, symbol, label, inverse[symbol])) + raise ValueError( + "rule %s is ambiguous; %s is in the first sets of %s as well" + " as %s" + % (name, symbol, label, inverse[symbol]) + ) inverse[symbol] = label self.first[name] = totalset - def parse(self): + def parse(self) -> Tuple[Dict[Text, List["DFAState"]], Text]: dfas = {} - startsymbol = None + startsymbol: Optional[str] = None # MSTART: (NEWLINE | RULE)* ENDMARKER while self.type != token.ENDMARKER: while self.type == token.NEWLINE: @@ -154,30 +186,33 @@ class ParserGenerator(object): self.expect(token.OP, ":") a, z = self.parse_rhs() self.expect(token.NEWLINE) - #self.dump_nfa(name, a, z) + # self.dump_nfa(name, a, z) dfa = self.make_dfa(a, z) - #self.dump_dfa(name, dfa) + # self.dump_dfa(name, dfa) oldlen = len(dfa) self.simplify_dfa(dfa) newlen = len(dfa) dfas[name] = dfa - #print name, oldlen, newlen + # print name, oldlen, newlen if startsymbol is None: startsymbol = name + assert startsymbol is not None return dfas, startsymbol - def make_dfa(self, start, finish): + def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]: # To turn an NFA into a DFA, we define the states of the DFA # to correspond to *sets* of states of the NFA. Then do some # state reduction. Let's represent sets as dicts with 1 for # values. assert isinstance(start, NFAState) assert isinstance(finish, NFAState) - def closure(state): - base = {} + + def closure(state: NFAState) -> Dict[NFAState, int]: + base: Dict[NFAState, int] = {} addclosure(state, base) return base - def addclosure(state, base): + + def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None: assert isinstance(state, NFAState) if state in base: return @@ -185,9 +220,10 @@ class ParserGenerator(object): for label, next in state.arcs: if label is None: addclosure(next, base) + states = [DFAState(closure(start), finish)] - for state in states: # NB states grows while we're iterating - arcs = {} + for state in states: # NB states grows while we're iterating + arcs: Dict[str, Dict[NFAState, int]] = {} for nfastate in state.nfaset: for label, next in nfastate.arcs: if label is not None: @@ -200,9 +236,9 @@ class ParserGenerator(object): st = DFAState(nfaset, finish) states.append(st) state.addarc(st, label) - return states # List of DFAState instances; first one is start + return states # List of DFAState instances; first one is start - def dump_nfa(self, name, start, finish): + def dump_nfa(self, name: Text, start: "NFAState", finish: "NFAState") -> None: print("Dump of NFA for", name) todo = [start] for i, state in enumerate(todo): @@ -218,14 +254,14 @@ class ParserGenerator(object): else: print(" %s -> %d" % (label, j)) - def dump_dfa(self, name, dfa): + def dump_dfa(self, name: Text, dfa: Sequence["DFAState"]) -> None: print("Dump of DFA for", name) for i, state in enumerate(dfa): print(" State", i, state.isfinal and "(final)" or "") for label, next in sorted(state.arcs.items()): print(" %s -> %d" % (label, dfa.index(next))) - def simplify_dfa(self, dfa): + def simplify_dfa(self, dfa: List["DFAState"]) -> None: # This is not theoretically optimal, but works well enough. # Algorithm: repeatedly look for two states that have the same # set of arcs (same labels pointing to the same nodes) and @@ -236,17 +272,17 @@ class ParserGenerator(object): while changes: changes = False for i, state_i in enumerate(dfa): - for j in range(i+1, len(dfa)): + for j in range(i + 1, len(dfa)): state_j = dfa[j] if state_i == state_j: - #print " unify", i, j + # print " unify", i, j del dfa[j] for state in dfa: state.unifystate(state_j, state_i) changes = True break - def parse_rhs(self): + def parse_rhs(self) -> Tuple["NFAState", "NFAState"]: # RHS: ALT ('|' ALT)* a, z = self.parse_alt() if self.value != "|": @@ -263,17 +299,16 @@ class ParserGenerator(object): z.addarc(zz) return aa, zz - def parse_alt(self): + def parse_alt(self) -> Tuple["NFAState", "NFAState"]: # ALT: ITEM+ a, b = self.parse_item() - while (self.value in ("(", "[") or - self.type in (token.NAME, token.STRING)): + while self.value in ("(", "[") or self.type in (token.NAME, token.STRING): c, d = self.parse_item() b.addarc(c) b = d return a, b - def parse_item(self): + def parse_item(self) -> Tuple["NFAState", "NFAState"]: # ITEM: '[' RHS ']' | ATOM ['+' | '*'] if self.value == "[": self.gettoken() @@ -293,7 +328,7 @@ class ParserGenerator(object): else: return a, a - def parse_atom(self): + def parse_atom(self) -> Tuple["NFAState", "NFAState"]: # ATOM: '(' RHS ')' | NAME | STRING if self.value == "(": self.gettoken() @@ -307,65 +342,73 @@ class ParserGenerator(object): self.gettoken() return a, z else: - self.raise_error("expected (...) or NAME or STRING, got %s/%s", - self.type, self.value) + self.raise_error( + "expected (...) or NAME or STRING, got %s/%s", self.type, self.value + ) + assert False - def expect(self, type, value=None): + def expect(self, type: int, value: Optional[Any] = None) -> Text: if self.type != type or (value is not None and self.value != value): - self.raise_error("expected %s/%s, got %s/%s", - type, value, self.type, self.value) + self.raise_error( + "expected %s/%s, got %s/%s", type, value, self.type, self.value + ) value = self.value self.gettoken() return value - def gettoken(self): + def gettoken(self) -> None: tup = next(self.generator) while tup[0] in (tokenize.COMMENT, tokenize.NL): tup = next(self.generator) self.type, self.value, self.begin, self.end, self.line = tup - #print token.tok_name[self.type], repr(self.value) + # print token.tok_name[self.type], repr(self.value) - def raise_error(self, msg, *args): + def raise_error(self, msg: str, *args: Any) -> NoReturn: if args: try: msg = msg % args except: msg = " ".join([msg] + list(map(str, args))) - raise SyntaxError(msg, (self.filename, self.end[0], - self.end[1], self.line)) + raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line)) + class NFAState(object): + arcs: List[Tuple[Optional[Text], "NFAState"]] - def __init__(self): - self.arcs = [] # list of (label, NFAState) pairs + def __init__(self) -> None: + self.arcs = [] # list of (label, NFAState) pairs - def addarc(self, next, label=None): + def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None: assert label is None or isinstance(label, str) assert isinstance(next, NFAState) self.arcs.append((label, next)) + class DFAState(object): + nfaset: Dict[NFAState, Any] + isfinal: bool + arcs: Dict[Text, "DFAState"] - def __init__(self, nfaset, final): + def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: assert isinstance(nfaset, dict) assert isinstance(next(iter(nfaset)), NFAState) assert isinstance(final, NFAState) self.nfaset = nfaset self.isfinal = final in nfaset - self.arcs = {} # map from label to DFAState + self.arcs = {} # map from label to DFAState - def addarc(self, next, label): + def addarc(self, next: "DFAState", label: Text) -> None: assert isinstance(label, str) assert label not in self.arcs assert isinstance(next, DFAState) self.arcs[label] = next - def unifystate(self, old, new): + def unifystate(self, old: "DFAState", new: "DFAState") -> None: for label, next in self.arcs.items(): if next is old: self.arcs[label] = new - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: # Equality test -- ignore the nfaset instance variable assert isinstance(other, DFAState) if self.isfinal != other.isfinal: @@ -379,8 +422,9 @@ class DFAState(object): return False return True - __hash__ = None # For Py3 compatibility. + __hash__: Any = None # For Py3 compatibility. + -def generate_grammar(filename="Grammar.txt"): +def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar: p = ParserGenerator(filename) return p.make_grammar()