diff --git a/src/blib2to3/pgen2/pgen.py b/src/blib2to3/pgen2/pgen.py
deleted file mode 100644
index 564ebbd..0000000
--- a/src/blib2to3/pgen2/pgen.py
+++ /dev/null
@@ -1,428 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-# Pgen imports
-from . import grammar, token, tokenize
-
-from typing import (
-    Any,
-    Dict,
-    IO,
-    Iterator,
-    List,
-    Optional,
-    Text,
-    Tuple,
-    Union,
-    Sequence,
-    NoReturn,
-)
-from blib2to3.pgen2 import grammar
-from blib2to3.pgen2.tokenize import GoodTokenInfo
-import os
-
-
-Path = Union[str, "os.PathLike[str]"]
-
-
-class PgenGrammar(grammar.Grammar):
-    pass
-
-
-class ParserGenerator(object):
-
-    filename: Path
-    stream: IO[Text]
-    generator: Iterator[GoodTokenInfo]
-    first: Dict[Text, Optional[Dict[Text, int]]]
-
-    def __init__(self, filename: Path, stream: Optional[IO[Text]] = None) -> None:
-        close_stream = None
-        if stream is None:
-            stream = open(filename)
-            close_stream = stream.close
-        self.filename = filename
-        self.stream = stream
-        self.generator = tokenize.generate_tokens(stream.readline)
-        self.gettoken()  # Initialize lookahead
-        self.dfas, self.startsymbol = self.parse()
-        if close_stream is not None:
-            close_stream()
-        self.first = {}  # map from symbol name to set of tokens
-        self.addfirstsets()
-
-    def make_grammar(self) -> PgenGrammar:
-        c = PgenGrammar()
-        names = list(self.dfas.keys())
-        names.sort()
-        names.remove(self.startsymbol)
-        names.insert(0, self.startsymbol)
-        for name in names:
-            i = 256 + len(c.symbol2number)
-            c.symbol2number[name] = i
-            c.number2symbol[i] = name
-        for name in names:
-            dfa = self.dfas[name]
-            states = []
-            for state in dfa:
-                arcs = []
-                for label, next in sorted(state.arcs.items()):
-                    arcs.append((self.make_label(c, label), dfa.index(next)))
-                if state.isfinal:
-                    arcs.append((0, dfa.index(state)))
-                states.append(arcs)
-            c.states.append(states)
-            c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
-        c.start = c.symbol2number[self.startsymbol]
-        return c
-
-    def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]:
-        rawfirst = self.first[name]
-        assert rawfirst is not None
-        first = {}
-        for label in sorted(rawfirst):
-            ilabel = self.make_label(c, label)
-            ##assert ilabel not in first # XXX failed on <> ... !=
-            first[ilabel] = 1
-        return first
-
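The numbering scheme in make_grammar() is easy to lose in the loop bookkeeping: terminals keep their token numbers (below 256), nonterminals are numbered from 256 upward, and the start symbol always gets the first slot. A minimal sketch, with hypothetical rule names chosen only to show the ordering:

# Hypothetical rule names; only the numbering mirrors make_grammar().
names = sorted(["expr", "atom", "start"])    # ["atom", "expr", "start"]
startsymbol = "start"
names.remove(startsymbol)
names.insert(0, startsymbol)                 # start symbol is numbered first
symbol2number = {name: 256 + i for i, name in enumerate(names)}
assert symbol2number == {"start": 256, "atom": 257, "expr": 258}
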
-    def make_label(self, c: PgenGrammar, label: Text) -> int:
-        # XXX Maybe this should be a method on a subclass of converter?
-        ilabel = len(c.labels)
-        if label[0].isalpha():
-            # Either a symbol name or a named token
-            if label in c.symbol2number:
-                # A symbol name (a non-terminal)
-                if label in c.symbol2label:
-                    return c.symbol2label[label]
-                else:
-                    c.labels.append((c.symbol2number[label], None))
-                    c.symbol2label[label] = ilabel
-                    return ilabel
-            else:
-                # A named token (NAME, NUMBER, STRING)
-                itoken = getattr(token, label, None)
-                assert isinstance(itoken, int), label
-                assert itoken in token.tok_name, label
-                if itoken in c.tokens:
-                    return c.tokens[itoken]
-                else:
-                    c.labels.append((itoken, None))
-                    c.tokens[itoken] = ilabel
-                    return ilabel
-        else:
-            # Either a keyword or an operator
-            assert label[0] in ('"', "'"), label
-            value = eval(label)
-            if value[0].isalpha():
-                # A keyword
-                if value in c.keywords:
-                    return c.keywords[value]
-                else:
-                    c.labels.append((token.NAME, value))
-                    c.keywords[value] = ilabel
-                    return ilabel
-            else:
-                # An operator (any non-numeric token)
-                itoken = grammar.opmap[value]  # Fails if unknown token
-                if itoken in c.tokens:
-                    return c.tokens[itoken]
-                else:
-                    c.labels.append((itoken, None))
-                    c.tokens[itoken] = ilabel
-                    return ilabel
-
-    def addfirstsets(self) -> None:
-        names = list(self.dfas.keys())
-        names.sort()
-        for name in names:
-            if name not in self.first:
-                self.calcfirst(name)
-            # print name, self.first[name].keys()
-
-    def calcfirst(self, name: Text) -> None:
-        dfa = self.dfas[name]
-        self.first[name] = None  # dummy to detect left recursion
-        state = dfa[0]
-        totalset: Dict[str, int] = {}
-        overlapcheck = {}
-        for label, next in state.arcs.items():
-            if label in self.dfas:
-                if label in self.first:
-                    fset = self.first[label]
-                    if fset is None:
-                        raise ValueError("recursion for rule %r" % name)
-                else:
-                    self.calcfirst(label)
-                    fset = self.first[label]
-                    assert fset is not None
-                totalset.update(fset)
-                overlapcheck[label] = fset
-            else:
-                totalset[label] = 1
-                overlapcheck[label] = {label: 1}
-        inverse: Dict[str, str] = {}
-        for label, itsfirst in overlapcheck.items():
-            for symbol in itsfirst:
-                if symbol in inverse:
-                    raise ValueError(
-                        "rule %s is ambiguous; %s is in the first sets of %s as well"
-                        " as %s" % (name, symbol, label, inverse[symbol])
-                    )
-                inverse[symbol] = label
-        self.first[name] = totalset
-
-    def parse(self) -> Tuple[Dict[Text, List["DFAState"]], Text]:
-        dfas = {}
-        startsymbol: Optional[str] = None
-        # MSTART: (NEWLINE | RULE)* ENDMARKER
-        while self.type != token.ENDMARKER:
-            while self.type == token.NEWLINE:
-                self.gettoken()
-            # RULE: NAME ':' RHS NEWLINE
-            name = self.expect(token.NAME)
-            self.expect(token.OP, ":")
-            a, z = self.parse_rhs()
-            self.expect(token.NEWLINE)
-            # self.dump_nfa(name, a, z)
-            dfa = self.make_dfa(a, z)
-            # self.dump_dfa(name, dfa)
-            oldlen = len(dfa)
-            self.simplify_dfa(dfa)
-            newlen = len(dfa)
-            dfas[name] = dfa
-            # print name, oldlen, newlen
-            if startsymbol is None:
-                startsymbol = name
-        assert startsymbol is not None
-        return dfas, startsymbol
-
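calcfirst() uses None as an "in progress" sentinel so that re-entering a rule mid-computation is detected as left recursion. A self-contained sketch of that guard, with a deliberately left-recursive toy rule table (a plain dict standing in for the module's DFA structures):

from typing import Dict, Optional, Set

rules: Dict[str, Set[str]] = {"expr": {"expr"}}  # toy rule: expr starts with expr
first: Dict[str, Optional[Set[str]]] = {}

def calcfirst(name: str) -> None:
    first[name] = None                     # sentinel: computation in progress
    total: Set[str] = set()
    for label in rules[name]:
        if label in rules:                 # nonterminal: recurse into it
            if label in first:
                if first[label] is None:   # hit the sentinel: left recursion
                    raise ValueError("recursion for rule %r" % name)
            else:
                calcfirst(label)
            total |= first[label] or set()
        else:
            total.add(label)               # terminal contributes itself
    first[name] = total

try:
    calcfirst("expr")
except ValueError as exc:
    print(exc)                             # recursion for rule 'expr'
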
-    def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]:
-        # To turn an NFA into a DFA, we define the states of the DFA
-        # to correspond to *sets* of states of the NFA.  Then do some
-        # state reduction.  Let's represent sets as dicts with 1 for
-        # values.
-        assert isinstance(start, NFAState)
-        assert isinstance(finish, NFAState)
-
-        def closure(state: NFAState) -> Dict[NFAState, int]:
-            base: Dict[NFAState, int] = {}
-            addclosure(state, base)
-            return base
-
-        def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None:
-            assert isinstance(state, NFAState)
-            if state in base:
-                return
-            base[state] = 1
-            for label, next in state.arcs:
-                if label is None:
-                    addclosure(next, base)
-
-        states = [DFAState(closure(start), finish)]
-        for state in states:  # NB states grows while we're iterating
-            arcs: Dict[str, Dict[NFAState, int]] = {}
-            for nfastate in state.nfaset:
-                for label, next in nfastate.arcs:
-                    if label is not None:
-                        addclosure(next, arcs.setdefault(label, {}))
-            for label, nfaset in sorted(arcs.items()):
-                for st in states:
-                    if st.nfaset == nfaset:
-                        break
-                else:
-                    st = DFAState(nfaset, finish)
-                    states.append(st)
-                state.addarc(st, label)
-        return states  # List of DFAState instances; first one is start
-
-    def dump_nfa(self, name: Text, start: "NFAState", finish: "NFAState") -> None:
-        print("Dump of NFA for", name)
-        todo = [start]
-        for i, state in enumerate(todo):
-            print("  State", i, state is finish and "(final)" or "")
-            for label, next in state.arcs:
-                if next in todo:
-                    j = todo.index(next)
-                else:
-                    j = len(todo)
-                    todo.append(next)
-                if label is None:
-                    print("    -> %d" % j)
-                else:
-                    print("    %s -> %d" % (label, j))
-
-    def dump_dfa(self, name: Text, dfa: Sequence["DFAState"]) -> None:
-        print("Dump of DFA for", name)
-        for i, state in enumerate(dfa):
-            print("  State", i, state.isfinal and "(final)" or "")
-            for label, next in sorted(state.arcs.items()):
-                print("    %s -> %d" % (label, dfa.index(next)))
-
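The heart of make_dfa() is the epsilon-closure step: each DFA state corresponds to the set of NFA states reachable without consuming input. A toy version with the NFA flattened to a plain dict (hypothetical data, not the module's NFAState objects), showing how only None-labeled arcs are followed:

from typing import Dict, List, Optional, Set, Tuple

# Toy NFA: state -> [(label, next_state)]; a None label is an epsilon arc.
toy_nfa: Dict[int, List[Tuple[Optional[str], int]]] = {
    0: [(None, 1), ("a", 2)],
    1: [("b", 2)],
    2: [],
}

def closure(state: int, base: Optional[Set[int]] = None) -> Set[int]:
    base = set() if base is None else base
    if state in base:
        return base
    base.add(state)
    for label, nxt in toy_nfa[state]:
        if label is None:                  # follow only epsilon arcs
            closure(nxt, base)
    return base

print(closure(0))                          # {0, 1}: state 1 costs no input
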
-    def simplify_dfa(self, dfa: List["DFAState"]) -> None:
-        # This is not theoretically optimal, but works well enough.
-        # Algorithm: repeatedly look for two states that have the same
-        # set of arcs (same labels pointing to the same nodes) and
-        # unify them, until things stop changing.
-
-        # dfa is a list of DFAState instances
-        changes = True
-        while changes:
-            changes = False
-            for i, state_i in enumerate(dfa):
-                for j in range(i + 1, len(dfa)):
-                    state_j = dfa[j]
-                    if state_i == state_j:
-                        # print "  unify", i, j
-                        del dfa[j]
-                        for state in dfa:
-                            state.unifystate(state_j, state_i)
-                        changes = True
-                        break
-
-    def parse_rhs(self) -> Tuple["NFAState", "NFAState"]:
-        # RHS: ALT ('|' ALT)*
-        a, z = self.parse_alt()
-        if self.value != "|":
-            return a, z
-        else:
-            aa = NFAState()
-            zz = NFAState()
-            aa.addarc(a)
-            z.addarc(zz)
-            while self.value == "|":
-                self.gettoken()
-                a, z = self.parse_alt()
-                aa.addarc(a)
-                z.addarc(zz)
-            return aa, zz
-
-    def parse_alt(self) -> Tuple["NFAState", "NFAState"]:
-        # ALT: ITEM+
-        a, b = self.parse_item()
-        while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
-            c, d = self.parse_item()
-            b.addarc(c)
-            b = d
-        return a, b
-
-    def parse_item(self) -> Tuple["NFAState", "NFAState"]:
-        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
-        if self.value == "[":
-            self.gettoken()
-            a, z = self.parse_rhs()
-            self.expect(token.OP, "]")
-            a.addarc(z)
-            return a, z
-        else:
-            a, z = self.parse_atom()
-            value = self.value
-            if value not in ("+", "*"):
-                return a, z
-            self.gettoken()
-            z.addarc(a)
-            if value == "+":
-                return a, z
-            else:
-                return a, a
-
-    def parse_atom(self) -> Tuple["NFAState", "NFAState"]:
-        # ATOM: '(' RHS ')' | NAME | STRING
-        if self.value == "(":
-            self.gettoken()
-            a, z = self.parse_rhs()
-            self.expect(token.OP, ")")
-            return a, z
-        elif self.type in (token.NAME, token.STRING):
-            a = NFAState()
-            z = NFAState()
-            a.addarc(z, self.value)
-            self.gettoken()
-            return a, z
-        else:
-            self.raise_error(
-                "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
-            )
-            assert False
-
-    def expect(self, type: int, value: Optional[Any] = None) -> Text:
-        if self.type != type or (value is not None and self.value != value):
-            self.raise_error(
-                "expected %s/%s, got %s/%s", type, value, self.type, self.value
-            )
-        value = self.value
-        self.gettoken()
-        return value
-
-    def gettoken(self) -> None:
-        tup = next(self.generator)
-        while tup[0] in (tokenize.COMMENT, tokenize.NL):
-            tup = next(self.generator)
-        self.type, self.value, self.begin, self.end, self.line = tup
-        # print token.tok_name[self.type], repr(self.value)
-
-    def raise_error(self, msg: str, *args: Any) -> NoReturn:
-        if args:
-            try:
-                msg = msg % args
-            except:
-                msg = " ".join([msg] + list(map(str, args)))
-        raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))
-
-
-class NFAState(object):
-    arcs: List[Tuple[Optional[Text], "NFAState"]]
-
-    def __init__(self) -> None:
-        self.arcs = []  # list of (label, NFAState) pairs
-
-    def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None:
-        assert label is None or isinstance(label, str)
-        assert isinstance(next, NFAState)
-        self.arcs.append((label, next))
-
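parse_item() above encodes EBNF repetition purely through arc wiring: an epsilon back-arc from the exit to the entry, plus a choice of which state to report as the exit. A small sketch with a stand-in state class (State is a minimal stand-in for NFAState so the snippet runs on its own):

from typing import List, Optional, Tuple

class State:  # stand-in for NFAState, kept minimal so this runs alone
    def __init__(self) -> None:
        self.arcs: List[Tuple[Optional[str], "State"]] = []

    def addarc(self, next: "State", label: Optional[str] = None) -> None:
        self.arcs.append((label, next))

a, z = State(), State()
a.addarc(z, "x")   # the atom consumes one 'x'
z.addarc(a)        # epsilon back-arc allows another pass
# 'x+' returns (a, z): the exit z is only reachable after at least one 'x'.
# 'x*' returns (a, a): entry doubles as exit, so matching zero 'x' is legal.
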
-class DFAState(object):
-    nfaset: Dict[NFAState, Any]
-    isfinal: bool
-    arcs: Dict[Text, "DFAState"]
-
-    def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None:
-        assert isinstance(nfaset, dict)
-        assert isinstance(next(iter(nfaset)), NFAState)
-        assert isinstance(final, NFAState)
-        self.nfaset = nfaset
-        self.isfinal = final in nfaset
-        self.arcs = {}  # map from label to DFAState
-
-    def addarc(self, next: "DFAState", label: Text) -> None:
-        assert isinstance(label, str)
-        assert label not in self.arcs
-        assert isinstance(next, DFAState)
-        self.arcs[label] = next
-
-    def unifystate(self, old: "DFAState", new: "DFAState") -> None:
-        for label, next in self.arcs.items():
-            if next is old:
-                self.arcs[label] = new
-
-    def __eq__(self, other: Any) -> bool:
-        # Equality test -- ignore the nfaset instance variable
-        assert isinstance(other, DFAState)
-        if self.isfinal != other.isfinal:
-            return False
-        # Can't just return self.arcs == other.arcs, because that
-        # would invoke this method recursively, with cycles...
-        if len(self.arcs) != len(other.arcs):
-            return False
-        for label, next in self.arcs.items():
-            if next is not other.arcs.get(label):
-                return False
-        return True
-
-    __hash__: Any = None  # For Py3 compatibility.
-
-
-def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
-    p = ParserGenerator(filename)
-    return p.make_grammar()
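For context, a hedged usage sketch of the module this diff removes; the import path matches the file's location at this revision, and "Grammar.txt" is the default argument above (any file in the same pgen grammar format would do):

from blib2to3.pgen2 import pgen   # path as of the revision this diff removes

g = pgen.generate_grammar("Grammar.txt")   # parse the file and build the tables
print(g.start)                             # number assigned to the start symbol
print(len(g.dfas), "nonterminal rules")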