X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/e74117f172e29e8a980e2c9de929ad50d3769150..c8ca6b2b9ff3510bee12129824cebfc2fc51e5b2:/blib2to3/pgen2/grammar.py diff --git a/blib2to3/pgen2/grammar.py b/blib2to3/pgen2/grammar.py index 088c58b..2882cda 100644 --- a/blib2to3/pgen2/grammar.py +++ b/blib2to3/pgen2/grammar.py @@ -13,12 +13,20 @@ fallback token code OP, but the parser needs the actual token code. """ # Python imports -import collections +import os import pickle +import tempfile +from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union # Local imports from . import token +_P = TypeVar("_P", bound="Grammar") +Label = Tuple[int, Optional[Text]] +DFA = List[List[Tuple[int, int]]] +DFAS = Tuple[DFA, Dict[int, int]] +Path = Union[str, "os.PathLike[str]"] + class Grammar(object): """Pgen parsing tables conversion class. @@ -74,60 +82,74 @@ class Grammar(object): """ - def __init__(self): - self.symbol2number = {} - self.number2symbol = {} - self.states = [] - self.dfas = {} - self.labels = [(0, "EMPTY")] - self.keywords = {} - self.tokens = {} - self.symbol2label = {} + def __init__(self) -> None: + self.symbol2number: Dict[str, int] = {} + self.number2symbol: Dict[int, str] = {} + self.states: List[DFA] = [] + self.dfas: Dict[int, DFAS] = {} + self.labels: List[Label] = [(0, "EMPTY")] + self.keywords: Dict[str, int] = {} + self.tokens: Dict[int, int] = {} + self.symbol2label: Dict[str, int] = {} self.start = 256 - - def dump(self, filename): - """Dump the grammar tables to a pickle file. - - dump() recursively changes all dict to OrderedDict, so the pickled file - is not exactly the same as what was passed in to dump(). load() uses the - pickled file to create the tables, but only changes OrderedDict to dict - at the top level; it does not recursively change OrderedDict to dict. - So, the loaded tables are different from the original tables that were - passed to load() in that some of the OrderedDict (from the pickled file) - are not changed back to dict. For parsing, this has no effect on - performance because OrderedDict uses dict's __getitem__ with nothing in - between. - """ - with open(filename, "wb") as f: - d = _make_deterministic(self.__dict__) - pickle.dump(d, f, 2) - - def load(self, filename): + # Python 3.7+ parses async as a keyword, not an identifier + self.async_keywords = False + + def dump(self, filename: Path) -> None: + """Dump the grammar tables to a pickle file.""" + + # mypyc generates objects that don't have a __dict__, but they + # do have __getstate__ methods that will return an equivalent + # dictionary + if hasattr(self, "__dict__"): + d = self.__dict__ + else: + d = self.__getstate__() # type: ignore + + with tempfile.NamedTemporaryFile( + dir=os.path.dirname(filename), delete=False + ) as f: + pickle.dump(d, f, pickle.HIGHEST_PROTOCOL) + os.replace(f.name, filename) + + def _update(self, attrs: Dict[str, Any]) -> None: + for k, v in attrs.items(): + setattr(self, k, v) + + def load(self, filename: Path) -> None: """Load the grammar tables from a pickle file.""" with open(filename, "rb") as f: d = pickle.load(f) - self.__dict__.update(d) + self._update(d) - def loads(self, pkl): + def loads(self, pkl: bytes) -> None: """Load the grammar tables from a pickle bytes object.""" - self.__dict__.update(pickle.loads(pkl)) + self._update(pickle.loads(pkl)) - def copy(self): + def copy(self: _P) -> _P: """ Copy the grammar. """ new = self.__class__() - for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", - "tokens", "symbol2label"): + for dict_attr in ( + "symbol2number", + "number2symbol", + "dfas", + "keywords", + "tokens", + "symbol2label", + ): setattr(new, dict_attr, getattr(self, dict_attr).copy()) new.labels = self.labels[:] new.states = self.states[:] new.start = self.start + new.async_keywords = self.async_keywords return new - def report(self): + def report(self) -> None: """Dump the grammar tables to standard output, for debugging.""" from pprint import pprint + print("s2n") pprint(self.symbol2number) print("n2s") @@ -141,17 +163,6 @@ class Grammar(object): print("start", self.start) -def _make_deterministic(top): - if isinstance(top, dict): - return collections.OrderedDict( - sorted(((k, _make_deterministic(v)) for k, v in top.items()))) - if isinstance(top, list): - return [_make_deterministic(e) for e in top] - if isinstance(top, tuple): - return tuple(_make_deterministic(e) for e in top) - return top - - # Map from operator to number (since tokenize doesn't do this) opmap_raw = """ @@ -202,6 +213,7 @@ opmap_raw = """ // DOUBLESLASH //= DOUBLESLASHEQUAL -> RARROW +:= COLONEQUAL """ opmap = {}