From: Michael J. Sullivan
Date: Wed, 30 Oct 2019 14:29:29 +0000 (-0700)
Subject: Support compilation with mypyc (#1009)
X-Git-Url: https://git.madduck.net/etc/vim.git/commitdiff_plain/3e60f6d454616a795acb1e3e2b43efa979de4f46?ds=inline

Support compilation with mypyc (#1009)

* Make most of blib2to3 directly typed and mypyc-compatible

This used a combination of retype and pytype's merge-pyi to do the
initial merges of the stubs, which then required manual tweaking to
make it actually typecheck and work with mypyc.

Co-authored-by: Sanjit Kalapatapu
Co-authored-by: Michael J. Sullivan

* Make black able to compile and run with mypyc

The changes made fall into a couple of categories:

* Fixing actual type mistakes that slip through the cracks
* Working around a couple of mypy bugs (the most annoying of which is
  that we need to add type annotations in a number of places where
  variables are initialized to None)

Co-authored-by: Sanjit Kalapatapu
Co-authored-by: Michael J. Sullivan
---

diff --git a/.appveyor.yml b/.appveyor.yml
index a968315..d29cca2 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -7,7 +7,7 @@ build: off
 
 test_script:
   - C:\Python36\python.exe tests/test_black.py
-  - C:\Python36\python.exe -m mypy black.py blackd.py tests/test_black.py
+  - C:\Python36\python.exe -m mypy black.py blackd.py tests/test_black.py blib2to3
 
 after_test:
   - C:\Python36\python.exe -m pip install pyinstaller
diff --git a/.flake8 b/.flake8
index c321e71..498b2cb 100644
--- a/.flake8
+++ b/.flake8
@@ -3,3 +3,7 @@ ignore = E203, E266, E501, W503
 max-line-length = 80
 max-complexity = 18
 select = B,C,E,F,W,T4,B9
+# We need to configure the mypy.ini because the flake8-mypy's default
+# options don't properly override it, so if we don't specify it we get
+# half of the config from mypy.ini and half from flake8-mypy.
+mypy_config = mypy.ini diff --git a/.gitignore b/.gitignore index 330e2ba..1c8d77c 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ pip-wheel-metadata/ _black_version.py .idea .eggs +.dmypy.json diff --git a/Pipfile b/Pipfile index 925a42f..9eada97 100644 --- a/Pipfile +++ b/Pipfile @@ -11,6 +11,8 @@ toml = ">=0.9.4" black = {path = ".",extras = ["d"],editable = true} aiohttp-cors = "*" typed-ast = "==1.4.0" +typing_extensions = ">=3.7.4" +mypy_extensions = ">=0.4.3" regex = ">=2019.8" pathspec = ">=0.6" dataclasses = {version = ">=0.6", python_version = "< 3.7"} diff --git a/Pipfile.lock b/Pipfile.lock index 2ab6f96..21c3a26 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "ad54dbd29085bc14caf655456b93d9f09e8556406ef956a5a05c20e30363ffa1" + "sha256": "9df9582de1e290f76bd43bbe8dc291bc71e4031517c7e824eb67c65d8e01f78f" }, "pipfile-spec": 6, "requires": {}, @@ -134,6 +134,14 @@ ], "version": "==4.5.2" }, + "mypy-extensions": { + "hashes": [ + "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", + "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" + ], + "index": "pypi", + "version": "==0.4.3" + }, "pathspec": { "hashes": [ "sha256:e285ccc8b0785beadd4c18e5708b12bb8fcf529a1e61215b3feff1d1e559ea5c" @@ -194,6 +202,15 @@ "index": "pypi", "version": "==1.4.0" }, + "typing-extensions": { + "hashes": [ + "sha256:091ecc894d5e908ac75209f10d5b4f118fbdb2eb1ede6a63544054bb1edb41f2", + "sha256:910f4656f54de5993ad9304959ce9bb903f90aadc7c67a0bef07e678014e892d", + "sha256:cf8b63fedea4d89bab840ecbb93e75578af28f76f66c35889bd7065f5af88575" + ], + "index": "pypi", + "version": "==3.7.4.1" + }, "yarl": { "hashes": [ "sha256:024ecdc12bc02b321bc66b41327f930d1c2c543fa9a561b39861da9388ba7aa9", @@ -697,11 +714,12 @@ }, "typing-extensions": { "hashes": [ - "sha256:2ed632b30bb54fc3941c382decfd0ee4148f5c591651c9272473fea2c6397d95", - "sha256:b1edbbf0652660e32ae780ac9433f4231e7339c7f9a8057d0f042fcbcea49b87", - "sha256:d8179012ec2c620d3791ca6fe2bf7979d979acdbef1fca0bc56b37411db682ed" + "sha256:091ecc894d5e908ac75209f10d5b4f118fbdb2eb1ede6a63544054bb1edb41f2", + "sha256:910f4656f54de5993ad9304959ce9bb903f90aadc7c67a0bef07e678014e892d", + "sha256:cf8b63fedea4d89bab840ecbb93e75578af28f76f66c35889bd7065f5af88575" ], - "version": "==3.7.4" + "index": "pypi", + "version": "==3.7.4.1" }, "urllib3": { "hashes": [ diff --git a/black.py b/black.py index ddeaa88..68c0052 100644 --- a/black.py +++ b/black.py @@ -37,6 +37,8 @@ from typing import ( Union, cast, ) +from typing_extensions import Final +from mypy_extensions import mypyc_attr from appdirs import user_cache_dir from dataclasses import dataclass, field, replace @@ -247,6 +249,17 @@ def read_pyproject_toml( return value +def target_version_option_callback( + c: click.Context, p: Union[click.Option, click.Parameter], v: Tuple[str, ...] +) -> List[TargetVersion]: + """Compute the target versions from a --target-version flag. + + This is its own function because mypy couldn't infer the type correctly + when it was a lambda, causing mypyc trouble. 
+ """ + return [TargetVersion[val.upper()] for val in v] + + @click.command(context_settings=dict(help_option_names=["-h", "--help"])) @click.option("-c", "--code", type=str, help="Format the code passed in as a string.") @click.option( @@ -261,7 +274,7 @@ def read_pyproject_toml( "-t", "--target-version", type=click.Choice([v.name.lower() for v in TargetVersion]), - callback=lambda c, p, v: [TargetVersion[val.upper()] for val in v], + callback=target_version_option_callback, multiple=True, help=( "Python versions that should be supported by Black's output. [default: " @@ -388,7 +401,7 @@ def main( verbose: bool, include: str, exclude: str, - src: Tuple[str], + src: Tuple[str, ...], config: Optional[str], ) -> None: """The uncompromising code formatter.""" @@ -470,7 +483,9 @@ def main( ctx.exit(report.return_code) -def path_empty(src: Tuple[str], quiet: bool, verbose: bool, ctx: click.Context) -> None: +def path_empty( + src: Tuple[str, ...], quiet: bool, verbose: bool, ctx: click.Context +) -> None: """ Exit if there is no `src` provided for formatting """ @@ -585,7 +600,7 @@ async def schedule_formatting( ): src for src in sorted(sources) } - pending: Iterable[asyncio.Future] = tasks.keys() + pending: Iterable["asyncio.Future[bool]"] = tasks.keys() try: loop.add_signal_handler(signal.SIGINT, cancel, pending) loop.add_signal_handler(signal.SIGTERM, cancel, pending) @@ -639,10 +654,10 @@ def format_file_in_place( except NothingChanged: return False - if write_back == write_back.YES: + if write_back == WriteBack.YES: with open(src, "w", encoding=encoding, newline=newline) as f: f.write(dst_contents) - elif write_back == write_back.DIFF: + elif write_back == WriteBack.DIFF: now = datetime.utcnow() src_name = f"{src}\t{then} +0000" dst_name = f"{src}\t{now} +0000" @@ -865,8 +880,16 @@ class Visitor(Generic[T]): if node.type < 256: name = token.tok_name[node.type] else: - name = type_repr(node.type) - yield from getattr(self, f"visit_{name}", self.visit_default)(node) + name = str(type_repr(node.type)) + # We explicitly branch on whether a visitor exists (instead of + # using self.visit_default as the default arg to getattr) in order + # to save needing to create a bound method object and so mypyc can + # generate a native call to visit_default. + visitf = getattr(self, f"visit_{name}", None) + if visitf: + yield from visitf(node) + else: + yield from self.visit_default(node) def visit_default(self, node: LN) -> Iterator[T]: """Default `visit_*()` implementation. 
Recurses to children of `node`.""" @@ -911,8 +934,8 @@ class DebugVisitor(Visitor[T]): list(v.visit(code)) -WHITESPACE = {token.DEDENT, token.INDENT, token.NEWLINE} -STATEMENT = { +WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE} +STATEMENT: Final = { syms.if_stmt, syms.while_stmt, syms.for_stmt, @@ -922,10 +945,10 @@ STATEMENT = { syms.funcdef, syms.classdef, } -STANDALONE_COMMENT = 153 +STANDALONE_COMMENT: Final = 153 token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT" -LOGIC_OPERATORS = {"and", "or"} -COMPARATORS = { +LOGIC_OPERATORS: Final = {"and", "or"} +COMPARATORS: Final = { token.LESS, token.GREATER, token.EQEQUAL, @@ -933,7 +956,7 @@ COMPARATORS = { token.LESSEQUAL, token.GREATEREQUAL, } -MATH_OPERATORS = { +MATH_OPERATORS: Final = { token.VBAR, token.CIRCUMFLEX, token.AMPER, @@ -949,23 +972,23 @@ MATH_OPERATORS = { token.TILDE, token.DOUBLESTAR, } -STARS = {token.STAR, token.DOUBLESTAR} -VARARGS_SPECIALS = STARS | {token.SLASH} -VARARGS_PARENTS = { +STARS: Final = {token.STAR, token.DOUBLESTAR} +VARARGS_SPECIALS: Final = STARS | {token.SLASH} +VARARGS_PARENTS: Final = { syms.arglist, syms.argument, # double star in arglist syms.trailer, # single argument to call syms.typedargslist, syms.varargslist, # lambdas } -UNPACKING_PARENTS = { +UNPACKING_PARENTS: Final = { syms.atom, # single element of a list or set literal syms.dictsetmaker, syms.listmaker, syms.testlist_gexp, syms.testlist_star_expr, } -TEST_DESCENDANTS = { +TEST_DESCENDANTS: Final = { syms.test, syms.lambdef, syms.or_test, @@ -982,7 +1005,7 @@ TEST_DESCENDANTS = { syms.term, syms.power, } -ASSIGNMENTS = { +ASSIGNMENTS: Final = { "=", "+=", "-=", @@ -998,13 +1021,13 @@ ASSIGNMENTS = { "**=", "//=", } -COMPREHENSION_PRIORITY = 20 -COMMA_PRIORITY = 18 -TERNARY_PRIORITY = 16 -LOGIC_PRIORITY = 14 -STRING_PRIORITY = 12 -COMPARATOR_PRIORITY = 10 -MATH_PRIORITIES = { +COMPREHENSION_PRIORITY: Final = 20 +COMMA_PRIORITY: Final = 18 +TERNARY_PRIORITY: Final = 16 +LOGIC_PRIORITY: Final = 14 +STRING_PRIORITY: Final = 12 +COMPARATOR_PRIORITY: Final = 10 +MATH_PRIORITIES: Final = { token.VBAR: 9, token.CIRCUMFLEX: 8, token.AMPER: 7, @@ -1020,7 +1043,7 @@ MATH_PRIORITIES = { token.TILDE: 3, token.DOUBLESTAR: 2, } -DOT_PRIORITY = 1 +DOT_PRIORITY: Final = 1 @dataclass @@ -1729,13 +1752,13 @@ class LineGenerator(Visitor[Line]): self.current_line.append(node) yield from super().visit_default(node) - def visit_INDENT(self, node: Node) -> Iterator[Line]: + def visit_INDENT(self, node: Leaf) -> Iterator[Line]: """Increase indentation level, maybe yield a line.""" # In blib2to3 INDENT never holds comments. yield from self.line(+1) yield from self.visit_default(node) - def visit_DEDENT(self, node: Node) -> Iterator[Line]: + def visit_DEDENT(self, node: Leaf) -> Iterator[Line]: """Decrease indentation level, maybe yield a line.""" # The current line might still wait for trailing comments. At DEDENT time # there won't be any (they would be prefixes on the preceding NEWLINE). 
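The hunks that follow (matching_bracket in left_hand_split, opening_bracket and closing_bracket in right_hand_split and generate_trailers_to_omit, first_idx in convert_one_fmt_off_pair) are all instances of the workaround named in the commit message: variables initialized to None now carry an explicit Optional[...] annotation. A minimal, self-contained sketch of the pattern, with hypothetical names rather than code from this commit:

    from typing import List, Optional

    def find_open_paren(tokens: List[str]) -> Optional[int]:
        # The commit message files this under mypy bugs being worked around:
        # with a bare `marker = None`, the checker setup used here does not
        # infer Optional[int], and the assignment inside the loop is
        # rejected.  Spelling out the annotation fixes it.
        marker: Optional[int] = None
        for i, tok in enumerate(tokens):
            if tok == "(":
                marker = i
                break
        return marker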
@@ -2463,7 +2486,7 @@ def left_hand_split(line: Line, features: Collection[Feature] = ()) -> Iterator[ body_leaves: List[Leaf] = [] head_leaves: List[Leaf] = [] current_leaves = head_leaves - matching_bracket = None + matching_bracket: Optional[Leaf] = None for leaf in line.leaves: if ( current_leaves is body_leaves @@ -2506,8 +2529,8 @@ def right_hand_split( body_leaves: List[Leaf] = [] head_leaves: List[Leaf] = [] current_leaves = tail_leaves - opening_bracket = None - closing_bracket = None + opening_bracket: Optional[Leaf] = None + closing_bracket: Optional[Leaf] = None for leaf in reversed(line.leaves): if current_leaves is body_leaves: if leaf is opening_bracket: @@ -3028,7 +3051,7 @@ def convert_one_fmt_off_pair(node: Node) -> bool: # That happens when one of the `ignored_nodes` ended with a NEWLINE # leaf (possibly followed by a DEDENT). hidden_value = hidden_value[:-1] - first_idx = None + first_idx: Optional[int] = None for ignored in ignored_nodes: index = ignored.remove() if first_idx is None: @@ -3399,8 +3422,8 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf yield omit length = 4 * line.depth - opening_bracket = None - closing_bracket = None + opening_bracket: Optional[Leaf] = None + closing_bracket: Optional[Leaf] = None inner_brackets: Set[LeafID] = set() for index, leaf, leaf_length in enumerate_with_length(line, reversed=True): length += leaf_length @@ -3797,6 +3820,7 @@ def assert_stable(src: str, dst: str, mode: FileMode) -> None: ) from None +@mypyc_attr(patchable=True) def dump_to_file(*output: str) -> str: """Dump `output` to a temporary file. Return path to the file.""" with tempfile.NamedTemporaryFile( @@ -3829,7 +3853,7 @@ def diff(a: str, b: str, a_name: str, b_name: str) -> str: ) -def cancel(tasks: Iterable[asyncio.Task]) -> None: +def cancel(tasks: Iterable["asyncio.Task[Any]"]) -> None: """asyncio signal handler that cancels all `tasks` and reports to stderr.""" err("Aborted!") for task in tasks: diff --git a/blib2to3/__init__.pyi b/blib2to3/__init__.pyi deleted file mode 100644 index 145e31b..0000000 --- a/blib2to3/__init__.pyi +++ /dev/null @@ -1 +0,0 @@ -# Stubs for lib2to3 (Python 3.6) diff --git a/blib2to3/pgen2/__init__.pyi b/blib2to3/pgen2/__init__.pyi deleted file mode 100644 index 1adc82a..0000000 --- a/blib2to3/pgen2/__init__.pyi +++ /dev/null @@ -1,10 +0,0 @@ -# Stubs for lib2to3.pgen2 (Python 3.6) - -import os -import sys -from typing import Text, Union - -if sys.version_info >= (3, 6): - _Path = Union[Text, os.PathLike] -else: - _Path = Text diff --git a/blib2to3/pgen2/conv.py b/blib2to3/pgen2/conv.py index 728b152..7816521 100644 --- a/blib2to3/pgen2/conv.py +++ b/blib2to3/pgen2/conv.py @@ -1,6 +1,8 @@ # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. # Licensed to PSF under a Contributor Agreement. +# mypy: ignore-errors + """Convert graminit.[ch] spit out by pgen to Python code. Pgen is the Python parser generator. It is useful to quickly create a diff --git a/blib2to3/pgen2/driver.py b/blib2to3/pgen2/driver.py index 6ba6b92..052c948 100644 --- a/blib2to3/pgen2/driver.py +++ b/blib2to3/pgen2/driver.py @@ -22,20 +22,42 @@ import os import logging import pkgutil import sys +from typing import ( + Any, + Callable, + IO, + Iterable, + List, + Optional, + Text, + Tuple, + Union, + Sequence, +) # Pgen imports from . 
import grammar, parse, token, tokenize, pgen +from logging import Logger +from blib2to3.pytree import _Convert, NL +from blib2to3.pgen2.grammar import Grammar + +Path = Union[str, "os.PathLike[str]"] class Driver(object): - def __init__(self, grammar, convert=None, logger=None): + def __init__( + self, + grammar: Grammar, + convert: Optional[_Convert] = None, + logger: Optional[Logger] = None, + ) -> None: self.grammar = grammar if logger is None: logger = logging.getLogger(__name__) self.logger = logger self.convert = convert - def parse_tokens(self, tokens, debug=False): + def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL: """Parse a series of tokens and return the syntax tree.""" # XXX Move the prefix computation into a wrapper around tokenize. p = parse.Parser(self.grammar, self.convert) @@ -91,32 +113,36 @@ class Driver(object): column = 0 else: # We never broke out -- EOF is too soon (how can this happen???) + assert start is not None raise parse.ParseError("incomplete input", type, value, (prefix, start)) + assert p.rootnode is not None return p.rootnode - def parse_stream_raw(self, stream, debug=False): + def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL: """Parse a stream and return the syntax tree.""" tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar) return self.parse_tokens(tokens, debug) - def parse_stream(self, stream, debug=False): + def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL: """Parse a stream and return the syntax tree.""" return self.parse_stream_raw(stream, debug) - def parse_file(self, filename, encoding=None, debug=False): + def parse_file( + self, filename: Path, encoding: Optional[Text] = None, debug: bool = False, + ) -> NL: """Parse a file and return the syntax tree.""" with io.open(filename, "r", encoding=encoding) as stream: return self.parse_stream(stream, debug) - def parse_string(self, text, debug=False): + def parse_string(self, text: Text, debug: bool = False) -> NL: """Parse a string and return the syntax tree.""" tokens = tokenize.generate_tokens( io.StringIO(text).readline, grammar=self.grammar ) return self.parse_tokens(tokens, debug) - def _partially_consume_prefix(self, prefix, column): - lines = [] + def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]: + lines: List[str] = [] current_line = "" current_column = 0 wait_for_nl = False @@ -143,7 +169,7 @@ class Driver(object): return "".join(lines), current_line -def _generate_pickle_name(gt, cache_dir=None): +def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text: head, tail = os.path.splitext(gt) if tail == ".txt": tail = "" @@ -154,14 +180,20 @@ def _generate_pickle_name(gt, cache_dir=None): return name -def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None): +def load_grammar( + gt: Text = "Grammar.txt", + gp: Optional[Text] = None, + save: bool = True, + force: bool = False, + logger: Optional[Logger] = None, +) -> Grammar: """Load the grammar (maybe from a pickle).""" if logger is None: logger = logging.getLogger(__name__) gp = _generate_pickle_name(gt) if gp is None else gp if force or not _newer(gp, gt): logger.info("Generating grammar tables from %s", gt) - g = pgen.generate_grammar(gt) + g: grammar.Grammar = pgen.generate_grammar(gt) if save: logger.info("Writing grammar tables to %s", gp) try: @@ -174,7 +206,7 @@ def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None) return g -def _newer(a, b): +def 
_newer(a: Text, b: Text) -> bool: """Inquire whether file a was written since file b.""" if not os.path.exists(a): return False @@ -183,7 +215,9 @@ def _newer(a, b): return os.path.getmtime(a) >= os.path.getmtime(b) -def load_packaged_grammar(package, grammar_source, cache_dir=None): +def load_packaged_grammar( + package: str, grammar_source: Text, cache_dir: Optional[Path] = None +) -> grammar.Grammar: """Normally, loads a pickled grammar by doing pkgutil.get_data(package, pickled_grammar) where *pickled_grammar* is computed from *grammar_source* by adding the @@ -199,18 +233,19 @@ def load_packaged_grammar(package, grammar_source, cache_dir=None): return load_grammar(grammar_source, gp=gp) pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir) data = pkgutil.get_data(package, pickled_name) + assert data is not None g = grammar.Grammar() g.loads(data) return g -def main(*args): +def main(*args: Text) -> bool: """Main program, when run as a script: produce grammar pickle files. Calls load_grammar for each argument, a path to a grammar text file. """ if not args: - args = sys.argv[1:] + args = tuple(sys.argv[1:]) logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s") for gt in args: load_grammar(gt, save=True, force=True) diff --git a/blib2to3/pgen2/driver.pyi b/blib2to3/pgen2/driver.pyi deleted file mode 100644 index f098bf5..0000000 --- a/blib2to3/pgen2/driver.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Stubs for lib2to3.pgen2.driver (Python 3.6) - -import os -import sys -from typing import Any, Callable, IO, Iterable, List, Optional, Text, Tuple, Union - -from logging import Logger -from blib2to3.pytree import _Convert, _NL -from blib2to3.pgen2 import _Path -from blib2to3.pgen2.grammar import Grammar - - -class Driver: - grammar: Grammar - logger: Logger - convert: _Convert - def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ... - def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ... - def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ... - def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ... - def parse_file(self, filename: _Path, encoding: Optional[Text] = ..., debug: bool = ...) -> _NL: ... - def parse_string(self, text: Text, debug: bool = ...) -> _NL: ... - -def load_grammar(gt: Text = ..., gp: Optional[Text] = ..., save: bool = ..., force: bool = ..., logger: Optional[Logger] = ...) -> Grammar: ... diff --git a/blib2to3/pgen2/grammar.py b/blib2to3/pgen2/grammar.py index d6f0fc2..2882cda 100644 --- a/blib2to3/pgen2/grammar.py +++ b/blib2to3/pgen2/grammar.py @@ -16,10 +16,17 @@ fallback token code OP, but the parser needs the actual token code. import os import pickle import tempfile +from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union # Local imports from . import token +_P = TypeVar("_P", bound="Grammar") +Label = Tuple[int, Optional[Text]] +DFA = List[List[Tuple[int, int]]] +DFAS = Tuple[DFA, Dict[int, int]] +Path = Union[str, "os.PathLike[str]"] + class Grammar(object): """Pgen parsing tables conversion class. 
@@ -75,38 +82,51 @@ class Grammar(object): """ - def __init__(self): - self.symbol2number = {} - self.number2symbol = {} - self.states = [] - self.dfas = {} - self.labels = [(0, "EMPTY")] - self.keywords = {} - self.tokens = {} - self.symbol2label = {} + def __init__(self) -> None: + self.symbol2number: Dict[str, int] = {} + self.number2symbol: Dict[int, str] = {} + self.states: List[DFA] = [] + self.dfas: Dict[int, DFAS] = {} + self.labels: List[Label] = [(0, "EMPTY")] + self.keywords: Dict[str, int] = {} + self.tokens: Dict[int, int] = {} + self.symbol2label: Dict[str, int] = {} self.start = 256 # Python 3.7+ parses async as a keyword, not an identifier self.async_keywords = False - def dump(self, filename): + def dump(self, filename: Path) -> None: """Dump the grammar tables to a pickle file.""" + + # mypyc generates objects that don't have a __dict__, but they + # do have __getstate__ methods that will return an equivalent + # dictionary + if hasattr(self, "__dict__"): + d = self.__dict__ + else: + d = self.__getstate__() # type: ignore + with tempfile.NamedTemporaryFile( dir=os.path.dirname(filename), delete=False ) as f: - pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL) + pickle.dump(d, f, pickle.HIGHEST_PROTOCOL) os.replace(f.name, filename) - def load(self, filename): + def _update(self, attrs: Dict[str, Any]) -> None: + for k, v in attrs.items(): + setattr(self, k, v) + + def load(self, filename: Path) -> None: """Load the grammar tables from a pickle file.""" with open(filename, "rb") as f: d = pickle.load(f) - self.__dict__.update(d) + self._update(d) - def loads(self, pkl): + def loads(self, pkl: bytes) -> None: """Load the grammar tables from a pickle bytes object.""" - self.__dict__.update(pickle.loads(pkl)) + self._update(pickle.loads(pkl)) - def copy(self): + def copy(self: _P) -> _P: """ Copy the grammar. """ @@ -126,7 +146,7 @@ class Grammar(object): new.async_keywords = self.async_keywords return new - def report(self): + def report(self) -> None: """Dump the grammar tables to standard output, for debugging.""" from pprint import pprint diff --git a/blib2to3/pgen2/grammar.pyi b/blib2to3/pgen2/grammar.pyi deleted file mode 100644 index 8173e2f..0000000 --- a/blib2to3/pgen2/grammar.pyi +++ /dev/null @@ -1,30 +0,0 @@ -# Stubs for lib2to3.pgen2.grammar (Python 3.6) - -from blib2to3.pgen2 import _Path - -from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar - -_P = TypeVar('_P') -_Label = Tuple[int, Optional[Text]] -_DFA = List[List[Tuple[int, int]]] -_DFAS = Tuple[_DFA, Dict[int, int]] - -class Grammar: - symbol2number: Dict[Text, int] - number2symbol: Dict[int, Text] - states: List[_DFA] - dfas: Dict[int, _DFAS] - labels: List[_Label] - keywords: Dict[Text, int] - tokens: Dict[int, int] - symbol2label: Dict[Text, int] - start: int - async_keywords: bool - def __init__(self) -> None: ... - def dump(self, filename: _Path) -> None: ... - def load(self, filename: _Path) -> None: ... - def copy(self: _P) -> _P: ... - def report(self) -> None: ... 
- -opmap_raw: Text -opmap: Dict[Text, Text] diff --git a/blib2to3/pgen2/literals.py b/blib2to3/pgen2/literals.py index 93bee52..b5fe428 100644 --- a/blib2to3/pgen2/literals.py +++ b/blib2to3/pgen2/literals.py @@ -3,9 +3,12 @@ """Safely evaluate Python string literals without using eval().""" -import regex as re +import re -simple_escapes = { +from typing import Dict, Match, Text + + +simple_escapes: Dict[Text, Text] = { "a": "\a", "b": "\b", "f": "\f", @@ -19,7 +22,7 @@ simple_escapes = { } -def escape(m): +def escape(m: Match[Text]) -> Text: all, tail = m.group(0, 1) assert all.startswith("\\") esc = simple_escapes.get(tail) @@ -41,7 +44,7 @@ def escape(m): return chr(i) -def evalString(s): +def evalString(s: Text) -> Text: assert s.startswith("'") or s.startswith('"'), repr(s[:1]) q = s[0] if s[:3] == q * 3: @@ -52,7 +55,7 @@ def evalString(s): return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) -def test(): +def test() -> None: for i in range(256): c = chr(i) s = repr(c) diff --git a/blib2to3/pgen2/literals.pyi b/blib2to3/pgen2/literals.pyi deleted file mode 100644 index 8719500..0000000 --- a/blib2to3/pgen2/literals.pyi +++ /dev/null @@ -1,9 +0,0 @@ -# Stubs for lib2to3.pgen2.literals (Python 3.6) - -from typing import Dict, Match, Text - -simple_escapes: Dict[Text, Text] - -def escape(m: Match) -> Text: ... -def evalString(s: Text) -> Text: ... -def test() -> None: ... diff --git a/blib2to3/pgen2/parse.py b/blib2to3/pgen2/parse.py index 22f14c8..8c374d3 100644 --- a/blib2to3/pgen2/parse.py +++ b/blib2to3/pgen2/parse.py @@ -12,12 +12,39 @@ how this parsing engine works. # Local imports from . import token +from typing import ( + Optional, + Text, + Sequence, + Any, + Union, + Tuple, + Dict, + List, + Callable, + Set, +) +from blib2to3.pgen2.grammar import Grammar +from blib2to3.pytree import NL, Context, RawNode, Leaf, Node + + +Results = Dict[Text, NL] +Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]] +DFA = List[List[Tuple[int, int]]] +DFAS = Tuple[DFA, Dict[int, int]] + + +def lam_sub(grammar: Grammar, node: RawNode) -> NL: + assert node[3] is not None + return Node(type=node[0], children=node[3], context=node[2]) class ParseError(Exception): """Exception to signal the parser is stuck.""" - def __init__(self, msg, type, value, context): + def __init__( + self, msg: Text, type: Optional[int], value: Optional[Text], context: Context + ) -> None: Exception.__init__( self, "%s: type=%r, value=%r, context=%r" % (msg, type, value, context) ) @@ -57,7 +84,7 @@ class Parser(object): """ - def __init__(self, grammar, convert=None): + def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None: """Constructor. The grammar argument is a grammar.Grammar instance; see the @@ -87,9 +114,9 @@ class Parser(object): """ self.grammar = grammar - self.convert = convert or (lambda grammar, node: node) + self.convert = convert or lam_sub - def setup(self, start=None): + def setup(self, start: Optional[int] = None) -> None: """Prepare for parsing. This *must* be called before starting to parse. @@ -107,13 +134,13 @@ class Parser(object): # Each stack entry is a tuple: (dfa, state, node). # A node is a tuple: (type, value, context, children), # where children is a list of nodes or None, and context may be None. 
- newnode = (start, None, None, []) + newnode: RawNode = (start, None, None, []) stackentry = (self.grammar.dfas[start], 0, newnode) - self.stack = [stackentry] - self.rootnode = None - self.used_names = set() # Aliased to self.rootnode.used_names in pop() + self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry] + self.rootnode: Optional[NL] = None + self.used_names: Set[str] = set() - def addtoken(self, type, value, context): + def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool: """Add a token; return True iff this is the end of the program.""" # Map from token to label ilabel = self.classify(type, value, context) @@ -160,10 +187,11 @@ class Parser(object): # No success finding a transition raise ParseError("bad input", type, value, context) - def classify(self, type, value, context): + def classify(self, type: int, value: Optional[Text], context: Context) -> int: """Turn a token into a label. (Internal)""" if type == token.NAME: # Keep a listing of all used names + assert value is not None self.used_names.add(value) # Check for reserved words ilabel = self.grammar.keywords.get(value) @@ -174,29 +202,35 @@ class Parser(object): raise ParseError("bad token", type, value, context) return ilabel - def shift(self, type, value, newstate, context): + def shift( + self, type: int, value: Optional[Text], newstate: int, context: Context + ) -> None: """Shift a token. (Internal)""" dfa, state, node = self.stack[-1] - newnode = (type, value, context, None) - newnode = self.convert(self.grammar, newnode) + assert value is not None + assert context is not None + rawnode: RawNode = (type, value, context, None) + newnode = self.convert(self.grammar, rawnode) if newnode is not None: + assert node[-1] is not None node[-1].append(newnode) self.stack[-1] = (dfa, newstate, node) - def push(self, type, newdfa, newstate, context): + def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None: """Push a nonterminal. (Internal)""" dfa, state, node = self.stack[-1] - newnode = (type, None, context, []) + newnode: RawNode = (type, None, context, []) self.stack[-1] = (dfa, newstate, node) self.stack.append((newdfa, 0, newnode)) - def pop(self): + def pop(self) -> None: """Pop a nonterminal. (Internal)""" popdfa, popstate, popnode = self.stack.pop() newnode = self.convert(self.grammar, popnode) if newnode is not None: if self.stack: dfa, state, node = self.stack[-1] + assert node[-1] is not None node[-1].append(newnode) else: self.rootnode = newnode diff --git a/blib2to3/pgen2/parse.pyi b/blib2to3/pgen2/parse.pyi deleted file mode 100644 index cbcf941..0000000 --- a/blib2to3/pgen2/parse.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# Stubs for lib2to3.pgen2.parse (Python 3.6) - -from typing import Any, Dict, List, Optional, Sequence, Set, Text, Tuple - -from blib2to3.pgen2.grammar import Grammar, _DFAS -from blib2to3.pytree import _NL, _Convert, _RawNode - -_Context = Sequence[Any] - -class ParseError(Exception): - msg: Text - type: int - value: Optional[Text] - context: _Context - def __init__(self, msg: Text, type: int, value: Optional[Text], context: _Context) -> None: ... - -class Parser: - grammar: Grammar - convert: _Convert - stack: List[Tuple[_DFAS, int, _RawNode]] - rootnode: Optional[_NL] - used_names: Set[Text] - def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ...) -> None: ... - def setup(self, start: Optional[int] = ...) -> None: ... - def addtoken(self, type: int, value: Optional[Text], context: _Context) -> bool: ... 
- def classify(self, type: int, value: Optional[Text], context: _Context) -> int: ... - def shift(self, type: int, value: Optional[Text], newstate: int, context: _Context) -> None: ... - def push(self, type: int, newdfa: _DFAS, newstate: int, context: _Context) -> None: ... - def pop(self) -> None: ... diff --git a/blib2to3/pgen2/pgen.py b/blib2to3/pgen2/pgen.py index 1da6925..774cffc 100644 --- a/blib2to3/pgen2/pgen.py +++ b/blib2to3/pgen2/pgen.py @@ -4,13 +4,40 @@ # Pgen imports from . import grammar, token, tokenize +from typing import ( + Any, + Dict, + IO, + Iterable, + Iterator, + List, + Optional, + Text, + Tuple, + Union, + Sequence, + NoReturn, +) +from blib2to3.pgen2 import grammar +from blib2to3.pgen2.tokenize import GoodTokenInfo +import os + + +Path = Union[str, "os.PathLike[str]"] + class PgenGrammar(grammar.Grammar): pass class ParserGenerator(object): - def __init__(self, filename, stream=None): + + filename: Path + stream: IO[Text] + generator: Iterator[GoodTokenInfo] + first: Dict[Text, Optional[Dict[Text, int]]] + + def __init__(self, filename: Path, stream: Optional[IO[Text]] = None) -> None: close_stream = None if stream is None: stream = open(filename) @@ -25,7 +52,7 @@ class ParserGenerator(object): self.first = {} # map from symbol name to set of tokens self.addfirstsets() - def make_grammar(self): + def make_grammar(self) -> PgenGrammar: c = PgenGrammar() names = list(self.dfas.keys()) names.sort() @@ -50,8 +77,9 @@ class ParserGenerator(object): c.start = c.symbol2number[self.startsymbol] return c - def make_first(self, c, name): + def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: rawfirst = self.first[name] + assert rawfirst is not None first = {} for label in sorted(rawfirst): ilabel = self.make_label(c, label) @@ -59,7 +87,7 @@ class ParserGenerator(object): first[ilabel] = 1 return first - def make_label(self, c, label): + def make_label(self, c: PgenGrammar, label: Text) -> int: # XXX Maybe this should be a method on a subclass of converter? 
ilabel = len(c.labels) if label[0].isalpha(): @@ -105,7 +133,7 @@ class ParserGenerator(object): c.tokens[itoken] = ilabel return ilabel - def addfirstsets(self): + def addfirstsets(self) -> None: names = list(self.dfas.keys()) names.sort() for name in names: @@ -113,11 +141,11 @@ class ParserGenerator(object): self.calcfirst(name) # print name, self.first[name].keys() - def calcfirst(self, name): + def calcfirst(self, name: Text) -> None: dfa = self.dfas[name] self.first[name] = None # dummy to detect left recursion state = dfa[0] - totalset = {} + totalset: Dict[str, int] = {} overlapcheck = {} for label, next in state.arcs.items(): if label in self.dfas: @@ -128,12 +156,13 @@ class ParserGenerator(object): else: self.calcfirst(label) fset = self.first[label] + assert fset is not None totalset.update(fset) overlapcheck[label] = fset else: totalset[label] = 1 overlapcheck[label] = {label: 1} - inverse = {} + inverse: Dict[str, str] = {} for label, itsfirst in overlapcheck.items(): for symbol in itsfirst: if symbol in inverse: @@ -145,9 +174,9 @@ class ParserGenerator(object): inverse[symbol] = label self.first[name] = totalset - def parse(self): + def parse(self) -> Tuple[Dict[Text, List["DFAState"]], Text]: dfas = {} - startsymbol = None + startsymbol: Optional[str] = None # MSTART: (NEWLINE | RULE)* ENDMARKER while self.type != token.ENDMARKER: while self.type == token.NEWLINE: @@ -167,9 +196,10 @@ class ParserGenerator(object): # print name, oldlen, newlen if startsymbol is None: startsymbol = name + assert startsymbol is not None return dfas, startsymbol - def make_dfa(self, start, finish): + def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]: # To turn an NFA into a DFA, we define the states of the DFA # to correspond to *sets* of states of the NFA. Then do some # state reduction. Let's represent sets as dicts with 1 for @@ -177,12 +207,12 @@ class ParserGenerator(object): assert isinstance(start, NFAState) assert isinstance(finish, NFAState) - def closure(state): - base = {} + def closure(state: NFAState) -> Dict[NFAState, int]: + base: Dict[NFAState, int] = {} addclosure(state, base) return base - def addclosure(state, base): + def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None: assert isinstance(state, NFAState) if state in base: return @@ -193,7 +223,7 @@ class ParserGenerator(object): states = [DFAState(closure(start), finish)] for state in states: # NB states grows while we're iterating - arcs = {} + arcs: Dict[str, Dict[NFAState, int]] = {} for nfastate in state.nfaset: for label, next in nfastate.arcs: if label is not None: @@ -208,7 +238,7 @@ class ParserGenerator(object): state.addarc(st, label) return states # List of DFAState instances; first one is start - def dump_nfa(self, name, start, finish): + def dump_nfa(self, name: Text, start: "NFAState", finish: "NFAState") -> None: print("Dump of NFA for", name) todo = [start] for i, state in enumerate(todo): @@ -224,14 +254,14 @@ class ParserGenerator(object): else: print(" %s -> %d" % (label, j)) - def dump_dfa(self, name, dfa): + def dump_dfa(self, name: Text, dfa: Sequence["DFAState"]) -> None: print("Dump of DFA for", name) for i, state in enumerate(dfa): print(" State", i, state.isfinal and "(final)" or "") for label, next in sorted(state.arcs.items()): print(" %s -> %d" % (label, dfa.index(next))) - def simplify_dfa(self, dfa): + def simplify_dfa(self, dfa: List["DFAState"]) -> None: # This is not theoretically optimal, but works well enough. 
# Algorithm: repeatedly look for two states that have the same # set of arcs (same labels pointing to the same nodes) and @@ -252,7 +282,7 @@ class ParserGenerator(object): changes = True break - def parse_rhs(self): + def parse_rhs(self) -> Tuple["NFAState", "NFAState"]: # RHS: ALT ('|' ALT)* a, z = self.parse_alt() if self.value != "|": @@ -269,7 +299,7 @@ class ParserGenerator(object): z.addarc(zz) return aa, zz - def parse_alt(self): + def parse_alt(self) -> Tuple["NFAState", "NFAState"]: # ALT: ITEM+ a, b = self.parse_item() while self.value in ("(", "[") or self.type in (token.NAME, token.STRING): @@ -278,7 +308,7 @@ class ParserGenerator(object): b = d return a, b - def parse_item(self): + def parse_item(self) -> Tuple["NFAState", "NFAState"]: # ITEM: '[' RHS ']' | ATOM ['+' | '*'] if self.value == "[": self.gettoken() @@ -298,7 +328,7 @@ class ParserGenerator(object): else: return a, a - def parse_atom(self): + def parse_atom(self) -> Tuple["NFAState", "NFAState"]: # ATOM: '(' RHS ')' | NAME | STRING if self.value == "(": self.gettoken() @@ -315,8 +345,9 @@ class ParserGenerator(object): self.raise_error( "expected (...) or NAME or STRING, got %s/%s", self.type, self.value ) + assert False - def expect(self, type, value=None): + def expect(self, type: int, value: Optional[Any] = None) -> Text: if self.type != type or (value is not None and self.value != value): self.raise_error( "expected %s/%s, got %s/%s", type, value, self.type, self.value @@ -325,14 +356,14 @@ class ParserGenerator(object): self.gettoken() return value - def gettoken(self): + def gettoken(self) -> None: tup = next(self.generator) while tup[0] in (tokenize.COMMENT, tokenize.NL): tup = next(self.generator) self.type, self.value, self.begin, self.end, self.line = tup # print token.tok_name[self.type], repr(self.value) - def raise_error(self, msg, *args): + def raise_error(self, msg: str, *args: Any) -> NoReturn: if args: try: msg = msg % args @@ -342,17 +373,23 @@ class ParserGenerator(object): class NFAState(object): - def __init__(self): + arcs: List[Tuple[Optional[Text], "NFAState"]] + + def __init__(self) -> None: self.arcs = [] # list of (label, NFAState) pairs - def addarc(self, next, label=None): + def addarc(self, next: "NFAState", label: Optional[Text] = None) -> None: assert label is None or isinstance(label, str) assert isinstance(next, NFAState) self.arcs.append((label, next)) class DFAState(object): - def __init__(self, nfaset, final): + nfaset: Dict[NFAState, Any] + isfinal: bool + arcs: Dict[Text, "DFAState"] + + def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: assert isinstance(nfaset, dict) assert isinstance(next(iter(nfaset)), NFAState) assert isinstance(final, NFAState) @@ -360,18 +397,18 @@ class DFAState(object): self.isfinal = final in nfaset self.arcs = {} # map from label to DFAState - def addarc(self, next, label): + def addarc(self, next: "DFAState", label: Text) -> None: assert isinstance(label, str) assert label not in self.arcs assert isinstance(next, DFAState) self.arcs[label] = next - def unifystate(self, old, new): + def unifystate(self, old: "DFAState", new: "DFAState") -> None: for label, next in self.arcs.items(): if next is old: self.arcs[label] = new - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: # Equality test -- ignore the nfaset instance variable assert isinstance(other, DFAState) if self.isfinal != other.isfinal: @@ -385,9 +422,9 @@ class DFAState(object): return False return True - __hash__ = None # For Py3 compatibility. 
+ __hash__: Any = None # For Py3 compatibility. -def generate_grammar(filename="Grammar.txt"): +def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar: p = ParserGenerator(filename) return p.make_grammar() diff --git a/blib2to3/pgen2/pgen.pyi b/blib2to3/pgen2/pgen.pyi deleted file mode 100644 index 1529ad0..0000000 --- a/blib2to3/pgen2/pgen.pyi +++ /dev/null @@ -1,49 +0,0 @@ -# Stubs for lib2to3.pgen2.pgen (Python 3.6) - -from typing import Any, Dict, IO, Iterable, Iterator, List, Optional, Text, Tuple -from mypy_extensions import NoReturn - -from blib2to3.pgen2 import _Path, grammar -from blib2to3.pgen2.tokenize import _TokenInfo - -class PgenGrammar(grammar.Grammar): ... - -class ParserGenerator: - filename: _Path - stream: IO[Text] - generator: Iterator[_TokenInfo] - first: Dict[Text, Dict[Text, int]] - def __init__(self, filename: _Path, stream: Optional[IO[Text]] = ...) -> None: ... - def make_grammar(self) -> PgenGrammar: ... - def make_first(self, c: PgenGrammar, name: Text) -> Dict[int, int]: ... - def make_label(self, c: PgenGrammar, label: Text) -> int: ... - def addfirstsets(self) -> None: ... - def calcfirst(self, name: Text) -> None: ... - def parse(self) -> Tuple[Dict[Text, List[DFAState]], Text]: ... - def make_dfa(self, start: NFAState, finish: NFAState) -> List[DFAState]: ... - def dump_nfa(self, name: Text, start: NFAState, finish: NFAState) -> List[DFAState]: ... - def dump_dfa(self, name: Text, dfa: Iterable[DFAState]) -> None: ... - def simplify_dfa(self, dfa: List[DFAState]) -> None: ... - def parse_rhs(self) -> Tuple[NFAState, NFAState]: ... - def parse_alt(self) -> Tuple[NFAState, NFAState]: ... - def parse_item(self) -> Tuple[NFAState, NFAState]: ... - def parse_atom(self) -> Tuple[NFAState, NFAState]: ... - def expect(self, type: int, value: Optional[Any] = ...) -> Text: ... - def gettoken(self) -> None: ... - def raise_error(self, msg: str, *args: Any) -> NoReturn: ... - -class NFAState: - arcs: List[Tuple[Optional[Text], NFAState]] - def __init__(self) -> None: ... - def addarc(self, next: NFAState, label: Optional[Text] = ...) -> None: ... - -class DFAState: - nfaset: Dict[NFAState, Any] - isfinal: bool - arcs: Dict[Text, DFAState] - def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None: ... - def addarc(self, next: DFAState, label: Text) -> None: ... - def unifystate(self, old: DFAState, new: DFAState) -> None: ... - def __eq__(self, other: Any) -> bool: ... - -def generate_grammar(filename: _Path = ...) -> PgenGrammar: ... 
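The token.py diff below turns every token code into a Final declaration, matching the Final constant tables added to black.py earlier. Final promises that a name is never rebound, which lets mypy catch accidental reassignment and, in turn, lets mypyc treat the value as a constant rather than a mutable module attribute. A short sketch of the semantics, reusing names from the diff but standing alone:

    from typing import Dict
    from typing_extensions import Final

    ENDMARKER: Final = 0   # rebinding this name is now a type error
    NT_OFFSET: Final = 256

    # Final freezes the binding, not the object: the dict may still be
    # mutated in place, it just cannot be replaced wholesale.
    tok_name: Final[Dict[int, str]] = {}
    tok_name[ENDMARKER] = "ENDMARKER"
    # tok_name = {}        # error: cannot assign to a name declared Final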
diff --git a/blib2to3/pgen2/token.py b/blib2to3/pgen2/token.py index 583a8a1..5870d47 100644 --- a/blib2to3/pgen2/token.py +++ b/blib2to3/pgen2/token.py @@ -1,86 +1,89 @@ """Token constants (from "token.h").""" +from typing import Dict +from typing_extensions import Final + # Taken from Python (r53757) and modified to include some tokens # originally monkeypatched in by pgen2.tokenize # --start constants-- -ENDMARKER = 0 -NAME = 1 -NUMBER = 2 -STRING = 3 -NEWLINE = 4 -INDENT = 5 -DEDENT = 6 -LPAR = 7 -RPAR = 8 -LSQB = 9 -RSQB = 10 -COLON = 11 -COMMA = 12 -SEMI = 13 -PLUS = 14 -MINUS = 15 -STAR = 16 -SLASH = 17 -VBAR = 18 -AMPER = 19 -LESS = 20 -GREATER = 21 -EQUAL = 22 -DOT = 23 -PERCENT = 24 -BACKQUOTE = 25 -LBRACE = 26 -RBRACE = 27 -EQEQUAL = 28 -NOTEQUAL = 29 -LESSEQUAL = 30 -GREATEREQUAL = 31 -TILDE = 32 -CIRCUMFLEX = 33 -LEFTSHIFT = 34 -RIGHTSHIFT = 35 -DOUBLESTAR = 36 -PLUSEQUAL = 37 -MINEQUAL = 38 -STAREQUAL = 39 -SLASHEQUAL = 40 -PERCENTEQUAL = 41 -AMPEREQUAL = 42 -VBAREQUAL = 43 -CIRCUMFLEXEQUAL = 44 -LEFTSHIFTEQUAL = 45 -RIGHTSHIFTEQUAL = 46 -DOUBLESTAREQUAL = 47 -DOUBLESLASH = 48 -DOUBLESLASHEQUAL = 49 -AT = 50 -ATEQUAL = 51 -OP = 52 -COMMENT = 53 -NL = 54 -RARROW = 55 -AWAIT = 56 -ASYNC = 57 -ERRORTOKEN = 58 -COLONEQUAL = 59 -N_TOKENS = 60 -NT_OFFSET = 256 +ENDMARKER: Final = 0 +NAME: Final = 1 +NUMBER: Final = 2 +STRING: Final = 3 +NEWLINE: Final = 4 +INDENT: Final = 5 +DEDENT: Final = 6 +LPAR: Final = 7 +RPAR: Final = 8 +LSQB: Final = 9 +RSQB: Final = 10 +COLON: Final = 11 +COMMA: Final = 12 +SEMI: Final = 13 +PLUS: Final = 14 +MINUS: Final = 15 +STAR: Final = 16 +SLASH: Final = 17 +VBAR: Final = 18 +AMPER: Final = 19 +LESS: Final = 20 +GREATER: Final = 21 +EQUAL: Final = 22 +DOT: Final = 23 +PERCENT: Final = 24 +BACKQUOTE: Final = 25 +LBRACE: Final = 26 +RBRACE: Final = 27 +EQEQUAL: Final = 28 +NOTEQUAL: Final = 29 +LESSEQUAL: Final = 30 +GREATEREQUAL: Final = 31 +TILDE: Final = 32 +CIRCUMFLEX: Final = 33 +LEFTSHIFT: Final = 34 +RIGHTSHIFT: Final = 35 +DOUBLESTAR: Final = 36 +PLUSEQUAL: Final = 37 +MINEQUAL: Final = 38 +STAREQUAL: Final = 39 +SLASHEQUAL: Final = 40 +PERCENTEQUAL: Final = 41 +AMPEREQUAL: Final = 42 +VBAREQUAL: Final = 43 +CIRCUMFLEXEQUAL: Final = 44 +LEFTSHIFTEQUAL: Final = 45 +RIGHTSHIFTEQUAL: Final = 46 +DOUBLESTAREQUAL: Final = 47 +DOUBLESLASH: Final = 48 +DOUBLESLASHEQUAL: Final = 49 +AT: Final = 50 +ATEQUAL: Final = 51 +OP: Final = 52 +COMMENT: Final = 53 +NL: Final = 54 +RARROW: Final = 55 +AWAIT: Final = 56 +ASYNC: Final = 57 +ERRORTOKEN: Final = 58 +COLONEQUAL: Final = 59 +N_TOKENS: Final = 60 +NT_OFFSET: Final = 256 # --end constants-- -tok_name = {} +tok_name: Final[Dict[int, str]] = {} for _name, _value in list(globals().items()): if type(_value) is type(0): tok_name[_value] = _name -def ISTERMINAL(x): +def ISTERMINAL(x: int) -> bool: return x < NT_OFFSET -def ISNONTERMINAL(x): +def ISNONTERMINAL(x: int) -> bool: return x >= NT_OFFSET -def ISEOF(x): +def ISEOF(x: int) -> bool: return x == ENDMARKER diff --git a/blib2to3/pgen2/token.pyi b/blib2to3/pgen2/token.pyi deleted file mode 100644 index b51bbcf..0000000 --- a/blib2to3/pgen2/token.pyi +++ /dev/null @@ -1,74 +0,0 @@ -# Stubs for lib2to3.pgen2.token (Python 3.6) - -import sys -from typing import Dict, Text - -ENDMARKER: int -NAME: int -NUMBER: int -STRING: int -NEWLINE: int -INDENT: int -DEDENT: int -LPAR: int -RPAR: int -LSQB: int -RSQB: int -COLON: int -COMMA: int -SEMI: int -PLUS: int -MINUS: int -STAR: int -SLASH: int -VBAR: int -AMPER: int -LESS: int -GREATER: int -EQUAL: int -DOT: int 
-PERCENT: int -BACKQUOTE: int -LBRACE: int -RBRACE: int -EQEQUAL: int -NOTEQUAL: int -LESSEQUAL: int -GREATEREQUAL: int -TILDE: int -CIRCUMFLEX: int -LEFTSHIFT: int -RIGHTSHIFT: int -DOUBLESTAR: int -PLUSEQUAL: int -MINEQUAL: int -STAREQUAL: int -SLASHEQUAL: int -PERCENTEQUAL: int -AMPEREQUAL: int -VBAREQUAL: int -CIRCUMFLEXEQUAL: int -LEFTSHIFTEQUAL: int -RIGHTSHIFTEQUAL: int -DOUBLESTAREQUAL: int -DOUBLESLASH: int -DOUBLESLASHEQUAL: int -OP: int -COMMENT: int -NL: int -if sys.version_info >= (3,): - RARROW: int -if sys.version_info >= (3, 5): - AT: int - ATEQUAL: int - AWAIT: int - ASYNC: int -ERRORTOKEN: int -COLONEQUAL: int -N_TOKENS: int -NT_OFFSET: int -tok_name: Dict[int, Text] - -def ISTERMINAL(x: int) -> bool: ... -def ISNONTERMINAL(x: int) -> bool: ... -def ISEOF(x: int) -> bool: ... diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py index 8c1c4cf..9fac9b6 100644 --- a/blib2to3/pgen2/tokenize.py +++ b/blib2to3/pgen2/tokenize.py @@ -1,6 +1,8 @@ # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. # All rights reserved. +# mypy: allow-untyped-defs, allow-untyped-calls + """Tokenization help for Python programs. generate_tokens(readline) is a generator that breaks a stream of @@ -25,6 +27,21 @@ are the same, except instead of generating tokens, tokeneater is a callback function to which the 5 fields described above are passed as 5 arguments, each time a new token is found.""" +from typing import ( + Callable, + Iterable, + Iterator, + List, + Optional, + Text, + Tuple, + Pattern, + Union, + cast, +) +from blib2to3.pgen2.token import * +from blib2to3.pgen2.grammar import Grammar + __author__ = "Ka-Ping Yee " __credits__ = "GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro" @@ -41,13 +58,6 @@ __all__ = [x for x in dir(token) if x[0] != "_"] + [ ] del token -try: - bytes -except NameError: - # Support bytes type in Python <= 2.5, so 2to3 turns itself into - # valid Python 3 code. - bytes = str - def group(*choices): return "(" + "|".join(choices) + ")" @@ -181,7 +191,11 @@ def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for ) -def tokenize(readline, tokeneater=printtoken): +Coord = Tuple[int, int] +TokenEater = Callable[[int, Text, Coord, Coord, Text], None] + + +def tokenize(readline: Callable[[], Text], tokeneater: TokenEater = printtoken) -> None: """ The tokenize() function accepts two parameters: one representing the input stream, and one providing an output mechanism for tokenize(). 
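The Untokenizer changes in the next hunk thread a TokenInfo union (a bare (type, string) pair versus the full five-field record) through untokenize(), using typing.cast to tell the checker which member is in hand, since the len() check by itself does not narrow the union for mypy. A self-contained sketch of that idiom, with hypothetical names rather than code from this commit:

    from typing import Tuple, Union, cast

    Coord = Tuple[int, int]
    ShortInfo = Tuple[int, str]
    FullInfo = Tuple[int, str, Coord, Coord, str]
    AnyInfo = Union[ShortInfo, FullInfo]

    def describe(t: AnyInfo) -> str:
        if len(t) == 2:
            # len() alone leaves `t` as the union, so narrow it explicitly.
            tok_type, text = cast(ShortInfo, t)
            return f"{tok_type}: {text!r}"
        tok_type, text, start, _end, _line = cast(FullInfo, t)
        return f"{tok_type}: {text!r} at line {start[0]}, column {start[1]}"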
@@ -206,25 +220,36 @@ def tokenize_loop(readline, tokeneater): tokeneater(*token_info) +GoodTokenInfo = Tuple[int, Text, Coord, Coord, Text] +TokenInfo = Union[Tuple[int, str], GoodTokenInfo] + + class Untokenizer: - def __init__(self): + + tokens: List[Text] + prev_row: int + prev_col: int + + def __init__(self) -> None: self.tokens = [] self.prev_row = 1 self.prev_col = 0 - def add_whitespace(self, start): + def add_whitespace(self, start: Coord) -> None: row, col = start assert row <= self.prev_row col_offset = col - self.prev_col if col_offset: self.tokens.append(" " * col_offset) - def untokenize(self, iterable): + def untokenize(self, iterable: Iterable[TokenInfo]) -> Text: for t in iterable: if len(t) == 2: - self.compat(t, iterable) + self.compat(cast(Tuple[int, str], t), iterable) break - tok_type, token, start, end, line = t + tok_type, token, start, end, line = cast( + Tuple[int, Text, Coord, Coord, Text], t + ) self.add_whitespace(start) self.tokens.append(token) self.prev_row, self.prev_col = end @@ -233,7 +258,7 @@ class Untokenizer: self.prev_col = 0 return "".join(self.tokens) - def compat(self, token, iterable): + def compat(self, token: Tuple[int, Text], iterable: Iterable[TokenInfo]) -> None: startline = False indents = [] toks_append = self.tokens.append @@ -266,7 +291,7 @@ cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII) blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII) -def _get_normal_name(orig_enc): +def _get_normal_name(orig_enc: str) -> str: """Imitates get_normal_name in tokenizer.c.""" # Only care about the first 12 characters. enc = orig_enc[:12].lower().replace("_", "-") @@ -279,7 +304,7 @@ def _get_normal_name(orig_enc): return orig_enc -def detect_encoding(readline): +def detect_encoding(readline: Callable[[], bytes]) -> Tuple[str, List[bytes]]: """ The detect_encoding() function is used to detect the encoding that should be used to decode a Python source file. It requires one argument, readline, @@ -301,13 +326,13 @@ def detect_encoding(readline): encoding = None default = "utf-8" - def read_or_stop(): + def read_or_stop() -> bytes: try: return readline() except StopIteration: return bytes() - def find_cookie(line): + def find_cookie(line: bytes) -> Optional[str]: try: line_string = line.decode("ascii") except UnicodeDecodeError: @@ -354,7 +379,7 @@ def detect_encoding(readline): return default, [first, second] -def untokenize(iterable): +def untokenize(iterable: Iterable[TokenInfo]) -> Text: """Transform tokens back into Python source code. 
Each element returned by the iterable must be a token sequence @@ -376,7 +401,9 @@ def untokenize(iterable): return ut.untokenize(iterable) -def generate_tokens(readline, grammar=None): +def generate_tokens( + readline: Callable[[], Text], grammar: Optional[Grammar] = None +) -> Iterator[GoodTokenInfo]: """ The generate_tokens() generator requires one argument, readline, which must be a callable object which provides the same interface as the @@ -395,7 +422,7 @@ def generate_tokens(readline, grammar=None): lnum = parenlev = continued = 0 numchars = "0123456789" contstr, needcont = "", 0 - contline = None + contline: Optional[str] = None indents = [0] # If we know we're parsing 3.7+, we can unconditionally parse `async` and @@ -407,6 +434,9 @@ def generate_tokens(readline, grammar=None): async_def_indent = 0 async_def_nl = False + strstart: Tuple[int, int] + endprog: Pattern[str] + while 1: # loop over lines in stream try: line = readline() @@ -416,6 +446,7 @@ def generate_tokens(readline, grammar=None): pos, max = 0, len(line) if contstr: # continued string + assert contline is not None if not line: raise TokenError("EOF in multi-line string", strstart) endmatch = endprog.match(line) diff --git a/blib2to3/pgen2/tokenize.pyi b/blib2to3/pgen2/tokenize.pyi deleted file mode 100644 index d3011a8..0000000 --- a/blib2to3/pgen2/tokenize.pyi +++ /dev/null @@ -1,32 +0,0 @@ -# Stubs for lib2to3.pgen2.tokenize (Python 3.6) -# NOTE: Only elements from __all__ are present. - -from typing import Callable, Iterable, Iterator, List, Optional, Text, Tuple -from blib2to3.pgen2.token import * # noqa -from blib2to3.pygram import Grammar - - -_Coord = Tuple[int, int] -_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None] -_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text] - - -class TokenError(Exception): ... -class StopTokenizing(Exception): ... - -def tokenize(readline: Callable[[], Text], tokeneater: _TokenEater = ...) -> None: ... - -class Untokenizer: - tokens: List[Text] - prev_row: int - prev_col: int - def __init__(self) -> None: ... - def add_whitespace(self, start: _Coord) -> None: ... - def untokenize(self, iterable: Iterable[_TokenInfo]) -> Text: ... - def compat(self, token: Tuple[int, Text], iterable: Iterable[_TokenInfo]) -> None: ... - -def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ... -def generate_tokens( - readline: Callable[[], Text], - grammar: Optional[Grammar] = ... -) -> Iterator[_TokenInfo]: ... diff --git a/blib2to3/pygram.py b/blib2to3/pygram.py index 7614af7..b8362b8 100644 --- a/blib2to3/pygram.py +++ b/blib2to3/pygram.py @@ -6,17 +6,23 @@ # Python imports import os +from typing import Union + # Local imports from .pgen2 import token from .pgen2 import driver -# The grammar file -_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") -_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "PatternGrammar.txt") +from .pgen2.grammar import Grammar + +# Moved into initialize because mypyc can't handle __file__ (XXX bug) +# # The grammar file +# _GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") +# _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), +# "PatternGrammar.txt") class Symbols(object): - def __init__(self, grammar): + def __init__(self, grammar: Grammar) -> None: """Initializer. 
Creates an attribute for each grammar symbol (nonterminal), @@ -26,7 +32,129 @@ class Symbols(object): setattr(self, name, symbol) -def initialize(cache_dir=None): +class _python_symbols(Symbols): + and_expr: int + and_test: int + annassign: int + arglist: int + argument: int + arith_expr: int + assert_stmt: int + async_funcdef: int + async_stmt: int + atom: int + augassign: int + break_stmt: int + classdef: int + comp_for: int + comp_if: int + comp_iter: int + comp_op: int + comparison: int + compound_stmt: int + continue_stmt: int + decorated: int + decorator: int + decorators: int + del_stmt: int + dictsetmaker: int + dotted_as_name: int + dotted_as_names: int + dotted_name: int + encoding_decl: int + eval_input: int + except_clause: int + exec_stmt: int + expr: int + expr_stmt: int + exprlist: int + factor: int + file_input: int + flow_stmt: int + for_stmt: int + funcdef: int + global_stmt: int + if_stmt: int + import_as_name: int + import_as_names: int + import_from: int + import_name: int + import_stmt: int + lambdef: int + listmaker: int + namedexpr_test: int + not_test: int + old_comp_for: int + old_comp_if: int + old_comp_iter: int + old_lambdef: int + old_test: int + or_test: int + parameters: int + pass_stmt: int + power: int + print_stmt: int + raise_stmt: int + return_stmt: int + shift_expr: int + simple_stmt: int + single_input: int + sliceop: int + small_stmt: int + star_expr: int + stmt: int + subscript: int + subscriptlist: int + suite: int + term: int + test: int + testlist: int + testlist1: int + testlist_gexp: int + testlist_safe: int + testlist_star_expr: int + tfpdef: int + tfplist: int + tname: int + trailer: int + try_stmt: int + typedargslist: int + varargslist: int + vfpdef: int + vfplist: int + vname: int + while_stmt: int + with_item: int + with_stmt: int + with_var: int + xor_expr: int + yield_arg: int + yield_expr: int + yield_stmt: int + + +class _pattern_symbols(Symbols): + Alternative: int + Alternatives: int + Details: int + Matcher: int + NegatedUnit: int + Repeater: int + Unit: int + + +python_grammar: Grammar +python_grammar_no_print_statement: Grammar +python_grammar_no_print_statement_no_exec_statement: Grammar +python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar +python_grammar_no_exec_statement: Grammar +pattern_grammar: Grammar + +python_symbols: _python_symbols +pattern_symbols: _pattern_symbols + + +def initialize(cache_dir: Union[str, "os.PathLike[str]", None] = None) -> None: global python_grammar global python_grammar_no_print_statement global python_grammar_no_print_statement_no_exec_statement @@ -35,10 +163,16 @@ def initialize(cache_dir=None): global pattern_grammar global pattern_symbols + # The grammar file + _GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") + _PATTERN_GRAMMAR_FILE = os.path.join( + os.path.dirname(__file__), "PatternGrammar.txt" + ) + # Python 2 python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir) - python_symbols = Symbols(python_grammar) + python_symbols = _python_symbols(python_grammar) # Python 2 + from __future__ import print_function python_grammar_no_print_statement = python_grammar.copy() @@ -60,4 +194,4 @@ def initialize(cache_dir=None): pattern_grammar = driver.load_packaged_grammar( "blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir ) - pattern_symbols = Symbols(pattern_grammar) + pattern_symbols = _pattern_symbols(pattern_grammar) diff --git a/blib2to3/pygram.pyi b/blib2to3/pygram.pyi deleted file mode 100644 index 11bf295..0000000 --- 
a/blib2to3/pygram.pyi +++ /dev/null @@ -1,126 +0,0 @@ -# Stubs for lib2to3.pygram (Python 3.6) - -import os -from typing import Any, Union -from blib2to3.pgen2.grammar import Grammar - -class Symbols: - def __init__(self, grammar: Grammar) -> None: ... - -class python_symbols(Symbols): - and_expr: int - and_test: int - annassign: int - arglist: int - argument: int - arith_expr: int - assert_stmt: int - async_funcdef: int - async_stmt: int - atom: int - augassign: int - break_stmt: int - classdef: int - comp_for: int - comp_if: int - comp_iter: int - comp_op: int - comparison: int - compound_stmt: int - continue_stmt: int - decorated: int - decorator: int - decorators: int - del_stmt: int - dictsetmaker: int - dotted_as_name: int - dotted_as_names: int - dotted_name: int - encoding_decl: int - eval_input: int - except_clause: int - exec_stmt: int - expr: int - expr_stmt: int - exprlist: int - factor: int - file_input: int - flow_stmt: int - for_stmt: int - funcdef: int - global_stmt: int - if_stmt: int - import_as_name: int - import_as_names: int - import_from: int - import_name: int - import_stmt: int - lambdef: int - listmaker: int - namedexpr_test: int - not_test: int - old_comp_for: int - old_comp_if: int - old_comp_iter: int - old_lambdef: int - old_test: int - or_test: int - parameters: int - pass_stmt: int - power: int - print_stmt: int - raise_stmt: int - return_stmt: int - shift_expr: int - simple_stmt: int - single_input: int - sliceop: int - small_stmt: int - star_expr: int - stmt: int - subscript: int - subscriptlist: int - suite: int - term: int - test: int - testlist: int - testlist1: int - testlist_gexp: int - testlist_safe: int - testlist_star_expr: int - tfpdef: int - tfplist: int - tname: int - trailer: int - try_stmt: int - typedargslist: int - varargslist: int - vfpdef: int - vfplist: int - vname: int - while_stmt: int - with_item: int - with_stmt: int - with_var: int - xor_expr: int - yield_arg: int - yield_expr: int - yield_stmt: int - -class pattern_symbols(Symbols): - Alternative: int - Alternatives: int - Details: int - Matcher: int - NegatedUnit: int - Repeater: int - Unit: int - -python_grammar: Grammar -python_grammar_no_print_statement: Grammar -python_grammar_no_print_statement_no_exec_statement: Grammar -python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar -python_grammar_no_exec_statement: Grammar -pattern_grammar: Grammar - -def initialize(cache_dir: Union[str, os.PathLike, None]) -> None: ... diff --git a/blib2to3/pytree.py b/blib2to3/pytree.py index 6776491..d1bcbe9 100644 --- a/blib2to3/pytree.py +++ b/blib2to3/pytree.py @@ -10,29 +10,56 @@ even the comments and whitespace between tokens. There's also a pattern matching implementation here. 
""" +# mypy: allow-untyped-defs + +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Optional, + Text, + Tuple, + TypeVar, + Union, + Set, + Iterable, + Sequence, +) +from blib2to3.pgen2.grammar import Grammar + __author__ = "Guido van Rossum " import sys from io import StringIO -HUGE = 0x7FFFFFFF # maximum repeat count, default max +HUGE: int = 0x7FFFFFFF # maximum repeat count, default max -_type_reprs = {} +_type_reprs: Dict[int, Union[Text, int]] = {} -def type_repr(type_num): +def type_repr(type_num: int) -> Union[Text, int]: global _type_reprs if not _type_reprs: from .pygram import python_symbols # printing tokens is possible but not as useful # from .pgen2 import token // token.__dict__.items(): - for name, val in python_symbols.__dict__.items(): + for name in dir(python_symbols): + val = getattr(python_symbols, name) if type(val) == int: _type_reprs[val] = name return _type_reprs.setdefault(type_num, type_num) +_P = TypeVar("_P") + +NL = Union["Node", "Leaf"] +Context = Tuple[Text, Tuple[int, int]] +RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]] + + class Base(object): """ @@ -45,18 +72,18 @@ class Base(object): """ # Default values for instance variables - type = None # int: token number (< 256) or symbol number (>= 256) - parent = None # Parent node pointer, or None - children = () # Tuple of subnodes - was_changed = False - was_checked = False + type: int # int: token number (< 256) or symbol number (>= 256) + parent: Optional["Node"] = None # Parent node pointer, or None + children: List[NL] # List of subnodes + was_changed: bool = False + was_checked: bool = False def __new__(cls, *args, **kwds): """Constructor that prevents Base from being instantiated.""" assert cls is not Base, "Cannot instantiate Base" return object.__new__(cls) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: """ Compare two nodes for equality. @@ -66,9 +93,13 @@ class Base(object): return NotImplemented return self._eq(other) - __hash__ = None # For Py3 compatibility. + __hash__ = None # type: Any # For Py3 compatibility. + + @property + def prefix(self) -> Text: + raise NotImplementedError - def _eq(self, other): + def _eq(self: _P, other: _P) -> bool: """ Compare two nodes for equality. @@ -79,7 +110,7 @@ class Base(object): """ raise NotImplementedError - def clone(self): + def clone(self: _P) -> _P: """ Return a cloned (deep) copy of self. @@ -87,7 +118,7 @@ class Base(object): """ raise NotImplementedError - def post_order(self): + def post_order(self) -> Iterator[NL]: """ Return a post-order iterator for the tree. @@ -95,7 +126,7 @@ class Base(object): """ raise NotImplementedError - def pre_order(self): + def pre_order(self) -> Iterator[NL]: """ Return a pre-order iterator for the tree. 
@@ -103,7 +134,7 @@ class Base(object): """ raise NotImplementedError - def replace(self, new): + def replace(self, new: Union[NL, List[NL]]) -> None: """Replace this node with a new one in the parent.""" assert self.parent is not None, str(self) assert new is not None @@ -127,23 +158,23 @@ class Base(object): x.parent = self.parent self.parent = None - def get_lineno(self): + def get_lineno(self) -> Optional[int]: """Return the line number which generated the invocant node.""" node = self while not isinstance(node, Leaf): if not node.children: - return + return None node = node.children[0] return node.lineno - def changed(self): + def changed(self) -> None: if self.was_changed: return if self.parent: self.parent.changed() self.was_changed = True - def remove(self): + def remove(self) -> Optional[int]: """ Remove the node from the tree. Returns the position of the node in its parent's children before it was removed. @@ -156,9 +187,10 @@ class Base(object): self.parent.invalidate_sibling_maps() self.parent = None return i + return None @property - def next_sibling(self): + def next_sibling(self) -> Optional[NL]: """ The node immediately following the invocant in their parent's children list. If the invocant does not have a next sibling, it is None @@ -168,10 +200,11 @@ class Base(object): if self.parent.next_sibling_map is None: self.parent.update_sibling_maps() + assert self.parent.next_sibling_map is not None return self.parent.next_sibling_map[id(self)] @property - def prev_sibling(self): + def prev_sibling(self) -> Optional[NL]: """ The node immediately preceding the invocant in their parent's children list. If the invocant does not have a previous sibling, it is None. @@ -181,18 +214,19 @@ class Base(object): if self.parent.prev_sibling_map is None: self.parent.update_sibling_maps() + assert self.parent.prev_sibling_map is not None return self.parent.prev_sibling_map[id(self)] - def leaves(self): + def leaves(self) -> Iterator["Leaf"]: for child in self.children: yield from child.leaves() - def depth(self): + def depth(self) -> int: if self.parent is None: return 0 return 1 + self.parent.depth() - def get_suffix(self): + def get_suffix(self) -> Text: """ Return the string immediately following the invocant node. This is effectively equivalent to node.next_sibling.prefix @@ -200,19 +234,25 @@ class Base(object): next_sib = self.next_sibling if next_sib is None: return "" - return next_sib.prefix - - if sys.version_info < (3, 0): - - def __str__(self): - return str(self).encode("ascii") + prefix = next_sib.prefix + return prefix class Node(Base): """Concrete implementation for interior nodes.""" - def __init__(self, type, children, context=None, prefix=None, fixers_applied=None): + fixers_applied: Optional[List[Any]] + used_names: Optional[Set[Text]] + + def __init__( + self, + type: int, + children: List[NL], + context: Optional[Any] = None, + prefix: Optional[Text] = None, + fixers_applied: Optional[List[Any]] = None, + ) -> None: """ Initializer. @@ -235,15 +275,16 @@ class Node(Base): else: self.fixers_applied = None - def __repr__(self): + def __repr__(self) -> Text: """Return a canonical string representation.""" + assert self.type is not None return "%s(%s, %r)" % ( self.__class__.__name__, type_repr(self.type), self.children, ) - def __unicode__(self): + def __str__(self) -> Text: """ Return a pretty string representation. 
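
next_sibling and prev_sibling above follow a recurring shape in this patch: the cache attribute is typed Optional because invalidate_sibling_maps() resets it to None, so even right after update_sibling_maps() rebuilds it the code still asserts it is not None before indexing. A minimal sketch of that shape, with invented names:

    from typing import Dict, Optional

    class SiblingCache:
        def __init__(self) -> None:
            self._index: Optional[Dict[str, int]] = None

        def invalidate(self) -> None:
            self._index = None

        def rebuild(self) -> None:
            self._index = {"a": 0, "b": 1}

        def position(self, name: str) -> int:
            if self._index is None:
                self.rebuild()
            # rebuild() always assigns a dict, but mypy cannot see that across
            # the method call, so narrow explicitly before subscripting.
            assert self._index is not None
            return self._index[name]
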
@@ -251,14 +292,12 @@ class Node(Base): """ return "".join(map(str, self.children)) - if sys.version_info > (3, 0): - __str__ = __unicode__ - - def _eq(self, other): + def _eq(self, other) -> bool: """Compare two nodes for equality.""" return (self.type, self.children) == (other.type, other.children) - def clone(self): + def clone(self) -> "Node": + assert self.type is not None """Return a cloned (deep) copy of self.""" return Node( self.type, @@ -266,20 +305,20 @@ class Node(Base): fixers_applied=self.fixers_applied, ) - def post_order(self): + def post_order(self) -> Iterator[NL]: """Return a post-order iterator for the tree.""" for child in self.children: yield from child.post_order() yield self - def pre_order(self): + def pre_order(self) -> Iterator[NL]: """Return a pre-order iterator for the tree.""" yield self for child in self.children: yield from child.pre_order() @property - def prefix(self): + def prefix(self) -> Text: """ The whitespace and comments preceding this node in the input. """ @@ -288,11 +327,11 @@ class Node(Base): return self.children[0].prefix @prefix.setter - def prefix(self, prefix): + def prefix(self, prefix) -> None: if self.children: self.children[0].prefix = prefix - def set_child(self, i, child): + def set_child(self, i: int, child: NL) -> None: """ Equivalent to 'node.children[i] = child'. This method also sets the child's parent attribute appropriately. @@ -303,7 +342,7 @@ class Node(Base): self.changed() self.invalidate_sibling_maps() - def insert_child(self, i, child): + def insert_child(self, i: int, child: NL) -> None: """ Equivalent to 'node.children.insert(i, child)'. This method also sets the child's parent attribute appropriately. @@ -313,7 +352,7 @@ class Node(Base): self.changed() self.invalidate_sibling_maps() - def append_child(self, child): + def append_child(self, child: NL) -> None: """ Equivalent to 'node.children.append(child)'. This method also sets the child's parent attribute appropriately. 
@@ -323,14 +362,16 @@ class Node(Base): self.changed() self.invalidate_sibling_maps() - def invalidate_sibling_maps(self): - self.prev_sibling_map = None - self.next_sibling_map = None + def invalidate_sibling_maps(self) -> None: + self.prev_sibling_map: Optional[Dict[int, Optional[NL]]] = None + self.next_sibling_map: Optional[Dict[int, Optional[NL]]] = None - def update_sibling_maps(self): - self.prev_sibling_map = _prev = {} - self.next_sibling_map = _next = {} - previous = None + def update_sibling_maps(self) -> None: + _prev: Dict[int, Optional[NL]] = {} + _next: Dict[int, Optional[NL]] = {} + self.prev_sibling_map = _prev + self.next_sibling_map = _next + previous: Optional[NL] = None for current in self.children: _prev[id(current)] = previous _next[id(previous)] = current @@ -343,17 +384,30 @@ class Leaf(Base): """Concrete implementation for leaf nodes.""" # Default values for instance variables + value: Text + fixers_applied: List[Any] + bracket_depth: int + opening_bracket: "Leaf" + used_names: Optional[Set[Text]] _prefix = "" # Whitespace and comments preceding this token in the input - lineno = 0 # Line where this token starts in the input - column = 0 # Column where this token starts in the input - - def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]): + lineno: int = 0 # Line where this token starts in the input + column: int = 0 # Column where this token starts in the input + + def __init__( + self, + type: int, + value: Text, + context: Optional[Context] = None, + prefix: Optional[Text] = None, + fixers_applied: List[Any] = [], + ) -> None: """ Initializer. Takes a type constant (a token number < 256), a string value, and an optional context keyword argument. """ + assert 0 <= type < 256, type if context is not None: self._prefix, (self.lineno, self.column) = context @@ -361,19 +415,21 @@ class Leaf(Base): self.value = value if prefix is not None: self._prefix = prefix - self.fixers_applied = fixers_applied[:] + self.fixers_applied: Optional[List[Any]] = fixers_applied[:] + self.children = [] - def __repr__(self): + def __repr__(self) -> str: """Return a canonical string representation.""" from .pgen2.token import tok_name + assert self.type is not None return "%s(%s, %r)" % ( self.__class__.__name__, tok_name.get(self.type, self.type), self.value, ) - def __unicode__(self): + def __str__(self) -> Text: """ Return a pretty string representation. @@ -381,14 +437,12 @@ class Leaf(Base): """ return self.prefix + str(self.value) - if sys.version_info > (3, 0): - __str__ = __unicode__ - - def _eq(self, other): + def _eq(self, other) -> bool: """Compare two nodes for equality.""" return (self.type, self.value) == (other.type, other.value) - def clone(self): + def clone(self) -> "Leaf": + assert self.type is not None """Return a cloned (deep) copy of self.""" return Leaf( self.type, @@ -397,31 +451,31 @@ class Leaf(Base): fixers_applied=self.fixers_applied, ) - def leaves(self): + def leaves(self) -> Iterator["Leaf"]: yield self - def post_order(self): + def post_order(self) -> Iterator["Leaf"]: """Return a post-order iterator for the tree.""" yield self - def pre_order(self): + def pre_order(self) -> Iterator["Leaf"]: """Return a pre-order iterator for the tree.""" yield self @property - def prefix(self): + def prefix(self) -> Text: """ The whitespace and comments preceding this token in the input. 
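
Leaf now declares every instance attribute in its class body, including bracket_depth and opening_bracket, which are attached to leaves by black.py rather than by Leaf itself; for a compiled class the full attribute set has to be spelled out where the class is defined. A toy version of that situation, with hypothetical names:

    from typing import List, Optional

    class Token:
        value: str
        # Set by an external pass, not by __init__, but still declared here so
        # the (possibly compiled) class reserves and types the attribute.
        bracket_depth: int
        opening: Optional["Token"] = None

        def __init__(self, value: str) -> None:
            self.value = value

    def annotate(tokens: List[Token]) -> None:
        depth = 0
        for tok in tokens:
            tok.bracket_depth = depth   # the external pass filling it in
            if tok.value in "([{":
                depth += 1
            elif tok.value in ")]}":
                depth -= 1
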
""" return self._prefix @prefix.setter - def prefix(self, prefix): + def prefix(self, prefix) -> None: self.changed() self._prefix = prefix -def convert(gr, raw_node): +def convert(gr: Grammar, raw_node: RawNode) -> NL: """ Convert raw node information to a Node or Leaf instance. @@ -433,11 +487,15 @@ def convert(gr, raw_node): if children or type in gr.number2symbol: # If there's exactly one child, return that child instead of # creating a new node. + assert children is not None if len(children) == 1: return children[0] return Node(type, children, context=context) else: - return Leaf(type, value, context=context) + return Leaf(type, value or "", context=context) + + +_Results = Dict[Text, NL] class BasePattern(object): @@ -457,22 +515,27 @@ class BasePattern(object): """ # Defaults for instance variables + type: Optional[int] type = None # Node type (token if < 256, symbol if >= 256) - content = None # Optional content matching pattern - name = None # Optional name used to store match in results dict + content: Any = None # Optional content matching pattern + name: Optional[Text] = None # Optional name used to store match in results dict def __new__(cls, *args, **kwds): """Constructor that prevents BasePattern from being instantiated.""" assert cls is not BasePattern, "Cannot instantiate BasePattern" return object.__new__(cls) - def __repr__(self): + def __repr__(self) -> Text: + assert self.type is not None args = [type_repr(self.type), self.content, self.name] while args and args[-1] is None: del args[-1] return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) - def optimize(self): + def _submatch(self, node, results=None) -> bool: + raise NotImplementedError + + def optimize(self) -> "BasePattern": """ A subclass can define this as a hook for optimizations. @@ -480,7 +543,7 @@ class BasePattern(object): """ return self - def match(self, node, results=None): + def match(self, node: NL, results: Optional[_Results] = None) -> bool: """ Does this pattern exactly match a node? @@ -494,18 +557,19 @@ class BasePattern(object): if self.type is not None and node.type != self.type: return False if self.content is not None: - r = None + r: Optional[_Results] = None if results is not None: r = {} if not self._submatch(node, r): return False if r: + assert results is not None results.update(r) if results is not None and self.name: results[self.name] = node return True - def match_seq(self, nodes, results=None): + def match_seq(self, nodes: List[NL], results: Optional[_Results] = None) -> bool: """ Does this pattern exactly match a sequence of nodes? @@ -515,19 +579,24 @@ class BasePattern(object): return False return self.match(nodes[0], results) - def generate_matches(self, nodes): + def generate_matches(self, nodes: List[NL]) -> Iterator[Tuple[int, _Results]]: """ Generator yielding all matches for this pattern. Default implementation for non-wildcard patterns. """ - r = {} + r: _Results = {} if nodes and self.match(nodes[0], r): yield 1, r class LeafPattern(BasePattern): - def __init__(self, type=None, content=None, name=None): + def __init__( + self, + type: Optional[int] = None, + content: Optional[Text] = None, + name: Optional[Text] = None, + ) -> None: """ Initializer. Takes optional type, content, and name. 
@@ -547,7 +616,7 @@ class LeafPattern(BasePattern): self.content = content self.name = name - def match(self, node, results=None): + def match(self, node: NL, results=None): """Override match() to insist on a leaf node.""" if not isinstance(node, Leaf): return False @@ -571,9 +640,14 @@ class LeafPattern(BasePattern): class NodePattern(BasePattern): - wildcards = False + wildcards: bool = False - def __init__(self, type=None, content=None, name=None): + def __init__( + self, + type: Optional[int] = None, + content: Optional[Iterable[Text]] = None, + name: Optional[Text] = None, + ) -> None: """ Initializer. Takes optional type, content, and name. @@ -593,16 +667,16 @@ class NodePattern(BasePattern): assert type >= 256, type if content is not None: assert not isinstance(content, str), repr(content) - content = list(content) - for i, item in enumerate(content): + newcontent = list(content) + for i, item in enumerate(newcontent): assert isinstance(item, BasePattern), (i, item) if isinstance(item, WildcardPattern): self.wildcards = True self.type = type - self.content = content + self.content = newcontent self.name = name - def _submatch(self, node, results=None): + def _submatch(self, node, results=None) -> bool: """ Match the pattern's content to the node's children. @@ -644,7 +718,16 @@ class WildcardPattern(BasePattern): except it always uses non-greedy matching. """ - def __init__(self, content=None, min=0, max=HUGE, name=None): + min: int + max: int + + def __init__( + self, + content: Optional[Text] = None, + min: int = 0, + max: int = HUGE, + name: Optional[Text] = None, + ) -> None: """ Initializer. @@ -669,17 +752,20 @@ class WildcardPattern(BasePattern): """ assert 0 <= min <= max <= HUGE, (min, max) if content is not None: - content = tuple(map(tuple, content)) # Protect against alterations + f = lambda s: tuple(s) + wrapped_content = tuple(map(f, content)) # Protect against alterations # Check sanity of alternatives - assert len(content), repr(content) # Can't have zero alternatives - for alt in content: + assert len(wrapped_content), repr( + wrapped_content + ) # Can't have zero alternatives + for alt in wrapped_content: assert len(alt), repr(alt) # Can have empty alternatives - self.content = content + self.content = wrapped_content self.min = min self.max = max self.name = name - def optimize(self): + def optimize(self) -> Any: """Optimize certain stacked wildcard patterns.""" subpattern = None if ( @@ -707,11 +793,11 @@ class WildcardPattern(BasePattern): ) return self - def match(self, node, results=None): + def match(self, node, results=None) -> bool: """Does this pattern exactly match a node?""" return self.match_seq([node], results) - def match_seq(self, nodes, results=None): + def match_seq(self, nodes, results=None) -> bool: """Does this pattern exactly match a sequence of nodes?""" for c, r in self.generate_matches(nodes): if c == len(nodes): @@ -722,7 +808,7 @@ class WildcardPattern(BasePattern): return True return False - def generate_matches(self, nodes): + def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]: """ Generator yielding matches for a sequence of nodes. 
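
NodePattern and WildcardPattern both stop rebinding their content parameter to a value of a different type and assign the converted value to a fresh local (newcontent, wrapped_content) instead, because mypy gives a variable a single type for the whole scope (absent --allow-redefinition). The same move in miniature, names hypothetical:

    from typing import Iterable, List, Optional

    def normalize(items: Optional[Iterable[str]] = None) -> List[str]:
        # Rather than rebinding `items` (declared Optional[Iterable[str]]) to a
        # list, introduce a separate, precisely typed local.
        normalized: List[str] = [] if items is None else [s.strip() for s in items]
        return normalized
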
@@ -767,7 +853,7 @@ class WildcardPattern(BasePattern): if hasattr(sys, "getrefcount"): sys.stderr = save_stderr - def _iterative_matches(self, nodes): + def _iterative_matches(self, nodes) -> Iterator[Tuple[int, _Results]]: """Helper to iteratively yield the matches.""" nodelen = len(nodes) if 0 >= self.min: @@ -796,10 +882,10 @@ class WildcardPattern(BasePattern): new_results.append((c0 + c1, r)) results = new_results - def _bare_name_matches(self, nodes): + def _bare_name_matches(self, nodes) -> Tuple[int, _Results]: """Special optimized matcher for bare_name.""" count = 0 - r = {} + r = {} # type: _Results done = False max = len(nodes) while not done and count < max: @@ -809,10 +895,11 @@ class WildcardPattern(BasePattern): count += 1 done = False break + assert self.name is not None r[self.name] = nodes[:count] return count, r - def _recursive_matches(self, nodes, count): + def _recursive_matches(self, nodes, count) -> Iterator[Tuple[int, _Results]]: """Helper to recursively yield the matches.""" assert self.content is not None if count >= self.min: @@ -828,7 +915,7 @@ class WildcardPattern(BasePattern): class NegatedPattern(BasePattern): - def __init__(self, content=None): + def __init__(self, content: Optional[Any] = None) -> None: """ Initializer. @@ -841,15 +928,15 @@ class NegatedPattern(BasePattern): assert isinstance(content, BasePattern), repr(content) self.content = content - def match(self, node): + def match(self, node, results=None) -> bool: # We never match a node in its entirety return False - def match_seq(self, nodes): + def match_seq(self, nodes, results=None) -> bool: # We only match an empty sequence of nodes in its entirety return len(nodes) == 0 - def generate_matches(self, nodes): + def generate_matches(self, nodes) -> Iterator[Tuple[int, _Results]]: if self.content is None: # Return a match if there is an empty sequence if len(nodes) == 0: @@ -861,7 +948,9 @@ class NegatedPattern(BasePattern): yield 0, {} -def generate_matches(patterns, nodes): +def generate_matches( + patterns: List[BasePattern], nodes: List[NL] +) -> Iterator[Tuple[int, _Results]]: """ Generator yielding matches for a sequence of patterns and nodes. @@ -887,3 +976,6 @@ def generate_matches(patterns, nodes): r.update(r0) r.update(r1) yield c0 + c1, r + + +_Convert = Callable[[Grammar, RawNode], Any] diff --git a/blib2to3/pytree.pyi b/blib2to3/pytree.pyi deleted file mode 100644 index 650acaa..0000000 --- a/blib2to3/pytree.pyi +++ /dev/null @@ -1,89 +0,0 @@ -# Stubs for lib2to3.pytree (Python 3.6) - -import sys -from typing import Any, Callable, Dict, Iterator, List, Optional, Text, Tuple, TypeVar, Union - -from blib2to3.pgen2.grammar import Grammar - -_P = TypeVar('_P') -_NL = Union[Node, Leaf] -_Context = Tuple[Text, int, int] -_Results = Dict[Text, _NL] -_RawNode = Tuple[int, Text, _Context, Optional[List[_NL]]] -_Convert = Callable[[Grammar, _RawNode], Any] - -HUGE: int - -def type_repr(type_num: int) -> Text: ... - -class Base: - type: int - parent: Optional[Node] - prefix: Text - children: List[_NL] - was_changed: bool - was_checked: bool - def __eq__(self, other: Any) -> bool: ... - def _eq(self: _P, other: _P) -> bool: ... - def clone(self: _P) -> _P: ... - def post_order(self) -> Iterator[_NL]: ... - def pre_order(self) -> Iterator[_NL]: ... - def replace(self, new: Union[_NL, List[_NL]]) -> None: ... - def get_lineno(self) -> int: ... - def changed(self) -> None: ... - def remove(self) -> Optional[int]: ... - @property - def next_sibling(self) -> Optional[_NL]: ... 
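
NegatedPattern.match() and match_seq() pick up a results=None parameter so their signatures line up with BasePattern: once the base methods are typed, an override that accepts fewer arguments is reported as incompatible. A tiny sketch of that rule, with hypothetical classes:

    from typing import Dict, Optional

    class Pattern:
        def match(self, node: str, results: Optional[Dict[str, object]] = None) -> bool:
            return True

    class Negated(Pattern):
        # The extra parameter must be kept (even if unused) or mypy flags the
        # override as incompatible with Pattern.match.
        def match(self, node: str, results: Optional[Dict[str, object]] = None) -> bool:
            return False
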
- @property - def prev_sibling(self) -> Optional[_NL]: ... - def leaves(self) -> Iterator[Leaf]: ... - def depth(self) -> int: ... - def get_suffix(self) -> Text: ... - if sys.version_info < (3,): - def get_prefix(self) -> Text: ... - def set_prefix(self, prefix: Text) -> None: ... - -class Node(Base): - fixers_applied: List[Any] - def __init__(self, type: int, children: List[_NL], context: Optional[Any] = ..., prefix: Optional[Text] = ..., fixers_applied: Optional[List[Any]] = ...) -> None: ... - def set_child(self, i: int, child: _NL) -> None: ... - def insert_child(self, i: int, child: _NL) -> None: ... - def append_child(self, child: _NL) -> None: ... - -class Leaf(Base): - lineno: int - column: int - value: Text - fixers_applied: List[Any] - def __init__(self, type: int, value: Text, context: Optional[_Context] = ..., prefix: Optional[Text] = ..., fixers_applied: List[Any] = ...) -> None: ... - # bolted on attributes by Black - bracket_depth: int - opening_bracket: Leaf - -def convert(gr: Grammar, raw_node: _RawNode) -> _NL: ... - -class BasePattern: - type: int - content: Optional[Text] - name: Optional[Text] - def optimize(self) -> BasePattern: ... # sic, subclasses are free to optimize themselves into different patterns - def match(self, node: _NL, results: Optional[_Results] = ...) -> bool: ... - def match_seq(self, nodes: List[_NL], results: Optional[_Results] = ...) -> bool: ... - def generate_matches(self, nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ... - -class LeafPattern(BasePattern): - def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ... - -class NodePattern(BasePattern): - wildcards: bool - def __init__(self, type: Optional[int] = ..., content: Optional[Text] = ..., name: Optional[Text] = ...) -> None: ... - -class WildcardPattern(BasePattern): - min: int - max: int - def __init__(self, content: Optional[Text] = ..., min: int = ..., max: int = ..., name: Optional[Text] = ...) -> None: ... - -class NegatedPattern(BasePattern): - def __init__(self, content: Optional[Text] = ...) -> None: ... - -def generate_matches(patterns: List[BasePattern], nodes: List[_NL]) -> Iterator[Tuple[int, _Results]]: ... diff --git a/mypy.ini b/mypy.ini index d189edf..a484d66 100644 --- a/mypy.ini +++ b/mypy.ini @@ -21,6 +21,7 @@ strict_optional=True warn_no_return=True warn_redundant_casts=True warn_unused_ignores=True +disallow_any_generics=True # The following are off by default. Flip them on if you feel # adventurous. 
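
disallow_any_generics makes mypy reject generic types left unparameterized, which keeps implicit Any out of the compiled modules. An illustrative example (not code from this diff):

    from typing import Dict, List

    def tally(words: List[str]) -> Dict[str, int]:
        # With disallow_any_generics, writing the signature as "words: List"
        # or "-> Dict" would be rejected for missing type parameters, so the
        # generics are always spelled out in full.
        counts: Dict[str, int] = {}
        for w in words:
            counts[w] = counts.get(w, 0) + 1
        return counts

The setup.py hunk that follows wires the compilation itself in: judging from that code, a compiled build would be triggered with something like "python setup.py --use-mypyc bdist_wheel" or by exporting BLACK_USE_MYPYC=1 before installing, with MYPYC_OPT_LEVEL selecting the optimization level and a mypyc checkout expected on PYTHONPATH.
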
diff --git a/setup.py b/setup.py index 095d04a..7569d51 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ # Copyright (C) 2018 Łukasz Langa from setuptools import setup import sys +import os assert sys.version_info >= (3, 6, 0), "black requires Python 3.6+" from pathlib import Path # noqa E402 @@ -15,6 +16,33 @@ def get_long_description() -> str: return ld_file.read() +USE_MYPYC = False +# To compile with mypyc, a mypyc checkout must be present on the PYTHONPATH +if len(sys.argv) > 1 and sys.argv[1] == "--use-mypyc": + sys.argv.pop(1) + USE_MYPYC = True +if os.getenv("BLACK_USE_MYPYC", None) == "1": + USE_MYPYC = True + +if USE_MYPYC: + mypyc_targets = [ + "black.py", + "blib2to3/pytree.py", + "blib2to3/pygram.py", + "blib2to3/pgen2/parse.py", + "blib2to3/pgen2/grammar.py", + "blib2to3/pgen2/token.py", + "blib2to3/pgen2/driver.py", + "blib2to3/pgen2/pgen.py", + ] + + from mypyc.build import mypycify + + opt_level = os.getenv("MYPYC_OPT_LEVEL", "3") + ext_modules = mypycify(mypyc_targets, opt_level=opt_level) +else: + ext_modules = [] + setup( name="black", use_scm_version={ @@ -30,6 +58,7 @@ setup( url="https://github.com/psf/black", license="MIT", py_modules=["black", "blackd", "_black_version"], + ext_modules=ext_modules, packages=["blib2to3", "blib2to3.pgen2"], package_data={"blib2to3": ["*.txt"]}, python_requires=">=3.6", @@ -43,6 +72,8 @@ setup( "regex", "pathspec>=0.6, <1", "dataclasses>=0.6; python_version < '3.7'", + "typing_extensions>=3.7.4", + "mypy_extensions>=0.4.3", ], extras_require={"d": ["aiohttp>=3.3.2", "aiohttp-cors"]}, test_suite="tests.test_black", diff --git a/tests/test_black.py b/tests/test_black.py index 93f853b..40bde36 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1540,6 +1540,7 @@ class BlackTestCase(unittest.TestCase): # outside of the `root` directory. path.iterdir.return_value = [child] child.resolve.return_value = Path("/a/b/c") + child.as_posix.return_value = "/a/b/c" child.is_symlink.return_value = True try: list(