From: Benjamin Woodruff
Date: Wed, 8 May 2019 20:38:38 +0000 (-0400)
Subject: Move tokenizer config onto grammar, rename flag
X-Git-Url: https://git.madduck.net/etc/vim.git/commitdiff_plain/448885b256ca1741fda7c4ef17f80f750ea614c0

Move tokenizer config onto grammar, rename flag

Based on the feedback in
https://github.com/python/black/pull/845#issuecomment-490622711

- Remove TokenizerConfig, and add a field to Grammar instead.
- Pass the Grammar to the tokenizer.
- Rename `ASYNC_IS_RESERVED_KEYWORD` to `ASYNC_KEYWORDS` and
  `ASYNC_IS_VALID_IDENTIFIER` to `ASYNC_IDENTIFIERS`.
---

diff --git a/black.py b/black.py
index c8aa30b..17aea7a 100644
--- a/black.py
+++ b/black.py
@@ -48,7 +48,6 @@ from blib2to3 import pygram, pytree
 from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.grammar import Grammar
 from blib2to3.pgen2.parse import ParseError
-from blib2to3.pgen2.tokenize import TokenizerConfig


 __version__ = "19.3b0"
@@ -139,18 +138,18 @@ class Feature(Enum):
     TRAILING_COMMA_IN_DEF = 5
     # The following two feature-flags are mutually exclusive, and exactly one should be
     # set for every version of python.
-    ASYNC_IS_VALID_IDENTIFIER = 6
-    ASYNC_IS_RESERVED_KEYWORD = 7
+    ASYNC_IDENTIFIERS = 6
+    ASYNC_KEYWORDS = 7


 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
-    TargetVersion.PY27: {Feature.ASYNC_IS_VALID_IDENTIFIER},
-    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
-    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
+    TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
+    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
+    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
     TargetVersion.PY35: {
         Feature.UNICODE_LITERALS,
         Feature.TRAILING_COMMA_IN_CALL,
-        Feature.ASYNC_IS_VALID_IDENTIFIER,
+        Feature.ASYNC_IDENTIFIERS,
     },
     TargetVersion.PY36: {
         Feature.UNICODE_LITERALS,
@@ -158,7 +157,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_VALID_IDENTIFIER,
+        Feature.ASYNC_IDENTIFIERS,
     },
     TargetVersion.PY37: {
         Feature.UNICODE_LITERALS,
@@ -166,7 +165,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_RESERVED_KEYWORD,
+        Feature.ASYNC_KEYWORDS,
     },
     TargetVersion.PY38: {
         Feature.UNICODE_LITERALS,
@@ -174,7 +173,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_RESERVED_KEYWORD,
+        Feature.ASYNC_KEYWORDS,
     },
 }

@@ -760,62 +759,42 @@ def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
         return tiow.read(), encoding, newline


-@dataclass(frozen=True)
-class ParserConfig:
-    grammar: Grammar
-    tokenizer_config: TokenizerConfig = TokenizerConfig()
-
-
-def get_parser_configs(target_versions: Set[TargetVersion]) -> List[ParserConfig]:
+def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
     if not target_versions:
         # No target_version specified, so try all grammars.
         return [
             # Python 3.7+
-            ParserConfig(
-                pygram.python_grammar_no_print_statement_no_exec_statement,
-                TokenizerConfig(async_is_reserved_keyword=True),
-            ),
+            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
             # Python 3.0-3.6
-            ParserConfig(
-                pygram.python_grammar_no_print_statement_no_exec_statement,
-                TokenizerConfig(async_is_reserved_keyword=False),
-            ),
+            pygram.python_grammar_no_print_statement_no_exec_statement,
             # Python 2.7 with future print_function import
-            ParserConfig(pygram.python_grammar_no_print_statement),
+            pygram.python_grammar_no_print_statement,
             # Python 2.7
-            ParserConfig(pygram.python_grammar),
+            pygram.python_grammar,
         ]
     elif all(version.is_python2() for version in target_versions):
         # Python 2-only code, so try Python 2 grammars.
         return [
             # Python 2.7 with future print_function import
-            ParserConfig(pygram.python_grammar_no_print_statement),
+            pygram.python_grammar_no_print_statement,
             # Python 2.7
-            ParserConfig(pygram.python_grammar),
+            pygram.python_grammar,
         ]
     else:
         # Python 3-compatible code, so only try Python 3 grammar.
-        configs = []
+        grammars = []
         # If we have to parse both, try to parse async as a keyword first
-        if not supports_feature(target_versions, Feature.ASYNC_IS_VALID_IDENTIFIER):
+        if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
             # Python 3.7+
-            configs.append(
-                ParserConfig(
-                    pygram.python_grammar_no_print_statement_no_exec_statement,
-                    TokenizerConfig(async_is_reserved_keyword=True),
-                )
+            grammars.append(
+                pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords  # noqa: B950
             )
-        if not supports_feature(target_versions, Feature.ASYNC_IS_RESERVED_KEYWORD):
+        if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
             # Python 3.0-3.6
-            configs.append(
-                ParserConfig(
-                    pygram.python_grammar_no_print_statement_no_exec_statement,
-                    TokenizerConfig(async_is_reserved_keyword=False),
-                )
-            )
+            grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
         # At least one of the above branches must have been taken, because every Python
-        # version has exactly one of the two 'ASYNC_IS_*' flags
-        return configs
+        # version has exactly one of the two 'ASYNC_*' flags
+        return grammars


 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
@@ -823,12 +802,8 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
     if src_txt[-1:] != "\n":
         src_txt += "\n"

-    for parser_config in get_parser_configs(set(target_versions)):
-        drv = driver.Driver(
-            parser_config.grammar,
-            pytree.convert,
-            tokenizer_config=parser_config.tokenizer_config,
-        )
+    for grammar in get_grammars(set(target_versions)):
+        drv = driver.Driver(grammar, pytree.convert)
         try:
             result = drv.parse_string(src_txt, True)
             break
diff --git a/blib2to3/pgen2/driver.py b/blib2to3/pgen2/driver.py
index e681b52..6452c57 100644
--- a/blib2to3/pgen2/driver.py
+++ b/blib2to3/pgen2/driver.py
@@ -34,14 +34,12 @@ class Driver(object):
         grammar,
         convert=None,
         logger=None,
-        tokenizer_config=tokenize.TokenizerConfig(),
     ):
         self.grammar = grammar
         if logger is None:
             logger = logging.getLogger(__name__)
         self.logger = logger
         self.convert = convert
-        self.tokenizer_config = tokenizer_config

     def parse_tokens(self, tokens, debug=False):
         """Parse a series of tokens and return the syntax tree."""
@@ -104,7 +102,7 @@ class Driver(object):

     def parse_stream_raw(self, stream, debug=False):
         """Parse a stream and return the syntax tree."""
-        tokens = tokenize.generate_tokens(stream.readline, config=self.tokenizer_config)
+        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
         return self.parse_tokens(tokens, debug)

     def parse_stream(self, stream, debug=False):
@@ -120,7 +118,7 @@ class Driver(object):
         """Parse a string and return the syntax tree."""
         tokens = tokenize.generate_tokens(
             io.StringIO(text).readline,
-            config=self.tokenizer_config,
+            grammar=self.grammar
         )
         return self.parse_tokens(tokens, debug)

diff --git a/blib2to3/pgen2/driver.pyi b/blib2to3/pgen2/driver.pyi
index a4a3546..f098bf5 100644
--- a/blib2to3/pgen2/driver.pyi
+++ b/blib2to3/pgen2/driver.pyi
@@ -8,20 +8,13 @@ from logging import Logger
 from blib2to3.pytree import _Convert, _NL
 from blib2to3.pgen2 import _Path
 from blib2to3.pgen2.grammar import Grammar
-from blib2to3.pgen2.tokenize import TokenizerConfig

 class Driver:
     grammar: Grammar
     logger: Logger
     convert: _Convert
-    def __init__(
-        self,
-        grammar: Grammar,
-        convert: Optional[_Convert] = ...,
-        logger: Optional[Logger] = ...,
-        tokenizer_config: TokenizerConfig = ...
-    ) -> None: ...
+    def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
     def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
     def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
     def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
diff --git a/blib2to3/pgen2/grammar.py b/blib2to3/pgen2/grammar.py
index 3ccf38f..32d1d8b 100644
--- a/blib2to3/pgen2/grammar.py
+++ b/blib2to3/pgen2/grammar.py
@@ -85,6 +85,8 @@ class Grammar(object):
         self.tokens = {}
         self.symbol2label = {}
         self.start = 256
+        # Python 3.7+ parses async as a keyword, not an identifier
+        self.async_keywords = False

     def dump(self, filename):
         """Dump the grammar tables to a pickle file."""
@@ -113,6 +115,7 @@ class Grammar(object):
         new.labels = self.labels[:]
         new.states = self.states[:]
         new.start = self.start
+        new.async_keywords = self.async_keywords
         return new

     def report(self):
diff --git a/blib2to3/pgen2/grammar.pyi b/blib2to3/pgen2/grammar.pyi
index 353086d..8173e2f 100644
--- a/blib2to3/pgen2/grammar.pyi
+++ b/blib2to3/pgen2/grammar.pyi
@@ -19,6 +19,7 @@ class Grammar:
     tokens: Dict[int, int]
     symbol2label: Dict[Text, int]
     start: int
+    async_keywords: bool
     def __init__(self) -> None: ...
     def dump(self, filename: _Path) -> None: ...
     def load(self, filename: _Path) -> None: ...
diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py
index 43e1d59..0912f43 100644
--- a/blib2to3/pgen2/tokenize.py
+++ b/blib2to3/pgen2/tokenize.py
@@ -31,7 +31,6 @@ __credits__ = \

 import re
 from codecs import BOM_UTF8, lookup
-from attr import dataclass
 from blib2to3.pgen2.token import *

 from . import token
@@ -138,10 +137,6 @@ single_quoted = (

 tabsize = 8

-@dataclass(frozen=True)
-class TokenizerConfig:
-    async_is_reserved_keyword: bool = False
-
 class TokenError(Exception): pass

 class StopTokenizing(Exception): pass
@@ -339,7 +334,7 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)

-def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
+def generate_tokens(readline, grammar=None):
     """
     The generate_tokens() generator requires one argument, readline, which
     must be a callable object which provides the same interface as the
@@ -363,7 +358,7 @@ def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):

     # If we know we're parsing 3.7+, we can unconditionally parse `async` and
     # `await` as keywords.
-    async_is_reserved_keyword = config.async_is_reserved_keyword
+    async_keywords = False if grammar is None else grammar.async_keywords
     # 'stashed' and 'async_*' are used for async/await parsing
     stashed = None
     async_def = False
@@ -514,7 +509,7 @@ def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
                 yield (STRING, token, spos, epos, line)
             elif initial.isidentifier():               # ordinary name
                 if token in ('async', 'await'):
-                    if async_is_reserved_keyword or async_def:
+                    if async_keywords or async_def:
                         yield (ASYNC if token == 'async' else AWAIT,
                                token, spos, epos, line)
                         continue
diff --git a/blib2to3/pgen2/tokenize.pyi b/blib2to3/pgen2/tokenize.pyi
index ac0f0f1..d3011a8 100644
--- a/blib2to3/pgen2/tokenize.pyi
+++ b/blib2to3/pgen2/tokenize.pyi
@@ -1,18 +1,15 @@
 # Stubs for lib2to3.pgen2.tokenize (Python 3.6)
 # NOTE: Only elements from __all__ are present.

-from typing import Callable, Iterable, Iterator, List, Text, Tuple
-from attr import dataclass
+from typing import Callable, Iterable, Iterator, List, Optional, Text, Tuple
 from blib2to3.pgen2.token import *  # noqa
+from blib2to3.pygram import Grammar

 _Coord = Tuple[int, int]
 _TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
 _TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]

-@dataclass(frozen=True)
-class TokenizerConfig:
-    async_is_reserved_keyword: bool = False
-
 class TokenError(Exception): ...
 class StopTokenizing(Exception): ...
@@ -30,5 +27,6 @@ class Untokenizer:

 def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
 def generate_tokens(
-    readline: Callable[[], Text]
+    readline: Callable[[], Text],
+    grammar: Optional[Grammar] = ...
 ) -> Iterator[_TokenInfo]: ...
diff --git a/blib2to3/pygram.py b/blib2to3/pygram.py
index 725fb69..f6ef001 100644
--- a/blib2to3/pygram.py
+++ b/blib2to3/pygram.py
@@ -33,6 +33,7 @@ def initialize(cache_dir=None):
     global python_grammar
     global python_grammar_no_print_statement
     global python_grammar_no_print_statement_no_exec_statement
+    global python_grammar_no_print_statement_no_exec_statement_async_keywords
     global python_symbols
     global pattern_grammar
     global pattern_symbols
@@ -47,11 +48,17 @@ def initialize(cache_dir=None):
     python_grammar_no_print_statement = python_grammar.copy()
     del python_grammar_no_print_statement.keywords["print"]

-    # Python 3
+    # Python 3.0-3.6
     python_grammar_no_print_statement_no_exec_statement = python_grammar.copy()
     del python_grammar_no_print_statement_no_exec_statement.keywords["print"]
     del python_grammar_no_print_statement_no_exec_statement.keywords["exec"]

+    # Python 3.7+
+    python_grammar_no_print_statement_no_exec_statement_async_keywords = (
+        python_grammar_no_print_statement_no_exec_statement.copy()
+    )
+    python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
+
     pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir)
     pattern_symbols = Symbols(pattern_grammar)
diff --git a/blib2to3/pygram.pyi b/blib2to3/pygram.pyi
index 2953bfe..1660900 100644
--- a/blib2to3/pygram.pyi
+++ b/blib2to3/pygram.pyi
@@ -118,6 +118,7 @@ class pattern_symbols(Symbols):
 python_grammar: Grammar
 python_grammar_no_print_statement: Grammar
 python_grammar_no_print_statement_no_exec_statement: Grammar
+python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar
 python_grammar_no_exec_statement: Grammar
 pattern_grammar: Grammar
diff --git a/tests/data/python37.py b/tests/data/python37.py
index 4401b7b..dab8b40 100644
--- a/tests/data/python37.py
+++ b/tests/data/python37.py
@@ -1,10 +1,16 @@
 #!/usr/bin/env python3.7

+
 def f():
-    return (i*2 async for i in arange(42))
+    return (i * 2 async for i in arange(42))
+

 def g():
-    return (something_long * something_long async for something_long in async_generator(with_an_argument))
+    return (
+        something_long * something_long
+        async for something_long in async_generator(with_an_argument)
+    )
+

 async def func():
     if test:
@@ -15,9 +21,11 @@ async def func():
         )
     ]

+
 def awaited_generator_value(n):
     return (await awaitable for awaitable in awaitable_list)

+
 def make_arange(n):
     return (i * 2 for i in range(n) if await wrap(i))
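
For illustration only, not part of the patch: a minimal sketch of how the
reworked API fits together after this change. It assumes blib2to3 is
importable on its own and that pygram.initialize() can build or load its
pickled grammar tables; black itself calls pygram.initialize(CACHE_DIR) at
import time.

    import io

    from blib2to3 import pygram, pytree
    from blib2to3.pgen2 import driver, tokenize

    # Populate the module-level grammar objects (assumption: the default
    # cache location, or regenerating from Grammar.txt, is acceptable here).
    pygram.initialize()

    src = "async def f():\n    await g()\n"

    # The async-as-keyword decision now lives on the Grammar itself.
    py37 = pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
    py36 = pygram.python_grammar_no_print_statement_no_exec_statement
    assert py37.async_keywords and not py36.async_keywords

    # Grammar.copy() preserves the flag; this is how pygram derives the
    # 3.7+ grammar from the 3.0-3.6 one.
    assert py37.copy().async_keywords

    # generate_tokens() now takes the grammar directly instead of a
    # TokenizerConfig; with the 3.7+ grammar, `async`/`await` always
    # tokenize as the ASYNC/AWAIT token types.
    tokens = list(tokenize.generate_tokens(io.StringIO(src).readline, grammar=py37))

    # The Driver drops its tokenizer_config parameter and forwards its
    # grammar to the tokenizer instead.
    drv = driver.Driver(py37, convert=pytree.convert)
    tree = drv.parse_string(src, True)

With the 3.0-3.6 grammar, the same tokenizer call would emit `async`/`await`
as NAME tokens outside `async def` bodies, which is why get_grammars() tries
the async-keywords grammar first when both are possible.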