Move tokenizer config onto grammar, rename flag
author    Benjamin Woodruff <github@benjam.info>
          Wed, 8 May 2019 20:38:38 +0000 (16:38 -0400)
committer Łukasz Langa <lukasz@langa.pl>
          Thu, 9 May 2019 15:59:29 +0000 (17:59 +0200)
Based on the feedback in
https://github.com/python/black/pull/845#issuecomment-490622711

- Remove TokenizerConfig, and add a field to Grammar instead.
- Pass the Grammar to the tokenizer.
- Rename `ASYNC_IS_RESERVED_KEYWORD` to `ASYNC_KEYWORDS` and
  `ASYNC_IS_VALID_IDENTIFIER` to `ASYNC_IDENTIFIERS`.
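
The net effect is that choosing a grammar now also chooses the tokenizer
behaviour. A minimal sketch of the new call shape, assuming
pygram.initialize() has already populated the module-level grammars
(black.py does this with its cache directory); the source snippet passed
to parse_string is made up for illustration:

    from blib2to3 import pygram, pytree
    from blib2to3.pgen2 import driver

    # The async-tokenization flag now lives on the Grammar object:
    py37_grammar = (
        pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
    )
    assert py37_grammar.async_keywords  # `async`/`await` tokenized as keywords

    # Driver no longer takes a tokenizer_config; it forwards its grammar
    # to tokenize.generate_tokens(readline, grammar=...) internally.
    drv = driver.Driver(py37_grammar, pytree.convert)
    tree = drv.parse_string("async def f():\n    await g()\n", True)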

black.py
blib2to3/pgen2/driver.py
blib2to3/pgen2/driver.pyi
blib2to3/pgen2/grammar.py
blib2to3/pgen2/grammar.pyi
blib2to3/pgen2/tokenize.py
blib2to3/pgen2/tokenize.pyi
blib2to3/pygram.py
blib2to3/pygram.pyi
tests/data/python37.py

index c8aa30b812b49795c797e0e262159ebda024f67a..17aea7af264adafc4f5ee18cb9389cdfcdce8e17 100644
--- a/black.py
+++ b/black.py
@@ -48,7 +48,6 @@ from blib2to3 import pygram, pytree
 from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.grammar import Grammar
 from blib2to3.pgen2.parse import ParseError
-from blib2to3.pgen2.tokenize import TokenizerConfig
 
 
 __version__ = "19.3b0"
@@ -139,18 +138,18 @@ class Feature(Enum):
     TRAILING_COMMA_IN_DEF = 5
     # The following two feature-flags are mutually exclusive, and exactly one should be
     # set for every version of python.
-    ASYNC_IS_VALID_IDENTIFIER = 6
-    ASYNC_IS_RESERVED_KEYWORD = 7
+    ASYNC_IDENTIFIERS = 6
+    ASYNC_KEYWORDS = 7
 
 
 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
-    TargetVersion.PY27: {Feature.ASYNC_IS_VALID_IDENTIFIER},
-    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
-    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
+    TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
+    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
+    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
     TargetVersion.PY35: {
         Feature.UNICODE_LITERALS,
         Feature.TRAILING_COMMA_IN_CALL,
-        Feature.ASYNC_IS_VALID_IDENTIFIER,
+        Feature.ASYNC_IDENTIFIERS,
     },
     TargetVersion.PY36: {
         Feature.UNICODE_LITERALS,
@@ -158,7 +157,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_VALID_IDENTIFIER,
+        Feature.ASYNC_IDENTIFIERS,
     },
     TargetVersion.PY37: {
         Feature.UNICODE_LITERALS,
@@ -166,7 +165,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_RESERVED_KEYWORD,
+        Feature.ASYNC_KEYWORDS,
     },
     TargetVersion.PY38: {
         Feature.UNICODE_LITERALS,
@@ -174,7 +173,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
-        Feature.ASYNC_IS_RESERVED_KEYWORD,
+        Feature.ASYNC_KEYWORDS,
     },
 }
 
@@ -760,62 +759,42 @@ def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
         return tiow.read(), encoding, newline
 
 
-@dataclass(frozen=True)
-class ParserConfig:
-    grammar: Grammar
-    tokenizer_config: TokenizerConfig = TokenizerConfig()
-
-
-def get_parser_configs(target_versions: Set[TargetVersion]) -> List[ParserConfig]:
+def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
     if not target_versions:
         # No target_version specified, so try all grammars.
         return [
             # Python 3.7+
-            ParserConfig(
-                pygram.python_grammar_no_print_statement_no_exec_statement,
-                TokenizerConfig(async_is_reserved_keyword=True),
-            ),
+            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
             # Python 3.0-3.6
-            ParserConfig(
-                pygram.python_grammar_no_print_statement_no_exec_statement,
-                TokenizerConfig(async_is_reserved_keyword=False),
-            ),
+            pygram.python_grammar_no_print_statement_no_exec_statement,
             # Python 2.7 with future print_function import
-            ParserConfig(pygram.python_grammar_no_print_statement),
+            pygram.python_grammar_no_print_statement,
             # Python 2.7
-            ParserConfig(pygram.python_grammar),
+            pygram.python_grammar,
         ]
     elif all(version.is_python2() for version in target_versions):
         # Python 2-only code, so try Python 2 grammars.
         return [
             # Python 2.7 with future print_function import
-            ParserConfig(pygram.python_grammar_no_print_statement),
+            pygram.python_grammar_no_print_statement,
             # Python 2.7
-            ParserConfig(pygram.python_grammar),
+            pygram.python_grammar,
         ]
     else:
         # Python 3-compatible code, so only try Python 3 grammar.
-        configs = []
+        grammars = []
         # If we have to parse both, try to parse async as a keyword first
-        if not supports_feature(target_versions, Feature.ASYNC_IS_VALID_IDENTIFIER):
+        if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
             # Python 3.7+
-            configs.append(
-                ParserConfig(
-                    pygram.python_grammar_no_print_statement_no_exec_statement,
-                    TokenizerConfig(async_is_reserved_keyword=True),
-                )
+            grammars.append(
+                pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords  # noqa: B950
             )
-        if not supports_feature(target_versions, Feature.ASYNC_IS_RESERVED_KEYWORD):
+        if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
             # Python 3.0-3.6
-            configs.append(
-                ParserConfig(
-                    pygram.python_grammar_no_print_statement_no_exec_statement,
-                    TokenizerConfig(async_is_reserved_keyword=False),
-                )
-            )
+            grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
         # At least one of the above branches must have been taken, because every Python
-        # version has exactly one of the two 'ASYNC_IS_*' flags
-        return configs
+        # version has exactly one of the two 'ASYNC_*' flags
+        return grammars
 
 
 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
@@ -823,12 +802,8 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
     if src_txt[-1:] != "\n":
         src_txt += "\n"
 
-    for parser_config in get_parser_configs(set(target_versions)):
-        drv = driver.Driver(
-            parser_config.grammar,
-            pytree.convert,
-            tokenizer_config=parser_config.tokenizer_config,
-        )
+    for grammar in get_grammars(set(target_versions)):
+        drv = driver.Driver(grammar, pytree.convert)
         try:
             result = drv.parse_string(src_txt, True)
             break
index e681b526a2219ae46dff248bb991173622c7698e..6452c57a1fa4b4a9e4c35541205c4919d6e48ddc 100644
--- a/blib2to3/pgen2/driver.py
+++ b/blib2to3/pgen2/driver.py
@@ -34,14 +34,12 @@ class Driver(object):
         grammar,
         convert=None,
         logger=None,
-        tokenizer_config=tokenize.TokenizerConfig(),
     ):
         self.grammar = grammar
         if logger is None:
             logger = logging.getLogger(__name__)
         self.logger = logger
         self.convert = convert
-        self.tokenizer_config = tokenizer_config
 
     def parse_tokens(self, tokens, debug=False):
         """Parse a series of tokens and return the syntax tree."""
@@ -104,7 +102,7 @@ class Driver(object):
 
     def parse_stream_raw(self, stream, debug=False):
         """Parse a stream and return the syntax tree."""
-        tokens = tokenize.generate_tokens(stream.readline, config=self.tokenizer_config)
+        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
         return self.parse_tokens(tokens, debug)
 
     def parse_stream(self, stream, debug=False):
@@ -120,7 +118,7 @@ class Driver(object):
         """Parse a string and return the syntax tree."""
         tokens = tokenize.generate_tokens(
             io.StringIO(text).readline,
-            config=self.tokenizer_config,
+            grammar=self.grammar
         )
         return self.parse_tokens(tokens, debug)
 
index a4a354634385d62e1d71b3517574caf3028b4155..f098bf512a1fd1c1857a2dd51fec490cdd6e529f 100644
--- a/blib2to3/pgen2/driver.pyi
+++ b/blib2to3/pgen2/driver.pyi
@@ -8,20 +8,13 @@ from logging import Logger
 from blib2to3.pytree import _Convert, _NL
 from blib2to3.pgen2 import _Path
 from blib2to3.pgen2.grammar import Grammar
-from blib2to3.pgen2.tokenize import TokenizerConfig
 
 
 class Driver:
     grammar: Grammar
     logger: Logger
     convert: _Convert
-    def __init__(
-        self,
-        grammar: Grammar,
-        convert: Optional[_Convert] = ...,
-        logger: Optional[Logger] = ...,
-        tokenizer_config: TokenizerConfig = ...
-    ) -> None: ...
+    def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
     def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
     def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
     def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
index 3ccf38ff9d80acc344ae6acbfb12433a9f9542bc..32d1d8b7226e3b4b8f27068d70628c12cdf42fa6 100644
--- a/blib2to3/pgen2/grammar.py
+++ b/blib2to3/pgen2/grammar.py
@@ -85,6 +85,8 @@ class Grammar(object):
         self.tokens = {}
         self.symbol2label = {}
         self.start = 256
+        # Python 3.7+ parses async as a keyword, not an identifier
+        self.async_keywords = False
 
     def dump(self, filename):
         """Dump the grammar tables to a pickle file."""
@@ -113,6 +115,7 @@ class Grammar(object):
         new.labels = self.labels[:]
         new.states = self.states[:]
         new.start = self.start
+        new.async_keywords = self.async_keywords
         return new
 
     def report(self):
index 353086d6441d66ddc75ac37f1c62234d7501b0fc..8173e2f85ef04c67642584990a471d74f7e8d487 100644
--- a/blib2to3/pgen2/grammar.pyi
+++ b/blib2to3/pgen2/grammar.pyi
@@ -19,6 +19,7 @@ class Grammar:
     tokens: Dict[int, int]
     symbol2label: Dict[Text, int]
     start: int
+    async_keywords: bool
     def __init__(self) -> None: ...
     def dump(self, filename: _Path) -> None: ...
     def load(self, filename: _Path) -> None: ...
index 43e1d597bc9b64792dd19229830f7578032fbe41..0912f43b867719f69e987bf1111725f81ed3693a 100644
--- a/blib2to3/pgen2/tokenize.py
+++ b/blib2to3/pgen2/tokenize.py
@@ -31,7 +31,6 @@ __credits__ = \
 
 import re
 from codecs import BOM_UTF8, lookup
-from attr import dataclass
 from blib2to3.pgen2.token import *
 
 from . import token
@@ -138,10 +137,6 @@ single_quoted = (
 
 tabsize = 8
 
-@dataclass(frozen=True)
-class TokenizerConfig:
-    async_is_reserved_keyword: bool = False
-
 class TokenError(Exception): pass
 
 class StopTokenizing(Exception): pass
@@ -339,7 +334,7 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)
 
-def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
+def generate_tokens(readline, grammar=None):
     """
     The generate_tokens() generator requires one argument, readline, which
     must be a callable object which provides the same interface as the
@@ -363,7 +358,7 @@ def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
 
     # If we know we're parsing 3.7+, we can unconditionally parse `async` and
     # `await` as keywords.
-    async_is_reserved_keyword = config.async_is_reserved_keyword
+    async_keywords = False if grammar is None else grammar.async_keywords
     # 'stashed' and 'async_*' are used for async/await parsing
     stashed = None
     async_def = False
@@ -514,7 +509,7 @@ def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
                         yield (STRING, token, spos, epos, line)
                 elif initial.isidentifier():               # ordinary name
                     if token in ('async', 'await'):
-                        if async_is_reserved_keyword or async_def:
+                        if async_keywords or async_def:
                             yield (ASYNC if token == 'async' else AWAIT,
                                    token, spos, epos, line)
                             continue
index ac0f0f1bf6f1f254016e9b006c498e87787f93f6..d3011a8091bfaa83f42ee6b76ff4c934667f79ed 100644
--- a/blib2to3/pgen2/tokenize.pyi
+++ b/blib2to3/pgen2/tokenize.pyi
@@ -1,18 +1,15 @@
 # Stubs for lib2to3.pgen2.tokenize (Python 3.6)
 # NOTE: Only elements from __all__ are present.
 
-from typing import Callable, Iterable, Iterator, List, Text, Tuple
-from attr import dataclass
+from typing import Callable, Iterable, Iterator, List, Optional, Text, Tuple
 from blib2to3.pgen2.token import *  # noqa
+from blib2to3.pygram import Grammar
 
 
 _Coord = Tuple[int, int]
 _TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
 _TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]
 
-@dataclass(frozen=True)
-class TokenizerConfig:
-    async_is_reserved_keyword: bool = False
 
 class TokenError(Exception): ...
 class StopTokenizing(Exception): ...
@@ -30,5 +27,6 @@ class Untokenizer:
 
 def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
 def generate_tokens(
-    readline: Callable[[], Text]
+    readline: Callable[[], Text],
+    grammar: Optional[Grammar] = ...
 ) -> Iterator[_TokenInfo]: ...
index 725fb699bf62410838f08b95afc0d0d077d03e5d..f6ef00154e13c2fedae946310d9f73e6e576be0f 100644
--- a/blib2to3/pygram.py
+++ b/blib2to3/pygram.py
@@ -33,6 +33,7 @@ def initialize(cache_dir=None):
     global python_grammar
     global python_grammar_no_print_statement
     global python_grammar_no_print_statement_no_exec_statement
+    global python_grammar_no_print_statement_no_exec_statement_async_keywords
     global python_symbols
     global pattern_grammar
     global pattern_symbols
@@ -47,11 +48,17 @@ def initialize(cache_dir=None):
     python_grammar_no_print_statement = python_grammar.copy()
     del python_grammar_no_print_statement.keywords["print"]
 
-    # Python 3
+    # Python 3.0-3.6
     python_grammar_no_print_statement_no_exec_statement = python_grammar.copy()
     del python_grammar_no_print_statement_no_exec_statement.keywords["print"]
     del python_grammar_no_print_statement_no_exec_statement.keywords["exec"]
 
+    # Python 3.7+
+    python_grammar_no_print_statement_no_exec_statement_async_keywords = (
+        python_grammar_no_print_statement_no_exec_statement.copy()
+    )
+    python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
+
     pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
                                                    cache_dir)
     pattern_symbols = Symbols(pattern_grammar)
index 2953bfe47d42814e6d0320c6d757a1bc725f4fa9..1660900097ac377d43c98853f40a753f5c60c7b2 100644
--- a/blib2to3/pygram.pyi
+++ b/blib2to3/pygram.pyi
@@ -118,6 +118,7 @@ class pattern_symbols(Symbols):
 python_grammar: Grammar
 python_grammar_no_print_statement: Grammar
 python_grammar_no_print_statement_no_exec_statement: Grammar
+python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar
 python_grammar_no_exec_statement: Grammar
 pattern_grammar: Grammar
 
index 4401b7b0e72002dc58ccc63d72ee34c82e782195..dab8b404a739c57f5258694e28ebe65bd9714462 100644
--- a/tests/data/python37.py
+++ b/tests/data/python37.py
@@ -1,10 +1,16 @@
 #!/usr/bin/env python3.7
 
+
 def f():
-    return (i*2 async for i in arange(42))
+    return (i * 2 async for i in arange(42))
+
 
 def g():
-    return (something_long * something_long async for something_long in async_generator(with_an_argument))
+    return (
+        something_long * something_long
+        async for something_long in async_generator(with_an_argument)
+    )
+
 
 async def func():
     if test:
@@ -15,9 +21,11 @@ async def func():
             )
         ]
 
+
 def awaited_generator_value(n):
     return (await awaitable for awaitable in awaitable_list)
 
+
 def make_arange(n):
     return (i * 2 for i in range(n) if await wrap(i))