Add support for always tokenizing async/await as keywords
author    Benjamin Woodruff <github@benjam.info>
          Wed, 8 May 2019 19:06:44 +0000 (15:06 -0400)
committer Łukasz Langa <lukasz@langa.pl>
          Thu, 9 May 2019 15:59:29 +0000 (17:59 +0200)
Fixes #593

I looked into this bug with @ambv and @carljm, and we reached the
conclusion that it's not possible for the tokenizer to determine whether
async/await is a keyword inside all possible generators without breaking
the grammar for older versions of Python.

Instead, we introduce a new tokenizer mode for Python 3.7+ that causes
all instances of async/await to be parsed as reserved keywords, which
should fix async/await inside generators.
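
The change is easiest to see at the token level: in the new mode, await
in a synchronous function body tokenizes as the AWAIT keyword rather than
an ordinary NAME. A minimal sketch, assuming black's vendored blib2to3
from this commit is importable (tok_name is the usual lib2to3 token-name
table):

    import io
    from blib2to3.pgen2 import token, tokenize

    src = (
        "def make_arange(n):\n"
        "    return (i * 2 for i in range(n) if await wrap(i))\n"
    )

    # 3.7+ mode: 'await' is emitted as the AWAIT keyword token even though
    # we are not inside an 'async def', so the grammar can accept it here.
    config = tokenize.TokenizerConfig(async_is_reserved_keyword=True)
    for tok in tokenize.generate_tokens(io.StringIO(src).readline, config=config):
        print(token.tok_name[tok[0]], repr(tok[1]))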

black.py
blib2to3/pgen2/driver.py
blib2to3/pgen2/driver.pyi
blib2to3/pgen2/tokenize.py
blib2to3/pgen2/tokenize.pyi
tests/data/async_as_identifier.py [new file with mode: 0644]
tests/data/python37.py
tests/test_black.py

diff --git a/black.py b/black.py
index c96d205deebff910003f7b70db82ab1d4f97be06..c8aa30b812b49795c797e0e262159ebda024f67a 100644
--- a/black.py
+++ b/black.py
@@ -48,6 +48,7 @@ from blib2to3 import pygram, pytree
 from blib2to3.pgen2 import driver, token
 from blib2to3.pgen2.grammar import Grammar
 from blib2to3.pgen2.parse import ParseError
+from blib2to3.pgen2.tokenize import TokenizerConfig
 
 
 __version__ = "19.3b0"
@@ -136,19 +137,28 @@ class Feature(Enum):
     NUMERIC_UNDERSCORES = 3
     TRAILING_COMMA_IN_CALL = 4
     TRAILING_COMMA_IN_DEF = 5
+    # The following two feature-flags are mutually exclusive, and exactly one should be
+    # set for every version of python.
+    ASYNC_IS_VALID_IDENTIFIER = 6
+    ASYNC_IS_RESERVED_KEYWORD = 7
 
 
 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
-    TargetVersion.PY27: set(),
-    TargetVersion.PY33: {Feature.UNICODE_LITERALS},
-    TargetVersion.PY34: {Feature.UNICODE_LITERALS},
-    TargetVersion.PY35: {Feature.UNICODE_LITERALS, Feature.TRAILING_COMMA_IN_CALL},
+    TargetVersion.PY27: {Feature.ASYNC_IS_VALID_IDENTIFIER},
+    TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
+    TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
+    TargetVersion.PY35: {
+        Feature.UNICODE_LITERALS,
+        Feature.TRAILING_COMMA_IN_CALL,
+        Feature.ASYNC_IS_VALID_IDENTIFIER,
+    },
     TargetVersion.PY36: {
         Feature.UNICODE_LITERALS,
         Feature.F_STRINGS,
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
+        Feature.ASYNC_IS_VALID_IDENTIFIER,
     },
     TargetVersion.PY37: {
         Feature.UNICODE_LITERALS,
@@ -156,6 +166,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
+        Feature.ASYNC_IS_RESERVED_KEYWORD,
     },
     TargetVersion.PY38: {
         Feature.UNICODE_LITERALS,
@@ -163,6 +174,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
         Feature.NUMERIC_UNDERSCORES,
         Feature.TRAILING_COMMA_IN_CALL,
         Feature.TRAILING_COMMA_IN_DEF,
+        Feature.ASYNC_IS_RESERVED_KEYWORD,
     },
 }
 
@@ -748,20 +760,62 @@ def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]:
         return tiow.read(), encoding, newline
 
 
-def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
+@dataclass(frozen=True)
+class ParserConfig:
+    grammar: Grammar
+    tokenizer_config: TokenizerConfig = TokenizerConfig()
+
+
+def get_parser_configs(target_versions: Set[TargetVersion]) -> List[ParserConfig]:
     if not target_versions:
         # No target_version specified, so try all grammars.
         return [
-            pygram.python_grammar_no_print_statement_no_exec_statement,
-            pygram.python_grammar_no_print_statement,
-            pygram.python_grammar,
+            # Python 3.7+
+            ParserConfig(
+                pygram.python_grammar_no_print_statement_no_exec_statement,
+                TokenizerConfig(async_is_reserved_keyword=True),
+            ),
+            # Python 3.0-3.6
+            ParserConfig(
+                pygram.python_grammar_no_print_statement_no_exec_statement,
+                TokenizerConfig(async_is_reserved_keyword=False),
+            ),
+            # Python 2.7 with future print_function import
+            ParserConfig(pygram.python_grammar_no_print_statement),
+            # Python 2.7
+            ParserConfig(pygram.python_grammar),
         ]
     elif all(version.is_python2() for version in target_versions):
         # Python 2-only code, so try Python 2 grammars.
-        return [pygram.python_grammar_no_print_statement, pygram.python_grammar]
+        return [
+            # Python 2.7 with future print_function import
+            ParserConfig(pygram.python_grammar_no_print_statement),
+            # Python 2.7
+            ParserConfig(pygram.python_grammar),
+        ]
     else:
         # Python 3-compatible code, so only try Python 3 grammar.
-        return [pygram.python_grammar_no_print_statement_no_exec_statement]
+        configs = []
+        # If we have to parse both, try to parse async as a keyword first
+        if not supports_feature(target_versions, Feature.ASYNC_IS_VALID_IDENTIFIER):
+            # Python 3.7+
+            configs.append(
+                ParserConfig(
+                    pygram.python_grammar_no_print_statement_no_exec_statement,
+                    TokenizerConfig(async_is_reserved_keyword=True),
+                )
+            )
+        if not supports_feature(target_versions, Feature.ASYNC_IS_RESERVED_KEYWORD):
+            # Python 3.0-3.6
+            configs.append(
+                ParserConfig(
+                    pygram.python_grammar_no_print_statement_no_exec_statement,
+                    TokenizerConfig(async_is_reserved_keyword=False),
+                )
+            )
+        # At least one of the above branches must have been taken, because every Python
+        # version has exactly one of the two 'ASYNC_IS_*' flags
+        return configs
 
 
 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
@@ -769,8 +823,12 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
     if src_txt[-1:] != "\n":
         src_txt += "\n"
 
-    for grammar in get_grammars(set(target_versions)):
-        drv = driver.Driver(grammar, pytree.convert)
+    for parser_config in get_parser_configs(set(target_versions)):
+        drv = driver.Driver(
+            parser_config.grammar,
+            pytree.convert,
+            tokenizer_config=parser_config.tokenizer_config,
+        )
         try:
             result = drv.parse_string(src_txt, True)
             break
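
To illustrate the selection logic above: with a target set that spans
both flags, neither feature is supported by every target, so both
branches run and both configs are tried in order. A sketch, assuming the
black.py from this commit:

    import black

    targets = {black.TargetVersion.PY36, black.TargetVersion.PY37}
    # PY36 carries ASYNC_IS_VALID_IDENTIFIER and PY37 carries
    # ASYNC_IS_RESERVED_KEYWORD, so supports_feature() is False for both
    # flags and get_parser_configs() returns the 3.7+ config first, then
    # the 3.0-3.6 fallback.
    for parser_config in black.get_parser_configs(targets):
        print(parser_config.tokenizer_config)
    # TokenizerConfig(async_is_reserved_keyword=True)
    # TokenizerConfig(async_is_reserved_keyword=False)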
diff --git a/blib2to3/pgen2/driver.py b/blib2to3/pgen2/driver.py
index 63b60bbad56f533924752f2dd86aea148ffd4f5a..e681b526a2219ae46dff248bb991173622c7698e 100644
--- a/blib2to3/pgen2/driver.py
+++ b/blib2to3/pgen2/driver.py
@@ -29,12 +29,19 @@ from . import grammar, parse, token, tokenize, pgen
 
 class Driver(object):
 
-    def __init__(self, grammar, convert=None, logger=None):
+    def __init__(
+        self,
+        grammar,
+        convert=None,
+        logger=None,
+        tokenizer_config=tokenize.TokenizerConfig(),
+    ):
         self.grammar = grammar
         if logger is None:
             logger = logging.getLogger(__name__)
         self.logger = logger
         self.convert = convert
+        self.tokenizer_config = tokenizer_config
 
     def parse_tokens(self, tokens, debug=False):
         """Parse a series of tokens and return the syntax tree."""
@@ -97,7 +104,7 @@ class Driver(object):
 
     def parse_stream_raw(self, stream, debug=False):
         """Parse a stream and return the syntax tree."""
-        tokens = tokenize.generate_tokens(stream.readline)
+        tokens = tokenize.generate_tokens(stream.readline, config=self.tokenizer_config)
         return self.parse_tokens(tokens, debug)
 
     def parse_stream(self, stream, debug=False):
@@ -111,7 +118,10 @@ class Driver(object):
 
     def parse_string(self, text, debug=False):
         """Parse a string and return the syntax tree."""
-        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
+        tokens = tokenize.generate_tokens(
+            io.StringIO(text).readline,
+            config=self.tokenizer_config,
+        )
         return self.parse_tokens(tokens, debug)
 
     def _partially_consume_prefix(self, prefix, column):
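
Combined with the black.py change above, a driver constructed with the
keyword-mode config parses await inside a synchronous generator
expression. A hedged sketch, assuming this commit's blib2to3 and its
bundled grammars:

    from blib2to3 import pygram, pytree
    from blib2to3.pgen2 import driver
    from blib2to3.pgen2.tokenize import TokenizerConfig

    drv = driver.Driver(
        pygram.python_grammar_no_print_statement_no_exec_statement,
        convert=pytree.convert,
        tokenizer_config=TokenizerConfig(async_is_reserved_keyword=True),
    )
    # Parses, because the tokenizer now emits AWAIT unconditionally:
    tree = drv.parse_string("def f(xs):\n    return (await x for x in xs)\n", True)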
diff --git a/blib2to3/pgen2/driver.pyi b/blib2to3/pgen2/driver.pyi
index f098bf512a1fd1c1857a2dd51fec490cdd6e529f..a4a354634385d62e1d71b3517574caf3028b4155 100644
--- a/blib2to3/pgen2/driver.pyi
+++ b/blib2to3/pgen2/driver.pyi
@@ -8,13 +8,20 @@ from logging import Logger
 from blib2to3.pytree import _Convert, _NL
 from blib2to3.pgen2 import _Path
 from blib2to3.pgen2.grammar import Grammar
+from blib2to3.pgen2.tokenize import TokenizerConfig
 
 
 class Driver:
     grammar: Grammar
     logger: Logger
     convert: _Convert
-    def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
+    def __init__(
+        self,
+        grammar: Grammar,
+        convert: Optional[_Convert] = ...,
+        logger: Optional[Logger] = ...,
+        tokenizer_config: TokenizerConfig = ...
+    ) -> None: ...
     def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
     def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
     def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
diff --git a/blib2to3/pgen2/tokenize.py b/blib2to3/pgen2/tokenize.py
index 1f51ff0039fd0227df0a5d7f691bbcd684fea795..43e1d597bc9b64792dd19229830f7578032fbe41 100644
--- a/blib2to3/pgen2/tokenize.py
+++ b/blib2to3/pgen2/tokenize.py
@@ -31,6 +31,7 @@ __credits__ = \
 
 import re
 from codecs import BOM_UTF8, lookup
+from attr import dataclass
 from blib2to3.pgen2.token import *
 
 from . import token
@@ -137,6 +138,10 @@ single_quoted = (
 
 tabsize = 8
 
+@dataclass(frozen=True)
+class TokenizerConfig:
+    async_is_reserved_keyword: bool = False
+
 class TokenError(Exception): pass
 
 class StopTokenizing(Exception): pass
@@ -334,7 +339,7 @@ def untokenize(iterable):
     ut = Untokenizer()
     return ut.untokenize(iterable)
 
-def generate_tokens(readline):
+def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
     """
     The generate_tokens() generator requires one argument, readline, which
     must be a callable object which provides the same interface as the
@@ -356,6 +361,9 @@ def generate_tokens(readline):
     contline = None
     indents = [0]
 
+    # If we know we're parsing 3.7+, we can unconditionally parse `async` and
+    # `await` as keywords.
+    async_is_reserved_keyword = config.async_is_reserved_keyword
     # 'stashed' and 'async_*' are used for async/await parsing
     stashed = None
     async_def = False
@@ -506,7 +514,7 @@ def generate_tokens(readline):
                         yield (STRING, token, spos, epos, line)
                 elif initial.isidentifier():               # ordinary name
                     if token in ('async', 'await'):
-                        if async_def:
+                        if async_is_reserved_keyword or async_def:
                             yield (ASYNC if token == 'async' else AWAIT,
                                    token, spos, epos, line)
                             continue
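
The flag's effect in isolation: under the default config a bare async is
an ordinary NAME, while with the flag set it is always the ASYNC keyword
token. A sketch, assuming this commit's tokenizer (the NAME case relies
on the stash-and-reemit logic visible in the context above):

    import io
    from blib2to3.pgen2.token import ASYNC, NAME
    from blib2to3.pgen2.tokenize import TokenizerConfig, generate_tokens

    def first_token(src, config):
        # Type of the first token yielded for src under the given config.
        return next(generate_tokens(io.StringIO(src).readline, config=config))[0]

    assert first_token("async = 5\n", TokenizerConfig()) == NAME   # identifier
    assert first_token("async = 5\n",
                       TokenizerConfig(async_is_reserved_keyword=True)) == ASYNC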
diff --git a/blib2to3/pgen2/tokenize.pyi b/blib2to3/pgen2/tokenize.pyi
index 62352e9b47a26f89a14ae9d6ad0297f5163ef9b0..ac0f0f1bf6f1f254016e9b006c498e87787f93f6 100644
--- a/blib2to3/pgen2/tokenize.pyi
+++ b/blib2to3/pgen2/tokenize.pyi
@@ -2,6 +2,7 @@
 # NOTE: Only elements from __all__ are present.
 
 from typing import Callable, Iterable, Iterator, List, Text, Tuple
+from attr import dataclass
 from blib2to3.pgen2.token import *  # noqa
 
 
@@ -9,6 +10,9 @@ _Coord = Tuple[int, int]
 _TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
 _TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]
 
+@dataclass(frozen=True)
+class TokenizerConfig:
+    async_is_reserved_keyword: bool = False
 
 class TokenError(Exception): ...
 class StopTokenizing(Exception): ...
diff --git a/tests/data/async_as_identifier.py b/tests/data/async_as_identifier.py
new file mode 100644
index 0000000..c36a5e5
--- /dev/null
+++ b/tests/data/async_as_identifier.py
@@ -0,0 +1,49 @@
+def async():
+    pass
+
+
+def await():
+    pass
+
+
+await = lambda: None
+async = lambda: None
+async()
+await()
+
+
+def sync_fn():
+    await = lambda: None
+    async = lambda: None
+    async()
+    await()
+
+
+async def async_fn():
+    await async_fn()
+
+
+# output
+def async():
+    pass
+
+
+def await():
+    pass
+
+
+await = lambda: None
+async = lambda: None
+async()
+await()
+
+
+def sync_fn():
+    await = lambda: None
+    async = lambda: None
+    async()
+    await()
+
+
+async def async_fn():
+    await async_fn()
diff --git a/tests/data/python37.py b/tests/data/python37.py
index 9781ff6391158773a32fbdd0906bc44cd4a423c6..4401b7b0e72002dc58ccc63d72ee34c82e782195 100644
--- a/tests/data/python37.py
+++ b/tests/data/python37.py
@@ -14,6 +14,14 @@ async def func():
                 self.async_inc, arange(8), batch_size=3
             )
         ]
+
+def awaited_generator_value(n):
+    return (await awaitable for awaitable in awaitable_list)
+
+def make_arange(n):
+    return (i * 2 for i in range(n) if await wrap(i))
+
+
 # output
 
 
@@ -39,3 +47,11 @@ async def func():
                 self.async_inc, arange(8), batch_size=3
             )
         ]
+
+
+def awaited_generator_value(n):
+    return (await awaitable for awaitable in awaitable_list)
+
+
+def make_arange(n):
+    return (i * 2 for i in range(n) if await wrap(i))
diff --git a/tests/test_black.py b/tests/test_black.py
index 59343efd8b140679ee0d27582f6287d142e18999..0ea4ac55078393c93e74458d3c60ca98102262bb 100644
--- a/tests/test_black.py
+++ b/tests/test_black.py
@@ -502,8 +502,24 @@ class BlackTestCase(unittest.TestCase):
         self.assertFormatEqual(expected, actual)
         black.assert_stable(source, actual, mode)
 
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_async_as_identifier(self) -> None:
+        source_path = (THIS_DIR / "data" / "async_as_identifier.py").resolve()
+        source, expected = read_data("async_as_identifier")
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        major, minor = sys.version_info[:2]
+        if major < 3 or (major <= 3 and minor < 7):
+            black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, black.FileMode())
+        # ensure black can parse this when the target is 3.6
+        self.invokeBlack([str(source_path), "--target-version", "py36"])
+        # but not on 3.7, because async/await is no longer an identifier
+        self.invokeBlack([str(source_path), "--target-version", "py37"], exit_code=123)
+
     @patch("black.dump_to_file", dump_to_stderr)
     def test_python37(self) -> None:
+        source_path = (THIS_DIR / "data" / "python37.py").resolve()
         source, expected = read_data("python37")
         actual = fs(source)
         self.assertFormatEqual(expected, actual)
@@ -511,6 +527,10 @@ class BlackTestCase(unittest.TestCase):
         if major > 3 or (major == 3 and minor >= 7):
             black.assert_equivalent(source, actual)
         black.assert_stable(source, actual, black.FileMode())
+        # ensure black can parse this when the target is 3.7
+        self.invokeBlack([str(source_path), "--target-version", "py37"])
+        # but not on 3.6, because we use async as a reserved keyword
+        self.invokeBlack([str(source_path), "--target-version", "py36"], exit_code=123)
 
     @patch("black.dump_to_file", dump_to_stderr)
     def test_fmtonoff(self) -> None:
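
The two invokeBlack checks above encode the user-visible contract: a file
that uses async as an identifier only parses when targeting 3.6, and a
file that needs keyword async/await only parses when targeting 3.7. The
same contract via the API, as a sketch assuming this commit's black.py
(the exact exception type is black's parse error, hedged here as a
generic Exception):

    import black

    py36 = {black.TargetVersion.PY36}
    py37 = {black.TargetVersion.PY37}

    src = "async = lambda: None\n"   # async used as an identifier
    black.lib2to3_parse(src, py36)    # fine: identifier-mode config is tried
    try:
        black.lib2to3_parse(src, py37)   # keyword mode only: cannot parse
    except Exception as exc:
        print("py37 rejects it:", exc)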