All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
f8617f9)
Based on the feedback in
https://github.com/python/black/pull/845#issuecomment-490622711
- Remove TokenizerConfig, and add a field to Grammar instead.
- Pass the Grammar to the tokenizer.
- Rename `ASYNC_IS_RESERVED_KEYWORD` to `ASYNC_KEYWORDS` and
`ASYNC_IS_VALID_IDENTIFIER` to `ASYNC_IDENTIFIERS`.
from blib2to3.pgen2 import driver, token
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2 import driver, token
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
-from blib2to3.pgen2.tokenize import TokenizerConfig
TRAILING_COMMA_IN_DEF = 5
# The following two feature-flags are mutually exclusive, and exactly one should be
# set for every version of python.
TRAILING_COMMA_IN_DEF = 5
# The following two feature-flags are mutually exclusive, and exactly one should be
# set for every version of python.
- ASYNC_IS_VALID_IDENTIFIER = 6
- ASYNC_IS_RESERVED_KEYWORD = 7
+ ASYNC_IDENTIFIERS = 6
+ ASYNC_KEYWORDS = 7
VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
- TargetVersion.PY27: {Feature.ASYNC_IS_VALID_IDENTIFIER},
- TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
- TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER},
+ TargetVersion.PY27: {Feature.ASYNC_IDENTIFIERS},
+ TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
+ TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IDENTIFIERS},
TargetVersion.PY35: {
Feature.UNICODE_LITERALS,
Feature.TRAILING_COMMA_IN_CALL,
TargetVersion.PY35: {
Feature.UNICODE_LITERALS,
Feature.TRAILING_COMMA_IN_CALL,
- Feature.ASYNC_IS_VALID_IDENTIFIER,
+ Feature.ASYNC_IDENTIFIERS,
},
TargetVersion.PY36: {
Feature.UNICODE_LITERALS,
},
TargetVersion.PY36: {
Feature.UNICODE_LITERALS,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
- Feature.ASYNC_IS_VALID_IDENTIFIER,
+ Feature.ASYNC_IDENTIFIERS,
},
TargetVersion.PY37: {
Feature.UNICODE_LITERALS,
},
TargetVersion.PY37: {
Feature.UNICODE_LITERALS,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
- Feature.ASYNC_IS_RESERVED_KEYWORD,
+ Feature.ASYNC_KEYWORDS,
},
TargetVersion.PY38: {
Feature.UNICODE_LITERALS,
},
TargetVersion.PY38: {
Feature.UNICODE_LITERALS,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
Feature.NUMERIC_UNDERSCORES,
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
- Feature.ASYNC_IS_RESERVED_KEYWORD,
+ Feature.ASYNC_KEYWORDS,
return tiow.read(), encoding, newline
return tiow.read(), encoding, newline
-@dataclass(frozen=True)
-class ParserConfig:
- grammar: Grammar
- tokenizer_config: TokenizerConfig = TokenizerConfig()
-
-
-def get_parser_configs(target_versions: Set[TargetVersion]) -> List[ParserConfig]:
+def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
if not target_versions:
# No target_version specified, so try all grammars.
return [
# Python 3.7+
if not target_versions:
# No target_version specified, so try all grammars.
return [
# Python 3.7+
- ParserConfig(
- pygram.python_grammar_no_print_statement_no_exec_statement,
- TokenizerConfig(async_is_reserved_keyword=True),
- ),
+ pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
- ParserConfig(
- pygram.python_grammar_no_print_statement_no_exec_statement,
- TokenizerConfig(async_is_reserved_keyword=False),
- ),
+ pygram.python_grammar_no_print_statement_no_exec_statement,
# Python 2.7 with future print_function import
# Python 2.7 with future print_function import
- ParserConfig(pygram.python_grammar_no_print_statement),
+ pygram.python_grammar_no_print_statement,
- ParserConfig(pygram.python_grammar),
]
elif all(version.is_python2() for version in target_versions):
# Python 2-only code, so try Python 2 grammars.
return [
# Python 2.7 with future print_function import
]
elif all(version.is_python2() for version in target_versions):
# Python 2-only code, so try Python 2 grammars.
return [
# Python 2.7 with future print_function import
- ParserConfig(pygram.python_grammar_no_print_statement),
+ pygram.python_grammar_no_print_statement,
- ParserConfig(pygram.python_grammar),
]
else:
# Python 3-compatible code, so only try Python 3 grammar.
]
else:
# Python 3-compatible code, so only try Python 3 grammar.
# If we have to parse both, try to parse async as a keyword first
# If we have to parse both, try to parse async as a keyword first
- if not supports_feature(target_versions, Feature.ASYNC_IS_VALID_IDENTIFIER):
+ if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
- configs.append(
- ParserConfig(
- pygram.python_grammar_no_print_statement_no_exec_statement,
- TokenizerConfig(async_is_reserved_keyword=True),
- )
+ grammars.append(
+ pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords # noqa: B950
- if not supports_feature(target_versions, Feature.ASYNC_IS_RESERVED_KEYWORD):
+ if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
- configs.append(
- ParserConfig(
- pygram.python_grammar_no_print_statement_no_exec_statement,
- TokenizerConfig(async_is_reserved_keyword=False),
- )
- )
+ grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
# At least one of the above branches must have been taken, because every Python
# At least one of the above branches must have been taken, because every Python
- # version has exactly one of the two 'ASYNC_IS_*' flags
- return configs
+ # version has exactly one of the two 'ASYNC_*' flags
+ return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
if src_txt[-1:] != "\n":
src_txt += "\n"
if src_txt[-1:] != "\n":
src_txt += "\n"
- for parser_config in get_parser_configs(set(target_versions)):
- drv = driver.Driver(
- parser_config.grammar,
- pytree.convert,
- tokenizer_config=parser_config.tokenizer_config,
- )
+ for grammar in get_grammars(set(target_versions)):
+ drv = driver.Driver(grammar, pytree.convert)
try:
result = drv.parse_string(src_txt, True)
break
try:
result = drv.parse_string(src_txt, True)
break
grammar,
convert=None,
logger=None,
grammar,
convert=None,
logger=None,
- tokenizer_config=tokenize.TokenizerConfig(),
):
self.grammar = grammar
if logger is None:
logger = logging.getLogger(__name__)
self.logger = logger
self.convert = convert
):
self.grammar = grammar
if logger is None:
logger = logging.getLogger(__name__)
self.logger = logger
self.convert = convert
- self.tokenizer_config = tokenizer_config
def parse_tokens(self, tokens, debug=False):
"""Parse a series of tokens and return the syntax tree."""
def parse_tokens(self, tokens, debug=False):
"""Parse a series of tokens and return the syntax tree."""
def parse_stream_raw(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
def parse_stream_raw(self, stream, debug=False):
"""Parse a stream and return the syntax tree."""
- tokens = tokenize.generate_tokens(stream.readline, config=self.tokenizer_config)
+ tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
return self.parse_tokens(tokens, debug)
def parse_stream(self, stream, debug=False):
return self.parse_tokens(tokens, debug)
def parse_stream(self, stream, debug=False):
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(
io.StringIO(text).readline,
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(
io.StringIO(text).readline,
- config=self.tokenizer_config,
)
return self.parse_tokens(tokens, debug)
)
return self.parse_tokens(tokens, debug)
from blib2to3.pytree import _Convert, _NL
from blib2to3.pgen2 import _Path
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import _Convert, _NL
from blib2to3.pgen2 import _Path
from blib2to3.pgen2.grammar import Grammar
-from blib2to3.pgen2.tokenize import TokenizerConfig
class Driver:
grammar: Grammar
logger: Logger
convert: _Convert
class Driver:
grammar: Grammar
logger: Logger
convert: _Convert
- def __init__(
- self,
- grammar: Grammar,
- convert: Optional[_Convert] = ...,
- logger: Optional[Logger] = ...,
- tokenizer_config: TokenizerConfig = ...
- ) -> None: ...
+ def __init__(self, grammar: Grammar, convert: Optional[_Convert] = ..., logger: Optional[Logger] = ...) -> None: ...
def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_tokens(self, tokens: Iterable[Any], debug: bool = ...) -> _NL: ...
def parse_stream_raw(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
def parse_stream(self, stream: IO[Text], debug: bool = ...) -> _NL: ...
self.tokens = {}
self.symbol2label = {}
self.start = 256
self.tokens = {}
self.symbol2label = {}
self.start = 256
+ # Python 3.7+ parses async as a keyword, not an identifier
+ self.async_keywords = False
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
new.labels = self.labels[:]
new.states = self.states[:]
new.start = self.start
new.labels = self.labels[:]
new.states = self.states[:]
new.start = self.start
+ new.async_keywords = self.async_keywords
return new
def report(self):
return new
def report(self):
tokens: Dict[int, int]
symbol2label: Dict[Text, int]
start: int
tokens: Dict[int, int]
symbol2label: Dict[Text, int]
start: int
def __init__(self) -> None: ...
def dump(self, filename: _Path) -> None: ...
def load(self, filename: _Path) -> None: ...
def __init__(self) -> None: ...
def dump(self, filename: _Path) -> None: ...
def load(self, filename: _Path) -> None: ...
import re
from codecs import BOM_UTF8, lookup
import re
from codecs import BOM_UTF8, lookup
-from attr import dataclass
from blib2to3.pgen2.token import *
from . import token
from blib2to3.pgen2.token import *
from . import token
-@dataclass(frozen=True)
-class TokenizerConfig:
- async_is_reserved_keyword: bool = False
-
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
ut = Untokenizer()
return ut.untokenize(iterable)
ut = Untokenizer()
return ut.untokenize(iterable)
-def generate_tokens(readline, config: TokenizerConfig = TokenizerConfig()):
+def generate_tokens(readline, grammar=None):
"""
The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the
"""
The generate_tokens() generator requires one argument, readline, which
must be a callable object which provides the same interface as the
# If we know we're parsing 3.7+, we can unconditionally parse `async` and
# `await` as keywords.
# If we know we're parsing 3.7+, we can unconditionally parse `async` and
# `await` as keywords.
- async_is_reserved_keyword = config.async_is_reserved_keyword
+ async_keywords = False if grammar is None else grammar.async_keywords
# 'stashed' and 'async_*' are used for async/await parsing
stashed = None
async_def = False
# 'stashed' and 'async_*' are used for async/await parsing
stashed = None
async_def = False
yield (STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
yield (STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
if token in ('async', 'await'):
- if async_is_reserved_keyword or async_def:
+ if async_keywords or async_def:
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
yield (ASYNC if token == 'async' else AWAIT,
token, spos, epos, line)
continue
# Stubs for lib2to3.pgen2.tokenize (Python 3.6)
# NOTE: Only elements from __all__ are present.
# Stubs for lib2to3.pgen2.tokenize (Python 3.6)
# NOTE: Only elements from __all__ are present.
-from typing import Callable, Iterable, Iterator, List, Text, Tuple
-from attr import dataclass
+from typing import Callable, Iterable, Iterator, List, Optional, Text, Tuple
from blib2to3.pgen2.token import * # noqa
from blib2to3.pgen2.token import * # noqa
+from blib2to3.pygram import Grammar
_Coord = Tuple[int, int]
_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]
_Coord = Tuple[int, int]
_TokenEater = Callable[[int, Text, _Coord, _Coord, Text], None]
_TokenInfo = Tuple[int, Text, _Coord, _Coord, Text]
-@dataclass(frozen=True)
-class TokenizerConfig:
- async_is_reserved_keyword: bool = False
class TokenError(Exception): ...
class StopTokenizing(Exception): ...
class TokenError(Exception): ...
class StopTokenizing(Exception): ...
def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
def generate_tokens(
def untokenize(iterable: Iterable[_TokenInfo]) -> Text: ...
def generate_tokens(
- readline: Callable[[], Text]
+ readline: Callable[[], Text],
+ grammar: Optional[Grammar] = ...
) -> Iterator[_TokenInfo]: ...
) -> Iterator[_TokenInfo]: ...
global python_grammar
global python_grammar_no_print_statement
global python_grammar_no_print_statement_no_exec_statement
global python_grammar
global python_grammar_no_print_statement
global python_grammar_no_print_statement_no_exec_statement
+ global python_grammar_no_print_statement_no_exec_statement_async_keywords
global python_symbols
global pattern_grammar
global pattern_symbols
global python_symbols
global pattern_grammar
global pattern_symbols
python_grammar_no_print_statement = python_grammar.copy()
del python_grammar_no_print_statement.keywords["print"]
python_grammar_no_print_statement = python_grammar.copy()
del python_grammar_no_print_statement.keywords["print"]
python_grammar_no_print_statement_no_exec_statement = python_grammar.copy()
del python_grammar_no_print_statement_no_exec_statement.keywords["print"]
del python_grammar_no_print_statement_no_exec_statement.keywords["exec"]
python_grammar_no_print_statement_no_exec_statement = python_grammar.copy()
del python_grammar_no_print_statement_no_exec_statement.keywords["print"]
del python_grammar_no_print_statement_no_exec_statement.keywords["exec"]
+ # Python 3.7+
+ python_grammar_no_print_statement_no_exec_statement_async_keywords = (
+ python_grammar_no_print_statement_no_exec_statement.copy()
+ )
+ python_grammar_no_print_statement_no_exec_statement_async_keywords.async_keywords = True
+
pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
cache_dir)
pattern_symbols = Symbols(pattern_grammar)
pattern_grammar = driver.load_packaged_grammar("blib2to3", _PATTERN_GRAMMAR_FILE,
cache_dir)
pattern_symbols = Symbols(pattern_grammar)
python_grammar: Grammar
python_grammar_no_print_statement: Grammar
python_grammar_no_print_statement_no_exec_statement: Grammar
python_grammar: Grammar
python_grammar_no_print_statement: Grammar
python_grammar_no_print_statement_no_exec_statement: Grammar
+python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar
python_grammar_no_exec_statement: Grammar
pattern_grammar: Grammar
python_grammar_no_exec_statement: Grammar
pattern_grammar: Grammar
- return (i*2 async for i in arange(42))
+ return (i * 2 async for i in arange(42))
+
- return (something_long * something_long async for something_long in async_generator(with_an_argument))
+ return (
+ something_long * something_long
+ async for something_long in async_generator(with_an_argument)
+ )
+
async def func():
if test:
async def func():
if test:
def awaited_generator_value(n):
return (await awaitable for awaitable in awaitable_list)
def awaited_generator_value(n):
return (await awaitable for awaitable in awaitable_list)
def make_arange(n):
return (i * 2 for i in range(n) if await wrap(i))
def make_arange(n):
return (i * 2 for i in range(n) if await wrap(i))