X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/06004cd319a6623a1fc29b582eb81e315179629f..f8617f975d56e81cfb4070ce65584f7b29a77e7a:/black.py diff --git a/black.py b/black.py index e7dce5b..c8aa30b 100644 --- a/black.py +++ b/black.py @@ -40,6 +40,7 @@ from appdirs import user_cache_dir from attr import dataclass, evolve, Factory import click import toml +from typed_ast import ast3, ast27 # lib2to3 fork from blib2to3.pytree import Node, Leaf, type_repr @@ -47,6 +48,7 @@ from blib2to3 import pygram, pytree from blib2to3.pgen2 import driver, token from blib2to3.pgen2.grammar import Grammar from blib2to3.pgen2.parse import ParseError +from blib2to3.pgen2.tokenize import TokenizerConfig __version__ = "19.3b0" @@ -135,19 +137,28 @@ class Feature(Enum): NUMERIC_UNDERSCORES = 3 TRAILING_COMMA_IN_CALL = 4 TRAILING_COMMA_IN_DEF = 5 + # The following two feature-flags are mutually exclusive, and exactly one should be + # set for every version of python. + ASYNC_IS_VALID_IDENTIFIER = 6 + ASYNC_IS_RESERVED_KEYWORD = 7 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = { - TargetVersion.PY27: set(), - TargetVersion.PY33: {Feature.UNICODE_LITERALS}, - TargetVersion.PY34: {Feature.UNICODE_LITERALS}, - TargetVersion.PY35: {Feature.UNICODE_LITERALS, Feature.TRAILING_COMMA_IN_CALL}, + TargetVersion.PY27: {Feature.ASYNC_IS_VALID_IDENTIFIER}, + TargetVersion.PY33: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER}, + TargetVersion.PY34: {Feature.UNICODE_LITERALS, Feature.ASYNC_IS_VALID_IDENTIFIER}, + TargetVersion.PY35: { + Feature.UNICODE_LITERALS, + Feature.TRAILING_COMMA_IN_CALL, + Feature.ASYNC_IS_VALID_IDENTIFIER, + }, TargetVersion.PY36: { Feature.UNICODE_LITERALS, Feature.F_STRINGS, Feature.NUMERIC_UNDERSCORES, Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF, + Feature.ASYNC_IS_VALID_IDENTIFIER, }, TargetVersion.PY37: { Feature.UNICODE_LITERALS, @@ -155,6 +166,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = { Feature.NUMERIC_UNDERSCORES, Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF, + Feature.ASYNC_IS_RESERVED_KEYWORD, }, TargetVersion.PY38: { Feature.UNICODE_LITERALS, @@ -162,6 +174,7 @@ VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = { Feature.NUMERIC_UNDERSCORES, Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF, + Feature.ASYNC_IS_RESERVED_KEYWORD, }, } @@ -440,22 +453,10 @@ def main( report=report, ) else: - loop = asyncio.get_event_loop() - executor = ProcessPoolExecutor(max_workers=os.cpu_count()) - try: - loop.run_until_complete( - schedule_formatting( - sources=sources, - fast=fast, - write_back=write_back, - mode=mode, - report=report, - loop=loop, - executor=executor, - ) - ) - finally: - shutdown(loop) + reformat_many( + sources=sources, fast=fast, write_back=write_back, mode=mode, report=report + ) + if verbose or not quiet: bang = "💥 💔 💥" if report.return_code else "✨ 🍰 ✨" out(f"All done! {bang}") @@ -497,6 +498,36 @@ def reformat_one( report.failed(src, str(exc)) +def reformat_many( + sources: Set[Path], + fast: bool, + write_back: WriteBack, + mode: FileMode, + report: "Report", +) -> None: + """Reformat multiple files using a ProcessPoolExecutor.""" + loop = asyncio.get_event_loop() + worker_count = os.cpu_count() + if sys.platform == "win32": + # Work around https://bugs.python.org/issue26903 + worker_count = min(worker_count, 61) + executor = ProcessPoolExecutor(max_workers=worker_count) + try: + loop.run_until_complete( + schedule_formatting( + sources=sources, + fast=fast, + write_back=write_back, + mode=mode, + report=report, + loop=loop, + executor=executor, + ) + ) + finally: + shutdown(loop) + + async def schedule_formatting( sources: Set[Path], fast: bool, @@ -729,20 +760,62 @@ def decode_bytes(src: bytes) -> Tuple[FileContent, Encoding, NewLine]: return tiow.read(), encoding, newline -def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: +@dataclass(frozen=True) +class ParserConfig: + grammar: Grammar + tokenizer_config: TokenizerConfig = TokenizerConfig() + + +def get_parser_configs(target_versions: Set[TargetVersion]) -> List[ParserConfig]: if not target_versions: # No target_version specified, so try all grammars. return [ - pygram.python_grammar_no_print_statement_no_exec_statement, - pygram.python_grammar_no_print_statement, - pygram.python_grammar, + # Python 3.7+ + ParserConfig( + pygram.python_grammar_no_print_statement_no_exec_statement, + TokenizerConfig(async_is_reserved_keyword=True), + ), + # Python 3.0-3.6 + ParserConfig( + pygram.python_grammar_no_print_statement_no_exec_statement, + TokenizerConfig(async_is_reserved_keyword=False), + ), + # Python 2.7 with future print_function import + ParserConfig(pygram.python_grammar_no_print_statement), + # Python 2.7 + ParserConfig(pygram.python_grammar), ] elif all(version.is_python2() for version in target_versions): # Python 2-only code, so try Python 2 grammars. - return [pygram.python_grammar_no_print_statement, pygram.python_grammar] + return [ + # Python 2.7 with future print_function import + ParserConfig(pygram.python_grammar_no_print_statement), + # Python 2.7 + ParserConfig(pygram.python_grammar), + ] else: # Python 3-compatible code, so only try Python 3 grammar. - return [pygram.python_grammar_no_print_statement_no_exec_statement] + configs = [] + # If we have to parse both, try to parse async as a keyword first + if not supports_feature(target_versions, Feature.ASYNC_IS_VALID_IDENTIFIER): + # Python 3.7+ + configs.append( + ParserConfig( + pygram.python_grammar_no_print_statement_no_exec_statement, + TokenizerConfig(async_is_reserved_keyword=True), + ) + ) + if not supports_feature(target_versions, Feature.ASYNC_IS_RESERVED_KEYWORD): + # Python 3.0-3.6 + configs.append( + ParserConfig( + pygram.python_grammar_no_print_statement_no_exec_statement, + TokenizerConfig(async_is_reserved_keyword=False), + ) + ) + # At least one of the above branches must have been taken, because every Python + # version has exactly one of the two 'ASYNC_IS_*' flags + return configs def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node: @@ -750,8 +823,12 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) - if src_txt[-1:] != "\n": src_txt += "\n" - for grammar in get_grammars(set(target_versions)): - drv = driver.Driver(grammar, pytree.convert) + for parser_config in get_parser_configs(set(target_versions)): + drv = driver.Driver( + parser_config.grammar, + pytree.convert, + tokenizer_config=parser_config.tokenizer_config, + ) try: result = drv.parse_string(src_txt, True) break @@ -2127,15 +2204,21 @@ def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]: consumed = 0 nlines = 0 + ignored_lines = 0 for index, line in enumerate(prefix.split("\n")): consumed += len(line) + 1 # adding the length of the split '\n' line = line.lstrip() if not line: nlines += 1 if not line.startswith("#"): + # Escaped newlines outside of a comment are not really newlines at + # all. We treat a single-line comment following an escaped newline + # as a simple trailing comment. + if line.endswith("\\"): + ignored_lines += 1 continue - if index == 0 and not is_endmarker: + if index == ignored_lines and not is_endmarker: comment_type = token.COMMENT # simple trailing comment else: comment_type = STANDALONE_COMMENT @@ -3356,17 +3439,34 @@ class Report: return ", ".join(report) + "." +def parse_ast(src: str) -> Union[ast3.AST, ast27.AST]: + for feature_version in (7, 6): + try: + return ast3.parse(src, feature_version=feature_version) + except SyntaxError: + continue + + return ast27.parse(src) + + def assert_equivalent(src: str, dst: str) -> None: """Raise AssertionError if `src` and `dst` aren't equivalent.""" - import ast import traceback - def _v(node: ast.AST, depth: int = 0) -> Iterator[str]: + def _v(node: Union[ast3.AST, ast27.AST], depth: int = 0) -> Iterator[str]: """Simple visitor generating strings to compare ASTs by content.""" yield f"{' ' * depth}{node.__class__.__name__}(" for field in sorted(node._fields): + # TypeIgnore has only one field 'lineno' which breaks this comparison + if isinstance(node, (ast3.TypeIgnore, ast27.TypeIgnore)): + break + + # Ignore str kind which is case sensitive / and ignores unicode_literals + if isinstance(node, (ast3.Str, ast27.Str, ast3.Bytes)) and field == "kind": + continue + try: value = getattr(node, field) except AttributeError: @@ -3380,15 +3480,15 @@ def assert_equivalent(src: str, dst: str) -> None: # parentheses and they change the AST. if ( field == "targets" - and isinstance(node, ast.Delete) - and isinstance(item, ast.Tuple) + and isinstance(node, (ast3.Delete, ast27.Delete)) + and isinstance(item, (ast3.Tuple, ast27.Tuple)) ): for item in item.elts: yield from _v(item, depth + 2) - elif isinstance(item, ast.AST): + elif isinstance(item, (ast3.AST, ast27.AST)): yield from _v(item, depth + 2) - elif isinstance(value, ast.AST): + elif isinstance(value, (ast3.AST, ast27.AST)): yield from _v(value, depth + 2) else: @@ -3397,17 +3497,15 @@ def assert_equivalent(src: str, dst: str) -> None: yield f"{' ' * depth}) # /{node.__class__.__name__}" try: - src_ast = ast.parse(src) + src_ast = parse_ast(src) except Exception as exc: - major, minor = sys.version_info[:2] raise AssertionError( - f"cannot use --safe with this file; failed to parse source file " - f"with Python {major}.{minor}'s builtin AST. Re-run with --fast " - f"or stop using deprecated Python 2 syntax. AST error message: {exc}" + f"cannot use --safe with this file; failed to parse source file. " + f"AST error message: {exc}" ) try: - dst_ast = ast.parse(dst) + dst_ast = parse_ast(dst) except Exception as exc: log = dump_to_file("".join(traceback.format_tb(exc.__traceback__)), dst) raise AssertionError(