X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/f2ea461e9e9fa5c47bb61fd72d512c748928badc..8a84bebcfcabddfd5b82a8cff0b830a745999b6c:/src/black/parsing.py?ds=sidebyside diff --git a/src/black/parsing.py b/src/black/parsing.py index 8e9feea..76e9de0 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -2,23 +2,36 @@ Parse Python code and perform AST validation. """ import ast +import platform import sys -from typing import Iterable, Iterator, List, Set, Union +from typing import Any, AnyStr, Iterable, Iterator, List, Set, Tuple, Type, Union + +if sys.version_info < (3, 8): + from typing_extensions import Final +else: + from typing import Final # lib2to3 fork from blib2to3.pytree import Node, Leaf -from blib2to3 import pygram, pytree +from blib2to3 import pygram from blib2to3.pgen2 import driver from blib2to3.pgen2.grammar import Grammar from blib2to3.pgen2.parse import ParseError +from blib2to3.pgen2.tokenize import TokenError from black.mode import TargetVersion, Feature, supports_feature from black.nodes import syms +ast3: Any +ast27: Any + +_IS_PYPY = platform.python_implementation() == "PyPy" + try: from typed_ast import ast3, ast27 except ImportError: - if sys.version_info < (3, 8): + # Either our python version is too low, or we're on pypy + if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY): print( "The typed_ast package is required but not installed.\n" "You can upgrade to Python 3.8+ or install typed_ast with\n" @@ -30,6 +43,11 @@ except ImportError: ast3 = ast27 = ast +PY310_HINT: Final[ + str +] = "Consider using --target-version py310 to parse Python 3.10 code." + + class InvalidInput(ValueError): """Raised when input source code fails all parse attempts.""" @@ -59,9 +77,14 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 3-compatible code, so only try Python 3 grammar. 
grammars = [] + if supports_feature(target_versions, Feature.PATTERN_MATCHING): + # Python 3.10+ + grammars.append(pygram.python_grammar_soft_keywords) # If we have to parse both, try to parse async as a keyword first - if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): - # Python 3.7+ + if not supports_feature( + target_versions, Feature.ASYNC_IDENTIFIERS + ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING): + # Python 3.7-3.9 grammars.append( pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords ) @@ -78,8 +101,9 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) - if not src_txt.endswith("\n"): src_txt += "\n" - for grammar in get_grammars(set(target_versions)): - drv = driver.Driver(grammar, pytree.convert) + grammars = get_grammars(set(target_versions)) + for grammar in grammars: + drv = driver.Driver(grammar) try: result = drv.parse_string(src_txt, True) break @@ -92,7 +116,19 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) - except IndexError: faulty_line = "" exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}") + + except TokenError as te: + # In edge cases these are raised; and typically don't have a "faulty_line". 
+ lineno, column = te.args[1] + exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {te.args[0]}") + else: + if pygram.python_grammar_soft_keywords not in grammars and matches_grammar( + src_txt, pygram.python_grammar_soft_keywords + ): + original_msg = exc.args[0] + msg = f"{original_msg}\n{PY310_HINT}" + raise InvalidInput(msg) from None raise exc from None if isinstance(result, Leaf): @@ -100,34 +136,69 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) - return result +def matches_grammar(src_txt: str, grammar: Grammar) -> bool: + drv = driver.Driver(grammar) + try: + drv.parse_string(src_txt, True) + except (ParseError, TokenError, IndentationError): + return False + else: + return True + + def lib2to3_unparse(node: Node) -> str: """Given a lib2to3 node, return its string representation.""" code = str(node) return code -def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]: +def parse_single_version( + src: str, version: Tuple[int, int] +) -> Union[ast.AST, ast3.AST, ast27.AST]: filename = "" - if sys.version_info >= (3, 8): - # TODO: support Python 4+ ;) - for minor_version in range(sys.version_info[1], 4, -1): - try: - return ast.parse(src, filename, feature_version=(3, minor_version)) - except SyntaxError: - continue - else: - for feature_version in (7, 6): - try: - return ast3.parse(src, filename, feature_version=feature_version) - except SyntaxError: - continue - if ast27.__name__ == "ast": - raise SyntaxError( - "The requested source code has invalid Python 3 syntax.\n" - "If you are trying to format Python 2 files please reinstall Black" - " with the 'python2' extra: `python3 -m pip install black[python2]`." 
- ) - return ast27.parse(src) + # typed_ast is needed because of feature version limitations in the builtin ast + if sys.version_info >= (3, 8) and version >= (3,): + return ast.parse(src, filename, feature_version=version) + elif version >= (3,): + if _IS_PYPY: + return ast3.parse(src, filename) + else: + return ast3.parse(src, filename, feature_version=version[1]) + elif version == (2, 7): + return ast27.parse(src) + raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!") + + +def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]: + # TODO: support Python 4+ ;) + versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)] + + if ast27.__name__ != "ast": + versions.append((2, 7)) + + first_error = "" + for version in sorted(versions, reverse=True): + try: + return parse_single_version(src, version) + except SyntaxError as e: + if not first_error: + first_error = str(e) + + raise SyntaxError(first_error) + + +ast3_AST: Final[Type[ast3.AST]] = ast3.AST +ast27_AST: Final[Type[ast27.AST]] = ast27.AST + + +def _normalize(lineend: AnyStr, value: AnyStr) -> AnyStr: + # To normalize, we strip any leading and trailing space from + # each line... + stripped: List[AnyStr] = [i.strip() for i in value.splitlines()] + normalized = lineend.join(stripped) + # ...and remove any blank lines at the beginning and end of + # the whole string + return normalized.strip() def stringify_ast( @@ -139,13 +210,16 @@ def stringify_ast( yield f"{' ' * depth}{node.__class__.__name__}(" + type_ignore_classes: Tuple[Type[Any], ...] 
for field in sorted(node._fields): # noqa: F402 - # TypeIgnore has only one field 'lineno' which breaks this comparison - type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore) - if sys.version_info >= (3, 8): - type_ignore_classes += (ast.TypeIgnore,) - if isinstance(node, type_ignore_classes): - break + # TypeIgnore will not be present using pypy < 3.8, so no need for this + if not (_IS_PYPY and sys.version_info < (3, 8)): + # TypeIgnore has only one field 'lineno' which breaks this comparison + type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore) + if sys.version_info >= (3, 8): + type_ignore_classes += (ast.TypeIgnore,) + if isinstance(node, type_ignore_classes): + break try: value = getattr(node, field) @@ -169,7 +243,13 @@ def stringify_ast( elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)): yield from stringify_ast(item, depth + 2) - elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)): + # Note that we are referencing the typed-ast ASTs via global variables and not + # direct module attribute accesses because that breaks mypyc. It's probably + # something to do with the ast3 / ast27 variables being marked as Any leading + # mypy to think this branch is always taken, leaving the rest of the code + # unanalyzed. Tightening up the types for the typed-ast AST types avoids the + # mypyc crash. + elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)): yield from stringify_ast(value, depth + 2) else: @@ -184,14 +264,10 @@ def stringify_ast( and field == "value" and isinstance(value, (str, bytes)) ): - lineend = "\n" if isinstance(value, str) else b"\n" - # To normalize, we strip any leading and trailing space from - # each line... 
- stripped = [line.strip() for line in value.splitlines()] - normalized = lineend.join(stripped) # type: ignore[attr-defined] - # ...and remove any blank lines at the beginning and end of - # the whole string - normalized = normalized.strip() + if isinstance(value, str): + normalized: Union[str, bytes] = _normalize("\n", value) + else: + normalized = _normalize(b"\n", value) else: normalized = value yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"