import ast
import platform
import sys
-from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
+from typing import Any, AnyStr, Iterable, Iterator, List, Set, Tuple, Type, Union
if sys.version_info < (3, 8):
from typing_extensions import Final
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
+from blib2to3.pgen2.tokenize import TokenError
from black.mode import TargetVersion, Feature, supports_feature
from black.nodes import syms
ast3 = ast27 = ast
+# Hint appended to the "Cannot parse" message when the source fails under the
+# selected grammars but does parse under the soft-keywords (Python 3.10+) grammar.
+PY310_HINT: Final[
+ str
+] = "Consider using --target-version py310 to parse Python 3.10 code."
+
+
class InvalidInput(ValueError):
"""Raised when input source code fails all parse attempts."""
# Python 3.10+
grammars.append(pygram.python_grammar_soft_keywords)
# If we have to parse both, try to parse async as a keyword first
- if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
- # Python 3.7+
+ if not supports_feature(
+ target_versions, Feature.ASYNC_IDENTIFIERS
+ ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
+ # Python 3.7-3.9
grammars.append(
pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
)
if not src_txt.endswith("\n"):
src_txt += "\n"
- for grammar in get_grammars(set(target_versions)):
+ grammars = get_grammars(set(target_versions))
+ for grammar in grammars:
drv = driver.Driver(grammar)
try:
result = drv.parse_string(src_txt, True)
except IndexError:
faulty_line = "<line number missing in source>"
exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
+
+ except TokenError as te:
+ # TokenError is raised in some edge cases and typically carries no "faulty_line".
+ lineno, column = te.args[1]
+ exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {te.args[0]}")
+
else:
+ if pygram.python_grammar_soft_keywords not in grammars and matches_grammar(
+ src_txt, pygram.python_grammar_soft_keywords
+ ):
+ original_msg = exc.args[0]
+ msg = f"{original_msg}\n{PY310_HINT}"
+ raise InvalidInput(msg) from None
raise exc from None
if isinstance(result, Leaf):
return result
+def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
+ """Return True if `src_txt` parses without error under `grammar`.
+
+ A ParseError, TokenError or IndentationError raised while parsing means
+ "does not match" and yields False; any other exception propagates.
+ """
+ drv = driver.Driver(grammar)
+ try:
+ drv.parse_string(src_txt, True)
+ except (ParseError, TokenError, IndentationError):
+ return False
+ else:
+ return True
+
+
def lib2to3_unparse(node: Node) -> str:
"""Given a lib2to3 node, return its string representation."""
code = str(node)
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
+def _normalize(lineend: AnyStr, value: AnyStr) -> AnyStr:
+ """Strip every line of `value`, rejoin the lines with `lineend`, and strip the result.
+
+ `lineend` and `value` must be the same type (both str or both bytes);
+ the return value has that type as well.
+ """
+ # To normalize, we strip any leading and trailing whitespace from
+ # each line...
+ stripped: List[AnyStr] = [i.strip() for i in value.splitlines()]
+ normalized = lineend.join(stripped)
+ # ...and remove any blank lines at the beginning and end of
+ # the whole string
+ return normalized.strip()
+
+
def stringify_ast(
node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
yield f"{' ' * depth}{node.__class__.__name__}("
+ type_ignore_classes: Tuple[Type[Any], ...]
for field in sorted(node._fields): # noqa: F402
# TypeIgnore will not be present using pypy < 3.8, so need for this
if not (_IS_PYPY and sys.version_info < (3, 8)):
and field == "value"
and isinstance(value, (str, bytes))
):
- lineend = "\n" if isinstance(value, str) else b"\n"
- # To normalize, we strip any leading and trailing space from
- # each line...
- stripped = [line.strip() for line in value.splitlines()]
- normalized = lineend.join(stripped) # type: ignore[attr-defined]
- # ...and remove any blank lines at the beginning and end of
- # the whole string
- normalized = normalized.strip()
+ if isinstance(value, str):
+ normalized: Union[str, bytes] = _normalize("\n", value)
+ else:
+ normalized = _normalize(b"\n", value)
else:
normalized = value
yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"