X-Git-Url: https://git.madduck.net/etc/vim.git/blobdiff_plain/9a73bb86db59de1e12426fec81dcdb7f3bb9be7b..ced2d656794568517ba9aa28f781f9151d89de54:/src/black/parsing.py

diff --git a/src/black/parsing.py b/src/black/parsing.py
index 32cfa52..76e9de0 100644
--- a/src/black/parsing.py
+++ b/src/black/parsing.py
@@ -4,7 +4,7 @@ Parse Python code and perform AST validation.
 import ast
 import platform
 import sys
-from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
+from typing import Any, AnyStr, Iterable, Iterator, List, Set, Tuple, Type, Union
 
 if sys.version_info < (3, 8):
     from typing_extensions import Final
@@ -17,6 +17,7 @@ from blib2to3 import pygram
 from blib2to3.pgen2 import driver
 from blib2to3.pgen2.grammar import Grammar
 from blib2to3.pgen2.parse import ParseError
+from blib2to3.pgen2.tokenize import TokenError
 
 from black.mode import TargetVersion, Feature, supports_feature
 from black.nodes import syms
@@ -42,6 +43,11 @@ except ImportError:
         ast3 = ast27 = ast
 
 
+PY310_HINT: Final[
+    str
+] = "Consider using --target-version py310 to parse Python 3.10 code."
+
+
 class InvalidInput(ValueError):
     """Raised when input source code fails all parse attempts."""
 
@@ -75,8 +81,10 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
         # Python 3.10+
         grammars.append(pygram.python_grammar_soft_keywords)
     # If we have to parse both, try to parse async as a keyword first
-    if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
-        # Python 3.7+
+    if not supports_feature(
+        target_versions, Feature.ASYNC_IDENTIFIERS
+    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
+        # Python 3.7-3.9
         grammars.append(
             pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
         )
@@ -93,7 +101,8 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
     if not src_txt.endswith("\n"):
         src_txt += "\n"
 
-    for grammar in get_grammars(set(target_versions)):
+    grammars = get_grammars(set(target_versions))
+    for grammar in grammars:
         drv = driver.Driver(grammar)
         try:
             result = drv.parse_string(src_txt, True)
@@ -107,7 +116,19 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
             except IndexError:
                 faulty_line = "<line number missing in source>"
             exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
+
+        except TokenError as te:
+            # TokenErrors are raised in edge cases and typically lack a "faulty_line".
+            lineno, column = te.args[1]
+            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {te.args[0]}")
+
     else:
+        if pygram.python_grammar_soft_keywords not in grammars and matches_grammar(
+            src_txt, pygram.python_grammar_soft_keywords
+        ):
+            original_msg = exc.args[0]
+            msg = f"{original_msg}\n{PY310_HINT}"
+            raise InvalidInput(msg) from None
         raise exc from None
 
     if isinstance(result, Leaf):
@@ -115,6 +136,16 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
     return result
 
 
+def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
+    """Return True if *src_txt* parses under *grammar*, False on a parse error."""
+    parser = driver.Driver(grammar)
+    try:
+        parser.parse_string(src_txt, True)
+    except (ParseError, TokenError, IndentationError):
+        return False
+    return True
+
+
 def lib2to3_unparse(node: Node) -> str:
     """Given a lib2to3 node, return its string representation."""
     code = str(node)
@@ -160,6 +191,16 @@ ast3_AST: Final[Type[ast3.AST]] = ast3.AST
 ast27_AST: Final[Type[ast27.AST]] = ast27.AST
 
 
+def _normalize(lineend: AnyStr, value: AnyStr) -> AnyStr:
+    """Strip every line of *value*, rejoin with *lineend*, strip the result."""
+    # Stripping each line removes its leading and trailing whitespace.
+    trimmed: List[AnyStr] = [line.strip() for line in value.splitlines()]
+    rejoined = lineend.join(trimmed)
+    # The outer strip then drops whitespace (including blank lines) left
+    # at the very start and end of the rejoined text.
+    return rejoined.strip()
+
+
 def stringify_ast(
     node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
 ) -> Iterator[str]:
@@ -223,14 +264,10 @@ def stringify_ast(
                 and field == "value"
                 and isinstance(value, (str, bytes))
             ):
-                lineend = "\n" if isinstance(value, str) else b"\n"
-                # To normalize, we strip any leading and trailing space from
-                # each line...
-                stripped = [line.strip() for line in value.splitlines()]
-                normalized = lineend.join(stripped)  # type: ignore[attr-defined]
-                # ...and remove any blank lines at the beginning and end of
-                # the whole string
-                normalized = normalized.strip()
+                if isinstance(value, str):
+                    normalized: Union[str, bytes] = _normalize("\n", value)
+                else:
+                    normalized = _normalize(b"\n", value)
             else:
                 normalized = value
             yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"