Action: Support running in a docker container (#2748)

[etc/vim.git] / src / black / parsing.py
diff --git a/src/black/parsing.py b/src/black/parsing.py

index 32cfa5239f117e00c2ce969baab6ee3118a5b519..76e9de023c7fe1bd53eeccd491c71c6fd79c2ba1 100644 (file)
--- a/src/black/parsing.py
+++ b/src/black/parsing.py
@@ -4,7 +4,7 @@ Parse Python code and perform AST validation.
  import ast
  import platform
  import sys
-from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
+from typing import Any, AnyStr, Iterable, Iterator, List, Set, Tuple, Type, Union
  
  if sys.version_info < (3, 8):
      from typing_extensions import Final
@@ -17,6 +17,7 @@ from blib2to3 import pygram
  from blib2to3.pgen2 import driver
  from blib2to3.pgen2.grammar import Grammar
  from blib2to3.pgen2.parse import ParseError
+from blib2to3.pgen2.tokenize import TokenError
  
  from black.mode import TargetVersion, Feature, supports_feature
  from black.nodes import syms
@@ -42,6 +43,11 @@ except ImportError:
          ast3 = ast27 = ast
  
  
+PY310_HINT: Final[
+    str
+] = "Consider using --target-version py310 to parse Python 3.10 code."
+
+
  class InvalidInput(ValueError):
      """Raised when input source code fails all parse attempts."""
  
@@ -75,8 +81,10 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
          # Python 3.10+
          grammars.append(pygram.python_grammar_soft_keywords)
      # If we have to parse both, try to parse async as a keyword first
-    if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
-        # Python 3.7+
+    if not supports_feature(
+        target_versions, Feature.ASYNC_IDENTIFIERS
+    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
+        # Python 3.7-3.9
          grammars.append(
              pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
          )
@@ -93,7 +101,8 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
      if not src_txt.endswith("\n"):
          src_txt += "\n"
  
-    for grammar in get_grammars(set(target_versions)):
+    grammars = get_grammars(set(target_versions))
+    for grammar in grammars:
          drv = driver.Driver(grammar)
          try:
              result = drv.parse_string(src_txt, True)
@@ -107,7 +116,19 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
              except IndexError:
                  faulty_line = "<line number missing in source>"
              exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
+
+        except TokenError as te:
+            # TokenError is raised in edge cases and typically has no "faulty_line".
+            lineno, column = te.args[1]
+            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {te.args[0]}")
+
      else:
+        if pygram.python_grammar_soft_keywords not in grammars and matches_grammar(
+            src_txt, pygram.python_grammar_soft_keywords
+        ):
+            original_msg = exc.args[0]
+            msg = f"{original_msg}\n{PY310_HINT}"
+            raise InvalidInput(msg) from None
          raise exc from None
  
      if isinstance(result, Leaf):
@@ -115,6 +136,16 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
      return result
  
  
+def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
+    drv = driver.Driver(grammar)
+    try:
+        drv.parse_string(src_txt, True)
+    except (ParseError, TokenError, IndentationError):
+        return False
+    else:
+        return True
+
+
  def lib2to3_unparse(node: Node) -> str:
      """Given a lib2to3 node, return its string representation."""
      code = str(node)
@@ -160,6 +191,16 @@ ast3_AST: Final[Type[ast3.AST]] = ast3.AST
  ast27_AST: Final[Type[ast27.AST]] = ast27.AST
  
  
+def _normalize(lineend: AnyStr, value: AnyStr) -> AnyStr:
+    # To normalize, we strip any leading and trailing space from
+    # each line...
+    stripped: List[AnyStr] = [i.strip() for i in value.splitlines()]
+    normalized = lineend.join(stripped)
+    # ...and remove any blank lines at the beginning and end of
+    # the whole string
+    return normalized.strip()
+
+
  def stringify_ast(
      node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
  ) -> Iterator[str]:
@@ -223,14 +264,10 @@ def stringify_ast(
                  and field == "value"
                  and isinstance(value, (str, bytes))
              ):
-                lineend = "\n" if isinstance(value, str) else b"\n"
-                # To normalize, we strip any leading and trailing space from
-                # each line...
-                stripped = [line.strip() for line in value.splitlines()]
-                normalized = lineend.join(stripped)  # type: ignore[attr-defined]
-                # ...and remove any blank lines at the beginning and end of
-                # the whole string
-                normalized = normalized.strip()
+                if isinstance(value, str):
+                    normalized: Union[str, bytes] = _normalize("\n", value)
+                else:
+                    normalized = _normalize(b"\n", value)
              else:
                  normalized = value
              yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"