All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
2 Parse Python code and perform AST validation.
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
9 if sys.version_info < (3, 8):
10 from typing_extensions import Final
12 from typing import Final
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
22 from black.mode import TargetVersion, Feature, supports_feature
23 from black.nodes import syms
27 _IS_PYPY = platform.python_implementation() == "PyPy"
30 from typed_ast import ast3
32 # Either our python version is too low, or we're on pypy
33 if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
35 "The typed_ast package is required but not installed.\n"
36 "You can upgrade to Python 3.8+ or install typed_ast with\n"
37 "`python3 -m pip install typed-ast`.",
# Hint texts appended to the "Cannot parse" message when the source would have
# parsed under a grammar that was not among the ones tried.
PY310_HINT: Final = "Consider using --target-version py310 to parse Python 3.10 code."
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for ``target_versions``.

    With an empty set a fixed default list is returned. Otherwise each grammar
    is included only when the feature support of the targets calls for it.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            pygram.python_grammar_no_print_statement_no_exec_statement,
        ]

    grammars = []
    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(pygram.python_grammar_soft_keywords)
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar from ``get_grammars`` is tried in order and the first
    successful parse wins.

    Raises:
        InvalidInput: if no grammar parses ``src_txt``. The message is that of
            the last failed attempt, with a hint appended when the source
            matches the soft-keywords grammar (and it was not tried) or one of
            the Python 2 grammars.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {te.args[0]}")

    else:
        # Every grammar failed: see whether an untried grammar would have
        # worked, so the error can carry a more useful hint.
        if pygram.python_grammar_soft_keywords not in grammars and matches_grammar(
            src_txt, pygram.python_grammar_soft_keywords
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY310_HINT}"
            raise InvalidInput(msg) from None

        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    # A bare Leaf result is wrapped so callers always receive a Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if ``src_txt`` parses under ``grammar``, False otherwise.

    ParseError, TokenError and IndentationError all count as "does not match".
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    # NOTE(review): the body was missing from this extract; str(node) is
    # restored from the docstring's contract -- confirm against upstream.
    code = str(node)
    return code
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST]:
    """Parse ``src`` as the given Python ``version`` and return the AST.

    The builtin ``ast`` is used when running on Python 3.8+; otherwise the
    parse is delegated to ``ast3``.

    Raises:
        SyntaxError: propagated from the underlying parser.
        AssertionError: if ``version`` is below ``(3,)``.
    """
    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version)
    elif version >= (3,):
        # NOTE(review): the condition selecting between these two calls was
        # dropped from this extract; `_IS_PYPY` is restored here -- confirm.
        if _IS_PYPY:
            return ast3.parse(src, filename)
        else:
            return ast3.parse(src, filename, feature_version=version[1])
    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse ``src``, trying feature versions from newest to oldest.

    Versions ``(3, minor)`` are tried from the running interpreter's minor
    version down to 3.3, and the first successful parse is returned.

    Raises:
        SyntaxError: if every version fails; carries the message of the first
            failure, i.e. the one from the newest version tried.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version)
        except SyntaxError as e:
            # Keep only the earliest failure for the final error message.
            if not first_error:
                first_error = str(e)

    raise SyntaxError(first_error)
# Module-level alias for ast3.AST, so isinstance checks can refer to a typed
# global instead of an attribute access on the ast3 module.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
180 def _normalize(lineend: str, value: str) -> str:
181 # To normalize, we strip any leading and trailing space from
183 stripped: List[str] = [i.strip() for i in value.splitlines()]
184 normalized = lineend.join(stripped)
185 # ...and remove any blank lines at the beginning and end of
187 return normalized.strip()
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line per node opening, field name, leaf value and node closing,
    prefixed according to ``depth``. Child nodes are visited recursively.
    """
    # NOTE(review): this extract appears to have collapsed runs of spaces, so
    # the whitespace inside the f-string literals below may differ from
    # upstream -- confirm before relying on the exact output text.

    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so need for this
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore,)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                # NOTE(review): the `field == "targets"` test was dropped from
                # this extract and is restored here -- confirm.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    for item in item.elts:
                        yield from stringify_ast(item, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids
        # the problem.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # NOTE(review): the `field == "value"` test was dropped from this
            # extract and is restored here -- confirm.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                normalized = _normalize("\n", value)
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
257 def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
258 """Map ast nodes deprecated in 3.8 to Constant."""
259 if isinstance(node, (ast.Str, ast3.Str, ast.Bytes, ast3.Bytes)):
260 return ast.Constant(value=node.s)
262 if isinstance(node, (ast.Num, ast3.Num)):
263 return ast.Constant(value=node.n)
265 if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
266 return ast.Constant(value=node.value)