All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
"""
Parse Python code and perform AST validation.
"""
# NOTE(review): this preamble was rebuilt from a damaged listing (fused line
# numbers, dropped lines). The `import ast/platform/sys` lines, the
# `file=sys.stderr` / `sys.exit(1)` calls and the `ast3 = ast` fallback are
# inferred from how those names are used further down -- confirm against the
# upstream file.
import ast
import platform
import sys
from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union

if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

from black.mode import Feature, TargetVersion, supports_feature
from black.nodes import syms
from blib2to3 import pygram
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2.tokenize import TokenError
from blib2to3.pytree import Leaf, Node

# Declared as Any so that both the typed_ast import and the stdlib fallback
# below are acceptable to a type checker.
ast3: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3
except ImportError:
    # Either our python version is too low, or we're on pypy
    if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
    else:
        # typed_ast is not needed here; the rest of the module still refers to
        # `ast3`, so alias it to the builtin module.
        ast3 = ast


PY2_HINT: Final = "Python 2 support was removed in version 22.0."
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for *target_versions*.

    With no target versions every known grammar is returned. Otherwise the
    list is narrowed using the features that the target versions support.
    """
    # NOTE(review): rebuilt from a damaged listing; the `return [ ... ]`
    # wrapper, the `grammars` initialisation, the `grammars.append(` call
    # around the first grammar and the final `return grammars` were dropped
    # there and are restored from context.
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            pygram.python_grammar_no_print_statement_no_exec_statement,
            pygram.python_grammar_soft_keywords,
        ]

    grammars: List[Grammar] = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    The grammars returned by get_grammars() for *target_versions* are tried in
    order; the first one that parses *src_txt* wins.

    Raises:
        InvalidInput: if every grammar fails. The error recorded for the
            highest grammar version is reported, with PY2_HINT appended when
            the source matches one of the print-statement-era grammars.
    """
    # NOTE(review): rebuilt from a damaged listing; `src_txt += "\n"`,
    # `errors = {}`, the `try:` / `break` / `for ... else:` scaffolding, the
    # `except IndexError:` guard, `raise exc from None` and `return result`
    # were dropped there and are restored from context.
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]

        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    # A bare Leaf is wrapped so that callers always receive a Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses under *grammar*, False otherwise."""
    # NOTE(review): rebuilt from a damaged listing; `try:` and the two
    # boolean returns were dropped there and are restored from the `-> bool`
    # signature and the surviving `except` clause.
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    # NOTE(review): the body was missing from the damaged listing; `str(node)`
    # is inferred from the docstring and the `-> str` return type -- confirm
    # against the upstream file.
    code = str(node)
    return code
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST]:
    """Parse *src* into an AST using the syntax of Python *version*.

    Raises:
        SyntaxError: propagated from the underlying parser.
        AssertionError: if *version* is not a Python 3 version.
    """
    # NOTE(review): rebuilt from a damaged listing; the
    # `elif version >= (3,):` / `if _IS_PYPY:` / `else:` lines were dropped
    # there and are restored from the surviving comments and the trailing
    # `raise`, which would otherwise be unreachable.
    filename = "<unknown>"
    # typed-ast is needed because of feature version limitations in the builtin ast 3.8>
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version, type_comments=True)
    elif version >= (3,):
        if _IS_PYPY:
            # PyPy 3.7 doesn't support type comment tracking which is not ideal, but there's
            # not much we can do as typed-ast won't work either.
            if sys.version_info >= (3, 8):
                return ast3.parse(src, filename, type_comments=True)
            else:
                return ast3.parse(src, filename)
        else:
            # Typed-ast is guaranteed to be used here and automatically tracks type
            # comments separately.
            return ast3.parse(src, filename, feature_version=version[1])

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse *src* with the newest Python 3 syntax version that accepts it.

    Versions from the running interpreter's minor version down to 3.3 are
    tried in descending order.

    Raises:
        SyntaxError: if every version fails; the message is the one produced
            by the first (newest) attempt.
    """
    # NOTE(review): rebuilt from a damaged listing; the `first_error`
    # initialisation/assignment and the `try:` line were dropped there and are
    # restored from the surviving `raise SyntaxError(first_error)`.
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version)
        except SyntaxError as e:
            # Keep only the newest version's message; it is the most relevant.
            if not first_error:
                first_error = str(e)

    raise SyntaxError(first_error)
# Module-level alias for the typed-ast base node class (see the mypyc note in
# stringify_ast for why it is referenced through a global).
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
190 def _normalize(lineend: str, value: str) -> str:
191 # To normalize, we strip any leading and trailing space from
193 stripped: List[str] = [i.strip() for i in value.splitlines()]
194 normalized = lineend.join(stripped)
195 # ...and remove any blank lines at the beginning and end of
197 return normalized.strip()
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content."""
    # NOTE(review): rebuilt from a damaged listing. The `break`, `try:`,
    # `continue`, `for item in value:`, `if (` / `):` and `else:` lines as
    # well as the `field == ...` operands were dropped there and are restored
    # from context. Runs of spaces were collapsed in that listing too, so the
    # indent unit (`' '`) and the `, # ` / `) # /` separators inside the
    # f-strings below should be confirmed against the upstream file.

    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so need for this
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore,)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            # Fields listed in _fields but not set on this node are skipped.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                normalized = _normalize("\n", value)
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def _existing_node_types(*names: str) -> Tuple[Type[Any], ...]:
    """Return the node classes called *names* that `ast` / `ast3` actually define."""
    found = []
    for name in names:
        for module in (ast, ast3):
            # The legacy aliases are looked up defensively: an interpreter
            # that no longer defines them would raise AttributeError on a
            # plain `ast.Str` access.
            cls = getattr(module, name, None)
            if cls is not None:
                found.append(cls)
    return tuple(found)


def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes are replaced by an equivalent
    ``ast.Constant``; every other node is returned unchanged.
    """
    # NOTE(review): the final `return node` was missing from the damaged
    # listing and is restored from the return annotation.
    if isinstance(node, _existing_node_types("Str", "Bytes")):
        return ast.Constant(value=node.s)

    if isinstance(node, _existing_node_types("Num")):
        return ast.Constant(value=node.n)

    if isinstance(node, _existing_node_types("NameConstant")):
        return ast.Constant(value=node.value)

    return node