All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
2 Parse Python code and perform AST validation.
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
9 if sys.version_info < (3, 8):
10 from typing_extensions import Final
12 from typing import Final
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
22 from black.mode import TargetVersion, Feature, supports_feature
23 from black.nodes import syms
27 _IS_PYPY = platform.python_implementation() == "PyPy"
30 from typed_ast import ast3
32 # Either our python version is too low, or we're on pypy
33 if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
35 "The typed_ast package is required but not installed.\n"
36 "You can upgrade to Python 3.8+ or install typed_ast with\n"
37 "`python3 -m pip install typed-ast`.",
# Appended to the parse error message when the source only parses with one of
# the Python 2 grammars.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    With no target versions, every supported grammar is returned. Otherwise
    the list is built from the features the target versions support; the
    order matters because callers try the grammars one after another.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # async/await treated as keywords
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # async/await usable as identifiers
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # soft keywords (pattern matching)
            pygram.python_grammar_soft_keywords,
        ]

    grammars = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar from ``get_grammars`` is tried in order and the first
    successful parse wins.

    Raises:
        InvalidInput: if no grammar can parse ``src_txt``. The error recorded
            for the highest grammar version is reported, with ``PY2_HINT``
            appended when the source parses under a Python 2 grammar.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    # One InvalidInput per failed grammar, keyed by the grammar's version.
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Only reached when no grammar parsed the source (the loop never hit
        # ``break``).
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]

        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    # A bare Leaf result is wrapped so callers always receive a Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if ``src_txt`` parses with ``grammar``, False otherwise.

    ParseError, TokenError and IndentationError all count as "does not match".
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    code = str(node)
    return code
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST]:
    """Parse ``src`` into an AST as if it were written for Python ``version``.

    The builtin ``ast`` is used when running on Python 3.8+; otherwise the
    ``ast3`` module is used.

    Raises:
        SyntaxError: propagated from the underlying parser.
        AssertionError: if ``version`` is not a Python 3 version.
    """
    filename = "<unknown>"
    # typed-ast is needed because of feature version limitations in the builtin ast 3.8>
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version, type_comments=True)
    elif version >= (3,):
        if _IS_PYPY:
            # PyPy 3.7 doesn't support type comment tracking which is not ideal, but there's
            # not much we can do as typed-ast won't work either.
            if sys.version_info >= (3, 8):
                return ast3.parse(src, filename, type_comments=True)
            else:
                return ast3.parse(src, filename)
        else:
            # Typed-ast is guaranteed to be used here and automatically tracks type
            # comments separately.
            return ast3.parse(src, filename, feature_version=version[1])

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse ``src`` with the newest Python 3 feature version that accepts it.

    Versions from the running interpreter's minor version down to 3.3 are
    tried in descending order; the first successful parse is returned.

    Raises:
        SyntaxError: if every version fails. The message is that of the first
            failure, i.e. the one from the newest version tried.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version)
        except SyntaxError as e:
            # Keep only the error from the newest version; it is the most
            # relevant one to report.
            if not first_error:
                first_error = str(e)

    raise SyntaxError(first_error)
# Module-level alias for ast3.AST, so that isinstance() checks can reference
# the class through a typed global instead of a module attribute access.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
192 def _normalize(lineend: str, value: str) -> str:
193 # To normalize, we strip any leading and trailing space from
195 stripped: List[str] = [i.strip() for i in value.splitlines()]
196 normalized = lineend.join(stripped)
197 # ...and remove any blank lines at the beginning and end of
199 return normalized.strip()
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line per node opening, field name, leaf value and node closing,
    indented according to ``depth``. Child nodes are visited recursively.
    """
    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore is not available on PyPy < 3.8, so the check is skipped there.
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore,)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            # The field is declared in _fields but not set on this node.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                # NOTE(review): the first operand below was reconstructed; confirm
                # it against the upstream file.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # NOTE(review): the middle operand below was reconstructed; confirm
            # it against the upstream file.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                normalized = _normalize("\n", value)
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str, Bytes, Num and NameConstant nodes (from either ``ast`` or ``ast3``)
    are replaced by an equivalent ``ast.Constant``; any other node is
    returned unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast.Bytes, ast3.Bytes)):
        return ast.Constant(value=node.s)

    if isinstance(node, (ast.Num, ast3.Num)):
        return ast.Constant(value=node.n)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        return ast.Constant(value=node.value)

    return node