All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
"""Parse Python code and perform AST validation."""
import ast
import platform
import sys
from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union

if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
from black.nodes import syms
from blib2to3 import pygram
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2.tokenize import TokenError
from blib2to3.pytree import Leaf, Node
# ``ast3`` is bound below either to the typed_ast module or, as a fallback, to
# the builtin ``ast`` module, so it is declared as ``Any`` up front.
ast3: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3
except ImportError:
    if sys.version_info < (3, 8) and not _IS_PYPY:
        # NOTE(review): the import guard and the way the message is reported
        # (stderr plus exit status 1) are assumptions -- confirm against upstream.
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
    else:
        # The rest of this module references ``ast3.<name>`` unconditionally, so
        # the builtin module stands in when typed_ast is not importable.
        ast3 = ast


# Appended to the parse error message when the source matches one of the
# fallback grammars checked in lib2to3_parse().
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    When ``target_versions`` is empty, the three grammars listed below are all
    returned. Otherwise a grammar is included only when the feature checks on
    ``target_versions`` allow it.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            pygram.python_grammar_no_print_statement_no_exec_statement,
            pygram.python_grammar_soft_keywords,
        ]

    grammars = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar returned by get_grammars() is tried in turn and the first
    successful parse wins.

    Raises:
        InvalidInput: if no grammar can parse ``src_txt``. The error recorded
            for the highest ``grammar.version`` is the one raised; PY2_HINT is
            appended to its message when the source matches one of the two
            fallback grammars checked at the end.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    # Maps grammar.version to the InvalidInput produced by that grammar.
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Only reached when no grammar parsed the source (the loop never hit
        # ``break``). Choose the latest version when raising the actual parsing
        # error.
        assert len(errors) >= 1
        exc = errors[max(errors)]

        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    # A bare Leaf result is wrapped so callers always receive a Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if ``src_txt`` parses with ``grammar``, False otherwise.

    ParseError, TokenError and IndentationError from the driver are treated as
    "does not match"; any other exception propagates.
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> Union[ast.AST, ast3.AST]:
    """Parse ``src`` into an AST for a single ``(major, minor)`` feature version.

    Args:
        src: the source code to parse.
        version: the feature version to parse for.
        type_comments: whether type comments should be tracked, where the
            parser in use supports that.

    Raises:
        SyntaxError: propagated from the underlying ``parse`` call.
    """
    filename = "<unknown>"
    # typed-ast is needed because of feature version limitations in the builtin ast 3.8>
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(
            src, filename, feature_version=version, type_comments=type_comments
        )

    # NOTE(review): the two branch conditions below (``_IS_PYPY`` and
    # ``type_comments``) are restored from the adjacent comments -- confirm.
    if _IS_PYPY:
        # PyPy 3.7 doesn't support type comment tracking which is not ideal, but there's
        # not much we can do as typed-ast won't work either.
        if sys.version_info >= (3, 8):
            return ast3.parse(src, filename, type_comments=type_comments)
        else:
            return ast3.parse(src, filename)
    else:
        if type_comments:
            # Typed-ast is guaranteed to be used here and automatically tracks type
            # comments separately.
            return ast3.parse(src, filename, feature_version=version[1])
        else:
            return ast.parse(src, filename)
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse ``src`` into an AST, trying the newest feature version first.

    Every version from (3, 3) up to the running interpreter's minor version is
    tried with type comments enabled; if all fail, the same versions are tried
    again without type comments.

    Raises:
        SyntaxError: if every attempt fails. The message is that of the first
            failure from the pass with type comments.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            # Remember only the first failure for the final error report.
            if not first_error:
                first_error = str(e)

    # Try to parse without type comments
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            pass

    raise SyntaxError(first_error)
# Module-level alias so stringify_ast() can reference the typed-ast AST base class
# through a global variable rather than a direct module attribute access.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
201 def _normalize(lineend: str, value: str) -> str:
202 # To normalize, we strip any leading and trailing space from
204 stripped: List[str] = [i.strip() for i in value.splitlines()]
205 normalized = lineend.join(stripped)
206 # ...and remove any blank lines at the beginning and end of
208 return normalized.strip()
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one string per line of a textual dump of ``node``: an opening line
    with the class name, one line per field name, the recursively dumped field
    values, and a closing line. ``depth`` controls the width of the leading
    padding on each line.
    """
    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so there is no need
        # for this check there.
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore,)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                # NOTE(review): restored as ``break`` (skip all fields of a
                # TypeIgnore node) -- confirm against upstream.
                break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            # A field listed in _fields but not set on this node is skipped.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # problem.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                normalized = _normalize("\n", value)
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes (from either ``ast`` or ``ast3``) are
    replaced by an ``ast.Constant`` carrying the same value. Any other node is
    returned unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast.Bytes, ast3.Bytes)):
        return ast.Constant(value=node.s)

    if isinstance(node, (ast.Num, ast3.Num)):
        return ast.Constant(value=node.n)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        return ast.Constant(value=node.value)

    return node