All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
2 Parse Python code and perform AST validation.
7 from typing import Any, AnyStr, Iterable, Iterator, List, Set, Tuple, Type, Union
9 if sys.version_info < (3, 8):
10 from typing_extensions import Final
12 from typing import Final
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
22 from black.mode import TargetVersion, Feature, supports_feature
23 from black.nodes import syms
28 _IS_PYPY = platform.python_implementation() == "PyPy"
31 from typed_ast import ast3, ast27
33 # Either our python version is too low, or we're on pypy
34 if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
36 "The typed_ast package is required but not installed.\n"
37 "You can upgrade to Python 3.8+ or install typed_ast with\n"
38 "`python3 -m pip install typed-ast`.",
48 ] = "Consider using --target-version py310 to parse Python 3.10 code."
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts.

    The message (``args[0]``) has the form ``Cannot parse: <line>:<column>: <detail>``.
    """
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try for *target_versions*, in the order to try them.

    With no target versions, every grammar is returned. When all targets are
    Python 2, only the Python 2 grammars are returned. Otherwise the Python 3
    grammars are picked according to the features the targets support.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # `async` / `await` parsed as keywords
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # `async` / `await` not parsed as keywords
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            pygram.python_grammar,
        ]

    if all(version.is_python2() for version in target_versions):
        # Python 2-only code, so try Python 2 grammars.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            pygram.python_grammar,
        ]

    # Python 3-compatible code, so only try Python 3 grammar.
    grammars = []
    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        # Pattern matching needs the soft-keyword grammar.
        grammars.append(pygram.python_grammar_soft_keywords)
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    A trailing newline is appended when missing. Each grammar returned by
    get_grammars() is tried in order and the first successful parse wins.

    Raises:
        InvalidInput: if every grammar fails. The message describes the failure
            of the last grammar tried. PY310_HINT is appended when the
            soft-keyword grammar was not among the candidates but does parse
            the source.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {te.args[0]}")

    else:
        # Reached only when no grammar parsed the source (the loop never broke).
        if pygram.python_grammar_soft_keywords not in grammars and matches_grammar(
            src_txt, pygram.python_grammar_soft_keywords
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY310_HINT}"
            raise InvalidInput(msg) from None

        # NOTE(review): this fallback raise was dropped from the listing and is
        # reconstructed — confirm it matches the project's file.
        raise exc from None

    # A source that parses to a single leaf is wrapped so callers always get a Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses under *grammar*, False otherwise.

    ParseError, TokenError and IndentationError from the driver count as
    "does not match"; any other exception propagates.
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
149 def lib2to3_unparse(node: Node) -> str:
150 """Given a lib2to3 node, return its string representation."""
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* with the parser matching one specific Python *version*.

    Args:
        src: the source code to parse.
        version: a ``(major, minor)`` pair.

    Raises:
        AssertionError: if *version* is neither a Python 3 version nor ``(2, 7)``.
    """
    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version)
    elif version >= (3,):
        # NOTE(review): the guard separating the two calls below was dropped from
        # the listing; `_IS_PYPY` is inferred from the module's other PyPy
        # handling — confirm against the project's file.
        if _IS_PYPY:
            return ast3.parse(src, filename)
        else:
            return ast3.parse(src, filename, feature_version=version[1])
    elif version == (2, 7):
        return ast27.parse(src)
    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* with the newest Python version that accepts it.

    Versions from the running interpreter's minor version down to 3.3 are
    tried in descending order. (2, 7) is added last when ``ast27`` is not the
    builtin ``ast`` module.

    Raises:
        SyntaxError: if no version parses *src*. The message is that of the
            first (newest-version) failure.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    if ast27.__name__ != "ast":
        versions.append((2, 7))

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version)
        except SyntaxError as e:
            # Keep only the first failure for the final error message.
            if not first_error:
                first_error = str(e)

    raise SyntaxError(first_error)
# Module-level aliases for the typed-ast base node classes. stringify_ast checks
# against these globals instead of reading `ast3.AST` / `ast27.AST` directly.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
194 def _normalize(lineend: AnyStr, value: AnyStr) -> AnyStr:
195 # To normalize, we strip any leading and trailing space from
197 stripped: List[AnyStr] = [i.strip() for i in value.splitlines()]
198 normalized = lineend.join(stripped)
199 # ...and remove any blank lines at the beginning and end of
201 return normalized.strip()
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line for the node opening, one per field name, one per leaf
    value and one for the node closing, each indented according to *depth*.
    Child nodes are visited recursively at ``depth + 2``.
    """

    node = fixup_ast_constants(node)

    # NOTE(review): the indent unit and the spacing before the trailing "#"
    # markers in the f-strings below are copied as shown in a listing whose
    # whitespace appears collapsed — confirm against the project's file.
    yield f"{' ' * depth}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so no need for this
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value = getattr(node, field)
        except AttributeError:
            # The field is declared but not set on this node; nothing to emit.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"  # NOTE(review): inferred operand — confirm
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 / ast27 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # breakage.
        elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            if (
                isinstance(node, ast.Constant)
                and field == "value"  # NOTE(review): inferred operand — confirm
                and isinstance(value, (str, bytes))
            ):
                if isinstance(value, str):
                    normalized: Union[str, bytes] = _normalize("\n", value)
                else:
                    normalized = _normalize(b"\n", value)
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
278 def fixup_ast_constants(
279 node: Union[ast.AST, ast3.AST, ast27.AST]
280 ) -> Union[ast.AST, ast3.AST, ast27.AST]:
281 """Map ast nodes deprecated in 3.8 to Constant."""
282 if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
283 return ast.Constant(value=node.s)
285 if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
286 return ast.Constant(value=node.n)
288 if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
289 return ast.Constant(value=node.value)