All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
"""
Parse Python code and perform AST validation.
"""
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
9 if sys.version_info < (3, 8):
10 from typing_extensions import Final
12 from typing import Final
14 from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
15 from black.nodes import syms
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21 from blib2to3.pytree import Leaf, Node
25 _IS_PYPY = platform.python_implementation() == "PyPy"
28 from typed_ast import ast3
30 if sys.version_info < (3, 8) and not _IS_PYPY:
32 "The typed_ast package is required but not installed.\n"
33 "You can upgrade to Python 3.8+ or install typed_ast with\n"
34 "`python3 -m pip install typed-ast`.",
# Appended to the InvalidInput message raised by lib2to3_parse when the source
# fails every selected grammar but still parses under `pygram.python_grammar`
# or `pygram.python_grammar_no_print_statement`.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for *target_versions*.

    When *target_versions* is empty, all three grammars are returned.
    Otherwise each grammar is included only if the feature flags of the
    requested versions call for it.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            pygram.python_grammar_no_print_statement_no_exec_statement,
            pygram.python_grammar_soft_keywords,
        ]

    grammars = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar returned by `get_grammars` is tried in order and the first
    successful parse wins. A bare `Leaf` result is wrapped in a `file_input`
    node so that a `Node` is returned.

    Raises:
        InvalidInput: if no grammar can parse *src_txt*. The error recorded for
            the highest grammar version is reported, with `PY2_HINT` appended
            when the source parses under `pygram.python_grammar` or
            `pygram.python_grammar_no_print_statement`.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    # One InvalidInput per failed grammar, keyed by the grammar's version.
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]

        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses under *grammar*, False otherwise.

    A `ParseError`, `TokenError` or `IndentationError` raised by the driver
    counts as "does not match"; any other exception propagates.
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> Union[ast.AST, ast3.AST]:
    """Parse *src* into an AST as the given Python *version*.

    Args:
        src: source code to parse.
        version: `(major, minor)` feature version to parse as.
        type_comments: whether type comments should be tracked, where the
            parser in use supports it.

    Raises:
        SyntaxError: propagated from the underlying parser.
    """
    filename = "<unknown>"
    # typed-ast is needed because of feature version limitations in the builtin ast 3.8>
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(
            src, filename, feature_version=version, type_comments=type_comments
        )

    if _IS_PYPY:
        # PyPy 3.7 doesn't support type comment tracking which is not ideal, but there's
        # not much we can do as typed-ast won't work either.
        if sys.version_info >= (3, 8):
            return ast3.parse(src, filename, type_comments=type_comments)
        else:
            return ast3.parse(src, filename)
    else:
        if type_comments:
            # Typed-ast is guaranteed to be used here and automatically tracks type
            # comments separately.
            return ast3.parse(src, filename, feature_version=version[1])
        else:
            return ast.parse(src, filename)
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse *src* with the newest Python 3 feature version that accepts it.

    Versions from the running interpreter's minor version down to 3.3 are
    tried with type comments enabled, then again with type comments disabled.

    Raises:
        SyntaxError: if every attempt fails; the message is that of the first
            failure from the type-comment pass.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            # Keep only the error from the newest version for the final report.
            if not first_error:
                first_error = str(e)

    # Try to parse without type comments
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            pass

    raise SyntaxError(first_error)
# Module-level alias for the typed-ast AST base class. stringify_ast checks
# against this global instead of accessing `ast3.AST` directly.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
199 def _normalize(lineend: str, value: str) -> str:
200 # To normalize, we strip any leading and trailing space from
202 stripped: List[str] = [i.strip() for i in value.splitlines()]
203 normalized = lineend.join(stripped)
204 # ...and remove any blank lines at the beginning and end of
206 return normalized.strip()
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields an opening line for *node*, then for each of its fields (in sorted
    order) the field name followed by its children or leaf value, and finally
    a closing line. Nested nodes are rendered recursively at a greater *depth*.
    """

    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so no need for this
        # check there.
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore,)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            # Fields that are not set on this node are skipped.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids
        # the problem.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                normalized = _normalize("\n", value)
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    `Str`/`Bytes`, `Num` and `NameConstant` nodes (from either `ast` or
    `ast3`) are replaced by an `ast.Constant` carrying the same value; any
    other node is returned unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast.Bytes, ast3.Bytes)):
        return ast.Constant(value=node.s)

    if isinstance(node, (ast.Num, ast3.Num)):
        return ast.Constant(value=node.n)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        return ast.Constant(value=node.value)

    return node