# All patches and comments are welcome. Please squash your changes into logical
# commits before using git-format-patch and git-send-email to send them to
# patches@git.madduck.net.
# If you'd read over the Git project's submission guidelines and adhered to them,
# I'd be especially grateful.
"""
Parse Python code and perform AST validation.
"""
import ast
import platform
import sys
from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union

if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

from blib2to3.pytree import Node, Leaf
from blib2to3 import pygram
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2.tokenize import TokenError

from black.mode import TargetVersion, Feature, supports_feature
from black.nodes import syms
# Typed as Any so the names can be bound either to the typed_ast modules or,
# as a fallback, to the builtin ``ast`` module.
ast3: Any
ast27: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3, ast27
except ImportError:
    # Either our python version is too low, or we're on pypy
    if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
    else:
        # typed_ast is optional here: let the builtin ast stand in for both.
        ast3 = ast27 = ast
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for *target_versions*.

    With no target versions every grammar is returned. If all targets are
    Python 2, only the Python 2 grammars are returned. Otherwise the Python 3
    grammars are selected from the features the targets support.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3 with async/await as keywords
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3 without async/await keywords
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    if all(version.is_python2() for version in target_versions):
        # Python 2-only code, so try Python 2 grammars.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # Python 3-compatible code, so only try Python 3 grammar.
    grammars: List[Grammar] = []
    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        # Targets with structural pattern matching (soft keywords)
        grammars.append(pygram.python_grammar_soft_keywords)
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        # async/await are keywords, no pattern matching
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # async/await may still be used as identifiers
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar returned by get_grammars() for *target_versions* is tried in
    order; the first one that parses *src_txt* wins.

    Raises:
        InvalidInput: if every grammar fails. The message describes the failure
            of the last grammar that was tried.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {te.args[0]}")

    else:
        # The loop finished without `break`: no grammar could parse the source.
        raise exc from None

    if isinstance(result, Leaf):
        # Wrap a bare leaf so callers always receive a file_input node.
        result = Node(syms.file_input, [result])
    return result
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* into an AST as the given Python ``(major, minor)`` version.

    Raises:
        SyntaxError: propagated from the underlying parser for invalid source.
        AssertionError: if *version* is neither 3.x nor exactly (2, 7).
    """
    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version)
    elif version >= (3,):
        if _IS_PYPY:
            # On PyPy the parser is called without a feature_version.
            return ast3.parse(src, filename)
        else:
            return ast3.parse(src, filename, feature_version=version[1])
    elif version == (2, 7):
        return ast27.parse(src)
    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* with the newest Python version that accepts it.

    Versions are tried from the running interpreter's minor version down to
    3.3, followed by 2.7 when a real ``ast27`` module (not the builtin ``ast``
    fallback) is available.

    Raises:
        SyntaxError: if no version can parse *src*; carries the message of the
            first failure, i.e. the one from the newest version tried.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    if ast27.__name__ != "ast":
        versions.append((2, 7))

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version)
        except SyntaxError as e:
            # Remember only the first error seen.
            if not first_error:
                first_error = str(e)

    raise SyntaxError(first_error)
# Module-level aliases for the typed-ast AST base classes; stringify_ast uses
# these instead of attribute access on the Any-typed ast3 / ast27 modules.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line per node opening, field name, leaf value and node closing,
    each prefixed according to *depth*. Fields are visited in sorted order.
    """
    # NOTE(review): the whitespace inside the f-strings below is reproduced as it
    # appears in the extracted source, which has lost other whitespace runs;
    # confirm the indent unit against the canonical file.

    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    # The TypeIgnore classes do not depend on the field being visited, so they
    # are collected once rather than on every loop iteration.
    # TypeIgnore will not be present using pypy < 3.8, so the tuple stays empty
    # there.
    type_ignore_classes: Tuple[Type[Any], ...] = ()
    if not (_IS_PYPY and sys.version_info < (3, 8)):
        type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)

    # TypeIgnore has only one field 'lineno' which breaks this comparison, so
    # none of its fields are emitted.
    fields: List[str] = (
        [] if isinstance(node, type_ignore_classes) else sorted(node._fields)
    )
    for field in fields:  # noqa: F402
        try:
            value = getattr(node, field)
        except AttributeError:
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    # Distinct name so the outer loop variable is not rebound.
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 / ast27 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, (str, bytes))
            ):
                lineend = "\n" if isinstance(value, str) else b"\n"
                # To normalize, we strip any leading and trailing space from
                # each line...
                stripped = [line.strip() for line in value.splitlines()]
                normalized = lineend.join(stripped)  # type: ignore[attr-defined]
                # ...and remove any blank lines at the beginning and end of
                # the whole string
                normalized = normalized.strip()
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes are replaced by a fresh
    ``ast.Constant`` carrying the same value; any other node is returned
    unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        return ast.Constant(value=node.s)

    if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        return ast.Constant(value=node.n)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        return ast.Constant(value=node.value)

    # Not one of the deprecated constant node types: pass it through as-is.
    return node