All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
"""
Parse Python code and perform AST validation.
"""
import ast
import platform
import sys
from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union

# `Final` only lives in `typing` from Python 3.8 on.
if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

# lib2to3 fork
from blib2to3.pytree import Node, Leaf
from blib2to3 import pygram
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError

# Black
from black.mode import TargetVersion, Feature, supports_feature
from black.nodes import syms

# Declared as Any because they are bound either to the typed_ast modules or,
# as a fallback, to the builtin `ast` module (see below).
ast3: Any
ast27: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3, ast27
except ImportError:
    # Either our python version is too low, or we're on pypy
    if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
        # NOTE(review): the call wrapping this message and the exit below were
        # not present in the listing this was restored from -- confirm upstream.
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
    else:
        # typed_ast is optional here: alias both names to the builtin module.
        # parse_ast() detects this case via `ast27.__name__ != "ast"`.
        ast3 = ast27 = ast
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    An empty `target_versions` means "unknown", so every grammar is returned.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            pygram.python_grammar,
        ]

    if all(version.is_python2() for version in target_versions):
        # Python 2-only code, so try Python 2 grammars.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            pygram.python_grammar,
        ]

    # Python 3-compatible code, so only try Python 3 grammar.
    grammars = []
    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(pygram.python_grammar_soft_keywords)
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar from `get_grammars` is tried in order and the first successful
    parse wins.

    Raises:
        InvalidInput: if no grammar can parse `src_txt`. The message points at
            the position reported by the last grammar that was tried.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                # The reported line number lies outside the source text.
                faulty_line = "<line number missing in source>"
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
    else:
        # No grammar succeeded; drop the ParseError context from the traceback.
        raise exc from None

    # A bare Leaf is wrapped so that callers always get a file_input Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    code = str(node)
    return code
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` into an AST using the syntax of one Python `version`.

    Raises:
        SyntaxError: propagated from the underlying parser on invalid source.
        AssertionError: if `version` is neither 3.x nor (2, 7).
    """
    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version)
    elif version >= (3,):
        # NOTE(review): the guard selecting between the two calls below was not
        # present in the listing this was restored from; `_IS_PYPY` is assumed
        # -- confirm against upstream.
        if _IS_PYPY:
            return ast3.parse(src, filename)
        else:
            return ast3.parse(src, filename, feature_version=version[1])
    elif version == (2, 7):
        return ast27.parse(src)
    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` with the newest Python version that accepts it.

    Versions from the running interpreter's minor version down to 3.3 are tried
    newest first, followed by 2.7 when a real `ast27` module is available.

    Raises:
        SyntaxError: if every version fails; carries the message of the first
            failure, i.e. the one from the newest version tried.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    # When typed_ast is missing, ast27 is just an alias of the builtin `ast`
    # module, which cannot parse Python 2.
    if ast27.__name__ != "ast":
        versions.append((2, 7))

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version)
        except SyntaxError as e:
            if not first_error:
                first_error = str(e)

    raise SyntaxError(first_error)
# Module-level aliases for the typed-ast AST base classes. stringify_ast uses
# these instead of direct `ast3.AST` / `ast27.AST` attribute access, because
# the latter breaks mypyc.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line per node opening, field name, leaf value and node closing,
    each prefixed according to `depth`. Child nodes are visited recursively.
    """

    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so no need for this
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value = getattr(node, field)
        except AttributeError:
            # Field is declared in _fields but not set on this node; skip it.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                # NOTE(review): the first operand of this condition was not
                # present in the listing this was restored from; the field-name
                # check is assumed -- confirm against upstream.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for item in item.elts:
                        yield from stringify_ast(item, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 / ast27 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            # NOTE(review): the middle operand of this condition was not present
            # in the listing this was restored from; the field-name check is
            # assumed -- confirm against upstream.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, (str, bytes))
            ):
                lineend = "\n" if isinstance(value, str) else b"\n"
                # To normalize, we strip any leading and trailing space from
                # each line...
                stripped = [line.strip() for line in value.splitlines()]
                normalized = lineend.join(stripped)  # type: ignore[attr-defined]
                # ...and remove any blank lines at the beginning and end of
                # the whole string
                normalized = normalized.strip()
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes are replaced by a fresh `ast.Constant`
    carrying the same value; any other node is returned unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        return ast.Constant(value=node.s)

    if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        return ast.Constant(value=node.n)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        return ast.Constant(value=node.value)

    # Callers keep using the result as a node, so pass everything else through.
    return node