All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
"""
Parse Python code and perform AST validation.
"""
import ast
import sys
from typing import Iterable, Iterator, List, Set, Union, Tuple

from blib2to3.pytree import Node, Leaf
from blib2to3 import pygram, pytree
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError

from black.mode import TargetVersion, Feature, supports_feature
from black.nodes import syms
# Prefer typed_ast when it is installed. Without it, interpreters older than
# 3.8 cannot continue; newer ones fall back to the builtin ``ast`` module under
# both names (other code in this module checks ``ast27.__name__ != "ast"``).
try:
    from typed_ast import ast3, ast27
except ImportError:
    if sys.version_info < (3, 8):
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
    else:
        ast3 = ast27 = ast
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    * An empty set means no target was specified: every grammar is returned.
    * If every target is Python 2, only the two Python 2 grammars are returned.
    * Otherwise only Python 3 grammars are returned, selected by which of the
      ``ASYNC_IDENTIFIERS`` / ``ASYNC_KEYWORDS`` features the targets support.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            pygram.python_grammar,
        ]

    if all(version.is_python2() for version in target_versions):
        # Python 2-only code, so try Python 2 grammars.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            pygram.python_grammar,
        ]

    # Python 3-compatible code, so only try Python 3 grammar.
    grammars = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Every grammar returned by `get_grammars` is tried in order; the first one
    that parses the source wins.

    Raises:
        InvalidInput: if no grammar can parse the source. The message carries
            the position and offending line reported by the last grammar tried.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar, pytree.convert)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                # The reported line number lies outside the source text.
                faulty_line = "<line number missing in source>"
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
    else:
        # Loop finished without `break`: every grammar failed.
        raise exc from None

    if isinstance(result, Leaf):
        # Wrap a bare leaf so callers always receive a Node.
        result = Node(syms.file_input, [result])
    return result
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` into an AST using the grammar of one Python `version`.

    Args:
        src: the source code to parse.
        version: ``(major, minor)`` of the Python version to parse as.

    Returns:
        The module AST: from the builtin ``ast`` when running on Python 3.8+
        and targeting Python 3, from ``ast3`` for Python 3 targets otherwise,
        and from ``ast27`` for ``(2, 7)``.

    Raises:
        SyntaxError: propagated from the underlying parser.
        AssertionError: if `version` is neither 3.x nor ``(2, 7)``.
    """
    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version)
    elif version >= (3,):
        return ast3.parse(src, filename, feature_version=version[1])
    elif version == (2, 7):
        return ast27.parse(src)
    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` with the newest Python version that accepts it.

    Versions from the running interpreter's minor version down to 3.3 are
    tried newest first, followed by 2.7 when ``ast27`` is not merely an alias
    of the builtin ``ast`` module.

    Raises:
        SyntaxError: if every version fails; the message is that of the first
            (newest-version) failure.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    if ast27.__name__ != "ast":
        versions.append((2, 7))

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version)
        except SyntaxError as e:
            # Remember only the first failure for the final error message.
            if not first_error:
                first_error = str(e)

    raise SyntaxError(first_error)
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line per node opening, field name, leaf value and node closing,
    indented according to `depth`. Child nodes are visited recursively.
    """

    node = fixup_ast_constants(node)

    yield f"{' ' * depth}{node.__class__.__name__}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        if isinstance(node, type_ignore_classes):
            break

        try:
            value = getattr(node, field)
        except AttributeError:
            # The field is declared but not set on this node; skip it.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(item, depth + 2)

        elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, (str, bytes))
            ):
                lineend = "\n" if isinstance(value, str) else b"\n"
                # To normalize, we strip any leading and trailing space from
                # each line...
                stripped = [line.strip() for line in value.splitlines()]
                normalized = lineend.join(stripped)  # type: ignore[attr-defined]
                # ...and remove any blank lines at the beginning and end of
                # the whole string
                normalized = normalized.strip()
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"
def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    String, bytes, number and name-constant nodes are replaced by a fresh
    ``ast.Constant`` carrying only the value; any other node is returned
    unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        return ast.Constant(value=node.s)

    if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        return ast.Constant(value=node.n)

    if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        return ast.Constant(value=node.value)

    return node