All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
2 Parse Python code and perform AST validation.
6 from typing import Iterable, Iterator, List, Set, Union, Tuple
9 from blib2to3.pytree import Node, Leaf
10 from blib2to3 import pygram, pytree
11 from blib2to3.pgen2 import driver
12 from blib2to3.pgen2.grammar import Grammar
13 from blib2to3.pgen2.parse import ParseError
15 from black.mode import TargetVersion, Feature, supports_feature
16 from black.nodes import syms
19 from typed_ast import ast3, ast27
21 if sys.version_info < (3, 8):
23 "The typed_ast package is required but not installed.\n"
24 "You can upgrade to Python 3.8+ or install typed_ast with\n"
25 "`python3 -m pip install typed-ast`.",
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
37 def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
38 if not target_versions:
39 # No target_version specified, so try all grammars.
42 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
44 pygram.python_grammar_no_print_statement_no_exec_statement,
45 # Python 2.7 with future print_function import
46 pygram.python_grammar_no_print_statement,
48 pygram.python_grammar,
51 if all(version.is_python2() for version in target_versions):
52 # Python 2-only code, so try Python 2 grammars.
54 # Python 2.7 with future print_function import
55 pygram.python_grammar_no_print_statement,
57 pygram.python_grammar,
60 # Python 3-compatible code, so only try Python 3 grammar.
62 if supports_feature(target_versions, Feature.PATTERN_MATCHING):
64 grammars.append(pygram.python_grammar_soft_keywords)
65 # If we have to parse both, try to parse async as a keyword first
66 if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
69 pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
71 if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
73 grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
74 # At least one of the above branches must have been taken, because every Python
75 # version has exactly one of the two 'ASYNC_*' flags
79 def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
80 """Given a string with source, return the lib2to3 Node."""
81 if not src_txt.endswith("\n"):
84 for grammar in get_grammars(set(target_versions)):
85 drv = driver.Driver(grammar, pytree.convert)
87 result = drv.parse_string(src_txt, True)
90 except ParseError as pe:
91 lineno, column = pe.context[1]
92 lines = src_txt.splitlines()
94 faulty_line = lines[lineno - 1]
96 faulty_line = "<line number missing in source>"
97 exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
101 if isinstance(result, Leaf):
102 result = Node(syms.file_input, [result])
106 def lib2to3_unparse(node: Node) -> str:
107 """Given a lib2to3 node, return its string representation."""
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* into an AST using the grammar of one Python version.

    *version* is a ``(major, minor)`` tuple. Python 3 targets are parsed with
    the builtin ``ast`` module when running on Python 3.8+, and with
    ``ast3`` otherwise (which receives only the minor version as its
    ``feature_version``). The ``(2, 7)`` target is parsed with ``ast27``.

    Raises ``AssertionError`` for any other version; errors raised by the
    underlying ``parse`` call are not caught here.
    """
    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version)
    elif version >= (3,):
        return ast3.parse(src, filename, feature_version=version[1])
    elif version == (2, 7):
        return ast27.parse(src)
    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
126 def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
127 # TODO: support Python 4+ ;)
128 versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]
130 if ast27.__name__ != "ast":
131 versions.append((2, 7))
134 for version in sorted(versions, reverse=True):
136 return parse_single_version(src, version)
137 except SyntaxError as e:
141 raise SyntaxError(first_error)
145 node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
147 """Simple visitor generating strings to compare ASTs by content."""
149 node = fixup_ast_constants(node)
151 yield f"{' ' * depth}{node.__class__.__name__}("
153 for field in sorted(node._fields): # noqa: F402
154 # TypeIgnore has only one field 'lineno' which breaks this comparison
155 type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
156 if sys.version_info >= (3, 8):
157 type_ignore_classes += (ast.TypeIgnore,)
158 if isinstance(node, type_ignore_classes):
162 value = getattr(node, field)
163 except AttributeError:
166 yield f"{' ' * (depth+1)}{field}="
168 if isinstance(value, list):
170 # Ignore nested tuples within del statements, because we may insert
171 # parentheses and they change the AST.
174 and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
175 and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
177 for item in item.elts:
178 yield from stringify_ast(item, depth + 2)
180 elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
181 yield from stringify_ast(item, depth + 2)
183 elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
184 yield from stringify_ast(value, depth + 2)
187 # Constant strings may be indented across newlines, if they are
188 # docstrings; fold spaces after newlines when comparing. Similarly,
189 # trailing and leading space may be removed.
190 # Note that when formatting Python 2 code, at least with Windows
191 # line-endings, docstrings can end up here as bytes instead of
192 # str so make sure that we handle both cases.
194 isinstance(node, ast.Constant)
196 and isinstance(value, (str, bytes))
198 lineend = "\n" if isinstance(value, str) else b"\n"
199 # To normalize, we strip any leading and trailing space from
201 stripped = [line.strip() for line in value.splitlines()]
202 normalized = lineend.join(stripped) # type: ignore[attr-defined]
203 # ...and remove any blank lines at the beginning and end of
205 normalized = normalized.strip()
208 yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"
210 yield f"{' ' * depth}) # /{node.__class__.__name__}"
213 def fixup_ast_constants(
214 node: Union[ast.AST, ast3.AST, ast27.AST]
215 ) -> Union[ast.AST, ast3.AST, ast27.AST]:
216 """Map ast nodes deprecated in 3.8 to Constant."""
217 if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
218 return ast.Constant(value=node.s)
220 if isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
221 return ast.Constant(value=node.n)
223 if isinstance(node, (ast.NameConstant, ast3.NameConstant)):
224 return ast.Constant(value=node.value)