All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
2 Parse Python code and perform AST validation.
import ast
import sys
from typing import Iterable, Iterator, List, Set, Tuple

from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
from black.nodes import syms
from blib2to3 import pygram
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2.tokenize import TokenError
from blib2to3.pytree import Leaf, Node
class InvalidInput(ValueError):
    """Error raised when the input source code fails every parse attempt."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for *target_versions*.

    With an empty *target_versions* every grammar is returned: the
    async-keywords grammar first, then the plain grammar, then the
    soft-keywords grammar. Otherwise a grammar is included only when the
    corresponding feature check on *target_versions* passes.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_async_keywords,
            pygram.python_grammar,
            pygram.python_grammar_soft_keywords,
        ]

    grammars: List[Grammar] = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(pygram.python_grammar_async_keywords)
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar)
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar returned by ``get_grammars`` for *target_versions* is tried
    in turn; the first successful parse wins. A failure is recorded as an
    ``InvalidInput`` keyed by ``grammar.version``.

    Raises:
        InvalidInput: when every grammar fails; the error recorded under the
            largest ``grammar.version`` key is the one raised.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            # NOTE(review): handler type inferred from the list indexing above.
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]
        # NOTE(review): `from None` (suppressing the parser's own traceback
        # context) is an inference; confirm against the upstream source.
        raise exc from None

    # A bare Leaf is wrapped so callers always receive a file_input Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True when *src_txt* parses under *grammar*, False otherwise.

    A ``ParseError``, ``TokenError`` or ``IndentationError`` raised by the
    driver counts as "does not match"; any other exception propagates.
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    # NOTE(review): body inferred from the docstring and the `-> str` return
    # annotation; confirm against the upstream source.
    return str(node)
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Parse *src* with the stdlib parser restricted to one feature version.

    Args:
        src: Source text to parse.
        version: ``(major, minor)`` pair forwarded as ``feature_version``.
        type_comments: Forwarded unchanged as ``type_comments``.

    Raises:
        SyntaxError: propagated from ``ast.parse`` when *src* does not parse.
    """
    filename = "<unknown>"
    # NOTE(review): the `ast.parse` call is inferred from the keyword
    # arguments on the surviving line; confirm against the upstream source.
    return ast.parse(
        src, filename, feature_version=version, type_comments=type_comments
    )
def parse_ast(src: str) -> ast.AST:
    """Parse *src* with the stdlib ``ast`` module, newest feature version first.

    Feature versions ``(3, 3)`` up to the running interpreter's minor version
    are tried from newest to oldest, first with type comments enabled and
    then, if all of those fail, with type comments disabled.

    Raises:
        SyntaxError: when no attempt succeeds; its message is the text of the
            first error recorded during the type-comment pass.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]
    # Both passes walk the same order, so sort once.
    newest_first = sorted(versions, reverse=True)

    # NOTE(review): the initialisation and update of `first_error` are
    # inferred from its use in the final `raise`; confirm against upstream.
    first_error = ""
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            if not first_error:
                first_error = str(e)

    # Try to parse without type comments
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            pass

    raise SyntaxError(first_error)
def _normalize(lineend: str, value: str) -> str:
    """Return *value* with whitespace trimmed per line and around the whole.

    Each line of *value* loses its leading and trailing whitespace, the lines
    are joined back together with *lineend*, and the joined text is stripped
    once more so blank lines at either end disappear.
    """
    trimmed_lines: List[str] = []
    for raw_line in value.splitlines():
        trimmed_lines.append(raw_line.strip())
    return lineend.join(trimmed_lines).strip()
def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line for the opening of *node*, then for every field (in
    sorted name order) a ``field=`` line followed by the field's content,
    and finally a closing line. Nested AST nodes are rendered recursively
    two levels deeper; fields the node does not actually carry are skipped.
    """
    # NOTE(review): the listing this block was restored from collapses runs
    # of whitespace, so the indent unit (' ') and the spacing before '#' in
    # the f-strings below may have been wider upstream - confirm.

    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        # NOTE(review): this condition and the assignment below are inferred
        # from the comment about stripping the u prefix - confirm.
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    yield f"{' ' * depth}{node.__class__.__name__}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        if isinstance(node, ast.TypeIgnore):
            break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    # NOTE(review): the field-name test is an inference - confirm.
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, ast.AST):
                    yield from stringify_ast(item, depth + 2)

        elif isinstance(value, ast.AST):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            if (
                isinstance(node, ast.Constant)
                # NOTE(review): the field-name test is an inference - confirm.
                and field == "value"
                and isinstance(value, str)
            ):
                # Constant strings may be indented across newlines, if they are
                # docstrings; fold spaces after newlines when comparing. Similarly,
                # trailing and leading space may be removed.
                normalized = _normalize("\n", value)
            elif field == "type_comment" and isinstance(value, str):
                # Trailing whitespace in type comments is removed.
                normalized = value.rstrip()
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"