All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
"""Parse Python code and perform AST validation."""
import ast
import sys
from typing import Iterable, Iterator, List, Set, Tuple

from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
from black.nodes import syms
from blib2to3 import pygram
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2.tokenize import TokenError
from blib2to3.pytree import Leaf, Node
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    With no target versions every known grammar is returned. Otherwise only
    the grammars compatible with the feature flags of the targets are
    returned, preserving the same relative order.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_async_keywords,
            pygram.python_grammar,
            pygram.python_grammar_soft_keywords,
        ]

    grammars: List[Grammar] = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(pygram.python_grammar_async_keywords)
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar)
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Each grammar returned by ``get_grammars`` is tried in turn; the first
    successful parse wins.

    Raises:
        InvalidInput: if no grammar can parse ``src_txt``. The error reported
            is the one recorded under the highest ``grammar.version``.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    # Maps grammar.version -> the InvalidInput produced while trying it.
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Reached only when no grammar succeeded (the loop never hit `break`).
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]
        raise exc from None

    # A bare Leaf is wrapped so callers always receive a file_input Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if ``src_txt`` parses under ``grammar``, False otherwise."""
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    code = str(node)
    return code
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Parse ``src`` with the stdlib ``ast`` module for one feature version.

    Args:
        src: Source code to parse.
        version: ``(major, minor)`` tuple passed to ``ast.parse`` as
            ``feature_version``.
        type_comments: Passed through to ``ast.parse``.

    Raises:
        SyntaxError: if ``ast.parse`` rejects ``src``.
    """
    filename = "<unknown>"
    return ast.parse(
        src, filename, feature_version=version, type_comments=type_comments
    )
def parse_ast(src: str) -> ast.AST:
    """Parse ``src`` into an AST, trying feature versions newest first.

    Every ``(3, minor)`` version from 3.3 up to the running interpreter is
    tried with type comments enabled; if all fail, the same versions are
    retried without type comments.

    Raises:
        SyntaxError: if every attempt fails. The message is that of the first
            failure, i.e. the newest version with type comments enabled.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            # Remember only the first failure for the final error message.
            if not first_error:
                first_error = str(e)

    # Try to parse without type comments
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            pass

    raise SyntaxError(first_error)
143 def _normalize(lineend: str, value: str) -> str:
144 # To normalize, we strip any leading and trailing space from
146 stripped: List[str] = [i.strip() for i in value.splitlines()]
147 normalized = lineend.join(stripped)
148 # ...and remove any blank lines at the beginning and end of
150 return normalized.strip()
def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line per node opening, field name, leaf value and node
    closing, indented according to ``depth``. Fields are visited in sorted
    order.

    Note: a string ``ast.Constant`` whose ``kind`` is ``"u"`` has its
    ``kind`` reset to ``None`` in place before being rendered.
    """
    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    yield f"{' ' * depth}{node.__class__.__name__}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        if isinstance(node, ast.TypeIgnore):
            break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            # Fields declared on the node class but not set on this instance.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, ast.AST):
                    yield from stringify_ast(item, depth + 2)

        elif isinstance(value, ast.AST):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                # Constant strings may be indented across newlines, if they are
                # docstrings; fold spaces after newlines when comparing. Similarly,
                # trailing and leading space may be removed.
                normalized = _normalize("\n", value)
            elif field == "type_comment" and isinstance(value, str):
                # Trailing whitespace in type comments is removed.
                normalized = value.rstrip()
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"