git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Report all stacktraces in verbose mode (#3938)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import sys
6 from typing import Iterable, Iterator, List, Set, Tuple
7
8 from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
9 from black.nodes import syms
10 from blib2to3 import pygram
11 from blib2to3.pgen2 import driver
12 from blib2to3.pgen2.grammar import Grammar
13 from blib2to3.pgen2.parse import ParseError
14 from blib2to3.pgen2.tokenize import TokenError
15 from blib2to3.pytree import Leaf, Node
16
17
class InvalidInput(ValueError):
    """Signal that the given source text could not be parsed.

    Raised once every attempted grammar has rejected the input.
    """
20
21
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for *target_versions*.

    An empty set means "no preference": every known grammar is returned.
    Otherwise only the grammars compatible with the requested versions are
    returned, with the async-as-keyword grammar first when it applies.
    """
    if not target_versions:
        # Nothing was requested explicitly, so every grammar is a candidate.
        return [
            pygram.python_grammar_async_keywords,  # Python 3.7-3.9
            pygram.python_grammar,  # Python 3.0-3.6
            pygram.python_grammar_soft_keywords,  # Python 3.10+
        ]

    candidates: List[Grammar] = []

    # When both async flavours are needed, async-as-keyword is attempted first.
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or supports_feature(target_versions, Feature.PATTERN_MATCHING)
    ):
        # Python 3.7-3.9
        candidates.append(pygram.python_grammar_async_keywords)

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        candidates.append(pygram.python_grammar)

    wants_pattern_matching = any(
        Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[version]
        for version in target_versions
    )
    if wants_pattern_matching:
        # Python 3.10+
        candidates.append(pygram.python_grammar_soft_keywords)

    # Every Python version carries exactly one of the two 'ASYNC_*' flags, so at
    # least one of the branches above must have added a grammar.
    return candidates
51
52
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 Node.

    Each grammar returned by ``get_grammars`` is tried in turn and the first
    successful parse wins. A trailing newline is appended to the source when
    it is missing.

    Raises:
        InvalidInput: if no grammar could parse the source. The error reported
            is the one recorded for the highest grammar version.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    failures = {}
    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(src_txt, True)

        except ParseError as parse_err:
            lineno, column = parse_err.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as token_err:
            # Raised in edge cases; there is typically no "faulty_line" to show,
            # so the tokenizer's own message is reported instead.
            lineno, column = token_err.args[1]
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {token_err.args[0]}"
            )

        else:
            # Parsed successfully; no need to try the remaining grammars.
            break

    else:
        # No grammar succeeded: surface the error from the latest version.
        assert len(failures) >= 1
        raise failures[max(failures)] from None

    if isinstance(tree, Leaf):
        # A bare leaf is wrapped so callers always receive a file_input Node.
        return Node(syms.file_input, [tree])
    return tree
93
94
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses under *grammar*, False otherwise."""
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        # Any of these means the grammar rejected the source.
        return False
    return True
103
104
def lib2to3_unparse(node: Node) -> str:
    """Return the string form of the lib2to3 *node*, as produced by ``str``."""
    return str(node)
109
110
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Parse *src* with ``ast.parse`` for one specific feature version.

    *version* is passed as ``feature_version`` and *type_comments* is
    forwarded unchanged; the filename is reported as ``"<unknown>"``.
    """
    return ast.parse(
        src,
        "<unknown>",
        feature_version=version,
        type_comments=type_comments,
    )
118
119
def parse_ast(src: str) -> ast.AST:
    """Parse *src* into an AST, trying feature versions from newest to oldest.

    Every version from the running interpreter's minor version down to 3.3 is
    attempted with type comments enabled; if all fail, the same versions are
    retried with type comments disabled.

    Raises:
        SyntaxError: if every attempt fails. The message is taken from the
            first failure seen while parsing with type comments.
    """
    # TODO: support Python 4+ ;)
    newest_first = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    first_error = ""
    # First pass keeps type comments; the second pass drops them.
    for with_type_comments in (True, False):
        for version in newest_first:
            try:
                return parse_single_version(
                    src, version, type_comments=with_type_comments
                )
            except SyntaxError as err:
                # Only failures from the type-comments pass are remembered.
                if with_type_comments and not first_error:
                    first_error = str(err)

    raise SyntaxError(first_error)
140
141
def _normalize(lineend: str, value: str) -> str:
    """Strip each line of *value*, rejoin with *lineend*, and trim the result.

    Leading and trailing whitespace is removed from every line, and then from
    the joined string as a whole, which also drops blank lines at the
    beginning and end.
    """
    trimmed_lines = (line.strip() for line in value.splitlines())
    return lineend.join(trimmed_lines).strip()
150
151
def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
    """Yield an indented, line-by-line rendering of *node* for AST comparison.

    Fields are visited in sorted order, with two spaces of indentation per
    nesting level. Some differences are deliberately smoothed over: the "u"
    kind on string constants, whitespace around the lines of string
    constants, trailing whitespace in type comments, and tuples nested
    directly inside ``del`` targets.

    Note: a string ``ast.Constant`` whose kind is "u" has its ``kind`` reset
    to ``None`` in place.
    """
    is_constant = isinstance(node, ast.Constant)
    if is_constant and isinstance(node.value, str) and node.kind == "u":
        # It's a quirk of history that the u prefix is stripped here: AST nodes
        # used to be rewritten for Python version compatibility and the kind
        # was never copied over.
        node.kind = None

    type_name = node.__class__.__name__
    own_indent = "  " * depth
    field_indent = "  " * (depth + 1)
    value_indent = "  " * (depth + 2)

    yield f"{own_indent}{type_name}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison, so
    # none of its fields are rendered.
    field_names = [] if isinstance(node, ast.TypeIgnore) else sorted(node._fields)

    for field in field_names:  # noqa: F402
        try:
            value: object = getattr(node, field)
        except AttributeError:
            # Declared in _fields but not set on this node; skip it.
            continue

        yield f"{field_indent}{field}="

        if isinstance(value, list):
            # Nested tuples within del statements are flattened, because
            # parentheses may be inserted there and they change the AST.
            flatten_tuples = field == "targets" and isinstance(node, ast.Delete)
            for item in value:
                if flatten_tuples and isinstance(item, ast.Tuple):
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)
                elif isinstance(item, ast.AST):
                    yield from stringify_ast(item, depth + 2)
            continue

        if isinstance(value, ast.AST):
            yield from stringify_ast(value, depth + 2)
            continue

        normalized: object = value
        if is_constant and field == "value" and isinstance(value, str):
            # Constant strings may be indented across newlines if they are
            # docstrings; fold spaces after newlines when comparing. Leading
            # and trailing space may likewise have been removed.
            normalized = _normalize("\n", value)
        elif field == "type_comment" and isinstance(value, str):
            # Trailing whitespace in type comments is removed.
            normalized = value.rstrip()

        yield f"{value_indent}{normalized!r},  # {value.__class__.__name__}"

    yield f"{own_indent})  # /{type_name}"