git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send patches to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Document pre-commit mirror (#3828)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import sys
6 from typing import Final, Iterable, Iterator, List, Set, Tuple
7
8 from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
9 from black.nodes import syms
10 from blib2to3 import pygram
11 from blib2to3.pgen2 import driver
12 from blib2to3.pgen2.grammar import Grammar
13 from blib2to3.pgen2.parse import ParseError
14 from blib2to3.pgen2.tokenize import TokenError
15 from blib2to3.pytree import Leaf, Node
16
# Hint appended to the parse error raised by lib2to3_parse when the source
# fails every selected grammar but is accepted by pygram.python_grammar or
# pygram.python_grammar_no_print_statement.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
18
19
class InvalidInput(ValueError):
    """Signal that the source could not be parsed by any attempted grammar.

    Subclasses ValueError, so callers that catch ValueError also catch this.
    """
22
23
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    With an empty *target_versions* every known grammar is returned. Otherwise
    a grammar is included only when the feature checks below allow it.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7-3.9
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]

    selected = []

    # If we have to parse both, try to parse async as a keyword first.
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or supports_feature(target_versions, Feature.PATTERN_MATCHING)
    ):
        # Python 3.7-3.9
        selected.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        selected.append(pygram.python_grammar_no_print_statement_no_exec_statement)

    # Unlike the checks above, a single target with pattern matching is enough.
    pattern_matching_targeted = any(
        Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[version]
        for version in target_versions
    )
    if pattern_matching_targeted:
        # Python 3.10+
        selected.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every
    # Python version has exactly one of the two 'ASYNC_*' flags.
    return selected
55
56
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 Node.

    A trailing newline is appended when missing. Each grammar returned by
    get_grammars() is tried in order and the first successful parse wins.

    Raises:
        InvalidInput: if no grammar accepts the source. The error recorded for
            the highest grammar version is reported, extended with PY2_HINT
            when the source matches pygram.python_grammar or
            pygram.python_grammar_no_print_statement.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    # One InvalidInput per failed grammar, keyed by that grammar's version.
    failures = {}
    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(src_txt, True)

        except ParseError as parse_error:
            lineno, column = parse_error.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as token_error:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = token_error.args[1]
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {token_error.args[0]}"
            )

        else:
            # Parsed successfully; no need to try the remaining grammars.
            break

    else:
        # Every grammar failed. Choose the latest version when raising the
        # actual parsing error.
        assert len(failures) >= 1
        latest_failure = failures[max(failures)]

        accepted_by_py2_grammar = matches_grammar(
            src_txt, pygram.python_grammar
        ) or matches_grammar(src_txt, pygram.python_grammar_no_print_statement)
        if accepted_by_py2_grammar:
            raise InvalidInput(f"{latest_failure.args[0]}\n{PY2_HINT}") from None

        raise latest_failure from None

    # A bare Leaf result is wrapped so callers always receive a file_input Node.
    if isinstance(tree, Leaf):
        tree = Node(syms.file_input, [tree])
    return tree
105
106
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses with *grammar*, False otherwise.

    ParseError, TokenError and IndentationError raised while parsing are
    treated as "does not match"; any other exception propagates.
    """
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
115
116
def lib2to3_unparse(node: Node) -> str:
    """Return the string form of a lib2to3 node, as produced by ``str(node)``."""
    return str(node)
121
122
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Parse *src* with ``ast.parse`` for one specific feature version.

    *version* is forwarded as ``feature_version`` and *type_comments* as the
    keyword of the same name; the filename is reported as "<unknown>".
    """
    parse_options = {"feature_version": version, "type_comments": type_comments}
    return ast.parse(src, "<unknown>", **parse_options)
130
131
def parse_ast(src: str) -> ast.AST:
    """Parse *src* into an ``ast`` tree, trying several feature versions.

    Feature versions are tried from the running interpreter's minor version
    down to 3.3, first with type comments enabled and then, if every attempt
    failed, again with type comments disabled.

    Raises:
        SyntaxError: if no attempt succeeds; the message is that of the first
            failure seen in the type-comments pass.
    """
    # TODO: support Python 4+ ;)
    newest_first = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    first_error = ""
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as error:
            # Only the first failure is kept for the final report.
            first_error = first_error or str(error)

    # Try to parse without type comments
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            continue

    raise SyntaxError(first_error)
152
153
def _normalize(lineend: str, value: str) -> str:
    """Strip every line of *value*, rejoin with *lineend*, and strip the result.

    Leading and trailing whitespace is removed from each line, and blank lines
    at the very start and end of the joined text disappear in the final strip.
    """
    trimmed_lines = map(str.strip, value.splitlines())
    return lineend.join(trimmed_lines).strip()
162
163
def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
    """Yield an indented, line-by-line rendering of *node* for comparing ASTs.

    Fields are visited in sorted name order. Child nodes are rendered two
    levels deeper than their parent. Note that a string Constant whose kind is
    "u" has its ``kind`` reset to None on the node itself.
    """
    is_constant = isinstance(node, ast.Constant)
    if is_constant and isinstance(node.value, str) and node.kind == "u":
        # It's a quirk of history that we strip the u prefix over here. We used
        # to rewrite the AST nodes for Python version compatibility and we
        # never copied over the kind.
        node.kind = None

    class_name = node.__class__.__name__
    child_depth = depth + 2
    outer_indent = "  " * depth
    field_indent = "  " * (depth + 1)
    leaf_indent = "  " * child_depth

    yield f"{outer_indent}{class_name}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison, so
    # none of its fields are rendered.
    field_names = [] if isinstance(node, ast.TypeIgnore) else sorted(node._fields)
    is_delete = isinstance(node, ast.Delete)

    for name in field_names:
        try:
            value: object = getattr(node, name)
        except AttributeError:
            continue

        yield f"{field_indent}{name}="

        if isinstance(value, list):
            # Ignore nested tuples within del statements, because we may insert
            # parentheses and they change the AST.
            flatten_tuples = is_delete and name == "targets"
            for item in value:
                if flatten_tuples and isinstance(item, ast.Tuple):
                    for element in item.elts:
                        yield from stringify_ast(element, child_depth)
                elif isinstance(item, ast.AST):
                    yield from stringify_ast(item, child_depth)
            continue

        if isinstance(value, ast.AST):
            yield from stringify_ast(value, child_depth)
            continue

        rendered: object = value
        if isinstance(value, str):
            if is_constant and name == "value":
                # Constant strings may be indented across newlines, if they
                # are docstrings; fold spaces after newlines when comparing.
                # Similarly, trailing and leading space may be removed.
                rendered = _normalize("\n", value)
            elif name == "type_comment":
                # Trailing whitespace in type comments is removed.
                rendered = value.rstrip()
        yield f"{leaf_indent}{rendered!r},  # {value.__class__.__name__}"

    yield f"{outer_indent})  # /{class_name}"
227     yield f"{'  ' * depth})  # /{node.__class__.__name__}"