git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Sort DEFAULT_EXCLUDES and add .vscode, .pytest_cache and .ruff_cache (#3691)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
15 from black.nodes import syms
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21 from blib2to3.pytree import Leaf, Node
22
# ``ast3`` ends up bound to either typed_ast's ``ast3`` module or the builtin
# ``ast`` module (see below), hence the loose annotation.
ast3: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3
except ImportError:
    # Without typed_ast, the builtin ast module stands in for it on
    # Python 3.8+ and on PyPy; anywhere else we cannot continue.
    if sys.version_info >= (3, 8) or _IS_PYPY:
        ast3 = ast
    else:
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)


# Appended to the parse error when the source matches a Python 2 grammar.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
43
44
class InvalidInput(ValueError):
    """Signal that the source text could not be parsed by any attempted grammar."""
47
48
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to attempt, in the order they should be tried.

    With no target versions, all three Python 3 grammars are returned.
    Otherwise only the grammars compatible with the requested versions are
    included.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7-3.9
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]

    candidates = []

    # If we have to parse both, try to parse async as a keyword first
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or supports_feature(target_versions, Feature.PATTERN_MATCHING)
    ):
        # Python 3.7-3.9
        candidates.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        candidates.append(pygram.python_grammar_no_print_statement_no_exec_statement)

    # A single target with pattern matching is enough to need the soft-keyword
    # grammar.
    for version in target_versions:
        if Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[version]:
            # Python 3.10+
            candidates.append(pygram.python_grammar_soft_keywords)
            break

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return candidates
80
81
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 Node.

    A trailing newline is appended when missing.  Each grammar returned by
    ``get_grammars`` is tried in turn and the first successful parse wins.

    Raises InvalidInput when every grammar fails.  The error reported is the
    one recorded for the highest grammar version, with ``PY2_HINT`` appended
    if the source parses under one of the Python 2 grammars.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    failures = {}
    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(src_txt, True)

        except ParseError as parse_err:
            lineno, column = parse_err.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as token_err:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = token_err.args[1]
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {token_err.args[0]}"
            )

        else:
            # Parsed successfully; no need to try the remaining grammars.
            break

    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(failures) >= 1
        latest_failure = failures[max(failures)]

        looks_like_python2 = matches_grammar(
            src_txt, pygram.python_grammar
        ) or matches_grammar(src_txt, pygram.python_grammar_no_print_statement)
        if looks_like_python2:
            raise InvalidInput(f"{latest_failure.args[0]}\n{PY2_HINT}") from None

        raise latest_failure from None

    # A bare Leaf is wrapped so callers always receive a file_input Node.
    if isinstance(tree, Leaf):
        return Node(syms.file_input, [tree])
    return tree
130
131
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses under *grammar*, False otherwise.

    ParseError, TokenError and IndentationError raised while parsing count as
    "does not match".
    """
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
140
141
def lib2to3_unparse(node: Node) -> str:
    """Return the string form of a lib2to3 node, as produced by ``str(node)``."""
    return str(node)
146
147
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> Union[ast.AST, ast3.AST]:
    """Parse *src* into an AST for one specific feature *version*.

    On Python 3.8+ the builtin ``ast`` module is used with ``feature_version``
    and ``type_comments``.  Otherwise the parser used depends on whether we
    are running on PyPy and on whether type comments were requested.
    """
    source_name = "<unknown>"

    # The builtin ast only accepts feature_version/type_comments from 3.8 on;
    # older interpreters have to go through the fallbacks below.
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(
            src, source_name, feature_version=version, type_comments=type_comments
        )

    if _IS_PYPY:
        # PyPy 3.7 doesn't support type comment tracking which is not ideal, but there's
        # not much we can do as typed-ast won't work either.
        if sys.version_info >= (3, 8):
            return ast3.parse(src, source_name, type_comments=type_comments)
        return ast3.parse(src, source_name)

    if type_comments:
        # Typed-ast is guaranteed to be used here and automatically tracks type
        # comments separately.
        return ast3.parse(src, source_name, feature_version=version[1])
    return ast.parse(src, source_name)
172
173
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse *src* into an AST, trying progressively older feature versions.

    Every ``(3, minor)`` version from the running interpreter's minor version
    down to 3.3 is attempted with type comments enabled; if all of those fail,
    the same versions are retried without type comments.

    Raises SyntaxError, carrying the message of the first failure from the
    type-comment pass, when no attempt succeeds.
    """
    # TODO: support Python 4+ ;)
    newest_first = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    first_error = ""
    # First pass keeps type comments; the second pass drops them.
    for with_type_comments in (True, False):
        for version in newest_first:
            try:
                return parse_single_version(
                    src, version, type_comments=with_type_comments
                )
            except SyntaxError as exc:
                # Only failures from the type-comment pass are reported.
                if with_type_comments:
                    first_error = first_error or str(exc)

    raise SyntaxError(first_error)
194
195
# Precisely-typed module-level alias for ``ast3.AST``.  ``ast3`` itself is
# annotated as ``Any``, so isinstance() checks that need a real type refer to
# this global instead of accessing the attribute on the module directly.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
197
198
def _normalize(lineend: str, value: str) -> str:
    """Strip every line of *value*, rejoin with *lineend*, then strip the result.

    Leading and trailing whitespace is removed from each individual line, and
    the final strip drops any blank lines at the beginning and end of the
    whole string.
    """
    rejoined = lineend.join(line.strip() for line in value.splitlines())
    return rejoined.strip()
207
208
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line of text at a time: an opening line naming the node class,
    then, for each field in sorted name order, a ``field=`` line followed by
    the rendering of that field's value, and finally a closing line.  Child
    nodes are rendered recursively at ``depth + 2``.

    The node is first passed through ``fixup_ast_constants``.  TypeIgnore
    nodes are emitted without any fields.  Tuples that are direct targets of a
    ``del`` statement are replaced by their elements.  String constants are
    whitespace-normalized with ``_normalize`` before being rendered.  Fields
    missing on the node are skipped, as are list items that are not AST nodes.
    """

    node = fixup_ast_constants(node)
    node_name = node.__class__.__name__

    yield f"{'  ' * depth}{node_name}("

    # The node does not change while its fields are walked, so decide once, up
    # front, whether it is a TypeIgnore instead of re-checking for every field.
    is_type_ignore = False
    type_ignore_classes: Tuple[Type[Any], ...]
    # TypeIgnore will not be present using pypy < 3.8, so no need for this
    if not (_IS_PYPY and sys.version_info < (3, 8)):
        type_ignore_classes = (ast3.TypeIgnore,)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        is_type_ignore = isinstance(node, type_ignore_classes)

    # TypeIgnore has only one field 'lineno' which breaks this comparison,
    # so none of its fields are rendered.
    fields: List[str] = [] if is_type_ignore else sorted(node._fields)

    for field in fields:  # noqa: F402
        try:
            value: object = getattr(node, field)
        except AttributeError:
            continue

        yield f"{'  ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                normalized = _normalize("\n", value)
            else:
                normalized = value
            yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"

    yield f"{'  ' * depth})  # /{node_name}"
274
275
def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes (from either ``ast`` or ``ast3``)
    are replaced by a new ``ast.Constant`` holding the same value; any other
    node is returned unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast.Bytes, ast3.Bytes)):
        constant_value = node.s
    elif isinstance(node, (ast.Num, ast3.Num)):
        constant_value = node.n
    elif isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        constant_value = node.value
    else:
        return node

    return ast.Constant(value=constant_value)