git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Document black-jupyter hook (#3650)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
15 from black.nodes import syms
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21 from blib2to3.pytree import Leaf, Node
22
# Declared as Any because the name is bound either to typed_ast.ast3 or to the
# builtin ast module, depending on what can be imported below.
ast3: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3
except ImportError:
    if sys.version_info >= (3, 8) or _IS_PYPY:
        # Fall back to the builtin ast module under the typed_ast name.
        ast3 = ast
    else:
        # Neither a new enough interpreter nor typed_ast: report and bail out.
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)


# Extra line appended to parse errors when the input parses as Python 2 code.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
45
46
class InvalidInput(ValueError):
    """Error signalling that the source could not be parsed by any attempt."""
49
50
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for `target_versions`.

    With an empty set every grammar is returned. Otherwise a grammar is
    included only when the feature flags of the target versions allow it.
    """
    if target_versions:
        selected = []
        # If we have to parse both, try to parse async as a keyword first
        if not (
            supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
            or supports_feature(target_versions, Feature.PATTERN_MATCHING)
        ):
            # Python 3.7-3.9
            selected.append(
                pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
            )
        if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
            # Python 3.0-3.6
            selected.append(pygram.python_grammar_no_print_statement_no_exec_statement)
        if any(
            Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[version]
            for version in target_versions
        ):
            # Python 3.10+
            selected.append(pygram.python_grammar_soft_keywords)

        # At least one of the above branches must have been taken, because every
        # Python version has exactly one of the two 'ASYNC_*' flags
        return selected

    # No target_version specified, so try all grammars.
    return [
        # Python 3.7-3.9
        pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
        # Python 3.0-3.6
        pygram.python_grammar_no_print_statement_no_exec_statement,
        # Python 3.10+
        pygram.python_grammar_soft_keywords,
    ]
82
83
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse `src_txt` and return the resulting lib2to3 Node.

    A trailing newline is appended when missing. Each grammar returned by
    get_grammars() for `target_versions` is tried in order; the first one that
    parses wins.

    Raises InvalidInput when every grammar fails. The error reported is the one
    recorded under the greatest grammar version, with PY2_HINT appended when the
    source parses under pygram.python_grammar or
    pygram.python_grammar_no_print_statement.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    failures = {}
    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(src_txt, True)

        except ParseError as parse_err:
            lineno, column = parse_err.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as token_err:
            # Raised in edge cases; these typically have no "faulty_line", so
            # the tokenizer's own message is reported instead.
            lineno, column = token_err.args[1]
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {token_err.args[0]}"
            )

        else:
            # First grammar that parses the source wins.
            break

    else:
        # No grammar succeeded: surface the error from the latest version.
        assert len(failures) >= 1
        exc = failures[max(failures)]

        parses_as_py2 = matches_grammar(
            src_txt, pygram.python_grammar
        ) or matches_grammar(src_txt, pygram.python_grammar_no_print_statement)
        if parses_as_py2:
            original_msg = exc.args[0]
            raise InvalidInput(f"{original_msg}\n{PY2_HINT}") from None

        raise exc from None

    # A bare Leaf is wrapped so callers always receive a file_input Node.
    if isinstance(tree, Leaf):
        tree = Node(syms.file_input, [tree])
    return tree
132
133
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True when `src_txt` parses under `grammar`, False otherwise.

    ParseError, TokenError and IndentationError from the parse are treated as
    "does not match"; anything else propagates.
    """
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
142
143
def lib2to3_unparse(node: Node) -> str:
    """Return the string form of a lib2to3 node, as produced by str()."""
    return str(node)
148
149
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> Union[ast.AST, ast3.AST]:
    """Parse `src` into an AST targeting the given (major, minor) `version`.

    On Python 3.8+ the builtin ast module is used with `feature_version` and
    `type_comments`. Otherwise the parse goes through ast3 (or the builtin ast
    when type comments are not requested on a non-PyPy interpreter).
    """
    source_name = "<unknown>"
    # The builtin ast only accepts feature_version from Python 3.8 onwards;
    # older interpreters have to go through typed-ast.
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(
            src, source_name, feature_version=version, type_comments=type_comments
        )

    if not _IS_PYPY:
        if type_comments:
            # Typed-ast is guaranteed to be used here and automatically tracks
            # type comments separately.
            return ast3.parse(src, source_name, feature_version=version[1])
        return ast.parse(src, source_name)

    # PyPy 3.7 doesn't support type comment tracking which is not ideal, but
    # there's not much we can do as typed-ast won't work either.
    if sys.version_info >= (3, 8):
        return ast3.parse(src, source_name, type_comments=type_comments)
    return ast3.parse(src, source_name)
174
175
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse `src` into an AST, trying feature versions from newest to oldest.

    Versions (3, 3) up to the running interpreter's minor version are tried,
    first with type comments enabled and then without. The first successful
    parse is returned.

    Raises SyntaxError carrying the message of the first failure seen in the
    type-comment pass when every attempt fails.
    """
    # TODO: support Python 4+ ;)
    newest_first = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    first_error = ""
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            first_error = first_error or str(e)

    # Try to parse without type comments
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            continue

    raise SyntaxError(first_error)
196
197
# Module-level alias for the AST base class of whatever module is bound to
# `ast3`. stringify_ast() checks against this global rather than spelling out
# the `ast3.AST` attribute access; its comment there explains that the direct
# access breaks mypyc.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
199
200
def _normalize(lineend: str, value: str) -> str:
    """Strip every line of `value`, rejoin with `lineend`, then strip the whole.

    Leading and trailing whitespace is removed from each line, and blank lines
    at the very beginning and end of the result disappear with the final strip.
    """
    return lineend.join(line.strip() for line in value.splitlines()).strip()
209
210
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Yield an indented, line-by-line rendering of `node` for AST comparison.

    The node is first passed through fixup_ast_constants(). Fields are visited
    in sorted name order; child nodes are rendered recursively two levels
    deeper, and other values are rendered with repr() plus their type name.
    """

    node = fixup_ast_constants(node)
    node_name = node.__class__.__name__
    outer_pad = "  " * depth
    field_pad = "  " * (depth + 1)
    value_pad = "  " * (depth + 2)

    yield f"{outer_pad}{node_name}("

    field_names = sorted(node._fields)
    type_ignore_classes: Tuple[Type[Any], ...]
    # TypeIgnore will not be present using pypy < 3.8, so the check is skipped
    # there. It is also pointless when there are no fields to visit.
    if field_names and (not _IS_PYPY or sys.version_info >= (3, 8)):
        # TypeIgnore has only one field 'lineno' which breaks this comparison,
        # so none of its fields are rendered.
        type_ignore_classes = (ast3.TypeIgnore,)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        if isinstance(node, type_ignore_classes):
            field_names = []

    for field_name in field_names:
        try:
            value: object = getattr(node, field_name)
        except AttributeError:
            continue

        yield f"{field_pad}{field_name}="

        if isinstance(value, list):
            # Ignore nested tuples within del statements, because we may insert
            # parentheses and they change the AST.
            flatten_tuples = field_name == "targets" and isinstance(
                node, (ast.Delete, ast3.Delete)
            )
            for item in value:
                if flatten_tuples and isinstance(item, (ast.Tuple, ast3.Tuple)):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            is_string_constant = (
                isinstance(node, ast.Constant)
                and field_name == "value"
                and isinstance(value, str)
            )
            normalized: object = _normalize("\n", value) if is_string_constant else value
            yield f"{value_pad}{normalized!r},  # {value.__class__.__name__}"

    yield f"{outer_pad})  # /{node_name}"
276
277
def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    String, bytes, number, bool and None literal nodes are returned as a fresh
    `ast.Constant` carrying only `value`; any other node is returned unchanged.

    The builtin `ast.Str`, `ast.Bytes`, `ast.Num` and `ast.NameConstant` names
    are deprecated since Python 3.8 and removed in 3.14, so they are only
    consulted on interpreters older than 3.8. On 3.8+ the same nodes are
    recognised as `ast.Constant` by the type of their value, which matches what
    the deprecated isinstance checks used to accept.
    """
    legacy_modules: Tuple[Any, ...]
    if sys.version_info >= (3, 8):
        if isinstance(node, ast.Constant):
            try:
                value = node.value
            except AttributeError:
                return node
            # bool is covered by int; Ellipsis and other values are left alone.
            if isinstance(value, (str, bytes, int, float, complex, type(None))):
                # Rebuilt on purpose: the new node carries only `value`, so
                # other attributes such as `kind` are not copied across.
                return ast.Constant(value=value)
            return node
        # Only a separately installed typed_ast can still produce legacy nodes.
        legacy_modules = () if ast3 is ast else (ast3,)
    else:
        legacy_modules = (ast, ast3)

    # Legacy class name -> attribute holding the literal's value.
    legacy_value_attrs = (
        ("Str", "s"),
        ("Bytes", "s"),
        ("Num", "n"),
        ("NameConstant", "value"),
    )
    for module in legacy_modules:
        for class_name, value_attr in legacy_value_attrs:
            legacy_class = getattr(module, class_name, None)
            if legacy_class is not None and isinstance(node, legacy_class):
                return ast.Constant(value=getattr(node, value_attr))

    return node