]> git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Fix typo in config docs for --extend-exclude (#3170)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 # lib2to3 fork
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21
22 from black.mode import TargetVersion, Feature, supports_feature
23 from black.nodes import syms
24
25 ast3: Any
26
27 _IS_PYPY = platform.python_implementation() == "PyPy"
28
29 try:
30     from typed_ast import ast3
31 except ImportError:
32     # Either our python version is too low, or we're on pypy
33     if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
34         print(
35             "The typed_ast package is required but not installed.\n"
36             "You can upgrade to Python 3.8+ or install typed_ast with\n"
37             "`python3 -m pip install typed-ast`.",
38             file=sys.stderr,
39         )
40         sys.exit(1)
41     else:
42         ast3 = ast
43
44
45 PY2_HINT: Final = "Python 2 support was removed in version 22.0."
46
47
class InvalidInput(ValueError):
    """Signals that the given source text could not be parsed by any attempt."""
50
51
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given *target_versions*.

    With no target versions every grammar is returned; otherwise the list is
    narrowed according to the features the targets support.
    """
    if not target_versions:
        # Nothing was requested explicitly, so every grammar is a candidate.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]

    async_identifiers = supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
    async_keywords = supports_feature(target_versions, Feature.ASYNC_KEYWORDS)
    pattern_matching = supports_feature(target_versions, Feature.PATTERN_MATCHING)

    candidates = []
    # When both async flavours have to be parsed, the "async as a keyword"
    # grammar is tried first.
    if not (async_identifiers or pattern_matching):
        # Python 3.7-3.9
        candidates.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not async_keywords:
        # Python 3.0-3.6
        candidates.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    if pattern_matching:
        # Python 3.10+
        candidates.append(pygram.python_grammar_soft_keywords)

    # Every Python version has exactly one of the two 'ASYNC_*' flags, so at
    # least one of the branches above must have added a grammar.
    return candidates
83
84
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 Node.

    Each grammar returned by ``get_grammars`` is tried in order; the first one
    that parses the source wins. If none does, ``InvalidInput`` is raised with
    the error recorded for the highest grammar version, extended with a
    Python 2 hint when the source matches one of the Python 2 grammars.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    failures = {}
    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(src_txt, True)

        except ParseError as parse_err:
            row, col = parse_err.context[1]
            source_lines = src_txt.splitlines()
            try:
                offending = source_lines[row - 1]
            except IndexError:
                offending = "<line number missing in source>"
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {row}:{col}: {offending}"
            )

        except TokenError as token_err:
            # Raised in edge cases; these typically carry no offending line,
            # so the tokenizer's own message is reported instead.
            row, col = token_err.args[1]
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {row}:{col}: {token_err.args[0]}"
            )

        else:
            # This grammar accepted the source; no need to try the others.
            break

    else:
        # No grammar worked: report the failure of the latest grammar version.
        assert len(failures) >= 1
        latest_failure = failures[max(failures)]

        parses_as_py2 = matches_grammar(
            src_txt, pygram.python_grammar
        ) or matches_grammar(src_txt, pygram.python_grammar_no_print_statement)
        if parses_as_py2:
            raise InvalidInput(f"{latest_failure.args[0]}\n{PY2_HINT}") from None

        raise latest_failure from None

    if isinstance(tree, Leaf):
        # A bare leaf gets wrapped so callers always receive a file_input Node.
        tree = Node(syms.file_input, [tree])
    return tree
133
134
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses under *grammar*, False otherwise."""
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
143
144
def lib2to3_unparse(node: Node) -> str:
    """Return the string form of the given lib2to3 *node*."""
    return str(node)
149
150
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST]:
    """Parse *src* into an AST using the syntax of one specific Python *version*.

    Args:
        src: The source code to parse.
        version: The ``(major, minor)`` feature version to parse with.

    Returns:
        The module node produced by the builtin ``ast`` module or by ``ast3``,
        depending on the running interpreter.

    Raises:
        SyntaxError: Propagated from the underlying parser when *src* is not
            valid for *version*.
    """
    filename = "<unknown>"
    # On Python 3.8+ the builtin ast module accepts ``feature_version`` itself;
    # only older interpreters have to go through ``ast3``.
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version, type_comments=True)

    if not _IS_PYPY:
        # Typed-ast is guaranteed to be used here and automatically tracks type
        # comments separately.
        return ast3.parse(src, filename, feature_version=version[1])

    # PyPy 3.7 doesn't support type comment tracking, which is not ideal, but
    # there's not much we can do as typed-ast won't work either.
    if sys.version_info >= (3, 8):
        return ast3.parse(src, filename, type_comments=True)
    return ast3.parse(src, filename)
172
173
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse *src*, trying Python 3 minor versions from newest to oldest.

    The first version that parses successfully wins. If every attempt raises
    ``SyntaxError``, a new ``SyntaxError`` carrying the message of the first
    failure (the newest version tried) is raised.
    """
    # TODO: support Python 4+ ;)
    first_error = ""
    # Walk from the running interpreter's minor version down to 3.3.
    for minor in range(sys.version_info[1], 2, -1):
        try:
            return parse_single_version(src, (3, minor))
        except SyntaxError as exc:
            first_error = first_error or str(exc)

    raise SyntaxError(first_error)
187
188
189 ast3_AST: Final[Type[ast3.AST]] = ast3.AST
190
191
192 def _normalize(lineend: str, value: str) -> str:
193     # To normalize, we strip any leading and trailing space from
194     # each line...
195     stripped: List[str] = [i.strip() for i in value.splitlines()]
196     normalized = lineend.join(stripped)
197     # ...and remove any blank lines at the beginning and end of
198     # the whole string
199     return normalized.strip()
200
201
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Yield an indented, line-by-line rendering of *node* for AST comparison.

    Fields are visited in sorted order and child nodes are rendered
    recursively two levels deeper than their parent.
    """

    node = fixup_ast_constants(node)

    # Indentation for the node itself, its field names, and its field values.
    node_pad = "  " * depth
    field_pad = "  " * (depth + 1)
    value_pad = "  " * (depth + 2)

    yield f"{node_pad}{node.__class__.__name__}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so there is no need
        # for this check there.
        if not (_IS_PYPY and sys.version_info < (3, 8)):
            # TypeIgnore has only one field 'lineno' which breaks this comparison
            type_ignore_classes = (ast3.TypeIgnore,)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            continue

        yield f"{field_pad}{field}="

        if isinstance(value, list):
            # Nested tuples within del statements are flattened, because we may
            # insert parentheses and they change the AST.
            flatten_tuples = field == "targets" and isinstance(
                node, (ast.Delete, ast3.Delete)
            )
            for item in value:
                if flatten_tuples and isinstance(item, (ast.Tuple, ast3.Tuple)):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)
            continue

        # Note that we are referencing the typed-ast ASTs via global variables and
        # not direct module attribute accesses because that breaks mypyc. It's
        # probably something to do with the ast3 variables being marked as Any
        # leading mypy to think this branch is always taken, leaving the rest of
        # the code unanalyzed. Tightening up the types for the typed-ast AST types
        # avoids the mypyc crash.
        if isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)
            continue

        normalized: object = value
        # Constant strings may be indented across newlines, if they are
        # docstrings; fold spaces after newlines when comparing. Similarly,
        # trailing and leading space may be removed.
        if (
            isinstance(node, ast.Constant)
            and field == "value"
            and isinstance(value, str)
        ):
            normalized = _normalize("\n", value)
        yield f"{value_pad}{normalized!r},  # {value.__class__.__name__}"

    yield f"{node_pad})  # /{node.__class__.__name__}"
267
268
def _available_node_types(*names: str) -> Tuple[Type[Any], ...]:
    """Collect the node classes called *names* that ``ast`` / ``ast3`` provide."""
    found = []
    for module in (ast, ast3):
        for name in names:
            node_type = getattr(module, name, None)
            if node_type is not None:
                found.append(node_type)
    return tuple(found)


# The pre-3.8 literal node classes are looked up defensively and only once:
# an interpreter that no longer ships them yields an empty tuple here, and
# ``isinstance(node, ())`` is simply False instead of an AttributeError.
_STR_AND_BYTES_TYPES: Tuple[Type[Any], ...] = _available_node_types("Str", "Bytes")
_NUM_TYPES: Tuple[Type[Any], ...] = _available_node_types("Num")
_NAME_CONSTANT_TYPES: Tuple[Type[Any], ...] = _available_node_types("NameConstant")


def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Args:
        node: Any AST node.

    Returns:
        A new ``ast.Constant`` holding the literal value when *node* is one of
        the deprecated literal node kinds (Str, Bytes, Num, NameConstant);
        otherwise *node* itself, unchanged.
    """
    if isinstance(node, _STR_AND_BYTES_TYPES):
        return ast.Constant(value=node.s)

    if isinstance(node, _NUM_TYPES):
        return ast.Constant(value=node.n)

    if isinstance(node, _NAME_CONSTANT_TYPES):
        return ast.Constant(value=node.value)

    return node