]> git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Move test for g:load_black to improve plugin performance (GH-2896)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 # lib2to3 fork
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21
22 from black.mode import TargetVersion, Feature, supports_feature
23 from black.nodes import syms
24
# ``ast3`` is bound below to typed_ast's ast3 module when it can be imported,
# and otherwise to the builtin ``ast`` module.
ast3: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3
except ImportError:
    # typed_ast is missing. That is acceptable on Python 3.8+, and on PyPy 3.7;
    # in those cases the builtin ast module is used in its place.
    if sys.version_info >= (3, 7) and (sys.version_info >= (3, 8) or _IS_PYPY):
        ast3 = ast
    else:
        # Either our python version is too low, or we're not on pypy: report
        # the missing dependency on stderr and exit with status 1.
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)


# Appended to the parse error message by lib2to3_parse() when the source
# matches one of the legacy grammars it checks.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
46
47
class InvalidInput(ValueError):
    """Error signalling that the source could not be parsed by any grammar tried."""
50
51
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to attempt, in order, for *target_versions*.

    An empty set means "no preference": all three grammars are returned.
    Otherwise each grammar is included only when the supports_feature()
    checks below allow it.
    """
    if not target_versions:
        # Nothing was requested, so every grammar is a candidate.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]

    selected: List[Grammar] = []

    # When both async flavours have to be parsed, async-as-keyword goes first.
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or supports_feature(target_versions, Feature.PATTERN_MATCHING)
    ):
        # Python 3.7-3.9
        selected.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        selected.append(pygram.python_grammar_no_print_statement_no_exec_statement)

    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        # Python 3.10+
        selected.append(pygram.python_grammar_soft_keywords)

    # The list cannot be empty here: every Python version has exactly one of
    # the two 'ASYNC_*' flags, so at least one branch above was taken.
    return selected
83
84
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 Node.

    A trailing newline is appended when missing. Each grammar returned by
    get_grammars() is tried in turn and the first successful parse wins.

    Raises InvalidInput when every grammar fails. The message comes from the
    failure recorded under the highest grammar version, with PY2_HINT appended
    when the source matches ``pygram.python_grammar`` or
    ``pygram.python_grammar_no_print_statement``.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    # Maps grammar.version -> the InvalidInput describing why that grammar failed.
    failures = {}
    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)

        except ParseError as pe:
            lineno, column = pe.context[1]
            try:
                faulty_line = src_txt.splitlines()[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

        else:
            # Parsed successfully; no need to try the remaining grammars.
            break

    else:
        # No grammar worked. Report the error from the latest grammar version.
        assert len(failures) >= 1
        latest_failure = failures[max(failures)]

        looks_like_py2 = matches_grammar(
            src_txt, pygram.python_grammar
        ) or matches_grammar(src_txt, pygram.python_grammar_no_print_statement)
        if looks_like_py2:
            raise InvalidInput(f"{latest_failure.args[0]}\n{PY2_HINT}") from None

        raise latest_failure from None

    # A bare Leaf result is wrapped so callers always receive a file_input Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
133
134
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if *src_txt* parses under *grammar*, False otherwise.

    ParseError, TokenError and IndentationError raised by the parse are
    treated as "does not match"; any other exception propagates.
    """
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
143
144
def lib2to3_unparse(node: Node) -> str:
    """Return the source text for *node*, obtained by calling ``str()`` on it."""
    return str(node)
149
150
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST]:
    """Parse *src* into an AST for one specific Python *version*.

    On Python 3.8+ the builtin ``ast`` module is used with ``feature_version``
    set to *version*. On older interpreters ``ast3`` is used instead: with the
    minor version as ``feature_version``, or with no ``feature_version`` at all
    on PyPy.

    Raises AssertionError if *version* is below ``(3,)``.
    """
    if version < (3,):
        raise AssertionError(
            "INTERNAL ERROR: Tried parsing unsupported Python version!"
        )

    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8):
        return ast.parse(src, filename, feature_version=version)
    if _IS_PYPY:
        return ast3.parse(src, filename)
    return ast3.parse(src, filename, feature_version=version[1])
164
165
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse *src*, trying Python versions from newest to oldest.

    Versions run from ``(3, <running interpreter's minor>)`` down to ``(3, 3)``
    and the first successful parse is returned.

    Raises SyntaxError if every version fails; its message is the first
    non-empty error text encountered.
    """
    # TODO: support Python 4+ ;)
    first_error = ""
    for minor in range(sys.version_info[1], 2, -1):
        try:
            return parse_single_version(src, (3, minor))
        except SyntaxError as exc:
            # Keep the message from the newest version that produced one.
            first_error = first_error or str(exc)

    raise SyntaxError(first_error)
179
180
# Module-level alias for the ast3 AST base class. stringify_ast() uses this
# global in an isinstance() check instead of accessing ``ast3.AST`` directly,
# because the direct module attribute access there breaks mypyc.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
182
183
def _normalize(lineend: str, value: str) -> str:
    """Return *value* with per-line and overall surrounding whitespace removed.

    Every line of *value* is stripped of leading and trailing whitespace, the
    lines are rejoined with *lineend*, and the joined text is stripped once
    more so blank lines at the very start and end disappear.
    """
    rejoined = lineend.join(line.strip() for line in value.splitlines())
    return rejoined.strip()
192
193
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Yield an indented, line-by-line text dump of *node* for AST comparison.

    The node is first passed through fixup_ast_constants(). Fields are visited
    in sorted name order; child nodes are dumped recursively two levels deeper
    and any other value is emitted as its repr followed by its class name.
    """
    node = fixup_ast_constants(node)

    node_name = node.__class__.__name__
    outer_pad = "  " * depth
    field_pad = "  " * (depth + 1)
    value_pad = "  " * (depth + 2)

    yield f"{outer_pad}{node_name}("

    # TypeIgnore will not be present using pypy < 3.8, so there is no need to
    # look for it there; an empty tuple makes the isinstance() check below False.
    type_ignore_classes: Tuple[Type[Any], ...] = ()
    if not _IS_PYPY or sys.version_info >= (3, 8):
        type_ignore_classes = (ast3.TypeIgnore,)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)

    # TypeIgnore has only one field 'lineno' which breaks this comparison, so
    # none of its fields are emitted.
    if isinstance(node, type_ignore_classes):
        field_names: List[str] = []
    else:
        field_names = sorted(node._fields)

    for field_name in field_names:
        try:
            value: object = getattr(node, field_name)
        except AttributeError:
            continue

        yield f"{field_pad}{field_name}="

        if isinstance(value, list):
            is_delete_targets = field_name == "targets" and isinstance(
                node, (ast.Delete, ast3.Delete)
            )
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if is_delete_targets and isinstance(item, (ast.Tuple, ast3.Tuple)):
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            is_str_constant = (
                isinstance(node, ast.Constant)
                and field_name == "value"
                and isinstance(value, str)
            )
            normalized: object = _normalize("\n", value) if is_str_constant else value
            yield f"{value_pad}{normalized!r},  # {value.__class__.__name__}"

    yield f"{outer_pad})  # /{node_name}"
259
260
def _legacy_node_classes(*names: str) -> Tuple[type, ...]:
    """Collect the named node classes from ``ast`` and ``ast3``, skipping missing ones."""
    found = []
    for module in (ast, ast3):
        for name in names:
            # The deprecated aliases no longer exist on newer interpreters, so a
            # plain attribute access would raise AttributeError there.
            cls = getattr(module, name, None)
            if cls is not None:
                found.append(cls)
    return tuple(found)


def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    ``Str``/``Bytes`` nodes are converted from their ``s`` attribute, ``Num``
    nodes from ``n`` and ``NameConstant`` nodes from ``value``; each becomes a
    new ``ast.Constant``. Any other node is returned unchanged.

    The deprecated classes are looked up defensively, so the function keeps
    working (as a no-op for those classes) on interpreters where they have
    been removed from the ``ast`` module.
    """
    if isinstance(node, _legacy_node_classes("Str", "Bytes")):
        return ast.Constant(value=node.s)

    if isinstance(node, _legacy_node_classes("Num")):
        return ast.Constant(value=node.n)

    if isinstance(node, _legacy_node_classes("NameConstant")):
        return ast.Constant(value=node.value)

    return node