git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Update email (#3235)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 from black.mode import Feature, TargetVersion, supports_feature
15 from black.nodes import syms
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21 from blib2to3.pytree import Leaf, Node
22
# Declared as Any because the name is bound below to either the typed_ast
# `ast3` module or, as a fallback, the builtin `ast` module.
ast3: Any

# True when running on the PyPy implementation of Python.
_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3
except ImportError:
    # Either our python version is too low, or we're on pypy
    if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
        # No usable fallback here: report the problem on stderr and exit with
        # status 1 at import time.
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
    else:
        # Use the builtin ast module under the `ast3` name instead.
        ast3 = ast


# Appended to the parse error message by lib2to3_parse when the source matches
# one of the additional grammars it checks after every regular grammar failed.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
44
45
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts.

    Within this module it is raised by lib2to3_parse once every candidate
    grammar has rejected the source; the message has the form
    ``Cannot parse: <line>:<column>: <detail>``.
    """
48
49
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    With an empty *target_versions* set every grammar is returned. Otherwise
    the selection is driven by which features the targets support.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        every_grammar = [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 3.10+
            pygram.python_grammar_soft_keywords,
        ]
        return every_grammar

    selected: List[Grammar] = []

    # If we have to parse both, try to parse async as a keyword first
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or supports_feature(target_versions, Feature.PATTERN_MATCHING)
    ):
        # Python 3.7-3.9
        selected.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        selected.append(pygram.python_grammar_no_print_statement_no_exec_statement)

    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        # Python 3.10+
        selected.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return selected
81
82
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    A trailing newline is appended to *src_txt* when missing. The grammars
    chosen by get_grammars() are tried in order and the first successful
    parse is returned (a bare Leaf result is wrapped in a file_input Node).

    Raises:
        InvalidInput: if no grammar could parse the source. The error recorded
            under the highest grammar version is reported, with PY2_HINT
            appended when the source matches one of the extra grammars
            checked at the end.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    # Maps grammar.version -> the InvalidInput built for that grammar's failure.
    failures = {}
    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(src_txt, True)

        except ParseError as parse_error:
            lineno, column = parse_error.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as token_error:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = token_error.args[1]
            failures[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {token_error.args[0]}"
            )

        else:
            # First grammar that parses wins.
            if isinstance(tree, Leaf):
                return Node(syms.file_input, [tree])
            return tree

    # Every grammar failed.
    # Choose the latest version when raising the actual parsing error.
    assert len(failures) >= 1
    latest_failure = failures[max(failures)]

    needs_py2_hint = matches_grammar(
        src_txt, pygram.python_grammar
    ) or matches_grammar(src_txt, pygram.python_grammar_no_print_statement)
    if not needs_py2_hint:
        raise latest_failure from None

    original_msg = latest_failure.args[0]
    raise InvalidInput(f"{original_msg}\n{PY2_HINT}") from None
131
132
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True when *src_txt* parses under *grammar*, False otherwise.

    ParseError, TokenError and IndentationError raised while parsing are
    treated as "does not match"; any other exception propagates.
    """
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
141
142
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)
147
148
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST]:
    """Parse *src* into an AST using the grammar of a single Python version.

    Args:
        src: Source code to parse.
        version: ``(major, minor)`` feature version to parse against.

    Returns:
        The parsed tree, produced by the builtin ``ast`` module when the
        running interpreter is 3.8+ and by ``ast3`` otherwise.

    Raises:
        SyntaxError: if the underlying parser rejects *src*.
    """
    filename = "<unknown>"
    # The builtin ast module only accepts `feature_version` on Python 3.8+;
    # older interpreters have to go through typed-ast instead.
    if sys.version_info >= (3, 8) and version >= (3,):
        return ast.parse(src, filename, feature_version=version, type_comments=True)

    if _IS_PYPY:
        # PyPy 3.7 doesn't support type comment tracking which is not ideal, but there's
        # not much we can do as typed-ast won't work either.
        if sys.version_info >= (3, 8):
            return ast3.parse(src, filename, type_comments=True)
        return ast3.parse(src, filename)

    # Typed-ast is guaranteed to be used here and automatically tracks type
    # comments separately.
    return ast3.parse(src, filename, feature_version=version[1])
170
171
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse *src*, trying feature versions from the running minor down to 3.3.

    Returns the tree from the first version that parses. If every version
    fails, raises a new SyntaxError carrying the message of the first
    failure encountered (i.e. the one from the newest version tried).
    """
    # TODO: support Python 4+ ;)
    newest_minor = sys.version_info[1]

    first_error = ""
    # Walk minors newest-first, stopping after 3.3.
    for minor in range(newest_minor, 2, -1):
        try:
            return parse_single_version(src, (3, minor))
        except SyntaxError as err:
            if not first_error:
                first_error = str(err)

    raise SyntaxError(first_error)
185
186
# Precisely typed module-level alias for the `ast3` AST base class. `ast3`
# itself is declared as Any, so isinstance checks that must stay analyzable
# (see stringify_ast) reference this global rather than `ast3.AST` directly.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
188
189
190 def _normalize(lineend: str, value: str) -> str:
191     # To normalize, we strip any leading and trailing space from
192     # each line...
193     stripped: List[str] = [i.strip() for i in value.splitlines()]
194     normalized = lineend.join(stripped)
195     # ...and remove any blank lines at the beginning and end of
196     # the whole string
197     return normalized.strip()
198
199
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields an opening line for *node*, then for each of its fields (in sorted
    name order) a ``field=`` line followed by the field's contents, and
    finally a closing line. Child nodes are rendered recursively.

    Args:
        node: The AST node to render. It is first passed through
            fixup_ast_constants.
        depth: Nesting level of *node*; each level indents by two spaces.
    """

    node = fixup_ast_constants(node)

    yield f"{'  ' * depth}{node.__class__.__name__}("

    # Whether the node is a TypeIgnore does not depend on the field being
    # visited, so decide it once up front instead of on every iteration.
    type_ignore_classes: Tuple[Type[Any], ...]
    is_type_ignore = False
    # TypeIgnore will not be present using pypy < 3.8, so no need for this
    if not (_IS_PYPY and sys.version_info < (3, 8)):
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        type_ignore_classes = (ast3.TypeIgnore,)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        is_type_ignore = isinstance(node, type_ignore_classes)

    # A TypeIgnore node contributes no fields at all, only its open/close lines.
    field_names: List[str] = [] if is_type_ignore else sorted(node._fields)
    for field in field_names:  # noqa: F402
        try:
            value: object = getattr(node, field)
        except AttributeError:
            # Field is declared on the class but not set on this instance.
            continue

        yield f"{'  ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                normalized = _normalize("\n", value)
            else:
                normalized = value
            yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"

    yield f"{'  ' * depth})  # /{node.__class__.__name__}"
265
266
def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes (from either ``ast`` or ``ast3``)
    are replaced by a new ``ast.Constant`` holding the same value; any other
    node is returned unchanged.
    """
    legacy_value: object
    if isinstance(node, (ast.Str, ast3.Str, ast.Bytes, ast3.Bytes)):
        legacy_value = node.s
    elif isinstance(node, (ast.Num, ast3.Num)):
        legacy_value = node.n
    elif isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        legacy_value = node.value
    else:
        # Not one of the deprecated constant node types.
        return node

    return ast.Constant(value=legacy_value)