git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Don't let TokenError bubble up from lib2to3_parse (GH-2343)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 # lib2to3 fork
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21
22 from black.mode import TargetVersion, Feature, supports_feature
23 from black.nodes import syms
24
# Declared as Any because, depending on the interpreter, these names end up bound
# either to the typed_ast modules or to the builtin ast module.
ast3: Any
ast27: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3, ast27
except ImportError:
    # typed_ast is missing. The builtin ast module stands in for it on Python 3.8+
    # and on PyPy 3.7; on every other interpreter typed_ast is mandatory.
    if sys.version_info >= (3, 7) and (sys.version_info >= (3, 8) or _IS_PYPY):
        ast3 = ast27 = ast
    else:
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
44
45
class InvalidInput(ValueError):
    """Signal that the source text could not be parsed by any attempted grammar."""
48
49
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to attempt for *target_versions*, in the order to try them.

    An empty set means "no preference" and yields every known grammar. A set made
    up solely of Python 2 versions yields only the Python 2 grammars. Anything
    else is treated as Python 3 code and the grammars are picked from the features
    that all of the targets support.
    """
    if not target_versions:
        # Nothing was requested explicitly, so every grammar is a candidate.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    if all(version.is_python2() for version in target_versions):
        # Exclusively Python 2 targets: only the Python 2 grammars apply.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # From here on the code is Python 3-compatible.
    has_pattern_matching = supports_feature(target_versions, Feature.PATTERN_MATCHING)
    selected: List[Grammar] = []
    if has_pattern_matching:
        # Python 3.10+
        selected.append(pygram.python_grammar_soft_keywords)
    elif not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
        # Python 3.7-3.9. When both async flavours have to be parsed, the grammar
        # treating async as a keyword goes first.
        selected.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        selected.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # The list cannot be empty here: every Python version has exactly one of the
    # two 'ASYNC_*' flags, so at least one branch above was taken.
    return selected
92
93
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* into a lib2to3 tree and return its root Node.

    A trailing newline is appended when the text lacks one. The grammars returned
    by get_grammars() for *target_versions* are attempted in order and the first
    successful parse wins; a bare Leaf result is wrapped in a file_input Node.

    Raises InvalidInput, built from the last failed attempt, when no grammar
    accepts the text.
    """
    source = src_txt if src_txt.endswith("\n") else src_txt + "\n"

    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(source, True)

        except ParseError as parse_error:
            row, col = parse_error.context[1]
            source_lines = source.splitlines()
            try:
                offending = source_lines[row - 1]
            except IndexError:
                offending = "<line number missing in source>"
            failure = InvalidInput(f"Cannot parse: {row}:{col}: {offending}")

        except TokenError as token_error:
            # Raised in edge cases; there is typically no faulty line to show, so
            # the tokenizer's own message is reported instead.
            row, col = token_error.args[1]
            failure = InvalidInput(f"Cannot parse: {row}:{col}: {token_error.args[0]}")

        else:
            if isinstance(tree, Leaf):
                tree = Node(syms.file_input, [tree])
            return tree

    # Every grammar was rejected; surface the most recent failure without chaining.
    raise failure from None
125
126
def lib2to3_unparse(node: Node) -> str:
    """Render a lib2to3 node back to text through its ``str()`` conversion."""
    return str(node)
131
132
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* into an AST as code for exactly the given Python *version*.

    Python 3 versions go through the builtin ast module when running on 3.8+ and
    through ast3 otherwise; (2, 7) goes through ast27. Any other version is an
    internal error and raises AssertionError.
    """
    filename = "<unknown>"
    if version >= (3,):
        # typed_ast is needed because of feature version limitations in the
        # builtin ast before 3.8.
        if sys.version_info >= (3, 8):
            return ast.parse(src, filename, feature_version=version)
        if _IS_PYPY:
            return ast3.parse(src, filename)
        return ast3.parse(src, filename, feature_version=version[1])

    if version == (2, 7):
        return ast27.parse(src)

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
148
149
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* with the newest Python version that accepts it.

    Python 3 feature versions are tried from the running interpreter's minor
    version down to 3.3, followed by 2.7 when ast27 is not the builtin ast
    module. The first successful parse is returned.

    Raises SyntaxError, carrying the text of the first failure, when every
    version rejects the source.
    """
    # TODO: support Python 4+ ;)
    # Built directly in newest-first order.
    candidates = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]
    if ast27.__name__ != "ast":
        candidates.append((2, 7))

    first_error = ""
    for candidate in candidates:
        try:
            return parse_single_version(src, candidate)
        except SyntaxError as error:
            first_error = first_error or str(error)

    raise SyntaxError(first_error)
166
167
# Module-level aliases of the typed-ast AST base classes. stringify_ast checks
# values against these globals instead of accessing ast3.AST / ast27.AST directly,
# because the direct module attribute accesses break mypyc there.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
170
171
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Yield a line-by-line textual dump of *node* for comparing ASTs by content.

    Each nesting level is indented by two spaces, starting at *depth*. Fields are
    visited in sorted name order; child nodes are dumped recursively and plain
    values are emitted via their repr, with string/bytes constants normalized so
    that re-indented docstrings compare equal.
    """
    node = fixup_ast_constants(node)

    node_name = node.__class__.__name__
    outer_pad = "  " * depth
    field_pad = "  " * (depth + 1)
    value_pad = "  " * (depth + 2)
    child_depth = depth + 2

    yield f"{outer_pad}{node_name}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field_name in sorted(node._fields):
        # TypeIgnore is not present when using pypy < 3.8, so the check is only
        # needed (and only possible) on the other interpreters.
        if not _IS_PYPY or sys.version_info >= (3, 8):
            # TypeIgnore has only one field 'lineno' which breaks this comparison,
            # so none of its fields are emitted.
            type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
            if sys.version_info >= (3, 8):
                type_ignore_classes = type_ignore_classes + (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            attr = getattr(node, field_name)
        except AttributeError:
            continue

        yield f"{field_pad}{field_name}="

        if isinstance(attr, list):
            for element in attr:
                # Nested tuples within del statements are flattened, because we may
                # insert parentheses and they change the AST.
                if (
                    field_name == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(element, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for member in element.elts:
                        yield from stringify_ast(member, child_depth)

                elif isinstance(element, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(element, child_depth)

        # The typed-ast AST classes are referenced through the ast3_AST / ast27_AST
        # globals rather than direct module attribute accesses because the latter
        # breaks mypyc. It is probably related to ast3 / ast27 being marked as Any,
        # which leads mypy to think this branch is always taken and leave the rest
        # of the code unanalyzed. Tightening up the types avoids the mypyc crash.
        elif isinstance(attr, (ast.AST, ast3_AST, ast27_AST)):
            yield from stringify_ast(attr, child_depth)

        else:
            # Docstring constants may be re-indented across newlines, and leading
            # or trailing space may be removed, so that whitespace is folded before
            # comparing. When formatting Python 2 code, at least with Windows
            # line endings, docstrings can arrive here as bytes instead of str,
            # hence both types are handled.
            is_text_constant = (
                isinstance(node, ast.Constant)
                and field_name == "value"
                and isinstance(attr, (str, bytes))
            )
            if is_text_constant:
                newline = "\n" if isinstance(attr, str) else b"\n"
                # Strip leading and trailing space from each line...
                trimmed_lines = [line.strip() for line in attr.splitlines()]
                rejoined = newline.join(trimmed_lines)  # type: ignore[attr-defined]
                # ...then drop blank lines at the start and end of the whole string.
                normalized = rejoined.strip()
            else:
                normalized = attr
            yield f"{value_pad}{normalized!r},  # {attr.__class__.__name__}"

    yield f"{outer_pad})  # /{node_name}"
248
249
def _legacy_constant_types(
    name: str, modules: Tuple[Any, ...]
) -> Tuple[Type[Any], ...]:
    """Collect the class called *name* from each of *modules* that still defines it."""
    found = (getattr(module, name, None) for module in modules)
    return tuple(cls for cls in found if cls is not None)


def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes (from the builtin ast module or from
    typed_ast) are replaced by an equivalent ``ast.Constant`` holding the same
    value; every other node is returned unchanged.

    The legacy classes are looked up defensively rather than as plain attributes:
    the builtin ast module dropped them in Python 3.14, where a direct
    ``ast.Str`` access raises AttributeError. A module that lacks a class simply
    contributes nothing to the isinstance() check.
    """
    text_types = _legacy_constant_types(
        "Str", (ast, ast3, ast27)
    ) + _legacy_constant_types("Bytes", (ast, ast3))
    if isinstance(node, text_types):
        return ast.Constant(value=node.s)

    if isinstance(node, _legacy_constant_types("Num", (ast, ast3, ast27))):
        return ast.Constant(value=node.n)

    if isinstance(node, _legacy_constant_types("NameConstant", (ast, ast3))):
        return ast.Constant(value=node.value)

    return node
263     return node