git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Normalise string prefix order (#2297)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 # lib2to3 fork
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20 from blib2to3.pgen2.tokenize import TokenError
21
22 from black.mode import TargetVersion, Feature, supports_feature
23 from black.nodes import syms
24
25 ast3: Any
26
27 _IS_PYPY = platform.python_implementation() == "PyPy"
28
# Prefer typed_ast when it can be imported; otherwise decide whether the builtin
# ast module is an acceptable stand-in for this interpreter.
try:
    from typed_ast import ast3
except ImportError:
    if sys.version_info >= (3, 7) and (sys.version_info >= (3, 8) or _IS_PYPY):
        # Python 3.8+ (or PyPy on 3.7): use the builtin ast module under the
        # ast3 name.
        ast3 = ast
    else:
        # Python older than 3.7, or non-PyPy 3.7: typed_ast is mandatory here,
        # so report the problem on stderr and exit with status 1.
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
43
44
# Appended to the parse error when the source is only accepted by the
# soft-keywords grammar, which was not among the grammars tried.
PY310_HINT: Final = (
    "Consider using --target-version py310 to parse Python 3.10 code."
)
# Appended to the parse error when the source is accepted by one of the grammars
# that still allow print statements.
PY2_HINT: Final = (
    "Python 2 support was removed in version 22.0."
)
47
48
class InvalidInput(ValueError):
    """Signal that the source code could not be parsed by any attempted grammar."""
51
52
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to attempt, in order, for `target_versions`.

    An empty set means "unknown target": both the async-keywords grammar and
    the plain grammar are returned. Otherwise the list is assembled from
    `supports_feature` checks against the requested versions.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
        ]

    has_pattern_matching = supports_feature(target_versions, Feature.PATTERN_MATCHING)
    selected: List[Grammar] = []

    if has_pattern_matching:
        # Python 3.10+
        selected.append(pygram.python_grammar_soft_keywords)

    # If we have to parse both, try to parse async as a keyword first
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or has_pattern_matching
    ):
        # Python 3.7-3.9
        selected.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        selected.append(pygram.python_grammar_no_print_statement_no_exec_statement)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return selected
81
82
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse `src_txt` and return the resulting lib2to3 Node.

    A trailing newline is added when missing. Each grammar returned by
    `get_grammars` is tried in turn and the first successful parse wins.

    Raises InvalidInput when every grammar fails. The message describes the
    failure from the last grammar tried, with a hint appended when the source
    is accepted by the soft-keywords grammar or by one of the grammars that
    still allow print statements.
    """
    source = src_txt if src_txt.endswith("\n") else src_txt + "\n"

    candidates = get_grammars(set(target_versions))
    for candidate in candidates:
        parser = driver.Driver(candidate)
        try:
            tree = parser.parse_string(source, True)

        except ParseError as parse_error:
            row, col = parse_error.context[1]
            source_lines = source.splitlines()
            try:
                offending = source_lines[row - 1]
            except IndexError:
                offending = "<line number missing in source>"
            failure = InvalidInput(f"Cannot parse: {row}:{col}: {offending}")

        except TokenError as token_error:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            row, col = token_error.args[1]
            failure = InvalidInput(f"Cannot parse: {row}:{col}: {token_error.args[0]}")

        else:
            # First grammar that parses cleanly wins.
            break

    else:
        # Every grammar failed: `failure` holds the error from the last attempt.
        soft_keywords = pygram.python_grammar_soft_keywords
        if soft_keywords not in candidates and matches_grammar(source, soft_keywords):
            raise InvalidInput(f"{failure.args[0]}\n{PY310_HINT}") from None

        if matches_grammar(source, pygram.python_grammar) or matches_grammar(
            source, pygram.python_grammar_no_print_statement
        ):
            raise InvalidInput(f"{failure.args[0]}\n{PY2_HINT}") from None

        raise failure from None

    # A bare Leaf result is wrapped so callers always get a file_input Node.
    return Node(syms.file_input, [tree]) if isinstance(tree, Leaf) else tree
129
130
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if `src_txt` parses with `grammar`, False otherwise.

    ParseError, TokenError and IndentationError raised while parsing are
    treated as "does not match"; any other exception propagates.
    """
    parser = driver.Driver(grammar)
    try:
        parser.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
139
140
def lib2to3_unparse(node: Node) -> str:
    """Return the string form of a lib2to3 node, as produced by `str(node)`."""
    return str(node)
145
146
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST]:
    """Parse `src` into an AST using the feature set of one Python `version`.

    On Python 3.8+ the builtin `ast` module is used with the full version tuple
    as `feature_version`. On older interpreters `ast3` is used instead: without
    a feature version on PyPy, and with the minor version number elsewhere.

    Raises AssertionError for versions below Python 3.
    """
    if not (version >= (3,)):
        raise AssertionError(
            "INTERNAL ERROR: Tried parsing unsupported Python version!"
        )

    filename = "<unknown>"
    # typed_ast is needed because of feature version limitations in the builtin ast
    if sys.version_info >= (3, 8):
        return ast.parse(src, filename, feature_version=version)
    if _IS_PYPY:
        return ast3.parse(src, filename)
    return ast3.parse(src, filename, feature_version=version[1])
160
161
def parse_ast(src: str) -> Union[ast.AST, ast3.AST]:
    """Parse `src`, trying Python 3 minor versions from the running one down to 3.3.

    The first successful parse is returned. If every version fails, a
    SyntaxError carrying the message of the first failure is raised.
    """
    # TODO: support Python 4+ ;)
    first_error = ""
    for minor in range(sys.version_info[1], 2, -1):
        try:
            return parse_single_version(src, (3, minor))
        except SyntaxError as error:
            # Only the error from the newest attempted version is reported.
            first_error = first_error or str(error)

    raise SyntaxError(first_error)
175
176
177 ast3_AST: Final[Type[ast3.AST]] = ast3.AST
178
179
180 def _normalize(lineend: str, value: str) -> str:
181     # To normalize, we strip any leading and trailing space from
182     # each line...
183     stripped: List[str] = [i.strip() for i in value.splitlines()]
184     normalized = lineend.join(stripped)
185     # ...and remove any blank lines at the beginning and end of
186     # the whole string
187     return normalized.strip()
188
189
def stringify_ast(node: Union[ast.AST, ast3.AST], depth: int = 0) -> Iterator[str]:
    """Yield an indented, line-by-line rendering of `node` for comparing ASTs.

    The node is first passed through `fixup_ast_constants`. Its fields are then
    visited in sorted name order: child nodes are rendered recursively two
    levels deeper, and any other value is rendered with `repr()` followed by
    its type name. String constants are whitespace-normalized before rendering.
    """
    node = fixup_ast_constants(node)

    class_name = node.__class__.__name__
    outer_pad = "  " * depth
    field_pad = "  " * (depth + 1)
    value_pad = "  " * (depth + 2)

    yield f"{outer_pad}{class_name}("

    type_ignore_classes: Tuple[Type[Any], ...]
    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore will not be present using pypy < 3.8, so no need for this
        # check there.
        if not _IS_PYPY or sys.version_info >= (3, 8):
            # TypeIgnore has only one field 'lineno' which breaks this comparison,
            # so such nodes are rendered without any of their fields.
            type_ignore_classes = (ast3.TypeIgnore,)
            if sys.version_info >= (3, 8):
                type_ignore_classes += (ast.TypeIgnore,)
            if isinstance(node, type_ignore_classes):
                break

        try:
            value = getattr(node, field)
        except AttributeError:
            # Field declared in _fields but not set on this node: skip it.
            continue

        yield f"{field_pad}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple))
                ):
                    children = item.elts
                elif isinstance(item, (ast.AST, ast3.AST)):
                    children = [item]
                else:
                    # Non-node list entries are not rendered.
                    continue

                for child in children:
                    yield from stringify_ast(child, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            is_str_constant = (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            )
            normalized = _normalize("\n", value) if is_str_constant else value
            yield f"{value_pad}{normalized!r},  # {value.__class__.__name__}"

    yield f"{outer_pad})  # /{class_name}"
254
255
def fixup_ast_constants(node: Union[ast.AST, ast3.AST]) -> Union[ast.AST, ast3.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes (from either `ast` or `ast3`) are
    replaced by a new `ast.Constant` carrying the same value; any other node
    is returned unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast.Bytes, ast3.Bytes)):
        payload = node.s
    elif isinstance(node, (ast.Num, ast3.Num)):
        payload = node.n
    elif isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        payload = node.value
    else:
        return node

    return ast.Constant(value=payload)