git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Bump regex dependency to 2021.4.4 to fix import of Pattern class (#2621)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 # lib2to3 fork
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20
21 from black.mode import TargetVersion, Feature, supports_feature
22 from black.nodes import syms
23
# Declared as Any because each name is bound either to a typed_ast module or, in
# the fallback below, to the builtin ast module.
ast3: Any
ast27: Any

# True when the running interpreter reports itself as PyPy.
_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3, ast27
except ImportError:
    # Either our python version is too low, or we're on pypy
    if sys.version_info < (3, 7) or (sys.version_info < (3, 8) and not _IS_PYPY):
        # typed_ast is mandatory for this interpreter: report on stderr and exit
        # with status 1.
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
    else:
        # typed_ast is missing but not required here: alias both names to the
        # builtin ast module.
        ast3 = ast27 = ast
43
44
class InvalidInput(ValueError):
    """Error signalling that every attempt to parse the input source failed."""
47
48
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to attempt, in order, for *target_versions*.

    An empty set means nothing was requested, so every grammar is returned,
    newest first. A set containing only Python 2 versions yields just the two
    Python 2 grammars. Otherwise only Python 3 grammars are returned, chosen by
    the features the target versions support.
    """
    if not target_versions:
        # Nothing was requested, so every grammar is a candidate.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    py2_only = all(version.is_python2() for version in target_versions)
    if py2_only:
        # Only the Python 2 grammars are worth trying.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # From here on the code is Python 3-compatible, so only Python 3 grammars
    # are considered.
    candidates: List[Grammar] = []

    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        # Python 3.10+
        candidates.append(pygram.python_grammar_soft_keywords)

    # When both async flavours are possible, the "async is a keyword" grammar
    # is tried before the "async is an identifier" one.
    if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
        # Python 3.7+
        async_keyword_grammar = (
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
        candidates.append(async_keyword_grammar)

    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        # Python 3.0-3.6
        candidates.append(pygram.python_grammar_no_print_statement_no_exec_statement)

    # Every Python version has exactly one of the two 'ASYNC_*' flags, so at
    # least one of the two branches above must have run.
    return candidates
89
90
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* into a lib2to3 tree and return its root Node.

    A trailing newline is appended when missing. Each grammar returned by
    get_grammars() is tried in turn and the first successful parse wins; a bare
    Leaf result is wrapped in a file_input Node. If every grammar fails,
    InvalidInput is raised describing the failure of the last grammar tried.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar)
        try:
            tree = drv.parse_string(src_txt, True)
        except ParseError as pe:
            lineno, column = pe.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            # Remember the failure; it is only raised if no grammar succeeds.
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
            continue

        # This grammar parsed the source successfully.
        if isinstance(tree, Leaf):
            tree = Node(syms.file_input, [tree])
        return tree

    raise exc from None
116
117
def lib2to3_unparse(node: Node) -> str:
    """Return the source text obtained by converting the lib2to3 *node* to str."""
    return str(node)
122
123
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* as code for one specific Python *version* and return the AST.

    Python 3 versions go through the builtin ast module on Python 3.8+, and
    through ast3 otherwise (without a feature version on PyPy). Version (2, 7)
    goes through ast27. Any other version raises AssertionError.
    """
    filename = "<unknown>"

    if version >= (3,):
        # typed_ast is needed because of feature version limitations in the
        # builtin ast
        if sys.version_info >= (3, 8):
            return ast.parse(src, filename, feature_version=version)
        if _IS_PYPY:
            return ast3.parse(src, filename)
        return ast3.parse(src, filename, feature_version=version[1])

    if version == (2, 7):
        return ast27.parse(src)

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
139
140
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* with the newest Python version that accepts it.

    Versions are tried from 3.<minor of the running interpreter> down to 3.3,
    followed by 2.7 when ast27 is not the builtin ast module. If every attempt
    raises SyntaxError, a new SyntaxError is raised carrying the first
    non-empty error message seen.
    """
    # TODO: support Python 4+ ;)
    running_minor = sys.version_info[1]
    # Built newest-first, so no sorting is needed afterwards.
    candidates = [(3, minor) for minor in range(running_minor, 2, -1)]

    if ast27.__name__ != "ast":
        candidates.append((2, 7))

    first_error = ""
    for version in candidates:
        try:
            return parse_single_version(src, version)
        except SyntaxError as err:
            first_error = first_error or str(err)

    raise SyntaxError(first_error)
157
158
# Module-level, Final-annotated aliases for the AST base classes of ast3 and ast27;
# stringify_ast() uses them in an isinstance() check.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
161
162
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    The node is first passed through fixup_ast_constants(). The generator then
    yields an opening line with the node's class name, one "<field>=" line per
    readable field (fields visited in sorted order) followed by the lines for
    that field's value, and finally a closing line. Every nesting level is
    indented by two spaces, starting from *depth*.
    """

    node = fixup_ast_constants(node)

    yield f"{'  ' * depth}{node.__class__.__name__}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison, so none
    # of its fields are emitted. The answer is the same for every field of a node,
    # so it is decided once here rather than on each loop iteration.
    # TypeIgnore will not be present using pypy < 3.8, so no need for this there.
    is_type_ignore = False
    if not (_IS_PYPY and sys.version_info < (3, 8)):
        type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        is_type_ignore = isinstance(node, type_ignore_classes)

    for field in () if is_type_ignore else sorted(node._fields):  # noqa: F402
        try:
            value = getattr(node, field)
        except AttributeError:
            # A field listed in _fields but not set on this node is skipped.
            continue

        yield f"{'  ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 / ast27 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, (str, bytes))
            ):
                lineend = "\n" if isinstance(value, str) else b"\n"
                # To normalize, we strip any leading and trailing space from
                # each line...
                stripped = [line.strip() for line in value.splitlines()]
                normalized = lineend.join(stripped)  # type: ignore[attr-defined]
                # ...and remove any blank lines at the beginning and end of
                # the whole string
                normalized = normalized.strip()
            else:
                normalized = value
            yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"

    yield f"{'  ' * depth})  # /{node.__class__.__name__}"
238
239
def _legacy_ast_types(*names: str) -> Tuple[type, ...]:
    """Return the classes called *names* that exist in ast, ast3 or ast27.

    Names a module does not provide are skipped instead of raising
    AttributeError, so the result may be an empty tuple.
    """
    found = []
    for name in names:
        for module in (ast, ast3, ast27):
            cls = getattr(module, name, None)
            if cls is not None:
                found.append(cls)
    return tuple(found)


def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes (from ast, ast3 or ast27) are replaced
    by a fresh ast.Constant holding the same value; any other node is returned
    unchanged.

    The legacy classes are looked up defensively: they are deprecated aliases
    that newer Python releases remove from the ast module, and a missing class
    must simply not match instead of raising AttributeError on every call.
    """
    if isinstance(node, _legacy_ast_types("Str", "Bytes")):
        return ast.Constant(value=node.s)

    if isinstance(node, _legacy_ast_types("Num")):
        return ast.Constant(value=node.n)

    if isinstance(node, _legacy_ast_types("NameConstant")):
        return ast.Constant(value=node.value)

    return node