git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

slightly better example link (#2617)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 # lib2to3 fork
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20
21 from black.mode import TargetVersion, Feature, supports_feature
22 from black.nodes import syms
23
# Both names are bound below: either to the typed_ast modules or, as a
# fallback, to the builtin ``ast`` module.
ast3: Any
ast27: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3, ast27
except ImportError:
    # typed_ast is missing. The builtin ast is an acceptable stand-in on
    # Python 3.8+, and on PyPy from 3.7; anywhere else we cannot continue.
    if sys.version_info >= (3, 7) and (sys.version_info >= (3, 8) or _IS_PYPY):
        ast3 = ast27 = ast
    else:
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
43
44
class InvalidInput(ValueError):
    """Signals that the given source could not be parsed by any attempted grammar."""
47
48
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to attempt, in order, for the given target versions.

    An empty set means "unknown", so every grammar is returned (newest first).
    A set made up only of Python 2 versions yields the two Python 2 grammars.
    Otherwise only Python 3 grammars are selected, based on which features all
    of the target versions support.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    python2_only = all(version.is_python2() for version in target_versions)
    if python2_only:
        # Python 2-only code, so try Python 2 grammars.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # Python 3-compatible code, so only Python 3 grammars are candidates.
    has_pattern_matching = supports_feature(target_versions, Feature.PATTERN_MATCHING)
    has_async_identifiers = supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    )
    has_async_keywords = supports_feature(target_versions, Feature.ASYNC_KEYWORDS)

    candidates = []
    if has_pattern_matching:
        # Python 3.10+
        candidates.append(pygram.python_grammar_soft_keywords)
    # If we have to parse both, try to parse async as a keyword first
    if not (has_async_identifiers or has_pattern_matching):
        # Python 3.7-3.9
        candidates.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not has_async_keywords:
        # Python 3.0-3.6
        candidates.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return candidates
91
92
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    A trailing newline is appended to `src_txt` when it is missing. Every grammar
    returned by get_grammars() for `target_versions` is tried in order and the
    first successful parse is used. A bare Leaf result is wrapped in a
    `file_input` Node so callers always receive a Node.

    Raises:
        InvalidInput: if no grammar could parse the source. The message carries
            the line, column and offending source line reported by the last
            grammar that was tried.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    # Pre-seed the error so that the `else` clause below can never reference an
    # unbound name, even if the grammar list were ever empty.
    exc = InvalidInput("Cannot parse: no grammar available for the target versions")
    for grammar in get_grammars(set(target_versions)):
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
    else:
        # Every grammar failed (or none was available); hide the ParseError chain.
        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
118
119
def lib2to3_unparse(node: Node) -> str:
    """Render a lib2to3 node back to text using its ``str()`` representation."""
    return str(node)
124
125
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` into an AST as the given (major, minor) Python version.

    Raises AssertionError for a version that is neither 3.x nor exactly (2, 7).
    """
    filename = "<unknown>"
    if version >= (3,):
        # typed_ast is needed because of feature version limitations in the
        # builtin ast
        if sys.version_info >= (3, 8):
            return ast.parse(src, filename, feature_version=version)
        if _IS_PYPY:
            return ast3.parse(src, filename)
        return ast3.parse(src, filename, feature_version=version[1])

    if version == (2, 7):
        return ast27.parse(src)

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
141
142
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` with the newest Python version that accepts it.

    Versions are tried from the running interpreter's minor version down to
    3.3, followed by 2.7 when `ast27` is not the builtin ``ast`` module. If
    every attempt fails, a SyntaxError carrying the first non-empty error
    message is raised.
    """
    # TODO: support Python 4+ ;)
    newest_minor = sys.version_info[1]
    candidates = [(3, minor) for minor in range(newest_minor, 2, -1)]

    if ast27.__name__ != "ast":
        candidates.append((2, 7))

    first_error = ""
    for version in candidates:
        try:
            return parse_single_version(src, version)
        except SyntaxError as err:
            first_error = first_error or str(err)

    raise SyntaxError(first_error)
159
160
# Module-level aliases for the ``AST`` base classes of the ast3 / ast27 modules.
# They give these classes a precise ``Type[...]`` annotation (the ast3 / ast27
# variables themselves are typed as ``Any``), which keeps mypyc from crashing on
# isinstance checks against them.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
163
164
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Yields one line for the opening of `node`, then one line per field (in
    sorted field-name order) followed by the rendering of that field's value,
    and finally a closing line. Each line is indented by two spaces per level
    of `depth`. Child AST nodes are rendered recursively; other values are
    rendered with ``repr()``, with str/bytes constants whitespace-normalized.
    """

    node = fixup_ast_constants(node)

    yield f"{'  ' * depth}{node.__class__.__name__}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison, so none
    # of its fields are emitted. TypeIgnore will not be present using pypy < 3.8,
    # so there is no need for the check there. This does not depend on the field
    # being visited, so it is decided once instead of once per field.
    type_ignore_classes: Tuple[Type[Any], ...] = ()
    if not (_IS_PYPY and sys.version_info < (3, 8)):
        type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
    fields = [] if isinstance(node, type_ignore_classes) else sorted(node._fields)

    for field in fields:  # noqa: F402
        try:
            value = getattr(node, field)
        except AttributeError:
            # Field declared on the class but not set on this instance.
            continue

        yield f"{'  ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 / ast27 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, (str, bytes))
            ):
                lineend = "\n" if isinstance(value, str) else b"\n"
                # To normalize, we strip any leading and trailing space from
                # each line...
                stripped = [line.strip() for line in value.splitlines()]
                normalized = lineend.join(stripped)  # type: ignore[attr-defined]
                # ...and remove any blank lines at the beginning and end of
                # the whole string
                normalized = normalized.strip()
            else:
                normalized = value
            yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"

    yield f"{'  ' * depth})  # /{node.__class__.__name__}"
241
242
def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    String/bytes, number and name-constant nodes are replaced by a fresh
    ``ast.Constant`` holding the same value; any other node is returned as is.
    """
    string_like = (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)
    if isinstance(node, string_like):
        return ast.Constant(value=node.s)

    numeric = (ast.Num, ast3.Num, ast27.Num)
    if isinstance(node, numeric):
        return ast.Constant(value=node.n)

    name_constant = (ast.NameConstant, ast3.NameConstant)
    if isinstance(node, name_constant):
        return ast.Constant(value=node.value)

    return node