git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

32cfa5239f117e00c2ce969baab6ee3118a5b519
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import platform
6 import sys
7 from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union
8
9 if sys.version_info < (3, 8):
10     from typing_extensions import Final
11 else:
12     from typing import Final
13
14 # lib2to3 fork
15 from blib2to3.pytree import Node, Leaf
16 from blib2to3 import pygram
17 from blib2to3.pgen2 import driver
18 from blib2to3.pgen2.grammar import Grammar
19 from blib2to3.pgen2.parse import ParseError
20
21 from black.mode import TargetVersion, Feature, supports_feature
22 from black.nodes import syms
23
# Declared as Any up front: depending on the environment these two names end up
# bound either to the typed_ast modules or to the builtin ast module.
ast3: Any
ast27: Any

_IS_PYPY = platform.python_implementation() == "PyPy"

try:
    from typed_ast import ast3, ast27
except ImportError:
    # typed_ast is unavailable. The builtin ast module is used as a stand-in on
    # Python 3.8+ (and on PyPy from 3.7 on); anywhere else typed_ast is mandatory,
    # so report the problem and exit.
    if sys.version_info >= (3, 8) or (_IS_PYPY and sys.version_info >= (3, 7)):
        ast3 = ast27 = ast
    else:
        print(
            "The typed_ast package is required but not installed.\n"
            "You can upgrade to Python 3.8+ or install typed_ast with\n"
            "`python3 -m pip install typed-ast`.",
            file=sys.stderr,
        )
        sys.exit(1)
43
44
class InvalidInput(ValueError):
    """Signal that the source could not be parsed by any attempted grammar."""
47
48
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try, in order, for the given target versions.

    With no target versions, every known grammar is returned (newest first).
    When all targets are Python 2, only the two Python 2 grammars are returned.
    Otherwise the Python 3 grammars matching the targets' features are returned.
    """
    if target_versions:
        if all(version.is_python2() for version in target_versions):
            # Python 2-only code, so try Python 2 grammars.
            return [
                # Python 2.7 with future print_function import
                pygram.python_grammar_no_print_statement,
                # Python 2.7
                pygram.python_grammar,
            ]

        # Python 3-compatible code, so only try Python 3 grammars.
        py3_grammars = []
        if supports_feature(target_versions, Feature.PATTERN_MATCHING):
            # Python 3.10+
            py3_grammars.append(pygram.python_grammar_soft_keywords)
        # If we have to parse both, try to parse async as a keyword first.
        if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
            # Python 3.7+
            py3_grammars.append(
                pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
            )
        if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
            # Python 3.0-3.6
            py3_grammars.append(
                pygram.python_grammar_no_print_statement_no_exec_statement
            )
        # At least one of the two ASYNC_* branches above must have been taken,
        # because every Python version has exactly one of the two 'ASYNC_*' flags.
        return py3_grammars

    # No target_version specified, so try all grammars.
    return [
        # Python 3.7+
        pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
        # Python 3.0-3.6
        pygram.python_grammar_no_print_statement_no_exec_statement,
        # Python 2.7 with future print_function import
        pygram.python_grammar_no_print_statement,
        # Python 2.7
        pygram.python_grammar,
    ]
89
90
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* into a lib2to3 tree, trying each candidate grammar in turn.

    A trailing newline is appended to the source when it is missing. The first
    grammar that parses the source wins. If every grammar fails, InvalidInput is
    raised describing the failure reported for the last grammar tried. A bare
    Leaf result is wrapped in a ``file_input`` Node so callers always get a Node.
    """
    source = src_txt if src_txt.endswith("\n") else src_txt + "\n"

    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar)
        try:
            tree = parser.parse_string(source, True)
        except ParseError as parse_error:
            lineno, column = parse_error.context[1]
            source_lines = source.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            # Remember the failure; a later grammar may overwrite it.
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
        else:
            break
    else:
        # No grammar succeeded.
        raise exc from None

    if isinstance(tree, Leaf):
        tree = Node(syms.file_input, [tree])
    return tree
116
117
def lib2to3_unparse(node: Node) -> str:
    """Turn a lib2to3 node back into text using its string conversion."""
    return str(node)
122
123
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* into an AST as the given ``(major, minor)`` Python version.

    Python 3 versions are parsed with the builtin ``ast`` module when running on
    Python 3.8+, and with ``ast3`` otherwise; ``(2, 7)`` is parsed with ``ast27``.
    Any other version raises AssertionError.
    """
    if version >= (3,):
        if sys.version_info >= (3, 8):
            return ast.parse(src, "<unknown>", feature_version=version)
        # typed_ast is needed because of feature version limitations in the
        # builtin ast.
        if _IS_PYPY:
            return ast3.parse(src, "<unknown>")
        return ast3.parse(src, "<unknown>", feature_version=version[1])

    if version == (2, 7):
        return ast27.parse(src)

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
139
140
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* with the newest Python version that accepts it.

    Versions are tried from the running interpreter's minor version down to 3.3,
    followed by 2.7 when ``ast27`` is not just the builtin ``ast`` module. If
    every attempt fails, SyntaxError is raised carrying the message of the first
    failure.
    """
    # TODO: support Python 4+ ;)
    candidates = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    # Python 2.7 can only be tried when a real ast27 module is available.
    if ast27.__name__ != "ast":
        candidates.append((2, 7))

    first_error = ""
    for version in candidates:
        try:
            return parse_single_version(src, version)
        except SyntaxError as err:
            first_error = first_error or str(err)

    raise SyntaxError(first_error)
157
158
# Module-level aliases for the AST base classes of ast3 and ast27. stringify_ast()
# uses these globals in an isinstance() check instead of accessing `ast3.AST` /
# `ast27.AST` directly; its comments note that the direct access breaks mypyc.
ast3_AST: Final[Type[ast3.AST]] = ast3.AST
ast27_AST: Final[Type[ast27.AST]] = ast27.AST
161
162
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Yield an indented, line-by-line text rendering of *node* for AST comparison.

    The node is first passed through fixup_ast_constants(). Its fields are then
    emitted in sorted order, recursing into child nodes with two extra levels of
    indentation. str/bytes Constant values are whitespace-normalized so that
    re-indented docstrings compare equal.
    """
    node = fixup_ast_constants(node)

    indent = "  " * depth
    field_indent = "  " * (depth + 1)
    value_indent = "  " * (depth + 2)

    yield f"{indent}{node.__class__.__name__}("

    field_names = sorted(node._fields)

    type_ignore_classes: Tuple[Type[Any], ...]
    # TypeIgnore will not be present using pypy < 3.8, so no need for this check
    # there. The check is also skipped when there are no fields to render.
    if field_names and not (_IS_PYPY and sys.version_info < (3, 8)):
        type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            type_ignore_classes += (ast.TypeIgnore,)
        if isinstance(node, type_ignore_classes):
            # TypeIgnore has only one field 'lineno' which breaks this comparison,
            # so none of its fields are rendered.
            field_names = []

    for field in field_names:  # noqa: F402
        try:
            value = getattr(node, field)
        except AttributeError:
            # A declared field that is not set on this node is skipped.
            continue

        yield f"{field_indent}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)

                elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
                    yield from stringify_ast(item, depth + 2)

        # Note that we are referencing the typed-ast ASTs via global variables and not
        # direct module attribute accesses because that breaks mypyc. It's probably
        # something to do with the ast3 / ast27 variables being marked as Any leading
        # mypy to think this branch is always taken, leaving the rest of the code
        # unanalyzed. Tightening up the types for the typed-ast AST types avoids the
        # mypyc crash.
        elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, (str, bytes))
            ):
                lineend = "\n" if isinstance(value, str) else b"\n"
                # Strip leading and trailing space from each line, rejoin, then
                # drop blank lines at the beginning and end of the whole string.
                stripped_lines = [line.strip() for line in value.splitlines()]
                normalized = lineend.join(stripped_lines)  # type: ignore[attr-defined]
                normalized = normalized.strip()
            else:
                normalized = value
            yield f"{value_indent}{normalized!r},  # {value.__class__.__name__}"

    yield f"{indent})  # /{node.__class__.__name__}"
239
240
def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Replace string, bytes, number and name-constant nodes with ast.Constant.

    These node classes were deprecated in Python 3.8. Nodes of any other type
    are returned unchanged.
    """
    if isinstance(node, (ast.Str, ast3.Str, ast27.Str, ast.Bytes, ast3.Bytes)):
        constant_value = node.s
    elif isinstance(node, (ast.Num, ast3.Num, ast27.Num)):
        constant_value = node.n
    elif isinstance(node, (ast.NameConstant, ast3.NameConstant)):
        constant_value = node.value
    else:
        return node

    return ast.Constant(value=constant_value)