git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Use STDIN project in test_projects to ensure it runs quickly (#2575)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import sys
6 from typing import Iterable, Iterator, List, Set, Union, Tuple
7
8 # lib2to3 fork
9 from blib2to3.pytree import Node, Leaf
10 from blib2to3 import pygram, pytree
11 from blib2to3.pgen2 import driver
12 from blib2to3.pgen2.grammar import Grammar
13 from blib2to3.pgen2.parse import ParseError
14
15 from black.mode import TargetVersion, Feature, supports_feature
16 from black.nodes import syms
17
18 try:
19     from typed_ast import ast3, ast27
20 except ImportError:
21     if sys.version_info < (3, 8):
22         print(
23             "The typed_ast package is required but not installed.\n"
24             "You can upgrade to Python 3.8+ or install typed_ast with\n"
25             "`python3 -m pip install typed-ast`.",
26             file=sys.stderr,
27         )
28         sys.exit(1)
29     else:
30         ast3 = ast27 = ast
31
32
class InvalidInput(ValueError):
    """Error signalling that the source text could not be parsed by any grammar."""
35
36
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try for `target_versions`, most preferred first.

    An empty set means no target was specified, so every grammar is returned,
    newest first. If every target is Python 2, only the two Python 2 grammars
    are returned. Otherwise only Python 3 grammars are returned, selected by
    which of the two 'ASYNC_*' features the targets support.
    """
    if not target_versions:
        # Nothing was requested: fall back to trying everything.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    python2_only = all(version.is_python2() for version in target_versions)
    if python2_only:
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # From here on the code is Python 3-compatible, so Python 2 grammars are skipped.
    async_identifiers_ok = supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
    async_keywords_ok = supports_feature(target_versions, Feature.ASYNC_KEYWORDS)

    py3_grammars = []
    # When both grammars apply, the one treating async as a keyword goes first.
    if not async_identifiers_ok:
        # Python 3.7+
        py3_grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not async_keywords_ok:
        # Python 3.0-3.6
        py3_grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    # Every Python version has exactly one of the two 'ASYNC_*' flags, so at
    # least one grammar has been appended by now.
    return py3_grammars
74
75
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse `src_txt` into a lib2to3 tree, trying each candidate grammar in turn.

    The first grammar that parses the text wins. If every grammar fails,
    InvalidInput is raised, describing the failure of the last grammar tried.
    A bare Leaf result is wrapped in a `file_input` Node so callers always
    receive a Node.
    """
    if not src_txt.endswith("\n"):
        src_txt = src_txt + "\n"

    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar, pytree.convert)
        try:
            tree = parser.parse_string(src_txt, True)
        except ParseError as parse_error:
            lineno, column = parse_error.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            # Remember the failure; it is only raised if no grammar succeeds.
            failure = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
        else:
            break
    else:
        raise failure from None

    if isinstance(tree, Leaf):
        return Node(syms.file_input, [tree])
    return tree
101
102
def lib2to3_unparse(node: Node) -> str:
    """Render a lib2to3 node back to text via its `str()` representation."""
    return str(node)
107
108
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` into an AST using the syntax of one specific Python `version`.

    Python 3 versions go through the builtin `ast` when running on 3.8+, and
    through `ast3` otherwise; `(2, 7)` goes through `ast27`. Any other version
    raises AssertionError. Syntax errors from the underlying parser propagate.
    """
    filename = "<unknown>"
    if version >= (3,):
        # typed_ast is needed because of feature version limitations in the
        # builtin ast before 3.8.
        if sys.version_info >= (3, 8):
            return ast.parse(src, filename, feature_version=version)
        return ast3.parse(src, filename, feature_version=version[1])

    if version == (2, 7):
        return ast27.parse(src)

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
121
122
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse `src` with the newest Python version that accepts it.

    Versions are tried from the running interpreter's minor version down to
    3.3, then 2.7 when `ast27` is not merely an alias of the builtin `ast`.
    If all of them fail, SyntaxError is raised carrying the message of the
    first (i.e. newest-version) failure.
    """
    # TODO: support Python 4+ ;)
    candidates = [(3, minor) for minor in range(sys.version_info[1], 2, -1)]

    # `ast27` is the builtin `ast` when typed_ast is unavailable; only offer
    # Python 2.7 when a real Python 2 parser is present.
    if ast27.__name__ != "ast":
        candidates.append((2, 7))

    first_error = ""
    for version in candidates:
        try:
            return parse_single_version(src, version)
        except SyntaxError as error:
            first_error = first_error or str(error)

    raise SyntaxError(first_error)
139
140
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Yield an indented, line-by-line description of `node` and its children.

    The output is meant for comparing two ASTs by content: fields are visited
    in sorted order, nested nodes are rendered recursively, and string/bytes
    constants are whitespace-normalized so reindented docstrings compare equal.
    """

    node = fixup_ast_constants(node)
    ast_node_types = (ast.AST, ast3.AST, ast27.AST)

    yield f"{'  ' * depth}{node.__class__.__name__}("

    # TypeIgnore has only one field 'lineno' which breaks this comparison, so
    # none of its fields are emitted.
    type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
    if sys.version_info >= (3, 8):
        type_ignore_classes += (ast.TypeIgnore,)
    field_names = [] if isinstance(node, type_ignore_classes) else sorted(node._fields)

    for field_name in field_names:
        try:
            value = getattr(node, field_name)
        except AttributeError:
            continue

        yield f"{'  ' * (depth+1)}{field_name}="

        if isinstance(value, list):
            # Ignore nested tuples within del statements, because we may insert
            # parentheses and they change the AST.
            flatten_tuples = field_name == "targets" and isinstance(
                node, (ast.Delete, ast3.Delete, ast27.Delete)
            )
            for item in value:
                if flatten_tuples and isinstance(
                    item, (ast.Tuple, ast3.Tuple, ast27.Tuple)
                ):
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)

                elif isinstance(item, ast_node_types):
                    yield from stringify_ast(item, depth + 2)

        elif isinstance(value, ast_node_types):
            yield from stringify_ast(value, depth + 2)

        else:
            # Constant strings may be indented across newlines, if they are
            # docstrings; fold spaces after newlines when comparing. Similarly,
            # trailing and leading space may be removed.
            # Note that when formatting Python 2 code, at least with Windows
            # line-endings, docstrings can end up here as bytes instead of
            # str so make sure that we handle both cases.
            is_text_constant = (
                isinstance(node, ast.Constant)
                and field_name == "value"
                and isinstance(value, (str, bytes))
            )
            if is_text_constant:
                newline = "\n" if isinstance(value, str) else b"\n"
                # Strip every line individually, rejoin, then drop blank lines
                # at the very beginning and end of the whole string.
                per_line = (line.strip() for line in value.splitlines())
                normalized = newline.join(per_line).strip()  # type: ignore[attr-defined]
            else:
                normalized = value
            yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"

    yield f"{'  ' * depth})  # /{node.__class__.__name__}"
208
209
def _legacy_node_classes(name: str, *modules: object) -> tuple:
    """Return the classes called `name` that actually exist on `modules`."""
    found = (getattr(module, name, None) for module in modules)
    return tuple(cls for cls in found if cls is not None)


def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    Str/Bytes, Num and NameConstant nodes (from `ast`, `ast3` or `ast27`) are
    replaced by an equivalent `ast.Constant`; any other node is returned
    unchanged.

    The legacy classes are looked up tolerantly rather than referenced
    directly: they were removed from the builtin `ast` in Python 3.14, and a
    hard reference would raise AttributeError there on every call.
    """
    text_classes = _legacy_node_classes("Str", ast, ast3, ast27)
    text_classes += _legacy_node_classes("Bytes", ast, ast3)
    if isinstance(node, text_classes):
        return ast.Constant(value=node.s)

    if isinstance(node, _legacy_node_classes("Num", ast, ast3, ast27)):
        return ast.Constant(value=node.n)

    if isinstance(node, _legacy_node_classes("NameConstant", ast, ast3)):
        return ast.Constant(value=node.value)

    # With no legacy class available, isinstance() against the empty tuple is
    # False, so modern Constant nodes simply pass through.
    return node