git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

black/parser: partial support for pattern matching (#2586)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import sys
6 from typing import Iterable, Iterator, List, Set, Union, Tuple
7
8 # lib2to3 fork
9 from blib2to3.pytree import Node, Leaf
10 from blib2to3 import pygram, pytree
11 from blib2to3.pgen2 import driver
12 from blib2to3.pgen2.grammar import Grammar
13 from blib2to3.pgen2.parse import ParseError
14
15 from black.mode import TargetVersion, Feature, supports_feature
16 from black.nodes import syms
17
18 try:
19     from typed_ast import ast3, ast27
20 except ImportError:
21     if sys.version_info < (3, 8):
22         print(
23             "The typed_ast package is required but not installed.\n"
24             "You can upgrade to Python 3.8+ or install typed_ast with\n"
25             "`python3 -m pip install typed-ast`.",
26             file=sys.stderr,
27         )
28         sys.exit(1)
29     else:
30         ast3 = ast27 = ast
31
32
class InvalidInput(ValueError):
    """Error signalling that the source text could not be parsed.

    It is raised once every parse attempt on the input has failed.
    """
35
36
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to attempt, in order, for *target_versions*.

    - An empty set selects every known grammar, newest first.
    - A set made up only of Python 2 versions selects the two Python 2
      grammars.
    - Anything else selects Python 3 grammars according to the features the
      targets support.
    """
    if not target_versions:
        # Nothing was requested, so every grammar is a candidate.
        return [
            # Python 3.7+
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            # Python 3.0-3.6
            pygram.python_grammar_no_print_statement_no_exec_statement,
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    python2_only = all(target.is_python2() for target in target_versions)
    if python2_only:
        # Only the Python 2 grammars make sense here.
        return [
            # Python 2.7 with future print_function import
            pygram.python_grammar_no_print_statement,
            # Python 2.7
            pygram.python_grammar,
        ]

    # From here on the code is Python 3-compatible: Python 3 grammars only.
    candidates = []

    # Python 3.10+
    if supports_feature(target_versions, Feature.PATTERN_MATCHING):
        candidates += [pygram.python_grammar_soft_keywords]

    # When both async flavours are possible, the "async is a keyword" grammar
    # is tried before the "async is an identifier" one.
    # Python 3.7+
    if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
        candidates += [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        ]

    # Python 3.0-3.6
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        candidates += [pygram.python_grammar_no_print_statement_no_exec_statement]

    # At least one of the two async branches must have been taken, because
    # every Python version has exactly one of the two 'ASYNC_*' flags.
    return candidates
77
78
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 Node.

    Each grammar returned by ``get_grammars`` for *target_versions* is tried
    in turn; the first successful parse wins. A bare ``Leaf`` result is
    wrapped in a ``file_input`` node so the caller always gets a ``Node``.

    Raises:
        InvalidInput: when no grammar could parse the source. The message
            carries the position and line reported by the last failed attempt.
    """
    # A trailing newline is appended when the source lacks one.
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar, pytree.convert)
        try:
            tree = parser.parse_string(src_txt, True)
        except ParseError as parse_error:
            lineno, column = parse_error.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            # Remember the failure; a later grammar may still succeed.
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
            continue

        if isinstance(tree, Leaf):
            tree = Node(syms.file_input, [tree])
        return tree

    # Every grammar failed: report the most recent error without chaining.
    raise exc from None
104
105
def lib2to3_unparse(node: Node) -> str:
    """Return the source text of a lib2to3 node, i.e. ``str(node)``."""
    return str(node)
110
111
def parse_single_version(
    src: str, version: Tuple[int, int]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* into an AST as code for the given ``(major, minor)`` version.

    Python 3 versions go through the builtin ``ast`` on a 3.8+ interpreter and
    through ``ast3`` otherwise; ``(2, 7)`` goes through ``ast27``.

    Raises:
        AssertionError: for any other version.
    """
    filename = "<unknown>"
    if version >= (3,):
        # typed_ast is needed because of feature version limitations in the
        # builtin ast
        if sys.version_info >= (3, 8):
            return ast.parse(src, filename, feature_version=version)
        return ast3.parse(src, filename, feature_version=version[1])

    if version == (2, 7):
        return ast27.parse(src)

    raise AssertionError("INTERNAL ERROR: Tried parsing unsupported Python version!")
124
125
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* into an AST, trying the newest Python version first.

    Candidates run from the running interpreter's minor version down to 3.3,
    followed by 2.7 when ``ast27`` is not the builtin ``ast`` module. The
    first successful parse is returned.

    Raises:
        SyntaxError: when every candidate fails; the message is the text of
            the first failure with a non-empty message.
    """
    # TODO: support Python 4+ ;)
    newest_minor = sys.version_info[1]
    # Built already in descending order: (3, newest), ..., (3, 3).
    candidates = [(3, minor) for minor in range(newest_minor, 2, -1)]

    if ast27.__name__ != "ast":
        # (2, 7) sorts below every (3, x), so it is tried last.
        candidates.append((2, 7))

    first_error = ""
    for candidate in candidates:
        try:
            return parse_single_version(src, candidate)
        except SyntaxError as e:
            # Keep only the earliest non-empty message.
            first_error = first_error or str(e)

    raise SyntaxError(first_error)
142
143
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Yield text lines describing *node* so that ASTs can be compared by content.

    The node is first passed through ``fixup_ast_constants``. Output is an
    opening line with the class name, one ``field=`` line per field (sorted by
    name) followed by its value(s) rendered two levels deeper, and a closing
    line. *depth* sets the indentation, two spaces per level.
    """
    node = fixup_ast_constants(node)

    class_name = node.__class__.__name__
    outer_pad = "  " * depth
    field_pad = "  " * (depth + 1)
    value_pad = "  " * (depth + 2)
    any_ast = (ast.AST, ast3.AST, ast27.AST)

    yield f"{outer_pad}{class_name}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        ignore_types = (ast3.TypeIgnore, ast27.TypeIgnore)
        if sys.version_info >= (3, 8):
            ignore_types += (ast.TypeIgnore,)
        if isinstance(node, ignore_types):
            break

        # Fields listed in _fields but not set on the node are skipped.
        try:
            value = getattr(node, field)
        except AttributeError:
            continue

        yield f"{field_pad}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may
                # insert parentheses and they change the AST.
                is_del_tuple = (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                )
                if is_del_tuple:
                    children = item.elts
                elif isinstance(item, any_ast):
                    children = [item]
                else:
                    # List entries that are not AST nodes produce no output.
                    children = []

                for child in children:
                    yield from stringify_ast(child, depth + 2)
            continue

        if isinstance(value, any_ast):
            yield from stringify_ast(value, depth + 2)
            continue

        # Constant strings may be indented across newlines, if they are
        # docstrings; fold spaces after newlines when comparing. Similarly,
        # trailing and leading space may be removed.
        # Note that when formatting Python 2 code, at least with Windows
        # line-endings, docstrings can end up here as bytes instead of
        # str so make sure that we handle both cases.
        is_text_constant = (
            isinstance(node, ast.Constant)
            and field == "value"
            and isinstance(value, (str, bytes))
        )
        if is_text_constant:
            newline = "\n" if isinstance(value, str) else b"\n"
            # Strip each line, rejoin, then drop blank lines at both ends of
            # the whole string.
            rejoined = newline.join(line.strip() for line in value.splitlines())
            normalized = rejoined.strip()
        else:
            normalized = value

        yield f"{value_pad}{normalized!r},  # {value.__class__.__name__}"

    yield f"{outer_pad})  # /{class_name}"
211
212
def _existing_ast_classes(*candidates: Tuple[object, str]) -> Tuple[type, ...]:
    """Return the classes named by ``(module, attribute)`` pairs that exist."""
    found = []
    for module, name in candidates:
        cls = getattr(module, name, None)
        if cls is not None:
            found.append(cls)
    return tuple(found)


def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    ``Str``/``Bytes`` nodes become ``ast.Constant(value=node.s)``, ``Num``
    nodes become ``ast.Constant(value=node.n)`` and ``NameConstant`` nodes
    become ``ast.Constant(value=node.value)``. Any other node is returned
    unchanged.

    The legacy classes are looked up with ``getattr`` rather than direct
    attribute access, so a module that no longer provides one of them (the
    builtin ``ast`` drops these deprecated aliases in newer Python releases)
    does not make this function raise ``AttributeError``. A missing class is
    simply left out of the ``isinstance`` check.
    """
    string_like = _existing_ast_classes(
        (ast, "Str"), (ast3, "Str"), (ast27, "Str"), (ast, "Bytes"), (ast3, "Bytes")
    )
    # isinstance() against an empty tuple is False, so absent classes are safe.
    if isinstance(node, string_like):
        return ast.Constant(value=node.s)

    numbers = _existing_ast_classes((ast, "Num"), (ast3, "Num"), (ast27, "Num"))
    if isinstance(node, numbers):
        return ast.Constant(value=node.n)

    name_constants = _existing_ast_classes(
        (ast, "NameConstant"), (ast3, "NameConstant")
    )
    if isinstance(node, name_constants):
        return ast.Constant(value=node.value)

    return node