]> git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Fix parser bug where "type" was misinterpreted as a keyword inside a match (#3950)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4
5 import ast
6 import sys
7 from typing import Iterable, Iterator, List, Set, Tuple
8
9 from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
10 from black.nodes import syms
11 from blib2to3 import pygram
12 from blib2to3.pgen2 import driver
13 from blib2to3.pgen2.grammar import Grammar
14 from blib2to3.pgen2.parse import ParseError
15 from blib2to3.pgen2.tokenize import TokenError
16 from blib2to3.pytree import Leaf, Node
17
18
class InvalidInput(ValueError):
    """Signals that the given source failed to parse under every grammar tried."""
21
22
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the blib2to3 grammars worth trying for the given target versions."""
    if not target_versions:
        # Nothing was requested, so nothing can be ruled out: offer every
        # grammar and let the caller attempt them in order.
        return [
            pygram.python_grammar_async_keywords,  # Python 3.7-3.9
            pygram.python_grammar,  # Python 3.0-3.6
            pygram.python_grammar_soft_keywords,  # Python 3.10+
        ]

    grammars = []
    # If we have to parse both, try to parse async as a keyword first.
    if not (
        supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS)
        or supports_feature(target_versions, Feature.PATTERN_MATCHING)
    ):
        grammars.append(pygram.python_grammar_async_keywords)  # Python 3.7-3.9
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar)  # Python 3.0-3.6
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        grammars.append(pygram.python_grammar_soft_keywords)  # Python 3.10+

    # At least one branch above must have run, because every Python version
    # carries exactly one of the two 'ASYNC_*' feature flags.
    return grammars
52
53
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node."""
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    failures = {}
    for candidate in get_grammars(set(target_versions)):
        parser_driver = driver.Driver(candidate)
        try:
            tree = parser_driver.parse_string(src_txt, True)
        except ParseError as parse_err:
            line_no, col_no = parse_err.context[1]
            all_lines = src_txt.splitlines()
            try:
                bad_line = all_lines[line_no - 1]
            except IndexError:
                bad_line = "<line number missing in source>"
            failures[candidate.version] = InvalidInput(
                f"Cannot parse: {line_no}:{col_no}: {bad_line}"
            )
        except TokenError as tok_err:
            # Raised in edge cases; these typically carry no faulty line.
            line_no, col_no = tok_err.args[1]
            failures[candidate.version] = InvalidInput(
                f"Cannot parse: {line_no}:{col_no}: {tok_err.args[0]}"
            )
        else:
            # Wrap a bare Leaf so callers always receive a Node.
            if isinstance(tree, Leaf):
                tree = Node(syms.file_input, [tree])
            return tree

    # Every grammar failed; surface the error recorded for the newest version.
    assert len(failures) >= 1
    raise failures[max(failures)] from None
94
95
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Report whether src_txt parses successfully under the given grammar."""
    try:
        driver.Driver(grammar).parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    return True
104
105
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    return str(node)
110
111
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Parse src with the builtin compiler, targeting one (major, minor) version."""
    return ast.parse(
        src, "<unknown>", feature_version=version, type_comments=type_comments
    )
119
120
def parse_ast(src: str) -> ast.AST:
    """Parse src with the newest grammar that accepts it, retrying without type comments."""
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    # Try newest first; remember the first (i.e. newest-version) failure message.
    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            first_error = first_error or str(e)

    # Type comments may themselves be malformed; retry ignoring them.
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            continue

    raise SyntaxError(first_error)
141
142
143 def _normalize(lineend: str, value: str) -> str:
144     # To normalize, we strip any leading and trailing space from
145     # each line...
146     stripped: List[str] = [i.strip() for i in value.splitlines()]
147     normalized = lineend.join(stripped)
148     # ...and remove any blank lines at the beginning and end of
149     # the whole string
150     return normalized.strip()
151
152
153 def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
154     """Simple visitor generating strings to compare ASTs by content."""
155
156     if (
157         isinstance(node, ast.Constant)
158         and isinstance(node.value, str)
159         and node.kind == "u"
160     ):
161         # It's a quirk of history that we strip the u prefix over here. We used to
162         # rewrite the AST nodes for Python version compatibility and we never copied
163         # over the kind
164         node.kind = None
165
166     yield f"{'  ' * depth}{node.__class__.__name__}("
167
168     for field in sorted(node._fields):  # noqa: F402
169         # TypeIgnore has only one field 'lineno' which breaks this comparison
170         if isinstance(node, ast.TypeIgnore):
171             break
172
173         try:
174             value: object = getattr(node, field)
175         except AttributeError:
176             continue
177
178         yield f"{'  ' * (depth+1)}{field}="
179
180         if isinstance(value, list):
181             for item in value:
182                 # Ignore nested tuples within del statements, because we may insert
183                 # parentheses and they change the AST.
184                 if (
185                     field == "targets"
186                     and isinstance(node, ast.Delete)
187                     and isinstance(item, ast.Tuple)
188                 ):
189                     for elt in item.elts:
190                         yield from stringify_ast(elt, depth + 2)
191
192                 elif isinstance(item, ast.AST):
193                     yield from stringify_ast(item, depth + 2)
194
195         elif isinstance(value, ast.AST):
196             yield from stringify_ast(value, depth + 2)
197
198         else:
199             normalized: object
200             if (
201                 isinstance(node, ast.Constant)
202                 and field == "value"
203                 and isinstance(value, str)
204             ):
205                 # Constant strings may be indented across newlines, if they are
206                 # docstrings; fold spaces after newlines when comparing. Similarly,
207                 # trailing and leading space may be removed.
208                 normalized = _normalize("\n", value)
209             elif field == "type_comment" and isinstance(value, str):
210                 # Trailing whitespace in type comments is removed.
211                 normalized = value.rstrip()
212             else:
213                 normalized = value
214             yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"
215
216     yield f"{'  ' * depth})  # /{node.__class__.__name__}"