git.madduck.net Git - etc/vim.git/blob - src/black/parsing.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Cover more in the usage docs (#2208)
[etc/vim.git] / src / black / parsing.py
1 """
2 Parse Python code and perform AST validation.
3 """
4 import ast
5 import sys
6 from typing import Iterable, Iterator, List, Set, Union
7
8 # lib2to3 fork
9 from blib2to3.pytree import Node, Leaf
10 from blib2to3 import pygram, pytree
11 from blib2to3.pgen2 import driver
12 from blib2to3.pgen2.grammar import Grammar
13 from blib2to3.pgen2.parse import ParseError
14
15 from black.mode import TargetVersion, Feature, supports_feature
16 from black.nodes import syms
17
18 try:
19     from typed_ast import ast3, ast27
20 except ImportError:
21     if sys.version_info < (3, 8):
22         print(
23             "The typed_ast package is required but not installed.\n"
24             "You can upgrade to Python 3.8+ or install typed_ast with\n"
25             "`python3 -m pip install typed-ast`.",
26             file=sys.stderr,
27         )
28         sys.exit(1)
29     else:
30         ast3 = ast27 = ast
31
32
class InvalidInput(ValueError):
    """Error signalling that the source text could not be parsed.

    Raised once every candidate grammar has been tried and each attempt
    ended in a parse failure.
    """
35
36
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try for *target_versions*, in trial order.

    - No target versions: every grammar, from Python 3.7+ down to Python 2.7.
    - Only Python 2 targets: the two Python 2 grammars.
    - Otherwise: the Python 3 grammars whose handling of ``async`` is
      compatible with all of the targets.
    """
    if target_versions and not all(
        version.is_python2() for version in target_versions
    ):
        # Python 3-compatible code, so only try Python 3 grammars. When both
        # apply, the grammar treating async as a keyword goes first.
        python3_grammars = []
        if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
            # Python 3.7+
            python3_grammars.append(
                pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
            )
        if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
            # Python 3.0-3.6
            python3_grammars.append(
                pygram.python_grammar_no_print_statement_no_exec_statement
            )
        # At least one of the above branches must have been taken, because
        # every Python version has exactly one of the two 'ASYNC_*' flags.
        return python3_grammars

    python2_grammars = [
        # Python 2.7 with future print_function import
        pygram.python_grammar_no_print_statement,
        # Python 2.7
        pygram.python_grammar,
    ]
    if target_versions:
        # Python 2-only code, so try Python 2 grammars only.
        return python2_grammars

    # No target_version specified, so try all grammars, newest first.
    return [
        # Python 3.7+
        pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
        # Python 3.0-3.6
        pygram.python_grammar_no_print_statement_no_exec_statement,
        *python2_grammars,
    ]
74
75
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Parse *src_txt* and return the resulting lib2to3 tree.

    A trailing newline is appended to the source when it is missing. Each
    grammar returned by ``get_grammars`` is tried in order and the first
    successful parse wins. A bare ``Leaf`` result is wrapped in a
    ``file_input`` node so that a ``Node`` is always returned.

    Raises:
        InvalidInput: when every grammar fails; the message describes the
            failure reported for the last grammar tried.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar, pytree.convert)
        try:
            tree = parser.parse_string(src_txt, True)
        except ParseError as parse_error:
            lineno, column = parse_error.context[1]
            source_lines = src_txt.splitlines()
            try:
                faulty_line = source_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            # Remember the failure; it is only raised if no grammar succeeds.
            exc = InvalidInput(f"Cannot parse: {lineno}:{column}: {faulty_line}")
        else:
            break
    else:
        raise exc from None

    return Node(syms.file_input, [tree]) if isinstance(tree, Leaf) else tree
101
102
def lib2to3_unparse(node: Node) -> str:
    """Return the source text for a lib2to3 *node*, i.e. ``str(node)``."""
    return str(node)
107
108
def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Parse *src* into an AST, trying newer syntax versions first.

    On Python 3.8+ the standard ``ast`` parser is tried with feature versions
    from the running minor version down to 3.5. On older interpreters
    ``ast3`` is tried with feature versions 7 and 6. If all of those raise
    ``SyntaxError``, the source is handed to ``ast27``.

    Raises:
        SyntaxError: when no Python 3 attempt succeeds and ``ast27`` is just
            the standard ``ast`` module (no Python 2 parser is available), or
            when ``ast27`` itself rejects the source.
    """
    filename = "<unknown>"

    if sys.version_info >= (3, 8):
        # TODO: support Python 4+ ;)
        parse = ast.parse
        feature_versions = [
            (3, minor) for minor in range(sys.version_info[1], 4, -1)
        ]
    else:
        parse = ast3.parse
        feature_versions = [7, 6]

    for feature_version in feature_versions:
        try:
            return parse(src, filename, feature_version=feature_version)
        except SyntaxError:
            pass

    if ast27.__name__ == "ast":
        raise SyntaxError(
            "The requested source code has invalid Python 3 syntax.\n"
            "If you are trying to format Python 2 files please reinstall Black"
            " with the 'python2' extra: `python3 -m pip install black[python2]`."
        )
    return ast27.parse(src)
131
132
def stringify_ast(
    node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
    """Yield the lines of an indented textual dump of *node*.

    The dump exists so that two ASTs can be compared by content. The node is
    first passed through ``fixup_ast_constants``. Its fields are then visited
    in sorted order; child nodes are dumped recursively two levels deeper and
    any other value is emitted as its ``repr`` followed by its class name.
    """
    any_ast = (ast.AST, ast3.AST, ast27.AST)
    missing = object()

    node = fixup_ast_constants(node)
    class_name = node.__class__.__name__
    outer_pad = "  " * depth

    yield f"{outer_pad}{class_name}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        ignorable = (ast3.TypeIgnore, ast27.TypeIgnore) + (
            (ast.TypeIgnore,) if sys.version_info >= (3, 8) else ()
        )
        if isinstance(node, ignorable):
            break

        # Fields declared on the class but not set on this instance are skipped.
        value = getattr(node, field, missing)
        if value is missing:
            continue

        yield f"{'  ' * (depth+1)}{field}="

        if isinstance(value, any_ast):
            yield from stringify_ast(value, depth + 2)
            continue

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may
                # insert parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
                    and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
                ):
                    for element in item.elts:
                        yield from stringify_ast(element, depth + 2)
                elif isinstance(item, any_ast):
                    yield from stringify_ast(item, depth + 2)
            continue

        # Constant strings may be indented across newlines, if they are
        # docstrings; fold spaces after newlines when comparing. Similarly,
        # trailing and leading space may be removed.
        # Note that when formatting Python 2 code, at least with Windows
        # line-endings, docstrings can end up here as bytes instead of
        # str so make sure that we handle both cases.
        normalized = value
        if (
            isinstance(node, ast.Constant)
            and field == "value"
            and isinstance(value, (str, bytes))
        ):
            newline = "\n" if isinstance(value, str) else b"\n"
            # Strip leading and trailing space from each line, then drop any
            # blank lines at the beginning and end of the whole string.
            per_line = [line.strip() for line in value.splitlines()]
            normalized = newline.join(per_line).strip()  # type: ignore[attr-defined]
        yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"

    yield f"{outer_pad})  # /{class_name}"
200
201
def _legacy_node_types(*candidates):
    """Return, as a tuple, the classes among ``(module, name)`` *candidates*.

    Names missing from a module are skipped. The standard ``ast`` module is
    also skipped on Python 3.8+: there its legacy classes can only match
    ``ast.Constant`` instances, which ``fixup_ast_constants`` handles
    directly, and touching them emits ``DeprecationWarning`` on 3.12+ and
    fails on 3.14+, where they were removed.
    """
    found = []
    for module, name in candidates:
        if module is ast and sys.version_info >= (3, 8):
            continue
        node_type = getattr(module, name, None)
        if node_type is not None:
            found.append(node_type)
    return tuple(found)


# Resolved once at import time instead of on every call.
_LEGACY_STR_TYPES = _legacy_node_types(
    (ast, "Str"), (ast3, "Str"), (ast27, "Str"), (ast, "Bytes"), (ast3, "Bytes")
)
_LEGACY_NUM_TYPES = _legacy_node_types((ast, "Num"), (ast3, "Num"), (ast27, "Num"))
_LEGACY_NAME_CONSTANT_TYPES = _legacy_node_types(
    (ast, "NameConstant"), (ast3, "NameConstant")
)
# Value types for which the deprecated stdlib classes (Str, Bytes, Num,
# NameConstant) used to claim an ast.Constant as their instance.
_REBUILT_CONSTANT_VALUE_TYPES = (str, bytes, int, float, complex, bool, type(None))


def fixup_ast_constants(
    node: Union[ast.AST, ast3.AST, ast27.AST]
) -> Union[ast.AST, ast3.AST, ast27.AST]:
    """Map ast nodes deprecated in 3.8 to Constant.

    String, bytes, number and name-constant nodes are replaced by a fresh
    ``ast.Constant`` that carries only ``value``. A standard-library
    ``ast.Constant`` holding such a value is rebuilt the same way, which is
    what the deprecated ``isinstance`` checks did on Python 3.8-3.13. Any
    other node is returned unchanged.
    """
    if isinstance(node, ast.Constant):
        # Ellipsis (and a Constant with no value set) is passed through as is.
        value = getattr(node, "value", Ellipsis)
        if isinstance(value, _REBUILT_CONSTANT_VALUE_TYPES):
            return ast.Constant(value=value)
        return node

    if isinstance(node, _LEGACY_STR_TYPES):
        return ast.Constant(value=node.s)

    if isinstance(node, _LEGACY_NUM_TYPES):
        return ast.Constant(value=node.n)

    if isinstance(node, _LEGACY_NAME_CONSTANT_TYPES):
        return ast.Constant(value=node.value)

    return node