All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
import re
import sys
from dataclasses import dataclass
from functools import lru_cache
from typing import Iterator, List, Optional, Union

if sys.version_info >= (3, 8):
    from typing import Final
else:
    from typing_extensions import Final

from black.nodes import (
    CLOSING_BRACKETS,
    STANDALONE_COMMENT,
    WHITESPACE,
    container_of,
    first_leaf_of,
    preceding_leaf,
    syms,
)
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, Node
# types
LN = Union[Leaf, Node]

# Marker comments, in their normalized form (as produced by `make_comment`).
# Comments that switch formatting off until a matching "on" marker.
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
# Comments that exclude only the line they trail from formatting.
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
# Every marker that causes some code to be passed through unformatted.
FMT_PASS: Final = {*FMT_OFF, *FMT_SKIP}
# Comments that switch formatting back on.
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}

# Characters that may directly follow the leading "#" without a space being
# inserted by `make_comment`; keyed by the `preview` flag.
COMMENT_EXCEPTIONS = {True: " !:#'", False: " !:#'%"}
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
def generate_comments(leaf: LN, *, preview: bool) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    # An ENDMARKER has no code on its line, so its comments are never inline.
    for pc in list_comments(
        leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER, preview=preview
    ):
        # The new leaf's prefix carries only the blank lines preceding the comment.
        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
@lru_cache(maxsize=4096)
def list_comments(
    prefix: str, *, is_endmarker: bool, preview: bool
) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    `prefix` is split into lines; each line that starts with "#" (after leading
    whitespace is stripped) becomes one `ProtoComment`.  A comment is typed
    `token.COMMENT` only when it sits on the first line of the prefix (not
    counting lines ending in an escaped newline) and `is_endmarker` is false;
    otherwise it is a `STANDALONE_COMMENT`.

    Results are memoized, so callers must not mutate the returned list.
    """
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    consumed = 0  # characters of `prefix` covered up to and including this line
    nlines = 0  # empty lines seen since the last emitted comment
    ignored_lines = 0  # lines ending in a backslash continuation
    for index, line in enumerate(re.split("\r?\n", prefix)):
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line, preview=preview)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        nlines = 0
    return result
def make_comment(content: str, *, preview: bool) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments should have a single space between the hash sign and the
    content, except when the content starts with one of the characters listed
    in ``COMMENT_EXCEPTIONS[preview]`` (e.g. "##", "#!", "#:", "#'").

    If `content` didn't start with a hash sign, one is provided.
    An empty (or whitespace-only) `content` yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Spelled as an escape so the character stays visible and survives
    # editors/tools that normalize whitespace.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    if content and content[0] not in COMMENT_EXCEPTIONS[preview]:
        content = " " + content
    return "#" + content
def normalize_fmt_off(node: Node, *, preview: bool) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments.

    Mutates the tree under `node` in place by calling
    `convert_one_fmt_off_pair` until it reports that nothing was converted.
    """
    try_again = True
    while try_again:
        try_again = convert_one_fmt_off_pair(node, preview=preview)
def convert_one_fmt_off_pair(node: Node, *, preview: bool) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    The ignored nodes are removed from the tree and replaced, at the position
    of the first one, by a single STANDALONE_COMMENT leaf whose value is their
    verbatim source text (including the marker comment).

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        # Prefix length consumed by the last non-marker comment before the marker.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False, preview=preview):
            if comment.value not in FMT_PASS:
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    if comment.value in FMT_OFF and prev.type not in WHITESPACE:
                        continue
                    if comment.value in FMT_SKIP and prev.type in WHITESPACE:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment, preview=preview))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            prefix = first.prefix
            if comment.value in FMT_OFF:
                first.prefix = prefix[comment.consumed :]
            if comment.value in FMT_SKIP:
                first.prefix = ""
                standalone_comment_prefix = prefix
            else:
                # Keep whatever preceded the marker, plus the blank lines before it.
                standalone_comment_prefix = (
                    prefix[:previous_consumed] + "\n" * comment.newlines
                )
            hidden_value = "".join(str(n) for n in ignored_nodes)
            if comment.value in FMT_OFF:
                hidden_value = comment.value + "\n" + hidden_value
            if comment.value in FMT_SKIP:
                # Two spaces before a trailing comment, per PEP 8.
                hidden_value += "  " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            first_idx: Optional[int] = None
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=standalone_comment_prefix,
                ),
            )
            # The tree changed under the iteration; let the caller start over.
            return True

    return False
def generate_ignored_nodes(
    leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    If comment is skip, returns leaf only.
    Stops at the end of the block.
    """
    if comment.value in FMT_SKIP:
        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, preview=preview)
        return
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        if is_fmt_on(container, preview=preview):
            return

        # fix for fmt: on in children
        if children_contains_fmt_on(container, preview=preview):
            for index, child in enumerate(container.children):
                if isinstance(child, Leaf) and is_fmt_on(child, preview=preview):
                    if child.type in CLOSING_BRACKETS:
                        # This means `# fmt: on` is placed at a different bracket level
                        # than `# fmt: off`. This is an invalid use, but as a courtesy,
                        # we include this closing bracket in the ignored nodes.
                        # The alternative is to fail the formatting.
                        yield child
                    return
                if (
                    child.type == token.INDENT
                    and index < len(container.children) - 1
                    and children_contains_fmt_on(
                        container.children[index + 1], preview=preview
                    )
                ):
                    # This means `# fmt: on` is placed right after an indentation
                    # level, and we shouldn't swallow the previous INDENT token.
                    return
                if children_contains_fmt_on(child, preview=preview):
                    return
                yield child
        else:
            if container.type == token.DEDENT and container.next_sibling is None:
                # This can happen when there is no matching `# fmt: on` comment at the
                # same level as `# fmt: off`. We need to keep this DEDENT.
                return
            yield container
            container = container.next_sibling
def _generate_ignored_nodes_from_fmt_skip(
    leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
    """Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`.

    Yields nothing unless `comment` is the first comment in `leaf.prefix`.
    When it yields, `leaf.prefix` is cleared as a side effect.
    """
    prev_sibling = leaf.prev_sibling
    parent = leaf.parent
    # Need to properly format the leaf prefix to compare it to comment.value,
    # which is also formatted
    comments = list_comments(leaf.prefix, is_endmarker=False, preview=preview)
    if not comments or comment.value != comments[0].value:
        return
    if prev_sibling is not None:
        leaf.prefix = ""
        # Walk back over the siblings until one whose prefix contains a newline.
        siblings = [prev_sibling]
        while "\n" not in prev_sibling.prefix and prev_sibling.prev_sibling is not None:
            prev_sibling = prev_sibling.prev_sibling
            siblings.insert(0, prev_sibling)
        yield from siblings
    elif (
        parent is not None and parent.type == syms.suite and leaf.type == token.NEWLINE
    ):
        # The `# fmt: skip` is on the colon line of the if/while/def/class/...
        # statements. The ignored nodes should be previous siblings of the
        # parent suite node.
        leaf.prefix = ""
        ignored_nodes: List[LN] = []
        parent_sibling = parent.prev_sibling
        while parent_sibling is not None and parent_sibling.type != syms.suite:
            ignored_nodes.insert(0, parent_sibling)
            parent_sibling = parent_sibling.prev_sibling
        # Special case for `async_stmt` where the ASYNC token is on the
        # grandparent node.
        grandparent = parent.parent
        if (
            grandparent is not None
            and grandparent.prev_sibling is not None
            and grandparent.prev_sibling.type == token.ASYNC
        ):
            ignored_nodes.insert(0, grandparent.prev_sibling)
        yield from ignored_nodes
def is_fmt_on(container: LN, preview: bool) -> bool:
    """Determine whether formatting is switched on within a container.

    Determined by whether the last `# fmt:` comment in the container's prefix
    is `on` or `off`.  Returns False when the prefix holds neither marker.
    """
    fmt_on = False
    for comment in list_comments(container.prefix, is_endmarker=False, preview=preview):
        if comment.value in FMT_ON:
            fmt_on = True
        elif comment.value in FMT_OFF:
            fmt_on = False
    return fmt_on
def children_contains_fmt_on(container: LN, *, preview: bool) -> bool:
    """Determine if children have formatting switched on.

    True when, for at least one direct child, `is_fmt_on` holds for that
    child's first leaf.
    """
    first_leaves = (first_leaf_of(child) for child in container.children)
    return any(
        leaf is not None and is_fmt_on(leaf, preview=preview) for leaf in first_leaves
    )
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """Tell whether any comment in `comment_list` is a static-analysis pragma.

    Returns:
        True iff one of the comments in @comment_list is a pragma used by one
        of the more common static analysis tools for python (e.g. mypy, flake8,
        pylint), i.e. its value starts with "# type:", "# noqa" or "# pylint:".
    """
    pragma_prefixes = ("# type:", "# noqa", "# pylint:")
    return any(comment.value.startswith(pragma_prefixes) for comment in comment_list)