All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
3 from dataclasses import dataclass
4 from functools import lru_cache
5 from typing import Iterator, List, Optional, Union
7 if sys.version_info >= (3, 8):
8 from typing import Final
10 from typing_extensions import Final
12 from black.nodes import (
20 from blib2to3.pgen2 import token
21 from blib2to3.pytree import Leaf, Node, type_repr
# Shorthand for "either a `Leaf` or a `Node`" from `blib2to3.pytree`.
LN = Union[Leaf, Node]
# Marker comments, spelled the way `make_comment` normalizes them (a single
# space after the hash), since they are compared against normalized values.
# Markers that switch formatting off.
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
# Markers handled as `# fmt: skip`.
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
# Every marker that suppresses formatting (the union of the two sets above).
FMT_PASS: Final = {*FMT_OFF, *FMT_SKIP}
# Markers that switch formatting back on.
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}

# Characters that may directly follow the leading "#" without a space being
# inserted after the hash. Indexed by the `preview` flag.
COMMENT_EXCEPTIONS: Final = {True: " !:#'", False: " !:#'%"}
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
def generate_comments(leaf: LN, *, preview: bool) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    comments are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    for pc in list_comments(
        leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER, preview=preview
    ):
        # The blank lines that preceded the comment are carried over as the
        # new leaf's prefix.
        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
77 @lru_cache(maxsize=4096)
79 prefix: str, *, is_endmarker: bool, preview: bool
80 ) -> List[ProtoComment]:
81 """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
82 result: List[ProtoComment] = []
83 if not prefix or "#" not in prefix:
89 for index, line in enumerate(re.split("\r?\n", prefix)):
90 consumed += len(line) + 1 # adding the length of the split '\n'
94 if not line.startswith("#"):
95 # Escaped newlines outside of a comment are not really newlines at
96 # all. We treat a single-line comment following an escaped newline
97 # as a simple trailing comment.
98 if line.endswith("\\"):
102 if index == ignored_lines and not is_endmarker:
103 comment_type = token.COMMENT # simple trailing comment
105 comment_type = STANDALONE_COMMENT
106 comment = make_comment(line, preview=preview)
109 type=comment_type, value=comment, newlines=nlines, consumed=consumed
def make_comment(content: str, *, preview: bool) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:", "#'") should have a single
    space between the hash sign and the content.  The exact set of exempt
    characters is taken from ``COMMENT_EXCEPTIONS[preview]``.

    If `content` didn't start with a hash sign, one is provided.
    """
    content = content.rstrip()
    # NOTE(review): the guard below was elided in the listing this block was
    # restored from; it is required because `content[0]` is read next, and a
    # bare hash matches the contract above.  Confirm against upstream.
    if not content:
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Written as an escape so the character cannot be mistaken for (or
    # silently replaced by) a regular space.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    if content and content[0] not in COMMENT_EXCEPTIONS[preview]:
        content = " " + content
    return "#" + content
def normalize_fmt_off(node: Node, *, preview: bool) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
    # `convert_one_fmt_off_pair` handles a single pair per call and reports
    # whether it converted one, so keep calling until it finds nothing more.
    while convert_one_fmt_off_pair(node, preview=preview):
        pass
149 def convert_one_fmt_off_pair(node: Node, *, preview: bool) -> bool:
150 """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
152 Returns True if a pair was converted.
154 for leaf in node.leaves():
155 previous_consumed = 0
156 for comment in list_comments(leaf.prefix, is_endmarker=False, preview=preview):
157 if comment.value not in FMT_PASS:
158 previous_consumed = comment.consumed
160 # We only want standalone comments. If there's no previous leaf or
161 # the previous leaf is indentation, it's a standalone comment in
163 if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
164 prev = preceding_leaf(leaf)
166 if comment.value in FMT_OFF and prev.type not in WHITESPACE:
168 if comment.value in FMT_SKIP and prev.type in WHITESPACE:
171 ignored_nodes = list(generate_ignored_nodes(leaf, comment, preview=preview))
172 if not ignored_nodes:
175 first = ignored_nodes[0] # Can be a container node with the `leaf`.
176 parent = first.parent
177 prefix = first.prefix
178 if comment.value in FMT_OFF:
179 first.prefix = prefix[comment.consumed :]
180 if comment.value in FMT_SKIP:
182 standalone_comment_prefix = prefix
184 standalone_comment_prefix = (
185 prefix[:previous_consumed] + "\n" * comment.newlines
187 hidden_value = "".join(str(n) for n in ignored_nodes)
188 if comment.value in FMT_OFF:
189 hidden_value = comment.value + "\n" + hidden_value
190 if comment.value in FMT_SKIP:
191 hidden_value += " " + comment.value
192 if hidden_value.endswith("\n"):
193 # That happens when one of the `ignored_nodes` ended with a NEWLINE
194 # leaf (possibly followed by a DEDENT).
195 hidden_value = hidden_value[:-1]
196 first_idx: Optional[int] = None
197 for ignored in ignored_nodes:
198 index = ignored.remove()
199 if first_idx is None:
201 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
202 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
208 prefix=standalone_comment_prefix,
216 def generate_ignored_nodes(
217 leaf: Leaf, comment: ProtoComment, *, preview: bool
219 """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
221 If comment is skip, returns leaf only.
222 Stops at the end of the block.
224 if comment.value in FMT_SKIP:
225 yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, preview=preview)
227 container: Optional[LN] = container_of(leaf)
228 while container is not None and container.type != token.ENDMARKER:
229 if is_fmt_on(container, preview=preview):
232 # fix for fmt: on in children
233 if contains_fmt_on_at_column(container, leaf.column, preview=preview):
234 for child in container.children:
235 if isinstance(child, Leaf) and is_fmt_on(child, preview=preview):
236 if child.type in CLOSING_BRACKETS:
237 # This means `# fmt: on` is placed at a different bracket level
238 # than `# fmt: off`. This is an invalid use, but as a courtesy,
239 # we include this closing bracket in the ignored nodes.
240 # The alternative is to fail the formatting.
243 if contains_fmt_on_at_column(child, leaf.column, preview=preview):
248 container = container.next_sibling
251 def _generate_ignored_nodes_from_fmt_skip(
252 leaf: Leaf, comment: ProtoComment, *, preview: bool
254 """Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`."""
255 prev_sibling = leaf.prev_sibling
257 # Need to properly format the leaf prefix to compare it to comment.value,
258 # which is also formatted
259 comments = list_comments(leaf.prefix, is_endmarker=False, preview=preview)
260 if not comments or comment.value != comments[0].value:
262 if prev_sibling is not None:
264 siblings = [prev_sibling]
265 while "\n" not in prev_sibling.prefix and prev_sibling.prev_sibling is not None:
266 prev_sibling = prev_sibling.prev_sibling
267 siblings.insert(0, prev_sibling)
268 for sibling in siblings:
272 and type_repr(parent.type) == "suite"
273 and leaf.type == token.NEWLINE
275 # The `# fmt: skip` is on the colon line of the if/while/def/class/...
276 # statements. The ignored nodes should be previous siblings of the
279 ignored_nodes: List[LN] = []
280 parent_sibling = parent.prev_sibling
281 while parent_sibling is not None and type_repr(parent_sibling.type) != "suite":
282 ignored_nodes.insert(0, parent_sibling)
283 parent_sibling = parent_sibling.prev_sibling
284 # Special case for `async_stmt` where the ASYNC token is on the
286 grandparent = parent.parent
288 grandparent is not None
289 and grandparent.prev_sibling is not None
290 and grandparent.prev_sibling.type == token.ASYNC
292 ignored_nodes.insert(0, grandparent.prev_sibling)
293 yield from iter(ignored_nodes)
def is_fmt_on(container: LN, preview: bool) -> bool:
    """Determine whether formatting is switched on within a container.

    Determined by whether the last `# fmt:` comment in the container's prefix
    is `on` or `off`.  A prefix without any such comment counts as "not on".
    """
    # NOTE(review): the assignments and the return were elided in the listing
    # this block was restored from; the `False` default is what the callers'
    # use of the result requires.  Confirm against upstream.
    fmt_on = False
    for comment in list_comments(container.prefix, is_endmarker=False, preview=preview):
        if comment.value in FMT_ON:
            fmt_on = True
        elif comment.value in FMT_OFF:
            fmt_on = False
    return fmt_on
def contains_fmt_on_at_column(container: LN, column: int, *, preview: bool) -> bool:
    """Determine if children at a given column have formatting switched on."""
    for child in container.children:
        # A child is "at" the column when its first leaf (for a Node) or the
        # child itself (for a Leaf) starts there.  The parentheses only make
        # the existing and/or precedence explicit.
        if (isinstance(child, Node) and first_leaf_column(child) == column) or (
            isinstance(child, Leaf) and child.column == column
        ):
            if is_fmt_on(child, preview=preview):
                return True

    return False
324 def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
327 True iff one of the comments in @comment_list is a pragma used by one
328 of the more common static analysis tools for python (e.g. mypy, flake8,
331 for comment in comment_list:
332 if comment.value.startswith(("# type:", "# noqa", "# pylint:")):