All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
import re
import sys
from dataclasses import dataclass
from functools import lru_cache
from typing import Iterator, List, Optional, Union

if sys.version_info >= (3, 8):
    from typing import Final
else:
    from typing_extensions import Final

from blib2to3.pytree import Node, Leaf, type_repr
from blib2to3.pgen2 import token

from black.nodes import first_leaf_column, preceding_leaf, container_of
from black.nodes import CLOSING_BRACKETS, STANDALONE_COMMENT, WHITESPACE
# Either kind of syntax-tree element accepted by the helpers in this module.
LN = Union[Leaf, Node]

# Marker comments that switch formatting off.
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
# Marker comments recognised as `# fmt: skip`.
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
# Every marker that suppresses formatting: the union of FMT_OFF and FMT_SKIP.
FMT_PASS: Final = {*FMT_OFF, *FMT_SKIP}
# Marker comments that switch formatting back on.
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}

# Characters that may directly follow the leading "#" of a comment without a
# space being inserted between them, keyed by the `preview` flag.
COMMENT_EXCEPTIONS = {True: " !:#'", False: " !:#'%"}
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
def generate_comments(leaf: LN, *, preview: bool) -> Iterator[Leaf]:
    """Generate comment leaves from the prefix of `leaf`, if it holds any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.

    Each yielded leaf carries one newline in its prefix per blank line that
    preceded the comment.  `leaf.prefix` itself is only read here, not modified.
    """
    for pc in list_comments(
        leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER, preview=preview
    ):
        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
@lru_cache(maxsize=4096)
def list_comments(
    prefix: str, *, is_endmarker: bool, preview: bool
) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    `prefix` is split into lines; every line that starts with "#" (after
    leading whitespace is dropped) becomes one ProtoComment whose `newlines`
    is the number of blank lines seen since the previous comment and whose
    `consumed` is the running character offset into `prefix`.

    `is_endmarker` forces every comment to be STANDALONE_COMMENT.

    The result is memoised by `lru_cache`, so the same list object is handed
    to every caller with equal arguments; treat it as read-only.
    """
    # NOTE(review): the counter initialisation, the lstrip/blank-line counting,
    # the `ignored_lines` increment and the append/reset/return lines were
    # restored from how the surviving lines use these names -- confirm against
    # the project's history.
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    consumed = 0
    nlines = 0
    ignored_lines = 0
    for index, line in enumerate(re.split("\r?\n", prefix)):
        # NOTE(review): one character is counted per line break even when the
        # break matched by the pattern is the two-character "\r\n".
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line, preview=preview)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        nlines = 0
    return result
def make_comment(content: str, *, preview: bool) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments should have a single space between the hash sign and the
    content, except when the content starts with one of the characters in
    `COMMENT_EXCEPTIONS[preview]` (e.g. "##", "#!", "#:", "#'").

    If `content` didn't start with a hash sign, one is provided.  Trailing
    whitespace is dropped; empty content yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        # Nothing left to index below; an empty comment is just the hash sign.
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Written as an escape so the character cannot be silently turned into a
    # plain space by tools that normalise whitespace.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    if content and content[0] not in COMMENT_EXCEPTIONS[preview]:
        content = " " + content
    return "#" + content
def normalize_fmt_off(node: Node, *, preview: bool) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments.

    Calls `convert_one_fmt_off_pair` on `node` repeatedly until it reports
    that no further pair was converted.  `node` is modified in place.
    """
    try_again = True
    while try_again:
        try_again = convert_one_fmt_off_pair(node, preview=preview)
def convert_one_fmt_off_pair(node: Node, *, preview: bool) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Walks the leaves of `node` looking for the first FMT_OFF / FMT_SKIP
    comment that has nodes to ignore, removes those nodes from the tree and
    inserts, at the position of the first removed node, one
    STANDALONE_COMMENT leaf whose value is their verbatim source text.

    Returns True if a pair was converted.
    """
    # NOTE(review): the `continue` / `else:` / `return` lines, the `if prev:`
    # guard, the `first.prefix = ""` reset and the `parent.insert_child(...)`
    # call were restored from the surviving lines -- confirm against the
    # project's history.
    for leaf in node.leaves():
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False, preview=preview):
            if comment.value not in FMT_PASS:
                # Remember where ordinary comments end so they stay in the
                # prefix of the replacement leaf.
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    if comment.value in FMT_OFF and prev.type not in WHITESPACE:
                        continue
                    if comment.value in FMT_SKIP and prev.type in WHITESPACE:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment, preview=preview))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            prefix = first.prefix
            if comment.value in FMT_OFF:
                first.prefix = prefix[comment.consumed :]
            if comment.value in FMT_SKIP:
                first.prefix = ""
                standalone_comment_prefix = prefix
            else:
                standalone_comment_prefix = (
                    prefix[:previous_consumed] + "\n" * comment.newlines
                )
            hidden_value = "".join(str(n) for n in ignored_nodes)
            if comment.value in FMT_OFF:
                hidden_value = comment.value + "\n" + hidden_value
            if comment.value in FMT_SKIP:
                # Two spaces before the trailing comment (PEP 8 inline-comment
                # spacing).  NOTE(review): the extracted text showed a single
                # space here, but whitespace runs were collapsed throughout.
                hidden_value += "  " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            first_idx: Optional[int] = None
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=standalone_comment_prefix,
                ),
            )
            return True

    return False
def generate_ignored_nodes(
    leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    If `comment` is a `# fmt: skip`, the work is delegated to
    `_generate_ignored_nodes_from_fmt_skip`.
    Stops at the end of the block.
    """
    # NOTE(review): the return annotation and the `return` / `yield` / `else:`
    # lines were restored from the surviving lines -- confirm against the
    # project's history.
    if comment.value in FMT_SKIP:
        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, preview=preview)
        return
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        if is_fmt_on(container, preview=preview):
            return

        # fix for fmt: on in children
        if contains_fmt_on_at_column(container, leaf.column, preview=preview):
            for child in container.children:
                if isinstance(child, Leaf) and is_fmt_on(child, preview=preview):
                    if child.type in CLOSING_BRACKETS:
                        # This means `# fmt: on` is placed at a different bracket level
                        # than `# fmt: off`. This is an invalid use, but as a courtesy,
                        # we include this closing bracket in the ignored nodes.
                        # The alternative is to fail the formatting.
                        yield child
                    return
                if contains_fmt_on_at_column(child, leaf.column, preview=preview):
                    return
                yield child
        else:
            yield container
            container = container.next_sibling
def _generate_ignored_nodes_from_fmt_skip(
    leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
    """Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`.

    Yields nothing unless `comment` is the first comment in `leaf.prefix`.
    Otherwise yields either the previous siblings of `leaf` back to the one
    whose prefix contains a newline, or -- when `leaf` is the NEWLINE that
    opens a suite -- the siblings that precede that suite.
    """
    # NOTE(review): the `parent` binding, the `return`, `elif (` and `yield`
    # lines were restored from the surviving lines.  The two `leaf.prefix = ""`
    # resets are NOT derivable from the surviving lines; they drop the comment
    # from the leaf so that it is not kept twice -- confirm against the
    # project's history.
    prev_sibling = leaf.prev_sibling
    parent = leaf.parent
    # Need to properly format the leaf prefix to compare it to comment.value,
    # which is also formatted
    comments = list_comments(leaf.prefix, is_endmarker=False, preview=preview)
    if not comments or comment.value != comments[0].value:
        return
    if prev_sibling is not None:
        leaf.prefix = ""
        siblings = [prev_sibling]
        while "\n" not in prev_sibling.prefix and prev_sibling.prev_sibling is not None:
            prev_sibling = prev_sibling.prev_sibling
            siblings.insert(0, prev_sibling)
        for sibling in siblings:
            yield sibling
    elif (
        parent is not None
        and type_repr(parent.type) == "suite"
        and leaf.type == token.NEWLINE
    ):
        # The `# fmt: skip` is on the colon line of the if/while/def/class/...
        # statements. The ignored nodes should be previous siblings of the
        # parent suite node.
        leaf.prefix = ""
        ignored_nodes: List[LN] = []
        parent_sibling = parent.prev_sibling
        while parent_sibling is not None and type_repr(parent_sibling.type) != "suite":
            ignored_nodes.insert(0, parent_sibling)
            parent_sibling = parent_sibling.prev_sibling
        # Special case for `async_stmt` where the ASYNC token is on the
        # grandparent node.
        grandparent = parent.parent
        if (
            grandparent is not None
            and grandparent.prev_sibling is not None
            and grandparent.prev_sibling.type == token.ASYNC
        ):
            ignored_nodes.insert(0, grandparent.prev_sibling)
        yield from iter(ignored_nodes)
def is_fmt_on(container: LN, preview: bool) -> bool:
    """Determine whether formatting is switched on within a container.

    Determined by whether the last `# fmt:` comment in the prefix of
    `container` is `on` or `off`.  Returns False when the prefix holds
    neither kind of marker.
    """
    fmt_on = False
    for comment in list_comments(container.prefix, is_endmarker=False, preview=preview):
        # Later markers override earlier ones, so keep scanning to the end.
        if comment.value in FMT_ON:
            fmt_on = True
        elif comment.value in FMT_OFF:
            fmt_on = False
    return fmt_on
def contains_fmt_on_at_column(container: LN, column: int, *, preview: bool) -> bool:
    """Determine if children at a given column have formatting switched on.

    Returns True as soon as one direct child of `container` that starts at
    `column` has `is_fmt_on` true; False if no child qualifies.
    """
    for child in container.children:
        # A Node has no column of its own, so its first leaf is consulted.
        # The parentheses spell out the grouping `and`/`or` precedence gives.
        if (
            (isinstance(child, Node) and first_leaf_column(child) == column)
            or (isinstance(child, Leaf) and child.column == column)
        ):
            if is_fmt_on(child, preview=preview):
                return True

    return False
319 def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
322 True iff one of the comments in @comment_list is a pragma used by one
323 of the more common static analysis tools for python (e.g. mypy, flake8,
326 for comment in comment_list:
327 if comment.value.startswith(("# type:", "# noqa", "# pylint:")):