All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
2 from dataclasses import dataclass
3 from functools import lru_cache
5 from typing import Iterator, List, Optional, Union
7 if sys.version_info >= (3, 8):
8 from typing import Final
10 from typing_extensions import Final
12 from blib2to3.pytree import Node, Leaf
13 from blib2to3.pgen2 import token
15 from black.nodes import first_leaf_column, preceding_leaf, container_of
16 from black.nodes import STANDALONE_COMMENT, WHITESPACE
# Type alias for "any syntax-tree element": either a `Leaf` or a `Node`
# (both imported from blib2to3.pytree).
LN = Union[Leaf, Node]
# Exact comment texts that switch formatting off from that point onwards.
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
# Exact comment texts that mark a "skip" directive.
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
# Every marker that suppresses formatting: the union of FMT_OFF and FMT_SKIP.
FMT_PASS: Final = {*FMT_OFF, *FMT_SKIP}
# Exact comment texts that switch formatting back on.
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    # An ENDMARKER's prefix is parsed with `is_endmarker=True`; the comment's
    # leading blank lines are carried over as the new leaf's prefix.
    for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
# NOTE(review): several lines of this function are missing from this listing
# (`consumed`, `nlines` and `ignored_lines` are used below without a visible
# initialisation, and some `if` statements have no visible body). Restore them
# from the upstream file before relying on this block.
#
# Results are memoized by `lru_cache` on (prefix, is_endmarker), so calls with
# equal arguments get the same list object back; callers should treat the
# returned list as read-only.
68 @lru_cache(maxsize=4096)
69 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
70 """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
71 result: List[ProtoComment] = []
# Fast path: an empty prefix, or one without any "#", cannot hold a comment.
72 if not prefix or "#" not in prefix:
# Walk the prefix one physical line at a time, splitting on "\n" or "\r\n".
78 for index, line in enumerate(re.split("\r?\n", prefix)):
# NOTE(review): the separator may be the two-character "\r\n", yet only one
# character is added for it here -- confirm whether prefixes can contain "\r\n".
79 consumed += len(line) + 1 # adding the length of the split '\n'
83 if not line.startswith("#"):
84 # Escaped newlines outside of a comment are not really newlines at
85 # all. We treat a single-line comment following an escaped newline
86 # as a simple trailing comment.
87 if line.endswith("\\"):
# A comment is "trailing" only when every line before it was an ignored
# (escaped-newline) line and the prefix does not belong to the end marker.
91 if index == ignored_lines and not is_endmarker:
92 comment_type = token.COMMENT # simple trailing comment
94 comment_type = STANDALONE_COMMENT
# Normalize the comment text via `make_comment`.
95 comment = make_comment(line)
98 type=comment_type, value=comment, newlines=nlines, consumed=consumed
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:", "#'", "#%%") should have a single
    space between the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.
    """
    content = content.rstrip()
    if not content:
        # Nothing but whitespace: the result is a bare hash sign.
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Spelled as an escape so the character cannot be silently turned into a
    # plain space by an editor or a copy/paste.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    # Leave the special openers ("#!", "#:", "##", "#'", "#%") untouched; every
    # other comment gets exactly one separating space after the hash.
    if content and content[0] not in " !:#'%":
        content = " " + content
    return "#" + content
def normalize_fmt_off(node: Node) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
    # `convert_one_fmt_off_pair` handles a single pair per call and reports
    # whether it converted one, so repeat until a pass finds nothing to do.
    try_again = True
    while try_again:
        try_again = convert_one_fmt_off_pair(node)
# NOTE(review): several lines of this function are missing from this listing
# (e.g. the bodies of some `if` statements, the docstring terminator and the
# call that the final `prefix=` argument belongs to). Restore them from the
# upstream file before relying on this block.
138 def convert_one_fmt_off_pair(node: Node) -> bool:
139 """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
141 Returns True if a pair was converted.
143 for leaf in node.leaves():
# `consumed` of the most recent comment in this prefix that is NOT a fmt
# marker; used at the end to decide how much of the prefix to keep.
144 previous_consumed = 0
145 for comment in list_comments(leaf.prefix, is_endmarker=False):
146 if comment.value not in FMT_PASS:
147 previous_consumed = comment.consumed
149 # We only want standalone comments. If there's no previous leaf or
150 # the previous leaf is indentation, it's a standalone comment in
152 if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
153 prev = preceding_leaf(leaf)
# The two checks below treat the markers differently depending on whether the
# preceding leaf's type is in WHITESPACE; their bodies are not shown here.
155 if comment.value in FMT_OFF and prev.type not in WHITESPACE:
157 if comment.value in FMT_SKIP and prev.type in WHITESPACE:
# Materialize the generator up front: the nodes are removed from the tree below.
160 ignored_nodes = list(generate_ignored_nodes(leaf, comment))
161 if not ignored_nodes:
# Remember the first node's parent and original prefix before mutating anything.
164 first = ignored_nodes[0] # Can be a container node with the `leaf`.
165 parent = first.parent
166 prefix = first.prefix
167 if comment.value in FMT_OFF:
# Drop the first `comment.consumed` characters of the prefix, i.e. the part
# already accounted for up to and including this marker comment.
168 first.prefix = prefix[comment.consumed :]
169 if comment.value in FMT_SKIP:
# Source text of all ignored nodes, concatenated exactly as `str()` renders them.
171 hidden_value = "".join(str(n) for n in ignored_nodes)
172 if comment.value in FMT_OFF:
# `# fmt: off` goes in front of the hidden text, on its own line.
173 hidden_value = comment.value + "\n" + hidden_value
174 if comment.value in FMT_SKIP:
# `# fmt: skip` is appended after the hidden text.
175 hidden_value += " " + comment.value
176 if hidden_value.endswith("\n"):
177 # That happens when one of the `ignored_nodes` ended with a NEWLINE
178 # leaf (possibly followed by a DEDENT).
179 hidden_value = hidden_value[:-1]
# Detach every ignored node from the tree. `first_idx` presumably records the
# index returned by the first removal (that assignment is not shown here).
180 first_idx: Optional[int] = None
181 for ignored in ignored_nodes:
182 index = ignored.remove()
183 if first_idx is None:
185 assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
186 assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
# Prefix for the replacement: the original prefix up to the last non-marker
# comment, followed by the blank lines that preceded the marker.
192 prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
# NOTE(review): several lines of this generator are missing from this listing
# (the `yield`/`return` statements, a loop header around the two-line
# condition, and the docstring terminator). Restore them from the upstream
# file before relying on this block.
200 def generate_ignored_nodes(leaf: Leaf, comment: ProtoComment) -> Iterator[LN]:
201 """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
203 If comment is skip, returns leaf only.
204 Stops at the end of the block.
206 container: Optional[LN] = container_of(leaf)
# --- `# fmt: skip` handling ---
207 if comment.value in FMT_SKIP:
208 prev_sibling = leaf.prev_sibling
209 if comment.value in leaf.prefix and prev_sibling is not None:
# Side effect: strips every occurrence of the marker text from `leaf.prefix`.
210 leaf.prefix = leaf.prefix.replace(comment.value, "")
# Collect preceding siblings, walking backwards; the condition below keeps
# going while the current sibling's prefix has no newline and an earlier
# sibling exists.
211 siblings = [prev_sibling]
213 "\n" not in prev_sibling.prefix
214 and prev_sibling.prev_sibling is not None
216 prev_sibling = prev_sibling.prev_sibling
# Insert at the front so `siblings` ends up in source order.
217 siblings.insert(0, prev_sibling)
218 for sibling in siblings:
220 elif leaf.parent is not None:
# --- `# fmt: off` handling: walk the container and its following siblings
# until there are none left or the end marker is reached ---
223 while container is not None and container.type != token.ENDMARKER:
224 if is_fmt_on(container):
227 # fix for fmt: on in children
228 if contains_fmt_on_at_column(container, leaf.column):
229 for child in container.children:
230 if contains_fmt_on_at_column(child, leaf.column):
235 container = container.next_sibling
def is_fmt_on(container: LN) -> bool:
    """Determine whether formatting is switched on within a container.

    Determined by whether the last `# fmt:` comment is `on` or `off`.
    Returns False when the container's prefix holds no such marker.
    """
    fmt_on = False
    # Later markers override earlier ones, so the last one in the prefix wins.
    for comment in list_comments(container.prefix, is_endmarker=False):
        if comment.value in FMT_ON:
            fmt_on = True
        elif comment.value in FMT_OFF:
            fmt_on = False
    return fmt_on
def contains_fmt_on_at_column(container: LN, column: int) -> bool:
    """Determine if children at a given column have formatting switched on."""
    for child in container.children:
        # A child counts as being "at" `column` when its first leaf (for a
        # Node) or the leaf itself (for a Leaf) starts there.  The parentheses
        # spell out the and/or precedence explicitly.
        at_column = (
            isinstance(child, Node) and first_leaf_column(child) == column
        ) or (isinstance(child, Leaf) and child.column == column)
        if at_column and is_fmt_on(child):
            return True

    return False
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff one of the comments in @comment_list is a pragma used by one
        of the more common static analysis tools for python (e.g. mypy, flake8,
        pylint).
    """
    # `str.startswith` accepts a tuple, so one call checks every pragma prefix.
    return any(
        comment.value.startswith(("# type:", "# noqa", "# pylint:"))
        for comment in comment_list
    )