All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
from dataclasses import dataclass
from functools import lru_cache
import re
from typing import Iterator, List, Optional, Union

from blib2to3.pytree import Node, Leaf
from blib2to3.pgen2 import token

from black.nodes import first_leaf_column, preceding_leaf, container_of
from black.nodes import STANDALONE_COMMENT, WHITESPACE
# types
LN = Union[Leaf, Node]  # either kind of syntax-tree element

# Marker comments recognised by the functions below. They are matched by exact
# set membership against `ProtoComment.value`, i.e. against the comment text
# after it has gone through `make_comment()`.
FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_SKIP = {"# fmt: skip", "# fmt:skip"}
FMT_PASS = {*FMT_OFF, *FMT_SKIP}  # union of the "off" and "skip" markers
FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
# NOTE(review): the decorator and class header were missing from the damaged
# text; they are restored from the `dataclass` import and from the keyword
# construction `ProtoComment(type=..., value=..., newlines=..., consumed=...)`.
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.
    """

    type: int  # token.COMMENT or STANDALONE_COMMENT
    value: str  # content of the comment
    newlines: int  # how many newlines before the comment
    consumed: int  # how many characters of the original leaf's prefix did we consume
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Clean the prefix of the `leaf` and generate comments from it, if any.

    Comments in lib2to3 are shoved into the whitespace prefix.  This happens
    in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
    move because it does away with modifying the grammar to include all the
    possible places in which comments can be placed.

    The sad consequence for us though is that comments don't "belong" anywhere.
    This is why this function generates simple parentless Leaf objects for
    comments.  We simply don't know what the correct parent should be.

    No matter though, we can live without this.  We really only need to
    differentiate between inline and standalone comments.  The latter don't
    share the line with any code.

    Inline comments are emitted as regular token.COMMENT leaves.  Standalone
    are emitted with a fake STANDALONE_COMMENT token identifier.
    """
    # Each parsed comment becomes a fresh Leaf whose prefix holds only the
    # newlines that preceded the comment in the original prefix.
    for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    The prefix is split into lines; every line that starts with ``#`` (after
    leading whitespace is removed) yields one `ProtoComment`.  A comment on the
    first effective line of the prefix is a trailing ``token.COMMENT`` unless
    `is_endmarker` is true; every other comment is a ``STANDALONE_COMMENT``.

    Results are memoised with `lru_cache`, so the returned list is shared
    between callers that pass the same arguments.

    NOTE(review): several short statements of this function (the early return,
    counter initialisation, `lstrip`, blank-line counting, `continue`, `else`,
    the `append` call and the final return) were missing from the damaged text
    and are restored from context -- confirm against the upstream file.
    """
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    consumed = 0  # characters of `prefix` covered so far, newlines included
    nlines = 0  # blank lines seen since the previous comment
    ignored_lines = 0  # lines that ended in a backslash continuation
    for index, line in enumerate(re.split("\r?\n", prefix)):
        consumed += len(line) + 1  # adding the length of the split '\n'
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        nlines = 0
    return result
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments (except for "##", "#!", "#:", "#'", "#%%") should have a single
    space between the hash sign and the content.

    If `content` didn't start with a hash sign, one is provided.

    Trailing whitespace is removed.  Empty (or whitespace-only) content yields
    a bare ``"#"``.  A non-breaking space directly after the hash is replaced
    by a regular space, except when the remaining text starts with ``type:``.
    """
    content = content.rstrip()
    if not content:
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Spelled as an escape so the character cannot be confused with (or
    # silently converted to) an ordinary space.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    # Insert the separating space unless the text already starts with a space
    # or with one of the characters listed as exceptions in the docstring.
    if content and content[0] not in " !:#'%":
        content = " " + content
    return "#" + content
def normalize_fmt_off(node: Node) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
    # Convert one pair at a time and start over after each conversion, until a
    # full pass reports that nothing was converted.
    # NOTE(review): the loop header was missing from the damaged text and is
    # restored from the `try_again` assignment -- confirm.
    try_again = True
    while try_again:
        try_again = convert_one_fmt_off_pair(node)
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Walks the leaves of `node`, looking in each leaf's prefix for a comment
    from `FMT_PASS`.  For the first such comment that has nodes to ignore,
    those nodes are removed from the tree and replaced by one
    ``STANDALONE_COMMENT`` leaf holding their source text verbatim.

    Returns True if a pair was converted.

    NOTE(review): the `continue`/`return` statements, the `if prev:` guard,
    `first.prefix = ""`, `first_idx = index` and the `parent.insert_child(...)`
    call were missing from the damaged text and are restored from context --
    confirm against the upstream file.
    """
    for leaf in node.leaves():
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value not in FMT_PASS:
                # Remember how much of the prefix ordinary comments occupy so
                # that they are kept in front of the replacement leaf.
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    if comment.value in FMT_OFF and prev.type not in WHITESPACE:
                        continue
                    if comment.value in FMT_SKIP and prev.type in WHITESPACE:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            prefix = first.prefix
            if comment.value in FMT_OFF:
                first.prefix = prefix[comment.consumed :]
            if comment.value in FMT_SKIP:
                first.prefix = ""
            hidden_value = "".join(str(n) for n in ignored_nodes)
            if comment.value in FMT_OFF:
                hidden_value = comment.value + "\n" + hidden_value
            if comment.value in FMT_SKIP:
                # NOTE(review): the damaged text shows a single space here, but
                # whitespace runs were collapsed throughout it; two spaces is
                # the PEP 8 spacing before an inline comment -- confirm.
                hidden_value += "  " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            first_idx: Optional[int] = None
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                ),
            )
            return True

    return False
def generate_ignored_nodes(leaf: Leaf, comment: ProtoComment) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    If comment is skip, returns leaf only.
    Stops at the end of the block.

    NOTE(review): the `while (`/`):` wrapper, the `yield`/`return` statements
    and the `else:` branch were missing from the damaged text and are restored
    from context -- confirm against the upstream file.
    """
    container: Optional[LN] = container_of(leaf)
    if comment.value in FMT_SKIP:
        prev_sibling = leaf.prev_sibling
        if comment.value in leaf.prefix and prev_sibling is not None:
            # The skip comment sits in this leaf's prefix: drop it from there
            # and collect the preceding siblings back to the one whose prefix
            # contains a newline.
            leaf.prefix = leaf.prefix.replace(comment.value, "")
            siblings = [prev_sibling]
            while (
                "\n" not in prev_sibling.prefix
                and prev_sibling.prev_sibling is not None
            ):
                prev_sibling = prev_sibling.prev_sibling
                siblings.insert(0, prev_sibling)
            yield from siblings
        elif leaf.parent is not None:
            yield leaf.parent
        return
    while container is not None and container.type != token.ENDMARKER:
        if is_fmt_on(container):
            return

        # fix for fmt: on in children
        if contains_fmt_on_at_column(container, leaf.column):
            for child in container.children:
                if contains_fmt_on_at_column(child, leaf.column):
                    return
                yield child
        else:
            yield container
            container = container.next_sibling
def is_fmt_on(container: LN) -> bool:
    """Determine whether formatting is switched on within a container.
    Determined by whether the last `# fmt:` comment is `on` or `off`.

    Only comments in the prefix of `container` are examined; the result is
    False when that prefix holds no `FMT_ON`/`FMT_OFF` marker at all.

    NOTE(review): the flag initialisation, both assignments and the return
    were missing from the damaged text and are restored from the docstring --
    confirm.
    """
    fmt_on = False
    for comment in list_comments(container.prefix, is_endmarker=False):
        if comment.value in FMT_ON:
            fmt_on = True
        elif comment.value in FMT_OFF:
            fmt_on = False
    return fmt_on
def contains_fmt_on_at_column(container: LN, column: int) -> bool:
    """Determine if children at a given column have formatting switched on.

    Returns True as soon as one direct child of `container` that starts at
    `column` satisfies `is_fmt_on()`; otherwise False.

    NOTE(review): the `if (`/`):` wrapper, the `is_fmt_on(child)` check and
    both returns were missing from the damaged text and are restored from
    context -- confirm.
    """
    for child in container.children:
        # Parentheses only make the existing `and`/`or` precedence explicit.
        if (
            (isinstance(child, Node) and first_leaf_column(child) == column)
            or (isinstance(child, Leaf) and child.column == column)
        ):
            if is_fmt_on(child):
                return True

    return False
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff one of the comments in @comment_list is a pragma used by one
        of the more common static analysis tools for python (e.g. mypy, flake8,
        pylint).
    """
    # A comment counts as a pragma when its text starts with one of these
    # prefixes; an empty list therefore yields False.
    return any(
        comment.value.startswith(("# type:", "# noqa", "# pylint:"))
        for comment in comment_list
    )