+import re
import sys
from dataclasses import dataclass
from functools import lru_cache
-import regex as re
from typing import Iterator, List, Optional, Union
if sys.version_info >= (3, 8):
else:
from typing_extensions import Final
-from blib2to3.pytree import Node, Leaf
+from black.nodes import (
+ CLOSING_BRACKETS,
+ STANDALONE_COMMENT,
+ WHITESPACE,
+ container_of,
+ first_leaf_column,
+ preceding_leaf,
+)
from blib2to3.pgen2 import token
-
-from black.nodes import first_leaf_column, preceding_leaf, container_of
-from black.nodes import STANDALONE_COMMENT, WHITESPACE
+from blib2to3.pytree import Leaf, Node, type_repr
# types
LN = Union[Leaf, Node]
FMT_PASS: Final = {*FMT_OFF, *FMT_SKIP}
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}
+COMMENT_EXCEPTIONS = {True: " !:#'", False: " !:#'%"}
+
@dataclass
class ProtoComment:
consumed: int # how many characters of the original leaf's prefix did we consume
-def generate_comments(leaf: LN) -> Iterator[Leaf]:
+def generate_comments(leaf: LN, *, preview: bool) -> Iterator[Leaf]:
"""Clean the prefix of the `leaf` and generate comments from it, if any.
Comments in lib2to3 are shoved into the whitespace prefix. This happens
Inline comments are emitted as regular token.COMMENT leaves. Standalone
are emitted with a fake STANDALONE_COMMENT token identifier.
"""
- for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
+ for pc in list_comments(
+ leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER, preview=preview
+ ):
yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
@lru_cache(maxsize=4096)
-def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
+def list_comments(
+ prefix: str, *, is_endmarker: bool, preview: bool
+) -> List[ProtoComment]:
"""Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
result: List[ProtoComment] = []
if not prefix or "#" not in prefix:
comment_type = token.COMMENT # simple trailing comment
else:
comment_type = STANDALONE_COMMENT
- comment = make_comment(line)
+ comment = make_comment(line, preview=preview)
result.append(
ProtoComment(
type=comment_type, value=comment, newlines=nlines, consumed=consumed
return result
-def make_comment(content: str) -> str:
+def make_comment(content: str, *, preview: bool) -> str:
"""Return a consistently formatted comment from the given `content` string.
- All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
+ All comments (except for "##", "#!", "#:", "#'") should have a single
space between the hash sign and the content.
If `content` didn't start with a hash sign, one is provided.
and not content.lstrip().startswith("type:")
):
content = " " + content[1:] # Replace NBSP by a simple space
- if content and content[0] not in " !:#'%":
+ if content and content[0] not in COMMENT_EXCEPTIONS[preview]:
content = " " + content
return "#" + content
-def normalize_fmt_off(node: Node) -> None:
+def normalize_fmt_off(node: Node, *, preview: bool) -> None:
"""Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
try_again = True
while try_again:
- try_again = convert_one_fmt_off_pair(node)
+ try_again = convert_one_fmt_off_pair(node, preview=preview)
-def convert_one_fmt_off_pair(node: Node) -> bool:
+def convert_one_fmt_off_pair(node: Node, *, preview: bool) -> bool:
"""Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
Returns True if a pair was converted.
"""
for leaf in node.leaves():
previous_consumed = 0
- for comment in list_comments(leaf.prefix, is_endmarker=False):
+ for comment in list_comments(leaf.prefix, is_endmarker=False, preview=preview):
if comment.value not in FMT_PASS:
previous_consumed = comment.consumed
continue
if comment.value in FMT_SKIP and prev.type in WHITESPACE:
continue
- ignored_nodes = list(generate_ignored_nodes(leaf, comment))
+ ignored_nodes = list(generate_ignored_nodes(leaf, comment, preview=preview))
if not ignored_nodes:
continue
first.prefix = prefix[comment.consumed :]
if comment.value in FMT_SKIP:
first.prefix = ""
+ standalone_comment_prefix = prefix
+ else:
+ standalone_comment_prefix = (
+ prefix[:previous_consumed] + "\n" * comment.newlines
+ )
hidden_value = "".join(str(n) for n in ignored_nodes)
if comment.value in FMT_OFF:
hidden_value = comment.value + "\n" + hidden_value
Leaf(
STANDALONE_COMMENT,
hidden_value,
- prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
+ prefix=standalone_comment_prefix,
),
)
return True
return False
-def generate_ignored_nodes(leaf: Leaf, comment: ProtoComment) -> Iterator[LN]:
+def generate_ignored_nodes(
+ leaf: Leaf, comment: ProtoComment, *, preview: bool
+) -> Iterator[LN]:
"""Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
If comment is skip, returns leaf only.
Stops at the end of the block.
"""
- container: Optional[LN] = container_of(leaf)
if comment.value in FMT_SKIP:
- prev_sibling = leaf.prev_sibling
- if comment.value in leaf.prefix and prev_sibling is not None:
- leaf.prefix = leaf.prefix.replace(comment.value, "")
- siblings = [prev_sibling]
- while (
- "\n" not in prev_sibling.prefix
- and prev_sibling.prev_sibling is not None
- ):
- prev_sibling = prev_sibling.prev_sibling
- siblings.insert(0, prev_sibling)
- for sibling in siblings:
- yield sibling
- elif leaf.parent is not None:
- yield leaf.parent
+ yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, preview=preview)
return
+ container: Optional[LN] = container_of(leaf)
while container is not None and container.type != token.ENDMARKER:
- if is_fmt_on(container):
+ if is_fmt_on(container, preview=preview):
return
# fix for fmt: on in children
- if contains_fmt_on_at_column(container, leaf.column):
+ if contains_fmt_on_at_column(container, leaf.column, preview=preview):
for child in container.children:
- if contains_fmt_on_at_column(child, leaf.column):
+ if isinstance(child, Leaf) and is_fmt_on(child, preview=preview):
+ if child.type in CLOSING_BRACKETS:
+ # This means `# fmt: on` is placed at a different bracket level
+ # than `# fmt: off`. This is an invalid use, but as a courtesy,
+ # we include this closing bracket in the ignored nodes.
+ # The alternative is to fail the formatting.
+ yield child
+ return
+ if contains_fmt_on_at_column(child, leaf.column, preview=preview):
return
yield child
else:
container = container.next_sibling
-def is_fmt_on(container: LN) -> bool:
+def _generate_ignored_nodes_from_fmt_skip(
+ leaf: Leaf, comment: ProtoComment, *, preview: bool
+) -> Iterator[LN]:
+ """Generate the nodes covered by the `# fmt: skip` comment found in `leaf`'s prefix.
+
+ The prefix of `leaf` is re-parsed with `list_comments`; nothing is generated
+ unless the first comment found there has the same value as `comment`.
+
+ When `leaf` has a previous sibling, that sibling is generated together with the
+ siblings before it, walking backwards and stopping at (and including) the first
+ one whose prefix contains a newline or that has no previous sibling.
+
+ Otherwise, when `leaf` is a NEWLINE token whose parent is a "suite" node, the
+ previous siblings of that parent are generated, walking backwards until another
+ "suite" node or the first sibling is reached. If the previous sibling of the
+ grandparent is an ASYNC token, it is generated first.
+
+ Nodes are always generated in source order. In both cases above `leaf.prefix`
+ is reset to an empty string as a side effect. In any other situation nothing
+ is generated and `leaf` is left untouched.
+ """
+ prev_sibling = leaf.prev_sibling
+ parent = leaf.parent
+ # comment.value was normalized by list_comments, so the raw leaf prefix has to be
+ # parsed the same way before the two can be compared
+ comments = list_comments(leaf.prefix, is_endmarker=False, preview=preview)
+ if not comments or comment.value != comments[0].value:
+ return
+ if prev_sibling is not None:
+ leaf.prefix = ""
+ siblings = [prev_sibling]
+ while "\n" not in prev_sibling.prefix and prev_sibling.prev_sibling is not None:
+ prev_sibling = prev_sibling.prev_sibling
+ siblings.insert(0, prev_sibling)
+ for sibling in siblings:
+ yield sibling
+ elif (
+ parent is not None
+ and type_repr(parent.type) == "suite"
+ and leaf.type == token.NEWLINE
+ ):
+ # The `# fmt: skip` is on the colon line of an if/while/def/class/...
+ # statement. The ignored nodes are the previous siblings of the
+ # parent suite node, collected back to (not including) the prior suite.
+ leaf.prefix = ""
+ ignored_nodes: List[LN] = []
+ parent_sibling = parent.prev_sibling
+ while parent_sibling is not None and type_repr(parent_sibling.type) != "suite":
+ ignored_nodes.insert(0, parent_sibling)
+ parent_sibling = parent_sibling.prev_sibling
+ # Special case for `async_stmt`, where the ASYNC token is the previous
+ # sibling of the grandparent node rather than of the parent suite.
+ grandparent = parent.parent
+ if (
+ grandparent is not None
+ and grandparent.prev_sibling is not None
+ and grandparent.prev_sibling.type == token.ASYNC
+ ):
+ ignored_nodes.insert(0, grandparent.prev_sibling)
+ yield from iter(ignored_nodes)
+
+
+def is_fmt_on(container: LN, preview: bool) -> bool:
"""Determine whether formatting is switched on within a container.
Determined by whether the last `# fmt:` comment is `on` or `off`.
"""
fmt_on = False
- for comment in list_comments(container.prefix, is_endmarker=False):
+ for comment in list_comments(container.prefix, is_endmarker=False, preview=preview):
if comment.value in FMT_ON:
fmt_on = True
elif comment.value in FMT_OFF:
return fmt_on
-def contains_fmt_on_at_column(container: LN, column: int) -> bool:
+def contains_fmt_on_at_column(container: LN, column: int, *, preview: bool) -> bool:
"""Determine if children at a given column have formatting switched on."""
for child in container.children:
if (
or isinstance(child, Leaf)
and child.column == column
):
- if is_fmt_on(child):
+ if is_fmt_on(child, preview=preview):
return True
return False