+++ /dev/null
-import re
-from dataclasses import dataclass
-from functools import lru_cache
-from typing import Final, Iterator, List, Optional, Union
-
-from black.nodes import (
- CLOSING_BRACKETS,
- STANDALONE_COMMENT,
- WHITESPACE,
- container_of,
- first_leaf_of,
- preceding_leaf,
- syms,
-)
-from blib2to3.pgen2 import token
-from blib2to3.pytree import Leaf, Node
-
# Type alias: a syntax-tree element that is either a `Leaf` or a `Node`.
LN = Union[Leaf, Node]
-
# Marker comments recognised as "formatting off".
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
# Marker comments recognised as "skip formatting".
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
# Every marker that suppresses formatting: the union of the two sets above.
FMT_PASS: Final = FMT_OFF | FMT_SKIP
# Marker comments recognised as "formatting on".
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}

# Characters that may directly follow the leading "#" of a comment without a
# space being inserted between them.
COMMENT_EXCEPTIONS = " !:#'"
-
-
@dataclass
class ProtoComment:
    """A comment extracted from a leaf's prefix, before it becomes a `Leaf`.

    A plain record is used instead of :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` stay separate from `value`, which simplifies
      the handling of special marker comments like ``# fmt: off/on``.

    Attributes:
        type: `token.COMMENT` or `STANDALONE_COMMENT`.
        value: Content of the comment.
        newlines: How many newlines come before the comment.
        consumed: How many characters of the original leaf's prefix were
            consumed up to and including this comment.
    """

    type: int
    value: str
    newlines: int
    consumed: int
-
-
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Yield a parentless `Leaf` for every comment found in the prefix of `leaf`.

    lib2to3 stores comments inside the whitespace prefix of the element that
    follows them (this happens in `pgen2/driver.py:Driver.parse_tokens()`).
    That keeps the grammar free of comment rules, but it also means a comment
    does not "belong" to any node, so the leaves produced here have no parent.

    The only distinction that matters is inline versus standalone: an inline
    comment shares its line with code and is emitted as `token.COMMENT`, while
    a standalone comment is emitted with the fake `STANDALONE_COMMENT` type.
    The newlines that preceded each comment become the prefix of its leaf.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        yield Leaf(proto.type, proto.value, prefix="\n" * proto.newlines)
-
-
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    The prefix is examined line by line (both "\\n" and "\\r\\n" line endings are
    recognised). Every line that starts with "#" after leading whitespace is
    stripped becomes one `ProtoComment`:

    * `type` is `token.COMMENT` when the comment sits on the first line of the
      prefix (lines ending in a backslash continuation are not counted) and
      `is_endmarker` is false; otherwise it is `STANDALONE_COMMENT`;
    * `value` is the comment text normalised by `make_comment`;
    * `newlines` is the number of empty lines seen since the previous comment;
    * `consumed` is the offset into `prefix` just past the comment's line.

    The result is memoised by `lru_cache`, so the same list object is handed to
    every caller with equal arguments; callers must not mutate it.
    """
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    # Splitting on a capturing group keeps the separators in the result: even
    # positions hold the lines, odd positions hold the line ending after each.
    pieces = re.split("(\r?\n)", prefix)
    lines = pieces[0::2]
    line_endings = pieces[1::2]

    consumed = 0
    nlines = 0
    ignored_lines = 0
    for index, line in enumerate(lines):
        # Use the real width of the line ending so that `consumed` remains a
        # valid offset into `prefix` for "\r\n" input too. The last piece has
        # no line ending; it keeps counting as one character, as it always did.
        ending_length = len(line_endings[index]) if index < len(line_endings) else 1
        consumed += len(line) + ending_length
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        nlines = 0
    return result
-
-
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    Trailing whitespace is removed. A single space is placed between the hash
    sign and the content, unless the content already starts with one of the
    characters in `COMMENT_EXCEPTIONS` (as in "##", "#!", "#:", "#'" or a
    comment that already has the space).

    A leading non-breaking space is replaced by a plain space, except when the
    rest of the content starts with "type:".

    If `content` didn't start with a hash sign, one is provided. Empty or
    whitespace-only `content` yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Written as an escape: a literal U+00A0 is indistinguishable from a plain
    # space when reading the source, and a plain space here would silently turn
    # the replacement below into a no-op.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    if content and content[0] not in COMMENT_EXCEPTIONS:
        content = " " + content
    return "#" + content
-
-
def normalize_fmt_off(node: Node) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments.

    `convert_one_fmt_off_pair` handles at most one marker per call and reports
    whether it changed anything, so it is called until it returns False.
    """
    while convert_one_fmt_off_pair(node):
        pass
-
-
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Walks the leaves of `node` and the comments stored in each leaf's prefix
    until it finds a marker from `FMT_PASS` for which `generate_ignored_nodes`
    yields at least one node. Those nodes are removed from the tree and a single
    `STANDALONE_COMMENT` leaf, whose value is the concatenated `str()` of the
    removed nodes plus the marker, is inserted where the first of them was.

    Only one marker is handled per call; `normalize_fmt_off` calls this in a
    loop until nothing is left to convert.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        # Offset into the prefix just past the last non-marker comment seen so
        # far; the text before it is kept as the prefix of the new leaf.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value not in FMT_PASS:
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    # A trailing `# fmt: off` after a leaf whose type is not in
                    # `WHITESPACE` is ignored, and so is a `# fmt: skip` that
                    # follows only such whitespace.
                    if comment.value in FMT_OFF and prev.type not in WHITESPACE:
                        continue
                    if comment.value in FMT_SKIP and prev.type in WHITESPACE:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            # Captured now, before the ignored nodes are removed further down.
            parent = first.parent
            prefix = first.prefix
            if comment.value in FMT_OFF:
                # Drop everything up to and including the marker's line.
                first.prefix = prefix[comment.consumed :]
            if comment.value in FMT_SKIP:
                # The whole original prefix moves onto the new leaf.
                first.prefix = ""
                standalone_comment_prefix = prefix
            else:
                # Keep what preceded the marker, plus the blank lines that
                # separated the marker from it.
                standalone_comment_prefix = (
                    prefix[:previous_consumed] + "\n" * comment.newlines
                )
            hidden_value = "".join(str(n) for n in ignored_nodes)
            if comment.value in FMT_OFF:
                hidden_value = comment.value + "\n" + hidden_value
            if comment.value in FMT_SKIP:
                # NOTE(review): the marker is re-attached after a single space;
                # inline comments are conventionally separated from code by two
                # spaces. Confirm this separator is what is intended.
                hidden_value += " " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            # Remove every ignored node; the value returned by the first
            # `remove()` call is used as the insertion index below.
            first_idx: Optional[int] = None
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=standalone_comment_prefix,
                    fmt_pass_converted_first_leaf=first_leaf_of(first),
                ),
            )
            # The tree changed under the iteration, so stop here and let the
            # caller start over.
            return True

    return False
-
-
def generate_ignored_nodes(leaf: Leaf, comment: ProtoComment) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    If `comment` is a `# fmt: skip` marker, the nodes are produced by
    `_generate_ignored_nodes_from_fmt_skip` instead.

    Otherwise the container of `leaf` and its following siblings are yielded
    one by one. Iteration stops when a sibling (or one of its children) carries
    `# fmt: on`, when there is no further sibling, or when an ENDMARKER is
    reached.
    """
    if comment.value in FMT_SKIP:
        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment)
        return
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        if is_fmt_on(container):
            return

        # fix for fmt: on in children
        if children_contains_fmt_on(container):
            # Yield the children that come before the one carrying
            # `# fmt: on`; every path through this loop ends in a `return`
            # once that child is reached.
            for index, child in enumerate(container.children):
                if isinstance(child, Leaf) and is_fmt_on(child):
                    if child.type in CLOSING_BRACKETS:
                        # This means `# fmt: on` is placed at a different bracket level
                        # than `# fmt: off`. This is an invalid use, but as a courtesy,
                        # we include this closing bracket in the ignored nodes.
                        # The alternative is to fail the formatting.
                        yield child
                    return
                if (
                    child.type == token.INDENT
                    and index < len(container.children) - 1
                    and children_contains_fmt_on(container.children[index + 1])
                ):
                    # This means `# fmt: on` is placed right after an indentation
                    # level, and we shouldn't swallow the previous INDENT token.
                    return
                if children_contains_fmt_on(child):
                    return
                yield child
        else:
            if container.type == token.DEDENT and container.next_sibling is None:
                # This can happen when there is no matching `# fmt: on` comment at the
                # same level as `# fmt: off`. We need to keep this DEDENT.
                return
            yield container
            container = container.next_sibling
-
-
def _generate_ignored_nodes_from_fmt_skip(
    leaf: Leaf, comment: ProtoComment
) -> Iterator[LN]:
    """Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`.

    Nothing is generated unless the first comment in the prefix of `leaf` is
    `comment`. Otherwise the nodes are yielded in source order, and the prefix
    of `leaf` is emptied whenever one of the two cases below applies.
    """
    # The prefix goes through `list_comments` so that it is formatted the same
    # way `comment.value` was before the two are compared.
    prefix_comments = list_comments(leaf.prefix, is_endmarker=False)
    if not prefix_comments or prefix_comments[0].value != comment.value:
        return

    # Nodes are gathered walking backwards (nearest first) and reversed once at
    # the end to restore source order.
    collected: List[LN] = []
    cursor = leaf.prev_sibling
    enclosing = leaf.parent
    if cursor is not None:
        leaf.prefix = ""
        collected.append(cursor)
        # Keep going left until a sibling whose prefix contains a newline.
        while "\n" not in cursor.prefix and cursor.prev_sibling is not None:
            cursor = cursor.prev_sibling
            collected.append(cursor)
    elif (
        enclosing is not None
        and enclosing.type == syms.suite
        and leaf.type == token.NEWLINE
    ):
        # The `# fmt: skip` is on the colon line of the if/while/def/class/...
        # statements. The ignored nodes should be previous siblings of the
        # parent suite node.
        leaf.prefix = ""
        cursor = enclosing.prev_sibling
        while cursor is not None and cursor.type != syms.suite:
            collected.append(cursor)
            cursor = cursor.prev_sibling
        # Special case for `async_stmt` where the ASYNC token is on the
        # grandparent node.
        outer = enclosing.parent
        if (
            outer is not None
            and outer.prev_sibling is not None
            and outer.prev_sibling.type == token.ASYNC
        ):
            collected.append(outer.prev_sibling)

    collected.reverse()
    yield from collected
-
-
def is_fmt_on(container: LN) -> bool:
    """Determine whether formatting is switched on within a container.

    The last `# fmt:` on/off marker in the prefix of `container` decides the
    answer; when the prefix holds no such marker the result is False.
    """
    # Scanning backwards lets the last marker settle the result immediately.
    for comment in reversed(list_comments(container.prefix, is_endmarker=False)):
        if comment.value in FMT_ON:
            return True
        if comment.value in FMT_OFF:
            return False
    return False
-
-
def children_contains_fmt_on(container: LN) -> bool:
    """Determine if children have formatting switched on.

    True as soon as the first leaf of any direct child of `container` passes
    `is_fmt_on`; children without a first leaf are skipped.
    """
    first_leaves = map(first_leaf_of, container.children)
    return any(leaf is not None and is_fmt_on(leaf) for leaf in first_leaves)
-
-
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff one of the comments in @comment_list is a pragma used by one
        of the more common static analysis tools for python (e.g. mypy, flake8,
        pylint).
    """
    # `str.startswith` accepts a tuple and matches if any prefix matches.
    pragma_prefixes = ("# type:", "# noqa", "# pylint:")
    return any(comment.value.startswith(pragma_prefixes) for comment in comment_list)