src/black/comments.py

   1 from dataclasses import dataclass
   2 from functools import lru_cache
   3 import regex as re
   4 from typing import Iterator, List, Optional, Union
   5
   6 from blib2to3.pytree import Node, Leaf
   7 from blib2to3.pgen2 import token
   8
   9 from black.nodes import first_leaf_column, preceding_leaf, container_of
  10 from black.nodes import STANDALONE_COMMENT, WHITESPACE
  11
# types
# Any element of the syntax tree handled in this module: either a `Leaf` or a
# `Node` from `blib2to3.pytree`.
LN = Union[Leaf, Node]
  14
  15
  16 FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
  17 FMT_SKIP = {"# fmt: skip", "# fmt:skip"}
  18 FMT_PASS = {*FMT_OFF, *FMT_SKIP}
  19 FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
  20
  21
@dataclass
class ProtoComment:
    """Describes a piece of syntax that is a comment.

    It's not a :class:`blib2to3.pytree.Leaf` so that:

    * it can be cached (`Leaf` objects should not be reused more than once as
      they store their lineno, column, prefix, and parent information);
    * `newlines` and `consumed` fields are kept separate from the `value`. This
      simplifies handling of special marker comments like ``# fmt: off/on``.

    Instances are built by `list_comments` from a leaf's whitespace prefix and
    are turned into real `Leaf` objects by `generate_comments`.
    """

    # Token type: token.COMMENT (trailing comment) or STANDALONE_COMMENT.
    type: int  # token.COMMENT or STANDALONE_COMMENT
    # Normalized comment text, as returned by `make_comment`.
    value: str  # content of the comment
    # Number of empty lines seen in the prefix since the previous comment (or
    # since the start of the prefix for the first comment).
    newlines: int  # how many newlines before the comment
    # Offset into the original prefix just past this comment's line.
    consumed: int  # how many characters of the original leaf's prefix did we consume
  38
  39
  40 def generate_comments(leaf: LN) -> Iterator[Leaf]:
  41     """Clean the prefix of the `leaf` and generate comments from it, if any.
  42
  43     Comments in lib2to3 are shoved into the whitespace prefix.  This happens
  44     in `pgen2/driver.py:Driver.parse_tokens()`.  This was a brilliant implementation
  45     move because it does away with modifying the grammar to include all the
  46     possible places in which comments can be placed.
  47
  48     The sad consequence for us though is that comments don't "belong" anywhere.
  49     This is why this function generates simple parentless Leaf objects for
  50     comments.  We simply don't know what the correct parent should be.
  51
  52     No matter though, we can live without this.  We really only need to
  53     differentiate between inline and standalone comments.  The latter don't
  54     share the line with any code.
  55
  56     Inline comments are emitted as regular token.COMMENT leaves.  Standalone
  57     are emitted with a fake STANDALONE_COMMENT token identifier.
  58     """
  59     for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
  60         yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
  61
  62
  63 @lru_cache(maxsize=4096)
  64 def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
  65     """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
  66     result: List[ProtoComment] = []
  67     if not prefix or "#" not in prefix:
  68         return result
  69
  70     consumed = 0
  71     nlines = 0
  72     ignored_lines = 0
  73     for index, line in enumerate(re.split("\r?\n", prefix)):
  74         consumed += len(line) + 1  # adding the length of the split '\n'
  75         line = line.lstrip()
  76         if not line:
  77             nlines += 1
  78         if not line.startswith("#"):
  79             # Escaped newlines outside of a comment are not really newlines at
  80             # all. We treat a single-line comment following an escaped newline
  81             # as a simple trailing comment.
  82             if line.endswith("\\"):
  83                 ignored_lines += 1
  84             continue
  85
  86         if index == ignored_lines and not is_endmarker:
  87             comment_type = token.COMMENT  # simple trailing comment
  88         else:
  89             comment_type = STANDALONE_COMMENT
  90         comment = make_comment(line)
  91         result.append(
  92             ProtoComment(
  93                 type=comment_type, value=comment, newlines=nlines, consumed=consumed
  94             )
  95         )
  96         nlines = 0
  97     return result
  98
  99
 100 def make_comment(content: str) -> str:
 101     """Return a consistently formatted comment from the given `content` string.
 102
 103     All comments (except for "##", "#!", "#:", '#'", "#%%") should have a single
 104     space between the hash sign and the content.
 105
 106     If `content` didn't start with a hash sign, one is provided.
 107     """
 108     content = content.rstrip()
 109     if not content:
 110         return "#"
 111
 112     if content[0] == "#":
 113         content = content[1:]
 114     NON_BREAKING_SPACE = " "
 115     if (
 116         content
 117         and content[0] == NON_BREAKING_SPACE
 118         and not content.lstrip().startswith("type:")
 119     ):
 120         content = " " + content[1:]  # Replace NBSP by a simple space
 121     if content and content[0] not in " !:#'%":
 122         content = " " + content
 123     return "#" + content
 124
 125
 126 def normalize_fmt_off(node: Node) -> None:
 127     """Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
 128     try_again = True
 129     while try_again:
 130         try_again = convert_one_fmt_off_pair(node)
 131
 132
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Scans the prefixes of the leaves of `node` for a `# fmt: off` or
    `# fmt: skip` marker comment.  For the first marker for which
    `generate_ignored_nodes` yields any nodes, those nodes are removed from the
    tree and a single STANDALONE_COMMENT leaf is inserted where the first of
    them used to be.  The value of that leaf is the concatenated `str()` of
    the removed nodes together with the marker comment.

    The tree is modified in place and at most one marker is handled per call.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        # `consumed` offset of the last non-marker comment seen in this prefix,
        # i.e. how much of the prefix comes before the marker comment.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value not in FMT_PASS:
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            # (`comment.value in FMT_PASS` always holds at this point because
            # of the `continue` above.)
            if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    # `# fmt: off` is ignored when the preceding leaf's type
                    # is not in WHITESPACE.
                    if comment.value in FMT_OFF and prev.type not in WHITESPACE:
                        continue
                    # `# fmt: skip` is ignored in the opposite case, when the
                    # preceding leaf's type is in WHITESPACE.
                    if comment.value in FMT_SKIP and prev.type in WHITESPACE:
                        continue

            # NOTE: for `# fmt: skip`, generating the nodes may already remove
            # the marker text from `leaf.prefix`.
            ignored_nodes = list(generate_ignored_nodes(leaf, comment))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            # Keep the untouched prefix: part of it becomes the prefix of the
            # replacement leaf below.
            prefix = first.prefix
            # Trim the first node's prefix before it is stringified, so the
            # hidden value does not repeat what precedes the marker (the marker
            # itself is re-added explicitly below).
            if comment.value in FMT_OFF:
                first.prefix = prefix[comment.consumed :]
            if comment.value in FMT_SKIP:
                first.prefix = ""
            hidden_value = "".join(str(n) for n in ignored_nodes)
            # `# fmt: off` goes on its own line before the hidden code, while
            # `# fmt: skip` trails it after two spaces.
            if comment.value in FMT_OFF:
                hidden_value = comment.value + "\n" + hidden_value
            if comment.value in FMT_SKIP:
                hidden_value += "  " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            # Detach every ignored node, remembering the index returned by the
            # first `remove()` call; the replacement leaf is inserted there.
            first_idx: Optional[int] = None
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            # The replacement's prefix is the part of the original prefix up to
            # the last non-marker comment, followed by the newlines recorded
            # for the marker comment.
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                ),
            )
            return True

    return False
 193
 194
 195 def generate_ignored_nodes(leaf: Leaf, comment: ProtoComment) -> Iterator[LN]:
 196     """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
 197
 198     If comment is skip, returns leaf only.
 199     Stops at the end of the block.
 200     """
 201     container: Optional[LN] = container_of(leaf)
 202     if comment.value in FMT_SKIP:
 203         prev_sibling = leaf.prev_sibling
 204         if comment.value in leaf.prefix and prev_sibling is not None:
 205             leaf.prefix = leaf.prefix.replace(comment.value, "")
 206             siblings = [prev_sibling]
 207             while (
 208                 "\n" not in prev_sibling.prefix
 209                 and prev_sibling.prev_sibling is not None
 210             ):
 211                 prev_sibling = prev_sibling.prev_sibling
 212                 siblings.insert(0, prev_sibling)
 213             for sibling in siblings:
 214                 yield sibling
 215         elif leaf.parent is not None:
 216             yield leaf.parent
 217         return
 218     while container is not None and container.type != token.ENDMARKER:
 219         if is_fmt_on(container):
 220             return
 221
 222         # fix for fmt: on in children
 223         if contains_fmt_on_at_column(container, leaf.column):
 224             for child in container.children:
 225                 if contains_fmt_on_at_column(child, leaf.column):
 226                     return
 227                 yield child
 228         else:
 229             yield container
 230             container = container.next_sibling
 231
 232
 233 def is_fmt_on(container: LN) -> bool:
 234     """Determine whether formatting is switched on within a container.
 235     Determined by whether the last `# fmt:` comment is `on` or `off`.
 236     """
 237     fmt_on = False
 238     for comment in list_comments(container.prefix, is_endmarker=False):
 239         if comment.value in FMT_ON:
 240             fmt_on = True
 241         elif comment.value in FMT_OFF:
 242             fmt_on = False
 243     return fmt_on
 244
 245
 246 def contains_fmt_on_at_column(container: LN, column: int) -> bool:
 247     """Determine if children at a given column have formatting switched on."""
 248     for child in container.children:
 249         if (
 250             isinstance(child, Node)
 251             and first_leaf_column(child) == column
 252             or isinstance(child, Leaf)
 253             and child.column == column
 254         ):
 255             if is_fmt_on(child):
 256                 return True
 257
 258     return False
 259
 260
 261 def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
 262     """
 263     Returns:
 264         True iff one of the comments in @comment_list is a pragma used by one
 265         of the more common static analysis tools for python (e.g. mypy, flake8,
 266         pylint).
 267     """
 268     for comment in comment_list:
 269         if comment.value.startswith(("# type:", "# noqa", "# pylint:")):
 270             return True
 271
 272     return False