Cache generated comments

[etc/vim.git] / black.py
diff --git a/black.py b/black.py

index 6db661c525042b44cd69376c7cda6978caaa82fc..f51b10ef7d0a282e60a8064f2d871c43739cd90d 100644 (file)
--- a/black.py
+++ b/black.py
@@ -1256,18 +1256,18 @@ class Line:
  
      def is_complex_subscript(self, leaf: Leaf) -> bool:
          """Return True iff `leaf` is part of a slice with non-trivial exprs."""
-        open_lsqb = (
-            leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
-        )
+        open_lsqb = self.bracket_tracker.get_open_lsqb()
          if open_lsqb is None:
              return False
  
          subscript_start = open_lsqb.next_sibling
-        if (
-            isinstance(subscript_start, Node)
-            and subscript_start.type == syms.subscriptlist
-        ):
-            subscript_start = child_towards(subscript_start, leaf)
+
+        if isinstance(subscript_start, Node):
+            if subscript_start.type == syms.listmaker:
+                return False
+
+            if subscript_start.type == syms.subscriptlist:
+                subscript_start = child_towards(subscript_start, leaf)
          return subscript_start is not None and any(
              n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
          )
@@ -1390,44 +1390,8 @@ class EmptyLineTracker:
                  before = 0 if depth else 1
              else:
                  before = 1 if depth else 2
-        is_decorator = current_line.is_decorator
-        if is_decorator or current_line.is_def or current_line.is_class:
-            if not is_decorator:
-                self.previous_defs.append(depth)
-            if self.previous_line is None:
-                # Don't insert empty lines before the first line in the file.
-                return 0, 0
-
-            if self.previous_line.is_decorator:
-                return 0, 0
-
-            if self.previous_line.depth < current_line.depth and (
-                self.previous_line.is_class or self.previous_line.is_def
-            ):
-                return 0, 0
-
-            if (
-                self.previous_line.is_comment
-                and self.previous_line.depth == current_line.depth
-                and before == 0
-            ):
-                return 0, 0
-
-            if self.is_pyi:
-                if self.previous_line.depth > current_line.depth:
-                    newlines = 1
-                elif current_line.is_class or self.previous_line.is_class:
-                    if current_line.is_stub_class and self.previous_line.is_stub_class:
-                        newlines = 0
-                    else:
-                        newlines = 1
-                else:
-                    newlines = 0
-            else:
-                newlines = 2
-            if current_line.depth and newlines:
-                newlines -= 1
-            return newlines, 0
+        if current_line.is_decorator or current_line.is_def or current_line.is_class:
+            return self._maybe_empty_lines_for_class_or_def(current_line, before)
  
          if (
              self.previous_line
@@ -1446,6 +1410,50 @@ class EmptyLineTracker:
  
          return before, 0
  
+    def _maybe_empty_lines_for_class_or_def(
+        self, current_line: Line, before: int
+    ) -> Tuple[int, int]:
+        if not current_line.is_decorator:
+            self.previous_defs.append(current_line.depth)
+        if self.previous_line is None:
+            # Don't insert empty lines before the first line in the file.
+            return 0, 0
+
+        if self.previous_line.is_decorator:
+            return 0, 0
+
+        if self.previous_line.depth < current_line.depth and (
+            self.previous_line.is_class or self.previous_line.is_def
+        ):
+            return 0, 0
+
+        if (
+            self.previous_line.is_comment
+            and self.previous_line.depth == current_line.depth
+            and before == 0
+        ):
+            return 0, 0
+
+        if self.is_pyi:
+            if self.previous_line.depth > current_line.depth:
+                newlines = 1
+            elif current_line.is_class or self.previous_line.is_class:
+                if current_line.is_stub_class and self.previous_line.is_stub_class:
+                    # No blank line between classes with an empty body
+                    newlines = 0
+                else:
+                    newlines = 1
+            elif current_line.is_def and not self.previous_line.is_def:
+                # Blank line between a block of functions and a block of non-functions
+                newlines = 1
+            else:
+                newlines = 0
+        else:
+            newlines = 2
+        if current_line.depth and newlines:
+            newlines -= 1
+        return newlines, 0
+
  
  @dataclass
  class LineGenerator(Visitor[Line]):
@@ -2037,6 +2045,10 @@ def is_split_before_delimiter(leaf: Leaf, previous: Leaf = None) -> int:
      return 0
  
  
+FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"}
+FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"}
+
+
  def generate_comments(leaf: LN) -> Iterator[Leaf]:
      """Clean the prefix of the `leaf` and generate comments from it, if any.
  
@@ -2056,16 +2068,37 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]:
      Inline comments are emitted as regular token.COMMENT leaves.  Standalone
      are emitted with a fake STANDALONE_COMMENT token identifier.
      """
-    p = leaf.prefix
-    if not p:
-        return
+    for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
+        yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
+        if pc.value in FMT_ON:
+            raise FormatOn(pc.consumed)
  
-    if "#" not in p:
-        return
+        if pc.value in FMT_OFF:
+            if pc.type == STANDALONE_COMMENT:
+                raise FormatOff(pc.consumed)
+
+            prev = preceding_leaf(leaf)
+            if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
+                raise FormatOff(pc.consumed)
+
+
+@dataclass
+class ProtoComment:
+    type: int  # token.COMMENT or STANDALONE_COMMENT
+    value: str  # content of the comment
+    newlines: int  # how many newlines before the comment
+    consumed: int  # how many characters of the original leaf's prefix did we consume
+
+
+@lru_cache(maxsize=4096)
+def list_comments(prefix: str, is_endmarker: bool) -> List[ProtoComment]:
+    result: List[ProtoComment] = []
+    if not prefix or "#" not in prefix:
+        return result
  
      consumed = 0
      nlines = 0
-    for index, line in enumerate(p.split("\n")):
+    for index, line in enumerate(prefix.split("\n")):
          consumed += len(line) + 1  # adding the length of the split '\n'
          line = line.lstrip()
          if not line:
@@ -2073,25 +2106,18 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]:
          if not line.startswith("#"):
              continue
  
-        if index == 0 and leaf.type != token.ENDMARKER:
+        if index == 0 and not is_endmarker:
              comment_type = token.COMMENT  # simple trailing comment
          else:
              comment_type = STANDALONE_COMMENT
          comment = make_comment(line)
-        yield Leaf(comment_type, comment, prefix="\n" * nlines)
-
-        if comment in {"# fmt: on", "# yapf: enable"}:
-            raise FormatOn(consumed)
-
-        if comment in {"# fmt: off", "# yapf: disable"}:
-            if comment_type == STANDALONE_COMMENT:
-                raise FormatOff(consumed)
-
-            prev = preceding_leaf(leaf)
-            if not prev or prev.type in WHITESPACE:  # standalone comment in disguise
-                raise FormatOff(consumed)
-
+        result.append(
+            ProtoComment(
+                type=comment_type, value=comment, newlines=nlines, consumed=consumed
+            )
+        )
          nlines = 0
+    return result
  
  
  def make_comment(content: str) -> str:
@@ -2529,8 +2555,8 @@ def normalize_string_quotes(leaf: Leaf) -> None:
  
      prefix = leaf.value[:first_quote_pos]
      unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
-    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
-    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
+    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
+    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
      body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
      if "r" in prefix.casefold():
          if unescaped_new_quote.search(body):
@@ -2541,14 +2567,20 @@ def normalize_string_quotes(leaf: Leaf) -> None:
          # Do not introduce or remove backslashes in raw strings
          new_body = body
      else:
-        # remove unnecessary quotes
+        # remove unnecessary escapes
          new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
          if body != new_body:
-            # Consider the string without unnecessary quotes as the original
+            # Consider the string without unnecessary escapes as the original
              body = new_body
              leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
          new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
          new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
+    if "f" in prefix.casefold():
+        matches = re.findall(r"[^{]\{(.*?)\}[^}]", new_body)
+        for m in matches:
+            if "\\" in str(m):
+                # Do not introduce backslashes in interpolated expressions
+                return
      if new_quote == '"""' and new_body[-1:] == '"':
          # edge case:
          new_body = new_body[:-1] + '\\"'
@@ -2572,10 +2604,10 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
      Standardizes on visible parentheses for single-element tuples, and keeps
      existing visible parentheses for other tuples and generator expressions.
      """
-    try:
-        list(generate_comments(node))
-    except FormatOff:
-        return  # This `node` has a prefix with `# fmt: off`, don't mess with parens.
+    for pc in list_comments(node.prefix, is_endmarker=False):
+        if pc.value in FMT_OFF:
+            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
+            return
  
      check_lpar = False
      for index, child in enumerate(list(node.children)):
@@ -2935,11 +2967,24 @@ def gen_python_files_in_dir(
      """Generate all files under `path` whose paths are not excluded by the
      `exclude` regex, but are included by the `include` regex.
  
+    Symbolic links pointing outside of the root directory are ignored.
+
      `report` is where output about exclusions goes.
      """
      assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
      for child in path.iterdir():
-        normalized_path = "/" + child.resolve().relative_to(root).as_posix()
+        try:
+            normalized_path = "/" + child.resolve().relative_to(root).as_posix()
+        except ValueError:
+            if child.is_symlink():
+                report.path_ignored(
+                    child,
+                    "is a symbolic link that points outside of the root directory",
+                )
+                continue
+
+            raise
+
          if child.is_dir():
              normalized_path += "/"
          exclude_match = exclude.search(normalized_path)