Fix string normalization dropping backslashes when more than three precede a quote

[etc/vim.git] / black.py
diff --git a/black.py b/black.py

index cc5075c6666ffca0cc637b652a0550f02aa5a6e1..9d9bada604951bef5bc9afca90033a31411c260e 100644 (file)
--- a/black.py
+++ b/black.py
@@ -3,7 +3,7 @@ from asyncio.base_events import BaseEventLoop
  from concurrent.futures import Executor, ProcessPoolExecutor
  from datetime import datetime
  from enum import Enum, Flag
-from functools import partial, wraps
+from functools import lru_cache, partial, wraps
  import io
  import keyword
  import logging
@@ -38,6 +38,7 @@ from typing import (
  from appdirs import user_cache_dir
  from attr import dataclass, Factory
  import click
+import toml
  
  # lib2to3 fork
  from blib2to3.pytree import Node, Leaf, type_repr
@@ -46,7 +47,7 @@ from blib2to3.pgen2 import driver, token
  from blib2to3.pgen2.parse import ParseError
  
  
-__version__ = "18.6b1"
+__version__ = "18.6b2"
  DEFAULT_LINE_LENGTH = 88
  DEFAULT_EXCLUDES = (
      r"/(\.git|\.hg|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist)/"
@@ -156,7 +157,41 @@ class FileMode(Flag):
          return mode
  
  
-@click.command()
+def read_pyproject_toml(
+    ctx: click.Context, param: click.Parameter, value: Union[str, int, bool, None]
+) -> Optional[str]:
+    """Inject Black configuration from "pyproject.toml" into defaults in `ctx`.
+
+    An empty `value` falls back to "pyproject.toml" in the project root.  Returns
+    the path whose [tool.black] table was applied (None if nothing was found) and
+    raises click.BadOptionUsage if the file cannot be read or parsed.
+    """
+    assert not isinstance(value, (int, bool)), "Invalid parameter type passed"
+    if not value:
+        path = find_project_root(ctx.params.get("src", ())) / "pyproject.toml"
+        if not path.is_file():
+            return None
+        value = str(path)
+
+    try:
+        pyproject_toml = toml.load(value)
+        config = pyproject_toml.get("tool", {}).get("black", {})
+    except (toml.TomlDecodeError, OSError) as e:
+        raise click.BadOptionUsage(f"Error reading configuration file: {e}", ctx) from e
+
+    if not config:
+        return None
+
+    if ctx.default_map is None:
+        ctx.default_map = {}
+    # Keys may be spelled like CLI flags ("--line-length"); map to parameter names.
+    ctx.default_map.update(  # type: ignore  # bad types in .pyi
+        {k.replace("--", "").replace("-", "_"): v for k, v in config.items()}
+    )
+    return value
+
+
+@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
  @click.option(
      "-l",
      "--line-length",
@@ -257,6 +292,16 @@ class FileMode(Flag):
      type=click.Path(
          exists=True, file_okay=True, dir_okay=True, readable=True, allow_dash=True
      ),
+    is_eager=True,
+)
+@click.option(
+    "--config",
+    type=click.Path(
+        exists=False, file_okay=True, dir_okay=False, readable=True, allow_dash=False
+    ),
+    is_eager=True,
+    callback=read_pyproject_toml,
+    help="Read configuration from PATH.",
  )
  @click.pass_context
  def main(
@@ -272,26 +317,29 @@ def main(
      verbose: bool,
      include: str,
      exclude: str,
-    src: List[str],
+    src: Tuple[str],
+    config: Optional[str],
  ) -> None:
      """The uncompromising code formatter."""
      write_back = WriteBack.from_configuration(check=check, diff=diff)
      mode = FileMode.from_configuration(
          py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
      )
-    report = Report(check=check, quiet=quiet, verbose=verbose)
-    sources: Set[Path] = set()
+    if config and verbose:
+        out(f"Using configuration from {config}.", bold=False, fg="blue")
      try:
-        include_regex = re.compile(include)
+        include_regex = re_compile_maybe_verbose(include)
      except re.error:
          err(f"Invalid regular expression for include given: {include!r}")
          ctx.exit(2)
      try:
-        exclude_regex = re.compile(exclude)
+        exclude_regex = re_compile_maybe_verbose(exclude)
      except re.error:
          err(f"Invalid regular expression for exclude given: {exclude!r}")
          ctx.exit(2)
+    report = Report(check=check, quiet=quiet, verbose=verbose)
      root = find_project_root(src)
+    sources: Set[Path] = set()
      for s in src:
          p = Path(s)
          if p.is_dir():
@@ -307,9 +355,8 @@ def main(
          if verbose or not quiet:
              out("No paths given. Nothing to do 😴")
          ctx.exit(0)
-        return
  
-    elif len(sources) == 1:
+    if len(sources) == 1:
          reformat_one(
              src=sources.pop(),
              line_length=line_length,
@@ -1183,6 +1230,9 @@ class Line:
  
          Provide a non-negative leaf `_index` to speed up the function.
          """
+        if not self.comments:
+            return
+
          if _index == -1:
              for _index, _leaf in enumerate(self.leaves):
                  if leaf is _leaf:
@@ -1206,18 +1256,18 @@ class Line:
  
      def is_complex_subscript(self, leaf: Leaf) -> bool:
          """Return True iff `leaf` is part of a slice with non-trivial exprs."""
-        open_lsqb = (
-            leaf if leaf.type == token.LSQB else self.bracket_tracker.get_open_lsqb()
-        )
+        open_lsqb = self.bracket_tracker.get_open_lsqb()
          if open_lsqb is None:
              return False
  
          subscript_start = open_lsqb.next_sibling
-        if (
-            isinstance(subscript_start, Node)
-            and subscript_start.type == syms.subscriptlist
-        ):
-            subscript_start = child_towards(subscript_start, leaf)
+
+        if isinstance(subscript_start, Node):
+            if subscript_start.type == syms.listmaker:
+                return False
+
+            if subscript_start.type == syms.subscriptlist:
+                subscript_start = child_towards(subscript_start, leaf)
          return subscript_start is not None and any(
              n.type in TEST_DESCENDANTS for n in subscript_start.pre_order()
          )
@@ -1835,7 +1885,7 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str:  # noqa C901
              elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
                  return NO
  
-        elif t == token.NAME or t == token.NUMBER:
+        elif t in {token.NAME, token.NUMBER, token.STRING}:
              return NO
  
      elif p.type == syms.import_from:
@@ -2258,7 +2308,7 @@ def right_hand_split(
              elif head.contains_multiline_strings() or tail.contains_multiline_strings():
                  raise CannotSplit(
                      "The current optional pair of parentheses is bound to fail to "
-                    "satisfy the splitting algorithm becase the head or the tail "
+                    "satisfy the splitting algorithm because the head or the tail "
                      "contains multiline strings which by definition never fit one "
                      "line."
                  )
@@ -2479,8 +2529,8 @@ def normalize_string_quotes(leaf: Leaf) -> None:
  
      prefix = leaf.value[:first_quote_pos]
      unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
-    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
-    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
+    escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
+    escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
      body = leaf.value[first_quote_pos + len(orig_quote) : -len(orig_quote)]
      if "r" in prefix.casefold():
          if unescaped_new_quote.search(body):
@@ -2491,15 +2541,21 @@ def normalize_string_quotes(leaf: Leaf) -> None:
          # Do not introduce or remove backslashes in raw strings
          new_body = body
      else:
-        # remove unnecessary quotes
+        # remove unnecessary escapes
          new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
          if body != new_body:
-            # Consider the string without unnecessary quotes as the original
+            # Consider the string without unnecessary escapes as the original
              body = new_body
              leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
          new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
          new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
-    if new_quote == '"""' and new_body[-1] == '"':
+    if "f" in prefix.casefold():
+        matches = re.findall(r"[^{]\{(.*?)\}[^}]", new_body)
+        for m in matches:
+            if "\\" in str(m):
+                # Do not introduce backslashes in interpolated expressions
+                return
+    if new_quote == '"""' and new_body[-1:] == '"':
          # edge case:
          new_body = new_body[:-1] + '\\"'
      orig_escape_count = body.count("\\")
@@ -2562,7 +2618,7 @@ def normalize_invisible_parens(node: Node, parens_after: Set[str]) -> None:
  
  
  def maybe_make_parens_invisible_in_atom(node: LN) -> bool:
-    """If it's safe, make the parens in the atom `node` invisible, recusively."""
+    """If it's safe, make the parens in the atom `node` invisible, recursively."""
      if (
          node.type != syms.atom
          or is_empty_tuple(node)
@@ -2894,7 +2950,7 @@ def gen_python_files_in_dir(
              normalized_path += "/"
          exclude_match = exclude.search(normalized_path)
          if exclude_match and exclude_match.group(0):
-            report.path_ignored(child, f"matches --exclude={exclude.pattern}")
+            report.path_ignored(child, f"matches the --exclude regular expression")
              continue
  
          if child.is_dir():
@@ -2906,7 +2962,8 @@ def gen_python_files_in_dir(
                  yield child
  
  
-def find_project_root(srcs: List[str]) -> Path:
+@lru_cache()
+def find_project_root(srcs: Iterable[str]) -> Path:
      """Return a directory containing .git, .hg, or pyproject.toml.
  
      That directory can be one of the directories passed in `srcs` or their
@@ -3164,6 +3221,16 @@ def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
      return regex.sub(replacement, regex.sub(replacement, original))
  
  
+def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
+    """Return `regex` compiled into a pattern object.
+
+    A pattern that spans several lines is compiled in verbose mode.
+    """
+    # An inline "(?x)" flag switches on verbose mode for multi-line patterns.
+    source = "(?x)" + regex if "\n" in regex else regex
+    return re.compile(source)
+
+
  def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
      """Like `reversed(enumerate(sequence))` if that were possible."""
      index = len(sequence) - 1
@@ -3335,12 +3402,7 @@ def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
  
  
  def get_cache_file(line_length: int, mode: FileMode) -> Path:
-    pyi = bool(mode & FileMode.PYI)
-    py36 = bool(mode & FileMode.PYTHON36)
-    return (
-        CACHE_DIR
-        / f"cache.{line_length}{'.pyi' if pyi else ''}{'.py36' if py36 else ''}.pickle"
-    )
+    return CACHE_DIR / f"cache.{line_length}.{mode.value}.pickle"
  
  
  def read_cache(line_length: int, mode: FileMode) -> Cache: