]> git.madduck.net Git - etc/vim.git/blobdiff - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Do not use gitignore if explicitly passing excludes (#2170)
[etc/vim.git] / src / black_primer / lib.py
index 87028d72509ac5777ad75fd6d9aec3453eadadb8..3ce383f17ce2b1b6ef7d2509b66e3c364e784f9e 100644 (file)
@@ -1,24 +1,37 @@
-#!/usr/bin/env python3
-
-# Module '__future__' has no attribute 'annotations'
-from __future__ import annotations  # type: ignore
-
 import asyncio
 import asyncio
+import errno
 import json
 import logging
 import json
 import logging
+import os
+import stat
+import sys
+from functools import partial
 from pathlib import Path
 from pathlib import Path
+from platform import system
 from shutil import rmtree, which
 from subprocess import CalledProcessError
 from sys import version_info
 from shutil import rmtree, which
 from subprocess import CalledProcessError
 from sys import version_info
-from typing import Any, Dict, NamedTuple, Optional, Sequence, Tuple
+from tempfile import TemporaryDirectory
+from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
 from urllib.parse import urlparse
 
 import click
 
 
 from urllib.parse import urlparse
 
 import click
 
 
+WINDOWS = system() == "Windows"
+BLACK_BINARY = "black.exe" if WINDOWS else "black"
+GIT_BINARY = "git.exe" if WINDOWS else "git"
 LOG = logging.getLogger(__name__)
 
 
 LOG = logging.getLogger(__name__)
 
 
+# Windows needs a ProactorEventLoop if you want to exec subprocesses
+# Starting with 3.8 this is the default - can remove when Black >= 3.8
+# mypy only respects sys.platform if directly in the evaluation
+# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
+if sys.platform == "win32":
+    asyncio.set_event_loop(asyncio.ProactorEventLoop())
+
+
 class Results(NamedTuple):
     stats: Dict[str, int] = {}
     failed_projects: Dict[str, CalledProcessError] = {}
 class Results(NamedTuple):
     stats: Dict[str, int] = {}
     failed_projects: Dict[str, CalledProcessError] = {}
@@ -26,7 +39,7 @@ class Results(NamedTuple):
 
 async def _gen_check_output(
     cmd: Sequence[str],
 
 async def _gen_check_output(
     cmd: Sequence[str],
-    timeout: float = 30,
+    timeout: float = 300,
     env: Optional[Dict[str, str]] = None,
     cwd: Optional[Path] = None,
 ) -> Tuple[bytes, bytes]:
     env: Optional[Dict[str, str]] = None,
     cwd: Optional[Path] = None,
 ) -> Tuple[bytes, bytes]:
@@ -44,6 +57,11 @@ async def _gen_check_output(
         await process.wait()
         raise
 
         await process.wait()
         raise
 
+    # A non-optional timeout was supplied to asyncio.wait_for, guaranteeing
+    # a timeout or completed process.  A terminated Python process will have a
+    # non-empty returncode value.
+    assert process.returncode is not None
+
     if process.returncode != 0:
         cmd_str = " ".join(cmd)
         raise CalledProcessError(
     if process.returncode != 0:
         cmd_str = " ".join(cmd)
         raise CalledProcessError(
@@ -53,11 +71,11 @@ async def _gen_check_output(
     return (stdout, stderr)
 
 
     return (stdout, stderr)
 
 
-async def analyze_results(project_count: int, results: Results) -> int:
+def analyze_results(project_count: int, results: Results) -> int:
     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
 
     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
 
-    click.secho(f"-- primer results 📊 --\n", bold=True)
+    click.secho("-- primer results 📊 --\n", bold=True)
     click.secho(
         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
         bold=True,
     click.secho(
         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
         bold=True,
@@ -68,16 +86,18 @@ async def analyze_results(project_count: int, results: Results) -> int:
         bold=bool(results.stats["failed"]),
         fg="red",
     )
         bold=bool(results.stats["failed"]),
         fg="red",
     )
-    click.echo(f" - {results.stats['disabled']} projects Disabled by config")
+    s = "" if results.stats["disabled"] == 1 else "s"
+    click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
+    s = "" if results.stats["wrong_py_ver"] == 1 else "s"
     click.echo(
     click.echo(
-        f" - {results.stats['wrong_py_ver']} projects skipped due to Python Version"
+        f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
     )
     click.echo(
         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
     )
 
     if results.failed_projects:
     )
     click.echo(
         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
     )
 
     if results.failed_projects:
-        click.secho(f"\nFailed Projects:\n", bold=True)
+        click.secho("\nFailed projects:\n", bold=True)
 
     for project_name, project_cpe in results.failed_projects.items():
         print(f"## {project_name}:")
 
     for project_name, project_cpe in results.failed_projects.items():
         print(f"## {project_name}:")
@@ -92,25 +112,59 @@ async def analyze_results(project_count: int, results: Results) -> int:
 
 
 async def black_run(
 
 
 async def black_run(
-    repo_path: Path, project_config: Dict[str, Any], results: Results
+    repo_path: Path,
+    project_config: Dict[str, Any],
+    results: Results,
+    no_diff: bool = False,
 ) -> None:
 ) -> None:
-    """Run black and record failures"""
-    cmd = [str(which("black"))]
-    if project_config["cli_arguments"]:
-        cmd.extend(*project_config["cli_arguments"])
-    cmd.extend(["--check", "--diff", "."])
+    """Run Black and record failures"""
+    cmd = [str(which(BLACK_BINARY))]
+    if "cli_arguments" in project_config and project_config["cli_arguments"]:
+        cmd.extend(project_config["cli_arguments"])
+    cmd.append("--check")
+    if no_diff:
+        cmd.append(".")
+    else:
+        cmd.extend(["--diff", "."])
+
+    with TemporaryDirectory() as tmp_path:
+        # Prevent reading top-level user configs by manipulating envionment variables
+        env = {
+            **os.environ,
+            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
+            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
+        }
 
 
-    try:
-        _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
-    except asyncio.TimeoutError:
-        results.stats["failed"] += 1
-        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
-    except CalledProcessError as cpe:
-        # TODO: This might need to be tuned and made smarter for higher signal
-        if not project_config["expect_formatting_changes"] and cpe.returncode == 1:
+        try:
+            _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path, env=env)
+        except asyncio.TimeoutError:
             results.stats["failed"] += 1
             results.stats["failed"] += 1
-            results.failed_projects[repo_path.name] = cpe
-            return
+            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
+        except CalledProcessError as cpe:
+            # TODO: Tune for smarter for higher signal
+            # If any other return value than 1 we raise - can disable project in config
+            if cpe.returncode == 1:
+                if not project_config["expect_formatting_changes"]:
+                    results.stats["failed"] += 1
+                    results.failed_projects[repo_path.name] = cpe
+                else:
+                    results.stats["success"] += 1
+                return
+            elif cpe.returncode > 1:
+                results.stats["failed"] += 1
+                results.failed_projects[repo_path.name] = cpe
+                return
+
+            LOG.error(f"Unknown error with {repo_path}")
+            raise
+
+    # If we get here and expect formatting changes something is up
+    if project_config["expect_formatting_changes"]:
+        results.stats["failed"] += 1
+        results.failed_projects[repo_path.name] = CalledProcessError(
+            0, cmd, b"Expected formatting changes but didn't get any!", b""
+        )
+        return
 
     results.stats["success"] += 1
 
 
     results.stats["success"] += 1
 
@@ -123,9 +177,9 @@ async def git_checkout_or_rebase(
     depth: int = 1,
 ) -> Optional[Path]:
     """git Clone project or rebase"""
     depth: int = 1,
 ) -> Optional[Path]:
     """git Clone project or rebase"""
-    git_bin = str(which("git"))
+    git_bin = str(which(GIT_BINARY))
     if not git_bin:
     if not git_bin:
-        LOG.error(f"No git binary found")
+        LOG.error("No git binary found")
         return None
 
     repo_url_parts = urlparse(project_config["git_clone_url"])
         return None
 
     repo_url_parts = urlparse(project_config["git_clone_url"])
@@ -149,9 +203,33 @@ async def git_checkout_or_rebase(
     return repo_path
 
 
     return repo_path
 
 
+def handle_PermissionError(
+    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
+) -> None:
+    """
+    Handle PermissionError during shutil.rmtree.
+
+    This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
+    the error was EACCES (i.e. Permission denied). If true, the path is set writable,
+    readable, and executable by everyone. Finally, it tries the error causing delete
+    operation again.
+
+    If the check is false, then the original error will be reraised as this function
+    can't handle it.
+    """
+    excvalue = exc[1]
+    LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
+    if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
+        LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
+        os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
+        func(path)  # Try the error causing delete operation again
+    else:
+        raise
+
+
 async def load_projects_queue(
     config_path: Path,
 async def load_projects_queue(
     config_path: Path,
-) -> Tuple[Dict[str, Any], asyncio.Queue[str]]:
+) -> Tuple[Dict[str, Any], asyncio.Queue]:
     """Load project config and fill queue with all the project names"""
     with config_path.open("r") as cfp:
         config = json.load(cfp)
     """Load project config and fill queue with all the project names"""
     with config_path.open("r") as cfp:
         config = json.load(cfp)
@@ -159,7 +237,7 @@ async def load_projects_queue(
     # TODO: Offer more options here
     # e.g. Run on X random packages or specific sub list etc.
     project_names = sorted(config["projects"].keys())
     # TODO: Offer more options here
     # e.g. Run on X random packages or specific sub list etc.
     project_names = sorted(config["projects"].keys())
-    queue: asyncio.Queue[str] = asyncio.Queue(maxsize=len(project_names))
+    queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
     for project in project_names:
         await queue.put(project)
 
     for project in project_names:
         await queue.put(project)
 
@@ -169,14 +247,15 @@ async def load_projects_queue(
 async def project_runner(
     idx: int,
     config: Dict[str, Any],
 async def project_runner(
     idx: int,
     config: Dict[str, Any],
-    queue: asyncio.Queue[str],
+    queue: asyncio.Queue,
     work_path: Path,
     results: Results,
     long_checkouts: bool = False,
     rebase: bool = False,
     keep: bool = False,
     work_path: Path,
     results: Results,
     long_checkouts: bool = False,
     rebase: bool = False,
     keep: bool = False,
+    no_diff: bool = False,
 ) -> None:
 ) -> None:
-    """Checkout project and run black on it + record result"""
+    """Check out project and run Black on it + record result"""
     loop = asyncio.get_event_loop()
     py_version = f"{version_info[0]}.{version_info[1]}"
     while True:
     loop = asyncio.get_event_loop()
     py_version = f"{version_info[0]}.{version_info[1]}"
     while True:
@@ -185,6 +264,7 @@ async def project_runner(
         except asyncio.QueueEmpty:
             LOG.debug(f"project_runner {idx} exiting")
             return
         except asyncio.QueueEmpty:
             LOG.debug(f"project_runner {idx} exiting")
             return
+        LOG.debug(f"worker {idx} working on {project_name}")
 
         project_config = config["projects"][project_name]
 
 
         project_config = config["projects"][project_name]
 
@@ -212,11 +292,16 @@ async def project_runner(
         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
         if not repo_path:
             continue
         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
         if not repo_path:
             continue
-        await black_run(repo_path, project_config, results)
+        await black_run(repo_path, project_config, results, no_diff)
 
         if not keep:
             LOG.debug(f"Removing {repo_path}")
 
         if not keep:
             LOG.debug(f"Removing {repo_path}")
-            await loop.run_in_executor(None, rmtree, repo_path)
+            rmtree_partial = partial(
+                rmtree, path=repo_path, onerror=handle_PermissionError
+            )
+            await loop.run_in_executor(None, rmtree_partial)
+
+        LOG.info(f"Finished {project_name}")
 
 
 async def process_queue(
 
 
 async def process_queue(
@@ -226,6 +311,7 @@ async def process_queue(
     keep: bool = False,
     long_checkouts: bool = False,
     rebase: bool = False,
     keep: bool = False,
     long_checkouts: bool = False,
     rebase: bool = False,
+    no_diff: bool = False,
 ) -> int:
     """
     Process the queue with X workers and evaluate results
 ) -> int:
     """
     Process the queue with X workers and evaluate results
@@ -242,20 +328,34 @@ async def process_queue(
 
     config, queue = await load_projects_queue(Path(config_file))
     project_count = queue.qsize()
 
     config, queue = await load_projects_queue(Path(config_file))
     project_count = queue.qsize()
-    LOG.info(f"{project_count} projects to run black over")
-    if not project_count:
+    s = "" if project_count == 1 else "s"
+    LOG.info(f"{project_count} project{s} to run Black over")
+    if project_count < 1:
         return -1
 
         return -1
 
-    LOG.debug(f"Using {workers} parallel workers to run black")
+    s = "" if workers == 1 else "s"
+    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
     # Wait until we finish running all the projects before analyzing
     await asyncio.gather(
         *[
             project_runner(
     # Wait until we finish running all the projects before analyzing
     await asyncio.gather(
         *[
             project_runner(
-                i, config, queue, work_path, results, long_checkouts, rebase, keep
+                i,
+                config,
+                queue,
+                work_path,
+                results,
+                long_checkouts,
+                rebase,
+                keep,
+                no_diff,
             )
             for i in range(workers)
         ]
     )
 
     LOG.info("Analyzing results")
             )
             for i in range(workers)
         ]
     )
 
     LOG.info("Analyzing results")
-    return await analyze_results(project_count, results)
+    return analyze_results(project_count, results)
+
+
+if __name__ == "__main__":  # pragma: nocover
+    raise NotImplementedError("lib is a library, funnily enough.")