src/black_primer/lib.py

   1 #!/usr/bin/env python3
   2
   3 import asyncio
   4 import errno
   5 import json
   6 import logging
   7 import os
   8 import stat
   9 import sys
  10 from functools import partial
  11 from pathlib import Path
  12 from platform import system
  13 from shutil import rmtree, which
  14 from subprocess import CalledProcessError
  15 from sys import version_info
  16 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
  17 from urllib.parse import urlparse
  18
  19 import click
  20
  21
  22 WINDOWS = system() == "Windows"
  23 BLACK_BINARY = "black.exe" if WINDOWS else "black"
  24 GIT_BIANRY = "git.exe" if WINDOWS else "git"
  25 LOG = logging.getLogger(__name__)
  26
  27
# Windows needs a ProactorEventLoop in order to exec subprocesses from asyncio.
# Starting with Python 3.8 this is the default loop there, so this block can be
# removed once Black's minimum supported Python is >= 3.8.
# NOTE: keep the check spelled as a direct `sys.platform` comparison: mypy only
# respects sys.platform when it appears directly in the evaluated condition.
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
  34
  35
  36 class Results(NamedTuple):
  37     stats: Dict[str, int] = {}
  38     failed_projects: Dict[str, CalledProcessError] = {}
  39
  40
  41 async def _gen_check_output(
  42     cmd: Sequence[str],
  43     timeout: float = 300,
  44     env: Optional[Dict[str, str]] = None,
  45     cwd: Optional[Path] = None,
  46 ) -> Tuple[bytes, bytes]:
  47     process = await asyncio.create_subprocess_exec(
  48         *cmd,
  49         stdout=asyncio.subprocess.PIPE,
  50         stderr=asyncio.subprocess.STDOUT,
  51         env=env,
  52         cwd=cwd,
  53     )
  54     try:
  55         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
  56     except asyncio.TimeoutError:
  57         process.kill()
  58         await process.wait()
  59         raise
  60
  61     if process.returncode != 0:
  62         returncode = process.returncode
  63         if returncode is None:
  64             returncode = 69
  65
  66         cmd_str = " ".join(cmd)
  67         raise CalledProcessError(returncode, cmd_str, output=stdout, stderr=stderr)
  68
  69     return (stdout, stderr)
  70
  71
  72 def analyze_results(project_count: int, results: Results) -> int:
  73     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  74     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  75
  76     click.secho("-- primer results 📊 --\n", bold=True)
  77     click.secho(
  78         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  79         bold=True,
  80         fg="green",
  81     )
  82     click.secho(
  83         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  84         bold=bool(results.stats["failed"]),
  85         fg="red",
  86     )
  87     s = "" if results.stats["disabled"] == 1 else "s"
  88     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
  89     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
  90     click.echo(
  91         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
  92     )
  93     click.echo(
  94         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
  95     )
  96
  97     if results.failed_projects:
  98         click.secho("\nFailed projects:\n", bold=True)
  99
 100     for project_name, project_cpe in results.failed_projects.items():
 101         print(f"## {project_name}:")
 102         print(f" - Returned {project_cpe.returncode}")
 103         if project_cpe.stderr:
 104             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
 105         if project_cpe.stdout:
 106             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 107         print("")
 108
 109     return results.stats["failed"]
 110
 111
 112 async def black_run(
 113     repo_path: Path, project_config: Dict[str, Any], results: Results
 114 ) -> None:
 115     """Run Black and record failures"""
 116     cmd = [str(which(BLACK_BINARY))]
 117     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 118         cmd.extend(*project_config["cli_arguments"])
 119     cmd.extend(["--check", "--diff", "."])
 120
 121     try:
 122         _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
 123     except asyncio.TimeoutError:
 124         results.stats["failed"] += 1
 125         LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 126     except CalledProcessError as cpe:
 127         # TODO: Tune for smarter for higher signal
 128         # If any other return value than 1 we raise - can disable project in config
 129         if cpe.returncode == 1:
 130             if not project_config["expect_formatting_changes"]:
 131                 results.stats["failed"] += 1
 132                 results.failed_projects[repo_path.name] = cpe
 133             else:
 134                 results.stats["success"] += 1
 135             return
 136         elif cpe.returncode > 1:
 137             results.stats["failed"] += 1
 138             results.failed_projects[repo_path.name] = cpe
 139             return
 140
 141         LOG.error(f"Unknown error with {repo_path}")
 142         raise
 143
 144     # If we get here and expect formatting changes something is up
 145     if project_config["expect_formatting_changes"]:
 146         results.stats["failed"] += 1
 147         results.failed_projects[repo_path.name] = CalledProcessError(
 148             0, cmd, b"Expected formatting changes but didn't get any!", b""
 149         )
 150         return
 151
 152     results.stats["success"] += 1
 153
 154
 155 async def git_checkout_or_rebase(
 156     work_path: Path,
 157     project_config: Dict[str, Any],
 158     rebase: bool = False,
 159     *,
 160     depth: int = 1,
 161 ) -> Optional[Path]:
 162     """git Clone project or rebase"""
 163     git_bin = str(which(GIT_BIANRY))
 164     if not git_bin:
 165         LOG.error("No git binary found")
 166         return None
 167
 168     repo_url_parts = urlparse(project_config["git_clone_url"])
 169     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 170
 171     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 172     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 173     cwd = work_path
 174     if repo_path.exists() and rebase:
 175         cmd = [git_bin, "pull", "--rebase"]
 176         cwd = repo_path
 177     elif repo_path.exists():
 178         return repo_path
 179
 180     try:
 181         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 182     except (asyncio.TimeoutError, CalledProcessError) as e:
 183         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 184         return None
 185
 186     return repo_path
 187
 188
 189 def handle_PermissionError(
 190     func: Callable, path: Path, exc: Tuple[Any, Any, Any]
 191 ) -> None:
 192     """
 193     Handle PermissionError during shutil.rmtree.
 194
 195     This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
 196     the error was EACCES (i.e. Permission denied). If true, the path is set writable,
 197     readable, and executable by everyone. Finally, it tries the error causing delete
 198     operation again.
 199
 200     If the check is false, then the original error will be reraised as this function
 201     can't handle it.
 202     """
 203     excvalue = exc[1]
 204     LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
 205     if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
 206         LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
 207         os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
 208         func(path)  # Try the error causing delete operation again
 209     else:
 210         raise
 211
 212
 213 async def load_projects_queue(
 214     config_path: Path,
 215 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 216     """Load project config and fill queue with all the project names"""
 217     with config_path.open("r") as cfp:
 218         config = json.load(cfp)
 219
 220     # TODO: Offer more options here
 221     # e.g. Run on X random packages or specific sub list etc.
 222     project_names = sorted(config["projects"].keys())
 223     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 224     for project in project_names:
 225         await queue.put(project)
 226
 227     return config, queue
 228
 229
 230 async def project_runner(
 231     idx: int,
 232     config: Dict[str, Any],
 233     queue: asyncio.Queue,
 234     work_path: Path,
 235     results: Results,
 236     long_checkouts: bool = False,
 237     rebase: bool = False,
 238     keep: bool = False,
 239 ) -> None:
 240     """Check out project and run Black on it + record result"""
 241     loop = asyncio.get_event_loop()
 242     py_version = f"{version_info[0]}.{version_info[1]}"
 243     while True:
 244         try:
 245             project_name = queue.get_nowait()
 246         except asyncio.QueueEmpty:
 247             LOG.debug(f"project_runner {idx} exiting")
 248             return
 249         LOG.debug(f"worker {idx} working on {project_name}")
 250
 251         project_config = config["projects"][project_name]
 252
 253         # Check if disabled by config
 254         if "disabled" in project_config and project_config["disabled"]:
 255             results.stats["disabled"] += 1
 256             LOG.info(f"Skipping {project_name} as it's disabled via config")
 257             continue
 258
 259         # Check if we should run on this version of Python
 260         if (
 261             "all" not in project_config["py_versions"]
 262             and py_version not in project_config["py_versions"]
 263         ):
 264             results.stats["wrong_py_ver"] += 1
 265             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 266             continue
 267
 268         # Check if we're doing big projects / long checkouts
 269         if not long_checkouts and project_config["long_checkout"]:
 270             results.stats["skipped_long_checkout"] += 1
 271             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 272             continue
 273
 274         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 275         if not repo_path:
 276             continue
 277         await black_run(repo_path, project_config, results)
 278
 279         if not keep:
 280             LOG.debug(f"Removing {repo_path}")
 281             rmtree_partial = partial(
 282                 rmtree, path=repo_path, onerror=handle_PermissionError
 283             )
 284             await loop.run_in_executor(None, rmtree_partial)
 285
 286         LOG.info(f"Finished {project_name}")
 287
 288
 289 async def process_queue(
 290     config_file: str,
 291     work_path: Path,
 292     workers: int,
 293     keep: bool = False,
 294     long_checkouts: bool = False,
 295     rebase: bool = False,
 296 ) -> int:
 297     """
 298     Process the queue with X workers and evaluate results
 299     - Success is guaged via the config "expect_formatting_changes"
 300
 301     Integer return equals the number of failed projects
 302     """
 303     results = Results()
 304     results.stats["disabled"] = 0
 305     results.stats["failed"] = 0
 306     results.stats["skipped_long_checkout"] = 0
 307     results.stats["success"] = 0
 308     results.stats["wrong_py_ver"] = 0
 309
 310     config, queue = await load_projects_queue(Path(config_file))
 311     project_count = queue.qsize()
 312     s = "" if project_count == 1 else "s"
 313     LOG.info(f"{project_count} project{s} to run Black over")
 314     if project_count < 1:
 315         return -1
 316
 317     s = "" if workers == 1 else "s"
 318     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 319     # Wait until we finish running all the projects before analyzing
 320     await asyncio.gather(
 321         *[
 322             project_runner(
 323                 i, config, queue, work_path, results, long_checkouts, rebase, keep
 324             )
 325             for i in range(workers)
 326         ]
 327     )
 328
 329     LOG.info("Analyzing results")
 330     return analyze_results(project_count, results)
 331
 332
# This file only provides library functions: running it directly as a script
# is refused with an explicit error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")