src/black_primer/lib.py

   1 #!/usr/bin/env python3
   2
   3 import asyncio
   4 import errno
   5 import json
   6 import logging
   7 import os
   8 import stat
   9 import sys
  10 from functools import partial
  11 from pathlib import Path
  12 from platform import system
  13 from shutil import rmtree, which
  14 from subprocess import CalledProcessError
  15 from sys import version_info
  16 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
  17 from urllib.parse import urlparse
  18
  19 import click
  20
  21
  22 WINDOWS = system() == "Windows"
  23 BLACK_BINARY = "black.exe" if WINDOWS else "black"
  24 GIT_BIANRY = "git.exe" if WINDOWS else "git"
  25 LOG = logging.getLogger(__name__)
  26
  27
  28 # Windows needs a ProactorEventLoop if you want to exec subprocesses
  29 # Starting with 3.8 this is the default - can remove when Black >= 3.8
  30 # mypy only respects sys.platform if directly in the evaluation
  31 # https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
  32 if sys.platform == "win32":
  33     asyncio.set_event_loop(asyncio.ProactorEventLoop())
  34
  35
  36 class Results(NamedTuple):
  37     stats: Dict[str, int] = {}
  38     failed_projects: Dict[str, CalledProcessError] = {}
  39
  40
  41 async def _gen_check_output(
  42     cmd: Sequence[str],
  43     timeout: float = 300,
  44     env: Optional[Dict[str, str]] = None,
  45     cwd: Optional[Path] = None,
  46 ) -> Tuple[bytes, bytes]:
  47     process = await asyncio.create_subprocess_exec(
  48         *cmd,
  49         stdout=asyncio.subprocess.PIPE,
  50         stderr=asyncio.subprocess.STDOUT,
  51         env=env,
  52         cwd=cwd,
  53     )
  54     try:
  55         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
  56     except asyncio.TimeoutError:
  57         process.kill()
  58         await process.wait()
  59         raise
  60
  61     if process.returncode != 0:
  62         cmd_str = " ".join(cmd)
  63         raise CalledProcessError(
  64             process.returncode, cmd_str, output=stdout, stderr=stderr
  65         )
  66
  67     return (stdout, stderr)
  68
  69
  70 def analyze_results(project_count: int, results: Results) -> int:
  71     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  72     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  73
  74     click.secho("-- primer results 📊 --\n", bold=True)
  75     click.secho(
  76         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  77         bold=True,
  78         fg="green",
  79     )
  80     click.secho(
  81         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  82         bold=bool(results.stats["failed"]),
  83         fg="red",
  84     )
  85     s = "" if results.stats["disabled"] == 1 else "s"
  86     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
  87     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
  88     click.echo(
  89         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
  90     )
  91     click.echo(
  92         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
  93     )
  94
  95     if results.failed_projects:
  96         click.secho("\nFailed projects:\n", bold=True)
  97
  98     for project_name, project_cpe in results.failed_projects.items():
  99         print(f"## {project_name}:")
 100         print(f" - Returned {project_cpe.returncode}")
 101         if project_cpe.stderr:
 102             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
 103         if project_cpe.stdout:
 104             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 105         print("")
 106
 107     return results.stats["failed"]
 108
 109
 110 async def black_run(
 111     repo_path: Path, project_config: Dict[str, Any], results: Results
 112 ) -> None:
 113     """Run Black and record failures"""
 114     cmd = [str(which(BLACK_BINARY))]
 115     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 116         cmd.extend(*project_config["cli_arguments"])
 117     cmd.extend(["--check", "--diff", "."])
 118
 119     try:
 120         _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
 121     except asyncio.TimeoutError:
 122         results.stats["failed"] += 1
 123         LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 124     except CalledProcessError as cpe:
 125         # TODO: Tune for smarter for higher signal
 126         # If any other return value than 1 we raise - can disable project in config
 127         if cpe.returncode == 1:
 128             if not project_config["expect_formatting_changes"]:
 129                 results.stats["failed"] += 1
 130                 results.failed_projects[repo_path.name] = cpe
 131             else:
 132                 results.stats["success"] += 1
 133             return
 134         elif cpe.returncode > 1:
 135             results.stats["failed"] += 1
 136             results.failed_projects[repo_path.name] = cpe
 137             return
 138
 139         LOG.error(f"Unknown error with {repo_path}")
 140         raise
 141
 142     # If we get here and expect formatting changes something is up
 143     if project_config["expect_formatting_changes"]:
 144         results.stats["failed"] += 1
 145         results.failed_projects[repo_path.name] = CalledProcessError(
 146             0, cmd, b"Expected formatting changes but didn't get any!", b""
 147         )
 148         return
 149
 150     results.stats["success"] += 1
 151
 152
 153 async def git_checkout_or_rebase(
 154     work_path: Path,
 155     project_config: Dict[str, Any],
 156     rebase: bool = False,
 157     *,
 158     depth: int = 1,
 159 ) -> Optional[Path]:
 160     """git Clone project or rebase"""
 161     git_bin = str(which(GIT_BIANRY))
 162     if not git_bin:
 163         LOG.error("No git binary found")
 164         return None
 165
 166     repo_url_parts = urlparse(project_config["git_clone_url"])
 167     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 168
 169     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 170     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 171     cwd = work_path
 172     if repo_path.exists() and rebase:
 173         cmd = [git_bin, "pull", "--rebase"]
 174         cwd = repo_path
 175     elif repo_path.exists():
 176         return repo_path
 177
 178     try:
 179         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 180     except (asyncio.TimeoutError, CalledProcessError) as e:
 181         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 182         return None
 183
 184     return repo_path
 185
 186
 187 def handle_PermissionError(
 188     func: Callable, path: Path, exc: Tuple[Any, Any, Any]
 189 ) -> None:
 190     """
 191     Handle PermissionError during shutil.rmtree.
 192
 193     This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
 194     the error was EACCES (i.e. Permission denied). If true, the path is set writable,
 195     readable, and executable by everyone. Finally, it tries the error causing delete
 196     operation again.
 197
 198     If the check is false, then the original error will be reraised as this function
 199     can't handle it.
 200     """
 201     excvalue = exc[1]
 202     LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
 203     if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
 204         LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
 205         os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
 206         func(path)  # Try the error causing delete operation again
 207     else:
 208         raise
 209
 210
 211 async def load_projects_queue(
 212     config_path: Path,
 213 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 214     """Load project config and fill queue with all the project names"""
 215     with config_path.open("r") as cfp:
 216         config = json.load(cfp)
 217
 218     # TODO: Offer more options here
 219     # e.g. Run on X random packages or specific sub list etc.
 220     project_names = sorted(config["projects"].keys())
 221     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 222     for project in project_names:
 223         await queue.put(project)
 224
 225     return config, queue
 226
 227
 228 async def project_runner(
 229     idx: int,
 230     config: Dict[str, Any],
 231     queue: asyncio.Queue,
 232     work_path: Path,
 233     results: Results,
 234     long_checkouts: bool = False,
 235     rebase: bool = False,
 236     keep: bool = False,
 237 ) -> None:
 238     """Check out project and run Black on it + record result"""
 239     loop = asyncio.get_event_loop()
 240     py_version = f"{version_info[0]}.{version_info[1]}"
 241     while True:
 242         try:
 243             project_name = queue.get_nowait()
 244         except asyncio.QueueEmpty:
 245             LOG.debug(f"project_runner {idx} exiting")
 246             return
 247         LOG.debug(f"worker {idx} working on {project_name}")
 248
 249         project_config = config["projects"][project_name]
 250
 251         # Check if disabled by config
 252         if "disabled" in project_config and project_config["disabled"]:
 253             results.stats["disabled"] += 1
 254             LOG.info(f"Skipping {project_name} as it's disabled via config")
 255             continue
 256
 257         # Check if we should run on this version of Python
 258         if (
 259             "all" not in project_config["py_versions"]
 260             and py_version not in project_config["py_versions"]
 261         ):
 262             results.stats["wrong_py_ver"] += 1
 263             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 264             continue
 265
 266         # Check if we're doing big projects / long checkouts
 267         if not long_checkouts and project_config["long_checkout"]:
 268             results.stats["skipped_long_checkout"] += 1
 269             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 270             continue
 271
 272         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 273         if not repo_path:
 274             continue
 275         await black_run(repo_path, project_config, results)
 276
 277         if not keep:
 278             LOG.debug(f"Removing {repo_path}")
 279             rmtree_partial = partial(
 280                 rmtree, path=repo_path, onerror=handle_PermissionError
 281             )
 282             await loop.run_in_executor(None, rmtree_partial)
 283
 284         LOG.info(f"Finished {project_name}")
 285
 286
 287 async def process_queue(
 288     config_file: str,
 289     work_path: Path,
 290     workers: int,
 291     keep: bool = False,
 292     long_checkouts: bool = False,
 293     rebase: bool = False,
 294 ) -> int:
 295     """
 296     Process the queue with X workers and evaluate results
 297     - Success is guaged via the config "expect_formatting_changes"
 298
 299     Integer return equals the number of failed projects
 300     """
 301     results = Results()
 302     results.stats["disabled"] = 0
 303     results.stats["failed"] = 0
 304     results.stats["skipped_long_checkout"] = 0
 305     results.stats["success"] = 0
 306     results.stats["wrong_py_ver"] = 0
 307
 308     config, queue = await load_projects_queue(Path(config_file))
 309     project_count = queue.qsize()
 310     s = "" if project_count == 1 else "s"
 311     LOG.info(f"{project_count} project{s} to run Black over")
 312     if project_count < 1:
 313         return -1
 314
 315     s = "" if workers == 1 else "s"
 316     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 317     # Wait until we finish running all the projects before analyzing
 318     await asyncio.gather(
 319         *[
 320             project_runner(
 321                 i, config, queue, work_path, results, long_checkouts, rebase, keep
 322             )
 323             for i in range(workers)
 324         ]
 325     )
 326
 327     LOG.info("Analyzing results")
 328     return analyze_results(project_count, results)
 329
 330
# This module only provides helpers for importing; executing it directly as a
# script is rejected with an explicit error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")