src/black_primer/lib.py

   1 #!/usr/bin/env python3
   2
   3 import asyncio
   4 import json
   5 import logging
   6 import sys
   7 from pathlib import Path
   8 from platform import system
   9 from shutil import rmtree, which
  10 from subprocess import CalledProcessError
  11 from sys import version_info
  12 from typing import Any, Dict, NamedTuple, Optional, Sequence, Tuple
  13 from urllib.parse import urlparse
  14
  15 import click
  16
  17
  18 WINDOWS = system() == "Windows"
  19 BLACK_BINARY = "black.exe" if WINDOWS else "black"
  20 GIT_BIANRY = "git.exe" if WINDOWS else "git"
  21 LOG = logging.getLogger(__name__)
  22
  23
  24 # Windows needs a ProactorEventLoop if you want to exec subprocesses
  25 # Starting with 3.8 this is the default - can remove when Black >= 3.8
  26 # mypy only respects sys.platform if directly in the evaluation
  27 # https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
  28 if sys.platform == "win32":
  29     asyncio.set_event_loop(asyncio.ProactorEventLoop())
  30
  31
  32 class Results(NamedTuple):
  33     stats: Dict[str, int] = {}
  34     failed_projects: Dict[str, CalledProcessError] = {}
  35
  36
  37 async def _gen_check_output(
  38     cmd: Sequence[str],
  39     timeout: float = 30,
  40     env: Optional[Dict[str, str]] = None,
  41     cwd: Optional[Path] = None,
  42 ) -> Tuple[bytes, bytes]:
  43     process = await asyncio.create_subprocess_exec(
  44         *cmd,
  45         stdout=asyncio.subprocess.PIPE,
  46         stderr=asyncio.subprocess.STDOUT,
  47         env=env,
  48         cwd=cwd,
  49     )
  50     try:
  51         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
  52     except asyncio.TimeoutError:
  53         process.kill()
  54         await process.wait()
  55         raise
  56
  57     if process.returncode != 0:
  58         cmd_str = " ".join(cmd)
  59         raise CalledProcessError(
  60             process.returncode, cmd_str, output=stdout, stderr=stderr
  61         )
  62
  63     return (stdout, stderr)
  64
  65
  66 def analyze_results(project_count: int, results: Results) -> int:
  67     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  68     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  69
  70     click.secho("-- primer results 📊 --\n", bold=True)
  71     click.secho(
  72         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  73         bold=True,
  74         fg="green",
  75     )
  76     click.secho(
  77         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  78         bold=bool(results.stats["failed"]),
  79         fg="red",
  80     )
  81     s = "" if results.stats["disabled"] == 1 else "s"
  82     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
  83     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
  84     click.echo(
  85         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
  86     )
  87     click.echo(
  88         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
  89     )
  90
  91     if results.failed_projects:
  92         click.secho("\nFailed projects:\n", bold=True)
  93
  94     for project_name, project_cpe in results.failed_projects.items():
  95         print(f"## {project_name}:")
  96         print(f" - Returned {project_cpe.returncode}")
  97         if project_cpe.stderr:
  98             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
  99         if project_cpe.stdout:
 100             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 101         print("")
 102
 103     return results.stats["failed"]
 104
 105
 106 async def black_run(
 107     repo_path: Path, project_config: Dict[str, Any], results: Results
 108 ) -> None:
 109     """Run Black and record failures"""
 110     cmd = [str(which(BLACK_BINARY))]
 111     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 112         cmd.extend(*project_config["cli_arguments"])
 113     cmd.extend(["--check", "--diff", "."])
 114
 115     try:
 116         _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
 117     except asyncio.TimeoutError:
 118         results.stats["failed"] += 1
 119         LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 120     except CalledProcessError as cpe:
 121         # TODO: Tune for smarter for higher signal
 122         # If any other reutrn value than 1 we raise - can disable project in config
 123         if cpe.returncode == 1:
 124             if not project_config["expect_formatting_changes"]:
 125                 results.stats["failed"] += 1
 126                 results.failed_projects[repo_path.name] = cpe
 127             else:
 128                 results.stats["success"] += 1
 129             return
 130
 131         LOG.error(f"Unkown error with {repo_path}")
 132         raise
 133
 134     # If we get here and expect formatting changes something is up
 135     if project_config["expect_formatting_changes"]:
 136         results.stats["failed"] += 1
 137         results.failed_projects[repo_path.name] = CalledProcessError(
 138             0, cmd, b"Expected formatting changes but didn't get any!", b""
 139         )
 140         return
 141
 142     results.stats["success"] += 1
 143
 144
 145 async def git_checkout_or_rebase(
 146     work_path: Path,
 147     project_config: Dict[str, Any],
 148     rebase: bool = False,
 149     *,
 150     depth: int = 1,
 151 ) -> Optional[Path]:
 152     """git Clone project or rebase"""
 153     git_bin = str(which(GIT_BIANRY))
 154     if not git_bin:
 155         LOG.error("No git binary found")
 156         return None
 157
 158     repo_url_parts = urlparse(project_config["git_clone_url"])
 159     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 160
 161     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 162     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 163     cwd = work_path
 164     if repo_path.exists() and rebase:
 165         cmd = [git_bin, "pull", "--rebase"]
 166         cwd = repo_path
 167     elif repo_path.exists():
 168         return repo_path
 169
 170     try:
 171         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 172     except (asyncio.TimeoutError, CalledProcessError) as e:
 173         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 174         return None
 175
 176     return repo_path
 177
 178
 179 async def load_projects_queue(
 180     config_path: Path,
 181 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 182     """Load project config and fill queue with all the project names"""
 183     with config_path.open("r") as cfp:
 184         config = json.load(cfp)
 185
 186     # TODO: Offer more options here
 187     # e.g. Run on X random packages or specific sub list etc.
 188     project_names = sorted(config["projects"].keys())
 189     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 190     for project in project_names:
 191         await queue.put(project)
 192
 193     return config, queue
 194
 195
 196 async def project_runner(
 197     idx: int,
 198     config: Dict[str, Any],
 199     queue: asyncio.Queue,
 200     work_path: Path,
 201     results: Results,
 202     long_checkouts: bool = False,
 203     rebase: bool = False,
 204     keep: bool = False,
 205 ) -> None:
 206     """Check out project and run Black on it + record result"""
 207     loop = asyncio.get_event_loop()
 208     py_version = f"{version_info[0]}.{version_info[1]}"
 209     while True:
 210         try:
 211             project_name = queue.get_nowait()
 212         except asyncio.QueueEmpty:
 213             LOG.debug(f"project_runner {idx} exiting")
 214             return
 215
 216         project_config = config["projects"][project_name]
 217
 218         # Check if disabled by config
 219         if "disabled" in project_config and project_config["disabled"]:
 220             results.stats["disabled"] += 1
 221             LOG.info(f"Skipping {project_name} as it's disabled via config")
 222             continue
 223
 224         # Check if we should run on this version of Python
 225         if (
 226             "all" not in project_config["py_versions"]
 227             and py_version not in project_config["py_versions"]
 228         ):
 229             results.stats["wrong_py_ver"] += 1
 230             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 231             continue
 232
 233         # Check if we're doing big projects / long checkouts
 234         if not long_checkouts and project_config["long_checkout"]:
 235             results.stats["skipped_long_checkout"] += 1
 236             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 237             continue
 238
 239         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 240         if not repo_path:
 241             continue
 242         await black_run(repo_path, project_config, results)
 243
 244         if not keep:
 245             LOG.debug(f"Removing {repo_path}")
 246             await loop.run_in_executor(None, rmtree, repo_path)
 247
 248
 249 async def process_queue(
 250     config_file: str,
 251     work_path: Path,
 252     workers: int,
 253     keep: bool = False,
 254     long_checkouts: bool = False,
 255     rebase: bool = False,
 256 ) -> int:
 257     """
 258     Process the queue with X workers and evaluate results
 259     - Success is guaged via the config "expect_formatting_changes"
 260
 261     Integer return equals the number of failed projects
 262     """
 263     results = Results()
 264     results.stats["disabled"] = 0
 265     results.stats["failed"] = 0
 266     results.stats["skipped_long_checkout"] = 0
 267     results.stats["success"] = 0
 268     results.stats["wrong_py_ver"] = 0
 269
 270     config, queue = await load_projects_queue(Path(config_file))
 271     project_count = queue.qsize()
 272     s = "" if project_count == 1 else "s"
 273     LOG.info(f"{project_count} project{s} to run Black over")
 274     if project_count < 1:
 275         return -1
 276
 277     s = "" if workers == 1 else "s"
 278     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 279     # Wait until we finish running all the projects before analyzing
 280     await asyncio.gather(
 281         *[
 282             project_runner(
 283                 i, config, queue, work_path, results, long_checkouts, rebase, keep
 284             )
 285             for i in range(workers)
 286         ]
 287     )
 288
 289     LOG.info("Analyzing results")
 290     return analyze_results(project_count, results)
 291
 292
# This module only provides library functions; running it as a script is
# rejected with an explicit error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")