src/black_primer/lib.py
#!/usr/bin/env python3

# Module '__future__' has no attribute 'annotations'
from __future__ import annotations  # type: ignore

import asyncio
import json
import logging
from pathlib import Path
from shutil import rmtree, which
from subprocess import CalledProcessError
from sys import version_info
from typing import Any, Dict, NamedTuple, Optional, Sequence, Tuple
from urllib.parse import urlparse

import click


LOG = logging.getLogger(__name__)


class Results(NamedTuple):
    stats: Dict[str, int] = {}
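    # Counter keys seeded in process_queue(): "disabled", "failed",
    # "skipped_long_checkout", "success" and "wrong_py_ver".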
    failed_projects: Dict[str, CalledProcessError] = {}


async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 30,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
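    """Run cmd and return (stdout, stderr) from communicate(), raising on failure.

    stderr is merged into stdout via STDOUT redirection.  The process is
    killed and the TimeoutError re-raised if it exceeds the timeout, and a
    CalledProcessError is raised for a non-zero exit code.
    """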
    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
    except asyncio.TimeoutError:
        process.kill()
        await process.wait()
        raise

    if process.returncode != 0:
        cmd_str = " ".join(cmd)
        raise CalledProcessError(
            process.returncode, cmd_str, output=stdout, stderr=stderr
        )

    return (stdout, stderr)


async def analyze_results(project_count: int, results: Results) -> int:
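    """Print a human-readable summary of the run and any per-project failures.

    Returns the number of failed projects.
    """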
    failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
    success_pct = round(((results.stats["success"] / project_count) * 100), 2)

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(results.stats["failed"]),
        fg="red",
    )
    click.echo(f" - {results.stats['disabled']} projects Disabled by config")
    click.echo(
        f" - {results.stats['wrong_py_ver']} projects skipped due to Python Version"
    )
    click.echo(
        f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
    )

    if results.failed_projects:
        click.secho("\nFailed Projects:\n", bold=True)

    for project_name, project_cpe in results.failed_projects.items():
        print(f"## {project_name}:")
        print(f" - Returned {project_cpe.returncode}")
        if project_cpe.stderr:
            print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
        if project_cpe.stdout:
            print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
        print("")

    return results.stats["failed"]


async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run black and record failures"""
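    # Build and run e.g. `black <project cli_arguments> --check --diff .`
    # inside the project checkout.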
    cmd = [str(which("black"))]
    if project_config["cli_arguments"]:
        cmd.extend(project_config["cli_arguments"])
    cmd.extend(["--check", "--diff", "."])

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
    except asyncio.TimeoutError:
        results.stats["failed"] += 1
        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
        return
    except CalledProcessError as cpe:
        # TODO: This might need to be tuned and made smarter for higher signal
        if not project_config["expect_formatting_changes"] and cpe.returncode == 1:
            results.stats["failed"] += 1
            results.failed_projects[repo_path.name] = cpe
            return

    results.stats["success"] += 1


async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Git clone the project or, if a checkout already exists, optionally pull --rebase it"""
    git_bin = which("git")
    if not git_bin:
        LOG.error("No git binary found")
        return None

    repo_url_parts = urlparse(project_config["git_clone_url"])
    path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)

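    # The checkout directory name comes from the last component of the clone
    # URL's path, with ".git" stripped (e.g. ".../psf/black.git" -> "black").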
    repo_path: Path = work_path / path_parts[1].replace(".git", "")
    cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path


async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue[str]]:
    """Load project config and fill queue with all the project names"""
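    # Expected JSON shape: a top-level "projects" mapping of project name to
    # settings; the keys read elsewhere in this module are "git_clone_url",
    # "cli_arguments", "expect_formatting_changes", "py_versions",
    # "long_checkout" and (optionally) "disabled".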
    with config_path.open("r") as cfp:
        config = json.load(cfp)

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    project_names = sorted(config["projects"].keys())
    queue: asyncio.Queue[str] = asyncio.Queue(maxsize=len(project_names))
    for project in project_names:
        await queue.put(project)

    return config, queue


async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue[str],
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Check out projects from the queue, run black on each, and record results"""
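    # Each worker loops, pulling project names off the shared queue until it
    # is empty; blocking cleanup (rmtree) is pushed to an executor thread so
    # it does not stall the event loop.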
    loop = asyncio.get_event_loop()
    py_version = f"{version_info[0]}.{version_info[1]}"
    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return

        project_config = config["projects"][project_name]

        # Check if disabled by config
        if "disabled" in project_config and project_config["disabled"]:
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Check if we should run on this version of Python
        if (
            "all" not in project_config["py_versions"]
            and py_version not in project_config["py_versions"]
        ):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Check if we're doing big projects / long checkouts
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            await loop.run_in_executor(None, rmtree, repo_path)


async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with `workers` parallel workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects
    """
    results = Results()
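    # Seed every counter up front so the increments in black_run() and
    # project_runner() never hit a missing key.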
    results.stats["disabled"] = 0
    results.stats["failed"] = 0
    results.stats["skipped_long_checkout"] = 0
    results.stats["success"] = 0
    results.stats["wrong_py_ver"] = 0

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    LOG.info(f"{project_count} projects to run black over")
    if not project_count:
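        # No projects were loaded; -1 lets the caller distinguish this from a
        # run with zero failures.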
        return -1

    LOG.debug(f"Using {workers} parallel workers to run black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return await analyze_results(project_count, results)
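

if __name__ == "__main__":
    # Minimal usage sketch only: the real entry point is the black-primer CLI,
    # which drives process_queue() for you.  "primer.json" below is a
    # hypothetical config path; point it at a real config before running.
    import sys
    import tempfile

    async def _demo() -> int:
        with tempfile.TemporaryDirectory() as work_dir:
            return await process_queue(
                config_file="primer.json",
                work_path=Path(work_dir),
                workers=2,
            )

    sys.exit(asyncio.run(_demo()))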