src/black_primer/lib.py

   1 #!/usr/bin/env python3
   2
   3 import asyncio
   4 import json
   5 import logging
   6 import sys
   7 from pathlib import Path
   8 from platform import system
   9 from shutil import rmtree, which
  10 from subprocess import CalledProcessError
  11 from sys import version_info
  12 from typing import Any, Dict, NamedTuple, Optional, Sequence, Tuple
  13 from urllib.parse import urlparse
  14
  15 import click
  16
  17
# True when the host operating system reports itself as Windows; used below to
# pick the ".exe" variants of the executable names.
WINDOWS = system() == "Windows"
# Name of the black executable that is looked up with shutil.which().
BLACK_BINARY = "black.exe" if WINDOWS else "black"
# Name of the git executable that is looked up with shutil.which().
# NOTE: the identifier is misspelled ("BIANRY"); it is left unchanged because
# other code in this module refers to it by this exact name.
GIT_BIANRY = "git.exe" if WINDOWS else "git"
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)
  22
  23
# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default - this block can be removed
# when black >= 3.8 (presumably: once 3.8 is black's minimum Python version).
# mypy only respects sys.platform if it is used directly in the condition:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
  30
  31
  32 class Results(NamedTuple):
  33     stats: Dict[str, int] = {}
  34     failed_projects: Dict[str, CalledProcessError] = {}
  35
  36
  37 async def _gen_check_output(
  38     cmd: Sequence[str],
  39     timeout: float = 30,
  40     env: Optional[Dict[str, str]] = None,
  41     cwd: Optional[Path] = None,
  42 ) -> Tuple[bytes, bytes]:
  43     process = await asyncio.create_subprocess_exec(
  44         *cmd,
  45         stdout=asyncio.subprocess.PIPE,
  46         stderr=asyncio.subprocess.STDOUT,
  47         env=env,
  48         cwd=cwd,
  49     )
  50     try:
  51         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
  52     except asyncio.TimeoutError:
  53         process.kill()
  54         await process.wait()
  55         raise
  56
  57     if process.returncode != 0:
  58         cmd_str = " ".join(cmd)
  59         raise CalledProcessError(
  60             process.returncode, cmd_str, output=stdout, stderr=stderr
  61         )
  62
  63     return (stdout, stderr)
  64
  65
  66 def analyze_results(project_count: int, results: Results) -> int:
  67     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  68     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  69
  70     click.secho("-- primer results 📊 --\n", bold=True)
  71     click.secho(
  72         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  73         bold=True,
  74         fg="green",
  75     )
  76     click.secho(
  77         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  78         bold=bool(results.stats["failed"]),
  79         fg="red",
  80     )
  81     click.echo(f" - {results.stats['disabled']} projects Disabled by config")
  82     click.echo(
  83         f" - {results.stats['wrong_py_ver']} projects skipped due to Python Version"
  84     )
  85     click.echo(
  86         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
  87     )
  88
  89     if results.failed_projects:
  90         click.secho("\nFailed Projects:\n", bold=True)
  91
  92     for project_name, project_cpe in results.failed_projects.items():
  93         print(f"## {project_name}:")
  94         print(f" - Returned {project_cpe.returncode}")
  95         if project_cpe.stderr:
  96             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
  97         if project_cpe.stdout:
  98             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
  99         print("")
 100
 101     return results.stats["failed"]
 102
 103
 104 async def black_run(
 105     repo_path: Path, project_config: Dict[str, Any], results: Results
 106 ) -> None:
 107     """Run black and record failures"""
 108     cmd = [str(which(BLACK_BINARY))]
 109     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 110         cmd.extend(*project_config["cli_arguments"])
 111     cmd.extend(["--check", "--diff", "."])
 112
 113     try:
 114         _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
 115     except asyncio.TimeoutError:
 116         results.stats["failed"] += 1
 117         LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 118     except CalledProcessError as cpe:
 119         # TODO: Tune for smarter for higher signal
 120         # If any other reutrn value than 1 we raise - can disable project in config
 121         if cpe.returncode == 1:
 122             if not project_config["expect_formatting_changes"]:
 123                 results.stats["failed"] += 1
 124                 results.failed_projects[repo_path.name] = cpe
 125             else:
 126                 results.stats["success"] += 1
 127             return
 128
 129         LOG.error(f"Unkown error with {repo_path}")
 130         raise
 131
 132     # If we get here and expect formatting changes something is up
 133     if project_config["expect_formatting_changes"]:
 134         results.stats["failed"] += 1
 135         results.failed_projects[repo_path.name] = CalledProcessError(
 136             0, cmd, b"Expected formatting changes but didn't get any!", b""
 137         )
 138         return
 139
 140     results.stats["success"] += 1
 141
 142
 143 async def git_checkout_or_rebase(
 144     work_path: Path,
 145     project_config: Dict[str, Any],
 146     rebase: bool = False,
 147     *,
 148     depth: int = 1,
 149 ) -> Optional[Path]:
 150     """git Clone project or rebase"""
 151     git_bin = str(which(GIT_BIANRY))
 152     if not git_bin:
 153         LOG.error("No git binary found")
 154         return None
 155
 156     repo_url_parts = urlparse(project_config["git_clone_url"])
 157     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 158
 159     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 160     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 161     cwd = work_path
 162     if repo_path.exists() and rebase:
 163         cmd = [git_bin, "pull", "--rebase"]
 164         cwd = repo_path
 165     elif repo_path.exists():
 166         return repo_path
 167
 168     try:
 169         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 170     except (asyncio.TimeoutError, CalledProcessError) as e:
 171         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 172         return None
 173
 174     return repo_path
 175
 176
 177 async def load_projects_queue(
 178     config_path: Path,
 179 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 180     """Load project config and fill queue with all the project names"""
 181     with config_path.open("r") as cfp:
 182         config = json.load(cfp)
 183
 184     # TODO: Offer more options here
 185     # e.g. Run on X random packages or specific sub list etc.
 186     project_names = sorted(config["projects"].keys())
 187     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 188     for project in project_names:
 189         await queue.put(project)
 190
 191     return config, queue
 192
 193
 194 async def project_runner(
 195     idx: int,
 196     config: Dict[str, Any],
 197     queue: asyncio.Queue,
 198     work_path: Path,
 199     results: Results,
 200     long_checkouts: bool = False,
 201     rebase: bool = False,
 202     keep: bool = False,
 203 ) -> None:
 204     """Checkout project and run black on it + record result"""
 205     loop = asyncio.get_event_loop()
 206     py_version = f"{version_info[0]}.{version_info[1]}"
 207     while True:
 208         try:
 209             project_name = queue.get_nowait()
 210         except asyncio.QueueEmpty:
 211             LOG.debug(f"project_runner {idx} exiting")
 212             return
 213
 214         project_config = config["projects"][project_name]
 215
 216         # Check if disabled by config
 217         if "disabled" in project_config and project_config["disabled"]:
 218             results.stats["disabled"] += 1
 219             LOG.info(f"Skipping {project_name} as it's disabled via config")
 220             continue
 221
 222         # Check if we should run on this version of Python
 223         if (
 224             "all" not in project_config["py_versions"]
 225             and py_version not in project_config["py_versions"]
 226         ):
 227             results.stats["wrong_py_ver"] += 1
 228             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 229             continue
 230
 231         # Check if we're doing big projects / long checkouts
 232         if not long_checkouts and project_config["long_checkout"]:
 233             results.stats["skipped_long_checkout"] += 1
 234             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 235             continue
 236
 237         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 238         if not repo_path:
 239             continue
 240         await black_run(repo_path, project_config, results)
 241
 242         if not keep:
 243             LOG.debug(f"Removing {repo_path}")
 244             await loop.run_in_executor(None, rmtree, repo_path)
 245
 246
 247 async def process_queue(
 248     config_file: str,
 249     work_path: Path,
 250     workers: int,
 251     keep: bool = False,
 252     long_checkouts: bool = False,
 253     rebase: bool = False,
 254 ) -> int:
 255     """
 256     Process the queue with X workers and evaluate results
 257     - Success is guaged via the config "expect_formatting_changes"
 258
 259     Integer return equals the number of failed projects
 260     """
 261     results = Results()
 262     results.stats["disabled"] = 0
 263     results.stats["failed"] = 0
 264     results.stats["skipped_long_checkout"] = 0
 265     results.stats["success"] = 0
 266     results.stats["wrong_py_ver"] = 0
 267
 268     config, queue = await load_projects_queue(Path(config_file))
 269     project_count = queue.qsize()
 270     LOG.info(f"{project_count} projects to run black over")
 271     if project_count < 1:
 272         return -1
 273
 274     LOG.debug(f"Using {workers} parallel workers to run black")
 275     # Wait until we finish running all the projects before analyzing
 276     await asyncio.gather(
 277         *[
 278             project_runner(
 279                 i, config, queue, work_path, results, long_checkouts, rebase, keep
 280             )
 281             for i in range(workers)
 282         ]
 283     )
 284
 285     LOG.info("Analyzing results")
 286     return analyze_results(project_count, results)
 287
 288
# This module only provides functions for other code to import; executing it
# directly as a script is reported as an error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")