src/black_primer/lib.py

   1 #!/usr/bin/env python3
   2
   3 import asyncio
   4 import errno
   5 import json
   6 import logging
   7 import os
   8 import stat
   9 import sys
  10 from functools import partial
  11 from pathlib import Path
  12 from platform import system
  13 from shutil import rmtree, which
  14 from subprocess import CalledProcessError
  15 from sys import version_info
  16 from tempfile import TemporaryDirectory
  17 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
  18 from urllib.parse import urlparse
  19
  20 import click
  21
  22
# Platform flag; it picks the executable file names used for Black and git below.
WINDOWS = system() == "Windows"
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)


# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default - this block can be removed
# once Black itself requires Python >= 3.8.
# mypy only respects sys.platform if it is used directly in the condition:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    # Import-time side effect: replaces the current event loop on Windows.
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
  35
  36
  37 class Results(NamedTuple):
  38     stats: Dict[str, int] = {}
  39     failed_projects: Dict[str, CalledProcessError] = {}
  40
  41
  42 async def _gen_check_output(
  43     cmd: Sequence[str],
  44     timeout: float = 300,
  45     env: Optional[Dict[str, str]] = None,
  46     cwd: Optional[Path] = None,
  47 ) -> Tuple[bytes, bytes]:
  48     process = await asyncio.create_subprocess_exec(
  49         *cmd,
  50         stdout=asyncio.subprocess.PIPE,
  51         stderr=asyncio.subprocess.STDOUT,
  52         env=env,
  53         cwd=cwd,
  54     )
  55     try:
  56         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
  57     except asyncio.TimeoutError:
  58         process.kill()
  59         await process.wait()
  60         raise
  61
  62     # A non-optional timeout was supplied to asyncio.wait_for, guaranteeing
  63     # a timeout or completed process.  A terminated Python process will have a
  64     # non-empty returncode value.
  65     assert process.returncode is not None
  66
  67     if process.returncode != 0:
  68         cmd_str = " ".join(cmd)
  69         raise CalledProcessError(
  70             process.returncode, cmd_str, output=stdout, stderr=stderr
  71         )
  72
  73     return (stdout, stderr)
  74
  75
  76 def analyze_results(project_count: int, results: Results) -> int:
  77     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  78     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  79
  80     click.secho("-- primer results 📊 --\n", bold=True)
  81     click.secho(
  82         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  83         bold=True,
  84         fg="green",
  85     )
  86     click.secho(
  87         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  88         bold=bool(results.stats["failed"]),
  89         fg="red",
  90     )
  91     s = "" if results.stats["disabled"] == 1 else "s"
  92     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
  93     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
  94     click.echo(
  95         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
  96     )
  97     click.echo(
  98         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
  99     )
 100
 101     if results.failed_projects:
 102         click.secho("\nFailed projects:\n", bold=True)
 103
 104     for project_name, project_cpe in results.failed_projects.items():
 105         print(f"## {project_name}:")
 106         print(f" - Returned {project_cpe.returncode}")
 107         if project_cpe.stderr:
 108             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
 109         if project_cpe.stdout:
 110             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 111         print("")
 112
 113     return results.stats["failed"]
 114
 115
 116 async def black_run(
 117     repo_path: Path, project_config: Dict[str, Any], results: Results
 118 ) -> None:
 119     """Run Black and record failures"""
 120     cmd = [str(which(BLACK_BINARY))]
 121     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 122         cmd.extend(*project_config["cli_arguments"])
 123     cmd.extend(["--check", "--diff", "."])
 124
 125     with TemporaryDirectory() as tmp_path:
 126         # Prevent reading top-level user configs by manipulating envionment variables
 127         env = {
 128             **os.environ,
 129             "XDG_CONFIG_HOME": tmp_path,  # Unix-like
 130             "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
 131         }
 132
 133         try:
 134             _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path, env=env)
 135         except asyncio.TimeoutError:
 136             results.stats["failed"] += 1
 137             LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 138         except CalledProcessError as cpe:
 139             # TODO: Tune for smarter for higher signal
 140             # If any other return value than 1 we raise - can disable project in config
 141             if cpe.returncode == 1:
 142                 if not project_config["expect_formatting_changes"]:
 143                     results.stats["failed"] += 1
 144                     results.failed_projects[repo_path.name] = cpe
 145                 else:
 146                     results.stats["success"] += 1
 147                 return
 148             elif cpe.returncode > 1:
 149                 results.stats["failed"] += 1
 150                 results.failed_projects[repo_path.name] = cpe
 151                 return
 152
 153             LOG.error(f"Unknown error with {repo_path}")
 154             raise
 155
 156     # If we get here and expect formatting changes something is up
 157     if project_config["expect_formatting_changes"]:
 158         results.stats["failed"] += 1
 159         results.failed_projects[repo_path.name] = CalledProcessError(
 160             0, cmd, b"Expected formatting changes but didn't get any!", b""
 161         )
 162         return
 163
 164     results.stats["success"] += 1
 165
 166
 167 async def git_checkout_or_rebase(
 168     work_path: Path,
 169     project_config: Dict[str, Any],
 170     rebase: bool = False,
 171     *,
 172     depth: int = 1,
 173 ) -> Optional[Path]:
 174     """git Clone project or rebase"""
 175     git_bin = str(which(GIT_BINARY))
 176     if not git_bin:
 177         LOG.error("No git binary found")
 178         return None
 179
 180     repo_url_parts = urlparse(project_config["git_clone_url"])
 181     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 182
 183     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 184     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 185     cwd = work_path
 186     if repo_path.exists() and rebase:
 187         cmd = [git_bin, "pull", "--rebase"]
 188         cwd = repo_path
 189     elif repo_path.exists():
 190         return repo_path
 191
 192     try:
 193         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 194     except (asyncio.TimeoutError, CalledProcessError) as e:
 195         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 196         return None
 197
 198     return repo_path
 199
 200
 201 def handle_PermissionError(
 202     func: Callable, path: Path, exc: Tuple[Any, Any, Any]
 203 ) -> None:
 204     """
 205     Handle PermissionError during shutil.rmtree.
 206
 207     This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
 208     the error was EACCES (i.e. Permission denied). If true, the path is set writable,
 209     readable, and executable by everyone. Finally, it tries the error causing delete
 210     operation again.
 211
 212     If the check is false, then the original error will be reraised as this function
 213     can't handle it.
 214     """
 215     excvalue = exc[1]
 216     LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
 217     if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
 218         LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
 219         os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
 220         func(path)  # Try the error causing delete operation again
 221     else:
 222         raise
 223
 224
 225 async def load_projects_queue(
 226     config_path: Path,
 227 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 228     """Load project config and fill queue with all the project names"""
 229     with config_path.open("r") as cfp:
 230         config = json.load(cfp)
 231
 232     # TODO: Offer more options here
 233     # e.g. Run on X random packages or specific sub list etc.
 234     project_names = sorted(config["projects"].keys())
 235     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 236     for project in project_names:
 237         await queue.put(project)
 238
 239     return config, queue
 240
 241
 242 async def project_runner(
 243     idx: int,
 244     config: Dict[str, Any],
 245     queue: asyncio.Queue,
 246     work_path: Path,
 247     results: Results,
 248     long_checkouts: bool = False,
 249     rebase: bool = False,
 250     keep: bool = False,
 251 ) -> None:
 252     """Check out project and run Black on it + record result"""
 253     loop = asyncio.get_event_loop()
 254     py_version = f"{version_info[0]}.{version_info[1]}"
 255     while True:
 256         try:
 257             project_name = queue.get_nowait()
 258         except asyncio.QueueEmpty:
 259             LOG.debug(f"project_runner {idx} exiting")
 260             return
 261         LOG.debug(f"worker {idx} working on {project_name}")
 262
 263         project_config = config["projects"][project_name]
 264
 265         # Check if disabled by config
 266         if "disabled" in project_config and project_config["disabled"]:
 267             results.stats["disabled"] += 1
 268             LOG.info(f"Skipping {project_name} as it's disabled via config")
 269             continue
 270
 271         # Check if we should run on this version of Python
 272         if (
 273             "all" not in project_config["py_versions"]
 274             and py_version not in project_config["py_versions"]
 275         ):
 276             results.stats["wrong_py_ver"] += 1
 277             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 278             continue
 279
 280         # Check if we're doing big projects / long checkouts
 281         if not long_checkouts and project_config["long_checkout"]:
 282             results.stats["skipped_long_checkout"] += 1
 283             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 284             continue
 285
 286         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 287         if not repo_path:
 288             continue
 289         await black_run(repo_path, project_config, results)
 290
 291         if not keep:
 292             LOG.debug(f"Removing {repo_path}")
 293             rmtree_partial = partial(
 294                 rmtree, path=repo_path, onerror=handle_PermissionError
 295             )
 296             await loop.run_in_executor(None, rmtree_partial)
 297
 298         LOG.info(f"Finished {project_name}")
 299
 300
 301 async def process_queue(
 302     config_file: str,
 303     work_path: Path,
 304     workers: int,
 305     keep: bool = False,
 306     long_checkouts: bool = False,
 307     rebase: bool = False,
 308 ) -> int:
 309     """
 310     Process the queue with X workers and evaluate results
 311     - Success is guaged via the config "expect_formatting_changes"
 312
 313     Integer return equals the number of failed projects
 314     """
 315     results = Results()
 316     results.stats["disabled"] = 0
 317     results.stats["failed"] = 0
 318     results.stats["skipped_long_checkout"] = 0
 319     results.stats["success"] = 0
 320     results.stats["wrong_py_ver"] = 0
 321
 322     config, queue = await load_projects_queue(Path(config_file))
 323     project_count = queue.qsize()
 324     s = "" if project_count == 1 else "s"
 325     LOG.info(f"{project_count} project{s} to run Black over")
 326     if project_count < 1:
 327         return -1
 328
 329     s = "" if workers == 1 else "s"
 330     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 331     # Wait until we finish running all the projects before analyzing
 332     await asyncio.gather(
 333         *[
 334             project_runner(
 335                 i, config, queue, work_path, results, long_checkouts, rebase, keep
 336             )
 337             for i in range(workers)
 338         ]
 339     )
 340
 341     LOG.info("Analyzing results")
 342     return analyze_results(project_count, results)
 343
 344
# This module only provides helpers; running it directly is refused with an error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")