src/black_primer/lib.py

   1 import asyncio
   2 import errno
   3 import json
   4 import logging
   5 import os
   6 import stat
   7 import sys
   8 from functools import partial
   9 from pathlib import Path
  10 from platform import system
  11 from shutil import rmtree, which
  12 from subprocess import CalledProcessError
  13 from sys import version_info
  14 from tempfile import TemporaryDirectory
  15 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
  16 from urllib.parse import urlparse
  17
  18 import click
  19
  20
# True when platform.system() reports that we are running on Windows
WINDOWS = system() == "Windows"
# Executable names of the tools this module runs; they carry an ".exe" suffix
# on Windows
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Module-level logger, named after this module
LOG = logging.getLogger(__name__)
  25
  26
# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default - this block can be removed once
# Black requires Python >= 3.8.
# mypy only respects sys.platform if directly in the evaluation
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
  33
  34
  35 class Results(NamedTuple):
  36     stats: Dict[str, int] = {}
  37     failed_projects: Dict[str, CalledProcessError] = {}
  38
  39
  40 async def _gen_check_output(
  41     cmd: Sequence[str],
  42     timeout: float = 300,
  43     env: Optional[Dict[str, str]] = None,
  44     cwd: Optional[Path] = None,
  45 ) -> Tuple[bytes, bytes]:
  46     process = await asyncio.create_subprocess_exec(
  47         *cmd,
  48         stdout=asyncio.subprocess.PIPE,
  49         stderr=asyncio.subprocess.STDOUT,
  50         env=env,
  51         cwd=cwd,
  52     )
  53     try:
  54         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
  55     except asyncio.TimeoutError:
  56         process.kill()
  57         await process.wait()
  58         raise
  59
  60     # A non-optional timeout was supplied to asyncio.wait_for, guaranteeing
  61     # a timeout or completed process.  A terminated Python process will have a
  62     # non-empty returncode value.
  63     assert process.returncode is not None
  64
  65     if process.returncode != 0:
  66         cmd_str = " ".join(cmd)
  67         raise CalledProcessError(
  68             process.returncode, cmd_str, output=stdout, stderr=stderr
  69         )
  70
  71     return (stdout, stderr)
  72
  73
  74 def analyze_results(project_count: int, results: Results) -> int:
  75     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  76     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  77
  78     click.secho("-- primer results 📊 --\n", bold=True)
  79     click.secho(
  80         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  81         bold=True,
  82         fg="green",
  83     )
  84     click.secho(
  85         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  86         bold=bool(results.stats["failed"]),
  87         fg="red",
  88     )
  89     s = "" if results.stats["disabled"] == 1 else "s"
  90     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
  91     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
  92     click.echo(
  93         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
  94     )
  95     click.echo(
  96         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
  97     )
  98
  99     if results.failed_projects:
 100         click.secho("\nFailed projects:\n", bold=True)
 101
 102     for project_name, project_cpe in results.failed_projects.items():
 103         print(f"## {project_name}:")
 104         print(f" - Returned {project_cpe.returncode}")
 105         if project_cpe.stderr:
 106             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
 107         if project_cpe.stdout:
 108             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 109         print("")
 110
 111     return results.stats["failed"]
 112
 113
 114 async def black_run(
 115     repo_path: Path,
 116     project_config: Dict[str, Any],
 117     results: Results,
 118     no_diff: bool = False,
 119 ) -> None:
 120     """Run Black and record failures"""
 121     cmd = [str(which(BLACK_BINARY))]
 122     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 123         cmd.extend(*project_config["cli_arguments"])
 124     cmd.append("--check")
 125     if no_diff:
 126         cmd.append(".")
 127     else:
 128         cmd.extend(["--diff", "."])
 129
 130     with TemporaryDirectory() as tmp_path:
 131         # Prevent reading top-level user configs by manipulating envionment variables
 132         env = {
 133             **os.environ,
 134             "XDG_CONFIG_HOME": tmp_path,  # Unix-like
 135             "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
 136         }
 137
 138         try:
 139             _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path, env=env)
 140         except asyncio.TimeoutError:
 141             results.stats["failed"] += 1
 142             LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 143         except CalledProcessError as cpe:
 144             # TODO: Tune for smarter for higher signal
 145             # If any other return value than 1 we raise - can disable project in config
 146             if cpe.returncode == 1:
 147                 if not project_config["expect_formatting_changes"]:
 148                     results.stats["failed"] += 1
 149                     results.failed_projects[repo_path.name] = cpe
 150                 else:
 151                     results.stats["success"] += 1
 152                 return
 153             elif cpe.returncode > 1:
 154                 results.stats["failed"] += 1
 155                 results.failed_projects[repo_path.name] = cpe
 156                 return
 157
 158             LOG.error(f"Unknown error with {repo_path}")
 159             raise
 160
 161     # If we get here and expect formatting changes something is up
 162     if project_config["expect_formatting_changes"]:
 163         results.stats["failed"] += 1
 164         results.failed_projects[repo_path.name] = CalledProcessError(
 165             0, cmd, b"Expected formatting changes but didn't get any!", b""
 166         )
 167         return
 168
 169     results.stats["success"] += 1
 170
 171
 172 async def git_checkout_or_rebase(
 173     work_path: Path,
 174     project_config: Dict[str, Any],
 175     rebase: bool = False,
 176     *,
 177     depth: int = 1,
 178 ) -> Optional[Path]:
 179     """git Clone project or rebase"""
 180     git_bin = str(which(GIT_BINARY))
 181     if not git_bin:
 182         LOG.error("No git binary found")
 183         return None
 184
 185     repo_url_parts = urlparse(project_config["git_clone_url"])
 186     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 187
 188     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 189     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 190     cwd = work_path
 191     if repo_path.exists() and rebase:
 192         cmd = [git_bin, "pull", "--rebase"]
 193         cwd = repo_path
 194     elif repo_path.exists():
 195         return repo_path
 196
 197     try:
 198         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 199     except (asyncio.TimeoutError, CalledProcessError) as e:
 200         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 201         return None
 202
 203     return repo_path
 204
 205
 206 def handle_PermissionError(
 207     func: Callable, path: Path, exc: Tuple[Any, Any, Any]
 208 ) -> None:
 209     """
 210     Handle PermissionError during shutil.rmtree.
 211
 212     This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
 213     the error was EACCES (i.e. Permission denied). If true, the path is set writable,
 214     readable, and executable by everyone. Finally, it tries the error causing delete
 215     operation again.
 216
 217     If the check is false, then the original error will be reraised as this function
 218     can't handle it.
 219     """
 220     excvalue = exc[1]
 221     LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
 222     if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
 223         LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
 224         os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
 225         func(path)  # Try the error causing delete operation again
 226     else:
 227         raise
 228
 229
 230 async def load_projects_queue(
 231     config_path: Path,
 232 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 233     """Load project config and fill queue with all the project names"""
 234     with config_path.open("r") as cfp:
 235         config = json.load(cfp)
 236
 237     # TODO: Offer more options here
 238     # e.g. Run on X random packages or specific sub list etc.
 239     project_names = sorted(config["projects"].keys())
 240     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 241     for project in project_names:
 242         await queue.put(project)
 243
 244     return config, queue
 245
 246
 247 async def project_runner(
 248     idx: int,
 249     config: Dict[str, Any],
 250     queue: asyncio.Queue,
 251     work_path: Path,
 252     results: Results,
 253     long_checkouts: bool = False,
 254     rebase: bool = False,
 255     keep: bool = False,
 256     no_diff: bool = False,
 257 ) -> None:
 258     """Check out project and run Black on it + record result"""
 259     loop = asyncio.get_event_loop()
 260     py_version = f"{version_info[0]}.{version_info[1]}"
 261     while True:
 262         try:
 263             project_name = queue.get_nowait()
 264         except asyncio.QueueEmpty:
 265             LOG.debug(f"project_runner {idx} exiting")
 266             return
 267         LOG.debug(f"worker {idx} working on {project_name}")
 268
 269         project_config = config["projects"][project_name]
 270
 271         # Check if disabled by config
 272         if "disabled" in project_config and project_config["disabled"]:
 273             results.stats["disabled"] += 1
 274             LOG.info(f"Skipping {project_name} as it's disabled via config")
 275             continue
 276
 277         # Check if we should run on this version of Python
 278         if (
 279             "all" not in project_config["py_versions"]
 280             and py_version not in project_config["py_versions"]
 281         ):
 282             results.stats["wrong_py_ver"] += 1
 283             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 284             continue
 285
 286         # Check if we're doing big projects / long checkouts
 287         if not long_checkouts and project_config["long_checkout"]:
 288             results.stats["skipped_long_checkout"] += 1
 289             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 290             continue
 291
 292         repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 293         if not repo_path:
 294             continue
 295         await black_run(repo_path, project_config, results, no_diff)
 296
 297         if not keep:
 298             LOG.debug(f"Removing {repo_path}")
 299             rmtree_partial = partial(
 300                 rmtree, path=repo_path, onerror=handle_PermissionError
 301             )
 302             await loop.run_in_executor(None, rmtree_partial)
 303
 304         LOG.info(f"Finished {project_name}")
 305
 306
 307 async def process_queue(
 308     config_file: str,
 309     work_path: Path,
 310     workers: int,
 311     keep: bool = False,
 312     long_checkouts: bool = False,
 313     rebase: bool = False,
 314     no_diff: bool = False,
 315 ) -> int:
 316     """
 317     Process the queue with X workers and evaluate results
 318     - Success is guaged via the config "expect_formatting_changes"
 319
 320     Integer return equals the number of failed projects
 321     """
 322     results = Results()
 323     results.stats["disabled"] = 0
 324     results.stats["failed"] = 0
 325     results.stats["skipped_long_checkout"] = 0
 326     results.stats["success"] = 0
 327     results.stats["wrong_py_ver"] = 0
 328
 329     config, queue = await load_projects_queue(Path(config_file))
 330     project_count = queue.qsize()
 331     s = "" if project_count == 1 else "s"
 332     LOG.info(f"{project_count} project{s} to run Black over")
 333     if project_count < 1:
 334         return -1
 335
 336     s = "" if workers == 1 else "s"
 337     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 338     # Wait until we finish running all the projects before analyzing
 339     await asyncio.gather(
 340         *[
 341             project_runner(
 342                 i,
 343                 config,
 344                 queue,
 345                 work_path,
 346                 results,
 347                 long_checkouts,
 348                 rebase,
 349                 keep,
 350                 no_diff,
 351             )
 352             for i in range(workers)
 353         ]
 354     )
 355
 356     LOG.info("Analyzing results")
 357     return analyze_results(project_count, results)
 358
 359
# This module only provides helpers for importing; running it directly as a
# script is rejected.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")