src/black_primer/lib.py

   1 import asyncio
   2 import errno
   3 import json
   4 import logging
   5 import os
   6 import stat
   7 import sys
   8 from functools import partial
   9 from pathlib import Path
  10 from platform import system
  11 from shutil import rmtree, which
  12 from subprocess import CalledProcessError
  13 from sys import version_info
  14 from tempfile import TemporaryDirectory
  15 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
  16 from urllib.parse import urlparse
  17
  18 import click
  19
  20
  21 WINDOWS = system() == "Windows"
  22 BLACK_BINARY = "black.exe" if WINDOWS else "black"
  23 GIT_BINARY = "git.exe" if WINDOWS else "git"
  24 LOG = logging.getLogger(__name__)
  25
  26
# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default, so this block can be removed
# once Black's minimum supported Python version is >= 3.8.
# mypy only respects sys.platform if it is used directly in the condition:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    # Import-time side effect: installs the loop for the whole process.
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
  33
  34
  35 class Results(NamedTuple):
  36     stats: Dict[str, int] = {}
  37     failed_projects: Dict[str, CalledProcessError] = {}
  38
  39
  40 async def _gen_check_output(
  41     cmd: Sequence[str],
  42     timeout: float = 600,
  43     env: Optional[Dict[str, str]] = None,
  44     cwd: Optional[Path] = None,
  45     stdin: Optional[bytes] = None,
  46 ) -> Tuple[bytes, bytes]:
  47     process = await asyncio.create_subprocess_exec(
  48         *cmd,
  49         stdin=asyncio.subprocess.PIPE,
  50         stdout=asyncio.subprocess.PIPE,
  51         stderr=asyncio.subprocess.STDOUT,
  52         env=env,
  53         cwd=cwd,
  54     )
  55     try:
  56         (stdout, stderr) = await asyncio.wait_for(process.communicate(stdin), timeout)
  57     except asyncio.TimeoutError:
  58         process.kill()
  59         await process.wait()
  60         raise
  61
  62     # A non-optional timeout was supplied to asyncio.wait_for, guaranteeing
  63     # a timeout or completed process.  A terminated Python process will have a
  64     # non-empty returncode value.
  65     assert process.returncode is not None
  66
  67     if process.returncode != 0:
  68         cmd_str = " ".join(cmd)
  69         raise CalledProcessError(
  70             process.returncode, cmd_str, output=stdout, stderr=stderr
  71         )
  72
  73     return (stdout, stderr)
  74
  75
  76 def analyze_results(project_count: int, results: Results) -> int:
  77     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  78     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  79
  80     click.secho("-- primer results 📊 --\n", bold=True)
  81     click.secho(
  82         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  83         bold=True,
  84         fg="green",
  85     )
  86     click.secho(
  87         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  88         bold=bool(results.stats["failed"]),
  89         fg="red",
  90     )
  91     s = "" if results.stats["disabled"] == 1 else "s"
  92     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
  93     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
  94     click.echo(
  95         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
  96     )
  97     click.echo(
  98         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
  99     )
 100
 101     if results.failed_projects:
 102         click.secho("\nFailed projects:\n", bold=True)
 103
 104     for project_name, project_cpe in results.failed_projects.items():
 105         print(f"## {project_name}:")
 106         print(f" - Returned {project_cpe.returncode}")
 107         if project_cpe.stderr:
 108             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
 109         if project_cpe.stdout:
 110             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 111         print("")
 112
 113     return results.stats["failed"]
 114
 115
 116 async def black_run(
 117     project_name: str,
 118     repo_path: Optional[Path],
 119     project_config: Dict[str, Any],
 120     results: Results,
 121     no_diff: bool = False,
 122 ) -> None:
 123     """Run Black and record failures"""
 124     if not repo_path:
 125         results.stats["failed"] += 1
 126         results.failed_projects[project_name] = CalledProcessError(
 127             69, [], f"{project_name} has no repo_path: {repo_path}".encode(), b""
 128         )
 129         return
 130
 131     stdin_test = project_name.upper() == "STDIN"
 132     cmd = [str(which(BLACK_BINARY))]
 133     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 134         cmd.extend(project_config["cli_arguments"])
 135     cmd.append("--check")
 136     if not no_diff:
 137         cmd.append("--diff")
 138
 139     # Workout if we should read in a python file or search from cwd
 140     stdin = None
 141     if stdin_test:
 142         cmd.append("-")
 143         stdin = repo_path.read_bytes()
 144     else:
 145         cmd.append(".")
 146
 147     with TemporaryDirectory() as tmp_path:
 148         # Prevent reading top-level user configs by manipulating environment variables
 149         env = {
 150             **os.environ,
 151             "XDG_CONFIG_HOME": tmp_path,  # Unix-like
 152             "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
 153         }
 154
 155         cwd_path = repo_path.parent if stdin_test else repo_path
 156         try:
 157             _stdout, _stderr = await _gen_check_output(
 158                 cmd, cwd=cwd_path, env=env, stdin=stdin
 159             )
 160         except asyncio.TimeoutError:
 161             results.stats["failed"] += 1
 162             LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 163         except CalledProcessError as cpe:
 164             # TODO: Tune for smarter for higher signal
 165             # If any other return value than 1 we raise - can disable project in config
 166             if cpe.returncode == 1:
 167                 if not project_config["expect_formatting_changes"]:
 168                     results.stats["failed"] += 1
 169                     results.failed_projects[repo_path.name] = cpe
 170                 else:
 171                     results.stats["success"] += 1
 172                 return
 173             elif cpe.returncode > 1:
 174                 results.stats["failed"] += 1
 175                 results.failed_projects[repo_path.name] = cpe
 176                 return
 177
 178             LOG.error(f"Unknown error with {repo_path}")
 179             raise
 180
 181     # If we get here and expect formatting changes something is up
 182     if project_config["expect_formatting_changes"]:
 183         results.stats["failed"] += 1
 184         results.failed_projects[repo_path.name] = CalledProcessError(
 185             0, cmd, b"Expected formatting changes but didn't get any!", b""
 186         )
 187         return
 188
 189     results.stats["success"] += 1
 190
 191
 192 async def git_checkout_or_rebase(
 193     work_path: Path,
 194     project_config: Dict[str, Any],
 195     rebase: bool = False,
 196     *,
 197     depth: int = 1,
 198 ) -> Optional[Path]:
 199     """git Clone project or rebase"""
 200     git_bin = str(which(GIT_BINARY))
 201     if not git_bin:
 202         LOG.error("No git binary found")
 203         return None
 204
 205     repo_url_parts = urlparse(project_config["git_clone_url"])
 206     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 207
 208     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 209     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 210     cwd = work_path
 211     if repo_path.exists() and rebase:
 212         cmd = [git_bin, "pull", "--rebase"]
 213         cwd = repo_path
 214     elif repo_path.exists():
 215         return repo_path
 216
 217     try:
 218         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 219     except (asyncio.TimeoutError, CalledProcessError) as e:
 220         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 221         return None
 222
 223     return repo_path
 224
 225
 226 def handle_PermissionError(
 227     func: Callable, path: Path, exc: Tuple[Any, Any, Any]
 228 ) -> None:
 229     """
 230     Handle PermissionError during shutil.rmtree.
 231
 232     This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
 233     the error was EACCES (i.e. Permission denied). If true, the path is set writable,
 234     readable, and executable by everyone. Finally, it tries the error causing delete
 235     operation again.
 236
 237     If the check is false, then the original error will be reraised as this function
 238     can't handle it.
 239     """
 240     excvalue = exc[1]
 241     LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
 242     if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
 243         LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
 244         os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
 245         func(path)  # Try the error causing delete operation again
 246     else:
 247         raise
 248
 249
 250 async def load_projects_queue(
 251     config_path: Path,
 252 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 253     """Load project config and fill queue with all the project names"""
 254     with config_path.open("r") as cfp:
 255         config = json.load(cfp)
 256
 257     # TODO: Offer more options here
 258     # e.g. Run on X random packages or specific sub list etc.
 259     project_names = sorted(config["projects"].keys())
 260     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 261     for project in project_names:
 262         await queue.put(project)
 263
 264     return config, queue
 265
 266
 267 async def project_runner(
 268     idx: int,
 269     config: Dict[str, Any],
 270     queue: asyncio.Queue,
 271     work_path: Path,
 272     results: Results,
 273     long_checkouts: bool = False,
 274     rebase: bool = False,
 275     keep: bool = False,
 276     no_diff: bool = False,
 277 ) -> None:
 278     """Check out project and run Black on it + record result"""
 279     loop = asyncio.get_event_loop()
 280     py_version = f"{version_info[0]}.{version_info[1]}"
 281     while True:
 282         try:
 283             project_name = queue.get_nowait()
 284         except asyncio.QueueEmpty:
 285             LOG.debug(f"project_runner {idx} exiting")
 286             return
 287         LOG.debug(f"worker {idx} working on {project_name}")
 288
 289         project_config = config["projects"][project_name]
 290
 291         # Check if disabled by config
 292         if "disabled" in project_config and project_config["disabled"]:
 293             results.stats["disabled"] += 1
 294             LOG.info(f"Skipping {project_name} as it's disabled via config")
 295             continue
 296
 297         # Check if we should run on this version of Python
 298         if (
 299             "all" not in project_config["py_versions"]
 300             and py_version not in project_config["py_versions"]
 301         ):
 302             results.stats["wrong_py_ver"] += 1
 303             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 304             continue
 305
 306         # Check if we're doing big projects / long checkouts
 307         if not long_checkouts and project_config["long_checkout"]:
 308             results.stats["skipped_long_checkout"] += 1
 309             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 310             continue
 311
 312         repo_path: Optional[Path] = Path(__file__)
 313         stdin_project = project_name.upper() == "STDIN"
 314         if not stdin_project:
 315             repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 316             if not repo_path:
 317                 continue
 318         await black_run(project_name, repo_path, project_config, results, no_diff)
 319
 320         if not keep and not stdin_project:
 321             LOG.debug(f"Removing {repo_path}")
 322             rmtree_partial = partial(
 323                 rmtree, path=repo_path, onerror=handle_PermissionError
 324             )
 325             await loop.run_in_executor(None, rmtree_partial)
 326
 327         LOG.info(f"Finished {project_name}")
 328
 329
 330 async def process_queue(
 331     config_file: str,
 332     work_path: Path,
 333     workers: int,
 334     keep: bool = False,
 335     long_checkouts: bool = False,
 336     rebase: bool = False,
 337     no_diff: bool = False,
 338 ) -> int:
 339     """
 340     Process the queue with X workers and evaluate results
 341     - Success is guaged via the config "expect_formatting_changes"
 342
 343     Integer return equals the number of failed projects
 344     """
 345     results = Results()
 346     results.stats["disabled"] = 0
 347     results.stats["failed"] = 0
 348     results.stats["skipped_long_checkout"] = 0
 349     results.stats["success"] = 0
 350     results.stats["wrong_py_ver"] = 0
 351
 352     config, queue = await load_projects_queue(Path(config_file))
 353     project_count = queue.qsize()
 354     s = "" if project_count == 1 else "s"
 355     LOG.info(f"{project_count} project{s} to run Black over")
 356     if project_count < 1:
 357         return -1
 358
 359     s = "" if workers == 1 else "s"
 360     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 361     # Wait until we finish running all the projects before analyzing
 362     await asyncio.gather(
 363         *[
 364             project_runner(
 365                 i,
 366                 config,
 367                 queue,
 368                 work_path,
 369                 results,
 370                 long_checkouts,
 371                 rebase,
 372                 keep,
 373                 no_diff,
 374             )
 375             for i in range(workers)
 376         ]
 377     )
 378
 379     LOG.info("Analyzing results")
 380     return analyze_results(project_count, results)
 381
 382
# This module only provides helpers; running it directly is always an error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")