src/black_primer/lib.py

   1 import asyncio
   2 import errno
   3 import json
   4 import logging
   5 import os
   6 import stat
   7 import sys
   8 from functools import partial
   9 from pathlib import Path
  10 from platform import system
  11 from shutil import rmtree, which
  12 from subprocess import CalledProcessError
  13 from sys import version_info
  14 from tempfile import TemporaryDirectory
  15 from typing import (
  16     Any,
  17     Callable,
  18     Dict,
  19     List,
  20     NamedTuple,
  21     Optional,
  22     Sequence,
  23     Tuple,
  24     Union,
  25 )
  26 from urllib.parse import urlparse
  27
  28 import click
  29
  30
  31 TEN_MINUTES_SECONDS = 600
  32 WINDOWS = system() == "Windows"
  33 BLACK_BINARY = "black.exe" if WINDOWS else "black"
  34 GIT_BINARY = "git.exe" if WINDOWS else "git"
  35 LOG = logging.getLogger(__name__)
  36
  37
# Windows needs a ProactorEventLoop in order to exec subprocesses.
# Starting with Python 3.8 this is the default, so this block can be removed
# later (presumably once Black itself requires Python >= 3.8 - TODO confirm).
# mypy only respects sys.platform if it is used directly in the condition, which
# is why the WINDOWS constant is not reused here:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
  44
  45
  46 class Results(NamedTuple):
  47     stats: Dict[str, int] = {}
  48     failed_projects: Dict[str, CalledProcessError] = {}
  49
  50
  51 async def _gen_check_output(
  52     cmd: Sequence[str],
  53     timeout: float = TEN_MINUTES_SECONDS,
  54     env: Optional[Dict[str, str]] = None,
  55     cwd: Optional[Path] = None,
  56     stdin: Optional[bytes] = None,
  57 ) -> Tuple[bytes, bytes]:
  58     process = await asyncio.create_subprocess_exec(
  59         *cmd,
  60         stdin=asyncio.subprocess.PIPE,
  61         stdout=asyncio.subprocess.PIPE,
  62         stderr=asyncio.subprocess.STDOUT,
  63         env=env,
  64         cwd=cwd,
  65     )
  66     try:
  67         (stdout, stderr) = await asyncio.wait_for(process.communicate(stdin), timeout)
  68     except asyncio.TimeoutError:
  69         process.kill()
  70         await process.wait()
  71         raise
  72
  73     # A non-optional timeout was supplied to asyncio.wait_for, guaranteeing
  74     # a timeout or completed process.  A terminated Python process will have a
  75     # non-empty returncode value.
  76     assert process.returncode is not None
  77
  78     if process.returncode != 0:
  79         cmd_str = " ".join(cmd)
  80         raise CalledProcessError(
  81             process.returncode, cmd_str, output=stdout, stderr=stderr
  82         )
  83
  84     return (stdout, stderr)
  85
  86
  87 def analyze_results(project_count: int, results: Results) -> int:
  88     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  89     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  90
  91     if results.failed_projects:
  92         click.secho("\nFailed projects:\n", bold=True)
  93
  94     for project_name, project_cpe in results.failed_projects.items():
  95         print(f"## {project_name}:")
  96         print(f" - Returned {project_cpe.returncode}")
  97         if project_cpe.stderr:
  98             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
  99         if project_cpe.stdout:
 100             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 101         print("")
 102
 103     click.secho("-- primer results 📊 --\n", bold=True)
 104     click.secho(
 105         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
 106         bold=True,
 107         fg="green",
 108     )
 109     click.secho(
 110         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
 111         bold=bool(results.stats["failed"]),
 112         fg="red",
 113     )
 114     s = "" if results.stats["disabled"] == 1 else "s"
 115     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
 116     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
 117     click.echo(
 118         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
 119     )
 120     click.echo(
 121         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
 122     )
 123
 124     if results.failed_projects:
 125         failed = ", ".join(results.failed_projects.keys())
 126         click.secho(f"\nFailed projects: {failed}\n", bold=True)
 127
 128     return results.stats["failed"]
 129
 130
 131 def _flatten_cli_args(cli_args: List[Union[Sequence[str], str]]) -> List[str]:
 132     """Allow a user to put long arguments into a list of strs
 133     to make the JSON human readable"""
 134     flat_args = []
 135     for arg in cli_args:
 136         if isinstance(arg, str):
 137             flat_args.append(arg)
 138             continue
 139
 140         args_as_str = "".join(arg)
 141         flat_args.append(args_as_str)
 142
 143     return flat_args
 144
 145
 146 async def black_run(
 147     project_name: str,
 148     repo_path: Optional[Path],
 149     project_config: Dict[str, Any],
 150     results: Results,
 151     no_diff: bool = False,
 152 ) -> None:
 153     """Run Black and record failures"""
 154     if not repo_path:
 155         results.stats["failed"] += 1
 156         results.failed_projects[project_name] = CalledProcessError(
 157             69, [], f"{project_name} has no repo_path: {repo_path}".encode(), b""
 158         )
 159         return
 160
 161     stdin_test = project_name.upper() == "STDIN"
 162     cmd = [str(which(BLACK_BINARY))]
 163     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 164         cmd.extend(_flatten_cli_args(project_config["cli_arguments"]))
 165     cmd.append("--check")
 166     if not no_diff:
 167         cmd.append("--diff")
 168
 169     # Workout if we should read in a python file or search from cwd
 170     stdin = None
 171     if stdin_test:
 172         cmd.append("-")
 173         stdin = repo_path.read_bytes()
 174     elif "base_path" in project_config:
 175         cmd.append(project_config["base_path"])
 176     else:
 177         cmd.append(".")
 178
 179     timeout = (
 180         project_config["timeout_seconds"]
 181         if "timeout_seconds" in project_config
 182         else TEN_MINUTES_SECONDS
 183     )
 184     with TemporaryDirectory() as tmp_path:
 185         # Prevent reading top-level user configs by manipulating environment variables
 186         env = {
 187             **os.environ,
 188             "XDG_CONFIG_HOME": tmp_path,  # Unix-like
 189             "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
 190         }
 191
 192         cwd_path = repo_path.parent if stdin_test else repo_path
 193         try:
 194             LOG.debug(f"Running black for {project_name}: {' '.join(cmd)}")
 195             _stdout, _stderr = await _gen_check_output(
 196                 cmd, cwd=cwd_path, env=env, stdin=stdin, timeout=timeout
 197             )
 198         except asyncio.TimeoutError:
 199             results.stats["failed"] += 1
 200             LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 201         except CalledProcessError as cpe:
 202             # TODO: Tune for smarter for higher signal
 203             # If any other return value than 1 we raise - can disable project in config
 204             if cpe.returncode == 1:
 205                 if not project_config["expect_formatting_changes"]:
 206                     results.stats["failed"] += 1
 207                     results.failed_projects[repo_path.name] = cpe
 208                 else:
 209                     results.stats["success"] += 1
 210                 return
 211             elif cpe.returncode > 1:
 212                 results.stats["failed"] += 1
 213                 results.failed_projects[repo_path.name] = cpe
 214                 return
 215
 216             LOG.error(f"Unknown error with {repo_path}")
 217             raise
 218
 219     # If we get here and expect formatting changes something is up
 220     if project_config["expect_formatting_changes"]:
 221         results.stats["failed"] += 1
 222         results.failed_projects[repo_path.name] = CalledProcessError(
 223             0, cmd, b"Expected formatting changes but didn't get any!", b""
 224         )
 225         return
 226
 227     results.stats["success"] += 1
 228
 229
 230 async def git_checkout_or_rebase(
 231     work_path: Path,
 232     project_config: Dict[str, Any],
 233     rebase: bool = False,
 234     *,
 235     depth: int = 1,
 236 ) -> Optional[Path]:
 237     """git Clone project or rebase"""
 238     git_bin = str(which(GIT_BINARY))
 239     if not git_bin:
 240         LOG.error("No git binary found")
 241         return None
 242
 243     repo_url_parts = urlparse(project_config["git_clone_url"])
 244     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 245
 246     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 247     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 248     cwd = work_path
 249     if repo_path.exists() and rebase:
 250         cmd = [git_bin, "pull", "--rebase"]
 251         cwd = repo_path
 252     elif repo_path.exists():
 253         return repo_path
 254
 255     try:
 256         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 257     except (asyncio.TimeoutError, CalledProcessError) as e:
 258         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 259         return None
 260
 261     return repo_path
 262
 263
 264 def handle_PermissionError(
 265     func: Callable[..., None], path: Path, exc: Tuple[Any, Any, Any]
 266 ) -> None:
 267     """
 268     Handle PermissionError during shutil.rmtree.
 269
 270     This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
 271     the error was EACCES (i.e. Permission denied). If true, the path is set writable,
 272     readable, and executable by everyone. Finally, it tries the error causing delete
 273     operation again.
 274
 275     If the check is false, then the original error will be reraised as this function
 276     can't handle it.
 277     """
 278     excvalue = exc[1]
 279     LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
 280     if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
 281         LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
 282         os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
 283         func(path)  # Try the error causing delete operation again
 284     else:
 285         raise
 286
 287
 288 async def load_projects_queue(
 289     config_path: Path,
 290     projects_to_run: List[str],
 291 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 292     """Load project config and fill queue with all the project names"""
 293     with config_path.open("r") as cfp:
 294         config = json.load(cfp)
 295
 296     # TODO: Offer more options here
 297     # e.g. Run on X random packages etc.
 298     queue: asyncio.Queue = asyncio.Queue(maxsize=len(projects_to_run))
 299     for project in projects_to_run:
 300         await queue.put(project)
 301
 302     return config, queue
 303
 304
 305 async def project_runner(
 306     idx: int,
 307     config: Dict[str, Any],
 308     queue: asyncio.Queue,
 309     work_path: Path,
 310     results: Results,
 311     long_checkouts: bool = False,
 312     rebase: bool = False,
 313     keep: bool = False,
 314     no_diff: bool = False,
 315 ) -> None:
 316     """Check out project and run Black on it + record result"""
 317     loop = asyncio.get_event_loop()
 318     py_version = f"{version_info[0]}.{version_info[1]}"
 319     while True:
 320         try:
 321             project_name = queue.get_nowait()
 322         except asyncio.QueueEmpty:
 323             LOG.debug(f"project_runner {idx} exiting")
 324             return
 325         LOG.debug(f"worker {idx} working on {project_name}")
 326
 327         project_config = config["projects"][project_name]
 328
 329         # Check if disabled by config
 330         if "disabled" in project_config and project_config["disabled"]:
 331             results.stats["disabled"] += 1
 332             LOG.info(f"Skipping {project_name} as it's disabled via config")
 333             continue
 334
 335         # Check if we should run on this version of Python
 336         if (
 337             "all" not in project_config["py_versions"]
 338             and py_version not in project_config["py_versions"]
 339         ):
 340             results.stats["wrong_py_ver"] += 1
 341             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 342             continue
 343
 344         # Check if we're doing big projects / long checkouts
 345         if not long_checkouts and project_config["long_checkout"]:
 346             results.stats["skipped_long_checkout"] += 1
 347             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 348             continue
 349
 350         repo_path: Optional[Path] = Path(__file__)
 351         stdin_project = project_name.upper() == "STDIN"
 352         if not stdin_project:
 353             repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 354             if not repo_path:
 355                 continue
 356         await black_run(project_name, repo_path, project_config, results, no_diff)
 357
 358         if not keep and not stdin_project:
 359             LOG.debug(f"Removing {repo_path}")
 360             rmtree_partial = partial(
 361                 rmtree, path=repo_path, onerror=handle_PermissionError
 362             )
 363             await loop.run_in_executor(None, rmtree_partial)
 364
 365         LOG.info(f"Finished {project_name}")
 366
 367
 368 async def process_queue(
 369     config_file: str,
 370     work_path: Path,
 371     workers: int,
 372     projects_to_run: List[str],
 373     keep: bool = False,
 374     long_checkouts: bool = False,
 375     rebase: bool = False,
 376     no_diff: bool = False,
 377 ) -> int:
 378     """
 379     Process the queue with X workers and evaluate results
 380     - Success is guaged via the config "expect_formatting_changes"
 381
 382     Integer return equals the number of failed projects
 383     """
 384     results = Results()
 385     results.stats["disabled"] = 0
 386     results.stats["failed"] = 0
 387     results.stats["skipped_long_checkout"] = 0
 388     results.stats["success"] = 0
 389     results.stats["wrong_py_ver"] = 0
 390
 391     config, queue = await load_projects_queue(Path(config_file), projects_to_run)
 392     project_count = queue.qsize()
 393     s = "" if project_count == 1 else "s"
 394     LOG.info(f"{project_count} project{s} to run Black over")
 395     if project_count < 1:
 396         return -1
 397
 398     s = "" if workers == 1 else "s"
 399     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 400     # Wait until we finish running all the projects before analyzing
 401     await asyncio.gather(
 402         *[
 403             project_runner(
 404                 i,
 405                 config,
 406                 queue,
 407                 work_path,
 408                 results,
 409                 long_checkouts,
 410                 rebase,
 411                 keep,
 412                 no_diff,
 413             )
 414             for i in range(workers)
 415         ]
 416     )
 417
 418     LOG.info("Analyzing results")
 419     return analyze_results(project_count, results)
 420
 421
# This module is import-only: executing it as a script raises immediately.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")