src/black_primer/lib.py

   1 import asyncio
   2 import errno
   3 import json
   4 import logging
   5 import os
   6 import stat
   7 import sys
   8 from functools import partial
   9 from pathlib import Path
  10 from platform import system
  11 from shutil import rmtree, which
  12 from subprocess import CalledProcessError
  13 from sys import version_info
  14 from tempfile import TemporaryDirectory
  15 from typing import (
  16     Any,
  17     Callable,
  18     Dict,
  19     List,
  20     NamedTuple,
  21     Optional,
  22     Sequence,
  23     Tuple,
  24     Union,
  25 )
  26 from urllib.parse import urlparse
  27
  28 import click
  29
  30
# Default time limit, in seconds, applied to each subprocess started by
# _gen_check_output (ten minutes).
TEN_MINUTES_SECONDS = 600
# True when platform.system() reports "Windows"; used below to pick the
# ".exe" flavour of the executable names.
WINDOWS = system() == "Windows"
# Executable names that are resolved with shutil.which() before being run.
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Module-level logger shared by every helper in this file.
LOG = logging.getLogger(__name__)


# Windows needs a ProactorEventLoop if you want to exec subprocesses
# Starting with 3.8 this is the default - can remove when Black >= 3.8
# mypy only respects sys.platform if directly in the evaluation
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
# NOTE: this runs at import time, so importing this module on Windows replaces
# the current event loop as a side effect.
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
  44
  45
  46 class Results(NamedTuple):
  47     stats: Dict[str, int] = {}
  48     failed_projects: Dict[str, CalledProcessError] = {}
  49
  50
  51 async def _gen_check_output(
  52     cmd: Sequence[str],
  53     timeout: float = TEN_MINUTES_SECONDS,
  54     env: Optional[Dict[str, str]] = None,
  55     cwd: Optional[Path] = None,
  56     stdin: Optional[bytes] = None,
  57 ) -> Tuple[bytes, bytes]:
  58     process = await asyncio.create_subprocess_exec(
  59         *cmd,
  60         stdin=asyncio.subprocess.PIPE,
  61         stdout=asyncio.subprocess.PIPE,
  62         stderr=asyncio.subprocess.STDOUT,
  63         env=env,
  64         cwd=cwd,
  65     )
  66     try:
  67         (stdout, stderr) = await asyncio.wait_for(process.communicate(stdin), timeout)
  68     except asyncio.TimeoutError:
  69         process.kill()
  70         await process.wait()
  71         raise
  72
  73     # A non-optional timeout was supplied to asyncio.wait_for, guaranteeing
  74     # a timeout or completed process.  A terminated Python process will have a
  75     # non-empty returncode value.
  76     assert process.returncode is not None
  77
  78     if process.returncode != 0:
  79         cmd_str = " ".join(cmd)
  80         raise CalledProcessError(
  81             process.returncode, cmd_str, output=stdout, stderr=stderr
  82         )
  83
  84     return (stdout, stderr)
  85
  86
  87 def analyze_results(project_count: int, results: Results) -> int:
  88     failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
  89     success_pct = round(((results.stats["success"] / project_count) * 100), 2)
  90
  91     click.secho("-- primer results 📊 --\n", bold=True)
  92     click.secho(
  93         f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
  94         bold=True,
  95         fg="green",
  96     )
  97     click.secho(
  98         f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
  99         bold=bool(results.stats["failed"]),
 100         fg="red",
 101     )
 102     s = "" if results.stats["disabled"] == 1 else "s"
 103     click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
 104     s = "" if results.stats["wrong_py_ver"] == 1 else "s"
 105     click.echo(
 106         f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
 107     )
 108     click.echo(
 109         f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
 110     )
 111
 112     if results.failed_projects:
 113         click.secho("\nFailed projects:\n", bold=True)
 114
 115     for project_name, project_cpe in results.failed_projects.items():
 116         print(f"## {project_name}:")
 117         print(f" - Returned {project_cpe.returncode}")
 118         if project_cpe.stderr:
 119             print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
 120         if project_cpe.stdout:
 121             print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
 122         print("")
 123
 124     return results.stats["failed"]
 125
 126
 127 def _flatten_cli_args(cli_args: List[Union[Sequence[str], str]]) -> List[str]:
 128     """Allow a user to put long arguments into a list of strs
 129     to make the JSON human readable"""
 130     flat_args = []
 131     for arg in cli_args:
 132         if isinstance(arg, str):
 133             flat_args.append(arg)
 134             continue
 135
 136         args_as_str = "".join(arg)
 137         flat_args.append(args_as_str)
 138
 139     return flat_args
 140
 141
 142 async def black_run(
 143     project_name: str,
 144     repo_path: Optional[Path],
 145     project_config: Dict[str, Any],
 146     results: Results,
 147     no_diff: bool = False,
 148 ) -> None:
 149     """Run Black and record failures"""
 150     if not repo_path:
 151         results.stats["failed"] += 1
 152         results.failed_projects[project_name] = CalledProcessError(
 153             69, [], f"{project_name} has no repo_path: {repo_path}".encode(), b""
 154         )
 155         return
 156
 157     stdin_test = project_name.upper() == "STDIN"
 158     cmd = [str(which(BLACK_BINARY))]
 159     if "cli_arguments" in project_config and project_config["cli_arguments"]:
 160         cmd.extend(_flatten_cli_args(project_config["cli_arguments"]))
 161     cmd.append("--check")
 162     if not no_diff:
 163         cmd.append("--diff")
 164
 165     # Workout if we should read in a python file or search from cwd
 166     stdin = None
 167     if stdin_test:
 168         cmd.append("-")
 169         stdin = repo_path.read_bytes()
 170     elif "base_path" in project_config:
 171         cmd.append(project_config["base_path"])
 172     else:
 173         cmd.append(".")
 174
 175     timeout = (
 176         project_config["timeout_seconds"]
 177         if "timeout_seconds" in project_config
 178         else TEN_MINUTES_SECONDS
 179     )
 180     with TemporaryDirectory() as tmp_path:
 181         # Prevent reading top-level user configs by manipulating environment variables
 182         env = {
 183             **os.environ,
 184             "XDG_CONFIG_HOME": tmp_path,  # Unix-like
 185             "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
 186         }
 187
 188         cwd_path = repo_path.parent if stdin_test else repo_path
 189         try:
 190             LOG.debug(f"Running black for {project_name}: {' '.join(cmd)}")
 191             _stdout, _stderr = await _gen_check_output(
 192                 cmd, cwd=cwd_path, env=env, stdin=stdin, timeout=timeout
 193             )
 194         except asyncio.TimeoutError:
 195             results.stats["failed"] += 1
 196             LOG.error(f"Running black for {repo_path} timed out ({cmd})")
 197         except CalledProcessError as cpe:
 198             # TODO: Tune for smarter for higher signal
 199             # If any other return value than 1 we raise - can disable project in config
 200             if cpe.returncode == 1:
 201                 if not project_config["expect_formatting_changes"]:
 202                     results.stats["failed"] += 1
 203                     results.failed_projects[repo_path.name] = cpe
 204                 else:
 205                     results.stats["success"] += 1
 206                 return
 207             elif cpe.returncode > 1:
 208                 results.stats["failed"] += 1
 209                 results.failed_projects[repo_path.name] = cpe
 210                 return
 211
 212             LOG.error(f"Unknown error with {repo_path}")
 213             raise
 214
 215     # If we get here and expect formatting changes something is up
 216     if project_config["expect_formatting_changes"]:
 217         results.stats["failed"] += 1
 218         results.failed_projects[repo_path.name] = CalledProcessError(
 219             0, cmd, b"Expected formatting changes but didn't get any!", b""
 220         )
 221         return
 222
 223     results.stats["success"] += 1
 224
 225
 226 async def git_checkout_or_rebase(
 227     work_path: Path,
 228     project_config: Dict[str, Any],
 229     rebase: bool = False,
 230     *,
 231     depth: int = 1,
 232 ) -> Optional[Path]:
 233     """git Clone project or rebase"""
 234     git_bin = str(which(GIT_BINARY))
 235     if not git_bin:
 236         LOG.error("No git binary found")
 237         return None
 238
 239     repo_url_parts = urlparse(project_config["git_clone_url"])
 240     path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)
 241
 242     repo_path: Path = work_path / path_parts[1].replace(".git", "")
 243     cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
 244     cwd = work_path
 245     if repo_path.exists() and rebase:
 246         cmd = [git_bin, "pull", "--rebase"]
 247         cwd = repo_path
 248     elif repo_path.exists():
 249         return repo_path
 250
 251     try:
 252         _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
 253     except (asyncio.TimeoutError, CalledProcessError) as e:
 254         LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
 255         return None
 256
 257     return repo_path
 258
 259
 260 def handle_PermissionError(
 261     func: Callable, path: Path, exc: Tuple[Any, Any, Any]
 262 ) -> None:
 263     """
 264     Handle PermissionError during shutil.rmtree.
 265
 266     This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
 267     the error was EACCES (i.e. Permission denied). If true, the path is set writable,
 268     readable, and executable by everyone. Finally, it tries the error causing delete
 269     operation again.
 270
 271     If the check is false, then the original error will be reraised as this function
 272     can't handle it.
 273     """
 274     excvalue = exc[1]
 275     LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
 276     if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
 277         LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
 278         os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
 279         func(path)  # Try the error causing delete operation again
 280     else:
 281         raise
 282
 283
 284 async def load_projects_queue(
 285     config_path: Path,
 286 ) -> Tuple[Dict[str, Any], asyncio.Queue]:
 287     """Load project config and fill queue with all the project names"""
 288     with config_path.open("r") as cfp:
 289         config = json.load(cfp)
 290
 291     # TODO: Offer more options here
 292     # e.g. Run on X random packages or specific sub list etc.
 293     project_names = sorted(config["projects"].keys())
 294     queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
 295     for project in project_names:
 296         await queue.put(project)
 297
 298     return config, queue
 299
 300
 301 async def project_runner(
 302     idx: int,
 303     config: Dict[str, Any],
 304     queue: asyncio.Queue,
 305     work_path: Path,
 306     results: Results,
 307     long_checkouts: bool = False,
 308     rebase: bool = False,
 309     keep: bool = False,
 310     no_diff: bool = False,
 311 ) -> None:
 312     """Check out project and run Black on it + record result"""
 313     loop = asyncio.get_event_loop()
 314     py_version = f"{version_info[0]}.{version_info[1]}"
 315     while True:
 316         try:
 317             project_name = queue.get_nowait()
 318         except asyncio.QueueEmpty:
 319             LOG.debug(f"project_runner {idx} exiting")
 320             return
 321         LOG.debug(f"worker {idx} working on {project_name}")
 322
 323         project_config = config["projects"][project_name]
 324
 325         # Check if disabled by config
 326         if "disabled" in project_config and project_config["disabled"]:
 327             results.stats["disabled"] += 1
 328             LOG.info(f"Skipping {project_name} as it's disabled via config")
 329             continue
 330
 331         # Check if we should run on this version of Python
 332         if (
 333             "all" not in project_config["py_versions"]
 334             and py_version not in project_config["py_versions"]
 335         ):
 336             results.stats["wrong_py_ver"] += 1
 337             LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
 338             continue
 339
 340         # Check if we're doing big projects / long checkouts
 341         if not long_checkouts and project_config["long_checkout"]:
 342             results.stats["skipped_long_checkout"] += 1
 343             LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
 344             continue
 345
 346         repo_path: Optional[Path] = Path(__file__)
 347         stdin_project = project_name.upper() == "STDIN"
 348         if not stdin_project:
 349             repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
 350             if not repo_path:
 351                 continue
 352         await black_run(project_name, repo_path, project_config, results, no_diff)
 353
 354         if not keep and not stdin_project:
 355             LOG.debug(f"Removing {repo_path}")
 356             rmtree_partial = partial(
 357                 rmtree, path=repo_path, onerror=handle_PermissionError
 358             )
 359             await loop.run_in_executor(None, rmtree_partial)
 360
 361         LOG.info(f"Finished {project_name}")
 362
 363
 364 async def process_queue(
 365     config_file: str,
 366     work_path: Path,
 367     workers: int,
 368     keep: bool = False,
 369     long_checkouts: bool = False,
 370     rebase: bool = False,
 371     no_diff: bool = False,
 372 ) -> int:
 373     """
 374     Process the queue with X workers and evaluate results
 375     - Success is guaged via the config "expect_formatting_changes"
 376
 377     Integer return equals the number of failed projects
 378     """
 379     results = Results()
 380     results.stats["disabled"] = 0
 381     results.stats["failed"] = 0
 382     results.stats["skipped_long_checkout"] = 0
 383     results.stats["success"] = 0
 384     results.stats["wrong_py_ver"] = 0
 385
 386     config, queue = await load_projects_queue(Path(config_file))
 387     project_count = queue.qsize()
 388     s = "" if project_count == 1 else "s"
 389     LOG.info(f"{project_count} project{s} to run Black over")
 390     if project_count < 1:
 391         return -1
 392
 393     s = "" if workers == 1 else "s"
 394     LOG.debug(f"Using {workers} parallel worker{s} to run Black")
 395     # Wait until we finish running all the projects before analyzing
 396     await asyncio.gather(
 397         *[
 398             project_runner(
 399                 i,
 400                 config,
 401                 queue,
 402                 work_path,
 403                 results,
 404                 long_checkouts,
 405                 rebase,
 406                 keep,
 407                 no_diff,
 408             )
 409             for i in range(workers)
 410         ]
 411     )
 412
 413     LOG.info("Analyzing results")
 414     return analyze_results(project_count, results)
 415
 416
# This module is a library of helpers and has no command-line behaviour of
# its own, so executing it directly fails loudly instead of doing nothing.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")