--- /dev/null
+import asyncio
+import errno
+import json
+import logging
+import os
+import stat
+import sys
+from functools import partial
+from pathlib import Path
+from platform import system
+from shutil import rmtree, which
+from subprocess import CalledProcessError
+from sys import version_info
+from tempfile import TemporaryDirectory
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ List,
+ NamedTuple,
+ Optional,
+ Sequence,
+ Tuple,
+ Union,
+)
+from urllib.parse import urlparse
+
+import click
+
+
+# Default timeout, in seconds, applied to subprocesses when no other value is supplied
+TEN_MINUTES_SECONDS = 600
+# True when platform.system() reports "Windows"; selects the ".exe" binary names below
+WINDOWS = system() == "Windows"
+# Executable names that are resolved with shutil.which() before being run
+BLACK_BINARY = "black.exe" if WINDOWS else "black"
+GIT_BINARY = "git.exe" if WINDOWS else "git"
+# Module-level logger used by every function in this file
+LOG = logging.getLogger(__name__)
+
+
+# Windows needs a ProactorEventLoop if you want to exec subprocesses
+# Starting with 3.8 this is the default - can remove when Black >= 3.8
+# mypy only respects sys.platform if directly in the evaluation
+# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks # noqa: B950
+# NOTE: this runs at import time and replaces the current event loop on win32
+if sys.platform == "win32":
+ asyncio.set_event_loop(asyncio.ProactorEventLoop())
+
+
+class _ResultsBase(NamedTuple):
+    """Tuple layout backing ``Results``; deliberately declares no defaults."""
+
+    stats: Dict[str, int]
+    failed_projects: Dict[str, CalledProcessError]
+
+
+class Results(_ResultsBase):
+    """Counters and per-project errors collected during a run.
+
+    ``stats`` maps a counter name to its count and ``failed_projects`` maps a
+    project name to the ``CalledProcessError`` recorded for it.
+
+    Both fields are optional. When omitted, each instance receives its own
+    fresh dict: a ``{}`` default written directly on a NamedTuple field is a
+    single object shared by every instance, which would leak results from one
+    ``Results()`` into the next.
+    """
+
+    __slots__ = ()
+
+    def __new__(
+        cls,
+        stats: Optional[Dict[str, int]] = None,
+        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
+    ) -> "Results":
+        return super().__new__(
+            cls,
+            {} if stats is None else stats,
+            {} if failed_projects is None else failed_projects,
+        )
+
+
+async def _gen_check_output(
+    cmd: Sequence[str],
+    timeout: float = TEN_MINUTES_SECONDS,
+    env: Optional[Dict[str, str]] = None,
+    cwd: Optional[Path] = None,
+    stdin: Optional[bytes] = None,
+) -> Tuple[bytes, bytes]:
+    """Run ``cmd`` as a subprocess and return what ``communicate()`` yields.
+
+    ``stdin`` (if any) is fed to the process; stderr is redirected into
+    stdout. If the process does not finish within ``timeout`` seconds it is
+    killed, reaped, and ``asyncio.TimeoutError`` propagates to the caller.
+    A non-zero exit status raises ``CalledProcessError`` carrying the
+    captured output.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.STDOUT,
+        env=env,
+        cwd=cwd,
+    )
+    try:
+        captured = await asyncio.wait_for(proc.communicate(stdin), timeout)
+    except asyncio.TimeoutError:
+        # Do not leave a runaway child behind: kill it and wait for it to exit
+        proc.kill()
+        await proc.wait()
+        raise
+    out, err = captured
+
+    # wait_for() was given a real timeout, so we either timed out above or the
+    # process has completed and therefore has a return code.
+    assert proc.returncode is not None
+
+    if proc.returncode == 0:
+        return (out, err)
+
+    raise CalledProcessError(proc.returncode, " ".join(cmd), output=out, stderr=err)
+
+
+def analyze_results(project_count: int, results: Results) -> int:
+    """Print a summary of the run and return the number of failed projects.
+
+    ``project_count`` is the total the success / failure percentages are
+    computed against. ``results.stats`` supplies the counters and
+    ``results.failed_projects`` the per-project ``CalledProcessError`` whose
+    return code and captured output are printed.
+
+    The return value is ``results.stats["failed"]``.
+    """
+    stats = results.stats
+
+    def _percent(count: int) -> float:
+        # An empty run has nothing to divide by; report 0% rather than crash
+        if not project_count:
+            return 0.0
+        return round(((count / project_count) * 100), 2)
+
+    failed_pct = _percent(stats["failed"])
+    success_pct = _percent(stats["success"])
+
+    click.secho("-- primer results 📊 --\n", bold=True)
+    click.secho(
+        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
+        bold=True,
+        fg="green",
+    )
+    click.secho(
+        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
+        bold=bool(stats["failed"]),
+        fg="red",
+    )
+    s = "" if stats["disabled"] == 1 else "s"
+    click.echo(f" - {stats['disabled']} project{s} disabled by config")
+    s = "" if stats["wrong_py_ver"] == 1 else "s"
+    click.echo(
+        f" - {stats['wrong_py_ver']} project{s} skipped due to Python version"
+    )
+    click.echo(
+        f" - {stats['skipped_long_checkout']} skipped due to long checkout"
+    )
+
+    if results.failed_projects:
+        click.secho("\nFailed projects:\n", bold=True)
+
+    for project_name, project_cpe in results.failed_projects.items():
+        print(f"## {project_name}:")
+        print(f" - Returned {project_cpe.returncode}")
+        # Captured output is arbitrary bytes from a tool; a strict decode could
+        # raise in the middle of the report, so undecodable bytes are replaced.
+        if project_cpe.stderr:
+            stderr_text = project_cpe.stderr.decode("utf8", errors="replace")
+            print(f" - stderr:\n{stderr_text}")
+        if project_cpe.stdout:
+            stdout_text = project_cpe.stdout.decode("utf8", errors="replace")
+            print(f" - stdout:\n{stdout_text}")
+        print("")
+
+    return stats["failed"]
+
+
+def _flatten_cli_args(cli_args: List[Union[Sequence[str], str]]) -> List[str]:
+    """Collapse every non-string entry of ``cli_args`` into a single string.
+
+    Plain strings pass through untouched; any other entry is treated as a
+    sequence of string pieces and concatenated without a separator. This lets
+    a long argument be split over several strings in the JSON config.
+    """
+    return [
+        entry if isinstance(entry, str) else "".join(entry) for entry in cli_args
+    ]
+
+
+async def black_run(
+    project_name: str,
+    repo_path: Optional[Path],
+    project_config: Dict[str, Any],
+    results: Results,
+    no_diff: bool = False,
+) -> None:
+    """Run Black and record failures
+
+    Builds a ``black --check [--diff]`` command from ``project_config``
+    (``cli_arguments``, ``base_path``, ``timeout_seconds``), runs it with
+    ``repo_path`` as the working directory and updates ``results``:
+
+    - no ``repo_path``: counted as failed (recorded with return code 69)
+    - timeout: counted as failed
+    - exit 0: success unless ``expect_formatting_changes`` is set
+    - exit 1: success only if ``expect_formatting_changes`` is set
+    - exit > 1: counted as failed
+    - any other exit status: the ``CalledProcessError`` is re-raised
+
+    When ``project_name`` is "STDIN" (case-insensitive) the bytes of
+    ``repo_path`` are piped to ``black -`` instead of scanning a directory.
+    """
+    if not repo_path:
+        results.stats["failed"] += 1
+        results.failed_projects[project_name] = CalledProcessError(
+            69, [], f"{project_name} has no repo_path: {repo_path}".encode(), b""
+        )
+        return
+
+    stdin_test = project_name.upper() == "STDIN"
+    cmd = [str(which(BLACK_BINARY))]
+    if project_config.get("cli_arguments"):
+        cmd.extend(_flatten_cli_args(project_config["cli_arguments"]))
+    cmd.append("--check")
+    if not no_diff:
+        cmd.append("--diff")
+
+    # Workout if we should read in a python file or search from cwd
+    stdin = None
+    if stdin_test:
+        cmd.append("-")
+        stdin = repo_path.read_bytes()
+    elif "base_path" in project_config:
+        cmd.append(project_config["base_path"])
+    else:
+        cmd.append(".")
+
+    timeout = project_config.get("timeout_seconds", TEN_MINUTES_SECONDS)
+    with TemporaryDirectory() as tmp_path:
+        # Prevent reading top-level user configs by manipulating environment variables
+        env = {
+            **os.environ,
+            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
+            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
+        }
+
+        cwd_path = repo_path.parent if stdin_test else repo_path
+        try:
+            LOG.debug(f"Running black for {project_name}: {' '.join(cmd)}")
+            await _gen_check_output(
+                cmd, cwd=cwd_path, env=env, stdin=stdin, timeout=timeout
+            )
+        except asyncio.TimeoutError:
+            results.stats["failed"] += 1
+            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
+            # Stop here: falling through would also count this project as a
+            # success (or as a second failure), skewing the totals.
+            return
+        except CalledProcessError as cpe:
+            # TODO: Tune for smarter for higher signal
+            # If any other return value than 1 we raise - can disable project in config
+            if cpe.returncode == 1:
+                if not project_config["expect_formatting_changes"]:
+                    results.stats["failed"] += 1
+                    results.failed_projects[repo_path.name] = cpe
+                else:
+                    results.stats["success"] += 1
+                return
+            elif cpe.returncode > 1:
+                results.stats["failed"] += 1
+                results.failed_projects[repo_path.name] = cpe
+                return
+
+            LOG.error(f"Unknown error with {repo_path}")
+            raise
+
+    # If we get here and expect formatting changes something is up
+    if project_config["expect_formatting_changes"]:
+        results.stats["failed"] += 1
+        results.failed_projects[repo_path.name] = CalledProcessError(
+            0, cmd, b"Expected formatting changes but didn't get any!", b""
+        )
+        return
+
+    results.stats["success"] += 1
+
+
+async def git_checkout_or_rebase(
+    work_path: Path,
+    project_config: Dict[str, Any],
+    rebase: bool = False,
+    *,
+    depth: int = 1,
+) -> Optional[Path]:
+    """git Clone project or rebase
+
+    Clones ``project_config["git_clone_url"]`` (with ``--depth depth``) into
+    ``work_path``. If the checkout directory already exists it is returned
+    as-is, or updated with ``git pull --rebase`` when ``rebase`` is true.
+
+    Returns the checkout path, or ``None`` when git is not installed, the
+    URL has no usable repository name, or the git command fails / times out.
+    """
+    # Keep the raw which() result: str(None) is the truthy string "None" and
+    # would defeat the "git not found" check below.
+    git_bin = which(GIT_BINARY)
+    if not git_bin:
+        LOG.error("No git binary found")
+        return None
+
+    clone_url = project_config["git_clone_url"]
+    # git names the checkout after the last path component of the URL with a
+    # trailing ".git" removed. Only strip the suffix: ".git" may legitimately
+    # occur inside the name (e.g. "foo.github.io").
+    repo_name = urlparse(clone_url).path.rstrip("/").rsplit("/", 1)[-1]
+    if repo_name.endswith(".git"):
+        repo_name = repo_name[: -len(".git")]
+    if not repo_name:
+        LOG.error(f"Unable to work out a repository name from {clone_url}")
+        return None
+
+    repo_path: Path = work_path / repo_name
+    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
+    cwd = work_path
+    if repo_path.exists() and rebase:
+        cmd = [git_bin, "pull", "--rebase"]
+        cwd = repo_path
+    elif repo_path.exists():
+        return repo_path
+
+    try:
+        await _gen_check_output(cmd, cwd=cwd)
+    except (asyncio.TimeoutError, CalledProcessError) as e:
+        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
+        return None
+
+    return repo_path
+
+
+def handle_PermissionError(
+    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
+) -> None:
+    """
+    Error callback for shutil.rmtree that retries permission failures.
+
+    ``func`` is the operation that failed, ``path`` the path it was given and
+    ``exc`` an exception-info triple whose second item is the exception.
+
+    When ``func`` is 'os.rmdir' or 'os.unlink' and the error is EACCES
+    (i.e. Permission denied), the path is made readable, writable and
+    executable by everyone and the failed operation is attempted once more.
+
+    In every other case the error currently being handled is re-raised, as
+    this function can't deal with it.
+    """
+    error = exc[1]
+    LOG.debug(f"Handling {error} from {func.__name__}... ")
+
+    retryable = func in (os.rmdir, os.unlink) and error.errno == errno.EACCES
+    if not retryable:
+        # Bare raise: relies on being called while the original exception is
+        # still being handled, which is how it is used as an rmtree callback.
+        raise
+
+    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
+    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
+    os.chmod(path, everyone_rwx)
+    func(path)  # Try the error causing delete operation again
+
+
+async def load_projects_queue(
+    config_path: Path,
+) -> Tuple[Dict[str, Any], asyncio.Queue]:
+    """Read the JSON config and queue up every project name, sorted.
+
+    Returns the parsed config together with a queue sized to hold exactly the
+    names found under the config's "projects" key.
+    """
+    config = json.loads(config_path.read_text())
+
+    # TODO: Offer more options here
+    # e.g. Run on X random packages or specific sub list etc.
+    names = sorted(config["projects"])
+    queue: asyncio.Queue = asyncio.Queue(maxsize=len(names))
+    for name in names:
+        # The queue was sized for every name, so this can never be full
+        queue.put_nowait(name)
+
+    return config, queue
+
+
+async def project_runner(
+    idx: int,
+    config: Dict[str, Any],
+    queue: asyncio.Queue,
+    work_path: Path,
+    results: Results,
+    long_checkouts: bool = False,
+    rebase: bool = False,
+    keep: bool = False,
+    no_diff: bool = False,
+) -> None:
+    """Worker: pull project names off ``queue`` until it is empty.
+
+    For each project the config decides whether it is skipped (disabled,
+    wrong Python version, or a long checkout while ``long_checkouts`` is
+    off). Otherwise the project is checked out under ``work_path``, Black is
+    run on it and the outcome is recorded in ``results``. The checkout is
+    deleted afterwards unless ``keep`` is set. The special "STDIN" project
+    uses this source file as input and is never checked out or deleted.
+    """
+    event_loop = asyncio.get_event_loop()
+    major, minor = version_info[0], version_info[1]
+    py_version = f"{major}.{minor}"
+    while True:
+        try:
+            project_name = queue.get_nowait()
+        except asyncio.QueueEmpty:
+            LOG.debug(f"project_runner {idx} exiting")
+            return
+        LOG.debug(f"worker {idx} working on {project_name}")
+
+        project_config = config["projects"][project_name]
+
+        # Disabled by config?
+        if project_config.get("disabled"):
+            results.stats["disabled"] += 1
+            LOG.info(f"Skipping {project_name} as it's disabled via config")
+            continue
+
+        # Is this interpreter version one the project is enabled for?
+        enabled_versions = project_config["py_versions"]
+        if not ("all" in enabled_versions or py_version in enabled_versions):
+            results.stats["wrong_py_ver"] += 1
+            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
+            continue
+
+        # Big projects / long checkouts only run when asked for
+        if not long_checkouts and project_config["long_checkout"]:
+            results.stats["skipped_long_checkout"] += 1
+            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
+            continue
+
+        is_stdin = project_name.upper() == "STDIN"
+        repo_path: Optional[Path]
+        if is_stdin:
+            repo_path = Path(__file__)
+        else:
+            repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
+            if not repo_path:
+                # NOTE(review): a failed checkout is skipped without touching
+                # any counter - confirm that is intended.
+                continue
+        await black_run(project_name, repo_path, project_config, results, no_diff)
+
+        if not (keep or is_stdin):
+            LOG.debug(f"Removing {repo_path}")
+            remove_checkout = partial(
+                rmtree, path=repo_path, onerror=handle_PermissionError
+            )
+            # rmtree blocks, so run it in the default executor
+            await event_loop.run_in_executor(None, remove_checkout)
+
+        LOG.info(f"Finished {project_name}")
+
+
+async def process_queue(
+    config_file: str,
+    work_path: Path,
+    workers: int,
+    keep: bool = False,
+    long_checkouts: bool = False,
+    rebase: bool = False,
+    no_diff: bool = False,
+) -> int:
+    """
+    Process the queue with X workers and evaluate results
+    - Success is gauged via the config "expect_formatting_changes"
+
+    Loads the projects listed in ``config_file``, starts ``workers``
+    concurrent ``project_runner`` coroutines over them and prints the
+    summary once they have all finished.
+
+    Integer return equals the number of failed projects
+    (or -1 when the config contains no projects)
+    """
+    # Hand Results explicit, brand-new dicts so nothing recorded by an earlier
+    # run in the same process can show up in this one.
+    results = Results(
+        stats={
+            "disabled": 0,
+            "failed": 0,
+            "skipped_long_checkout": 0,
+            "success": 0,
+            "wrong_py_ver": 0,
+        },
+        failed_projects={},
+    )
+
+    config, queue = await load_projects_queue(Path(config_file))
+    project_count = queue.qsize()
+    s = "" if project_count == 1 else "s"
+    LOG.info(f"{project_count} project{s} to run Black over")
+    if project_count < 1:
+        return -1
+
+    s = "" if workers == 1 else "s"
+    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
+    # Wait until we finish running all the projects before analyzing
+    await asyncio.gather(
+        *(
+            project_runner(
+                i,
+                config,
+                queue,
+                work_path,
+                results,
+                long_checkouts,
+                rebase,
+                keep,
+                no_diff,
+            )
+            for i in range(workers)
+        )
+    )
+
+    LOG.info("Analyzing results")
+    return analyze_results(project_count, results)
+
+
+# This module only provides helpers; executing it directly is an error.
+if __name__ == "__main__": # pragma: nocover
+ raise NotImplementedError("lib is a library, funnily enough.")