scripts/diff_shades_gha_helper.py

   1 """Helper script for psf/black's diff-shades Github Actions integration.
   2
diff-shades is a tool for analyzing what happens when you run Black on
OSS code, capturing the results for comparison or other uses. It's used
here to help measure the impact of a change *before* landing it (in
particular, by posting a comment on completion for PRs).
   7
This script exists as a more maintainable alternative to using inline
JavaScript in the workflow YAML files. The logic for configuring and
resolving revisions, caching, and PR comments is contained here.
  11
  12 For more information, please see the developer docs:
  13
  14 https://black.readthedocs.io/en/latest/contributing/gauging_changes.html#diff-shades
  15 """
  16
  17 import json
  18 import os
  19 import platform
  20 import pprint
  21 import subprocess
  22 import sys
  23 import zipfile
  24 from io import BytesIO
  25 from pathlib import Path
  26 from typing import Any, Optional, Tuple
  27
  28 import click
  29 import urllib3
  30 from packaging.version import Version
  31
  32 if sys.version_info >= (3, 8):
  33     from typing import Final, Literal
  34 else:
  35     from typing_extensions import Final, Literal
  36
  37 COMMENT_FILE: Final = ".pr-comment.json"
  38 DIFF_STEP_NAME: Final = "Generate HTML diff report"
  39 DOCS_URL: Final = (
  40     "https://black.readthedocs.io/en/latest/"
  41     "contributing/gauging_changes.html#diff-shades"
  42 )
  43 USER_AGENT: Final = f"psf/black diff-shades workflow via urllib3/{urllib3.__version__}"
  44 SHA_LENGTH: Final = 10
  45 GH_API_TOKEN: Final = os.getenv("GITHUB_TOKEN")
  46 REPO: Final = os.getenv("GITHUB_REPOSITORY", default="psf/black")
  47 http = urllib3.PoolManager()
  48
  49
  50 def set_output(name: str, value: str) -> None:
  51     if len(value) < 200:
  52         print(f"[INFO]: setting '{name}' to '{value}'")
  53     else:
  54         print(f"[INFO]: setting '{name}' to [{len(value)} chars]")
  55     print(f"::set-output name={name}::{value}")
  56
  57
  58 def http_get(url: str, is_json: bool = True, **kwargs: Any) -> Any:
  59     headers = kwargs.get("headers") or {}
  60     headers["User-Agent"] = USER_AGENT
  61     if "github" in url:
  62         if GH_API_TOKEN:
  63             headers["Authorization"] = f"token {GH_API_TOKEN}"
  64         headers["Accept"] = "application/vnd.github.v3+json"
  65     kwargs["headers"] = headers
  66
  67     r = http.request("GET", url, **kwargs)
  68     if is_json:
  69         data = json.loads(r.data.decode("utf-8"))
  70     else:
  71         data = r.data
  72     print(f"[INFO]: issued GET request for {r.geturl()}")
  73     if not (200 <= r.status < 300):
  74         pprint.pprint(dict(r.info()))
  75         pprint.pprint(data)
  76         raise RuntimeError(f"unexpected status code: {r.status}")
  77
  78     return data
  79
  80
  81 def get_branch_or_tag_revision(sha: str = "main") -> str:
  82     data = http_get(
  83         f"https://api.github.com/repos/{REPO}/commits",
  84         fields={"per_page": "1", "sha": sha},
  85     )
  86     assert isinstance(data[0]["sha"], str)
  87     return data[0]["sha"]
  88
  89
  90 def get_pr_revision(pr: int) -> str:
  91     data = http_get(f"https://api.github.com/repos/{REPO}/pulls/{pr}")
  92     assert isinstance(data["head"]["sha"], str)
  93     return data["head"]["sha"]
  94
  95
  96 def get_pypi_version() -> Version:
  97     data = http_get("https://pypi.org/pypi/black/json")
  98     versions = [Version(v) for v in data["releases"]]
  99     sorted_versions = sorted(versions, reverse=True)
 100     return sorted_versions[0]
 101
 102
 103 def resolve_custom_ref(ref: str) -> Tuple[str, str]:
 104     if ref == ".pypi":
 105         # Special value to get latest PyPI version.
 106         version = str(get_pypi_version())
 107         return version, f"git checkout {version}"
 108
 109     if ref.startswith(".") and ref[1:].isnumeric():
 110         # Special format to get a PR.
 111         number = int(ref[1:])
 112         revision = get_pr_revision(number)
 113         return (
 114             f"pr-{number}-{revision[:SHA_LENGTH]}",
 115             f"gh pr checkout {number} && git merge origin/main",
 116         )
 117
 118     # Alright, it's probably a branch, tag, or a commit SHA, let's find out!
 119     revision = get_branch_or_tag_revision(ref)
 120     # We're cutting the revision short as we might be operating on a short commit SHA.
 121     if revision == ref or revision[: len(ref)] == ref:
 122         # It's *probably* a commit as the resolved SHA isn't different from the REF.
 123         return revision[:SHA_LENGTH], f"git checkout {revision}"
 124
 125     # It's *probably* a pre-existing branch or tag, yay!
 126     return f"{ref}-{revision[:SHA_LENGTH]}", f"git checkout {revision}"
 127
 128
 129 @click.group()
 130 def main() -> None:
 131     pass
 132
 133
 134 @main.command("config", help="Acquire run configuration and metadata.")
 135 @click.argument(
 136     "event", type=click.Choice(["push", "pull_request", "workflow_dispatch"])
 137 )
 138 @click.argument("custom_baseline", required=False)
 139 @click.argument("custom_target", required=False)
 140 @click.option("--baseline-args", default="")
 141 def config(
 142     event: Literal["push", "pull_request", "workflow_dispatch"],
 143     custom_baseline: Optional[str],
 144     custom_target: Optional[str],
 145     baseline_args: str,
 146 ) -> None:
 147     import diff_shades
 148
 149     if event == "push":
 150         # Push on main, let's use PyPI Black as the baseline.
 151         baseline_name = str(get_pypi_version())
 152         baseline_cmd = f"git checkout {baseline_name}"
 153         target_rev = os.getenv("GITHUB_SHA")
 154         assert target_rev is not None
 155         target_name = "main-" + target_rev[:SHA_LENGTH]
 156         target_cmd = f"git checkout {target_rev}"
 157
 158     elif event == "pull_request":
 159         # PR, let's use main as the baseline.
 160         baseline_rev = get_branch_or_tag_revision()
 161         baseline_name = "main-" + baseline_rev[:SHA_LENGTH]
 162         baseline_cmd = f"git checkout {baseline_rev}"
 163
 164         pr_ref = os.getenv("GITHUB_REF")
 165         assert pr_ref is not None
 166         pr_num = int(pr_ref[10:-6])
 167         pr_rev = get_pr_revision(pr_num)
 168         target_name = f"pr-{pr_num}-{pr_rev[:SHA_LENGTH]}"
 169         target_cmd = f"gh pr checkout {pr_num} && git merge origin/main"
 170
 171         # These are only needed for the PR comment.
 172         set_output("baseline-sha", baseline_rev)
 173         set_output("target-sha", pr_rev)
 174     else:
 175         assert custom_baseline is not None and custom_target is not None
 176         baseline_name, baseline_cmd = resolve_custom_ref(custom_baseline)
 177         target_name, target_cmd = resolve_custom_ref(custom_target)
 178         if baseline_name == target_name:
 179             # Alright we're using the same revisions but we're (hopefully) using
 180             # different command line arguments, let's support that too.
 181             baseline_name += "-1"
 182             target_name += "-2"
 183
 184     set_output("baseline-analysis", baseline_name + ".json")
 185     set_output("baseline-setup-cmd", baseline_cmd)
 186     set_output("target-analysis", target_name + ".json")
 187     set_output("target-setup-cmd", target_cmd)
 188
 189     key = f"{platform.system()}-{platform.python_version()}-{diff_shades.__version__}"
 190     key += f"-{baseline_name}-{baseline_args.encode('utf-8').hex()}"
 191     set_output("baseline-cache-key", key)
 192
 193
 194 @main.command("comment-body", help="Generate the body for a summary PR comment.")
 195 @click.argument("baseline", type=click.Path(exists=True, path_type=Path))
 196 @click.argument("target", type=click.Path(exists=True, path_type=Path))
 197 @click.argument("baseline-sha")
 198 @click.argument("target-sha")
 199 @click.argument("pr-num", type=int)
 200 def comment_body(
 201     baseline: Path, target: Path, baseline_sha: str, target_sha: str, pr_num: int
 202 ) -> None:
 203     # fmt: off
 204     cmd = [
 205         sys.executable, "-m", "diff_shades", "--no-color",
 206         "compare", str(baseline), str(target), "--quiet", "--check"
 207     ]
 208     # fmt: on
 209     proc = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8")
 210     if not proc.returncode:
 211         body = (
 212             f"**diff-shades** reports zero changes comparing this PR ({target_sha}) to"
 213             f" main ({baseline_sha}).\n\n---\n\n"
 214         )
 215     else:
 216         body = (
 217             f"**diff-shades** results comparing this PR ({target_sha}) to main"
 218             f" ({baseline_sha}). The full diff is [available in the logs]"
 219             f'($job-diff-url) under the "{DIFF_STEP_NAME}" step.'
 220         )
 221         body += "\n```text\n" + proc.stdout.strip() + "\n```\n"
 222     body += (
 223         f"[**What is this?**]({DOCS_URL}) | [Workflow run]($workflow-run-url) |"
 224         " [diff-shades documentation](https://github.com/ichard26/diff-shades#readme)"
 225     )
 226     print(f"[INFO]: writing comment details to {COMMENT_FILE}")
 227     with open(COMMENT_FILE, "w", encoding="utf-8") as f:
 228         json.dump({"body": body, "pr-number": pr_num}, f)
 229
 230
 231 @main.command("comment-details", help="Get PR comment resources from a workflow run.")
 232 @click.argument("run-id")
 233 def comment_details(run_id: str) -> None:
 234     data = http_get(f"https://api.github.com/repos/{REPO}/actions/runs/{run_id}")
 235     if data["event"] != "pull_request" or data["conclusion"] == "cancelled":
 236         set_output("needs-comment", "false")
 237         return
 238
 239     set_output("needs-comment", "true")
 240     jobs = http_get(data["jobs_url"])["jobs"]
 241     assert len(jobs) == 1, "multiple jobs not supported nor tested"
 242     job = jobs[0]
 243     steps = {s["name"]: s["number"] for s in job["steps"]}
 244     diff_step = steps[DIFF_STEP_NAME]
 245     diff_url = job["html_url"] + f"#step:{diff_step}:1"
 246
 247     artifacts_data = http_get(data["artifacts_url"])["artifacts"]
 248     artifacts = {a["name"]: a["archive_download_url"] for a in artifacts_data}
 249     comment_url = artifacts[COMMENT_FILE]
 250     comment_zip = BytesIO(http_get(comment_url, is_json=False))
 251     with zipfile.ZipFile(comment_zip) as zfile:
 252         with zfile.open(COMMENT_FILE) as rf:
 253             comment_data = json.loads(rf.read().decode("utf-8"))
 254
 255     set_output("pr-number", str(comment_data["pr-number"]))
 256     body = comment_data["body"]
 257     # It's more convenient to fill in these fields after the first workflow is done
 258     # since this command can access the workflows API (doing it in the main workflow
 259     # while it's still in progress seems impossible).
 260     body = body.replace("$workflow-run-url", data["html_url"])
 261     body = body.replace("$job-diff-url", diff_url)
 262     # https://github.community/t/set-output-truncates-multiline-strings/16852/3
 263     escaped = body.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D")
 264     set_output("comment-body", escaped)
 265
 266
# Run the command group only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()