scripts/diff_shades_gha_helper.py

   1 """Helper script for psf/black's diff-shades Github Actions integration.
   2
   3 diff-shades is a tool for analyzing what happens when you run Black on
   4 OSS code capturing it for comparisons or other usage. It's used here to
   5 help measure the impact of a change *before* landing it (in particular
   6 posting a comment on completion for PRs).
   7
   8 This script exists as a more maintainable alternative to using inline
   9 JavaScript in the workflow YAML files. The revision configuration and
  10 resolving, caching, and PR comment logic is contained here.
  11
  12 For more information, please see the developer docs:
  13
  14 https://black.readthedocs.io/en/latest/contributing/gauging_changes.html#diff-shades
  15 """
  16
  17 import json
  18 import os
  19 import platform
  20 import pprint
  21 import subprocess
  22 import sys
  23 import zipfile
  24 from io import BytesIO
  25 from pathlib import Path
  26 from typing import Any, Dict, Optional, Tuple
  27
  28 import click
  29 import urllib3
  30 from packaging.version import Version
  31
  32 if sys.version_info >= (3, 8):
  33     from typing import Final, Literal
  34 else:
  35     from typing_extensions import Final, Literal
  36
# File the half-completed PR comment body is written to by `comment-body`; the
# same name is used to look up the artifact and its zip member in
# `comment-details`.
COMMENT_BODY_FILE: Final = ".pr-comment-body.md"
# Name of the workflow step whose log the PR comment links to.
DIFF_STEP_NAME: Final = "Generate HTML diff report"
# Documentation link placed in the footer of the PR comment.
DOCS_URL: Final = (
    "https://black.readthedocs.io/en/latest/"
    "contributing/gauging_changes.html#diff-shades"
)
# Sent as the User-Agent header on every request issued through http_get().
USER_AGENT: Final = f"psf/black diff-shades workflow via urllib3/{urllib3.__version__}"
# Number of leading SHA characters kept when building analysis names.
SHA_LENGTH: Final = 10
# Optional; when set, http_get() sends it as an Authorization header to URLs
# containing "github".
GH_API_TOKEN: Final = os.getenv("GITHUB_TOKEN")
# Repository slug interpolated into the GitHub API URLs below.
REPO: Final = os.getenv("GITHUB_REPOSITORY", default="psf/black")
# Connection pool shared by all HTTP requests made by this script.
http = urllib3.PoolManager()
  48
  49
  50 def set_output(name: str, value: str) -> None:
  51     if len(value) < 200:
  52         print(f"[INFO]: setting '{name}' to '{value}'")
  53     else:
  54         print(f"[INFO]: setting '{name}' to [{len(value)} chars]")
  55     print(f"::set-output name={name}::{value}")
  56
  57
  58 def http_get(
  59     url: str,
  60     is_json: bool = True,
  61     headers: Optional[Dict[str, str]] = None,
  62     **kwargs: Any,
  63 ) -> Any:
  64     headers = headers or {}
  65     headers["User-Agent"] = USER_AGENT
  66     if "github" in url:
  67         if GH_API_TOKEN:
  68             headers["Authorization"] = f"token {GH_API_TOKEN}"
  69         headers["Accept"] = "application/vnd.github.v3+json"
  70     r = http.request("GET", url, headers=headers, **kwargs)
  71     if is_json:
  72         data = json.loads(r.data.decode("utf-8"))
  73     else:
  74         data = r.data
  75     print(f"[INFO]: issued GET request for {r.geturl()}")
  76     if not (200 <= r.status < 300):
  77         pprint.pprint(dict(r.info()))
  78         pprint.pprint(data)
  79         raise RuntimeError(f"unexpected status code: {r.status}")
  80
  81     return data
  82
  83
  84 def get_branch_or_tag_revision(sha: str = "main") -> str:
  85     data = http_get(
  86         f"https://api.github.com/repos/{REPO}/commits",
  87         fields={"per_page": "1", "sha": sha},
  88     )
  89     assert isinstance(data[0]["sha"], str)
  90     return data[0]["sha"]
  91
  92
  93 def get_pr_revision(pr: int) -> str:
  94     data = http_get(f"https://api.github.com/repos/{REPO}/pulls/{pr}")
  95     assert isinstance(data["head"]["sha"], str)
  96     return data["head"]["sha"]
  97
  98
  99 def get_pypi_version() -> Version:
 100     data = http_get("https://pypi.org/pypi/black/json")
 101     versions = [Version(v) for v in data["releases"]]
 102     sorted_versions = sorted(versions, reverse=True)
 103     return sorted_versions[0]
 104
 105
 106 def resolve_custom_ref(ref: str) -> Tuple[str, str]:
 107     if ref == ".pypi":
 108         # Special value to get latest PyPI version.
 109         version = str(get_pypi_version())
 110         return version, f"git checkout {version}"
 111
 112     if ref.startswith(".") and ref[1:].isnumeric():
 113         # Special format to get a PR.
 114         number = int(ref[1:])
 115         revision = get_pr_revision(number)
 116         return (
 117             f"pr-{number}-{revision[:SHA_LENGTH]}",
 118             f"gh pr checkout {number} && git merge origin/main",
 119         )
 120
 121     # Alright, it's probably a branch, tag, or a commit SHA, let's find out!
 122     revision = get_branch_or_tag_revision(ref)
 123     # We're cutting the revision short as we might be operating on a short commit SHA.
 124     if revision == ref or revision[: len(ref)] == ref:
 125         # It's *probably* a commit as the resolved SHA isn't different from the REF.
 126         return revision[:SHA_LENGTH], f"git checkout {revision}"
 127
 128     # It's *probably* a pre-existing branch or tag, yay!
 129     return f"{ref}-{revision[:SHA_LENGTH]}", f"git checkout {revision}"
 130
 131
@click.group()
def main() -> None:
    # Root command group: the subcommands in this file attach to it through
    # @main.command(). Kept as a comment because click would expose a
    # docstring here as the group's --help text.
    pass
 135
 136
 137 @main.command("config", help="Acquire run configuration and metadata.")
 138 @click.argument(
 139     "event", type=click.Choice(["push", "pull_request", "workflow_dispatch"])
 140 )
 141 @click.argument("custom_baseline", required=False)
 142 @click.argument("custom_target", required=False)
 143 @click.option("--baseline-args", default="")
 144 def config(
 145     event: Literal["push", "pull_request", "workflow_dispatch"],
 146     custom_baseline: Optional[str],
 147     custom_target: Optional[str],
 148     baseline_args: str,
 149 ) -> None:
 150     import diff_shades
 151
 152     if event == "push":
 153         # Push on main, let's use PyPI Black as the baseline.
 154         baseline_name = str(get_pypi_version())
 155         baseline_cmd = f"git checkout {baseline_name}"
 156         target_rev = os.getenv("GITHUB_SHA")
 157         assert target_rev is not None
 158         target_name = "main-" + target_rev[:SHA_LENGTH]
 159         target_cmd = f"git checkout {target_rev}"
 160
 161     elif event == "pull_request":
 162         # PR, let's use main as the baseline.
 163         baseline_rev = get_branch_or_tag_revision()
 164         baseline_name = "main-" + baseline_rev[:SHA_LENGTH]
 165         baseline_cmd = f"git checkout {baseline_rev}"
 166
 167         pr_ref = os.getenv("GITHUB_REF")
 168         assert pr_ref is not None
 169         pr_num = int(pr_ref[10:-6])
 170         pr_rev = get_pr_revision(pr_num)
 171         target_name = f"pr-{pr_num}-{pr_rev[:SHA_LENGTH]}"
 172         target_cmd = f"gh pr checkout {pr_num} && git merge origin/main"
 173
 174         # These are only needed for the PR comment.
 175         set_output("baseline-sha", baseline_rev)
 176         set_output("target-sha", pr_rev)
 177     else:
 178         assert custom_baseline is not None and custom_target is not None
 179         baseline_name, baseline_cmd = resolve_custom_ref(custom_baseline)
 180         target_name, target_cmd = resolve_custom_ref(custom_target)
 181         if baseline_name == target_name:
 182             # Alright we're using the same revisions but we're (hopefully) using
 183             # different command line arguments, let's support that too.
 184             baseline_name += "-1"
 185             target_name += "-2"
 186
 187     set_output("baseline-analysis", baseline_name + ".json")
 188     set_output("baseline-setup-cmd", baseline_cmd)
 189     set_output("target-analysis", target_name + ".json")
 190     set_output("target-setup-cmd", target_cmd)
 191
 192     key = f"{platform.system()}-{platform.python_version()}-{diff_shades.__version__}"
 193     key += f"-{baseline_name}-{baseline_args.encode('utf-8').hex()}"
 194     set_output("baseline-cache-key", key)
 195
 196
 197 @main.command("comment-body", help="Generate the body for a summary PR comment.")
 198 @click.argument("baseline", type=click.Path(exists=True, path_type=Path))
 199 @click.argument("target", type=click.Path(exists=True, path_type=Path))
 200 @click.argument("baseline-sha")
 201 @click.argument("target-sha")
 202 def comment_body(
 203     baseline: Path, target: Path, baseline_sha: str, target_sha: str
 204 ) -> None:
 205     # fmt: off
 206     cmd = [
 207         sys.executable, "-m", "diff_shades", "--no-color",
 208         "compare", str(baseline), str(target), "--quiet", "--check"
 209     ]
 210     # fmt: on
 211     proc = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8")
 212     if not proc.returncode:
 213         body = (
 214             f"**diff-shades** reports zero changes comparing this PR ({target_sha}) to"
 215             f" main ({baseline_sha}).\n\n---\n\n"
 216         )
 217     else:
 218         body = (
 219             f"**diff-shades** results comparing this PR ({target_sha}) to main"
 220             f" ({baseline_sha}). The full diff is [available in the logs]"
 221             f'($job-diff-url) under the "{DIFF_STEP_NAME}" step.'
 222         )
 223         body += "\n```text\n" + proc.stdout.strip() + "\n```\n"
 224     body += (
 225         f"[**What is this?**]({DOCS_URL}) | [Workflow run]($workflow-run-url) |"
 226         " [diff-shades documentation](https://github.com/ichard26/diff-shades#readme)"
 227     )
 228     print(f"[INFO]: writing half-completed comment body to {COMMENT_BODY_FILE}")
 229     with open(COMMENT_BODY_FILE, "w", encoding="utf-8") as f:
 230         f.write(body)
 231
 232
 233 @main.command("comment-details", help="Get PR comment resources from a workflow run.")
 234 @click.argument("run-id")
 235 def comment_details(run_id: str) -> None:
 236     data = http_get(f"https://api.github.com/repos/{REPO}/actions/runs/{run_id}")
 237     if data["event"] != "pull_request":
 238         set_output("needs-comment", "false")
 239         return
 240
 241     set_output("needs-comment", "true")
 242     pulls = data["pull_requests"]
 243     assert len(pulls) == 1
 244     pr_number = pulls[0]["number"]
 245     set_output("pr-number", str(pr_number))
 246
 247     jobs_data = http_get(data["jobs_url"])
 248     assert len(jobs_data["jobs"]) == 1, "multiple jobs not supported nor tested"
 249     job = jobs_data["jobs"][0]
 250     steps = {s["name"]: s["number"] for s in job["steps"]}
 251     diff_step = steps[DIFF_STEP_NAME]
 252     diff_url = job["html_url"] + f"#step:{diff_step}:1"
 253
 254     artifacts_data = http_get(data["artifacts_url"])["artifacts"]
 255     artifacts = {a["name"]: a["archive_download_url"] for a in artifacts_data}
 256     body_url = artifacts[COMMENT_BODY_FILE]
 257     body_zip = BytesIO(http_get(body_url, is_json=False))
 258     with zipfile.ZipFile(body_zip) as zfile:
 259         with zfile.open(COMMENT_BODY_FILE) as rf:
 260             body = rf.read().decode("utf-8")
 261     # It's more convenient to fill in these fields after the first workflow is done
 262     # since this command can access the workflows API (doing it in the main workflow
 263     # while it's still in progress seems impossible).
 264     body = body.replace("$workflow-run-url", data["html_url"])
 265     body = body.replace("$job-diff-url", diff_url)
 266     # # https://github.community/t/set-output-truncates-multiline-strings/16852/3
 267     escaped = body.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D")
 268     set_output("comment-body", escaped)
 269
 270
# Run the click command group only when executed as a script, not on import.
if __name__ == "__main__":
    main()