git.madduck.net Git - etc/vim.git/blob - scripts/diff_shades_gha_helper.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Github now supports .git-blame-ignore-revs (GH-2948)
[etc/vim.git] / scripts / diff_shades_gha_helper.py
1 """Helper script for psf/black's diff-shades Github Actions integration.
2
3 diff-shades is a tool for analyzing what happens when you run Black on
4 OSS code capturing it for comparisons or other usage. It's used here to
5 help measure the impact of a change *before* landing it (in particular
6 posting a comment on completion for PRs).
7
8 This script exists as a more maintainable alternative to using inline
9 Javascript in the workflow YAML files. The revision configuration and
10 resolving, caching, and PR comment logic is contained here.
11
12 For more information, please see the developer docs:
13
14 https://black.readthedocs.io/en/latest/contributing/gauging_changes.html#diff-shades
15 """
16
17 import json
18 import os
19 import platform
20 import pprint
21 import subprocess
22 import sys
23 import zipfile
24 from io import BytesIO
25 from pathlib import Path
26 from typing import Any, Optional, Tuple
27
28 import click
29 import urllib3
30 from packaging.version import Version
31
32 if sys.version_info >= (3, 8):
33     from typing import Final, Literal
34 else:
35     from typing_extensions import Final, Literal
36
# File name used both for the JSON payload written by `comment-body` and for the
# artifact / zip member looked up by `comment-details`.
COMMENT_FILE: Final = ".pr-comment.json"
# Workflow step whose log is linked from the PR comment; `comment-details` looks
# the step up by this exact name.
DIFF_STEP_NAME: Final = "Generate HTML diff report"
DOCS_URL: Final = (
    "https://black.readthedocs.io/en/latest/contributing/"
    "gauging_changes.html#diff-shades"
)
USER_AGENT: Final = f"psf/black diff-shades workflow via urllib3/{urllib3.__version__}"
# Number of leading SHA characters kept when building analysis names.
SHA_LENGTH: Final = 10
# None when GITHUB_TOKEN is not set; requests are then sent unauthenticated.
GH_API_TOKEN: Final = os.environ.get("GITHUB_TOKEN")
REPO: Final = os.environ.get("GITHUB_REPOSITORY", "psf/black")
# One shared connection pool for every request issued by this script.
http = urllib3.PoolManager()
48
49
def set_output(name: str, value: str) -> None:
    """Log a step output and emit it as a ``::set-output`` workflow command.

    Values of 200 characters or more are logged by length only so the
    ``[INFO]`` line stays readable; the workflow command itself always
    carries the full value.
    """
    # NOTE(review): GitHub appears to have deprecated the ``::set-output``
    # command in favour of the ``GITHUB_OUTPUT`` file -- confirm before relying
    # on this long term.
    shown = f"'{value}'" if len(value) < 200 else f"[{len(value)} chars]"
    print(f"[INFO]: setting '{name}' to {shown}")
    print(f"::set-output name={name}::{value}")
56
57
def http_get(url: str, is_json: bool = True, **kwargs: Any) -> Any:
    """Issue a GET request through the shared pool and return the response body.

    Args:
        url: Address to fetch.
        is_json: When true, the body is decoded as UTF-8 and parsed as JSON;
            otherwise the raw bytes are returned.
        **kwargs: Passed through to ``http.request``. A ``headers`` entry is
            copied and extended, never mutated in place.

    Returns:
        The parsed JSON document, or the raw body bytes when ``is_json`` is
        false.

    Raises:
        RuntimeError: If the response status is outside the 2xx range. The
            response headers and body are printed first to aid debugging.
    """
    # Work on a copy so a headers mapping supplied by the caller is untouched.
    headers = dict(kwargs.get("headers") or {})
    headers["User-Agent"] = USER_AGENT
    if "github" in url:
        if GH_API_TOKEN:
            headers["Authorization"] = f"token {GH_API_TOKEN}"
        headers["Accept"] = "application/vnd.github.v3+json"
    kwargs["headers"] = headers

    r = http.request("GET", url, **kwargs)
    print(f"[INFO]: issued GET request for {r.geturl()}")

    # Check the status before parsing: error responses are not guaranteed to be
    # JSON, and a decode failure here would hide the actual status code.
    if not (200 <= r.status < 300):
        pprint.pprint(dict(r.info()))
        try:
            diagnostic = json.loads(r.data.decode("utf-8")) if is_json else r.data
        except ValueError:
            # Covers both invalid UTF-8 and invalid JSON; show the raw bytes.
            diagnostic = r.data
        pprint.pprint(diagnostic)
        raise RuntimeError(f"unexpected status code: {r.status}")

    if is_json:
        return json.loads(r.data.decode("utf-8"))
    return r.data
79
80
def get_branch_or_tag_revision(sha: str = "main") -> str:
    """Return the SHA of the first commit GitHub lists for *sha*.

    The repository's commits endpoint is queried with ``per_page=1`` and
    *sha* as the starting point (``main`` by default); the ``sha`` field of
    the first entry is returned.
    """
    query = {"per_page": "1", "sha": sha}
    commits = http_get(f"https://api.github.com/repos/{REPO}/commits", fields=query)
    revision = commits[0]["sha"]
    assert isinstance(revision, str)
    return revision
88
89
def get_pr_revision(pr: int) -> str:
    """Return the head commit SHA reported by the GitHub API for pull request *pr*."""
    pull = http_get(f"https://api.github.com/repos/{REPO}/pulls/{pr}")
    head_sha = pull["head"]["sha"]
    assert isinstance(head_sha, str)
    return head_sha
94
95
def get_pypi_version() -> Version:
    """Return the highest version among the ``releases`` PyPI lists for black."""
    metadata = http_get("https://pypi.org/pypi/black/json")
    candidates = [Version(raw) for raw in metadata["releases"]]
    # Highest first, so the winner sits at index 0.
    candidates.sort(reverse=True)
    return candidates[0]
101
102
def resolve_custom_ref(ref: str) -> Tuple[str, str]:
    """Resolve a user-supplied reference into an analysis name and setup command.

    Three kinds of *ref* are understood:

    * ``.pypi`` -- the highest version found on PyPI;
    * ``.<number>`` -- the pull request with that number;
    * anything else -- looked up via the commits API as a branch, tag, or
      (possibly abbreviated) commit SHA.

    Returns:
        A ``(name, command)`` pair: *name* labels the analysis and *command*
        is the shell command that checks the revision out.
    """
    if ref == ".pypi":
        # Special value to get latest PyPI version.
        version = str(get_pypi_version())
        return version, f"git checkout {version}"

    # isdecimal() rather than isnumeric(): the latter also accepts characters
    # such as "²" or "½", which int() rejects with a ValueError.
    if ref.startswith(".") and ref[1:].isdecimal():
        # Special format to get a PR.
        number = int(ref[1:])
        revision = get_pr_revision(number)
        return (
            f"pr-{number}-{revision[:SHA_LENGTH]}",
            f"gh pr checkout {number} && git merge origin/main",
        )

    # Alright, it's probably a branch, tag, or a commit SHA, let's find out!
    revision = get_branch_or_tag_revision(ref)
    # A prefix match (rather than equality) also covers abbreviated commit SHAs.
    if revision.startswith(ref):
        # It's *probably* a commit as the resolved SHA isn't different from the REF.
        return revision[:SHA_LENGTH], f"git checkout {revision}"

    # It's *probably* a pre-existing branch or tag, yay!
    return f"{ref}-{revision[:SHA_LENGTH]}", f"git checkout {revision}"
127
128
@click.group()
def main() -> None:
    # Root command group for this script; it performs no work of its own.
    # Kept as a comment rather than a docstring because click would surface a
    # docstring as the group's help text.
    return None
132
133
@main.command("config", help="Acquire run configuration and metadata.")
@click.argument(
    "event", type=click.Choice(["push", "pull_request", "workflow_dispatch"])
)
@click.argument("custom_baseline", required=False)
@click.argument("custom_target", required=False)
@click.option("--baseline-args", default="")
def config(
    event: Literal["push", "pull_request", "workflow_dispatch"],
    custom_baseline: Optional[str],
    custom_target: Optional[str],
    baseline_args: str,
) -> None:
    """Work out baseline/target revisions for a run and publish them as outputs.

    * ``push``: baseline is the highest PyPI release, target is ``GITHUB_SHA``.
    * ``pull_request``: baseline is ``main``, target is the PR named by
      ``GITHUB_REF``; both SHAs are additionally emitted for the PR comment.
    * ``workflow_dispatch``: both sides come from CUSTOM_BASELINE and
      CUSTOM_TARGET via ``resolve_custom_ref``.

    Emits the analysis file names, the setup commands and a cache key for
    the baseline analysis through ``set_output``.

    Raises:
        click.UsageError: If a ``workflow_dispatch`` run lacks a custom ref.
        click.ClickException: If a required environment variable is missing
            or ``GITHUB_REF`` does not look like a pull request ref.
    """
    # Imported here rather than at module level, as in the original layout
    # (presumably so the other commands work without diff-shades installed).
    import diff_shades

    if event == "push":
        # Push on main, let's use PyPI Black as the baseline.
        baseline_name = str(get_pypi_version())
        baseline_cmd = f"git checkout {baseline_name}"
        target_rev = os.getenv("GITHUB_SHA")
        if target_rev is None:
            raise click.ClickException("GITHUB_SHA must be set for push events")
        target_name = "main-" + target_rev[:SHA_LENGTH]
        target_cmd = f"git checkout {target_rev}"

    elif event == "pull_request":
        # PR, let's use main as the baseline.
        baseline_rev = get_branch_or_tag_revision()
        baseline_name = "main-" + baseline_rev[:SHA_LENGTH]
        baseline_cmd = f"git checkout {baseline_rev}"

        pr_ref = os.getenv("GITHUB_REF")
        if pr_ref is None:
            raise click.ClickException(
                "GITHUB_REF must be set for pull_request events"
            )
        # Expected shape: refs/pull/<number>/merge. Parse the number by
        # position instead of slicing fixed offsets, so an unexpected ref is
        # rejected rather than silently yielding a wrong PR number.
        ref_parts = pr_ref.split("/")
        if (
            len(ref_parts) < 3
            or ref_parts[:2] != ["refs", "pull"]
            or not ref_parts[2].isdecimal()
        ):
            raise click.ClickException(
                f"unexpected GITHUB_REF for a pull request: {pr_ref!r}"
            )
        pr_num = int(ref_parts[2])
        pr_rev = get_pr_revision(pr_num)
        target_name = f"pr-{pr_num}-{pr_rev[:SHA_LENGTH]}"
        target_cmd = f"gh pr checkout {pr_num} && git merge origin/main"

        # These are only needed for the PR comment.
        set_output("baseline-sha", baseline_rev)
        set_output("target-sha", pr_rev)
    else:
        if custom_baseline is None or custom_target is None:
            raise click.UsageError(
                "workflow_dispatch requires both CUSTOM_BASELINE and CUSTOM_TARGET"
            )
        baseline_name, baseline_cmd = resolve_custom_ref(custom_baseline)
        target_name, target_cmd = resolve_custom_ref(custom_target)
        if baseline_name == target_name:
            # Alright we're using the same revisions but we're (hopefully) using
            # different command line arguments, let's support that too.
            baseline_name += "-1"
            target_name += "-2"

    set_output("baseline-analysis", baseline_name + ".json")
    set_output("baseline-setup-cmd", baseline_cmd)
    set_output("target-analysis", target_name + ".json")
    set_output("target-setup-cmd", target_cmd)

    # The cache key covers everything the baseline analysis depends on here:
    # OS, Python version, diff-shades version, revision and the hex-encoded
    # baseline arguments.
    key = f"{platform.system()}-{platform.python_version()}-{diff_shades.__version__}"
    key += f"-{baseline_name}-{baseline_args.encode('utf-8').hex()}"
    set_output("baseline-cache-key", key)
192
193
@main.command("comment-body", help="Generate the body for a summary PR comment.")
@click.argument("baseline", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(exists=True, path_type=Path))
@click.argument("baseline-sha")
@click.argument("target-sha")
@click.argument("pr-num", type=int)
def comment_body(
    baseline: Path, target: Path, baseline_sha: str, target_sha: str, pr_num: int
) -> None:
    """Compare two analyses with diff-shades and write the PR comment payload.

    ``diff_shades compare --check`` is run on *baseline* and *target*. A zero
    exit status produces a "zero changes" message; any other status embeds
    the command's output in the comment. The body still contains the
    ``$job-diff-url`` and ``$workflow-run-url`` placeholders when it is
    written, together with the PR number, to ``COMMENT_FILE`` as JSON.
    """
    compare_cmd = [
        sys.executable,
        "-m",
        "diff_shades",
        "--no-color",
        "compare",
        str(baseline),
        str(target),
        "--quiet",
        "--check",
    ]
    result = subprocess.run(compare_cmd, stdout=subprocess.PIPE, encoding="utf-8")

    if result.returncode:
        report = result.stdout.strip()
        body = (
            f"**diff-shades** results comparing this PR ({target_sha}) to main"
            f" ({baseline_sha}). The full diff is [available in the logs]"
            f'($job-diff-url) under the "{DIFF_STEP_NAME}" step.'
            f"\n```text\n{report}\n```\n"
        )
    else:
        body = (
            f"**diff-shades** reports zero changes comparing this PR ({target_sha}) to"
            f" main ({baseline_sha}).\n\n---\n\n"
        )

    footer = (
        f"[**What is this?**]({DOCS_URL}) | [Workflow run]($workflow-run-url) |"
        " [diff-shades documentation](https://github.com/ichard26/diff-shades#readme)"
    )
    payload = {"body": body + footer, "pr-number": pr_num}

    print(f"[INFO]: writing comment details to {COMMENT_FILE}")
    with open(COMMENT_FILE, "w", encoding="utf-8") as f:
        json.dump(payload, f)
229
230
@main.command("comment-details", help="Get PR comment resources from a workflow run.")
@click.argument("run-id")
def comment_details(run_id: str) -> None:
    """Publish the PR number and final comment body for workflow run *run_id*.

    Runs that were not triggered by a pull request, or that were cancelled,
    only set ``needs-comment`` to ``false``. Otherwise the comment payload
    is read from the run's ``COMMENT_FILE`` artifact, its URL placeholders
    are filled in, and the result is emitted through ``set_output``.
    """
    run = http_get(f"https://api.github.com/repos/{REPO}/actions/runs/{run_id}")
    wants_comment = run["event"] == "pull_request" and run["conclusion"] != "cancelled"
    if not wants_comment:
        set_output("needs-comment", "false")
        return
    set_output("needs-comment", "true")

    jobs = http_get(run["jobs_url"])["jobs"]
    assert len(jobs) == 1, "multiple jobs not supported nor tested"
    job = jobs[0]

    # Map step names to their numbers so the diff step can be linked directly.
    step_numbers = {}
    for step in job["steps"]:
        step_numbers[step["name"]] = step["number"]
    diff_step = step_numbers[DIFF_STEP_NAME]
    diff_url = job["html_url"] + f"#step:{diff_step}:1"

    download_urls = {}
    for artifact in http_get(run["artifacts_url"])["artifacts"]:
        download_urls[artifact["name"]] = artifact["archive_download_url"]
    archive = BytesIO(http_get(download_urls[COMMENT_FILE], is_json=False))
    # The artifact is downloaded as a zip archive holding the JSON payload.
    with zipfile.ZipFile(archive) as zfile, zfile.open(COMMENT_FILE) as rf:
        comment_data = json.loads(rf.read().decode("utf-8"))

    set_output("pr-number", str(comment_data["pr-number"]))

    # It's more convenient to fill in these fields after the first workflow is done
    # since this command can access the workflows API (doing it in the main workflow
    # while it's still in progress seems impossible).
    body = comment_data["body"]
    body = body.replace("$workflow-run-url", run["html_url"])
    body = body.replace("$job-diff-url", diff_url)

    # https://github.community/t/set-output-truncates-multiline-strings/16852/3
    # "%" must be escaped first so the escapes added afterwards stay intact.
    escaped = body.replace("%", "%25")
    escaped = escaped.replace("\n", "%0A")
    escaped = escaped.replace("\r", "%0D")
    set_output("comment-body", escaped)
265
266
267 if __name__ == "__main__":
268     main()