]> git.madduck.net Git - etc/vim.git/blob - scripts/diff_shades_gha_helper.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Dont require typing-extensions in 3.10 (GH-2772)
[etc/vim.git] / scripts / diff_shades_gha_helper.py
1 """Helper script for psf/black's diff-shades Github Actions integration.
2
3 diff-shades is a tool for analyzing what happens when you run Black on
4 OSS code capturing it for comparisons or other usage. It's used here to
5 help measure the impact of a change *before* landing it (in particular
6 posting a comment on completion for PRs).
7
8 This script exists as a more maintainable alternative to using inline
9 Javascript in the workflow YAML files. The revision configuration and
10 resolving, caching, and PR comment logic is contained here.
11
12 For more information, please see the developer docs:
13
14 https://black.readthedocs.io/en/latest/contributing/gauging_changes.html#diff-shades
15 """
16
17 import json
18 import os
19 import platform
20 import pprint
21 import subprocess
22 import sys
23 import zipfile
24 from io import BytesIO
25 from pathlib import Path
26 from typing import Any, Dict, Optional, Tuple
27
28 import click
29 import urllib3
30 from packaging.version import Version
31
32 if sys.version_info >= (3, 8):
33     from typing import Final, Literal
34 else:
35     from typing_extensions import Final, Literal
36
# File the half-completed PR comment is written to by ``comment-body``; the
# same name is used to look up and read the artifact in ``comment-details``.
COMMENT_BODY_FILE: Final = ".pr-comment-body.md"
# Name of the job step whose number is used to build the link to the diff logs.
DIFF_STEP_NAME: Final = "Generate HTML diff report"
# Target of the "What is this?" link in the PR comment footer.
DOCS_URL: Final = (
    "https://black.readthedocs.io/en/latest/"
    "contributing/gauging_changes.html#diff-shades"
)
# Sent as the User-Agent header on every request made through http_get().
USER_AGENT: Final = f"psf/black diff-shades workflow via urllib3/{urllib3.__version__}"
# Number of leading SHA characters kept when building analysis names.
SHA_LENGTH: Final = 10
# Optional token; when set, http_get() sends it for URLs containing "github".
GH_API_TOKEN: Final = os.getenv("GITHUB_TOKEN")
# Repository slug used in GitHub API URLs; falls back to psf/black when unset.
REPO: Final = os.getenv("GITHUB_REPOSITORY", default="psf/black")
# Connection pool shared by all requests issued from http_get().
http = urllib3.PoolManager()
48
49
def set_output(name: str, value: str) -> None:
    """Log an output assignment, then emit it as a ``::set-output`` command.

    Values shorter than 200 characters are echoed verbatim in the log line;
    longer ones are summarised by their length. The command line itself
    always carries the full, unmodified value.
    """
    shown = f"'{value}'" if len(value) < 200 else f"[{len(value)} chars]"
    print(f"[INFO]: setting '{name}' to {shown}")
    print(f"::set-output name={name}::{value}")
56
57
def http_get(
    url: str,
    is_json: bool = True,
    headers: Optional[Dict[str, str]] = None,
    **kwargs: Any,
) -> Any:
    """Issue a GET request through the shared pool and return the payload.

    Args:
        url: Address to fetch. URLs containing "github" additionally get the
            GitHub ``Accept`` header and, when ``GH_API_TOKEN`` is set, an
            ``Authorization`` header.
        is_json: When true the body is decoded as UTF-8 JSON; otherwise the
            raw response bytes are returned.
        headers: Extra request headers. The mapping is copied, never mutated.
        **kwargs: Forwarded unchanged to ``http.request``.

    Returns:
        The parsed JSON document, or the raw body when ``is_json`` is false.

    Raises:
        RuntimeError: If the response status is outside the 2xx range. The
            response headers and body are printed first for diagnosis.
    """
    # Work on a copy so a dict supplied by the caller is left untouched.
    request_headers = dict(headers) if headers else {}
    request_headers["User-Agent"] = USER_AGENT
    if "github" in url:
        if GH_API_TOKEN:
            request_headers["Authorization"] = f"token {GH_API_TOKEN}"
        request_headers["Accept"] = "application/vnd.github.v3+json"
    r = http.request("GET", url, headers=request_headers, **kwargs)
    print(f"[INFO]: issued GET request for {r.geturl()}")

    # Check the status before decoding: an error body is not guaranteed to be
    # JSON, and a decode failure here would hide the actual status code.
    if not (200 <= r.status < 300):
        pprint.pprint(dict(r.info()))
        error_payload: Any = r.data
        if is_json:
            try:
                error_payload = json.loads(r.data.decode("utf-8"))
            except ValueError:
                # Covers both invalid UTF-8 and invalid JSON; show raw bytes.
                error_payload = r.data
        pprint.pprint(error_payload)
        raise RuntimeError(f"unexpected status code: {r.status}")

    if is_json:
        return json.loads(r.data.decode("utf-8"))
    return r.data
82
83
def get_branch_or_tag_revision(sha: str = "main") -> str:
    """Return the SHA of the first commit the GitHub API lists for *sha*.

    A single-entry page of the repository's commits endpoint is requested
    with *sha* as the starting point, and that entry's ``sha`` is returned.
    """
    query = {"per_page": "1", "sha": sha}
    commits = http_get(f"https://api.github.com/repos/{REPO}/commits", fields=query)
    revision = commits[0]["sha"]
    assert isinstance(revision, str)
    return revision
91
92
def get_pr_revision(pr: int) -> str:
    """Return the head commit SHA of pull request number *pr*."""
    pull = http_get(f"https://api.github.com/repos/{REPO}/pulls/{pr}")
    head_sha = pull["head"]["sha"]
    assert isinstance(head_sha, str)
    return head_sha
97
98
def get_pypi_version() -> Version:
    """Return the highest version listed under ``releases`` on PyPI for black.

    No filtering is applied: every key of the ``releases`` mapping is parsed
    and is a candidate.
    """
    releases = http_get("https://pypi.org/pypi/black/json")["releases"]
    newest_first = sorted(map(Version, releases), reverse=True)
    return newest_first[0]
104
105
def resolve_custom_ref(ref: str) -> Tuple[str, str]:
    """Resolve a user-supplied ref into an analysis name and a setup command.

    Recognised forms:

    - ``.pypi``: the newest version reported by ``get_pypi_version()``.
    - ``.<number>``: the pull request with that number.
    - anything else: looked up through ``get_branch_or_tag_revision()`` and
      treated as a commit SHA when the resolved revision starts with *ref*,
      otherwise as a branch or tag.

    Returns:
        A ``(name, command)`` pair: a label for the revision and the shell
        command that checks it out.
    """
    if ref == ".pypi":
        # Special value to get latest PyPI version.
        version = str(get_pypi_version())
        return version, f"git checkout {version}"

    # str.isdecimal() only accepts characters int() can parse. str.isnumeric()
    # also matches characters such as "½" or "²", which would make the int()
    # call below raise ValueError instead of falling through to the ref lookup.
    if ref.startswith(".") and ref[1:].isdecimal():
        # Special format to get a PR.
        number = int(ref[1:])
        revision = get_pr_revision(number)
        return (
            f"pr-{number}-{revision[:SHA_LENGTH]}",
            f"gh pr checkout {number} && git merge origin/main",
        )

    # Alright, it's probably a branch, tag, or a commit SHA, let's find out!
    revision = get_branch_or_tag_revision(ref)
    # A prefix match is used as we might be operating on a short commit SHA.
    if revision.startswith(ref):
        # It's *probably* a commit as the resolved SHA isn't different from the REF.
        return revision[:SHA_LENGTH], f"git checkout {revision}"

    # It's *probably* a pre-existing branch or tag, yay!
    return f"{ref}-{revision[:SHA_LENGTH]}", f"git checkout {revision}"
130
131
@click.group()
def main() -> None:
    # Root command group: it does nothing itself, the subcommands below are
    # registered on it through @main.command. A comment is used instead of a
    # docstring because click would show a docstring as the group's help text.
    return None
135
136
@main.command("config", help="Acquire run configuration and metadata.")
@click.argument(
    "event", type=click.Choice(["push", "pull_request", "workflow_dispatch"])
)
@click.argument("custom_baseline", required=False)
@click.argument("custom_target", required=False)
@click.option("--baseline-args", default="")
def config(
    event: Literal["push", "pull_request", "workflow_dispatch"],
    custom_baseline: Optional[str],
    custom_target: Optional[str],
    baseline_args: str,
) -> None:
    """Work out the baseline and target revisions and publish them as outputs.

    Depending on *event*:

    - ``push``: baseline is the newest PyPI version, target is ``GITHUB_SHA``.
    - ``pull_request``: baseline is the current ``main`` revision, target is
      the pull request named by ``GITHUB_REF``; both SHAs are also published.
    - ``workflow_dispatch``: both sides come from *custom_baseline* and
      *custom_target* via ``resolve_custom_ref()``.

    The analysis file names, setup commands and the baseline cache key are
    then emitted through ``set_output()``.

    Raises:
        click.UsageError: If a ``workflow_dispatch`` run lacks either custom ref.
        click.ClickException: If a required environment variable is missing
            or ``GITHUB_REF`` is not a pull request ref.
    """
    import diff_shades

    if event == "push":
        # Push on main, let's use PyPI Black as the baseline.
        baseline_name = str(get_pypi_version())
        baseline_cmd = f"git checkout {baseline_name}"
        target_rev = os.getenv("GITHUB_SHA")
        # Explicit check rather than assert: asserts vanish under ``python -O``.
        if target_rev is None:
            raise click.ClickException("GITHUB_SHA must be set for push events")
        target_name = "main-" + target_rev[:SHA_LENGTH]
        target_cmd = f"git checkout {target_rev}"

    elif event == "pull_request":
        # PR, let's use main as the baseline.
        baseline_rev = get_branch_or_tag_revision()
        baseline_name = "main-" + baseline_rev[:SHA_LENGTH]
        baseline_cmd = f"git checkout {baseline_rev}"

        pr_ref = os.getenv("GITHUB_REF")
        if pr_ref is None:
            raise click.ClickException(
                "GITHUB_REF must be set for pull_request events"
            )
        # Expected shape: refs/pull/<number>/merge. Splitting and validating
        # avoids silently extracting a wrong number from an unexpected ref.
        ref_parts = pr_ref.split("/")
        if (
            len(ref_parts) != 4
            or ref_parts[:2] != ["refs", "pull"]
            or not ref_parts[2].isdecimal()
        ):
            raise click.ClickException(
                f"GITHUB_REF is not a pull request ref: {pr_ref!r}"
            )
        pr_num = int(ref_parts[2])
        pr_rev = get_pr_revision(pr_num)
        target_name = f"pr-{pr_num}-{pr_rev[:SHA_LENGTH]}"
        target_cmd = f"gh pr checkout {pr_num} && git merge origin/main"

        # These are only needed for the PR comment.
        set_output("baseline-sha", baseline_rev)
        set_output("target-sha", pr_rev)
    else:
        if custom_baseline is None or custom_target is None:
            raise click.UsageError(
                "CUSTOM_BASELINE and CUSTOM_TARGET are required for"
                " workflow_dispatch events"
            )
        baseline_name, baseline_cmd = resolve_custom_ref(custom_baseline)
        target_name, target_cmd = resolve_custom_ref(custom_target)
        if baseline_name == target_name:
            # Alright we're using the same revisions but we're (hopefully) using
            # different command line arguments, let's support that too.
            baseline_name += "-1"
            target_name += "-2"

    set_output("baseline-analysis", baseline_name + ".json")
    set_output("baseline-setup-cmd", baseline_cmd)
    set_output("target-analysis", target_name + ".json")
    set_output("target-setup-cmd", target_cmd)

    # The key combines platform, Python and diff-shades versions, the
    # baseline name and the hex-encoded baseline arguments.
    key = f"{platform.system()}-{platform.python_version()}-{diff_shades.__version__}"
    key += f"-{baseline_name}-{baseline_args.encode('utf-8').hex()}"
    set_output("baseline-cache-key", key)
195
196
@main.command("comment-body", help="Generate the body for a summary PR comment.")
@click.argument("baseline", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(exists=True, path_type=Path))
@click.argument("baseline-sha")
@click.argument("target-sha")
def comment_body(
    baseline: Path, target: Path, baseline_sha: str, target_sha: str
) -> None:
    """Write a half-completed PR comment to ``COMMENT_BODY_FILE``.

    ``diff_shades compare`` is run on the two analysis files in a subprocess.
    A zero exit status yields a short "zero changes" message; any other
    status yields a message embedding the command's captured output. The
    ``$job-diff-url`` and ``$workflow-run-url`` placeholders are written
    as-is; this command does not substitute them.
    """
    compare_cmd = [
        sys.executable,
        "-m",
        "diff_shades",
        "--no-color",
        "compare",
        str(baseline),
        str(target),
        "--quiet",
        "--check",
    ]
    result = subprocess.run(compare_cmd, stdout=subprocess.PIPE, encoding="utf-8")

    if result.returncode:
        # Non-zero exit: include the captured comparison output in a code fence.
        body = (
            f"**diff-shades** results comparing this PR ({target_sha}) to main"
            f" ({baseline_sha}). The full diff is [available in the logs]"
            f'($job-diff-url) under the "{DIFF_STEP_NAME}" step.'
        )
        body += "\n```text\n" + result.stdout.strip() + "\n```\n"
    else:
        body = (
            f"**diff-shades** reports zero changes comparing this PR ({target_sha}) to"
            f" main ({baseline_sha}).\n\n---\n\n"
        )

    # Footer links shared by both variants of the comment.
    body += (
        f"[**What is this?**]({DOCS_URL}) | [Workflow run]($workflow-run-url) |"
        " [diff-shades documentation](https://github.com/ichard26/diff-shades#readme)"
    )
    print(f"[INFO]: writing half-completed comment body to {COMMENT_BODY_FILE}")
    Path(COMMENT_BODY_FILE).write_text(body, encoding="utf-8")
231
232
@main.command("comment-details", help="Get PR comment resources from a workflow run.")
@click.argument("run-id")
def comment_details(run_id: str) -> None:
    """Publish what is needed to post the PR comment for workflow run *run_id*.

    Emits ``needs-comment`` first. For runs triggered by ``pull_request`` it
    then emits ``pr-number`` and ``comment-body``. The body is read from the
    run's comment artifact, has its two URL placeholders filled in, and is
    escaped for the workflow command syntax.
    """
    run = http_get(f"https://api.github.com/repos/{REPO}/actions/runs/{run_id}")
    is_pr_run = run["event"] == "pull_request"
    set_output("needs-comment", "true" if is_pr_run else "false")
    if not is_pr_run:
        return

    pull_requests = run["pull_requests"]
    assert len(pull_requests) == 1
    set_output("pr-number", str(pull_requests[0]["number"]))

    jobs = http_get(run["jobs_url"])["jobs"]
    assert len(jobs) == 1, "multiple jobs not supported nor tested"
    job = jobs[0]
    # Map step names to step numbers to locate the diff step within the job.
    step_numbers = dict((step["name"], step["number"]) for step in job["steps"])
    diff_url = job["html_url"] + f"#step:{step_numbers[DIFF_STEP_NAME]}:1"

    # Map artifact names to their archive download URLs.
    download_urls = dict(
        (artifact["name"], artifact["archive_download_url"])
        for artifact in http_get(run["artifacts_url"])["artifacts"]
    )
    archive = BytesIO(http_get(download_urls[COMMENT_BODY_FILE], is_json=False))
    with zipfile.ZipFile(archive) as zfile, zfile.open(COMMENT_BODY_FILE) as member:
        body = member.read().decode("utf-8")

    # It's more convenient to fill in these fields after the first workflow is done
    # since this command can access the workflows API (doing it in the main workflow
    # while it's still in progress seems impossible).
    for placeholder, url in (
        ("$workflow-run-url", run["html_url"]),
        ("$job-diff-url", diff_url),
    ):
        body = body.replace(placeholder, url)

    # https://github.community/t/set-output-truncates-multiline-strings/16852/3
    # A single-pass character mapping is equivalent to escaping "%" first and
    # the line breaks afterwards.
    escapes = str.maketrans({"%": "%25", "\n": "%0A", "\r": "%0D"})
    set_output("comment-body", body.translate(escapes))
269
270
# Run the click command group only when executed as a script, not on import.
if __name__ == "__main__":
    main()