git.madduck.net Git - etc/vim.git/blob - gallery/gallery.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Top PyPI Packages: Use 30-days data, 365 is no longer available (#2995)
[etc/vim.git] / gallery / gallery.py
1 import atexit
2 import json
3 import subprocess
4 import tarfile
5 import tempfile
6 import traceback
7 import venv
8 import zipfile
9 from argparse import ArgumentParser, Namespace
10 from concurrent.futures import ThreadPoolExecutor
11 from functools import lru_cache, partial
12 from pathlib import Path
13 from typing import (
14     Generator,
15     List,
16     NamedTuple,
17     Optional,
18     Tuple,
19     Union,
20     cast,
21 )
22 from urllib.request import urlopen, urlretrieve
23
# Base URL of the PyPI JSON API; "/<package>/json" is appended for each lookup.
PYPI_INSTANCE = "https://pypi.org/pypi"
# JSON document listing the top PyPI projects (30-day data set, per the URL).
PYPI_TOP_PACKAGES = (
    "https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json"
)
# Where Black gets cloned when the caller does not supply a Black repository.
INTERNAL_BLACK_REPO = f"{tempfile.gettempdir()}/__black"

# The two archive object types the extraction helpers know how to handle.
ArchiveKind = Union[tarfile.TarFile, zipfile.ZipFile]

# Process-wide patch: every later subprocess.run() call raises on a non-zero
# exit status unless the caller explicitly passes check=False.
subprocess.run = partial(subprocess.run, check=True)  # type: ignore
# https://github.com/python/mypy/issues/1484
34
35
class BlackVersion(NamedTuple):
    """A Black revision to format with, plus an optional configuration file."""

    # Git ref that is checked out in the Black repository before formatting.
    version: str
    # Config file name, joined onto the input directory and passed as --config;
    # None (or empty) means no --config argument is added.
    config: Optional[str] = None
39
40
def get_pypi_download_url(package: str, version: Optional[str]) -> str:
    """Return the download URL of the source distribution of *package*.

    The package's JSON metadata is fetched from ``PYPI_INSTANCE``. With
    ``version=None`` the files under the ``"urls"`` key are searched;
    otherwise the files listed under ``"releases"`` for that exact version.

    Raises:
        ValueError: if *version* is not among the known releases, or if none
            of the candidate files has ``python_version == "source"``.
    """
    with urlopen(PYPI_INSTANCE + f"/{package}/json") as page:
        metadata = json.load(page)

    if version is not None:
        if version not in metadata["releases"]:
            raise ValueError(
                f"No releases found with version ('{version}') tag. "
                f"Found releases: {metadata['releases'].keys()}"
            )
        candidates = metadata["releases"][version]
    else:
        candidates = metadata["urls"]

    # Pick the first file flagged as a source distribution, if there is one.
    sdist = next(
        (entry for entry in candidates if entry["python_version"] == "source"),
        None,
    )
    if sdist is None:
        raise ValueError(f"Couldn't find any sources for {package}")

    return cast(str, sdist["url"])
63
64
def get_top_packages() -> List[str]:
    """Download the ``PYPI_TOP_PACKAGES`` listing and return its project names.

    The names are returned in the order in which the rows appear in the
    downloaded JSON document.
    """
    with urlopen(PYPI_TOP_PACKAGES) as response:
        payload = json.load(response)

    rows = payload["rows"]
    return [row["project"] for row in rows]
70
71
def get_package_source(package: str, version: Optional[str]) -> str:
    """Return the URL of a source archive for *package*.

    ``"cpython"`` and ``"pypy"`` are special-cased to archive URLs of their
    own repositories (defaulting to ``main`` and ``branch/default`` when
    *version* is None). Any other name is resolved through
    ``get_pypi_download_url``.
    """
    if package == "cpython":
        version = "main" if version is None else version
        return f"https://github.com/python/cpython/archive/{version}.zip"

    if package == "pypy":
        version = "branch/default" if version is None else version
        return (
            f"https://foss.heptapod.net/pypy/pypy/repository/{version}/archive.tar.bz2"
        )

    # Everything else is treated as a regular PyPI project.
    return get_pypi_download_url(package, version)
85
86
def get_archive_manager(local_file: str) -> ArchiveKind:
    """Open *local_file* as a tar or zip archive, whichever it turns out to be.

    The tar check runs first, then the zip check.

    Raises:
        ValueError: if the file is recognised as neither a tar nor a zip archive.
    """
    # (detector, opener) pairs, tried in order.
    openers = (
        (tarfile.is_tarfile, tarfile.open),
        (zipfile.is_zipfile, zipfile.ZipFile),
    )
    for is_this_kind, open_archive in openers:
        if is_this_kind(local_file):
            return open_archive(local_file)

    raise ValueError("Unknown archive kind.")
94
95
def get_first_archive_member(archive: ArchiveKind) -> str:
    """Return the name of the first member listed in *archive*.

    Tar archives are queried with ``getnames()`` and zip archives with
    ``namelist()``. An archive without members raises ``IndexError``.
    """
    if isinstance(archive, tarfile.TarFile):
        member_names = archive.getnames()
    elif isinstance(archive, zipfile.ZipFile):
        member_names = archive.namelist()
    else:
        # Not one of the two supported archive types: nothing to report.
        return None

    return member_names[0]
101
102
def download_and_extract(package: str, version: Optional[str], directory: Path) -> Path:
    """Download the source archive of *package* and unpack it into *directory*.

    The archive is stored as ``<directory>/<package>-src`` and extracted in
    place. The returned path is *directory* joined with the name of the
    archive's first member.
    """
    source_url = get_package_source(package, version)
    download_target = directory / f"{package}-src"
    local_file, _ = urlretrieve(source_url, download_target)

    archive = get_archive_manager(local_file)
    with archive:
        # NOTE(review): extractall() trusts the member paths inside the
        # downloaded archive - confirm this is acceptable for the inputs used.
        archive.extractall(path=directory)
        first_member = get_first_archive_member(archive)

    return directory / first_member
111
112
def get_package(
    package: str, version: Optional[str], directory: Path
) -> Optional[Path]:
    """Best-effort wrapper around ``download_and_extract``.

    Returns the extracted path on success. Any exception is reported on
    stdout together with its traceback and turned into a ``None`` result, so
    a single failing package does not abort a batch download.
    """
    try:
        extracted = download_and_extract(package, version, directory)
    except Exception:
        print(f"Caught an exception while downloading {package}.")
        traceback.print_exc()
        extracted = None
    return extracted
122
123
# Module-level default so the signature below does not contain a call (for flake8).
DEFAULT_SLICE = slice(None)


def download_and_extract_top_packages(
    directory: Path,
    workers: int = 8,
    limit: slice = DEFAULT_SLICE,
) -> Generator[Path, None, None]:
    """Download and unpack the top PyPI packages concurrently.

    Args:
        directory: where archives are downloaded to and extracted.
        workers: maximum number of download threads.
        limit: slice applied to the list of top packages to select a subset.

    Yields:
        The extraction path of every package that downloaded successfully,
        in the order of the package list; failed packages are skipped.
    """
    fetch_latest = partial(get_package, version=None, directory=directory)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        outcomes = pool.map(fetch_latest, get_top_packages()[limit])
        # get_package() reports failures as None; drop those.
        yield from (path for path in outcomes if path is not None)
137
138
def git_create_repository(repo: Path, branch: str = "main") -> None:
    """Initialise a Git repository in *repo* and record an initial commit.

    Args:
        repo: directory that becomes the working tree of the new repository.
        branch: name of the branch that receives the initial commit.

    The branch name is set explicitly because ``format_repos`` later checks
    out ``main`` in every source repository. A plain ``git init`` names the
    initial branch after the local Git configuration (commonly ``master``),
    which would make that checkout fail.
    """
    subprocess.run(["git", "init"], cwd=repo)
    # HEAD is still unborn here, so re-pointing it only renames the branch
    # that the first commit will create. Unlike ``git init -b``, this works
    # with every Git version.
    subprocess.run(
        ["git", "symbolic-ref", "HEAD", f"refs/heads/{branch}"], cwd=repo
    )
    git_add_and_commit(msg="Initial commit", repo=repo)
142
143
def git_add_and_commit(msg: str, repo: Path) -> None:
    """Stage everything in *repo* and commit it with the message *msg*."""
    commands = (
        ["git", "add", "."],
        # --allow-empty: a commit is recorded even when nothing changed.
        ["git", "commit", "-m", msg, "--allow-empty"],
    )
    for command in commands:
        subprocess.run(command, cwd=repo)
147
148
def git_switch_branch(
    branch: str, repo: Path, new: bool = False, from_branch: Optional[str] = None
) -> None:
    """Run ``git checkout`` for *branch* inside *repo*.

    Args:
        branch: branch (or other ref) to check out.
        repo: working tree in which the command is run.
        new: when true, ``-b`` is passed so the branch is created.
        from_branch: optional start point appended after the branch name.
    """
    create_flag = ["-b"] if new else []
    start_point = [from_branch] if from_branch else []
    command = ["git", "checkout", *create_flag, branch, *start_point]
    subprocess.run(command, cwd=repo)
159
160
def init_repos(options: Namespace) -> Tuple[Path, ...]:
    """Download the requested sources and prepare every repository.

    Depending on *options*, either the top ``options.top_packages`` PyPI
    packages or the single ``options.pypi_package`` are downloaded into
    ``options.output``. Each extracted tree is turned into a Git repository.
    If no Black repository was supplied, Black is cloned and
    ``options.black_repo`` is updated to point at the clone.

    Returns:
        The paths of the extracted source directories.
    """
    output_dir = options.output
    output_dir.mkdir(exist_ok=True)

    source_directories: Tuple[Path, ...]
    if not options.top_packages:
        single_package = download_and_extract(
            package=options.pypi_package,
            version=options.version,
            directory=output_dir,
        )
        source_directories = (single_package,)
    else:
        top_packages = download_and_extract_top_packages(
            directory=output_dir,
            workers=options.workers,
            limit=slice(None, options.top_packages),
        )
        source_directories = tuple(top_packages)

    for source_directory in source_directories:
        git_create_repository(source_directory)

    if options.black_repo is None:
        clone_command = [
            "git",
            "clone",
            "https://github.com/psf/black.git",
            INTERNAL_BLACK_REPO,
        ]
        subprocess.run(clone_command, cwd=options.output)
        # INTERNAL_BLACK_REPO is built from tempfile.gettempdir(); when that
        # is an absolute path the join below resolves to it unchanged.
        options.black_repo = options.output / INTERNAL_BLACK_REPO

    return source_directories
192
193
@lru_cache(8)
def black_runner(version: str, black_repo: Path) -> Path:
    """Create a virtual environment with *black_repo* installed in editable mode.

    The result is memoised on ``(version, black_repo)``; *version* is not
    otherwise read by the body. The environment lives in a temporary
    directory that is removed at interpreter exit.

    Returns:
        Path of the environment's ``bin/python`` interpreter.
    """
    venv_dir = tempfile.TemporaryDirectory()
    venv.create(venv_dir.name, with_pip=True)

    interpreter = Path(venv_dir.name, "bin", "python")
    install_command = [interpreter, "-m", "pip", "install", "-e", black_repo]
    subprocess.run(install_command)

    # Registering the bound method keeps the TemporaryDirectory object alive
    # until exit, when it is cleaned up.
    atexit.register(venv_dir.cleanup)
    return interpreter
204
205
def format_repo_with_version(
    repo: Path,
    from_branch: Optional[str],
    black_repo: Path,
    black_version: BlackVersion,
    input_directory: Path,
) -> str:
    """Format *repo* with one Black version on a dedicated branch.

    The Black repository is switched to ``black_version.version``, a new
    branch ``black-<version>`` is created in *repo* (starting at
    *from_branch* when given), Black is run over the tree and the result is
    committed.

    Returns:
        The name of the branch that now holds the formatted code.
    """
    current_branch = f"black-{black_version.version}"
    git_switch_branch(black_version.version, repo=black_repo)
    git_switch_branch(current_branch, repo=repo, new=True, from_branch=from_branch)

    interpreter = black_runner(black_version.version, black_repo)
    black_script = (black_repo / "black.py").resolve()
    format_cmd: List[Union[Path, str]] = [interpreter, black_script, "."]
    if black_version.config:
        format_cmd += ["--config", input_directory / black_version.config]

    # check=False: keep going even if Black cannot format some files; its own
    # report of those files should be enough.
    subprocess.run(format_cmd, cwd=repo, check=False)
    git_add_and_commit(f"Format with black:{black_version.version}", repo=repo)

    return current_branch
231
232
def format_repos(repos: Tuple[Path, ...], options: Namespace) -> None:
    """Apply every requested Black version to every repository in *repos*.

    Each entry of ``options.versions`` is split on ``":"`` into a version and
    an optional config name. Within one repository the versions are applied
    in order, each new branch starting from the previous one. Afterwards both
    the source repository and the Black repository are switched back to
    ``main``.
    """
    black_versions = [BlackVersion(*spec.split(":")) for spec in options.versions]

    for repo in repos:
        # The first version branches off whatever is currently checked out.
        previous_branch: Optional[str] = None
        for black_version in black_versions:
            previous_branch = format_repo_with_version(
                repo=repo,
                from_branch=previous_branch,
                black_repo=options.black_repo,
                black_version=black_version,
                input_directory=options.input,
            )
        git_switch_branch("main", repo=repo)

    git_switch_branch("main", repo=options.black_repo)
251
252
def main() -> None:
    """Parse the command line, download the sources and format them.

    Exactly one of ``-p/--pypi-package`` and ``-t/--top-packages`` must be
    given. The positional ``versions`` arguments name the Black versions to
    apply (default: ``main``); each may carry a ``:<config>`` suffix.
    """
    parser = ArgumentParser(
        description="""Black Gallery is a script that
    automates the process of applying different Black versions to a selected
    PyPI package and seeing the results between versions."""
    )

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-p", "--pypi-package", help="PyPI package to download.")
    group.add_argument(
        "-t", "--top-packages", help="Top n PyPI packages to download.", type=int
    )

    parser.add_argument("-b", "--black-repo", help="Black's Git repository.", type=Path)
    parser.add_argument(
        "-v",
        "--version",
        help=(
            "Version for given PyPI package. Will be discarded if used with -t option."
        ),
    )
    parser.add_argument(
        "-w",
        "--workers",
        # The value is handed to ThreadPoolExecutor(max_workers=...), which
        # needs an int; without a converter argparse would pass a string.
        # The default stays None so the executor picks its own worker count.
        type=int,
        help=(
            "Maximum number of threads to download with at the same time. "
            "Will be discarded if used with -p option."
        ),
    )
    parser.add_argument(
        "-i",
        "--input",
        default=Path("/input"),
        type=Path,
        help="Input directory to read configuration.",
    )
    parser.add_argument(
        "-o",
        "--output",
        default=Path("/output"),
        type=Path,
        help="Output directory to download and put result artifacts.",
    )
    parser.add_argument("versions", nargs="*", default=("main",), help="")

    options = parser.parse_args()
    repos = init_repos(options)
    format_repos(repos, options)


if __name__ == "__main__":
    main()