src/black_primer/lib.py
#!/usr/bin/env python3

# Module '__future__' has no attribute 'annotations'
from __future__ import annotations  # type: ignore

import asyncio
import json
import logging
from pathlib import Path
from shutil import rmtree, which
from subprocess import CalledProcessError
from sys import version_info
from typing import Any, Dict, NamedTuple, Optional, Sequence, Tuple
from urllib.parse import urlparse

import click


LOG = logging.getLogger(__name__)


class Results(NamedTuple):
    stats: Dict[str, int] = {}
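    # Counter keys seeded in process_queue(): "disabled", "failed",
    # "skipped_long_checkout", "success" and "wrong_py_ver".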
    failed_projects: Dict[str, CalledProcessError] = {}


async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 30,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
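    """Run cmd and return (stdout, stderr) from communicate(), raising on failure.

    stderr is merged into stdout via STDOUT redirection.  The process is
    killed and the TimeoutError re-raised if it exceeds the timeout, and a
    CalledProcessError is raised for a non-zero exit code.
    """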
    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
    except asyncio.TimeoutError:
        process.kill()
        await process.wait()
        raise

    if process.returncode != 0:
        cmd_str = " ".join(cmd)
        raise CalledProcessError(
            process.returncode, cmd_str, output=stdout, stderr=stderr
        )

    return (stdout, stderr)


async def analyze_results(project_count: int, results: Results) -> int:
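    """Print a human-readable summary of the run and any per-project failures.

    Returns the number of failed projects.
    """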
    failed_pct = round(((results.stats["failed"] / project_count) * 100), 2)
    success_pct = round(((results.stats["success"] / project_count) * 100), 2)

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{results.stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{results.stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(results.stats["failed"]),
        fg="red",
    )
    click.echo(f" - {results.stats['disabled']} projects Disabled by config")
    click.echo(
        f" - {results.stats['wrong_py_ver']} projects skipped due to Python Version"
    )
    click.echo(
        f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
    )

    if results.failed_projects:
        click.secho("\nFailed Projects:\n", bold=True)

    for project_name, project_cpe in results.failed_projects.items():
        print(f"## {project_name}:")
        print(f" - Returned {project_cpe.returncode}")
        if project_cpe.stderr:
            print(f" - stderr:\n{project_cpe.stderr.decode('utf8')}")
        if project_cpe.stdout:
            print(f" - stdout:\n{project_cpe.stdout.decode('utf8')}")
        print("")

    return results.stats["failed"]


async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run black and record failures"""
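    # Build and run e.g. `black <project cli_arguments> --check --diff .`
    # inside the project checkout.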
    cmd = [str(which("black"))]
    if project_config["cli_arguments"]:
        cmd.extend(project_config["cli_arguments"])
    cmd.extend(["--check", "--diff", "."])

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
    except asyncio.TimeoutError:
        results.stats["failed"] += 1
        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
        return
    except CalledProcessError as cpe:
        # TODO: This might need to be tuned and made smarter for higher signal
        if not project_config["expect_formatting_changes"] and cpe.returncode == 1:
            results.stats["failed"] += 1
            results.failed_projects[repo_path.name] = cpe
            return

    results.stats["success"] += 1


async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Git clone the project or, if a checkout already exists, optionally pull --rebase it"""
    git_bin = which("git")
    if not git_bin:
        LOG.error("No git binary found")
        return None

    repo_url_parts = urlparse(project_config["git_clone_url"])
    path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)

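    # The checkout directory name comes from the last component of the clone
    # URL's path, with ".git" stripped (e.g. ".../psf/black.git" -> "black").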
    repo_path: Path = work_path / path_parts[1].replace(".git", "")
    cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path


async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue[str]]:
    """Load project config and fill queue with all the project names"""
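    # Expected JSON shape: a top-level "projects" mapping of project name to
    # settings; the keys read elsewhere in this module are "git_clone_url",
    # "cli_arguments", "expect_formatting_changes", "py_versions",
    # "long_checkout" and (optionally) "disabled".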
    with config_path.open("r") as cfp:
        config = json.load(cfp)

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    project_names = sorted(config["projects"].keys())
    queue: asyncio.Queue[str] = asyncio.Queue(maxsize=len(project_names))
    for project in project_names:
        await queue.put(project)

    return config, queue


async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue[str],
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Check out projects from the queue, run black on each, and record results"""
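    # Each worker loops, pulling project names off the shared queue until it
    # is empty; blocking cleanup (rmtree) is pushed to an executor thread so
    # it does not stall the event loop.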
    loop = asyncio.get_event_loop()
    py_version = f"{version_info[0]}.{version_info[1]}"
    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return

        project_config = config["projects"][project_name]

        # Check if disabled by config
        if "disabled" in project_config and project_config["disabled"]:
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Check if we should run on this version of Python
        if (
            "all" not in project_config["py_versions"]
            and py_version not in project_config["py_versions"]
        ):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Check if we're doing big projects / long checkouts
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            await loop.run_in_executor(None, rmtree, repo_path)


async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with `workers` parallel workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects
    """
    results = Results()
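    # Seed every counter up front so the increments in black_run() and
    # project_runner() never hit a missing key.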
    results.stats["disabled"] = 0
    results.stats["failed"] = 0
    results.stats["skipped_long_checkout"] = 0
    results.stats["success"] = 0
    results.stats["wrong_py_ver"] = 0

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    LOG.info(f"{project_count} projects to run black over")
    if not project_count:
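        # No projects were loaded; -1 lets the caller distinguish this from a
        # run with zero failures.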
        return -1

    LOG.debug(f"Using {workers} parallel workers to run black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return await analyze_results(project_count, results)
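

if __name__ == "__main__":
    # Minimal usage sketch only: the real entry point is the black-primer CLI,
    # which drives process_queue() for you.  "primer.json" below is a
    # hypothetical config path; point it at a real config before running.
    import sys
    import tempfile

    async def _demo() -> int:
        with tempfile.TemporaryDirectory() as work_dir:
            return await process_queue(
                config_file="primer.json",
                work_path=Path(work_dir),
                workers=2,
            )

    sys.exit(asyncio.run(_demo()))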