git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

913f9d5a181de38cc4a24467af9b7471833d0b63
[etc/vim.git] / src / black_primer / lib.py
1 #!/usr/bin/env python3
2
3 import asyncio
4 import errno
5 import json
6 import logging
7 import os
8 import stat
9 import sys
10 from functools import partial
11 from pathlib import Path
12 from platform import system
13 from shutil import rmtree, which
14 from subprocess import CalledProcessError
15 from sys import version_info
16 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
17 from urllib.parse import urlparse
18
19 import click
20
21
# True when platform.system() reports Windows; selects the ".exe" names below.
WINDOWS = system() == "Windows"
# Executable names that this module resolves with shutil.which().
BLACK_BINARY = "black.exe" if WINDOWS else "black"
# NOTE: the constant name is misspelled ("BIANRY"). It is kept unchanged because
# the rest of this module refers to it under this spelling.
GIT_BIANRY = "git.exe" if WINDOWS else "git"
# Module-level logger used by every helper in this file.
LOG = logging.getLogger(__name__)


# Windows needs a ProactorEventLoop if you want to exec subprocesses
# Starting with 3.8 this is the default - can remove when Black >= 3.8
# mypy only respects sys.platform if directly in the evaluation
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
34
35
class _ResultsFields(NamedTuple):
    """Tuple layout backing Results: (stats, failed_projects)."""

    stats: Dict[str, int]
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsFields):
    """Mutable counters and per-project failures collected during a run.

    Fields:
        stats: counter name -> count.
        failed_projects: project name -> the CalledProcessError recorded for it.

    Both fields default to a *fresh* empty dict for every instance. Declaring
    ``{}`` directly as a NamedTuple field default would create one dict at
    class-definition time and share it between all instances, so data written
    through one Results() would show up in every other one.
    """

    # Keep instances as light as a plain named tuple (no per-instance __dict__).
    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
39
40
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 300,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
    """Run ``cmd`` as a subprocess and return what ``communicate()`` captured.

    stdout is piped and stderr is redirected into stdout, so the combined
    output arrives in the first element of the returned pair.

    Raises:
        asyncio.TimeoutError: the process did not finish within ``timeout``
            seconds; it is killed and reaped before the error propagates.
        CalledProcessError: the process exited with a non-zero return code.
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        captured = await asyncio.wait_for(proc.communicate(), timeout)
    except asyncio.TimeoutError:
        # Do not leave a runaway child behind: kill it and collect its status.
        proc.kill()
        await proc.wait()
        raise

    out, err = captured
    if proc.returncode == 0:
        return (out, err)

    raise CalledProcessError(proc.returncode, " ".join(cmd), output=out, stderr=err)
68
69
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the run and return the number of failed projects.

    Shows success/failure totals as a percentage of ``project_count``, the
    skip counters, and for each failed project its return code plus any
    captured stderr/stdout.
    """
    stats = results.stats
    failed_pct = round(stats["failed"] / project_count * 100, 2)
    success_pct = round(stats["success"] / project_count * 100, 2)

    click.secho("-- primer results 📊 --\n", bold=True)
    success_line = f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅"
    click.secho(success_line, bold=True, fg="green")
    failed_line = f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩"
    # Only embolden the failure line when there is something to shout about.
    click.secho(failed_line, bold=bool(stats["failed"]), fg="red")

    disabled = stats["disabled"]
    plural = "s" if disabled != 1 else ""
    click.echo(f" - {disabled} project{plural} disabled by config")

    wrong_py = stats["wrong_py_ver"]
    plural = "s" if wrong_py != 1 else ""
    click.echo(f" - {wrong_py} project{plural} skipped due to Python version")

    click.echo(f" - {stats['skipped_long_checkout']} skipped due to long checkout")

    failures = results.failed_projects
    if failures:
        click.secho("\nFailed projects:\n", bold=True)

    for name, error in failures.items():
        print(f"## {name}:")
        print(f" - Returned {error.returncode}")
        if error.stderr:
            print(f" - stderr:\n{error.stderr.decode('utf8')}")
        if error.stdout:
            print(f" - stdout:\n{error.stdout.decode('utf8')}")
        print("")

    return stats["failed"]
108
109
async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run Black in ``repo_path`` and record the outcome in ``results``.

    Black is invoked with the project's optional ``cli_arguments`` followed by
    ``--check --diff .``. Exactly one counter is updated per call:

    - timeout: counted as failed.
    - exit code 1 (Black would reformat): success if the project config sets
      ``expect_formatting_changes``, otherwise failed (error is recorded).
    - exit code 0: failed if formatting changes were expected (a synthetic
      CalledProcessError is recorded), otherwise success.

    Raises:
        CalledProcessError: Black exited with a code other than 0 or 1.
    """
    cmd = [str(which(BLACK_BINARY))]
    # list.extend() takes ONE iterable. Unpacking the list with "*" would pass
    # each argument separately: a single argument would be split into
    # characters and two or more would raise TypeError.
    cmd.extend(project_config.get("cli_arguments") or [])
    cmd.extend(["--check", "--diff", "."])

    try:
        await _gen_check_output(cmd, cwd=repo_path)
    except asyncio.TimeoutError:
        results.stats["failed"] += 1
        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
        # Stop here: falling through would count this project a second time
        # (as another failure or, worse, as a success).
        return
    except CalledProcessError as cpe:
        # TODO: Tune for smarter for higher signal
        # If any other return value than 1 we raise - can disable project in config
        if cpe.returncode == 1:
            if not project_config["expect_formatting_changes"]:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
            else:
                results.stats["success"] += 1
            return

        LOG.error(f"Unknown error with {repo_path}")
        raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
147
148
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project into ``work_path``, or update an existing checkout.

    The checkout directory is named after the last path component of
    ``project_config["git_clone_url"]`` with a trailing ``.git`` removed.

    - Checkout missing: ``git clone --depth <depth> <url>`` inside ``work_path``.
    - Checkout present and ``rebase``: ``git pull --rebase`` inside it.
    - Checkout present and not ``rebase``: nothing is run.

    Returns the checkout path, or None when git is not available, no directory
    name can be derived from the URL, or the git command fails / times out.
    """
    # which() returns None when nothing is found; wrapping it in str() would
    # yield the truthy string "None" and defeat this check.
    git_bin = which(GIT_BIANRY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    repo_name = urlparse(clone_url).path.rstrip("/").rsplit("/", 1)[-1]
    # Strip only a trailing ".git"; a blanket replace() would also mangle
    # names such as "project.github.io".
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        # An empty name would make repo_path equal to work_path itself.
        LOG.error(f"Unable to work out a checkout directory from {clone_url}")
        return None

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path
181
182
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission-denied deletes.

    When the failing function is 'os.rmdir' or 'os.unlink' and the error is
    EACCES (i.e. Permission denied), the path is made readable, writable, and
    executable by everyone and the delete operation is attempted once more.

    Anything else is not handled here: the exception currently being processed
    is re-raised with a bare ``raise``, so this function must be called while
    an exception is active.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    is_delete_op = func in (os.rmdir, os.unlink)
    if not is_delete_op or error.errno != errno.EACCES:
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    world_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, world_rwx)
    func(path)  # Try the error causing delete operation again
205
206
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Read the JSON config and queue every project name in sorted order.

    Returns the parsed config together with a queue sized to hold exactly the
    names found under the config's "projects" key.
    """
    config = json.loads(config_path.read_text())

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(config["projects"].keys())
    pending: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        # The queue was sized for all names, so this can never find it full.
        pending.put_nowait(name)

    return config, pending
222
223
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Worker loop: take project names off the queue until it is empty.

    For each project the config decides whether it is skipped (disabled, not
    enabled for the running Python version, or a long checkout while
    ``long_checkouts`` is off). Otherwise the project is checked out, Black is
    run on it, and the checkout is removed again unless ``keep`` is set.
    """
    loop = asyncio.get_event_loop()
    py_version = ".".join(str(part) for part in version_info[:2])

    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Check if disabled by config
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Check if we should run on this version of Python
        enabled_versions = project_config["py_versions"]
        if not ("all" in enabled_versions or py_version in enabled_versions):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Check if we're doing big projects / long checkouts
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so hand it to the default executor.
            remove_checkout = partial(rmtree, repo_path, onerror=handle_PermissionError)
            await loop.run_in_executor(None, remove_checkout)

        LOG.info(f"Finished {project_name}")
281
282
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with X workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects, or -1 when the
    config lists no projects at all.
    """
    # Hand Results fresh dicts explicitly so nothing recorded by an earlier
    # call in the same process (in particular failed_projects) can leak into
    # this run through shared default values.
    results = Results(
        stats={
            "disabled": 0,
            "failed": 0,
            "skipped_long_checkout": 0,
            "success": 0,
            "wrong_py_ver": 0,
        },
        failed_projects={},
    )

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
325
326
# This file only provides helpers; executing it directly is treated as an error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")