git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

OSS-Fuzz integration (#1930)
[etc/vim.git] / src / black_primer / lib.py
1 #!/usr/bin/env python3
2
3 import asyncio
4 import errno
5 import json
6 import logging
7 import os
8 import stat
9 import sys
10 from functools import partial
11 from pathlib import Path
12 from platform import system
13 from shutil import rmtree, which
14 from subprocess import CalledProcessError
15 from sys import version_info
16 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
17 from urllib.parse import urlparse
18
19 import click
20
21
# True when platform.system() reports "Windows".
WINDOWS = system() == "Windows"
# Executable names to look up on PATH; they carry an ".exe" suffix on Windows.
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)


# Windows needs a ProactorEventLoop if you want to exec subprocesses
# Starting with 3.8 this is the default - can remove when Black >= 3.8
# mypy only respects sys.platform if directly in the evaluation
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
# NOTE: this runs at import time and replaces the current event loop on win32.
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
34
35
class _ResultsFields(NamedTuple):
    """Field layout of ``Results``; split out so ``Results`` can define __new__."""

    stats: Dict[str, int]
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsFields):
    """Counters and failure details collected during a primer run.

    Fields:
        stats: counter name -> count.
        failed_projects: project name -> the CalledProcessError recorded for it.

    Both fields default to a *new* empty dict per instance. Dict literals used
    as NamedTuple defaults are created once and shared by every instance, so
    results from one run would leak into the next.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
39
40
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 300,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
    """Run ``cmd`` as a subprocess and return what ``communicate()`` yields.

    stderr is redirected into stdout, so all captured output is in the first
    element of the returned pair.

    Args:
        cmd: program and arguments to execute.
        timeout: seconds to wait for the process to finish.
        env: environment for the child process, passed through unchanged.
        cwd: working directory for the child process, passed through unchanged.

    Raises:
        asyncio.TimeoutError: the process did not finish within ``timeout``;
            it is killed and reaped before the error propagates.
        CalledProcessError: the process exited with a non-zero return code.
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout)
    except asyncio.TimeoutError:
        # Do not leave the child running once we give up on it.
        proc.kill()
        await proc.wait()
        raise

    exit_code = proc.returncode
    if exit_code == 0:
        return (out, err)

    # A missing return code is reported as 69.
    reported_code = 69 if exit_code is None else exit_code
    raise CalledProcessError(reported_code, " ".join(cmd), output=out, stderr=err)
70
71
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the primer run and return the number of failures.

    Args:
        project_count: total number of projects, used as the percentage base.
        results: collected counters and per-project failures. ``stats`` must
            contain the keys "failed", "success", "disabled", "wrong_py_ver"
            and "skipped_long_checkout".

    Returns:
        ``results.stats["failed"]``.
    """
    stats = results.stats

    def _percent(count: int) -> float:
        # Report 0% for an empty run instead of raising ZeroDivisionError.
        if project_count <= 0:
            return 0.0
        return round(((count / project_count) * 100), 2)

    failed_pct = _percent(stats["failed"])
    success_pct = _percent(stats["success"])

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(stats["failed"]),
        fg="red",
    )
    s = "" if stats["disabled"] == 1 else "s"
    click.echo(f" - {stats['disabled']} project{s} disabled by config")
    s = "" if stats["wrong_py_ver"] == 1 else "s"
    click.echo(
        f" - {stats['wrong_py_ver']} project{s} skipped due to Python version"
    )
    click.echo(
        f" - {stats['skipped_long_checkout']} skipped due to long checkout"
    )

    if results.failed_projects:
        click.secho("\nFailed projects:\n", bold=True)

    for project_name, project_cpe in results.failed_projects.items():
        print(f"## {project_name}:")
        print(f" - Returned {project_cpe.returncode}")
        # Captured output comes from arbitrary subprocesses; replace bytes
        # that are not valid UTF-8 rather than aborting the whole report.
        if project_cpe.stderr:
            stderr_text = project_cpe.stderr.decode("utf8", errors="replace")
            print(f" - stderr:\n{stderr_text}")
        if project_cpe.stdout:
            stdout_text = project_cpe.stdout.decode("utf8", errors="replace")
            print(f" - stdout:\n{stdout_text}")
        print("")

    return stats["failed"]
110
111
async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run Black on a checked-out project and record the outcome in ``results``.

    Black is invoked with ``--check --diff .`` inside ``repo_path``, preceded by
    any ``cli_arguments`` from the project config. The outcome is judged
    against ``project_config["expect_formatting_changes"]``:

    - exit code 0: success unless formatting changes were expected.
    - exit code 1: success only if formatting changes were expected.
    - exit code > 1, or a timeout: failure.

    Raises:
        CalledProcessError: re-raised for any other return code.
    """
    cmd = [str(which(BLACK_BINARY))]
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        # Extend with the list itself. Star-unpacking it into extend() would
        # split a lone argument into single characters and raise TypeError
        # when there are two or more arguments.
        cmd.extend(cli_arguments)
    cmd.extend(["--check", "--diff", "."])

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
    except asyncio.TimeoutError:
        results.stats["failed"] += 1
        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
        # Stop here so the timeout is not tallied a second time by the
        # exit-code-0 accounting below.
        return
    except CalledProcessError as cpe:
        # TODO: Tune for smarter for higher signal
        # If any other return value than 1 we raise - can disable project in config
        if cpe.returncode == 1:
            if not project_config["expect_formatting_changes"]:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
            else:
                results.stats["success"] += 1
            return
        elif cpe.returncode > 1:
            results.stats["failed"] += 1
            results.failed_projects[repo_path.name] = cpe
            return

        LOG.error(f"Unknown error with {repo_path}")
        raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
153
154
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project into ``work_path``, or update an existing checkout.

    Args:
        work_path: directory that holds the checkouts.
        project_config: project settings; ``git_clone_url`` is read from it.
        rebase: when the checkout already exists, run ``git pull --rebase``
            in it instead of returning it untouched.
        depth: value passed to ``git clone --depth``.

    Returns:
        Path of the checkout, or None when git is unavailable, the URL yields
        no repository name, or the git command fails or times out.
    """
    # Keep which()'s result as-is: wrapping it in str() would turn a missing
    # binary (None) into the truthy string "None" and defeat this check.
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    # git names the checkout after the last path component of the URL, minus
    # a trailing ".git". Only the suffix is stripped, so names that merely
    # contain ".git" (e.g. "user.github.io") stay intact.
    repo_name = urlparse(clone_url).path.rstrip("/").rsplit("/", maxsplit=1)[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        # An empty name would make repo_path equal to work_path itself.
        LOG.error(f"Unable to work out a repository name from {clone_url}")
        return None

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path
187
188
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission-denied deletions.

    Only failures of 'os.rmdir' or 'os.unlink' with errno EACCES are handled:
    the path is made readable, writable, and executable by everyone and the
    failed operation is attempted once more.

    Anything else is not handled here and the active exception is reraised.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    recoverable = func in (os.rmdir, os.unlink) and error.errno == errno.EACCES
    if not recoverable:
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Retry the delete operation that failed
211
212
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Read the JSON config and queue every project name in sorted order"""
    config = json.loads(config_path.read_text())

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(config["projects"])
    # The queue is sized to hold every name, so filling it never blocks.
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        queue.put_nowait(name)

    return config, queue
228
229
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Worker: pull project names off the queue, check each out and run Black.

    Projects that are disabled, not enabled for the running Python version, or
    marked as long checkouts (unless ``long_checkouts`` is set) are counted in
    ``results.stats`` and skipped. The worker returns once the queue is empty.
    Checkouts are deleted after the run unless ``keep`` is set.
    """
    loop = asyncio.get_event_loop()
    py_version = ".".join(str(part) for part in version_info[:2])
    stats = results.stats
    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Disabled via config?
        if project_config.get("disabled"):
            stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Enabled for the Python version we are running on?
        allowed_versions = project_config["py_versions"]
        if not ("all" in allowed_versions or py_version in allowed_versions):
            stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Big projects / long checkouts only run when explicitly requested
        if project_config["long_checkout"] and not long_checkouts:
            stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so run it in the default executor.
            remove_checkout = partial(
                rmtree, repo_path, onerror=handle_PermissionError
            )
            await loop.run_in_executor(None, remove_checkout)

        LOG.info(f"Finished {project_name}")
287
288
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with X workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects
    (-1 when the config lists no projects at all)
    """
    # Hand Results brand-new dicts. Its field defaults are dict objects shared
    # by every instance, so relying on them would carry failed_projects over
    # from an earlier call in the same process.
    results = Results(
        stats={
            "disabled": 0,
            "failed": 0,
            "skipped_long_checkout": 0,
            "success": 0,
            "wrong_py_ver": 0,
        },
        failed_projects={},
    )

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
331
332
# This module has no command-line entry point: running it as a script raises.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")