git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add missing changelog entry for fmt: skip (#2025)
[etc/vim.git] / src / black_primer / lib.py
1 #!/usr/bin/env python3
2
3 import asyncio
4 import errno
5 import json
6 import logging
7 import os
8 import stat
9 import sys
10 from functools import partial
11 from pathlib import Path
12 from platform import system
13 from shutil import rmtree, which
14 from subprocess import CalledProcessError
15 from sys import version_info
16 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
17 from urllib.parse import urlparse
18
19 import click
20
21
# Executable names differ on Windows, where binaries carry an ".exe" suffix.
WINDOWS = system() == "Windows"
BLACK_BINARY, GIT_BINARY = ("black.exe", "git.exe") if WINDOWS else ("black", "git")
LOG = logging.getLogger(__name__)


# Spawning subprocesses from asyncio on Windows requires a ProactorEventLoop.
# It is the default loop from Python 3.8 on, so this can be dropped once older
# interpreters are no longer supported.
# The check is written directly against sys.platform because mypy only
# understands platform checks in that form:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
34
35
class _ResultsBase(NamedTuple):
    """Field layout for Results; use Results, which supplies fresh defaults."""

    stats: Dict[str, int]
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsBase):
    """
    Mutable accumulators shared by all workers of a single primer run.

    - stats: counter name -> count
    - failed_projects: project name -> the CalledProcessError describing the failure

    Each instance gets its own dictionaries unless they are passed in explicitly.
    Literal ``{}`` defaults on the NamedTuple would be created once and shared by
    every ``Results()``, leaking counts and failures from one run into the next.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        # Only substitute a new dict for None so that a caller-supplied
        # dictionary (even an empty one) keeps its identity.
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
39
40
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 300,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
    """
    Run ``cmd`` as a subprocess and return what ``communicate()`` yields.

    stderr is redirected into stdout, so the combined output is in the first
    element of the returned pair.

    Raises asyncio.TimeoutError when the command does not finish within
    ``timeout`` seconds (the process is killed and reaped first), and
    CalledProcessError when it exits with a non-zero return code.
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout)
    except asyncio.TimeoutError:
        # Do not leave a runaway child behind: kill it and wait for it to exit
        # before letting the timeout propagate to the caller.
        proc.kill()
        await proc.wait()
        raise

    # wait_for() either timed out (handled above) or communicate() completed,
    # so by now the process has terminated and must have a return code.
    returncode = proc.returncode
    assert returncode is not None

    if returncode == 0:
        return (out, err)

    raise CalledProcessError(returncode, " ".join(cmd), output=out, stderr=err)
73
74
def analyze_results(project_count: int, results: Results) -> int:
    """
    Print a summary of the primer run and return the number of failed projects.

    ``project_count`` is the denominator for the success / failure percentages.
    For every entry in ``results.failed_projects`` the return code and any
    captured stderr / stdout are printed as well.
    """
    stats = results.stats

    def _percent(count: int) -> float:
        return round(((count / project_count) * 100), 2)

    def _plural(count: int) -> str:
        return "" if count == 1 else "s"

    failed_pct = _percent(stats["failed"])
    success_pct = _percent(stats["success"])

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    # The failure line is only emphasised when something actually failed.
    click.secho(
        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(stats["failed"]),
        fg="red",
    )
    s = _plural(stats["disabled"])
    click.echo(f" - {stats['disabled']} project{s} disabled by config")
    s = _plural(stats["wrong_py_ver"])
    click.echo(
        f" - {stats['wrong_py_ver']} project{s} skipped due to Python version"
    )
    click.echo(f" - {stats['skipped_long_checkout']} skipped due to long checkout")

    failures = results.failed_projects
    if failures:
        click.secho("\nFailed projects:\n", bold=True)

        for name, cpe in failures.items():
            print(f"## {name}:")
            print(f" - Returned {cpe.returncode}")
            for label, captured in (("stderr", cpe.stderr), ("stdout", cpe.stdout)):
                if captured:
                    print(f" - {label}:\n{captured.decode('utf8')}")
            print("")

    return stats["failed"]
113
114
async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """
    Run Black in ``--check --diff`` mode on ``repo_path`` and record the outcome.

    ``project_config`` supplies the optional ``cli_arguments`` (a list of extra
    command line arguments) and the required ``expect_formatting_changes`` flag.
    ``results`` is updated in place:

    - timeout: counted as failed
    - return code 1 (Black would reformat): success if formatting changes were
      expected, otherwise failed and recorded in ``results.failed_projects``
    - return code > 1: failed and recorded
    - return code 0: success unless formatting changes were expected, in which
      case it is failed and recorded

    Any other CalledProcessError (return code below 1) is logged and re-raised.
    """
    cmd = [str(which(BLACK_BINARY))]
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        # list.extend() takes a single iterable. Unpacking the list with `*`
        # would append a lone argument character by character and raise
        # TypeError as soon as more than one argument is configured.
        cmd.extend(cli_arguments)
    cmd.extend(["--check", "--diff", "."])

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
    except asyncio.TimeoutError:
        results.stats["failed"] += 1
        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
        # The project is already counted as failed. Falling through would count
        # it a second time as either a success or another failure.
        return
    except CalledProcessError as cpe:
        # TODO: Tune for smarter for higher signal
        # If any other return value than 1 we raise - can disable project in config
        if cpe.returncode == 1:
            if not project_config["expect_formatting_changes"]:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
            else:
                results.stats["success"] += 1
            return
        elif cpe.returncode > 1:
            results.stats["failed"] += 1
            results.failed_projects[repo_path.name] = cpe
            return

        LOG.error(f"Unknown error with {repo_path}")
        raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
156
157
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """
    Clone the project into ``work_path``, or update an existing checkout.

    The checkout directory is named after the last component of the path in
    ``project_config["git_clone_url"]``, minus a trailing ``.git``.

    - No existing checkout: ``git clone --depth <depth>`` is run in ``work_path``.
    - Existing checkout and ``rebase``: ``git pull --rebase`` is run inside it.
    - Existing checkout without ``rebase``: it is returned untouched.

    Returns the checkout path, or None when git is unavailable, the URL has no
    usable repository name, or the git command fails or times out.
    """
    # which() returns None when nothing is found; keep it un-stringified so the
    # check below can actually fire (str(None) is the truthy string "None").
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    # Use the last path component as the directory name, and strip ".git" only
    # as a suffix so names such as "foo.github.io" survive intact.
    repo_name = urlparse(clone_url).path.rstrip("/").rsplit("/", maxsplit=1)[-1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        LOG.error(f"Unable to work out a repository name from {clone_url}")
        return None

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {clone_url}: {e}")
        return None

    return repo_path
190
191
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission failures.

    If the failing function is 'os.rmdir' or 'os.unlink' and the error is EACCES
    (i.e. Permission denied), the path is made readable, writable, and executable
    by everyone and the failing delete operation is attempted once more.

    Anything else cannot be handled here, so the error currently being handled
    is re-raised. The bare ``raise`` presumes this is invoked while that error
    is still active, as it is when used as an rmtree ``onerror`` callback.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    retryable = func in (os.rmdir, os.unlink) and error.errno == errno.EACCES
    if not retryable:
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Second attempt at the delete that failed
214
215
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """
    Read the JSON config and queue up every project name.

    Returns the parsed config together with a queue that holds the keys of
    ``config["projects"]`` in sorted order.
    """
    with config_path.open("r") as config_file:
        config = json.loads(config_file.read())

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(config["projects"])
    # The queue is sized to fit every name, so none of the puts has to wait.
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        await queue.put(name)

    return config, queue
231
232
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """
    Worker loop: take project names off ``queue`` until it is empty.

    A project is skipped (and the matching counter in ``results.stats`` bumped)
    when it is disabled, not enabled for the running Python version, or marked
    as a long checkout while ``long_checkouts`` is off. Otherwise it is checked
    out, Black is run on it, and the checkout is deleted unless ``keep`` is set.
    """
    loop = asyncio.get_event_loop()
    py_version = f"{version_info[0]}.{version_info[1]}"

    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            break
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Disabled via config?
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Enabled for the Python version we are running on?
        enabled_versions = project_config["py_versions"]
        if not ("all" in enabled_versions or py_version in enabled_versions):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Big projects / long checkouts only run when explicitly requested
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so hand it to the default executor.
            remove_checkout = partial(
                rmtree, repo_path, onerror=handle_PermissionError
            )
            await loop.run_in_executor(None, remove_checkout)

        LOG.info(f"Finished {project_name}")

    LOG.debug(f"project_runner {idx} exiting")
290
291
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Run Black over every configured project using ``workers`` parallel runners
    - Success is gauged via the config "expect_formatting_changes"

    Returns the number of failed projects, or -1 when the config lists no
    projects at all.
    """
    results = Results()
    for counter in (
        "disabled",
        "failed",
        "skipped_long_checkout",
        "success",
        "wrong_py_ver",
    ):
        results.stats[counter] = 0

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    plural = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{plural} to run Black over")
    if project_count < 1:
        return -1

    plural = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{plural} to run Black")
    runners = [
        project_runner(
            worker_idx,
            config,
            queue,
            work_path,
            results,
            long_checkouts=long_checkouts,
            rebase=rebase,
            keep=keep,
        )
        for worker_idx in range(workers)
    ]
    # Every runner has to drain the queue before the results mean anything
    await asyncio.gather(*runners)

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
334
335
if __name__ == "__main__":  # pragma: nocover
    # This module only provides helpers and is not meant to be run directly.
    _not_a_script = NotImplementedError("lib is a library, funnily enough.")
    raise _not_a_script