git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Reset trailing comma handling
[etc/vim.git] / src / black_primer / lib.py
1 #!/usr/bin/env python3
2
3 import asyncio
4 import errno
5 import json
6 import logging
7 import os
8 import stat
9 import sys
10 from functools import partial
11 from pathlib import Path
12 from platform import system
13 from shutil import rmtree, which
14 from subprocess import CalledProcessError
15 from sys import version_info
16 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
17 from urllib.parse import urlparse
18
19 import click
20
21
# Platform-dependent executable names, used when looking the tools up on PATH.
WINDOWS = system() == "Windows"
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Historical misspelling, kept as an alias so existing references keep working.
GIT_BIANRY = GIT_BINARY
LOG = logging.getLogger(__name__)


# Windows needs a ProactorEventLoop if you want to exec subprocesses
# Starting with Python 3.8 this is the default - can be removed once Black
# only supports Python >= 3.8
# mypy only respects sys.platform if directly in the evaluation
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
34
35
class _ResultsBase(NamedTuple):
    # Counters keyed by outcome name (e.g. "failed", "success").
    stats: Dict[str, int]
    # Failing project name -> the CalledProcessError recorded for it.
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsBase):
    """Mutable accumulators shared by the workers of one primer run.

    Still a two-field named tuple (``stats``, ``failed_projects``), but every
    instance built without arguments gets its own fresh dictionaries. Declaring
    ``{}`` as a NamedTuple field default would hand the very same dict objects
    to every ``Results()``, leaking state from one run into the next.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        # Only substitute a new dict when the caller passed nothing; a dict
        # supplied by the caller (even an empty one) is stored as-is.
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
39
40
41 async def _gen_check_output(
42     cmd: Sequence[str],
43     timeout: float = 300,
44     env: Optional[Dict[str, str]] = None,
45     cwd: Optional[Path] = None,
46 ) -> Tuple[bytes, bytes]:
47     process = await asyncio.create_subprocess_exec(
48         *cmd,
49         stdout=asyncio.subprocess.PIPE,
50         stderr=asyncio.subprocess.STDOUT,
51         env=env,
52         cwd=cwd,
53     )
54     try:
55         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
56     except asyncio.TimeoutError:
57         process.kill()
58         await process.wait()
59         raise
60
61     if process.returncode != 0:
62         cmd_str = " ".join(cmd)
63         raise CalledProcessError(
64             process.returncode, cmd_str, output=stdout, stderr=stderr
65         )
66
67     return (stdout, stderr)
68
69
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the primer run and return the number of failures.

    ``project_count`` is the denominator for the success / failure percentages.
    ``results.stats`` must contain the "failed", "success", "disabled",
    "wrong_py_ver" and "skipped_long_checkout" counters. For every entry in
    ``results.failed_projects`` the return code and any captured output are
    printed as well.

    Returns ``results.stats["failed"]``.
    """
    failed = results.stats["failed"]
    succeeded = results.stats["success"]
    if project_count:
        failed_pct = round(((failed / project_count) * 100), 2)
        success_pct = round(((succeeded / project_count) * 100), 2)
    else:
        # Nothing was scheduled: report 0% rather than dividing by zero
        failed_pct = success_pct = 0.0

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{succeeded} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{failed} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(failed),
        fg="red",
    )
    s = "" if results.stats["disabled"] == 1 else "s"
    click.echo(f" - {results.stats['disabled']} project{s} disabled by config")
    s = "" if results.stats["wrong_py_ver"] == 1 else "s"
    click.echo(
        f" - {results.stats['wrong_py_ver']} project{s} skipped due to Python version"
    )
    click.echo(
        f" - {results.stats['skipped_long_checkout']} skipped due to long checkout"
    )

    if results.failed_projects:
        click.secho("\nFailed projects:\n", bold=True)

    for project_name, project_cpe in results.failed_projects.items():
        print(f"## {project_name}:")
        print(f" - Returned {project_cpe.returncode}")
        # errors="replace": one undecodable byte in a tool's output must not
        # abort the whole report
        if project_cpe.stderr:
            print(f" - stderr:\n{project_cpe.stderr.decode('utf8', errors='replace')}")
        if project_cpe.stdout:
            print(f" - stdout:\n{project_cpe.stdout.decode('utf8', errors='replace')}")
        print("")

    return failed
108
109
async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run Black over ``repo_path`` and record the outcome in ``results``.

    The command is ``black [cli_arguments...] --check --diff .`` executed with
    ``repo_path`` as the working directory. How the outcome is counted:

    - timeout: counted as failed
    - exit code 1: success if the project config sets
      ``expect_formatting_changes``, otherwise failed
    - exit code > 1: failed
    - exit code 0: failed if formatting changes were expected, otherwise success

    Failures other than timeouts are also stored in ``results.failed_projects``
    under the repository directory name. Any other ``CalledProcessError`` is
    logged and re-raised.
    """
    cmd = [str(which(BLACK_BINARY))]
    # .get() covers both a missing key and an empty / null value
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        # Pass the list itself: unpacking it with * hands extend() several
        # arguments (TypeError) or a lone string that is split into characters
        cmd.extend(cli_arguments)
    cmd.extend(["--check", "--diff", "."])

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
    except asyncio.TimeoutError:
        results.stats["failed"] += 1
        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
        # Stop here, otherwise the exit-code-0 handling below would count
        # this project a second time
        return
    except CalledProcessError as cpe:
        # TODO: Tune for smarter for higher signal
        # If any other return value than 1 we raise - can disable project in config
        if cpe.returncode == 1:
            if not project_config["expect_formatting_changes"]:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
            else:
                results.stats["success"] += 1
            return
        elif cpe.returncode > 1:
            results.stats["failed"] += 1
            results.failed_projects[repo_path.name] = cpe
            return

        LOG.error(f"Unknown error with {repo_path}")
        raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
151
152
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project into ``work_path``, or update an existing checkout.

    The checkout directory is ``work_path / <repo name>``, where the repo name
    is the last path component of ``project_config["git_clone_url"]`` without a
    trailing ".git". If that directory already exists it is returned untouched,
    unless ``rebase`` is true, in which case ``git pull --rebase`` is run in it.
    Otherwise a clone limited to ``depth`` commits is made.

    Returns the checkout path, or ``None`` when git is not found, the URL has
    no usable repository name, or the git command fails or times out.
    """
    # which() returns None when nothing is found; wrapping it in str() would
    # produce the truthy string "None" and defeat this check
    git_bin = which(GIT_BIANRY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    # Take the last path component, as git does when naming the directory
    # (see the git-clone documentation)
    repo_name = urlparse(clone_url).path.rstrip("/").rsplit("/", maxsplit=1)[-1]
    # Only strip ".git" as a suffix; ".git" elsewhere belongs to the name
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        LOG.error(f"Unable to work out a repository name from {clone_url}")
        return None

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {clone_url}: {e}")
        return None

    return repo_path
185
186
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission failures.

    Only failures of 'os.rmdir' or 'os.unlink' whose errno is EACCES
    (i.e. Permission denied) are handled: the path is made readable, writable,
    and executable by everyone and the failed delete operation is attempted
    once more.

    Anything else is re-raised unchanged, as this function can't handle it.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    # Guard clause: not a delete call, or not a permission problem -> give up
    if func not in (os.rmdir, os.unlink) or error.errno != errno.EACCES:
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Retry the delete operation that failed
209
210
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Read the JSON config and queue every project name in sorted order"""
    config = json.loads(config_path.read_text())

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(config["projects"])
    # The queue is sized to hold every name, so filling it never has to wait
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        queue.put_nowait(name)

    return config, queue
226
227
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Worker loop: take projects off the queue, check out, run Black, record

    Returns once the queue is empty. Projects that are disabled, not enabled
    for the running Python version, or marked as long checkouts (when
    ``long_checkouts`` is false) are counted in ``results.stats`` and skipped.
    Unless ``keep`` is true the checkout is deleted after Black has run.
    """
    running_py = f"{version_info[0]}.{version_info[1]}"
    loop = asyncio.get_event_loop()

    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Disabled by config?
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Enabled for the Python version we are running on?
        enabled_versions = project_config["py_versions"]
        if not ("all" in enabled_versions or running_py in enabled_versions):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {running_py}")
            continue

        # Big projects / long checkouts only run when asked for
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so run it in the default executor
            await loop.run_in_executor(
                None, partial(rmtree, path=repo_path, onerror=handle_PermissionError)
            )

        LOG.info(f"Finished {project_name}")
285
286
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with X workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects
    (or -1 when the config lists no projects at all)
    """
    # Build the accumulators from fresh dicts for every run: relying on the
    # defaults of Results() would reuse dict objects shared between instances,
    # so failed projects of an earlier run would show up again.
    results = Results(
        stats={
            "disabled": 0,
            "failed": 0,
            "skipped_long_checkout": 0,
            "success": 0,
            "wrong_py_ver": 0,
        },
        failed_projects={},
    )

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
329
330
if __name__ == "__main__":  # pragma: nocover
    # This module only provides helpers; running it directly is not supported
    _not_runnable = "lib is a library, funnily enough."
    raise NotImplementedError(_not_runnable)