git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Add `black` Dockerfile (#1916)
[etc/vim.git] / src / black_primer / lib.py
1 #!/usr/bin/env python3
2
3 import asyncio
4 import errno
5 import json
6 import logging
7 import os
8 import stat
9 import sys
10 from functools import partial
11 from pathlib import Path
12 from platform import system
13 from shutil import rmtree, which
14 from subprocess import CalledProcessError
15 from sys import version_info
16 from tempfile import TemporaryDirectory
17 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
18 from urllib.parse import urlparse
19
20 import click
21
22
# Names of the executables this module shells out to; they carry an ".exe"
# suffix when running on Windows.
WINDOWS = system() == "Windows"
BLACK_BINARY = "black" + (".exe" if WINDOWS else "")
GIT_BINARY = "git" + (".exe" if WINDOWS else "")
LOG = logging.getLogger(__name__)


# Spawning subprocesses on Windows requires a ProactorEventLoop.  From Python
# 3.8 onwards that is already the default, so this can be dropped once Black
# only supports >= 3.8.
# The condition is spelled as a direct `sys.platform` comparison because mypy
# only honours the platform check when it appears literally in the test:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
35
36
class _ResultsFields(NamedTuple):
    # Plain field layout; `Results` below supplies per-instance defaults.
    stats: Dict[str, int]
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsFields):
    """Mutable accumulators shared by all workers of a single primer run.

    Fields:
      stats: counters keyed by outcome name.
      failed_projects: the CalledProcessError recorded for each failed project,
        keyed by project name.

    Both fields may be passed positionally or by keyword.  When omitted, each
    instance receives its *own* empty dict: literal ``{}`` defaults on a
    NamedTuple are created once and shared by every instance, which would let
    results leak from one run into the next.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
40
41
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 300,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
    """Run *cmd* as a subprocess and return what ``communicate()`` collected.

    The child's stderr is redirected into its stdout pipe.

    Raises:
      asyncio.TimeoutError: the command did not finish within *timeout*
        seconds; the child is killed and reaped before re-raising.
      CalledProcessError: the command exited with a non-zero status; the
        captured output is attached to the exception.
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        captured = await asyncio.wait_for(proc.communicate(), timeout)
    except asyncio.TimeoutError:
        # Don't leave a runaway child behind: kill it and wait for it to exit.
        proc.kill()
        await proc.wait()
        raise

    # wait_for() was given a real timeout, so we either timed out above or the
    # process has finished - and a finished process always has a returncode.
    assert proc.returncode is not None

    out, err = captured
    if proc.returncode == 0:
        return (out, err)

    raise CalledProcessError(proc.returncode, " ".join(cmd), output=out, stderr=err)
74
75
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the primer run and return the number of failures.

    The summary lists success / failure counts as percentages of
    *project_count*, the skip counters, and then, for every entry in
    ``results.failed_projects``, its return code and any captured output.
    """
    stats = results.stats

    def percent_of_projects(count: int) -> float:
        return round(((count / project_count) * 100), 2)

    def plural(count: int) -> str:
        return "" if count == 1 else "s"

    failed = stats["failed"]
    failed_pct = percent_of_projects(failed)
    succeeded = stats["success"]
    success_pct = percent_of_projects(succeeded)

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{succeeded} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{failed} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(failed),
        fg="red",
    )

    disabled = stats["disabled"]
    click.echo(f" - {disabled} project{plural(disabled)} disabled by config")
    wrong_py_ver = stats["wrong_py_ver"]
    click.echo(
        f" - {wrong_py_ver} project{plural(wrong_py_ver)} skipped due to Python version"
    )
    click.echo(f" - {stats['skipped_long_checkout']} skipped due to long checkout")

    failures = results.failed_projects
    if failures:
        click.secho("\nFailed projects:\n", bold=True)

    for project_name, project_cpe in failures.items():
        print(f"## {project_name}:")
        print(f" - Returned {project_cpe.returncode}")
        # Only show the streams that actually captured something.
        for label, raw in (("stderr", project_cpe.stderr), ("stdout", project_cpe.stdout)):
            if raw:
                print(f" - {label}:\n{raw.decode('utf8')}")
        print("")

    return failed
114
115
async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run Black on *repo_path* in check mode and record the outcome in *results*.

    Exactly one of ``results.stats["success"]`` / ``results.stats["failed"]``
    is incremented per call.  Failures caused by Black's exit status (or by
    missing expected formatting changes) are additionally stored in
    ``results.failed_projects`` under the repository directory name.

    The project's optional ``cli_arguments`` list is passed to Black ahead of
    ``--check --diff .``.

    Raises:
      CalledProcessError: Black exited with a status below 1 (any status other
        than the handled ">= 1" cases); the error is logged and re-raised.
    """
    cmd = [str(which(BLACK_BINARY))]
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        # Pass the list itself: splatting it into extend() would split a single
        # argument into characters and raise TypeError for two or more.
        cmd.extend(cli_arguments)
    cmd.extend(["--check", "--diff", "."])

    with TemporaryDirectory() as tmp_path:
        # Prevent reading top-level user configs by manipulating environment
        # variables
        env = {
            **os.environ,
            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
        }

        try:
            _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path, env=env)
        except asyncio.TimeoutError:
            results.stats["failed"] += 1
            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
            # A timed-out run is a failure and nothing else; without returning
            # here it would fall through and be counted a second time below.
            return
        except CalledProcessError as cpe:
            # TODO: Tune for smarter for higher signal
            # If any other return value than 1 we raise - can disable project in config
            if cpe.returncode == 1:
                if not project_config["expect_formatting_changes"]:
                    results.stats["failed"] += 1
                    results.failed_projects[repo_path.name] = cpe
                else:
                    results.stats["success"] += 1
                return
            elif cpe.returncode > 1:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
                return

            LOG.error(f"Unknown error with {repo_path}")
            raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
165
166
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project into *work_path*, or update an existing checkout.

    The checkout directory is ``work_path / <repo name>``, where the repo name
    is the last path component of ``project_config["git_clone_url"]`` without
    a trailing ``.git``.

    - No checkout yet: ``git clone --depth <depth>`` into *work_path*.
    - Checkout exists and *rebase* is true: ``git pull --rebase`` inside it.
    - Checkout exists and *rebase* is false: it is reused untouched.

    Returns the checkout path, or None if git is unavailable, the repo name
    cannot be derived from the URL, or the git command fails / times out
    (each case is logged).
    """
    # Keep which()'s None as-is: wrapping it in str() would yield the truthy
    # string "None" and the guard below could never trigger.
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    # Take the final URL path component so nested paths (group/sub/repo.git)
    # map to the directory that `git clone` creates.
    repo_name = urlparse(clone_url).path.rstrip("/").rsplit("/", 1)[-1]
    # Strip only a trailing ".git"; a blanket replace() would also mangle
    # names such as "foo.github.io".
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        # Never fall back to work_path itself - callers may delete the
        # returned directory after use.
        LOG.error(f"Unable to work out a repository name from {clone_url}")
        return None

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path
199
200
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission failures.

    Only one situation is handled: *func* is 'os.rmdir' or 'os.unlink' and the
    error's errno is EACCES (i.e. Permission denied). In that case *path* is
    made readable, writable, and executable by everyone and the failed delete
    operation is attempted once more.

    Anything else is not ours to fix, so the exception currently being handled
    is re-raised unchanged.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    if func not in (os.rmdir, os.unlink) or error.errno != errno.EACCES:
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Retry the delete operation that failed
223
224
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Read the JSON config and queue up every project name in sorted order.

    Returns the parsed config together with a queue sized to hold exactly the
    names found under its "projects" key.
    """
    with config_path.open("r") as config_file:
        config = json.load(config_file)

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(config["projects"].keys())
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        # The queue has room for every name, so this can never hit QueueFull.
        queue.put_nowait(name)

    return config, queue
240
241
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Worker loop: drain *queue*, checking out each project and running Black.

    Projects that are disabled, not enabled for the running Python version, or
    flagged as long checkouts (unless *long_checkouts* is set) are counted in
    ``results.stats`` and skipped.  A project whose checkout fails is skipped
    as well.  Unless *keep* is set, the checkout is deleted after Black ran.
    The worker returns once the queue is empty.
    """
    loop = asyncio.get_event_loop()
    running_py = "{}.{}".format(version_info[0], version_info[1])
    projects = config["projects"]

    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = projects[project_name]

        # Disabled in the config?
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Is this project meant to run on the current Python version?
        enabled_versions = project_config["py_versions"]
        if not ("all" in enabled_versions or running_py in enabled_versions):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {running_py}")
            continue

        # Big projects / long checkouts only run when explicitly requested
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so hand it to the default executor.
            await loop.run_in_executor(
                None,
                partial(rmtree, path=repo_path, onerror=handle_PermissionError),
            )

        LOG.info(f"Finished {project_name}")
299
300
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with X workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects, or -1 when the config
    lists no projects at all.
    """
    # Build the accumulators explicitly instead of relying on Results()
    # defaults, so every call starts from fresh, unshared dicts and nothing
    # recorded by an earlier call in the same process can leak into this one.
    results = Results(
        stats={
            "disabled": 0,
            "failed": 0,
            "skipped_long_checkout": 0,
            "success": 0,
            "wrong_py_ver": 0,
        },
        failed_projects={},
    )

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
343
344
# This module is import-only: running it as a script fails loudly on purpose.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")