git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Update pre-commit config (#2331)
[etc/vim.git] / src / black_primer / lib.py
1 import asyncio
2 import errno
3 import json
4 import logging
5 import os
6 import stat
7 import sys
8 from functools import partial
9 from pathlib import Path
10 from platform import system
11 from shutil import rmtree, which
12 from subprocess import CalledProcessError
13 from sys import version_info
14 from tempfile import TemporaryDirectory
15 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
16 from urllib.parse import urlparse
17
18 import click
19
20
# Executable names differ on Windows, where binaries carry an ".exe" suffix.
WINDOWS = system() == "Windows"
BLACK_BINARY, GIT_BINARY = ("black.exe", "git.exe") if WINDOWS else ("black", "git")
# Module-level logger shared by every helper in this file.
LOG = logging.getLogger(__name__)
25
26
# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default, so this can be removed when
# Black >= 3.8 (presumably: once Black's minimum supported Python is 3.8).
# mypy only respects sys.platform if it appears directly in the condition, so
# the check is spelled out here rather than reusing a precomputed flag.
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
33
34
class _ResultsFields(NamedTuple):
    """Field layout of ``Results``; split out so construction can be customised."""

    stats: Dict[str, int]
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsFields):
    """Accumulated outcome of a primer run.

    ``stats`` maps a counter name to its count and ``failed_projects`` maps a
    project name to the ``CalledProcessError`` describing its failure.

    Both fields default to a *fresh* empty dict per instance. Literal ``{}``
    defaults on a NamedTuple are evaluated once and shared by every instance,
    which would leak counters and failures from one run into the next.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        # Explicitly passed dicts are stored as-is so callers can still share
        # a mapping on purpose; only the implicit defaults are made per-instance.
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
38
39
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 600,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
    stdin: Optional[bytes] = None,
) -> Tuple[bytes, bytes]:
    """Run ``cmd`` as a subprocess and return what ``communicate()`` yields.

    The child's stderr is redirected into its stdout pipe, and ``stdin`` (if
    given) is fed to the child's standard input.

    Raises ``asyncio.TimeoutError`` if the command does not finish within
    ``timeout`` seconds (the child is killed and reaped first), and
    ``CalledProcessError`` if it exits with a non-zero return code.
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        out, err = await asyncio.wait_for(proc.communicate(stdin), timeout)
    except asyncio.TimeoutError:
        # Don't leave a runaway child behind; reap it before propagating.
        proc.kill()
        await proc.wait()
        raise

    # wait_for() either timed out (handled above) or communicate() completed,
    # so by now the process has terminated and must have a return code.
    exit_code = proc.returncode
    assert exit_code is not None

    if exit_code == 0:
        return (out, err)

    raise CalledProcessError(exit_code, " ".join(cmd), output=out, stderr=err)
74
75
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the primer run and return the number of failures.

    Percentages are computed against ``project_count``. Every entry in
    ``results.failed_projects`` is then listed with its return code and any
    captured stderr / stdout.
    """
    stats = results.stats

    def percent_of_projects(counter: str) -> float:
        return round(((stats[counter] / project_count) * 100), 2)

    def plural_suffix(count: int) -> str:
        return "" if count == 1 else "s"

    failed_pct = percent_of_projects("failed")
    success_pct = percent_of_projects("success")

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(stats["failed"]),
        fg="red",
    )

    disabled_suffix = plural_suffix(stats["disabled"])
    click.echo(f" - {stats['disabled']} project{disabled_suffix} disabled by config")
    py_ver_suffix = plural_suffix(stats["wrong_py_ver"])
    click.echo(
        f" - {stats['wrong_py_ver']} project{py_ver_suffix}"
        " skipped due to Python version"
    )
    click.echo(f" - {stats['skipped_long_checkout']} skipped due to long checkout")

    failures = results.failed_projects
    if failures:
        click.secho("\nFailed projects:\n", bold=True)

        for name, error in failures.items():
            print(f"## {name}:")
            print(f" - Returned {error.returncode}")
            if error.stderr:
                print(f" - stderr:\n{error.stderr.decode('utf8')}")
            if error.stdout:
                print(f" - stdout:\n{error.stdout.decode('utf8')}")
            print()

    return stats["failed"]
114
115
async def black_run(
    project_name: str,
    repo_path: Optional[Path],
    project_config: Dict[str, Any],
    results: Results,
    no_diff: bool = False,
) -> None:
    """Run Black on one project and record exactly one outcome in ``results``.

    ``project_config`` may carry ``cli_arguments`` (extra arguments for Black)
    and must carry ``expect_formatting_changes``. A project named "STDIN"
    (case-insensitive) is special: the file at ``repo_path`` is piped to
    ``black -`` instead of running Black over the directory.

    Outcomes:
    - missing ``repo_path``, a timeout, or an exit code > 1: failure
    - exit code 1: failure unless ``expect_formatting_changes`` is set
    - exit code 0: failure if ``expect_formatting_changes`` is set, else success

    Any other ``CalledProcessError`` (return code below 1) is logged and
    re-raised.
    """
    if not repo_path:
        results.stats["failed"] += 1
        results.failed_projects[project_name] = CalledProcessError(
            69, [], f"{project_name} has no repo_path: {repo_path}".encode(), b""
        )
        return

    stdin_test = project_name.upper() == "STDIN"
    # NOTE: which() returns None when Black is not on PATH, which str() turns
    # into the literal command name "None".
    cmd = [str(which(BLACK_BINARY))]
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        cmd.extend(cli_arguments)
    cmd.append("--check")
    if not no_diff:
        cmd.append("--diff")

    # Work out if we should read in a python file or search from cwd
    stdin = None
    if stdin_test:
        cmd.append("-")
        stdin = repo_path.read_bytes()
    else:
        cmd.append(".")

    with TemporaryDirectory() as tmp_path:
        # Prevent reading top-level user configs by manipulating environment variables
        env = {
            **os.environ,
            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
        }

        cwd_path = repo_path.parent if stdin_test else repo_path
        try:
            await _gen_check_output(cmd, cwd=cwd_path, env=env, stdin=stdin)
        except asyncio.TimeoutError:
            results.stats["failed"] += 1
            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
            # Stop here: falling through would count this project a second
            # time, as either a success or another failure.
            return
        except CalledProcessError as cpe:
            # TODO: Tune for higher signal
            # If any other return value than 1 we raise - can disable project in config
            if cpe.returncode == 1:
                if not project_config["expect_formatting_changes"]:
                    results.stats["failed"] += 1
                    results.failed_projects[repo_path.name] = cpe
                else:
                    results.stats["success"] += 1
                return
            elif cpe.returncode > 1:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
                return

            LOG.error(f"Unknown error with {repo_path}")
            raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
190
191
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project into ``work_path``, or update an existing checkout.

    The checkout directory is derived from the path of
    ``project_config["git_clone_url"]``: everything after the first path
    segment, minus a trailing ``.git``.

    - If the directory already exists and ``rebase`` is true, run
      ``git pull --rebase`` inside it.
    - If it already exists and ``rebase`` is false, reuse it untouched.
    - Otherwise run ``git clone --depth <depth>`` inside ``work_path``.

    Returns the checkout path, or ``None`` if git is not on PATH or the git
    command failed or timed out.
    """
    # Keep the raw which() result: wrapping it in str() would turn a missing
    # binary (None) into the truthy string "None" and defeat this check.
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    repo_url_parts = urlparse(clone_url)
    path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)

    # Only drop a trailing ".git"; removing every occurrence would mangle
    # names such as "foo.github.io" into "foohub.io".
    repo_name = path_parts[1].rstrip("/")
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {clone_url}: {e}")
        return None

    return repo_path
224
225
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission failures.

    When the failing function is 'os.rmdir' or 'os.unlink' and the error is
    EACCES (i.e. Permission denied), the path is made readable, writable, and
    executable by everyone and the failing delete operation is attempted
    once more.

    Any other failure is outside what this handler can fix, so the exception
    currently being handled is re-raised.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    recoverable = func in (os.rmdir, os.unlink) and error.errno == errno.EACCES
    if not recoverable:
        # Bare raise: propagate the exception that triggered this callback.
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Retry the delete operation that originally failed
248
249
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Parse the JSON config and queue every project name in sorted order.

    Returns the parsed config together with a queue sized to hold exactly the
    names found under the config's "projects" key.
    """
    config = json.loads(config_path.read_text())

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    projects = config["projects"]
    ordered_names = sorted(projects.keys())
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(ordered_names))
    for name in ordered_names:
        # The queue is sized to fit every name, so this can never block.
        queue.put_nowait(name)

    return config, queue
265
266
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
    no_diff: bool = False,
) -> None:
    """Worker loop: drain project names from ``queue`` and run Black on each.

    Projects that are disabled, not enabled for the running Python version, or
    flagged as long checkouts (unless ``long_checkouts`` is set) are counted in
    ``results.stats`` and skipped. Everything else is checked out, run through
    Black, and the checkout is removed afterwards unless ``keep`` is set. The
    worker returns once the queue is empty.
    """
    loop = asyncio.get_event_loop()
    major, minor = version_info[0], version_info[1]
    py_version = f"{major}.{minor}"

    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Disabled via config?
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Enabled for the Python version we're running on?
        version_enabled = (
            "all" in project_config["py_versions"]
            or py_version in project_config["py_versions"]
        )
        if not version_enabled:
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Big projects / long checkouts only run on request
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        stdin_project = project_name.upper() == "STDIN"
        if stdin_project:
            # The STDIN project feeds this very file to Black instead of a checkout
            repo_path: Optional[Path] = Path(__file__)
        else:
            repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
            if not repo_path:
                # NOTE: a failed checkout is skipped without touching any counter
                continue
        await black_run(project_name, repo_path, project_config, results, no_diff)

        if not (keep or stdin_project):
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so run it in the default executor
            remove_checkout = partial(
                rmtree, path=repo_path, onerror=handle_PermissionError
            )
            await loop.run_in_executor(None, remove_checkout)

        LOG.info(f"Finished {project_name}")
328
329
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
    no_diff: bool = False,
) -> int:
    """
    Run every configured project through ``workers`` parallel runners
    - Success is gauged via the config "expect_formatting_changes"

    Returns the number of failed projects, or -1 if the config lists none
    """
    results = Results()
    counters = (
        "disabled",
        "failed",
        "skipped_long_checkout",
        "success",
        "wrong_py_ver",
    )
    for counter in counters:
        results.stats[counter] = 0

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    project_suffix = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{project_suffix} to run Black over")
    if project_count < 1:
        return -1

    worker_suffix = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{worker_suffix} to run Black")
    # Every runner must finish before the results can be analyzed
    runners = (
        project_runner(
            worker_idx,
            config,
            queue,
            work_path,
            results,
            long_checkouts,
            rebase,
            keep,
            no_diff,
        )
        for worker_idx in range(workers)
    )
    await asyncio.gather(*runners)

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
381
382
# This module only provides helpers for importing; executing it directly as a
# script is rejected with an explicit error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")