git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Reorganize docs v2 (GH-2174)
[etc/vim.git] / src / black_primer / lib.py
1 import asyncio
2 import errno
3 import json
4 import logging
5 import os
6 import stat
7 import sys
8 from functools import partial
9 from pathlib import Path
10 from platform import system
11 from shutil import rmtree, which
12 from subprocess import CalledProcessError
13 from sys import version_info
14 from tempfile import TemporaryDirectory
15 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
16 from urllib.parse import urlparse
17
18 import click
19
20
# True when platform.system() reports Windows; selects the executable names below.
WINDOWS = system() == "Windows"
# Executable names looked up on PATH (with shutil.which) by the helpers below.
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Module-level logger shared by every function in this file.
LOG = logging.getLogger(__name__)


# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default event loop, so this block can be
# removed once Black requires Python >= 3.8.
# mypy only respects sys.platform if directly in the evaluation
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
33
34
class _ResultsFields(NamedTuple):
    """Tuple layout behind `Results`; construct `Results` instead of this."""

    # Counter name -> count (e.g. "failed", "success", "disabled").
    stats: Dict[str, int]
    # Project name -> the CalledProcessError recorded for that project.
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsFields):
    """
    Accumulated outcome of a primer run.

    Behaves as a two-field named tuple (`stats`, `failed_projects`). Both
    fields default to a *fresh* empty dict per instance; class-level `{}`
    defaults would be shared by every `Results()` and leak state between runs.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        # Create the dicts here (not as defaults) so instances never share them.
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
38
39
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 300,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
    """
    Run `cmd` as a subprocess and hand back what `communicate()` captured.

    stdout is piped and stderr is redirected into stdout, so the combined
    output arrives in the first element of the returned pair.

    Raises asyncio.TimeoutError (after killing and reaping the process) when
    it runs longer than `timeout` seconds, and CalledProcessError when it
    exits with a non-zero return code.
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        cwd=cwd,
        env=env,
        stderr=asyncio.subprocess.STDOUT,
        stdout=asyncio.subprocess.PIPE,
    )
    try:
        captured_out, captured_err = await asyncio.wait_for(
            proc.communicate(), timeout
        )
    except asyncio.TimeoutError:
        # Don't leave a runaway child behind: kill it and reap it first.
        proc.kill()
        await proc.wait()
        raise

    # wait_for() was given a real timeout, so we either timed out above or the
    # process finished; a finished process always has a return code.
    exit_code = proc.returncode
    assert exit_code is not None

    if exit_code == 0:
        return (captured_out, captured_err)

    raise CalledProcessError(
        exit_code, " ".join(cmd), output=captured_out, stderr=captured_err
    )
72
73
def analyze_results(project_count: int, results: Results) -> int:
    """
    Print a summary of the primer run and return the number of failures.

    `project_count` is the total number of projects, used as the denominator
    for the success / failure percentages. `results` supplies the counters in
    `results.stats` and the per-project errors in `results.failed_projects`.

    Returns `results.stats["failed"]`.
    """
    stats = results.stats

    def _percentage(count: int) -> float:
        # An empty run has nothing to divide by; report 0% instead of crashing.
        if not project_count:
            return 0.0
        return round(((count / project_count) * 100), 2)

    failed_pct = _percentage(stats["failed"])
    success_pct = _percentage(stats["success"])

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(stats["failed"]),
        fg="red",
    )
    s = "" if stats["disabled"] == 1 else "s"
    click.echo(f" - {stats['disabled']} project{s} disabled by config")
    s = "" if stats["wrong_py_ver"] == 1 else "s"
    click.echo(
        f" - {stats['wrong_py_ver']} project{s} skipped due to Python version"
    )
    click.echo(
        f" - {stats['skipped_long_checkout']} skipped due to long checkout"
    )

    if results.failed_projects:
        click.secho("\nFailed projects:\n", bold=True)

    for project_name, project_cpe in results.failed_projects.items():
        print(f"## {project_name}:")
        print(f" - Returned {project_cpe.returncode}")
        # Decode leniently: process output is arbitrary bytes and a report
        # must not die on a byte sequence that isn't valid UTF-8.
        if project_cpe.stderr:
            stderr_text = project_cpe.stderr.decode("utf8", errors="replace")
            print(f" - stderr:\n{stderr_text}")
        if project_cpe.stdout:
            stdout_text = project_cpe.stdout.decode("utf8", errors="replace")
            print(f" - stdout:\n{stdout_text}")
        print("")

    return stats["failed"]
112
113
async def black_run(
    repo_path: Path,
    project_config: Dict[str, Any],
    results: Results,
    no_diff: bool = False,
) -> None:
    """
    Run Black (`--check`) inside `repo_path` and record the outcome in `results`.

    `project_config["cli_arguments"]` (if present and non-empty) is passed
    through to Black; `project_config["expect_formatting_changes"]` decides
    whether a return code of 1 counts as success or failure. `--diff` is
    added unless `no_diff` is true.

    Exactly one of `results.stats["success"]` / `results.stats["failed"]` is
    incremented per call. Raises FileNotFoundError when no Black executable is
    on PATH, and re-raises CalledProcessError for return codes below 1.
    """
    black_bin = which(BLACK_BINARY)
    if black_bin is None:
        # str(None) would otherwise make us try to execute a program named "None".
        raise FileNotFoundError(errno.ENOENT, "No black binary found", BLACK_BINARY)

    cmd = [black_bin]
    if "cli_arguments" in project_config and project_config["cli_arguments"]:
        cmd.extend(project_config["cli_arguments"])
    cmd.append("--check")
    if no_diff:
        cmd.append(".")
    else:
        cmd.extend(["--diff", "."])

    with TemporaryDirectory() as tmp_path:
        # Prevent reading top-level user configs by manipulating environment variables
        env = {
            **os.environ,
            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
        }

        try:
            _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path, env=env)
        except asyncio.TimeoutError:
            results.stats["failed"] += 1
            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
            # Stop here: falling through would also count this project as a
            # success (or as a second failure) below.
            return
        except CalledProcessError as cpe:
            # TODO: Tune this to be smarter and give a higher signal
            # If any other return value than 1 we raise - can disable project in config
            if cpe.returncode == 1:
                if not project_config["expect_formatting_changes"]:
                    results.stats["failed"] += 1
                    results.failed_projects[repo_path.name] = cpe
                else:
                    results.stats["success"] += 1
                return
            elif cpe.returncode > 1:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
                return

            LOG.error(f"Unknown error with {repo_path}")
            raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
170
171
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """
    Clone the project's repository into `work_path`, or update an existing clone.

    The checkout directory is `work_path / <repo name>`, where the repo name is
    the last path component of `project_config["git_clone_url"]` without a
    trailing ".git". If that directory already exists it is returned as-is,
    or updated with `git pull --rebase` when `rebase` is true. `depth` is
    passed to `git clone --depth`.

    Returns the checkout path, or None (after logging an error) when git is
    not installed, the URL has no usable repo name, or the git command fails.
    """
    # Keep the Optional result: str(None) is the truthy string "None" and
    # would defeat the missing-binary check.
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    # git names the clone directory after the final path component of the URL.
    repo_name = urlparse(clone_url).path.rstrip("/").rsplit("/", maxsplit=1)[-1]
    # Strip only a trailing ".git"; a blanket replace would mangle names such
    # as "user.github.io".
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        LOG.error(f"Unable to work out a repository name from {clone_url}")
        return None

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path
204
205
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Handle PermissionError during shutil.rmtree.

    This checks if the erroring function is either 'os.rmdir' or 'os.unlink', and that
    the error was EACCES (i.e. Permission denied). If true, the path is set writable,
    readable, and executable by everyone. Finally, it tries the error causing delete
    operation again.

    If the check is false, then the original error will be reraised as this function
    can't handle it.

    `exc` is a `sys.exc_info()`-style triple; only its second item (the
    exception instance) is used.
    """
    excvalue = exc[1]
    LOG.debug(f"Handling {excvalue} from {func.__name__}... ")
    if func in (os.rmdir, os.unlink) and excvalue.errno == errno.EACCES:
        LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
        os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # chmod 0777
        func(path)  # Try the error causing delete operation again
    else:
        # Re-raise the exception we were handed. A bare `raise` only works
        # while the caller is inside an `except` block and fails with
        # RuntimeError otherwise.
        raise excvalue
228
229
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """
    Load project config and fill queue with all the project names

    Returns the parsed JSON config together with a queue holding every key of
    `config["projects"]` in sorted order.
    """
    # Read the JSON as UTF-8 explicitly rather than with the platform's locale
    # encoding, which is not UTF-8 everywhere (e.g. on Windows).
    with config_path.open("r", encoding="utf-8") as cfp:
        config = json.load(cfp)

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    project_names = sorted(config["projects"].keys())
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(project_names))
    for project in project_names:
        await queue.put(project)

    return config, queue
245
246
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
    no_diff: bool = False,
) -> None:
    """
    Worker loop: drain project names from `queue`, check each one out and run
    Black on it, recording the outcome in `results`.

    Projects are skipped (and counted in `results.stats`) when they are
    disabled in the config, not enabled for the running Python version, or
    marked as a long checkout while `long_checkouts` is false. Unless `keep`
    is true the checkout is deleted after Black has run. The worker returns
    once the queue is empty.
    """
    event_loop = asyncio.get_event_loop()
    running_py = f"{version_info[0]}.{version_info[1]}"

    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Disabled via config?
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Enabled for the Python version we are running on?
        enabled_versions = project_config["py_versions"]
        if not ("all" in enabled_versions or running_py in enabled_versions):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version_label(running_py)}")
            continue

        # Big projects / long checkouts only run when explicitly requested
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        checkout = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not checkout:
            continue
        await black_run(checkout, project_config, results, no_diff)

        if not keep:
            LOG.debug(f"Removing {checkout}")
            # rmtree blocks, so run it in the default executor.
            await event_loop.run_in_executor(
                None,
                partial(rmtree, path=checkout, onerror=handle_PermissionError),
            )

        LOG.info(f"Finished {project_name}")


def py_version_label(version: str) -> str:
    """Return `version` unchanged; keeps the skip message's f-string short."""
    return version
305
306
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
    no_diff: bool = False,
) -> int:
    """
    Process the queue with X workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects, or -1 when the
    config lists no projects at all.
    """
    # Build the result holder from fresh dicts so nothing (in particular
    # `failed_projects`) carries over from an earlier call in this process.
    results = Results(
        stats={
            "disabled": 0,
            "failed": 0,
            "skipped_long_checkout": 0,
            "success": 0,
            "wrong_py_ver": 0,
        },
        failed_projects={},
    )

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i,
                config,
                queue,
                work_path,
                results,
                long_checkouts,
                rebase,
                keep,
                no_diff,
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
358
359
# This module only provides helpers; running it as a script is an error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")