git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Document jupyter hook (#2416)
[etc/vim.git] / src / black_primer / lib.py
1 import asyncio
2 import errno
3 import json
4 import logging
5 import os
6 import stat
7 import sys
8 from functools import partial
9 from pathlib import Path
10 from platform import system
11 from shutil import rmtree, which
12 from subprocess import CalledProcessError
13 from sys import version_info
14 from tempfile import TemporaryDirectory
15 from typing import (
16     Any,
17     Callable,
18     Dict,
19     List,
20     NamedTuple,
21     Optional,
22     Sequence,
23     Tuple,
24     Union,
25 )
26 from urllib.parse import urlparse
27
28 import click
29
30
# Default timeout, in seconds, applied to subprocesses spawned by this module.
TEN_MINUTES_SECONDS = 600
# True when platform.system() reports "Windows".
WINDOWS = system() == "Windows"
# Executable names to look up on PATH; Windows builds carry an ".exe" suffix.
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
36
37
# Windows needs a ProactorEventLoop in order to exec subprocesses.
# Starting with Python 3.8 this is the default, so this block can be removed
# once Black only supports Python >= 3.8.
# mypy only respects sys.platform when it appears directly in the condition:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    # Import-time side effect: installs a proactor loop as the current loop.
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
44
45
class Results(
    NamedTuple(
        "Results",
        [
            ("stats", Dict[str, int]),
            ("failed_projects", Dict[str, CalledProcessError]),
        ],
    )
):
    """Accumulators filled in while projects are processed.

    Fields (in tuple order):
      stats: counter name -> count.
      failed_projects: project name -> the CalledProcessError describing
        the failure.

    Both fields are optional. When omitted, each instance receives its own
    fresh dict; literal ``{}`` defaults on the NamedTuple would be evaluated
    once and silently shared by every ``Results()`` created in the process.
    """

    # Keep instances as lightweight as a plain NamedTuple (no per-instance dict).
    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
49
50
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = TEN_MINUTES_SECONDS,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
    stdin: Optional[bytes] = None,
) -> Tuple[bytes, bytes]:
    """Run ``cmd`` as a subprocess and return what ``communicate()`` captured.

    ``stdin`` (if any) is fed to the child's standard input. The child's
    stderr is redirected into its stdout pipe. If the child has not finished
    within ``timeout`` seconds it is killed and reaped, and the
    ``asyncio.TimeoutError`` propagates to the caller. A non-zero exit status
    raises ``CalledProcessError`` carrying the captured output.
    """
    pipe = asyncio.subprocess.PIPE
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdin=pipe,
        stdout=pipe,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        out, err = await asyncio.wait_for(proc.communicate(stdin), timeout)
    except asyncio.TimeoutError:
        # Do not leave the child running (or unreaped) once we give up on it.
        proc.kill()
        await proc.wait()
        raise

    # wait_for() either timed out (handled above) or communicate() finished,
    # so the process has terminated and must have an exit status by now.
    exit_code = proc.returncode
    assert exit_code is not None

    if exit_code == 0:
        return (out, err)

    raise CalledProcessError(exit_code, " ".join(cmd), output=out, stderr=err)
85
86
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the run and return the number of failed projects.

    Percentages are computed against ``project_count``. After the counters,
    every entry of ``results.failed_projects`` is printed with its return
    code and any captured stderr / stdout.
    """

    def plural(count: int) -> str:
        # Suffix for "project" / "projects".
        return "" if count == 1 else "s"

    stats = results.stats
    failed_pct = round(((stats["failed"] / project_count) * 100), 2)
    success_pct = round(((stats["success"] / project_count) * 100), 2)

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(stats["failed"]),
        fg="red",
    )

    disabled = stats["disabled"]
    click.echo(f" - {disabled} project{plural(disabled)} disabled by config")
    wrong_ver = stats["wrong_py_ver"]
    click.echo(
        f" - {wrong_ver} project{plural(wrong_ver)} skipped due to Python version"
    )
    click.echo(f" - {stats['skipped_long_checkout']} skipped due to long checkout")

    failures = results.failed_projects
    if failures:
        click.secho("\nFailed projects:\n", bold=True)

    for name, cpe in failures.items():
        print(f"## {name}:")
        print(f" - Returned {cpe.returncode}")
        # The captured streams are bytes; only print the ones that have content.
        if cpe.stderr:
            print(f" - stderr:\n{cpe.stderr.decode('utf8')}")
        if cpe.stdout:
            print(f" - stdout:\n{cpe.stdout.decode('utf8')}")
        print("")

    return stats["failed"]
125
126
127 def _flatten_cli_args(cli_args: List[Union[Sequence[str], str]]) -> List[str]:
128     """Allow a user to put long arguments into a list of strs
129     to make the JSON human readable"""
130     flat_args = []
131     for arg in cli_args:
132         if isinstance(arg, str):
133             flat_args.append(arg)
134             continue
135
136         args_as_str = "".join(arg)
137         flat_args.append(args_as_str)
138
139     return flat_args
140
141
async def black_run(
    project_name: str,
    repo_path: Optional[Path],
    project_config: Dict[str, Any],
    results: Results,
    no_diff: bool = False,
) -> None:
    """Run Black over one project and record the outcome in ``results``.

    Args:
        project_name: Config key of the project. The name "STDIN" (any case)
            selects stdin mode: ``repo_path`` is read as a single file and
            piped to ``black -``.
        repo_path: Checkout directory (or the file to pipe in stdin mode).
            A falsy value is recorded as a failure immediately.
        project_config: Per-project settings. Reads "cli_arguments",
            "base_path", "timeout_seconds" (all optional) and
            "expect_formatting_changes" (required).
        results: Accumulator whose ``stats`` counters and ``failed_projects``
            mapping are updated in place.
        no_diff: When true, ``--diff`` is not passed to Black.

    Raises:
        CalledProcessError: re-raised when Black exits with a status that is
            neither 1 nor greater than 1 (e.g. a negative status).
    """
    if not repo_path:
        results.stats["failed"] += 1
        results.failed_projects[project_name] = CalledProcessError(
            69, [], f"{project_name} has no repo_path: {repo_path}".encode(), b""
        )
        return

    stdin_test = project_name.upper() == "STDIN"
    # NOTE(review): which() returns None when the binary is not on PATH, which
    # str() turns into the literal "None"; left unchanged here.
    cmd = [str(which(BLACK_BINARY))]
    if "cli_arguments" in project_config and project_config["cli_arguments"]:
        cmd.extend(_flatten_cli_args(project_config["cli_arguments"]))
    cmd.append("--check")
    if not no_diff:
        cmd.append("--diff")

    # Work out if we should read in a python file or search from cwd
    stdin = None
    if stdin_test:
        cmd.append("-")
        stdin = repo_path.read_bytes()
    elif "base_path" in project_config:
        cmd.append(project_config["base_path"])
    else:
        cmd.append(".")

    timeout = project_config.get("timeout_seconds", TEN_MINUTES_SECONDS)
    with TemporaryDirectory() as tmp_path:
        # Prevent reading top-level user configs by manipulating environment variables
        env = {
            **os.environ,
            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
        }

        cwd_path = repo_path.parent if stdin_test else repo_path
        try:
            LOG.debug(f"Running black for {project_name}: {' '.join(cmd)}")
            _stdout, _stderr = await _gen_check_output(
                cmd, cwd=cwd_path, env=env, stdin=stdin, timeout=timeout
            )
        except asyncio.TimeoutError:
            results.stats["failed"] += 1
            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
            # Stop here: falling through would count this project a second
            # time (as another failure or as a success) in the code below.
            return
        except CalledProcessError as cpe:
            # TODO: Tune for smarter for higher signal
            # If any other return value than 1 we raise - can disable project in config
            if cpe.returncode == 1:
                # Exit status 1 from `black --check`: files would be reformatted.
                if not project_config["expect_formatting_changes"]:
                    results.stats["failed"] += 1
                    results.failed_projects[repo_path.name] = cpe
                else:
                    results.stats["success"] += 1
                return
            elif cpe.returncode > 1:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
                return

            LOG.error(f"Unknown error with {repo_path}")
            raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
224
225
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project's repository into ``work_path``, or update it.

    Args:
        work_path: Directory that holds the checkouts.
        project_config: Per-project settings; "git_clone_url" is read.
        rebase: When the checkout already exists, run ``git pull --rebase``
            in it instead of reusing it untouched.
        depth: Value passed to ``git clone --depth``.

    Returns:
        The checkout path, or ``None`` when git is not installed or the
        clone / pull fails or times out.
    """
    # which() returns None when the binary is missing; test that *before*
    # converting to str, otherwise the guard sees the truthy string "None".
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    repo_url_parts = urlparse(project_config["git_clone_url"])
    # Drop the leading "/" and split "<owner>/<repo>" once.
    path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)

    # Strip only a trailing ".git"; a blanket replace() would also mangle
    # names that merely contain it (e.g. "user.github.io.git").
    repo_name = path_parts[1].rstrip("/")
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        # Existing checkout and no rebase requested: reuse it as is.
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path
258
259
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission failures.

    ``exc`` is an exc_info-style triple; only its second item (the exception
    instance) is inspected. When the failing call was 'os.rmdir' or
    'os.unlink' and the error number is EACCES (Permission denied), the path
    is chmod-ed to 0777 and the same delete call is attempted once more.

    In every other case the exception currently being handled is re-raised
    with a bare ``raise``, since this function cannot deal with it.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    recoverable = func in (os.rmdir, os.unlink) and error.errno == errno.EACCES
    if not recoverable:
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Retry the delete operation that failed
282
283
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Parse the JSON config and queue up every project name, sorted.

    Returns the parsed config together with a queue sized to hold exactly
    the names found under the config's "projects" key.
    """
    with config_path.open("r") as config_file:
        config = json.load(config_file)

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(config["projects"])
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        # The queue was sized to fit every name, so this can never be full.
        queue.put_nowait(name)

    return config, queue
299
300
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
    no_diff: bool = False,
) -> None:
    """Worker loop: pull project names off ``queue`` until it is empty.

    Each project is skipped (and counted in ``results.stats``) when it is
    disabled, not enabled for the running Python version, or flagged as a
    long checkout while ``long_checkouts`` is false. Otherwise it is checked
    out, Black is run on it, and the checkout is deleted unless ``keep`` is
    set. The project named "STDIN" (any case) is special: nothing is cloned
    and this very source file is handed to Black instead.
    """
    loop = asyncio.get_event_loop()
    py_version = f"{version_info[0]}.{version_info[1]}"
    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Skip: switched off in the config
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Skip: not meant to run on this interpreter version
        enabled_versions = project_config["py_versions"]
        if "all" not in enabled_versions and py_version not in enabled_versions:
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Skip: big project / long checkout, and those were not requested
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path: Optional[Path]
        stdin_project = project_name.upper() == "STDIN"
        if stdin_project:
            repo_path = Path(__file__)
        else:
            repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
            if not repo_path:
                # Checkout failed; move on to the next project.
                continue
        await black_run(project_name, repo_path, project_config, results, no_diff)

        if not (keep or stdin_project):
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so hand it to the loop's default executor.
            await loop.run_in_executor(
                None,
                partial(rmtree, path=repo_path, onerror=handle_PermissionError),
            )

        LOG.info(f"Finished {project_name}")
362
363
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
    no_diff: bool = False,
) -> int:
    """
    Run every configured project through ``workers`` concurrent runners,
    then report the results.
    - Success is gauged via the config "expect_formatting_changes"

    Returns the number of failed projects, or -1 when the config lists no
    projects at all.
    """
    results = Results()
    # Start every counter at zero so the runners can simply increment them.
    for counter in (
        "disabled",
        "failed",
        "skipped_long_checkout",
        "success",
        "wrong_py_ver",
    ):
        results.stats[counter] = 0

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # All runners must finish before the results are analyzed
    runners = (
        project_runner(
            worker_id,
            config,
            queue,
            work_path,
            results,
            long_checkouts,
            rebase,
            keep,
            no_diff,
        )
        for worker_id in range(workers)
    )
    await asyncio.gather(*runners)

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
415
416
# This module is meant to be imported; running it as a script is an error.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")