git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Surface links to Stability Policy (GH-2848)
[etc/vim.git] / src / black_primer / lib.py
1 import asyncio
2 import errno
3 import json
4 import logging
5 import os
6 import stat
7 import sys
8 from functools import partial
9 from pathlib import Path
10 from platform import system
11 from shutil import rmtree, which
12 from subprocess import CalledProcessError
13 from sys import version_info
14 from tempfile import TemporaryDirectory
15 from typing import (
16     Any,
17     Callable,
18     Dict,
19     List,
20     NamedTuple,
21     Optional,
22     Sequence,
23     Tuple,
24     Union,
25 )
26 from urllib.parse import urlparse
27
28 import click
29
30
# Default subprocess timeout (in seconds) used when a caller or project config
# does not supply its own.
TEN_MINUTES_SECONDS = 600
# True when platform.system() reports Windows; selects the ".exe" names below.
WINDOWS = system() == "Windows"
# Executable names that are resolved on PATH with shutil.which().
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Module-level logger.
LOG = logging.getLogger(__name__)
36
37
# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default, so this block can be removed
# when Black >= 3.8 (presumably: once Black only supports Python 3.8+).
# mypy only respects sys.platform if it is used directly in the condition:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    # NOTE: this runs at import time and replaces the current event loop.
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
44
45
class _ResultsFields(NamedTuple):
    """Field layout of :class:`Results`.

    Kept as a separate base class because a NamedTuple cannot define its own
    ``__new__``, which is needed to give every instance fresh dictionaries.
    """

    stats: Dict[str, int]
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsFields):
    """Accumulators that the workers of one primer run write into.

    Fields:
        stats: counter name -> count (e.g. "failed", "success").
        failed_projects: project name -> the CalledProcessError describing
            why that project failed.

    Both fields default to a *new* empty dict per instance.  Declaring ``{}``
    as a NamedTuple field default would create a single dict shared by every
    ``Results()`` ever built, so failures recorded by one run would show up
    in the next run within the same process.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        # Only substitute a fresh dict when the caller passed nothing; an
        # explicitly supplied dict (even an empty one) is stored as-is.
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
49
50
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = TEN_MINUTES_SECONDS,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
    stdin: Optional[bytes] = None,
) -> Tuple[bytes, bytes]:
    """Run ``cmd`` as a subprocess and return what ``communicate()`` yields.

    The child's stderr is redirected into its stdout pipe, and ``stdin`` (if
    given) is fed to the child.

    Raises:
        asyncio.TimeoutError: the command did not finish within ``timeout``
            seconds; the child is killed and reaped before re-raising.
        CalledProcessError: the command exited with a non-zero return code.
    """
    pipe = asyncio.subprocess.PIPE
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdin=pipe,
        stdout=pipe,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        out, err = await asyncio.wait_for(proc.communicate(stdin), timeout)
    except asyncio.TimeoutError:
        # Do not leave a runaway child behind: kill it and wait for it to exit.
        proc.kill()
        await proc.wait()
        raise

    # wait_for() was given a real timeout, so at this point the process has
    # either timed out (handled above) or completed and has a return code.
    exit_code = proc.returncode
    assert exit_code is not None

    if exit_code == 0:
        return (out, err)

    raise CalledProcessError(exit_code, " ".join(cmd), output=out, stderr=err)
85
86
def analyze_results(project_count: int, results: Results) -> int:
    """Print a human-readable summary of a primer run.

    Lists every failed project with its return code and captured output,
    then prints the success / failure / skip counters.

    Returns the number of failed projects (``results.stats["failed"]``).
    """
    stats = results.stats
    failures = results.failed_projects

    failed_pct = round(((stats["failed"] / project_count) * 100), 2)
    success_pct = round(((stats["success"] / project_count) * 100), 2)

    if failures:
        click.secho("\nFailed projects:\n", bold=True)

    # Per-project failure details
    for name, cpe in failures.items():
        print(f"## {name}:")
        print(f" - Returned {cpe.returncode}")
        if cpe.stderr:
            print(f" - stderr:\n{cpe.stderr.decode('utf8')}")
        if cpe.stdout:
            print(f" - stdout:\n{cpe.stdout.decode('utf8')}")
        print("")

    # Overall counters
    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(stats["failed"]),
        fg="red",
    )
    plural = "" if stats["disabled"] == 1 else "s"
    click.echo(f" - {stats['disabled']} project{plural} disabled by config")
    plural = "" if stats["wrong_py_ver"] == 1 else "s"
    click.echo(
        f" - {stats['wrong_py_ver']} project{plural} skipped due to Python version"
    )
    click.echo(f" - {stats['skipped_long_checkout']} skipped due to long checkout")

    if failures:
        failed = ", ".join(failures)
        click.secho(f"\nFailed projects: {failed}\n", bold=True)

    return stats["failed"]
129
130
131 def _flatten_cli_args(cli_args: List[Union[Sequence[str], str]]) -> List[str]:
132     """Allow a user to put long arguments into a list of strs
133     to make the JSON human readable"""
134     flat_args = []
135     for arg in cli_args:
136         if isinstance(arg, str):
137             flat_args.append(arg)
138             continue
139
140         args_as_str = "".join(arg)
141         flat_args.append(args_as_str)
142
143     return flat_args
144
145
async def black_run(
    project_name: str,
    repo_path: Optional[Path],
    project_config: Dict[str, Any],
    results: Results,
    no_diff: bool = False,
) -> None:
    """Run Black on one project and record the outcome in ``results``.

    Args:
        project_name: Name of the project; the name "STDIN" (any case) makes
            Black read the file at ``repo_path`` from standard input instead
            of scanning a directory.
        repo_path: Checkout directory (or, for the STDIN project, the file to
            pipe in).  A falsy value is recorded as a failure.
        project_config: Per-project config.  Reads the optional keys
            "cli_arguments", "base_path" and "timeout_seconds", and the
            required key "expect_formatting_changes".
        results: Accumulator whose ``stats`` and ``failed_projects`` are
            updated in place.
        no_diff: When true, ``--diff`` is not passed to Black.

    Outcome rules:
        * timeout -> counted as failed (no ``failed_projects`` entry).
        * exit code 1 -> success if formatting changes were expected,
          otherwise failed.
        * exit code > 1 -> failed.
        * any other non-zero exit code -> logged and re-raised.
        * exit code 0 -> failed if formatting changes were expected,
          otherwise success.

    Raises:
        CalledProcessError: Black exited with a non-zero code that is neither
            1 nor greater than 1.
    """
    if not repo_path:
        results.stats["failed"] += 1
        results.failed_projects[project_name] = CalledProcessError(
            69, [], f"{project_name} has no repo_path: {repo_path}".encode(), b""
        )
        return

    stdin_test = project_name.upper() == "STDIN"
    cmd = [str(which(BLACK_BINARY))]
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        cmd.extend(_flatten_cli_args(cli_arguments))
    cmd.append("--check")
    if not no_diff:
        cmd.append("--diff")

    # Workout if we should read in a python file or search from cwd
    stdin = None
    if stdin_test:
        cmd.append("-")
        stdin = repo_path.read_bytes()
    elif "base_path" in project_config:
        cmd.append(project_config["base_path"])
    else:
        cmd.append(".")

    timeout = project_config.get("timeout_seconds", TEN_MINUTES_SECONDS)
    with TemporaryDirectory() as tmp_path:
        # Prevent reading top-level user configs by manipulating environment variables
        env = {
            **os.environ,
            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
        }

        cwd_path = repo_path.parent if stdin_test else repo_path
        try:
            LOG.debug(f"Running black for {project_name}: {' '.join(cmd)}")
            _stdout, _stderr = await _gen_check_output(
                cmd, cwd=cwd_path, env=env, stdin=stdin, timeout=timeout
            )
        except asyncio.TimeoutError:
            results.stats["failed"] += 1
            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
            # Stop here: falling through would evaluate the "exit code 0"
            # rules below and count this project a second time (as a success,
            # or as failed again when formatting changes were expected).
            return
        except CalledProcessError as cpe:
            # TODO: Tune for smarter for higher signal
            # If any other return value than 1 we raise - can disable project in config
            if cpe.returncode == 1:
                if not project_config["expect_formatting_changes"]:
                    results.stats["failed"] += 1
                    results.failed_projects[repo_path.name] = cpe
                else:
                    results.stats["success"] += 1
                return
            elif cpe.returncode > 1:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
                return

            LOG.error(f"Unknown error with {repo_path}")
            raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
228
229
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project's repository into ``work_path``, or update it.

    Args:
        work_path: Directory in which the checkout lives / is created.
        project_config: Per-project config; reads "git_clone_url".
        rebase: If the checkout already exists, run ``git pull --rebase`` in
            it; when false an existing checkout is returned untouched.
        depth: Value passed to ``git clone --depth``.

    Returns:
        The path of the checkout, or ``None`` when no git binary is found or
        the git command fails / times out.
    """
    # Keep which()'s result un-stringified: str(None) is the truthy string
    # "None", which would make the "no git" check below unreachable.
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    repo_url_parts = urlparse(project_config["git_clone_url"])
    path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)

    # Strip only a trailing ".git"; removing every occurrence would mangle
    # names such as "foo.github.io" and no longer match the cloned directory.
    repo_name = path_parts[1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), project_config["git_clone_url"]]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path
262
263
def handle_PermissionError(
    func: Callable[..., None], path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that retries permission-denied deletes.

    Only failures of 'os.rmdir' or 'os.unlink' whose errno is EACCES
    (i.e. Permission denied) are handled: the path is made readable, writable
    and executable by everyone, and the failed delete operation is attempted
    once more.

    Anything else is outside what this callback can fix, so the exception
    currently being handled is re-raised unchanged.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    recoverable = func in (os.rmdir, os.unlink) and error.errno == errno.EACCES
    if not recoverable:
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Try the error causing delete operation again
286
287
async def load_projects_queue(
    config_path: Path,
    projects_to_run: List[str],
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Parse the JSON config and enqueue every requested project name.

    Returns the parsed config together with a queue that holds the entries of
    ``projects_to_run`` in order; the queue is sized to exactly that list.
    """
    with config_path.open("r") as config_file:
        config = json.loads(config_file.read())

    # TODO: Offer more options here
    # e.g. Run on X random packages etc.
    pending: asyncio.Queue = asyncio.Queue(maxsize=len(projects_to_run))
    for name in projects_to_run:
        await pending.put(name)

    return config, pending
303
304
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
    no_diff: bool = False,
) -> None:
    """Worker loop: take project names off ``queue`` until it is empty.

    For each project the config is consulted to decide whether to skip it
    (disabled, Python version not listed, or a long checkout while
    ``long_checkouts`` is false).  Otherwise the repository is checked out,
    Black is run on it, and the checkout is removed again unless ``keep`` is
    set.  The project named "STDIN" (any case) skips the checkout and uses
    this module's own file instead.
    """
    loop = asyncio.get_event_loop()
    py_version = f"{version_info[0]}.{version_info[1]}"
    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Check if disabled by config
        if project_config.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Check if we should run on this version of Python
        enabled_versions = project_config["py_versions"]
        if not ("all" in enabled_versions or py_version in enabled_versions):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Check if we're doing big projects / long checkouts
        if not long_checkouts and project_config["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        stdin_project = project_name.upper() == "STDIN"
        repo_path: Optional[Path]
        if stdin_project:
            repo_path = Path(__file__)
        else:
            repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
            if not repo_path:
                continue
        await black_run(project_name, repo_path, project_config, results, no_diff)

        if not (keep or stdin_project):
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so run it in the loop's default executor.
            await loop.run_in_executor(
                None,
                partial(rmtree, path=repo_path, onerror=handle_PermissionError),
            )

        LOG.info(f"Finished {project_name}")
366
367
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    projects_to_run: List[str],
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
    no_diff: bool = False,
) -> int:
    """
    Run Black over the requested projects with ``workers`` concurrent runners
    and summarise the outcome
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects (-1 when there are no
    projects to run)
    """
    results = Results()
    for counter in (
        "disabled",
        "failed",
        "skipped_long_checkout",
        "success",
        "wrong_py_ver",
    ):
        results.stats[counter] = 0

    config, queue = await load_projects_queue(Path(config_file), projects_to_run)
    project_count = queue.qsize()
    plural = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{plural} to run Black over")
    if project_count < 1:
        return -1

    plural = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{plural} to run Black")

    runners = [
        project_runner(
            worker_idx,
            config,
            queue,
            work_path,
            results,
            long_checkouts,
            rebase,
            keep,
            no_diff,
        )
        for worker_idx in range(workers)
    ]
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(*runners)

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
420
421
# This module only provides library functions; running it as a script is an
# error, so fail loudly instead of silently doing nothing.
if __name__ == "__main__":  # pragma: nocover
    raise NotImplementedError("lib is a library, funnily enough.")