git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

4f929f128ce1fd7b949441317f4b89e22bc1b4af
[etc/vim.git] / src / black_primer / lib.py
1 #!/usr/bin/env python3
2
3 import asyncio
4 import json
5 import logging
6 import sys
7 from pathlib import Path
8 from platform import system
9 from shutil import rmtree, which
10 from subprocess import CalledProcessError
11 from sys import version_info
12 from typing import Any, Dict, NamedTuple, Optional, Sequence, Tuple
13 from urllib.parse import urlparse
14
15 import click
16
17
WINDOWS = system() == "Windows"
BLACK_BINARY = "black.exe" if WINDOWS else "black"
GIT_BINARY = "git.exe" if WINDOWS else "git"
# Backward-compatible alias: the constant was originally defined under this
# misspelled name, so it stays available for any code that still refers to it.
GIT_BIANRY = GIT_BINARY
LOG = logging.getLogger(__name__)


# Windows needs a ProactorEventLoop if you want to exec subprocesses.
# Starting with Python 3.8 this is the default, so this can be removed once
# only Python >= 3.8 is supported.
# mypy only respects sys.platform if it is used directly in the condition:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
30
31
class _ResultsBase(NamedTuple):
    # Counters keyed by outcome name (e.g. "failed", "success").
    stats: Dict[str, int]
    # Project name -> the CalledProcessError recorded for that project.
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsBase):
    """Mutable accumulators shared by the primer workers.

    Behaves like the two-field named tuple ``(stats, failed_projects)``.
    Unlike plain ``NamedTuple`` defaults, which are evaluated once and shared
    by every instance, each ``Results()`` gets its own fresh dictionaries so
    separate runs never see each other's data.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        # Only substitute a new dict when the caller passed nothing; a dict
        # supplied by the caller (even an empty one) is stored as-is.
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
35
36
async def _gen_check_output(
    cmd: Sequence[str],
    timeout: float = 30,
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[Path] = None,
) -> Tuple[bytes, bytes]:
    """Run ``cmd`` as a subprocess and return what ``communicate()`` yields.

    The child's stderr is redirected into its stdout pipe, so all captured
    output arrives in the first element of the returned pair.

    Raises:
        asyncio.TimeoutError: the process did not finish within ``timeout``
            seconds; it is killed and reaped before the error propagates.
        CalledProcessError: the process exited with a non-zero return code.
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=cwd,
    )
    try:
        captured = await asyncio.wait_for(proc.communicate(), timeout)
    except asyncio.TimeoutError:
        # Do not leave a runaway child behind: kill it and wait for it to exit.
        proc.kill()
        await proc.wait()
        raise

    out, err = captured
    if proc.returncode == 0:
        return (out, err)

    raise CalledProcessError(proc.returncode, " ".join(cmd), output=out, stderr=err)
64
65
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the primer run and return the failure count.

    Percentages are computed against ``project_count``. After the summary,
    every entry in ``results.failed_projects`` is printed with its return
    code and any captured stderr / stdout.
    """
    stats = results.stats
    failed_count = stats["failed"]
    failed_pct = round(((failed_count / project_count) * 100), 2)
    success_count = stats["success"]
    success_pct = round(((success_count / project_count) * 100), 2)

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{success_count} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    # Only emphasise the failure line when something actually failed.
    click.secho(
        f"{failed_count} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(failed_count),
        fg="red",
    )

    disabled_count = stats["disabled"]
    plural = "s" if disabled_count != 1 else ""
    click.echo(f" - {disabled_count} project{plural} disabled by config")

    wrong_py_count = stats["wrong_py_ver"]
    plural = "s" if wrong_py_count != 1 else ""
    click.echo(
        f" - {wrong_py_count} project{plural} skipped due to Python version"
    )
    click.echo(
        f" - {stats['skipped_long_checkout']} skipped due to long checkout"
    )

    failures = results.failed_projects
    if failures:
        click.secho("\nFailed projects:\n", bold=True)

    for name, error in failures.items():
        print(f"## {name}:")
        print(f" - Returned {error.returncode}")
        if error.stderr:
            print(f" - stderr:\n{error.stderr.decode('utf8')}")
        if error.stdout:
            print(f" - stdout:\n{error.stdout.decode('utf8')}")
        print("")

    return stats["failed"]
104
105
async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run Black in check mode on ``repo_path`` and record the outcome.

    The outcome is recorded in ``results``: ``stats["success"]`` or
    ``stats["failed"]`` is incremented exactly once per call, and failures
    with a captured process error are stored in ``failed_projects`` under the
    repository directory name.

    A return code of 1 counts as success only when the project config sets
    ``expect_formatting_changes``; a clean run counts as success only when
    it does not.

    Raises:
        CalledProcessError: Black exited with a return code other than 0 or 1.
    """
    cmd = [str(which(BLACK_BINARY))]
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        # Pass the list itself: unpacking it into extend() would split a single
        # argument into characters and raise TypeError for two or more.
        cmd.extend(cli_arguments)
    cmd.extend(["--check", "--diff", "."])

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path)
    except asyncio.TimeoutError:
        results.stats["failed"] += 1
        LOG.error(f"Running black for {repo_path} timed out ({cmd})")
        # Stop here so the timeout is not also counted by the code below.
        return
    except CalledProcessError as cpe:
        # TODO: Tune for smarter for higher signal
        # If any other return value than 1 we raise - can disable project in config
        if cpe.returncode == 1:
            if not project_config["expect_formatting_changes"]:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
            else:
                results.stats["success"] += 1
            return

        LOG.error(f"Unknown error with {repo_path}")
        raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
143
144
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project's repository into ``work_path``, or update it.

    If the checkout already exists it is returned untouched, unless
    ``rebase`` is set, in which case ``git pull --rebase`` is run inside it.
    A new clone is shallow, limited to ``depth`` commits.

    Returns:
        The path of the checkout, or ``None`` when no git binary is on the
        PATH or the git command failed / timed out (the reason is logged).
    """
    # which() returns None when nothing is found; test that before any str()
    # conversion, since str(None) is the truthy string "None".
    git_bin = which(GIT_BIANRY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    repo_url_parts = urlparse(clone_url)
    path_parts = repo_url_parts.path[1:].split("/", maxsplit=1)

    # Strip only a trailing ".git"; a blanket replace would also mangle names
    # that merely contain it (e.g. "foo.github.io").
    repo_name = path_parts[1]
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    repo_path: Path = work_path / repo_name
    cmd = [str(git_bin), "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [str(git_bin), "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {project_config['git_clone_url']}: {e}")
        return None

    return repo_path
177
178
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Read the JSON config and queue every project name in sorted order.

    Returns the parsed config together with a queue sized to hold exactly
    the names found under the config's ``"projects"`` key.
    """
    with config_path.open("r") as config_file:
        loaded = json.load(config_file)

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(loaded["projects"])
    pending: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        # The queue has room for every name, so this can never be full.
        pending.put_nowait(name)

    return loaded, pending
194
195
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Worker loop: take project names off ``queue`` until it is empty.

    Each project is skipped (and counted in ``results.stats``) when it is
    disabled, not enabled for the running Python version, or marked as a long
    checkout while ``long_checkouts`` is off. Otherwise it is checked out,
    Black is run on it, and the checkout is removed unless ``keep`` is set.
    """
    event_loop = asyncio.get_event_loop()
    running_py = ".".join(str(part) for part in version_info[:2])

    while True:
        try:
            name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return

        settings = config["projects"][name]

        # Check if disabled by config
        if settings.get("disabled"):
            results.stats["disabled"] += 1
            LOG.info(f"Skipping {name} as it's disabled via config")
            continue

        # Check if we should run on this version of Python
        enabled_versions = settings["py_versions"]
        if not ("all" in enabled_versions or running_py in enabled_versions):
            results.stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {name} as it's not enabled for {running_py}")
            continue

        # Check if we're doing big projects / long checkouts
        if not long_checkouts and settings["long_checkout"]:
            results.stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {name} as it's configured as a long checkout")
            continue

        checkout = await git_checkout_or_rebase(work_path, settings, rebase)
        if not checkout:
            continue
        await black_run(checkout, settings, results)

        if keep:
            continue
        LOG.debug(f"Removing {checkout}")
        # rmtree blocks, so run it in the default executor off the event loop.
        await event_loop.run_in_executor(None, rmtree, checkout)
247
248
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with X workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects, or -1 when the
    config lists no projects at all.
    """
    # Pass fresh dictionaries explicitly rather than relying on the defaults of
    # Results, so failures recorded by an earlier call can never show up in
    # this run's report.
    results = Results(
        stats={
            "disabled": 0,
            "failed": 0,
            "skipped_long_checkout": 0,
            "success": 0,
            "wrong_py_ver": 0,
        },
        failed_projects={},
    )

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
291
292
293 if __name__ == "__main__":  # pragma: nocover
294     raise NotImplementedError("lib is a library, funnily enough.")