git.madduck.net Git - etc/vim.git/blob - src/black_primer/lib.py

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

Elaborate on what AST changes Black might perform
[etc/vim.git] / src / black_primer / lib.py
1 import asyncio
2 import errno
3 import json
4 import logging
5 import os
6 import stat
7 import sys
8 from functools import partial
9 from pathlib import Path
10 from platform import system
11 from shutil import rmtree, which
12 from subprocess import CalledProcessError
13 from sys import version_info
14 from tempfile import TemporaryDirectory
15 from typing import Any, Callable, Dict, NamedTuple, Optional, Sequence, Tuple
16 from urllib.parse import urlparse
17
18 import click
19
20
# Executable names differ by platform: Windows binaries carry an ".exe" suffix.
WINDOWS = system() == "Windows"
_EXE_SUFFIX = ".exe" if WINDOWS else ""
BLACK_BINARY = f"black{_EXE_SUFFIX}"
GIT_BINARY = f"git{_EXE_SUFFIX}"
# Module-wide logger, named after this module.
LOG = logging.getLogger(__name__)
25
26
# Executing subprocesses on Windows requires a ProactorEventLoop.
# From Python 3.8 on that loop is the default - can remove when Black >= 3.8
# The check is written directly against sys.platform because mypy only
# respects platform checks that appear in the evaluated condition itself:
# https://mypy.readthedocs.io/en/latest/common_issues.html#python-version-and-system-platform-checks  # noqa: B950
if sys.platform == "win32":
    _proactor_loop = asyncio.ProactorEventLoop()
    asyncio.set_event_loop(_proactor_loop)
33
34
class _ResultsFields(NamedTuple):
    """Tuple layout backing ``Results`` (fields only, no defaults)."""

    # Counter name -> count (e.g. "failed", "success").
    stats: Dict[str, int]
    # Project name -> the CalledProcessError recorded for its failure.
    failed_projects: Dict[str, CalledProcessError]


class Results(_ResultsFields):
    """Mutable bookkeeping for a primer run.

    Behaves like the two-field named tuple ``(stats, failed_projects)``.
    Both fields are optional; when omitted, each instance receives its own
    fresh, empty dict.  Declaring ``{}`` as a NamedTuple default would make
    every ``Results()`` share the same dict objects, leaking counters and
    failures from one instance into the next.
    """

    __slots__ = ()

    def __new__(
        cls,
        stats: Optional[Dict[str, int]] = None,
        failed_projects: Optional[Dict[str, CalledProcessError]] = None,
    ) -> "Results":
        # Build the default containers per instance instead of per class.
        return super().__new__(
            cls,
            {} if stats is None else stats,
            {} if failed_projects is None else failed_projects,
        )
38
39
40 async def _gen_check_output(
41     cmd: Sequence[str],
42     timeout: float = 300,
43     env: Optional[Dict[str, str]] = None,
44     cwd: Optional[Path] = None,
45 ) -> Tuple[bytes, bytes]:
46     process = await asyncio.create_subprocess_exec(
47         *cmd,
48         stdout=asyncio.subprocess.PIPE,
49         stderr=asyncio.subprocess.STDOUT,
50         env=env,
51         cwd=cwd,
52     )
53     try:
54         (stdout, stderr) = await asyncio.wait_for(process.communicate(), timeout)
55     except asyncio.TimeoutError:
56         process.kill()
57         await process.wait()
58         raise
59
60     # A non-optional timeout was supplied to asyncio.wait_for, guaranteeing
61     # a timeout or completed process.  A terminated Python process will have a
62     # non-empty returncode value.
63     assert process.returncode is not None
64
65     if process.returncode != 0:
66         cmd_str = " ".join(cmd)
67         raise CalledProcessError(
68             process.returncode, cmd_str, output=stdout, stderr=stderr
69         )
70
71     return (stdout, stderr)
72
73
def analyze_results(project_count: int, results: Results) -> int:
    """Print a summary of the run and return the number of failed projects.

    The summary shows success/failure counts as a share of ``project_count``,
    the number of projects skipped for each reason, and for every entry in
    ``results.failed_projects`` its return code plus any captured output.
    """
    stats = results.stats

    def share_of_total(count: int) -> float:
        # Percentage of all projects, rounded to two decimals.
        return round(((count / project_count) * 100), 2)

    failed_pct = share_of_total(stats["failed"])
    success_pct = share_of_total(stats["success"])

    click.secho("-- primer results 📊 --\n", bold=True)
    click.secho(
        f"{stats['success']} / {project_count} succeeded ({success_pct}%) ✅",
        bold=True,
        fg="green",
    )
    click.secho(
        f"{stats['failed']} / {project_count} FAILED ({failed_pct}%) 💩",
        bold=bool(stats["failed"]),  # only emphasise when something failed
        fg="red",
    )

    plural = "s" if stats["disabled"] != 1 else ""
    click.echo(f" - {stats['disabled']} project{plural} disabled by config")
    plural = "s" if stats["wrong_py_ver"] != 1 else ""
    click.echo(
        f" - {stats['wrong_py_ver']} project{plural} skipped due to Python version"
    )
    click.echo(f" - {stats['skipped_long_checkout']} skipped due to long checkout")

    failures = results.failed_projects
    if failures:
        click.secho("\nFailed projects:\n", bold=True)

    for name, error in failures.items():
        print(f"## {name}:")
        print(f" - Returned {error.returncode}")
        if error.stderr:
            print(f" - stderr:\n{error.stderr.decode('utf8')}")
        if error.stdout:
            print(f" - stdout:\n{error.stdout.decode('utf8')}")
        print("")

    return stats["failed"]
112
113
async def black_run(
    repo_path: Path, project_config: Dict[str, Any], results: Results
) -> None:
    """Run Black over a checked-out project and record the outcome.

    Black is invoked as ``black [cli_arguments...] --check --diff .`` with
    ``repo_path`` as the working directory.  The outcome is tallied in
    ``results.stats``; failures that carry a ``CalledProcessError`` are also
    stored in ``results.failed_projects`` under the repository directory name.

    Outcome by Black's behaviour:
      - exit 0: success, unless ``expect_formatting_changes`` is set
      - exit 1: success only if ``expect_formatting_changes`` is set
      - exit > 1: failure
      - timeout: failure (counted exactly once)

    Raises:
        CalledProcessError: re-raised when Black fails with any other return
            code; such a project can be disabled in the config.
    """
    cmd = [str(which(BLACK_BINARY))]
    cli_arguments = project_config.get("cli_arguments")
    if cli_arguments:
        # Pass the list itself: unpacking it with * would hand each argument
        # to extend() separately (splitting a lone argument into characters,
        # or raising TypeError when there are several).
        cmd.extend(cli_arguments)
    cmd.extend(["--check", "--diff", "."])

    with TemporaryDirectory() as tmp_path:
        # Prevent reading top-level user configs by manipulating environment variables
        env = {
            **os.environ,
            "XDG_CONFIG_HOME": tmp_path,  # Unix-like
            "USERPROFILE": tmp_path,  # Windows (changes `Path.home()` output)
        }

        try:
            _stdout, _stderr = await _gen_check_output(cmd, cwd=repo_path, env=env)
        except asyncio.TimeoutError:
            results.stats["failed"] += 1
            LOG.error(f"Running black for {repo_path} timed out ({cmd})")
            # Stop here: falling through would tally this project a second
            # time (as another failure or, worse, as a success).
            return
        except CalledProcessError as cpe:
            # TODO: Tune for smarter for higher signal
            # If any other return value than 1 we raise - can disable project in config
            if cpe.returncode == 1:
                if not project_config["expect_formatting_changes"]:
                    results.stats["failed"] += 1
                    results.failed_projects[repo_path.name] = cpe
                else:
                    results.stats["success"] += 1
                return
            elif cpe.returncode > 1:
                results.stats["failed"] += 1
                results.failed_projects[repo_path.name] = cpe
                return

            LOG.error(f"Unknown error with {repo_path}")
            raise

    # If we get here and expect formatting changes something is up
    if project_config["expect_formatting_changes"]:
        results.stats["failed"] += 1
        results.failed_projects[repo_path.name] = CalledProcessError(
            0, cmd, b"Expected formatting changes but didn't get any!", b""
        )
        return

    results.stats["success"] += 1
163
164
async def git_checkout_or_rebase(
    work_path: Path,
    project_config: Dict[str, Any],
    rebase: bool = False,
    *,
    depth: int = 1,
) -> Optional[Path]:
    """Clone the project into ``work_path``, or update an existing checkout.

    The checkout directory is named after the last path component of
    ``project_config["git_clone_url"]`` with a trailing ``.git`` removed.

    - If that directory does not exist, the project is cloned with
      ``--depth depth``.
    - If it exists and ``rebase`` is true, ``git pull --rebase`` is run in it.
    - If it exists and ``rebase`` is false, it is returned untouched.

    Returns:
        The checkout path, or ``None`` when git is not installed, the URL
        has no usable repository name, or the git command failed/timed out.
    """
    # Keep which()'s result un-stringified: str(None) would be the truthy
    # string "None" and the missing-binary check below could never trigger.
    git_bin = which(GIT_BINARY)
    if not git_bin:
        LOG.error("No git binary found")
        return None

    clone_url = project_config["git_clone_url"]
    repo_name = Path(urlparse(clone_url).path).name
    # Only drop a *trailing* ".git"; a blanket replace() would also mangle
    # names that merely contain it (e.g. "user.github.io").
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]
    if not repo_name:
        # Without a name repo_path would be work_path itself.
        LOG.error(f"Unable to determine a repository name from {clone_url}")
        return None

    repo_path: Path = work_path / repo_name
    cmd = [git_bin, "clone", "--depth", str(depth), clone_url]
    cwd = work_path
    if repo_path.exists() and rebase:
        cmd = [git_bin, "pull", "--rebase"]
        cwd = repo_path
    elif repo_path.exists():
        return repo_path

    try:
        _stdout, _stderr = await _gen_check_output(cmd, cwd=cwd)
    except (asyncio.TimeoutError, CalledProcessError) as e:
        LOG.error(f"Unable to git clone / pull {clone_url}: {e}")
        return None

    return repo_path
197
198
def handle_PermissionError(
    func: Callable, path: Path, exc: Tuple[Any, Any, Any]
) -> None:
    """
    Error callback for shutil.rmtree that recovers from permission errors.

    Only failures of 'os.rmdir' or 'os.unlink' with errno EACCES (i.e.
    Permission denied) are handled: the path is made readable, writable and
    executable by everyone, and the failed delete operation is attempted once
    more.

    Anything else is beyond what this function can fix, so the error being
    handled is reraised.
    """
    error = exc[1]
    LOG.debug(f"Handling {error} from {func.__name__}... ")

    is_delete_call = func in (os.rmdir, os.unlink)
    if not (is_delete_call and error.errno == errno.EACCES):
        raise

    LOG.debug(f"Setting {path} writable, readable, and executable by everyone... ")
    everyone_rwx = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # chmod 0777
    os.chmod(path, everyone_rwx)
    func(path)  # Retry the delete operation that failed
221
222
async def load_projects_queue(
    config_path: Path,
) -> Tuple[Dict[str, Any], asyncio.Queue]:
    """Read the JSON config and queue up every project name in sorted order.

    Returns the parsed config together with a queue sized to hold exactly
    the names found under the config's "projects" key.
    """
    config = json.loads(config_path.read_text())

    # TODO: Offer more options here
    # e.g. Run on X random packages or specific sub list etc.
    names = sorted(config["projects"])
    queue: asyncio.Queue = asyncio.Queue(maxsize=len(names))
    for name in names:
        await queue.put(name)

    return config, queue
238
239
async def project_runner(
    idx: int,
    config: Dict[str, Any],
    queue: asyncio.Queue,
    work_path: Path,
    results: Results,
    long_checkouts: bool = False,
    rebase: bool = False,
    keep: bool = False,
) -> None:
    """Worker loop: take project names off the queue until it is empty.

    Each project is skipped (and counted in ``results.stats``) when it is
    disabled, not enabled for the running Python version, or a long checkout
    while ``long_checkouts`` is false.  Otherwise it is checked out, Black is
    run on it, and the checkout is removed again unless ``keep`` is set.
    """
    loop = asyncio.get_event_loop()
    py_version = ".".join(str(part) for part in version_info[:2])
    stats = results.stats

    while True:
        try:
            project_name = queue.get_nowait()
        except asyncio.QueueEmpty:
            LOG.debug(f"project_runner {idx} exiting")
            return
        LOG.debug(f"worker {idx} working on {project_name}")

        project_config = config["projects"][project_name]

        # Disabled via config?
        if project_config.get("disabled"):
            stats["disabled"] += 1
            LOG.info(f"Skipping {project_name} as it's disabled via config")
            continue

        # Enabled for the Python version we are running on?
        supported = project_config["py_versions"]
        if not ("all" in supported or py_version in supported):
            stats["wrong_py_ver"] += 1
            LOG.debug(f"Skipping {project_name} as it's not enabled for {py_version}")
            continue

        # Big projects / long checkouts only run when explicitly requested
        if not long_checkouts and project_config["long_checkout"]:
            stats["skipped_long_checkout"] += 1
            LOG.debug(f"Skipping {project_name} as it's configured as a long checkout")
            continue

        repo_path = await git_checkout_or_rebase(work_path, project_config, rebase)
        if not repo_path:
            continue
        await black_run(repo_path, project_config, results)

        if not keep:
            LOG.debug(f"Removing {repo_path}")
            # rmtree blocks, so hand it to the loop's default executor.
            remove_checkout = partial(
                rmtree, repo_path, onerror=handle_PermissionError
            )
            await loop.run_in_executor(None, remove_checkout)

        LOG.info(f"Finished {project_name}")
297
298
async def process_queue(
    config_file: str,
    work_path: Path,
    workers: int,
    keep: bool = False,
    long_checkouts: bool = False,
    rebase: bool = False,
) -> int:
    """
    Process the queue with X workers and evaluate results
    - Success is gauged via the config "expect_formatting_changes"

    Integer return equals the number of failed projects, or -1 when the
    config lists no projects at all.
    """
    # Hand Results brand-new containers instead of relying on its defaults,
    # so counters and failures recorded by an earlier call in the same
    # process can never show up in this run.
    results = Results(
        stats={
            "disabled": 0,
            "failed": 0,
            "skipped_long_checkout": 0,
            "success": 0,
            "wrong_py_ver": 0,
        },
        failed_projects={},
    )

    config, queue = await load_projects_queue(Path(config_file))
    project_count = queue.qsize()
    s = "" if project_count == 1 else "s"
    LOG.info(f"{project_count} project{s} to run Black over")
    if project_count < 1:
        return -1

    s = "" if workers == 1 else "s"
    LOG.debug(f"Using {workers} parallel worker{s} to run Black")
    # Wait until we finish running all the projects before analyzing
    await asyncio.gather(
        *[
            project_runner(
                i, config, queue, work_path, results, long_checkouts, rebase, keep
            )
            for i in range(workers)
        ]
    )

    LOG.info("Analyzing results")
    return analyze_results(project_count, results)
341
342
if __name__ == "__main__":  # pragma: nocover
    # This module only provides helpers; running it directly is unsupported.
    _message = "lib is a library, funnily enough."
    raise NotImplementedError(_message)