From: Anthony Sottile Date: Thu, 4 Feb 2021 21:03:42 +0000 (-0800) Subject: speed up cache by approximately 42x by avoiding pathlib (#1953) X-Git-Url: https://git.madduck.net/etc/vim.git/commitdiff_plain/3fca540d05a0f0b4480cfcce84d7e0392270b841 speed up cache by approximately 42x by avoiding pathlib (#1953) --- diff --git a/CHANGES.md b/CHANGES.md index ca8a047..5f7ca4f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -26,6 +26,8 @@ - use lowercase hex strings (#1692) +- speed up caching by avoiding pathlib (#1950) + #### _Packaging_ - Self-contained native _Black_ binaries are now provided for releases via GitHub diff --git a/src/black/__init__.py b/src/black/__init__.py index 9034bf6..7c1a013 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -93,7 +93,7 @@ Transformer = Callable[["Line", Collection["Feature"]], Iterator["Line"]] Timestamp = float FileSize = int CacheInfo = Tuple[Timestamp, FileSize] -Cache = Dict[Path, CacheInfo] +Cache = Dict[str, CacheInfo] out = partial(click.secho, bold=True, err=True) err = partial(click.secho, fg="red", err=True) @@ -724,7 +724,8 @@ def reformat_one( if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): cache = read_cache(mode) res_src = src.resolve() - if res_src in cache and cache[res_src] == get_cache_info(res_src): + res_src_s = str(res_src) + if res_src_s in cache and cache[res_src_s] == get_cache_info(res_src): changed = Changed.CACHED if changed is not Changed.CACHED and format_file_in_place( src, fast=fast, write_back=write_back, mode=mode @@ -6781,8 +6782,8 @@ def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set """ todo, done = set(), set() for src in sources: - src = src.resolve() - if cache.get(src) != get_cache_info(src): + res_src = src.resolve() + if cache.get(str(res_src)) != get_cache_info(res_src): todo.add(src) else: done.add(src) @@ -6794,7 +6795,10 @@ def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None: cache_file = get_cache_file(mode) try: CACHE_DIR.mkdir(parents=True, exist_ok=True) - new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}} + new_cache = { + **cache, + **{str(src.resolve()): get_cache_info(src) for src in sources}, + } with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f: pickle.dump(new_cache, f, protocol=4) os.replace(f.name, cache_file) diff --git a/tests/test_black.py b/tests/test_black.py index 28b7578..cfd3cbd 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1003,7 +1003,7 @@ class BlackTestCase(BlackBaseTestCase): fobj.write("print('hello')") self.invokeBlack([str(src)]) cache = black.read_cache(mode) - self.assertIn(src, cache) + self.assertIn(str(src), cache) def test_cache_single_file_already_cached(self) -> None: mode = DEFAULT_MODE @@ -1035,8 +1035,8 @@ class BlackTestCase(BlackBaseTestCase): with two.open("r") as fobj: self.assertEqual(fobj.read(), 'print("hello")\n') cache = black.read_cache(mode) - self.assertIn(one, cache) - self.assertIn(two, cache) + self.assertIn(str(one), cache) + self.assertIn(str(two), cache) def test_no_cache_when_writeback_diff(self) -> None: mode = DEFAULT_MODE @@ -1116,8 +1116,8 @@ class BlackTestCase(BlackBaseTestCase): src.touch() black.write_cache({}, [src], mode) cache = black.read_cache(mode) - self.assertIn(src, cache) - self.assertEqual(cache[src], black.get_cache_info(src)) + self.assertIn(str(src), cache) + self.assertEqual(cache[str(src)], black.get_cache_info(src)) def test_filter_cached(self) -> None: with TemporaryDirectory() as workspace: @@ -1128,7 +1128,10 @@ class BlackTestCase(BlackBaseTestCase): uncached.touch() cached.touch() cached_but_changed.touch() - cache = {cached: black.get_cache_info(cached), cached_but_changed: (0.0, 0)} + cache = { + str(cached): black.get_cache_info(cached), + str(cached_but_changed): (0.0, 0), + } todo, done = black.filter_cached( cache, {uncached, cached, cached_but_changed} ) @@ -1156,8 +1159,8 @@ class BlackTestCase(BlackBaseTestCase): fobj.write('print("hello")\n') self.invokeBlack([str(workspace)], exit_code=123) cache = black.read_cache(mode) - self.assertNotIn(failing, cache) - self.assertIn(clean, cache) + self.assertNotIn(str(failing), cache) + self.assertIn(str(clean), cache) def test_write_cache_write_fail(self) -> None: mode = DEFAULT_MODE @@ -1210,9 +1213,9 @@ class BlackTestCase(BlackBaseTestCase): path.touch() black.write_cache({}, [path], mode) one = black.read_cache(mode) - self.assertIn(path, one) + self.assertIn(str(path), one) two = black.read_cache(short_mode) - self.assertNotIn(path, two) + self.assertNotIn(str(path), two) def test_single_file_force_pyi(self) -> None: pyi_mode = replace(DEFAULT_MODE, is_pyi=True) @@ -1226,9 +1229,9 @@ class BlackTestCase(BlackBaseTestCase): actual = fh.read() # verify cache with --pyi is separate pyi_cache = black.read_cache(pyi_mode) - self.assertIn(path, pyi_cache) + self.assertIn(str(path), pyi_cache) normal_cache = black.read_cache(DEFAULT_MODE) - self.assertNotIn(path, normal_cache) + self.assertNotIn(str(path), normal_cache) self.assertFormatEqual(expected, actual) black.assert_equivalent(contents, actual) black.assert_stable(contents, actual, pyi_mode) @@ -1255,8 +1258,8 @@ class BlackTestCase(BlackBaseTestCase): pyi_cache = black.read_cache(pyi_mode) normal_cache = black.read_cache(reg_mode) for path in paths: - self.assertIn(path, pyi_cache) - self.assertNotIn(path, normal_cache) + self.assertIn(str(path), pyi_cache) + self.assertNotIn(str(path), normal_cache) def test_pipe_force_pyi(self) -> None: source, expected = read_data("force_pyi") @@ -1280,9 +1283,9 @@ class BlackTestCase(BlackBaseTestCase): actual = fh.read() # verify cache with --target-version is separate py36_cache = black.read_cache(py36_mode) - self.assertIn(path, py36_cache) + self.assertIn(str(path), py36_cache) normal_cache = black.read_cache(reg_mode) - self.assertNotIn(path, normal_cache) + self.assertNotIn(str(path), normal_cache) self.assertEqual(actual, expected) @event_loop() @@ -1307,8 +1310,8 @@ class BlackTestCase(BlackBaseTestCase): pyi_cache = black.read_cache(py36_mode) normal_cache = black.read_cache(reg_mode) for path in paths: - self.assertIn(path, pyi_cache) - self.assertNotIn(path, normal_cache) + self.assertIn(str(path), pyi_cache) + self.assertNotIn(str(path), normal_cache) def test_pipe_force_py36(self) -> None: source, expected = read_data("force_py36")