All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you would read over the Git project's submission guidelines and adhere to
them, I would be especially grateful.
1 """Caching of formatted files with feature-based invalidation."""
8 from dataclasses import dataclass, field
9 from pathlib import Path
10 from typing import Dict, Iterable, NamedTuple, Set, Tuple
12 from platformdirs import user_cache_dir
14 from _black_version import version as __version__
15 from black.mode import Mode
17 if sys.version_info >= (3, 11):
18 from typing import Self
20 from typing_extensions import Self
class FileData(NamedTuple):
    """Cached fingerprint of one formatted file.

    Stored per resolved source path so `is_changed` can cheaply compare
    size/mtime first and only fall back to a content hash on mtime mismatch.
    """

    st_mtime: float  # from os.stat(); compared with int() truncation
    st_size: int  # size in bytes; cheapest change indicator
    hash: str  # sha256 hex digest of the file contents
def get_cache_dir() -> Path:
    """Get the cache directory used by black.

    Users can customize this directory on all systems using `BLACK_CACHE_DIR`
    environment variable. By default, the cache directory is the user cache directory
    under the black application.

    This result is immediately set to a constant `black.cache.CACHE_DIR` as to avoid
    repeated calls.
    """
    # NOTE: Function mostly exists as a clean way to test getting the cache directory.
    default_cache_dir = user_cache_dir("black")
    cache_dir = Path(os.environ.get("BLACK_CACHE_DIR", default_cache_dir))
    # Version-scope the directory so caches from different black releases
    # never mix (the pickle layout may change between versions).
    cache_dir = cache_dir / __version__
    # Bug fix: without this return the function yields None and the
    # module-level CACHE_DIR constant would be None.
    return cache_dir
# Resolved once at import time; all Cache instances share this directory.
CACHE_DIR = get_cache_dir()


def get_cache_file(mode: Mode) -> Path:
    """Return the pickle file holding cache entries for the given `mode`."""
    cache_key = mode.get_cache_key()
    return CACHE_DIR / f"cache.{cache_key}.pickle"
# Positional fields restored: read() constructs instances as
# cls(mode, cache_file[, file_data]) and write() reads self.cache_file,
# so these two fields must precede the defaulted mapping.
mode: Mode
cache_file: Path
# str(resolved source path) -> FileData for every file known to the cache.
file_data: Dict[str, FileData] = field(default_factory=dict)
@classmethod
def read(cls, mode: Mode) -> Self:
    """Read the cache if it exists and is well formed.

    If it is not well formed, the call to write later should
    resolve the issue.
    """
    cache_file = get_cache_file(mode)
    if not cache_file.exists():
        return cls(mode, cache_file)

    with cache_file.open("rb") as fobj:
        try:
            data: Dict[str, Tuple[float, int, str]] = pickle.load(fobj)
            file_data = {k: FileData(*v) for k, v in data.items()}
        except (pickle.UnpicklingError, ValueError, IndexError, EOFError):
            # EOFError added: a truncated or empty cache file raises it
            # instead of UnpicklingError; treat it as "not well formed" too.
            return cls(mode, cache_file)

    return cls(mode, cache_file, file_data)
80 def hash_digest(path: Path) -> str:
81 """Return hash digest for path."""
83 data = path.read_bytes()
84 return hashlib.sha256(data).hexdigest()
@staticmethod
def get_file_data(path: Path) -> FileData:
    """Return file data for path."""
    # Bug fix: `stat` was used without ever being assigned.
    stat = path.stat()
    # Named `digest` (not `hash`) to avoid shadowing the builtin.
    digest = Cache.hash_digest(path)
    return FileData(stat.st_mtime, stat.st_size, digest)
def is_changed(self, source: Path) -> bool:
    """Check if source has changed compared to cached version.

    Cheap checks first (size, then truncated mtime); only hash the file
    contents when the mtime differs.
    """
    res_src = source.resolve()
    old = self.file_data.get(str(res_src))
    # Guard restored: a file absent from the cache counts as changed;
    # without it the attribute accesses below would raise on None.
    if old is None:
        return True

    st = res_src.stat()
    if st.st_size != old.st_size:
        return True
    if int(st.st_mtime) != int(old.st_mtime):
        new_hash = Cache.hash_digest(res_src)
        if new_hash != old.hash:
            return True
    return False
def filtered_cached(self, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
    """Split an iterable of paths in `sources` into two sets.

    The first contains paths of files that were modified on disk or are not in
    the cache. The other contains paths to non-modified files.
    """
    changed: Set[Path] = set()
    done: Set[Path] = set()
    # Loop and return restored: the visible body built the sets but never
    # populated or returned them.
    for src in sources:
        if self.is_changed(src):
            changed.add(src)
        else:
            done.add(src)
    return changed, done
def write(self, sources: Iterable[Path]) -> None:
    """Update the cache file data and write a new cache file."""
    self.file_data.update(
        **{str(src.resolve()): Cache.get_file_data(src) for src in sources}
    )
    # Atomic, best-effort persistence: dump into a temp file in the same
    # directory, then os.replace() it over the cache file so readers never
    # see a half-written pickle. Failures only cost a future re-format.
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        with tempfile.NamedTemporaryFile(
            dir=str(self.cache_file.parent), delete=False
        ) as f:
            # We store raw tuples in the cache because pickling NamedTuples
            # doesn't work with mypyc on Python 3.8, and because it's faster.
            data: Dict[str, Tuple[float, int, str]] = {
                k: (*v,) for k, v in self.file_data.items()
            }
            pickle.dump(data, f, protocol=4)
        os.replace(f.name, self.cache_file)
    except OSError:
        # NOTE(review): reconstructed best-effort handler — the cache is an
        # optimization, so I/O errors here are deliberately swallowed.
        pass