polykit_core/
cache.rs

1//! Caching system for scan results.
2
3use std::fs::{self, File, OpenOptions};
4use std::path::{Path, PathBuf};
5use std::time::SystemTime;
6
7use bincode;
8use memmap2::{Mmap, MmapMut};
9use rayon::prelude::*;
10use rustc_hash::FxHashMap;
11use serde::{Deserialize, Serialize};
12use xxhash_rust::xxh3::xxh3_64;
13
14use crate::error::{Error, Result};
15use crate::package::Package;
16
/// Version tag stored in every cache entry; `Cache::load` treats an entry
/// carrying any other value as a miss.
const CACHE_VERSION: u32 = 3;
/// Maximum directory depth passed to `walkdir` when searching for
/// `polykit.toml` files.
const MAX_SCAN_DEPTH: usize = 3;
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
21struct CacheEntry {
22    version: u32,
23    packages: Vec<Package>,
24    mtimes: FxHashMap<PathBuf, u64>,
25}
26
27pub struct Cache {
28    cache_dir: PathBuf,
29    stats: CacheStats,
30}
31
/// Running hit/miss counters for the scan cache.
#[derive(Default, Debug)]
pub struct CacheStats {
    /// Number of lookups that returned cached packages.
    pub hits: u64,
    /// Number of lookups that found no usable cache entry.
    pub misses: u64,
}

impl CacheStats {
    /// Returns the fraction of recorded lookups that were hits.
    ///
    /// Yields `0.0` when no lookup has been recorded yet, so the division is
    /// never evaluated with a zero denominator.
    pub fn hit_rate(&self) -> f64 {
        match self.hits + self.misses {
            0 => 0.0,
            lookups => self.hits as f64 / lookups as f64,
        }
    }
}
48
49impl Cache {
50    pub fn new(cache_dir: impl AsRef<Path>) -> Self {
51        Self {
52            cache_dir: cache_dir.as_ref().to_path_buf(),
53            stats: CacheStats::default(),
54        }
55    }
56
57    pub fn stats(&self) -> &CacheStats {
58        &self.stats
59    }
60
61    pub fn get_cache_path(&self, packages_dir: &Path) -> PathBuf {
62        let cache_key = self.compute_cache_key(packages_dir);
63        let filename = format!("scan_{}.bin", cache_key);
64        let cache_path = self.cache_dir.join(&filename);
65
66        if let Ok(canonical_cache_dir) = self.cache_dir.canonicalize() {
67            if let Ok(canonical_cache_path) = cache_path.canonicalize() {
68                if canonical_cache_path.starts_with(&canonical_cache_dir) {
69                    return cache_path;
70                }
71            } else if let Some(parent) = cache_path.parent() {
72                if let Ok(canonical_parent) = parent.canonicalize() {
73                    let canonical_cache_path = canonical_parent.join(&filename);
74                    if canonical_cache_path.starts_with(&canonical_cache_dir) {
75                        return cache_path;
76                    }
77                }
78            }
79        }
80
81        cache_path
82    }
83
84    pub fn load(&mut self, packages_dir: &Path) -> Result<Option<Vec<Package>>> {
85        let cache_path = self.get_cache_path(packages_dir);
86        if !cache_path.exists() {
87            self.stats.misses += 1;
88            return Ok(None);
89        }
90
91        let file = File::open(&cache_path).map_err(Error::Io)?;
92        let metadata = file.metadata().map_err(Error::Io)?;
93
94        if metadata.len() == 0 {
95            self.stats.misses += 1;
96            return Ok(None);
97        }
98
99        let mmap = unsafe {
100            Mmap::map(&file).map_err(|e| Error::Adapter {
101                package: "cache".to_string(),
102                message: format!("Failed to memory-map cache file: {}", e),
103            })?
104        };
105
106        let content = zstd::decode_all(&mmap[..]).map_err(|e| Error::Adapter {
107            package: "cache".to_string(),
108            message: format!("Failed to decompress cache: {}", e),
109        })?;
110
111        let entry: CacheEntry = bincode::deserialize(&content).map_err(|e| Error::Adapter {
112            package: "cache".to_string(),
113            message: format!("Failed to parse cache: {}", e),
114        })?;
115
116        if entry.version != CACHE_VERSION {
117            self.stats.misses += 1;
118            return Ok(None);
119        }
120
121        if !self.validate_mtimes(packages_dir, &entry.mtimes)? {
122            self.stats.misses += 1;
123            return Ok(None);
124        }
125
126        self.stats.hits += 1;
127        Ok(Some(entry.packages))
128    }
129
130    pub fn save(&self, packages_dir: &Path, packages: &[Package]) -> Result<()> {
131        fs::create_dir_all(&self.cache_dir).map_err(Error::Io)?;
132
133        let mtimes = self.collect_mtimes(packages_dir)?;
134        let entry = CacheEntry {
135            version: CACHE_VERSION,
136            packages: packages.to_vec(),
137            mtimes,
138        };
139
140        let cache_path = self.get_cache_path(packages_dir);
141        let serialized = bincode::serialize(&entry).map_err(|e| Error::Adapter {
142            package: "cache".to_string(),
143            message: format!("Failed to serialize cache: {}", e),
144        })?;
145
146        let compressed = zstd::encode_all(&serialized[..], 3).map_err(|e| Error::Adapter {
147            package: "cache".to_string(),
148            message: format!("Failed to compress cache: {}", e),
149        })?;
150
151        if compressed.len() < 4096 {
152            fs::write(&cache_path, compressed).map_err(Error::Io)?;
153        } else {
154            let file = OpenOptions::new()
155                .read(true)
156                .write(true)
157                .create(true)
158                .truncate(true)
159                .open(&cache_path)
160                .map_err(Error::Io)?;
161
162            file.set_len(compressed.len() as u64).map_err(Error::Io)?;
163
164            let mut mmap = unsafe {
165                MmapMut::map_mut(&file).map_err(|e| Error::Adapter {
166                    package: "cache".to_string(),
167                    message: format!("Failed to memory-map cache file for writing: {}", e),
168                })?
169            };
170
171            mmap.copy_from_slice(&compressed);
172            mmap.flush().map_err(|e| Error::Adapter {
173                package: "cache".to_string(),
174                message: format!("Failed to flush memory-mapped cache: {}", e),
175            })?;
176        }
177
178        Ok(())
179    }
180
181    fn compute_cache_key(&self, packages_dir: &Path) -> String {
182        let path_bytes = packages_dir.as_os_str().as_encoded_bytes();
183        let hash = xxh3_64(path_bytes);
184        format!("{:x}", hash)
185    }
186
187    fn collect_mtimes(&self, packages_dir: &Path) -> Result<FxHashMap<PathBuf, u64>> {
188        let packages_dir = packages_dir.to_path_buf();
189
190        let polykit_files: Vec<PathBuf> = walkdir::WalkDir::new(&packages_dir)
191            .max_depth(MAX_SCAN_DEPTH)
192            .follow_links(false)
193            .into_iter()
194            .filter_map(|e| e.ok())
195            .filter(|e| e.file_name() == "polykit.toml")
196            .map(|e| e.path().to_path_buf())
197            .collect();
198
199        let mtimes_vec: Vec<(PathBuf, u64)> = polykit_files
200            .into_par_iter()
201            .flat_map(|path| {
202                let mut results = Vec::new();
203
204                if let Ok(metadata) = path.metadata() {
205                    if let Ok(mtime) = metadata.modified() {
206                        if let Ok(duration) = mtime.duration_since(SystemTime::UNIX_EPOCH) {
207                            let relative_path = path
208                                .strip_prefix(&packages_dir)
209                                .unwrap_or(&path)
210                                .to_path_buf();
211                            results.push((relative_path, duration.as_secs()));
212                        }
213                    }
214                }
215
216                if let Some(package_dir) = path.parent() {
217                    if let Ok(metadata) = package_dir.metadata() {
218                        if let Ok(mtime) = metadata.modified() {
219                            if let Ok(duration) = mtime.duration_since(SystemTime::UNIX_EPOCH) {
220                                let relative_dir = package_dir
221                                    .strip_prefix(&packages_dir)
222                                    .unwrap_or(package_dir)
223                                    .to_path_buf();
224                                let dir_key = relative_dir.join(".dir");
225                                results.push((dir_key, duration.as_secs()));
226                            }
227                        }
228                    }
229                }
230
231                results
232            })
233            .collect();
234
235        let mut mtimes = FxHashMap::with_capacity_and_hasher(mtimes_vec.len(), Default::default());
236        let mut package_dirs = rustc_hash::FxHashSet::default();
237
238        for (path, mtime) in mtimes_vec {
239            if path.file_name().and_then(|n| n.to_str()) == Some(".dir") {
240                if let Some(parent) = path.parent() {
241                    package_dirs.insert(parent.to_path_buf());
242                }
243            }
244            mtimes.insert(path, mtime);
245        }
246
247        let package_count_key = PathBuf::from(".package_count");
248        mtimes.insert(package_count_key, package_dirs.len() as u64);
249
250        Ok(mtimes)
251    }
252
253    fn validate_mtimes(
254        &self,
255        packages_dir: &Path,
256        cached_mtimes: &FxHashMap<PathBuf, u64>,
257    ) -> Result<bool> {
258        if cached_mtimes.is_empty() {
259            return Ok(false);
260        }
261
262        let package_count_key = PathBuf::from(".package_count");
263        if let Some(cached_count) = cached_mtimes.get(&package_count_key) {
264            let current_count = self.count_packages_fast(packages_dir)?;
265            if current_count != *cached_count {
266                return Ok(false);
267            }
268        }
269
270        let packages_dir = packages_dir.to_path_buf();
271        let package_count_key_clone = package_count_key.clone();
272
273        let validation_results: Vec<bool> = cached_mtimes
274            .par_iter()
275            .filter(|(path, _)| path != &&package_count_key_clone)
276            .map(|(path, cached_time)| {
277                let path_to_check = if path.file_name().and_then(|n| n.to_str()) == Some(".dir") {
278                    if let Some(parent) = path.parent() {
279                        packages_dir.join(parent)
280                    } else {
281                        packages_dir.join(path)
282                    }
283                } else {
284                    packages_dir.join(path)
285                };
286
287                if let Ok(metadata) = path_to_check.metadata() {
288                    if let Ok(mtime) = metadata.modified() {
289                        if let Ok(duration) = mtime.duration_since(SystemTime::UNIX_EPOCH) {
290                            duration.as_secs() == *cached_time
291                        } else {
292                            false
293                        }
294                    } else {
295                        false
296                    }
297                } else {
298                    false
299                }
300            })
301            .collect();
302
303        Ok(validation_results.iter().all(|&valid| valid))
304    }
305
306    fn count_packages_fast(&self, packages_dir: &Path) -> Result<u64> {
307        let count = walkdir::WalkDir::new(packages_dir)
308            .max_depth(MAX_SCAN_DEPTH)
309            .follow_links(false)
310            .into_iter()
311            .filter_map(|e| e.ok())
312            .filter(|e| e.file_name() == "polykit.toml")
313            .count();
314        Ok(count as u64)
315    }
316
317    pub fn clear(&self, packages_dir: &Path) -> Result<()> {
318        let cache_path = self.get_cache_path(packages_dir);
319        if cache_path.exists() {
320            fs::remove_file(&cache_path).map_err(Error::Io)?;
321        }
322        Ok(())
323    }
324}