// cfgd_csi/cache.rs

use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};

use cfgd_core::PathDisplayExt;

use crate::errors::CsiError;
6
/// File name of the per-entry marker that stores the last-access unix
/// timestamp (written by `touch_atime`, read back by `read_atime`).
const LAST_ACCESS_FILE: &str = ".cfgd-last-access";
/// File name of the empty per-entry sentinel written after a successful pull;
/// `is_complete` tests for its presence.
const COMPLETE_SENTINEL: &str = ".cfgd-complete";
9
/// Node-level LRU cache for OCI module artifacts.
///
/// Cache layout: `<root>/<module>/<version>/`
/// Each entry directory contains extracted module content plus:
/// - `.cfgd-last-access` — unix timestamp for LRU tracking
/// - `.cfgd-complete` — sentinel indicating successful extraction
///
/// LRU eviction uses the marker file (filesystem atime is unreliable
/// with noatime/relatime mount options).
#[derive(Debug)]
pub struct Cache {
    /// Directory under which every cache entry (and in-flight temp dir) lives.
    root: PathBuf,
    /// Capacity budget in bytes; eviction trims the cache once usage exceeds it.
    max_bytes: u64,
}
23
24impl Cache {
25    pub fn new(root: PathBuf, max_bytes: u64) -> Result<Self, CsiError> {
26        std::fs::create_dir_all(&root)?;
27        Ok(Self { root, max_bytes })
28    }
29
30    /// Return the cache path for a module, or pull it if not cached.
31    ///
32    /// On cache hit, updates access time for LRU tracking.
33    /// On cache miss, pulls the OCI artifact to a temp dir and atomically
34    /// moves it into place (safe under concurrent access).
35    /// After a pull, runs eviction if over capacity.
36    pub fn get_or_pull(
37        &self,
38        module: &str,
39        version: &str,
40        oci_ref: &str,
41    ) -> Result<PathBuf, CsiError> {
42        let entry_dir = self.entry_path(module, version)?;
43
44        if entry_dir.is_dir() && is_complete(&entry_dir) {
45            if let Err(e) = touch_atime(&entry_dir) {
46                tracing::warn!(module = %module, version = %version, error = %e, "failed to update cache atime on hit; LRU ordering may be stale");
47            }
48            return Ok(entry_dir);
49        }
50
51        // Cache miss — pull to temp dir, then atomically move into place
52        let tmp_name = format!(".tmp-{}-{}-{}", module, version, std::process::id());
53        let tmp_dir = self.root.join(&tmp_name);
54        std::fs::create_dir_all(&tmp_dir)?;
55
56        let pull_result = cfgd_core::oci::pull_module(
57            oci_ref,
58            &tmp_dir,
59            cfgd_core::oci::SignaturePolicy::None,
60            None,
61        );
62        if let Err(e) = pull_result {
63            let _ = std::fs::remove_dir_all(&tmp_dir);
64            return Err(e.into());
65        }
66
67        // Mark complete and set access time
68        if let Err(e) = cfgd_core::atomic_write_str(&tmp_dir.join(COMPLETE_SENTINEL), "") {
69            tracing::warn!("failed to write cache sentinel: {e}");
70        }
71        if let Err(e) = touch_atime(&tmp_dir) {
72            tracing::warn!(module = %module, version = %version, error = %e, "failed to record cache atime after pull; entry will look cold to LRU");
73        }
74
75        // Ensure parent dir exists for the final path
76        if let Some(parent) = entry_dir.parent() {
77            std::fs::create_dir_all(parent)?;
78        }
79
80        // Atomic move — if another thread already placed the entry, discard ours.
81        // On rename failure, we can't blindly `Ok(entry_dir)` — that's only
82        // correct when the failure was "lost the race" (another thread/process
83        // completed the pull and placed a valid entry). Any other rename error
84        // (destination permission issue, parent removed, dest on a different
85        // filesystem) leaves `entry_dir` non-existent or incomplete; returning
86        // its path would surface later as a confusing "cache entry missing".
87        if let Err(e) = std::fs::rename(&tmp_dir, &entry_dir) {
88            tracing::warn!(module = %module, version = %version, error = %e, "cache rename race, discarding duplicate pull");
89            let _ = std::fs::remove_dir_all(&tmp_dir);
90            if !(entry_dir.is_dir() && is_complete(&entry_dir)) {
91                return Err(CsiError::Io(std::io::Error::other(format!(
92                    "cache rename for {module}:{version} failed and entry is still missing/incomplete after the race: {e}"
93                ))));
94            }
95        }
96
97        // Best-effort eviction after pull
98        if let Err(e) = self.evict_lru() {
99            tracing::warn!(error = %e, "cache eviction failed");
100        }
101
102        Ok(entry_dir)
103    }
104
105    /// Return the cached path if it exists and is complete, without pulling.
106    pub fn get(&self, module: &str, version: &str) -> Option<PathBuf> {
107        let entry_dir = self.entry_path(module, version).ok()?;
108        if entry_dir.is_dir() && is_complete(&entry_dir) {
109            if let Err(e) = touch_atime(&entry_dir) {
110                tracing::warn!(module = %module, version = %version, error = %e, "failed to update cache atime on get; LRU ordering may be stale");
111            }
112            Some(entry_dir)
113        } else {
114            None
115        }
116    }
117
118    /// Evict least-recently-used entries until cache is under max_bytes.
119    pub fn evict_lru(&self) -> Result<(), CsiError> {
120        let current = self.current_size_bytes();
121        if current <= self.max_bytes {
122            return Ok(());
123        }
124
125        let mut entries = self.list_entries()?;
126        // Sort by access time ascending (oldest first)
127        entries.sort_by_key(|(_, atime)| *atime);
128
129        let mut freed = 0u64;
130        let overflow = current.saturating_sub(self.max_bytes);
131
132        for (path, _) in &entries {
133            if freed >= overflow {
134                break;
135            }
136            let size = dir_size(path);
137            if let Err(e) = std::fs::remove_dir_all(path) {
138                tracing::warn!(path = %path.posix(), error = %e, "failed to evict cache entry");
139                continue;
140            }
141            // Clean up empty parent (module name dir) if no versions remain
142            if let Some(parent) = path.parent() {
143                let _ = std::fs::remove_dir(parent);
144            }
145            freed += size;
146            tracing::info!(path = %path.posix(), freed_bytes = size, "evicted cache entry");
147        }
148
149        Ok(())
150    }
151
152    /// Total bytes used by cached entries (excludes marker files).
153    pub fn current_size_bytes(&self) -> u64 {
154        dir_size_excluding_markers(&self.root)
155    }
156
157    fn entry_path(&self, module: &str, version: &str) -> Result<PathBuf, CsiError> {
158        cfgd_core::validate_no_traversal(Path::new(module)).map_err(|e| {
159            CsiError::InvalidAttribute {
160                key: format!("module: {e}"),
161            }
162        })?;
163        cfgd_core::validate_no_traversal(Path::new(version)).map_err(|e| {
164            CsiError::InvalidAttribute {
165                key: format!("version: {e}"),
166            }
167        })?;
168        Ok(self.root.join(module).join(version))
169    }
170
171    /// List all cache entries as (path, access_time_secs) pairs.
172    fn list_entries(&self) -> Result<Vec<(PathBuf, u64)>, CsiError> {
173        let mut entries = Vec::new();
174
175        let module_dirs = match std::fs::read_dir(&self.root) {
176            Ok(rd) => rd,
177            Err(e) => {
178                tracing::warn!(path = %self.root.posix(), error = %e, "cannot read cache root");
179                return Ok(entries);
180            }
181        };
182
183        for module_entry in module_dirs {
184            let module_entry = module_entry?;
185            let module_path = module_entry.path();
186            if !module_path.is_dir() {
187                continue;
188            }
189            // Skip temp dirs
190            if module_path
191                .file_name()
192                .is_some_and(|n| n.to_str().is_some_and(|s| s.starts_with(".tmp-")))
193            {
194                continue;
195            }
196
197            let version_dirs = match std::fs::read_dir(&module_path) {
198                Ok(rd) => rd,
199                Err(_) => continue,
200            };
201
202            for version_entry in version_dirs {
203                let version_entry = version_entry?;
204                let version_path = version_entry.path();
205                if !version_path.is_dir() {
206                    continue;
207                }
208
209                let atime = read_atime(&version_path);
210                entries.push((version_path, atime));
211            }
212        }
213
214        Ok(entries)
215    }
216}
217
218/// Write a marker file with the current unix timestamp for LRU tracking.
219///
220/// Returns the underlying `io::Error` on failure so callers can decide whether
221/// to propagate or log. Dropped errors would skew the LRU (stale atime sticks
222/// around and makes a hot entry look cold to `evict_lru`).
223fn touch_atime(path: &Path) -> std::io::Result<()> {
224    let now = cfgd_core::unix_secs_now();
225    cfgd_core::atomic_write_str(&path.join(LAST_ACCESS_FILE), &now.to_string())?;
226    Ok(())
227}
228
229/// Read the last-access timestamp from the marker file, or 0 if missing.
230fn read_atime(path: &Path) -> u64 {
231    std::fs::read_to_string(path.join(LAST_ACCESS_FILE))
232        .ok()
233        .and_then(|s| s.trim().parse::<u64>().ok())
234        .unwrap_or(0)
235}
236
237/// Check if an entry has been fully extracted (sentinel present).
238fn is_complete(path: &Path) -> bool {
239    path.join(COMPLETE_SENTINEL).exists()
240}
241
242/// Recursively compute the total size of files, excluding marker files.
243fn dir_size_excluding_markers(path: &Path) -> u64 {
244    let mut total = 0u64;
245    if let Ok(entries) = std::fs::read_dir(path) {
246        for entry in entries.flatten() {
247            let p = entry.path();
248            if p.is_dir() {
249                total = total.saturating_add(dir_size_excluding_markers(&p));
250            } else {
251                let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
252                if name == LAST_ACCESS_FILE || name == COMPLETE_SENTINEL {
253                    continue;
254                }
255                if let Ok(meta) = p.metadata() {
256                    total = total.saturating_add(meta.len());
257                }
258            }
259        }
260    }
261    total
262}
263
/// Recursively compute total dir size (all files).
///
/// Entries are classified with `DirEntry::file_type` and measured with
/// `DirEntry::metadata`, neither of which follows symlinks: a symlinked
/// directory is not descended into and a symlink contributes only the size of
/// the link itself, so linked content is neither double-counted nor pulled in
/// from outside the tree. Unreadable directories and entries count as 0.
fn dir_size(path: &Path) -> u64 {
    let Ok(entries) = std::fs::read_dir(path) else {
        return 0;
    };

    let mut total = 0u64;
    for entry in entries.flatten() {
        let Ok(kind) = entry.file_type() else {
            continue;
        };
        let bytes = if kind.is_dir() {
            dir_size(&entry.path())
        } else {
            entry.metadata().map(|meta| meta.len()).unwrap_or(0)
        };
        total = total.saturating_add(bytes);
    }
    total
}
279
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a cache rooted at `dir` with the given byte budget.
    fn make_cache(dir: &Path, max_bytes: u64) -> Cache {
        Cache::new(dir.to_path_buf(), max_bytes).unwrap()
    }

    /// Hand-craft a complete entry: `content_size` bytes of payload plus the
    /// completion sentinel and an access-time marker set to `atime`.
    fn populate_entry(dir: &Path, module: &str, version: &str, content_size: usize, atime: u64) {
        let entry = dir.join(module).join(version);
        std::fs::create_dir_all(&entry).unwrap();
        std::fs::write(entry.join("data.txt"), "x".repeat(content_size)).unwrap();
        std::fs::write(entry.join(COMPLETE_SENTINEL), "").unwrap();
        std::fs::write(entry.join(LAST_ACCESS_FILE), atime.to_string()).unwrap();
    }

    #[test]
    fn cache_get_returns_none_when_empty() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);
        assert!(cache.get("nettools", "1.0").is_none());
    }

    #[test]
    fn cache_get_returns_path_after_manual_populate() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);
        populate_entry(dir.path(), "mymod", "1.0", 100, 5000);

        let result = cache.get("mymod", "1.0");
        assert_eq!(result, Some(dir.path().join("mymod").join("1.0")));
    }

    #[test]
    fn cache_get_returns_none_for_incomplete_entry() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);

        // Create entry without completion sentinel
        let entry = dir.path().join("partial").join("1.0");
        std::fs::create_dir_all(&entry).unwrap();
        std::fs::write(entry.join("data.txt"), "some data").unwrap();

        assert!(cache.get("partial", "1.0").is_none());
    }

    #[test]
    fn cache_size_tracking_excludes_markers() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);

        assert_eq!(cache.current_size_bytes(), 0);

        populate_entry(dir.path(), "mod1", "v1", 1000, 5000);

        // Exactly the payload: a loose upper bound would still pass if the
        // few-byte marker files were (wrongly) counted.
        assert_eq!(cache.current_size_bytes(), 1000);
    }

    #[test]
    fn cache_eviction_removes_oldest() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 500);

        populate_entry(dir.path(), "old-mod", "v1", 300, 1000);
        populate_entry(dir.path(), "new-mod", "v1", 300, 9999);

        let old_entry = dir.path().join("old-mod").join("v1");
        let new_entry = dir.path().join("new-mod").join("v1");
        assert!(old_entry.exists());
        assert!(new_entry.exists());

        cache.evict_lru().unwrap();

        assert!(!old_entry.exists(), "old entry should have been evicted");
        assert!(new_entry.exists(), "new entry should be retained");
    }

    #[test]
    fn cache_no_eviction_when_under_limit() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);

        populate_entry(dir.path(), "mod1", "v1", 10, 5000);
        cache.evict_lru().unwrap();
        assert!(dir.path().join("mod1").join("v1").exists());
    }

    #[test]
    fn entry_path_layout() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024);
        let path = cache.entry_path("nettools", "1.2.3").unwrap();
        assert_eq!(path, dir.path().join("nettools").join("1.2.3"));
    }

    #[test]
    fn entry_path_rejects_traversal() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024);
        assert!(cache.entry_path("../../etc", "passwd").is_err());
        assert!(cache.entry_path("good-mod", "../../../tmp").is_err());
    }

    #[test]
    fn is_complete_true_when_sentinel_present() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join(COMPLETE_SENTINEL), "").unwrap();
        assert!(is_complete(dir.path()));
    }

    #[test]
    fn is_complete_false_when_no_sentinel() {
        let dir = tempfile::tempdir().unwrap();
        assert!(!is_complete(dir.path()));
    }

    #[test]
    fn cache_eviction_removes_multiple_oldest_until_under_limit() {
        let dir = tempfile::tempdir().unwrap();
        // Each entry has 200 bytes of content; capacity allows only ~1 entry
        let cache = make_cache(dir.path(), 250);

        populate_entry(dir.path(), "mod-a", "v1", 200, 1000);
        populate_entry(dir.path(), "mod-b", "v1", 200, 2000);
        populate_entry(dir.path(), "mod-c", "v1", 200, 3000);
        populate_entry(dir.path(), "mod-d", "v1", 200, 4000);

        // 4 entries x 200 = 800 bytes of payload, capacity is 250
        assert_eq!(cache.current_size_bytes(), 800);

        cache.evict_lru().unwrap();

        // Oldest entries should be evicted; newest should survive
        assert!(
            !dir.path().join("mod-a").join("v1").exists(),
            "oldest entry should be evicted"
        );
        assert!(
            !dir.path().join("mod-b").join("v1").exists(),
            "second oldest should be evicted"
        );
        assert!(
            !dir.path().join("mod-c").join("v1").exists(),
            "third oldest should be evicted"
        );
        assert!(
            dir.path().join("mod-d").join("v1").exists(),
            "newest entry should survive"
        );

        // After eviction, size should be at or below capacity
        assert!(cache.current_size_bytes() <= 250);
    }

    #[test]
    fn cache_eviction_multiple_versions_of_same_module() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 350);

        populate_entry(dir.path(), "nettools", "1.0", 200, 1000);
        populate_entry(dir.path(), "nettools", "2.0", 200, 5000);

        // 400 bytes, capacity 350 — oldest version should be evicted
        cache.evict_lru().unwrap();

        assert!(
            !dir.path().join("nettools").join("1.0").exists(),
            "older version should be evicted"
        );
        assert!(
            dir.path().join("nettools").join("2.0").exists(),
            "newer version should survive"
        );
    }

    #[test]
    fn list_entries_skips_temp_dirs() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);

        populate_entry(dir.path(), "real-mod", "v1", 100, 5000);

        // Create a temp dir that should be skipped during listing
        let tmp_dir = dir.path().join(".tmp-real-mod-v2-12345");
        std::fs::create_dir_all(&tmp_dir).unwrap();
        std::fs::write(tmp_dir.join("data.txt"), "partial").unwrap();

        let entries = cache.list_entries().unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].0, dir.path().join("real-mod").join("v1"));
    }

    #[test]
    fn read_atime_returns_zero_for_missing_file() {
        let dir = tempfile::tempdir().unwrap();
        assert_eq!(read_atime(dir.path()), 0);
    }

    #[test]
    fn read_atime_returns_zero_for_unparseable_marker() {
        // A corrupt marker must degrade to "never accessed", not panic.
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join(LAST_ACCESS_FILE), "not-a-number").unwrap();
        assert_eq!(read_atime(dir.path()), 0);
    }

    #[test]
    fn touch_atime_writes_timestamp() {
        let dir = tempfile::tempdir().unwrap();
        touch_atime(dir.path()).expect("touch_atime");

        let atime = read_atime(dir.path());
        // Should be a recent unix timestamp (after 2020)
        assert!(atime > 1_577_836_800);
    }

    #[test]
    #[cfg(unix)]
    fn touch_atime_errors_on_unwritable_dir() {
        use std::os::unix::fs::PermissionsExt;
        // atomic_write_str creates parent dirs automatically, so the only
        // reliable failure mode is a parent that exists but is read-only.
        // Root bypasses permission bits on Unix, so skip under euid==0
        // where the write would succeed anyway.
        if cfgd_core::is_root() {
            return;
        }
        let dir = tempfile::tempdir().unwrap();
        let ro = dir.path().join("readonly");
        std::fs::create_dir(&ro).unwrap();
        std::fs::set_permissions(&ro, std::fs::Permissions::from_mode(0o500)).unwrap();

        let result = touch_atime(&ro);

        // Restore perms before asserting so tempdir can clean up even when
        // the assertion below fails.
        let _ = std::fs::set_permissions(&ro, std::fs::Permissions::from_mode(0o700));

        assert!(result.is_err(), "should fail on read-only dir");
    }

    #[test]
    fn cache_size_zero_for_empty() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024);
        assert_eq!(cache.current_size_bytes(), 0);
    }

    #[test]
    fn cache_get_updates_access_time() {
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);
        populate_entry(dir.path(), "mymod", "1.0", 100, 1000);

        // Access time should be 1000 initially
        let atime_before = read_atime(&dir.path().join("mymod").join("1.0"));
        assert_eq!(atime_before, 1000);

        // get() should update the access time
        cache.get("mymod", "1.0").unwrap();

        let atime_after = read_atime(&dir.path().join("mymod").join("1.0"));
        assert!(
            atime_after > 1000,
            "access time should be updated after get()"
        );
    }

    #[test]
    fn cache_get_or_pull_returns_path_without_touching_oci_on_hit() {
        // Cache-hit early return of get_or_pull: pre-populate an entry with
        // the .cfgd-complete sentinel — get_or_pull must short-circuit and
        // return the entry path WITHOUT invoking oci::pull_module, proven by
        // passing a garbage oci_ref that would fail any real call.
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);
        populate_entry(dir.path(), "preinstalled", "1.0.0", 256, 1_000);

        let result = cache
            .get_or_pull("preinstalled", "1.0.0", "not-a-real-oci-ref://garbage")
            .expect("cache-hit must NOT consult oci::pull_module");

        assert_eq!(result, dir.path().join("preinstalled").join("1.0.0"));
        assert!(
            result.join(COMPLETE_SENTINEL).exists(),
            "sentinel should still mark the entry complete",
        );
        // The hit path calls touch_atime — verify atime moved forward.
        let new_atime = read_atime(&result);
        assert!(
            new_atime > 1_000,
            "atime must refresh on cache hit (was 1000, now {})",
            new_atime,
        );
    }

    #[test]
    fn list_entries_skips_regular_files_at_root() {
        // Non-dir skip at the root level of list_entries: a stray regular
        // file directly under the cache root (e.g. a README placed by an
        // operator) must NOT be treated as a module directory. Pin the
        // contract; otherwise eviction would try to remove_dir_all a regular
        // file and the cache would surface bogus errors.
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);
        populate_entry(dir.path(), "real-mod", "v1", 100, 5_000);
        std::fs::write(dir.path().join("README"), "not a module dir").unwrap();

        let entries = cache.list_entries().unwrap();
        assert_eq!(entries.len(), 1, "stray file at root must not be listed");
        assert_eq!(entries[0].0, dir.path().join("real-mod").join("v1"));
    }

    #[test]
    fn list_entries_skips_regular_files_at_version_level() {
        // Non-dir skip at the version level of list_entries: a regular file
        // sibling to version dirs inside a module dir (e.g. module-level
        // metadata.json placed by a future feature) must not be listed as a
        // version. Without this skip the per-version atime read would target
        // a non-directory and surface noise.
        let dir = tempfile::tempdir().unwrap();
        let cache = make_cache(dir.path(), 1024 * 1024);
        populate_entry(dir.path(), "vmod", "1.0", 100, 5_000);
        // Sibling of version dir, but not a version itself.
        std::fs::write(dir.path().join("vmod").join("notes.txt"), "stray").unwrap();

        let entries = cache.list_entries().unwrap();
        assert_eq!(
            entries.len(),
            1,
            "stray file under module dir must be skipped"
        );
        assert_eq!(entries[0].0, dir.path().join("vmod").join("1.0"));
    }
}
608}