// Source: cache_manager/lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(missing_docs)]
3
4use std::env;
5use std::fs;
6use std::fs::OpenOptions;
7use std::io;
8use std::path::{Path, PathBuf};
9use std::time::{Duration, SystemTime, UNIX_EPOCH};
10
/// Eviction limits that may be enforced when a cache directory is ensured.
///
/// A policy is consumed by `CacheGroup::ensure_dir_with_policy` and
/// `CacheRoot::ensure_group_with_policy`. Every field is optional and a field
/// left as `None` disables the corresponding rule.
///
/// The rules always run in this order:
/// 1. `max_age` — drop files whose age is at or above the threshold.
/// 2. `max_files` — drop the oldest files until at most N remain.
/// 3. `max_bytes` — drop the oldest files until the total size fits.
///
/// For `max_files` and `max_bytes`, "oldest" means the smallest modification
/// time; files sharing a modification time are ordered by path so that the
/// selection is deterministic.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct EvictPolicy {
    /// Upper bound on the number of files kept in the managed directory tree.
    ///
    /// When the tree holds more files than this, the oldest ones are removed
    /// until the count is `<= max_files`.
    pub max_files: Option<usize>,
    /// Upper bound, in bytes, on the combined size of the files kept in the
    /// managed directory tree.
    ///
    /// When the total is larger, files are removed oldest-first until the
    /// total is `<= max_bytes`.
    ///
    /// Notes:
    /// - Regular files are counted recursively under the directory.
    /// - Directories themselves contribute nothing to the total.
    /// - The limit is only enforced by the policy-aware
    ///   `ensure_*_with_policy` calls.
    pub max_bytes: Option<u64>,
    /// Oldest age a file may reach in the managed directory tree.
    ///
    /// A file whose age is `>= max_age` is removed.
    pub max_age: Option<Duration>,
}
43
/// Result of evaluating an `EvictPolicy`: the files it selects for removal.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct EvictionReport {
    /// Paths selected for eviction, listed in the order the removals would be
    /// applied.
    pub marked_for_eviction: Vec<PathBuf>,
}
50
/// Snapshot of one regular file gathered while scanning a cache directory.
#[derive(Clone, Debug)]
struct FileEntry {
    /// Location of the file on disk.
    path: PathBuf,
    /// Last-modified timestamp (the Unix epoch when the platform cannot
    /// report one).
    modified: SystemTime,
    /// Size of the file in bytes.
    len: u64,
}
57
/// A cache root directory, either discovered or supplied explicitly.
///
/// `CacheRoot::discover()` searches upwards from the current working
/// directory for the nearest crate root, while `CacheRoot::from_root(...)`
/// wraps a path chosen by the caller.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CacheRoot {
    /// Directory that every group and entry path is resolved against.
    root: PathBuf,
}
67
68impl CacheRoot {
69    /// Discover the cache root by searching parent directories for `Cargo.toml`.
70    ///
71    /// Falls back to the current working directory when no crate root is found.
72    pub fn discover() -> io::Result<Self> {
73        let cwd = env::current_dir()?;
74        let root = find_crate_root(&cwd).unwrap_or(cwd);
75        // Prefer a canonicalized path when possible to avoid surprising
76        // differences between logically-equal paths (symlinks, tempdir
77        // representations, etc.) used by callers and tests.
78        let root = root.canonicalize().unwrap_or(root);
79        Ok(Self { root })
80    }
81
82    /// Create a `CacheRoot` from an explicit filesystem path.
83    pub fn from_root<P: Into<PathBuf>>(root: P) -> Self {
84        Self { root: root.into() }
85    }
86
87    /// Like `discover()` but never returns an `io::Result` — falls back to `.` on error.
88    pub fn discover_or_cwd() -> Self {
89        let cwd = env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
90        let root = find_crate_root(&cwd).unwrap_or(cwd);
91        let root = root.canonicalize().unwrap_or(root);
92        Self { root }
93    }
94
95    /// Return the underlying path for this `CacheRoot`.
96    pub fn path(&self) -> &Path {
97        &self.root
98    }
99
100    /// Build a `CacheGroup` for a relative subdirectory under this root.
101    pub fn group<P: AsRef<Path>>(&self, relative_group: P) -> CacheGroup {
102        let path = self.root.join(relative_group.as_ref());
103        CacheGroup { path }
104    }
105
106    /// Resolve a relative group path to an absolute `PathBuf` under this root.
107    pub fn group_path<P: AsRef<Path>>(&self, relative_group: P) -> PathBuf {
108        self.root.join(relative_group.as_ref())
109    }
110
111    /// Ensure the given group directory exists, creating parents as required.
112    pub fn ensure_group<P: AsRef<Path>>(&self, relative_group: P) -> io::Result<PathBuf> {
113        let group = self.group_path(relative_group);
114        fs::create_dir_all(&group)?;
115        Ok(group)
116    }
117
118    /// Ensure the given group exists and optionally apply an eviction policy.
119    ///
120    /// When `policy` is `Some`, files will be evaluated and removed according
121    /// to the `EvictPolicy` rules. Passing `None` performs only directory creation.
122    pub fn ensure_group_with_policy<P: AsRef<Path>>(
123        &self,
124        relative_group: P,
125        policy: Option<&EvictPolicy>,
126    ) -> io::Result<PathBuf> {
127        let group = self.group(relative_group);
128        group.ensure_dir_with_policy(policy)?;
129        Ok(group.path().to_path_buf())
130    }
131
132    /// Resolve a cache entry path given a cache directory (relative to the root)
133    /// and a relative entry path. Absolute `relative_path` values are returned
134    /// unchanged.
135    pub fn cache_path<P: AsRef<Path>, Q: AsRef<Path>>(
136        &self,
137        cache_dir: P,
138        relative_path: Q,
139    ) -> PathBuf {
140        let rel = relative_path.as_ref();
141        if rel.is_absolute() {
142            return rel.to_path_buf();
143        }
144        self.group(cache_dir).entry_path(rel)
145    }
146
147    /// Discover the crate root (or use cwd) and resolve a cache entry path.
148    ///
149    /// Convenience wrapper for `CacheRoot::discover_or_cwd().cache_path(...)`.
150    pub fn discover_cache_path<P: AsRef<Path>, Q: AsRef<Path>>(
151        cache_dir: P,
152        relative_path: Q,
153    ) -> PathBuf {
154        Self::discover_or_cwd().cache_path(cache_dir, relative_path)
155    }
156}
157
/// A group (subdirectory) beneath a `CacheRoot` in which cache entries live.
///
/// Obtain one through `CacheRoot::group(...)`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CacheGroup {
    /// Directory of this group.
    path: PathBuf,
}
166
167impl CacheGroup {
168    /// Return the path of this cache group.
169    pub fn path(&self) -> &Path {
170        &self.path
171    }
172
173    /// Ensure the group directory exists on disk, creating parents as needed.
174    pub fn ensure_dir(&self) -> io::Result<&Path> {
175        fs::create_dir_all(&self.path)?;
176        Ok(&self.path)
177    }
178
179    /// Ensures this directory exists, then applies optional eviction.
180    ///
181    /// Eviction is applied recursively to files under this directory. The
182    /// policy is best-effort for removals: individual delete failures are
183    /// ignored so initialization can continue.
184    pub fn ensure_dir_with_policy(&self, policy: Option<&EvictPolicy>) -> io::Result<&Path> {
185        fs::create_dir_all(&self.path)?;
186        if let Some(policy) = policy {
187            apply_evict_policy(&self.path, policy)?;
188        }
189        Ok(&self.path)
190    }
191
192    /// Returns a report of files that would be evicted under `policy`.
193    ///
194    /// This does not delete files. The selection order matches the internal
195    /// order used by `ensure_dir_with_policy`.
196    /// Return a report of files that would be evicted by `policy`.
197    ///
198    /// The report is non-destructive and mirrors the selection used by
199    /// `ensure_dir_with_policy` so it can be used for previewing or testing.
200    pub fn eviction_report(&self, policy: &EvictPolicy) -> io::Result<EvictionReport> {
201        build_eviction_report(&self.path, policy)
202    }
203
204    /// Create a nested subgroup under this group.
205    pub fn subgroup<P: AsRef<Path>>(&self, relative_group: P) -> Self {
206        Self {
207            path: self.path.join(relative_group.as_ref()),
208        }
209    }
210
211    /// Resolve a relative entry path under this group.
212    pub fn entry_path<P: AsRef<Path>>(&self, relative_file: P) -> PathBuf {
213        self.path.join(relative_file.as_ref())
214    }
215
216    /// Create or update (touch) a file under this group, creating parent
217    /// directories as needed. Returns the absolute path to the entry.
218    pub fn touch<P: AsRef<Path>>(&self, relative_file: P) -> io::Result<PathBuf> {
219        let entry = self.entry_path(relative_file);
220        if let Some(parent) = entry.parent() {
221            fs::create_dir_all(parent)?;
222        }
223        OpenOptions::new().create(true).append(true).open(&entry)?;
224        Ok(entry)
225    }
226}
227
/// Returns the closest directory at or above `start` that directly contains
/// a `Cargo.toml` file, or `None` when no ancestor has one.
fn find_crate_root(start: &Path) -> Option<PathBuf> {
    // `ancestors` yields `start` first and then each parent in turn.
    start
        .ancestors()
        .find(|candidate| candidate.join("Cargo.toml").is_file())
        .map(Path::to_path_buf)
}
239
240fn apply_evict_policy(root: &Path, policy: &EvictPolicy) -> io::Result<()> {
241    let report = build_eviction_report(root, policy)?;
242
243    for path in report.marked_for_eviction {
244        let _ = fs::remove_file(path);
245    }
246
247    Ok(())
248}
249
250fn sort_entries_oldest_first(entries: &mut [FileEntry]) {
251    entries.sort_by(|a, b| {
252        let ta = a
253            .modified
254            .duration_since(UNIX_EPOCH)
255            .unwrap_or(Duration::ZERO);
256        let tb = b
257            .modified
258            .duration_since(UNIX_EPOCH)
259            .unwrap_or(Duration::ZERO);
260        ta.cmp(&tb).then_with(|| a.path.cmp(&b.path))
261    });
262}
263
264fn build_eviction_report(root: &Path, policy: &EvictPolicy) -> io::Result<EvictionReport> {
265    let mut entries = collect_files(root)?;
266    let mut marked_for_eviction = Vec::new();
267
268    if let Some(max_age) = policy.max_age {
269        let now = SystemTime::now();
270        let mut survivors = Vec::with_capacity(entries.len());
271        for entry in entries {
272            let age = now.duration_since(entry.modified).unwrap_or(Duration::ZERO);
273            if age >= max_age {
274                marked_for_eviction.push(entry.path);
275            } else {
276                survivors.push(entry);
277            }
278        }
279        entries = survivors;
280    }
281
282    sort_entries_oldest_first(&mut entries);
283
284    if let Some(max_files) = policy.max_files
285        && entries.len() > max_files
286    {
287        let to_remove = entries.len() - max_files;
288        for entry in entries.iter().take(to_remove) {
289            marked_for_eviction.push(entry.path.clone());
290        }
291        entries = entries.into_iter().skip(to_remove).collect();
292        sort_entries_oldest_first(&mut entries);
293    }
294
295    if let Some(max_bytes) = policy.max_bytes {
296        let mut total: u64 = entries.iter().map(|e| e.len).sum();
297        if total > max_bytes {
298            for entry in &entries {
299                if total <= max_bytes {
300                    break;
301                }
302                marked_for_eviction.push(entry.path.clone());
303                total = total.saturating_sub(entry.len);
304            }
305        }
306    }
307
308    Ok(EvictionReport {
309        marked_for_eviction,
310    })
311}
312
313fn collect_files(root: &Path) -> io::Result<Vec<FileEntry>> {
314    let mut out = Vec::new();
315    collect_files_recursive(root, &mut out)?;
316    Ok(out)
317}
318
319fn collect_files_recursive(dir: &Path, out: &mut Vec<FileEntry>) -> io::Result<()> {
320    for entry in fs::read_dir(dir)? {
321        let entry = entry?;
322        let path = entry.path();
323        let meta = entry.metadata()?;
324        if meta.is_dir() {
325            collect_files_recursive(&path, out)?;
326        } else if meta.is_file() {
327            out.push(FileEntry {
328                path,
329                modified: meta.modified().unwrap_or(UNIX_EPOCH),
330                len: meta.len(),
331            });
332        }
333    }
334    Ok(())
335}
336
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;
    use std::sync::{Mutex, MutexGuard, PoisonError};
    use tempfile::TempDir;

    /// Serializes the tests that change the working directory.
    ///
    /// The working directory is process-wide while the test harness runs
    /// tests on several threads, so unsynchronized swaps would race.
    static CWD_LOCK: Mutex<()> = Mutex::new(());

    /// Switches the working directory for the lifetime of the guard and
    /// restores the previous one on drop.
    struct CwdGuard {
        previous: PathBuf,
        // Held until after `Drop::drop` has restored the directory.
        _lock: MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        fn swap_to(path: &Path) -> io::Result<Self> {
            // A poisoned lock only means another cwd test panicked; the guard
            // it held has already restored the directory, so carry on.
            let lock = CWD_LOCK.lock().unwrap_or_else(PoisonError::into_inner);
            let previous = env::current_dir()?;
            env::set_current_dir(path)?;
            Ok(Self {
                previous,
                _lock: lock,
            })
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            let _ = env::set_current_dir(&self.previous);
        }
    }

    /// Creates `<tmp>/workspace/Cargo.toml` and `<tmp>/workspace/src/nested`,
    /// returning `(crate_root, nested)`.
    fn make_workspace(tmp: &TempDir) -> (PathBuf, PathBuf) {
        let crate_root = tmp.path().join("workspace");
        let nested = crate_root.join("src").join("nested");
        fs::create_dir_all(&nested).expect("create nested");
        fs::write(
            crate_root.join("Cargo.toml"),
            "[package]\nname='x'\nversion='0.1.0'\nedition='2024'\n",
        )
        .expect("write cargo");
        (crate_root, nested)
    }

    /// Returns an existing `artifacts` group rooted in `tmp`.
    fn artifacts_group(tmp: &TempDir) -> CacheGroup {
        let group = CacheRoot::from_root(tmp.path()).group("artifacts");
        group.ensure_dir().expect("ensure dir");
        group
    }

    /// Total size of all files below `dir`.
    fn total_bytes(dir: &Path) -> u64 {
        collect_files(dir)
            .expect("collect files")
            .iter()
            .map(|f| f.len)
            .sum()
    }

    /// Paths of all files below `dir`.
    fn file_paths(dir: &Path) -> BTreeSet<PathBuf> {
        collect_files(dir)
            .expect("collect files")
            .into_iter()
            .map(|f| f.path)
            .collect()
    }

    #[test]
    fn discover_falls_back_to_cwd_when_no_cargo_toml() {
        let tmp = TempDir::new().expect("tempdir");
        let _guard = CwdGuard::swap_to(tmp.path()).expect("set cwd");

        let cache = CacheRoot::discover().expect("discover");
        let got = cache
            .path()
            .canonicalize()
            .expect("canonicalize discovered root");
        let expected = tmp.path().canonicalize().expect("canonicalize temp path");
        assert_eq!(got, expected);
    }

    #[test]
    fn discover_prefers_nearest_crate_root() {
        let tmp = TempDir::new().expect("tempdir");
        let (crate_root, nested) = make_workspace(&tmp);

        let _guard = CwdGuard::swap_to(&nested).expect("set cwd");
        let cache = CacheRoot::discover().expect("discover");
        let got = cache
            .path()
            .canonicalize()
            .expect("canonicalize discovered root");
        let expected = crate_root.canonicalize().expect("canonicalize crate root");
        assert_eq!(got, expected);
    }

    #[test]
    fn from_root_supports_arbitrary_path_and_grouping() {
        let tmp = TempDir::new().expect("tempdir");
        let root = CacheRoot::from_root(tmp.path().join("custom-cache-root"));
        let group = root.group("taxonomy/v1");

        assert_eq!(group.path(), root.path().join("taxonomy/v1").as_path());
    }

    #[test]
    fn group_path_building_and_dir_creation() {
        let tmp = TempDir::new().expect("tempdir");
        let cache = CacheRoot::from_root(tmp.path());
        let group = cache.group("artifacts/json");

        let nested_group = group.subgroup("v1");
        let ensured = nested_group.ensure_dir().expect("ensure nested dir");
        let expected_group_suffix = Path::new("artifacts").join("json").join("v1");
        assert!(ensured.ends_with(&expected_group_suffix));
        assert!(ensured.exists());

        let entry = nested_group.entry_path("a/b/cache.json");
        let expected_entry_suffix = expected_group_suffix
            .join("a")
            .join("b")
            .join("cache.json");
        assert!(entry.ends_with(&expected_entry_suffix));
    }

    #[test]
    fn touch_creates_blank_file_and_is_idempotent() {
        let tmp = TempDir::new().expect("tempdir");
        let cache = CacheRoot::from_root(tmp.path());
        let group = cache.group("artifacts/json");

        let touched = group.touch("a/b/cache.json").expect("touch file");
        assert!(touched.exists());
        let meta = fs::metadata(&touched).expect("metadata");
        assert_eq!(meta.len(), 0);

        let touched_again = group.touch("a/b/cache.json").expect("touch file again");
        assert_eq!(touched_again, touched);
        let meta_again = fs::metadata(&touched_again).expect("metadata again");
        assert_eq!(meta_again.len(), 0);
    }

    #[test]
    fn discover_cache_path_uses_root_and_group() {
        let tmp = TempDir::new().expect("tempdir");
        let (crate_root, nested) = make_workspace(&tmp);

        let _guard = CwdGuard::swap_to(&nested).expect("set cwd");
        let p = CacheRoot::discover_cache_path(".cache", "taxonomy/taxonomy_cache.json");
        let parent = p.parent().expect("cache path parent");
        fs::create_dir_all(parent).expect("create cache parent");
        let got_parent = parent.canonicalize().expect("canonicalize cache parent");
        let expected_parent = crate_root
            .join(".cache")
            .join("taxonomy")
            .canonicalize()
            .expect("canonicalize expected parent");
        assert_eq!(got_parent, expected_parent);
        assert_eq!(
            p.file_name().and_then(|s| s.to_str()),
            Some("taxonomy_cache.json")
        );
    }

    #[test]
    fn cache_path_preserves_absolute_paths() {
        // Build the paths from a real temp directory so they are absolute on
        // every platform; a literal such as `/tmp/...` is not on Windows.
        let tmp = TempDir::new().expect("tempdir");
        let root = CacheRoot::from_root(tmp.path().join("project"));
        let absolute = tmp.path().join("custom").join("cache.json");
        assert!(absolute.is_absolute(), "test precondition: absolute path");

        let resolved = root.cache_path(".cache", &absolute);
        assert_eq!(resolved, absolute);
    }

    #[test]
    fn ensure_dir_with_policy_max_files() {
        let tmp = TempDir::new().expect("tempdir");
        let group = artifacts_group(&tmp);

        fs::write(group.entry_path("a.txt"), b"1").expect("write a");
        fs::write(group.entry_path("b.txt"), b"1").expect("write b");
        fs::write(group.entry_path("c.txt"), b"1").expect("write c");

        let policy = EvictPolicy {
            max_files: Some(2),
            ..EvictPolicy::default()
        };
        group
            .ensure_dir_with_policy(Some(&policy))
            .expect("ensure with policy");

        let files = collect_files(group.path()).expect("collect files");
        assert_eq!(files.len(), 2);
    }

    #[test]
    fn ensure_dir_with_policy_max_bytes() {
        let tmp = TempDir::new().expect("tempdir");
        let group = artifacts_group(&tmp);

        fs::write(group.entry_path("a.bin"), vec![1u8; 5]).expect("write a");
        fs::write(group.entry_path("b.bin"), vec![1u8; 5]).expect("write b");
        fs::write(group.entry_path("c.bin"), vec![1u8; 5]).expect("write c");

        let policy = EvictPolicy {
            max_bytes: Some(10),
            ..EvictPolicy::default()
        };
        group
            .ensure_dir_with_policy(Some(&policy))
            .expect("ensure with policy");

        assert!(total_bytes(group.path()) <= 10);
    }

    #[test]
    fn ensure_dir_with_policy_max_age_zero_evicts_all() {
        let tmp = TempDir::new().expect("tempdir");
        let group = artifacts_group(&tmp);

        fs::write(group.entry_path("a.txt"), b"1").expect("write a");
        fs::write(group.entry_path("b.txt"), b"1").expect("write b");

        let policy = EvictPolicy {
            max_age: Some(Duration::ZERO),
            ..EvictPolicy::default()
        };
        group
            .ensure_dir_with_policy(Some(&policy))
            .expect("ensure with policy");

        let files = collect_files(group.path()).expect("collect files");
        assert!(files.is_empty());
    }

    #[test]
    fn eviction_report_matches_applied_evictions() {
        let tmp = TempDir::new().expect("tempdir");
        let group = artifacts_group(&tmp);

        fs::write(group.entry_path("a.bin"), vec![1u8; 5]).expect("write a");
        fs::write(group.entry_path("b.bin"), vec![1u8; 5]).expect("write b");
        fs::write(group.entry_path("c.bin"), vec![1u8; 5]).expect("write c");

        let policy = EvictPolicy {
            max_bytes: Some(10),
            ..EvictPolicy::default()
        };

        let before = file_paths(group.path());

        let report = group.eviction_report(&policy).expect("eviction report");
        let planned: BTreeSet<PathBuf> = report.marked_for_eviction.iter().cloned().collect();

        group
            .ensure_dir_with_policy(Some(&policy))
            .expect("ensure with policy");

        let after = file_paths(group.path());

        let expected_after: BTreeSet<PathBuf> = before.difference(&planned).cloned().collect();
        assert_eq!(after, expected_after);
    }

    #[test]
    fn no_policy_and_default_policy_report_do_not_mark_evictions() {
        let tmp = TempDir::new().expect("tempdir");
        let group = artifacts_group(&tmp);

        fs::write(group.entry_path("a.txt"), b"1").expect("write a");
        fs::write(group.entry_path("b.txt"), b"1").expect("write b");

        let report = group
            .eviction_report(&EvictPolicy::default())
            .expect("eviction report");
        assert!(report.marked_for_eviction.is_empty());

        group
            .ensure_dir_with_policy(None)
            .expect("ensure with no policy");

        let files = collect_files(group.path()).expect("collect files");
        assert_eq!(files.len(), 2);
    }

    #[test]
    fn single_root_supports_distinct_policies_per_subdirectory() {
        let tmp = TempDir::new().expect("tempdir");
        let cache = CacheRoot::from_root(tmp.path());

        let images = cache.group("artifacts/images");
        let reports = cache.group("artifacts/reports");

        images.ensure_dir().expect("ensure images dir");
        reports.ensure_dir().expect("ensure reports dir");

        fs::write(images.entry_path("img1.bin"), vec![1u8; 5]).expect("write img1");
        fs::write(images.entry_path("img2.bin"), vec![1u8; 5]).expect("write img2");
        fs::write(images.entry_path("img3.bin"), vec![1u8; 5]).expect("write img3");

        fs::write(reports.entry_path("a.txt"), b"1").expect("write report a");
        fs::write(reports.entry_path("b.txt"), b"1").expect("write report b");
        fs::write(reports.entry_path("c.txt"), b"1").expect("write report c");

        let images_policy = EvictPolicy {
            max_bytes: Some(10),
            ..EvictPolicy::default()
        };
        let reports_policy = EvictPolicy {
            max_files: Some(1),
            ..EvictPolicy::default()
        };

        images
            .ensure_dir_with_policy(Some(&images_policy))
            .expect("apply images policy");
        reports
            .ensure_dir_with_policy(Some(&reports_policy))
            .expect("apply reports policy");

        assert!(total_bytes(images.path()) <= 10);

        let reports_files = collect_files(reports.path()).expect("collect reports files");
        assert_eq!(reports_files.len(), 1);
    }
}