Skip to main content

haz_cache/
clean.rs

1//! Cache invalidation per `CACHE-021` and `AUX-022..AUX-027`.
2//!
3//! Two entry points:
4//!
5//! - [`Cache::clear`] is the unconditional reset behind
6//!   `haz cache clear`: it removes the entire cache root in a
7//!   single recursive call. Subsequent lookups against the same
8//!   workspace are misses until new entries are stored.
9//! - [`Cache::clean`] is the composable selective reclamation
10//!   behind `haz cache clean`. It accepts a [`CleanOptions`] mode
11//!   set and applies the modes in spec-mandated priority order:
12//!     1. `soft` (`CACHE-022`): reclaim every *objectively
13//!        stale* artefact, entry directories with a missing,
14//!        unparseable, or schema-mismatched manifest plus
15//!        `.tmp-<key>-<random>` store-time directories and
16//!        `.restore-<key>-<random>` restore-time staging
17//!        directories.
18//!     2. `max_age` (`AUX-023` step 4): evict every remaining
19//!        well-formed entry whose `created_at` is strictly older
20//!        than `now_unix - max_age`.
21//!     3. `max_size` (`AUX-023` step 5): if the well-formed
22//!        survivors' footprint exceeds `max_size`, evict
23//!        oldest-`created_at`-first until the residual footprint
24//!        is at or below `max_size`.
25//!     4. `dry_run` (`AUX-024`): compute the eviction set but
26//!        make no on-disk changes.
27//!
28//! Per `AUX-024`, when more than one mode would name the same
29//! entry, the entry counts in the highest-priority mode
30//! (`soft` > `max_age` > `max_size`). [`CleanReport::evicted_entries`]
31//! carries one [`EvictedEntry`] per evicted entry, labelled with
32//! its priority mode.
33//!
34//! Both methods are idempotent on an absent cache root: calling
35//! them when `<workspace>/.haz/cache` does not exist is a no-op,
36//! not an error.
37
38use std::path::{Path, PathBuf};
39
40use haz_domain::settings::cache_clean::max_age::MaxAge;
41use haz_domain::settings::cache_clean::max_size::MaxSize;
42use haz_vfs::{EntryKind, FsError, WritableFilesystem};
43use snafu::{ResultExt, Snafu};
44
45use crate::cache::Cache;
46use crate::layout;
47use crate::manifest::{HashFunctionLabel, Manifest};
48
49/// Failure modes shared by [`Cache::clear`] and [`Cache::clean`].
50#[derive(Debug, Snafu)]
51pub enum CleanError {
52    /// Underlying filesystem error during the walk or removal.
53    /// The wrapped [`FsError`] carries the specific path.
54    #[snafu(display("filesystem error during cache invalidation: {source}"))]
55    Io {
56        /// The originating filesystem error.
57        source: FsError,
58    },
59}
60
61/// Composable mode flags for [`Cache::clean`] per `AUX-022`.
62///
63/// At least one of `soft`, `max_age`, `max_size` MUST be supplied
64/// for the call to remove anything; the CLI layer enforces the
65/// "must supply a mode" rule per `AUX-022`, so a [`Cache::clean`]
66/// call with all-false / all-`None` mode fields is a well-defined
67/// no-op (the report shows zero counts).
68#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
69pub struct CleanOptions {
70    /// `CACHE-022` / `--soft`: reclaim objectively-stale state
71    /// (manifest missing, unparseable, or schema-mismatched, plus
72    /// orphan `.tmp-` and `.restore-` staging directories).
73    pub soft: bool,
74    /// `AUX-022` / `--max-age <DURATION>`: evict well-formed
75    /// entries whose `created_at` is strictly older than
76    /// `now_unix - max_age`. The cutoff is computed via
77    /// [`u64::saturating_sub`], so a workspace clock that has not
78    /// advanced past the threshold yields an empty eviction set
79    /// rather than an overflow.
80    pub max_age: Option<MaxAge>,
81    /// `AUX-022` / `--max-size <BYTES>`: after `soft` and
82    /// `max_age` have run, evict oldest-`created_at`-first
83    /// well-formed survivors until the residual footprint is at
84    /// or below `max_size`.
85    pub max_size: Option<MaxSize>,
86    /// `AUX-024` / `--dry-run`: compute the eviction set but DO
87    /// NOT remove anything from disk. The returned [`CleanReport`]
88    /// mirrors the non-dry-run report exactly, modulo the absence
89    /// of on-disk changes.
90    pub dry_run: bool,
91    /// Reference "now" in Unix seconds since the epoch. Used as
92    /// the right-hand side of the `AUX-023` step 4 cutoff
93    /// (`now_unix - max_age`). Required to be non-zero when
94    /// `max_age` is set; ignored otherwise. The injected value
95    /// makes the operation deterministic under test.
96    pub now_unix: u64,
97}
98
/// The mode an evicted entry is attributed to, per `AUX-024`.
///
/// An entry that several modes would match is attributed to the
/// highest-priority one; variants are declared from highest to
/// lowest priority.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EvictionMode {
    /// `--soft` pass (`CACHE-022`): the entry is schema-stale or
    /// incomplete.
    Soft,
    /// `--max-age` pass (`AUX-023`): the entry is well-formed but
    /// too old.
    MaxAge,
    /// `--max-size` pass (`AUX-023`): the entry is well-formed and
    /// was among the oldest survivors of the earlier passes when
    /// the residual footprint exceeded the bound.
    MaxSize,
}
115
116/// Per-entry eviction detail surfaced for `AUX-024` dry-run
117/// rendering.
118///
119/// One instance per evicted entry; the CLI displays the list
120/// only under `--dry-run` per `AUX-024`, but the cache layer
121/// always populates it (the cost is one `Vec` allocation of
122/// modest size, and the audit-friendliness is worth it).
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct EvictedEntry {
125    /// First eight hex characters of the entry's cache key, per
126    /// `AUX-024` ("first 8 hex characters suffice for
127    /// human-readable output"). Sourced from the entry
128    /// directory's basename when the manifest is unparseable.
129    pub key_hex_prefix: String,
130    /// Manifest's `created_at_unix` field when available; zero
131    /// when the manifest is missing or unparseable.
132    pub created_at_unix: u64,
133    /// Manifest-declared footprint of the entry (`stdout_len +
134    /// stderr_len + sum(outputs[].size)`). Zero when the manifest
135    /// is unavailable.
136    pub footprint: u64,
137    /// Priority mode that accounted for this entry's eviction.
138    pub matched_mode: EvictionMode,
139}
140
141/// Outcome of [`Cache::clean`] per `AUX-024`.
142///
143/// All counts are zero on an absent cache root. Per-mode counts
144/// follow the `AUX-024` priority rule (`soft` > `max_age` >
145/// `max_size`): an entry never contributes to more than one
146/// per-mode counter.
147#[derive(Debug, Default, Clone, PartialEq, Eq)]
148pub struct CleanReport {
149    /// `AUX-024` step 1: total number of entry directories the
150    /// walk looked at (well-formed + corrupt). Excludes
151    /// `.tmp-`/`.restore-` reclaimables.
152    pub inspected: u64,
153    /// `AUX-024` step 2: entries evicted under the `--soft` mode.
154    pub evicted_by_soft: u64,
155    /// `AUX-024` step 2: entries evicted under the `--max-age`
156    /// mode.
157    pub evicted_by_max_age: u64,
158    /// `AUX-024` step 2: entries evicted under the `--max-size`
159    /// mode.
160    pub evicted_by_max_size: u64,
161    /// `CACHE-022`: `.tmp-<key>-<random>` directories reclaimed.
162    /// Always zero when `soft` is false.
163    pub removed_tmp_dirs: u64,
164    /// `CACHE-022`: `.restore-<key>-<random>` directories
165    /// reclaimed. Always zero when `soft` is false.
166    pub removed_restore_dirs: u64,
167    /// `AUX-024` step 3: total bytes reclaimed (or projected
168    /// under `--dry-run`). Sums each evicted entry's
169    /// manifest-declared footprint; entries with no parseable
170    /// manifest contribute zero. `.tmp-`/`.restore-` reclamations
171    /// contribute zero.
172    pub bytes_reclaimed: u64,
173    /// One detail per evicted entry per `AUX-024`'s dry-run
174    /// requirement (always populated regardless of `dry_run`).
175    /// Sorted by `(matched_mode priority, created_at, key)` for
176    /// determinism.
177    pub evicted_entries: Vec<EvictedEntry>,
178}
179
180impl<Fs: WritableFilesystem> Cache<Fs> {
181    /// Remove every cache entry under
182    /// `<workspace_root>/.haz/cache`, per `CACHE-021`.
183    ///
184    /// Idempotent on an absent cache root: calling `clear` when
185    /// the cache tree does not exist returns `Ok(())`, not an
186    /// error. The implementation only touches paths under the
187    /// cache root; the rest of the workspace is left alone.
188    ///
189    /// # Errors
190    ///
191    /// Returns [`CleanError::Io`] wrapping the underlying
192    /// [`FsError`] if the recursive removal fails for any reason
193    /// other than "the cache root did not exist".
194    pub fn clear(&self) -> Result<(), CleanError> {
195        match self.fs().remove_dir_all(self.cache_root()) {
196            Ok(()) | Err(FsError::NotFound { .. }) => Ok(()),
197            Err(e) => Err(CleanError::Io { source: e }),
198        }
199    }
200
201    /// Walk the cache root and reclaim every artefact named by the
202    /// `CleanOptions` mode flags per `AUX-022..AUX-027`.
203    ///
204    /// Eviction priority follows `AUX-024` (`soft` > `max_age` >
205    /// `max_size`); each entry contributes to exactly one
206    /// per-mode count. Under `dry_run`, the eviction set is
207    /// computed and reported but no file or directory is removed.
208    ///
209    /// Idempotent on an absent cache root.
210    ///
211    /// # Errors
212    ///
213    /// Returns [`CleanError::Io`] wrapping the underlying
214    /// [`FsError`] if any filesystem operation along the walk
215    /// fails. Unparseable manifests are NOT an error: those
216    /// entries surface as the `soft`-eligible "objectively stale"
217    /// case.
218    pub fn clean(&self, opts: &CleanOptions) -> Result<CleanReport, CleanError> {
219        let Some(enumerated) = self.enumerate_for_clean()? else {
220            return Ok(CleanReport::default());
221        };
222        let CleanEnumeration {
223            well_formed,
224            corrupt,
225            tmp_paths,
226            restore_paths,
227        } = enumerated;
228
229        let mut report = CleanReport {
230            inspected: (well_formed.len() + corrupt.len()) as u64,
231            ..CleanReport::default()
232        };
233
234        let mut plan: Vec<PlannedEviction> = Vec::new();
235        apply_soft_pass(opts, corrupt, &mut plan, &mut report);
236        let survivors = apply_max_age_pass(opts, well_formed, &mut plan, &mut report);
237        apply_max_size_pass(opts, survivors, &mut plan, &mut report);
238
239        if !opts.dry_run {
240            for planned in &plan {
241                self.fs().remove_dir_all(&planned.path).context(IoSnafu)?;
242            }
243        }
244        report.bytes_reclaimed = plan.iter().map(|p| p.detail.footprint).sum();
245
246        if opts.soft {
247            report.removed_tmp_dirs = tmp_paths.len() as u64;
248            report.removed_restore_dirs = restore_paths.len() as u64;
249            if !opts.dry_run {
250                for p in &tmp_paths {
251                    self.fs().remove_dir_all(p).context(IoSnafu)?;
252                }
253                for p in &restore_paths {
254                    self.fs().remove_dir_all(p).context(IoSnafu)?;
255                }
256            }
257        }
258
259        report.evicted_entries = finalize_evicted_entries(plan);
260        Ok(report)
261    }
262
263    fn enumerate_for_clean(&self) -> Result<Option<CleanEnumeration>, CleanError> {
264        let cache_entries = match self.fs().read_dir(self.cache_root()) {
265            Ok(es) => es,
266            Err(FsError::NotFound { .. }) => return Ok(None),
267            Err(e) => return Err(CleanError::Io { source: e }),
268        };
269        let mut e = CleanEnumeration::default();
270        for cache_entry in cache_entries {
271            let name = cache_entry
272                .path
273                .file_name()
274                .map(|n| n.to_string_lossy().into_owned())
275                .unwrap_or_default();
276
277            if name.starts_with(".restore-") {
278                e.restore_paths.push(cache_entry.path);
279                continue;
280            }
281            if cache_entry.metadata.kind != EntryKind::Dir {
282                continue;
283            }
284            self.clean_classify_shard(
285                &cache_entry.path,
286                &mut e.well_formed,
287                &mut e.corrupt,
288                &mut e.tmp_paths,
289            )?;
290        }
291        Ok(Some(e))
292    }
293
294    fn clean_classify_shard(
295        &self,
296        shard_dir: &Path,
297        well_formed: &mut Vec<EntryRecord>,
298        corrupt: &mut Vec<EntryRecord>,
299        tmp_paths: &mut Vec<PathBuf>,
300    ) -> Result<(), CleanError> {
301        let shard_entries = self.fs().read_dir(shard_dir).context(IoSnafu)?;
302        for shard_entry in shard_entries {
303            let sname = shard_entry
304                .path
305                .file_name()
306                .map(|n| n.to_string_lossy().into_owned())
307                .unwrap_or_default();
308
309            if sname.starts_with(".tmp-") {
310                tmp_paths.push(shard_entry.path);
311                continue;
312            }
313
314            if shard_entry.metadata.kind != EntryKind::Dir {
315                continue;
316            }
317
318            self.clean_classify_entry(&shard_entry.path, &sname, well_formed, corrupt)?;
319        }
320        Ok(())
321    }
322
323    fn clean_classify_entry(
324        &self,
325        entry_dir: &Path,
326        basename: &str,
327        well_formed: &mut Vec<EntryRecord>,
328        corrupt: &mut Vec<EntryRecord>,
329    ) -> Result<(), CleanError> {
330        let key_hex_prefix: String = basename.chars().take(8).collect();
331        let manifest_path = entry_dir.join(layout::MANIFEST_FILE_NAME);
332        let bytes = match self.fs().read(&manifest_path) {
333            Ok(b) => b,
334            Err(FsError::NotFound { .. } | FsError::NotAFile { .. }) => {
335                corrupt.push(EntryRecord {
336                    path: entry_dir.to_path_buf(),
337                    key_hex_prefix,
338                    created_at_unix: 0,
339                    footprint: 0,
340                });
341                return Ok(());
342            }
343            Err(e) => return Err(CleanError::Io { source: e }),
344        };
345        let Ok(manifest) = Manifest::from_json(&bytes) else {
346            corrupt.push(EntryRecord {
347                path: entry_dir.to_path_buf(),
348                key_hex_prefix,
349                created_at_unix: 0,
350                footprint: 0,
351            });
352            return Ok(());
353        };
354        let chapter_ok = manifest.current_chapter_revision_matches();
355        let hash_ok = HashFunctionLabel::from(self.hash_algo()) == manifest.hash_function;
356        let footprint = manifest_footprint(&manifest);
357        let record = EntryRecord {
358            path: entry_dir.to_path_buf(),
359            key_hex_prefix,
360            created_at_unix: manifest.created_at_unix,
361            footprint,
362        };
363        if chapter_ok && hash_ok {
364            well_formed.push(record);
365        } else {
366            corrupt.push(record);
367        }
368        Ok(())
369    }
370}
371
/// One entry directory found by the enumeration walk, with the
/// manifest-derived facts the eviction passes need.
struct EntryRecord {
    /// Path of the entry directory.
    path: PathBuf,
    /// Up to the first eight characters of the directory's
    /// basename.
    key_hex_prefix: String,
    /// `created_at_unix` from the manifest, or zero when no
    /// manifest could be parsed.
    created_at_unix: u64,
    /// Manifest-declared footprint, or zero when no manifest could
    /// be parsed.
    footprint: u64,
}
378
379struct PlannedEviction {
380    path: PathBuf,
381    detail: EvictedEntry,
382}
383
384#[derive(Default)]
385struct CleanEnumeration {
386    well_formed: Vec<EntryRecord>,
387    corrupt: Vec<EntryRecord>,
388    tmp_paths: Vec<PathBuf>,
389    restore_paths: Vec<PathBuf>,
390}
391
392fn apply_soft_pass(
393    opts: &CleanOptions,
394    corrupt: Vec<EntryRecord>,
395    plan: &mut Vec<PlannedEviction>,
396    report: &mut CleanReport,
397) {
398    if !opts.soft {
399        return;
400    }
401    let soft_count = corrupt.len() as u64;
402    for c in corrupt {
403        plan.push(PlannedEviction {
404            path: c.path,
405            detail: EvictedEntry {
406                key_hex_prefix: c.key_hex_prefix,
407                created_at_unix: c.created_at_unix,
408                footprint: c.footprint,
409                matched_mode: EvictionMode::Soft,
410            },
411        });
412    }
413    report.evicted_by_soft = soft_count;
414}
415
416fn apply_max_age_pass(
417    opts: &CleanOptions,
418    well_formed: Vec<EntryRecord>,
419    plan: &mut Vec<PlannedEviction>,
420    report: &mut CleanReport,
421) -> Vec<EntryRecord> {
422    let Some(max_age) = opts.max_age else {
423        return well_formed;
424    };
425    let cutoff = opts
426        .now_unix
427        .saturating_sub(max_age.as_duration().as_secs());
428    let mut survivors: Vec<EntryRecord> = Vec::with_capacity(well_formed.len());
429    let mut evicted: u64 = 0;
430    for wf in well_formed {
431        if wf.created_at_unix < cutoff {
432            plan.push(PlannedEviction {
433                path: wf.path.clone(),
434                detail: EvictedEntry {
435                    key_hex_prefix: wf.key_hex_prefix.clone(),
436                    created_at_unix: wf.created_at_unix,
437                    footprint: wf.footprint,
438                    matched_mode: EvictionMode::MaxAge,
439                },
440            });
441            evicted += 1;
442        } else {
443            survivors.push(wf);
444        }
445    }
446    report.evicted_by_max_age = evicted;
447    survivors
448}
449
450fn apply_max_size_pass(
451    opts: &CleanOptions,
452    mut survivors: Vec<EntryRecord>,
453    plan: &mut Vec<PlannedEviction>,
454    report: &mut CleanReport,
455) {
456    let Some(max_size) = opts.max_size else {
457        return;
458    };
459    let limit = max_size.as_bytes();
460    let total: u64 = survivors.iter().map(|e| e.footprint).sum();
461    if total <= limit {
462        return;
463    }
464    survivors.sort_by(|a, b| {
465        a.created_at_unix
466            .cmp(&b.created_at_unix)
467            .then_with(|| a.key_hex_prefix.cmp(&b.key_hex_prefix))
468    });
469    let mut remaining = total;
470    let mut evicted: u64 = 0;
471    for wf in &survivors {
472        if remaining <= limit {
473            break;
474        }
475        plan.push(PlannedEviction {
476            path: wf.path.clone(),
477            detail: EvictedEntry {
478                key_hex_prefix: wf.key_hex_prefix.clone(),
479                created_at_unix: wf.created_at_unix,
480                footprint: wf.footprint,
481                matched_mode: EvictionMode::MaxSize,
482            },
483        });
484        remaining = remaining.saturating_sub(wf.footprint);
485        evicted += 1;
486    }
487    report.evicted_by_max_size = evicted;
488}
489
490fn finalize_evicted_entries(plan: Vec<PlannedEviction>) -> Vec<EvictedEntry> {
491    let mut details: Vec<EvictedEntry> = plan.into_iter().map(|p| p.detail).collect();
492    details.sort_by(|a, b| {
493        mode_rank(a.matched_mode)
494            .cmp(&mode_rank(b.matched_mode))
495            .then(a.created_at_unix.cmp(&b.created_at_unix))
496            .then_with(|| a.key_hex_prefix.cmp(&b.key_hex_prefix))
497    });
498    details
499}
500
501fn manifest_footprint(m: &Manifest) -> u64 {
502    let mut total = m.stdout_len.saturating_add(m.stderr_len);
503    for o in &m.outputs {
504        total = total.saturating_add(o.size);
505    }
506    total
507}
508
509const fn mode_rank(m: EvictionMode) -> u8 {
510    match m {
511        EvictionMode::Soft => 0,
512        EvictionMode::MaxAge => 1,
513        EvictionMode::MaxSize => 2,
514    }
515}
516
517#[cfg(test)]
518mod tests {
519    use std::path::Path;
520
521    use haz_domain::path::CanonicalPath;
522    use haz_domain::settings::cache::HashAlgo;
523    use haz_domain::settings::cache_clean::max_age::MaxAge;
524    use haz_domain::settings::cache_clean::max_size::MaxSize;
525    use haz_vfs::{Filesystem, MemFilesystem, WritableFilesystem};
526
527    use crate::cache::Cache;
528    use crate::clean::{CleanOptions, CleanReport, EvictionMode};
529    use crate::key::CacheKey;
530    use crate::key::prefix::CHAPTER_REVISION;
531    use crate::layout;
532    use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
533    use crate::store::{StoreInputs, StoredOutput};
534
535    fn cp(s: &str) -> CanonicalPath {
536        CanonicalPath::parse_workspace_absolute(s)
537            .expect("test helper expects a valid workspace-absolute path")
538    }
539
540    const WORKSPACE_ROOT: &str = "/ws";
541
542    fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> Cache<MemFilesystem> {
543        Cache::new(fs, Path::new(WORKSPACE_ROOT), algo)
544    }
545
546    fn key_with_first_byte(first: u8) -> CacheKey {
547        let mut bytes = [0u8; 32];
548        bytes[0] = first;
549        CacheKey::from_bytes(bytes)
550    }
551
552    fn store_entry_at(
553        cache: &Cache<MemFilesystem>,
554        key: &CacheKey,
555        rel: &str,
556        bytes: &[u8],
557        created_at_unix: u64,
558    ) {
559        let target = Path::new(WORKSPACE_ROOT).join(rel);
560        let anchored = format!("/{rel}");
561        cache.fs().create_dir_all(target.parent().unwrap()).unwrap();
562        cache.fs().write_file(&target, bytes).unwrap();
563        let outs = [StoredOutput {
564            workspace_absolute_path: &anchored,
565            on_disk_path: &target,
566            mode: 0o644,
567        }];
568        cache
569            .store(
570                key,
571                &StoreInputs {
572                    outputs: &outs,
573                    stdout: b"",
574                    stderr: b"",
575                    created_at_unix,
576                },
577            )
578            .unwrap();
579    }
580
581    fn store_a_valid_entry(cache: &Cache<MemFilesystem>, key: &CacheKey, rel: &str, bytes: &[u8]) {
582        store_entry_at(cache, key, rel, bytes, 0);
583    }
584
585    fn write_manifest_to_entry(cache: &Cache<MemFilesystem>, key: &CacheKey, manifest: &Manifest) {
586        cache
587            .fs()
588            .create_dir_all(&layout::entry_dir(cache.cache_root(), key))
589            .unwrap();
590        cache
591            .fs()
592            .write_file(
593                &layout::manifest_path(cache.cache_root(), key),
594                &manifest.to_json_bytes(),
595            )
596            .unwrap();
597    }
598
599    fn soft_only() -> CleanOptions {
600        CleanOptions {
601            soft: true,
602            ..Default::default()
603        }
604    }
605
606    // ---- clear ----
607
608    #[test]
609    fn cache_021_clear_empties_a_populated_cache() {
610        let mut fs = MemFilesystem::new();
611        fs.add_dir("/ws").unwrap();
612        let cache = make_cache(fs, HashAlgo::Blake3);
613        let key = key_with_first_byte(0xAB);
614        store_a_valid_entry(&cache, &key, "proj/out", b"x");
615
616        assert!(cache.lookup(&key).is_some(), "precondition: entry present");
617        cache.clear().unwrap();
618        assert!(
619            cache.lookup(&key).is_none(),
620            "lookup must be a miss after clear"
621        );
622    }
623
624    #[test]
625    fn cache_021_clear_on_fresh_cache_is_a_noop_not_an_error() {
626        let mut fs = MemFilesystem::new();
627        fs.add_dir("/ws").unwrap();
628        let cache = make_cache(fs, HashAlgo::Blake3);
629        cache.clear().unwrap();
630    }
631
632    #[test]
633    fn cache_021_clear_does_not_touch_files_outside_cache_root() {
634        let mut fs = MemFilesystem::new();
635        fs.add_dir("/ws").unwrap();
636        fs.add_file("/ws/unrelated.txt", b"keep me".to_vec())
637            .unwrap();
638        let cache = make_cache(fs, HashAlgo::Blake3);
639        let key = key_with_first_byte(0xAB);
640        store_a_valid_entry(&cache, &key, "proj/out", b"x");
641
642        cache.clear().unwrap();
643        assert_eq!(
644            cache.fs().read(Path::new("/ws/unrelated.txt")).unwrap(),
645            b"keep me"
646        );
647    }
648
649    // ---- clean: no-op cases ----
650
651    #[test]
652    fn cache_022_clean_soft_on_fresh_cache_is_a_noop_with_zero_counts() {
653        let mut fs = MemFilesystem::new();
654        fs.add_dir("/ws").unwrap();
655        let cache = make_cache(fs, HashAlgo::Blake3);
656        let report = cache.clean(&soft_only()).unwrap();
657        assert_eq!(report, CleanReport::default());
658    }
659
660    #[test]
661    fn aux_022_clean_with_no_modes_is_a_noop_on_a_populated_cache() {
662        let mut fs = MemFilesystem::new();
663        fs.add_dir("/ws").unwrap();
664        let cache = make_cache(fs, HashAlgo::Blake3);
665        let key = key_with_first_byte(0xAB);
666        store_a_valid_entry(&cache, &key, "proj/out", b"x");
667
668        let report = cache.clean(&CleanOptions::default()).unwrap();
669        assert_eq!(report.evicted_by_soft, 0);
670        assert_eq!(report.evicted_by_max_age, 0);
671        assert_eq!(report.evicted_by_max_size, 0);
672        assert_eq!(report.removed_tmp_dirs, 0);
673        assert_eq!(report.removed_restore_dirs, 0);
674        assert_eq!(report.inspected, 1);
675        assert!(cache.lookup(&key).is_some());
676    }
677
678    #[test]
679    fn cache_022_clean_soft_keeps_a_valid_entry_intact() {
680        let mut fs = MemFilesystem::new();
681        fs.add_dir("/ws").unwrap();
682        let cache = make_cache(fs, HashAlgo::Blake3);
683        let key = key_with_first_byte(0xAB);
684        store_a_valid_entry(&cache, &key, "proj/out", b"x");
685
686        let report = cache.clean(&soft_only()).unwrap();
687        assert_eq!(report.evicted_by_soft, 0);
688        assert!(cache.lookup(&key).is_some());
689    }
690
691    // ---- clean --soft: schema mismatch ----
692
693    #[test]
694    fn cache_022_clean_soft_removes_entry_with_chapter_revision_mismatch() {
695        let mut fs = MemFilesystem::new();
696        fs.add_dir("/ws").unwrap();
697        let cache = make_cache(fs, HashAlgo::Blake3);
698        let key = key_with_first_byte(0xAB);
699        let manifest = Manifest {
700            chapter_revision: CHAPTER_REVISION.saturating_add(1),
701            hash_function: HashFunctionLabel::Blake3,
702            key,
703            outputs: vec![],
704            stdout_len: 0,
705            stderr_len: 0,
706            stdout_hash: [0u8; 32],
707            stderr_hash: [0u8; 32],
708            exit_status: 0,
709            created_at_unix: 0,
710        };
711        write_manifest_to_entry(&cache, &key, &manifest);
712        assert!(
713            cache
714                .fs()
715                .metadata(&layout::entry_dir(cache.cache_root(), &key))
716                .is_ok()
717        );
718
719        let report = cache.clean(&soft_only()).unwrap();
720        assert_eq!(report.evicted_by_soft, 1);
721        assert!(
722            cache
723                .fs()
724                .metadata(&layout::entry_dir(cache.cache_root(), &key))
725                .is_err()
726        );
727    }
728
729    #[test]
730    fn cache_022_clean_soft_removes_entry_with_hash_function_mismatch() {
731        let mut fs = MemFilesystem::new();
732        fs.add_dir("/ws").unwrap();
733        let cache = make_cache(fs, HashAlgo::Blake3);
734        let key = key_with_first_byte(0xAB);
735        let manifest = Manifest {
736            chapter_revision: CHAPTER_REVISION,
737            hash_function: HashFunctionLabel::Sha256,
738            key,
739            outputs: vec![],
740            stdout_len: 0,
741            stderr_len: 0,
742            stdout_hash: [0u8; 32],
743            stderr_hash: [0u8; 32],
744            exit_status: 0,
745            created_at_unix: 0,
746        };
747        write_manifest_to_entry(&cache, &key, &manifest);
748
749        let report = cache.clean(&soft_only()).unwrap();
750        assert_eq!(report.evicted_by_soft, 1);
751    }
752
753    // ---- clean --soft: incomplete entry ----
754
755    #[test]
756    fn cache_022_clean_soft_removes_entry_without_a_manifest() {
757        let mut fs = MemFilesystem::new();
758        fs.add_dir("/ws").unwrap();
759        let cache = make_cache(fs, HashAlgo::Blake3);
760        let key = key_with_first_byte(0xAB);
761        cache
762            .fs()
763            .create_dir_all(&layout::entry_dir(cache.cache_root(), &key))
764            .unwrap();
765
766        let report = cache.clean(&soft_only()).unwrap();
767        assert_eq!(report.evicted_by_soft, 1);
768    }
769
770    #[test]
771    fn cache_022_clean_soft_removes_entry_with_unparseable_manifest() {
772        let mut fs = MemFilesystem::new();
773        fs.add_dir("/ws").unwrap();
774        let cache = make_cache(fs, HashAlgo::Blake3);
775        let key = key_with_first_byte(0xAB);
776        cache
777            .fs()
778            .create_dir_all(&layout::entry_dir(cache.cache_root(), &key))
779            .unwrap();
780        cache
781            .fs()
782            .write_file(
783                &layout::manifest_path(cache.cache_root(), &key),
784                b"this is not json",
785            )
786            .unwrap();
787
788        let report = cache.clean(&soft_only()).unwrap();
789        assert_eq!(report.evicted_by_soft, 1);
790    }
791
792    // ---- clean --soft: tmp / restore dirs ----
793
794    #[test]
795    fn cache_022_clean_soft_removes_store_tmp_directory() {
796        let mut fs = MemFilesystem::new();
797        fs.add_dir("/ws").unwrap();
798        let cache = make_cache(fs, HashAlgo::Blake3);
799        let key = key_with_first_byte(0xAB);
800        let tmp = layout::tmp_entry_dir(cache.cache_root(), &key, "abcdef");
801        cache.fs().create_dir_all(&tmp).unwrap();
802        cache
803            .fs()
804            .write_file(&tmp.join("manifest.json"), b"partial")
805            .unwrap();
806
807        let report = cache.clean(&soft_only()).unwrap();
808        assert_eq!(report.removed_tmp_dirs, 1);
809        assert!(cache.fs().metadata(&tmp).is_err());
810    }
811
812    #[test]
813    fn cache_022_clean_soft_removes_restore_staging_directory() {
814        let mut fs = MemFilesystem::new();
815        fs.add_dir("/ws").unwrap();
816        let cache = make_cache(fs, HashAlgo::Blake3);
817        let key = key_with_first_byte(0xAB);
818        let staging = layout::restore_staging_dir(cache.cache_root(), &key, "feedface");
819        cache.fs().create_dir_all(&staging).unwrap();
820        cache
821            .fs()
822            .write_file(&staging.join("00000000"), b"leftover")
823            .unwrap();
824
825        let report = cache.clean(&soft_only()).unwrap();
826        assert_eq!(report.removed_restore_dirs, 1);
827        assert!(cache.fs().metadata(&staging).is_err());
828    }
829
830    // ---- clean --soft: mixed state ----
831
832    #[test]
833    fn cache_022_clean_soft_is_selective_when_mixed_state_is_present() {
834        let mut fs = MemFilesystem::new();
835        fs.add_dir("/ws").unwrap();
836        let cache = make_cache(fs, HashAlgo::Blake3);
837
838        let key_good = key_with_first_byte(0xAB);
839        store_a_valid_entry(&cache, &key_good, "proj/out", b"x");
840
841        let key_stale = key_with_first_byte(0xCD);
842        let stale_manifest = Manifest {
843            chapter_revision: CHAPTER_REVISION,
844            hash_function: HashFunctionLabel::Sha256,
845            key: key_stale,
846            outputs: vec![],
847            stdout_len: 0,
848            stderr_len: 0,
849            stdout_hash: [0u8; 32],
850            stderr_hash: [0u8; 32],
851            exit_status: 0,
852            created_at_unix: 0,
853        };
854        write_manifest_to_entry(&cache, &key_stale, &stale_manifest);
855
856        let key_tmp = key_with_first_byte(0xEF);
857        let tmp = layout::tmp_entry_dir(cache.cache_root(), &key_tmp, "rnd1");
858        cache.fs().create_dir_all(&tmp).unwrap();
859
860        let key_restore = key_with_first_byte(0x12);
861        let staging = layout::restore_staging_dir(cache.cache_root(), &key_restore, "rnd2");
862        cache.fs().create_dir_all(&staging).unwrap();
863
864        let report = cache.clean(&soft_only()).unwrap();
865        assert_eq!(report.evicted_by_soft, 1);
866        assert_eq!(report.removed_tmp_dirs, 1);
867        assert_eq!(report.removed_restore_dirs, 1);
868        assert!(cache.lookup(&key_good).is_some());
869    }
870
871    #[test]
872    fn cache_022_clean_soft_does_not_touch_files_outside_cache_root() {
873        let mut fs = MemFilesystem::new();
874        fs.add_dir("/ws").unwrap();
875        fs.add_file("/ws/sibling.txt", b"don't touch".to_vec())
876            .unwrap();
877        let cache = make_cache(fs, HashAlgo::Blake3);
878        let key = key_with_first_byte(0xAB);
879        cache
880            .fs()
881            .create_dir_all(&layout::entry_dir(cache.cache_root(), &key))
882            .unwrap();
883        cache.clean(&soft_only()).unwrap();
884        assert_eq!(
885            cache.fs().read(Path::new("/ws/sibling.txt")).unwrap(),
886            b"don't touch"
887        );
888    }
889
890    #[test]
891    fn cache_022_clean_soft_does_not_inspect_blob_contents() {
892        // `CACHE-022` only checks `chapter_revision`/
893        // `hash_function` and manifest presence/parseability. A
894        // blob-content mismatch is a lookup-time concern, not a
895        // clean-soft concern.
896        let mut fs = MemFilesystem::new();
897        fs.add_dir("/ws").unwrap();
898        let cache = make_cache(fs, HashAlgo::Blake3);
899        let key = key_with_first_byte(0xAB);
900
901        let manifest = Manifest {
902            chapter_revision: CHAPTER_REVISION,
903            hash_function: HashFunctionLabel::Blake3,
904            key,
905            outputs: vec![OutputBlob {
906                workspace_absolute_path: cp("/proj/out"),
907                content_hash: [0xAAu8; 32],
908                size: 42,
909                mode: 0o644,
910            }],
911            stdout_len: 0,
912            stderr_len: 0,
913            stdout_hash: [0u8; 32],
914            stderr_hash: [0u8; 32],
915            exit_status: 0,
916            created_at_unix: 0,
917        };
918        write_manifest_to_entry(&cache, &key, &manifest);
919
920        let report = cache.clean(&soft_only()).unwrap();
921        assert_eq!(report.evicted_by_soft, 0);
922        assert!(
923            cache
924                .fs()
925                .metadata(&layout::entry_dir(cache.cache_root(), &key))
926                .is_ok()
927        );
928    }
929
930    // ---- clean --max-age ----
931
932    #[test]
933    fn aux_023_clean_max_age_evicts_entries_strictly_older_than_cutoff() {
934        let mut fs = MemFilesystem::new();
935        fs.add_dir("/ws").unwrap();
936        let cache = make_cache(fs, HashAlgo::Blake3);
937
938        let key_old = key_with_first_byte(0xAA);
939        store_entry_at(&cache, &key_old, "proj/old", b"x", 100);
940        let key_new = key_with_first_byte(0xBB);
941        store_entry_at(&cache, &key_new, "proj/new", b"y", 260);
942
943        let opts = CleanOptions {
944            max_age: Some(MaxAge::parse("50s").unwrap()),
945            now_unix: 300,
946            ..Default::default()
947        };
948        let report = cache.clean(&opts).unwrap();
949        assert_eq!(report.evicted_by_max_age, 1);
950        assert_eq!(report.evicted_by_soft, 0);
951        assert_eq!(report.evicted_by_max_size, 0);
952        assert!(cache.lookup(&key_old).is_none());
953        assert!(cache.lookup(&key_new).is_some());
954    }
955
956    #[test]
957    fn aux_023_clean_max_age_keeps_entry_at_exactly_cutoff() {
958        let mut fs = MemFilesystem::new();
959        fs.add_dir("/ws").unwrap();
960        let cache = make_cache(fs, HashAlgo::Blake3);
961
962        let key = key_with_first_byte(0xAA);
963        store_entry_at(&cache, &key, "proj/x", b"x", 100);
964
965        let opts = CleanOptions {
966            max_age: Some(MaxAge::parse("100s").unwrap()),
967            now_unix: 200,
968            ..Default::default()
969        };
970        let report = cache.clean(&opts).unwrap();
971        assert_eq!(report.evicted_by_max_age, 0);
972        assert!(cache.lookup(&key).is_some());
973    }
974
975    #[test]
976    fn aux_023_clean_max_age_ignores_corrupt_entries() {
977        let mut fs = MemFilesystem::new();
978        fs.add_dir("/ws").unwrap();
979        let cache = make_cache(fs, HashAlgo::Blake3);
980
981        let key_corrupt = key_with_first_byte(0xCC);
982        let m = Manifest {
983            chapter_revision: CHAPTER_REVISION,
984            hash_function: HashFunctionLabel::Sha256,
985            key: key_corrupt,
986            outputs: vec![],
987            stdout_len: 0,
988            stderr_len: 0,
989            stdout_hash: [0u8; 32],
990            stderr_hash: [0u8; 32],
991            exit_status: 0,
992            created_at_unix: 0,
993        };
994        write_manifest_to_entry(&cache, &key_corrupt, &m);
995
996        let key_stale = key_with_first_byte(0xAA);
997        store_entry_at(&cache, &key_stale, "proj/x", b"x", 100);
998
999        let opts = CleanOptions {
1000            max_age: Some(MaxAge::parse("50s").unwrap()),
1001            now_unix: 300,
1002            ..Default::default()
1003        };
1004        let report = cache.clean(&opts).unwrap();
1005        assert_eq!(report.evicted_by_max_age, 1);
1006        assert_eq!(report.evicted_by_soft, 0);
1007        assert!(
1008            cache
1009                .fs()
1010                .metadata(&layout::entry_dir(cache.cache_root(), &key_corrupt))
1011                .is_ok()
1012        );
1013        assert!(cache.lookup(&key_stale).is_none());
1014    }
1015
1016    // ---- clean --max-size ----
1017
1018    #[test]
1019    fn aux_023_clean_max_size_is_noop_when_under_limit() {
1020        let mut fs = MemFilesystem::new();
1021        fs.add_dir("/ws").unwrap();
1022        let cache = make_cache(fs, HashAlgo::Blake3);
1023
1024        let key = key_with_first_byte(0xAA);
1025        store_entry_at(&cache, &key, "proj/x", b"hello", 100);
1026
1027        let opts = CleanOptions {
1028            max_size: Some(MaxSize::parse("1KB").unwrap()),
1029            ..Default::default()
1030        };
1031        let report = cache.clean(&opts).unwrap();
1032        assert_eq!(report.evicted_by_max_size, 0);
1033        assert!(cache.lookup(&key).is_some());
1034    }
1035
1036    #[test]
1037    fn aux_023_clean_max_size_evicts_oldest_first_until_at_or_below_limit() {
1038        let mut fs = MemFilesystem::new();
1039        fs.add_dir("/ws").unwrap();
1040        let cache = make_cache(fs, HashAlgo::Blake3);
1041
1042        let bytes = b"0123456789"; // 10 bytes per entry footprint
1043        let key_old = key_with_first_byte(0x11);
1044        let key_mid = key_with_first_byte(0x22);
1045        let key_new = key_with_first_byte(0x33);
1046        store_entry_at(&cache, &key_old, "proj/a", bytes, 100);
1047        store_entry_at(&cache, &key_mid, "proj/b", bytes, 200);
1048        store_entry_at(&cache, &key_new, "proj/c", bytes, 300);
1049
1050        // Total 30 bytes; limit 15. Evict oldest until <= 15.
1051        let opts = CleanOptions {
1052            max_size: Some(MaxSize::parse("15").unwrap()),
1053            ..Default::default()
1054        };
1055        let report = cache.clean(&opts).unwrap();
1056        assert_eq!(report.evicted_by_max_size, 2);
1057        assert!(cache.lookup(&key_old).is_none());
1058        assert!(cache.lookup(&key_mid).is_none());
1059        assert!(cache.lookup(&key_new).is_some());
1060        assert_eq!(report.bytes_reclaimed, 20);
1061    }
1062
1063    #[test]
1064    fn aux_023_clean_max_size_zero_evicts_every_well_formed_entry() {
1065        let mut fs = MemFilesystem::new();
1066        fs.add_dir("/ws").unwrap();
1067        let cache = make_cache(fs, HashAlgo::Blake3);
1068
1069        let key = key_with_first_byte(0xAA);
1070        store_entry_at(&cache, &key, "proj/x", b"x", 100);
1071
1072        let opts = CleanOptions {
1073            max_size: Some(MaxSize::parse("0").unwrap()),
1074            ..Default::default()
1075        };
1076        let report = cache.clean(&opts).unwrap();
1077        assert_eq!(report.evicted_by_max_size, 1);
1078        assert!(cache.lookup(&key).is_none());
1079    }
1080
1081    // ---- clean: mode composition ----
1082
1083    #[test]
1084    fn aux_023_clean_soft_and_max_age_count_separately_per_priority() {
1085        let mut fs = MemFilesystem::new();
1086        fs.add_dir("/ws").unwrap();
1087        let cache = make_cache(fs, HashAlgo::Blake3);
1088
1089        let key_corrupt = key_with_first_byte(0xCC);
1090        let m = Manifest {
1091            chapter_revision: CHAPTER_REVISION.saturating_add(1),
1092            hash_function: HashFunctionLabel::Blake3,
1093            key: key_corrupt,
1094            outputs: vec![],
1095            stdout_len: 0,
1096            stderr_len: 0,
1097            stdout_hash: [0u8; 32],
1098            stderr_hash: [0u8; 32],
1099            exit_status: 0,
1100            created_at_unix: 0,
1101        };
1102        write_manifest_to_entry(&cache, &key_corrupt, &m);
1103
1104        let key_stale = key_with_first_byte(0xAA);
1105        store_entry_at(&cache, &key_stale, "proj/x", b"x", 100);
1106
1107        let opts = CleanOptions {
1108            soft: true,
1109            max_age: Some(MaxAge::parse("50s").unwrap()),
1110            now_unix: 300,
1111            ..Default::default()
1112        };
1113        let report = cache.clean(&opts).unwrap();
1114        assert_eq!(report.evicted_by_soft, 1);
1115        assert_eq!(report.evicted_by_max_age, 1);
1116        assert_eq!(report.inspected, 2);
1117    }
1118
1119    #[test]
1120    fn aux_023_clean_evicted_entries_sorted_by_mode_then_created_at() {
1121        let mut fs = MemFilesystem::new();
1122        fs.add_dir("/ws").unwrap();
1123        let cache = make_cache(fs, HashAlgo::Blake3);
1124
1125        // Two well-formed entries that both fall under max-age.
1126        let key_a = key_with_first_byte(0x11);
1127        let key_b = key_with_first_byte(0x22);
1128        store_entry_at(&cache, &key_a, "proj/a", b"x", 100);
1129        store_entry_at(&cache, &key_b, "proj/b", b"y", 200);
1130
1131        // One corrupt entry under --soft.
1132        let key_corrupt = key_with_first_byte(0xCC);
1133        let stale = Manifest {
1134            chapter_revision: CHAPTER_REVISION.saturating_add(1),
1135            hash_function: HashFunctionLabel::Blake3,
1136            key: key_corrupt,
1137            outputs: vec![],
1138            stdout_len: 0,
1139            stderr_len: 0,
1140            stdout_hash: [0u8; 32],
1141            stderr_hash: [0u8; 32],
1142            exit_status: 0,
1143            created_at_unix: 0,
1144        };
1145        write_manifest_to_entry(&cache, &key_corrupt, &stale);
1146
1147        let opts = CleanOptions {
1148            soft: true,
1149            max_age: Some(MaxAge::parse("50s").unwrap()),
1150            now_unix: 300,
1151            ..Default::default()
1152        };
1153        let report = cache.clean(&opts).unwrap();
1154        assert_eq!(report.evicted_entries.len(), 3);
1155        assert_eq!(report.evicted_entries[0].matched_mode, EvictionMode::Soft);
1156        assert_eq!(report.evicted_entries[1].matched_mode, EvictionMode::MaxAge);
1157        assert_eq!(
1158            report.evicted_entries[1].created_at_unix, 100,
1159            "older max-age entry sorts before newer one"
1160        );
1161        assert_eq!(report.evicted_entries[2].matched_mode, EvictionMode::MaxAge);
1162        assert_eq!(report.evicted_entries[2].created_at_unix, 200);
1163    }
1164
1165    // ---- clean: dry-run ----
1166
1167    #[test]
1168    fn aux_023_clean_dry_run_does_not_modify_disk() {
1169        let mut fs = MemFilesystem::new();
1170        fs.add_dir("/ws").unwrap();
1171        let cache = make_cache(fs, HashAlgo::Blake3);
1172
1173        let key = key_with_first_byte(0xAA);
1174        store_entry_at(&cache, &key, "proj/x", b"x", 100);
1175
1176        let opts = CleanOptions {
1177            max_age: Some(MaxAge::parse("50s").unwrap()),
1178            now_unix: 300,
1179            dry_run: true,
1180            ..Default::default()
1181        };
1182        let report = cache.clean(&opts).unwrap();
1183        assert_eq!(report.evicted_by_max_age, 1);
1184        assert_eq!(report.evicted_entries.len(), 1);
1185        assert_eq!(report.evicted_entries[0].matched_mode, EvictionMode::MaxAge);
1186        assert!(
1187            cache.lookup(&key).is_some(),
1188            "dry-run must leave the entry on disk"
1189        );
1190    }
1191
1192    #[test]
1193    fn aux_023_clean_dry_run_under_soft_keeps_tmp_and_restore_dirs() {
1194        let mut fs = MemFilesystem::new();
1195        fs.add_dir("/ws").unwrap();
1196        let cache = make_cache(fs, HashAlgo::Blake3);
1197
1198        let key_tmp = key_with_first_byte(0xEF);
1199        let tmp = layout::tmp_entry_dir(cache.cache_root(), &key_tmp, "rnd1");
1200        cache.fs().create_dir_all(&tmp).unwrap();
1201
1202        let key_restore = key_with_first_byte(0x12);
1203        let staging = layout::restore_staging_dir(cache.cache_root(), &key_restore, "rnd2");
1204        cache.fs().create_dir_all(&staging).unwrap();
1205
1206        let opts = CleanOptions {
1207            soft: true,
1208            dry_run: true,
1209            ..Default::default()
1210        };
1211        let report = cache.clean(&opts).unwrap();
1212        assert_eq!(report.removed_tmp_dirs, 1);
1213        assert_eq!(report.removed_restore_dirs, 1);
1214        assert!(cache.fs().metadata(&tmp).is_ok());
1215        assert!(cache.fs().metadata(&staging).is_ok());
1216    }
1217
1218    #[test]
1219    fn aux_023_clean_bytes_reclaimed_sums_evicted_footprints() {
1220        let mut fs = MemFilesystem::new();
1221        fs.add_dir("/ws").unwrap();
1222        let cache = make_cache(fs, HashAlgo::Blake3);
1223
1224        let key_a = key_with_first_byte(0x11);
1225        store_entry_at(&cache, &key_a, "proj/a", b"hello", 100); // 5 bytes
1226        let key_b = key_with_first_byte(0x22);
1227        store_entry_at(&cache, &key_b, "proj/b", b"world!", 200); // 6 bytes
1228
1229        let opts = CleanOptions {
1230            max_age: Some(MaxAge::parse("50s").unwrap()),
1231            now_unix: 300,
1232            ..Default::default()
1233        };
1234        let report = cache.clean(&opts).unwrap();
1235        assert_eq!(report.evicted_by_max_age, 2);
1236        assert_eq!(report.bytes_reclaimed, 11);
1237    }
1238}