1use std::collections::BTreeMap;
25use std::path::Path;
26
27use haz_vfs::{EntryKind, FsError, WritableFilesystem};
28use snafu::{ResultExt, Snafu};
29
30use crate::cache::Cache;
31use crate::layout;
32use crate::manifest::{HashFunctionLabel, Manifest};
33
34#[derive(Debug, Snafu)]
42pub enum CacheInfoError {
43 #[snafu(display("filesystem error during cache info walk: {source}"))]
46 Io {
47 source: FsError,
49 },
50}
51
52pub type SchemaPrefix = (u8, HashFunctionLabel);
57
58#[derive(Debug, Default, Clone, PartialEq, Eq)]
67pub struct CacheInfoReport {
68 pub well_formed_entries: u64,
71 pub corrupt_entries: u64,
74 pub orphan_tmp_dirs: u64,
77 pub orphan_restore_dirs: u64,
80 pub total_bytes: u64,
83 pub by_schema: BTreeMap<SchemaPrefix, u64>,
86}
87
88impl<Fs: WritableFilesystem> Cache<Fs> {
89 pub fn info(&self) -> Result<CacheInfoReport, CacheInfoError> {
102 let mut report = CacheInfoReport::default();
103
104 let cache_entries = match self.fs().read_dir(self.cache_root()) {
105 Ok(es) => es,
106 Err(FsError::NotFound { .. }) => return Ok(report),
107 Err(e) => return Err(CacheInfoError::Io { source: e }),
108 };
109
110 for entry in cache_entries {
111 let name = entry
112 .path
113 .file_name()
114 .map(|n| n.to_string_lossy().into_owned())
115 .unwrap_or_default();
116
117 if name.starts_with(".restore-") {
118 report.orphan_restore_dirs += 1;
119 report.total_bytes = report
120 .total_bytes
121 .saturating_add(self.sum_recursive(&entry.path)?);
122 continue;
123 }
124
125 match entry.metadata.kind {
126 EntryKind::Dir => self.walk_shard(&entry.path, &mut report)?,
127 EntryKind::File => {
128 report.total_bytes = report.total_bytes.saturating_add(entry.metadata.size);
129 }
130 _ => {}
131 }
132 }
133
134 Ok(report)
135 }
136
137 fn walk_shard(
138 &self,
139 shard_dir: &Path,
140 report: &mut CacheInfoReport,
141 ) -> Result<(), CacheInfoError> {
142 let shard_entries = self.fs().read_dir(shard_dir).context(IoSnafu)?;
143 for shard_entry in shard_entries {
144 let sname = shard_entry
145 .path
146 .file_name()
147 .map(|n| n.to_string_lossy().into_owned())
148 .unwrap_or_default();
149
150 if sname.starts_with(".tmp-") {
151 report.orphan_tmp_dirs += 1;
152 report.total_bytes = report
153 .total_bytes
154 .saturating_add(self.sum_recursive(&shard_entry.path)?);
155 continue;
156 }
157
158 match shard_entry.metadata.kind {
159 EntryKind::Dir => self.classify_entry(&shard_entry.path, report)?,
160 EntryKind::File => {
161 report.total_bytes =
162 report.total_bytes.saturating_add(shard_entry.metadata.size);
163 }
164 _ => {}
165 }
166 }
167 Ok(())
168 }
169
170 fn classify_entry(
171 &self,
172 entry_dir: &Path,
173 report: &mut CacheInfoReport,
174 ) -> Result<(), CacheInfoError> {
175 report.total_bytes = report
176 .total_bytes
177 .saturating_add(self.sum_recursive(entry_dir)?);
178
179 let manifest_path = entry_dir.join(layout::MANIFEST_FILE_NAME);
180 let bytes = match self.fs().read(&manifest_path) {
181 Ok(b) => b,
182 Err(FsError::NotFound { .. } | FsError::NotAFile { .. }) => {
183 report.corrupt_entries += 1;
184 return Ok(());
185 }
186 Err(e) => return Err(CacheInfoError::Io { source: e }),
187 };
188
189 let Ok(manifest) = Manifest::from_json(&bytes) else {
190 report.corrupt_entries += 1;
191 return Ok(());
192 };
193
194 let chapter_ok = manifest.current_chapter_revision_matches();
195 let hash_ok = HashFunctionLabel::from(self.hash_algo()) == manifest.hash_function;
196 if !chapter_ok || !hash_ok {
197 report.corrupt_entries += 1;
198 } else {
199 report.well_formed_entries += 1;
200 let schema_key: SchemaPrefix = (manifest.chapter_revision, manifest.hash_function);
201 *report.by_schema.entry(schema_key).or_insert(0) += 1;
202 }
203 Ok(())
204 }
205
206 fn sum_recursive(&self, path: &Path) -> Result<u64, CacheInfoError> {
212 let entries = self.fs().read_dir(path).context(IoSnafu)?;
213 let mut sum = 0u64;
214 for entry in entries {
215 match entry.metadata.kind {
216 EntryKind::File => sum = sum.saturating_add(entry.metadata.size),
217 EntryKind::Dir => sum = sum.saturating_add(self.sum_recursive(&entry.path)?),
218 _ => {}
219 }
220 }
221 Ok(sum)
222 }
223}
224
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;
    use std::path::Path;

    use haz_domain::path::CanonicalPath;
    use haz_domain::settings::cache::HashAlgo;
    use haz_vfs::{Filesystem, MemFilesystem, WritableFilesystem};

    use crate::cache::Cache;
    use crate::info::{CacheInfoReport, SchemaPrefix};
    use crate::key::CacheKey;
    use crate::key::prefix::CHAPTER_REVISION;
    use crate::layout;
    use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
    use crate::store::{StoreInputs, StoredOutput};

    /// Workspace root used by every in-memory fixture.
    const WORKSPACE_ROOT: &str = "/ws";

    /// Parses a workspace-absolute path, panicking on invalid test input.
    fn cp(s: &str) -> CanonicalPath {
        CanonicalPath::parse_workspace_absolute(s)
            .expect("test helper expects a valid workspace-absolute path")
    }

    /// Builds a cache over `fs` rooted at [`WORKSPACE_ROOT`].
    fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> Cache<MemFilesystem> {
        Cache::new(fs, Path::new(WORKSPACE_ROOT), algo)
    }

    /// Builds a Blake3 cache over a fresh in-memory filesystem that contains
    /// only the workspace directory.
    fn blake3_cache() -> Cache<MemFilesystem> {
        let mut fs = MemFilesystem::new();
        fs.add_dir("/ws").unwrap();
        make_cache(fs, HashAlgo::Blake3)
    }

    /// Returns a key that is all zeroes except for its first byte.
    fn key_with_first_byte(first: u8) -> CacheKey {
        let mut raw = [0u8; 32];
        raw[0] = first;
        CacheKey::from_bytes(raw)
    }

    /// Writes `bytes` to `rel` under the workspace and stores it in the cache
    /// under `key` as the sole output of a run with empty stdout/stderr.
    fn store_a_valid_entry(cache: &Cache<MemFilesystem>, key: &CacheKey, rel: &str, bytes: &[u8]) {
        let on_disk = Path::new(WORKSPACE_ROOT).join(rel);
        let workspace_absolute = format!("/{rel}");
        cache.fs().create_dir_all(on_disk.parent().unwrap()).unwrap();
        cache.fs().write_file(&on_disk, bytes).unwrap();

        let outputs = [StoredOutput {
            workspace_absolute_path: &workspace_absolute,
            on_disk_path: &on_disk,
            mode: 0o644,
        }];
        let inputs = StoreInputs {
            outputs: &outputs,
            stdout: b"",
            stderr: b"",
            created_at_unix: 0,
        };
        cache.store(key, &inputs).unwrap();
    }

    /// Creates the entry directory for `key` and writes `manifest` into it,
    /// bypassing `Cache::store`.
    fn write_manifest_to_entry(cache: &Cache<MemFilesystem>, key: &CacheKey, manifest: &Manifest) {
        let entry_dir = layout::entry_dir(cache.cache_root(), key);
        cache.fs().create_dir_all(&entry_dir).unwrap();

        let manifest_path = layout::manifest_path(cache.cache_root(), key);
        cache
            .fs()
            .write_file(&manifest_path, &manifest.to_json_bytes())
            .unwrap();
    }

    /// Builds a manifest at the current chapter revision that is labelled
    /// Sha256, which mismatches the Blake3 caches used by these tests.
    fn sha256_manifest(key: CacheKey, outputs: Vec<OutputBlob>) -> Manifest {
        Manifest {
            chapter_revision: CHAPTER_REVISION,
            hash_function: HashFunctionLabel::Sha256,
            key,
            outputs,
            stdout_len: 0,
            stderr_len: 0,
            stdout_hash: [0u8; 32],
            stderr_hash: [0u8; 32],
            exit_status: 0,
            created_at_unix: 0,
        }
    }

    /// Schema prefix of entries written by a current Blake3 cache.
    fn schema_blake3_current() -> SchemaPrefix {
        (CHAPTER_REVISION, HashFunctionLabel::Blake3)
    }

    #[test]
    fn aux_019_info_on_absent_cache_root_reports_zero() {
        let cache = blake3_cache();

        let report = cache.info().unwrap();

        assert_eq!(report, CacheInfoReport::default());
    }

    #[test]
    fn aux_019_info_counts_one_well_formed_entry() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        store_a_valid_entry(&cache, &key, "proj/out", b"hello");

        let report = cache.info().unwrap();

        assert_eq!(report.well_formed_entries, 1);
        assert_eq!(report.corrupt_entries, 0);
        assert_eq!(report.orphan_tmp_dirs, 0);
        assert_eq!(report.orphan_restore_dirs, 0);
        assert_eq!(
            report.by_schema,
            BTreeMap::from([(schema_blake3_current(), 1)]),
        );
        assert!(
            report.total_bytes >= 5,
            "expected at least 5 bytes for the `hello` blob, got {}",
            report.total_bytes,
        );
    }

    #[test]
    fn aux_019_info_counts_entry_without_a_manifest_as_corrupt() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let entry_dir = layout::entry_dir(cache.cache_root(), &key);
        cache.fs().create_dir_all(&entry_dir).unwrap();

        let report = cache.info().unwrap();

        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert!(report.by_schema.is_empty());
    }

    #[test]
    fn aux_019_info_counts_entry_with_unparseable_manifest_as_corrupt() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let entry_dir = layout::entry_dir(cache.cache_root(), &key);
        cache.fs().create_dir_all(&entry_dir).unwrap();
        let manifest_path = layout::manifest_path(cache.cache_root(), &key);
        cache
            .fs()
            .write_file(&manifest_path, b"this is not json")
            .unwrap();

        let report = cache.info().unwrap();

        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert!(report.by_schema.is_empty());
    }

    #[test]
    fn aux_019_info_counts_schema_mismatched_entry_as_corrupt() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        write_manifest_to_entry(&cache, &key, &sha256_manifest(key, vec![]));

        let report = cache.info().unwrap();

        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert!(report.by_schema.is_empty());
    }

    #[test]
    fn aux_019_info_counts_orphan_tmp_directory() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let tmp_dir = layout::tmp_entry_dir(cache.cache_root(), &key, "abcdef");
        cache.fs().create_dir_all(&tmp_dir).unwrap();
        cache
            .fs()
            .write_file(&tmp_dir.join("partial.bin"), &[0u8; 17])
            .unwrap();

        let report = cache.info().unwrap();

        assert_eq!(report.orphan_tmp_dirs, 1);
        assert_eq!(report.well_formed_entries, 0);
        assert_eq!(report.corrupt_entries, 0);
        assert!(report.total_bytes >= 17);
    }

    #[test]
    fn aux_019_info_counts_orphan_restore_directory() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);
        let staging_dir = layout::restore_staging_dir(cache.cache_root(), &key, "feedface");
        cache.fs().create_dir_all(&staging_dir).unwrap();
        cache
            .fs()
            .write_file(&staging_dir.join("leftover.bin"), &[0u8; 9])
            .unwrap();

        let report = cache.info().unwrap();

        assert_eq!(report.orphan_restore_dirs, 1);
        assert_eq!(report.orphan_tmp_dirs, 0);
        assert!(report.total_bytes >= 9);
    }

    #[test]
    fn aux_019_info_breaks_down_by_schema_prefix() {
        let cache = blake3_cache();
        let first_key = key_with_first_byte(0xAA);
        store_a_valid_entry(&cache, &first_key, "proj/out_a", b"x");
        let second_key = key_with_first_byte(0xBB);
        store_a_valid_entry(&cache, &second_key, "proj/out_b", b"y");

        let report = cache.info().unwrap();

        assert_eq!(report.well_formed_entries, 2);
        assert_eq!(
            report.by_schema,
            BTreeMap::from([(schema_blake3_current(), 2)]),
        );
    }

    #[test]
    fn aux_019_info_classifies_mixed_state_correctly() {
        let cache = blake3_cache();

        // One well-formed entry.
        let key_good = key_with_first_byte(0xAB);
        store_a_valid_entry(&cache, &key_good, "proj/out", b"x");

        // One entry whose manifest names the wrong hash function.
        let key_stale = key_with_first_byte(0xCD);
        write_manifest_to_entry(&cache, &key_stale, &sha256_manifest(key_stale, vec![]));

        // One orphan tmp directory inside a shard.
        let key_tmp = key_with_first_byte(0xEF);
        let tmp_dir = layout::tmp_entry_dir(cache.cache_root(), &key_tmp, "rnd1");
        cache.fs().create_dir_all(&tmp_dir).unwrap();

        // One orphan restore staging directory.
        let key_restore = key_with_first_byte(0x12);
        let staging_dir = layout::restore_staging_dir(cache.cache_root(), &key_restore, "rnd2");
        cache.fs().create_dir_all(&staging_dir).unwrap();

        let report = cache.info().unwrap();

        assert_eq!(report.well_formed_entries, 1);
        assert_eq!(report.corrupt_entries, 1);
        assert_eq!(report.orphan_tmp_dirs, 1);
        assert_eq!(report.orphan_restore_dirs, 1);
        assert_eq!(
            report.by_schema,
            BTreeMap::from([(schema_blake3_current(), 1)]),
        );
    }

    #[test]
    fn aux_019_info_total_bytes_sums_blob_sizes() {
        let cache = blake3_cache();
        let key = key_with_first_byte(0xAB);

        let small = Path::new(WORKSPACE_ROOT).join("proj/small.txt");
        let larger = Path::new(WORKSPACE_ROOT).join("proj/larger.txt");
        cache.fs().create_dir_all(small.parent().unwrap()).unwrap();
        cache.fs().write_file(&small, b"hello").unwrap();
        cache.fs().write_file(&larger, b"helloXX").unwrap();

        let outputs = [
            StoredOutput {
                workspace_absolute_path: "/proj/small.txt",
                on_disk_path: &small,
                mode: 0o644,
            },
            StoredOutput {
                workspace_absolute_path: "/proj/larger.txt",
                on_disk_path: &larger,
                mode: 0o644,
            },
        ];
        let inputs = StoreInputs {
            outputs: &outputs,
            stdout: b"",
            stderr: b"",
            created_at_unix: 0,
        };
        cache.store(&key, &inputs).unwrap();

        let report = cache.info().unwrap();

        assert!(
            report.total_bytes >= 12,
            "expected at least 12 bytes for the two blobs; got {}",
            report.total_bytes,
        );
        assert_eq!(report.well_formed_entries, 1);
    }

    #[test]
    fn aux_018_info_does_not_mutate_the_cache_root() {
        let cache = blake3_cache();

        let key_good = key_with_first_byte(0xAB);
        store_a_valid_entry(&cache, &key_good, "proj/out", b"x");

        let key_stale = key_with_first_byte(0xCD);
        let stale_outputs = vec![OutputBlob {
            workspace_absolute_path: cp("/proj/missing"),
            content_hash: [0u8; 32],
            size: 0,
            mode: 0o644,
        }];
        write_manifest_to_entry(
            &cache,
            &key_stale,
            &sha256_manifest(key_stale, stale_outputs),
        );

        let tmp_dir = layout::tmp_entry_dir(cache.cache_root(), &key_with_first_byte(0xEF), "r1");
        cache.fs().create_dir_all(&tmp_dir).unwrap();
        cache.fs().write_file(&tmp_dir.join("x"), b"y").unwrap();

        let staging_dir =
            layout::restore_staging_dir(cache.cache_root(), &key_with_first_byte(0x12), "r2");
        cache.fs().create_dir_all(&staging_dir).unwrap();

        let before = snapshot_cache(&cache);
        cache.info().unwrap();
        let after = snapshot_cache(&cache);

        assert_eq!(
            before, after,
            "cache root state must not change under info()",
        );
    }

    /// Captures every regular file under the cache root as `path -> contents`.
    fn snapshot_cache(cache: &Cache<MemFilesystem>) -> BTreeMap<String, Vec<u8>> {
        let mut files = BTreeMap::new();
        snapshot_into(cache, cache.cache_root(), &mut files);
        files
    }

    /// Recursively records the files below `path` into `out`.
    ///
    /// Unlistable directories are skipped and unreadable files are recorded
    /// with empty contents; directories themselves are not recorded.
    fn snapshot_into(
        cache: &Cache<MemFilesystem>,
        path: &Path,
        out: &mut BTreeMap<String, Vec<u8>>,
    ) {
        let entries = match cache.fs().read_dir(path) {
            Ok(entries) => entries,
            Err(_) => return,
        };
        for entry in entries {
            match entry.metadata.kind {
                haz_vfs::EntryKind::Dir => snapshot_into(cache, &entry.path, out),
                haz_vfs::EntryKind::File => {
                    let name = entry.path.to_string_lossy().into_owned();
                    let contents = cache.fs().read(&entry.path).unwrap_or_default();
                    out.insert(name, contents);
                }
                _ => {}
            }
        }
    }
}