1use std::collections::BTreeMap;
25use std::path::Path;
26
27use haz_vfs::{EntryKind, Filesystem, FsError};
28use snafu::{ResultExt, Snafu};
29
30use crate::layout;
31use crate::manifest::{HashFunctionLabel, Manifest};
32use crate::reader::CacheReader;
33
/// Error returned by `CacheReader::info` when the cache walk cannot complete.
#[derive(Debug, Snafu)]
pub enum CacheInfoError {
    /// A filesystem operation failed while walking the cache; the underlying
    /// [`FsError`] is kept as the error source.
    #[snafu(display("filesystem error during cache info walk: {source}"))]
    Io {
        /// The filesystem error that aborted the walk.
        source: FsError,
    },
}
51
/// Key of the per-schema breakdown in [`CacheInfoReport::by_schema`]: a
/// manifest's `chapter_revision` paired with its `hash_function` label.
pub type SchemaPrefix = (u8, HashFunctionLabel);
57
/// Summary produced by `CacheReader::info`.
///
/// `Default` yields the all-zero report that is also returned when the cache
/// root does not exist.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct CacheInfoReport {
    /// Entry directories whose manifest parses, passes
    /// `Manifest::current_chapter_revision_matches`, and names the same hash
    /// function as the reader.
    pub well_formed_entries: u64,
    /// Entry directories whose manifest is missing, is not a file, does not
    /// parse, or fails the revision / hash-function check.
    pub corrupt_entries: u64,
    /// Leftover `.tmp-*` directories found inside shard directories.
    pub orphan_tmp_dirs: u64,
    /// Leftover `.restore-*` directories found directly under the cache root.
    pub orphan_restore_dirs: u64,
    /// Saturating sum of the sizes of all files encountered during the walk.
    pub total_bytes: u64,
    /// Number of well-formed entries per schema key.
    pub by_schema: BTreeMap<SchemaPrefix, u64>,
}
87
88impl<Fs: Filesystem> CacheReader<Fs> {
89 pub fn info(&self) -> Result<CacheInfoReport, CacheInfoError> {
102 let mut report = CacheInfoReport::default();
103
104 let cache_entries = match self.fs().read_dir(self.cache_root()) {
105 Ok(es) => es,
106 Err(FsError::NotFound { .. }) => return Ok(report),
107 Err(e) => return Err(CacheInfoError::Io { source: e }),
108 };
109
110 for entry in cache_entries {
111 let name = entry
112 .path
113 .file_name()
114 .map(|n| n.to_string_lossy().into_owned())
115 .unwrap_or_default();
116
117 if name.starts_with(".restore-") {
118 report.orphan_restore_dirs += 1;
119 report.total_bytes = report
120 .total_bytes
121 .saturating_add(self.sum_recursive(&entry.path)?);
122 continue;
123 }
124
125 match entry.metadata.kind {
126 EntryKind::Dir => self.walk_shard(&entry.path, &mut report)?,
127 EntryKind::File => {
128 report.total_bytes = report.total_bytes.saturating_add(entry.metadata.size);
129 }
130 _ => {}
131 }
132 }
133
134 Ok(report)
135 }
136
137 fn walk_shard(
138 &self,
139 shard_dir: &Path,
140 report: &mut CacheInfoReport,
141 ) -> Result<(), CacheInfoError> {
142 let shard_entries = self.fs().read_dir(shard_dir).context(IoSnafu)?;
143 for shard_entry in shard_entries {
144 let sname = shard_entry
145 .path
146 .file_name()
147 .map(|n| n.to_string_lossy().into_owned())
148 .unwrap_or_default();
149
150 if sname.starts_with(".tmp-") {
151 report.orphan_tmp_dirs += 1;
152 report.total_bytes = report
153 .total_bytes
154 .saturating_add(self.sum_recursive(&shard_entry.path)?);
155 continue;
156 }
157
158 match shard_entry.metadata.kind {
159 EntryKind::Dir => self.classify_entry(&shard_entry.path, report)?,
160 EntryKind::File => {
161 report.total_bytes =
162 report.total_bytes.saturating_add(shard_entry.metadata.size);
163 }
164 _ => {}
165 }
166 }
167 Ok(())
168 }
169
170 fn classify_entry(
171 &self,
172 entry_dir: &Path,
173 report: &mut CacheInfoReport,
174 ) -> Result<(), CacheInfoError> {
175 report.total_bytes = report
176 .total_bytes
177 .saturating_add(self.sum_recursive(entry_dir)?);
178
179 let manifest_path = entry_dir.join(layout::MANIFEST_FILE_NAME);
180 let bytes = match self.fs().read(&manifest_path) {
181 Ok(b) => b,
182 Err(FsError::NotFound { .. } | FsError::NotAFile { .. }) => {
183 report.corrupt_entries += 1;
184 return Ok(());
185 }
186 Err(e) => return Err(CacheInfoError::Io { source: e }),
187 };
188
189 let Ok(manifest) = Manifest::from_json(&bytes) else {
190 report.corrupt_entries += 1;
191 return Ok(());
192 };
193
194 let chapter_ok = manifest.current_chapter_revision_matches();
195 let hash_ok = HashFunctionLabel::from(self.hash_algo()) == manifest.hash_function;
196 if !chapter_ok || !hash_ok {
197 report.corrupt_entries += 1;
198 } else {
199 report.well_formed_entries += 1;
200 let schema_key: SchemaPrefix = (manifest.chapter_revision, manifest.hash_function);
201 *report.by_schema.entry(schema_key).or_insert(0) += 1;
202 }
203 Ok(())
204 }
205
206 fn sum_recursive(&self, path: &Path) -> Result<u64, CacheInfoError> {
212 let entries = self.fs().read_dir(path).context(IoSnafu)?;
213 let mut sum = 0u64;
214 for entry in entries {
215 match entry.metadata.kind {
216 EntryKind::File => sum = sum.saturating_add(entry.metadata.size),
217 EntryKind::Dir => sum = sum.saturating_add(self.sum_recursive(&entry.path)?),
218 _ => {}
219 }
220 }
221 Ok(sum)
222 }
223}
224
225#[cfg(test)]
226mod tests {
227 use std::collections::BTreeMap;
228 use std::path::Path;
229
230 use haz_domain::path::CanonicalPath;
231 use haz_domain::settings::cache::HashAlgo;
232 use haz_vfs::{Filesystem, WritableFilesystem};
233 use haz_vfs_testing::MemFilesystem;
234
235 use crate::info::{CacheInfoReport, SchemaPrefix};
236 use crate::key::CacheKey;
237 use crate::key::prefix::CHAPTER_REVISION;
238 use crate::layout;
239 use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
240 use crate::store::{StoreInputs, StoredOutput};
241 use crate::writer::CacheWriter;
242
    /// Workspace root passed to `CacheWriter::new` by `make_cache` and used as
    /// the base for the on-disk output paths the tests create.
    const WORKSPACE_ROOT: &str = "/ws";
244
245 fn cp(s: &str) -> CanonicalPath {
246 CanonicalPath::parse_workspace_absolute(s)
247 .expect("test helper expects a valid workspace-absolute path")
248 }
249
250 fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> CacheWriter<MemFilesystem> {
251 CacheWriter::new(fs, Path::new(WORKSPACE_ROOT), algo)
252 }
253
254 fn key_with_first_byte(first: u8) -> CacheKey {
255 let mut bytes = [0u8; 32];
256 bytes[0] = first;
257 CacheKey::from_bytes(bytes)
258 }
259
260 fn store_a_valid_entry(
261 cache: &CacheWriter<MemFilesystem>,
262 key: &CacheKey,
263 rel: &str,
264 bytes: &[u8],
265 ) {
266 let target = Path::new(WORKSPACE_ROOT).join(rel);
267 let anchored = format!("/{rel}");
268 cache.fs().create_dir_all(target.parent().unwrap()).unwrap();
269 cache.fs().write_file(&target, bytes).unwrap();
270 let outs = [StoredOutput {
271 workspace_absolute_path: &anchored,
272 on_disk_path: &target,
273 mode: 0o644,
274 }];
275 cache
276 .store(
277 key,
278 &StoreInputs {
279 outputs: &outs,
280 stdout: b"",
281 stderr: b"",
282 created_at_unix: 0,
283 },
284 )
285 .unwrap();
286 }
287
288 fn write_manifest_to_entry(
289 cache: &CacheWriter<MemFilesystem>,
290 key: &CacheKey,
291 manifest: &Manifest,
292 ) {
293 cache
294 .fs()
295 .create_dir_all(&layout::entry_dir(cache.cache_root(), key))
296 .unwrap();
297 cache
298 .fs()
299 .write_file(
300 &layout::manifest_path(cache.cache_root(), key),
301 &manifest.to_json_bytes(),
302 )
303 .unwrap();
304 }
305
    /// Schema key the tests expect for well-formed entries: `CHAPTER_REVISION`
    /// paired with the Blake3 hash-function label.
    fn schema_blake3_current() -> SchemaPrefix {
        (CHAPTER_REVISION, HashFunctionLabel::Blake3)
    }
309
310 #[test]
313 fn aux_019_info_on_absent_cache_root_reports_zero() {
314 let mut fs = MemFilesystem::new();
315 fs.add_dir("/ws").unwrap();
316 let cache = make_cache(fs, HashAlgo::Blake3);
317 let report = cache.reader().info().unwrap();
318 assert_eq!(report, CacheInfoReport::default());
319 }
320
321 #[test]
324 fn aux_019_info_counts_one_well_formed_entry() {
325 let mut fs = MemFilesystem::new();
326 fs.add_dir("/ws").unwrap();
327 let cache = make_cache(fs, HashAlgo::Blake3);
328 let key = key_with_first_byte(0xAB);
329 store_a_valid_entry(&cache, &key, "proj/out", b"hello");
330
331 let report = cache.reader().info().unwrap();
332 assert_eq!(report.well_formed_entries, 1);
333 assert_eq!(report.corrupt_entries, 0);
334 assert_eq!(report.orphan_tmp_dirs, 0);
335 assert_eq!(report.orphan_restore_dirs, 0);
336 let mut expected = BTreeMap::new();
337 expected.insert(schema_blake3_current(), 1);
338 assert_eq!(report.by_schema, expected);
339 assert!(
343 report.total_bytes >= 5,
344 "expected at least 5 bytes for the `hello` blob, got {}",
345 report.total_bytes,
346 );
347 }
348
349 #[test]
352 fn aux_019_info_counts_entry_without_a_manifest_as_corrupt() {
353 let mut fs = MemFilesystem::new();
354 fs.add_dir("/ws").unwrap();
355 let cache = make_cache(fs, HashAlgo::Blake3);
356 let key = key_with_first_byte(0xAB);
357 cache
358 .fs()
359 .create_dir_all(&layout::entry_dir(cache.cache_root(), &key))
360 .unwrap();
361
362 let report = cache.reader().info().unwrap();
363 assert_eq!(report.corrupt_entries, 1);
364 assert_eq!(report.well_formed_entries, 0);
365 assert!(report.by_schema.is_empty());
366 }
367
368 #[test]
371 fn aux_019_info_counts_entry_with_unparseable_manifest_as_corrupt() {
372 let mut fs = MemFilesystem::new();
373 fs.add_dir("/ws").unwrap();
374 let cache = make_cache(fs, HashAlgo::Blake3);
375 let key = key_with_first_byte(0xAB);
376 cache
377 .fs()
378 .create_dir_all(&layout::entry_dir(cache.cache_root(), &key))
379 .unwrap();
380 cache
381 .fs()
382 .write_file(
383 &layout::manifest_path(cache.cache_root(), &key),
384 b"this is not json",
385 )
386 .unwrap();
387
388 let report = cache.reader().info().unwrap();
389 assert_eq!(report.corrupt_entries, 1);
390 assert_eq!(report.well_formed_entries, 0);
391 assert!(report.by_schema.is_empty());
392 }
393
394 #[test]
397 fn aux_019_info_counts_schema_mismatched_entry_as_corrupt() {
398 let mut fs = MemFilesystem::new();
399 fs.add_dir("/ws").unwrap();
400 let cache = make_cache(fs, HashAlgo::Blake3);
401 let key = key_with_first_byte(0xAB);
402 let manifest = Manifest {
403 chapter_revision: CHAPTER_REVISION,
404 hash_function: HashFunctionLabel::Sha256, key,
406 outputs: vec![],
407 stdout_len: 0,
408 stderr_len: 0,
409 stdout_hash: [0u8; 32],
410 stderr_hash: [0u8; 32],
411 exit_status: 0,
412 created_at_unix: 0,
413 };
414 write_manifest_to_entry(&cache, &key, &manifest);
415
416 let report = cache.reader().info().unwrap();
417 assert_eq!(report.corrupt_entries, 1);
418 assert_eq!(report.well_formed_entries, 0);
419 assert!(report.by_schema.is_empty());
420 }
421
422 #[test]
425 fn aux_019_info_counts_orphan_tmp_directory() {
426 let mut fs = MemFilesystem::new();
427 fs.add_dir("/ws").unwrap();
428 let cache = make_cache(fs, HashAlgo::Blake3);
429 let key = key_with_first_byte(0xAB);
430 let tmp = layout::tmp_entry_dir(cache.cache_root(), &key, "abcdef");
431 cache.fs().create_dir_all(&tmp).unwrap();
432 cache
433 .fs()
434 .write_file(&tmp.join("partial.bin"), &[0u8; 17])
435 .unwrap();
436
437 let report = cache.reader().info().unwrap();
438 assert_eq!(report.orphan_tmp_dirs, 1);
439 assert_eq!(report.well_formed_entries, 0);
440 assert_eq!(report.corrupt_entries, 0);
441 assert!(report.total_bytes >= 17);
442 }
443
444 #[test]
445 fn aux_019_info_counts_orphan_restore_directory() {
446 let mut fs = MemFilesystem::new();
447 fs.add_dir("/ws").unwrap();
448 let cache = make_cache(fs, HashAlgo::Blake3);
449 let key = key_with_first_byte(0xAB);
450 let staging = layout::restore_staging_dir(cache.cache_root(), &key, "feedface");
451 cache.fs().create_dir_all(&staging).unwrap();
452 cache
453 .fs()
454 .write_file(&staging.join("leftover.bin"), &[0u8; 9])
455 .unwrap();
456
457 let report = cache.reader().info().unwrap();
458 assert_eq!(report.orphan_restore_dirs, 1);
459 assert_eq!(report.orphan_tmp_dirs, 0);
460 assert!(report.total_bytes >= 9);
461 }
462
463 #[test]
466 fn aux_019_info_breaks_down_by_schema_prefix() {
467 let mut fs = MemFilesystem::new();
468 fs.add_dir("/ws").unwrap();
469 let cache = make_cache(fs, HashAlgo::Blake3);
470
471 let key_a = key_with_first_byte(0xAA);
473 store_a_valid_entry(&cache, &key_a, "proj/out_a", b"x");
474 let key_b = key_with_first_byte(0xBB);
475 store_a_valid_entry(&cache, &key_b, "proj/out_b", b"y");
476
477 let report = cache.reader().info().unwrap();
478 assert_eq!(report.well_formed_entries, 2);
479 let mut expected = BTreeMap::new();
480 expected.insert(schema_blake3_current(), 2);
481 assert_eq!(report.by_schema, expected);
482 }
483
484 #[test]
487 fn aux_019_info_classifies_mixed_state_correctly() {
488 let mut fs = MemFilesystem::new();
492 fs.add_dir("/ws").unwrap();
493 let cache = make_cache(fs, HashAlgo::Blake3);
494
495 let key_good = key_with_first_byte(0xAB);
496 store_a_valid_entry(&cache, &key_good, "proj/out", b"x");
497
498 let key_stale = key_with_first_byte(0xCD);
499 let stale_manifest = Manifest {
500 chapter_revision: CHAPTER_REVISION,
501 hash_function: HashFunctionLabel::Sha256,
502 key: key_stale,
503 outputs: vec![],
504 stdout_len: 0,
505 stderr_len: 0,
506 stdout_hash: [0u8; 32],
507 stderr_hash: [0u8; 32],
508 exit_status: 0,
509 created_at_unix: 0,
510 };
511 write_manifest_to_entry(&cache, &key_stale, &stale_manifest);
512
513 let key_tmp = key_with_first_byte(0xEF);
514 let tmp = layout::tmp_entry_dir(cache.cache_root(), &key_tmp, "rnd1");
515 cache.fs().create_dir_all(&tmp).unwrap();
516
517 let key_restore = key_with_first_byte(0x12);
518 let staging = layout::restore_staging_dir(cache.cache_root(), &key_restore, "rnd2");
519 cache.fs().create_dir_all(&staging).unwrap();
520
521 let report = cache.reader().info().unwrap();
522 assert_eq!(report.well_formed_entries, 1);
523 assert_eq!(report.corrupt_entries, 1);
524 assert_eq!(report.orphan_tmp_dirs, 1);
525 assert_eq!(report.orphan_restore_dirs, 1);
526 let mut expected = BTreeMap::new();
527 expected.insert(schema_blake3_current(), 1);
528 assert_eq!(report.by_schema, expected);
529 }
530
531 #[test]
534 fn aux_019_info_total_bytes_sums_blob_sizes() {
535 let mut fs = MemFilesystem::new();
540 fs.add_dir("/ws").unwrap();
541 let cache = make_cache(fs, HashAlgo::Blake3);
542 let key = key_with_first_byte(0xAB);
543
544 let small = Path::new(WORKSPACE_ROOT).join("proj/small.txt");
545 let larger = Path::new(WORKSPACE_ROOT).join("proj/larger.txt");
546 cache.fs().create_dir_all(small.parent().unwrap()).unwrap();
547 cache.fs().write_file(&small, b"hello").unwrap();
548 cache.fs().write_file(&larger, b"helloXX").unwrap();
549 let outs = [
550 StoredOutput {
551 workspace_absolute_path: "/proj/small.txt",
552 on_disk_path: &small,
553 mode: 0o644,
554 },
555 StoredOutput {
556 workspace_absolute_path: "/proj/larger.txt",
557 on_disk_path: &larger,
558 mode: 0o644,
559 },
560 ];
561 cache
562 .store(
563 &key,
564 &StoreInputs {
565 outputs: &outs,
566 stdout: b"",
567 stderr: b"",
568 created_at_unix: 0,
569 },
570 )
571 .unwrap();
572
573 let report = cache.reader().info().unwrap();
574 assert!(
575 report.total_bytes >= 12,
576 "expected at least 12 bytes for the two blobs; got {}",
577 report.total_bytes,
578 );
579 assert_eq!(report.well_formed_entries, 1);
580 }
581
582 #[test]
585 fn aux_018_info_does_not_mutate_the_cache_root() {
586 let mut fs = MemFilesystem::new();
591 fs.add_dir("/ws").unwrap();
592 let cache = make_cache(fs, HashAlgo::Blake3);
593
594 let key_good = key_with_first_byte(0xAB);
596 store_a_valid_entry(&cache, &key_good, "proj/out", b"x");
597 let key_stale = key_with_first_byte(0xCD);
598 let stale_manifest = Manifest {
599 chapter_revision: CHAPTER_REVISION,
600 hash_function: HashFunctionLabel::Sha256,
601 key: key_stale,
602 outputs: vec![OutputBlob {
603 workspace_absolute_path: cp("/proj/missing"),
604 content_hash: [0u8; 32],
605 size: 0,
606 mode: 0o644,
607 }],
608 stdout_len: 0,
609 stderr_len: 0,
610 stdout_hash: [0u8; 32],
611 stderr_hash: [0u8; 32],
612 exit_status: 0,
613 created_at_unix: 0,
614 };
615 write_manifest_to_entry(&cache, &key_stale, &stale_manifest);
616 let tmp = layout::tmp_entry_dir(cache.cache_root(), &key_with_first_byte(0xEF), "r1");
617 cache.fs().create_dir_all(&tmp).unwrap();
618 cache.fs().write_file(&tmp.join("x"), b"y").unwrap();
619 let staging =
620 layout::restore_staging_dir(cache.cache_root(), &key_with_first_byte(0x12), "r2");
621 cache.fs().create_dir_all(&staging).unwrap();
622
623 let before = snapshot_cache(&cache);
624 cache.reader().info().unwrap();
625 let after = snapshot_cache(&cache);
626 assert_eq!(
627 before, after,
628 "cache root state must not change under info()",
629 );
630 }
631
632 fn snapshot_cache(cache: &CacheWriter<MemFilesystem>) -> BTreeMap<String, Vec<u8>> {
633 let mut out = BTreeMap::new();
634 snapshot_into(cache, cache.cache_root(), &mut out);
635 out
636 }
637
638 fn snapshot_into(
639 cache: &CacheWriter<MemFilesystem>,
640 path: &Path,
641 out: &mut BTreeMap<String, Vec<u8>>,
642 ) {
643 let Ok(entries) = cache.fs().read_dir(path) else {
644 return;
645 };
646 for entry in entries {
647 match entry.metadata.kind {
648 haz_vfs::EntryKind::File => {
649 let key = entry.path.to_string_lossy().into_owned();
650 let bytes = cache.fs().read(&entry.path).unwrap_or_default();
651 out.insert(key, bytes);
652 }
653 haz_vfs::EntryKind::Dir => snapshot_into(cache, &entry.path, out),
654 _ => {}
655 }
656 }
657 }
658}