1use std::path::Path;
40
41use haz_domain::path::{CanonicalPath, ParseAbsoluteError};
42use haz_domain::settings::cache::HashAlgo;
43use haz_vfs::{FsError, WritableFilesystem};
44use snafu::{ResultExt, Snafu};
45
46use crate::cache::Cache;
47use crate::hasher::Hasher;
48use crate::hex;
49use crate::key::CacheKey;
50use crate::key::prefix::CHAPTER_REVISION;
51use crate::layout;
52use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
53
/// One output file to be captured into a cache entry by `Cache::store`.
///
/// The store reads the bytes found at `on_disk_path`, writes them as a blob,
/// and records the output in the manifest under `workspace_absolute_path`.
#[derive(Debug, Clone, Copy)]
pub struct StoredOutput<'a> {
    /// Path recorded for this output in the manifest. It must be accepted by
    /// `CanonicalPath::parse_workspace_absolute`; otherwise the store fails
    /// with `StoreError::InvalidOutputPath`.
    pub workspace_absolute_path: &'a str,
    /// Filesystem location the output's bytes are read from.
    pub on_disk_path: &'a Path,
    /// Permission bits applied to the stored blob and recorded in the
    /// manifest.
    pub mode: u32,
}
76
/// Everything `Cache::store` needs, besides the key, to write one entry.
#[derive(Debug, Clone, Copy)]
pub struct StoreInputs<'a> {
    /// Output files to capture; the manifest lists them in this order.
    pub outputs: &'a [StoredOutput<'a>],
    /// Captured standard output; written to the entry and hashed into the
    /// manifest.
    pub stdout: &'a [u8],
    /// Captured standard error; written to the entry and hashed into the
    /// manifest.
    pub stderr: &'a [u8],
    /// Creation timestamp copied verbatim into the manifest.
    /// NOTE(review): presumably seconds since the Unix epoch; confirm with
    /// callers.
    pub created_at_unix: u64,
}
93
/// Failures that `Cache::store` can report.
#[derive(Debug, Snafu)]
pub enum StoreError {
    /// A filesystem operation performed while writing the entry failed.
    #[snafu(display("filesystem error during cache store: {source}"))]
    Io {
        /// The underlying filesystem error.
        source: FsError,
    },

    /// An output's `workspace_absolute_path` was rejected by
    /// `CanonicalPath::parse_workspace_absolute`.
    #[snafu(display("invalid workspace-absolute output path '{path}': {source}"))]
    InvalidOutputPath {
        /// The offending path, exactly as supplied by the caller.
        path: String,
        /// Why the path failed to parse.
        source: ParseAbsoluteError,
    },
}
121
122impl<Fs: WritableFilesystem> Cache<Fs> {
123 pub fn store(&self, key: &CacheKey, inputs: &StoreInputs<'_>) -> Result<(), StoreError> {
144 let suffix = random_suffix_hex();
145 let shard_dir = layout::shard_dir(self.cache_root(), key);
146 let tmp_dir = layout::tmp_entry_dir(self.cache_root(), key, &suffix);
147 let outputs_dir = tmp_dir.join(layout::OUTPUTS_SUBDIR);
148
149 self.fs().create_dir_all(&outputs_dir).context(IoSnafu)?;
150
151 let manifest_outputs = self.write_output_blobs(&outputs_dir, inputs.outputs)?;
152
153 let stdout_path = tmp_dir.join(layout::STDOUT_FILE_NAME);
154 self.fs()
155 .write_file(&stdout_path, inputs.stdout)
156 .context(IoSnafu)?;
157 self.fs().fsync_file(&stdout_path).context(IoSnafu)?;
158
159 let stderr_path = tmp_dir.join(layout::STDERR_FILE_NAME);
160 self.fs()
161 .write_file(&stderr_path, inputs.stderr)
162 .context(IoSnafu)?;
163 self.fs().fsync_file(&stderr_path).context(IoSnafu)?;
164
165 let stdout_hash = hash_bytes(self.hash_algo(), inputs.stdout);
166 let stderr_hash = hash_bytes(self.hash_algo(), inputs.stderr);
167
168 #[allow(clippy::cast_possible_truncation)]
169 let stdout_len = inputs.stdout.len() as u64;
170 #[allow(clippy::cast_possible_truncation)]
171 let stderr_len = inputs.stderr.len() as u64;
172
173 let manifest = Manifest {
174 chapter_revision: CHAPTER_REVISION,
175 hash_function: HashFunctionLabel::from(self.hash_algo()),
176 key: *key,
177 outputs: manifest_outputs,
178 stdout_len,
179 stderr_len,
180 stdout_hash,
181 stderr_hash,
182 exit_status: 0,
183 created_at_unix: inputs.created_at_unix,
184 };
185
186 let manifest_path = tmp_dir.join(layout::MANIFEST_FILE_NAME);
187 self.fs()
188 .write_file(&manifest_path, &manifest.to_json_bytes())
189 .context(IoSnafu)?;
190 self.fs().fsync_file(&manifest_path).context(IoSnafu)?;
191 self.fs().fsync_dir(&tmp_dir).context(IoSnafu)?;
192
193 let entry_dir = layout::entry_dir(self.cache_root(), key);
194 match self.fs().remove_dir_all(&entry_dir) {
195 Ok(()) | Err(FsError::NotFound { .. }) => {}
196 Err(e) => return Err(StoreError::Io { source: e }),
197 }
198 self.fs().rename(&tmp_dir, &entry_dir).context(IoSnafu)?;
199 self.fs().fsync_dir(&shard_dir).context(IoSnafu)?;
200
201 Ok(())
202 }
203
204 fn write_output_blobs(
210 &self,
211 outputs_dir: &Path,
212 outputs: &[StoredOutput<'_>],
213 ) -> Result<Vec<OutputBlob>, StoreError> {
214 let mut entries = Vec::with_capacity(outputs.len());
215 for out in outputs {
216 let workspace_absolute_path = CanonicalPath::parse_workspace_absolute(
217 out.workspace_absolute_path,
218 )
219 .map_err(|source| StoreError::InvalidOutputPath {
220 path: out.workspace_absolute_path.to_owned(),
221 source,
222 })?;
223
224 let bytes = self.fs().read(out.on_disk_path).context(IoSnafu)?;
225 let content_hash = hash_bytes(self.hash_algo(), &bytes);
226
227 let blob_path = outputs_dir.join(hex::encode_32(&content_hash));
228 self.fs().write_file(&blob_path, &bytes).context(IoSnafu)?;
229 self.fs()
230 .set_permissions(&blob_path, out.mode)
231 .context(IoSnafu)?;
232 self.fs().fsync_file(&blob_path).context(IoSnafu)?;
233
234 #[allow(clippy::cast_possible_truncation)]
235 let size = bytes.len() as u64;
236 entries.push(OutputBlob {
237 workspace_absolute_path,
238 content_hash,
239 size,
240 mode: out.mode,
241 });
242 }
243 Ok(entries)
244 }
245}
246
247fn random_suffix_hex() -> String {
253 let r: u64 = rand::random();
254 format!("{r:016x}")
255}
256
257fn hash_bytes(algo: HashAlgo, data: &[u8]) -> [u8; 32] {
258 let mut h = Hasher::new(algo);
259 h.update(data);
260 h.finalize()
261}
262
263#[cfg(test)]
264mod tests {
265 use std::path::{Path, PathBuf};
266
267 use haz_domain::path::ParseAbsoluteError;
268 use haz_domain::settings::cache::HashAlgo;
269 use haz_vfs::{EntryKind, Filesystem, MemFilesystem, WritableFilesystem};
270
271 use crate::cache::Cache;
272 use crate::hasher::Hasher;
273 use crate::key::CacheKey;
274 use crate::key::prefix::CHAPTER_REVISION;
275 use crate::layout;
276 use crate::manifest::HashFunctionLabel;
277 use crate::store::{StoreError, StoreInputs, StoredOutput};
278
    /// Workspace root that `make_cache` passes to `Cache::new`.
    const WORKSPACE_ROOT: &str = "/ws";
    /// Workspace-absolute path used for the sample output in the manifest.
    const PROJ_OUT_ABS: &str = "/proj/out";
    /// Location of the sample output inside the in-memory filesystem.
    const PROJ_OUT_DISK: &str = "/ws/proj/out";
282
283 fn sample_key() -> CacheKey {
284 let mut bytes = [0u8; 32];
285 bytes[0] = 0xAB;
286 bytes[1] = 0xCD;
287 CacheKey::from_bytes(bytes)
288 }
289
290 fn hash_bytes(algo: HashAlgo, data: &[u8]) -> [u8; 32] {
291 let mut h = Hasher::new(algo);
292 h.update(data);
293 h.finalize()
294 }
295
296 fn fs_with_one_output(path: &Path, bytes: &[u8], mode: u32) -> MemFilesystem {
299 let mut fs = MemFilesystem::new();
300 fs.add_dir(path.parent().unwrap()).unwrap();
301 fs.add_file_with_mode(path, bytes.to_vec(), mode).unwrap();
302 fs
303 }
304
305 fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> Cache<MemFilesystem> {
306 Cache::new(fs, Path::new(WORKSPACE_ROOT), algo)
307 }
308
309 #[test]
312 fn cache_017_store_then_lookup_round_trips() {
313 let blob = b"output-bytes-v1";
314 let on_disk = PathBuf::from(PROJ_OUT_DISK);
315 let fs = fs_with_one_output(&on_disk, blob, 0o644);
316 let cache = make_cache(fs, HashAlgo::Blake3);
317 let key = sample_key();
318
319 let outs = [StoredOutput {
320 workspace_absolute_path: PROJ_OUT_ABS,
321 on_disk_path: &on_disk,
322 mode: 0o644,
323 }];
324 let inputs = StoreInputs {
325 outputs: &outs,
326 stdout: b"hello, stdout",
327 stderr: b"hello, stderr",
328 created_at_unix: 1_715_700_000,
329 };
330 cache.store(&key, &inputs).unwrap();
331
332 let manifest = cache.lookup(&key).expect("expected a hit after store");
333 assert_eq!(manifest.outputs.len(), 1);
334 assert_eq!(
335 manifest.outputs[0].workspace_absolute_path.to_string(),
336 PROJ_OUT_ABS
337 );
338 #[allow(clippy::cast_possible_truncation)]
339 let expected_size = blob.len() as u64;
340 assert_eq!(manifest.outputs[0].size, expected_size);
341 assert_eq!(manifest.outputs[0].mode, 0o644);
342 assert_eq!(
343 manifest.outputs[0].content_hash,
344 hash_bytes(HashAlgo::Blake3, blob)
345 );
346 }
347
348 #[test]
351 fn cache_011_manifest_records_chapter_revision_and_active_hash_function() {
352 let blob = b"x";
353 let on_disk = PathBuf::from(PROJ_OUT_DISK);
354 let fs = fs_with_one_output(&on_disk, blob, 0o600);
355 let cache = make_cache(fs, HashAlgo::Sha256);
356 let key = sample_key();
357 let outs = [StoredOutput {
358 workspace_absolute_path: PROJ_OUT_ABS,
359 on_disk_path: &on_disk,
360 mode: 0o600,
361 }];
362 let inputs = StoreInputs {
363 outputs: &outs,
364 stdout: b"",
365 stderr: b"",
366 created_at_unix: 7,
367 };
368 cache.store(&key, &inputs).unwrap();
369 let manifest = cache.lookup(&key).unwrap();
370 assert_eq!(manifest.chapter_revision, CHAPTER_REVISION);
371 assert_eq!(manifest.hash_function, HashFunctionLabel::Sha256);
372 assert_eq!(manifest.exit_status, 0);
373 assert_eq!(manifest.created_at_unix, 7);
374 assert_eq!(manifest.key, key);
375 }
376
377 #[test]
378 fn cache_011_stream_hashes_match_finalised_hasher_output() {
379 let blob = b"";
380 let on_disk = PathBuf::from(PROJ_OUT_DISK);
381 let fs = fs_with_one_output(&on_disk, blob, 0o644);
382 let cache = make_cache(fs, HashAlgo::Blake3);
383 let key = sample_key();
384 let stdout = b"line on stdout\n".as_ref();
385 let stderr = b"line on stderr\n".as_ref();
386 let outs = [StoredOutput {
387 workspace_absolute_path: PROJ_OUT_ABS,
388 on_disk_path: &on_disk,
389 mode: 0o644,
390 }];
391 let inputs = StoreInputs {
392 outputs: &outs,
393 stdout,
394 stderr,
395 created_at_unix: 0,
396 };
397 cache.store(&key, &inputs).unwrap();
398 let manifest = cache.lookup(&key).unwrap();
399 assert_eq!(manifest.stdout_hash, hash_bytes(HashAlgo::Blake3, stdout));
400 assert_eq!(manifest.stderr_hash, hash_bytes(HashAlgo::Blake3, stderr));
401 #[allow(clippy::cast_possible_truncation)]
402 let stdout_len = stdout.len() as u64;
403 #[allow(clippy::cast_possible_truncation)]
404 let stderr_len = stderr.len() as u64;
405 assert_eq!(manifest.stdout_len, stdout_len);
406 assert_eq!(manifest.stderr_len, stderr_len);
407 }
408
409 #[test]
412 fn cache_017_store_with_no_outputs_and_empty_streams_still_round_trips() {
413 let mut fs = MemFilesystem::new();
414 fs.add_dir("/ws").unwrap();
415 let cache = make_cache(fs, HashAlgo::Blake3);
416 let key = sample_key();
417 let inputs = StoreInputs {
418 outputs: &[],
419 stdout: b"",
420 stderr: b"",
421 created_at_unix: 0,
422 };
423 cache.store(&key, &inputs).unwrap();
424 let manifest = cache
425 .lookup(&key)
426 .expect("zero-output entry is still a hit");
427 assert_eq!(manifest.outputs.len(), 0);
428 assert_eq!(manifest.stdout_len, 0);
429 assert_eq!(manifest.stderr_len, 0);
430 assert_eq!(manifest.stdout_hash, hash_bytes(HashAlgo::Blake3, b""));
433 assert_eq!(manifest.stderr_hash, hash_bytes(HashAlgo::Blake3, b""));
434 }
435
436 #[test]
437 fn cache_017_store_with_multiple_outputs_records_them_in_order() {
438 let mut fs = MemFilesystem::new();
439 fs.add_dir("/ws/proj").unwrap();
440 fs.add_file_with_mode("/ws/proj/a", b"alpha".to_vec(), 0o644)
441 .unwrap();
442 fs.add_file_with_mode("/ws/proj/b", b"beta-bytes".to_vec(), 0o755)
443 .unwrap();
444 let cache = make_cache(fs, HashAlgo::Blake3);
445 let key = sample_key();
446
447 let on_a = PathBuf::from("/ws/proj/a");
448 let on_b = PathBuf::from("/ws/proj/b");
449 let outs = [
450 StoredOutput {
451 workspace_absolute_path: "/proj/a",
452 on_disk_path: &on_a,
453 mode: 0o644,
454 },
455 StoredOutput {
456 workspace_absolute_path: "/proj/b",
457 on_disk_path: &on_b,
458 mode: 0o755,
459 },
460 ];
461 let inputs = StoreInputs {
462 outputs: &outs,
463 stdout: b"",
464 stderr: b"",
465 created_at_unix: 0,
466 };
467 cache.store(&key, &inputs).unwrap();
468 let manifest = cache.lookup(&key).unwrap();
469 assert_eq!(manifest.outputs.len(), 2);
470 assert_eq!(
471 manifest.outputs[0].workspace_absolute_path.to_string(),
472 "/proj/a"
473 );
474 assert_eq!(manifest.outputs[0].mode, 0o644);
475 assert_eq!(
476 manifest.outputs[1].workspace_absolute_path.to_string(),
477 "/proj/b"
478 );
479 assert_eq!(manifest.outputs[1].mode, 0o755);
480 }
481
482 #[test]
485 fn cache_011_after_store_blob_file_has_recorded_mode() {
486 let blob = b"executable";
487 let on_disk = PathBuf::from(PROJ_OUT_DISK);
488 let fs = fs_with_one_output(&on_disk, blob, 0o755);
489 let cache = make_cache(fs, HashAlgo::Blake3);
490 let key = sample_key();
491 let outs = [StoredOutput {
492 workspace_absolute_path: PROJ_OUT_ABS,
493 on_disk_path: &on_disk,
494 mode: 0o755,
495 }];
496 let inputs = StoreInputs {
497 outputs: &outs,
498 stdout: b"",
499 stderr: b"",
500 created_at_unix: 0,
501 };
502 cache.store(&key, &inputs).unwrap();
503
504 let content_hash = hash_bytes(HashAlgo::Blake3, blob);
505 let blob_path = layout::output_blob_path(cache.cache_root(), &key, &content_hash);
506 let mode = cache.fs().mode_of(&blob_path).unwrap();
507 assert_eq!(mode, 0o755);
508 }
509
510 #[test]
511 fn cache_017_after_store_tmp_directory_no_longer_exists() {
512 let blob = b"";
513 let on_disk = PathBuf::from(PROJ_OUT_DISK);
514 let fs = fs_with_one_output(&on_disk, blob, 0o644);
515 let cache = make_cache(fs, HashAlgo::Blake3);
516 let key = sample_key();
517 let outs = [StoredOutput {
518 workspace_absolute_path: PROJ_OUT_ABS,
519 on_disk_path: &on_disk,
520 mode: 0o644,
521 }];
522 cache
523 .store(
524 &key,
525 &StoreInputs {
526 outputs: &outs,
527 stdout: b"",
528 stderr: b"",
529 created_at_unix: 0,
530 },
531 )
532 .unwrap();
533
534 let shard = layout::shard_dir(cache.cache_root(), &key);
538 let mut saw_entry = false;
539 for entry in cache.fs().read_dir(&shard).unwrap() {
540 let name = entry
541 .path
542 .file_name()
543 .unwrap()
544 .to_string_lossy()
545 .into_owned();
546 assert!(
547 !name.starts_with(".tmp-"),
548 "expected no tmp directory after a successful store, found: {name}"
549 );
550 if name == key.to_hex() {
551 saw_entry = true;
552 assert_eq!(entry.metadata.kind, EntryKind::Dir);
553 }
554 }
555 assert!(saw_entry, "final entry directory must be present");
556 }
557
558 #[test]
561 fn cache_014_second_store_of_same_key_overwrites_and_remains_a_hit() {
562 let blob_v1 = b"v1";
563 let on_disk = PathBuf::from(PROJ_OUT_DISK);
564 let fs = fs_with_one_output(&on_disk, blob_v1, 0o644);
565 let cache = make_cache(fs, HashAlgo::Blake3);
566 let key = sample_key();
567 let outs_v1 = [StoredOutput {
568 workspace_absolute_path: PROJ_OUT_ABS,
569 on_disk_path: &on_disk,
570 mode: 0o644,
571 }];
572 cache
573 .store(
574 &key,
575 &StoreInputs {
576 outputs: &outs_v1,
577 stdout: b"first",
578 stderr: b"first-err",
579 created_at_unix: 1,
580 },
581 )
582 .unwrap();
583
584 cache.fs().write_file(&on_disk, b"v2-longer").unwrap();
588 cache.fs().set_permissions(&on_disk, 0o644).unwrap();
589
590 cache
591 .store(
592 &key,
593 &StoreInputs {
594 outputs: &outs_v1,
595 stdout: b"second",
596 stderr: b"second-err",
597 created_at_unix: 2,
598 },
599 )
600 .unwrap();
601
602 let manifest = cache
603 .lookup(&key)
604 .expect("entry must still hit after a second store");
605 assert_eq!(manifest.stdout_len, b"second".len() as u64);
606 assert_eq!(manifest.created_at_unix, 2);
607 assert_eq!(
608 manifest.outputs[0].content_hash,
609 hash_bytes(HashAlgo::Blake3, b"v2-longer")
610 );
611 }
612
613 #[test]
616 fn store_propagates_missing_output_file_as_io_error() {
617 let mut fs = MemFilesystem::new();
620 fs.add_dir("/ws").unwrap();
621 let cache = make_cache(fs, HashAlgo::Blake3);
622 let key = sample_key();
623 let on_disk = PathBuf::from("/ws/missing");
624 let outs = [StoredOutput {
625 workspace_absolute_path: "/missing",
626 on_disk_path: &on_disk,
627 mode: 0o644,
628 }];
629 let err = cache
630 .store(
631 &key,
632 &StoreInputs {
633 outputs: &outs,
634 stdout: b"",
635 stderr: b"",
636 created_at_unix: 0,
637 },
638 )
639 .unwrap_err();
640 let msg = format!("{err}");
641 assert!(msg.contains("filesystem error"), "got: {msg}");
642 assert!(cache.lookup(&key).is_none());
644 }
645
646 #[test]
647 fn store_rejects_output_with_traversal_in_workspace_absolute_path() {
648 let mut fs = MemFilesystem::new();
649 fs.add_dir("/ws/proj").unwrap();
650 fs.add_file_with_mode("/ws/proj/out", b"x".to_vec(), 0o644)
651 .unwrap();
652 let cache = make_cache(fs, HashAlgo::Blake3);
653 let key = sample_key();
654 let on_disk = PathBuf::from("/ws/proj/out");
655 let outs = [StoredOutput {
656 workspace_absolute_path: "/proj/../etc/passwd",
657 on_disk_path: &on_disk,
658 mode: 0o644,
659 }];
660 let err = cache
661 .store(
662 &key,
663 &StoreInputs {
664 outputs: &outs,
665 stdout: b"",
666 stderr: b"",
667 created_at_unix: 0,
668 },
669 )
670 .unwrap_err();
671 assert!(
672 matches!(err, StoreError::InvalidOutputPath { .. }),
673 "expected InvalidOutputPath, got {err:?}"
674 );
675 assert!(cache.lookup(&key).is_none());
677 }
678
679 #[test]
680 fn store_rejects_output_with_project_relative_workspace_absolute_path() {
681 let mut fs = MemFilesystem::new();
682 fs.add_dir("/ws/proj").unwrap();
683 fs.add_file_with_mode("/ws/proj/out", b"x".to_vec(), 0o644)
684 .unwrap();
685 let cache = make_cache(fs, HashAlgo::Blake3);
686 let key = sample_key();
687 let on_disk = PathBuf::from("/ws/proj/out");
688 let outs = [StoredOutput {
689 workspace_absolute_path: "proj/out", on_disk_path: &on_disk,
691 mode: 0o644,
692 }];
693 let err = cache
694 .store(
695 &key,
696 &StoreInputs {
697 outputs: &outs,
698 stdout: b"",
699 stderr: b"",
700 created_at_unix: 0,
701 },
702 )
703 .unwrap_err();
704 assert!(
705 matches!(
706 err,
707 StoreError::InvalidOutputPath {
708 source: ParseAbsoluteError::NotWorkspaceAbsolute,
709 ..
710 }
711 ),
712 "expected NotWorkspaceAbsolute, got {err:?}"
713 );
714 }
715}