1use std::path::Path;
40
41use haz_domain::path::{CanonicalPath, ParseAbsoluteError};
42use haz_domain::settings::cache::HashAlgo;
43use haz_vfs::{FsError, WritableFilesystem};
44use snafu::{ResultExt, Snafu};
45
46use crate::hasher::Hasher;
47use crate::hex;
48use crate::key::CacheKey;
49use crate::key::prefix::CHAPTER_REVISION;
50use crate::layout;
51use crate::manifest::{HashFunctionLabel, Manifest, OutputBlob};
52use crate::writer::CacheWriter;
53
/// One task output to be captured into a cache entry.
///
/// `CacheWriter::store` reads the bytes found at `on_disk_path`, writes them
/// into the entry as a blob named after their content hash, and records
/// `workspace_absolute_path` and `mode` in the manifest.
#[derive(Clone, Copy, Debug)]
pub struct StoredOutput<'a> {
    /// Path recorded for this output in the manifest. It must be accepted by
    /// `CanonicalPath::parse_workspace_absolute`, otherwise the store fails
    /// with `StoreError::InvalidOutputPath`.
    pub workspace_absolute_path: &'a str,
    /// Location the output's bytes are read from while storing.
    pub on_disk_path: &'a Path,
    /// Permission bits applied to the stored blob and recorded in the manifest.
    pub mode: u32,
}
76
77#[derive(Debug, Clone, Copy)]
79pub struct StoreInputs<'a> {
80 pub outputs: &'a [StoredOutput<'a>],
82 pub stdout: &'a [u8],
84 pub stderr: &'a [u8],
86 pub created_at_unix: u64,
92}
93
/// Errors returned by `CacheWriter::store`.
#[derive(Debug, Snafu)]
pub enum StoreError {
    /// A filesystem operation performed while writing the entry failed.
    #[snafu(display("filesystem error during cache store: {source}"))]
    Io {
        /// The underlying filesystem error.
        source: FsError,
    },

    /// An output's `workspace_absolute_path` was rejected by
    /// `CanonicalPath::parse_workspace_absolute`.
    #[snafu(display("invalid workspace-absolute output path '{path}': {source}"))]
    InvalidOutputPath {
        /// The rejected path string, exactly as supplied by the caller.
        path: String,
        /// The parse error describing why the path was rejected.
        source: ParseAbsoluteError,
    },
}
121
122impl<Fs: WritableFilesystem> CacheWriter<Fs> {
123 pub fn store(&self, key: &CacheKey, inputs: &StoreInputs<'_>) -> Result<(), StoreError> {
144 let suffix = random_suffix_hex();
145 let shard_dir = layout::shard_dir(self.cache_root(), key);
146 let tmp_dir = layout::tmp_entry_dir(self.cache_root(), key, &suffix);
147 let outputs_dir = tmp_dir.join(layout::OUTPUTS_SUBDIR);
148
149 self.fs().create_dir_all(&outputs_dir).context(IoSnafu)?;
150
151 let manifest_outputs = self.write_output_blobs(&outputs_dir, inputs.outputs)?;
152
153 let stdout_path = tmp_dir.join(layout::STDOUT_FILE_NAME);
154 self.fs()
155 .write_file(&stdout_path, inputs.stdout)
156 .context(IoSnafu)?;
157 self.fs().fsync_file(&stdout_path).context(IoSnafu)?;
158
159 let stderr_path = tmp_dir.join(layout::STDERR_FILE_NAME);
160 self.fs()
161 .write_file(&stderr_path, inputs.stderr)
162 .context(IoSnafu)?;
163 self.fs().fsync_file(&stderr_path).context(IoSnafu)?;
164
165 let stdout_hash = hash_bytes(self.hash_algo(), inputs.stdout);
166 let stderr_hash = hash_bytes(self.hash_algo(), inputs.stderr);
167
168 #[allow(clippy::cast_possible_truncation)]
169 let stdout_len = inputs.stdout.len() as u64;
170 #[allow(clippy::cast_possible_truncation)]
171 let stderr_len = inputs.stderr.len() as u64;
172
173 let manifest = Manifest {
174 chapter_revision: CHAPTER_REVISION,
175 hash_function: HashFunctionLabel::from(self.hash_algo()),
176 key: *key,
177 outputs: manifest_outputs,
178 stdout_len,
179 stderr_len,
180 stdout_hash,
181 stderr_hash,
182 exit_status: 0,
183 created_at_unix: inputs.created_at_unix,
184 };
185
186 let manifest_path = tmp_dir.join(layout::MANIFEST_FILE_NAME);
187 self.fs()
188 .write_file(&manifest_path, &manifest.to_json_bytes())
189 .context(IoSnafu)?;
190 self.fs().fsync_file(&manifest_path).context(IoSnafu)?;
191 self.fs().fsync_dir(&tmp_dir).context(IoSnafu)?;
192
193 let entry_dir = layout::entry_dir(self.cache_root(), key);
194 match self.fs().remove_dir_all(&entry_dir) {
195 Ok(()) | Err(FsError::NotFound { .. }) => {}
196 Err(e) => return Err(StoreError::Io { source: e }),
197 }
198 self.fs().rename(&tmp_dir, &entry_dir).context(IoSnafu)?;
199 self.fs().fsync_dir(&shard_dir).context(IoSnafu)?;
200
201 Ok(())
202 }
203
204 fn write_output_blobs(
210 &self,
211 outputs_dir: &Path,
212 outputs: &[StoredOutput<'_>],
213 ) -> Result<Vec<OutputBlob>, StoreError> {
214 let mut entries = Vec::with_capacity(outputs.len());
215 for out in outputs {
216 let workspace_absolute_path = CanonicalPath::parse_workspace_absolute(
217 out.workspace_absolute_path,
218 )
219 .map_err(|source| StoreError::InvalidOutputPath {
220 path: out.workspace_absolute_path.to_owned(),
221 source,
222 })?;
223
224 let bytes = self.fs().read(out.on_disk_path).context(IoSnafu)?;
225 let content_hash = hash_bytes(self.hash_algo(), &bytes);
226
227 let blob_path = outputs_dir.join(hex::encode_32(&content_hash));
228 self.fs().write_file(&blob_path, &bytes).context(IoSnafu)?;
229 self.fs()
230 .set_permissions(&blob_path, out.mode)
231 .context(IoSnafu)?;
232 self.fs().fsync_file(&blob_path).context(IoSnafu)?;
233
234 #[allow(clippy::cast_possible_truncation)]
235 let size = bytes.len() as u64;
236 entries.push(OutputBlob {
237 workspace_absolute_path,
238 content_hash,
239 size,
240 mode: out.mode,
241 });
242 }
243 Ok(entries)
244 }
245}
246
247fn random_suffix_hex() -> String {
253 let r: u64 = rand::random();
254 format!("{r:016x}")
255}
256
257fn hash_bytes(algo: HashAlgo, data: &[u8]) -> [u8; 32] {
258 let mut h = Hasher::new(algo);
259 h.update(data);
260 h.finalize()
261}
262
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    use haz_domain::path::ParseAbsoluteError;
    use haz_domain::settings::cache::HashAlgo;
    use haz_vfs::{EntryKind, Filesystem, WritableFilesystem};
    use haz_vfs_testing::MemFilesystem;

    use crate::hasher::Hasher;
    use crate::key::CacheKey;
    use crate::key::prefix::CHAPTER_REVISION;
    use crate::layout;
    use crate::manifest::HashFunctionLabel;
    use crate::store::{StoreError, StoreInputs, StoredOutput};
    use crate::writer::CacheWriter;

    /// Workspace root handed to every `CacheWriter` built by these tests.
    const WORKSPACE_ROOT: &str = "/ws";
    /// Workspace-absolute path of the default single output.
    const PROJ_OUT_ABS: &str = "/proj/out";
    /// On-disk location of the default single output.
    const PROJ_OUT_DISK: &str = "/ws/proj/out";

    /// A fixed key whose first two bytes are `0xAB 0xCD` and the rest zero.
    fn sample_key() -> CacheKey {
        let mut bytes = [0u8; 32];
        bytes[0] = 0xAB;
        bytes[1] = 0xCD;
        CacheKey::from_bytes(bytes)
    }

    /// Computes the expected digest of `data` for assertions.
    fn hash_bytes(algo: HashAlgo, data: &[u8]) -> [u8; 32] {
        let mut hasher = Hasher::new(algo);
        hasher.update(data);
        hasher.finalize()
    }

    /// Builds an in-memory filesystem holding one file at `path`.
    fn fs_with_one_output(path: &Path, bytes: &[u8], mode: u32) -> MemFilesystem {
        let mut fs = MemFilesystem::new();
        fs.add_dir(path.parent().unwrap()).unwrap();
        fs.add_file_with_mode(path, bytes.to_vec(), mode).unwrap();
        fs
    }

    /// Wraps `fs` in a writer rooted at [`WORKSPACE_ROOT`].
    fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> CacheWriter<MemFilesystem> {
        CacheWriter::new(fs, Path::new(WORKSPACE_ROOT), algo)
    }

    /// Describes the default output ([`PROJ_OUT_ABS`]) backed by `on_disk`.
    fn proj_out(on_disk: &Path, mode: u32) -> StoredOutput<'_> {
        StoredOutput {
            workspace_absolute_path: PROJ_OUT_ABS,
            on_disk_path: on_disk,
            mode,
        }
    }

    /// Store inputs with empty stdout/stderr and a zero timestamp.
    fn empty_stream_inputs<'a>(outputs: &'a [StoredOutput<'a>]) -> StoreInputs<'a> {
        StoreInputs {
            outputs,
            stdout: b"",
            stderr: b"",
            created_at_unix: 0,
        }
    }

    /// A stored entry is found by the reader and describes the output.
    #[test]
    fn cache_017_store_then_lookup_round_trips() {
        let blob = b"output-bytes-v1";
        let on_disk = PathBuf::from(PROJ_OUT_DISK);
        let cache = make_cache(fs_with_one_output(&on_disk, blob, 0o644), HashAlgo::Blake3);
        let key = sample_key();

        let outs = [proj_out(&on_disk, 0o644)];
        let inputs = StoreInputs {
            outputs: &outs,
            stdout: b"hello, stdout",
            stderr: b"hello, stderr",
            created_at_unix: 1_715_700_000,
        };
        cache.store(&key, &inputs).unwrap();

        let manifest = cache
            .reader()
            .lookup(&key)
            .expect("expected a hit after store");
        assert_eq!(manifest.outputs.len(), 1);
        let recorded = &manifest.outputs[0];
        assert_eq!(recorded.workspace_absolute_path.to_string(), PROJ_OUT_ABS);
        #[allow(clippy::cast_possible_truncation)]
        let expected_size = blob.len() as u64;
        assert_eq!(recorded.size, expected_size);
        assert_eq!(recorded.mode, 0o644);
        assert_eq!(recorded.content_hash, hash_bytes(HashAlgo::Blake3, blob));
    }

    /// The manifest carries the chapter revision, the writer's hash function,
    /// the key, the timestamp and a zero exit status.
    #[test]
    fn cache_011_manifest_records_chapter_revision_and_active_hash_function() {
        let on_disk = PathBuf::from(PROJ_OUT_DISK);
        let cache = make_cache(fs_with_one_output(&on_disk, b"x", 0o600), HashAlgo::Sha256);
        let key = sample_key();

        let outs = [proj_out(&on_disk, 0o600)];
        let inputs = StoreInputs {
            outputs: &outs,
            stdout: b"",
            stderr: b"",
            created_at_unix: 7,
        };
        cache.store(&key, &inputs).unwrap();

        let manifest = cache.reader().lookup(&key).unwrap();
        assert_eq!(manifest.chapter_revision, CHAPTER_REVISION);
        assert_eq!(manifest.hash_function, HashFunctionLabel::Sha256);
        assert_eq!(manifest.exit_status, 0);
        assert_eq!(manifest.created_at_unix, 7);
        assert_eq!(manifest.key, key);
    }

    /// Recorded stream hashes and lengths match the supplied stdout/stderr.
    #[test]
    fn cache_011_stream_hashes_match_finalised_hasher_output() {
        let on_disk = PathBuf::from(PROJ_OUT_DISK);
        let cache = make_cache(fs_with_one_output(&on_disk, b"", 0o644), HashAlgo::Blake3);
        let key = sample_key();
        let stdout = b"line on stdout\n".as_ref();
        let stderr = b"line on stderr\n".as_ref();

        let outs = [proj_out(&on_disk, 0o644)];
        let inputs = StoreInputs {
            outputs: &outs,
            stdout,
            stderr,
            created_at_unix: 0,
        };
        cache.store(&key, &inputs).unwrap();

        let manifest = cache.reader().lookup(&key).unwrap();
        assert_eq!(manifest.stdout_hash, hash_bytes(HashAlgo::Blake3, stdout));
        assert_eq!(manifest.stderr_hash, hash_bytes(HashAlgo::Blake3, stderr));
        #[allow(clippy::cast_possible_truncation)]
        let stdout_len = stdout.len() as u64;
        #[allow(clippy::cast_possible_truncation)]
        let stderr_len = stderr.len() as u64;
        assert_eq!(manifest.stdout_len, stdout_len);
        assert_eq!(manifest.stderr_len, stderr_len);
    }

    /// An entry with no outputs and empty streams is still a hit.
    #[test]
    fn cache_017_store_with_no_outputs_and_empty_streams_still_round_trips() {
        let mut fs = MemFilesystem::new();
        fs.add_dir("/ws").unwrap();
        let cache = make_cache(fs, HashAlgo::Blake3);
        let key = sample_key();

        cache.store(&key, &empty_stream_inputs(&[])).unwrap();

        let manifest = cache
            .reader()
            .lookup(&key)
            .expect("zero-output entry is still a hit");
        assert_eq!(manifest.outputs.len(), 0);
        assert_eq!(manifest.stdout_len, 0);
        assert_eq!(manifest.stderr_len, 0);
        assert_eq!(manifest.stdout_hash, hash_bytes(HashAlgo::Blake3, b""));
        assert_eq!(manifest.stderr_hash, hash_bytes(HashAlgo::Blake3, b""));
    }

    /// Outputs appear in the manifest in the order they were supplied.
    #[test]
    fn cache_017_store_with_multiple_outputs_records_them_in_order() {
        let mut fs = MemFilesystem::new();
        fs.add_dir("/ws/proj").unwrap();
        fs.add_file_with_mode("/ws/proj/a", b"alpha".to_vec(), 0o644)
            .unwrap();
        fs.add_file_with_mode("/ws/proj/b", b"beta-bytes".to_vec(), 0o755)
            .unwrap();
        let cache = make_cache(fs, HashAlgo::Blake3);
        let key = sample_key();

        let on_a = PathBuf::from("/ws/proj/a");
        let on_b = PathBuf::from("/ws/proj/b");
        let outs = [
            StoredOutput {
                workspace_absolute_path: "/proj/a",
                on_disk_path: &on_a,
                mode: 0o644,
            },
            StoredOutput {
                workspace_absolute_path: "/proj/b",
                on_disk_path: &on_b,
                mode: 0o755,
            },
        ];
        cache.store(&key, &empty_stream_inputs(&outs)).unwrap();

        let manifest = cache.reader().lookup(&key).unwrap();
        assert_eq!(manifest.outputs.len(), 2);
        assert_eq!(
            manifest.outputs[0].workspace_absolute_path.to_string(),
            "/proj/a"
        );
        assert_eq!(manifest.outputs[0].mode, 0o644);
        assert_eq!(
            manifest.outputs[1].workspace_absolute_path.to_string(),
            "/proj/b"
        );
        assert_eq!(manifest.outputs[1].mode, 0o755);
    }

    /// The blob file on disk carries the mode given for the output.
    #[test]
    fn cache_011_after_store_blob_file_has_recorded_mode() {
        let blob = b"executable";
        let on_disk = PathBuf::from(PROJ_OUT_DISK);
        let cache = make_cache(fs_with_one_output(&on_disk, blob, 0o755), HashAlgo::Blake3);
        let key = sample_key();

        let outs = [proj_out(&on_disk, 0o755)];
        cache.store(&key, &empty_stream_inputs(&outs)).unwrap();

        let content_hash = hash_bytes(HashAlgo::Blake3, blob);
        let blob_path = layout::output_blob_path(cache.cache_root(), &key, &content_hash);
        let mode = cache.fs().mode_of(&blob_path).unwrap();
        assert_eq!(mode, 0o755);
    }

    /// After a successful store the shard holds the entry directory and no
    /// `.tmp-` staging directory.
    #[test]
    fn cache_017_after_store_tmp_directory_no_longer_exists() {
        let on_disk = PathBuf::from(PROJ_OUT_DISK);
        let cache = make_cache(fs_with_one_output(&on_disk, b"", 0o644), HashAlgo::Blake3);
        let key = sample_key();

        let outs = [proj_out(&on_disk, 0o644)];
        cache.store(&key, &empty_stream_inputs(&outs)).unwrap();

        let shard = layout::shard_dir(cache.cache_root(), &key);
        let mut saw_entry = false;
        for entry in cache.fs().read_dir(&shard).unwrap() {
            let name = entry
                .path
                .file_name()
                .unwrap()
                .to_string_lossy()
                .into_owned();
            assert!(
                !name.starts_with(".tmp-"),
                "expected no tmp directory after a successful store, found: {name}"
            );
            if name == key.to_hex() {
                saw_entry = true;
                assert_eq!(entry.metadata.kind, EntryKind::Dir);
            }
        }
        assert!(saw_entry, "final entry directory must be present");
    }

    /// Storing twice under one key leaves the second entry in place.
    #[test]
    fn cache_014_second_store_of_same_key_overwrites_and_remains_a_hit() {
        let on_disk = PathBuf::from(PROJ_OUT_DISK);
        let cache = make_cache(fs_with_one_output(&on_disk, b"v1", 0o644), HashAlgo::Blake3);
        let key = sample_key();
        let outs_v1 = [proj_out(&on_disk, 0o644)];

        cache
            .store(
                &key,
                &StoreInputs {
                    outputs: &outs_v1,
                    stdout: b"first",
                    stderr: b"first-err",
                    created_at_unix: 1,
                },
            )
            .unwrap();

        // Change the output on disk so the second store captures new bytes.
        cache.fs().write_file(&on_disk, b"v2-longer").unwrap();
        cache.fs().set_permissions(&on_disk, 0o644).unwrap();

        cache
            .store(
                &key,
                &StoreInputs {
                    outputs: &outs_v1,
                    stdout: b"second",
                    stderr: b"second-err",
                    created_at_unix: 2,
                },
            )
            .unwrap();

        let manifest = cache
            .reader()
            .lookup(&key)
            .expect("entry must still hit after a second store");
        assert_eq!(manifest.stdout_len, b"second".len() as u64);
        assert_eq!(manifest.created_at_unix, 2);
        assert_eq!(
            manifest.outputs[0].content_hash,
            hash_bytes(HashAlgo::Blake3, b"v2-longer")
        );
    }

    /// A missing output file surfaces as `StoreError::Io` and publishes nothing.
    #[test]
    fn store_propagates_missing_output_file_as_io_error() {
        let mut fs = MemFilesystem::new();
        fs.add_dir("/ws").unwrap();
        let cache = make_cache(fs, HashAlgo::Blake3);
        let key = sample_key();
        let on_disk = PathBuf::from("/ws/missing");
        let outs = [StoredOutput {
            workspace_absolute_path: "/missing",
            on_disk_path: &on_disk,
            mode: 0o644,
        }];

        let err = cache
            .store(&key, &empty_stream_inputs(&outs))
            .unwrap_err();

        // Check the variant as well as the rendered message, so a wording
        // change alone cannot hide a wrong error kind.
        assert!(
            matches!(err, StoreError::Io { .. }),
            "expected Io, got {err:?}"
        );
        let msg = format!("{err}");
        assert!(msg.contains("filesystem error"), "got: {msg}");
        assert!(cache.reader().lookup(&key).is_none());
    }

    /// A `..` component in the workspace-absolute path is rejected and
    /// publishes nothing.
    #[test]
    fn store_rejects_output_with_traversal_in_workspace_absolute_path() {
        let mut fs = MemFilesystem::new();
        fs.add_dir("/ws/proj").unwrap();
        fs.add_file_with_mode("/ws/proj/out", b"x".to_vec(), 0o644)
            .unwrap();
        let cache = make_cache(fs, HashAlgo::Blake3);
        let key = sample_key();
        let on_disk = PathBuf::from("/ws/proj/out");
        let outs = [StoredOutput {
            workspace_absolute_path: "/proj/../etc/passwd",
            on_disk_path: &on_disk,
            mode: 0o644,
        }];

        let err = cache
            .store(&key, &empty_stream_inputs(&outs))
            .unwrap_err();

        assert!(
            matches!(err, StoreError::InvalidOutputPath { .. }),
            "expected InvalidOutputPath, got {err:?}"
        );
        assert!(cache.reader().lookup(&key).is_none());
    }

    /// A path without a leading `/` is rejected as not workspace-absolute and
    /// publishes nothing.
    #[test]
    fn store_rejects_output_with_project_relative_workspace_absolute_path() {
        let mut fs = MemFilesystem::new();
        fs.add_dir("/ws/proj").unwrap();
        fs.add_file_with_mode("/ws/proj/out", b"x".to_vec(), 0o644)
            .unwrap();
        let cache = make_cache(fs, HashAlgo::Blake3);
        let key = sample_key();
        let on_disk = PathBuf::from("/ws/proj/out");
        let outs = [StoredOutput {
            workspace_absolute_path: "proj/out",
            on_disk_path: &on_disk,
            mode: 0o644,
        }];

        let err = cache
            .store(&key, &empty_stream_inputs(&outs))
            .unwrap_err();

        assert!(
            matches!(
                err,
                StoreError::InvalidOutputPath {
                    source: ParseAbsoluteError::NotWorkspaceAbsolute,
                    ..
                }
            ),
            "expected NotWorkspaceAbsolute, got {err:?}"
        );
        // Same guarantee the other rejection tests check: a rejected store
        // must not leave a visible entry behind.
        assert!(cache.reader().lookup(&key).is_none());
    }
}