Skip to main content

haz_cache/
restore.rs

1//! [`Cache::restore`] per `CACHE-019` and `CACHE-020`.
2//!
3//! Restoration materialises the recorded outputs of a cache
4//! entry at their workspace-absolute paths and returns the
5//! captured `stdout`/`stderr` byte streams for the caller to
6//! emit. The caller drives stream emission (subject to the
7//! task's `output.mode`); the cache layer is only responsible
8//! for producing the bytes and publishing the files.
9//!
10//! Two-phase publish per `CACHE-020`:
11//!
12//! 1. **Stage.** A per-restore directory under [`crate::layout::cache_root`]
13//!    named `.restore-<hex-key>-<random>/` collects every output
14//!    blob, byte-identical to what the entry directory records.
15//!    Each staged file is written with the recorded mode and
16//!    `fsync`-ed. The target's parent directory is created in
17//!    this phase too, so phase 2 can be pure renames.
18//! 2. **Publish.** Each staged file is renamed onto its target
19//!    workspace-absolute path. Renames are atomic on the host
20//!    filesystem (same FS, since both source and target sit
21//!    under `<workspace_root>`).
22//!
23//! The cache holds the workspace root so that
24//! `workspace_absolute_path` strings recorded in the manifest
25//! (rooted at `/`) can be mapped to real filesystem paths.
26//!
//! Failure handling honours the spec's "all or nothing" intent on a
//! best-effort basis: every failure inside `restore` returns an error,
//! and the staging directory is wiped on the way out (success or
30//! failure) so transient publishing state is not leaked. If a
31//! failure occurs partway through phase 2, some targets are
32//! published and others are not; the caller is expected to treat
33//! the result as a miss and re-run the task fresh, per
34//! `CACHE-020` second paragraph.
35
36use std::path::{Path, PathBuf};
37
38use haz_domain::path::CanonicalPath;
39use haz_vfs::{FsError, WritableFilesystem};
40use snafu::{ResultExt, Snafu};
41
42use crate::cache::Cache;
43use crate::layout;
44use crate::manifest::Manifest;
45
/// The two captured output streams handed back by a successful
/// restore.
///
/// The cache only produces the bytes; how (or whether) they are
/// emitted is the caller's decision, driven by the task's
/// configured `output.mode` (`CACHE-019` steps 2 and 3).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RestoredStreams {
    /// Bytes recorded from the original run's `stdout`.
    pub stdout: Vec<u8>,
    /// Bytes recorded from the original run's `stderr`.
    pub stderr: Vec<u8>,
}
57
/// Failure modes for [`Cache::restore`].
///
/// There is currently a single variant: every fallible step in
/// this module is a filesystem call whose error is wrapped with
/// `.context(IoSnafu)`.
#[derive(Debug, Snafu)]
pub enum RestoreError {
    /// Underlying filesystem error during one of the restore
    /// phases (reading a cached blob, staging the copy, creating
    /// the target's parent directory, renaming onto the target).
    /// The wrapped [`FsError`] carries the specific path.
    ///
    /// Rendered as `filesystem error during cache restore: <source>`.
    #[snafu(display("filesystem error during cache restore: {source}"))]
    Io {
        /// The originating filesystem error.
        source: FsError,
    },
}
71
72impl<Fs: WritableFilesystem> Cache<Fs> {
73    /// Restore the cache entry described by `manifest` per
74    /// `CACHE-019`.
75    ///
76    /// Materialises every output declared in the manifest at its
77    /// workspace-absolute path with the recorded mode and
78    /// returns the captured `stdout`/`stderr` bytes. The caller
79    /// MUST have just obtained `manifest` from
80    /// [`Cache::lookup`]; the cache trusts the manifest content
81    /// (paths, content hashes, sizes, modes) as truth and does
82    /// not re-verify it.
83    ///
84    /// On error, the staging directory under the cache root is
85    /// removed regardless of which phase failed, so no transient
86    /// scratch space leaks. If the error occurs after one or
87    /// more targets have already been renamed onto, those
88    /// targets remain published; the caller MUST treat the error
89    /// as a miss and re-run the task fresh
90    /// (`CACHE-020` second paragraph).
91    ///
92    /// # Errors
93    ///
94    /// Returns [`RestoreError::Io`] wrapping the underlying
95    /// [`FsError`] if any filesystem operation along the phases
96    /// fails.
97    pub fn restore(&self, manifest: &Manifest) -> Result<RestoredStreams, RestoreError> {
98        let suffix = random_suffix_hex();
99        let stage_dir = layout::restore_staging_dir(self.cache_root(), &manifest.key, &suffix);
100        let result = self.restore_inner(manifest, &stage_dir);
101        // Best-effort cleanup of the staging directory. On
102        // success it is already empty after every rename; on
103        // failure it may hold staged files that never made it to
104        // their targets. Either way we drop it. We deliberately
105        // ignore errors here; the caller is informed of the
106        // primary failure via `result`.
107        let _ = self.fs().remove_dir_all(&stage_dir);
108        result
109    }
110
111    fn restore_inner(
112        &self,
113        manifest: &Manifest,
114        stage_dir: &Path,
115    ) -> Result<RestoredStreams, RestoreError> {
116        self.fs().create_dir_all(stage_dir).context(IoSnafu)?;
117
118        let stdout = self
119            .fs()
120            .read(&layout::stdout_path(self.cache_root(), &manifest.key))
121            .context(IoSnafu)?;
122        let stderr = self
123            .fs()
124            .read(&layout::stderr_path(self.cache_root(), &manifest.key))
125            .context(IoSnafu)?;
126
127        // Stage outputs into the sibling restore directory and
128        // prepare each target's parent.
129        let mut planned: Vec<(PathBuf, PathBuf)> = Vec::with_capacity(manifest.outputs.len());
130        for (i, blob) in manifest.outputs.iter().enumerate() {
131            let src =
132                layout::output_blob_path(self.cache_root(), &manifest.key, &blob.content_hash);
133            let bytes = self.fs().read(&src).context(IoSnafu)?;
134            let staged = stage_dir.join(format!("{i:08}"));
135            self.fs().write_file(&staged, &bytes).context(IoSnafu)?;
136            self.fs()
137                .set_permissions(&staged, blob.mode)
138                .context(IoSnafu)?;
139            self.fs().fsync_file(&staged).context(IoSnafu)?;
140
141            let target =
142                workspace_path_from_canonical(self.workspace_root(), &blob.workspace_absolute_path);
143            if let Some(parent) = target.parent() {
144                self.fs().create_dir_all(parent).context(IoSnafu)?;
145            }
146            planned.push((staged, target));
147        }
148
149        // Atomically publish each staged file into place.
150        for (staged, target) in &planned {
151            self.fs().rename(staged, target).context(IoSnafu)?;
152        }
153
154        Ok(RestoredStreams { stdout, stderr })
155    }
156}
157
158/// Map a workspace-anchored [`CanonicalPath`] onto a real
159/// filesystem path by joining each validated segment onto
160/// `workspace_root`.
161///
162/// Walks segments rather than concatenating the rendered string,
163/// so a host-OS separator inside a single segment cannot be
164/// reinterpreted as a separator. This is belt-and-braces: the
165/// [`CanonicalPath`] type's construction already rejects
166/// segments that contain `/` (`PATH-002`) or that resolve to `.`
167/// or `..`. The segment walk preserves that invariant across the
168/// boundary into [`std::path::PathBuf`].
169fn workspace_path_from_canonical(workspace_root: &Path, canonical: &CanonicalPath) -> PathBuf {
170    let mut p = workspace_root.to_path_buf();
171    for segment in canonical.segments() {
172        p.push(segment.as_str());
173    }
174    p
175}
176
177/// 16 lowercase hex characters of randomness, same shape as the
178/// store-time tmp suffix. Kept module-local rather than shared
179/// with `store.rs` to avoid a thin shared module for a four-line
180/// helper; the two call sites are independent and divergence is
181/// not a concern.
182fn random_suffix_hex() -> String {
183    let r: u64 = rand::random();
184    format!("{r:016x}")
185}
186
187#[cfg(test)]
188mod tests {
189    use std::path::{Path, PathBuf};
190
191    use haz_domain::settings::cache::HashAlgo;
192    use haz_vfs::{Filesystem, MemFilesystem, WritableFilesystem};
193
194    use crate::cache::Cache;
195    use crate::key::CacheKey;
196    use crate::store::{StoreInputs, StoredOutput};
197
198    const WORKSPACE_ROOT: &str = "/ws";
199
200    fn sample_key() -> CacheKey {
201        let mut bytes = [0u8; 32];
202        bytes[0] = 0xAB;
203        bytes[1] = 0xCD;
204        CacheKey::from_bytes(bytes)
205    }
206
207    fn make_cache(fs: MemFilesystem, algo: HashAlgo) -> Cache<MemFilesystem> {
208        Cache::new(fs, Path::new(WORKSPACE_ROOT), algo)
209    }
210
211    /// Build a [`MemFilesystem`] preloaded with a workspace and
212    /// a single output file on disk.
213    fn fs_with_one_output(target: &Path, bytes: &[u8], mode: u32) -> MemFilesystem {
214        let mut fs = MemFilesystem::new();
215        fs.add_dir(target.parent().unwrap()).unwrap();
216        fs.add_file_with_mode(target, bytes.to_vec(), mode).unwrap();
217        fs
218    }
219
220    /// Drive store then restore as the executor would. Returns
221    /// the cache (so tests can inspect on-disk state after) and
222    /// the [`RestoredStreams`] returned by `restore`.
223    fn store_then_restore(
224        fs: MemFilesystem,
225        algo: HashAlgo,
226        outputs: &[StoredOutput<'_>],
227        stdout: &[u8],
228        stderr: &[u8],
229    ) -> (
230        Cache<MemFilesystem>,
231        crate::restore::RestoredStreams,
232        crate::manifest::Manifest,
233    ) {
234        let cache = make_cache(fs, algo);
235        let key = sample_key();
236        let inputs = StoreInputs {
237            outputs,
238            stdout,
239            stderr,
240            created_at_unix: 1_715_700_000,
241        };
242        cache.store(&key, &inputs).unwrap();
243        let manifest = cache.lookup(&key).expect("store should produce a hit");
244        let restored = cache.restore(&manifest).expect("restore should succeed");
245        (cache, restored, manifest)
246    }
247
248    // ---- happy path: round-trip ----
249
250    #[test]
251    fn cache_019_restore_after_store_round_trips_outputs() {
252        let blob = b"hello-world";
253        let target = PathBuf::from("/ws/proj/out");
254        let fs = fs_with_one_output(&target, blob, 0o644);
255
256        let outs = [StoredOutput {
257            workspace_absolute_path: "/proj/out",
258            on_disk_path: &target,
259            mode: 0o644,
260        }];
261        let (cache, _restored, _manifest) = store_then_restore(
262            fs,
263            HashAlgo::Blake3,
264            &outs,
265            b"stdout-bytes",
266            b"stderr-bytes",
267        );
268
269        // Target on disk holds the restored bytes.
270        let got = cache.fs().read(&target).unwrap();
271        assert_eq!(got, blob);
272        let mode = cache.fs().mode_of(&target).unwrap();
273        assert_eq!(mode, 0o644);
274    }
275
276    #[test]
277    fn cache_019_restore_returns_captured_stdout_and_stderr_bytes() {
278        let blob = b"";
279        let target = PathBuf::from("/ws/proj/out");
280        let fs = fs_with_one_output(&target, blob, 0o644);
281        let outs = [StoredOutput {
282            workspace_absolute_path: "/proj/out",
283            on_disk_path: &target,
284            mode: 0o644,
285        }];
286        let (_cache, restored, _manifest) =
287            store_then_restore(fs, HashAlgo::Blake3, &outs, b"out-bytes\n", b"err-bytes\n");
288        assert_eq!(restored.stdout, b"out-bytes\n");
289        assert_eq!(restored.stderr, b"err-bytes\n");
290    }
291
292    // ---- degenerate input shapes ----
293
294    #[test]
295    fn cache_019_restore_with_no_outputs_returns_empty_streams_when_streams_are_empty() {
296        let mut fs = MemFilesystem::new();
297        fs.add_dir("/ws").unwrap();
298        let (_cache, restored, manifest) = store_then_restore(fs, HashAlgo::Blake3, &[], b"", b"");
299        assert!(restored.stdout.is_empty());
300        assert!(restored.stderr.is_empty());
301        assert_eq!(manifest.outputs.len(), 0);
302    }
303
304    #[test]
305    fn cache_019_restore_with_multiple_outputs_materialises_each_at_its_path() {
306        let mut fs = MemFilesystem::new();
307        fs.add_dir("/ws/proj").unwrap();
308        fs.add_file_with_mode("/ws/proj/a", b"alpha".to_vec(), 0o644)
309            .unwrap();
310        fs.add_file_with_mode("/ws/proj/b", b"beta-bytes".to_vec(), 0o755)
311            .unwrap();
312        let on_a = PathBuf::from("/ws/proj/a");
313        let on_b = PathBuf::from("/ws/proj/b");
314        let outs = [
315            StoredOutput {
316                workspace_absolute_path: "/proj/a",
317                on_disk_path: &on_a,
318                mode: 0o644,
319            },
320            StoredOutput {
321                workspace_absolute_path: "/proj/b",
322                on_disk_path: &on_b,
323                mode: 0o755,
324            },
325        ];
326        let (cache, _restored, _manifest) =
327            store_then_restore(fs, HashAlgo::Blake3, &outs, b"", b"");
328        assert_eq!(cache.fs().read(&on_a).unwrap(), b"alpha");
329        assert_eq!(cache.fs().read(&on_b).unwrap(), b"beta-bytes");
330        assert_eq!(cache.fs().mode_of(&on_a).unwrap(), 0o644);
331        assert_eq!(cache.fs().mode_of(&on_b).unwrap(), 0o755);
332    }
333
334    // ---- intermediate parent directories ----
335
336    #[test]
337    fn cache_019_restore_creates_missing_intermediate_directories_for_target() {
338        let blob = b"deep-output";
339        let target = PathBuf::from("/ws/proj/nested/deep/out");
340        // Build fs with the deep file present (so store can read
341        // it), then drop the nested chain BEFORE restore to model
342        // the target's parent vanishing between store and
343        // restore. We do this by issuing a fresh store on a
344        // brand-new filesystem.
345        let mut fs = MemFilesystem::new();
346        fs.add_dir("/ws/proj/nested/deep").unwrap();
347        fs.add_file_with_mode(&target, blob.to_vec(), 0o644)
348            .unwrap();
349
350        let cache = make_cache(fs, HashAlgo::Blake3);
351        let key = sample_key();
352        let outs = [StoredOutput {
353            workspace_absolute_path: "/proj/nested/deep/out",
354            on_disk_path: &target,
355            mode: 0o644,
356        }];
357        let inputs = StoreInputs {
358            outputs: &outs,
359            stdout: b"",
360            stderr: b"",
361            created_at_unix: 0,
362        };
363        cache.store(&key, &inputs).unwrap();
364
365        // Now wipe the workspace's proj/ tree to simulate
366        // "outputs are gone between store and restore".
367        cache.fs().remove_dir_all(Path::new("/ws/proj")).unwrap();
368
369        let manifest = cache.lookup(&key).expect("entry still hits");
370        cache
371            .restore(&manifest)
372            .expect("restore must re-create the path");
373        assert_eq!(cache.fs().read(&target).unwrap(), blob);
374    }
375
376    // ---- overwrite of existing target ----
377
378    #[test]
379    fn cache_020_cache_019_restore_overwrites_an_existing_target_file() {
380        let target = PathBuf::from("/ws/proj/out");
381        let fs = fs_with_one_output(&target, b"original", 0o644);
382        let outs = [StoredOutput {
383            workspace_absolute_path: "/proj/out",
384            on_disk_path: &target,
385            mode: 0o644,
386        }];
387        let cache = make_cache(fs, HashAlgo::Blake3);
388        let key = sample_key();
389        cache
390            .store(
391                &key,
392                &StoreInputs {
393                    outputs: &outs,
394                    stdout: b"",
395                    stderr: b"",
396                    created_at_unix: 0,
397                },
398            )
399            .unwrap();
400
401        // Mutate the file in place to model a divergent run.
402        cache.fs().write_file(&target, b"divergent").unwrap();
403
404        let manifest = cache.lookup(&key).unwrap();
405        cache.restore(&manifest).unwrap();
406        assert_eq!(cache.fs().read(&target).unwrap(), b"original");
407    }
408
409    // ---- I/O errors propagate ----
410
411    #[test]
412    fn cache_019_restore_propagates_missing_cached_blob_as_io_error() {
413        let target = PathBuf::from("/ws/proj/out");
414        let fs = fs_with_one_output(&target, b"x", 0o644);
415        let cache = make_cache(fs, HashAlgo::Blake3);
416        let key = sample_key();
417        let outs = [StoredOutput {
418            workspace_absolute_path: "/proj/out",
419            on_disk_path: &target,
420            mode: 0o644,
421        }];
422        cache
423            .store(
424                &key,
425                &StoreInputs {
426                    outputs: &outs,
427                    stdout: b"",
428                    stderr: b"",
429                    created_at_unix: 0,
430                },
431            )
432            .unwrap();
433
434        let manifest = cache.lookup(&key).unwrap();
435
436        // Tamper: delete the cache entry directory after the
437        // lookup but before the restore. Lookup observed the
438        // entry; restore must surface the missing-blob failure.
439        let entry = crate::layout::entry_dir(cache.cache_root(), &key);
440        cache.fs().remove_dir_all(&entry).unwrap();
441
442        let err = cache.restore(&manifest).unwrap_err();
443        let msg = format!("{err}");
444        assert!(msg.contains("filesystem error"), "got: {msg}");
445    }
446
447    // ---- staging cleanup ----
448
449    #[test]
450    fn cache_019_restore_leaves_no_staging_directory_after_success() {
451        let target = PathBuf::from("/ws/proj/out");
452        let fs = fs_with_one_output(&target, b"x", 0o644);
453        let outs = [StoredOutput {
454            workspace_absolute_path: "/proj/out",
455            on_disk_path: &target,
456            mode: 0o644,
457        }];
458        let (cache, _restored, _manifest) =
459            store_then_restore(fs, HashAlgo::Blake3, &outs, b"", b"");
460
461        for entry in cache.fs().read_dir(cache.cache_root()).unwrap() {
462            let name = entry
463                .path
464                .file_name()
465                .unwrap()
466                .to_string_lossy()
467                .into_owned();
468            assert!(
469                !name.starts_with(".restore-"),
470                "staging directory must not persist after a successful restore, found: {name}"
471            );
472        }
473    }
474
475    #[test]
476    fn cache_019_restore_leaves_no_staging_directory_after_failure() {
477        let target = PathBuf::from("/ws/proj/out");
478        let fs = fs_with_one_output(&target, b"x", 0o644);
479        let cache = make_cache(fs, HashAlgo::Blake3);
480        let key = sample_key();
481        let outs = [StoredOutput {
482            workspace_absolute_path: "/proj/out",
483            on_disk_path: &target,
484            mode: 0o644,
485        }];
486        cache
487            .store(
488                &key,
489                &StoreInputs {
490                    outputs: &outs,
491                    stdout: b"",
492                    stderr: b"",
493                    created_at_unix: 0,
494                },
495            )
496            .unwrap();
497        let manifest = cache.lookup(&key).unwrap();
498
499        // Force a phase-1 failure by deleting the cache entry
500        // (so reading the cached blob fails).
501        let entry = crate::layout::entry_dir(cache.cache_root(), &key);
502        cache.fs().remove_dir_all(&entry).unwrap();
503
504        let _ = cache.restore(&manifest).unwrap_err();
505        for entry in cache.fs().read_dir(cache.cache_root()).unwrap() {
506            let name = entry
507                .path
508                .file_name()
509                .unwrap()
510                .to_string_lossy()
511                .into_owned();
512            assert!(
513                !name.starts_with(".restore-"),
514                "staging directory must be cleaned up after a failed restore, found: {name}"
515            );
516        }
517    }
518
519    // ---- different hash algo ----
520
521    #[test]
522    fn cache_019_restore_works_under_sha256() {
523        let target = PathBuf::from("/ws/proj/out");
524        let fs = fs_with_one_output(&target, b"sha-bytes", 0o600);
525        let outs = [StoredOutput {
526            workspace_absolute_path: "/proj/out",
527            on_disk_path: &target,
528            mode: 0o600,
529        }];
530        let (cache, restored, _manifest) =
531            store_then_restore(fs, HashAlgo::Sha256, &outs, b"sha-stdout", b"sha-stderr");
532        assert_eq!(cache.fs().read(&target).unwrap(), b"sha-bytes");
533        assert_eq!(cache.fs().mode_of(&target).unwrap(), 0o600);
534        assert_eq!(restored.stdout, b"sha-stdout");
535        assert_eq!(restored.stderr, b"sha-stderr");
536    }
537}