supermachine 0.7.69

Run any OCI/Docker image as a hardware-isolated microVM on macOS HVF (Linux KVM and Windows WHP in progress). Single library API, zero flags for the common case, sub-100 ms cold-restore from snapshot.
Documentation
//! Cross-platform golden-base selection for snapshot dedup.
//!
//! Picks the best existing on-disk snapshot to dedup a freshly-baked one
//! against: same `image`, `memory_mib`, `baked_by_version`, AND snapshot-file
//! size, excluding the fresh snapshot itself, newest-first. Matching all four is
//! a hard correctness guard — deduping against an incompatible snapshot would
//! corrupt the restore.
//!
//! Two consumers share this selector:
//!   * macOS/HVF — `dedup::auto_dedup_on_bake` → APFS `clonefile` block sharing
//!     of `restore.snap`.
//!   * Linux/KVM — golden-base diff snapshots (`SMSNAP*D`): save the fresh
//!     `vm.snap` as a diff against the golden so its base RAM is CoW-shared
//!     across every VM on that golden (see
//!     `docs/design/linux-memory-dedup-2026-06-08.md`).
//!
//! Hence `snap_file` is a parameter (`restore.snap` vs `vm.snap`).

use std::path::{Path, PathBuf};

/// Find the most-recent snapshot sibling of `fresh_snap_dir` suitable as a dedup
/// golden base.
///
/// A sibling qualifies when it is a directory directly under `snapshots_dir`,
/// is not the fresh directory itself, contains a `snap_file` whose byte length
/// equals the fresh one's, and has a `metadata.json` whose `image`,
/// `memory_mib` and `baked_by_version` fields equal the arguments. Among the
/// qualifying siblings the one whose `snap_file` has the newest mtime wins; on
/// an exact mtime tie the one encountered first in directory order is kept.
///
/// Returns the path to the chosen sibling's `snap_file`, or `Ok(None)` if no
/// sibling qualifies or the fresh `snap_file` cannot be stat'ed. Directory
/// entries and siblings that cannot be read or parsed are skipped rather than
/// reported.
///
/// # Errors
///
/// Returns the underlying I/O error if `snapshots_dir` cannot be listed.
pub fn find_best_base(
    snapshots_dir: &Path,
    fresh_snap_dir: &Path,
    snap_file: &str,
    image: &str,
    memory_mib: u32,
    baked_by_version: &str,
) -> std::io::Result<Option<PathBuf>> {
    // Without a fresh snapshot there is no size to match candidates against.
    let Ok(fresh_stat) = std::fs::metadata(fresh_snap_dir.join(snap_file)) else {
        return Ok(None);
    };
    let fresh_size = fresh_stat.len();
    let fresh_canonical = std::fs::canonicalize(fresh_snap_dir).ok();
    let wanted_memory = u64::from(memory_mib);

    let mut best: Option<(std::time::SystemTime, PathBuf)> = None;
    for entry in std::fs::read_dir(snapshots_dir)?.flatten() {
        let path = entry.path();
        if !path.is_dir() {
            continue;
        }
        // Skip the fresh dir itself. The canonicalized comparison handles
        // symlink/`.` noise; the lexical comparison keeps the exclusion working
        // when canonicalization fails for either side, so the fresh snapshot
        // can never be picked as its own base.
        let is_fresh = path.as_path() == fresh_snap_dir
            || fresh_canonical.as_ref().is_some_and(|fresh| {
                std::fs::canonicalize(&path).is_ok_and(|candidate| candidate == *fresh)
            });
        if is_fresh {
            continue;
        }
        let snap = path.join(snap_file);
        let Ok(stat) = std::fs::metadata(&snap) else {
            continue;
        };
        if stat.len() != fresh_size {
            continue;
        }
        let Ok(text) = std::fs::read_to_string(path.join("metadata.json")) else {
            continue;
        };
        let Ok(meta) = serde_json::from_str::<serde_json::Value>(&text) else {
            continue;
        };
        // A missing or wrongly-typed field counts as a mismatch.
        let compatible = meta.get("image").and_then(|v| v.as_str()) == Some(image)
            && meta.get("memory_mib").and_then(|v| v.as_u64()) == Some(wanted_memory)
            && meta.get("baked_by_version").and_then(|v| v.as_str()) == Some(baked_by_version);
        if !compatible {
            continue;
        }
        // Full-resolution mtime tiebreak (newest wins) — `SystemTime` compares
        // sub-second, unlike a seconds-truncated value which can't order siblings
        // baked within the same second. An unavailable mtime sorts as oldest.
        let mtime = stat.modified().unwrap_or(std::time::UNIX_EPOCH);
        if best.as_ref().is_none_or(|(newest, _)| mtime > *newest) {
            best = Some((mtime, snap));
        }
    }
    Ok(best.map(|(_, chosen)| chosen))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{Duration, SystemTime};

    /// Scratch snapshots directory, unique per process and per call, that is
    /// deleted on drop so a failing assertion does not leave it behind.
    struct TempRoot(PathBuf);

    impl TempRoot {
        /// Create an empty directory under the system temp dir, named from
        /// `tag`, the process id and a process-wide counter.
        fn new(tag: &str) -> Self {
            static N: AtomicU64 = AtomicU64::new(0);
            let d = std::env::temp_dir().join(format!(
                "sm-snapdedup-{tag}-{}-{}",
                std::process::id(),
                N.fetch_add(1, Ordering::Relaxed)
            ));
            // Clear leftovers from an earlier run that produced the same name.
            let _ = fs::remove_dir_all(&d);
            fs::create_dir_all(&d).unwrap();
            Self(d)
        }

        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempRoot {
        fn drop(&mut self) {
            // Best-effort cleanup; runs on panic unwinding as well.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    /// Create snapshot dir `root/name` holding a zero-filled `snap_file` of
    /// `size` bytes and a `metadata.json` with the given image / memory /
    /// version. Returns the snapshot dir.
    fn make_snap(
        root: &Path,
        name: &str,
        snap_file: &str,
        size: usize,
        image: &str,
        mem: u32,
        ver: &str,
    ) -> PathBuf {
        let d = root.join(name);
        fs::create_dir_all(&d).unwrap();
        fs::write(d.join(snap_file), vec![0u8; size]).unwrap();
        let meta = serde_json::json!({
            "image": image, "memory_mib": mem, "baked_by_version": ver,
        });
        fs::write(d.join("metadata.json"), serde_json::to_vec(&meta).unwrap()).unwrap();
        d
    }

    /// Stamp `file` with an explicit modification time.
    fn set_mtime(file: &Path, when: SystemTime) {
        fs::OpenOptions::new()
            .write(true)
            .open(file)
            .unwrap()
            .set_modified(when)
            .unwrap();
    }

    #[test]
    fn picks_only_a_fully_matching_sibling() {
        // Use the KVM file name to exercise the parameterization.
        let sf = "vm.snap";
        let root = TempRoot::new("match");
        let fresh = make_snap(root.path(), "fresh", sf, 1000, "img:1", 512, "v1");
        make_snap(root.path(), "wrong_size", sf, 999, "img:1", 512, "v1");
        make_snap(root.path(), "wrong_image", sf, 1000, "img:2", 512, "v1");
        make_snap(root.path(), "wrong_mem", sf, 1000, "img:1", 256, "v1");
        make_snap(root.path(), "wrong_ver", sf, 1000, "img:1", 512, "v2");
        let good = make_snap(root.path(), "good", sf, 1000, "img:1", 512, "v1");
        let r = find_best_base(root.path(), &fresh, sf, "img:1", 512, "v1").unwrap();
        assert_eq!(r, Some(good.join(sf)));
    }

    #[test]
    fn snap_file_name_is_respected() {
        // A restore.snap sibling must NOT match a vm.snap search and vice versa.
        let root = TempRoot::new("filename");
        let fresh = make_snap(root.path(), "fresh", "vm.snap", 500, "i", 128, "v");
        make_snap(root.path(), "hvf_style", "restore.snap", 500, "i", 128, "v");
        assert_eq!(
            find_best_base(root.path(), &fresh, "vm.snap", "i", 128, "v").unwrap(),
            None
        );
    }

    #[test]
    fn excludes_self_and_none_without_sibling() {
        let root = TempRoot::new("self");
        let fresh = make_snap(root.path(), "fresh", "vm.snap", 500, "i", 128, "v");
        assert_eq!(
            find_best_base(root.path(), &fresh, "vm.snap", "i", 128, "v").unwrap(),
            None
        );
    }

    #[test]
    fn none_when_fresh_snap_missing() {
        let root = TempRoot::new("nofresh");
        let fresh = root.path().join("fresh");
        fs::create_dir_all(&fresh).unwrap();
        make_snap(root.path(), "other", "vm.snap", 100, "i", 64, "v");
        assert_eq!(
            find_best_base(root.path(), &fresh, "vm.snap", "i", 64, "v").unwrap(),
            None
        );
    }

    #[test]
    fn newest_sibling_wins_at_subsecond_resolution() {
        let sf = "vm.snap";
        let root = TempRoot::new("newest");
        let fresh = make_snap(root.path(), "fresh", sf, 800, "i", 256, "v");
        // Created newer-first and then stamped explicitly, so the outcome
        // depends only on the recorded mtimes — not on creation order, the
        // wall clock, or a sleep.
        let newer = make_snap(root.path(), "newer", sf, 800, "i", 256, "v");
        let older = make_snap(root.path(), "older", sf, 800, "i", 256, "v");
        let base = SystemTime::now() - Duration::from_secs(60);
        set_mtime(&older.join(sf), base);
        set_mtime(&newer.join(sf), base + Duration::from_millis(250));
        let r = find_best_base(root.path(), &fresh, sf, "i", 256, "v").unwrap();
        assert_eq!(
            r,
            Some(newer.join(sf)),
            "must pick the most-recent matching sibling (sub-second mtime)"
        );
    }
}