supermachine 0.7.82

Run any OCI/Docker image as a hardware-isolated microVM on macOS HVF (Linux KVM and Windows WHP in progress). Single library API, zero flags for the common case, sub-100 ms cold-restore from snapshot.
Documentation
//! Guest-RAM cross-snapshot page-overlap probe (measurement only).
//!
//! Quantifies the upside of the "shared golden base + per-VM CoW diff" memory
//! dedup BEFORE building the golden-selection policy. The diff machinery already
//! exists (SMSNAP*D + `restore_diff_from_file` CoW-maps the base file); the
//! missing piece is auto-selecting a golden base at bake. This probe answers two
//! questions: how small would a snapshot be as a diff against a sibling (= disk
//! saving AND shared-RAM fraction), and how well do guest pages dedup globally?
//!
//! Method: boot each provided cold image, snapshot its guest RAM (`vm.snap`),
//! then page-compare (4 KiB) the RAM sections. The FIRST snapshot is the golden;
//! every other snapshot reports the fraction of pages identical to the golden
//! (shared if diffed against it) vs differing (the diff it would store). Also
//! reports zero-page fraction and the global unique-page count.
//!
//! Usage: `_mem_dedup_probe COLD_SNAP_DIR [COLD_SNAP_DIR ...]`
//! (the first dir is also booted a 2nd time as a same-image sibling).

/// Fallback entry point for every target other than Linux/x86_64: reports on
/// stderr that the probe is not supported here and returns normally.
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
fn main() {
    const UNSUPPORTED: &str = "_mem_dedup_probe: Linux/x86_64 (KVM) only";
    eprintln!("{UNSUPPORTED}");
}

/// Guest page size, in bytes, used as the unit of hashing and comparison.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
const PG: usize = 4096;

/// Scratch directory that is deleted (best effort) when the guard is dropped,
/// so the snapshots written into it are cleaned up on early error returns too,
/// not only on the success path.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
struct ScratchDir(std::path::PathBuf);

#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
impl Drop for ScratchDir {
    fn drop(&mut self) {
        // Cleanup failure is not actionable for a measurement tool; ignore it.
        let _ = std::fs::remove_dir_all(&self.0);
    }
}

/// `a` expressed as a percentage of `b`; `0.0` when `b` is zero.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn pct(a: u64, b: u64) -> f64 {
    if b > 0 {
        100.0 * a as f64 / b as f64
    } else {
        0.0
    }
}

/// Size of `pages` pages of `PG` bytes, in MiB.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn mib(pages: u64) -> f64 {
    pages as f64 * PG as f64 / (1024.0 * 1024.0)
}

/// Reads a snapshot's RAM section as one 64-bit hash per `PG`-byte page.
///
/// The file is read as: 8 bytes of magic (consumed but not validated), then a
/// little-endian `u64` giving the offset of the RAM section, which is treated
/// as running from that offset to the end of the file. A trailing partial page
/// is ignored.
///
/// Returns `(page_hashes, zero_page_count)`.
///
/// # Errors
///
/// Returns any I/O error from opening, seeking or reading the file, including
/// an unexpected EOF while reading the header or a page.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn read_pages(path: &std::path::Path) -> std::io::Result<(Vec<u64>, u64)> {
    use std::hash::Hasher;
    use std::io::{BufReader, Read, Seek, SeekFrom};

    // Buffer the file so each 4 KiB page does not cost its own read syscall.
    let mut reader = BufReader::with_capacity(1 << 20, std::fs::File::open(path)?);

    let mut magic = [0u8; 8];
    reader.read_exact(&mut magic)?;
    let mut offset_bytes = [0u8; 8];
    reader.read_exact(&mut offset_bytes)?;
    let ram_offset = u64::from_le_bytes(offset_bytes);

    let file_len = reader.get_ref().metadata()?.len();
    // An offset past the end of the file yields an empty RAM section.
    let ram_len = file_len.saturating_sub(ram_offset);
    reader.seek(SeekFrom::Start(ram_offset))?;

    let full_pages = ram_len / PG as u64;
    let mut hashes = Vec::with_capacity(full_pages as usize);
    let mut zeros = 0u64;
    let mut page = vec![0u8; PG];
    for _ in 0..full_pages {
        reader.read_exact(&mut page)?;
        if page.iter().all(|&b| b == 0) {
            zeros += 1;
        }
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        hasher.write(&page);
        hashes.push(hasher.finish());
    }
    Ok((hashes, zeros))
}

/// Probe entry point (Linux/x86_64 only).
///
/// Takes one or more cold-snapshot directories as arguments. Each is booted
/// and snapshotted once into a scratch directory under the system temp dir;
/// the first is additionally booted a second time as a same-image sibling.
/// The first snapshot is the golden: every other snapshot is page-compared to
/// it at the same offsets, and a table plus a global unique-page summary is
/// written to stderr.
///
/// Exits with status 2 when no directory is given.
///
/// # Errors
///
/// Returns the first error from scratch-directory creation, image load, VM
/// start, snapshotting, or snapshot reading. The scratch directory is removed
/// in every case.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    use std::collections::HashSet;
    use std::time::Duration;
    use supermachine::{Image, VmConfig};

    let bases: Vec<String> = std::env::args().skip(1).collect();
    if bases.is_empty() {
        eprintln!("usage: _mem_dedup_probe COLD_SNAP_DIR [more ...]");
        std::process::exit(2);
    }

    // Boot+snapshot plan: each base once, plus the first base a 2nd time as a
    // same-image sibling (the key "golden diff" comparison).
    let mut plan: Vec<(String, String)> = Vec::with_capacity(bases.len() + 1); // (label, base_dir)
    for (i, b) in bases.iter().enumerate() {
        let name = std::path::Path::new(b)
            .file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("img")
            .to_string();
        plan.push((format!("{name}#1"), b.clone()));
        if i == 0 {
            plan.push((format!("{name}#2-sibling"), b.clone()));
        }
    }

    // From here on, the guard removes the scratch directory on every exit path.
    let workdir = ScratchDir(
        std::env::temp_dir().join(format!("sm-memdedup-{}", std::process::id())),
    );
    let _ = std::fs::remove_dir_all(&workdir.0);
    std::fs::create_dir_all(&workdir.0)?;

    // Produce a vm.snap per plan entry.
    let mut snaps: Vec<(String, std::path::PathBuf)> = Vec::with_capacity(plan.len());
    for (idx, (label, base)) in plan.into_iter().enumerate() {
        eprintln!("booting + snapshotting {label} (from {base}) ...");
        let cold = Image::from_snapshot(&base)?;
        let vm = cold.start(&VmConfig::new())?;
        std::thread::sleep(Duration::from_secs(5)); // let the guest settle
        // The index prefix keeps directories distinct when two different base
        // dirs share the same final path component (and therefore the same label).
        let dir = workdir
            .0
            .join(format!("{idx:02}-{}", label.replace(['#', '/'], "_")));
        let _img = vm.snapshot(&dir)?; // consumes vm, writes dir/vm.snap
        snaps.push((label, dir.join("vm.snap")));
    }

    // Golden = first snapshot.
    let ((golden_label, golden_path), others) = snaps
        .split_first()
        .ok_or("no snapshots were produced")?;
    let (golden_hashes, golden_zeros) = read_pages(golden_path)?;
    let gp = golden_hashes.len();
    let mut global: HashSet<u64> = golden_hashes.iter().copied().collect();
    let mut total_pages = gp as u64;

    eprintln!(
        "\n=== guest-RAM page-overlap ({} pages = {:.0} MiB each) ===",
        gp,
        mib(gp as u64)
    );
    eprintln!(
        "{:<22} {:>7} {:>10} {:>14} {:>16}",
        "snapshot", "zero%", "vs-golden", "diff(=stored)", "shared(=density)"
    );
    eprintln!(
        "{:<22} {:>6.1}% {:>10} {:>14} {:>16}",
        golden_label,
        pct(golden_zeros, gp as u64),
        "(golden)",
        "",
        ""
    );

    for (label, path) in others {
        let (hashes, zeros) = read_pages(path)?;
        global.extend(hashes.iter().copied());
        total_pages += hashes.len() as u64;

        // Same-offset page comparison against golden (what save_diff stores).
        // `zip` stops at the shorter list, so only the common prefix is compared.
        let n = hashes.len().min(gp);
        let differ = hashes
            .iter()
            .zip(&golden_hashes)
            .filter(|(mine, golden)| mine != golden)
            .count() as u64;
        let same = n as u64 - differ;
        eprintln!(
            "{:<22} {:>6.1}% {:>10} {:>9.1}% {:>15.1}%",
            label,
            pct(zeros, hashes.len() as u64),
            "",
            pct(differ, n as u64),
            pct(same, n as u64),
        );
        if hashes.len() != gp {
            // Make the truncated comparison visible instead of silent.
            eprintln!(
                "  note: {label} has {} pages vs {gp} in the golden; only the first {n} were compared",
                hashes.len()
            );
        }
    }

    let unique = global.len() as u64;
    eprintln!(
        "\nglobal page dedup: {} total pages -> {} unique ({:.1}% unique, {:.1}% redundant)",
        total_pages,
        unique,
        pct(unique, total_pages),
        100.0 - pct(unique, total_pages),
    );
    eprintln!(
        "interpretation: a same-image sibling stored as a diff keeps only its 'diff' pages\n\
         on disk, and shares the 'shared' fraction of RAM with every VM on the golden.\n\
         The global-redundant % is the KSM/full-dedup ceiling across the whole set.",
    );

    // `workdir` is dropped here, removing the scratch directory.
    Ok(())
}