supermachine 0.7.82

Run any OCI/Docker image as a hardware-isolated microVM on macOS HVF (Linux KVM and Windows WHP in progress). Single library API, zero flags for the common case, sub-100 ms cold-restore from snapshot.
Documentation
//! Demonstrate the REAL cross-VM RAM-density lever on KVM: KSM page-dedup.
//! (See docs/design/in-vm-builder-density-finding-2026-06-07.md — the artifact
//! format doesn't share pages across VMs; KSM does, because the build's outputs
//! + base rootfs + kernel/libc are byte-identical across VMs from one image.)
//!
//! Builds an image with a big IDENTICAL blob, boots N VMs (each reads the blob
//! so the pages are resident), then enables KSM and watches host memory use
//! (MemAvailable, not per-process VmRSS) recover as the duplicate pages merge to
//! one copy. Guest RAM is marked MADV_MERGEABLE by the runtime
//! (advise_mergeable), so no per-VM setup is needed.
//!
//! Run as root (writes /sys/kernel/mm/ksm/* and the host THP mode). Restores the
//! prior KSM and THP settings on exit.

/// Resident set size of the current process in MiB, taken from the `VmRSS:`
/// line of `/proc/self/status`.
///
/// Returns 0 when the file cannot be read, when no `VmRSS:` line is present,
/// or when the value on that line does not parse as an unsigned integer.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn rss_mib() -> u64 {
    let status = match std::fs::read_to_string("/proc/self/status") {
        Ok(text) => text,
        Err(_) => String::new(),
    };
    // Only the first `VmRSS:` line is considered; its first whitespace-separated
    // token is the size (the file reports it in kB).
    let kib = status
        .lines()
        .find_map(|entry| entry.strip_prefix("VmRSS:"))
        .map_or(0, |rest| {
            rest.split_whitespace()
                .next()
                .and_then(|tok| tok.parse::<u64>().ok())
                .unwrap_or(0)
        });
    kib / 1024
}

/// Host-wide available memory in MiB, read from the `MemAvailable:` line of
/// `/proc/meminfo`.
///
/// This is the metric the demo trusts for KSM: VmRSS counts KSM-shared pages
/// once per mapping, and every VM lives in this one process, so merging pages
/// inside it lowers physical RAM use without lowering VmRSS.
///
/// Returns 0 when the file cannot be read, the line is missing, or its value
/// does not parse.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn mem_avail_mib() -> i64 {
    let meminfo = std::fs::read_to_string("/proc/meminfo").unwrap_or_default();
    let mut kib: i64 = 0;
    for entry in meminfo.lines() {
        if let Some(rest) = entry.strip_prefix("MemAvailable:") {
            // First token after the key is the size; anything unparsable is 0.
            if let Some(tok) = rest.split_whitespace().next() {
                kib = tok.parse::<i64>().unwrap_or(0);
            }
            // Stop at the first matching line regardless of whether it parsed.
            break;
        }
    }
    kib / 1024
}

/// Reads the integer value of the KSM sysfs attribute
/// `/sys/kernel/mm/ksm/{attr}`.
///
/// Returns -1 when the attribute cannot be read or its trimmed contents do not
/// parse as an `i64`.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn ksm_read(attr: &str) -> i64 {
    let path = format!("/sys/kernel/mm/ksm/{attr}");
    match std::fs::read_to_string(path) {
        Ok(raw) => raw.trim().parse::<i64>().unwrap_or(-1),
        Err(_) => -1,
    }
}

/// Writes `val` to the KSM sysfs attribute `/sys/kernel/mm/ksm/{attr}`.
///
/// The write stays best-effort (a failure never aborts the demo), but it is
/// reported on stderr: without that, a run lacking the privileges to touch
/// these files would simply show "no dedup" with no hint as to why.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn ksm_write(attr: &str, val: &str) {
    let path = format!("/sys/kernel/mm/ksm/{attr}");
    if let Err(err) = std::fs::write(&path, val) {
        eprintln!("warning: could not write {val:?} to {path}: {err}");
    }
}

/// KSM density demo driver.
///
/// Steps, in order:
/// 1. Bail out if the host has no `/sys/kernel/mm/ksm/run`.
/// 2. Force host THP to `never` (saving the previous mode).
/// 3. Build an image containing a `mib`-MiB blob and commit it to a bootable
///    image.
/// 4. Boot `n` VMs (first CLI argument, default 4), each reading the whole blob.
/// 5. Turn KSM on with aggressive scan settings and poll every 5 s until
///    `pages_sharing` plateaus or the watch limit elapses.
/// 6. Print the before/after memory figures and restore the saved KSM and THP
///    settings.
///
/// The saved host settings are restored by drop guards, so they are put back
/// even when one of the `expect` calls below panics part-way through.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn main() {
    use std::time::{Duration, Instant};
    use supermachine::{builder, Image, VmConfig};

    // Writes the saved THP mode back when dropped (normal exit or unwinding).
    struct ThpRestore {
        path: &'static str,
        saved: Option<String>,
    }
    impl Drop for ThpRestore {
        fn drop(&mut self) {
            if let Some(mode) = self.saved.take() {
                let _ = std::fs::write(self.path, mode);
            }
        }
    }

    // Writes the saved KSM tunables back when dropped. A value of -1 means the
    // attribute could not be read beforehand; tunables are then left alone and
    // `run` falls back to 0 (KSM off).
    struct KsmRestore {
        run: i64,
        pages_to_scan: i64,
        sleep_millisecs: i64,
    }
    impl Drop for KsmRestore {
        fn drop(&mut self) {
            if self.pages_to_scan >= 0 {
                ksm_write("pages_to_scan", &self.pages_to_scan.to_string());
            }
            if self.sleep_millisecs >= 0 {
                ksm_write("sleep_millisecs", &self.sleep_millisecs.to_string());
            }
            ksm_write("run", &self.run.max(0).to_string());
        }
    }

    if !std::path::Path::new("/sys/kernel/mm/ksm/run").exists() {
        eprintln!("KSM not available on this host (CONFIG_KSM=n) — skipping");
        return;
    }
    // A count of 0 is treated like an unparsable argument: it would make the
    // per-VM averages below divide by zero, so fall back to the default.
    let n: usize = std::env::args()
        .nth(1)
        .and_then(|s| s.parse::<usize>().ok())
        .filter(|&count| count > 0)
        .unwrap_or(4);
    // Divisor for the per-VM averages (n is guaranteed non-zero above).
    let vm_count = i64::try_from(n).unwrap_or(i64::MAX);
    let mib = 128u64;

    // THP and KSM are mutually exclusive for dedup: KSM cannot merge 2 MiB
    // transparent hugepages (it scans 4 KiB pages). Guest RAM is advised
    // MADV_HUGEPAGE, so force host THP off BEFORE the VMs allocate, then restore.
    let thp_path = "/sys/kernel/mm/transparent_hugepage/enabled";
    // The active mode is the bracketed word, e.g. "always [madvise] never".
    let saved_thp = std::fs::read_to_string(thp_path).ok().and_then(|s| {
        s.split('[')
            .nth(1)
            .and_then(|t| t.split(']').next())
            .map(str::to_string)
    });
    // Arm the restore guard before changing anything on the host.
    let thp_guard = ThpRestore {
        path: thp_path,
        saved: saved_thp,
    };
    let _ = std::fs::write(thp_path, "never");
    eprintln!(
        "THP set to 'never' (was {:?}) so KSM can scan 4 KiB pages",
        thp_guard.saved
    );

    let dockerfile = format!(
        "FROM alpine\n\
         RUN mkdir -p /data && dd if=/dev/urandom of=/data/blob bs=1M count={mib} 2>/dev/null\n"
    );
    let df = builder::parse(&dockerfile).expect("parse");
    let ctx = std::env::temp_dir().join("sm-ksm-ctx");
    let _ = std::fs::create_dir_all(&ctx);
    let build_dest = std::env::temp_dir().join("sm-ksm-build");
    let _ = std::fs::remove_dir_all(&build_dest);

    let base = Image::from_oci("alpine").expect("from_oci");
    eprintln!("=== build ({mib} MiB identical blob) ===");
    let out = builder::build_linear(&df, &base, &ctx, &build_dest).expect("build_linear");

    // Boot from the COMMITTED read-only squashfs image: it COLD-BOOTS, so guest
    // RAM is an ANONYMOUS mapping (MADV_MERGEABLE works → KSM-scannable). The
    // CoW file-backed restore path (restore_from_file) is NOT KSM-mergeable
    // (madvise MERGEABLE only applies to anonymous VMAs), so KSM can only dedup
    // the cold-boot path. Each VM decompresses the IDENTICAL blob into anon guest
    // RAM → KSM collapses those duplicate pages to one host copy.
    let commit_dir = std::env::temp_dir().join("sm-ksm-commit");
    let _ = std::fs::remove_dir_all(&commit_dir);
    eprintln!("=== commit -> read-only squashfs bootable image (cold-boot/anon RAM) ===");
    let (_c, image) =
        builder::commit_kvm_bootable(&out.image, &commit_dir).expect("commit_kvm_bootable");

    // Boot N VMs, each reads the whole blob so its pages are resident + identical.
    let avail_base = mem_avail_mib();
    let mut held = Vec::new();
    for i in 0..n {
        let vm = image.start(&VmConfig::new()).expect("start");
        std::thread::sleep(Duration::from_millis(3500));
        let _ = vm
            .exec_builder()
            .argv([
                "/bin/sh",
                "-c",
                "dd if=/data/blob of=/dev/null bs=1M 2>/dev/null",
            ])
            .output();
        held.push(vm);
        eprintln!(
            "booted VM #{i}: host MemAvailable = {} MiB (RSS {} MiB)",
            mem_avail_mib(),
            rss_mib()
        );
    }
    let avail_pre = mem_avail_mib();
    let used = avail_base - avail_pre;
    eprintln!(
        "\n=== {n} VMs up, KSM OFF: host used +{used} MiB ({} MiB/VM) ===",
        used / vm_count
    );

    // Enable KSM aggressively and watch the duplicate pages merge. The guard
    // captures the prior settings first so they are restored on any exit path.
    let ksm_guard = KsmRestore {
        run: ksm_read("run"),
        pages_to_scan: ksm_read("pages_to_scan"),
        sleep_millisecs: ksm_read("sleep_millisecs"),
    };
    ksm_write("pages_to_scan", "20000");
    ksm_write("sleep_millisecs", "10");
    ksm_write("run", "1");
    // Single source of truth for the watch limit, so the banner cannot drift
    // from the deadline actually enforced below.
    let watch_limit = Duration::from_secs(150);
    eprintln!(
        "=== KSM ON (pages_to_scan=20000 sleep=10ms); watching up to {}s ===",
        watch_limit.as_secs()
    );

    let started = Instant::now();
    let deadline = started + watch_limit;
    let mut prev_sharing = -1i64;
    loop {
        std::thread::sleep(Duration::from_secs(5));
        let sharing = ksm_read("pages_sharing");
        eprintln!(
            "  t={:>3}s  host MemAvailable = {} MiB  pages_sharing={sharing} (≈{} MiB deduped) full_scans={}",
            started.elapsed().as_secs(),
            mem_avail_mib(),
            sharing.max(0) * 4096 / (1024 * 1024),
            ksm_read("full_scans")
        );
        if Instant::now() >= deadline {
            break;
        }
        // Converged once merging plateaus: sharing grew by < 512 pages (2 MiB)
        // since the last poll, after it has actually started (> 0).
        if sharing > 0 && prev_sharing >= 0 && (sharing - prev_sharing) < 512 {
            break;
        }
        prev_sharing = sharing;
    }
    let avail_post = mem_avail_mib();
    let dedup_pages = ksm_read("pages_sharing").max(0);
    eprintln!(
        "\n=== RESULT ({n} cold-boot VMs, {mib} MiB identical blob each) ===\n\
         KSM pages_sharing = {dedup_pages} → ~{} MiB physically deduplicated\n\
         host MemAvailable: {avail_pre} → {avail_post} MiB (+{} MiB reclaimed by KSM)\n\
         per-VM physical: {} MiB (KSM off) → {} MiB effective (KSM on)",
        dedup_pages * 4096 / (1024 * 1024),
        avail_post - avail_pre,
        used / vm_count,
        (avail_base - avail_post) / vm_count,
    );

    // Normal exit: restore KSM, then THP, then tear the VMs down. Dropping the
    // guards explicitly fixes that order; on a panic they still run via unwinding.
    drop(ksm_guard);
    drop(thp_guard);
    drop(held);
}

/// Fallback entry point for every target other than Linux/x86_64: prints a
/// one-line notice on stderr and exits.
#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
fn main() {
    let notice = "kvm_ksm_density is Linux/x86_64 only";
    eprintln!("{notice}");
}