supermachine 0.7.76

//! Boot a Linux kernel through `supermachine::kvm::run::LinuxVm` — the in-tree
//! KVM backend's run loop. Linux/x86_64 only (needs /dev/kvm).
//!
//!   cargo run --example kvm_boot -- <bzImage> [disk.ext4] [initrd.cpio]
//!
//! With a disk and no initrd, it boots with `root=/dev/vda`. With an initrd,
//! it unpacks that as the rootfs instead; pass `none` as the disk argument to
//! boot without a virtio-blk device. Serial console streams to stdout.

#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
fn main() {
    use supermachine::kvm::run::{LinuxVm, LinuxVmConfig};

    // Cross-process restore: load a saved snapshot and resume it (no boot, no
    // kernel arg needed — the saved RAM is the running image).
    if let Ok(path) = std::env::var("RESTORE_FROM") {
        // Copy-on-write restore: mmap the file's RAM instead of copying it.
        let t0 = std::time::Instant::now();
        let mut vm = LinuxVm::restore_from_file(std::path::Path::new(&path)).expect("restore");
        eprintln!(
            "=== restored from {path} in {:.2} ms (CoW mmap) — resuming ===",
            t0.elapsed().as_secs_f64() * 1000.0
        );
        let reason = vm.run().expect("run (restored)");
        eprintln!("\n=== exit (restored): {reason:?} ===");
        return;
    }

    let args: Vec<String> = std::env::args().collect();
    let kpath = args
        .get(1)
        .expect("usage: kvm_boot <bzImage> [disk.ext4] [initrd.cpio]");
    let kernel = std::fs::read(kpath).expect("read kernel");
    // arg 2 is the disk; "none" (or absent) means boot without a virtio-blk.
    let disk = args.get(2).map(|s| s.as_str()).filter(|s| *s != "none");
    let initrd = args.get(3).map(|p| std::fs::read(p).expect("read initrd"));

    let mut cmdline = String::from("console=ttyS0 earlyprintk=ttyS0,keep panic=-1 reboot=t");
    if disk.is_some() && initrd.is_none() {
        cmdline.push_str(" root=/dev/vda rw rootfstype=ext4");
    }
    let disk_size = disk
        .map(|p| std::fs::metadata(p).map(|m| m.len()).unwrap_or(0))
        .unwrap_or(0);

    // SMP CPU count via SMP_CPUS (default 1).
    let num_cpus: u8 = std::env::var("SMP_CPUS")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(1);

    let cfg = LinuxVmConfig {
        mem_size: 512 * 1024 * 1024,
        num_cpus,
        kernel: &kernel,
        initrd: initrd.as_deref(),
        disk_path: disk,
        disk_size,
        cmdline: &cmdline,
        enable_vsock: std::env::var_os("VSOCK").is_some(),
        volumes: &[],
        virtiofs: &[],
        tsi_token: None,
        enable_balloon: std::env::var_os("BALLOON").is_some(),
    };

    eprintln!(
        "=== booting {kpath} (cpus={num_cpus}, disk={disk:?}, initrd={}) ===",
        initrd.is_some()
    );
    let mut vm = LinuxVm::new(&cfg).expect("LinuxVm::new");

    // Feed host stdin → guest serial RX on a background thread (interactive
    // shell / piped commands). Detached: it dies with the process when run()
    // returns. The handle shares the device via Arc, so it works concurrently
    // with run()'s &mut borrow.
    let input = vm.serial_input();
    std::thread::spawn(move || {
        use std::io::Read;
        let mut stdin = std::io::stdin();
        let mut byte = [0u8; 1];
        while let Ok(1) = stdin.read(&mut byte) {
            input.push(byte[0]);
        }
    });

    // vsock round-trip demo: with VSOCK_TEST=<port> set, wait for the guest's
    // listener to come up, connect host→guest over vsock, send a line, and
    // print the guest's echo. Runs on a detached thread alongside run().
    if let Some(port) = std::env::var("VSOCK_TEST")
        .ok()
        .and_then(|s| s.parse::<u32>().ok())
    {
        if let Some(vh) = vm.vsock_handle() {
            std::thread::spawn(move || {
                use std::io::{Read, Write};
                std::thread::sleep(std::time::Duration::from_millis(5000));
                let mut s = match vh.connect(port) {
                    Ok(s) => s,
                    Err(e) => {
                        eprintln!("VSOCK CONNECT FAILED: {e}");
                        return;
                    }
                };
                // connect() only sends the REQUEST; let the guest accept + the
                // muxer register the stream (RESPONSE) before we write, so the
                // write produces a fresh readable edge the muxer io-thread sees.
                std::thread::sleep(std::time::Duration::from_millis(1000));
                if s.write_all(b"vsock-ping\n").is_err() {
                    eprintln!("VSOCK WRITE FAILED");
                    return;
                }
                // Poll for the echo (short timeouts so a missed edge still drains).
                let _ = s.set_read_timeout(Some(std::time::Duration::from_millis(500)));
                let mut buf = [0u8; 64];
                for _ in 0..16 {
                    match s.read(&mut buf) {
                        Ok(n) if n > 0 => {
                            eprintln!(
                                "VSOCK ROUNDTRIP OK: guest echoed {:?}",
                                String::from_utf8_lossy(&buf[..n]).trim_end()
                            );
                            return;
                        }
                        Ok(_) => break,
                        Err(ref e)
                            if e.kind() == std::io::ErrorKind::WouldBlock
                                || e.kind() == std::io::ErrorKind::TimedOut => {}
                        Err(e) => {
                            eprintln!("VSOCK READ FAILED: {e}");
                            return;
                        }
                    }
                }
                eprintln!("VSOCK ROUNDTRIP: no echo received");
            });
        } else {
            eprintln!("VSOCK_TEST set but vsock not enabled (set VSOCK=1)");
        }
    }

    // Exec demo: with VSOCK_EXEC set, dial the guest agent over the proven
    // vsock path and speak its protocol directly (REQUEST frame → STDOUT/EXIT
    // frames). Isolates the agent+vsock from the ExecBuilder/bridge layer.
    if let Some(exec_port) = std::env::var("VSOCK_EXEC")
        .ok()
        .and_then(|s| s.parse::<u32>().ok())
    {
        if let Some(vh) = vm.vsock_handle() {
            std::thread::spawn(move || {
                use std::io::{Read, Write};
                use std::time::Duration;
                std::thread::sleep(Duration::from_millis(5000)); // boot + agent ready
                let mut s = match vh.connect(exec_port) {
                    Ok(s) => s,
                    Err(e) => {
                        eprintln!("EXEC connect failed: {e}");
                        return;
                    }
                };
                std::thread::sleep(Duration::from_millis(1000)); // connection establish
                let req = br#"{"argv":["/bin/busybox","echo","HELLO_FROM_KVM_EXEC"]}"#;
                let mut frame = vec![0xffu8];
                frame.extend_from_slice(&(req.len() as u32).to_be_bytes());
                frame.extend_from_slice(req);
                if let Err(e) = s.write_all(&frame) {
                    eprintln!("EXEC write failed: {e}");
                    return;
                }
                let _ = s.set_read_timeout(Some(Duration::from_secs(6)));
                let mut stdout = Vec::new();
                let mut status: Option<u32> = None;
                loop {
                    let mut hdr = [0u8; 5];
                    if s.read_exact(&mut hdr).is_err() {
                        break;
                    }
                    let typ = hdr[0];
                    let len = u32::from_be_bytes([hdr[1], hdr[2], hdr[3], hdr[4]]) as usize;
                    let mut body = vec![0u8; len];
                    if len > 0 && s.read_exact(&mut body).is_err() {
                        break;
                    }
                    match typ {
                        1 => stdout.extend_from_slice(&body), // STDOUT
                        5 => {
                            status = Some(u32::from_be_bytes([body[0], body[1], body[2], body[3]]));
                            break;
                        }
                        6 => {
                            eprintln!("EXEC agent ERROR: {}", String::from_utf8_lossy(&body));
                            break;
                        }
                        _ => {}
                    }
                }
                eprintln!(
                    "EXEC RESULT: status={status:?} stdout={:?}",
                    String::from_utf8_lossy(&stdout).trim_end()
                );
            });
        }
    }

    // Snapshot/restore demo: with SNAPSHOT_AFTER_MS set, run the VM for that
    // long, snapshot the full running state, drop it, restore into a fresh VM,
    // and resume — proving the guest continues across the snapshot.
    if let Some(ms) = std::env::var("SNAPSHOT_AFTER_MS")
        .ok()
        .and_then(|s| s.parse().ok())
    {
        let snap = vm
            .snapshot_after(std::time::Duration::from_millis(ms))
            .expect("snapshot_after");
        // SNAPSHOT_TO=<file>: persist to disk + exit (restore later with
        // RESTORE_FROM, possibly in another process).
        if let Ok(path) = std::env::var("SNAPSHOT_TO") {
            snap.save(std::path::Path::new(&path))
                .expect("save snapshot");
            eprintln!(
                "\n=== SAVED snapshot to {path} ({} MiB) ===",
                snap.mem_size() >> 20
            );
            return;
        }
        eprintln!(
            "\n=== SNAPSHOTTED ({} MiB RAM, {} vCPUs) — dropping + restoring into a fresh VM ===",
            snap.mem_size() >> 20,
            snap.num_cpus()
        );
        drop(vm);
        let mut restored = LinuxVm::restore(&snap).expect("restore");
        let reason = restored.run().expect("run (restored)");
        eprintln!("\n=== exit (restored): {reason:?} ===");
        return;
    }

    // Run-control demo (the product entry point): start the vCPUs on background
    // threads via the non-blocking handle and keep the guest live (serving
    // exec-over-vsock, set VSOCK_EXEC too) instead of run()'s blocking loop.
    // RUN_CONTROL_SNAP_MS additionally snapshots the running VM through the
    // handle after that many ms, then restores + resumes.
    if std::env::var_os("RUN_CONTROL").is_some()
        || std::env::var_os("RUN_CONTROL_SNAP_MS").is_some()
    {
        let handle = vm.start_running();
        if let Some(ms) = std::env::var("RUN_CONTROL_SNAP_MS")
            .ok()
            .and_then(|s| s.parse().ok())
        {
            std::thread::sleep(std::time::Duration::from_millis(ms));
            let snap = handle.snapshot().expect("RunningVm::snapshot");
            eprintln!(
                "\n=== run-control SNAPSHOT ({} MiB, {} vCPUs) — restoring + resuming ===",
                snap.mem_size() >> 20,
                snap.num_cpus()
            );
            let mut restored = LinuxVm::restore(&snap).expect("restore");
            let reason = restored.run().expect("run (restored)");
            eprintln!("\n=== exit (restored via run-control): {reason:?} ===");
            return;
        }
        let reason = handle.wait();
        eprintln!("\n=== exit (run-control): {reason:?} ===");
        return;
    }

    let reason = vm.run().expect("run");
    eprintln!("\n=== exit: {reason:?} ===");
}

#[cfg(not(all(target_os = "linux", target_arch = "x86_64")))]
fn main() {
    eprintln!("kvm_boot is Linux/x86_64 only");
}