Skip to main content

vm_benchmark/
vm_benchmark.rs

1//! Benchmark VM / sandbox systems for **agentic AI use** — the spawn-and-tear-down
2//! sandbox loop an agent runtime drives (one isolated environment per tool call).
3//!
4//! Ranks AetherVM against Firecracker, Cloud Hypervisor, gVisor, Kata, QEMU/KVM,
5//! and Docker on five agent-native axes (start-latency, density, isolation,
6//! snapshotting, agent-control), then shows a head-to-head and the evidence.
7//!
8//! Run: `cargo run -p agentic-eval --example vm_benchmark`
9
10use agentic_eval::vms::{compare_vms, profile, rank_vms, Vm};
11
12fn main() {
13    println!("agentic-eval — VM/sandbox systems for agentic AI use");
14    println!("axes: start-latency, density, isolation, snapshotting, agent-control\n");
15
16    // ── Ranked benchmark (best-first by composite agentic fitness) ───────────
17    println!(
18        "{:<17} {:>7}   {:>5} {:>7} {:>9} {:>8} {:>13}",
19        "system", "fitness", "start", "density", "isolation", "snapshot", "agent-control"
20    );
21    for p in rank_vms() {
22        println!(
23            "{:<17} {:>7.2}   {:>5.2} {:>7.2} {:>9.2} {:>8.2} {:>13.2}",
24            p.vm.name(),
25            p.fitness(),
26            p.start_latency,
27            p.density,
28            p.isolation,
29            p.snapshotting,
30            p.agent_control,
31        );
32    }
33
34    // ── Head-to-head: AetherVM vs the microVM reference (Firecracker) ────────
35    println!("\nhead-to-head (positive = AetherVM fits agentic use better):");
36    print!("{}", compare_vms(Vm::AetherVm, Vm::Firecracker));
37
38    // ── Evidence behind the subject's profile ────────────────────────────────
39    println!("\nwhy AetherVM scores where it does:");
40    for e in &profile(Vm::AetherVm).evidence {
41        println!("  - {e}");
42    }
43
44    println!(
45        "\nReading: AetherVM leads on the agent-native axes it was designed for\n\
46         (instant CoW branching + an MCP-native control plane), while microVMs\n\
47         (Firecracker/Cloud Hypervisor) lead on raw cold-start and battle-tested\n\
48         isolation. Shared-kernel containers (Docker) win speed/density but rank\n\
49         low on isolation for untrusted, agent-generated code."
50    );
51}