1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
//! Measure: how does worker RSS / phys_footprint evolve across a
//! pool of N idle VMs over time?
//!
//! What we want to learn:
//! 1. Initial RSS right after restore (before balloon inflates).
//! 2. RSS at `inflate_complete` (balloon driver finished pushing
//! PFNs, host madvise(MADV_FREE)'d them).
//! 3. Steady-state RSS over a 30 s idle window.
//! 4. Whether RSS recovers if we put memory pressure on the host.
//!
//! Output: a CSV-style row per sample so we can plot if useful.
//!
//! Usage:
//! SUPERMACHINE_WORKER_BIN=$(pwd)/target/release/supermachine-worker \
//! ./target/release/examples/_balloon_rss
//!
//! Env knobs:
//! POOL_SIZE=10 default
//! SAMPLE_INTERVAL_MS=500
//! SAMPLE_DURATION_S=30
//! IMAGE=alpine:latest
use std::process::Command;
use std::time::{Duration, Instant};
use supermachine::Image;
/// Resident set size of process `pid`, in kilobytes, via `ps -o rss=`.
///
/// macOS `ps` reports RSS in KB. phys_footprint (see `footprint_mib`)
/// is more accurate but slower to obtain; RSS is good enough for
/// relative trend tracking on the per-tick trace.
///
/// Returns `None` if `ps` can't be spawned, exits nonzero (e.g. the
/// pid doesn't exist), or prints something unparsable.
fn ps_rss_kb(pid: u32) -> Option<u64> {
    let pid_arg = pid.to_string();
    let output = Command::new("ps")
        .args(["-o", "rss=", "-p", &pid_arg])
        .output()
        .ok()?;
    if output.status.success() {
        let text = String::from_utf8(output.stdout).ok()?;
        text.trim().parse().ok()
    } else {
        None
    }
}
/// macOS `footprint -p PID` reports phys_footprint, which is the
/// authoritative "this process is responsible for N bytes of
/// physical memory" number. Unlike RSS, it accounts correctly for
/// MADV_FREE'd pages (they're excluded). Slow (~30 ms per call)
/// because it forks the footprint tool, so we use it only for
/// summary samples, not the per-tick trace.
///
/// Returns `None` if the tool can't be spawned, exits nonzero, or
/// its output doesn't contain a parsable footprint banner.
fn footprint_mib(pid: u32) -> Option<u64> {
    let out = Command::new("footprint")
        .args(["-p", &pid.to_string()])
        .output()
        .ok()?;
    if !out.status.success() {
        return None;
    }
    parse_footprint_mib(&String::from_utf8(out.stdout).ok()?)
}

/// Extract the phys_footprint value (in MiB, truncated) from
/// `footprint` tool output. Split out of `footprint_mib` so the
/// parsing is unit-testable without forking the tool.
///
/// The header banner line looks like:
///   supermachine-w [97248]: 64-bit Footprint: 86.6 MB (16384 bytes per page)
/// OR (smaller processes):
///   zsh [14583]: 64-bit Footprint: 2128 KB (16384 bytes per page)
fn parse_footprint_mib(s: &str) -> Option<u64> {
    for line in s.lines() {
        let lower = line.to_ascii_lowercase();
        if let Some(idx) = lower.find("footprint:") {
            // Byte index into `lower` is valid for `line` too:
            // ASCII lowercasing never changes byte lengths.
            let rest = &line[idx + "footprint:".len()..];
            let mut parts = rest.trim().split_whitespace();
            if let (Some(num), Some(unit)) = (parts.next(), parts.next()) {
                if let Ok(n) = num.parse::<f64>() {
                    let mib = match unit.to_ascii_lowercase().as_str() {
                        "kb" => n / 1024.0,
                        "mb" => n,
                        "gb" => n * 1024.0,
                        _ => continue, // unknown unit — keep scanning lines
                    };
                    return Some(mib as u64);
                }
            }
        }
    }
    None
}
/// PIDs of `supermachine-worker` processes that are direct children
/// of this process.
///
/// `pgrep -P <our pid>` scopes the match to our own children, so
/// unrelated workers elsewhere on the host are excluded. Any failure
/// (pgrep missing, no matches, non-UTF-8 output) yields an empty vec.
fn worker_pids() -> Vec<u32> {
    let parent = std::process::id().to_string();
    let spawned = Command::new("pgrep")
        .args(["-P", &parent, "-f", "supermachine-worker"])
        .output();
    match spawned {
        Ok(out) => match String::from_utf8(out.stdout) {
            Ok(text) => text
                .lines()
                .filter_map(|line| line.trim().parse().ok())
                .collect(),
            Err(_) => Vec::new(),
        },
        Err(_) => Vec::new(),
    }
}
/// Drive the benchmark: bake (or reuse) a snapshot, build a pool of
/// `POOL_SIZE` workers, optionally cycle a workload through each,
/// then trace combined worker RSS per tick and finish with one
/// phys_footprint reading per worker.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Tunables, all via env; defaults match the module docs above.
    let pool_size: usize = std::env::var("POOL_SIZE")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(10);
    let sample_interval = Duration::from_millis(
        std::env::var("SAMPLE_INTERVAL_MS")
            .ok()
            .and_then(|s| s.parse().ok())
            .unwrap_or(500),
    );
    let duration = Duration::from_secs(
        std::env::var("SAMPLE_DURATION_S")
            .ok()
            .and_then(|s| s.parse().ok())
            .unwrap_or(30),
    );
    let image_ref = std::env::var("IMAGE").unwrap_or_else(|_| "alpine:latest".to_owned());
    let home = std::env::var("HOME")?;
    let snap_name = "_balloon_rss_bench";
    let snap_dir = format!("{home}/.local/supermachine-snapshots/{snap_name}");
    // Bake the snapshot only once; later runs reuse it from disk.
    if !std::path::Path::new(&format!("{snap_dir}/restore.snap")).exists() {
        eprintln!("[bench] baking fresh snapshot of {image_ref}...");
        Image::builder(&image_ref)
            .with_name(snap_name)
            .with_memory_mib(256)
            .build()?;
        std::thread::sleep(Duration::from_secs(2)); // let bg save complete
    }
    let memory_mib = 256u64; // matches build above
    let total_mib_if_no_balloon = memory_mib * pool_size as u64;
    eprintln!(
        "[bench] pool of {pool_size} workers, {memory_mib} MiB each \
(would be {total_mib_if_no_balloon} MiB total without balloon)"
    );
    // min == max == pool_size: the pool holds exactly pool_size
    // workers (presumably restored eagerly — see the PID check
    // below, which assumes all are spawned by then).
    let img = Image::from_snapshot(&snap_dir)?;
    let pool_t0 = Instant::now();
    let pool = img
        .pool()
        .min(pool_size)
        .max(pool_size)
        .restore_on_release(true)
        .build()?;
    let pool_build_ms = pool_t0.elapsed().as_millis();
    eprintln!("[bench] pool built in {pool_build_ms} ms");
    // Optionally cycle every worker through an acquire/exec/release
    // so we measure post-workload steady-state, not just idle.
    if std::env::var("CYCLE_WORKLOAD").ok().as_deref() == Some("1") {
        eprintln!(
            "[bench] cycling each worker through acquire→exec→release \
to measure post-workload memory..."
        );
        let cycle_t0 = Instant::now();
        for _ in 0..pool_size {
            let vm = pool.acquire()?;
            // Touch ~30 MiB of guest memory by allocating in tmpfs,
            // then deleting. Avoids /dev/urandom (slow entropy on
            // alpine in a microVM) and avoids capturing 50MB of
            // stdout (which the agent's output buffer would have
            // to ferry over vsock). `dd if=/dev/zero` is fast and
            // its stdout goes to /tmp/x, not back to us.
            let _ = vm
                .exec_builder()
                .argv([
                    "sh",
                    "-c",
                    "dd if=/dev/zero of=/tmp/x bs=1M count=30 2>/dev/null; rm /tmp/x",
                ])
                .timeout(Duration::from_secs(15))
                .output();
            // Result ignored deliberately: a failed exec shouldn't
            // abort the benchmark, we just lose one cycle.
            drop(vm); // explicit release back to the pool
        }
        eprintln!(
            "[bench] cycled all {pool_size} workers in {} ms",
            cycle_t0.elapsed().as_millis()
        );
    }
    // Grab worker PIDs. Pool is min-eager so all spawned by now.
    let pids = worker_pids();
    eprintln!("[bench] tracking {} worker PIDs: {:?}", pids.len(), pids);
    if pids.len() != pool_size {
        eprintln!(
            "[bench] WARNING: expected {pool_size} pids, got {} — \
pgrep filter may have missed some",
            pids.len()
        );
    }
    // CSV header, then one row per sample tick (goes to stdout so it
    // can be redirected/plotted; status chatter goes to stderr).
    println!("t_ms,total_rss_mib,avg_rss_mib_per_worker,workers_seen");
    let t0 = Instant::now();
    while t0.elapsed() < duration {
        let mut total_kb = 0u64;
        let mut seen = 0usize;
        for &pid in &pids {
            // Workers that disappear mid-trace simply drop out of
            // `seen` rather than failing the run.
            if let Some(kb) = ps_rss_kb(pid) {
                total_kb += kb;
                seen += 1;
            }
        }
        let total_mib = total_kb / 1024;
        let avg_mib = if seen > 0 { total_kb / seen as u64 / 1024 } else { 0 };
        println!(
            "{},{},{},{}",
            t0.elapsed().as_millis(),
            total_mib,
            avg_mib,
            seen
        );
        std::thread::sleep(sample_interval);
    }
    // After the trace, take one phys_footprint sample per worker —
    // that's the authoritative MADV_FREE-aware number (slow, so it's
    // done once here rather than inside the sampling loop).
    eprintln!("\n[bench] phys_footprint (authoritative per-worker MiB):");
    let mut fp_total = 0u64;
    let mut fp_seen = 0usize;
    for &pid in &pids {
        if let Some(mib) = footprint_mib(pid) {
            eprintln!(" pid={pid:6} phys_footprint={mib:4} MiB");
            fp_total += mib;
            fp_seen += 1;
        }
    }
    if fp_seen > 0 {
        eprintln!(
            "[bench] phys_footprint TOTAL: {} MiB across {} workers (avg {} MiB/worker)",
            fp_total,
            fp_seen,
            fp_total / fp_seen as u64
        );
    }
    Ok(())
}