supermachine 0.7.2

Run any OCI/Docker image as a hardware-isolated microVM on macOS HVF (Linux KVM and Windows WHP in progress). Single library API, zero flags for the common case, sub-100 ms cold-restore from snapshot.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
//! First-run codesign autopilot.
//!
//! macOS HVF requires the `com.apple.security.hypervisor`
//! entitlement on whatever process calls `hv_vm_create`. In our
//! architecture that's the `supermachine-worker` binary, not the
//! `supermachine` CLI itself.
//!
//! `cargo install supermachine` builds an unsigned worker on the
//! user's machine. To make `cargo install … && supermachine run X`
//! Just Work with no manual setup, the CLI signs the worker with
//! the bundled entitlements plist on its first invocation —
//! transparently, in ~30–50 ms — and writes a sentinel so
//! subsequent invocations skip the signing in ~1 ms.
//!
//! The same path covers the dev-tree case where `cargo build`
//! strips the entitlement on every rebuild: the next CLI launch
//! re-signs automatically.

use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::OnceLock;
use std::time::Duration;

/// Spawn `worker_path --version` once, parse the
/// `supermachine-worker <semver>` line, compare to this crate's
/// version. Mismatch → `Err` with an integrator-actionable message
/// that names the path being used and the fix.
///
/// This catches the deadlock pattern where a stale
/// `~/.cargo/bin/supermachine-worker` (from an older `cargo install
/// supermachine`) is silently picked up by the locator
/// (priority 4 in `locate_worker_bin`) when the embedder bumps
/// their library dependency. Without this check the symptom is a
/// silent hang at "pool-worker connected" — the library expects
/// new-protocol messages (BAKE_READY/SNAPSHOT_ASYNC, added in
/// 0.4.6) the old worker doesn't speak, and both sides block
/// reading from each other.
///
/// Bounded at 5 s; if `--version` doesn't return in that window
/// we treat it as a stale binary too (a healthy worker prints +
/// exits in <100 ms).
///
/// A successful check is cached for the lifetime of the process
/// (the cache is not keyed on `worker_path`); failures are not
/// cached, so every call re-probes until one succeeds.
///
/// # Errors
///
/// Returns a user-facing message when the worker cannot be
/// spawned, does not answer within 5 s, exits non-zero, prints
/// something other than `supermachine-worker <semver>`, or reports
/// a version different from this crate's.
pub fn verify_worker_version(worker_path: &Path) -> Result<(), String> {
    static VERSION_CHECKED: OnceLock<()> = OnceLock::new();
    if VERSION_CHECKED.get().is_some() {
        return Ok(());
    }
    let lib_version = env!("CARGO_PKG_VERSION");

    // Run `<worker> --version` with both output streams captured.
    let child = Command::new(worker_path)
        .arg("--version")
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| {
            format!(
                "verify_worker_version: spawn {} --version: {e}",
                worker_path.display()
            )
        })?;
    let pid = child.id();

    // A helper thread drains the pipes and waits for the child; the
    // result comes back over a channel so the 5 s ceiling is a single
    // blocking `recv_timeout` that returns the moment the worker
    // exits (no polling interval added to the happy path).
    // child.wait_timeout would be cleaner but pulls a dep.
    let (tx, rx) = std::sync::mpsc::channel();
    std::thread::spawn(move || {
        // If we already timed out the receiver is gone; a failed
        // send is expected then and safe to ignore.
        let _ = tx.send(child.wait_with_output());
    });

    let out = match rx.recv_timeout(Duration::from_secs(5)) {
        Ok(Ok(out)) => out,
        Ok(Err(e)) => return Err(format!("wait_with_output: {e}")),
        // Sender dropped without sending: the helper thread died
        // before it could report.
        Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {
            return Err("version-probe thread panicked".to_string());
        }
        Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
            // SAFETY: kill(2) takes plain integers and has no
            // memory-safety preconditions. It is best-effort (the
            // result is ignored); once the child dies its pipes
            // close, the helper thread's `wait_with_output` sees
            // EOF and reaps the child.
            unsafe {
                libc::kill(pid as i32, libc::SIGKILL);
            }
            return Err(format!(
                "supermachine: worker `{}` did not respond to --version within 5s. \
                 This is almost certainly a stale supermachine-worker from an older \
                 release (the --version flag was added in 0.4.18). Run `cargo install \
                 supermachine --force` to update, or set SUPERMACHINE_WORKER_BIN to a \
                 freshly-built worker matching library version {lib_version}.",
                worker_path.display()
            ));
        }
    };

    if !out.status.success() {
        // `unknown arg: --version` from older workers (pre-0.4.18)
        // lands here. Surface the actionable fix.
        let stderr = String::from_utf8_lossy(&out.stderr);
        return Err(format!(
            "supermachine: worker `{}` is from an older supermachine release \
             (does not recognize --version, added in 0.4.18). Library is \
             v{lib_version}; the supervisor protocol changed between releases \
             (0.4.6 added BAKE_READY/SNAPSHOT_ASYNC) and a stale worker \
             deadlocks pipelined-bake silently. \
             Fix: `cargo install supermachine --force` to update the worker, \
             or set SUPERMACHINE_WORKER_BIN to a freshly-built one. \
             (worker stderr: {})",
            worker_path.display(),
            stderr.trim(),
        ));
    }

    // Only the first stdout line matters.
    // Expected: `supermachine-worker <semver>`
    let stdout = String::from_utf8_lossy(&out.stdout);
    let line = stdout.lines().next().unwrap_or("").trim();
    let worker_version = line
        .strip_prefix("supermachine-worker ")
        .ok_or_else(|| {
            format!(
                "supermachine: worker `{}` --version output unparseable: {line:?}. \
                 Expected `supermachine-worker <semver>`. Suggests a non-supermachine \
                 binary at this path or a build with a different output format.",
                worker_path.display()
            )
        })?;
    if worker_version != lib_version {
        return Err(format!(
            "supermachine: worker/library version mismatch. \
             Worker `{}` reports v{worker_version}; library is v{lib_version}. \
             Snapshot format and supervisor protocol are tied to crate version — \
             pin both to the same `=`-version. \
             Fix: `cargo install supermachine --force` to update the worker, \
             or set SUPERMACHINE_WORKER_BIN to a worker matching v{lib_version}.",
            worker_path.display()
        ));
    }
    VERSION_CHECKED.set(()).ok();
    Ok(())
}

/// Sign `worker_path` with the bundled HVF entitlement (ad-hoc),
/// idempotent. Caches the result both in-process (one signing
/// attempt per CLI invocation) and on disk (one across CLI
/// invocations until the worker binary changes).
///
/// Returns `Ok(())` on success or no-op skip; `Err` on codesign
/// failure with a message suitable for the user. Callers should
/// not panic on `Err` — we'd rather let `hv_vm_create` surface
/// its own error than block on a codesign issue.
///
/// # Errors
///
/// Returns a message when the worker cannot be stat'ed, the
/// entitlements plist cannot be written to the temp directory,
/// `codesign` cannot be spawned, or `codesign` exits non-zero (in
/// which case whatever it printed on stderr is appended).
pub fn ensure_worker_signed(worker_path: &Path) -> Result<(), String> {
    static IN_PROCESS_DONE: OnceLock<()> = OnceLock::new();
    if IN_PROCESS_DONE.get().is_some() {
        return Ok(());
    }

    // The sentinel records the (size, mtime, path) the worker had
    // **after the most recent successful sign**. We compare today's
    // stat against that and skip re-signing if it matches. Critical
    // detail: codesign rewrites the binary, which bumps mtime on
    // every successful sign — so the cached sentinel must record
    // the *post-sign* mtime, not the pre-sign one. Otherwise every
    // future call sees current_mtime ≠ sentinel_mtime, re-signs,
    // bumps mtime again, and the bake-key (which includes the
    // worker's mtime) never matches a previously baked snapshot —
    // every fresh process rebakes from scratch.
    let stat_marker = |path: &Path| -> Option<String> {
        let meta = std::fs::metadata(path).ok()?;
        let mtime = meta
            .modified()
            .ok()?
            .duration_since(std::time::UNIX_EPOCH)
            .ok()?
            .as_secs();
        Some(format!(
            "size={}\nmtime={}\npath={}\n",
            meta.len(),
            mtime,
            path.display()
        ))
    };

    let current_marker = stat_marker(worker_path).ok_or_else(|| {
        format!("stat {}: file disappeared", worker_path.display())
    })?;

    // Fast path: the on-disk sentinel matches the current stat, so the
    // worker is unchanged since the last successful sign.
    if let Some(sentinel) = sentinel_path() {
        if let Ok(existing) = std::fs::read_to_string(&sentinel) {
            if existing == current_marker {
                IN_PROCESS_DONE.set(()).ok();
                return Ok(());
            }
        }
    }

    // Drop the entitlements plist (compile-time include_str) to a
    // temp file; codesign needs it on disk. Unique-per-pid so two
    // concurrent CLI invocations don't race on the same temp path.
    let plist = std::env::temp_dir().join(format!(
        "supermachine-entitlements-{}.plist",
        std::process::id()
    ));
    std::fs::write(&plist, crate::assets::ENTITLEMENTS_PLIST)
        .map_err(|e| format!("write entitlements plist: {e}"))?;

    // `output()` (rather than `status()`) so the piped stderr is
    // actually drained: the failure reason can be reported, and the
    // child can never block on a full, unread pipe. stdin is pinned to
    // `inherit` because `output()` would otherwise default it to null,
    // whereas the previous `status()` call inherited it.
    let signed = Command::new("codesign")
        .args(["-s", "-", "--entitlements"])
        .arg(&plist)
        .arg("--force")
        .arg(worker_path)
        .stdin(Stdio::inherit())
        .stdout(Stdio::null())
        .stderr(Stdio::piped())
        .output();
    // Best-effort cleanup; a leftover temp plist is harmless.
    let _ = std::fs::remove_file(&plist);

    match signed {
        Ok(out) if out.status.success() => {
            // Re-stat AFTER signing so the sentinel records the
            // post-sign mtime — that's what future stat() calls
            // (and the bake-key calculation) will see.
            if let (Some(sentinel), Some(post_marker)) =
                (sentinel_path(), stat_marker(worker_path))
            {
                // Sentinel persistence is deliberately best-effort:
                // failing to write it only costs a re-sign next time.
                if let Some(parent) = sentinel.parent() {
                    let _ = std::fs::create_dir_all(parent);
                }
                let _ = std::fs::write(&sentinel, post_marker);
            }
            IN_PROCESS_DONE.set(()).ok();
            Ok(())
        }
        Ok(out) => {
            let mut message = format!(
                "codesign exited with {:?} for {}",
                out.status.code(),
                worker_path.display()
            );
            let stderr = String::from_utf8_lossy(&out.stderr);
            let stderr = stderr.trim();
            if !stderr.is_empty() {
                message.push_str(": ");
                message.push_str(stderr);
            }
            Err(message)
        }
        Err(e) => Err(format!(
            "failed to spawn codesign for {}: {e}\n\
             (codesign ships with macOS by default; if missing, \
             reinstall Xcode Command Line Tools)",
            worker_path.display()
        )),
    }
}

/// Locate `supermachine-worker`. The single source of truth —
/// the bake pipeline delegates here so the two paths can't drift
/// apart again. Resolution order, returning the first hit:
///
///   1. `$SUPERMACHINE_WORKER_BIN` (explicit override; must
///      exist as a file or it's ignored — protects against stale
///      env vars silently shadowing a working install).
///   2. Sibling of the currently running binary
///      (`cargo install supermachine` layout, where every
///      supermachine-* binary lands in the same directory).
///   3. Sibling of the *canonicalized* running binary
///      (handles `~/.local/bin/supermachine` → dev-tree symlinks).
///   4. `$CARGO_HOME/bin/supermachine-worker` (or
///      `~/.cargo/bin/supermachine-worker`) — the canonical
///      `cargo install` location, picked up even when an
///      embedder's *own* binary is `current_exe`.
///   5. Walk `$PATH` for `supermachine-worker` — anything on
///      PATH counts (release tarball, package manager install,
///      symlink farm, …).
///   6. Ancestor walk for `target/release/supermachine-worker`
///      (cargo dev-tree fallback when running tests/examples).
pub fn locate_worker_bin() -> Option<PathBuf> {
    if let Some(p) = std::env::var_os("SUPERMACHINE_WORKER_BIN") {
        let p = PathBuf::from(p);
        if p.is_file() {
            return Some(p);
        }
    }
    if let Ok(exe) = std::env::current_exe() {
        if let Some(p) = sibling_worker(&exe) {
            return Some(p);
        }
        if let Ok(canonical) = std::fs::canonicalize(&exe) {
            if canonical != exe {
                if let Some(p) = sibling_worker(&canonical) {
                    return Some(p);
                }
            }
        }
    }
    if let Some(p) = cargo_bin_worker() {
        return Some(p);
    }
    if let Some(p) = path_walk_worker() {
        return Some(p);
    }
    if let Ok(exe) = std::env::current_exe() {
        for ancestor in exe.ancestors() {
            let p = ancestor.join("target/release/supermachine-worker");
            if p.is_file() {
                return Some(p);
            }
        }
        if let Ok(canonical) = std::fs::canonicalize(&exe) {
            for ancestor in canonical.ancestors() {
                let p = ancestor.join("target/release/supermachine-worker");
                if p.is_file() {
                    return Some(p);
                }
            }
        }
    }
    None
}

/// Returns the `supermachine-worker` that sits in the same directory
/// as `exe`, provided it exists as a file. `None` when `exe` has no
/// parent directory or no such file is present.
fn sibling_worker(exe: &Path) -> Option<PathBuf> {
    exe.parent()
        .map(|dir| dir.join("supermachine-worker"))
        .filter(|candidate| candidate.is_file())
}

/// Returns `<cargo home>/bin/supermachine-worker` if it exists as a
/// file. The cargo home is `$CARGO_HOME` when that variable is set,
/// otherwise `$HOME/.cargo`; with neither set the result is `None`.
fn cargo_bin_worker() -> Option<PathBuf> {
    let cargo_root = match std::env::var_os("CARGO_HOME") {
        Some(explicit) => PathBuf::from(explicit),
        None => PathBuf::from(std::env::var_os("HOME")?).join(".cargo"),
    };
    let candidate = cargo_root.join("bin").join("supermachine-worker");
    candidate.is_file().then_some(candidate)
}

/// Returns the first `supermachine-worker` file found in the
/// directories listed in `$PATH`, searched in order. `None` when
/// `$PATH` is unset or no entry contains the worker.
fn path_walk_worker() -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    std::env::split_paths(&search_path)
        .map(|dir| dir.join("supermachine-worker"))
        .find(|candidate| candidate.is_file())
}

/// Report whether the *currently running* binary carries the
/// `com.apple.security.hypervisor` entitlement: `Ok(())` when it
/// does, otherwise a [`String`] the caller can surface as an error.
///
/// [`crate::Vm::start`] uses this to fail fast with a clear message
/// rather than letting `hv_vm_create` return the cryptic
/// `Hv(-85377017)` (HV_DENIED) when the embedder forgot to sign
/// their binary. `Image::acquire` doesn't need this check — the
/// auto-signed worker subprocess handles HVF for those callers.
///
/// The verdict is computed once per process and then served from a
/// cache: the entitlement on a running binary doesn't change
/// underneath us.
pub fn check_self_has_hvf_entitlement() -> Result<(), String> {
    static VERDICT: OnceLock<Result<(), String>> = OnceLock::new();
    let verdict = VERDICT.get_or_init(check_self_has_hvf_entitlement_uncached);
    verdict.clone()
}

fn check_self_has_hvf_entitlement_uncached() -> Result<(), String> {
    let exe = std::env::current_exe().map_err(|e| {
        format!(
            "could not resolve current_exe to check HVF entitlement: {e} \
             (your binary may need to be codesigned with \
             `cargo supermachine build`)"
        )
    })?;
    let output = Command::new("codesign")
        .args(["--display", "--entitlements", "-", "--xml"])
        .arg(&exe)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output();
    let output = match output {
        Ok(o) => o,
        Err(_) => return Ok(()),  // codesign missing — best-effort skip
    };
    if !output.status.success() {
        // Not signed at all, or signature is invalid.
        return Err(missing_entitlement_message(&exe));
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    if stdout.contains("com.apple.security.hypervisor") {
        Ok(())
    } else {
        Err(missing_entitlement_message(&exe))
    }
}

/// Build the user-facing explanation shown when the running binary
/// lacks the `com.apple.security.hypervisor` entitlement, ending with
/// the path of the offending executable.
///
/// The message is assembled line by line because a `\`-newline
/// continuation inside a string literal swallows the next line's
/// leading whitespace; writing each output line as its own literal
/// keeps the indentation of the `(a)`/`(b)` options and of the
/// example command in the emitted text. Continuations are still used
/// *within* a paragraph, where only the single joining space matters.
fn missing_entitlement_message(exe: &Path) -> String {
    let body = [
        "this binary lacks the `com.apple.security.hypervisor` entitlement, \
         so `Vm::start` cannot call `hv_vm_create` (it would return HV_DENIED).",
        "",
        "Two ways to fix:",
        "",
        "  (a) Use `Image::acquire` / `Image::acquire_with` instead of \
         `Vm::start`. The library spawns a pre-signed \
         `supermachine-worker` subprocess that handles HVF on your \
         behalf, so your own binary never calls into HVF and doesn't \
         need codesigning. This is the recommended path for embedders.",
        "  (b) Build your binary with the bundled cargo plugin:",
        "          cargo supermachine build --release",
        "      which wraps `cargo build` and codesigns the output with the \
         HVF entitlement. Use this if you specifically want the \
         in-process VM thread (`Vm::start`).",
        // Trailing empty entry yields the blank line before `Path:`.
        "",
    ]
    .join("\n");
    format!("{body}\nPath: {}", exe.display())
}

/// `$XDG_DATA_HOME/supermachine/v{VERSION}/.worker-signed` (or the
/// `$HOME/.local/share/...` fallback). Versioned so a supermachine
/// upgrade doesn't reuse the prior version's sentinel.
///
/// Per the XDG Base Directory Specification an empty or relative
/// `$XDG_DATA_HOME` is treated as unset, so the sentinel never ends
/// up relative to the current working directory. An empty `$HOME` is
/// skipped for the same reason. Returns `None` when neither variable
/// provides a usable base directory.
fn sentinel_path() -> Option<PathBuf> {
    let data_home = std::env::var_os("XDG_DATA_HOME")
        .map(PathBuf::from)
        // Rejects "" as well: the empty path is not absolute.
        .filter(|dir| dir.is_absolute())
        .or_else(|| {
            std::env::var_os("HOME")
                .filter(|home| !home.is_empty())
                .map(|home| PathBuf::from(home).join(".local/share"))
        })?;
    Some(
        data_home
            .join("supermachine")
            .join(format!("v{}", env!("CARGO_PKG_VERSION")))
            .join(".worker-signed"),
    )
}