boxlite 0.9.6

Embeddable virtual machine runtime for secure, isolated code execution
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
//! Subprocess spawning for the `boxlite-shim` binary.

use std::{
    path::Path,
    process::{Child, Stdio},
};

use crate::jailer::{Jail, JailerBuilder};
use crate::runtime::layout::BoxFilesystemLayout;
use crate::runtime::options::BoxOptions;
use crate::util::configure_library_env;
use boxlite_shared::errors::{BoxliteError, BoxliteResult};

use super::watchdog;

/// Outcome of a successful [`ShimSpawner::spawn`]: the shim's process handle
/// together with its optional watchdog keepalive.
///
/// The `keepalive` holds the parent side of the watchdog pipe. While it exists,
/// the shim's watchdog thread blocks on `poll()`. Dropping it closes the pipe
/// write end, delivering POLLHUP to the shim and triggering graceful shutdown.
///
/// Callers that want the shim to keep running must therefore keep `keepalive`
/// alive for as long as the box should live.
pub struct SpawnedShim {
    /// Handle to the spawned shim subprocess.
    pub child: Child,
    /// Parent-side watchdog keepalive. Dropping triggers shim shutdown.
    /// `None` for detached boxes (no watchdog).
    pub keepalive: Option<watchdog::Keepalive>,
}

/// Spawns `boxlite-shim` with full isolation, environment, and watchdog.
///
/// Composes: Jailer (isolation) + watchdog (lifecycle) + env/stdio setup.
///
/// # Fields
///
/// Stable inputs grouped into the struct; variable inputs (`config_json`, `detach`)
/// are passed to [`spawn()`](Self::spawn).
///
/// Every field is a borrow, so a spawner cannot outlive the values it was
/// built from.
pub struct ShimSpawner<'a> {
    /// Path of the executable that the jailed command launches.
    binary_path: &'a Path,
    /// Box filesystem layout: handed to the jailer and used to locate the
    /// box-scoped temp directory and the stderr capture file.
    layout: &'a BoxFilesystemLayout,
    /// Box identifier: handed to the jailer and exported to the child as
    /// `BOXLITE_BOX_ID`.
    box_id: &'a str,
    /// Box options; the security settings and volume list are read from here.
    options: &'a BoxOptions,
}

impl<'a> ShimSpawner<'a> {
    pub fn new(
        binary_path: &'a Path,
        layout: &'a BoxFilesystemLayout,
        box_id: &'a str,
        options: &'a BoxOptions,
    ) -> Self {
        Self {
            binary_path,
            layout,
            box_id,
            options,
        }
    }

    /// Spawn the shim subprocess with jailer isolation and optional watchdog.
    ///
    /// When `detach` is false, creates a watchdog pipe so the shim detects
    /// parent death via POLLHUP. When `detach` is true, no watchdog is created.
    ///
    /// # Returns
    /// * `SpawnedShim` containing the child process and optional keepalive
    pub fn spawn(&self, config_json: &str, detach: bool) -> BoxliteResult<SpawnedShim> {
        // 1. Create watchdog pipe (non-detached only)
        let (keepalive, child_setup) = if !detach {
            let (k, s) = watchdog::create()?;
            (Some(k), Some(s))
        } else {
            (None, None)
        };

        // 2. Build jailer with optional FD preservation for watchdog pipe.
        // `with_detach(detach)` threads the lifecycle choice into the
        // jailer's pre_exec chain (setsid vs. process_group).
        let mut builder = JailerBuilder::new()
            .with_box_id(self.box_id)
            .with_layout(self.layout.clone())
            .with_security(self.options.advanced.security.clone())
            .with_volumes(self.options.volumes.clone())
            .with_detach(detach);

        if let Some(ref setup) = child_setup {
            builder = builder.with_preserved_fd(setup.raw_fd(), watchdog::PIPE_FD);
        }

        let jail = builder.build()?;

        // 3. Setup pre-spawn isolation (cgroups on Linux, no-op on macOS)
        jail.prepare()?;

        // 4. Build isolated command — no CLI args, config sent via stdin pipe
        let no_args: &[String] = &[];
        let mut cmd = jail.command(self.binary_path, no_args);

        // 5. Configure environment
        self.configure_env(&mut cmd);

        // 6. Configure stdio
        // stdin=piped: config JSON is sent via stdin to avoid /proc/cmdline exposure
        // (config contains CA private keys and secret values)
        let stderr_file = self.create_stderr_file()?;
        cmd.stdin(Stdio::piped());
        cmd.stdout(Stdio::null());
        cmd.stderr(Stdio::from(stderr_file));

        // 7. Spawn
        let mut child = cmd.spawn().map_err(|e| {
            let err_msg = format!(
                "Failed to spawn VM subprocess at {}: {}",
                self.binary_path.display(),
                e
            );
            tracing::error!("{}", err_msg);
            BoxliteError::Engine(err_msg)
        })?;

        // 8. Write config to stdin, then close (shim reads until EOF).
        // The child is already spawned and will read from stdin, so this is a
        // producer-consumer pattern via the kernel pipe buffer. For typical
        // configs (~2-5KB), write_all completes immediately. For large configs
        // (>16KB on macOS, >64KB on Linux), write_all blocks until the child
        // drains the buffer — which it does as its first action in main().
        if let Some(mut stdin) = child.stdin.take() {
            use std::io::Write;
            stdin.write_all(config_json.as_bytes()).map_err(|e| {
                BoxliteError::Engine(format!("Failed to write config to shim stdin: {e}"))
            })?;
            drop(stdin); // close write end — shim sees EOF
        }

        // 9. Close read end in parent (child inherited it via fork)
        drop(child_setup);

        Ok(SpawnedShim { child, keepalive })
    }

    fn configure_env(&self, cmd: &mut std::process::Command) {
        // Non-sensitive process marker used by recovery to validate shim PIDs
        // without putting the full InstanceSpec back into /proc/<pid>/cmdline.
        cmd.env("BOXLITE_BOX_ID", self.box_id);

        // Pass debugging environment variables to subprocess
        if let Ok(rust_log) = std::env::var("RUST_LOG") {
            cmd.env("RUST_LOG", rust_log);
        }
        if let Ok(rust_backtrace) = std::env::var("RUST_BACKTRACE") {
            cmd.env("RUST_BACKTRACE", rust_backtrace);
        }

        // Keep temp artifacts inside the box-scoped allowlist when using the
        // built-in macOS seatbelt profile. libkrun may create a transient
        // `krun-empty-root-*` under `env::temp_dir()` when booting from block
        // devices; under deny-default seatbelt this must resolve to an
        // explicitly granted path.
        if self.options.advanced.security.jailer_enabled
            && self.options.advanced.security.sandbox_profile.is_none()
        {
            let tmp_dir = self.layout.tmp_dir();
            cmd.env("TMPDIR", &tmp_dir);
            cmd.env("TMP", &tmp_dir);
            cmd.env("TEMP", &tmp_dir);
        }

        // Set library search paths for bundled dependencies (e.g., libkrunfw.so)
        configure_library_env(cmd, std::ptr::null());
    }

    fn create_stderr_file(&self) -> BoxliteResult<std::fs::File> {
        // Create stderr file BEFORE spawn to capture ALL errors including pre-main dyld errors.
        // This is critical: dyld errors happen before main() and would go to /dev/null otherwise.
        let stderr_file_path = self.layout.stderr_file_path();
        std::fs::File::create(&stderr_file_path).map_err(|e| {
            BoxliteError::Storage(format!(
                "Failed to create stderr file {}: {}",
                stderr_file_path.display(),
                e
            ))
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::runtime::advanced_options::{AdvancedBoxOptions, SecurityOptions};
    use crate::runtime::layout::{BoxFilesystemLayout, FsLayoutConfig};
    use std::ffi::OsStr;
    use std::path::PathBuf;

    /// Layout rooted at a fixed path; sufficient for tests that never spawn.
    fn fixed_layout() -> BoxFilesystemLayout {
        BoxFilesystemLayout::new(
            PathBuf::from("/tmp/box"),
            FsLayoutConfig::without_bind_mount(),
            false,
        )
    }

    /// Default box options with only the security section replaced.
    fn options_with_security(security: SecurityOptions) -> BoxOptions {
        BoxOptions {
            advanced: AdvancedBoxOptions {
                security,
                ..AdvancedBoxOptions::default()
            },
            ..BoxOptions::default()
        }
    }

    /// Spawner pointing at a placeholder shim path; never actually executed.
    fn placeholder_spawner<'a>(
        layout: &'a BoxFilesystemLayout,
        options: &'a BoxOptions,
    ) -> ShimSpawner<'a> {
        ShimSpawner::new(
            Path::new("/usr/bin/boxlite-shim"),
            layout,
            "test-box",
            options,
        )
    }

    /// Look up an explicitly configured env entry on `cmd`.
    ///
    /// Outer `None`: the key was never configured. Inner `None`: the key was
    /// configured for removal.
    fn env_entry<'c>(cmd: &'c std::process::Command, key: &str) -> Option<Option<&'c OsStr>> {
        cmd.get_envs()
            .find(|(name, _)| *name == OsStr::new(key))
            .map(|(_, value)| value)
    }

    /// Fresh temp directory containing a `box/` subdirectory, plus a layout
    /// rooted at that subdirectory. Keep the returned guard alive for the
    /// duration of the test.
    #[cfg(unix)]
    fn scratch_layout() -> (tempfile::TempDir, BoxFilesystemLayout) {
        let tmp = tempfile::TempDir::new_in("/tmp").expect("tempdir");
        let box_dir = tmp.path().join("box");
        std::fs::create_dir_all(&box_dir).expect("mkdir box");
        let layout = BoxFilesystemLayout::new(box_dir, FsLayoutConfig::without_bind_mount(), false);
        (tmp, layout)
    }

    /// Options with the jailer disabled: on macOS the default wraps the child
    /// in sandbox-exec, which would block the `/usr/bin/yes` stand-in.
    /// The pre_exec hooks under test are unaffected by sandbox state.
    #[cfg(unix)]
    fn development_options() -> BoxOptions {
        let mut options = BoxOptions::default();
        options.advanced.security = SecurityOptions::development();
        options
    }

    /// Spawner that launches `/usr/bin/yes` as a long-running shim stand-in.
    #[cfg(unix)]
    fn yes_spawner<'a>(
        layout: &'a BoxFilesystemLayout,
        options: &'a BoxOptions,
    ) -> ShimSpawner<'a> {
        ShimSpawner::new(
            std::path::Path::new("/usr/bin/yes"),
            layout,
            "shimspawnertest",
            options,
        )
    }

    /// SIGKILL the stand-in child and reap it so no zombie outlives the test.
    #[cfg(unix)]
    fn kill_and_reap(pid: i32) {
        // SAFETY: both calls take plain integer arguments, and `waitpid`
        // accepts a null status pointer when the exit status is not wanted.
        unsafe {
            libc::kill(pid, libc::SIGKILL);
            libc::waitpid(pid, std::ptr::null_mut(), 0);
        }
    }

    #[test]
    fn test_build_shim_args() {
        let layout = fixed_layout();
        let options = BoxOptions::default();

        let spawner = placeholder_spawner(&layout, &options);

        // No CLI args — config is sent via stdin pipe
        // Just verify the spawner was created without error
        assert_eq!(spawner.box_id, "test-box");
    }

    #[test]
    fn test_configure_env_sets_box_scoped_temp_dir() {
        let layout = fixed_layout();
        let options = options_with_security(SecurityOptions {
            jailer_enabled: true,
            ..SecurityOptions::default()
        });

        let mut cmd = std::process::Command::new("/usr/bin/true");
        placeholder_spawner(&layout, &options).configure_env(&mut cmd);

        let expected = layout.tmp_dir();

        assert_eq!(
            env_entry(&cmd, "BOXLITE_BOX_ID").flatten(),
            Some(OsStr::new("test-box"))
        );
        // Jailer on + built-in profile: all three temp variables must point
        // at the box-scoped temp directory.
        for key in ["TMPDIR", "TMP", "TEMP"] {
            assert_eq!(
                env_entry(&cmd, key).flatten(),
                Some(expected.as_os_str()),
                "env var {key}"
            );
        }
    }

    #[test]
    fn test_configure_env_does_not_override_temp_for_custom_profile() {
        let layout = fixed_layout();
        let options = options_with_security(SecurityOptions {
            jailer_enabled: true,
            sandbox_profile: Some(PathBuf::from("/tmp/custom.sbpl")),
            ..SecurityOptions::default()
        });

        let mut cmd = std::process::Command::new("/usr/bin/true");
        placeholder_spawner(&layout, &options).configure_env(&mut cmd);

        // A custom sandbox profile means the temp variables are left alone.
        for key in ["TMPDIR", "TMP", "TEMP"] {
            assert!(env_entry(&cmd, key).is_none(), "env var {key}");
        }
    }

    /// A detached spawn has to yield a child that leads its own session.
    /// Without `setsid`, a SIGHUP to the parent's controlling terminal
    /// cascades into the daemon — breaking detach.
    ///
    /// Revert procedure: comment out the
    /// `.with_detach(detach)` builder call in `spawn()`.
    /// This test must then fail with `child_sid == parent_sid`.
    #[cfg(unix)]
    #[test]
    fn shim_spawner_detached_creates_new_session() {
        // SAFETY: `getsid` only queries kernel state; 0 means "this process".
        let parent_sid = unsafe { libc::getsid(0) };

        let (_scratch, layout) = scratch_layout();
        let options = development_options();
        let spawner = yes_spawner(&layout, &options);

        let spawned = spawner.spawn("", true).expect("spawn detached");
        let pid = spawned.child.id() as i32;

        std::thread::sleep(std::time::Duration::from_millis(100));
        // SAFETY: `getsid` only queries kernel state for the given pid.
        let child_sid = unsafe { libc::getsid(pid) };

        // Tear the child down before asserting so a failure cannot leak it.
        kill_and_reap(pid);

        assert_eq!(
            child_sid, pid,
            "detached ShimSpawner::spawn must produce a session-leader child. \
             Got sid={child_sid}, expected {pid}. parent_sid={parent_sid}. \
             Without setsid, a SIGHUP to the parent's controlling terminal \
             would cascade into the detached shim."
        );
        assert_ne!(
            child_sid, parent_sid,
            "shim's session id must differ from parent's"
        );
    }

    /// A non-detached spawn has to yield a child that leads its own
    /// process group so `killpg(shim_pid, SIGKILL)` reaps the
    /// shim + grandchildren (libkrun threads, gvproxy) atomically.
    ///
    /// Revert procedure: comment out the
    /// `.with_detach(detach)` builder call in `spawn()`.
    /// This test must then fail with `child_pgid == parent_pgid`.
    #[cfg(unix)]
    #[test]
    fn shim_spawner_non_detached_creates_new_pgroup() {
        // SAFETY: `getpgid` only queries kernel state; 0 means "this process".
        let parent_pgid = unsafe { libc::getpgid(0) };

        let (_scratch, layout) = scratch_layout();
        let options = development_options();
        let spawner = yes_spawner(&layout, &options);

        let spawned = spawner.spawn("", false).expect("spawn non-detached");
        let pid = spawned.child.id() as i32;

        std::thread::sleep(std::time::Duration::from_millis(100));
        // SAFETY: `getpgid` only queries kernel state for the given pid.
        let child_pgid = unsafe { libc::getpgid(pid) };

        // Tear the child down before asserting so a failure cannot leak it.
        kill_and_reap(pid);

        assert_eq!(
            child_pgid, pid,
            "non-detached ShimSpawner::spawn must produce a pgroup-leader child. \
             Got pgid={child_pgid}, expected {pid}. parent_pgid={parent_pgid}. \
             Without process_group(0), killpg(shim_pid) would target the \
             parent's pgroup."
        );
        assert_ne!(
            child_pgid, parent_pgid,
            "shim's pgid must differ from parent's"
        );
    }
}