1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
//! BwrapSandbox — Linux isolation via bubblewrap.
//!
//! Implements the [`Sandbox`] trait using bubblewrap (bwrap) for
//! namespace isolation, bind mounts, and environment sanitization.
use super::{Sandbox, SandboxContext};
use crate::jailer::{bwrap, cgroup};
use boxlite_shared::errors::{BoxliteError, BoxliteResult};
use std::process::Command;
/// Sandbox backend for Linux that isolates processes with bubblewrap (`bwrap`).
///
/// This is a unit struct: it holds no state of its own, and everything a
/// sandbox operation needs is passed in through the trait method arguments.
#[derive(Debug)]
pub struct BwrapSandbox;

impl BwrapSandbox {
    /// Creates the stateless bubblewrap sandbox handle.
    pub fn new() -> Self {
        BwrapSandbox
    }
}

impl Default for BwrapSandbox {
    /// Produces the same value as [`BwrapSandbox::new`].
    fn default() -> Self {
        BwrapSandbox
    }
}
impl Sandbox for BwrapSandbox {
    /// Reports whether bubblewrap can be used, as answered by
    /// `bwrap::is_available`.
    fn is_available(&self) -> bool {
        bwrap::is_available()
    }

    /// Runs the pre-launch preflight and best-effort cgroup creation for a box.
    ///
    /// # Errors
    ///
    /// Returns [`BoxliteError::Config`] when bwrap is available but the
    /// user-namespace preflight fails. A cgroup setup failure is only logged
    /// as a warning; the call still succeeds without cgroup limits.
    fn setup(&self, ctx: &SandboxContext) -> BoxliteResult<()> {
        // Fail early, carrying the diagnostic, when bwrap is present but is
        // unable to create user namespaces.
        if bwrap::is_available()
            && let Err(diagnostic) = bwrap::can_create_user_namespace()
        {
            let message = format!(
                "Sandbox preflight failed: bwrap cannot create user namespaces.\n\n\
                 {diagnostic}\n\n\
                 To skip the sandbox (development only):\n \
                 SecurityOptions::disabled()"
            );
            return Err(BoxliteError::Config(message));
        }

        // Cgroup limits are optional: log the outcome either way and carry on.
        let limits = cgroup::CgroupConfig::from(ctx.resource_limits);
        match cgroup::setup_cgroup(ctx.id, &limits) {
            Err(e) => {
                tracing::warn!(id = %ctx.id, error = %e,
                    "Cgroup setup failed (continuing without cgroup limits)");
            }
            Ok(cgroup_path) => {
                tracing::info!(id = %ctx.id, path = %cgroup_path.display(), "Cgroup created");
            }
        }

        Ok(())
    }

    /// Rewrites `cmd` in place so that its program and arguments are launched
    /// through bwrap, then registers a `pre_exec` hook that joins the box's
    /// cgroup when a cgroup procs path can be built for `ctx.id`.
    ///
    /// Arguments of the original command are converted with
    /// `to_string_lossy`, so non-UTF-8 bytes are replaced rather than
    /// preserved. `cmd` is overwritten with the command returned by
    /// `BwrapCommand::build`.
    fn apply(&self, ctx: &SandboxContext, cmd: &mut Command) {
        use std::os::unix::process::CommandExt;
        use std::path::Path;

        // Snapshot the program and its arguments before `cmd` is overwritten.
        let program = cmd.get_program().to_os_string();
        let program_args: Vec<String> = cmd
            .get_args()
            .map(std::ffi::OsStr::to_string_lossy)
            .map(std::borrow::Cow::into_owned)
            .collect();

        let mut wrapper = bwrap::BwrapCommand::new();

        // ---------------------------------------------------------------------
        // Namespaces and session
        // ---------------------------------------------------------------------
        wrapper.with_default_namespaces();
        // `--die-with-parent` (PR_SET_PDEATHSIG) is reserved for foreground
        // boxes, which are meant to die with their launcher. A detached box
        // (`run -d`) has to survive the launcher returning; with the flag set
        // the shim/VM would be killed immediately and the box born Stopped.
        if !ctx.detached {
            wrapper.with_die_with_parent();
        }
        wrapper.with_new_session();

        // ---------------------------------------------------------------------
        // Read-only system directories
        // ---------------------------------------------------------------------
        for dir in ["/usr", "/lib", "/lib64", "/bin", "/sbin"] {
            wrapper.ro_bind_if_exists(dir, dir);
        }
        // Resolver configuration. gvproxy runs inside this shim and resolves
        // `allow_net` hostnames on the host side with the Go resolver, which
        // reads these files. If they are missing, every lookup in
        // buildAllowNetDNSZones fails and allow-listed hosts sinkhole to
        // 0.0.0.0, i.e. the allowlist silently blocks everything whenever the
        // jailer is enabled (#645).
        for file in ["/etc/resolv.conf", "/etc/hosts", "/etc/nsswitch.conf"] {
            wrapper.ro_bind_if_exists(file, file);
        }

        // ---------------------------------------------------------------------
        // Devices and special mounts
        // ---------------------------------------------------------------------
        wrapper.with_dev();
        for device in ["/dev/kvm", "/dev/net/tun"] {
            wrapper.dev_bind_if_exists(device, device);
        }
        wrapper.with_proc().tmpfs("/tmp");

        // ---------------------------------------------------------------------
        // Pre-computed paths from the context (system dirs + user volumes),
        // each bound onto the same location inside the sandbox
        // ---------------------------------------------------------------------
        for entry in ctx.writable_paths() {
            let target = &entry.path;
            wrapper.bind(target, target);
            tracing::debug!(path = %target.display(), "bwrap: bind (rw)");
        }
        for entry in ctx.readonly_paths() {
            let target = &entry.path;
            wrapper.ro_bind(target, target);
            tracing::debug!(path = %target.display(), "bwrap: ro-bind");
        }

        // ---------------------------------------------------------------------
        // Environment sanitization
        // ---------------------------------------------------------------------
        // `--clearenv` wipes LD_LIBRARY_PATH, yet the statically-linked shim
        // can only dlopen libkrunfw through it (its `$ORIGIN` rpath is
        // ineffective). Without the variable the VM fails to start
        // ("Couldn't find or load libkrunfw.so.5", libkrun status=-2). The
        // shim's own directory (`<box>/bin`) is bound into the sandbox and is
        // where `copy_libkrunfw` placed the library, so point there. A program
        // path with no parent yields an empty value.
        let shim_dir = Path::new(&program)
            .parent()
            .map_or_else(String::new, |dir| dir.to_string_lossy().into_owned());

        wrapper.with_clearenv();
        wrapper.setenv("PATH", "/usr/bin:/bin:/usr/sbin:/sbin");
        wrapper.setenv("HOME", "/root");
        wrapper.setenv("LD_LIBRARY_PATH", shim_dir);

        // Carry over debugging variables when they are set to valid Unicode.
        for key in ["RUST_LOG", "RUST_BACKTRACE"] {
            if let Ok(value) = std::env::var(key) {
                wrapper.setenv(key, value);
            }
        }

        wrapper.chdir("/");

        // Swap the caller's command for the bwrap-wrapped one.
        *cmd = wrapper.build(Path::new(&program), &program_args);

        // Join the cgroup from a pre_exec hook, when a procs path exists.
        let Some(cgroup_procs) = cgroup::build_cgroup_procs_path(ctx.id) else {
            return;
        };
        // SAFETY: the hook runs in the child between fork and exec, so it must
        // restrict itself to async-signal-safe work. It only calls
        // `cgroup::add_self_to_cgroup_raw`, which the original author marks as
        // async-signal-safe. NOTE(review): confirm that helper does not
        // allocate or take locks.
        unsafe {
            cmd.pre_exec(move || {
                // Best-effort: a failed cgroup join must not abort the exec.
                let _ = cgroup::add_self_to_cgroup_raw(&cgroup_procs);
                Ok(())
            });
        }
    }

    /// Short identifier of this sandbox backend.
    fn name(&self) -> &'static str {
        "bwrap"
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::runtime::advanced_options::ResourceLimits;

    /// Path of the stand-in shim binary that the tests wrap.
    const SHIM: &str = "/var/lib/boxlite/boxes/abc/bin/boxlite-shim";

    /// Runs `BwrapSandbox::apply` on a command for [`SHIM`] and returns the
    /// arguments of the resulting command, lossily converted to `String`.
    fn wrapped_args(detached: bool) -> Vec<String> {
        let limits = Box::leak(Box::new(ResourceLimits::default()));
        let ctx = SandboxContext {
            id: "test-box",
            paths: vec![],
            resource_limits: limits,
            network_enabled: false,
            sandbox_profile: None,
            detached,
        };
        let mut cmd = Command::new(SHIM);
        BwrapSandbox::new().apply(&ctx, &mut cmd);
        cmd.get_args()
            .map(|arg| arg.to_string_lossy().into_owned())
            .collect()
    }

    /// Regression guard for the `LD_LIBRARY_PATH` the composable `apply()`
    /// dropped relative to the legacy `build_shim_command`.
    ///
    /// The shim is statically linked, so libkrun's `dlopen` of
    /// `libkrunfw.so.5` can only succeed through `LD_LIBRARY_PATH` inside the
    /// `--clearenv` sandbox: the `$ORIGIN` rpath is absent and the inherited
    /// value is wiped. Without it the VM fails to start ("Couldn't find or
    /// load libkrunfw.so.5", libkrun status=-2).
    #[test]
    fn apply_sets_ld_library_path_to_shim_dir() {
        if !bwrap::is_available() {
            eprintln!("skipping apply_sets_ld_library_path_to_shim_dir: bwrap not available");
            return;
        }

        let args = wrapped_args(false);
        // Look for the `--setenv LD_LIBRARY_PATH <value>` triple.
        let setenv = args
            .windows(3)
            .find(|w| w[0] == "--setenv" && w[1] == "LD_LIBRARY_PATH")
            .expect("bwrap must --setenv LD_LIBRARY_PATH so the static shim can dlopen libkrunfw");

        assert_eq!(
            setenv[2],
            "/var/lib/boxlite/boxes/abc/bin",
            "LD_LIBRARY_PATH must point at the shim's own directory (where libkrunfw is copied)"
        );
    }

    /// Only foreground boxes may receive bwrap's `--die-with-parent`.
    ///
    /// A detached box has to outlive its launcher; with the flag set,
    /// PR_SET_PDEATHSIG kills the shim/VM as soon as `run -d` returns and the
    /// box is left born-Stopped. Foreground boxes keep the flag so they die
    /// together with their launcher.
    #[test]
    fn apply_sets_die_with_parent_only_for_foreground() {
        if !bwrap::is_available() {
            eprintln!(
                "skipping apply_sets_die_with_parent_only_for_foreground: bwrap not available"
            );
            return;
        }

        let has_die_with_parent = |detached: bool| {
            wrapped_args(detached)
                .iter()
                .any(|arg| arg == "--die-with-parent")
        };

        assert!(
            has_die_with_parent(false),
            "foreground box must get --die-with-parent so it dies with its launcher"
        );
        assert!(
            !has_die_with_parent(true),
            "detached box must not get --die-with-parent or it is killed when run -d returns"
        );
    }
}