Skip to main content

evalbox_sandbox/isolation/
lockdown.rs

1//! Security lockdown for sandboxed processes.
2//!
3//! Applies all security restrictions to the child process.
4//! The order of operations is critical for security:
5//!
6//! 0. **`NO_NEW_PRIVS`** - Required before Landlock and seccomp
7//! 1. **Landlock v5** - Filesystem, network, signal, and IPC access control
8//! 2. **Rlimits** - Resource limits (memory, CPU, files, processes)
9//! 3. **Securebits** - Lock capability state permanently
10//! 4. **Capabilities** - Drop all capabilities
11//!
12//! Note: Seccomp filters and fd closing are handled separately in `child_process()`
13//! because the notify filter must return a listener fd that gets sent to the parent.
14//!
15//! After lockdown, the process cannot:
16//! - Access files outside allowed paths
17//! - Make network connections (if network blocked, requires Landlock ABI 4+)
18//! - Send signals to processes outside the sandbox (Landlock ABI 5+)
19//! - Connect to abstract unix sockets outside the sandbox (Landlock ABI 5+)
20//! - Exceed resource limits
21//! - Gain new privileges
22
23use std::ffi::CString;
24use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
25use std::os::unix::ffi::OsStrExt;
26use std::path::Path;
27
28use evalbox_sys::landlock::{
29    self, LANDLOCK_ACCESS_FS_EXECUTE, LANDLOCK_ACCESS_FS_MAKE_DIR, LANDLOCK_ACCESS_FS_MAKE_FIFO,
30    LANDLOCK_ACCESS_FS_MAKE_REG, LANDLOCK_ACCESS_FS_MAKE_SYM, LANDLOCK_ACCESS_FS_READ_DIR,
31    LANDLOCK_ACCESS_FS_READ_FILE, LANDLOCK_ACCESS_FS_REMOVE_DIR, LANDLOCK_ACCESS_FS_REMOVE_FILE,
32    LANDLOCK_ACCESS_FS_TRUNCATE, LANDLOCK_ACCESS_FS_WRITE_FILE, LandlockPathBeneathAttr,
33    LandlockRulesetAttr, fs_access_for_abi, landlock_add_rule_path, landlock_create_ruleset,
34    landlock_restrict_self, net_access_for_abi, scope_for_abi,
35};
36use evalbox_sys::last_errno;
37use rustix::io::Errno;
38use thiserror::Error;
39
40use super::rlimits::apply_rlimits;
41use crate::plan::Plan;
42
/// Error during security lockdown.
///
/// Every variant wraps the `Errno` reported by the failing step; the
/// `Display` prefix names the subsystem that failed.
#[derive(Debug, Error)]
pub enum LockdownError {
    /// Creating the Landlock ruleset, or restricting the process with it, failed.
    #[error("landlock: {0}")]
    Landlock(Errno),

    /// Seccomp failure. Not constructed anywhere in this module; presumably used by
    /// the seccomp setup in `child_process()` (verify against the caller).
    #[error("seccomp: {0}")]
    Seccomp(Errno),

    /// `apply_rlimits` returned an error.
    #[error("rlimit: {0}")]
    Rlimit(Errno),

    /// Capability-related failure. In this module it is returned when
    /// `PR_SET_NO_NEW_PRIVS` cannot be set.
    #[error("capability: {0}")]
    Capability(Errno),

    /// Securebits failure. Not constructed by the code in this module:
    /// `apply_securebits` only prints a warning when the prctl fails.
    #[error("securebits: {0}")]
    Securebits(Errno),
}
61
62/// Apply security lockdown to the current process.
63///
64/// `workspace_root` is the real absolute path to the workspace directory
65/// (no `pivot_root`, so we use real paths).
66///
67/// `extra_readonly_paths` are additional paths that should be readable
68/// (e.g., resolved binary mount paths).
69pub fn lockdown(
70    plan: &Plan,
71    workspace_root: &Path,
72    extra_readonly_paths: &[&str],
73) -> Result<(), LockdownError> {
74    // NO_NEW_PRIVS must be set before landlock_restrict_self and seccomp.
75    set_no_new_privs()?;
76    apply_landlock_v5(plan, workspace_root, extra_readonly_paths)?;
77    apply_rlimits(plan).map_err(LockdownError::Rlimit)?;
78    apply_securebits()?;
79    drop_all_caps()?;
80    Ok(())
81}
82
83fn apply_landlock_v5(
84    plan: &Plan,
85    workspace_root: &Path,
86    extra_readonly_paths: &[&str],
87) -> Result<(), LockdownError> {
88    let abi = match landlock::landlock_abi_version() {
89        Ok(v) => v,
90        Err(_) => return Ok(()), // Landlock not available
91    };
92
93    if abi < 5 {
94        eprintln!("warning: landlock ABI {abi} < 5, signal/IPC scoping unavailable");
95    }
96
97    let fs_access = fs_access_for_abi(abi);
98    let net_access = if plan.network_blocked && abi >= 4 {
99        net_access_for_abi(abi)
100    } else {
101        0
102    };
103    let scoped = scope_for_abi(abi);
104
105    let attr = LandlockRulesetAttr {
106        handled_access_fs: fs_access,
107        handled_access_net: net_access,
108        scoped,
109    };
110    let ruleset_fd = landlock_create_ruleset(&attr).map_err(LockdownError::Landlock)?;
111
112    let read_access =
113        LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_READ_DIR;
114    let write_access = read_access
115        | LANDLOCK_ACCESS_FS_WRITE_FILE
116        | LANDLOCK_ACCESS_FS_MAKE_REG
117        | LANDLOCK_ACCESS_FS_MAKE_DIR
118        | LANDLOCK_ACCESS_FS_MAKE_SYM
119        | LANDLOCK_ACCESS_FS_MAKE_FIFO
120        | LANDLOCK_ACCESS_FS_REMOVE_FILE
121        | LANDLOCK_ACCESS_FS_REMOVE_DIR
122        | LANDLOCK_ACCESS_FS_TRUNCATE;
123
124    // Read-only mounts from plan (system paths computed by evalbox or user-specified)
125    for mount in &plan.mounts {
126        if !mount.writable {
127            let access = if mount.executable {
128                read_access
129            } else {
130                read_access & !LANDLOCK_ACCESS_FS_EXECUTE
131            };
132            add_path_rule(&ruleset_fd, &mount.source, access);
133        }
134    }
135
136    // Extra readonly paths (resolved binary mounts)
137    for path in extra_readonly_paths {
138        add_path_rule(&ruleset_fd, path, read_access);
139    }
140
141    // Writable workspace paths (real absolute paths, no pivot_root)
142    add_path_rule(&ruleset_fd, workspace_root.join("work"), write_access);
143    add_path_rule(&ruleset_fd, workspace_root.join("tmp"), write_access);
144    add_path_rule(&ruleset_fd, workspace_root.join("home"), write_access);
145
146    // System paths (read-only with execute)
147    for path in ["/usr", "/bin", "/lib", "/lib64", "/etc"] {
148        add_path_rule(&ruleset_fd, path, read_access);
149    }
150
151    // NixOS store
152    if Path::new("/nix/store").exists() {
153        add_path_rule(&ruleset_fd, "/nix/store", read_access);
154    }
155    if Path::new("/run/current-system").exists() {
156        add_path_rule(&ruleset_fd, "/run/current-system", read_access);
157    }
158
159    // Proc (read-only)
160    add_path_rule(
161        &ruleset_fd,
162        "/proc",
163        read_access & !LANDLOCK_ACCESS_FS_EXECUTE,
164    );
165
166    // Dev (read + write for /dev/null etc.)
167    add_path_rule(
168        &ruleset_fd,
169        "/dev",
170        (read_access & !LANDLOCK_ACCESS_FS_EXECUTE) | LANDLOCK_ACCESS_FS_WRITE_FILE,
171    );
172
173    landlock_restrict_self(&ruleset_fd).map_err(LockdownError::Landlock)
174}
175
176/// Add a path rule to the Landlock ruleset.
177///
178/// Errors are logged but not propagated - the path simply won't be
179/// accessible in the sandbox. Missing paths (like /nix/store on non-NixOS)
180/// should not prevent sandbox creation.
181fn add_path_rule(ruleset_fd: &OwnedFd, path: impl AsRef<Path>, access: u64) {
182    let path = path.as_ref();
183    let fd = match open_path(path) {
184        Ok(fd) => fd,
185        Err(_) => return, // Path doesn't exist, skip silently
186    };
187
188    let rule = LandlockPathBeneathAttr {
189        allowed_access: access,
190        parent_fd: fd.as_raw_fd(),
191    };
192    if let Err(e) = landlock_add_rule_path(ruleset_fd, &rule) {
193        eprintln!("warning: landlock rule for {path:?} failed: {e}");
194    }
195}
196
197#[inline]
198fn open_path(path: impl AsRef<Path>) -> Result<OwnedFd, Errno> {
199    let path_c = CString::new(path.as_ref().as_os_str().as_bytes()).map_err(|_| Errno::INVAL)?;
200    let fd = unsafe { libc::open(path_c.as_ptr(), libc::O_PATH | libc::O_CLOEXEC) };
201    if fd < 0 {
202        Err(last_errno())
203    } else {
204        Ok(unsafe { OwnedFd::from_raw_fd(fd) })
205    }
206}
207
// Securebits constants (from <linux/securebits.h>)
//
// Each flag occupies an even bit and its `_LOCKED` companion the following odd bit.
// Bit 4 (the unlocked SECBIT_KEEP_CAPS flag) is intentionally not defined here;
// only its lock bit is. See capabilities(7) for the semantics summarised below.

// Disables the special capability treatment of UID 0 on exec.
const SECBIT_NOROOT: u64 = 1 << 0;
// Locks SECBIT_NOROOT.
const SECBIT_NOROOT_LOCKED: u64 = 1 << 1;
// Stops the kernel from adjusting capability sets when UIDs change.
const SECBIT_NO_SETUID_FIXUP: u64 = 1 << 2;
// Locks SECBIT_NO_SETUID_FIXUP.
const SECBIT_NO_SETUID_FIXUP_LOCKED: u64 = 1 << 3;
// Locks SECBIT_KEEP_CAPS in its current state.
const SECBIT_KEEP_CAPS_LOCKED: u64 = 1 << 5;
// Forbids raising ambient capabilities.
const SECBIT_NO_CAP_AMBIENT_RAISE: u64 = 1 << 6;
// Locks SECBIT_NO_CAP_AMBIENT_RAISE.
const SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED: u64 = 1 << 7;
216
217/// Apply securebits to lock capability state permanently.
218///
219/// This prevents the process from ever regaining capabilities through
220/// any mechanism (exec of setuid, ambient capabilities, etc.).
221fn apply_securebits() -> Result<(), LockdownError> {
222    let bits = SECBIT_NOROOT
223        | SECBIT_NOROOT_LOCKED
224        | SECBIT_NO_SETUID_FIXUP
225        | SECBIT_NO_SETUID_FIXUP_LOCKED
226        | SECBIT_KEEP_CAPS_LOCKED
227        | SECBIT_NO_CAP_AMBIENT_RAISE
228        | SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED;
229
230    let ret = unsafe { libc::prctl(libc::PR_SET_SECUREBITS, bits, 0, 0, 0) };
231    if ret != 0 {
232        // Not fatal — securebits may require capabilities we don't have.
233        // The important thing is NO_NEW_PRIVS + dropping all caps.
234        eprintln!("warning: PR_SET_SECUREBITS failed: {}", last_errno());
235    }
236    Ok(())
237}
238
239/// Set `PR_SET_NO_NEW_PRIVS` — required before `landlock_restrict_self` and seccomp.
240fn set_no_new_privs() -> Result<(), LockdownError> {
241    let ret = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
242    if ret != 0 {
243        Err(LockdownError::Capability(last_errno()))
244    } else {
245        Ok(())
246    }
247}
248
249fn drop_all_caps() -> Result<(), LockdownError> {
250    unsafe {
251        libc::prctl(
252            libc::PR_CAP_AMBIENT,
253            libc::PR_CAP_AMBIENT_CLEAR_ALL,
254            0,
255            0,
256            0,
257        );
258        for cap in 0..64 {
259            libc::prctl(libc::PR_CAPBSET_DROP, cap, 0, 0, 0);
260        }
261    }
262    Ok(())
263}
264
265/// Close all file descriptors > 2 using `close_range` syscall.
266///
267/// This is called separately from lockdown because it must happen after
268/// seccomp filter installation and listener fd transfer.
269pub fn close_extra_fds() {
270    // close_range(3, MAX, 0) — close all fds from 3 to MAX
271    unsafe {
272        libc::syscall(libc::SYS_close_range, 3u32, u32::MAX, 0u32);
273    }
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// An existing directory can be opened.
    #[test]
    fn open_path_valid() {
        assert!(open_path("/tmp").is_ok());
    }

    /// A path that does not exist yields an error instead of a descriptor.
    #[test]
    fn open_path_missing() {
        assert!(open_path("/nonexistent/evalbox-lockdown-test").is_err());
    }

    /// A path with an interior NUL byte cannot become a C string and maps to EINVAL.
    #[test]
    fn open_path_interior_nul() {
        assert!(open_path("/tmp/a\0b").err() == Some(Errno::INVAL));
    }
}