use crate::capability::{AccessMode, CapabilitySet, NetworkMode};
use crate::error::{NonoError, Result};
use crate::sandbox::SupportInfo;
use landlock::{
Access, AccessFs, AccessNet, BitFlags, CompatLevel, Compatible, NetPort, PathBeneath, PathFd,
Ruleset, RulesetAttr, RulesetCreatedAttr, ABI,
};
use tracing::{debug, info};
/// Landlock ABI version this module targets: it selects the set of
/// filesystem and network access rights passed to `from_all` below.
const TARGET_ABI: ABI = ABI::V5;
pub fn is_supported() -> bool {
Ruleset::default()
.handle_access(AccessFs::from_all(TARGET_ABI))
.and_then(|r| r.create())
.is_ok()
}
pub fn support_info() -> SupportInfo {
match Ruleset::default()
.handle_access(AccessFs::from_all(TARGET_ABI))
.and_then(|r| r.create())
{
Ok(_) => SupportInfo {
is_supported: true,
platform: "linux",
details: format!("Landlock available (targeting ABI v{:?})", TARGET_ABI),
},
Err(_) => SupportInfo {
is_supported: false,
platform: "linux",
details: "Landlock not available. Requires Linux kernel 5.13+ with Landlock enabled."
.to_string(),
},
}
}
/// Translates an [`AccessMode`] into the Landlock filesystem rights it grants.
///
/// * `Read` grants reading files, listing directories and executing.
/// * `Write` grants writing, creating every node type, removing files and
///   directories, `Refer` and `Truncate`.
/// * `ReadWrite` is the union of the two sets above.
///
/// The `_abi` argument is accepted but not consulted; the same rights are
/// returned for every ABI.
fn access_to_landlock(access: AccessMode, _abi: ABI) -> BitFlags<AccessFs> {
    let read_rights = AccessFs::ReadFile | AccessFs::ReadDir | AccessFs::Execute;
    let write_rights = AccessFs::WriteFile
        | AccessFs::MakeChar
        | AccessFs::MakeDir
        | AccessFs::MakeReg
        | AccessFs::MakeSock
        | AccessFs::MakeFifo
        | AccessFs::MakeBlock
        | AccessFs::MakeSym
        | AccessFs::RemoveFile
        | AccessFs::RemoveDir
        | AccessFs::Refer
        | AccessFs::Truncate;

    match access {
        AccessMode::Read => read_rights,
        AccessMode::Write => write_rights,
        AccessMode::ReadWrite => read_rights | write_rights,
    }
}
/// Builds a Landlock ruleset from `caps` and enforces it with `restrict_self()`.
///
/// Filesystem rights for `TARGET_ABI` are always handled. TCP network rights
/// are handled only when the network mode is not `AllowAll` or when explicit
/// connect/bind ports are configured; in that case kernel support for them is
/// treated as a hard requirement.
///
/// # Errors
///
/// Returns `NonoError::SandboxInit` when access rights cannot be handled, the
/// ruleset cannot be created, a rule cannot be added, `restrict_self()` fails,
/// or the resulting status is `NotEnforced`. A failure to open a capability
/// path is propagated from `PathFd::new` via `?`.
pub fn apply(caps: &CapabilitySet) -> Result<()> {
info!("Using Landlock ABI {:?}", TARGET_ABI);
// Declare every filesystem access right known to TARGET_ABI as handled.
let handled_fs = AccessFs::from_all(TARGET_ABI);
debug!("Handling filesystem access: {:?}", handled_fs);
let ruleset_builder = Ruleset::default()
.handle_access(handled_fs)
.map_err(|e| NonoError::SandboxInit(format!("Failed to handle fs access: {}", e)))?;
// Network rights are handled only when the capability set restricts
// networking in some way: a non-AllowAll mode, or explicit port lists.
let needs_network_handling = !matches!(caps.network_mode(), NetworkMode::AllowAll)
|| !caps.tcp_connect_ports().is_empty()
|| !caps.tcp_bind_ports().is_empty();
let ruleset_builder = if needs_network_handling {
let handled_net = AccessNet::from_all(TARGET_ABI);
if !handled_net.is_empty() {
debug!("Handling network access: {:?}", handled_net);
// Switch to HardRequirement just for this call so that a kernel unable
// to handle the network rights produces an error here, then return to
// BestEffort for the remainder of the build.
ruleset_builder
.set_compatibility(CompatLevel::HardRequirement)
.handle_access(handled_net)
.map_err(|e| {
NonoError::SandboxInit(format!(
"Network filtering requested but unsupported by this kernel: {}",
e
))
})?
.set_compatibility(CompatLevel::BestEffort)
} else {
// Defensive branch: `from_all` produced no network rights for TARGET_ABI.
return Err(NonoError::SandboxInit(
"Network filtering requested but kernel Landlock ABI doesn't support it \
(requires V4+). Refusing to start without network restrictions."
.to_string(),
));
}
} else {
ruleset_builder
};
let mut ruleset = ruleset_builder
.create()
.map_err(|e| NonoError::SandboxInit(format!("Failed to create ruleset: {}", e)))?;
// ProxyOnly mode: allow connecting to the proxy port and binding each of
// the mode's bind ports.
if let NetworkMode::ProxyOnly { port, bind_ports } = caps.network_mode() {
debug!("Adding ProxyOnly TCP connect rule for port {}", port);
ruleset = ruleset
.add_rule(NetPort::new(*port, AccessNet::ConnectTcp))
.map_err(|e| {
NonoError::SandboxInit(format!(
"Cannot add TCP connect rule for proxy port {}: {}",
port, e
))
})?;
for bp in bind_ports {
debug!("Adding ProxyOnly TCP bind rule for port {}", bp);
ruleset = ruleset
.add_rule(NetPort::new(*bp, AccessNet::BindTcp))
.map_err(|e| {
NonoError::SandboxInit(format!(
"Cannot add TCP bind rule for port {}: {}",
bp, e
))
})?;
}
}
// Explicitly configured outbound (connect) ports.
for port in caps.tcp_connect_ports() {
debug!("Adding TCP connect rule for port {}", port);
ruleset = ruleset
.add_rule(NetPort::new(*port, AccessNet::ConnectTcp))
.map_err(|e| {
NonoError::SandboxInit(format!(
"Cannot add TCP connect rule for port {}: {}",
port, e
))
})?;
}
// Explicitly configured listening (bind) ports.
for port in caps.tcp_bind_ports() {
debug!("Adding TCP bind rule for port {}", port);
ruleset = ruleset
.add_rule(NetPort::new(*port, AccessNet::BindTcp))
.map_err(|e| {
NonoError::SandboxInit(format!("Cannot add TCP bind rule for port {}: {}", port, e))
})?;
}
// When networking is restricted, every localhost port gets both a connect
// rule and a bind rule.
if !matches!(caps.network_mode(), NetworkMode::AllowAll) {
for port in caps.localhost_ports() {
debug!("Adding localhost TCP connect rule for port {}", port);
ruleset = ruleset
.add_rule(NetPort::new(*port, AccessNet::ConnectTcp))
.map_err(|e| {
NonoError::SandboxInit(format!(
"Cannot add TCP connect rule for localhost port {}: {}",
port, e
))
})?;
debug!("Adding localhost TCP bind rule for port {}", port);
ruleset = ruleset
.add_rule(NetPort::new(*port, AccessNet::BindTcp))
.map_err(|e| {
NonoError::SandboxInit(format!(
"Cannot add TCP bind rule for localhost port {}: {}",
port, e
))
})?;
}
}
// One PathBeneath rule per filesystem capability, rooted at its resolved path.
for cap in caps.fs_capabilities() {
let access = access_to_landlock(cap.access, TARGET_ABI);
debug!(
"Adding rule: {} with access {:?}",
cap.resolved.display(),
access
);
// Errors opening the path are propagated with `?`; this relies on a
// conversion into NonoError that is defined outside this view.
let path_fd = PathFd::new(&cap.resolved)?;
ruleset = ruleset
.add_rule(PathBeneath::new(path_fd, access))
.map_err(|e| {
NonoError::SandboxInit(format!(
"Cannot add Landlock rule for {}: {} (filesystem may not support Landlock)",
cap.resolved.display(),
e
))
})?;
}
// Enforce the ruleset, then inspect how much of it took effect.
let status = ruleset
.restrict_self()
.map_err(|e| NonoError::SandboxInit(format!("Failed to restrict self: {}", e)))?;
match status.ruleset {
landlock::RulesetStatus::FullyEnforced => {
info!("Landlock sandbox fully enforced");
}
// Partial enforcement is accepted and only logged at debug level.
landlock::RulesetStatus::PartiallyEnforced => {
debug!("Landlock sandbox enforced in best-effort mode (partially enforced)");
}
// No enforcement at all is a hard failure.
landlock::RulesetStatus::NotEnforced => {
return Err(NonoError::SandboxInit(
"Landlock sandbox was not enforced".to_string(),
));
}
}
Ok(())
}
/// Notification record filled in by the `SECCOMP_IOCTL_NOTIF_RECV` ioctl.
///
/// `#[repr(C)]` with this exact field order; intended to mirror the kernel's
/// `struct seccomp_notif` (80 bytes, as checked by the unit tests).
#[repr(C)]
#[derive(Debug, Clone)]
pub struct SeccompNotif {
/// Identifier of this notification; the response/validity helpers in this
/// file take it as `notif_id`.
pub id: u64,
/// Process id associated with the notification (presumably the task that
/// made the intercepted syscall — confirm against seccomp_unotify(2)).
pub pid: u32,
/// Notification flags as reported by the kernel.
pub flags: u32,
/// Description of the intercepted syscall.
pub data: SeccompData,
}
/// Syscall description embedded in [`SeccompNotif`].
///
/// `#[repr(C)]`, 64 bytes; intended to mirror the kernel's
/// `struct seccomp_data`. The BPF filter in this file loads `nr` from offset 0.
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct SeccompData {
/// Syscall number.
pub nr: i32,
/// Architecture identifier reported by the kernel.
pub arch: u32,
/// Instruction pointer at the time of the syscall.
pub instruction_pointer: u64,
/// The six raw syscall arguments.
pub args: [u64; 6],
}
/// Response record passed to the `SECCOMP_IOCTL_NOTIF_SEND` ioctl.
///
/// `#[repr(C)]`, 24 bytes; intended to mirror the kernel's
/// `struct seccomp_notif_resp`.
#[repr(C)]
#[derive(Debug)]
struct SeccompNotifResp {
/// Id of the notification being answered.
id: u64,
/// Return value for the intercepted syscall (`deny_notif` sends 0).
val: i64,
/// Negated errno for the intercepted syscall (`deny_notif` sends `-EPERM`).
error: i32,
/// Response flags (`deny_notif` sends 0).
flags: u32,
}
/// Request record passed to the `SECCOMP_IOCTL_NOTIF_ADDFD` ioctl.
///
/// `#[repr(C)]`, 24 bytes; intended to mirror the kernel's
/// `struct seccomp_notif_addfd`.
#[repr(C)]
#[derive(Debug)]
struct SeccompNotifAddfd {
/// Id of the notification the descriptor is injected for.
id: u64,
/// `SECCOMP_ADDFD_FLAG_*` bits.
flags: u32,
/// Descriptor in the calling (supervisor) process to inject.
srcfd: u32,
/// Requested descriptor number in the target (`inject_fd` passes 0).
newfd: u32,
/// Flags for the injected descriptor (`inject_fd` passes `O_CLOEXEC`).
newfd_flags: u32,
}
/// `seccomp(2)` operation: install a BPF filter.
const SECCOMP_SET_MODE_FILTER: libc::c_uint = 1;
/// Filter flag: have `seccomp(2)` return a user-notification listener fd.
const SECCOMP_FILTER_FLAG_NEW_LISTENER: libc::c_uint = 1 << 3;
/// Filter flag: once a notification has been received, the notifying task
/// waits in a killable state.
///
/// This is bit 5 in the kernel UAPI header. Bit 4 is
/// `SECCOMP_FILTER_FLAG_TSYNC_ESRCH`, which the kernel accepts silently
/// without `TSYNC`, so using it here would leave the killable-wait behavior
/// disabled without any error.
const SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV: libc::c_uint = 1 << 5;
/// ioctl: receive the next notification (`_IOWR('!', 0, 80-byte struct)`).
const SECCOMP_IOCTL_NOTIF_RECV: libc::c_ulong = 0xc0502100;
/// ioctl: send a response (`_IOWR('!', 1, 24-byte struct)`).
const SECCOMP_IOCTL_NOTIF_SEND: libc::c_ulong = 0xc0182101;
/// ioctl: check that a notification id is still valid (`_IOW('!', 2, u64)`).
const SECCOMP_IOCTL_NOTIF_ID_VALID: libc::c_ulong = 0x40082102;
/// ioctl: inject a file descriptor (`_IOW('!', 3, 24-byte struct)`).
const SECCOMP_IOCTL_NOTIF_ADDFD: libc::c_ulong = 0x40182103;
/// Addfd flag used by `inject_fd` together with the injected descriptor.
const SECCOMP_ADDFD_FLAG_SEND: u32 = 1 << 1;
// Classic BPF opcode components; they are OR-ed together to form the `code`
// field of each `SockFilterInsn` in the filter below.
/// Instruction class: load into the accumulator.
const BPF_LD: u16 = 0x00;
/// Operand size: 32-bit word.
const BPF_W: u16 = 0x00;
/// Addressing mode: absolute offset (here, into the seccomp data).
const BPF_ABS: u16 = 0x20;
/// Instruction class: jump.
const BPF_JMP: u16 = 0x05;
/// Jump condition: accumulator equals operand.
const BPF_JEQ: u16 = 0x10;
/// Operand source: the immediate `k` field.
const BPF_K: u16 = 0x00;
/// Instruction class: return.
const BPF_RET: u16 = 0x06;
/// Filter return value: forward the syscall to the user-space listener.
const SECCOMP_RET_USER_NOTIF: u32 = 0x7fc0_0000;
/// Filter return value: let the syscall proceed.
const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000;
// Syscall numbers matched by the seccomp filter. Only x86_64 and aarch64 are
// defined in this part of the file.
/// `openat` syscall number on x86_64.
#[cfg(target_arch = "x86_64")]
pub const SYS_OPENAT: i32 = 257;
/// `openat2` syscall number on x86_64.
#[cfg(target_arch = "x86_64")]
pub const SYS_OPENAT2: i32 = 437;
/// `openat` syscall number on aarch64.
#[cfg(target_arch = "aarch64")]
pub const SYS_OPENAT: i32 = 56;
/// `openat2` syscall number on aarch64.
#[cfg(target_arch = "aarch64")]
pub const SYS_OPENAT2: i32 = 437;
/// Argument block read from the target process by `read_open_how`.
///
/// `#[repr(C)]`, 24 bytes; intended to mirror the kernel's `struct open_how`
/// used by `openat2`.
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct OpenHow {
/// Open flags.
pub flags: u64,
/// File mode.
pub mode: u64,
/// Path-resolution flags.
pub resolve: u64,
}
/// Classifies `open`-style flags by their access-mode bits.
///
/// Only the bits selected by `O_ACCMODE` are inspected: `O_RDONLY` maps to
/// `Read`, `O_WRONLY` to `Write`, and every other value to `ReadWrite`.
/// Additional flags such as `O_CREAT` or `O_TRUNC` do not affect the result.
#[must_use]
pub fn classify_access_from_flags(flags: i32) -> crate::AccessMode {
    let access_bits = flags & libc::O_ACCMODE;
    if access_bits == libc::O_RDONLY {
        crate::AccessMode::Read
    } else if access_bits == libc::O_WRONLY {
        crate::AccessMode::Write
    } else {
        crate::AccessMode::ReadWrite
    }
}
/// Largest `openat2` size argument accepted by `validate_openat2_size`.
const OPENAT2_HOW_SIZE_MAX: usize = 4096;

/// Checks that an `openat2` size argument is plausible.
///
/// Returns `true` when `how_size` is at least `size_of::<OpenHow>()` and at
/// most `OPENAT2_HOW_SIZE_MAX` (both bounds inclusive).
#[must_use]
pub fn validate_openat2_size(how_size: usize) -> bool {
    (std::mem::size_of::<OpenHow>()..=OPENAT2_HOW_SIZE_MAX).contains(&how_size)
}
/// Byte offset of the `nr` field (the first field) within `SeccompData`; used
/// as the operand of the filter's absolute load instruction.
const SECCOMP_DATA_NR_OFFSET: u32 = 0;
/// One classic-BPF instruction of the seccomp filter.
///
/// `#[repr(C)]`; intended to mirror the kernel's `struct sock_filter`.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct SockFilterInsn {
/// Opcode, built by OR-ing the `BPF_*` constants.
code: u16,
/// Number of instructions to skip when a jump condition is true.
jt: u8,
/// Number of instructions to skip when a jump condition is false.
jf: u8,
/// Immediate operand (load offset, comparison value or return value).
k: u32,
}
/// Filter program descriptor handed to `seccomp(2)`.
///
/// `#[repr(C)]`; intended to mirror the kernel's `struct sock_fprog`. The
/// `filter` pointer is raw, so the instruction array it points to must stay
/// alive for as long as the descriptor is in use.
#[repr(C)]
struct SockFprog {
/// Number of instructions in the program.
len: u16,
/// Pointer to the first instruction.
filter: *const SockFilterInsn,
}
pub fn install_seccomp_notify() -> Result<std::os::fd::OwnedFd> {
use std::os::fd::FromRawFd;
let filter = [
SockFilterInsn {
code: BPF_LD | BPF_W | BPF_ABS,
jt: 0,
jf: 0,
k: SECCOMP_DATA_NR_OFFSET,
},
SockFilterInsn {
code: BPF_JMP | BPF_JEQ | BPF_K,
jt: 2, jf: 0,
k: SYS_OPENAT as u32,
},
SockFilterInsn {
code: BPF_JMP | BPF_JEQ | BPF_K,
jt: 1, jf: 0,
k: SYS_OPENAT2 as u32,
},
SockFilterInsn {
code: BPF_RET | BPF_K,
jt: 0,
jf: 0,
k: SECCOMP_RET_ALLOW,
},
SockFilterInsn {
code: BPF_RET | BPF_K,
jt: 0,
jf: 0,
k: SECCOMP_RET_USER_NOTIF,
},
];
let prog = SockFprog {
len: filter.len() as u16,
filter: filter.as_ptr(),
};
let ret = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
if ret != 0 {
return Err(NonoError::SandboxInit(format!(
"prctl(PR_SET_NO_NEW_PRIVS) failed: {}",
std::io::Error::last_os_error()
)));
}
let flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV;
let ret = unsafe {
libc::syscall(
libc::SYS_seccomp,
SECCOMP_SET_MODE_FILTER,
flags,
&prog as *const SockFprog,
)
};
let notify_fd = if ret < 0 {
let flags = SECCOMP_FILTER_FLAG_NEW_LISTENER;
let ret = unsafe {
libc::syscall(
libc::SYS_seccomp,
SECCOMP_SET_MODE_FILTER,
flags,
&prog as *const SockFprog,
)
};
if ret < 0 {
return Err(NonoError::SandboxInit(format!(
"seccomp(SECCOMP_SET_MODE_FILTER) failed: {}. \
Requires kernel >= 5.0 with SECCOMP_FILTER_FLAG_NEW_LISTENER.",
std::io::Error::last_os_error()
)));
}
ret as i32
} else {
ret as i32
};
Ok(unsafe { std::os::fd::OwnedFd::from_raw_fd(notify_fd) })
}
/// Receives the next seccomp notification from `notify_fd`.
///
/// A zero-initialized [`SeccompNotif`] is passed to the
/// `SECCOMP_IOCTL_NOTIF_RECV` ioctl and returned once the kernel fills it in.
///
/// # Errors
///
/// Returns `NonoError::SandboxInit` carrying the OS error if the ioctl fails.
pub fn recv_notif(notify_fd: std::os::fd::RawFd) -> Result<SeccompNotif> {
    let mut incoming = SeccompNotif {
        id: 0,
        pid: 0,
        flags: 0,
        data: SeccompData::default(),
    };

    // SAFETY: `incoming` is a live, writable `#[repr(C)]` value for the whole call.
    let rc = unsafe {
        libc::ioctl(
            notify_fd,
            SECCOMP_IOCTL_NOTIF_RECV,
            &mut incoming as *mut SeccompNotif,
        )
    };

    if rc >= 0 {
        Ok(incoming)
    } else {
        Err(NonoError::SandboxInit(format!(
            "SECCOMP_IOCTL_NOTIF_RECV failed: {}",
            std::io::Error::last_os_error()
        )))
    }
}
/// Reads a NUL-terminated path string from another process's memory.
///
/// Opens `/proc/<pid>/mem`, seeks to `addr` and reads up to 4096 bytes with a
/// single `read`. The bytes before the first NUL are returned as the path.
///
/// # Errors
///
/// Returns `NonoError::SandboxInit` when the memory file cannot be opened,
/// seeked or read, when the path is empty, when no NUL terminator is found in
/// the bytes that were read, or when the path is not valid UTF-8.
pub fn read_notif_path(pid: u32, addr: u64) -> Result<std::path::PathBuf> {
    use std::io::{Read, Seek, SeekFrom};

    // Upper bound on the bytes read, terminator included.
    const PATH_BUF_LEN: usize = 4096;

    let mem_path = format!("/proc/{}/mem", pid);
    let mut file = std::fs::File::open(&mem_path)
        .map_err(|e| NonoError::SandboxInit(format!("Failed to open {}: {}", mem_path, e)))?;
    file.seek(SeekFrom::Start(addr))
        .map_err(|e| NonoError::SandboxInit(format!("Failed to seek in {}: {}", mem_path, e)))?;

    let mut buf = vec![0u8; PATH_BUF_LEN];
    let n = file.read(&mut buf).map_err(|e| {
        NonoError::SandboxInit(format!("Failed to read path from {}: {}", mem_path, e))
    })?;

    // A terminator is mandatory. Without this check a short read that ends
    // before any NUL (fewer than PATH_BUF_LEN bytes available) would be
    // accepted as if the truncated bytes were the complete path.
    let end = match buf[..n].iter().position(|&b| b == 0) {
        Some(end) if end > 0 => end,
        _ => {
            return Err(NonoError::SandboxInit(
                "Invalid path in seccomp notification (empty or too long)".to_string(),
            ));
        }
    };

    let path_str = std::str::from_utf8(&buf[..end]).map_err(|_| {
        NonoError::SandboxInit("Path in seccomp notification is not valid UTF-8".to_string())
    })?;
    Ok(std::path::PathBuf::from(path_str))
}
pub fn read_open_how(pid: u32, addr: u64) -> Result<OpenHow> {
use std::io::Read;
let mem_path = format!("/proc/{}/mem", pid);
let mut file = std::fs::File::open(&mem_path)
.map_err(|e| NonoError::SandboxInit(format!("Failed to open {}: {}", mem_path, e)))?;
std::io::Seek::seek(&mut file, std::io::SeekFrom::Start(addr))
.map_err(|e| NonoError::SandboxInit(format!("Failed to seek in {}: {}", mem_path, e)))?;
let mut buf = [0u8; std::mem::size_of::<OpenHow>()];
file.read_exact(&mut buf).map_err(|e| {
NonoError::SandboxInit(format!("Failed to read open_how from {}: {}", mem_path, e))
})?;
let open_how: OpenHow = unsafe { std::ptr::read_unaligned(buf.as_ptr().cast()) };
Ok(open_how)
}
/// Checks whether a seccomp notification id is still valid.
///
/// Issues `SECCOMP_IOCTL_NOTIF_ID_VALID` on `notify_fd` for `notif_id`.
/// Returns `Ok(true)` when the ioctl succeeds and `Ok(false)` when it fails
/// with `ENOENT`.
///
/// # Errors
///
/// Returns `NonoError::SandboxInit` for any ioctl failure other than `ENOENT`.
pub fn notif_id_valid(notify_fd: std::os::fd::RawFd, notif_id: u64) -> Result<bool> {
    // SAFETY: the pointer refers to the `notif_id` parameter, which is live
    // for the duration of the call.
    let ret = unsafe {
        libc::ioctl(
            notify_fd,
            SECCOMP_IOCTL_NOTIF_ID_VALID,
            &notif_id as *const u64,
        )
    };
    if ret < 0 {
        let err = std::io::Error::last_os_error();
        // ENOENT is the "no longer valid" answer, not a failure of the check.
        if err.raw_os_error() == Some(libc::ENOENT) {
            return Ok(false);
        }
        return Err(NonoError::SandboxInit(format!(
            "SECCOMP_IOCTL_NOTIF_ID_VALID failed: {}",
            err
        )));
    }
    Ok(true)
}
/// Injects file descriptor `fd` into the process that raised notification
/// `notif_id`.
///
/// Issues `SECCOMP_IOCTL_NOTIF_ADDFD` with `SECCOMP_ADDFD_FLAG_SEND`, a
/// requested descriptor number of 0 and `O_CLOEXEC` as the new descriptor's
/// flags. NOTE(review): with the SEND flag the kernel is expected to also
/// complete the notification — confirm against seccomp_unotify(2).
///
/// # Errors
///
/// Returns `NonoError::SandboxInit` carrying the OS error if the ioctl fails.
pub fn inject_fd(
    notify_fd: std::os::fd::RawFd,
    notif_id: u64,
    fd: std::os::fd::RawFd,
) -> Result<()> {
    let request = SeccompNotifAddfd {
        id: notif_id,
        flags: SECCOMP_ADDFD_FLAG_SEND,
        srcfd: fd as u32,
        newfd: 0,
        newfd_flags: libc::O_CLOEXEC as u32,
    };

    // SAFETY: `request` is a live `#[repr(C)]` value for the whole call.
    let rc = unsafe {
        libc::ioctl(
            notify_fd,
            SECCOMP_IOCTL_NOTIF_ADDFD,
            &request as *const SeccompNotifAddfd,
        )
    };

    if rc >= 0 {
        Ok(())
    } else {
        Err(NonoError::SandboxInit(format!(
            "SECCOMP_IOCTL_NOTIF_ADDFD failed: {}. Requires kernel >= 5.14.",
            std::io::Error::last_os_error()
        )))
    }
}
/// Answers notification `notif_id` with a permission-denied error.
///
/// Sends a response with `val` 0, `error` set to `-EPERM` and no flags via
/// `SECCOMP_IOCTL_NOTIF_SEND`.
///
/// # Errors
///
/// Returns `NonoError::SandboxInit` carrying the OS error if the ioctl fails.
pub fn deny_notif(notify_fd: std::os::fd::RawFd, notif_id: u64) -> Result<()> {
    let denial = SeccompNotifResp {
        id: notif_id,
        val: 0,
        error: -libc::EPERM,
        flags: 0,
    };

    // SAFETY: `denial` is a live `#[repr(C)]` value for the whole call.
    let rc = unsafe {
        libc::ioctl(
            notify_fd,
            SECCOMP_IOCTL_NOTIF_SEND,
            &denial as *const SeccompNotifResp,
        )
    };

    if rc >= 0 {
        Ok(())
    } else {
        Err(NonoError::SandboxInit(format!(
            "SECCOMP_IOCTL_NOTIF_SEND failed: {}",
            std::io::Error::last_os_error()
        )))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: the probe must not panic, whatever the kernel supports.
    #[test]
    fn test_is_supported() {
        let _ = is_supported();
    }

    #[test]
    fn test_support_info() {
        let info = support_info();
        assert!(!info.details.is_empty());
    }

    #[test]
    fn test_access_conversion() {
        let abi = ABI::V3;

        let read = access_to_landlock(AccessMode::Read, abi);
        assert!(read.contains(AccessFs::ReadFile));
        assert!(!read.contains(AccessFs::WriteFile));

        let write = access_to_landlock(AccessMode::Write, abi);
        assert!(write.contains(AccessFs::WriteFile));
        assert!(!write.contains(AccessFs::ReadFile));
        assert!(write.contains(AccessFs::RemoveFile));
        assert!(write.contains(AccessFs::RemoveDir));
        assert!(write.contains(AccessFs::Refer));
        assert!(write.contains(AccessFs::Truncate));

        let rw = access_to_landlock(AccessMode::ReadWrite, abi);
        assert!(rw.contains(AccessFs::ReadFile));
        assert!(rw.contains(AccessFs::WriteFile));
        assert!(rw.contains(AccessFs::RemoveFile));
        assert!(rw.contains(AccessFs::RemoveDir));
        assert!(rw.contains(AccessFs::Refer));
        assert!(rw.contains(AccessFs::Truncate));
    }

    /// The `#[repr(C)]` structs must match the sizes encoded in the ioctl
    /// request numbers.
    #[test]
    fn test_seccomp_notif_struct_sizes() {
        use std::mem;
        assert_eq!(mem::size_of::<SeccompData>(), 64);
        assert_eq!(mem::size_of::<SeccompNotif>(), 80);
        assert_eq!(mem::size_of::<SeccompNotifResp>(), 24);
        assert_eq!(mem::size_of::<SeccompNotifAddfd>(), 24);
    }

    /// Note: this builds its own copy of the filter; it does not inspect the
    /// array constructed inside `install_seccomp_notify`.
    #[test]
    fn test_bpf_filter_instruction_count() {
        let filter = [
            SockFilterInsn {
                code: BPF_LD | BPF_W | BPF_ABS,
                jt: 0,
                jf: 0,
                k: SECCOMP_DATA_NR_OFFSET,
            },
            SockFilterInsn {
                code: BPF_JMP | BPF_JEQ | BPF_K,
                jt: 2,
                jf: 0,
                k: SYS_OPENAT as u32,
            },
            SockFilterInsn {
                code: BPF_JMP | BPF_JEQ | BPF_K,
                jt: 1,
                jf: 0,
                k: SYS_OPENAT2 as u32,
            },
            SockFilterInsn {
                code: BPF_RET | BPF_K,
                jt: 0,
                jf: 0,
                k: SECCOMP_RET_ALLOW,
            },
            SockFilterInsn {
                code: BPF_RET | BPF_K,
                jt: 0,
                jf: 0,
                k: SECCOMP_RET_USER_NOTIF,
            },
        ];
        assert_eq!(filter.len(), 5);
    }

    #[test]
    fn test_open_how_struct_size() {
        use std::mem;
        assert_eq!(mem::size_of::<OpenHow>(), 24);
    }

    #[test]
    fn test_syscall_numbers_distinct() {
        assert_ne!(SYS_OPENAT, SYS_OPENAT2);
    }

    /// Compile-time check that the syscall constants have the same type as
    /// `SeccompData::nr`.
    #[test]
    fn test_syscall_numbers_match_seccomp_data_nr_type() {
        let _: i32 = SYS_OPENAT;
        let _: i32 = SYS_OPENAT2;
    }

    #[test]
    fn test_classify_access_rdonly() {
        let access = classify_access_from_flags(libc::O_RDONLY);
        assert!(matches!(access, crate::AccessMode::Read));
    }

    #[test]
    fn test_classify_access_wronly() {
        let access = classify_access_from_flags(libc::O_WRONLY);
        assert!(matches!(access, crate::AccessMode::Write));
    }

    #[test]
    fn test_classify_access_rdwr() {
        let access = classify_access_from_flags(libc::O_RDWR);
        assert!(matches!(access, crate::AccessMode::ReadWrite));
    }

    #[test]
    fn test_classify_access_with_extra_flags() {
        let flags = libc::O_RDONLY | libc::O_CREAT | libc::O_TRUNC;
        let access = classify_access_from_flags(flags);
        assert!(matches!(access, crate::AccessMode::Read));

        let flags = libc::O_WRONLY | libc::O_APPEND;
        let access = classify_access_from_flags(flags);
        assert!(matches!(access, crate::AccessMode::Write));

        let flags = libc::O_RDWR | libc::O_CLOEXEC;
        let access = classify_access_from_flags(flags);
        assert!(matches!(access, crate::AccessMode::ReadWrite));
    }

    /// A pointer-sized value mistakenly passed as flags is classified purely
    /// by its low two bits after truncation, so the result says nothing about
    /// the real open mode. For this value the truncated word is `0x1234_5678`,
    /// whose access bits are 0, i.e. `O_RDONLY`.
    #[test]
    fn test_classify_access_truncated_pointer_uses_low_bits_only() {
        let fake_pointer = 0x7fff_1234_5678_i64 as i32;
        assert_eq!(fake_pointer, 0x1234_5678);
        let access = classify_access_from_flags(fake_pointer);
        assert!(matches!(access, crate::AccessMode::Read));
    }

    #[test]
    fn test_validate_openat2_size_rejects_zero() {
        assert!(!validate_openat2_size(0));
    }

    #[test]
    fn test_validate_openat2_size_rejects_undersized() {
        assert!(!validate_openat2_size(1));
        assert!(!validate_openat2_size(8));
        assert!(!validate_openat2_size(16));
        assert!(!validate_openat2_size(23));
    }

    #[test]
    fn test_validate_openat2_size_accepts_exact() {
        let exact_size = std::mem::size_of::<OpenHow>();
        assert_eq!(exact_size, 24);
        assert!(validate_openat2_size(exact_size));
    }

    #[test]
    fn test_validate_openat2_size_accepts_larger() {
        assert!(validate_openat2_size(32));
        assert!(validate_openat2_size(64));
        assert!(validate_openat2_size(128));
    }

    #[test]
    fn test_validate_openat2_size_rejects_unreasonably_large() {
        assert!(!validate_openat2_size(4097));
        assert!(!validate_openat2_size(usize::MAX));
    }
}