use std::os::unix::io::RawFd;
use std::sync::Arc;
use tokio::sync::Mutex;
use crate::procfs::{build_dirent64, DT_DIR, DT_LNK, DT_REG};
use crate::seccomp::notif::{read_child_mem, write_child_mem, NotifAction, SupervisorState};
use crate::sys::structs::SeccompNotif;
/// Reads a NUL-terminated path string out of the notifying process's memory.
///
/// Memory is fetched through `read_child_mem` starting at `addr`, in chunks
/// that never extend past the next multiple of `PAGE_SIZE`, until a NUL byte
/// is found.
///
/// Returns `None` when:
/// * `addr` is 0,
/// * a `read_child_mem` call fails or yields no bytes,
/// * no NUL terminator is found within `MAX_PATH_LEN` bytes (the string is
///   rejected rather than silently truncated, so callers never act on a path
///   different from the one the caller supplied), or
/// * the bytes before the terminator are not valid UTF-8.
fn read_path(notif: &SeccompNotif, addr: u64, notif_fd: RawFd) -> Option<String> {
    const PAGE_SIZE: u64 = 4096;
    const MAX_PATH_LEN: usize = 4096;

    if addr == 0 {
        return None;
    }

    let mut result = Vec::with_capacity(256);
    let mut cur = addr;
    while result.len() < MAX_PATH_LEN {
        // Stop each request at the next PAGE_SIZE boundary so one request
        // never spans two chunks of the address space.
        let page_remaining = PAGE_SIZE - (cur % PAGE_SIZE);
        let to_read = page_remaining.min((MAX_PATH_LEN - result.len()) as u64) as usize;
        let bytes = read_child_mem(notif_fd, notif.id, notif.pid, cur, to_read).ok()?;

        if let Some(nul) = bytes.iter().position(|&b| b == 0) {
            result.extend_from_slice(&bytes[..nul]);
            return String::from_utf8(result).ok();
        }
        if bytes.is_empty() {
            // A successful but empty read makes no progress; bail out instead
            // of looping.
            return None;
        }

        result.extend_from_slice(&bytes);
        // Advance by what was actually returned, so a short read cannot make
        // us skip bytes of the string.
        cur = cur.checked_add(bytes.len() as u64)?;
    }

    // No terminator within MAX_PATH_LEN bytes.
    None
}
/// Converts the `(dirfd, path)` pair of an `*at`-style syscall into a single
/// path string.
///
/// * An absolute `path` is returned unchanged.
/// * If `dirfd` (truncated to 32 bits) equals `AT_FDCWD`, the base is the
///   target of `/proc/<pid>/cwd`.
/// * Otherwise the base is the target of `/proc/<pid>/fd/<dirfd>`.
///
/// When the `/proc` link cannot be read, `path` is returned as-is.
///
/// The base's `Display` form is used, so the join is lossy for non-UTF-8
/// directory names.
fn resolve_at_path(notif: &SeccompNotif, dirfd: i64, path: &str) -> String {
    if std::path::Path::new(path).is_absolute() {
        return path.to_string();
    }

    // The same truncated value is used for the AT_FDCWD comparison and for
    // the /proc lookup, so stray upper register bits cannot break the lookup.
    let dirfd32 = dirfd as i32;
    let link = if dirfd32 == libc::AT_FDCWD {
        format!("/proc/{}/cwd", notif.pid)
    } else {
        format!("/proc/{}/fd/{}", notif.pid, dirfd32)
    };

    match std::fs::read_link(link) {
        Ok(base) => {
            let base = base.display().to_string();
            // Avoid producing "//name" when the base is "/".
            if base.ends_with('/') {
                format!("{}{}", base, path)
            } else {
                format!("{}/{}", base, path)
            }
        }
        Err(_) => path.to_string(),
    }
}
/// Handles an intercepted open-style syscall whose arguments are
/// `(dirfd, path, flags, ...)`.
///
/// The path is read from the caller's memory and resolved; if the COW branch
/// claims it (`matches`) and `handle_open` yields a replacement path, that
/// path is opened in this process with the caller's flags and the resulting
/// descriptor is returned as `NotifAction::InjectFdSend`.
///
/// Every failure (unreadable path, no COW branch, path not claimed,
/// `handle_open` returning `None`, interior NUL in the replacement path, or
/// `open` failing) yields `NotifAction::Continue`.
pub(crate) async fn handle_cow_open(
    notif: &SeccompNotif,
    state: &Arc<Mutex<SupervisorState>>,
    notif_fd: RawFd,
) -> NotifAction {
    use std::os::unix::ffi::OsStrExt;

    let dirfd = notif.data.args[0] as i64;
    let path_ptr = notif.data.args[1];
    let flags = notif.data.args[2];

    let rel_path = match read_path(notif, path_ptr, notif_fd) {
        Some(p) => p,
        None => return NotifAction::Continue,
    };
    let path = resolve_at_path(notif, dirfd, &rel_path);

    let mut st = state.lock().await;
    let cow = match st.cow_branch.as_mut() {
        Some(c) => c,
        None => return NotifAction::Continue,
    };
    if !cow.matches(&path) {
        return NotifAction::Continue;
    }
    let real_path = match cow.handle_open(&path, flags) {
        Some(p) => p,
        None => return NotifAction::Continue,
    };
    // Release the supervisor lock before touching the filesystem.
    drop(st);

    // Build the C string from the raw OS bytes: going through `to_str()`
    // would turn a non-UTF-8 redirected path into "" and make the open fail.
    let c_path = match std::ffi::CString::new(real_path.as_os_str().as_bytes()) {
        Ok(c) => c,
        Err(_) => return NotifAction::Continue,
    };

    // NOTE(review): the creation mode is fixed at 0o666 and the caller's own
    // mode argument is not consulted — confirm this is intentional.
    //
    // SAFETY: `c_path` is a valid NUL-terminated string that lives until
    // after the call returns; `open` does not retain the pointer.
    let fd = unsafe { libc::open(c_path.as_ptr(), flags as i32, 0o666) };
    if fd < 0 {
        return NotifAction::Continue;
    }
    NotifAction::InjectFdSend { srcfd: fd }
}
pub(crate) async fn handle_cow_write(
notif: &SeccompNotif,
state: &Arc<Mutex<SupervisorState>>,
notif_fd: RawFd,
) -> NotifAction {
let nr = notif.data.nr as i64;
if nr == libc::SYS_unlinkat {
let dirfd = notif.data.args[0] as i64;
let path = match read_path(notif, notif.data.args[1], notif_fd) {
Some(p) => resolve_at_path(notif, dirfd, &p),
None => return NotifAction::Continue,
};
let is_dir = (notif.data.args[2] & libc::AT_REMOVEDIR as u64) != 0;
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&path) && cow.handle_unlink(&path, is_dir) {
return NotifAction::ReturnValue(0);
}
}
} else if nr == libc::SYS_mkdirat {
let dirfd = notif.data.args[0] as i64;
let path = match read_path(notif, notif.data.args[1], notif_fd) {
Some(p) => resolve_at_path(notif, dirfd, &p),
None => return NotifAction::Continue,
};
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&path) && cow.handle_mkdir(&path) {
return NotifAction::ReturnValue(0);
}
}
} else if nr == libc::SYS_renameat2 {
let old_dirfd = notif.data.args[0] as i64;
let new_dirfd = notif.data.args[2] as i64;
let old_path = match read_path(notif, notif.data.args[1], notif_fd) {
Some(p) => resolve_at_path(notif, old_dirfd, &p),
None => return NotifAction::Continue,
};
let new_path = match read_path(notif, notif.data.args[3], notif_fd) {
Some(p) => resolve_at_path(notif, new_dirfd, &p),
None => return NotifAction::Continue,
};
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&old_path) && cow.handle_rename(&old_path, &new_path) {
return NotifAction::ReturnValue(0);
}
}
} else if nr == libc::SYS_symlinkat {
let target = match read_path(notif, notif.data.args[0], notif_fd) {
Some(p) => p,
None => return NotifAction::Continue,
};
let dirfd = notif.data.args[1] as i64;
let linkpath = match read_path(notif, notif.data.args[2], notif_fd) {
Some(p) => resolve_at_path(notif, dirfd, &p),
None => return NotifAction::Continue,
};
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&linkpath) && cow.handle_symlink(&target, &linkpath) {
return NotifAction::ReturnValue(0);
}
}
} else if nr == libc::SYS_linkat {
let old_dirfd = notif.data.args[0] as i64;
let new_dirfd = notif.data.args[2] as i64;
let old_path = match read_path(notif, notif.data.args[1], notif_fd) {
Some(p) => resolve_at_path(notif, old_dirfd, &p),
None => return NotifAction::Continue,
};
let new_path = match read_path(notif, notif.data.args[3], notif_fd) {
Some(p) => resolve_at_path(notif, new_dirfd, &p),
None => return NotifAction::Continue,
};
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&new_path) && cow.handle_link(&old_path, &new_path) {
return NotifAction::ReturnValue(0);
}
}
} else if nr == libc::SYS_fchmodat {
let dirfd = notif.data.args[0] as i64;
let path = match read_path(notif, notif.data.args[1], notif_fd) {
Some(p) => resolve_at_path(notif, dirfd, &p),
None => return NotifAction::Continue,
};
let mode = (notif.data.args[2] & 0o7777) as u32;
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&path) && cow.handle_chmod(&path, mode) {
return NotifAction::ReturnValue(0);
}
}
} else if nr == libc::SYS_fchownat {
let dirfd = notif.data.args[0] as i64;
let path = match read_path(notif, notif.data.args[1], notif_fd) {
Some(p) => resolve_at_path(notif, dirfd, &p),
None => return NotifAction::Continue,
};
let uid = notif.data.args[2] as u32;
let gid = notif.data.args[3] as u32;
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&path) && cow.handle_chown(&path, uid, gid) {
return NotifAction::ReturnValue(0);
}
}
} else if nr == libc::SYS_truncate {
let path = match read_path(notif, notif.data.args[0], notif_fd) {
Some(p) => p,
None => return NotifAction::Continue,
};
let length = notif.data.args[1] as i64;
let mut st = state.lock().await;
if let Some(cow) = st.cow_branch.as_mut() {
if cow.matches(&path) && cow.handle_truncate(&path, length) {
return NotifAction::ReturnValue(0);
}
}
}
NotifAction::Continue
}
/// Syscall number treated as `faccessat2`; `handle_cow_stat` compares it with
/// `notif.data.nr` alongside `libc::SYS_faccessat`.
const SYS_FACCESSAT2: i64 = 439;
/// Handles intercepted stat-style and access-style syscalls whose first two
/// arguments are `(dirfd, path)`.
///
/// If the COW branch claims the resolved path:
/// * `handle_stat` returning `None` yields `Errno(ENOENT)`;
/// * for `faccessat` / `faccessat2` the call succeeds with 0 when the
///   redirected path exists (or is a symlink), else `Errno(ENOENT)`; the
///   requested access mode is not inspected;
/// * otherwise the redirected path's metadata is encoded into a 144-byte
///   buffer and written to the address in argument 2, honouring
///   `AT_SYMLINK_NOFOLLOW` from argument 3. A metadata failure is reported
///   with the underlying OS error code (falling back to `ENOENT`).
///
/// Unreadable path, missing COW branch, unclaimed path, or a failed write to
/// the caller's memory yield `NotifAction::Continue`.
pub(crate) async fn handle_cow_stat(
    notif: &SeccompNotif,
    state: &Arc<Mutex<SupervisorState>>,
    notif_fd: RawFd,
) -> NotifAction {
    use std::os::unix::fs::MetadataExt;

    let nr = notif.data.nr as i64;
    let dirfd = notif.data.args[0] as i64;
    let path = match read_path(notif, notif.data.args[1], notif_fd) {
        Some(p) => resolve_at_path(notif, dirfd, &p),
        None => return NotifAction::Continue,
    };

    let st = state.lock().await;
    let cow = match st.cow_branch.as_ref() {
        Some(c) => c,
        None => return NotifAction::Continue,
    };
    if !cow.matches(&path) {
        return NotifAction::Continue;
    }
    let real_path = match cow.handle_stat(&path) {
        Some(p) => p,
        None => return NotifAction::Errno(libc::ENOENT),
    };
    // Release the supervisor lock before touching the filesystem.
    drop(st);

    if nr == libc::SYS_faccessat || nr == SYS_FACCESSAT2 {
        // `exists()` follows symlinks; the extra check keeps dangling
        // symlinks visible.
        if real_path.exists() || real_path.is_symlink() {
            return NotifAction::ReturnValue(0);
        }
        return NotifAction::Errno(libc::ENOENT);
    }

    let statbuf_addr = notif.data.args[2];
    let flags = notif.data.args[3];
    let follow = (flags & libc::AT_SYMLINK_NOFOLLOW as u64) == 0;
    let meta = if follow {
        std::fs::metadata(&real_path)
    } else {
        std::fs::symlink_metadata(&real_path)
    };
    let meta = match meta {
        Ok(m) => m,
        // Report the real failure (EACCES, ELOOP, ENOTDIR, ...) instead of
        // collapsing everything into ENOENT.
        Err(e) => return NotifAction::Errno(e.raw_os_error().unwrap_or(libc::ENOENT)),
    };

    // Fields are packed back to back in native byte order; the trailing bytes
    // of the 144-byte buffer stay zero.
    // NOTE(review): this order and size look like the x86_64 `struct stat`
    // layout — confirm before building for another architecture.
    let mut buf = vec![0u8; 144];
    {
        let mut off = 0usize;
        let mut put = |bytes: &[u8]| {
            buf[off..off + bytes.len()].copy_from_slice(bytes);
            off += bytes.len();
        };
        put(&(meta.dev() as u64).to_ne_bytes());
        put(&(meta.ino() as u64).to_ne_bytes());
        put(&(meta.nlink() as u64).to_ne_bytes());
        put(&(meta.mode() as u32).to_ne_bytes());
        put(&(meta.uid() as u32).to_ne_bytes());
        put(&(meta.gid() as u32).to_ne_bytes());
        put(&0u32.to_ne_bytes()); // padding word
        put(&(meta.rdev() as u64).to_ne_bytes());
        put(&(meta.size() as u64).to_ne_bytes());
        put(&(meta.blksize() as u64).to_ne_bytes());
        put(&(meta.blocks() as u64).to_ne_bytes());
        put(&(meta.atime() as u64).to_ne_bytes());
        put(&(meta.atime_nsec() as u64).to_ne_bytes());
        put(&(meta.mtime() as u64).to_ne_bytes());
        put(&(meta.mtime_nsec() as u64).to_ne_bytes());
        put(&(meta.ctime() as u64).to_ne_bytes());
        put(&(meta.ctime_nsec() as u64).to_ne_bytes());
    }

    if write_child_mem(notif_fd, notif.id, notif.pid, statbuf_addr, &buf).is_err() {
        return NotifAction::Continue;
    }
    NotifAction::ReturnValue(0)
}
/// Handles an intercepted `statx`-style syscall whose first two arguments are
/// `(dirfd, path)`.
///
/// The only case answered here is a path claimed by the COW branch for which
/// `handle_stat` returns `None`: that yields `Errno(ENOENT)`. Everything else
/// (unreadable path, no COW branch, unclaimed path, or `handle_stat`
/// returning `Some`) yields `NotifAction::Continue`.
pub(crate) async fn handle_cow_statx(
    notif: &SeccompNotif,
    state: &Arc<Mutex<SupervisorState>>,
    notif_fd: RawFd,
) -> NotifAction {
    let raw = match read_path(notif, notif.data.args[1], notif_fd) {
        Some(r) => r,
        None => return NotifAction::Continue,
    };
    let full = resolve_at_path(notif, notif.data.args[0] as i64, &raw);

    let guard = state.lock().await;
    if let Some(branch) = guard.cow_branch.as_ref() {
        // `handle_stat` is only consulted for paths the branch claims.
        if branch.matches(&full) && branch.handle_stat(&full).is_none() {
            return NotifAction::Errno(libc::ENOENT);
        }
    }
    NotifAction::Continue
}
/// Handles an intercepted `readlinkat`-style syscall with arguments
/// `(dirfd, path, buf, bufsiz)`.
///
/// If the COW branch claims the resolved path:
/// * a non-positive `bufsiz` yields `Errno(EINVAL)`;
/// * `handle_readlink` returning `None` yields `Errno(ENOENT)`;
/// * otherwise up to `bufsiz` bytes of the link target (no NUL terminator)
///   are written to `buf` and the number of bytes written is returned.
///
/// Unreadable path, missing COW branch, unclaimed path, or a failed write to
/// the caller's memory yield `NotifAction::Continue`.
///
/// NOTE(review): `None` from `handle_readlink` is always mapped to `ENOENT`;
/// if it can also mean "exists but is not a symlink", `EINVAL` would be the
/// expected error — confirm against `handle_readlink`.
pub(crate) async fn handle_cow_readlink(
    notif: &SeccompNotif,
    state: &Arc<Mutex<SupervisorState>>,
    notif_fd: RawFd,
) -> NotifAction {
    let dirfd = notif.data.args[0] as i64;
    let path = match read_path(notif, notif.data.args[1], notif_fd) {
        Some(p) => resolve_at_path(notif, dirfd, &p),
        None => return NotifAction::Continue,
    };
    let buf_addr = notif.data.args[2];
    // Interpret the low 32 bits as a signed size so that negative values are
    // rejected instead of being treated as a huge buffer.
    let bufsiz = notif.data.args[3] as i32;

    let st = state.lock().await;
    let cow = match st.cow_branch.as_ref() {
        Some(c) => c,
        None => return NotifAction::Continue,
    };
    if !cow.matches(&path) {
        return NotifAction::Continue;
    }
    if bufsiz <= 0 {
        return NotifAction::Errno(libc::EINVAL);
    }
    let target = match cow.handle_readlink(&path) {
        Some(t) => t,
        None => return NotifAction::Errno(libc::ENOENT),
    };
    drop(st);

    let target_bytes = target.as_bytes();
    // The target is truncated to the caller's buffer size.
    let write_len = target_bytes.len().min(bufsiz as usize);
    if write_child_mem(notif_fd, notif.id, notif.pid, buf_addr, &target_bytes[..write_len])
        .is_err()
    {
        return NotifAction::Continue;
    }
    NotifAction::ReturnValue(write_len as i64)
}
pub(crate) async fn handle_cow_getdents(
notif: &SeccompNotif,
state: &Arc<Mutex<SupervisorState>>,
notif_fd: RawFd,
) -> NotifAction {
let pid = notif.pid;
let child_fd = (notif.data.args[0] & 0xFFFFFFFF) as u32;
let buf_addr = notif.data.args[1];
let buf_size = (notif.data.args[2] & 0xFFFFFFFF) as usize;
let link_path = format!("/proc/{}/fd/{}", pid, child_fd);
let target = match std::fs::read_link(&link_path) {
Ok(t) => t.to_string_lossy().into_owned(),
Err(_) => return NotifAction::Continue,
};
let mut st = state.lock().await;
let cow = match st.cow_branch.as_ref() {
Some(c) => c,
None => return NotifAction::Continue,
};
if !cow.matches(&target) {
return NotifAction::Continue;
}
let cache_key = (pid as i32, child_fd);
if !st.cow_dir_cache.contains_key(&cache_key) {
let cow = st.cow_branch.as_ref().unwrap();
let workdir_str = cow.workdir_str();
let rel_path = if target == workdir_str {
".".to_string()
} else {
target
.strip_prefix(&format!("{}/", workdir_str))
.unwrap_or(".")
.to_string()
};
let merged = cow.list_merged_dir(&rel_path);
let upper_dir = cow.upper_dir().join(&rel_path);
let lower_dir = cow.workdir().join(&rel_path);
let mut entries = Vec::new();
let mut d_off: i64 = 0;
for name in &merged {
d_off += 1;
let upper_p = upper_dir.join(name);
let lower_p = lower_dir.join(name);
let check = if upper_p.exists() || upper_p.is_symlink() {
&upper_p
} else {
&lower_p
};
let d_type = if check.is_dir() {
DT_DIR
} else if check.is_symlink() {
DT_LNK
} else {
DT_REG
};
use std::os::unix::fs::MetadataExt;
let d_ino = std::fs::symlink_metadata(check)
.map(|m| m.ino())
.unwrap_or(0);
entries.push(build_dirent64(d_ino, d_off, d_type, name));
}
st.cow_dir_cache.insert(cache_key, entries);
}
let entries = match st.cow_dir_cache.get_mut(&cache_key) {
Some(e) => e,
None => return NotifAction::Continue,
};
let mut result = Vec::new();
let mut consumed = 0;
for entry in entries.iter() {
if result.len() + entry.len() > buf_size {
break;
}
result.extend_from_slice(entry);
consumed += 1;
}
if consumed > 0 {
entries.drain(..consumed);
}
if entries.is_empty() {
st.cow_dir_cache.remove(&cache_key);
}
drop(st);
if !result.is_empty() {
if write_child_mem(notif_fd, notif.id, pid, buf_addr, &result).is_err() {
return NotifAction::Continue;
}
}
NotifAction::ReturnValue(result.len() as i64)
}