use std::{
option::Option,
os::fd::{BorrowedFd, RawFd},
sync::{
atomic::{AtomicUsize, Ordering},
Arc, OnceLock,
},
thread::Thread,
};
use dur::Duration;
use nix::{
errno::Errno,
sys::signal::{SigSet, Signal},
unistd::{gettid, Pid},
};
use crate::{
alert,
cache::{
ptrace_map_new, ptrace_resp_queue_new, signal_map_new, sys_interrupt_map_new,
sys_result_map_new, unix_map_new, ChdirEntry, PtraceMap, PtraceRespQueue,
SegvGuardExpiryMap, SegvGuardSuspensionSet, SighandleInfo, SignalMap,
SigreturnTrampolineIP, SysInterrupt, SysInterruptMap, SysQueue, SysResultMap, UnixMap,
SIG_NEST_MAX,
},
confine::{ScmpNotifReq, SydNotifResp},
cookie::{safe_kill, safe_write},
expiry::ExpiringMap,
fs::{block_signal, sigtimedpoll, tgkill, unblock_signal},
kernel::ptrace::mmap::MmapSyscall,
lookup::FileInfo,
path::XPath,
proc::proc_tgid,
ptrace::{ptrace_cont, ptrace_set_arg, ptrace_skip_syscall, ptrace_syscall},
retry::retry_on_eintr,
sandbox::Action,
sigset::SydSigSet,
workers::aes::AesLock,
};
pub(crate) mod aes;
pub(crate) mod int;
pub(crate) mod not;
pub(crate) mod out;
pub(crate) mod ipc;
pub(crate) mod emu;
pub(crate) mod run;
/// State shared by the worker threads.
///
/// Bundles the tables and queues that the methods of this type read and
/// update. Descriptions below are limited to what this file shows.
pub(crate) struct WorkerCache {
/// Signal bookkeeping; its `sig_handle` table is keyed by thread id and
/// stores one `SighandleInfo` per thread.
pub(crate) signal_map: SignalMap,
/// Interrupt bookkeeping: `sig_restart` (signal sets keyed by thread group
/// id), `sys_queue`, `sys_delete` and the `int_thread` handle.
pub(crate) sysint_map: SysInterruptMap,
/// Pending per-task results: `trace_chdir`, `trace_mmap` and `trace_error`,
/// each keyed by pid and consumed by the matching `get_*` method.
pub(crate) sysres_map: SysResultMap,
/// Table whose values carry a `pid` field; pruned by `retire_unix_map`.
pub(crate) unix_map: UnixMap,
/// Table keyed by thread id whose value is compared against a thread group
/// id in `retire_ptrace_tgid`.
pub(crate) ptrace_map: PtraceMap,
/// Optional `AesLock` handed to `new`; not touched by the methods here.
pub(crate) crypt_map: Option<AesLock>,
/// Queue handed to `new`; not touched by the methods here.
pub(crate) sysreq_queue: SysQueue,
/// Raw file descriptor handed to `new`; not touched by the methods here.
pub(crate) sysreq_pipe: RawFd,
/// Queue created by `ptrace_resp_queue_new`.
// NOTE(review): presumably carries `SydNotifResp` values for
// `handle_ptrace_response` -- confirm against the producer.
pub(crate) ptrace_resp: PtraceRespQueue,
/// Expiring per-path crash counters maintained by `add_segvguard_crash`.
pub(crate) segvguard_expiry: SegvGuardExpiryMap,
/// Expiring set of paths suspended by `add_segvguard_crash` and consulted
/// by `check_segvguard`.
pub(crate) segvguard_suspension: SegvGuardSuspensionSet,
/// Thread handle registered once through `set_mon_thread` and unparked by
/// `notify_mon`.
mon_thread: OnceLock<Thread>,
}
impl WorkerCache {
/// Builds a cache around the given handles, with every table freshly created.
///
/// `crypt_map`, `sysreq_queue` and `sysreq_pipe` are stored as given; the
/// monitor thread handle starts out unset.
pub(crate) fn new(
    crypt_map: Option<AesLock>,
    sysreq_queue: SysQueue,
    sysreq_pipe: RawFd,
) -> Self {
    // Constructors run in the same order as the struct fields are declared.
    let signal_map = signal_map_new();
    let sysint_map = sys_interrupt_map_new();
    let sysres_map = sys_result_map_new();
    let unix_map = unix_map_new();
    let ptrace_map = ptrace_map_new();
    let ptrace_resp = ptrace_resp_queue_new();
    let segvguard_expiry = Arc::new(ExpiringMap::new());
    let segvguard_suspension = Arc::new(ExpiringMap::new());

    Self {
        signal_map,
        sysint_map,
        sysres_map,
        unix_map,
        ptrace_map,
        crypt_map,
        sysreq_queue,
        sysreq_pipe,
        ptrace_resp,
        segvguard_expiry,
        segvguard_suspension,
        mon_thread: OnceLock::new(),
    }
}
/// Writes a single byte (`42`) to `queue_wr_fd`, retrying on `EINTR`.
///
/// Returns the write error, if any; the byte count is discarded.
// NOTE(review): presumably this wakes a thread waiting on the read end of
// the queue pipe -- confirm against the reader.
pub(crate) fn notify_emu(&self, queue_wr_fd: RawFd) -> Result<(), Errno> {
    let wake_byte = [42u8];
    // SAFETY(review): assumes the caller keeps `queue_wr_fd` open for the
    // duration of this call -- TODO confirm.
    let queue_wr = unsafe { BorrowedFd::borrow_raw(queue_wr_fd) };
    retry_on_eintr(|| safe_write(queue_wr, &wake_byte))?;
    Ok(())
}
/// Unparks the thread stored in `sysint_map.int_thread`, if one is set.
fn notify_int(&self) {
    let Some(interrupter) = self.sysint_map.int_thread.get() else {
        return;
    };
    interrupter.unpark();
}
/// Unparks the registered monitor thread; does nothing if none was set.
pub(crate) fn notify_mon(&self) {
    match self.mon_thread.get() {
        Some(monitor) => monitor.unpark(),
        None => {}
    }
}
/// Registers `thread` as the monitor thread.
///
/// Only the first registration takes effect; a handle passed while one is
/// already stored is dropped silently.
pub(crate) fn set_mon_thread(&self, thread: Thread) {
    drop(self.mon_thread.set(thread));
}
/// Records one more signal-handler frame for `tid`.
///
/// Creates the per-thread entry on first use. While fewer than
/// `SIG_NEST_MAX` frames are recorded the depth grows by one; once the
/// limit is reached the frame array is shifted left by one slot and the
/// last slot is reused, leaving the depth unchanged.
///
/// # Errors
///
/// `ENOMEM` if no slot can be reserved in the table, `ENOSPC` if the depth
/// counter would overflow.
pub(crate) fn push_sig_handle(&self, tid: Pid) -> Result<(), Errno> {
    let handlers = &self.signal_map.sig_handle;
    // Keep the reservation alive until the entry has been written.
    let _reserve = handlers.reserve(1).ok_or(Errno::ENOMEM)?;

    let mut entry = handlers.entry_sync(tid).or_insert_with(|| SighandleInfo {
        depth: 0,
        frames: [None; SIG_NEST_MAX],
        in_sigreturn: false,
        in_singlestep: false,
        trampoline_ip: None,
    });
    let state = entry.get_mut();
    let depth = usize::from(state.depth);

    if depth < SIG_NEST_MAX {
        state.depth = state.depth.checked_add(1).ok_or(Errno::ENOSPC)?;
        state.frames[depth] = Some(());
    } else {
        // Full: drop the oldest frame to make room for the newest one.
        state.frames.copy_within(1..SIG_NEST_MAX, 0);
        state.frames[SIG_NEST_MAX - 1] = Some(());
    }

    Ok(())
}
/// Returns the trampoline IP stored for `tid`.
///
/// Yields `None` both when `tid` has no entry and when the entry has no
/// trampoline IP recorded.
pub(crate) fn get_sig_trampoline_ip(&self, tid: Pid) -> Option<SigreturnTrampolineIP> {
    let handlers = &self.signal_map.sig_handle;
    handlers.read_sync(&tid, |_, info| info.trampoline_ip)?
}
/// Returns the `in_singlestep` flag stored for `tid`, or `false` when `tid`
/// has no entry.
pub(crate) fn get_sig_in_singlestep(&self, tid: Pid) -> bool {
    let handlers = &self.signal_map.sig_handle;
    let flag = handlers.read_sync(&tid, |_, info| info.in_singlestep);
    flag == Some(true)
}
/// Sets the `in_singlestep` flag of `tid` to `state`.
///
/// Has no effect when `tid` has no entry.
pub(crate) fn set_sig_in_singlestep(&self, tid: Pid, state: bool) {
    let handlers = &self.signal_map.sig_handle;
    handlers.update_sync(&tid, |_, entry| entry.in_singlestep = state);
}
/// Stores `ip` as the trampoline IP of `tid` and clears its
/// `in_singlestep` flag.
///
/// Has no effect when `tid` has no entry.
pub(crate) fn set_sig_trampoline_ip(&self, tid: Pid, ip: SigreturnTrampolineIP) {
    let handlers = &self.signal_map.sig_handle;
    handlers.update_sync(&tid, |_, entry| {
        entry.in_singlestep = false;
        entry.trampoline_ip = Some(ip);
    });
}
/// Forgets the trampoline IP of `tid` and clears its `in_singlestep` flag.
///
/// Has no effect when `tid` has no entry.
pub(crate) fn del_sig_trampoline_ip(&self, tid: Pid) {
    let handlers = &self.signal_map.sig_handle;
    handlers.update_sync(&tid, |_, entry| {
        entry.in_singlestep = false;
        entry.trampoline_ip = None;
    });
}
/// Returns the recorded handler depth of `tid`, or `0` when `tid` has no
/// entry.
pub(crate) fn depth_sig_handle(&self, tid: Pid) -> u8 {
    let handlers = &self.signal_map.sig_handle;
    handlers
        .read_sync(&tid, |_, info| info.depth)
        .unwrap_or_default()
}
/// Returns the `in_sigreturn` flag stored for `tid`, or `false` when `tid`
/// has no entry.
pub(crate) fn has_sig_handle(&self, tid: Pid) -> bool {
    let handlers = &self.signal_map.sig_handle;
    matches!(
        handlers.read_sync(&tid, |_, info| info.in_sigreturn),
        Some(true)
    )
}
/// Marks `tid` as being inside a sigreturn.
///
/// Returns `true` when the flag was set, i.e. `tid` has an entry with a
/// non-zero depth; otherwise nothing changes and `false` is returned.
pub(crate) fn enter_sig_handle(&self, tid: Pid) -> bool {
    let handlers = &self.signal_map.sig_handle;
    handlers
        .update_sync(&tid, |_, info| {
            let has_frame = info.depth != 0;
            if has_frame {
                info.in_sigreturn = true;
            }
            has_frame
        })
        .unwrap_or_default()
}
/// Pops the innermost handler frame of `tid` after a sigreturn.
///
/// A frame is popped only when `tid` has an entry whose `in_sigreturn`
/// flag is set and whose depth is non-zero. Popping clears the flag and
/// the top frame slot, and removes the whole entry once the depth reaches
/// zero. Returns `true` when a frame was popped.
pub(crate) fn exit_sig_handle(&self, tid: Pid) -> bool {
    let handlers = &self.signal_map.sig_handle;

    // `None`: nothing was popped. `Some(empty)`: one frame was popped and
    // `empty` tells whether that was the last one.
    let popped = handlers
        .update_sync(&tid, |_, info| {
            if info.depth == 0 || !info.in_sigreturn {
                return None;
            }
            info.in_sigreturn = false;
            let top = info.depth.saturating_sub(1);
            info.frames[usize::from(top)] = None;
            info.depth = top;
            Some(top == 0)
        })
        .flatten();

    match popped {
        Some(now_empty) => {
            if now_empty {
                handlers.remove_sync(&tid);
            }
            true
        }
        None => false,
    }
}
/// Drops the signal-handler entry of `tid`, if there is one.
pub(crate) fn retire_sig_handle(&self, tid: Pid) {
    let handlers = &self.signal_map.sig_handle;
    drop(handlers.remove_sync(&tid));
}
/// Removes every `ptrace_map` entry whose stored value equals `tgid`.
pub(crate) fn retire_ptrace_tgid(&self, tgid: Pid) {
    self.ptrace_map.retain_sync(|_tid, owner| *owner != tgid);
}
/// Removes the `ptrace_map` entry keyed by `tid`, if there is one.
pub(crate) fn retire_ptrace_tid(&self, tid: Pid) {
    drop(self.ptrace_map.remove_sync(&tid));
}
/// Stores a pending chdir record (`data`, `info`) for `pid`, replacing any
/// record already present.
///
/// # Errors
///
/// `ENOMEM` if no slot can be reserved in the table.
pub(crate) fn add_chdir(&self, pid: Pid, data: u16, info: FileInfo) -> Result<(), Errno> {
    let chdirs = &self.sysres_map.trace_chdir;
    // The named binding keeps the reservation alive across the insert.
    let Some(_slot) = chdirs.reserve(1) else {
        return Err(Errno::ENOMEM);
    };
    chdirs.upsert_sync(pid, ChdirEntry { data, info });
    Ok(())
}
/// Takes the pending chdir record of `pid` out of the table.
///
/// Returns `None` when nothing is pending; a returned record is no longer
/// stored.
pub(crate) fn get_chdir(&self, pid: Pid) -> Option<ChdirEntry> {
    match self.sysres_map.trace_chdir.remove_sync(&pid) {
        Some((_, entry)) => Some(entry),
        None => None,
    }
}
/// Stores a pending mmap record `sys` for `pid`, replacing any record
/// already present.
///
/// # Errors
///
/// `ENOMEM` if no slot can be reserved in the table.
pub(crate) fn add_mmap(&self, pid: Pid, sys: MmapSyscall) -> Result<(), Errno> {
    let mmaps = &self.sysres_map.trace_mmap;
    match mmaps.reserve(1) {
        // The reservation lives until the end of this arm, after the insert.
        Some(_slot) => {
            mmaps.upsert_sync(pid, sys);
            Ok(())
        }
        None => Err(Errno::ENOMEM),
    }
}
/// Takes the pending mmap record of `pid` out of the table.
///
/// Returns `None` when nothing is pending; a returned record is no longer
/// stored.
pub(crate) fn get_mmap(&self, pid: Pid) -> Option<MmapSyscall> {
    let (_, sys) = self.sysres_map.trace_mmap.remove_sync(&pid)?;
    Some(sys)
}
/// Stores a pending `errno` (possibly `None`) for `pid`, replacing any
/// value already present.
///
/// # Errors
///
/// `ENOMEM` if no slot can be reserved in the table.
pub(crate) fn add_error(&self, pid: Pid, errno: Option<Errno>) -> Result<(), Errno> {
    let errors = &self.sysres_map.trace_error;
    // Held until the function returns so the insert uses the reserved slot.
    let reserved = errors.reserve(1);
    if reserved.is_none() {
        return Err(Errno::ENOMEM);
    }
    errors.upsert_sync(pid, errno);
    Ok(())
}
/// Takes the pending error entry of `pid` out of the table.
///
/// Returns the removed `(pid, errno)` pair, or `None` when nothing is
/// pending.
pub(crate) fn get_error(&self, pid: Pid) -> Option<(Pid, Option<Errno>)> {
    let errors = &self.sysres_map.trace_error;
    errors.remove_sync(&pid)
}
/// Adds `sig` to the restart signal set of `request_tgid`.
///
/// An existing set is updated in place; otherwise a new set holding only
/// `sig` is inserted.
///
/// # Errors
///
/// `ENOMEM` if a new set is needed and no slot can be reserved.
pub(crate) fn add_sig_restart(&self, request_tgid: Pid, sig: libc::c_int) -> Result<(), Errno> {
    let restarts = &self.sysint_map.sig_restart;

    // Fast path: the thread group already has a set.
    let updated = restarts.update_sync(&request_tgid, |_, set| {
        set.add(sig);
    });
    if updated.is_some() {
        return Ok(());
    }

    let _reserve = restarts.reserve(1).ok_or(Errno::ENOMEM)?;
    let mut fresh = SydSigSet::new(0);
    fresh.add(sig);

    // The insert fails if an entry appeared since the lookup above; fold
    // the signal into that entry instead.
    if restarts.insert_sync(request_tgid, fresh).is_err() {
        restarts.update_sync(&request_tgid, |_, existing| {
            existing.add(sig);
        });
    }

    Ok(())
}
/// Removes `sig` from the restart signal set of `request_tgid`.
///
/// The entry itself is removed once its set is empty. Has no effect when
/// `request_tgid` has no entry.
pub(crate) fn del_sig_restart(&self, request_tgid: Pid, sig: libc::c_int) {
    let restarts = &self.sysint_map.sig_restart;
    let drained = restarts.update_sync(&request_tgid, |_, pending| {
        pending.del(sig);
        pending.is_empty()
    });
    if let Some(true) = drained {
        restarts.remove_sync(&request_tgid);
    }
}
/// Drops the restart signal set of `tgid`, if there is one.
pub(crate) fn retire_sig_restart(&self, tgid: Pid) {
    let restarts = &self.sysint_map.sig_restart;
    drop(restarts.remove_sync(&tgid));
}
/// Queues `request` as a blocking system call handled by the calling thread.
///
/// Builds a `SysInterrupt` from the request, the caller's thread id and the
/// thread group id of the requesting process, pushes it onto `sys_queue`,
/// wakes the interrupt and monitor threads, and finally unblocks `SIGALRM`
/// for the caller.
///
/// # Errors
///
/// Propagates failures of `proc_tgid`, `SysInterrupt::new` and
/// `unblock_signal`; returns `EINTR` when the queue rejects the push.
pub(crate) fn add_sys_block(
    &self,
    request: ScmpNotifReq,
    ignore_restart: bool,
) -> Result<(), Errno> {
    let emu_tid = gettid();
    let request_tgid = proc_tgid(request.pid())?;
    let entry = SysInterrupt::new(request, emu_tid, request_tgid, ignore_restart)?;

    if self.sysint_map.sys_queue.push(entry).is_err() {
        return Err(Errno::EINTR);
    }
    self.notify_int();
    self.notify_mon();

    // NOTE(review): presumably consumes a SIGALRM that is already pending
    // so that unblocking does not deliver a stale one -- confirm the
    // semantics of `sigtimedpoll` with a `None` timeout.
    let mut alarm_set = SigSet::empty();
    alarm_set.add(Signal::SIGALRM);
    let _ = retry_on_eintr(|| sigtimedpoll(&alarm_set, None));

    unblock_signal(libc::SIGALRM)
}
/// Blocks `SIGALRM` for the caller and queues `request_id` for deletion.
///
/// If the first push onto `sys_delete` fails, the interrupt thread is
/// woken, the caller yields once and retries; a second failure is ignored.
/// The interrupt thread is woken again before returning.
///
/// # Errors
///
/// Propagates a failure of `block_signal`; nothing is queued in that case.
pub(crate) fn del_sys_block(&self, request_id: u64) -> Result<(), Errno> {
    block_signal(libc::SIGALRM)?;

    let deletions = &self.sysint_map.sys_delete;
    if deletions.push(request_id).is_err() {
        // Give the interrupt thread a chance to make room, then try once more.
        self.notify_int();
        std::thread::yield_now();
        let _ = deletions.push(request_id);
    }

    self.notify_int();
    Ok(())
}
/// Removes every `unix_map` entry whose `pid` field equals `pid`.
pub(crate) fn retire_unix_map(&self, pid: Pid) {
    self.unix_map.retain_sync(|_key, entry| entry.pid != pid);
}
/// Removes everything this cache tracks for the thread `tid`.
///
/// Drops the signal-handler entry, the ptrace entry and matching
/// `unix_map` entries, and discards any pending error, chdir and mmap
/// records. The mmap record is cleared alongside the other pending
/// results so a stale record cannot outlive the thread and be picked up
/// by a later task that reuses the same id.
pub(crate) fn del_tid(&self, tid: Pid) {
    self.retire_sig_handle(tid);
    self.retire_ptrace_tid(tid);
    self.retire_unix_map(tid);
    // Pending results are consumed by removal; the values are not needed.
    let _ = self.get_error(tid);
    let _ = self.get_chdir(tid);
    let _ = self.get_mmap(tid);
}
/// Removes everything this cache tracks for the thread group `tgid`.
///
/// Clears the group-wide state first (restart signal set, ptrace entries
/// whose value is `tgid`), then the per-thread state of the thread whose
/// id equals `tgid`.
pub(crate) fn del_tgid(&self, tgid: Pid) {
    // Group-wide state.
    self.retire_sig_restart(tgid);
    self.retire_ptrace_tgid(tgid);
    // Per-thread state of the thread with the same id.
    self.del_tid(tgid);
}
/// Sends `SIGRTMIN` to the thread of this process whose id equals the
/// process id.
///
/// `ESRCH` is treated as success. Any other failure is reported through
/// `alert!` and terminates the process with exit code 101.
pub(crate) fn interrupt_run(&self) {
    let pid = Pid::this();
    let errno = match retry_on_eintr(|| tgkill(pid, pid, libc::SIGRTMIN())) {
        Err(errno) if errno != Errno::ESRCH => errno,
        _ => return,
    };
    alert!("ctx": "emu", "op": "interrupt_run",
        "msg": format!("failed to interrupt: {errno}"),
        "tid": pid.as_raw(), "err": errno as i32);
    std::process::exit(101);
}
/// Applies a ptrace response to the tracee it names.
///
/// * `Cont` resumes the tracee with `ptrace_cont`, forwarding `signal`.
/// * `Exit` resumes the tracee with `ptrace_syscall`, forwarding `signal`.
/// * `Deny` skips the current system call; `ECANCELED` is passed on as
///   "no errno". On mips and s390x targets the errno is recorded with
///   `add_error` and the tracee is resumed with `ptrace_syscall`; on all
///   other targets it is resumed with `ptrace_cont`.
/// * `SetGroupsZero` sets argument 0 of the current system call to 0 and
///   resumes the tracee with `ptrace_cont`.
///
/// Failures to resume are ignored. A failure to skip the call, set the
/// argument or record the errno kills the tracee with `SIGKILL`, except
/// that `ESRCH` from the ptrace helpers is ignored.
pub(crate) fn handle_ptrace_response(&self, response: SydNotifResp) {
    match response {
        SydNotifResp::Cont { pid, signal } => {
            let _ = ptrace_cont(pid, signal);
        }
        SydNotifResp::Exit { pid, signal } => {
            let _ = ptrace_syscall(pid, signal);
        }
        SydNotifResp::Deny { pid, arch, errno } => {
            let errno = if errno == Errno::ECANCELED {
                None
            } else {
                Some(errno)
            };
            if let Err(errno) = ptrace_skip_syscall(pid, arch, errno) {
                if errno != Errno::ESRCH {
                    let _ = safe_kill(pid, libc::SIGKILL);
                }
            } else if cfg!(any(
                target_arch = "mips",
                target_arch = "mips32r6",
                target_arch = "mips64",
                target_arch = "mips64r6",
                target_arch = "s390x"
            )) {
                if self.add_error(pid, errno).is_err() {
                    let _ = safe_kill(pid, libc::SIGKILL);
                } else {
                    // The tracee is still stopped here; without a resume it
                    // would never run again. Resume to the next syscall stop.
                    // NOTE(review): presumably the recorded errno is written
                    // at that stop -- confirm against the exit-stop handler.
                    let _ = ptrace_syscall(pid, None);
                }
            } else {
                let _ = ptrace_cont(pid, None);
            }
        }
        SydNotifResp::SetGroupsZero { pid, arch } => {
            if let Err(errno) = ptrace_set_arg(pid, arch, 0, 0) {
                if errno != Errno::ESRCH {
                    let _ = safe_kill(pid, libc::SIGKILL);
                }
                return;
            }
            let _ = ptrace_cont(pid, None);
        }
    }
}
/// Records one crash for `path` and suspends the path once the crash
/// count reaches `maxcrashes`.
///
/// The per-path counter is bumped with saturation (starting at 1) and
/// stored with the `expiry` duration. When the new count is at least
/// `maxcrashes`, `path` is inserted into the suspension set with the
/// `suspension` duration.
///
/// Returns `(was_suspended, is_suspended, num_crashes)`: the first flag is
/// `true` when the threshold was reached, the second is `true` only when
/// this call added a new suspension entry.
///
/// # Errors
///
/// Propagates failures of `path.try_to_owned()` and of the two map
/// operations.
pub(crate) fn add_segvguard_crash(
    &self,
    path: &XPath,
    expiry: Duration,
    suspension: Duration,
    maxcrashes: u8,
) -> Result<(bool, bool, u8), Errno> {
    let crash_key = path.try_to_owned()?;
    let num_crashes = self
        .segvguard_expiry
        .try_upsert(
            crash_key,
            |count| {
                *count = count.saturating_add(1);
                *count
            },
            1,
            expiry,
        )?
        .unwrap_or(1);

    // Below the threshold: nothing to suspend.
    if num_crashes < maxcrashes {
        return Ok((false, false, num_crashes));
    }

    let suspend_key = path.try_to_owned()?;
    let newly_suspended = self
        .segvguard_suspension
        .try_insert(suspend_key, (), suspension)?
        .is_none();
    Ok((true, newly_suspended, num_crashes))
}
/// Returns the action to apply to `path` if it is currently suspended.
///
/// Yields `Some(segvguard_act)` only when the action is not
/// `Action::Allow`, `segvguard_expiry` is non-zero and `path` is present in
/// the suspension set; otherwise `None`.
pub(crate) fn check_segvguard(
    &self,
    path: &XPath,
    segvguard_act: Action,
    segvguard_expiry: Duration,
) -> Option<Action> {
    // The cheap checks come first; the set is only consulted when needed.
    let enforced = segvguard_act != Action::Allow
        && !segvguard_expiry.is_zero()
        && self.segvguard_suspension.contains_key(path);
    enforced.then_some(segvguard_act)
}
}
/// Largest value either packed counter can hold: the low half of a
/// `usize`, all ones.
const MAX_SIZE: usize = (1 << (usize::BITS / 2)) - 1;
/// Mask selecting the busy counter, which lives in the low half.
const WORKER_BUSY_MASK: usize = MAX_SIZE;
/// Adding this value bumps the total counter (high half) by one.
const INCREMENT_TOTAL: usize = MAX_SIZE + 1;
/// Adding this value bumps the busy counter (low half) by one.
const INCREMENT_BUSY: usize = 1;

/// Two worker counters packed into one atomic word.
///
/// The high half of `counter` holds the total count, the low half holds
/// the busy count.
pub(crate) struct WorkerData {
    pub(crate) counter: AtomicUsize,
}

impl WorkerData {
    /// Number of bits the total counter is shifted by.
    const SHIFT: u32 = usize::BITS / 2;

    /// Creates a counter pair with both counts at zero.
    pub(crate) fn new() -> Self {
        Self {
            counter: AtomicUsize::new(0),
        }
    }

    /// Extracts the total count from a packed value.
    fn total(val: usize) -> usize {
        val >> Self::SHIFT
    }

    /// Extracts the busy count from a packed value.
    fn busy(val: usize) -> usize {
        val & WORKER_BUSY_MASK
    }

    /// Unpacks `val` into `(total, busy)`.
    pub(crate) fn split(val: usize) -> (usize, usize) {
        (Self::total(val), Self::busy(val))
    }

    /// Decrements both counters by one; returns the previous
    /// `(total, busy)` pair.
    pub(crate) fn decrement_both(&self) -> (usize, usize) {
        let step = INCREMENT_TOTAL + INCREMENT_BUSY;
        Self::split(self.counter.fetch_sub(step, Ordering::Relaxed))
    }

    /// Increments the total counter; returns the previous total.
    pub(crate) fn increment_worker_total(&self) -> usize {
        Self::total(self.counter.fetch_add(INCREMENT_TOTAL, Ordering::Relaxed))
    }

    /// Decrements the total counter; returns the previous total.
    pub(crate) fn decrement_worker_total(&self) -> usize {
        Self::total(self.counter.fetch_sub(INCREMENT_TOTAL, Ordering::AcqRel))
    }

    /// Increments the busy counter; returns the previous busy count.
    pub(crate) fn increment_worker_busy(&self) -> usize {
        Self::busy(self.counter.fetch_add(INCREMENT_BUSY, Ordering::AcqRel))
    }

    /// Decrements the busy counter; returns the previous busy count.
    pub(crate) fn decrement_worker_busy(&self) -> usize {
        Self::busy(self.counter.fetch_sub(INCREMENT_BUSY, Ordering::Relaxed))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // A zero word carries no total count.
    #[test]
    fn test_worker_data_1() {
        let total = WorkerData::total(0);
        assert_eq!(total, 0);
    }

    // A zero word carries no busy count.
    #[test]
    fn test_worker_data_2() {
        let busy = WorkerData::busy(0);
        assert_eq!(busy, 0);
    }

    // One total increment shows up only in the total half.
    #[test]
    fn test_worker_data_3() {
        let packed = INCREMENT_TOTAL;
        assert_eq!(WorkerData::total(packed), 1);
        assert_eq!(WorkerData::busy(packed), 0);
    }

    // One busy increment shows up only in the busy half.
    #[test]
    fn test_worker_data_4() {
        let packed = INCREMENT_BUSY;
        assert_eq!(WorkerData::busy(packed), 1);
        assert_eq!(WorkerData::total(packed), 0);
    }

    // Both increments combined are visible in both halves.
    #[test]
    fn test_worker_data_5() {
        let packed = INCREMENT_TOTAL | INCREMENT_BUSY;
        assert_eq!(WorkerData::total(packed), 1);
        assert_eq!(WorkerData::busy(packed), 1);
    }

    // A saturated busy half does not spill into the total half.
    #[test]
    fn test_worker_data_6() {
        let packed = MAX_SIZE;
        assert_eq!(WorkerData::busy(packed), MAX_SIZE);
        assert_eq!(WorkerData::total(packed), 0);
    }
}