#![forbid(unsafe_code)]
use std::io::IoSlice;
use libc::{c_int, c_uint, iovec, size_t, socklen_t, SIGPIPE};
use libseccomp::ScmpNotifResp;
use nix::{errno::Errno, sys::socket::SockaddrStorage, unistd::Pid};
use zeroize::Zeroizing;
use crate::{
compat::{
addr_family, fstatx, iovec32, mmsghdr, mmsghdr32, msghdr, msghdr32, pack_cmsg_buf,
sendmmsg, sendmsg, try_from_bytes, AddressFamily, Cmsg, CmsgOwned, MmsgHdr, MsgFlags,
MsgHdr, ToByteArray, PF_ALG, PF_INET, PF_INET6, PF_NETLINK, PF_UNIX, STATX_INO, UIO_MAXIOV,
},
config::MAX_RW_COUNT,
confine::scmp_arch_is_compat32,
fd::{get_nonblock, has_recv_timeout, SafeOwnedFd},
ip::{clear_scope6, make_lo6addr, make_loaddr, SocketCall},
kernel::{
net::{canon_addr, get_addr, sandbox_addr, to_msgflags},
sandbox_path,
},
lookup::CanonicalPath,
path::XPath,
proc::proc_tgid,
req::UNotifyEventRequest,
sandbox::{Capability, Flags, Options, SandboxGuard},
warn,
};
/// Emulate a `sendmsg(2)` call on behalf of the sandboxed process.
///
/// `args[1]` is the address of the caller's message header and `args[2]`
/// carries the message flags. The header, destination address, control
/// messages and payload are copied out of the caller's memory, checked
/// against the sandbox policy, and then sent on `fd`.
///
/// # Errors
///
/// - `EOPNOTSUPP` when `MSG_OOB` is requested and out-of-band data is not
///   allowed by `options`.
/// - `EINVAL` when `MSG_CMSG_COMPAT` is passed by a non-compat caller.
/// - Any error produced while reading or validating the message.
/// - Errors from the blocking-syscall bookkeeping in `request.cache`.
///
/// A failure of the send itself is not returned as `Err`; it is turned into
/// a failing syscall response, with `SIGPIPE` delivered first for `EPIPE`
/// unless `MSG_NOSIGNAL` was given.
pub(crate) fn handle_sendmsg(
    fd: SafeOwnedFd,
    request: &UNotifyEventRequest,
    sock_dom: AddressFamily,
    args: &[u64; 6],
    flags: Flags,
    options: Options,
) -> Result<ScmpNotifResp, Errno> {
    let msgflags = to_msgflags(args[2]);
    if !options.allow_unsafe_oob() && msgflags.contains(MsgFlags::MSG_OOB) {
        return Err(Errno::EOPNOTSUPP);
    }

    let req = request.scmpreq;
    let is32 = scmp_arch_is_compat32(req.data.arch);
    if !is32 && msgflags.contains(MsgFlags::MSG_CMSG_COMPAT) {
        return Err(Errno::EINVAL);
    }

    // Read the message header using the layout of the caller's ABI.
    let size = if is32 {
        size_of::<msghdr32>()
    } else {
        size_of::<msghdr>()
    };
    let buf = request.read_vec_all_zeroed(args[1], size)?;
    let msg: msghdr = if is32 {
        msghdr::from(try_from_bytes::<msghdr32>(&buf)?)
    } else {
        try_from_bytes(&buf)?
    };

    // Policy checks happen while the sandbox guard is held.
    let is_unix = sock_dom == AddressFamily::Unix;
    let sandbox = request.get_sandbox();
    let addr = check_addr(
        request,
        &sandbox,
        SocketCall::SendMsg,
        sock_dom,
        &msg,
        options,
    )?;
    let cmsgs = check_cmsgs(
        request,
        &sandbox,
        SocketCall::SendMsg,
        &msg,
        &addr,
        flags,
        is_unix,
    )?;
    drop(sandbox);

    // Copy the payload and build borrowed views for the send call.
    let io_buffers = read_iov(request, &msg, is32)?;
    let mut io_slices: Vec<IoSlice> = Vec::new();
    io_slices
        .try_reserve(io_buffers.len())
        .or(Err(Errno::ENOMEM))?;
    for buf in &io_buffers {
        io_slices.push(IoSlice::new(buf));
    }

    let cmsg_slice = cmsgs.as_deref().unwrap_or(&[]);
    let mut control_messages: Vec<Cmsg<'_>> = Vec::new();
    control_messages
        .try_reserve(cmsg_slice.len())
        .or(Err(Errno::ENOMEM))?;
    for cmsg in cmsg_slice {
        control_messages.push(Cmsg::from(cmsg));
    }

    // Query the blocking mode before registering the sender, so a failure
    // of these fallible queries cannot leave a stale sender record behind.
    let (is_blocking, ignore_restart) =
        if !msgflags.contains(MsgFlags::MSG_DONTWAIT) && !get_nonblock(&fd)? {
            (true, has_recv_timeout(&fd)?)
        } else {
            (false, false)
        };

    // Record the sender for UNIX sockets; a registration failure is
    // tolerated and simply leaves nothing to unwind.
    let unix_data = if is_unix {
        let unix = addr.as_ref().and_then(|addr| addr.argaddr.as_unix_addr());
        request.add_send(&fd, req.pid(), unix).ok()
    } else {
        None
    };

    if is_blocking {
        if let Err(errno) = request.cache.add_sys_block(req, ignore_restart) {
            // Nothing was sent: unwind the sender record before bailing out.
            if let Some((inode, dest)) = unix_data {
                let _ = request.del_send(inode, dest);
            }
            return Err(errno);
        }
    }

    let result = if let Some(ref addr) = addr {
        sendmsg(
            &fd,
            &io_slices,
            &control_messages,
            msgflags,
            Some(&addr.addr),
        )
    } else {
        sendmsg::<_, SockaddrStorage>(&fd, &io_slices, &control_messages, msgflags, None)
    };

    // Forget the sender of a failed send first, so that an error from the
    // bookkeeping below cannot skip this cleanup.
    if result.is_err() {
        if let Some((inode, dest)) = unix_data {
            let _ = request.del_send(inode, dest);
        }
    }

    if is_blocking {
        request.cache.del_sys_block(req.id)?;
    }

    #[expect(clippy::cast_possible_wrap)]
    Ok(match result {
        Ok(n) => request.return_syscall(n as i64),
        Err(Errno::EPIPE) if !msgflags.contains(MsgFlags::MSG_NOSIGNAL) => {
            request.pidfd_kill(SIGPIPE)?;
            request.fail_syscall(Errno::EPIPE)
        }
        Err(errno) => request.fail_syscall(errno),
    })
}
/// Emulate a `sendmmsg(2)` call on behalf of the sandboxed process.
///
/// `args[1]` is the address of the caller's message vector, `args[2]` its
/// length (clamped to `UIO_MAXIOV`) and `args[3]` the message flags. Every
/// message is validated with [`check_msg`] before anything is sent; the
/// batch is then handed to a single `sendmmsg` call and the per-message
/// lengths are written back to the caller's vector.
///
/// # Errors
///
/// - `EOPNOTSUPP` when `MSG_OOB` is requested and out-of-band data is not
///   allowed by `options`.
/// - `EINVAL` when `MSG_CMSG_COMPAT` is passed by a non-compat caller.
/// - Any error raised while reading or validating a message, or while
///   sending; `EPIPE` additionally delivers `SIGPIPE` unless `MSG_NOSIGNAL`
///   was given.
///
/// Sender records registered by [`check_msg`] are removed again for every
/// message that ends up not being sent, including on early error returns.
pub(crate) fn handle_sendmmsg(
    fd: SafeOwnedFd,
    request: &UNotifyEventRequest,
    sock_dom: AddressFamily,
    args: &[u64; 6],
    flags: Flags,
    options: Options,
) -> Result<ScmpNotifResp, Errno> {
    let msgflags = to_msgflags(args[3]);
    if !options.allow_unsafe_oob() && msgflags.contains(MsgFlags::MSG_OOB) {
        return Err(Errno::EOPNOTSUPP);
    }

    let req = request.scmpreq;
    let is32 = scmp_arch_is_compat32(req.data.arch);
    if !is32 && msgflags.contains(MsgFlags::MSG_CMSG_COMPAT) {
        return Err(Errno::EINVAL);
    }

    let addr = args[1];
    #[expect(clippy::cast_possible_truncation)]
    let vlen = args[2] as c_uint as usize;
    if vlen == 0 {
        return Ok(request.return_syscall(0));
    }
    let vlen = vlen.min(UIO_MAXIOV);

    let mut mmsghdrs = read_mmsghdrs(request, addr, vlen, is32)?;

    let (is_blocking, ignore_restart) =
        if !msgflags.contains(MsgFlags::MSG_DONTWAIT) && !get_nonblock(&fd)? {
            (true, has_recv_timeout(&fd)?)
        } else {
            (false, false)
        };
    let must_signal = !msgflags.contains(MsgFlags::MSG_NOSIGNAL);

    let inode = fstatx(&fd, STATX_INO).map(|statx| statx.stx_ino)?;
    let tgid = proc_tgid(req.pid())?;

    // Validate every message up front. A failure part-way through must
    // drop the sender records of the messages that already passed.
    let mut cmsgs: Vec<CheckedMsg> = Vec::new();
    cmsgs.try_reserve(vlen).or(Err(Errno::ENOMEM))?;
    for mmsg in &mmsghdrs {
        match check_msg(
            request,
            sock_dom,
            (&mmsg.msg_hdr, msgflags),
            (flags, options),
            (inode, tgid),
        ) {
            Ok(msg) => cmsgs.push(msg),
            Err(errno) => {
                delete_senders(request, &cmsgs);
                return Err(errno);
            }
        }
    }
    let msg_count = cmsgs.len();

    // From here until the send, any early return has to unwind all senders.
    let unwind = |errno: Errno| -> Errno {
        delete_senders(request, &cmsgs);
        errno
    };

    // Copy payloads and pack control messages for each entry.
    let mut msg_io_bufs: Vec<Vec<Vec<u8>>> = Vec::new();
    let mut msg_cmsg_bufs: Vec<Vec<u8>> = Vec::new();
    msg_io_bufs
        .try_reserve(msg_count)
        .or(Err(Errno::ENOMEM))
        .map_err(&unwind)?;
    msg_cmsg_bufs
        .try_reserve(msg_count)
        .or(Err(Errno::ENOMEM))
        .map_err(&unwind)?;
    for (idx, mmsg) in mmsghdrs.iter().enumerate().take(msg_count) {
        msg_io_bufs.push(read_iov(request, &mmsg.msg_hdr, is32).map_err(&unwind)?);
        msg_cmsg_bufs.push(pack_cmsgs(&cmsgs[idx].cmsgs).map_err(&unwind)?);
    }

    let mut msg_io_slices: Vec<Vec<IoSlice<'_>>> = Vec::new();
    msg_io_slices
        .try_reserve(msg_count)
        .or(Err(Errno::ENOMEM))
        .map_err(&unwind)?;
    for io_bufs in &msg_io_bufs {
        let mut slices = Vec::new();
        slices
            .try_reserve(io_bufs.len())
            .or(Err(Errno::ENOMEM))
            .map_err(&unwind)?;
        for buf in io_bufs {
            slices.push(IoSlice::new(buf));
        }
        msg_io_slices.push(slices);
    }

    let mut mmsghdr_vec =
        build_mmsghdr_vec(&cmsgs, &msg_io_slices, &mut msg_cmsg_bufs).map_err(&unwind)?;

    if is_blocking {
        request
            .cache
            .add_sys_block(req, ignore_restart)
            .map_err(&unwind)?;
    }
    let result = sendmmsg(&fd, &mut mmsghdr_vec[..msg_count], msgflags);

    // Drop the sender records of every message that did not go out (all of
    // them on failure) before the fallible bookkeeping below, so an error
    // there cannot skip the cleanup.
    let sent = result.as_ref().map_or(0, |count| *count);
    delete_senders(request, &cmsgs[sent..]);

    if is_blocking {
        request.cache.del_sys_block(req.id)?;
    }

    let datagrams = match result {
        Ok(sent) => sent,
        Err(Errno::EPIPE) if must_signal => {
            let _ = request.pidfd_kill(SIGPIPE);
            return Err(Errno::EPIPE);
        }
        Err(errno) => return Err(errno),
    };

    let msg_count = write_mmsghdrs(request, &mut mmsghdrs, &mmsghdr_vec, datagrams, addr, is32)?;
    #[expect(clippy::cast_possible_wrap)]
    Ok(request.return_syscall(msg_count as i64))
}
/// Destination address of a message after it passed `check_addr`.
struct CheckedAddr {
// Second value returned by `canon_addr`; forwarded to `sandbox_addr` for
// the policy checks.
root: Option<CanonicalPath>,
// Address actually used for sending: the `canon_addr` result, possibly
// rewritten by `make_loaddr` / `make_lo6addr` / `clear_scope6`.
addr: SockaddrStorage,
// Address as obtained from `get_addr`, before canonicalization; used to
// derive the UNIX peer when registering a sender.
argaddr: SockaddrStorage,
}
/// Sender record returned by `add_send2`, kept so it can later be removed
/// with `del_send`.
struct UnixSender {
// First element of the `add_send2` result; passed back as the first
// argument of `del_send`.
ino: u64,
// Second element of the `add_send2` result; passed back to `del_send`.
// NOTE(review): presumably identifies the destination socket — confirm.
dst: Option<(u32, u32)>, }
/// One `sendmmsg` entry after validation by `check_msg`.
struct CheckedMsg {
// Flags installed into the rebuilt message header.
msgflags: MsgFlags,
// Validated destination address, or `None` when the message names none.
addr: Option<CheckedAddr>,
// Control messages that survived filtering, or `None` when the message
// carried no control data.
cmsgs: Option<Vec<CmsgOwned>>,
// Sender record to remove again if the message is not sent; only set for
// UNIX sockets whose registration succeeded.
sender: Option<UnixSender>,
}
#[expect(clippy::cognitive_complexity)]
fn check_addr(
request: &UNotifyEventRequest,
sandbox: &SandboxGuard<'_>,
subcall: SocketCall,
sock_dom: AddressFamily,
msg: &msghdr,
options: Options,
) -> Result<Option<CheckedAddr>, Errno> {
let addr_remote = msg.msg_name;
let addr_len: socklen_t = if addr_remote.is_null() {
0
} else if msg.msg_namelen < 0 {
return Err(Errno::EINVAL);
} else {
#[expect(clippy::cast_possible_truncation)]
#[expect(clippy::cast_sign_loss)]
(msg.msg_namelen as socklen_t).min(size_of::<libc::sockaddr_storage>() as socklen_t)
};
if addr_remote.is_null() || addr_len == 0 {
return Ok(None);
}
let any_addr = sandbox.flags.allow_unsafe_any_addr();
let local_net = sandbox.flags.force_local_net();
let argaddr = get_addr(request, subcall, sock_dom, addr_remote as u64, addr_len)?;
let (mut addr, root) = canon_addr(request, sandbox, &argaddr, Capability::CAP_NET_CONNECT)?;
match addr_family(&addr) {
PF_UNIX => {
sandbox_addr(
request,
sandbox,
subcall,
&addr,
&root,
Capability::CAP_NET_CONNECT,
)?;
}
PF_INET => {
if !any_addr {
make_loaddr(subcall, &mut addr, local_net)?;
}
sandbox_addr(
request,
sandbox,
subcall,
&addr,
&root,
Capability::CAP_NET_CONNECT,
)?;
}
PF_INET6 => {
if !any_addr {
make_lo6addr(subcall, &mut addr, local_net)?;
}
if !sandbox.flags.allow_unsafe_ipv6_scope() {
if let Some((scope_id, ip, port)) = clear_scope6(&mut addr) {
warn!("ctx": "net", "op": "zero_scope_id",
"sys": subcall.name(), "pid": request.scmpreq.pid().as_raw(),
"addr": format!("{ip}!{port}"), "scope_id": scope_id,
"msg": format!("zeroed sin6_scope_id={scope_id} on {ip}!{port}"),
"tip": "configure `trace/allow_unsafe_ipv6_scope:1'");
}
}
sandbox_addr(
request,
sandbox,
subcall,
&addr,
&root,
Capability::CAP_NET_CONNECT,
)?;
}
PF_ALG | PF_NETLINK => {}
_ if options.allow_unsupp_socket() => {}
_ => return Err(Errno::EAFNOSUPPORT),
}
Ok(Some(CheckedAddr {
addr,
root,
argaddr,
}))
}
/// Read, filter and validate the control messages attached to `msg`.
///
/// Returns `Ok(None)` when the message carries no control data. Otherwise
/// the control buffer is copied from the caller (capped at `MAX_RW_COUNT`),
/// parsed, and stripped of packet-info, return-options and routing-header
/// messages unless the matching `allow_unsafe_*` sandbox flag is set. For
/// UNIX sockets, passing file descriptors requires `CAP_NET_SENDFD` on the
/// destination (or on `!unnamed` when there is no address) and every
/// descriptor is checked individually.
///
/// # Errors
///
/// - `EMSGSIZE` when `msg_iovlen` exceeds `UIO_MAXIOV`.
/// - `ENOBUFS` when `msg_controllen` exceeds `c_int::MAX`.
/// - `EFAULT` for a null control pointer with a non-zero length.
/// - Whatever reading, parsing and the sandbox checks return.
#[expect(clippy::cognitive_complexity)]
fn check_cmsgs(
    request: &UNotifyEventRequest,
    sandbox: &crate::sandbox::SandboxGuard<'_>,
    subcall: SocketCall,
    msg: &msghdr,
    checked_addr: &Option<CheckedAddr>,
    flags: Flags,
    is_unix: bool,
) -> Result<Option<Vec<CmsgOwned>>, Errno> {
    // Header sanity checks, in the order that fixes errno precedence.
    #[expect(clippy::useless_conversion)]
    let iov_count = usize::try_from(msg.msg_iovlen).or(Err(Errno::EMSGSIZE))?;
    if iov_count > UIO_MAXIOV {
        return Err(Errno::EMSGSIZE);
    }
    if msg.msg_controllen > c_int::MAX as size_t {
        return Err(Errno::ENOBUFS);
    }
    if msg.msg_controllen == 0 {
        return Ok(None);
    }
    if msg.msg_control.is_null() {
        return Err(Errno::EFAULT);
    }

    #[expect(clippy::useless_conversion)]
    let control_len = usize::try_from(msg.msg_controllen)
        .or(Err(Errno::EINVAL))?
        .min(*MAX_RW_COUNT);
    let raw_control = request.read_vec_all(msg.msg_control as u64, control_len)?;
    let mut control_data = request.parse_cmsgs(&raw_control)?;

    // Strip packet-info messages unless explicitly allowed.
    if !sandbox.flags.allow_unsafe_ip_pktinfo() {
        let before = control_data.len();
        control_data.retain(|cmsg| {
            !matches!(
                cmsg,
                CmsgOwned::Ipv4PacketInfo(_) | CmsgOwned::Ipv6PacketInfo(_)
            )
        });
        let stripped = control_data.len() != before;
        if stripped {
            warn!("ctx": "net", "op": "strip_pktinfo",
                "sys": subcall.name(), "pid": request.scmpreq.pid().as_raw(),
                "msg": "stripped IP_PKTINFO and/or IPV6_PKTINFO control messages",
                "tip": "configure `trace/allow_unsafe_ip_pktinfo:1'");
        }
    }

    // Strip IPv4 return-options messages unless explicitly allowed.
    if !sandbox.flags.allow_unsafe_ip_retopts() {
        let before = control_data.len();
        control_data.retain(|cmsg| !matches!(cmsg, CmsgOwned::Ipv4ReturnOpts(_)));
        let stripped = control_data.len() != before;
        if stripped {
            warn!("ctx": "net", "op": "strip_retopts",
                "sys": subcall.name(), "pid": request.scmpreq.pid().as_raw(),
                "msg": "stripped IP_RETOPTS control message",
                "tip": "configure `trace/allow_unsafe_ip_retopts:1'");
        }
    }

    // Strip IPv6 routing-header messages unless explicitly allowed.
    if !sandbox.flags.allow_unsafe_ipv6_rthdr() {
        let before = control_data.len();
        control_data.retain(|cmsg| !matches!(cmsg, CmsgOwned::Ipv6RoutingHdr(_)));
        let stripped = control_data.len() != before;
        if stripped {
            warn!("ctx": "net", "op": "strip_rthdr",
                "sys": subcall.name(), "pid": request.scmpreq.pid().as_raw(),
                "msg": "stripped IPV6_RTHDR control message",
                "tip": "configure `trace/allow_unsafe_ipv6_rthdr:1'");
        }
    }

    // Descriptor passing is only examined for UNIX sockets.
    if is_unix {
        let carries_fds = control_data
            .iter()
            .any(|cmsg| matches!(cmsg, CmsgOwned::ScmRights(..)));
        if carries_fds {
            match checked_addr {
                Some(ca) => {
                    sandbox_addr(
                        request,
                        sandbox,
                        subcall,
                        &ca.addr,
                        &ca.root,
                        Capability::CAP_NET_SENDFD,
                    )?;
                }
                None => {
                    sandbox_path(
                        Some(request),
                        sandbox,
                        request.scmpreq.pid(),
                        XPath::from_bytes(b"!unnamed"),
                        Capability::CAP_NET_SENDFD,
                        subcall.name(),
                    )?;
                }
            }
        }

        let log_scmp = sandbox.log_scmp();
        let rights = control_data.iter().filter_map(|cmsg| match cmsg {
            CmsgOwned::ScmRights(fds) => Some(fds),
            _ => None,
        });
        for fds in rights {
            for fd in fds {
                request.check_scm_rights(fd, flags, subcall, log_scmp)?;
            }
        }
    }

    Ok(Some(control_data))
}
/// Copy the payload described by `msg`'s I/O vector out of the caller.
///
/// The vector entries are read with the layout of the caller's ABI
/// (`iovec32` when `is32`), validated, and each non-empty segment is copied
/// into its own buffer. The total number of bytes copied is capped at
/// `MAX_RW_COUNT`; segments beyond the cap are truncated.
///
/// # Errors
///
/// - `EMSGSIZE` when `msg_iovlen` exceeds `UIO_MAXIOV` or the vector size
///   overflows.
/// - `EFAULT` for a null vector pointer, or a null base with non-zero length.
/// - `EINVAL` for a segment length above the ABI's signed size limit.
/// - `ENOMEM` when a buffer list cannot be grown.
/// - `EOVERFLOW` when the running total overflows.
/// - Whatever reading the caller's memory returns.
fn read_iov(
    request: &UNotifyEventRequest,
    msg: &msghdr,
    is32: bool,
) -> Result<Vec<Vec<u8>>, Errno> {
    let mut io_buffers: Vec<Vec<u8>> = Vec::new();

    #[expect(clippy::useless_conversion)]
    let len = usize::try_from(msg.msg_iovlen).or(Err(Errno::EMSGSIZE))?;
    if len > UIO_MAXIOV {
        return Err(Errno::EMSGSIZE);
    }
    if len == 0 {
        return Ok(io_buffers);
    }
    if msg.msg_iov.is_null() {
        return Err(Errno::EFAULT);
    }

    // Size of one vector entry in the caller's ABI.
    let entry_size = if is32 {
        size_of::<iovec32>()
    } else {
        size_of::<iovec>()
    };
    let size = len.checked_mul(entry_size).ok_or(Errno::EMSGSIZE)?;
    let buf = request.read_vec_all_zeroed(msg.msg_iov as u64, size)?;

    // Reserve fallibly, like every other allocation in this file, so that
    // memory pressure surfaces as ENOMEM instead of an abort.
    let mut iovecs: Vec<iovec> = Vec::new();
    iovecs.try_reserve(len).or(Err(Errno::ENOMEM))?;
    if is32 {
        for chunk in buf.chunks(entry_size) {
            let iov32: iovec32 = try_from_bytes(chunk)?;
            iovecs.push(iov32.into());
        }
    } else {
        for chunk in buf.chunks(entry_size) {
            iovecs.push(try_from_bytes(chunk)?);
        }
    }

    // Largest segment length accepted for the caller's ABI.
    let iov_cap: usize = if is32 {
        i32::MAX as usize
    } else {
        isize::MAX as usize
    };

    let mut total_len: usize = 0;
    for iov in iovecs {
        if iov.iov_len > iov_cap {
            return Err(Errno::EINVAL);
        }
        if iov.iov_base.is_null() && iov.iov_len > 0 {
            return Err(Errno::EFAULT);
        }
        if iov.iov_len == 0 {
            continue;
        }

        // Truncate the segment to whatever is left of the global cap.
        let rem_len = MAX_RW_COUNT
            .checked_sub(total_len)
            .ok_or(Errno::EOVERFLOW)?;
        #[expect(clippy::unnecessary_cast)]
        let iov_len = (iov.iov_len as usize).min(rem_len);

        let data_buf = request.read_vec_all(iov.iov_base as u64, iov_len)?;
        io_buffers.try_reserve(1).or(Err(Errno::ENOMEM))?;
        io_buffers.push(data_buf);
        total_len = total_len.checked_add(iov_len).ok_or(Errno::EOVERFLOW)?;
    }

    Ok(io_buffers)
}
/// Serialize owned control messages into a raw control buffer.
///
/// Yields an empty buffer when `data` is `None`; otherwise borrows every
/// owned message as a `Cmsg` and hands the list to `pack_cmsg_buf`.
///
/// # Errors
///
/// `ENOMEM` when the temporary list cannot be allocated, or whatever
/// `pack_cmsg_buf` returns.
fn pack_cmsgs(data: &Option<Vec<CmsgOwned>>) -> Result<Vec<u8>, Errno> {
    let owned = match data {
        Some(list) => list,
        None => return Ok(Vec::new()),
    };

    let mut borrowed: Vec<Cmsg<'_>> = Vec::new();
    borrowed.try_reserve(owned.len()).or(Err(Errno::ENOMEM))?;
    // Capacity is already reserved, so extending does not reallocate.
    borrowed.extend(owned.iter().map(Cmsg::from));

    pack_cmsg_buf(&borrowed)
}
/// Read `vlen` message-vector entries from the caller's memory at `addr`.
///
/// Entries are decoded with the caller's ABI layout (`mmsghdr32` when
/// `is32`) and converted to the native `mmsghdr` representation.
///
/// # Errors
///
/// - `EINVAL` when the total size overflows.
/// - `ENOMEM` when the result vector cannot be allocated.
/// - Whatever reading or decoding the entries returns.
fn read_mmsghdrs(
    request: &UNotifyEventRequest,
    addr: u64,
    vlen: usize,
    is32: bool,
) -> Result<Vec<mmsghdr>, Errno> {
    let entry_size = if is32 {
        size_of::<mmsghdr32>()
    } else {
        size_of::<mmsghdr>()
    };
    let total = vlen.checked_mul(entry_size).ok_or(Errno::EINVAL)?;
    let raw = request.read_vec_all_zeroed(addr, total)?;

    let mut headers: Vec<mmsghdr> = Vec::new();
    headers.try_reserve(vlen).or(Err(Errno::ENOMEM))?;
    for chunk in raw.chunks(entry_size) {
        let header: mmsghdr = if is32 {
            try_from_bytes::<mmsghdr32>(chunk)?.into()
        } else {
            try_from_bytes(chunk)?
        };
        headers.push(header);
    }

    Ok(headers)
}
/// Validate one `sendmmsg` entry and register its sender.
///
/// The address and control messages are checked under the sandbox guard.
/// For UNIX sockets a sender record is then registered with `add_send2`;
/// a registration failure is tolerated and leaves `sender` unset.
///
/// Arguments are grouped in tuples: `msg_data` is (header, flags),
/// `opt_data` is (sandbox flags, options) and `ino_data` is
/// (socket inode, thread group id).
///
/// # Errors
///
/// Whatever `check_addr` or `check_cmsgs` return.
fn check_msg(
    request: &UNotifyEventRequest,
    sock_dom: AddressFamily,
    msg_data: (&msghdr, MsgFlags),
    opt_data: (Flags, Options),
    ino_data: (u64, Pid),
) -> Result<CheckedMsg, Errno> {
    let (header, msgflags) = msg_data;
    let (flags, options) = opt_data;
    let (inode, tgid) = ino_data;
    let is_unix = sock_dom == AddressFamily::Unix;

    // Hold the sandbox guard only for the duration of the policy checks.
    let (addr, cmsgs) = {
        let sandbox = request.get_sandbox();
        let addr = check_addr(
            request,
            &sandbox,
            SocketCall::SendMmsg,
            sock_dom,
            header,
            options,
        )?;
        let cmsgs = check_cmsgs(
            request,
            &sandbox,
            SocketCall::SendMmsg,
            header,
            &addr,
            flags,
            is_unix,
        )?;
        (addr, cmsgs)
    };

    let sender = if !is_unix {
        None
    } else {
        let peer = addr
            .as_ref()
            .and_then(|checked| checked.argaddr.as_unix_addr());
        match request.add_send2(inode, tgid, peer) {
            Ok((ino, dst)) => Some(UnixSender { ino, dst }),
            Err(_) => None,
        }
    };

    Ok(CheckedMsg {
        msgflags,
        addr,
        cmsgs,
        sender,
    })
}
/// Write the per-message byte counts back to the caller's message vector.
///
/// The first `datagrams` entries of `mmsghdrs` receive the `msg_len` of the
/// matching entry in `mmsghdr_vec` and are then written to the caller's
/// memory at `addr`, one entry at a time. Returns how many entries were
/// written.
///
/// # Errors
///
/// A write failure is returned only when no entry has been written yet;
/// after at least one successful write the loop stops and the partial
/// count is returned instead.
fn write_mmsghdrs(
    request: &UNotifyEventRequest,
    mmsghdrs: &mut [mmsghdr],
    mmsghdr_vec: &[MmsgHdr],
    datagrams: usize,
    addr: u64,
    is32: bool,
) -> Result<usize, Errno> {
    // First pass: copy the reported lengths into the caller-layout headers.
    let reported = &mmsghdr_vec[..datagrams];
    for (user_hdr, sent_hdr) in mmsghdrs[..datagrams].iter_mut().zip(reported) {
        user_hdr.msg_len = sent_hdr.msg_len();
    }

    let entry_size = if is32 {
        size_of::<mmsghdr32>() as u64
    } else {
        size_of::<mmsghdr>() as u64
    };

    // Second pass: write the headers back, stopping at the first failure.
    let mut written: usize = 0;
    for (idx, user_hdr) in mmsghdrs[..datagrams].iter().enumerate() {
        if let Err(errno) = write_one_mmsghdr(request, user_hdr, addr, entry_size, idx, is32) {
            if written == 0 {
                return Err(errno);
            }
            break;
        }
        written = written.checked_add(1).ok_or(Errno::EOVERFLOW)?;
    }

    Ok(written)
}
/// Write a single message-vector entry to the caller's memory.
///
/// The entry lands at `addr + idx * entry_size`. For compat callers the
/// header is first converted to `mmsghdr32`. The serialized bytes are held
/// in a `Zeroizing` wrapper.
///
/// # Errors
///
/// - `EOVERFLOW` when the target address computation overflows.
/// - Whatever the 32-bit conversion or the memory write returns.
fn write_one_mmsghdr(
    request: &UNotifyEventRequest,
    mmsghdr: &mmsghdr,
    addr: u64,
    entry_size: u64,
    idx: usize,
    is32: bool,
) -> Result<(), Errno> {
    let offset = (idx as u64)
        .checked_mul(entry_size)
        .and_then(|entry_offs| addr.checked_add(entry_offs))
        .ok_or(Errno::EOVERFLOW)?;

    if !is32 {
        let native: Zeroizing<[u8; size_of::<mmsghdr>()]> =
            Zeroizing::new(mmsghdr.to_byte_array());
        request.write_mem_all(&*native, offset)?;
    } else {
        let compat: mmsghdr32 = (*mmsghdr).try_into()?;
        let narrow: Zeroizing<[u8; size_of::<mmsghdr32>()]> =
            Zeroizing::new(compat.to_byte_array());
        request.write_mem_all(&*narrow, offset)?;
    }

    Ok(())
}
fn delete_senders(request: &UNotifyEventRequest, msgs: &[CheckedMsg]) {
for msg in msgs {
if let Some(ref sender) = msg.sender {
let _ = request.del_send(sender.ino, sender.dst);
}
}
}
/// Assemble the message vector handed to `sendmmsg`.
///
/// Entry `i` combines the checked address and flags of `msgs[i]` with the
/// payload slices `io_slices[i]` and the control buffer `cmsg_bufs[i]`.
/// The three inputs are expected to have at least `msgs.len()` elements;
/// a shorter one panics on indexing.
///
/// # Errors
///
/// `ENOMEM` when the vector cannot be allocated.
fn build_mmsghdr_vec(
    msgs: &[CheckedMsg],
    io_slices: &[Vec<IoSlice<'_>>],
    cmsg_bufs: &mut [Vec<u8>],
) -> Result<Vec<MmsgHdr>, Errno> {
    let mut headers: Vec<MmsgHdr> = Vec::new();
    headers.try_reserve(msgs.len()).or(Err(Errno::ENOMEM))?;

    for (idx, checked) in msgs.iter().enumerate() {
        let mut inner = MsgHdr::default();
        if let Some(dest) = checked.addr.as_ref() {
            inner.set_addr(&dest.addr);
        }
        inner.set_iov(&io_slices[idx]);
        inner.set_control(&mut cmsg_bufs[idx]);
        inner.set_flags(checked.msgflags.bits());

        let mut outer = MmsgHdr::default();
        outer.set_msg_hdr(inner.into_inner());
        headers.push(outer);
    }

    Ok(headers)
}