use std::collections::HashSet;
use std::io;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::Arc;
use crate::seccomp::ctx::SupervisorCtx;
use crate::seccomp::notif::{read_child_mem, NotifAction};
use crate::sys::structs::{SeccompNotif, AF_INET, AF_INET6, ECONNREFUSED};
/// Largest single payload buffer, in bytes (64 MiB), that the send handlers
/// in this file accept before copying it from the child; larger requests are
/// rejected with `EMSGSIZE`.
const MAX_SEND_BUF: usize = 64 << 20;
/// Extracts the IP address from the raw bytes of a socket address.
///
/// The first two bytes are read as a native-endian address family. For
/// `AF_INET` the four bytes at offsets 4..8 are the address; for `AF_INET6`
/// the sixteen bytes at offsets 8..24 are the address.
///
/// Returns `None` when the family is anything else or when `bytes` is too
/// short to contain the address for its family.
fn parse_ip_from_sockaddr(bytes: &[u8]) -> Option<IpAddr> {
    let family = u32::from(u16::from_ne_bytes([*bytes.first()?, *bytes.get(1)?]));

    if family == AF_INET {
        let mut octets = [0u8; 4];
        octets.copy_from_slice(bytes.get(4..8)?);
        Some(IpAddr::V4(Ipv4Addr::from(octets)))
    } else if family == AF_INET6 {
        let mut octets = [0u8; 16];
        octets.copy_from_slice(bytes.get(8..24)?);
        Some(IpAddr::V6(Ipv6Addr::from(octets)))
    } else {
        None
    }
}
/// Extracts the destination port from the raw bytes of a socket address.
///
/// The first two bytes are read as a native-endian address family; for
/// `AF_INET` and `AF_INET6` the port is the big-endian `u16` stored in bytes
/// 2..4.
///
/// Returns `None` for any other family or when fewer than four bytes are
/// supplied.
fn parse_port_from_sockaddr(bytes: &[u8]) -> Option<u16> {
    let header = bytes.get(..4)?;
    let family = u32::from(u16::from_ne_bytes([header[0], header[1]]));

    if family != AF_INET && family != AF_INET6 {
        return None;
    }
    Some(u16::from_be_bytes([header[2], header[3]]))
}
/// Serializes `proxy_addr` as a raw socket address of the family the child
/// used: `sockaddr_in6` when `is_ipv6` is true, `sockaddr_in` otherwise.
///
/// For an IPv6 caller an IPv4 proxy is written as an IPv4-mapped IPv6
/// address. Returns `None` when the caller used IPv4 but the proxy address is
/// IPv6, because that cannot be expressed as a `sockaddr_in`.
fn proxy_sockaddr_bytes(proxy_addr: std::net::SocketAddr, is_ipv6: bool) -> Option<Vec<u8>> {
    if is_ipv6 {
        // SAFETY: `sockaddr_in6` is plain data for which all-zero is valid.
        let mut sa6: libc::sockaddr_in6 = unsafe { std::mem::zeroed() };
        sa6.sin6_family = libc::AF_INET6 as libc::sa_family_t;
        sa6.sin6_port = proxy_addr.port().to_be();
        let target = match proxy_addr {
            std::net::SocketAddr::V4(v4) => v4.ip().to_ipv6_mapped(),
            std::net::SocketAddr::V6(v6) => *v6.ip(),
        };
        sa6.sin6_addr.s6_addr = target.octets();
        // SAFETY: `sa6` is a live, fully initialized local and the slice
        // covers exactly its size; the bytes are copied before `sa6` is dropped.
        let raw = unsafe {
            std::slice::from_raw_parts(
                &sa6 as *const libc::sockaddr_in6 as *const u8,
                std::mem::size_of::<libc::sockaddr_in6>(),
            )
        };
        Some(raw.to_vec())
    } else {
        let v4 = match proxy_addr {
            std::net::SocketAddr::V4(v4) => v4,
            std::net::SocketAddr::V6(_) => return None,
        };
        // SAFETY: `sockaddr_in` is plain data for which all-zero is valid.
        let mut sa: libc::sockaddr_in = unsafe { std::mem::zeroed() };
        sa.sin_family = libc::AF_INET as libc::sa_family_t;
        sa.sin_port = proxy_addr.port().to_be();
        // The octets are already in network order; keep that memory layout.
        sa.sin_addr.s_addr = u32::from_ne_bytes(v4.ip().octets());
        // SAFETY: `sa` is a live, fully initialized local and the slice
        // covers exactly its size; the bytes are copied before `sa` is dropped.
        let raw = unsafe {
            std::slice::from_raw_parts(
                &sa as *const libc::sockaddr_in as *const u8,
                std::mem::size_of::<libc::sockaddr_in>(),
            )
        };
        Some(raw.to_vec())
    }
}

/// Binds `fd` to the wildcard address with port 0 (best effort) and returns
/// the local address reported by `getsockname`, or `None` if that call fails.
///
/// The result of `bind` is deliberately not checked: whatever local address
/// the socket ends up with is what `getsockname` reports.
fn bind_and_query_local_addr(fd: RawFd, is_ipv6: bool) -> Option<std::net::SocketAddr> {
    if is_ipv6 {
        // SAFETY: all-zero is a valid `sockaddr_in6`.
        let mut wildcard: libc::sockaddr_in6 = unsafe { std::mem::zeroed() };
        wildcard.sin6_family = libc::AF_INET6 as libc::sa_family_t;
        // SAFETY: the pointer and length describe the live local `wildcard`.
        let _ = unsafe {
            libc::bind(
                fd,
                &wildcard as *const libc::sockaddr_in6 as *const libc::sockaddr,
                std::mem::size_of::<libc::sockaddr_in6>() as libc::socklen_t,
            )
        };

        // SAFETY: all-zero is a valid `sockaddr_in6`.
        let mut local: libc::sockaddr_in6 = unsafe { std::mem::zeroed() };
        let mut local_len = std::mem::size_of::<libc::sockaddr_in6>() as libc::socklen_t;
        // SAFETY: `local` and `local_len` are live locals and `local_len`
        // holds the size of the buffer the kernel may write into.
        let rc = unsafe {
            libc::getsockname(
                fd,
                &mut local as *mut libc::sockaddr_in6 as *mut libc::sockaddr,
                &mut local_len,
            )
        };
        if rc != 0 {
            return None;
        }
        let port = u16::from_be(local.sin6_port);
        let ip = Ipv6Addr::from(local.sin6_addr.s6_addr);
        Some(std::net::SocketAddr::V6(std::net::SocketAddrV6::new(
            ip, port, 0, 0,
        )))
    } else {
        // SAFETY: all-zero is a valid `sockaddr_in`.
        let mut wildcard: libc::sockaddr_in = unsafe { std::mem::zeroed() };
        wildcard.sin_family = libc::AF_INET as libc::sa_family_t;
        // SAFETY: the pointer and length describe the live local `wildcard`.
        let _ = unsafe {
            libc::bind(
                fd,
                &wildcard as *const libc::sockaddr_in as *const libc::sockaddr,
                std::mem::size_of::<libc::sockaddr_in>() as libc::socklen_t,
            )
        };

        // SAFETY: all-zero is a valid `sockaddr_in`.
        let mut local: libc::sockaddr_in = unsafe { std::mem::zeroed() };
        let mut local_len = std::mem::size_of::<libc::sockaddr_in>() as libc::socklen_t;
        // SAFETY: `local` and `local_len` are live locals and `local_len`
        // holds the size of the buffer the kernel may write into.
        let rc = unsafe {
            libc::getsockname(
                fd,
                &mut local as *mut libc::sockaddr_in as *mut libc::sockaddr,
                &mut local_len,
            )
        };
        if rc != 0 {
            return None;
        }
        let port = u16::from_be(local.sin_port);
        let ip = Ipv4Addr::from(u32::from_be(local.sin_addr.s_addr));
        Some(std::net::SocketAddr::V4(std::net::SocketAddrV4::new(
            ip, port,
        )))
    }
}

/// Handles an intercepted `connect(sockfd, addr, addrlen)` by performing the
/// connect from the supervisor on a duplicate of the child's socket.
///
/// Steps:
/// 1. Copy the socket address from the child. Lengths above
///    `sizeof(sockaddr_storage)` are rejected with `EINVAL` before any copy,
///    so the child cannot force a huge allocation.
/// 2. Addresses that are not `AF_INET`/`AF_INET6` are left to the kernel
///    (`NotifAction::Continue`).
/// 3. If the effective network policy is an allow-list that does not contain
///    the destination IP, fail with `ECONNREFUSED`.
/// 4. If an HTTP ACL proxy address is configured and the destination port is
///    one of the intercepted ports, connect to the proxy instead and, when an
///    original-destination map is configured, record
///    `local socket address -> original destination IP` in it.
/// 5. Issue `connect` on the duplicated descriptor and return its result
///    (`0`, or the errno it failed with).
///
/// Other failure codes: `EIO` if the address cannot be read from the child,
/// `EAFNOSUPPORT` if an IPv4 connect would have to be redirected to an IPv6
/// proxy, `ENOSYS` if the child's descriptor cannot be duplicated.
async fn connect_on_behalf(
    notif: &SeccompNotif,
    ctx: &Arc<SupervisorCtx>,
    notif_fd: RawFd,
) -> NotifAction {
    let args = &notif.data.args;
    let sockfd = args[0] as i32;
    let addr_ptr = args[1];
    let addr_len = args[2] as u32 as usize;

    // The length is chosen by the child; never copy more than the largest
    // socket address.
    if addr_len > std::mem::size_of::<libc::sockaddr_storage>() {
        return NotifAction::Errno(libc::EINVAL);
    }

    let addr_bytes = match read_child_mem(notif_fd, notif.id, notif.pid, addr_ptr, addr_len) {
        Ok(b) => b,
        Err(_) => return NotifAction::Errno(libc::EIO),
    };

    let ip = match parse_ip_from_sockaddr(&addr_bytes) {
        Some(ip) => ip,
        None => return NotifAction::Continue,
    };

    let ns = ctx.network.lock().await;
    let live_policy = {
        let pfs = ctx.policy_fn.lock().await;
        pfs.live_policy.clone()
    };
    if let crate::seccomp::notif::NetworkPolicy::AllowList(ref allowed) =
        ns.effective_network_policy(notif.pid, live_policy.as_ref())
    {
        if !allowed.contains(&ip) {
            return NotifAction::Errno(ECONNREFUSED);
        }
    }

    // Redirect only when a proxy is configured AND the port is intercepted.
    let intercept = parse_port_from_sockaddr(&addr_bytes)
        .map_or(false, |port| ns.http_acl_ports.contains(&port));
    let proxy_addr = if intercept { ns.http_acl_addr } else { None };
    let orig_dest_map = ns.http_acl_orig_dest.clone();
    drop(ns);

    let is_ipv6 = ip.is_ipv6();
    let proxy_sockaddr = match proxy_addr {
        Some(proxy) => match proxy_sockaddr_bytes(proxy, is_ipv6) {
            Some(bytes) => Some(bytes),
            None => return NotifAction::Errno(libc::EAFNOSUPPORT),
        },
        None => None,
    };

    let dup_fd = match crate::seccomp::notif::dup_fd_from_pid(notif.pid, sockfd) {
        Ok(fd) => fd,
        Err(_) => return NotifAction::Errno(libc::ENOSYS),
    };

    if proxy_sockaddr.is_some() {
        if let Some(ref orig_dest_map) = orig_dest_map {
            // Record the destination the child asked for, keyed by the local
            // address the socket has before it connects to the proxy.
            if let Some(local_addr) = bind_and_query_local_addr(dup_fd.as_raw_fd(), is_ipv6) {
                if let Ok(mut map) = orig_dest_map.write() {
                    map.insert(local_addr, ip);
                }
            }
        }
    }

    let connect_addr: &[u8] = match proxy_sockaddr {
        Some(ref bytes) => bytes.as_slice(),
        None => &addr_bytes[..],
    };

    // SAFETY: the pointer/length pair describes `connect_addr`, which stays
    // alive for the call. The length is the buffer's real length, so the
    // kernel never reads past it even if fewer bytes were read than requested.
    let ret = unsafe {
        libc::connect(
            dup_fd.as_raw_fd(),
            connect_addr.as_ptr() as *const libc::sockaddr,
            connect_addr.len() as libc::socklen_t,
        )
    };
    if ret == 0 {
        NotifAction::ReturnValue(0)
    } else {
        let errno = io::Error::last_os_error()
            .raw_os_error()
            .unwrap_or(libc::EIO);
        NotifAction::Errno(errno)
    }
}
/// Handles an intercepted `sendto(sockfd, buf, len, flags, addr, addrlen)` by
/// performing the send from the supervisor on a duplicate of the child's
/// socket.
///
/// The call is left to the kernel (`NotifAction::Continue`) when no
/// destination address is supplied or when the address is not
/// `AF_INET`/`AF_INET6`. In those cases the supervisor copies nothing, so the
/// `MAX_SEND_BUF` limit is not applied.
///
/// Otherwise:
/// * payloads larger than `MAX_SEND_BUF` fail with `EMSGSIZE`;
/// * if the effective network policy is an allow-list that does not contain
///   the destination IP, the call fails with `ECONNREFUSED`;
/// * the payload is copied from the child and sent with `sendto`; the byte
///   count or errno of that call is returned.
///
/// Other failure codes: `EINVAL` if `addrlen` exceeds
/// `sizeof(sockaddr_storage)`, `EIO` if child memory cannot be read, `ENOSYS`
/// if the child's descriptor cannot be duplicated.
async fn sendto_on_behalf(
    notif: &SeccompNotif,
    ctx: &Arc<SupervisorCtx>,
    notif_fd: RawFd,
) -> NotifAction {
    let args = &notif.data.args;
    let sockfd = args[0] as i32;
    let buf_ptr = args[1];
    let buf_len = args[2] as usize;
    let flags = args[3] as i32;
    let addr_ptr = args[4];
    let addr_len = args[5] as u32 as usize;

    // No destination address: nothing to police, let the kernel run it.
    if addr_ptr == 0 {
        return NotifAction::Continue;
    }

    // The length is chosen by the child; never copy more than the largest
    // socket address.
    if addr_len > std::mem::size_of::<libc::sockaddr_storage>() {
        return NotifAction::Errno(libc::EINVAL);
    }

    let addr_bytes = match read_child_mem(notif_fd, notif.id, notif.pid, addr_ptr, addr_len) {
        Ok(b) => b,
        Err(_) => return NotifAction::Errno(libc::EIO),
    };

    let ip = match parse_ip_from_sockaddr(&addr_bytes) {
        Some(ip) => ip,
        None => return NotifAction::Continue,
    };

    // Only from here on does the supervisor copy the payload, so only here
    // does the copy limit apply.
    if buf_len > MAX_SEND_BUF {
        return NotifAction::Errno(libc::EMSGSIZE);
    }

    let ns = ctx.network.lock().await;
    let live_policy = {
        let pfs = ctx.policy_fn.lock().await;
        pfs.live_policy.clone()
    };
    if let crate::seccomp::notif::NetworkPolicy::AllowList(ref allowed) =
        ns.effective_network_policy(notif.pid, live_policy.as_ref())
    {
        if !allowed.contains(&ip) {
            return NotifAction::Errno(ECONNREFUSED);
        }
    }
    drop(ns);

    let data = match read_child_mem(notif_fd, notif.id, notif.pid, buf_ptr, buf_len) {
        Ok(b) => b,
        Err(_) => return NotifAction::Errno(libc::EIO),
    };
    let dup_fd = match crate::seccomp::notif::dup_fd_from_pid(notif.pid, sockfd) {
        Ok(fd) => fd,
        Err(_) => return NotifAction::Errno(libc::ENOSYS),
    };

    // SAFETY: both pointer/length pairs describe buffers (`data`,
    // `addr_bytes`) that stay alive for the call. The address length is the
    // buffer's real length, so the kernel never reads past it.
    let ret = unsafe {
        libc::sendto(
            dup_fd.as_raw_fd(),
            data.as_ptr() as *const libc::c_void,
            data.len(),
            flags,
            addr_bytes.as_ptr() as *const libc::sockaddr,
            addr_bytes.len() as libc::socklen_t,
        )
    };
    if ret >= 0 {
        NotifAction::ReturnValue(ret as i64)
    } else {
        let errno = io::Error::last_os_error()
            .raw_os_error()
            .unwrap_or(libc::EIO);
        NotifAction::Errno(errno)
    }
}
/// Reads a native-endian `u64` from `bytes` at `offset`.
///
/// Panics if `bytes` is shorter than `offset + 8`; callers only use it on
/// buffers whose length has already been checked.
fn ne_u64_at(bytes: &[u8], offset: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[offset..offset + 8]);
    u64::from_ne_bytes(raw)
}

/// Handles an intercepted `sendmsg(sockfd, msg, flags)` by rebuilding the
/// message in the supervisor and sending it on a duplicate of the child's
/// socket.
///
/// The `msghdr` is decoded with the 56-byte, 8-byte-pointer layout
/// (`msg_name` at 0, `msg_namelen` at 8, `msg_iov` at 16, `msg_iovlen` at 24,
/// `msg_control` at 32, `msg_controllen` at 40), and each `iovec` as two
/// 8-byte fields.
///
/// The call is left to the kernel (`NotifAction::Continue`) when `msg_name`
/// is NULL or the address is not `AF_INET`/`AF_INET6`.
///
/// Otherwise:
/// * if the effective network policy is an allow-list that does not contain
///   the destination IP, the call fails with `ECONNREFUSED`;
/// * more than 1024 iovec entries, a single entry larger than
///   `MAX_SEND_BUF`, or a combined payload larger than `MAX_SEND_BUF` fail
///   with `EMSGSIZE` instead of being truncated or copied without bound;
/// * control data is copied best-effort: at most 4096 bytes are read, and a
///   failed read sends the message without control data;
/// * the byte count or errno of the supervisor's `sendmsg` is returned.
///
/// Other failure codes: `EFAULT` if the `msghdr` cannot be read, `EINVAL`
/// for a negative `msg_namelen`, `EIO` if other child memory cannot be read,
/// `ENOSYS` if the child's descriptor cannot be duplicated.
async fn sendmsg_on_behalf(
    notif: &SeccompNotif,
    ctx: &Arc<SupervisorCtx>,
    notif_fd: RawFd,
) -> NotifAction {
    const MSGHDR_LEN: usize = 56;
    const IOVEC_LEN: usize = 16;
    const MAX_IOV: usize = 1024;
    const MAX_CONTROL: usize = 4096;

    let args = &notif.data.args;
    let sockfd = args[0] as i32;
    let msghdr_ptr = args[1];
    let flags = args[2] as i32;

    let msghdr_bytes = match read_child_mem(notif_fd, notif.id, notif.pid, msghdr_ptr, MSGHDR_LEN)
    {
        Ok(b) if b.len() >= MSGHDR_LEN => b,
        _ => return NotifAction::Errno(libc::EFAULT),
    };
    let msg_name_ptr = ne_u64_at(&msghdr_bytes, 0);
    let msg_namelen = {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&msghdr_bytes[8..12]);
        i32::from_ne_bytes(raw)
    };
    let msg_iov_ptr = ne_u64_at(&msghdr_bytes, 16);
    let msg_iovlen = ne_u64_at(&msghdr_bytes, 24);
    let msg_control_ptr = ne_u64_at(&msghdr_bytes, 32);
    let msg_controllen = ne_u64_at(&msghdr_bytes, 40);

    // No destination address: nothing to police, let the kernel run it.
    if msg_name_ptr == 0 {
        return NotifAction::Continue;
    }
    if msg_namelen < 0 {
        return NotifAction::Errno(libc::EINVAL);
    }
    // The length is chosen by the child; never copy more than the largest
    // socket address.
    let name_len = (msg_namelen as usize).min(std::mem::size_of::<libc::sockaddr_storage>());

    let addr_bytes = match read_child_mem(notif_fd, notif.id, notif.pid, msg_name_ptr, name_len) {
        Ok(b) => b,
        Err(_) => return NotifAction::Errno(libc::EIO),
    };
    let ip = match parse_ip_from_sockaddr(&addr_bytes) {
        Some(ip) => ip,
        None => return NotifAction::Continue,
    };

    let ns = ctx.network.lock().await;
    let live_policy = {
        let pfs = ctx.policy_fn.lock().await;
        pfs.live_policy.clone()
    };
    if let crate::seccomp::notif::NetworkPolicy::AllowList(ref allowed) =
        ns.effective_network_policy(notif.pid, live_policy.as_ref())
    {
        if !allowed.contains(&ip) {
            return NotifAction::Errno(ECONNREFUSED);
        }
    }
    drop(ns);

    // Refuse oversized vectors instead of silently sending only a prefix.
    if msg_iovlen > MAX_IOV as u64 {
        return NotifAction::Errno(libc::EMSGSIZE);
    }
    let iovlen = msg_iovlen as usize;
    let iov_bytes = match read_child_mem(
        notif_fd,
        notif.id,
        notif.pid,
        msg_iov_ptr,
        iovlen * IOVEC_LEN,
    ) {
        Ok(b) => b,
        Err(_) => return NotifAction::Errno(libc::EIO),
    };

    let mut data_bufs: Vec<Vec<u8>> = Vec::with_capacity(iovlen);
    let mut total_len: usize = 0;
    for entry in iov_bytes.chunks_exact(IOVEC_LEN).take(iovlen) {
        let iov_base = ne_u64_at(entry, 0);
        let iov_len = ne_u64_at(entry, 8) as usize;
        if iov_len > MAX_SEND_BUF {
            return NotifAction::Errno(libc::EMSGSIZE);
        }
        if iov_base == 0 || iov_len == 0 {
            // Keep the slot so the number of entries is preserved.
            data_bufs.push(Vec::new());
            continue;
        }
        // Bound the combined copy as well as each individual entry.
        total_len = total_len.saturating_add(iov_len);
        if total_len > MAX_SEND_BUF {
            return NotifAction::Errno(libc::EMSGSIZE);
        }
        let buf = match read_child_mem(notif_fd, notif.id, notif.pid, iov_base, iov_len) {
            Ok(b) => b,
            Err(_) => return NotifAction::Errno(libc::EIO),
        };
        data_bufs.push(buf);
    }

    // Built only after `data_bufs` is complete so every pointer stays valid.
    let mut local_iovs: Vec<libc::iovec> = data_bufs
        .iter()
        .map(|buf| libc::iovec {
            iov_base: buf.as_ptr() as *mut libc::c_void,
            iov_len: buf.len(),
        })
        .collect();

    // Best effort: truncated to MAX_CONTROL bytes, dropped if unreadable.
    let control_buf = if msg_control_ptr != 0 && msg_controllen > 0 {
        let len = (msg_controllen as usize).min(MAX_CONTROL);
        read_child_mem(notif_fd, notif.id, notif.pid, msg_control_ptr, len).ok()
    } else {
        None
    };

    let dup_fd = match crate::seccomp::notif::dup_fd_from_pid(notif.pid, sockfd) {
        Ok(fd) => fd,
        Err(_) => return NotifAction::Errno(libc::ENOSYS),
    };

    // SAFETY: `msghdr` is plain data for which all-zero is valid.
    let mut msg: libc::msghdr = unsafe { std::mem::zeroed() };
    msg.msg_name = addr_bytes.as_ptr() as *mut libc::c_void;
    msg.msg_namelen = addr_bytes.len() as u32;
    msg.msg_iov = local_iovs.as_mut_ptr();
    msg.msg_iovlen = local_iovs.len();
    if let Some(ref ctrl) = control_buf {
        msg.msg_control = ctrl.as_ptr() as *mut libc::c_void;
        msg.msg_controllen = ctrl.len();
    }

    // SAFETY: every pointer in `msg` refers to a buffer (`addr_bytes`,
    // `local_iovs`, `data_bufs`, `control_buf`) that outlives this call, and
    // every length is the real length of its buffer.
    let ret = unsafe { libc::sendmsg(dup_fd.as_raw_fd(), &msg, flags) };
    if ret >= 0 {
        NotifAction::ReturnValue(ret as i64)
    } else {
        let errno = io::Error::last_os_error()
            .raw_os_error()
            .unwrap_or(libc::EIO);
        NotifAction::Errno(errno)
    }
}
/// Dispatches an intercepted network syscall to its on-behalf handler.
///
/// `connect`, `sendto` and `sendmsg` are routed to their handlers; any other
/// syscall number yields `NotifAction::Continue`.
pub(crate) async fn handle_net(
    notif: &SeccompNotif,
    ctx: &Arc<SupervisorCtx>,
    notif_fd: RawFd,
) -> NotifAction {
    match notif.data.nr as i64 {
        libc::SYS_connect => connect_on_behalf(notif, ctx, notif_fd).await,
        libc::SYS_sendto => sendto_on_behalf(notif, ctx, notif_fd).await,
        libc::SYS_sendmsg => sendmsg_on_behalf(notif, ctx, notif_fd).await,
        _ => NotifAction::Continue,
    }
}
/// Result of resolving a list of host names with [`resolve_hosts`].
#[derive(Debug, Clone)]
pub struct ResolvedHosts {
    /// Every IP address obtained, plus the IPv4 and IPv6 loopback addresses.
    pub ips: HashSet<IpAddr>,
    /// Text in `/etc/hosts` format (`<ip> <name>` per line): the two
    /// `localhost` entries followed by one line per resolved address.
    pub etc_hosts: String,
}
/// Resolves each entry of `hosts` and collects the resulting addresses.
///
/// The returned set always contains the IPv4 and IPv6 loopback addresses, and
/// the returned `etc_hosts` text always starts with the two `localhost`
/// lines. Every address obtained for a host is added to the set and appended
/// as an `<ip> <host>` line.
///
/// # Errors
///
/// Stops at the first host whose lookup fails and returns an error of the
/// same kind whose message names that host.
pub async fn resolve_hosts(hosts: &[String]) -> io::Result<ResolvedHosts> {
    let mut etc_hosts = String::from("127.0.0.1 localhost\n::1 localhost\n");
    let mut ips: HashSet<IpAddr> = HashSet::new();
    ips.insert(Ipv4Addr::LOCALHOST.into());
    ips.insert(Ipv6Addr::LOCALHOST.into());

    for host in hosts {
        // The lookup API wants "host:port"; the port itself is irrelevant.
        let query = format!("{}:0", host);
        let answers = tokio::net::lookup_host(query.as_str())
            .await
            .map_err(|e| {
                io::Error::new(
                    e.kind(),
                    format!("failed to resolve host '{}': {}", host, e),
                )
            })?;

        for ip in answers.map(|sock| sock.ip()) {
            ips.insert(ip);
            etc_hosts.push_str(&format!("{} {}\n", ip, host));
        }
    }

    Ok(ResolvedHosts { ips, etc_hosts })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// With no hosts requested, both loopback addresses are still present.
    #[tokio::test]
    async fn test_resolve_hosts_loopback() {
        let outcome = resolve_hosts(&[]).await.unwrap();
        let v4_loopback = IpAddr::V4(Ipv4Addr::LOCALHOST);
        let v6_loopback = IpAddr::V6(Ipv6Addr::LOCALHOST);

        assert!(outcome.ips.contains(&v4_loopback));
        assert!(outcome.ips.contains(&v6_loopback));
    }

    /// Resolving `localhost` yields a loopback address and a hosts entry.
    #[tokio::test]
    async fn test_resolve_hosts_with_domain() {
        let requested = [String::from("localhost")];
        let outcome = resolve_hosts(&requested).await.unwrap();

        let loopbacks = [
            IpAddr::V4(Ipv4Addr::LOCALHOST),
            IpAddr::V6(Ipv6Addr::LOCALHOST),
        ];
        let has_loopback = loopbacks.iter().any(|ip| outcome.ips.contains(ip));

        assert!(has_loopback);
        assert!(outcome.etc_hosts.contains("localhost"));
    }
}