syd 3.55.0

rock-solid application kernel
Documentation
// Syd: rock-solid application kernel
// src/kernel/net/accept.rs: accept(2) and accept4(2) handler
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{net::IpAddr, os::fd::AsFd};

use bitflags::bitflags;
use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    sys::socket::{SockFlag, SockaddrLike},
};

use crate::{
    cache::UnixVal,
    compat::{sockaddr_family, AddressFamily},
    cookie::{safe_accept4, safe_getsockdomain, SizedSockaddrStorage},
    fd::{fd_inode, has_recv_timeout, peer_inode, SafeOwnedFd},
    ip::SocketCall,
    kernel::net::sandbox_addr,
    proc::proc_tgid,
    req::UNotifyEventRequest,
    sandbox::Capability,
    unix::unix_addr_len,
};

bitflags! {
    /// Internal switches that tell `do_accept4` how to perform the accept call.
    #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
    struct Accept4Flags: u8 {
        /// Ask the accept call to also return the source (peer) address.
        const WANT_ADDR = 0b0000_0001;
        /// The call may block, so it is recorded as a blocking call while it runs.
        const INTERRUPT = 0b0000_0010;
        /// Ignore restarts for the recorded blocking call.
        const IGNORERES = 0b0000_0100;
    }
}

/// Handle accept(2) and accept4(2) for a sandboxed process.
///
/// The function performs the following steps in order:
///
/// 1. Validate the accept4(2) flags in `args[3]` (accept(2) has none).
/// 2. Accept the connection through `do_accept4`, always with `SOCK_CLOEXEC` set.
/// 3. Read and validate the caller-supplied address length at `args[2]`.
/// 4. For IPv4/IPv6 peers, check sandbox access with `sandbox_addr`.
/// 5. For AF_UNIX sockets, record peer information for the accepted socket.
/// 6. Write the address length and the (possibly truncated) address back to
///    the caller's memory at `args[2]` and `args[1]`.
/// 7. Hand the accepted file descriptor to the caller with `send_fd`.
///
/// # Arguments
///
/// * `fd` - Socket to accept the connection on.
/// * `request` - Notification request used to read/write the caller's memory,
///   query the sandbox and send the resulting file descriptor.
/// * `args` - System call arguments: `args[1]` is the address pointer,
///   `args[2]` is the address length pointer, `args[3]` holds the flags
///   when `subcall` is `SocketCall::Accept4`.
/// * `subcall` - Which socket call is being handled.
/// * `is_nonblock` - When `false` the call is treated as blocking.
///
/// # Errors
///
/// * `EINVAL` - Unknown accept4(2) flag bits, or a negative address length.
/// * `EFAULT` - Address length could not be read in full, or the address
///   pointer is non-NULL while the address length pointer is NULL.
/// * Any error propagated from the helpers called with `?`.
pub(crate) fn handle_accept(
    fd: SafeOwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
    subcall: SocketCall,
    is_nonblock: bool,
) -> Result<ScmpNotifResp, Errno> {
    // Validate flags first.
    let mut flags = if subcall == SocketCall::Accept4 {
        // accept4: Linux kernel truncates upper bits.
        #[expect(clippy::cast_possible_truncation)]
        SockFlag::from_bits(args[3] as libc::c_int).ok_or(Errno::EINVAL)?
    } else {
        // accept
        SockFlag::empty()
    };

    let sandbox = request.get_sandbox();
    let force_cloexec = sandbox.flags.force_cloexec();
    let force_rand_fd = sandbox.flags.force_rand_fd();
    drop(sandbox); // release read lock.
    // Remember the close-on-exec setting to pass to send_fd() _before_
    // unconditionally setting SOCK_CLOEXEC for the accept call itself.
    let cloexec = force_cloexec || flags.contains(SockFlag::SOCK_CLOEXEC);
    flags.insert(SockFlag::SOCK_CLOEXEC);

    // Check whether we should block and ignore restarts.
    // Restarts are ignored when the socket has a receive timeout.
    let (is_blocking, ignore_restart) = if !is_nonblock {
        let ignore_restart = has_recv_timeout(&fd)?;
        (true, ignore_restart)
    } else {
        (false, false)
    };

    // Translate the decisions above into flags for do_accept4().
    // The address is requested if the caller passed an address pointer,
    // or if the socket domain is IPv4/IPv6.
    let mut aflags = Accept4Flags::empty();
    if args[1] != 0 || matches!(safe_getsockdomain(&fd)?, libc::AF_INET | libc::AF_INET6) {
        // Source address is only checked for access for IPv{4,6} sockets.
        aflags.insert(Accept4Flags::WANT_ADDR);
    }
    if is_blocking {
        aflags.insert(Accept4Flags::INTERRUPT);
    }
    if ignore_restart {
        aflags.insert(Accept4Flags::IGNORERES);
    }

    // Do the accept call.
    // From here on `fd` names the accepted socket, shadowing the listening one.
    let (fd, maybe_addr) = do_accept4(fd, request, flags, aflags)?;

    // Determine address length if specified.
    //
    // Linux doesn't dereference addrlen if addr is NULL.
    // Linux validates address length after the connection is dequeued.
    let addrlen = if args[1] != 0 && args[2] != 0 {
        const SIZEOF_SOCKLEN_T: usize = size_of::<libc::socklen_t>();
        let mut buf = [0u8; SIZEOF_SOCKLEN_T];
        if request.read_mem(&mut buf, args[2], SIZEOF_SOCKLEN_T)? == SIZEOF_SOCKLEN_T {
            // libc defines socklen_t as u32.
            // Linux rejects negative length.
            // Decode as signed so that a negative value fails the conversion below.
            let len = i32::from_ne_bytes(buf);
            let len = libc::socklen_t::try_from(len).or(Err(Errno::EINVAL))?;
            Some(len)
        } else {
            // Linux returns EFAULT for invalid address length pointer.
            return Err(Errno::EFAULT);
        }
    } else if args[1] != 0 {
        // Linux returns EFAULT when addr is non-NULL and addrlen is NULL.
        return Err(Errno::EFAULT);
    } else {
        None
    };

    // Check sandbox access for IPv{4,6} sockets.
    // Addresses of any other family skip this block entirely.
    if let Some(SizedSockaddrStorage { addr, .. }) = maybe_addr.as_ref().filter(|addr| {
        matches!(
            sockaddr_family(&addr.addr),
            AddressFamily::Inet | AddressFamily::Inet6
        )
    }) {
        let sandbox = request.get_sandbox();
        sandbox_addr(
            request,
            &sandbox,
            subcall,
            addr,
            &None, /*root*/
            Capability::empty(),
        )?;

        // Move domain on accept as necessary.
        // IPv6 addresses are passed in canonical form.
        if let Some((ip, port)) = addr
            .as_sockaddr_in()
            .map(|sin| (IpAddr::V4(sin.ip()), sin.port()))
            .or_else(|| {
                addr.as_sockaddr_in6()
                    .map(|sa6| (sa6.ip().to_canonical(), sa6.port()))
            })
        {
            sandbox.move_on_accept(ip, port);
        }
    }

    // Get accepted socket inode for AF_UNIX handling.
    // `ino` is None for other domains, or if either lookup fails.
    let ino = matches!(safe_getsockdomain(&fd), Ok(libc::AF_UNIX))
        .then(|| fd_inode(&fd).ok())
        .flatten();

    // Record peer PID for SO_PEERCRED.
    // This is best-effort: lookup failures and the result of
    // set_unix_peer() are deliberately ignored.
    if let Some(ino) = ino {
        if let Ok(peer) = peer_inode(ino) {
            let cpid = request.get_unix(peer).and_then(|u| u.self_pid);
            if let (Some(cpid), Ok(spid)) = (cpid, proc_tgid(request.scmpreq.pid())) {
                let _ = request.set_unix_peer(ino, spid, cpid);
            }
        }
    }

    // Write address buffer as necessary.
    if let Some(addrlen) = addrlen {
        // addrlen.is_some() asserts maybe_addr.is_some().
        // addrlen is only Some when args[1] != 0, which sets WANT_ADDR above.
        #[expect(clippy::disallowed_methods)]
        let SizedSockaddrStorage { addr, mut size } = maybe_addr.unwrap();

        // Change peer address as necessary for UNIX domain sockets.
        // Only addresses with a path are resolved; if resolution fails
        // the address returned by the accept call is used unchanged.
        let addr = if let Some(ino) = ino {
            if addr
                .as_unix_addr()
                .map(|addr| addr.path().is_some())
                .unwrap_or(false)
            {
                if let Ok(addr) = request.resolve_unix_peer(&addr, ino) {
                    // Report the length of the resolved address.
                    size = addr.as_unix_addr().map_or(size, unix_addr_len);

                    // Best-effort: record the resolved peer for the accepted socket.
                    let _ = request.add_unix(
                        &fd,
                        request.scmpreq.pid(),
                        UnixVal {
                            peer: addr.as_unix_addr().copied(),
                            ..UnixVal::default()
                        },
                    );

                    addr
                } else {
                    addr
                }
            } else {
                addr
            }
        } else {
            addr
        };

        // Linux writes address length before address.
        //
        // Write back full address length.
        // This must be socklen_t and _not_ usize!
        let buf = (size as libc::socklen_t).to_ne_bytes();
        request.write_mem_all(&buf, args[2])?;

        // Create a byte slice from the socket address.
        // SAFETY: SockaddrStorage is initialized; as_ptr() and len() return valid bounds.
        let buf =
            unsafe { std::slice::from_raw_parts(addr.as_ptr().cast::<u8>(), addr.len() as usize) };

        // Write the truncated socket address into memory.
        // We truncate late to avoid potential UB in std::slice::slice_from_raw_parts().
        // At most `addrlen` bytes are written, the size of the caller's buffer.
        let len = size.min(addrlen) as usize;
        request.write_mem_all(&buf[..len], args[1])?;
    }

    // Send the fd and return.
    request.send_fd(fd, cloexec, force_rand_fd)
}

/// Perform the accept4(2) call on `fd`, bracketing it with blocking-call
/// bookkeeping when requested.
///
/// * `flags` - Socket flags forwarded to `safe_accept4`.
/// * `aflags` - `WANT_ADDR` asks for the source address, `INTERRUPT` records
///   the call as a blocking call for its duration, `IGNORERES` is forwarded
///   to that record.
///
/// Returns the accepted socket together with the optional source address.
/// Errors from adding or removing the blocking-call record are propagated
/// and take precedence over the result of the accept call.
fn do_accept4<Fd: AsFd>(
    fd: Fd,
    request: &UNotifyEventRequest,
    flags: SockFlag,
    aflags: Accept4Flags,
) -> Result<(SafeOwnedFd, Option<SizedSockaddrStorage>), Errno> {
    let want_addr = aflags.contains(Accept4Flags::WANT_ADDR);
    let interruptible = aflags.contains(Accept4Flags::INTERRUPT);
    let ignore_restart = aflags.contains(Accept4Flags::IGNORERES);

    // Register the blocking call up front so that it can get invalidated.
    if interruptible {
        request.cache.add_sys_block(request.scmpreq, ignore_restart)?;
    }

    // Issue accept4(2); keep the outcome until the record is cleaned up.
    let accepted = safe_accept4(&fd, flags, want_addr);

    // Drop the invalidation record regardless of how the accept call ended.
    if interruptible {
        request.cache.del_sys_block(request.scmpreq.id)?;
    }

    accepted
}