syd 3.56.0

rock-solid application kernel
Documentation
// Syd: rock-solid application kernel
// src/kernel/net/accept.rs: accept(2) and accept4(2) handler
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{net::IpAddr, os::fd::AsFd};

use bitflags::bitflags;
use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    sys::socket::{SockFlag, SockaddrLike, SockaddrStorage},
};

use crate::{
    cache::UnixVal,
    compat::{sockaddr_family, AddressFamily},
    cookie::{safe_accept4, safe_getsockdomain, safe_getsockproto, SizedSockaddrStorage},
    fd::{fd_inode, has_recv_timeout, peer_inode, SafeOwnedFd},
    ip::{IpProto, SocketCall},
    kernel::net::sandbox_addr,
    proc::proc_tgid,
    req::UNotifyEventRequest,
    sandbox::Capability,
    unix::unix_addr_len,
};

bitflags! {
    /// Internal switches that steer one emulated accept(2)/accept4(2) call.
    #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
    struct Accept4Flags: u8 {
        /// Ask the accept call to return the source address.
        const WANT_ADDR = 0b001;
        /// The call may block: record it so it can get invalidated.
        const INTERRUPT = 0b010;
        /// Ignore restarts for the recorded blocking call.
        const IGNORERES = 0b100;
    }
}

/// Handle accept(2) and accept4(2) on behalf of the sandboxed process.
///
/// `fd` is the listening socket, `args` are the raw system call arguments
/// (`args[1]` is the user address pointer, `args[2]` the user address
/// length pointer, `args[3]` the accept4(2) flags), `subcall` selects
/// between accept and accept4, and `is_nonblock` tells whether the call
/// must not be treated as blocking.
///
/// The steps are, in order:
///
/// 1. Validate the accept4(2) flags; the accept itself is always done
///    with `SOCK_CLOEXEC`, the close-on-exec state the caller asked for
///    (or the sandbox forces) is applied when the fd is sent back.
/// 2. Accept the connection via `do_accept4`.
/// 3. Read and validate the user supplied address length.
/// 4. Check IPv4/IPv6 source addresses against the sandbox.
/// 5. Write the address length and the (possibly truncated) address back.
/// 6. Record bookkeeping for accepted UNIX sockets.
/// 7. Send the new fd to the sandboxed process.
///
/// # Errors
///
/// - `EINVAL` for unknown accept4(2) flags or a negative address length.
/// - `EFAULT` when the address pointer is non-NULL while the length
///   pointer is NULL or cannot be read in full.
/// - Any error propagated from the helpers called along the way.
#[expect(clippy::cognitive_complexity)]
pub(crate) fn handle_accept(
    request: &UNotifyEventRequest,
    fd: SafeOwnedFd,
    args: &[u64; 6],
    subcall: SocketCall,
    is_nonblock: bool,
) -> Result<ScmpNotifResp, Errno> {
    // Validate flags first.
    let mut flags = if subcall == SocketCall::Accept4 {
        // accept4: Linux kernel truncates upper bits.
        #[expect(clippy::cast_possible_truncation)]
        SockFlag::from_bits(args[3] as libc::c_int).ok_or(Errno::EINVAL)?
    } else {
        // accept
        SockFlag::empty()
    };

    let sandbox = request.get_sandbox();
    let force_cloexec = sandbox.flags.force_cloexec();
    let force_rand_fd = sandbox.flags.force_rand_fd();
    drop(sandbox); // release read lock.
    // Remember the close-on-exec state to apply when sending the fd,
    // then unconditionally accept with SOCK_CLOEXEC.
    let cloexec = force_cloexec || flags.contains(SockFlag::SOCK_CLOEXEC);
    flags.insert(SockFlag::SOCK_CLOEXEC);

    // Check whether we should block and ignore restarts.
    let (is_blocking, ignore_restart) = if !is_nonblock {
        let ignore_restart = has_recv_timeout(&fd)?;
        (true, ignore_restart)
    } else {
        (false, false)
    };

    // Determine socket domain of the listener.
    let sock_dom = safe_getsockdomain(&fd)?;

    let mut aflags = Accept4Flags::empty();
    if args[1] != 0 || matches!(sock_dom, libc::AF_INET | libc::AF_INET6) {
        // Source address is only checked for access for IPv{4,6} sockets.
        aflags.insert(Accept4Flags::WANT_ADDR);
    }
    if is_blocking {
        aflags.insert(Accept4Flags::INTERRUPT);
    }
    if ignore_restart {
        aflags.insert(Accept4Flags::IGNORERES);
    }

    // Save listener's bound address before accept(2) for UNIX sockets.
    let listen_addr = if sock_dom == libc::AF_UNIX {
        fd_inode(&fd)
            .ok()
            .and_then(|ino| request.get_unix(ino))
            .and_then(|unix| unix.addr)
    } else {
        None
    };

    // Do the accept call.
    // From here on "fd" is the accepted socket, not the listener.
    let (fd, maybe_addr) = do_accept4(fd, request, flags, aflags)?;

    // Determine address length if specified.
    //
    // Linux doesn't dereference addrlen if addr is NULL.
    // Linux validates address length after the connection is dequeued.
    let addrlen = if args[1] != 0 && args[2] != 0 {
        const SIZEOF_SOCKLEN_T: usize = size_of::<libc::socklen_t>();
        let mut buf = [0u8; SIZEOF_SOCKLEN_T];
        if request.read_mem(&mut buf, args[2], SIZEOF_SOCKLEN_T)? == SIZEOF_SOCKLEN_T {
            // libc defines socklen_t as u32.
            // Linux rejects negative length.
            let len = i32::from_ne_bytes(buf);
            let len = libc::socklen_t::try_from(len).or(Err(Errno::EINVAL))?;
            Some(len)
        } else {
            // Linux returns EFAULT for invalid address length pointer.
            return Err(Errno::EFAULT);
        }
    } else if args[1] != 0 {
        // Linux returns EFAULT when addr is non-NULL and addrlen is NULL.
        return Err(Errno::EFAULT);
    } else {
        None
    };

    // Check sandbox access for IPv{4,6} sockets.
    if let Some(SizedSockaddrStorage { addr, .. }) = maybe_addr.as_ref().filter(|addr| {
        matches!(
            sockaddr_family(&addr.addr),
            AddressFamily::Inet | AddressFamily::Inet6
        )
    }) {
        let sandbox = request.get_sandbox();

        // accept{,4} is IP blocklist only.
        // No protocol filtering is done.
        sandbox_addr(
            request,
            &sandbox,
            subcall,
            addr,
            &None, /*root*/
            Capability::empty(),
            None, /*proto*/
        )?;

        // Move domain on accept as necessary.
        if let Some((ip, port)) = addr
            .as_sockaddr_in()
            .map(|sin| (IpAddr::V4(sin.ip()), sin.port()))
            .or_else(|| {
                addr.as_sockaddr_in6()
                    .map(|sa6| (sa6.ip().to_canonical(), sa6.port()))
            })
        {
            // The socket protocol is only looked up when the sandbox asks for it.
            let sock_proto = if sandbox.has_accept_proto_move() {
                safe_getsockproto(&fd).ok().and_then(IpProto::from_raw)
            } else {
                None
            };
            sandbox.move_on_accept(ip, port, sock_proto);
        }
    }

    // Get accepted socket inode for AF_UNIX handling.
    let ino = (sock_dom == libc::AF_UNIX)
        .then(|| fd_inode(&fd).ok())
        .flatten();

    // Write address buffer as necessary.
    let mut peer_addr = None;
    if let Some(addrlen) = addrlen {
        // addrlen.is_some() asserts maybe_addr.is_some().
        #[expect(clippy::disallowed_methods)]
        let SizedSockaddrStorage { addr, mut size } = maybe_addr.unwrap();

        // Change peer address as necessary for UNIX domain sockets.
        let addr = if let Some(ino) = ino {
            if addr
                .as_unix_addr()
                .map(|addr| addr.path().is_some())
                .unwrap_or(false)
            {
                if let Some(my_peer_addr) = request.peer_unix_addr(ino) {
                    peer_addr = Some(my_peer_addr);
                    let peer_size = unix_addr_len(&my_peer_addr);

                    // SAFETY:
                    // "my_peer_addr" is a valid UnixAddr from Unix map.
                    // Its as_ptr()/unix_addr_len() give a valid pointer/length pair.
                    let rewritten = unsafe {
                        SockaddrStorage::from_raw(
                            my_peer_addr.as_ptr().cast(),
                            Some(peer_size),
                        )
                    };

                    match rewritten {
                        Some(rewritten) => {
                            // Commit the new length only together with the
                            // new address, so that "size" always describes
                            // the address that is actually written back.
                            size = peer_size;
                            rewritten
                        }
                        // Conversion failed: keep the original address
                        // together with its original length.
                        None => addr,
                    }
                } else {
                    addr
                }
            } else {
                addr
            }
        } else {
            addr
        };

        // Linux writes address length before address.
        //
        // Write back full address length.
        // This must be socklen_t and _not_ usize!
        let buf = (size as libc::socklen_t).to_ne_bytes();
        request.write_mem_all(&buf, args[2])?;

        // Create a byte slice from the socket address.
        // SAFETY: SockaddrStorage is initialized; as_ptr() and len() return valid bounds.
        let buf =
            unsafe { std::slice::from_raw_parts(addr.as_ptr().cast::<u8>(), addr.len() as usize) };

        // Write the truncated socket address into memory.
        // We truncate late to avoid potential UB in std::slice::slice_from_raw_parts().
        let len = size.min(addrlen) as usize;
        request.write_mem_all(&buf[..len], args[1])?;
    }

    // Record information for accepted UNIX socket.
    if let Some(ino) = ino {
        let peer_unix = peer_inode(ino).ok().and_then(|peer| request.get_unix(peer));
        // Prefer the peer address found while rewriting the address above,
        // fall back to the address recorded for the peer socket.
        let peer_addr = peer_addr.or_else(|| peer_unix.as_ref().and_then(|unix| unix.addr));
        let peer_pid = peer_unix.and_then(|unix| unix.self_pid);

        if listen_addr.is_some() || peer_addr.is_some() || peer_pid.is_some() {
            if let Ok(pid) = proc_tgid(request.scmpreq.pid()) {
                // Best effort: a failure to record must not fail the accept.
                let _ = request.add_unix2(
                    ino,
                    pid,
                    UnixVal {
                        peer_pid,
                        addr: listen_addr,
                        peer: peer_addr,
                        ..UnixVal::default()
                    },
                );
            }
        }
    }

    // Send the fd and return.
    request.send_fd(fd, cloexec, force_rand_fd)
}

/// Run accept4(2) on `fd`, bracketing it with a blocking-call record.
///
/// When `aflags` contains `INTERRUPT`, the call is registered in the
/// request cache before accepting so it can get invalidated, and the
/// record is removed again afterwards; `IGNORERES` is forwarded to the
/// registration. `WANT_ADDR` decides whether the source address is
/// requested from `safe_accept4`.
///
/// Returns the accepted socket together with the optional source address.
/// Errors from registering or removing the record are propagated, and an
/// error from removing the record takes precedence over the accept result.
fn do_accept4<Fd: AsFd>(
    fd: Fd,
    request: &UNotifyEventRequest,
    flags: SockFlag,
    aflags: Accept4Flags,
) -> Result<(SafeOwnedFd, Option<SizedSockaddrStorage>), Errno> {
    let interruptible = aflags.contains(Accept4Flags::INTERRUPT);
    let want_addr = aflags.contains(Accept4Flags::WANT_ADDR);

    // Register the blocking call up front so it can get invalidated.
    if interruptible {
        let ignore_restart = aflags.contains(Accept4Flags::IGNORERES);
        request
            .cache
            .add_sys_block(request.scmpreq, ignore_restart)?;
    }

    // Perform the actual accept4(2).
    let accepted = safe_accept4(&fd, flags, want_addr);

    // The call has returned, drop the invalidation record again.
    if interruptible {
        request.cache.del_sys_block(request.scmpreq.id)?;
    }

    accepted
}