syd 3.52.0

rock-solid application kernel
Documentation
//
// Syd: rock-solid application kernel
// src/kernel/net/bind.rs: bind(2) handler
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{net::IpAddr, os::fd::AsRawFd};

use ipnet::IpNet;
use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    sys::socket::{getsockname, SockaddrLike, SockaddrStorage},
};

use crate::{
    cache::UnixVal,
    compat::{sockaddr_family, AddressFamily},
    cookie::{safe_bind, safe_fchdir, safe_umask},
    fd::SafeOwnedFd,
    info,
    lookup::CanonicalPath,
    path::XPathBuf,
    proc::proc_umask,
    req::UNotifyEventRequest,
    sandbox::{Action, AddressPattern, Capability, CidrRule},
    wildmatch::MatchMethod,
};

/// Handle a bind(2) request: perform the bind on `fd` and then do the
/// post-bind sandbox bookkeeping.
///
/// # Parameters
///
/// - `fd`: Our copy of the socket to bind. It is consumed by this function
///   and closed (dropped) before the sandbox is modified.
/// - `addr`: A pair of socket addresses. The first element is the address
///   handed to `safe_bind`; the second is only used to record the UNIX
///   address passed to `request.add_unix` for path-based UNIX sockets.
/// - `root`: Canonical path information for path-based UNIX sockets. It
///   must be `Some` whenever the first address is a UNIX address with a
///   path, because it is unwrapped in that case.
/// - `request`: The seccomp notify request being served.
/// - `allow_safe_bind`: When `true`, an `Allow` rule for
///   `CAP_NET_CONNECT` matching the bound address is added to the sandbox
///   after a successful bind.
///
/// # Returns
///
/// `request.return_syscall(0)` once the bind has succeeded.
///
/// # Errors
///
/// Errors from `proc_umask`, `safe_fchdir`, `add_sys_block`,
/// `del_sys_block` and `safe_bind` are propagated to the caller.
/// Errors raised by the post-bind bookkeeping are deliberately discarded,
/// because the bind itself has already succeeded at that point.
///
/// # Panics
///
/// Panics if the address is a UNIX address with a path and `root` is
/// `None`.
pub(crate) fn handle_bind(
    fd: SafeOwnedFd,
    addr: (SockaddrStorage, SockaddrStorage),
    root: Option<CanonicalPath>,
    request: &UNotifyEventRequest,
    allow_safe_bind: bool,
) -> Result<ScmpNotifResp, Errno> {
    let req = request.scmpreq;
    // `addr` is what we bind to; `argaddr` is kept for the bind map entry.
    let (addr, argaddr) = addr;

    // Prepare environment for UNIX domain sockets.
    // Only path-based UNIX sockets need this, abstract and unnamed
    // sockets do not touch the filesystem.
    if addr.as_unix_addr().and_then(|addr| addr.path()).is_some() {
        // NOTE(review): presumably the umask of the requesting process;
        // confirm against proc_umask.
        let mask = proc_umask(req.pid())?;

        // Honour directory for too long sockets.
        // Current working directory is per-thread here.
        // We cannot resolve symlinks in root or we risk TOCTOU!
        //
        // The unwrap relies on the caller's contract that `root` is
        // `Some` for path-based UNIX addresses.
        #[expect(clippy::disallowed_methods)]
        let dirfd = root.as_ref().unwrap().dir();
        safe_fchdir(dirfd)?;

        // Honour process' umask:
        // Umask is per-thread here.
        safe_umask(mask);
    }

    // Record blocking call so it can get invalidated.
    request.cache.add_sys_block(req, false)?;

    // All done, call underlying system call.
    // bind(2) doesn't follow symlinks in basename.
    // The result is kept unchecked so that the invalidation record
    // below is always removed, whether or not the bind succeeded.
    let result = safe_bind(&fd, &addr);

    // Remove invalidation record.
    request.cache.del_sys_block(req.id)?;

    // Check errors after critical section.
    result?;

    // Handle trace/allow_safe_bind and bind_map.
    // Ignore errors as bind has already succeeded.
    //
    // Configure sandbox:
    // Remove and re-add the address so repeated binds to the same
    // address cannot overflow the vector.
    //
    // The work is wrapped in an immediately-invoked closure so that `?`
    // and early `return` can be used inside while the overall outcome
    // is still discarded via `_result`.
    #[expect(clippy::cognitive_complexity)]
    let _result = (|fd: SafeOwnedFd, request: &UNotifyEventRequest| -> Result<(), Errno> {
        // UNIX sockets are fully handled (and return) inside their arm;
        // only the IP families fall through with an (address, port) pair.
        let (addr, port) = match sockaddr_family(&addr) {
            AddressFamily::Unix => {
                let addr = addr.as_unix_addr().ok_or(Errno::EINVAL)?;
                let unix = match (addr.path(), addr.as_abstract()) {
                    (Some(_), _) => {
                        // Case 1: UNIX domain socket
                        //
                        // addr.path()=Some asserts root is Some.
                        #[expect(clippy::disallowed_methods)]
                        let unix = root.unwrap().take();

                        // Handle bind_map after successful bind(2) for UNIX sockets.
                        // We ignore errors because there's nothing we can do about them.
                        // We use original address structure for path for getsockname(2).
                        let _ = request.add_unix(
                            &fd,
                            request.scmpreq.pid(),
                            UnixVal {
                                addr: argaddr.as_unix_addr().copied(),
                                ..UnixVal::default()
                            },
                        );
                        drop(fd); // Close our copy of the socket.

                        // The bind map entry is recorded regardless;
                        // the connect rule is only added on request.
                        if !allow_safe_bind {
                            return Ok(());
                        }

                        unix
                    }
                    (_, Some(path)) => {
                        // Case 2: UNIX abstract socket
                        //
                        // Prefix UNIX abstract sockets with `@' before access check.
                        // Abstract socket names may contain embedded NUL bytes.
                        let mut unix = XPathBuf::from("@");
                        unix.append_bytes(path);

                        // Handle bind_map after successful bind for UNIX sockets.
                        // We ignore errors because there's nothing we can do
                        // about them.
                        // BindMap is only used for SO_PEERCRED for UNIX abstract sockets.
                        let _ = request.add_unix(&fd, request.scmpreq.pid(), UnixVal::default());
                        drop(fd); // Close our copy of the socket.

                        if !allow_safe_bind {
                            return Ok(());
                        }

                        unix
                    }
                    _ => {
                        // Case 3: unnamed UNIX socket.
                        //
                        // The address carries neither a path nor an abstract name.
                        // If its length is exactly the size of the family field,
                        // treat it as an autobind request and read the name that
                        // was assigned back from the socket.
                        let unix = if addr.len() as usize == size_of::<libc::sa_family_t>() {
                            // Autobind on abstract UNIX socket.
                            getsockname::<SockaddrStorage>(fd.as_raw_fd())?
                                .as_unix_addr()
                                .ok_or(Errno::EINVAL)?
                                .as_abstract()
                                .map(|path| {
                                    // Prefix UNIX abstract sockets with `@' before access check.
                                    // Abstract socket names may contain embedded NUL bytes.
                                    let mut unix = XPathBuf::from("@");
                                    unix.append_bytes(path);
                                    unix
                                })
                                .ok_or(Errno::EINVAL)?
                        } else {
                            // Use dummy path `!unnamed' for unnamed UNIX sockets.
                            XPathBuf::from("!unnamed")
                        };

                        // Handle bind_map after successful bind for UNIX sockets.
                        // We ignore errors because there's nothing we can do
                        // about them.
                        // BindMap is only used for SO_PEERCRED for UNIX abstract sockets.
                        let _ = request.add_unix(&fd, request.scmpreq.pid(), UnixVal::default());
                        drop(fd); // Close our copy of the socket.

                        if !allow_safe_bind {
                            return Ok(());
                        }

                        unix
                    }
                };

                // Only reached with allow_safe_bind set: add a literal
                // allow rule for connecting to the address just bound.
                info!("ctx": "bind", "op": "allow_safe_bind",
                    "sys": "bind", "pid": request.scmpreq.pid().as_raw(), "unix": &unix,
                    "msg": format!("add rule `allow/net/connect+{unix}' after bind"));
                let mut sandbox = request.get_mut_sandbox();
                let acl = sandbox.get_acl_mut(Capability::CAP_NET_CONNECT);
                // Drop an existing identical rule first, so that the rule
                // is moved to the front instead of being duplicated.
                if let Some(idx) = acl.iter().position(|(p, m, a)| {
                    *m == MatchMethod::Literal && *a == Action::Allow && p.is_equal(unix.as_bytes())
                }) {
                    acl.remove(idx);
                }
                return acl.push_front((unix, MatchMethod::Literal, Action::Allow));
            }
            AddressFamily::Inet => {
                // No bind map for IP sockets: without allow_safe_bind
                // there is nothing left to do.
                if !allow_safe_bind {
                    return Ok(());
                }

                let addr = addr.as_sockaddr_in().ok_or(Errno::EINVAL)?;
                let mut port = addr.port();
                // Match exactly this host address (/32).
                let addr = IpNet::new_assert(IpAddr::V4(addr.ip()), 32);

                // Port 0 was requested: read the port actually bound
                // back from the socket.
                if port == 0 {
                    port = getsockname::<SockaddrStorage>(fd.as_raw_fd())?
                        .as_sockaddr_in()
                        .ok_or(Errno::EINVAL)?
                        .port();
                }
                drop(fd); // Close our copy of the socket.

                (addr, port)
            }
            AddressFamily::Inet6 => {
                // No bind map for IP sockets: without allow_safe_bind
                // there is nothing left to do.
                if !allow_safe_bind {
                    return Ok(());
                }

                let addr = addr.as_sockaddr_in6().ok_or(Errno::EINVAL)?;
                let mut port = addr.port();
                let addr = addr.ip();
                // IPv4-mapped IPv6 addresses are recorded as plain IPv4
                // (/32), everything else as an IPv6 host address (/128).
                let addr = if let Some(addr) = addr.to_ipv4_mapped() {
                    IpNet::new_assert(IpAddr::V4(addr), 32)
                } else {
                    IpNet::new_assert(IpAddr::V6(addr), 128)
                };

                // Port 0 was requested: read the port actually bound
                // back from the socket.
                if port == 0 {
                    port = getsockname::<SockaddrStorage>(fd.as_raw_fd())?
                        .as_sockaddr_in6()
                        .ok_or(Errno::EINVAL)?
                        .port();
                }
                drop(fd); // Close our copy of the socket.

                (addr, port)
            }
            // Any other address family: nothing to record.
            _ => return Ok(()),
        };

        // Build the address pattern for the bound IP address and port.
        let addr = AddressPattern {
            addr,
            port: port.into(),
        };
        info!("ctx": "bind", "op": "allow_safe_bind",
            "sys": "bind", "pid": request.scmpreq.pid().as_raw(), "rule": &addr,
            "msg": format!("add rule `allow/net/connect+{addr}' after bind"));

        let rule = CidrRule {
            act: Action::Allow,
            cap: Capability::CAP_NET_CONNECT,
            pat: addr,
        };

        let mut sandbox = request.get_mut_sandbox();
        // Drop an existing identical rule first, so that the rule is
        // moved to the front instead of being duplicated.
        if let Some(idx) = sandbox.cidr_rules.iter().position(|r| *r == rule) {
            sandbox.cidr_rules.remove(idx);
        }
        sandbox.cidr_rules.push_front(rule)?;

        // 1. The sandbox lock will be released on drop here.
        // 2. The socket fd will be closed on drop here.
        Ok(())
    })(fd, request);

    // The bind succeeded: report success to the sandboxed process
    // regardless of the bookkeeping outcome above.
    Ok(request.return_syscall(0))
}