// syd 3.52.0

// rock-solid application kernel
// Documentation
// Syd: rock-solid application kernel
// src/kernel/net/getsockopt.rs: getsockopt(2) handler
//
// Copyright (c) 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::{mem::size_of, os::fd::RawFd};

use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    sys::socket::{getsockopt, sockopt::PeerPidfd},
    unistd::Pid,
};

use crate::{
    confine::{is_valid_ptr, scmp_arch_is_compat32},
    fd::{peer_creds, SafeOwnedFd},
    req::UNotifyEventRequest,
};

// Size in bytes of a 32-bit integer.
// The handlers below read the tracee's `*optlen` value using exactly
// this many bytes.
const SIZEOF_INT_T: usize = size_of::<i32>();

// SO_PEERPIDFD returns a single RawFd.
// This is the upper bound on the number of bytes the SO_PEERPIDFD
// handler copies into the tracee's `optval` buffer.
const SIZEOF_FD: usize = size_of::<RawFd>();

/// Dispatches a trapped getsockopt(2) call to the matching emulation routine.
///
/// `args` holds the raw syscall arguments: `args[1]` is the option level and
/// `args[2]` the option name. Only `SOL_SOCKET` with `SO_PEERCRED` or
/// `SO_PEERPIDFD` is emulated here; every other combination is handed back
/// via `request.continue_syscall()`.
///
/// `randomize_fds` is forwarded untouched to the `SO_PEERPIDFD` handler.
///
/// # Errors
///
/// Propagates whatever `Errno` the selected handler returns.
pub(crate) fn handle_getsockopt(
    fd: SafeOwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
    randomize_fds: bool,
) -> Result<ScmpNotifResp, Errno> {
    const SOL_SOCKET: u32 = libc::SOL_SOCKET as u32;
    const SO_PEERCRED: u32 = libc::SO_PEERCRED as u32;
    const SO_PEERPIDFD: u32 = libc::SO_PEERPIDFD as u32;

    // Pick the raw level and optname arguments out of the argument array.
    let &[_, raw_level, raw_optname, ..] = args;

    // Linux truncates level and optname to 32 bits.
    #[expect(clippy::cast_possible_truncation)]
    let selector = (raw_level as u32, raw_optname as u32);

    match selector {
        (SOL_SOCKET, SO_PEERCRED) => handle_getsockopt_peercred(fd, request, args),
        (SOL_SOCKET, SO_PEERPIDFD) => {
            handle_getsockopt_peerpidfd(fd, request, args, randomize_fds)
        }
        _ => {
            // Either an option we do not emulate, or a level other than
            // SOL_SOCKET: we only hook into SOL_SOCKET, however
            // socketcall(2) can still end up here.
            //
            // SAFETY: No pointer dereference in access check.
            Ok(unsafe { request.continue_syscall() })
        }
    }
}

/// Emulates `getsockopt(SOL_SOCKET, SO_PEERCRED)` for the sandbox process.
///
/// `args[3]` is the tracee address of `optval` and `args[4]` the tracee
/// address of `optlen`. The peer credentials of `fd` are serialised in the
/// tracee's ABI, truncated to `min(*optlen, size of the credentials struct)`
/// bytes, written to `optval`, and the truncated length is stored back into
/// `*optlen`. On success the syscall is completed with return value 0.
///
/// # Errors
///
/// * `EFAULT` if `optlen` fails `is_valid_ptr` or cannot be read in full,
///   or if `optval` fails `is_valid_ptr` while `*optlen > 0`.
/// * `EINVAL` if `*optlen` is negative.
/// * Any error propagated from `peer_creds` or the tracee memory accessors.
fn handle_getsockopt_peercred(
    fd: SafeOwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
) -> Result<ScmpNotifResp, Errno> {
    // Credentials image for a 32-bit tracee: pid, uid, gid as 32-bit values.
    const COMPAT_UCRED_SIZE: usize = 12;

    let arch = request.scmpreq.data.arch;

    // Tracee addresses of optval and optlen.
    let (optval_ptr, optlen_ptr) = (args[3], args[4]);

    // optlen pointer must not be NULL.
    if !is_valid_ptr(optlen_ptr, arch) {
        return Err(Errno::EFAULT);
    }

    // Linux reads *optlen as a signed int and reports EFAULT when the
    // pointer cannot be read; a short read is treated the same way.
    let mut raw_len = [0u8; SIZEOF_INT_T];
    if request.read_mem(&mut raw_len, optlen_ptr, SIZEOF_INT_T)? != SIZEOF_INT_T {
        return Err(Errno::EFAULT);
    }

    // Decode in native byte order; Linux rejects a negative length.
    let user_len = usize::try_from(i32::from_ne_bytes(raw_len)).map_err(|_| Errno::EINVAL)?;

    // If optval == NULL but *optlen > 0, kernel returns EFAULT.
    if !is_valid_ptr(optval_ptr, arch) && user_len > 0 {
        return Err(Errno::EFAULT);
    }

    // Gather the credentials to report.
    let creds = peer_creds(&fd)?;
    let (uid, gid) = (creds.uid(), creds.gid());
    let peer_pid = creds.pid();
    let pid = if peer_pid == Pid::this().as_raw() {
        // The reported peer pid is our own pid; let the request substitute it.
        request.fix_cred_pid(&fd).as_raw()
    } else {
        peer_pid
    };

    // Size of the credentials structure in the tracee's ABI.
    let compat = scmp_arch_is_compat32(arch);
    let image_len = if compat {
        COMPAT_UCRED_SIZE
    } else {
        size_of::<libc::ucred>()
    };

    // Number of bytes that are actually copied back.
    let to_copy = user_len.min(image_len);

    if to_copy > 0 {
        if compat {
            // 32-bit tracee: three native-endian 32-bit words (pid, uid, gid).
            #[expect(clippy::cast_sign_loss)]
            let words = [pid as u32, uid, gid];

            let mut image = [0u8; COMPAT_UCRED_SIZE];
            for (slot, word) in image.chunks_exact_mut(4).zip(words) {
                slot.copy_from_slice(&word.to_ne_bytes());
            }

            // Only the leading to_copy bytes are written.
            request.write_mem_all(&image[..to_copy], optval_ptr)?;
        } else {
            // Native tracee: reuse the in-memory layout of libc::ucred.
            let native = libc::ucred { pid, uid, gid };

            // SAFETY: `native` is an initialised local that outlives the
            // slice, and the slice spans exactly size_of::<libc::ucred>()
            // bytes starting at its address.
            let image: &[u8] = unsafe {
                std::slice::from_raw_parts(
                    (&raw const native).cast::<u8>(),
                    size_of::<libc::ucred>(),
                )
            };
            request.write_mem_all(&image[..to_copy], optval_ptr)?;
        }
    }

    // Report the number of bytes copied through *optlen.
    #[expect(clippy::cast_possible_truncation)]
    let len_out = (to_copy as u32).to_ne_bytes();
    request.write_mem_all(&len_out, optlen_ptr)?;

    Ok(request.return_syscall(0))
}

/// Emulates `getsockopt(SOL_SOCKET, SO_PEERPIDFD)` for the sandbox process.
///
/// `args[3]` is the tracee address of `optval` and `args[4]` the tracee
/// address of `optlen`. A pidfd for the peer of `fd` is obtained, installed
/// into the sandbox process with `request.add_fd` (close-on-exec set), and
/// the resulting descriptor number is written to `optval`, truncated to
/// `min(*optlen, SIZEOF_FD)` bytes. That truncated length is then stored
/// back into `*optlen` and the syscall is completed with return value 0.
///
/// `randomize_fds` is forwarded to `request.add_fd`.
///
/// # Errors
///
/// * `EFAULT` if `optlen` fails `is_valid_ptr` or cannot be read in full,
///   or if `optval` fails `is_valid_ptr` while `*optlen > 0`.
/// * `EINVAL` if `*optlen` is negative.
/// * Any error propagated from `peer_creds`, `getsockopt`,
///   `request.fix_scm_pidfd`, `request.add_fd` or the tracee memory
///   accessors.
fn handle_getsockopt_peerpidfd(
    fd: SafeOwnedFd,
    request: &UNotifyEventRequest,
    args: &[u64; 6],
    randomize_fds: bool,
) -> Result<ScmpNotifResp, Errno> {
    // optval and optlen pointers in tracee
    let optval_ptr = args[3];
    let optlen_ptr = args[4];

    // optlen pointer must not be NULL.
    if !is_valid_ptr(optlen_ptr, request.scmpreq.data.arch) {
        return Err(Errno::EFAULT);
    }

    // Linux reads optlen pointer as signed int.
    // Linux returns EFAULT for invalid optlen pointer.
    let mut len_buf = [0u8; SIZEOF_INT_T];
    let read = request.read_mem(&mut len_buf, optlen_ptr, SIZEOF_INT_T)?;
    if read != SIZEOF_INT_T {
        return Err(Errno::EFAULT);
    }

    // Convert bytes to usize respecting native endianness.
    // Linux rejects negative length.
    let orig_optlen = i32::from_ne_bytes(len_buf);
    if orig_optlen < 0 {
        return Err(Errno::EINVAL);
    }
    #[expect(clippy::cast_sign_loss)]
    let orig_optlen = orig_optlen as usize;

    // If optval == NULL but *optlen > 0, kernel returns EFAULT.
    if !is_valid_ptr(optval_ptr, request.scmpreq.data.arch) && orig_optlen > 0 {
        return Err(Errno::EFAULT);
    }

    // Linux caps len to sizeof(pidfd).
    let to_copy = orig_optlen.min(SIZEOF_FD);

    // Fix PIDFd as necessary: when the reported peer pid is our own pid,
    // the pidfd is obtained through the request instead of the socket.
    let ucred = peer_creds(&fd)?;
    let pidfd = if ucred.pid() != Pid::this().as_raw() {
        getsockopt(&fd, PeerPidfd)?.into()
    } else {
        request.fix_scm_pidfd(&fd)?
    };

    // Ensure memory is writable before installing fd.
    //
    // This is best effort, we can still leak a fd if page protections
    // change after this call but before the next write memory call.
    //
    // The probe is skipped when there is nothing to copy: with
    // to_copy == 0 the optval pointer has not been validated above and
    // may be NULL, and a zero-length write carries no information.
    // This matches the SO_PEERCRED handler, which also never touches
    // optval for a zero-length request.
    if to_copy > 0 {
        let pid0 = 0i32.to_ne_bytes();
        request.write_mem_all(&pid0[..to_copy], optval_ptr)?;
    }

    // Add the fd to sandbox process, close our copy.
    let pidfd = request.add_fd(pidfd, true /* close-on-exec*/, randomize_fds)?;

    // Write only to_copy bytes of the fd value; nothing is written to
    // optval for a zero-length request.
    if to_copy > 0 {
        let pidfd = pidfd.to_ne_bytes();
        request.write_mem_all(&pidfd[..to_copy], optval_ptr)?;
    }

    // Write back truncated length into *optlen.
    #[expect(clippy::cast_possible_truncation)]
    let buf = (to_copy as u32).to_ne_bytes();
    request.write_mem_all(&buf, optlen_ptr)?;

    Ok(request.return_syscall(0))
}