syd 3.56.0

rock-solid application kernel
Documentation
//
// Syd: rock-solid application kernel
// src/cache.rs: Utilities for caching
//
// Copyright (c) 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

// SAFETY: This module has been liberated from unsafe code!
#![forbid(unsafe_code)]

use std::{
    sync::{
        atomic::{AtomicBool, AtomicI32},
        Arc, OnceLock,
    },
    thread::Thread,
};

use ahash::HashMapExt;
use concurrent_queue::ConcurrentQueue;
use libseccomp::ScmpSyscall;
use nix::{errno::Errno, sys::socket::UnixAddr, unistd::Pid};
use serde::{ser::SerializeMap, Serializer};

use crate::{
    config::{HASH_CACHE, SYSBLOCK_CAPACITY},
    confine::{ScmpNotifReq, SydArch},
    expiry::ExpiringMap,
    fd::SafeOwnedFd,
    hash::{hash_pipe, SydHashMap, SydRandomState},
    kernel::ptrace::mmap::MmapSyscall,
    lookup::FileInfo,
    path::XPathBuf,
    sigset::SydSigSet,
};

/// Metadata on a blocking syscall invocation.
///
/// `SysInterrupt::new` fills in `handler`, `tgid`, `request` and
/// `ignore_restart` from its arguments and starts with `status` unset and
/// both the `delete` and `signal` flags cleared.
///
/// The `Serialize` implementation emits `pid`, `sys`, `arch`, `args`,
/// `handler` and `ignore_restart` only; `tgid`, `status`, `delete` and
/// `signal` are not part of the serialized form.
#[derive(Debug)]
pub(crate) struct SysInterrupt {
    /// Syd handler thread ID
    pub(crate) handler: Pid,
    /// Thread group ID of sandbox process
    pub(crate) tgid: Pid,
    /// System call request
    pub(crate) request: ScmpNotifReq,
    /// proc_pid_status(5) file handle.
    /// `None` until the syd_int thread has opened the file.
    pub(crate) status: Option<SafeOwnedFd>,
    /// Used by syd_emu to signal syd_int to delete the entry and close the file.
    /// This is because the status file descriptor is not valid in syd_emu's fs space.
    /// Set by `SysInterrupt::delete` only when `status` is `Some`.
    pub(crate) delete: bool,
    /// Used by syd_mon to signal syd_int to signal stuck emulators manually,
    /// when not enough resources are available to spawn new emulator threads.
    /// This is because the status file descriptor is not valid in syd_mon's fs space.
    pub(crate) signal: bool,
    /// True if `SA_RESTART` is ignored
    /// (e.g. due to a socket timeout).
    pub(crate) ignore_restart: bool,
}

/// Map of restarting signals by TGID.
///
/// Keys are `Pid`s and values are `SydSigSet`s; the map is hashed with
/// `SydRandomState`.
pub(crate) type RestartMap = scc::HashMap<Pid, SydSigSet, SydRandomState>;

/// This is the data type used to handle syscall interrupts.
///
/// Every member is wrapped in an `Arc`. `sys_interrupt_map_new` builds the
/// initial state: both queues empty and bounded by `SYSBLOCK_CAPACITY`,
/// `sys_signal` false, `int_thread` unset, `not_tid` zero and an empty
/// `sig_restart` map.
#[derive(Debug)]
pub(crate) struct SysInterruptMap {
    /// Queue for new blocking syscall entries.
    pub(crate) sys_queue: Arc<ConcurrentQueue<SysInterrupt>>,
    /// Queue for deletion requests by seccomp request id.
    pub(crate) sys_delete: Arc<ConcurrentQueue<u64>>,
    /// Signal-all flag, when set, interrupter signals all stuck emulators.
    pub(crate) sys_signal: Arc<AtomicBool>,
    /// Interrupter thread handle for unpark() notification.
    /// Empty until the handle is stored into the `OnceLock`.
    pub(crate) int_thread: Arc<OnceLock<Thread>>,
    /// Notifier thread TID for signaling at exit.
    /// Initialised to 0 by `sys_interrupt_map_new`.
    pub(crate) not_tid: Arc<AtomicI32>,
    /// Map of restarting signals by TGID.
    /// Used for SA_RESTART tracking.
    pub(crate) sig_restart: Arc<RestartMap>,
}

/// Syscall-agnostic error map.
///
/// Keyed by `Pid`; each value is an optional `Errno`.
pub(crate) type ErrorMap = scc::HashMap<Pid, Option<Errno>, SydRandomState>;

/// chdir(2) sysenter result, stored for TOCTOU validation at sysexit.
///
/// Entries are stored per `Pid` in a `ChdirMap`.
#[derive(Debug)]
pub(crate) struct ChdirEntry {
    /// Seccomp trace data: "SydSys::Chdir" or "SydSys::Fchdir".
    // Without the `kcov` feature the `dead_code` lint is expected to fire
    // for this field, hence the conditional `expect` below.
    #[cfg_attr(not(feature = "kcov"), expect(dead_code))]
    pub(crate) data: u16,
    /// File information of the target directory.
    pub(crate) info: FileInfo,
    /// Target path used for domain transitions.
    /// Optional; `None` when no path is recorded.
    pub(crate) path: Option<XPathBuf>,
}

/// chdir(2) result map.
///
/// Keyed by `Pid`; each value is the `ChdirEntry` recorded at sysenter.
pub(crate) type ChdirMap = scc::HashMap<Pid, ChdirEntry, SydRandomState>;

/// mmap(2) sysenter result, stored for TOCTOU validation at sysexit.
///
/// Entries are stored per `Pid` in a `MmapMap`.
#[derive(Debug)]
pub(crate) struct MmapEntry {
    /// The mmap syscall variant (mmap/mmap2).
    pub(crate) sys: MmapSyscall,
    /// Target path used for domain transitions.
    /// Optional; `None` when no path is recorded.
    pub(crate) path: Option<XPathBuf>,
}

/// mmap(2) pid map.
///
/// Keyed by `Pid`; each value is the `MmapEntry` recorded at sysenter.
pub(crate) type MmapMap = scc::HashMap<Pid, MmapEntry, SydRandomState>;

// [inode,(pid,path)] map of unix binds.
// Path is only used for UNIX domain sockets.
//
// This struct is the value type of `UnixMap`, whose key is a u64.
// `Default` yields a record with no PIDs, no addresses and an empty
// destination list.
//
// SAFETY:
// 1. /proc/net/unix only gives inode information,
//    and does not include information on device id
//    or mount id so unfortunately we cannot check
//    for that here.
// 2. Pid is used for SO_PEERCRED getsockopt(2).
#[derive(Clone, Default)]
pub(crate) struct UnixVal {
    // Thread group ID of the socket owner, if known.
    pub(crate) self_pid: Option<Pid>,
    // Thread group ID of the socket peer from connect(2), if connected.
    pub(crate) peer_pid: Option<Pid>,
    // bind(2) address of this socket, if filesystem-bound.
    pub(crate) addr: Option<UnixAddr>,
    // Peer address from connect(2), if connected.
    pub(crate) peer: Option<UnixAddr>,
    // Device ID and inode of recent send(2) destinations.
    // Used at recv(2) to match the receiver's VFS identity.
    // NOTE(review): both components are stored as u32 while the map key
    // (an inode) is u64 -- confirm the narrower width is intended.
    pub(crate) dest: Vec<(u32, u32)>,
}

// Shared (`Arc`) map from a u64 key to the `UnixVal` record of a UNIX socket.
// Create one with `unix_map_new`.
pub(crate) type UnixMap = Arc<scc::HashMap<u64, UnixVal, SydRandomState>>;

// [tid, tgid] map for ptrace(PTRACE_TRACEME) calling tids.
// This is used to prevent ptrace(2) detection efficiently.
// Shared behind an `Arc`; create one with `ptrace_map_new`.
pub(crate) type PtraceMap = Arc<scc::HashMap<Pid, Pid, SydRandomState>>;

// SegvGuard expiry map with paths and crash count.
// Keys are paths (`XPathBuf`), values are the crash count as a u8.
pub(crate) type SegvGuardExpiryMap = Arc<ExpiringMap<XPathBuf, u8>>;

// SegvGuard suspension set with paths.
// Implemented as an `ExpiringMap` with unit values, so only the keys matter.
pub(crate) type SegvGuardSuspensionSet = Arc<ExpiringMap<XPathBuf, ()>>;

// Results map for ptrace(2).
//
// Bundles the three per-`Pid` maps below, each behind its own `Arc`.
// `sys_result_map_new` creates all three empty.
#[derive(Debug)]
pub(crate) struct SysResultMap {
    // syscall-agnostic error map
    pub(crate) trace_error: Arc<ErrorMap>,
    // chdir(2) result map
    pub(crate) trace_chdir: Arc<ChdirMap>,
    // mmap(2) result map
    pub(crate) trace_mmap: Arc<MmapMap>,
}

impl SysInterrupt {
    // Builds the record for a freshly blocked syscall.
    //
    // The proc_pid_status(5) handle starts out unset and neither the
    // `delete` nor the `signal` flag is raised. Currently always returns
    // `Ok`.
    pub(crate) fn new(
        request: ScmpNotifReq,
        handler: Pid,
        tgid: Pid,
        ignore_restart: bool,
    ) -> Result<Self, Errno> {
        let interrupt = Self {
            handler,
            tgid,
            request,
            status: None,
            delete: false,
            signal: false,
            ignore_restart,
        };

        Ok(interrupt)
    }

    // Flags the entry for deletion when syd_int owns its resources.
    //
    // Returns true when syd_int must drop the entry, false when syd_emu
    // may drop it directly.
    pub(crate) fn delete(&mut self) -> bool {
        // A populated `status` means syd_int has already opened
        // proc_pid_status(5). That descriptor is not valid in syd_emu's
        // FS space, so closing it is left to syd_int.
        let owned_by_int = self.status.is_some();
        if owned_by_int {
            self.delete = true;
        }

        owned_by_int
    }
}

impl serde::Serialize for SysInterrupt {
    /// Serializes the interrupt as a six-entry map with the keys `pid`,
    /// `sys`, `arch`, `args`, `handler` and `ignore_restart`.
    ///
    /// The syscall is rendered by name for the request's architecture and
    /// falls back to its decimal number when the name lookup fails. The
    /// `tgid`, `status`, `delete` and `signal` fields are not emitted.
    ///
    /// # Errors
    ///
    /// Returns the serializer's error as soon as opening the map, writing
    /// any entry, or closing the map fails.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut map = serializer.serialize_map(Some(6))?;

        let data = &self.request.data;
        let syscall = ScmpSyscall::get_name_by_arch(data.syscall, data.arch)
            .unwrap_or_else(|_| i32::from(data.syscall).to_string());

        // Propagate entry failures instead of discarding them: a dropped
        // error would let `end()` report success for a truncated map.
        map.serialize_entry("pid", &self.request.pid)?;
        map.serialize_entry("sys", &syscall)?;
        map.serialize_entry("arch", &SydArch::from(data.arch))?;
        map.serialize_entry("args", &data.args)?;
        map.serialize_entry("handler", &self.handler.as_raw())?;
        map.serialize_entry("ignore_restart", &self.ignore_restart)?;

        map.end()
    }
}

/// Builds an empty, shareable `UnixMap` with a fresh `SydRandomState` hasher.
pub(crate) fn unix_map_new() -> UnixMap {
    let hasher = SydRandomState::new();
    let map = scc::HashMap::with_hasher(hasher);
    Arc::new(map)
}

/// Builds an empty, shareable `PtraceMap` with a fresh `SydRandomState` hasher.
pub(crate) fn ptrace_map_new() -> PtraceMap {
    let hasher = SydRandomState::new();
    let map = scc::HashMap::with_hasher(hasher);
    Arc::new(map)
}

/// Builds the initial `SysInterruptMap`.
///
/// Both queues are bounded by `SYSBLOCK_CAPACITY` and start empty, the
/// signal-all flag is cleared, the interrupter thread handle is unset,
/// the notifier TID is zero and the restart map is empty.
pub(crate) fn sys_interrupt_map_new() -> SysInterruptMap {
    let sys_queue = Arc::new(ConcurrentQueue::bounded(SYSBLOCK_CAPACITY));
    let sys_delete = Arc::new(ConcurrentQueue::bounded(SYSBLOCK_CAPACITY));
    let sys_signal = Arc::new(AtomicBool::new(false));
    let int_thread = Arc::new(OnceLock::new());
    let not_tid = Arc::new(AtomicI32::new(0));
    let sig_restart = Arc::new(scc::HashMap::with_hasher(SydRandomState::new()));

    SysInterruptMap {
        sys_queue,
        sys_delete,
        sys_signal,
        int_thread,
        not_tid,
        sig_restart,
    }
}

/// Builds a `SysResultMap` whose error, chdir(2) and mmap(2) maps are all
/// empty, each with its own fresh `SydRandomState` hasher.
pub(crate) fn sys_result_map_new() -> SysResultMap {
    let trace_error = Arc::new(scc::HashMap::with_hasher(SydRandomState::new()));
    let trace_chdir = Arc::new(scc::HashMap::with_hasher(SydRandomState::new()));
    let trace_mmap = Arc::new(scc::HashMap::with_hasher(SydRandomState::new()));

    SysResultMap {
        trace_error,
        trace_chdir,
        trace_mmap,
    }
}

/// Cache for AF_ALG hash algorithm probing results.
///
/// Keys are algorithm names. Each value is the result of calling
/// `hash_pipe` for that algorithm with no input: `Ok` holds the digest
/// returned for that call and marks the algorithm as supported, `Err`
/// holds the errno of the failed probe.
pub(crate) struct HashCache {
    // Probe results by algorithm name, filled lazily by `probe`.
    map: SydHashMap<String, Result<Vec<u8>, Errno>>,
}

impl HashCache {
    // Returns a cache with no probe results recorded yet.
    pub(crate) fn new() -> Self {
        let map = SydHashMap::new();
        Self { map }
    }

    // Looks up the probe result for `alg`, running the probe on first use.
    // Ok(empty_digest) = supported, Err(errno) = unsupported.
    fn probe(&mut self, alg: &str) -> &Result<Vec<u8>, Errno> {
        if self.map.get(alg).is_none() {
            // First request for this algorithm: probe without input and
            // remember the outcome, whether it succeeded or failed.
            let outcome = hash_pipe(alg, None::<SafeOwnedFd>);
            self.map.insert(alg.to_string(), outcome);
        }
        &self.map[alg]
    }

    /// Reports whether the running kernel supports the algorithm.
    ///
    /// Uses the global `HASH_CACHE`; a poisoned lock is recovered rather
    /// than treated as an error.
    pub(crate) fn is_supported(alg: &str) -> bool {
        let mut cache = HASH_CACHE
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        cache.probe(alg).is_ok()
    }

    /// Checks a checksum against the cached metadata of the algorithm.
    ///
    /// The checksum is accepted only when the algorithm is supported, the
    /// checksum has the same length as the cached digest, and it differs
    /// from that digest.
    pub(crate) fn is_valid_checksum(alg: &str, key: &[u8]) -> bool {
        let mut cache = HASH_CACHE
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        matches!(
            cache.probe(alg),
            Ok(digest) if key.len() == digest.len() && key != digest.as_slice()
        )
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // A freshly built UnixMap holds no entries.
    #[test]
    fn test_unix_map_new() {
        let map = unix_map_new();
        assert!(map.is_empty());
    }

    // A freshly built PtraceMap holds no entries.
    #[test]
    fn test_ptrace_map_new() {
        let map = ptrace_map_new();
        assert!(map.is_empty());
    }

    // A freshly built SysInterruptMap has empty queues, a cleared
    // signal-all flag and an empty restart map.
    #[test]
    fn test_sys_interrupt_map_new() {
        let map = sys_interrupt_map_new();
        assert!(map.sys_queue.is_empty());
        assert!(map.sys_delete.is_empty());
        assert!(!map.sys_signal.load(std::sync::atomic::Ordering::Relaxed));
        assert!(map.sig_restart.is_empty());
    }

    // A freshly built SysResultMap has three empty maps.
    #[test]
    fn test_sys_result_map_new() {
        let map = sys_result_map_new();
        assert!(map.trace_error.is_empty());
        assert!(map.trace_chdir.is_empty());
        assert!(map.trace_mmap.is_empty());
    }

    // A new HashCache has recorded no probe results.
    #[test]
    fn test_hash_cache_1() {
        let cache = HashCache::new();
        assert!(cache.map.is_empty());
    }

    // A supported algorithm stays supported on a repeated (cached) query.
    #[test]
    fn test_hash_cache_2() {
        // sha256 should be available on most kernels, but skip gracefully.
        if HashCache::is_supported("sha256") {
            assert!(HashCache::is_supported("sha256"));
        } else {
            eprintln!("sha256 not supported by kernel, skipping.");
        }
    }

    // A bogus algorithm name is reported as unsupported.
    #[test]
    fn test_hash_cache_3() {
        assert!(!HashCache::is_supported("Pink Floyd"));
    }

    // Checksum validation: unsupported algorithm, wrong length, the cached
    // empty-input digest, and a well-formed checksum.
    #[test]
    fn test_hash_cache_4() {
        // Unsupported algorithm: always reject.
        assert!(!HashCache::is_valid_checksum("Pink Floyd", &[0u8; 32]));

        if !HashCache::is_supported("sha256") {
            eprintln!("sha256 not available, skipping checksum tests.");
            return;
        }

        // Wrong length: reject.
        assert!(!HashCache::is_valid_checksum("sha256", &[0u8; 16]));

        // Empty-digest hardening: reject checksum equal to hash of empty input.
        // Recover from a poisoned lock like every other user of HASH_CACHE,
        // so a panic in another test does not cascade into this one.
        let empty = HASH_CACHE
            .lock()
            .unwrap_or_else(|err| err.into_inner())
            .probe("sha256")
            .as_ref()
            .expect("sha256 was reported as supported above")
            .clone();
        assert!(!HashCache::is_valid_checksum("sha256", &empty));

        // Valid checksum (right length, not empty-digest): accept.
        let mut valid = vec![0xffu8; 32];
        valid[0] ^= 0x01;
        assert!(HashCache::is_valid_checksum("sha256", &valid));
    }

    // Two consecutive probes of the same algorithm yield the same result.
    #[test]
    fn test_hash_cache_5() {
        let first = {
            HASH_CACHE
                .lock()
                .unwrap_or_else(|err| err.into_inner())
                .probe("sha256")
                .clone()
        };
        let second = {
            HASH_CACHE
                .lock()
                .unwrap_or_else(|err| err.into_inner())
                .probe("sha256")
                .clone()
        };
        match (&first, &second) {
            (Ok(a), Ok(b)) => assert_eq!(a, b),
            (Err(a), Err(b)) => assert_eq!(a, b),
            _ => panic!("probe returned different Result variants"),
        }
    }
}