syd 3.52.0

rock-solid application kernel
Documentation
//
// Syd: rock-solid application kernel
// src/kernel/inotify.rs: inotify_add_watch(2) handler
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::os::fd::AsRawFd;

use libseccomp::ScmpNotifResp;
use nix::{errno::Errno, NixPath};

use crate::{
    compat::{AddWatchFlags, FsType},
    fd::{to_fd, PROC_FILE},
    fs::{inotify_add_watch, readlinkat},
    kernel::syscall_path_handler,
    lookup::FsFlags,
    path::XPathBuf,
    req::{SysArg, UNotifyEventRequest},
};

/// Handler for the `inotify_add_watch(2)` system call.
///
/// Validation is performed up front, before any path lookup, in this order:
///
/// 1. Argument 2 (the mask) is truncated to 32 bits and must decode through
///    `AddWatchFlags::from_bits` to a non-empty set, otherwise `EINVAL`.
/// 2. Argument 0 must convert to a file descriptor via `to_fd` and be
///    retrievable with `request.get_fd`; errors are returned as-is.
/// 3. `IN_MASK_ADD | IN_MASK_CREATE` together yield `EINVAL`.
/// 4. The descriptor must be an anonymous inode whose proc(5) link target
///    is `anon_inode:inotify`, otherwise `EINVAL`.
///
/// Argument 1 (the path) is then handed to `syscall_path_handler` with
/// `FsFlags::MUST_PATH`, plus `FsFlags::NO_FOLLOW_LAST` when the mask
/// contains `IN_DONT_FOLLOW`. Inside the handler the mask is adjusted
/// (see the inline comments) and the watch is added through a
/// `/proc/thread-self/fd/$fd` path. On success the watch descriptor
/// returned by the underlying call is returned to the caller.
pub(crate) fn sys_inotify_add_watch(request: UNotifyEventRequest) -> ScmpNotifResp {
    let req = request.scmpreq;

    // Linux kernel truncates upper bits.
    #[expect(clippy::cast_possible_truncation)]
    let mask = req.data.args[2] as u32;

    // Reject invalid/zero mask.
    let mask = match AddWatchFlags::from_bits(mask) {
        Some(mask) if !mask.is_empty() => mask,
        _ => return request.fail_syscall(Errno::EINVAL),
    };

    // Validate inotify(7) fd is a valid file descriptor.
    // Linux checks fd validity before the IN_MASK_ADD|IN_MASK_CREATE
    // check below, so the same order is kept here.
    let infd = match to_fd(req.data.args[0]) {
        Ok(fd) => fd,
        Err(errno) => return request.fail_syscall(errno),
    };

    // Get the INotify fd.
    let infd = match request.get_fd(infd) {
        Ok(fd) => fd,
        Err(errno) => return request.fail_syscall(errno),
    };

    // Linux rejects the combination IN_MASK_ADD|IN_MASK_CREATE.
    if mask.contains(AddWatchFlags::IN_MASK_ADD | AddWatchFlags::IN_MASK_CREATE) {
        return request.fail_syscall(Errno::EINVAL);
    }

    // Linux rejects non-inotify fds with EINVAL before path lookup.
    // An inotify fd is an anonymous inode whose proc(5) link reads
    // "anon_inode:inotify"; anything else is refused.
    match FsType::get(&infd) {
        Ok(fst) if fst.is_anon_inode() => {
            let pfd = match XPathBuf::from_self_fd(infd.as_raw_fd()) {
                Ok(pfd) => pfd,
                Err(errno) => return request.fail_syscall(errno),
            };
            match readlinkat(PROC_FILE(), &pfd) {
                Ok(target) if target.is_equal(b"anon_inode:inotify") => {}
                _ => return request.fail_syscall(Errno::EINVAL),
            }
        }
        Ok(_) => return request.fail_syscall(Errno::EINVAL),
        Err(errno) => return request.fail_syscall(errno),
    }

    // inotify(7) requires read access to the file or directory.
    // IN_DONT_FOLLOW is translated into NO_FOLLOW_LAST for the lookup.
    let mut fsflags = FsFlags::MUST_PATH;
    if mask.contains(AddWatchFlags::IN_DONT_FOLLOW) {
        fsflags |= FsFlags::NO_FOLLOW_LAST;
    }

    let argv = &[SysArg {
        dirfd: None,
        path: Some(1),
        fsflags,
        ..Default::default()
    }];
    syscall_path_handler(
        request,
        "inotify_add_watch",
        argv,
        |path_args, request, sandbox| {
            let restrict_notify_bdev = !sandbox.flags.allow_unsafe_notify_bdev();
            let restrict_notify_cdev = !sandbox.flags.allow_unsafe_notify_cdev();
            drop(sandbox); // release the read-lock.

            // SysArg has one element.
            #[expect(clippy::disallowed_methods)]
            let path = &path_args.0.as_ref().unwrap().path;
            assert!(path.base().is_empty()); // MUST_PATH!

            // Strip IN_DONT_FOLLOW from mask so proc(5) indirection works.
            // The flag has already been applied to the path lookup through
            // NO_FOLLOW_LAST above.
            let mut mask = mask & !AddWatchFlags::IN_DONT_FOLLOW;

            if restrict_notify_bdev || restrict_notify_cdev {
                // Strip IN_{ACCESS,MODIFY} if we're marking a sidechannel device.
                // MUST_PATH asserts `path.typ` is Some.
                #[expect(clippy::disallowed_methods)]
                let typ = path.typ.as_ref().unwrap();
                if (restrict_notify_bdev && typ.is_block_device())
                    || (restrict_notify_cdev && typ.is_char_device())
                {
                    mask.remove(AddWatchFlags::IN_ACCESS);
                    mask.remove(AddWatchFlags::IN_MODIFY);
                }
            }

            // If mask becomes zero, add IN_UNMOUNT which is implied to avoid EINVAL.
            // This must run after *all* stripping above: a request for only
            // IN_ACCESS and/or IN_MODIFY on a restricted device would otherwise
            // reach the underlying call with an empty mask.
            if mask.is_empty() {
                mask = AddWatchFlags::IN_UNMOUNT;
            }

            // Refer to the resolved path through its file descriptor:
            // the proc(5) path /proc/thread-self/fd/$fd is passed as the
            // path argument of the underlying call to avoid symlink TOCTOU.
            let mut pfd = XPathBuf::from("/proc/thread-self/fd");
            pfd.push_fd(path.dir().as_raw_fd());

            // Record blocking call so it can get invalidated.
            request.cache.add_sys_block(req, false)?;

            // All done, call underlying system call.
            let result = inotify_add_watch(&infd, &pfd, mask);

            // Remove invalidation record.
            request.cache.del_sys_block(req.id)?;

            result.map(|retval| request.return_syscall(i64::from(retval)))
        },
    )
}