Skip to main content

sandlock_core/seccomp/
dispatch.rs

1// Table-driven syscall dispatch — routes seccomp notifications to handler chains.
2//
3// Each syscall number maps to an ordered chain of handlers.  The chain is walked
4// until a handler returns a non-Continue action (or the chain is exhausted, in
5// which case Continue is returned).
6//
7// Continue safety (issue #27):
8//   - The chain walker treats Continue as "this handler did not intervene,
9//     try the next one." A final Continue (no handler intervened, or chain
10//     exhausted) means the syscall passes through to the kernel as-issued.
11//     The kernel still enforces Landlock and the BPF filter on the
12//     untouched syscall, so dispatch-level Continue is not a security
13//     decision — it's the absence of one.
14//   - The conditional shim closures (random/hostname/etc_hosts opens) that
15//     wrap an Option-returning helper translate `None` into Continue,
16//     which is the same "not my path, next handler" semantics. None of
17//     them approve a syscall based on user-memory contents.
18
19use std::collections::HashMap;
20use std::os::unix::io::RawFd;
21use std::sync::Arc;
22
23use super::ctx::SupervisorCtx;
24use super::notif::{NotifAction, NotifPolicy};
25use super::state::ResourceState;
26use super::syscall::SyscallError;
27use crate::arch;
28use crate::sys::structs::SeccompNotif;
29
30use thiserror::Error;
31use tokio::sync::Mutex;
32
33// ============================================================
34// Types
35// ============================================================
36
37// ============================================================
38// Handler trait — the new public extension API.
39// ============================================================
40
/// Public extension trait for sandlock seccomp-notif handlers.
///
/// Each implementor is registered against a [`crate::seccomp::syscall::Syscall`]
/// through [`crate::Sandbox::run_with_handlers`] /
/// [`crate::Sandbox::run_interactive_with_handlers`].  Receives
/// `&HandlerCtx` borrowed for the call; cannot outlive the dispatch
/// invocation.
///
/// State lives on the implementor — no `Arc::clone` ladders, no
/// closure ceremony at registration time.
///
/// `handle` returns a boxed `Future` so the trait stays dyn-compatible
/// (the supervisor stores user handlers as `Vec<Arc<dyn Handler>>`,
/// keyed by syscall number).  Returning `impl Future` directly via
/// RPITIT would be more efficient but is not object-safe, and changing
/// the storage to a non-erased shape would force a generic dispatch
/// chain incompatible with arbitrary user handler types.
pub trait Handler: Send + Sync + 'static {
    /// Handle a single seccomp notification.
    ///
    /// Both `self` and `cx` are borrowed for `'a`, and the returned future
    /// may hold those borrows for as long as it lives.  The future must be
    /// `Send` and resolves to the [`NotifAction`] chosen by this handler;
    /// the dispatch chain treats `NotifAction::Continue` as "did not
    /// intervene" and moves on to the next handler.
    fn handle<'a>(
        &'a self,
        cx: &'a HandlerCtx,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = NotifAction> + Send + 'a>>;
}
64
/// Context passed to `Handler::handle`.
///
/// `notif` is the kernel notification (owned by value — it's a small
/// `repr(C)` struct, cheap to copy).  `notif_fd` is the supervisor's
/// seccomp listener fd, used by helpers like `read_child_mem` /
/// `write_child_mem` / `read_child_cstr` for TOCTOU-safe child memory
/// access.
///
/// Handler state lives on the implementor (`&self`).  Supervisor-internal
/// state is intentionally not exposed here so the `SupervisorCtx`
/// internal fields are not part of the downstream extension contract.
pub struct HandlerCtx {
    /// The seccomp notification being dispatched (stored by value).
    pub notif: SeccompNotif,
    /// The supervisor's seccomp listener file descriptor.
    pub notif_fd: RawFd,
}
80
81// Blanket impl: any Fn(&HandlerCtx) -> Future is a Handler.
82//
83// Lets lightweight closure-style handlers work without ceremony at the
84// call site.  Handlers that need state should use `struct + explicit
85// impl Handler` instead.
86impl<F, Fut> Handler for F
87where
88    F: Fn(&HandlerCtx) -> Fut + Send + Sync + 'static,
89    Fut: std::future::Future<Output = NotifAction> + Send + 'static,
90{
91    fn handle<'a>(
92        &'a self,
93        cx: &'a HandlerCtx,
94    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = NotifAction> + Send + 'a>> {
95        Box::pin((self)(cx))
96    }
97}
98
99// Concrete impls for `Box<dyn Handler>` and `Arc<dyn Handler>` so callers
100// can erase concrete handler types behind a smart pointer when mixing
101// different handler shapes in one `IntoIterator` passed to
102// `run_with_handlers` — e.g. `Vec<(i64, Box<dyn Handler>)>` lets a
103// downstream register handlers of different concrete types without
104// writing a per-crate wrapper enum.
105//
106// These are concrete `Box<dyn Handler>` / `Arc<dyn Handler>` rather than
107// `<H: Handler + ?Sized>` blankets to avoid coherence overlap with the
108// `impl<F, Fut> Handler for F where F: Fn(&HandlerCtx) -> Fut` blanket
109// above.
110impl Handler for Box<dyn Handler> {
111    fn handle<'a>(
112        &'a self,
113        cx: &'a HandlerCtx,
114    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = NotifAction> + Send + 'a>> {
115        (**self).handle(cx)
116    }
117}
118
119impl Handler for std::sync::Arc<dyn Handler> {
120    fn handle<'a>(
121        &'a self,
122        cx: &'a HandlerCtx,
123    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = NotifAction> + Send + 'a>> {
124        (**self).handle(cx)
125    }
126}
127
/// Errors raised when registering user handlers via
/// [`crate::Sandbox::run_with_handlers`].
#[derive(Debug, Error, PartialEq, Eq)]
pub enum HandlerError {
    /// The syscall given in a handler registration was rejected; wraps the
    /// underlying [`SyscallError`] (convertible via `From`, so `?` works).
    #[error("invalid syscall in handler registration: {0}")]
    InvalidSyscall(#[from] SyscallError),

    /// A handler was registered on a syscall that is on the policy
    /// blocklist.  `syscall_nr` is the offending syscall number.
    #[error(
        "handler on syscall {syscall_nr} conflicts with the policy syscall blocklist \
         and would let user code bypass it via SECCOMP_USER_NOTIF_FLAG_CONTINUE"
    )]
    OnDenySyscall { syscall_nr: i64 },
}
141
142/// Reject handler registrations that would weaken sandlock's confinement
143/// guarantees.
144///
145/// The cBPF program emits notif JEQs *before* deny JEQs, so a syscall
146/// present in both lists hits `SECCOMP_RET_USER_NOTIF` first.  A handler
147/// registered on a syscall that is on the blocklist would therefore
148/// convert a kernel-deny into a user-supervised path: a handler returning
149/// `NotifAction::Continue` becomes `SECCOMP_USER_NOTIF_FLAG_CONTINUE` and
150/// the kernel actually runs the syscall — silently bypassing deny.
151///
152/// The blocklist is whatever [`crate::context::blocklist_syscall_numbers`]
153/// resolves from Sandlock's default syscall blocklist plus policy extras.
154///
155/// Every open-family syscall a path-keyed handler must intercept to be
156/// leak-proof: `open` (legacy), `openat`, and `openat2`. A handler that
157/// only registers `openat` is bypassed by any libc that picks one of
158/// the others. The corresponding BPF notif list (in `context::notif_syscalls`)
159/// must register the same set so the kernel actually produces a
160/// notification — otherwise `RET_ALLOW` makes the handler unreachable.
161fn open_family_syscalls() -> Vec<i64> {
162    let mut v = vec![libc::SYS_openat, arch::SYS_OPENAT2];
163    if let Some(legacy_open) = arch::sys_open() {
164        v.push(legacy_open);
165    }
166    v
167}
168
169/// Takes only the syscall numbers because that's all it needs to check.
170/// Called from the `run_with_handlers` entry points before any
171/// handler is registered against the dispatch table.
172///
173/// Returns the offending syscall number on rejection so the caller can
174/// surface it to the end user.
175pub(crate) fn validate_handler_syscalls_against_policy(
176    syscall_nrs: &[i64],
177    policy: &crate::sandbox::Sandbox,
178) -> Result<(), i64> {
179    let blocklist: std::collections::HashSet<u32> =
180        crate::context::blocklist_syscall_numbers(policy).into_iter().collect();
181    for &nr in syscall_nrs {
182        if blocklist.contains(&(nr as u32)) {
183            return Err(nr);
184        }
185    }
186    Ok(())
187}
188
189
190/// Ordered chain of handlers for a single syscall number.
191struct HandlerChain {
192    handlers: Vec<std::sync::Arc<dyn Handler>>,
193}
194
195/// Maps syscall numbers to handler chains.
196pub struct DispatchTable {
197    chains: HashMap<i64, HandlerChain>,
198}
199
200impl DispatchTable {
201    /// Create an empty dispatch table.
202    pub fn new() -> Self {
203        Self {
204            chains: HashMap::new(),
205        }
206    }
207
208    /// Register a handler for the given syscall number.  Handlers are
209    /// called in registration order; the first non-Continue result wins.
210    ///
211    /// Generic over `H: Handler` — accepts either a struct with explicit
212    /// `impl Handler for ...` or a closure (via blanket impl).
213    pub fn register<H: Handler>(&mut self, syscall_nr: i64, handler: H) {
214        self.register_arc(syscall_nr, std::sync::Arc::new(handler));
215    }
216
217    /// Register a pre-`Arc`'d handler.  Used both by builtin chunks
218    /// that share state via `Arc::clone` (one `ForkHandler` instance
219    /// registers against `SYS_clone`/`SYS_clone3`/`SYS_vfork`) and by
220    /// `run_with_handlers` when each item already arrives as
221    /// `Arc<dyn Handler>`.
222    pub(crate) fn register_arc(
223        &mut self,
224        syscall_nr: i64,
225        handler: std::sync::Arc<dyn Handler>,
226    ) {
227        self.chains
228            .entry(syscall_nr)
229            .or_insert_with(|| HandlerChain { handlers: Vec::new() })
230            .handlers
231            .push(handler);
232    }
233
234    /// Dispatch a notification through the handler chain for its syscall number.
235    pub(crate) async fn dispatch(
236        &self,
237        notif: SeccompNotif,
238        notif_fd: RawFd,
239    ) -> NotifAction {
240        let nr = notif.data.nr as i64;
241        if let Some(chain) = self.chains.get(&nr) {
242            let handler_ctx = HandlerCtx { notif, notif_fd };
243            for handler in &chain.handlers {
244                let action = handler.handle(&handler_ctx).await;
245                if !matches!(action, NotifAction::Continue) {
246                    return action;
247                }
248            }
249        }
250        NotifAction::Continue
251    }
252}
253
254// ============================================================
255// Table builder — mechanical translation of old dispatch()
256// ============================================================
257
258/// Build the dispatch table from a `NotifPolicy`.  Every branch from the old
259/// monolithic `dispatch()` function is translated into a `table.register()` call.
260/// Priority is preserved by registration order.
261///
262/// `pending_handlers` are appended **after** all builtin handlers, so they
263/// observe the post-builtin view (e.g. `chroot`-normalized paths on
264/// `openat`).  Builtins cannot be overridden or removed — this is the
265/// security boundary for downstream crates.
266pub(crate) fn build_dispatch_table(
267    policy: &Arc<NotifPolicy>,
268    resource: &Arc<Mutex<ResourceState>>,
269    ctx: &Arc<SupervisorCtx>,
270    pending_handlers: Vec<(i64, std::sync::Arc<dyn Handler>)>,
271) -> DispatchTable {
272    let mut table = DispatchTable::new();
273
274    // ------------------------------------------------------------------
275    // Fork/clone family (always on)
276    // ------------------------------------------------------------------
277    for nr in arch::fork_like_syscalls() {
278        let policy_for_fork = Arc::clone(policy);
279        let resource_for_fork = Arc::clone(resource);
280        table.register(nr, move |cx: &HandlerCtx| {
281            let notif = cx.notif;
282            let notif_fd = cx.notif_fd;
283            let policy = Arc::clone(&policy_for_fork);
284            let resource = Arc::clone(&resource_for_fork);
285            async move {
286                crate::resource::handle_fork(&notif, notif_fd, &resource, &policy).await
287            }
288        });
289    }
290
291    // ------------------------------------------------------------------
292    // Wait family (always on)
293    // ------------------------------------------------------------------
294    for &nr in &[libc::SYS_wait4, libc::SYS_waitid] {
295        let resource_for_wait = Arc::clone(resource);
296        table.register(nr, move |cx: &HandlerCtx| {
297            let notif = cx.notif;
298            let resource = Arc::clone(&resource_for_wait);
299            async move {
300                crate::resource::handle_wait(&notif, &resource).await
301            }
302        });
303    }
304
305    // ------------------------------------------------------------------
306    // Memory management (conditional on has_memory_limit)
307    // ------------------------------------------------------------------
308    if policy.has_memory_limit {
309        for &nr in &[
310            libc::SYS_mmap, libc::SYS_munmap, libc::SYS_brk,
311            libc::SYS_mremap, libc::SYS_shmget,
312        ] {
313            let policy_for_mem = Arc::clone(policy);
314            let __sup = Arc::clone(ctx);
315            table.register(nr, move |cx: &HandlerCtx| {
316                let notif = cx.notif;
317                let sup = Arc::clone(&__sup);
318                let policy = Arc::clone(&policy_for_mem);
319                async move {
320                    crate::resource::handle_memory(&notif, &sup, &policy).await
321                }
322            });
323        }
324    }
325
326    // ------------------------------------------------------------------
327    // Network (conditional on has_net_allowlist || has_http_acl)
328    // ------------------------------------------------------------------
329    if policy.has_net_allowlist || policy.has_http_acl {
330        for &nr in &[
331            libc::SYS_connect,
332            libc::SYS_sendto,
333            libc::SYS_sendmsg,
334            libc::SYS_sendmmsg,
335        ] {
336            let __sup = Arc::clone(ctx);
337            table.register(nr, move |cx: &HandlerCtx| {
338                let notif = cx.notif;
339                let sup = Arc::clone(&__sup);
340                let notif_fd = cx.notif_fd;
341                async move {
342                    crate::network::handle_net(&notif, &sup, notif_fd).await
343                }
344            });
345        }
346    }
347
348    // ------------------------------------------------------------------
349    // Deterministic random — getrandom()
350    // ------------------------------------------------------------------
351    if policy.has_random_seed {
352        let __sup = Arc::clone(ctx);
353        table.register(libc::SYS_getrandom, move |cx: &HandlerCtx| {
354            let notif = cx.notif;
355            let sup = Arc::clone(&__sup);
356            let notif_fd = cx.notif_fd;
357            async move {
358                let mut tr = sup.time_random.lock().await;
359                if let Some(ref mut rng) = tr.random_state {
360                    crate::random::handle_getrandom(&notif, rng, notif_fd)
361                } else {
362                    NotifAction::Continue
363                }
364            }
365        });
366    }
367
368    // ------------------------------------------------------------------
369    // Deterministic random — /dev/urandom and /dev/random opens.
370    // Registered for every open-family syscall so dirfd-relative and
371    // legacy `open` spellings can't slip past the seed and read the
372    // kernel's real entropy.
373    // ------------------------------------------------------------------
374    if policy.has_random_seed {
375        for nr in open_family_syscalls() {
376            let __sup = Arc::clone(ctx);
377            table.register(nr, move |cx: &HandlerCtx| {
378                let notif = cx.notif;
379                let sup = Arc::clone(&__sup);
380                let notif_fd = cx.notif_fd;
381                async move {
382                    let mut tr = sup.time_random.lock().await;
383                    if let Some(ref mut rng) = tr.random_state {
384                        if let Some(action) = crate::random::handle_random_open(&notif, rng, notif_fd) {
385                            return action;
386                        }
387                    }
388                    NotifAction::Continue
389                }
390            });
391        }
392    }
393
394    // ------------------------------------------------------------------
395    // Timer adjustment (conditional on has_time_start)
396    // ------------------------------------------------------------------
397    if policy.has_time_start {
398        let time_offset = policy.time_offset;
399        for &nr in &[
400            libc::SYS_clock_nanosleep as i64,
401            libc::SYS_timerfd_settime as i64,
402            libc::SYS_timer_settime as i64,
403        ] {
404            table.register(nr, move |cx: &HandlerCtx| {
405                let notif = cx.notif;
406                let notif_fd = cx.notif_fd;
407                async move {
408                    crate::time::handle_timer(&notif, time_offset, notif_fd)
409                }
410            });
411        }
412    }
413
414    // ------------------------------------------------------------------
415    // /etc/hosts virtualization: always on. The synthetic file contains
416    // the loopback base (or, in chroot/image mode, the image's own
417    // `/etc/hosts` merged with a loopback fallback) plus any concrete
418    // hostnames resolved from `net_allow`, so the host's on-disk
419    // `/etc/hosts` never leaks in and image-baked entries are preserved.
420    //
421    // Registered for every open-family syscall (see `open_family_syscalls`).
422    // Must run *before* the chroot handler so that in chroot mode the
423    // synthetic memfd wins over a direct open of `<chroot>/etc/hosts` —
424    // the chroot handler always intercepts opens within the chroot and
425    // would otherwise serve the raw image file, defeating the merge.
426    // ------------------------------------------------------------------
427    {
428        let etc_hosts = policy.virtual_etc_hosts.clone();
429        for nr in open_family_syscalls() {
430            let etc_hosts = etc_hosts.clone();
431            table.register(nr, move |cx: &HandlerCtx| {
432                let notif = cx.notif;
433                let notif_fd = cx.notif_fd;
434                let etc_hosts = etc_hosts.clone();
435                async move {
436                    if let Some(action) = crate::procfs::handle_etc_hosts_open(&notif, &etc_hosts, notif_fd) {
437                        action
438                    } else {
439                        NotifAction::Continue
440                    }
441                }
442            });
443        }
444    }
445
446    // ------------------------------------------------------------------
447    // CA injection: splice the active MITM CA into user-declared trust
448    // bundles. Registered before chroot/COW so the substituted memfd wins
449    // over a real open of the bundle file. Only active when MITM is on and
450    // the user declared at least one --http-inject-ca path.
451    // ------------------------------------------------------------------
452    if let Some(ca_pem) = policy.ca_inject_pem.clone() {
453        if !policy.ca_inject_paths.is_empty() {
454            let inject_paths = std::sync::Arc::new(policy.ca_inject_paths.clone());
455            for nr in open_family_syscalls() {
456                let ca_pem = std::sync::Arc::clone(&ca_pem);
457                let inject_paths = std::sync::Arc::clone(&inject_paths);
458                table.register(nr, move |cx: &HandlerCtx| {
459                    let notif = cx.notif;
460                    let notif_fd = cx.notif_fd;
461                    let ca_pem = std::sync::Arc::clone(&ca_pem);
462                    let inject_paths = std::sync::Arc::clone(&inject_paths);
463                    async move {
464                        crate::ca_inject::handle_ca_inject_open(
465                            &notif, &inject_paths, &ca_pem, notif_fd,
466                        )
467                        .unwrap_or(NotifAction::Continue)
468                    }
469                });
470            }
471        }
472    }
473
474    // ------------------------------------------------------------------
475    // Chroot path interception (before COW)
476    // ------------------------------------------------------------------
477    if policy.chroot_root.is_some() {
478        register_chroot_handlers(&mut table, policy, ctx);
479    }
480
481    // ------------------------------------------------------------------
482    // COW filesystem interception
483    // ------------------------------------------------------------------
484    if policy.cow_enabled {
485        register_cow_handlers(&mut table, ctx);
486    }
487
488    // ------------------------------------------------------------------
489    // /proc virtualization (always on). The handler does both
490    // sensitive-path blocking and per-PID filtering, both of which are
491    // security boundaries, so it has to catch every open-family spelling.
492    // ------------------------------------------------------------------
493    for nr in open_family_syscalls() {
494        let policy_for_proc_open = Arc::clone(policy);
495        let resource_for_proc_open = Arc::clone(resource);
496        let __sup = Arc::clone(ctx);
497        table.register(nr, move |cx: &HandlerCtx| {
498            let notif = cx.notif;
499            let sup = Arc::clone(&__sup);
500            let notif_fd = cx.notif_fd;
501            let policy = Arc::clone(&policy_for_proc_open);
502            let resource = Arc::clone(&resource_for_proc_open);
503            async move {
504                let processes = Arc::clone(&sup.processes);
505                let network = Arc::clone(&sup.network);
506                crate::procfs::handle_proc_open(&notif, &processes, &resource, &network, &policy, notif_fd).await
507            }
508        });
509    }
510    let mut getdents_nrs = vec![libc::SYS_getdents64];
511    if let Some(getdents) = arch::sys_getdents() {
512        getdents_nrs.push(getdents);
513    }
514    for nr in getdents_nrs {
515        let policy_for_getdents = Arc::clone(policy);
516        let __sup = Arc::clone(ctx);
517        table.register(nr, move |cx: &HandlerCtx| {
518            let notif = cx.notif;
519            let sup = Arc::clone(&__sup);
520            let notif_fd = cx.notif_fd;
521            let policy = Arc::clone(&policy_for_getdents);
522            async move {
523                let processes = Arc::clone(&sup.processes);
524                crate::procfs::handle_getdents(&notif, &processes, &policy, notif_fd).await
525            }
526        });
527    }
528
529    // ------------------------------------------------------------------
530    // Virtual CPU count
531    // ------------------------------------------------------------------
532    if let Some(n) = policy.num_cpus {
533        table.register(libc::SYS_sched_getaffinity, move |cx: &HandlerCtx| {
534            let notif = cx.notif;
535            let notif_fd = cx.notif_fd;
536            async move {
537                crate::procfs::handle_sched_getaffinity(&notif, n, notif_fd)
538            }
539        });
540    }
541
542    // ------------------------------------------------------------------
543    // Hostname virtualization. The `/etc/hostname` shim is registered
544    // for every open-family syscall so dirfd-relative and legacy `open`
545    // spellings can't leak the host's real hostname.
546    // ------------------------------------------------------------------
547    if let Some(ref hostname) = policy.virtual_hostname {
548        let hostname_for_uname = hostname.clone();
549        let hostname_for_open = hostname.clone();
550        table.register(libc::SYS_uname, move |cx: &HandlerCtx| {
551            let notif = cx.notif;
552            let notif_fd = cx.notif_fd;
553            let hostname = hostname_for_uname.clone();
554            async move {
555                crate::procfs::handle_uname(&notif, &hostname, notif_fd)
556            }
557        });
558        for nr in open_family_syscalls() {
559            let hostname = hostname_for_open.clone();
560            table.register(nr, move |cx: &HandlerCtx| {
561                let notif = cx.notif;
562                let notif_fd = cx.notif_fd;
563                let hostname = hostname.clone();
564                async move {
565                    if let Some(action) = crate::procfs::handle_hostname_open(&notif, &hostname, notif_fd) {
566                        action
567                    } else {
568                        NotifAction::Continue
569                    }
570                }
571            });
572        }
573    }
574
575    // /etc/hosts is registered above the chroot block — see the comment there.
576
577    // ------------------------------------------------------------------
578    // Deterministic directory listing
579    // ------------------------------------------------------------------
580    if policy.deterministic_dirs {
581        let mut getdents_nrs = vec![libc::SYS_getdents64];
582        if let Some(getdents) = arch::sys_getdents() {
583            getdents_nrs.push(getdents);
584        }
585        for nr in getdents_nrs {
586            let __sup = Arc::clone(ctx);
587            table.register(nr, move |cx: &HandlerCtx| {
588                let notif = cx.notif;
589                let sup = Arc::clone(&__sup);
590                let notif_fd = cx.notif_fd;
591                async move {
592                    let processes = Arc::clone(&sup.processes);
593                    crate::procfs::handle_sorted_getdents(&notif, &processes, notif_fd).await
594                }
595            });
596        }
597    }
598
599    // ------------------------------------------------------------------
600    // NETLINK_ROUTE virtualization (always on).
601    //
602    // Send/recv traffic flows through a `socketpair(AF_UNIX,
603    // SOCK_SEQPACKET)` whose supervisor-side end is driven by a tokio
604    // task spawned in `handle_socket`.  Only `socket`, `bind`,
605    // `getsockname`, `recvmsg`/`recvfrom`, and `close` need supervisor
606    // intercepts; send uses the kernel directly.
607    //
608    // Must register before `port_remap` so the netlink `bind` handler
609    // runs first and returns `Continue` for non-cookie fds.
610    // ------------------------------------------------------------------
611    {
612        let __sup = Arc::clone(ctx);
613        table.register(libc::SYS_socket, move |cx: &HandlerCtx| {
614            let notif = cx.notif;
615            let sup = Arc::clone(&__sup);
616            async move {
617                let state = Arc::clone(&sup.netlink);
618                crate::netlink::handlers::handle_socket(&notif, &state).await
619            }
620        });
621        let __sup = Arc::clone(ctx);
622        table.register(libc::SYS_bind, move |cx: &HandlerCtx| {
623            let notif = cx.notif;
624            let sup = Arc::clone(&__sup);
625            async move {
626                let state = Arc::clone(&sup.netlink);
627                crate::netlink::handlers::handle_bind(&notif, &state).await
628            }
629        });
630        let __sup = Arc::clone(ctx);
631        table.register(libc::SYS_getsockname, move |cx: &HandlerCtx| {
632            let notif = cx.notif;
633            let sup = Arc::clone(&__sup);
634            let notif_fd = cx.notif_fd;
635            async move {
636                let state = Arc::clone(&sup.netlink);
637                crate::netlink::handlers::handle_getsockname(&notif, &state, notif_fd).await
638            }
639        });
640        // Zero the msg_name region on recv so glibc sees nl_pid=0
641        // (the kernel only writes sun_family on unix socketpair recvmsg,
642        //  leaving the rest of the buffer as stack garbage otherwise).
643        for &nr in &[libc::SYS_recvfrom, libc::SYS_recvmsg] {
644            let __sup = Arc::clone(ctx);
645            table.register(nr, move |cx: &HandlerCtx| {
646                let notif = cx.notif;
647                let sup = Arc::clone(&__sup);
648                let notif_fd = cx.notif_fd;
649                async move {
650                    let state = Arc::clone(&sup.netlink);
651                    crate::netlink::handlers::handle_netlink_recvmsg(&notif, &state, notif_fd).await
652                }
653            });
654        }
655        // Unregister on close so the (pid, fd) slot isn't left in the
656        // cookie set once the child reuses the fd for something else.
657        let __sup = Arc::clone(ctx);
658        table.register(libc::SYS_close, move |cx: &HandlerCtx| {
659            let notif = cx.notif;
660            let sup = Arc::clone(&__sup);
661            async move {
662                let state = Arc::clone(&sup.netlink);
663                crate::netlink::handlers::handle_close(&notif, &state).await
664            }
665        });
666    }
667
668    // ------------------------------------------------------------------
669    // Bind — on-behalf
670    // ------------------------------------------------------------------
671    if policy.port_remap || policy.has_net_allowlist || policy.has_bind_denylist {
672        let __sup = Arc::clone(ctx);
673        table.register(libc::SYS_bind, move |cx: &HandlerCtx| {
674            let notif = cx.notif;
675            let sup = Arc::clone(&__sup);
676            let notif_fd = cx.notif_fd;
677            async move {
678                crate::port_remap::handle_bind(&notif, &sup.network, notif_fd).await
679            }
680        });
681    }
682
683    // ------------------------------------------------------------------
684    // getsockname — port remap
685    // ------------------------------------------------------------------
686    if policy.port_remap {
687        let __sup = Arc::clone(ctx);
688        table.register(libc::SYS_getsockname, move |cx: &HandlerCtx| {
689            let notif = cx.notif;
690            let sup = Arc::clone(&__sup);
691            let notif_fd = cx.notif_fd;
692            async move {
693                crate::port_remap::handle_getsockname(&notif, &sup.network, notif_fd).await
694            }
695        });
696    }
697
698    // ------------------------------------------------------------------
699    // Pending user handlers — appended after builtins so builtin handlers
700    // keep their security-critical priority (chroot path normalization,
701    // COW writes, resource accounting).
702    // ------------------------------------------------------------------
703    for (nr, h) in pending_handlers {
704        table.register_arc(nr, h);
705    }
706
707    table
708}
709
710// ============================================================
711// Chroot handler registration
712// ============================================================
713
714fn register_chroot_handlers(
715    table: &mut DispatchTable,
716    policy: &Arc<NotifPolicy>,
717    ctx: &Arc<SupervisorCtx>,
718) {
719    use crate::chroot::dispatch::ChrootCtx;
720
721    // Helper macro — produces a closure satisfying Handler via blanket impl.
722    // The closure clones `policy` (Arc) before the async block; inside the
723    // async block it borrows fields of that cloned Arc to build `ChrootCtx`.
724    macro_rules! chroot_handler {
725        ($policy:expr, $handler:expr) => {{
726            let policy = Arc::clone($policy);
727            let chroot_state = Arc::clone(&ctx.chroot);
728            let cow_state = Arc::clone(&ctx.cow);
729            move |cx: &HandlerCtx| {
730                let notif = cx.notif;
731                let chroot_state = Arc::clone(&chroot_state);
732                let cow_state = Arc::clone(&cow_state);
733                let notif_fd = cx.notif_fd;
734                let policy = Arc::clone(&policy);
735                async move {
736                    let chroot_ctx = ChrootCtx {
737                        root: policy.chroot_root.as_ref().unwrap(),
738                        readable: &policy.chroot_readable,
739                        writable: &policy.chroot_writable,
740                        denied: &policy.chroot_denied,
741                        mounts: &policy.chroot_mounts,
742                    };
743                    $handler(&notif, &chroot_state, &cow_state, notif_fd, &chroot_ctx).await
744                }
745            }
746        }};
747    }
748
749    // Same shape for fall-through variants (semantically identical here;
750    // kept separate for symmetry with the old code).
751    macro_rules! chroot_handler_fallthrough {
752        ($policy:expr, $handler:expr) => {{
753            let policy = Arc::clone($policy);
754            let chroot_state = Arc::clone(&ctx.chroot);
755            let cow_state = Arc::clone(&ctx.cow);
756            move |cx: &HandlerCtx| {
757                let notif = cx.notif;
758                let chroot_state = Arc::clone(&chroot_state);
759                let cow_state = Arc::clone(&cow_state);
760                let notif_fd = cx.notif_fd;
761                let policy = Arc::clone(&policy);
762                async move {
763                    let chroot_ctx = ChrootCtx {
764                        root: policy.chroot_root.as_ref().unwrap(),
765                        readable: &policy.chroot_readable,
766                        writable: &policy.chroot_writable,
767                        denied: &policy.chroot_denied,
768                        mounts: &policy.chroot_mounts,
769                    };
770                    $handler(&notif, &chroot_state, &cow_state, notif_fd, &chroot_ctx).await
771                }
772            }
773        }};
774    }
775
776    // openat — fallthrough if Continue
777    table.register(libc::SYS_openat, chroot_handler_fallthrough!(policy,
778        crate::chroot::dispatch::handle_chroot_open));
779
780    // open (legacy) — fallthrough if Continue
781    if let Some(open) = arch::sys_open() {
782        table.register(open, chroot_handler_fallthrough!(policy,
783            crate::chroot::dispatch::handle_chroot_legacy_open));
784    }
785
786    // execve, execveat — unconditional return
787    for &nr in &[libc::SYS_execve, libc::SYS_execveat] {
788        table.register(nr, chroot_handler!(policy,
789            crate::chroot::dispatch::handle_chroot_exec));
790    }
791
792    // Modern write syscalls
793    for &nr in &[
794        libc::SYS_unlinkat, libc::SYS_mkdirat, libc::SYS_renameat2,
795        libc::SYS_symlinkat, libc::SYS_linkat, libc::SYS_fchmodat,
796        libc::SYS_fchownat, libc::SYS_truncate,
797    ] {
798        table.register(nr, chroot_handler!(policy,
799            crate::chroot::dispatch::handle_chroot_write));
800    }
801
802    // Legacy write syscalls
803    if let Some(nr) = arch::sys_unlink() {
804        table.register(nr, chroot_handler!(policy,
805            crate::chroot::dispatch::handle_chroot_legacy_unlink));
806    }
807    if let Some(nr) = arch::sys_rmdir() {
808        table.register(nr, chroot_handler!(policy,
809            crate::chroot::dispatch::handle_chroot_legacy_rmdir));
810    }
811    if let Some(nr) = arch::sys_mkdir() {
812        table.register(nr, chroot_handler!(policy,
813            crate::chroot::dispatch::handle_chroot_legacy_mkdir));
814    }
815    if let Some(nr) = arch::sys_rename() {
816        table.register(nr, chroot_handler!(policy,
817            crate::chroot::dispatch::handle_chroot_legacy_rename));
818    }
819    if let Some(nr) = arch::sys_symlink() {
820        table.register(nr, chroot_handler!(policy,
821            crate::chroot::dispatch::handle_chroot_legacy_symlink));
822    }
823    if let Some(nr) = arch::sys_link() {
824        table.register(nr, chroot_handler!(policy,
825            crate::chroot::dispatch::handle_chroot_legacy_link));
826    }
827    if let Some(nr) = arch::sys_chmod() {
828        table.register(nr, chroot_handler!(policy,
829            crate::chroot::dispatch::handle_chroot_legacy_chmod));
830    }
831
832    // chown — non-follow
833    if let Some(chown) = arch::sys_chown() {
834        let policy_for_chown = Arc::clone(policy);
835        let __sup = Arc::clone(ctx);
836        table.register(chown, move |cx: &HandlerCtx| {
837            let notif = cx.notif;
838            let sup = Arc::clone(&__sup);
839            let notif_fd = cx.notif_fd;
840            let policy = Arc::clone(&policy_for_chown);
841            async move {
842                let chroot_ctx = ChrootCtx {
843                    root: policy.chroot_root.as_ref().unwrap(),
844                    readable: &policy.chroot_readable,
845                    writable: &policy.chroot_writable,
846                    denied: &policy.chroot_denied,
847                    mounts: &policy.chroot_mounts,
848                };
849                crate::chroot::dispatch::handle_chroot_legacy_chown(&notif, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx, false).await
850            }
851        });
852    }
853
854    // lchown — follow
855    if let Some(lchown) = arch::sys_lchown() {
856        let policy_for_lchown = Arc::clone(policy);
857        let __sup = Arc::clone(ctx);
858        table.register(lchown, move |cx: &HandlerCtx| {
859            let notif = cx.notif;
860            let sup = Arc::clone(&__sup);
861            let notif_fd = cx.notif_fd;
862            let policy = Arc::clone(&policy_for_lchown);
863            async move {
864                let chroot_ctx = ChrootCtx {
865                    root: policy.chroot_root.as_ref().unwrap(),
866                    readable: &policy.chroot_readable,
867                    writable: &policy.chroot_writable,
868                    denied: &policy.chroot_denied,
869                    mounts: &policy.chroot_mounts,
870                };
871                crate::chroot::dispatch::handle_chroot_legacy_chown(&notif, &sup.chroot, &sup.cow, notif_fd, &chroot_ctx, true).await
872            }
873        });
874    }
875
876    // stat family
877    for &nr in &[
878        libc::SYS_newfstatat,
879        libc::SYS_faccessat,
880        arch::SYS_FACCESSAT2,
881    ] {
882        table.register(nr, chroot_handler!(policy,
883            crate::chroot::dispatch::handle_chroot_stat));
884    }
885
886    // Legacy stat
887    if let Some(nr) = arch::sys_stat() {
888        table.register(nr, chroot_handler!(policy,
889            crate::chroot::dispatch::handle_chroot_legacy_stat));
890    }
891    if let Some(nr) = arch::sys_lstat() {
892        table.register(nr, chroot_handler!(policy,
893            crate::chroot::dispatch::handle_chroot_legacy_lstat));
894    }
895    if let Some(nr) = arch::sys_access() {
896        table.register(nr, chroot_handler!(policy,
897            crate::chroot::dispatch::handle_chroot_legacy_access));
898    }
899
900    // statx
901    table.register(libc::SYS_statx, chroot_handler!(policy,
902        crate::chroot::dispatch::handle_chroot_statx));
903
904    // readlink
905    table.register(libc::SYS_readlinkat, chroot_handler!(policy,
906        crate::chroot::dispatch::handle_chroot_readlink));
907    if let Some(nr) = arch::sys_readlink() {
908        table.register(nr, chroot_handler!(policy,
909            crate::chroot::dispatch::handle_chroot_legacy_readlink));
910    }
911
912    // getdents
913    let mut getdents_nrs = vec![libc::SYS_getdents64];
914    if let Some(getdents) = arch::sys_getdents() {
915        getdents_nrs.push(getdents);
916    }
917    for nr in getdents_nrs {
918        table.register(nr, chroot_handler!(policy,
919            crate::chroot::dispatch::handle_chroot_getdents));
920    }
921
922    // chdir, getcwd, statfs, utimensat
923    table.register(libc::SYS_chdir as i64, chroot_handler!(policy,
924        crate::chroot::dispatch::handle_chroot_chdir));
925    table.register(libc::SYS_getcwd as i64, chroot_handler!(policy,
926        crate::chroot::dispatch::handle_chroot_getcwd));
927    table.register(libc::SYS_statfs as i64, chroot_handler!(policy,
928        crate::chroot::dispatch::handle_chroot_statfs));
929    table.register(libc::SYS_utimensat as i64, chroot_handler!(policy,
930        crate::chroot::dispatch::handle_chroot_utimensat));
931
932    // xattr family (path-based) — get/set/list/remove and their l* variants
933    for &nr in &[
934        libc::SYS_getxattr, libc::SYS_lgetxattr,
935        libc::SYS_setxattr, libc::SYS_lsetxattr,
936        libc::SYS_listxattr, libc::SYS_llistxattr,
937        libc::SYS_removexattr, libc::SYS_lremovexattr,
938    ] {
939        table.register(nr, chroot_handler!(policy,
940            crate::chroot::dispatch::handle_chroot_xattr));
941    }
942}
943
944// ============================================================
945// COW handler registration
946// ============================================================
947
948fn register_cow_handlers(table: &mut DispatchTable, ctx: &Arc<SupervisorCtx>) {
949    // Helper that captures `ctx.cow` and `ctx.processes` once at table-build
950    // time, then re-clones the per-handler `Arc`s on each invocation.
951    macro_rules! cow_call {
952        ($handler:expr) => {{
953            let cow_state = Arc::clone(&ctx.cow);
954            let processes_state = Arc::clone(&ctx.processes);
955            move |cx: &HandlerCtx| {
956                let notif = cx.notif;
957                let cow_state = Arc::clone(&cow_state);
958                let processes_state = Arc::clone(&processes_state);
959                let notif_fd = cx.notif_fd;
960                async move {
961                    $handler(&notif, &cow_state, &processes_state, notif_fd).await
962                }
963            }
964        }};
965    }
966
967    // Write syscalls (*at variants + legacy)
968    let mut write_nrs = vec![
969        libc::SYS_unlinkat, libc::SYS_mkdirat, libc::SYS_renameat2,
970        libc::SYS_symlinkat, libc::SYS_linkat, libc::SYS_fchmodat,
971        libc::SYS_fchownat, libc::SYS_truncate,
972    ];
973    write_nrs.extend([
974        arch::sys_unlink(), arch::sys_rmdir(), arch::sys_mkdir(), arch::sys_rename(),
975        arch::sys_symlink(), arch::sys_link(), arch::sys_chmod(), arch::sys_chown(),
976        arch::sys_lchown(),
977    ].into_iter().flatten());
978    for nr in write_nrs {
979        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_write));
980    }
981
982    table.register(libc::SYS_utimensat, cow_call!(crate::cow::dispatch::handle_cow_utimensat));
983
984    let mut access_nrs = vec![libc::SYS_faccessat, arch::SYS_FACCESSAT2];
985    access_nrs.extend(arch::sys_access());
986    for nr in access_nrs {
987        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_access));
988    }
989
990    let mut open_nrs = vec![libc::SYS_openat];
991    open_nrs.extend(arch::sys_open());
992    for nr in open_nrs {
993        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_open));
994    }
995
996    let mut stat_nrs = vec![libc::SYS_newfstatat, libc::SYS_faccessat];
997    stat_nrs.extend([arch::sys_stat(), arch::sys_lstat(), arch::sys_access()].into_iter().flatten());
998    for nr in stat_nrs {
999        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_stat));
1000    }
1001
1002    table.register(libc::SYS_statx, cow_call!(crate::cow::dispatch::handle_cow_statx));
1003
1004    let mut readlink_nrs = vec![libc::SYS_readlinkat];
1005    readlink_nrs.extend(arch::sys_readlink());
1006    for nr in readlink_nrs {
1007        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_readlink));
1008    }
1009
1010    let mut getdents_nrs = vec![libc::SYS_getdents64];
1011    getdents_nrs.extend(arch::sys_getdents());
1012    for nr in getdents_nrs {
1013        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_getdents));
1014    }
1015
1016    table.register(libc::SYS_chdir, cow_call!(crate::cow::dispatch::handle_cow_chdir));
1017    table.register(libc::SYS_getcwd, cow_call!(crate::cow::dispatch::handle_cow_getcwd));
1018
1019    for &nr in &[libc::SYS_execve, libc::SYS_execveat] {
1020        table.register(nr, cow_call!(crate::cow::dispatch::handle_cow_exec));
1021    }
1022}
1023
1024// ============================================================
1025// Tests
1026// ============================================================
1027
#[cfg(test)]
mod handler_tests {
    //! Unit tests for the user-supplied handler extension API.
    //!
    //! Drive the actual `DispatchTable::dispatch` walker with synthetic
    //! notifications.  Handler closures here ignore the kernel side (the
    //! notif fd passed to `dispatch` is `-1`, there is no real child), so
    //! the dispatch invariants under test (registration order, chain
    //! short-circuit on first non-`Continue`, append-after-builtin
    //! placement) are exercised end-to-end without needing a live
    //! Landlock+seccomp sandbox — those scenarios live under
    //! `crates/sandlock-core/tests/integration/test_handlers.rs`.
    use super::*;
    use crate::sys::structs::{SeccompData, SeccompNotif};
    use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};

    /// Builds a synthetic notification for syscall `nr`: pid 1, every other
    /// field zeroed.
    fn fake_notif(nr: i32) -> SeccompNotif {
        SeccompNotif {
            id: 0,
            pid: 1,
            flags: 0,
            data: SeccompData {
                nr,
                arch: 0,
                instruction_pointer: 0,
                args: [0; 6],
            },
        }
    }

    /// All registered handlers run, in registration order, when each
    /// returns `Continue`.  Verifies that `register` appends to the
    /// chain and that `dispatch` walks it front-to-back.
    #[tokio::test]
    async fn dispatch_walks_chain_in_registration_order() {
        let mut table = DispatchTable::new();
        let order = Arc::new(std::sync::Mutex::new(Vec::<u8>::new()));

        for tag in [1u8, 2u8, 3u8] {
            let order_clone = Arc::clone(&order);
            table.register(libc::SYS_openat, move |_cx: &HandlerCtx| {
                let order = Arc::clone(&order_clone);
                async move {
                    order.lock().unwrap().push(tag);
                    NotifAction::Continue
                }
            });
        }

        let action = table
            .dispatch(fake_notif(libc::SYS_openat as i32), -1)
            .await;

        assert!(matches!(action, NotifAction::Continue));
        let recorded = order.lock().unwrap();
        assert_eq!(
            *recorded,
            [1u8, 2u8, 3u8],
            "every handler must run, in the order it was registered"
        );
    }

    /// Append-after-builtin contract: when a user handler is registered
    /// after a builtin, dispatch invokes the builtin first and the
    /// user handler second.  This is the security-load-bearing invariant —
    /// a builtin returning a non-`Continue` `NotifAction` must short-circuit
    /// before the user handler runs (covered by
    /// `dispatch_stops_at_first_non_continue`); when the builtin returns
    /// `Continue`, the user handler observes the post-builtin view.
    #[tokio::test]
    async fn dispatch_runs_builtin_before_extra() {
        let mut table = DispatchTable::new();
        let order = Arc::new(std::sync::Mutex::new(Vec::<u8>::new()));

        // Builtin first, tagged 'B'.
        let order_builtin = Arc::clone(&order);
        table.register(libc::SYS_openat, move |_cx: &HandlerCtx| {
            let order = Arc::clone(&order_builtin);
            async move {
                order.lock().unwrap().push(b'B');
                NotifAction::Continue
            }
        });

        // Extra after, tagged 'E'.  Registered after builtin to mirror
        // append-after-builtin placement from `build_dispatch_table`.
        let order_extra = Arc::clone(&order);
        table.register(libc::SYS_openat, move |_cx: &HandlerCtx| {
            let order = Arc::clone(&order_extra);
            async move {
                order.lock().unwrap().push(b'E');
                NotifAction::Continue
            }
        });

        let action = table
            .dispatch(fake_notif(libc::SYS_openat as i32), -1)
            .await;

        assert!(matches!(action, NotifAction::Continue));
        let recorded = order.lock().unwrap();
        assert_eq!(
            *recorded,
            [b'B', b'E'],
            "builtin must run before extra (insertion order preserved)"
        );
    }

    /// First non-`Continue` wins: a handler returning `Errno` short-circuits
    /// the chain, and subsequent handlers must not run.  This is the
    /// invariant that prevents a user-supplied extra from being observed
    /// (or, in the inverse direction, prevents an extra's `Errno` from
    /// being silently overridden by a later handler that happens to also
    /// be registered for the same syscall).
    #[tokio::test]
    async fn dispatch_stops_at_first_non_continue() {
        let mut table = DispatchTable::new();
        let calls = Arc::new(AtomicUsize::new(0));

        // First handler — returns Errno, must terminate the chain.
        let calls_first = Arc::clone(&calls);
        table.register(libc::SYS_openat, move |_cx: &HandlerCtx| {
            let calls = Arc::clone(&calls_first);
            async move {
                calls.fetch_add(1, Ordering::SeqCst);
                NotifAction::Errno(libc::EACCES)
            }
        });

        // Second handler — must NOT be called.
        let calls_second = Arc::clone(&calls);
        table.register(libc::SYS_openat, move |_cx: &HandlerCtx| {
            let calls = Arc::clone(&calls_second);
            async move {
                calls.fetch_add(1, Ordering::SeqCst);
                NotifAction::Continue
            }
        });

        let action = table
            .dispatch(fake_notif(libc::SYS_openat as i32), -1)
            .await;

        match action {
            NotifAction::Errno(e) => assert_eq!(e, libc::EACCES),
            other => panic!("expected Errno(EACCES), got {:?}", other),
        }
        assert_eq!(
            calls.load(Ordering::SeqCst),
            1,
            "second handler must not run after first returned non-Continue"
        );
    }

    /// A handler returning `Defer` is non-`Continue`, so it must short-circuit
    /// the chain exactly like `Errno`/`ReturnValue`: later handlers on the same
    /// syscall do not run.  Deferral is therefore a terminal decision.
    #[tokio::test]
    async fn dispatch_short_circuits_on_defer() {
        let mut table = DispatchTable::new();
        let later_ran = Arc::new(AtomicUsize::new(0));

        table.register(
            libc::SYS_openat,
            |_cx: &HandlerCtx| async { NotifAction::defer(async { NotifAction::ReturnValue(1) }) },
        );

        let later = Arc::clone(&later_ran);
        table.register(libc::SYS_openat, move |_cx: &HandlerCtx| {
            let later = Arc::clone(&later);
            async move {
                later.fetch_add(1, Ordering::SeqCst);
                NotifAction::Continue
            }
        });

        let action = table
            .dispatch(fake_notif(libc::SYS_openat as i32), -1)
            .await;

        assert!(
            matches!(action, NotifAction::Defer(_)),
            "dispatch must return the Defer produced by the first handler"
        );
        assert_eq!(
            later_ran.load(Ordering::SeqCst),
            0,
            "Defer must short-circuit the chain like any non-Continue action"
        );
    }

    /// `validate_handler_syscalls_against_policy` must reject handlers whose
    /// syscall is in the policy's user-specified blocklist, with the same
    /// rationale as DEFAULT_BLOCKLIST: the BPF program emits notif JEQs before
    /// deny JEQs, so a user handler returning `Continue` would translate into
    /// `SECCOMP_USER_NOTIF_FLAG_CONTINUE` and silently bypass the kernel-level
    /// block.
    ///
    /// Uses `mremap` because it is in `syscall_name_to_nr` but not in
    /// `DEFAULT_BLOCKLIST_SYSCALLS` — putting it into `extra_deny_syscalls` is the only
    /// way it ends up on the extra blocklist, so the test isolates the user-supplied
    /// path of `blocklist_syscall_numbers` from the default branch covered by
    /// `handler_on_default_blocklist_syscall_is_rejected`.
    ///
    /// Pure-logic counterpart to the integration test of the same name —
    /// runs without a live sandbox so the contract is enforced even on
    /// hosts where seccomp integration tests are skipped.
    #[test]
    fn validate_extras_rejects_user_specified_blocklist() {
        let policy = crate::sandbox::Sandbox::builder()
            .extra_deny_syscalls(vec!["mremap".into()])
            .build()
            .expect("policy builds");

        let result = validate_handler_syscalls_against_policy(&[libc::SYS_mremap], &policy);
        assert_eq!(
            result,
            Err(libc::SYS_mremap),
            "handler on user-specified blocklist must be rejected, naming the offending syscall"
        );
    }

    // ---- Handler trait tests --------------------------------------

    /// A plain closure satisfies `Handler` through the blanket impl: calling
    /// `.handle()` on it directly runs the closure body once and yields its
    /// action.
    #[tokio::test]
    async fn handler_via_blanket_impl_dispatches_closures() {
        let counter = Arc::new(AtomicU64::new(0));
        let counter_clone = Arc::clone(&counter);

        let h = move |cx: &HandlerCtx| {
            let counter = Arc::clone(&counter_clone);
            async move {
                counter.fetch_add(1, Ordering::SeqCst);
                let _ = cx.notif.pid; // touch ctx so it's exercised
                NotifAction::Continue
            }
        };

        let notif = fake_notif(libc::SYS_openat as i32);
        let cx = HandlerCtx { notif, notif_fd: -1 };

        let action = h.handle(&cx).await;
        assert!(matches!(action, NotifAction::Continue));
        assert_eq!(counter.load(Ordering::SeqCst), 1);
    }

    /// Struct-based `Handler` registered through `DispatchTable::register_arc`
    /// MUST be invoked when `dispatch()` walks the chain — and `&self`
    /// state MUST persist across notifications.  Bridges the gap between
    /// the trait-shape unit tests above (which call `.handle()` directly)
    /// and the dispatch ordering tests (which use closures via blanket
    /// impl).  Without this test, a regression where the dispatch walker
    /// dropped `Arc<dyn Handler>` calls but kept closures working would
    /// not be caught at the unit layer.
    #[tokio::test]
    async fn dispatch_invokes_struct_handler_with_persistent_self_state() {
        struct StructHandler {
            calls: AtomicU64,
        }

        impl Handler for StructHandler {
            fn handle<'a>(
                &'a self,
                _cx: &'a HandlerCtx,
            ) -> std::pin::Pin<Box<dyn std::future::Future<Output = NotifAction> + Send + 'a>> {
                Box::pin(async move {
                    self.calls.fetch_add(1, Ordering::SeqCst);
                    NotifAction::Continue
                })
            }
        }

        let mut table = DispatchTable::new();
        let handler = Arc::new(StructHandler {
            calls: AtomicU64::new(0),
        });
        // Keep our own handle so the call counter can be read back afterwards.
        table.register_arc(libc::SYS_openat, Arc::clone(&handler) as Arc<dyn Handler>);

        let notif = fake_notif(libc::SYS_openat as i32);

        // Three independent dispatches against the same registered handler.
        // Walker MUST hit the struct's handle() each time, accumulating
        // state on &self.calls.
        for _ in 0..3 {
            let action = table.dispatch(notif, -1).await;
            assert!(matches!(action, NotifAction::Continue));
        }

        assert_eq!(
            handler.calls.load(Ordering::SeqCst),
            3,
            "dispatch must invoke the struct-based handler on every walk"
        );
    }
}