Skip to main content

sandlock_core/seccomp/
dispatch.rs

// Table-driven syscall dispatch — routes seccomp notifications to handler chains.
//
// Each syscall number maps to an ordered chain of handlers.  The chain is walked
// until a handler returns a non-Continue action (or the chain is exhausted, in
// which case Continue is returned).
6
7use std::collections::HashMap;
8use std::future::Future;
9use std::os::unix::io::RawFd;
10use std::pin::Pin;
11use std::sync::Arc;
12
13use super::ctx::SupervisorCtx;
14use super::notif::{NotifAction, NotifPolicy};
15use super::state::ResourceState;
16use crate::sys::structs::SeccompNotif;
17
18use tokio::sync::Mutex;
19
20// ============================================================
21// Types
22// ============================================================
23
24/// An async handler function.  Receives the notification, the supervisor
25/// context, and the notif fd.  Returns a `NotifAction`.
26pub type HandlerFn = Box<
27    dyn Fn(SeccompNotif, Arc<SupervisorCtx>, RawFd) -> Pin<Box<dyn Future<Output = NotifAction> + Send>>
28        + Send
29        + Sync,
30>;
31
32/// Ordered chain of handlers for a single syscall number.
33struct HandlerChain {
34    handlers: Vec<HandlerFn>,
35}
36
37/// Maps syscall numbers to handler chains.
38pub struct DispatchTable {
39    chains: HashMap<i64, HandlerChain>,
40}
41
42impl DispatchTable {
43    /// Create an empty dispatch table.
44    pub fn new() -> Self {
45        Self {
46            chains: HashMap::new(),
47        }
48    }
49
50    /// Register a handler for the given syscall number.  Handlers are called in
51    /// registration order; the first non-Continue result wins.
52    pub fn register(&mut self, syscall_nr: i64, handler: HandlerFn) {
53        self.chains
54            .entry(syscall_nr)
55            .or_insert_with(|| HandlerChain {
56                handlers: Vec::new(),
57            })
58            .handlers
59            .push(handler);
60    }
61
62    /// Dispatch a notification through the handler chain for its syscall number.
63    pub async fn dispatch(
64        &self,
65        notif: SeccompNotif,
66        ctx: &Arc<SupervisorCtx>,
67        notif_fd: RawFd,
68    ) -> NotifAction {
69        let nr = notif.data.nr as i64;
70        if let Some(chain) = self.chains.get(&nr) {
71            for handler in &chain.handlers {
72                let action = handler(notif, Arc::clone(ctx), notif_fd).await;
73                if !matches!(action, NotifAction::Continue) {
74                    return action;
75                }
76            }
77        }
78        NotifAction::Continue
79    }
80}
81
82// ============================================================
83// Table builder — mechanical translation of old dispatch()
84// ============================================================
85
86/// Build the dispatch table from a `NotifPolicy`.  Every branch from the old
87/// monolithic `dispatch()` function is translated into a `table.register()` call.
88/// Priority is preserved by registration order.
89pub fn build_dispatch_table(
90    policy: &Arc<NotifPolicy>,
91    resource: &Arc<Mutex<ResourceState>>,
92) -> DispatchTable {
93    let mut table = DispatchTable::new();
94
95    // ------------------------------------------------------------------
96    // Fork/clone family (always on)
97    // ------------------------------------------------------------------
98    for &nr in &[libc::SYS_clone, libc::SYS_clone3, libc::SYS_vfork] {
99        let policy = Arc::clone(policy);
100        let resource = Arc::clone(resource);
101        table.register(nr, Box::new(move |notif, ctx, _notif_fd| {
102            let policy = Arc::clone(&policy);
103            let resource = Arc::clone(&resource);
104            let procfs_inner = Arc::clone(&ctx.procfs);
105            Box::pin(async move {
106                crate::resource::handle_fork(&notif, &resource, &procfs_inner, &policy).await
107            })
108        }));
109    }
110
111    // ------------------------------------------------------------------
112    // Wait family (always on)
113    // ------------------------------------------------------------------
114    for &nr in &[libc::SYS_wait4, libc::SYS_waitid] {
115        let resource = Arc::clone(resource);
116        table.register(nr, Box::new(move |notif, _ctx, _notif_fd| {
117            let resource = Arc::clone(&resource);
118            Box::pin(async move {
119                crate::resource::handle_wait(&notif, &resource).await
120            })
121        }));
122    }
123
124    // ------------------------------------------------------------------
125    // Memory management (conditional on has_memory_limit)
126    // ------------------------------------------------------------------
127    if policy.has_memory_limit {
128        for &nr in &[
129            libc::SYS_mmap, libc::SYS_munmap, libc::SYS_brk,
130            libc::SYS_mremap, libc::SYS_shmget,
131        ] {
132            let policy = Arc::clone(policy);
133            let resource = Arc::clone(resource);
134            table.register(nr, Box::new(move |notif, _ctx, _notif_fd| {
135                let policy = Arc::clone(&policy);
136                let resource = Arc::clone(&resource);
137                Box::pin(async move {
138                    crate::resource::handle_memory(&notif, &resource, &policy).await
139                })
140            }));
141        }
142    }
143
144    // ------------------------------------------------------------------
145    // Network (conditional on has_net_allowlist || has_http_acl)
146    // ------------------------------------------------------------------
147    if policy.has_net_allowlist || policy.has_http_acl {
148        for &nr in &[libc::SYS_connect, libc::SYS_sendto, libc::SYS_sendmsg] {
149            table.register(nr, Box::new(|notif, ctx, notif_fd| {
150                Box::pin(async move {
151                    crate::network::handle_net(&notif, &ctx, notif_fd).await
152                })
153            }));
154        }
155    }
156
157    // ------------------------------------------------------------------
158    // Deterministic random — getrandom()
159    // ------------------------------------------------------------------
160    if policy.has_random_seed {
161        table.register(libc::SYS_getrandom, Box::new(|notif, ctx, notif_fd| {
162            Box::pin(async move {
163                let mut tr = ctx.time_random.lock().await;
164                if let Some(ref mut rng) = tr.random_state {
165                    crate::random::handle_getrandom(&notif, rng, notif_fd)
166                } else {
167                    NotifAction::Continue
168                }
169            })
170        }));
171    }
172
173    // ------------------------------------------------------------------
174    // Deterministic random — /dev/urandom opens (openat)
175    // ------------------------------------------------------------------
176    if policy.has_random_seed {
177        table.register(libc::SYS_openat, Box::new(|notif, ctx, notif_fd| {
178            Box::pin(async move {
179                let mut tr = ctx.time_random.lock().await;
180                if let Some(ref mut rng) = tr.random_state {
181                    if let Some(action) = crate::random::handle_random_open(&notif, rng, notif_fd) {
182                        return action;
183                    }
184                }
185                NotifAction::Continue
186            })
187        }));
188    }
189
190    // ------------------------------------------------------------------
191    // Timer adjustment (conditional on has_time_start)
192    // ------------------------------------------------------------------
193    if policy.has_time_start {
194        let time_offset = policy.time_offset;
195        for &nr in &[
196            libc::SYS_clock_nanosleep as i64,
197            libc::SYS_timerfd_settime as i64,
198            libc::SYS_timer_settime as i64,
199        ] {
200            table.register(nr, Box::new(move |notif, _ctx, notif_fd| {
201                Box::pin(async move {
202                    crate::time::handle_timer(&notif, time_offset, notif_fd)
203                })
204            }));
205        }
206    }
207
208    // ------------------------------------------------------------------
209    // Chroot path interception (before COW)
210    // ------------------------------------------------------------------
211    if policy.chroot_root.is_some() {
212        register_chroot_handlers(&mut table, policy);
213    }
214
215    // ------------------------------------------------------------------
216    // COW filesystem interception
217    // ------------------------------------------------------------------
218    if policy.cow_enabled {
219        register_cow_handlers(&mut table);
220    }
221
222    // ------------------------------------------------------------------
223    // /proc virtualization (always on)
224    // ------------------------------------------------------------------
225    {
226        let policy = Arc::clone(policy);
227        let resource = Arc::clone(resource);
228        table.register(libc::SYS_openat, Box::new(move |notif, ctx, notif_fd| {
229            let policy = Arc::clone(&policy);
230            let resource = Arc::clone(&resource);
231            let procfs_inner = Arc::clone(&ctx.procfs);
232            let network = Arc::clone(&ctx.network);
233            Box::pin(async move {
234                crate::procfs::handle_proc_open(&notif, &procfs_inner, &resource, &network, &policy, notif_fd).await
235            })
236        }));
237    }
238    for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] {
239        let policy = Arc::clone(policy);
240        table.register(nr, Box::new(move |notif, ctx, notif_fd| {
241            let policy = Arc::clone(&policy);
242            let procfs_inner = Arc::clone(&ctx.procfs);
243            Box::pin(async move {
244                crate::procfs::handle_getdents(&notif, &procfs_inner, &policy, notif_fd).await
245            })
246        }));
247    }
248
249    // ------------------------------------------------------------------
250    // Virtual CPU count
251    // ------------------------------------------------------------------
252    if let Some(n) = policy.num_cpus {
253        table.register(libc::SYS_sched_getaffinity, Box::new(move |notif, _ctx, notif_fd| {
254            Box::pin(async move {
255                crate::procfs::handle_sched_getaffinity(&notif, n, notif_fd)
256            })
257        }));
258    }
259
260    // ------------------------------------------------------------------
261    // Hostname virtualization
262    // ------------------------------------------------------------------
263    if let Some(ref hostname) = policy.hostname {
264        let hostname = hostname.clone();
265        let hostname2 = hostname.clone();
266        table.register(libc::SYS_uname, Box::new(move |notif, _ctx, notif_fd| {
267            let hostname = hostname.clone();
268            Box::pin(async move {
269                crate::procfs::handle_uname(&notif, &hostname, notif_fd)
270            })
271        }));
272        table.register(libc::SYS_openat, Box::new(move |notif, _ctx, notif_fd| {
273            let hostname = hostname2.clone();
274            Box::pin(async move {
275                if let Some(action) = crate::procfs::handle_hostname_open(&notif, &hostname, notif_fd) {
276                    action
277                } else {
278                    NotifAction::Continue
279                }
280            })
281        }));
282    }
283
284    // ------------------------------------------------------------------
285    // /etc/hosts virtualization (for net_allow_hosts)
286    // ------------------------------------------------------------------
287    if let Some(ref etc_hosts) = policy.virtual_etc_hosts {
288        let etc_hosts = etc_hosts.clone();
289        table.register(libc::SYS_openat, Box::new(move |notif, _ctx, notif_fd| {
290            let etc_hosts = etc_hosts.clone();
291            Box::pin(async move {
292                if let Some(action) = crate::procfs::handle_etc_hosts_open(&notif, &etc_hosts, notif_fd) {
293                    action
294                } else {
295                    NotifAction::Continue
296                }
297            })
298        }));
299    }
300
301    // ------------------------------------------------------------------
302    // Deterministic directory listing
303    // ------------------------------------------------------------------
304    if policy.deterministic_dirs {
305        for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] {
306            table.register(nr, Box::new(|notif, ctx, notif_fd| {
307                let procfs_inner = Arc::clone(&ctx.procfs);
308                Box::pin(async move {
309                    crate::procfs::handle_sorted_getdents(&notif, &procfs_inner, notif_fd).await
310                })
311            }));
312        }
313    }
314
315    // ------------------------------------------------------------------
316    // Bind — on-behalf
317    // ------------------------------------------------------------------
318    if policy.port_remap || policy.has_net_allowlist {
319        table.register(libc::SYS_bind, Box::new(|notif, ctx, notif_fd| {
320            Box::pin(async move {
321                crate::port_remap::handle_bind(&notif, &ctx.network, notif_fd).await
322            })
323        }));
324    }
325
326    // ------------------------------------------------------------------
327    // getsockname — port remap
328    // ------------------------------------------------------------------
329    if policy.port_remap {
330        table.register(libc::SYS_getsockname, Box::new(|notif, ctx, notif_fd| {
331            Box::pin(async move {
332                crate::port_remap::handle_getsockname(&notif, &ctx.network, notif_fd).await
333            })
334        }));
335    }
336
337    table
338}
339
340// ============================================================
341// Chroot handler registration
342// ============================================================
343
344fn register_chroot_handlers(table: &mut DispatchTable, policy: &Arc<NotifPolicy>) {
345    use crate::chroot::dispatch::ChrootCtx;
346
347    // Helper macro to reduce boilerplate for chroot handlers that unconditionally
348    // return (non-fallthrough).
349    macro_rules! chroot_handler {
350        ($policy:expr, $handler:expr) => {{
351            let policy = Arc::clone($policy);
352            let handler_fn: HandlerFn = Box::new(move |notif, ctx, notif_fd| {
353                let policy = Arc::clone(&policy);
354                Box::pin(async move {
355                    let chroot_ctx = ChrootCtx {
356                        root: policy.chroot_root.as_ref().unwrap(),
357                        readable: &policy.chroot_readable,
358                        writable: &policy.chroot_writable,
359                        denied: &policy.chroot_denied,
360                        mounts: &policy.chroot_mounts,
361                    };
362                    $handler(&notif, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx).await
363                })
364            });
365            handler_fn
366        }};
367    }
368
369    // Helper for chroot handlers that may fall through (return Continue).
370    macro_rules! chroot_handler_fallthrough {
371        ($policy:expr, $handler:expr) => {{
372            let policy = Arc::clone($policy);
373            let handler_fn: HandlerFn = Box::new(move |notif, ctx, notif_fd| {
374                let policy = Arc::clone(&policy);
375                Box::pin(async move {
376                    let chroot_ctx = ChrootCtx {
377                        root: policy.chroot_root.as_ref().unwrap(),
378                        readable: &policy.chroot_readable,
379                        writable: &policy.chroot_writable,
380                        denied: &policy.chroot_denied,
381                        mounts: &policy.chroot_mounts,
382                    };
383                    $handler(&notif, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx).await
384                })
385            });
386            handler_fn
387        }};
388    }
389
390    // openat — fallthrough if Continue
391    table.register(libc::SYS_openat, chroot_handler_fallthrough!(policy,
392        crate::chroot::dispatch::handle_chroot_open));
393
394    // open (legacy) — fallthrough if Continue
395    table.register(libc::SYS_open as i64, chroot_handler_fallthrough!(policy,
396        crate::chroot::dispatch::handle_chroot_legacy_open));
397
398    // execve, execveat — unconditional return
399    for &nr in &[libc::SYS_execve, libc::SYS_execveat] {
400        table.register(nr, chroot_handler!(policy,
401            crate::chroot::dispatch::handle_chroot_exec));
402    }
403
404    // Modern write syscalls
405    for &nr in &[
406        libc::SYS_unlinkat, libc::SYS_mkdirat, libc::SYS_renameat2,
407        libc::SYS_symlinkat, libc::SYS_linkat, libc::SYS_fchmodat,
408        libc::SYS_fchownat, libc::SYS_truncate,
409    ] {
410        table.register(nr, chroot_handler!(policy,
411            crate::chroot::dispatch::handle_chroot_write));
412    }
413
414    // Legacy write syscalls
415    table.register(libc::SYS_unlink as i64, chroot_handler!(policy,
416        crate::chroot::dispatch::handle_chroot_legacy_unlink));
417    table.register(libc::SYS_rmdir as i64, chroot_handler!(policy,
418        crate::chroot::dispatch::handle_chroot_legacy_rmdir));
419    table.register(libc::SYS_mkdir as i64, chroot_handler!(policy,
420        crate::chroot::dispatch::handle_chroot_legacy_mkdir));
421    table.register(libc::SYS_rename as i64, chroot_handler!(policy,
422        crate::chroot::dispatch::handle_chroot_legacy_rename));
423    table.register(libc::SYS_symlink as i64, chroot_handler!(policy,
424        crate::chroot::dispatch::handle_chroot_legacy_symlink));
425    table.register(libc::SYS_link as i64, chroot_handler!(policy,
426        crate::chroot::dispatch::handle_chroot_legacy_link));
427    table.register(libc::SYS_chmod as i64, chroot_handler!(policy,
428        crate::chroot::dispatch::handle_chroot_legacy_chmod));
429
430    // chown — non-follow
431    {
432        let policy = Arc::clone(policy);
433        table.register(libc::SYS_chown as i64, Box::new(move |notif, ctx, notif_fd| {
434            let policy = Arc::clone(&policy);
435            Box::pin(async move {
436                let chroot_ctx = ChrootCtx {
437                    root: policy.chroot_root.as_ref().unwrap(),
438                    readable: &policy.chroot_readable,
439                    writable: &policy.chroot_writable,
440                    denied: &policy.chroot_denied,
441                    mounts: &policy.chroot_mounts,
442                };
443                crate::chroot::dispatch::handle_chroot_legacy_chown(&notif, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx, false).await
444            })
445        }));
446    }
447
448    // lchown — follow
449    {
450        let policy = Arc::clone(policy);
451        table.register(libc::SYS_lchown as i64, Box::new(move |notif, ctx, notif_fd| {
452            let policy = Arc::clone(&policy);
453            Box::pin(async move {
454                let chroot_ctx = ChrootCtx {
455                    root: policy.chroot_root.as_ref().unwrap(),
456                    readable: &policy.chroot_readable,
457                    writable: &policy.chroot_writable,
458                    denied: &policy.chroot_denied,
459                    mounts: &policy.chroot_mounts,
460                };
461                crate::chroot::dispatch::handle_chroot_legacy_chown(&notif, &ctx.chroot, &ctx.cow, notif_fd, &chroot_ctx, true).await
462            })
463        }));
464    }
465
466    // stat family
467    for &nr in &[
468        libc::SYS_newfstatat,
469        libc::SYS_faccessat,
470        crate::chroot::dispatch::SYS_FACCESSAT2,
471    ] {
472        table.register(nr, chroot_handler!(policy,
473            crate::chroot::dispatch::handle_chroot_stat));
474    }
475
476    // Legacy stat
477    table.register(libc::SYS_stat as i64, chroot_handler!(policy,
478        crate::chroot::dispatch::handle_chroot_legacy_stat));
479    table.register(libc::SYS_lstat as i64, chroot_handler!(policy,
480        crate::chroot::dispatch::handle_chroot_legacy_lstat));
481    table.register(libc::SYS_access as i64, chroot_handler!(policy,
482        crate::chroot::dispatch::handle_chroot_legacy_access));
483
484    // statx
485    table.register(libc::SYS_statx, chroot_handler!(policy,
486        crate::chroot::dispatch::handle_chroot_statx));
487
488    // readlink
489    table.register(libc::SYS_readlinkat, chroot_handler!(policy,
490        crate::chroot::dispatch::handle_chroot_readlink));
491    table.register(libc::SYS_readlink as i64, chroot_handler!(policy,
492        crate::chroot::dispatch::handle_chroot_legacy_readlink));
493
494    // getdents
495    for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] {
496        table.register(nr, chroot_handler!(policy,
497            crate::chroot::dispatch::handle_chroot_getdents));
498    }
499
500    // chdir, getcwd, statfs, utimensat
501    table.register(libc::SYS_chdir as i64, chroot_handler!(policy,
502        crate::chroot::dispatch::handle_chroot_chdir));
503    table.register(libc::SYS_getcwd as i64, chroot_handler!(policy,
504        crate::chroot::dispatch::handle_chroot_getcwd));
505    table.register(libc::SYS_statfs as i64, chroot_handler!(policy,
506        crate::chroot::dispatch::handle_chroot_statfs));
507    table.register(libc::SYS_utimensat as i64, chroot_handler!(policy,
508        crate::chroot::dispatch::handle_chroot_utimensat));
509}
510
511// ============================================================
512// COW handler registration
513// ============================================================
514
515fn register_cow_handlers(table: &mut DispatchTable) {
516    // Write syscalls (*at variants + legacy)
517    for &nr in &[
518        libc::SYS_unlinkat, libc::SYS_mkdirat, libc::SYS_renameat2,
519        libc::SYS_symlinkat, libc::SYS_linkat, libc::SYS_fchmodat,
520        libc::SYS_fchownat, libc::SYS_truncate,
521        libc::SYS_unlink as i64, libc::SYS_rmdir as i64,
522        libc::SYS_mkdir as i64, libc::SYS_rename as i64,
523        libc::SYS_symlink as i64, libc::SYS_link as i64,
524        libc::SYS_chmod as i64, libc::SYS_chown as i64,
525        libc::SYS_lchown as i64,
526    ] {
527        table.register(nr, Box::new(|notif, ctx, notif_fd| {
528            let cow = Arc::clone(&ctx.cow);
529            Box::pin(async move {
530                crate::cow::dispatch::handle_cow_write(&notif, &cow, notif_fd).await
531            })
532        }));
533    }
534
535    // utimensat — unconditional return
536    table.register(libc::SYS_utimensat, Box::new(|notif, ctx, notif_fd| {
537        let cow = Arc::clone(&ctx.cow);
538        Box::pin(async move {
539            crate::cow::dispatch::handle_cow_utimensat(&notif, &cow, notif_fd).await
540        })
541    }));
542
543    // faccessat/access — fallthrough
544    for &nr in &[
545        libc::SYS_faccessat,
546        crate::cow::dispatch::SYS_FACCESSAT2,
547        libc::SYS_access as i64,
548    ] {
549        table.register(nr, Box::new(|notif, ctx, notif_fd| {
550            let cow = Arc::clone(&ctx.cow);
551            Box::pin(async move {
552                crate::cow::dispatch::handle_cow_access(&notif, &cow, notif_fd).await
553            })
554        }));
555    }
556
557    // openat/open — fallthrough
558    for &nr in &[libc::SYS_openat, libc::SYS_open as i64] {
559        table.register(nr, Box::new(|notif, ctx, notif_fd| {
560            let cow = Arc::clone(&ctx.cow);
561            Box::pin(async move {
562                crate::cow::dispatch::handle_cow_open(&notif, &cow, notif_fd).await
563            })
564        }));
565    }
566
567    // stat family — fallthrough
568    for &nr in &[
569        libc::SYS_newfstatat, libc::SYS_faccessat,
570        libc::SYS_stat as i64, libc::SYS_lstat as i64,
571        libc::SYS_access as i64,
572    ] {
573        table.register(nr, Box::new(|notif, ctx, notif_fd| {
574            let cow = Arc::clone(&ctx.cow);
575            Box::pin(async move {
576                crate::cow::dispatch::handle_cow_stat(&notif, &cow, notif_fd).await
577            })
578        }));
579    }
580
581    // statx — fallthrough
582    table.register(libc::SYS_statx, Box::new(|notif, ctx, notif_fd| {
583        let cow = Arc::clone(&ctx.cow);
584        Box::pin(async move {
585            crate::cow::dispatch::handle_cow_statx(&notif, &cow, notif_fd).await
586        })
587    }));
588
589    // readlink — fallthrough
590    for &nr in &[libc::SYS_readlinkat, libc::SYS_readlink as i64] {
591        table.register(nr, Box::new(|notif, ctx, notif_fd| {
592            let cow = Arc::clone(&ctx.cow);
593            Box::pin(async move {
594                crate::cow::dispatch::handle_cow_readlink(&notif, &cow, notif_fd).await
595            })
596        }));
597    }
598
599    // getdents — fallthrough
600    for &nr in &[libc::SYS_getdents64, libc::SYS_getdents as i64] {
601        table.register(nr, Box::new(|notif, ctx, notif_fd| {
602            let cow = Arc::clone(&ctx.cow);
603            Box::pin(async move {
604                crate::cow::dispatch::handle_cow_getdents(&notif, &cow, notif_fd).await
605            })
606        }));
607    }
608
609    // chdir — redirect to upper dir if target was created by COW
610    table.register(libc::SYS_chdir, Box::new(|notif, ctx, notif_fd| {
611        let cow = Arc::clone(&ctx.cow);
612        Box::pin(async move {
613            crate::cow::dispatch::handle_cow_chdir(&notif, &cow, notif_fd).await
614        })
615    }));
616}