1use sandbox_core::{Result, SandboxError};
4use std::collections::HashSet;
5
/// Named syscall allow-list tiers, listed from the narrowest to the widest.
///
/// According to [`SeccompProfile::description`], every tier after the first
/// is the previous tier plus an additional group of syscalls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SeccompProfile {
    /// Process bootstrap only.
    Essential,
    /// `Essential` plus signals, pipes, timers and process control.
    Minimal,
    /// `Minimal` plus file manipulation.
    IoHeavy,
    /// `IoHeavy` plus advanced scheduling/NUMA.
    Compute,
    /// `Compute` plus socket operations.
    Network,
    /// `Network` plus privileged operations.
    Unrestricted,
}

impl SeccompProfile {
    /// Returns every profile, ordered from `Essential` to `Unrestricted`.
    pub fn all() -> Vec<Self> {
        Vec::from([
            Self::Essential,
            Self::Minimal,
            Self::IoHeavy,
            Self::Compute,
            Self::Network,
            Self::Unrestricted,
        ])
    }

    /// Returns a short, human-readable summary of what the profile permits.
    pub fn description(&self) -> &'static str {
        match *self {
            Self::Essential => "Process bootstrap only (~40 syscalls)",
            Self::Minimal => "Essential + signals, pipes, timers, process control",
            Self::IoHeavy => "Minimal + file manipulation (mkdir, chmod, rename, …)",
            Self::Compute => "IoHeavy + advanced scheduling/NUMA",
            Self::Network => "Compute + socket operations",
            Self::Unrestricted => "Network + privileged operations",
        }
    }
}
51
52#[derive(Debug, Clone)]
54pub struct SeccompFilter {
55 allowed: HashSet<String>,
56 blocked: HashSet<String>,
57 kill_on_violation: bool,
58 profile: SeccompProfile,
59}
60
61impl SeccompFilter {
62 pub fn from_profile(profile: SeccompProfile) -> Self {
64 let allowed = Self::syscalls_for_profile(&profile);
65 Self {
66 allowed,
67 blocked: HashSet::new(),
68 kill_on_violation: true,
69 profile,
70 }
71 }
72
73 pub fn minimal() -> Self {
75 Self::from_profile(SeccompProfile::Minimal)
76 }
77
78 fn essential_syscalls() -> Vec<&'static str> {
80 vec![
81 "exit",
83 "exit_group",
84 "execve",
86 "execveat",
87 "brk",
89 "mmap",
90 "munmap",
91 "mprotect",
92 "madvise",
93 "openat",
95 "open",
96 "read",
97 "write",
98 "close",
99 "close_range",
100 "fstat",
102 "stat",
103 "lstat",
104 "newfstatat",
105 "statx",
106 "access",
108 "faccessat",
109 "faccessat2",
110 "lseek",
112 "readlink",
114 "readlinkat",
115 "arch_prctl",
117 "set_tid_address",
118 "set_robust_list",
119 "futex",
120 "getrandom",
121 "rseq",
122 "prlimit64",
123 "prctl",
124 "getcwd",
126 "getpid",
128 "gettid",
129 "getuid",
130 "geteuid",
131 "getgid",
132 "getegid",
133 "fcntl",
135 ]
136 }
137
138 fn minimal_extras() -> Vec<&'static str> {
140 vec![
141 "rt_sigaction",
143 "rt_sigprocmask",
144 "rt_sigpending",
145 "rt_sigtimedwait",
146 "rt_sigqueueinfo",
147 "rt_sigreturn",
148 "sigaltstack",
149 "kill",
150 "tkill",
151 "tgkill",
152 "clone",
154 "clone3",
155 "fork",
156 "vfork",
157 "wait4",
158 "waitpid",
159 "waitid",
160 "readv",
162 "writev",
163 "pread64",
164 "pwrite64",
165 "ioctl",
166 "flock",
167 "dup",
169 "dup2",
170 "dup3",
171 "pipe",
172 "pipe2",
173 "eventfd2",
174 "clock_gettime",
176 "clock_getres",
177 "gettimeofday",
178 "time",
179 "nanosleep",
180 "clock_nanosleep",
181 "timer_create",
183 "timer_settime",
184 "timer_gettime",
185 "timer_getoverrun",
186 "timer_delete",
187 "getppid",
189 "getresuid",
190 "getresgid",
191 "uname",
192 "umask",
193 "sysinfo",
194 "getpgrp",
195 "getpgid",
196 "setpgid",
197 "getsid",
198 "setsid",
199 "sched_getaffinity",
201 "sched_yield",
202 "getrlimit",
204 "setrlimit",
205 "getrusage",
206 "pselect6",
208 "ppoll",
209 "epoll_create1",
210 "epoll_ctl",
211 "epoll_wait",
212 "poll",
213 "select",
214 "chdir",
216 "fchdir",
217 "getdents",
218 "getdents64",
219 "mremap",
221 "mlock",
222 "munlock",
223 "mlockall",
224 "munlockall",
225 "memfd_create",
226 "get_robust_list",
228 ]
229 }
230
231 fn io_heavy_extras() -> Vec<&'static str> {
233 vec![
234 "mkdir",
235 "mkdirat",
236 "rmdir",
237 "unlink",
238 "unlinkat",
239 "rename",
240 "renameat",
241 "link",
242 "linkat",
243 "symlink",
244 "symlinkat",
245 "chmod",
246 "fchmod",
247 "fchmodat",
248 "chown",
249 "fchown",
250 "fchownat",
251 "lchown",
252 "utimes",
253 "futimesat",
254 "utime",
255 "utimensat",
256 "truncate",
257 "ftruncate",
258 "fallocate",
259 "sendfile",
260 "splice",
261 "tee",
262 "vmsplice",
263 "statfs",
264 "fstatfs",
265 "fsync",
266 "fdatasync",
267 ]
268 }
269
270 fn compute_extras() -> Vec<&'static str> {
272 vec![
273 "sched_getscheduler",
274 "sched_setscheduler",
275 "sched_getparam",
276 "sched_setparam",
277 "sched_get_priority_max",
278 "sched_get_priority_min",
279 "sched_rr_get_interval",
280 "sched_setaffinity",
281 "mbind",
282 "get_mempolicy",
283 "set_mempolicy",
284 "migrate_pages",
285 "move_pages",
286 "membarrier",
287 ]
288 }
289
290 fn network_extras() -> Vec<&'static str> {
292 vec![
293 "socket",
294 "socketpair",
295 "bind",
296 "listen",
297 "accept",
298 "accept4",
299 "connect",
300 "shutdown",
301 "sendto",
302 "recvfrom",
303 "sendmsg",
304 "recvmsg",
305 "sendmmsg",
306 "recvmmsg",
307 "setsockopt",
308 "getsockopt",
309 "getsockname",
310 "getpeername",
311 ]
312 }
313
314 fn unrestricted_extras() -> Vec<&'static str> {
316 vec![
317 "ptrace",
318 "process_vm_readv",
319 "process_vm_writev",
320 "perf_event_open",
321 "bpf",
322 "seccomp",
323 "mount",
324 "umount2",
325 "pivot_root",
326 "capget",
327 "capset",
328 "setuid",
329 "setgid",
330 "setreuid",
331 "setregid",
332 "setresuid",
333 "setresgid",
334 "getgroups",
335 "setgroups",
336 "setfsgid",
337 "setfsuid",
338 ]
339 }
340
341 fn syscalls_for_profile(profile: &SeccompProfile) -> HashSet<String> {
343 let mut syscalls = HashSet::new();
344
345 let mut add = |list: Vec<&str>| {
346 for s in list {
347 syscalls.insert(s.to_string());
348 }
349 };
350
351 add(Self::essential_syscalls());
353
354 if matches!(
355 profile,
356 SeccompProfile::Minimal
357 | SeccompProfile::IoHeavy
358 | SeccompProfile::Compute
359 | SeccompProfile::Network
360 | SeccompProfile::Unrestricted
361 ) {
362 add(Self::minimal_extras());
363 }
364
365 if matches!(
366 profile,
367 SeccompProfile::IoHeavy
368 | SeccompProfile::Compute
369 | SeccompProfile::Network
370 | SeccompProfile::Unrestricted
371 ) {
372 add(Self::io_heavy_extras());
373 }
374
375 if matches!(
376 profile,
377 SeccompProfile::Compute | SeccompProfile::Network | SeccompProfile::Unrestricted
378 ) {
379 add(Self::compute_extras());
380 }
381
382 if matches!(
383 profile,
384 SeccompProfile::Network | SeccompProfile::Unrestricted
385 ) {
386 add(Self::network_extras());
387 }
388
389 if matches!(profile, SeccompProfile::Unrestricted) {
390 add(Self::unrestricted_extras());
391 }
392
393 syscalls
394 }
395
396 pub fn allow_syscall(&mut self, name: impl Into<String>) {
398 self.allowed.insert(name.into());
399 }
400
401 pub fn block_syscall(&mut self, name: impl Into<String>) {
403 self.blocked.insert(name.into());
404 }
405
406 pub fn is_allowed(&self, name: &str) -> bool {
408 if self.blocked.contains(name) {
409 return false;
410 }
411 self.allowed.contains(name)
412 }
413
414 pub fn allowed_syscalls(&self) -> &HashSet<String> {
416 &self.allowed
417 }
418
419 pub fn blocked_syscalls(&self) -> &HashSet<String> {
421 &self.blocked
422 }
423
424 pub fn allowed_count(&self) -> usize {
426 self.allowed.len() - self.blocked.len()
427 }
428
429 pub fn is_kill_on_violation(&self) -> bool {
431 self.kill_on_violation
432 }
433
434 pub fn set_kill_on_violation(&mut self, kill: bool) {
436 self.kill_on_violation = kill;
437 }
438
439 pub fn profile(&self) -> SeccompProfile {
441 self.profile.clone()
442 }
443
444 pub fn validate(&self) -> Result<()> {
446 if self.allowed.is_empty() && self.profile != SeccompProfile::Unrestricted {
447 return Err(SandboxError::Seccomp(
448 "Filter has no allowed syscalls".to_string(),
449 ));
450 }
451 Ok(())
452 }
453
454 pub fn export(&self) -> Result<Vec<String>> {
456 self.validate()?;
457 let mut list: Vec<_> = self.allowed.iter().cloned().collect();
458 list.sort();
459 Ok(list)
460 }
461}
462
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `filter` accepts every syscall in `names`.
    fn assert_allows(filter: &SeccompFilter, names: &[&str]) {
        for name in names {
            assert!(filter.is_allowed(name), "`{}` should be allowed", name);
        }
    }

    /// Asserts that `filter` rejects every syscall in `names`.
    fn assert_denies(filter: &SeccompFilter, names: &[&str]) {
        for name in names {
            assert!(!filter.is_allowed(name), "`{}` should be denied", name);
        }
    }

    /// Builds a filter with empty allow- and block-lists for `profile`.
    fn bare_filter(profile: SeccompProfile) -> SeccompFilter {
        SeccompFilter {
            allowed: HashSet::new(),
            blocked: HashSet::new(),
            kill_on_violation: true,
            profile,
        }
    }

    #[test]
    fn test_seccomp_profile_all() {
        assert_eq!(SeccompProfile::all().len(), 6);
    }

    #[test]
    fn test_seccomp_profile_description() {
        let essential = SeccompProfile::Essential.description();
        let minimal = SeccompProfile::Minimal.description();
        let network = SeccompProfile::Network.description();

        assert!(!essential.is_empty());
        assert!(!minimal.is_empty());
        assert_ne!(essential, minimal);
        assert_ne!(minimal, network);
    }

    #[test]
    fn test_seccomp_filter_essential() {
        let filter = SeccompFilter::from_profile(SeccompProfile::Essential);

        // Bootstrap syscalls must be present.
        assert_allows(
            &filter,
            &[
                "read",
                "write",
                "exit",
                "execve",
                "mmap",
                "brk",
                "openat",
                "close",
                "arch_prctl",
                "futex",
                "getpid",
                "gettid",
                "lseek",
                "fcntl",
            ],
        );

        // Anything introduced by a wider profile must be absent.
        assert_denies(
            &filter,
            &[
                "clone",
                "rt_sigaction",
                "nanosleep",
                "socket",
                "ptrace",
                "mkdir",
            ],
        );

        let count = filter.allowed_count();
        assert!(
            (35..=50).contains(&count),
            "Essential profile should have ~40 syscalls, got {}",
            count
        );
    }

    #[test]
    fn test_seccomp_filter_minimal() {
        let filter = SeccompFilter::minimal();

        assert_allows(
            &filter,
            &[
                "read",
                "write",
                "exit",
                "clone3",
                "lseek",
                "sched_getaffinity",
                "nanosleep",
                "gettid",
                "rt_sigaction",
            ],
        );
        assert_denies(&filter, &["ptrace", "mkdir", "socket"]);

        let count = filter.allowed_count();
        assert!(
            count > 100,
            "Minimal profile should have > 100 syscalls for runtime compatibility, got {}",
            count
        );
    }

    #[test]
    fn test_seccomp_filter_io_heavy() {
        let filter = SeccompFilter::from_profile(SeccompProfile::IoHeavy);
        assert_allows(
            &filter,
            &["read", "mkdir", "unlink", "clone", "rt_sigaction"],
        );

        let baseline = SeccompFilter::minimal().allowed_count();
        assert!(filter.allowed_count() > baseline);
    }

    #[test]
    fn test_seccomp_filter_network() {
        let filter = SeccompFilter::from_profile(SeccompProfile::Network);
        assert_allows(
            &filter,
            &["socket", "connect", "bind", "mkdir", "sched_setscheduler"],
        );
    }

    #[test]
    fn test_seccomp_filter_allow_syscall() {
        let mut filter = SeccompFilter::minimal();
        filter.allow_syscall("custom_syscall");
        assert_allows(&filter, &["custom_syscall"]);
    }

    #[test]
    fn test_seccomp_filter_block_syscall() {
        let mut filter = SeccompFilter::minimal();
        filter.block_syscall("read");
        assert_denies(&filter, &["read"]);
    }

    #[test]
    fn test_seccomp_filter_block_overrides_allow() {
        let mut filter = SeccompFilter::minimal();
        assert_allows(&filter, &["write"]);

        filter.block_syscall("write");
        assert_denies(&filter, &["write"]);
    }

    #[test]
    fn test_seccomp_filter_validate() {
        assert!(SeccompFilter::minimal().validate().is_ok());
        assert!(bare_filter(SeccompProfile::Minimal).validate().is_err());
    }

    #[test]
    fn test_seccomp_filter_export() {
        let exported = SeccompFilter::minimal().export().unwrap();
        assert!(!exported.is_empty());
        assert!(exported.iter().any(|name| name == "read"));

        // The exported list must already be in ascending order.
        let mut expected = exported.clone();
        expected.sort();
        assert_eq!(exported, expected);
    }

    #[test]
    fn test_seccomp_kill_on_violation() {
        let mut filter = SeccompFilter::minimal();
        assert!(filter.is_kill_on_violation());

        filter.set_kill_on_violation(false);
        assert!(!filter.is_kill_on_violation());
    }

    #[test]
    fn test_validate_unrestricted_with_no_allowed() {
        assert!(bare_filter(SeccompProfile::Unrestricted).validate().is_ok());
    }

    #[test]
    fn test_profiles_are_cumulative() {
        // Narrowest to widest; each adjacent pair is checked below.
        let ladder = [
            (
                "Essential",
                SeccompFilter::from_profile(SeccompProfile::Essential),
            ),
            (
                "Minimal",
                SeccompFilter::from_profile(SeccompProfile::Minimal),
            ),
            (
                "IoHeavy",
                SeccompFilter::from_profile(SeccompProfile::IoHeavy),
            ),
            (
                "Compute",
                SeccompFilter::from_profile(SeccompProfile::Compute),
            ),
            (
                "Network",
                SeccompFilter::from_profile(SeccompProfile::Network),
            ),
            (
                "Unrestricted",
                SeccompFilter::from_profile(SeccompProfile::Unrestricted),
            ),
        ];

        for pair in ladder.windows(2) {
            let (narrow_name, narrow) = &pair[0];
            let (wide_name, wide) = &pair[1];

            assert!(
                narrow
                    .allowed_syscalls()
                    .is_subset(wide.allowed_syscalls()),
                "{} should be a subset of {}",
                narrow_name,
                wide_name
            );
            assert!(wide.allowed_count() > narrow.allowed_count());
        }
    }
}