sandbox_rs/isolation/
seccomp.rs

1//! Seccomp filter building and management
2
3use crate::errors::{Result, SandboxError};
4use std::collections::HashSet;
5
/// Predefined presets that select which syscalls a seccomp filter starts with.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SeccompProfile {
    /// Essential syscalls only.
    Minimal,
    /// IO-heavy preset: additionally includes file operations.
    IoHeavy,
    /// Compute preset: additionally includes memory operations.
    Compute,
    /// Network preset: additionally includes socket operations.
    Network,
    /// Unrestricted preset: allows most syscalls.
    Unrestricted,
}

impl SeccompProfile {
    /// Returns every profile variant, in declaration order.
    pub fn all() -> Vec<Self> {
        Vec::from([
            Self::Minimal,
            Self::IoHeavy,
            Self::Compute,
            Self::Network,
            Self::Unrestricted,
        ])
    }

    /// Returns a short, human-readable summary of this profile.
    pub fn description(&self) -> &'static str {
        match *self {
            Self::Minimal => "Minimal syscalls only",
            Self::IoHeavy => "With file I/O operations",
            Self::Compute => "With memory operations",
            Self::Network => "With socket operations",
            Self::Unrestricted => "Allow most syscalls",
        }
    }
}
44
45/// Seccomp filter builder
46#[derive(Debug, Clone)]
47pub struct SeccompFilter {
48    allowed: HashSet<String>,
49    blocked: HashSet<String>,
50    kill_on_violation: bool,
51    profile: SeccompProfile,
52    allow_unknown_syscalls: bool,
53}
54
55impl SeccompFilter {
56    /// Create filter from profile
57    pub fn from_profile(profile: SeccompProfile) -> Self {
58        let allowed = Self::syscalls_for_profile(&profile);
59        Self {
60            allowed,
61            blocked: HashSet::new(),
62            kill_on_violation: true,
63            profile,
64            allow_unknown_syscalls: false,
65        }
66    }
67
68    /// Create minimal filter
69    pub fn minimal() -> Self {
70        Self::from_profile(SeccompProfile::Minimal)
71    }
72
73    /// Get syscalls for a profile
74    fn syscalls_for_profile(profile: &SeccompProfile) -> HashSet<String> {
75        let mut syscalls = HashSet::new();
76
77        // Always allowed
78        let always_allowed = vec![
79            // Process management
80            "exit",
81            "exit_group",
82            "clone",
83            "fork",
84            "vfork",
85            // Signal handling
86            "rt_sigaction",
87            "rt_sigprocmask",
88            "rt_sigpending",
89            "rt_sigtimedwait",
90            "rt_sigqueueinfo",
91            "rt_sigreturn",
92            "kill",
93            "tkill",
94            "tgkill",
95            "sigaltstack",
96            // Basic I/O
97            "read",
98            "write",
99            "readv",
100            "writev",
101            "pread64",
102            "pwrite64",
103            "access",
104            "faccessat",
105            // File operations
106            "open",
107            "openat",
108            "close",
109            "stat",
110            "fstat",
111            "lstat",
112            "fcntl",
113            "ioctl",
114            // Memory
115            "mmap",
116            "munmap",
117            "mremap",
118            "mprotect",
119            "madvise",
120            "brk",
121            "mlock",
122            "munlock",
123            "mlockall",
124            "munlockall",
125            // Process execution
126            "execve",
127            "execveat",
128            // Waiting
129            "wait4",
130            "waitpid",
131            "waitid",
132            // File descriptors
133            "dup",
134            "dup2",
135            "dup3",
136            "pipe",
137            "pipe2",
138            // Getting time
139            "clock_gettime",
140            "clock_getres",
141            "gettimeofday",
142            "time",
143            // Process info
144            "getpid",
145            "getppid",
146            "getuid",
147            "geteuid",
148            "getgid",
149            "getegid",
150            "uname",
151            "getpgrp",
152            "getpgid",
153            "setpgid",
154            "getsid",
155            "setsid",
156            // Limits
157            "getrlimit",
158            "setrlimit",
159            "getrusage",
160            // Misc allowed
161            "futex",
162            "rt_sigpending",
163            "set_tid_address",
164            "set_robust_list",
165            "get_robust_list",
166            "pselect6",
167            "ppoll",
168            "epoll_create1",
169            "epoll_ctl",
170            "epoll_wait",
171            "poll",
172            "select",
173            "getcwd",
174            "chdir",
175            "fchdir",
176            "getdents",
177            "getdents64",
178            "prctl",
179            "arch_prctl",
180            "rseq",
181            "newfstatat",
182            "getrandom",
183            "statx",
184            "prlimit64",
185        ];
186
187        for syscall in always_allowed {
188            syscalls.insert(syscall.to_string());
189        }
190
191        // Profile-specific syscalls
192        match profile {
193            SeccompProfile::Minimal => {
194                // Just the basics above
195            }
196            SeccompProfile::IoHeavy => {
197                for syscall in &[
198                    "mkdir",
199                    "mkdirat",
200                    "rmdir",
201                    "unlink",
202                    "unlinkat",
203                    "rename",
204                    "renameat",
205                    "link",
206                    "linkat",
207                    "symlink",
208                    "symlinkat",
209                    "readlink",
210                    "readlinkat",
211                    "chmod",
212                    "fchmod",
213                    "fchmodat",
214                    "chown",
215                    "fchown",
216                    "fchownat",
217                    "lchown",
218                    "utimes",
219                    "futimesat",
220                    "utime",
221                    "utimensat",
222                    "truncate",
223                    "ftruncate",
224                    "fallocate",
225                    "access",
226                    "faccessat",
227                    "sendfile",
228                    "splice",
229                    "tee",
230                    "vmsplice",
231                    "statfs",
232                    "fstatfs",
233                    "fsync",
234                    "fdatasync",
235                ] {
236                    syscalls.insert(syscall.to_string());
237                }
238            }
239            SeccompProfile::Compute => {
240                for syscall in &[
241                    "sigaltstack",
242                    "sched_yield",
243                    "sched_getscheduler",
244                    "sched_setscheduler",
245                    "sched_getparam",
246                    "sched_setparam",
247                    "sched_get_priority_max",
248                    "sched_get_priority_min",
249                    "sched_rr_get_interval",
250                    "sched_getaffinity",
251                    "sched_setaffinity",
252                    "mbind",
253                    "get_mempolicy",
254                    "set_mempolicy",
255                    "migrate_pages",
256                    "move_pages",
257                    "membarrier",
258                ] {
259                    syscalls.insert(syscall.to_string());
260                }
261            }
262            SeccompProfile::Network => {
263                for syscall in &[
264                    "socket",
265                    "socketpair",
266                    "bind",
267                    "listen",
268                    "accept",
269                    "accept4",
270                    "connect",
271                    "shutdown",
272                    "sendto",
273                    "recvfrom",
274                    "sendmsg",
275                    "recvmsg",
276                    "sendmmsg",
277                    "recvmmsg",
278                    "setsockopt",
279                    "getsockopt",
280                    "getsockname",
281                    "getpeername",
282                ] {
283                    syscalls.insert(syscall.to_string());
284                }
285                // Also include IoHeavy syscalls
286                for syscall in &["open", "openat", "read", "write", "close"] {
287                    syscalls.insert(syscall.to_string());
288                }
289            }
290            SeccompProfile::Unrestricted => {
291                // Add many more syscalls for unrestricted
292                for syscall in &[
293                    "ptrace",
294                    "process_vm_readv",
295                    "process_vm_writev",
296                    "perf_event_open",
297                    "bpf",
298                    "seccomp",
299                    "mount",
300                    "umount2",
301                    "pivot_root",
302                    "capget",
303                    "capset",
304                    "setuid",
305                    "setgid",
306                    "setreuid",
307                    "setregid",
308                    "setresuid",
309                    "setresgid",
310                    "getgroups",
311                    "setgroups",
312                    "setfsgid",
313                    "setfsuid",
314                ] {
315                    syscalls.insert(syscall.to_string());
316                }
317            }
318        }
319
320        syscalls
321    }
322
323    /// Add syscall to whitelist
324    pub fn allow_syscall(&mut self, name: impl Into<String>) {
325        self.allowed.insert(name.into());
326    }
327
328    /// Block a syscall (deny even if in whitelist)
329    pub fn block_syscall(&mut self, name: impl Into<String>) {
330        self.blocked.insert(name.into());
331    }
332
333    /// Check if syscall is allowed
334    pub fn is_allowed(&self, name: &str) -> bool {
335        if self.blocked.contains(name) {
336            return false;
337        }
338        self.allowed.contains(name)
339    }
340
341    /// Get allowed syscalls
342    pub fn allowed_syscalls(&self) -> &HashSet<String> {
343        &self.allowed
344    }
345
346    /// Get blocked syscalls
347    pub fn blocked_syscalls(&self) -> &HashSet<String> {
348        &self.blocked
349    }
350
351    /// Count allowed syscalls
352    pub fn allowed_count(&self) -> usize {
353        self.allowed.len() - self.blocked.len()
354    }
355
356    /// Check if killing on violation
357    pub fn is_kill_on_violation(&self) -> bool {
358        self.kill_on_violation
359    }
360
361    /// Set kill on violation
362    pub fn set_kill_on_violation(&mut self, kill: bool) {
363        self.kill_on_violation = kill;
364    }
365
366    /// Get the profile used to create this filter
367    pub fn profile(&self) -> SeccompProfile {
368        self.profile.clone()
369    }
370
371    /// Set whether unknown syscalls should be allowed (warnings only)
372    ///
373    /// Default is false, which means unknown syscalls cause compilation errors.
374    /// Setting this to true allows filters with unknown syscalls to compile,
375    /// but those syscalls will be silently ignored.
376    pub fn set_allow_unknown_syscalls(&mut self, allow: bool) {
377        self.allow_unknown_syscalls = allow;
378    }
379
380    /// Check if unknown syscalls are allowed
381    pub fn allows_unknown_syscalls(&self) -> bool {
382        self.allow_unknown_syscalls
383    }
384
385    /// Validate that filter is correct
386    pub fn validate(&self) -> Result<()> {
387        if self.allowed.is_empty() && self.profile != SeccompProfile::Unrestricted {
388            return Err(SandboxError::Seccomp(
389                "Filter has no allowed syscalls".to_string(),
390            ));
391        }
392        Ok(())
393    }
394
395    /// Export as BPF program (simplified - just returns syscall names)
396    pub fn export(&self) -> Result<Vec<String>> {
397        self.validate()?;
398        let mut list: Vec<_> = self.allowed.iter().cloned().collect();
399        list.sort();
400        Ok(list)
401    }
402}
403
/// Unit tests for the seccomp profile and filter types.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_seccomp_profile_all() {
        let profiles = SeccompProfile::all();
        assert_eq!(profiles.len(), 5);
    }

    #[test]
    fn test_seccomp_profile_description() {
        assert!(!SeccompProfile::Minimal.description().is_empty());
        assert_ne!(
            SeccompProfile::Minimal.description(),
            SeccompProfile::Network.description()
        );
    }

    #[test]
    fn test_seccomp_filter_minimal() {
        let filter = SeccompFilter::minimal();
        assert!(filter.is_allowed("read"));
        assert!(filter.is_allowed("write"));
        assert!(filter.is_allowed("exit"));
        assert!(!filter.is_allowed("ptrace"));
        assert!(filter.allowed_count() > 20);
    }

    #[test]
    fn test_seccomp_filter_io_heavy() {
        let filter = SeccompFilter::from_profile(SeccompProfile::IoHeavy);
        assert!(filter.is_allowed("read"));
        assert!(filter.is_allowed("mkdir"));
        assert!(filter.is_allowed("unlink"));
        let io_count = filter.allowed_count();

        let minimal = SeccompFilter::minimal();
        assert!(io_count > minimal.allowed_count());
    }

    #[test]
    fn test_seccomp_filter_network() {
        let filter = SeccompFilter::from_profile(SeccompProfile::Network);
        assert!(filter.is_allowed("socket"));
        assert!(filter.is_allowed("connect"));
        assert!(filter.is_allowed("bind"));
    }

    #[test]
    fn test_seccomp_filter_allow_syscall() {
        let mut filter = SeccompFilter::minimal();
        filter.allow_syscall("custom_syscall");
        assert!(filter.is_allowed("custom_syscall"));
    }

    #[test]
    fn test_seccomp_filter_block_syscall() {
        let mut filter = SeccompFilter::minimal();
        filter.block_syscall("read");
        assert!(!filter.is_allowed("read"));
    }

    #[test]
    fn test_seccomp_filter_block_overrides_allow() {
        let mut filter = SeccompFilter::minimal();
        assert!(filter.is_allowed("write"));
        filter.block_syscall("write");
        assert!(!filter.is_allowed("write"));
    }

    #[test]
    fn test_seccomp_filter_validate() {
        let filter = SeccompFilter::minimal();
        assert!(filter.validate().is_ok());

        let empty_filter = SeccompFilter {
            allowed: HashSet::new(),
            blocked: HashSet::new(),
            kill_on_violation: true,
            profile: SeccompProfile::Minimal,
            allow_unknown_syscalls: false,
        };
        assert!(empty_filter.validate().is_err());
    }

    #[test]
    fn test_seccomp_filter_export() {
        let filter = SeccompFilter::minimal();
        let syscalls = filter.export().unwrap();
        assert!(!syscalls.is_empty());
        assert!(syscalls.contains(&"read".to_string()));

        // Should be sorted
        let mut sorted = syscalls.clone();
        sorted.sort();
        assert_eq!(syscalls, sorted);
    }

    #[test]
    fn test_seccomp_kill_on_violation() {
        let mut filter = SeccompFilter::minimal();
        assert!(filter.is_kill_on_violation());

        filter.set_kill_on_violation(false);
        assert!(!filter.is_kill_on_violation());
    }

    #[test]
    fn test_seccomp_filter_comparison() {
        let minimal = SeccompFilter::minimal();

        // Every profile is built on top of the minimal allow-list, so each
        // one must permit all of the minimal syscalls.
        for profile in SeccompProfile::all() {
            let filter = SeccompFilter::from_profile(profile.clone());
            for syscall in minimal.allowed_syscalls() {
                assert!(
                    filter.is_allowed(syscall),
                    "profile {:?} is missing minimal syscall `{}`",
                    profile,
                    syscall
                );
            }
        }
    }

    #[test]
    fn test_allow_unknown_syscalls_flag() {
        let mut filter = SeccompFilter::minimal();
        assert!(!filter.allows_unknown_syscalls());

        filter.set_allow_unknown_syscalls(true);
        assert!(filter.allows_unknown_syscalls());
    }

    #[test]
    fn test_validate_unrestricted_with_no_allowed() {
        let filter = SeccompFilter {
            allowed: HashSet::new(),
            blocked: HashSet::new(),
            kill_on_violation: true,
            profile: SeccompProfile::Unrestricted,
            allow_unknown_syscalls: false,
        };
        assert!(filter.validate().is_ok());
    }
}