sandbox_rs/isolation/
seccomp.rs

1//! Seccomp filter building and management
2
3use crate::errors::{Result, SandboxError};
4use std::collections::HashSet;
5
/// Seccomp filter profile
///
/// Selects which group of syscalls a [`SeccompFilter`] starts out allowing.
/// Every profile includes the same always-allowed base set; the non-minimal
/// variants add one extra group on top of it.
///
/// `Hash` is derived alongside `Eq` so a profile can be used as a key in a
/// `HashMap` or stored in a `HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SeccompProfile {
    /// Minimal profile - only the always-allowed base syscalls
    Minimal,
    /// IO-heavy profile - adds file-system operations (`mkdir`, `rename`, `chmod`, ...)
    IoHeavy,
    /// Compute profile - adds scheduler and memory-policy operations
    Compute,
    /// Network profile - adds socket operations
    Network,
    /// Unrestricted - adds privileged syscalls such as `ptrace`, `mount` and `setuid`
    Unrestricted,
}
20
21impl SeccompProfile {
22    /// Get all profiles
23    pub fn all() -> Vec<Self> {
24        vec![
25            SeccompProfile::Minimal,
26            SeccompProfile::IoHeavy,
27            SeccompProfile::Compute,
28            SeccompProfile::Network,
29            SeccompProfile::Unrestricted,
30        ]
31    }
32
33    /// Get description of profile
34    pub fn description(&self) -> &'static str {
35        match self {
36            SeccompProfile::Minimal => "Minimal syscalls only",
37            SeccompProfile::IoHeavy => "With file I/O operations",
38            SeccompProfile::Compute => "With memory operations",
39            SeccompProfile::Network => "With socket operations",
40            SeccompProfile::Unrestricted => "Allow most syscalls",
41        }
42    }
43}
44
/// Seccomp filter builder
///
/// Tracks syscalls by name in two sets: a whitelist and a block list. A
/// syscall counts as allowed only when it is on the whitelist and not on the
/// block list.
#[derive(Debug, Clone)]
pub struct SeccompFilter {
    /// Whitelisted syscall names (seeded from the profile, extended by `allow_syscall`).
    allowed: HashSet<String>,
    /// Syscall names that are denied even if they also appear in `allowed`.
    blocked: HashSet<String>,
    /// Kill-on-violation flag; `from_profile` starts it as `true`.
    // NOTE(review): how a violation is handled when this is `false` is not
    // visible in this file - confirm against the code that installs the filter.
    kill_on_violation: bool,
    /// Profile this filter was created from.
    profile: SeccompProfile,
}
53
54impl SeccompFilter {
55    /// Create filter from profile
56    pub fn from_profile(profile: SeccompProfile) -> Self {
57        let allowed = Self::syscalls_for_profile(&profile);
58        Self {
59            allowed,
60            blocked: HashSet::new(),
61            kill_on_violation: true,
62            profile,
63        }
64    }
65
66    /// Create minimal filter
67    pub fn minimal() -> Self {
68        Self::from_profile(SeccompProfile::Minimal)
69    }
70
71    /// Get syscalls for a profile
72    fn syscalls_for_profile(profile: &SeccompProfile) -> HashSet<String> {
73        let mut syscalls = HashSet::new();
74
75        // Always allowed
76        let always_allowed = vec![
77            // Process management
78            "exit",
79            "exit_group",
80            "clone",
81            "fork",
82            "vfork",
83            // Signal handling
84            "rt_sigaction",
85            "rt_sigprocmask",
86            "rt_sigpending",
87            "rt_sigtimedwait",
88            "rt_sigqueueinfo",
89            "rt_sigreturn",
90            "kill",
91            "tkill",
92            "tgkill",
93            "sigaltstack",
94            // Basic I/O
95            "read",
96            "write",
97            "readv",
98            "writev",
99            "pread64",
100            "pwrite64",
101            // File operations
102            "open",
103            "openat",
104            "close",
105            "stat",
106            "fstat",
107            "lstat",
108            "fcntl",
109            "ioctl",
110            // Memory
111            "mmap",
112            "munmap",
113            "mremap",
114            "mprotect",
115            "madvise",
116            "brk",
117            "mlock",
118            "munlock",
119            "mlockall",
120            "munlockall",
121            // Process execution
122            "execve",
123            "execveat",
124            // Waiting
125            "wait4",
126            "waitpid",
127            "waitid",
128            // File descriptors
129            "dup",
130            "dup2",
131            "dup3",
132            // Getting time
133            "clock_gettime",
134            "clock_getres",
135            "gettimeofday",
136            "time",
137            // Process info
138            "getpid",
139            "getppid",
140            "getuid",
141            "geteuid",
142            "getgid",
143            "getegid",
144            "getpgrp",
145            "getpgid",
146            "getsid",
147            // Limits
148            "getrlimit",
149            "setrlimit",
150            "getrusage",
151            // Misc allowed
152            "futex",
153            "rt_sigpending",
154            "set_tid_address",
155            "set_robust_list",
156            "get_robust_list",
157            "pselect6",
158            "ppoll",
159            "epoll_create1",
160            "epoll_ctl",
161            "epoll_wait",
162            "poll",
163            "select",
164            "getcwd",
165            "chdir",
166            "fchdir",
167            "getdents",
168            "getdents64",
169            "prctl",
170            "arch_prctl",
171        ];
172
173        for syscall in always_allowed {
174            syscalls.insert(syscall.to_string());
175        }
176
177        // Profile-specific syscalls
178        match profile {
179            SeccompProfile::Minimal => {
180                // Just the basics above
181            }
182            SeccompProfile::IoHeavy => {
183                for syscall in &[
184                    "mkdir",
185                    "mkdirat",
186                    "rmdir",
187                    "unlink",
188                    "unlinkat",
189                    "rename",
190                    "renameat",
191                    "link",
192                    "linkat",
193                    "symlink",
194                    "symlinkat",
195                    "readlink",
196                    "readlinkat",
197                    "chmod",
198                    "fchmod",
199                    "fchmodat",
200                    "chown",
201                    "fchown",
202                    "fchownat",
203                    "lchown",
204                    "utimes",
205                    "futimes",
206                    "utime",
207                    "utimensat",
208                    "truncate",
209                    "ftruncate",
210                    "fallocate",
211                    "access",
212                    "faccessat",
213                    "sendfile",
214                    "splice",
215                    "tee",
216                    "vmsplice",
217                    "statfs",
218                    "fstatfs",
219                    "fsync",
220                    "fdatasync",
221                ] {
222                    syscalls.insert(syscall.to_string());
223                }
224            }
225            SeccompProfile::Compute => {
226                for syscall in &[
227                    "sigaltstack",
228                    "sigprocmask",
229                    "signal",
230                    "sched_yield",
231                    "sched_getscheduler",
232                    "sched_setscheduler",
233                    "sched_getparam",
234                    "sched_setparam",
235                    "sched_get_priority_max",
236                    "sched_get_priority_min",
237                    "sched_rr_get_interval",
238                    "sched_getaffinity",
239                    "sched_setaffinity",
240                    "mbind",
241                    "get_mempolicy",
242                    "set_mempolicy",
243                    "migrate_pages",
244                    "move_pages",
245                    "membarrier",
246                ] {
247                    syscalls.insert(syscall.to_string());
248                }
249            }
250            SeccompProfile::Network => {
251                for syscall in &[
252                    "socket",
253                    "socketpair",
254                    "bind",
255                    "listen",
256                    "accept",
257                    "accept4",
258                    "connect",
259                    "shutdown",
260                    "sendto",
261                    "recvfrom",
262                    "sendmsg",
263                    "recvmsg",
264                    "sendmmsg",
265                    "recvmmsg",
266                    "setsockopt",
267                    "getsockopt",
268                    "setsockname",
269                    "getsockname",
270                    "getpeername",
271                    "socketcall",
272                ] {
273                    syscalls.insert(syscall.to_string());
274                }
275                // Also include IoHeavy syscalls
276                for syscall in &["open", "openat", "read", "write", "close"] {
277                    syscalls.insert(syscall.to_string());
278                }
279            }
280            SeccompProfile::Unrestricted => {
281                // Add many more syscalls for unrestricted
282                for syscall in &[
283                    "ptrace",
284                    "process_vm_readv",
285                    "process_vm_writev",
286                    "perf_event_open",
287                    "bpf",
288                    "seccomp",
289                    "mount",
290                    "umount2",
291                    "pivot_root",
292                    "capget",
293                    "capset",
294                    "setuid",
295                    "setgid",
296                    "setreuid",
297                    "setregid",
298                    "setresuid",
299                    "setresgid",
300                    "getgroups",
301                    "setgroups",
302                    "setfsgid",
303                    "setfsuid",
304                ] {
305                    syscalls.insert(syscall.to_string());
306                }
307            }
308        }
309
310        syscalls
311    }
312
313    /// Add syscall to whitelist
314    pub fn allow_syscall(&mut self, name: impl Into<String>) {
315        self.allowed.insert(name.into());
316    }
317
318    /// Block a syscall (deny even if in whitelist)
319    pub fn block_syscall(&mut self, name: impl Into<String>) {
320        self.blocked.insert(name.into());
321    }
322
323    /// Check if syscall is allowed
324    pub fn is_allowed(&self, name: &str) -> bool {
325        if self.blocked.contains(name) {
326            return false;
327        }
328        self.allowed.contains(name)
329    }
330
331    /// Get allowed syscalls
332    pub fn allowed_syscalls(&self) -> &HashSet<String> {
333        &self.allowed
334    }
335
336    /// Get blocked syscalls
337    pub fn blocked_syscalls(&self) -> &HashSet<String> {
338        &self.blocked
339    }
340
341    /// Count allowed syscalls
342    pub fn allowed_count(&self) -> usize {
343        self.allowed.len() - self.blocked.len()
344    }
345
346    /// Check if killing on violation
347    pub fn is_kill_on_violation(&self) -> bool {
348        self.kill_on_violation
349    }
350
351    /// Set kill on violation
352    pub fn set_kill_on_violation(&mut self, kill: bool) {
353        self.kill_on_violation = kill;
354    }
355
356    /// Get the profile used to create this filter
357    pub fn profile(&self) -> SeccompProfile {
358        self.profile.clone()
359    }
360
361    /// Validate that filter is correct
362    pub fn validate(&self) -> Result<()> {
363        if self.allowed.is_empty() {
364            return Err(SandboxError::Seccomp(
365                "Filter has no allowed syscalls".to_string(),
366            ));
367        }
368        Ok(())
369    }
370
371    /// Export as BPF program (simplified - just returns syscall names)
372    pub fn export(&self) -> Result<Vec<String>> {
373        self.validate()?;
374        let mut list: Vec<_> = self.allowed.iter().cloned().collect();
375        list.sort();
376        Ok(list)
377    }
378}
379
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_seccomp_profile_all() {
        let profiles = SeccompProfile::all();
        assert_eq!(profiles.len(), 5);
    }

    #[test]
    fn test_seccomp_profile_description() {
        assert!(!SeccompProfile::Minimal.description().is_empty());
        assert_ne!(
            SeccompProfile::Minimal.description(),
            SeccompProfile::Network.description()
        );
    }

    #[test]
    fn test_seccomp_filter_minimal() {
        let filter = SeccompFilter::minimal();
        assert!(filter.is_allowed("read"));
        assert!(filter.is_allowed("write"));
        assert!(filter.is_allowed("exit"));
        assert!(!filter.is_allowed("ptrace"));
        assert!(filter.allowed_count() > 20);
    }

    #[test]
    fn test_seccomp_filter_io_heavy() {
        let filter = SeccompFilter::from_profile(SeccompProfile::IoHeavy);
        assert!(filter.is_allowed("read"));
        assert!(filter.is_allowed("mkdir"));
        assert!(filter.is_allowed("unlink"));
        let io_count = filter.allowed_count();

        let minimal = SeccompFilter::minimal();
        assert!(io_count > minimal.allowed_count());
    }

    #[test]
    fn test_seccomp_filter_network() {
        let filter = SeccompFilter::from_profile(SeccompProfile::Network);
        assert!(filter.is_allowed("socket"));
        assert!(filter.is_allowed("connect"));
        assert!(filter.is_allowed("bind"));
    }

    #[test]
    fn test_seccomp_filter_unrestricted() {
        let filter = SeccompFilter::from_profile(SeccompProfile::Unrestricted);
        assert!(filter.is_allowed("ptrace"));
        assert!(filter.is_allowed("mount"));
        assert!(filter.is_allowed("read"));
    }

    #[test]
    fn test_seccomp_filter_profile() {
        let filter = SeccompFilter::from_profile(SeccompProfile::Network);
        assert_eq!(filter.profile(), SeccompProfile::Network);
        assert_eq!(SeccompFilter::minimal().profile(), SeccompProfile::Minimal);
    }

    #[test]
    fn test_seccomp_filter_allow_syscall() {
        let mut filter = SeccompFilter::minimal();
        filter.allow_syscall("custom_syscall");
        assert!(filter.is_allowed("custom_syscall"));
    }

    #[test]
    fn test_seccomp_filter_block_syscall() {
        let mut filter = SeccompFilter::minimal();
        filter.block_syscall("read");
        assert!(!filter.is_allowed("read"));
    }

    #[test]
    fn test_seccomp_filter_block_overrides_allow() {
        let mut filter = SeccompFilter::minimal();
        assert!(filter.is_allowed("write"));
        filter.block_syscall("write");
        assert!(!filter.is_allowed("write"));

        // Re-allowing a blocked syscall must not lift the block.
        filter.allow_syscall("write");
        assert!(!filter.is_allowed("write"));
    }

    #[test]
    fn test_seccomp_filter_validate() {
        let filter = SeccompFilter::minimal();
        assert!(filter.validate().is_ok());

        let empty_filter = SeccompFilter {
            allowed: HashSet::new(),
            blocked: HashSet::new(),
            kill_on_violation: true,
            profile: SeccompProfile::Minimal,
        };
        assert!(empty_filter.validate().is_err());
    }

    #[test]
    fn test_seccomp_filter_export() {
        let filter = SeccompFilter::minimal();
        let syscalls = filter.export().unwrap();
        assert!(!syscalls.is_empty());
        assert!(syscalls.contains(&"read".to_string()));

        // Should be sorted
        let mut sorted = syscalls.clone();
        sorted.sort();
        assert_eq!(syscalls, sorted);
    }

    #[test]
    fn test_seccomp_kill_on_violation() {
        let mut filter = SeccompFilter::minimal();
        assert!(filter.is_kill_on_violation());

        filter.set_kill_on_violation(false);
        assert!(!filter.is_kill_on_violation());
    }

    #[test]
    fn test_seccomp_filter_comparison() {
        let minimal = SeccompFilter::minimal();

        // Every profile is built on top of the always-allowed set, so each one
        // must permit everything the minimal profile permits.
        for profile in SeccompProfile::all() {
            let filter = SeccompFilter::from_profile(profile.clone());
            for syscall in minimal.allowed_syscalls() {
                assert!(
                    filter.is_allowed(syscall),
                    "{:?} profile should allow minimal syscall {}",
                    profile,
                    syscall
                );
            }
        }
    }
}