1use super::seccomp::SeccompFilter;
4use crate::errors::{Result, SandboxError};
5
/// A single classic-BPF instruction as emitted by `SeccompCompiler::compile`.
///
/// The four fields have the same types and order as `bpf_insn`, the
/// structure this type is converted into before a program is installed.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct BpfInstr {
    /// Opcode of the instruction.
    pub code: u16,
    /// Jump offset used when a conditional jump's comparison is true.
    pub jt: u8,
    /// Jump offset used when a conditional jump's comparison is false.
    pub jf: u8,
    /// Generic operand: a load offset, a comparison value, or a return value.
    pub k: u32,
}
15
/// Seccomp return-action values, used as the `k` operand of the return
/// instructions that `SeccompCompiler::compile` emits.
pub mod actions {
    /// Returned on an architecture mismatch, and as the default action when
    /// the filter is kill-on-violation.
    pub const SECCOMP_RET_KILL: u32 = 0x0000_0000;
    /// Returned as the default action when the filter is not kill-on-violation.
    pub const SECCOMP_RET_TRAP: u32 = 0x0003_0000;
    /// Errno action; not emitted by the compiler in this file.
    pub const SECCOMP_RET_ERRNO: u32 = 0x0005_0000;
    /// Trace action; not emitted by the compiler in this file.
    pub const SECCOMP_RET_TRACE: u32 = 0x7ff0_0000;
    /// Returned by the instruction that follows each allowed-syscall comparison.
    pub const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000;
}
29
/// `AUDIT_ARCH_*` identifiers compared against the architecture word of the
/// syscall data by the compiled filter.
pub mod arch {
    /// Identifier selected for `target_arch = "x86_64"`.
    pub const AUDIT_ARCH_X86_64: u32 = 0xc000_003e;
    /// Identifier selected for `target_arch = "x86"`.
    pub const AUDIT_ARCH_I386: u32 = 0x4000_0003;
    /// Identifier selected for `target_arch = "arm"`.
    pub const AUDIT_ARCH_ARM: u32 = 0x4000_0028;
    /// Identifier selected for `target_arch = "aarch64"`.
    pub const AUDIT_ARCH_AARCH64: u32 = 0xc000_00b7;
}

/// Returns the `AUDIT_ARCH_*` identifier of the architecture this crate was
/// compiled for.
///
/// The choice is made at compile time. Targets other than x86_64, x86, arm
/// and aarch64 yield `0`.
pub fn get_arch() -> u32 {
    if cfg!(target_arch = "x86_64") {
        arch::AUDIT_ARCH_X86_64
    } else if cfg!(target_arch = "x86") {
        arch::AUDIT_ARCH_I386
    } else if cfg!(target_arch = "arm") {
        arch::AUDIT_ARCH_ARM
    } else if cfg!(target_arch = "aarch64") {
        arch::AUDIT_ARCH_AARCH64
    } else {
        0
    }
}
66
/// A raw Linux syscall number.
#[derive(Debug, Clone, Copy)]
pub struct SyscallNumber(pub u32);

impl SyscallNumber {
    /// Looks up the syscall number for `name`.
    ///
    /// The table holds Linux **x86_64** numbers only; it is not selected by
    /// target architecture, so the values are wrong for filters built on any
    /// other architecture.
    ///
    /// Returns `None` when `name` is not in the table.
    ///
    /// Two names that are libc functions rather than x86_64 syscalls are
    /// accepted as aliases of the syscall that backs them:
    /// `waitpid` (→ `wait4`) and `futimes` (→ `futimesat`).
    pub fn from_name(name: &str) -> Option<Self> {
        let num: u32 = match name {
            // Process lifecycle
            "exit" => 60,
            "exit_group" => 231,
            "clone" => 56,
            "fork" => 57,
            "vfork" => 58,

            // Signals
            "rt_sigaction" => 13,
            "rt_sigprocmask" => 14,
            "rt_sigpending" => 127,
            "rt_sigtimedwait" => 128,
            "rt_sigqueueinfo" => 129,
            "rt_sigreturn" => 15,
            "kill" => 62,
            "tkill" => 200,
            // Was 268, which is `fchmodat`.
            "tgkill" => 234,
            "sigaltstack" => 131,

            // Basic I/O
            "read" => 0,
            "write" => 1,
            "readv" => 19,
            "writev" => 20,
            "pread64" => 17,
            "pwrite64" => 18,

            // Files and descriptors
            "open" => 2,
            "openat" => 257,
            "close" => 3,
            "stat" => 4,
            "fstat" => 5,
            "lstat" => 6,
            "fcntl" => 72,
            "ioctl" => 16,

            // Memory
            "mmap" => 9,
            "munmap" => 11,
            "mremap" => 25,
            "mprotect" => 10,
            "madvise" => 28,
            "brk" => 12,
            "mlock" => 149,
            "munlock" => 150,
            "mlockall" => 151,
            "munlockall" => 152,

            // Program execution
            "execve" => 59,
            "execveat" => 322,

            // Waiting for children. Was 114, which is `setregid`. x86_64 has
            // no `waitpid` syscall; it is kept as an alias of `wait4`.
            "wait4" | "waitpid" => 61,
            "waitid" => 247,

            // Descriptor duplication
            "dup" => 32,
            "dup2" => 33,
            "dup3" => 292,

            // Time
            "clock_gettime" => 228,
            "clock_getres" => 229,
            "gettimeofday" => 96,
            "time" => 201,

            // Identity queries
            "getpid" => 39,
            "getppid" => 110,
            "getuid" => 102,
            "geteuid" => 107,
            "getgid" => 104,
            "getegid" => 108,
            "getpgrp" => 111,
            "getpgid" => 121,
            "getsid" => 124,

            // Resource limits
            "getrlimit" => 97,
            "setrlimit" => 160,
            "getrusage" => 98,

            // Threading support
            "futex" => 202,
            "set_tid_address" => 218,
            "set_robust_list" => 273,
            "get_robust_list" => 274,

            // Readiness / polling
            "pselect6" => 270,
            "ppoll" => 271,
            "epoll_create1" => 291,
            "epoll_ctl" => 233,
            "epoll_wait" => 232,
            "poll" => 7,
            "select" => 23,

            // Directories
            "getcwd" => 79,
            "chdir" => 80,
            "fchdir" => 81,
            "getdents" => 78,
            "getdents64" => 217,

            // Process control
            "prctl" => 157,
            "arch_prctl" => 158,

            // Filesystem namespace manipulation
            "mkdir" => 83,
            "mkdirat" => 258,
            "rmdir" => 84,
            "unlink" => 87,
            "unlinkat" => 263,
            "rename" => 82,
            "renameat" => 264,
            "link" => 86,
            "linkat" => 265,
            "symlink" => 88,
            "symlinkat" => 266,
            "readlink" => 89,
            "readlinkat" => 267,

            // Permissions and ownership
            "chmod" => 90,
            "fchmod" => 91,
            "fchmodat" => 268,
            "chown" => 92,
            "fchown" => 93,
            "fchownat" => 260,
            "lchown" => 94,

            // Timestamps. `futimes` was 271, which is `ppoll`; it now maps
            // to `futimesat`.
            "utimes" => 235,
            "futimes" | "futimesat" => 261,
            "utime" => 132,
            "utimensat" => 280,

            // File size / access / bulk transfer
            "truncate" => 76,
            "ftruncate" => 77,
            "fallocate" => 285,
            "access" => 21,
            "faccessat" => 269,
            "sendfile" => 40,
            "splice" => 275,
            "tee" => 276,
            "vmsplice" => 278,
            "statfs" => 137,
            "fstatfs" => 138,
            "fsync" => 74,
            "fdatasync" => 75,

            // Sockets. The former "setsockname" entry was removed: no such
            // syscall exists and its number (106) is `setgid`.
            "socket" => 41,
            "socketpair" => 53,
            "bind" => 49,
            "listen" => 50,
            "accept" => 43,
            "accept4" => 288,
            "connect" => 42,
            "shutdown" => 48,
            "sendto" => 44,
            "recvfrom" => 45,
            "sendmsg" => 46,
            "recvmsg" => 47,
            "sendmmsg" => 307,
            "recvmmsg" => 299,
            "setsockopt" => 54,
            "getsockopt" => 55,
            "getsockname" => 51,
            "getpeername" => 52,

            // Introspection / tracing
            "ptrace" => 101,
            "process_vm_readv" => 310,
            "process_vm_writev" => 311,
            "perf_event_open" => 298,
            "bpf" => 321,
            "seccomp" => 317,

            // Mounts
            "mount" => 165,
            "umount2" => 166,
            "pivot_root" => 155,

            // Capabilities and credentials
            "capget" => 125,
            "capset" => 126,
            "setuid" => 105,
            "setgid" => 106,
            "setreuid" => 113,
            "setregid" => 114,
            // Were 164 (`settimeofday`) and 170 (`sethostname`).
            "setresuid" => 117,
            "setresgid" => 119,
            "getgroups" => 115,
            "setgroups" => 116,
            "setfsgid" => 123,
            "setfsuid" => 122,

            _ => return None,
        };
        Some(SyscallNumber(num))
    }
}
253
254pub struct SeccompCompiler;
256
257impl SeccompCompiler {
258 pub fn compile(filter: &SeccompFilter) -> Result<Vec<BpfInstr>> {
260 let mut instrs = Vec::new();
261
262 instrs.push(BpfInstr {
264 code: 0x20, jt: 0,
266 jf: 0,
267 k: 4, });
269
270 let arch = get_arch();
271 instrs.push(BpfInstr {
272 code: 0x15, jt: 1, jf: 0, k: arch,
276 });
277
278 instrs.push(BpfInstr {
280 code: 0x06, jt: 0,
282 jf: 0,
283 k: actions::SECCOMP_RET_KILL,
284 });
285
286 instrs.push(BpfInstr {
288 code: 0x20, jt: 0,
290 jf: 0,
291 k: 0, });
293
294 let allowed = filter.allowed_syscalls();
296 let blocked = filter.blocked_syscalls();
297
298 for syscall_name in allowed.iter() {
299 if blocked.contains(syscall_name) {
300 continue; }
302
303 if let Some(SyscallNumber(num)) = SyscallNumber::from_name(syscall_name) {
304 instrs.push(BpfInstr {
305 code: 0x15, jt: 1, jf: 0, k: num,
309 });
310
311 instrs.push(BpfInstr {
313 code: 0x06, jt: 0,
315 jf: 0,
316 k: actions::SECCOMP_RET_ALLOW,
317 });
318 }
319 }
320
321 if filter.is_kill_on_violation() {
323 instrs.push(BpfInstr {
324 code: 0x06, jt: 0,
326 jf: 0,
327 k: actions::SECCOMP_RET_KILL,
328 });
329 } else {
330 instrs.push(BpfInstr {
331 code: 0x06, jt: 0,
333 jf: 0,
334 k: actions::SECCOMP_RET_TRAP,
335 });
336 }
337
338 Ok(instrs)
339 }
340
341 pub fn load(filter: &SeccompFilter) -> Result<()> {
343 let instrs = Self::compile(filter)?;
344
345 let (_native_instrs, prog) = instrs_to_sock_fprog(&instrs);
347
348 unsafe {
349 if libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0 {
351 return Err(SandboxError::Seccomp(format!(
352 "Failed to set PR_SET_NO_NEW_PRIVS: {}",
353 std::io::Error::last_os_error()
354 )));
355 }
356
357 let ret = libc::prctl(
358 libc::PR_SET_SECCOMP,
359 libc::SECCOMP_MODE_FILTER,
360 &prog as *const _,
361 );
362
363 if ret != 0 {
364 return Err(SandboxError::Seccomp(format!(
365 "Failed to load seccomp filter: {}",
366 std::io::Error::last_os_error()
367 )));
368 }
369 }
370
371 Ok(())
372 }
373}
374
375fn instrs_to_sock_fprog(instrs: &[BpfInstr]) -> (Vec<bpf_insn>, sockfprog) {
377 let native_instrs: Vec<bpf_insn> = instrs.iter().copied().map(bpf_insn::from).collect();
378 let prog = sockfprog {
379 len: native_instrs.len() as u16,
380 filter: native_instrs.as_ptr() as *mut bpf_insn,
381 };
382 (native_instrs, prog)
383}
384
/// Instruction record in the layout handed to the kernel: the element type
/// behind `sockfprog::filter`.
///
/// Field types and order are the same as [`BpfInstr`], from which it is
/// converted.
// The lowercase name is part of the public interface, so silence the naming
// lint rather than rename the type.
#[allow(non_camel_case_types)]
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct bpf_insn {
    /// Opcode of the instruction.
    pub code: u16,
    /// Jump offset used when a conditional jump's comparison is true.
    pub jt: u8,
    /// Jump offset used when a conditional jump's comparison is false.
    pub jf: u8,
    /// Generic operand: a load offset, a comparison value, or a return value.
    pub k: u32,
}
394
395impl From<BpfInstr> for bpf_insn {
396 fn from(instr: BpfInstr) -> Self {
397 bpf_insn {
398 code: instr.code,
399 jt: instr.jt,
400 jf: instr.jf,
401 k: instr.k,
402 }
403 }
404}
405
/// Program header passed by pointer to `prctl(PR_SET_SECCOMP, ...)` in
/// `SeccompCompiler::load`.
///
/// Built by `instrs_to_sock_fprog`; `filter` borrows the buffer of the vector
/// returned alongside it and does not own the instructions.
#[repr(C)]
struct sockfprog {
    /// Number of instructions `filter` points to.
    len: u16,
    /// Pointer to the first instruction of the program.
    filter: *mut bpf_insn,
}
412
#[cfg(test)]
mod tests {
    use super::super::seccomp::{SeccompFilter, SeccompProfile};
    use super::*;

    /// On x86_64 the detected architecture is the x86_64 audit identifier.
    #[test]
    fn test_get_arch() {
        let detected = get_arch();
        if cfg!(target_arch = "x86_64") {
            assert_eq!(detected, arch::AUDIT_ARCH_X86_64);
        }
    }

    /// `read` resolves to 0.
    #[test]
    fn test_syscall_number_read() {
        let SyscallNumber(value) = SyscallNumber::from_name("read").unwrap();
        assert_eq!(value, 0);
    }

    /// `write` resolves to 1.
    #[test]
    fn test_syscall_number_write() {
        let SyscallNumber(value) = SyscallNumber::from_name("write").unwrap();
        assert_eq!(value, 1);
    }

    /// Unknown names resolve to `None`.
    #[test]
    fn test_syscall_number_invalid() {
        assert!(SyscallNumber::from_name("invalid_syscall").is_none());
    }

    /// `exit` resolves to 60.
    #[test]
    fn test_syscall_number_exit() {
        let SyscallNumber(value) = SyscallNumber::from_name("exit").unwrap();
        assert_eq!(value, 60);
    }

    /// `execve` resolves to 59.
    #[test]
    fn test_syscall_number_execve() {
        let SyscallNumber(value) = SyscallNumber::from_name("execve").unwrap();
        assert_eq!(value, 59);
    }

    /// The minimal profile compiles to a non-empty program.
    #[test]
    fn test_compile_minimal_filter() {
        let compiled = SeccompCompiler::compile(&SeccompFilter::minimal());
        assert!(compiled.is_ok());

        let program = compiled.unwrap();
        assert!(!program.is_empty());
    }

    /// The I/O-heavy profile compiles to more than five instructions.
    #[test]
    fn test_compile_io_heavy_filter() {
        let profile_filter = SeccompFilter::from_profile(SeccompProfile::IoHeavy);
        let compiled = SeccompCompiler::compile(&profile_filter);
        assert!(compiled.is_ok());

        let program = compiled.unwrap();
        assert!(program.len() > 5);
    }

    /// A hand-built instruction keeps the values it was given.
    #[test]
    fn test_bpf_instr_creation() {
        let BpfInstr { code, k, .. } = BpfInstr {
            code: 0x06,
            jt: 0,
            jf: 0,
            k: actions::SECCOMP_RET_ALLOW,
        };

        assert_eq!(code, 0x06);
        assert_eq!(k, actions::SECCOMP_RET_ALLOW);
    }

    /// KILL and ALLOW carry their expected numeric values.
    #[test]
    fn test_actions_values() {
        assert_eq!(actions::SECCOMP_RET_KILL, 0x00000000);
        assert_eq!(actions::SECCOMP_RET_ALLOW, 0x7fff0000);
    }

    /// A handful of common syscalls are all present in the table.
    #[test]
    fn test_multiple_syscall_numbers() {
        for syscall in &["read", "write", "open", "close", "fork"] {
            let looked_up = SyscallNumber::from_name(syscall);
            assert!(looked_up.is_some(), "Failed to get number for {}", syscall);
        }
    }
}