// northstar_runtime/seccomp/profiles/default.rs

use crate::{
    common::non_nul_string::NonNulString,
    seccomp::{
        bpf::{builder_from_rules, Builder},
        SyscallArgRule, SyscallRule,
    },
};
use std::{collections::HashMap, convert::TryInto};

// Filter lists that mimic Docker's default seccomp profile
// (https://github.com/moby/moby/blob/master/profiles/seccomp/default.json)

/// Unconditional list of allowed syscalls.
///
/// Entries preceded by a `#[cfg(target_arch = ...)]` attribute are only compiled into the list
/// for the matching target architecture, so the length of this slice is target dependent.
///
/// `personality` appears here by name only; the argument restriction quoted next to it is
/// attached when the `BASE` builder is constructed from this list.
pub const SYSCALLS_BASE: &[&str] = &[
    "accept",
    "accept4",
    "access",
    "adjtimex",
    "alarm",
    "bind",
    "brk",
    "capget",
    "capset",
    "chdir",
    "chmod",
    "chown",
    "chown32",
    "clock_adjtime",
    "clock_adjtime64",
    "clock_getres",
    "clock_getres_time64",
    "clock_gettime",
    "clock_gettime64",
    "clock_nanosleep",
    "clock_nanosleep_time64",
    "close",
    "close_range",
    "connect",
    "copy_file_range",
    "creat",
    "dup",
    "dup2",
    "dup3",
    "epoll_create",
    "epoll_create1",
    "epoll_ctl",
    "epoll_ctl_old",
    "epoll_pwait",
    "epoll_pwait2",
    "epoll_wait",
    "epoll_wait_old",
    "eventfd",
    "eventfd2",
    "execve",
    "execveat",
    "exit",
    "exit_group",
    "faccessat",
    "faccessat2",
    "fadvise64",
    "fadvise64_64",
    "fallocate",
    "fanotify_mark",
    "fchdir",
    "fchmod",
    "fchmodat",
    "fchown",
    "fchown32",
    "fchownat",
    "fcntl",
    "fcntl64",
    "fdatasync",
    "fgetxattr",
    "flistxattr",
    "flock",
    "fork",
    "fremovexattr",
    "fsetxattr",
    "fstat",
    "fstat64",
    "fstatat64",
    "fstatfs",
    "fstatfs64",
    "fsync",
    "ftruncate",
    "ftruncate64",
    "futex",
    "futex_time64",
    "futimesat",
    "getcpu",
    "getcwd",
    "getdents",
    "getdents64",
    "getegid",
    "getegid32",
    "geteuid",
    "geteuid32",
    "getgid",
    "getgid32",
    "getgroups",
    "getgroups32",
    "getitimer",
    "getpeername",
    "getpgid",
    "getpgrp",
    "getpid",
    "getppid",
    "getpriority",
    "getrandom",
    "getresgid",
    "getresgid32",
    "getresuid",
    "getresuid32",
    "getrlimit",
    "get_robust_list",
    "getrusage",
    "getsid",
    "getsockname",
    "getsockopt",
    "get_thread_area",
    "gettid",
    "gettimeofday",
    "getuid",
    "getuid32",
    "getxattr",
    "inotify_add_watch",
    "inotify_init",
    "inotify_init1",
    "inotify_rm_watch",
    "io_cancel",
    "ioctl",
    "io_destroy",
    "io_getevents",
    "io_pgetevents",
    "io_pgetevents_time64",
    "ioprio_get",
    "ioprio_set",
    "io_setup",
    "io_submit",
    "io_uring_enter",
    "io_uring_register",
    "io_uring_setup",
    "ipc",
    "kill",
    "lchown",
    "lchown32",
    "lgetxattr",
    "link",
    "linkat",
    "listen",
    "listxattr",
    "llistxattr",
    "_llseek",
    "lremovexattr",
    "lseek",
    "lsetxattr",
    "lstat",
    "lstat64",
    "madvise",
    "membarrier",
    "memfd_create",
    "mincore",
    "mkdir",
    "mkdirat",
    "mknod",
    "mknodat",
    "mlock",
    "mlock2",
    "mlockall",
    "mmap",
    "mmap2",
    "mprotect",
    "mq_getsetattr",
    "mq_notify",
    "mq_open",
    "mq_timedreceive",
    "mq_timedreceive_time64",
    "mq_timedsend",
    "mq_timedsend_time64",
    "mq_unlink",
    "mremap",
    "msgctl",
    "msgget",
    "msgrcv",
    "msgsnd",
    "msync",
    "munlock",
    "munlockall",
    "munmap",
    "nanosleep",
    "newfstatat",
    "_newselect",
    "open",
    "openat",
    "openat2",
    "pause",
    "pidfd_open",
    "pidfd_send_signal",
    "pipe",
    "pipe2",
    "poll",
    "ppoll",
    "ppoll_time64",
    "prctl",
    "pread64",
    "preadv",
    "preadv2",
    "prlimit64",
    "pselect6",
    "pselect6_time64",
    "pwrite64",
    "pwritev",
    "pwritev2",
    "read",
    "readahead",
    "readlink",
    "readlinkat",
    "readv",
    "recv",
    "recvfrom",
    "recvmmsg",
    "recvmmsg_time64",
    "recvmsg",
    "remap_file_pages",
    "removexattr",
    "rename",
    "renameat",
    "renameat2",
    "restart_syscall",
    "rmdir",
    "rseq",
    "rt_sigaction",
    "rt_sigpending",
    "rt_sigprocmask",
    "rt_sigqueueinfo",
    "rt_sigreturn",
    "rt_sigsuspend",
    "rt_sigtimedwait",
    "rt_sigtimedwait_time64",
    "rt_tgsigqueueinfo",
    "sched_getaffinity",
    "sched_getattr",
    "sched_getparam",
    "sched_get_priority_max",
    "sched_get_priority_min",
    "sched_getscheduler",
    "sched_rr_get_interval",
    "sched_rr_get_interval_time64",
    "sched_setaffinity",
    "sched_setattr",
    "sched_setparam",
    "sched_setscheduler",
    "sched_yield",
    "seccomp",
    "select",
    "semctl",
    "semget",
    "semop",
    "semtimedop",
    "semtimedop_time64",
    "send",
    "sendfile",
    "sendfile64",
    "sendmmsg",
    "sendmsg",
    "sendto",
    "setfsgid",
    "setfsgid32",
    "setfsuid",
    "setfsuid32",
    "setgid",
    "setgid32",
    "setgroups",
    "setgroups32",
    "setitimer",
    "setpgid",
    "setpriority",
    "setregid",
    "setregid32",
    "setresgid",
    "setresgid32",
    "setresuid",
    "setresuid32",
    "setreuid",
    "setreuid32",
    "setrlimit",
    "set_robust_list",
    "setsid",
    "setsockopt",
    "set_thread_area",
    "set_tid_address",
    "setuid",
    "setuid32",
    "setxattr",
    "shmat",
    "shmctl",
    "shmdt",
    "shmget",
    "shutdown",
    "sigaltstack",
    "signalfd",
    "signalfd4",
    "sigprocmask",
    "sigreturn",
    "socket",
    "socketcall",
    "socketpair",
    "splice",
    "stat",
    "stat64",
    "statfs",
    "statfs64",
    "statx",
    "symlink",
    "symlinkat",
    "sync",
    "sync_file_range",
    "syncfs",
    "sysinfo",
    "tee",
    "tgkill",
    "time",
    "timer_create",
    "timer_delete",
    "timer_getoverrun",
    "timer_gettime",
    "timer_gettime64",
    "timer_settime",
    "timer_settime64",
    "timerfd_create",
    "timerfd_gettime",
    "timerfd_gettime64",
    "timerfd_settime",
    "timerfd_settime64",
    "times",
    "tkill",
    "truncate",
    "truncate64",
    "ugetrlimit",
    "umask",
    "uname",
    "unlink",
    "unlinkat",
    "utime",
    "utimensat",
    "utimensat_time64",
    "utimes",
    "vfork",
    "vmsplice",
    "wait4",
    "waitid",
    "waitpid",
    "write",
    "writev",
    "process_vm_readv",  // "minKernel": "4.8"
    "process_vm_writev", // "minKernel": "4.8"
    "ptrace",            // "minKernel": "4.8"
    // Parameter condition: index=0, value={0x00, 0x08, 0x20000, 0x20008, 0xFFFFFFFF}, op=SCMP_CMP_EQ
    // (https://github.com/moby/moby/blob/20.10/profiles/seccomp/default.json#L414)
    "personality",
    #[cfg(target_arch = "aarch64")]
    "arm_fadvise64_64",
    #[cfg(target_arch = "aarch64")]
    "arm_sync_file_range",
    #[cfg(target_arch = "aarch64")]
    "sync_file_range2",
    #[cfg(target_arch = "aarch64")]
    "breakpoint",
    #[cfg(target_arch = "aarch64")]
    "cacheflush",
    #[cfg(target_arch = "aarch64")]
    "set_tls",
    #[cfg(target_arch = "x86_64")]
    "arch_prctl", // only on "amd64" and "x32"
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    "modify_ldt", // only on "amd64", "x32" and "x86"
];

// Syscalls to be added if a given capability is present

/// Syscalls added when `CAP_DAC_READ_SEARCH` is present.
pub const SYSCALLS_CAP_DAC_READ_SEARCH: &[&str] = &["open_by_handle_at"];
/// Syscalls added when `CAP_SYS_ADMIN` is present.
///
/// `clone` is listed here without restriction; the restricted variant used when the capability
/// is missing lives in `SYSCALLS_NON_CAP_SYS_ADMIN`.
pub const SYSCALLS_CAP_SYS_ADMIN: &[&str] = &[
    "bpf",
    "clone",
    "fanotify_init",
    "fsconfig",
    "fsmount",
    "fsopen",
    "fspick",
    "lookup_dcookie",
    "mount",
    "move_mount",
    "name_to_handle_at",
    "open_tree",
    "perf_event_open",
    "quotactl",
    "setdomainname",
    "sethostname",
    "setns",
    "syslog",
    "umount",
    "umount2",
    "unshare",
];
/// Syscalls added when `CAP_SYS_BOOT` is present.
pub const SYSCALLS_CAP_SYS_BOOT: &[&str] = &["reboot"];
/// Syscalls added when `CAP_SYS_CHROOT` is present.
pub const SYSCALLS_CAP_SYS_CHROOT: &[&str] = &["chroot"];
/// Syscalls added when `CAP_SYS_MODULE` is present.
pub const SYSCALLS_CAP_SYS_MODULE: &[&str] = &["delete_module", "init_module", "finit_module"];
/// Syscalls added when `CAP_SYS_PACCT` is present.
pub const SYSCALLS_CAP_SYS_PACCT: &[&str] = &["acct"];
/// Syscalls added when `CAP_SYS_PTRACE` is present.
pub const SYSCALLS_CAP_SYS_PTRACE: &[&str] = &[
    "kcmp",
    "pidfd_getfd",
    "process_madvise",
    "process_vm_readv",
    "process_vm_writev",
    "ptrace",
];
/// Syscalls added when `CAP_SYS_RAWIO` is present.
pub const SYSCALLS_CAP_SYS_RAWIO: &[&str] = &["iopl", "ioperm"];
/// Syscalls added when `CAP_SYS_TIME` is present.
pub const SYSCALLS_CAP_SYS_TIME: &[&str] = &["settimeofday", "stime", "clock_settime"];
/// Syscalls added when `CAP_SYS_TTY_CONFIG` is present.
pub const SYSCALLS_CAP_SYS_TTY_CONFIG: &[&str] = &["vhangup"];
/// Syscalls added when `CAP_SYS_NICE` is present.
pub const SYSCALLS_CAP_SYS_NICE: &[&str] = &["get_mempolicy", "mbind", "set_mempolicy"];
/// Syscalls added when `CAP_SYSLOG` is present.
pub const SYSCALLS_CAP_SYSLOG: &[&str] = &["syslog"];

// Syscalls to be added if a given capability is _missing_

/// Syscalls added when `CAP_SYS_ADMIN` is missing.
///
/// The list carries names only; the argument restriction quoted below is attached when the
/// `NON_CAP_SYS_ADMIN` builder is constructed.
pub const SYSCALLS_NON_CAP_SYS_ADMIN: &[&str] = &[
    // Parameter condition: index=0, value=0x7E020000, op=SCMP_CMP_MASKED_EQ
    // (https://github.com/moby/moby/blob/20.10/profiles/seccomp/default.json#L624)
    "clone",
];

429// pre-computed builders
430lazy_static::lazy_static! {
431    pub static ref BASE: Builder = {
432        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_BASE.len());
433        for name in SYSCALLS_BASE {
434            // Parameter condition: index=0, value={0x00, 0x08, 0x20000, 0x20008, 0xFFFFFFFF}, op=SCMP_CMP_EQ
435            // (https://github.com/moby/moby/blob/20.10/profiles/seccomp/default.json#L414)
436            if *name == "personality" {
437                #[allow(clippy::unwrap_used)]
438                hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Args(SyscallArgRule{
439                    index: 0,
440                    values: Some([0x00, 0x08, 0x20000, 0x20008, 0xFFFFFFFF].to_vec()),
441                    mask: None}));
442            }
443            else {
444                #[allow(clippy::unwrap_used)]
445                hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
446            }
447        }
448        builder_from_rules(&hm)
449    };
450    pub static ref CAP_DAC_READ_SEARCH: Builder = {
451        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_DAC_READ_SEARCH.len());
452        for name in SYSCALLS_CAP_DAC_READ_SEARCH {
453            #[allow(clippy::unwrap_used)]
454            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
455        }
456        builder_from_rules(&hm)
457    };
458    pub static ref CAP_SYS_ADMIN: Builder = {
459        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_ADMIN.len());
460        for name in SYSCALLS_CAP_SYS_ADMIN {
461            #[allow(clippy::unwrap_used)]
462            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
463        }
464        builder_from_rules(&hm)
465    };
466    pub static ref CAP_SYS_BOOT: Builder = {
467        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_BOOT.len());
468        for name in SYSCALLS_CAP_SYS_BOOT {
469            #[allow(clippy::unwrap_used)]
470            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
471        }
472        builder_from_rules(&hm)
473    };
474    pub static ref CAP_SYS_CHROOT: Builder = {
475        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_CHROOT.len());
476        for name in SYSCALLS_CAP_SYS_CHROOT {
477            #[allow(clippy::unwrap_used)]
478            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
479        }
480        builder_from_rules(&hm)
481    };
482    pub static ref CAP_SYS_MODULE: Builder = {
483        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_MODULE.len());
484        for name in SYSCALLS_CAP_SYS_MODULE {
485            #[allow(clippy::unwrap_used)]
486            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
487        }
488        builder_from_rules(&hm)
489    };
490    pub static ref CAP_SYS_PACCT: Builder = {
491        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_PACCT.len());
492        for name in SYSCALLS_CAP_SYS_PACCT {
493            #[allow(clippy::unwrap_used)]
494            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
495        }
496        builder_from_rules(&hm)
497    };
498    pub static ref CAP_SYS_PTRACE: Builder = {
499        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_PTRACE.len());
500        for name in SYSCALLS_CAP_SYS_PTRACE {
501            #[allow(clippy::unwrap_used)]
502            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
503        }
504        builder_from_rules(&hm)
505    };
506    pub static ref CAP_SYS_RAWIO: Builder = {
507        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_RAWIO.len());
508        for name in SYSCALLS_CAP_SYS_RAWIO {
509            #[allow(clippy::unwrap_used)]
510            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
511        }
512        builder_from_rules(&hm)
513    };
514    pub static ref CAP_SYS_TIME: Builder = {
515        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_TIME.len());
516        for name in SYSCALLS_CAP_SYS_TIME {
517            #[allow(clippy::unwrap_used)]
518            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
519        }
520        builder_from_rules(&hm)
521    };
522    pub static ref CAP_SYS_TTY_CONFIG: Builder = {
523        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_TTY_CONFIG.len());
524        for name in SYSCALLS_CAP_SYS_TTY_CONFIG {
525            #[allow(clippy::unwrap_used)]
526            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
527        }
528        builder_from_rules(&hm)
529    };
530    pub static ref CAP_SYS_NICE: Builder = {
531        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYS_NICE.len());
532        for name in SYSCALLS_CAP_SYS_NICE {
533            #[allow(clippy::unwrap_used)]
534            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
535        }
536        builder_from_rules(&hm)
537    };
538    pub static ref CAP_SYSLOG: Builder = {
539        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_CAP_SYSLOG.len());
540        for name in SYSCALLS_CAP_SYSLOG {
541            #[allow(clippy::unwrap_used)]
542            hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Any);
543        }
544        builder_from_rules(&hm)
545    };
546    pub static ref NON_CAP_SYS_ADMIN: Builder = {
547        let mut hm: HashMap<NonNulString, SyscallRule> = HashMap::with_capacity(SYSCALLS_NON_CAP_SYS_ADMIN.len());
548        for name in SYSCALLS_NON_CAP_SYS_ADMIN {
549            if *name == "clone" {
550                // Parameter condition: index=0, value=0x7E020000, op=SCMP_CMP_MASKED_EQ
551                // (https://github.com/moby/moby/blob/20.10/profiles/seccomp/default.json#L624)
552                #[allow(clippy::unwrap_used)]
553                hm.insert(name.to_string().try_into().unwrap(), SyscallRule::Args(SyscallArgRule{
554                    index: 0,
555                    values: None,
556                    // Docker allows a masked syscall argument only if it is equal to 0.
557                    // This effectively prohibits the use of the bits covered by the mask. Since our
558                    // logic specifically allows arguments that match the mask, we invert the
559                    // bitmask of docker here to achieve the same behavior.
560                    mask: Some(!0x7E020000)}));
561            }
562        }
563        builder_from_rules(&hm)
564    };
565}