northstar_runtime/seccomp/bpf.rs

use crate::{
    common::non_nul_string::NonNulString,
    npk::manifest::capabilities::Capability,
    seccomp::{profiles::default, Profile, SyscallArgRule, SyscallRule},
};
use anyhow::{bail, Result};
use bindings::{
    seccomp_data, sock_filter, sock_fprog, BPF_ABS, BPF_ALU, BPF_AND, BPF_IMM, BPF_JEQ, BPF_JMP,
    BPF_K, BPF_LD, BPF_MAXINSNS, BPF_MEM, BPF_NEG, BPF_OR, BPF_RET, BPF_ST, BPF_W, SYSCALL_MAP,
};
use log::trace;
use nix::errno::Errno;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::{
    collections::{HashMap, HashSet},
    mem::size_of,
};

#[allow(unused, non_snake_case, non_camel_case_types, non_upper_case_globals)]
mod bindings {
    include!(concat!(env!("OUT_DIR"), "/syscall_bindings.rs"));
    include!(concat!(env!("OUT_DIR"), "/seccomp_bindings.rs"));
}
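// The bindings are generated into OUT_DIR at build time and provide the kernel's seccomp/BPF
// constants used above as well as SYSCALL_MAP, the syscall name to number table for the target
// architecture.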

#[cfg(target_arch = "aarch64")]
const AUDIT_ARCH: u32 = bindings::AUDIT_ARCH_AARCH64;
#[cfg(target_arch = "x86_64")]
const AUDIT_ARCH: u32 = bindings::AUDIT_ARCH_X86_64;

/// Syscalls used by northstar after the seccomp rules are applied and before the actual execve is done.
const REQUIRED_SYSCALLS: &[u32] = &[bindings::SYS_execve];

/// Jump to next instruction and execute
const EVAL_NEXT: u8 = 0;
/// Skip next instruction
const SKIP_NEXT: u8 = 1;
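// These constants are used as the jt/jf offsets of BPF conditional jumps, which are relative to
// the following instruction: an offset of 0 continues with the next instruction, an offset of 1
// skips one instruction. Larger offsets are computed below where whole blocks must be skipped.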

/// Construct an allowlist syscall filter that is applied post clone.
pub fn seccomp_filter(
    profile: Option<&Profile>,
    rules: Option<&HashMap<NonNulString, SyscallRule>>,
    caps: &HashSet<Capability>,
) -> AllowList {
    check_platform_requirements();

    let mut builder = Builder::new();
    if let Some(profile) = profile {
        builder.extend(builder_from_profile(profile, caps));
    }
    if let Some(rules) = rules {
        builder.extend(builder_from_rules(rules));
    }
    builder.build()
}
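
// A minimal usage sketch (illustrative only; `caps` stands for the container's capability set):
//
//     let filter = seccomp_filter(Some(&Profile::Default), None, &caps);
//     filter.apply()?; // applied in the child after clone, before execve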

/// Create an AllowList Builder from a map of syscall names to rules
pub(crate) fn builder_from_rules(rules: &HashMap<NonNulString, SyscallRule>) -> Builder {
    let mut builder = Builder::new();
    for (name, call_rule) in rules {
        let arg_rule = match call_rule {
            SyscallRule::Any => None,
            SyscallRule::Args(a) => Some(a),
        };
        if let Err(e) = builder.allow_syscall_name(name, arg_rule.cloned()) {
            // Only log this case: a syscall that cannot be added to the allowlist does not lead to insecure behaviour
            trace!("failed to allow syscall {}: {}", &name.to_string(), e);
        }
    }
    builder
}

/// Create an AllowList Builder from a pre-defined profile
fn builder_from_profile(profile: &Profile, caps: &HashSet<Capability>) -> Builder {
    match profile {
        Profile::Default => {
            let mut builder = default::BASE.clone();

            // Allow additional syscalls depending on granted capabilities
            if !caps.is_empty() {
                let mut cap_sys_admin = false;
                for cap in caps {
                    match cap {
                        Capability::CAP_CHOWN => {}
                        Capability::CAP_DAC_OVERRIDE => {}
                        Capability::CAP_DAC_READ_SEARCH => {
                            builder.extend(default::CAP_DAC_READ_SEARCH.clone());
                        }
                        Capability::CAP_FOWNER => {}
                        Capability::CAP_FSETID => {}
                        Capability::CAP_KILL => {}
                        Capability::CAP_SETGID => {}
                        Capability::CAP_SETUID => {}
                        Capability::CAP_SETPCAP => {}
                        Capability::CAP_LINUX_IMMUTABLE => {}
                        Capability::CAP_NET_BIND_SERVICE => {}
                        Capability::CAP_NET_BROADCAST => {}
                        Capability::CAP_NET_ADMIN => {}
                        Capability::CAP_NET_RAW => {}
                        Capability::CAP_IPC_LOCK => {}
                        Capability::CAP_IPC_OWNER => {}
                        Capability::CAP_SYS_MODULE => {
                            builder.extend(default::CAP_SYS_MODULE.clone());
                        }
                        Capability::CAP_SYS_RAWIO => {
                            builder.extend(default::CAP_SYS_RAWIO.clone());
                        }
                        Capability::CAP_SYS_CHROOT => {
                            builder.extend(default::CAP_SYS_CHROOT.clone());
                        }
                        Capability::CAP_SYS_PTRACE => {
                            builder.extend(default::CAP_SYS_PTRACE.clone());
                        }
                        Capability::CAP_SYS_PACCT => {
                            builder.extend(default::CAP_SYS_PACCT.clone());
                        }
                        Capability::CAP_SYS_ADMIN => {
                            cap_sys_admin = true;
                            builder.extend(default::CAP_SYS_ADMIN.clone());
                        }
                        Capability::CAP_SYS_BOOT => {
                            builder.extend(default::CAP_SYS_BOOT.clone());
                        }
                        Capability::CAP_SYS_NICE => {
                            builder.extend(default::CAP_SYS_NICE.clone());
                        }
                        Capability::CAP_SYS_RESOURCE => {}
                        Capability::CAP_SYS_TIME => {
                            builder.extend(default::CAP_SYS_TIME.clone());
                        }
                        Capability::CAP_SYS_TTY_CONFIG => {
                            builder.extend(default::CAP_SYS_TTY_CONFIG.clone());
                        }
                        Capability::CAP_MKNOD => {}
                        Capability::CAP_LEASE => {}
                        Capability::CAP_AUDIT_WRITE => {}
                        Capability::CAP_AUDIT_CONTROL => {}
                        Capability::CAP_SETFCAP => {}
                        Capability::CAP_MAC_OVERRIDE => {}
                        Capability::CAP_MAC_ADMIN => {}
                        Capability::CAP_SYSLOG => {
                            builder.extend(default::CAP_SYSLOG.clone());
                        }
                        Capability::CAP_WAKE_ALARM => {}
                        Capability::CAP_BLOCK_SUSPEND => {}
                        Capability::CAP_AUDIT_READ => {}
                        Capability::CAP_PERFMON => {}
                        Capability::CAP_BPF => {}
                        Capability::CAP_CHECKPOINT_RESTORE => {}
                    };
                }
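                // The NON_CAP_SYS_ADMIN set is only added when CAP_SYS_ADMIN is absent
                // (assumption: it contains syscalls that are considered safe only for containers
                // without CAP_SYS_ADMIN, mirroring common default seccomp profiles).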
                if !cap_sys_admin {
                    builder.extend(default::NON_CAP_SYS_ADMIN.clone());
                }
            }
            builder
        }
    }
}

/// Fail the build if the target platform is not supported by this seccomp implementation
fn check_platform_requirements() {
    #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))]
    compile_error!("seccomp is only supported on aarch64 and x86_64");
    #[cfg(target_pointer_width = "32")]
    compile_error!("seccomp is not supported on 32-bit architectures");
    #[cfg(target_endian = "big")]
    compile_error!("seccomp is not supported on big-endian architectures");
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SockFilter {
    pub code: u16,
    pub jt: u8,
    pub jf: u8,
    pub k: u32,
}

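// A SockFilter is (de)serialized as a single u64: code:16 | jt:8 | jf:8 packed into the upper
// 32 bits and k in the lower 32 bits, mirroring the field order of the kernel's sock_filter.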
impl Serialize for SockFilter {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let a = (self.code as u32) << 16 | (self.jt as u32) << 8 | self.jf as u32;
        let value = (a as u64) << 32 | self.k as u64;
        serializer.serialize_u64(value)
    }
}

impl<'de> Deserialize<'de> for SockFilter {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let value = u64::deserialize(deserializer)?;
        let a = (value >> 32) as u32;
        let code = ((a & 0xFFFF0000) >> 16) as u16;
        let jt = ((a & 0xFF00) >> 8) as u8;
        let jf = (a & 0xFF) as u8;
        let k = (value & 0xFFFFFFFF) as u32;
        Ok(SockFilter { code, jt, jf, k })
    }
}

impl From<&SockFilter> for sock_filter {
    fn from(s: &SockFilter) -> sock_filter {
        sock_filter {
            code: s.code,
            jt: s.jt,
            jf: s.jf,
            k: s.k,
        }
    }
}

/// Read-only list of allowed syscalls. Methods do not cause memory allocations on the heap.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct AllowList {
    list: Vec<SockFilter>,
}

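// Note: the kernel accepts PR_SET_SECCOMP with SECCOMP_MODE_FILTER only if the calling thread has
// CAP_SYS_ADMIN in its user namespace or has already set the no_new_privs bit; setting
// no_new_privs is assumed to happen elsewhere in the process setup and is not done here.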
impl AllowList {
    /// Apply this seccomp filter to the current thread
    pub fn apply(&self) -> Result<()> {
        #[cfg(target_os = "android")]
        const PR_SET_SECCOMP: nix::libc::c_int = 22;
        #[cfg(target_os = "android")]
        const SECCOMP_MODE_FILTER: nix::libc::c_int = 2;

        #[cfg(not(target_os = "android"))]
        use nix::libc::{PR_SET_SECCOMP, SECCOMP_MODE_FILTER};

        if self.list.len() > BPF_MAXINSNS as usize {
            bail!("seccomp filter list exceeds maximum number of BPF statements");
        }

        // Convert the list of instructions into the bindings sock_filter
        let list = self
            .list
            .iter()
            .map(Into::into)
            .collect::<Vec<sock_filter>>();

        let sf_prog = sock_fprog {
            len: list.len() as u16,
            filter: list.as_ptr() as *mut bindings::sock_filter,
        };
        let sf_prog_ptr = &sf_prog as *const sock_fprog;
        let result = unsafe { nix::libc::prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, sf_prog_ptr) };
        Errno::result(result)?;
        Ok(())
    }
}

#[derive(Clone, Eq, PartialEq, Debug)]
struct NumericSyscallRule {
    /// Number of syscall
    nr: u32,
    /// Allowed argument values. If no values are defined, the syscall is allowed unconditionally.
    arg_rule: Option<SyscallArgRule>,
}

/// Builder for AllowList struct
#[derive(Default, Clone)]
pub struct Builder {
    allowlist: Vec<NumericSyscallRule>,
    log_only: bool,
}

impl Builder {
    /// Create a new seccomp builder
    pub fn new() -> Self {
        let mut builder: Builder = Default::default();

        // Add required syscalls (e.g. for execve)
        for syscall in REQUIRED_SYSCALLS {
            builder.allow_syscall_nr(*syscall, None);
        }
        builder
    }

    /// Add syscall to allowlist by number
    pub(crate) fn allow_syscall_nr(
        &mut self,
        nr: u32,
        arg_rule: Option<SyscallArgRule>,
    ) -> &mut Builder {
        self.allowlist.push(NumericSyscallRule { nr, arg_rule });
        self
    }

    /// Add syscall to allowlist by name
    pub(crate) fn allow_syscall_name(
        &mut self,
        name: &str,
        arg_rule: Option<SyscallArgRule>,
    ) -> Result<&mut Builder> {
        match translate_syscall(name) {
            Some(nr) => Ok(self.allow_syscall_nr(nr, arg_rule)),
            None => bail!("unknown system call {}", name),
        }
    }

    /// Log syscall violations instead of aborting the program
    #[allow(unused)]
    pub(crate) fn log_only(&mut self) -> &mut Builder {
        self.log_only = true;
        self
    }

    /// Extend one builder with another builder.
    /// Note: The 'log_only' property of the extended builder is only set to true if it was true in both original builders.
    pub(crate) fn extend(&mut self, other: Builder) -> &mut Builder {
        self.allowlist.extend(other.allowlist);
        self.log_only &= other.log_only;
        self
    }

    /// Create seccomp filter ready to apply
    pub(crate) fn build(mut self) -> AllowList {
        // Sort and dedup syscall numbers to check common syscalls first
        self.allowlist.sort_unstable_by_key(|rule| rule.nr);
        self.allowlist.dedup();

        let mut filter = AllowList { list: vec![] };

        // Load architecture into accumulator
        load_arch_into_acc(&mut filter);

        // Kill process if architecture does not match
        jump_if_acc_is_equal(&mut filter, AUDIT_ARCH, SKIP_NEXT, EVAL_NEXT);
        filter
            .list
            .push(bpf_ret(nix::libc::SECCOMP_RET_KILL_PROCESS));
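        // Checking the architecture before anything else is essential: syscall numbers are
        // ABI-specific, so a filter built for one architecture could otherwise be bypassed by
        // entering the kernel through a different ABI.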

        // Load syscall number into accumulator for subsequent filtering
        load_syscall_nr_into_acc(&mut filter);

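        // Every allowed syscall becomes its own block of instructions. For plain rules this is a
        // single compare-and-allow pair; for argument rules the jump that skips the whole block on
        // a syscall number mismatch (skip_if_no_match) is precalculated from the instructions the
        // helpers emit: 4 to load the argument into scratch memory, 4 per compared value (or 6 for
        // a mask check), plus 1 for the 'allow' return.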
        // Add filter block for every allowed syscall
        for rule in &self.allowlist {
            if let Some(arg_rule) = &rule.arg_rule {
                if let Some(values) = &arg_rule.values {
                    trace!("Adding seccomp argument block (nr={})", rule.nr);

                    // Precalculate number of instructions to skip if syscall number does not match
                    assert!(values.len() <= ((u8::MAX - 5) / 4) as usize); // Detect u8 overflow
                    let skip_if_no_match: u8 = (4 + 4 * values.len() + 1) as u8;

                    // If syscall matches continue to check its arguments
                    jump_if_acc_is_equal(&mut filter, rule.nr, EVAL_NEXT, skip_if_no_match);
                    // Helper instruction counter to verify precalculated jump value
                    let mut insts = 0;
                    // Load syscall argument into scratch memory
                    insts += load_syscall_arg_into_scratch(&mut filter, arg_rule);
                    // Compare syscall argument against allowed values
                    insts += jump_if_scratch_matches(&mut filter, values, EVAL_NEXT, SKIP_NEXT);
                    // If syscall argument matches return 'allow' directly
                    insts += return_success(&mut filter);
                    assert_eq!(skip_if_no_match as u32, insts);
                    // Restore accumulator with syscall number for possible next iteration
                    load_syscall_nr_into_acc(&mut filter);

                    trace!("Finished seccomp argument block (nr={})", rule.nr);
                }
                if let Some(mask) = arg_rule.mask {
                    trace!(
                        "Adding seccomp argument block (nr={}, mask={})",
                        rule.nr,
                        mask
                    );

                    // Precalculate number of instructions to skip if syscall number does not match
                    let skip_if_no_match: u8 = (4 + 6 + 1) as u8;

                    // If syscall matches continue to check its arguments
                    jump_if_acc_is_equal(&mut filter, rule.nr, EVAL_NEXT, skip_if_no_match);
                    // Helper instruction counter to verify precalculated jump value
                    let mut insts = 0;
                    // Load syscall argument into scratch memory
                    insts += load_syscall_arg_into_scratch(&mut filter, arg_rule);
                    // Compare syscall argument against mask
                    insts += jump_if_scratch_matches_mask(&mut filter, mask, EVAL_NEXT, SKIP_NEXT);
                    insts += return_success(&mut filter);
                    assert_eq!(skip_if_no_match as u32, insts);
                    // Restore accumulator with syscall number for possible next iteration
                    load_syscall_nr_into_acc(&mut filter);

                    trace!(
                        "Finished seccomp argument block (nr={}, mask={})",
                        rule.nr,
                        mask
                    );
                }
            } else {
                trace!("Adding seccomp syscall block (nr={})", rule.nr);

                // If syscall matches return 'allow' directly
                jump_if_acc_is_equal(&mut filter, rule.nr, EVAL_NEXT, SKIP_NEXT);
                return_success(&mut filter);
                // No need to restore accumulator with syscall number as we did not overwrite it

                trace!("Finished seccomp syscall block (nr={})", rule.nr);
            }
        }

        // Fall-through consequence if no filter rule matched
        return_fail(&mut filter, self.log_only);

        filter
    }
}

/// Get syscall number by name
fn translate_syscall(name: &str) -> Option<u32> {
    SYSCALL_MAP.get(name).cloned()
}

/// Load architecture identifier number into accumulator
fn load_arch_into_acc(filter: &mut AllowList) -> u32 {
    filter.list.push(bpf_stmt(
        BPF_LD | BPF_W | BPF_ABS,
        memoffset::offset_of!(seccomp_data, arch) as u32,
    ));
    1
}

/// Load the number of the syscall into accumulator
fn load_syscall_nr_into_acc(filter: &mut AllowList) -> u32 {
    filter.list.push(bpf_stmt(
        BPF_LD | BPF_W | BPF_ABS,
        memoffset::offset_of!(seccomp_data, nr) as u32,
    ));
    1
}

/// Load syscall argument into the first two 32-bit registers of scratch memory
fn load_syscall_arg_into_scratch(filter: &mut AllowList, arg_rule: &SyscallArgRule) -> u32 {
    // Load high and low parts into scratch memory separately
    let mut insts = 0;
    insts += load_arg_low_into_acc(filter, arg_rule);
    insts += store_acc_in_scratch_low(filter);
    insts += load_arg_high_into_acc(filter, arg_rule);
    insts += store_acc_in_scratch_high(filter);
    insts
}

/// Load 32 low bits of syscall argument into 32-bit accumulator
fn load_arg_low_into_acc(filter: &mut AllowList, arg_rule: &SyscallArgRule) -> u32 {
    filter.list.push(bpf_stmt(
        BPF_LD | BPF_W | BPF_ABS,
        arg_low_array_offset(arg_rule.index) as u32,
    ));
    1
}

/// Load 32 high bits of syscall argument into 32-bit accumulator
fn load_arg_high_into_acc(filter: &mut AllowList, arg_rule: &SyscallArgRule) -> u32 {
    filter.list.push(bpf_stmt(
        BPF_LD | BPF_W | BPF_ABS,
        arg_high_array_offset(arg_rule.index) as u32,
    ));
    1
}

// From seccomp man page:
// struct seccomp_data {
//     int   nr;                   /* System call number */
//     __u32 arch;                 /* AUDIT_ARCH_* value (see <linux/audit.h>) */
//     __u64 instruction_pointer;  /* CPU instruction pointer */
//     __u64 args[6];              /* Up to 6 system call arguments */
// };
/// Size of elements of 'args' array
const SECCOMP_DATA_ARGS_SIZE: usize = size_of::<u64>();

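// The low/high split below relies on the little-endian layout of the 64-bit 'args' entries (low
// word at offset 0, high word at offset 4); big-endian targets are rejected at compile time above.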
/// Get the offset of 'args' array entry in the 'seccomp_data' struct that holds the 32 low bits of syscall argument
fn arg_low_array_offset(index: usize) -> usize {
    memoffset::offset_of!(seccomp_data, args) + (index * SECCOMP_DATA_ARGS_SIZE)
}

/// Get the offset of 'args' array entry in the 'seccomp_data' struct that holds the 32 high bits of syscall argument
fn arg_high_array_offset(index: usize) -> usize {
    memoffset::offset_of!(seccomp_data, args)
        + (index * SECCOMP_DATA_ARGS_SIZE)
        + (SECCOMP_DATA_ARGS_SIZE / 2)
}

/// Load given value into accumulator
fn _load_into_acc(filter: &mut AllowList, value: u32) -> u32 {
    filter.list.push(bpf_stmt(BPF_LD | BPF_IMM, value));
    1
}

const SCRATCH_LOW_INDEX: u32 = 0;
const SCRATCH_HIGH_INDEX: u32 = 1;

/// Load the first 32-bit register of scratch memory into the accumulator
fn load_scratch_low_into_acc(filter: &mut AllowList) -> u32 {
    filter
        .list
        .push(bpf_stmt(BPF_LD | BPF_MEM, SCRATCH_LOW_INDEX));
    1
}

/// Load the second 32-bit register of scratch memory into the accumulator
fn load_scratch_high_into_acc(filter: &mut AllowList) -> u32 {
    filter
        .list
        .push(bpf_stmt(BPF_LD | BPF_MEM, SCRATCH_HIGH_INDEX));
    1
}

/// Store accumulator into the first 32-bit register of scratch memory
fn store_acc_in_scratch_low(filter: &mut AllowList) -> u32 {
    filter.list.push(bpf_stmt(BPF_ST, SCRATCH_LOW_INDEX));
    1
}

/// Store accumulator into the second 32-bit register of scratch memory
fn store_acc_in_scratch_high(filter: &mut AllowList) -> u32 {
    filter.list.push(bpf_stmt(BPF_ST, SCRATCH_HIGH_INDEX));
    1
}

/// Perform jump if the first two 32-bit scratch registers match any of the given 64-bit values
fn jump_if_scratch_matches(
    filter: &mut AllowList,
    values: &[u64],
    jump_true: u8,
    jump_false: u8,
) -> u32 {
    assert!(values.len() <= u8::MAX as usize);
    let mut insts = 0;

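    // Each value comparison emits 4 instructions (2 scratch loads + 2 conditional jumps). On a
    // match, the jump forward is widened by the number of instructions of the remaining
    // comparisons so that it lands on the 'allow' return the caller emits directly after this
    // block.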
    for (iteration, value) in values.iter().enumerate() {
        const INSTS_PER_ITER: u8 = 4; // 2 * load_scratch + 2 * jump_if_acc_is_equal

        // Overflow check
        assert!(values.len() > iteration);
        let offset_adjust = INSTS_PER_ITER
            .checked_mul((values.len() - iteration - 1) as u8)
            .expect("BPF offset overflow");

        // Adjust the 'match' offset so that a match skips the comparisons of the remaining
        // values; a mismatch of any value except the last falls through to the next comparison
        let jump_true = jump_true + offset_adjust;
        let jump_false = if iteration + 1 == values.len() {
            jump_false
        } else {
            EVAL_NEXT
        };

        // Compare accumulator with scratch memory
        let insts_before = insts;
        insts += jump_if_scratch_is_equal(filter, *value, jump_true, jump_false);
        assert_eq!(insts_before + INSTS_PER_ITER as u32, insts);
    }
    insts
}

/// Compare accumulator (32 bit) against given value
fn jump_if_acc_is_equal(filter: &mut AllowList, value: u32, jump_true: u8, jump_false: u8) -> u32 {
    filter.list.push(bpf_jump(
        BPF_JMP | BPF_JEQ | BPF_K,
        value,
        jump_true,
        jump_false,
    ));
    1
}

/// Jump if accumulator has no bits set outside the given mask
fn jump_if_acc_matches_mask(
    filter: &mut AllowList,
    mask: u32,
    jump_true: u8,
    jump_false: u8,
) -> u32 {
    let mut insts = 0;
    filter.list.push(bpf_and(!mask)); // Keep only the bits outside the mask
    insts += 1;
    insts += jump_if_acc_is_equal(filter, 0, jump_true, jump_false);
    insts
}

/// Compare first two 32 bit registers of scratch memory with value
fn jump_if_scratch_is_equal(
    filter: &mut AllowList,
    value: u64,
    jump_true: u8,
    jump_false: u8,
) -> u32 {
    // Compare high and low parts of scratch memory separately
    let low: u32 = value as u32;
    let high: u32 = (value >> 32) as u32;
    let mut insts = 0;
    insts += load_scratch_low_into_acc(filter);
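    // If the low word does not match, additionally skip the high word load and compare
    // (2 instructions) on top of the caller's 'false' offset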
    insts += jump_if_acc_is_equal(filter, low, EVAL_NEXT, jump_false + 2);
    insts += load_scratch_high_into_acc(filter);
    insts += jump_if_acc_is_equal(filter, high, jump_true, jump_false);
    insts
}

/// Match first two 32 bit registers of scratch memory against bitmask
fn jump_if_scratch_matches_mask(
    filter: &mut AllowList,
    mask: u64,
    jump_true: u8,
    jump_false: u8,
) -> u32 {
    const INSTS_PER_CHECK: u8 = 3;

    // Check high and low parts of scratch memory separately
    let low: u32 = mask as u32;
    let high: u32 = (mask >> 32) as u32;
    let mut insts = 0;
    let insts_before = insts;
    insts += load_scratch_low_into_acc(filter);
    insts += jump_if_acc_matches_mask(filter, low, EVAL_NEXT, jump_false + INSTS_PER_CHECK);
    assert_eq!(insts_before + INSTS_PER_CHECK as u32, insts);
    insts += load_scratch_high_into_acc(filter);
    insts += jump_if_acc_matches_mask(filter, high, jump_true, jump_false);
    assert_eq!(insts_before + 2 * INSTS_PER_CHECK as u32, insts);
    insts
}

/// Add statement that causes the BPF program to return and prohibit the syscall
fn return_fail(filter: &mut AllowList, log_only: bool) -> u32 {
    if log_only {
        filter.list.push(bpf_ret(nix::libc::SECCOMP_RET_LOG));
    } else {
        filter
            .list
            .push(bpf_ret(nix::libc::SECCOMP_RET_KILL_PROCESS));
    }
    1
}

/// Add statement that causes the BPF program to return and allow the syscall
fn return_success(filter: &mut AllowList) -> u32 {
    trace!("add_success");
    filter.list.push(bpf_ret(nix::libc::SECCOMP_RET_ALLOW));
    1
}

/// Negate accumulator
fn _bpf_neg() -> SockFilter {
    trace!("bpf_neg");
    bpf_stmt(BPF_ALU | BPF_NEG, 0)
}

/// And accumulator with value
fn bpf_and(k: u32) -> SockFilter {
    trace!("bpf_and({})", k);
    bpf_stmt(BPF_ALU | BPF_AND | BPF_K, k)
}

/// Or accumulator with value
fn _bpf_or(k: u32) -> SockFilter {
    trace!("bpf_or({})", k);
    bpf_stmt(BPF_ALU | BPF_OR | BPF_K, k)
}

/// Add return clause (e.g. allow, kill, log)
fn bpf_ret(k: u32) -> SockFilter {
    trace!("bpf_ret({})", k);
    bpf_stmt(BPF_RET | BPF_K, k)
}

// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/filter.h#L48
fn bpf_stmt(code: u32, k: u32) -> SockFilter {
    trace!("bpf_stmt({}, {})", code, k);
    bpf_jump(code, k, 0, 0)
}

// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/filter.h#L51
fn bpf_jump(code: u32, k: u32, jt: u8, jf: u8) -> SockFilter {
    trace!("*bpf_jump({}, {}, {}, {})", code, k, jt, jf);
    SockFilter {
        code: code as u16,
        k,
        jt,
        jf,
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod test {
    use super::SockFilter;
    use proptest::prelude::*;

    proptest! {
        #[test]
        fn sock_filter_serialize_deserialize(a in 0..100, b in 0i32..10) {
            let filter = SockFilter {
                code: (a + b) as u16,
                jt: a as u8,
                jf: b as u8,
                k: (a * b) as u32,
            };
            let serialized = serde_json::to_string(&filter).unwrap();
            let deserialized: SockFilter = serde_json::from_str(&serialized).unwrap();
            prop_assert_eq!(filter, deserialized);
        }
    }
}