Skip to main content

nucleus/security/
seccomp_generate.rs

1//! Seccomp profile generator: create minimal profiles from trace data.
2//!
3//! Reads NDJSON trace files produced by `--seccomp-mode trace` and
4//! generates a minimal OCI-format seccomp profile containing only
5//! the syscalls actually used by the workload.
6
7use crate::error::{NucleusError, Result};
8use crate::security::seccomp_trace::TraceRecord;
9use serde::{Deserialize, Serialize};
10use std::collections::HashSet;
11use std::io::BufRead;
12use std::path::Path;
13use tracing::info;
14
/// OCI-format seccomp profile (subset).
///
/// Serialized with camelCase keys (for example `defaultAction`). When a
/// profile is deserialized, `architectures` and `syscalls` may be omitted
/// and then default to empty lists.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SeccompProfile {
    /// Default action for unlisted syscalls (e.g. `"SCMP_ACT_KILL_PROCESS"`).
    pub default_action: String,

    /// Target architectures, as constants such as `"SCMP_ARCH_X86_64"`.
    #[serde(default)]
    pub architectures: Vec<String>,

    /// Syscall groups, each pairing a list of syscall names with an action.
    #[serde(default)]
    pub syscalls: Vec<SeccompSyscallGroup>,
}
30
/// A group of syscalls sharing the same action.
///
/// `args` is left out of the serialized form when it is empty and defaults
/// to an empty list when missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeccompSyscallGroup {
    /// Syscall names covered by this group.
    pub names: Vec<String>,

    /// Action applied to every syscall in `names`: typically "SCMP_ACT_ALLOW".
    pub action: String,

    /// Optional argument filters (not generated, but preserved).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub args: Vec<SeccompArgFilter>,
}
44
/// Argument-level filter for a syscall.
///
/// Describes one comparison: the argument at `index` is compared against
/// `value` using the operator named by `op`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeccompArgFilter {
    /// Argument index (0-5).
    pub index: u32,
    /// Comparison operator, stored as its string name.
    pub op: String,
    /// Comparison value.
    pub value: u64,
}
55
/// Name the OCI seccomp architecture constant for the compilation target.
///
/// Each candidate is checked with `cfg!(target_arch = ...)`, so the answer is
/// fixed when the binary is built and generated profiles carry the
/// architecture of the binary that produced them. Targets that are not in the
/// candidate list fall back to `"SCMP_ARCH_NATIVE"`.
fn native_scmp_arch() -> &'static str {
    // (is this the compilation target?, constant to report).
    // `target_arch` has a single value per build, so at most one flag is true.
    const CANDIDATES: [(bool, &str); 6] = [
        (cfg!(target_arch = "x86_64"), "SCMP_ARCH_X86_64"),
        (cfg!(target_arch = "aarch64"), "SCMP_ARCH_AARCH64"),
        (cfg!(target_arch = "x86"), "SCMP_ARCH_X86"),
        (cfg!(target_arch = "arm"), "SCMP_ARCH_ARM"),
        (cfg!(target_arch = "riscv64"), "SCMP_ARCH_RISCV64"),
        (cfg!(target_arch = "s390x"), "SCMP_ARCH_S390X"),
    ];

    CANDIDATES
        .iter()
        .find(|&&(is_target, _)| is_target)
        .map_or("SCMP_ARCH_NATIVE", |&(_, constant)| constant)
}
77
78/// Generate a minimal seccomp profile from a trace file.
79///
80/// Reads NDJSON records, collects unique syscalls, and produces
81/// an OCI-format JSON profile that allows exactly those syscalls.
82pub fn generate_from_trace(trace_path: &Path) -> Result<SeccompProfile> {
83    let file = std::fs::File::open(trace_path).map_err(|e| {
84        NucleusError::ConfigError(format!("Failed to open trace file {:?}: {}", trace_path, e))
85    })?;
86
87    let reader = std::io::BufReader::new(file);
88    let mut syscall_set: HashSet<String> = HashSet::new();
89
90    for line in reader.lines() {
91        let line = line
92            .map_err(|e| NucleusError::ConfigError(format!("Failed to read trace line: {}", e)))?;
93
94        if line.trim().is_empty() {
95            continue;
96        }
97
98        let record: TraceRecord = serde_json::from_str(&line).map_err(|e| {
99            NucleusError::ConfigError(format!(
100                "Failed to parse trace record: {}: line='{}'",
101                e, line
102            ))
103        })?;
104
105        let name = record.name.unwrap_or_else(|| {
106            syscall_number_to_name(record.syscall)
107                .map(String::from)
108                .unwrap_or_else(|| format!("__NR_{}", record.syscall))
109        });
110
111        syscall_set.insert(name);
112    }
113
114    let mut syscall_names: Vec<String> = syscall_set.into_iter().collect();
115
116    syscall_names.sort();
117
118    info!(
119        "Generated seccomp profile with {} syscalls from {:?}",
120        syscall_names.len(),
121        trace_path
122    );
123
124    Ok(SeccompProfile {
125        default_action: "SCMP_ACT_KILL_PROCESS".to_string(),
126        architectures: vec![native_scmp_arch().to_string()],
127        syscalls: vec![SeccompSyscallGroup {
128            names: syscall_names,
129            action: "SCMP_ACT_ALLOW".to_string(),
130            args: vec![],
131        }],
132    })
133}
134
135/// Map a syscall number back to its name (inverse of syscall_name_to_number).
136pub fn syscall_number_to_name(nr: i64) -> Option<&'static str> {
137    SYSCALL_TABLE
138        .iter()
139        .find(|&&(_, n)| n == nr)
140        .map(|&(name, _)| name)
141}
142
/// (name, number) pairs for all mapped syscalls.
///
/// Each number is the `libc::SYS_*` constant for the compilation target, so
/// the table always describes the architecture the binary was built for.
/// `syscall_number_to_name` searches this table linearly.
///
/// NOTE(review): some of these constants (e.g. `SYS_open`, `SYS_stat`,
/// `SYS_fork`, `SYS_arch_prctl`) are presumably not defined by `libc` for
/// every Linux architecture — confirm which targets this module is built for.
static SYSCALL_TABLE: &[(&str, i64)] = &[
    // File I/O and filesystem metadata.
    ("read", libc::SYS_read),
    ("write", libc::SYS_write),
    ("open", libc::SYS_open),
    ("openat", libc::SYS_openat),
    ("close", libc::SYS_close),
    ("stat", libc::SYS_stat),
    ("fstat", libc::SYS_fstat),
    ("lstat", libc::SYS_lstat),
    ("lseek", libc::SYS_lseek),
    ("access", libc::SYS_access),
    ("fcntl", libc::SYS_fcntl),
    ("readv", libc::SYS_readv),
    ("writev", libc::SYS_writev),
    ("pread64", libc::SYS_pread64),
    ("pwrite64", libc::SYS_pwrite64),
    ("readlink", libc::SYS_readlink),
    ("readlinkat", libc::SYS_readlinkat),
    ("newfstatat", libc::SYS_newfstatat),
    ("statx", libc::SYS_statx),
    ("faccessat", libc::SYS_faccessat),
    ("faccessat2", libc::SYS_faccessat2),
    ("dup", libc::SYS_dup),
    ("dup2", libc::SYS_dup2),
    ("dup3", libc::SYS_dup3),
    ("pipe", libc::SYS_pipe),
    ("pipe2", libc::SYS_pipe2),
    ("unlink", libc::SYS_unlink),
    ("unlinkat", libc::SYS_unlinkat),
    ("rename", libc::SYS_rename),
    ("renameat", libc::SYS_renameat),
    ("renameat2", libc::SYS_renameat2),
    ("link", libc::SYS_link),
    ("linkat", libc::SYS_linkat),
    ("symlink", libc::SYS_symlink),
    ("symlinkat", libc::SYS_symlinkat),
    ("chmod", libc::SYS_chmod),
    ("fchmod", libc::SYS_fchmod),
    ("fchmodat", libc::SYS_fchmodat),
    ("truncate", libc::SYS_truncate),
    ("ftruncate", libc::SYS_ftruncate),
    ("fallocate", libc::SYS_fallocate),
    ("fadvise64", libc::SYS_fadvise64),
    ("fsync", libc::SYS_fsync),
    ("fdatasync", libc::SYS_fdatasync),
    ("flock", libc::SYS_flock),
    ("sendfile", libc::SYS_sendfile),
    ("copy_file_range", libc::SYS_copy_file_range),
    ("splice", libc::SYS_splice),
    ("tee", libc::SYS_tee),
    // Memory management.
    ("mmap", libc::SYS_mmap),
    ("munmap", libc::SYS_munmap),
    ("mprotect", libc::SYS_mprotect),
    ("brk", libc::SYS_brk),
    ("mremap", libc::SYS_mremap),
    ("madvise", libc::SYS_madvise),
    ("msync", libc::SYS_msync),
    ("mlock", libc::SYS_mlock),
    ("munlock", libc::SYS_munlock),
    // Process lifecycle and identity.
    ("fork", libc::SYS_fork),
    ("clone", libc::SYS_clone),
    ("clone3", libc::SYS_clone3),
    ("execve", libc::SYS_execve),
    ("execveat", libc::SYS_execveat),
    ("wait4", libc::SYS_wait4),
    ("waitid", libc::SYS_waitid),
    ("exit", libc::SYS_exit),
    ("exit_group", libc::SYS_exit_group),
    ("getpid", libc::SYS_getpid),
    ("gettid", libc::SYS_gettid),
    ("getuid", libc::SYS_getuid),
    ("getgid", libc::SYS_getgid),
    ("geteuid", libc::SYS_geteuid),
    ("getegid", libc::SYS_getegid),
    ("getppid", libc::SYS_getppid),
    ("getpgrp", libc::SYS_getpgrp),
    ("setsid", libc::SYS_setsid),
    ("getgroups", libc::SYS_getgroups),
    // Signals.
    ("rt_sigaction", libc::SYS_rt_sigaction),
    ("rt_sigprocmask", libc::SYS_rt_sigprocmask),
    ("rt_sigreturn", libc::SYS_rt_sigreturn),
    ("rt_sigsuspend", libc::SYS_rt_sigsuspend),
    ("sigaltstack", libc::SYS_sigaltstack),
    ("kill", libc::SYS_kill),
    ("tgkill", libc::SYS_tgkill),
    // Clocks and sleeping.
    ("clock_gettime", libc::SYS_clock_gettime),
    ("clock_getres", libc::SYS_clock_getres),
    ("clock_nanosleep", libc::SYS_clock_nanosleep),
    ("gettimeofday", libc::SYS_gettimeofday),
    ("nanosleep", libc::SYS_nanosleep),
    // Working directory and directory entries.
    ("getcwd", libc::SYS_getcwd),
    ("chdir", libc::SYS_chdir),
    ("fchdir", libc::SYS_fchdir),
    ("mkdir", libc::SYS_mkdir),
    ("mkdirat", libc::SYS_mkdirat),
    ("rmdir", libc::SYS_rmdir),
    ("getdents", libc::SYS_getdents),
    ("getdents64", libc::SYS_getdents64),
    // Sockets.
    ("socket", libc::SYS_socket),
    ("connect", libc::SYS_connect),
    ("sendto", libc::SYS_sendto),
    ("recvfrom", libc::SYS_recvfrom),
    ("sendmsg", libc::SYS_sendmsg),
    ("recvmsg", libc::SYS_recvmsg),
    ("shutdown", libc::SYS_shutdown),
    ("bind", libc::SYS_bind),
    ("listen", libc::SYS_listen),
    ("accept", libc::SYS_accept),
    ("accept4", libc::SYS_accept4),
    ("setsockopt", libc::SYS_setsockopt),
    ("getsockopt", libc::SYS_getsockopt),
    ("getsockname", libc::SYS_getsockname),
    ("getpeername", libc::SYS_getpeername),
    ("socketpair", libc::SYS_socketpair),
    // Readiness polling and event/timer file descriptors.
    ("poll", libc::SYS_poll),
    ("ppoll", libc::SYS_ppoll),
    ("select", libc::SYS_select),
    ("pselect6", libc::SYS_pselect6),
    ("epoll_create", libc::SYS_epoll_create),
    ("epoll_create1", libc::SYS_epoll_create1),
    ("epoll_ctl", libc::SYS_epoll_ctl),
    ("epoll_wait", libc::SYS_epoll_wait),
    ("epoll_pwait", libc::SYS_epoll_pwait),
    ("eventfd", libc::SYS_eventfd),
    ("eventfd2", libc::SYS_eventfd2),
    ("signalfd", libc::SYS_signalfd),
    ("signalfd4", libc::SYS_signalfd4),
    ("timerfd_create", libc::SYS_timerfd_create),
    ("timerfd_settime", libc::SYS_timerfd_settime),
    ("timerfd_gettime", libc::SYS_timerfd_gettime),
    // System information, threading support, limits, and miscellaneous.
    ("uname", libc::SYS_uname),
    ("getrandom", libc::SYS_getrandom),
    ("futex", libc::SYS_futex),
    ("set_tid_address", libc::SYS_set_tid_address),
    ("set_robust_list", libc::SYS_set_robust_list),
    ("get_robust_list", libc::SYS_get_robust_list),
    ("arch_prctl", libc::SYS_arch_prctl),
    ("sysinfo", libc::SYS_sysinfo),
    ("umask", libc::SYS_umask),
    ("getrlimit", libc::SYS_getrlimit),
    ("prlimit64", libc::SYS_prlimit64),
    ("getrusage", libc::SYS_getrusage),
    ("times", libc::SYS_times),
    ("sched_yield", libc::SYS_sched_yield),
    ("sched_getaffinity", libc::SYS_sched_getaffinity),
    ("getcpu", libc::SYS_getcpu),
    ("rseq", libc::SYS_rseq),
    ("close_range", libc::SYS_close_range),
    ("memfd_create", libc::SYS_memfd_create),
    ("ioctl", libc::SYS_ioctl),
    ("prctl", libc::SYS_prctl),
    // Landlock sandboxing.
    ("landlock_create_ruleset", libc::SYS_landlock_create_ruleset),
    ("landlock_add_rule", libc::SYS_landlock_add_rule),
    ("landlock_restrict_self", libc::SYS_landlock_restrict_self),
];
299
#[cfg(test)]
mod tests {
    use super::*;

    /// Write `contents` to `trace.ndjson` inside a fresh temporary directory
    /// and return the profile generated from that file.
    fn profile_from_trace(contents: &str) -> SeccompProfile {
        let scratch = tempfile::tempdir().unwrap();
        let trace_file = scratch.path().join("trace.ndjson");
        std::fs::write(&trace_file, contents).unwrap();
        generate_from_trace(&trace_file).unwrap()
    }

    /// Known numbers map back to their names; an unmapped number gives `None`.
    #[test]
    fn test_syscall_number_to_name() {
        let known = [
            (libc::SYS_read, "read"),
            (libc::SYS_write, "write"),
            (libc::SYS_openat, "openat"),
        ];
        for &(number, expected) in known.iter() {
            assert_eq!(syscall_number_to_name(number), Some(expected));
        }

        assert!(syscall_number_to_name(99999).is_none());
    }

    /// Three distinct records yield one allow-group holding exactly those names.
    #[test]
    fn test_generate_from_trace() {
        let trace = concat!(
            r#"{"syscall":0,"name":"read","count":10}"#,
            "\n",
            r#"{"syscall":1,"name":"write","count":5}"#,
            "\n",
            r#"{"syscall":257,"name":"openat","count":3}"#,
            "\n",
        );

        let profile = profile_from_trace(trace);
        assert_eq!(profile.default_action, "SCMP_ACT_KILL_PROCESS");
        assert_eq!(profile.syscalls.len(), 1);

        let names = &profile.syscalls[0].names;
        assert_eq!(names.len(), 3);
        for expected in ["read", "write", "openat"].iter() {
            assert!(names.iter().any(|name| name.as_str() == *expected));
        }
    }

    /// A profile serializes with camelCase keys and survives a JSON roundtrip.
    #[test]
    fn test_profile_serialization() {
        let allow_group = SeccompSyscallGroup {
            names: vec!["read".to_string(), "write".to_string()],
            action: "SCMP_ACT_ALLOW".to_string(),
            args: vec![],
        };
        let profile = SeccompProfile {
            default_action: "SCMP_ACT_KILL_PROCESS".to_string(),
            architectures: vec!["SCMP_ARCH_X86_64".to_string()],
            syscalls: vec![allow_group],
        };

        let json = serde_json::to_string_pretty(&profile).unwrap();
        for needle in ["\"defaultAction\"", "SCMP_ACT_KILL_PROCESS", "\"read\""].iter() {
            assert!(json.contains(needle));
        }

        // Roundtrip
        let parsed: SeccompProfile = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.syscalls[0].names.len(), 2);
    }

    /// The reported constant matches the build target on the common targets.
    #[test]
    fn test_native_scmp_arch_matches_target() {
        let arch = native_scmp_arch();
        if cfg!(target_arch = "x86_64") {
            assert_eq!(arch, "SCMP_ARCH_X86_64");
        }
        if cfg!(target_arch = "aarch64") {
            assert_eq!(arch, "SCMP_ARCH_AARCH64");
        }
        // Always starts with SCMP_ARCH_
        assert!(arch.starts_with("SCMP_ARCH_"));
    }

    /// Generated profiles list exactly one architecture: the native one.
    #[test]
    fn test_generated_profile_uses_native_arch() {
        let trace = concat!(r#"{"syscall":0,"name":"read","count":1}"#, "\n");

        let profile = profile_from_trace(trace);
        assert_eq!(profile.architectures.len(), 1);
        assert_eq!(profile.architectures[0], native_scmp_arch());
    }
}