1use std::collections::HashSet;
2use std::num::TryFromIntError;
3use std::os::unix::io;
4
5use libseccomp::{
6 ScmpAction, ScmpArch, ScmpArgCompare, ScmpCompareOp, ScmpFilterContext, ScmpSyscall,
7};
8use oci_spec::runtime::{
9 Arch, LinuxSeccomp, LinuxSeccompAction, LinuxSeccompFilterFlag, LinuxSeccompOperator,
10};
11
12#[derive(Debug, thiserror::Error)]
13pub enum SeccompError {
14 #[error("failed to translate trace action due to failed to convert errno {errno} into i16")]
15 TraceAction { source: TryFromIntError, errno: i32 },
16 #[error("SCMP_ACT_NOTIFY cannot be used as default action")]
17 NotifyAsDefaultAction,
18 #[error("SCMP_ACT_NOTIFY cannot be used for the write syscall")]
19 NotifyWriteSyscall,
20 #[error("failed to add arch to seccomp")]
21 AddArch {
22 source: libseccomp::error::SeccompError,
23 arch: Arch,
24 },
25 #[error("failed to load seccomp context")]
26 LoadContext {
27 source: libseccomp::error::SeccompError,
28 },
29 #[error("failed to get seccomp notify id")]
30 GetNotifyId {
31 source: libseccomp::error::SeccompError,
32 },
33 #[error("failed to add rule to seccomp")]
34 AddRule {
35 source: libseccomp::error::SeccompError,
36 },
37 #[error("failed to create new seccomp filter")]
38 NewFilter {
39 source: libseccomp::error::SeccompError,
40 default: LinuxSeccompAction,
41 },
42 #[error("failed to set filter flag")]
43 SetFilterFlag {
44 source: libseccomp::error::SeccompError,
45 flag: LinuxSeccompFilterFlag,
46 },
47 #[error("failed to set SCMP_FLTATR_CTL_NNP")]
48 SetCtlNnp {
49 source: libseccomp::error::SeccompError,
50 },
51}
52
53type Result<T> = std::result::Result<T, SeccompError>;
54
55fn translate_arch(arch: Arch) -> ScmpArch {
56 match arch {
57 Arch::ScmpArchNative => ScmpArch::Native,
58 Arch::ScmpArchX86 => ScmpArch::X86,
59 Arch::ScmpArchX86_64 => ScmpArch::X8664,
60 Arch::ScmpArchX32 => ScmpArch::X32,
61 Arch::ScmpArchArm => ScmpArch::Arm,
62 Arch::ScmpArchAarch64 => ScmpArch::Aarch64,
63 Arch::ScmpArchMips => ScmpArch::Mips,
64 Arch::ScmpArchMips64 => ScmpArch::Mips64,
65 Arch::ScmpArchMips64n32 => ScmpArch::Mips64N32,
66 Arch::ScmpArchMipsel => ScmpArch::Mipsel,
67 Arch::ScmpArchMipsel64 => ScmpArch::Mipsel64,
68 Arch::ScmpArchMipsel64n32 => ScmpArch::Mipsel64N32,
69 Arch::ScmpArchPpc => ScmpArch::Ppc,
70 Arch::ScmpArchPpc64 => ScmpArch::Ppc64,
71 Arch::ScmpArchPpc64le => ScmpArch::Ppc64Le,
72 Arch::ScmpArchS390 => ScmpArch::S390,
73 Arch::ScmpArchS390x => ScmpArch::S390X,
74 Arch::ScmpArchRiscv64 => ScmpArch::Riscv64,
75 }
76}
77
78fn translate_action(action: LinuxSeccompAction, errno: Option<u32>) -> Result<ScmpAction> {
79 tracing::trace!(?action, ?errno, "translating action");
80 let errno = errno.map(|e| e as i32).unwrap_or(libc::EPERM);
81 let action = match action {
82 LinuxSeccompAction::ScmpActKill => ScmpAction::KillThread,
83 LinuxSeccompAction::ScmpActTrap => ScmpAction::Trap,
84 LinuxSeccompAction::ScmpActErrno => ScmpAction::Errno(errno),
85 LinuxSeccompAction::ScmpActTrace => ScmpAction::Trace(
86 errno
87 .try_into()
88 .map_err(|err| SeccompError::TraceAction { source: err, errno })?,
89 ),
90 LinuxSeccompAction::ScmpActAllow => ScmpAction::Allow,
91 LinuxSeccompAction::ScmpActKillProcess => ScmpAction::KillProcess,
92 LinuxSeccompAction::ScmpActNotify => ScmpAction::Notify,
93 LinuxSeccompAction::ScmpActLog => ScmpAction::Log,
94 LinuxSeccompAction::ScmpActKillThread => ScmpAction::KillThread,
95 };
96
97 tracing::trace!(?action, "translated action");
98 Ok(action)
99}
100
101fn translate_op(op: LinuxSeccompOperator, datum_b: Option<u64>) -> ScmpCompareOp {
102 match op {
103 LinuxSeccompOperator::ScmpCmpNe => ScmpCompareOp::NotEqual,
104 LinuxSeccompOperator::ScmpCmpLt => ScmpCompareOp::Less,
105 LinuxSeccompOperator::ScmpCmpLe => ScmpCompareOp::LessOrEqual,
106 LinuxSeccompOperator::ScmpCmpEq => ScmpCompareOp::Equal,
107 LinuxSeccompOperator::ScmpCmpGe => ScmpCompareOp::GreaterEqual,
108 LinuxSeccompOperator::ScmpCmpGt => ScmpCompareOp::Greater,
109 LinuxSeccompOperator::ScmpCmpMaskedEq => ScmpCompareOp::MaskedEqual(datum_b.unwrap_or(0)),
110 }
111}
112
113fn check_seccomp(seccomp: &LinuxSeccomp) -> Result<()> {
114 if seccomp.default_action() == LinuxSeccompAction::ScmpActNotify {
125 return Err(SeccompError::NotifyAsDefaultAction);
126 }
127
128 if let Some(syscalls) = seccomp.syscalls() {
129 for syscall in syscalls {
130 if syscall.action() == LinuxSeccompAction::ScmpActNotify {
131 for name in syscall.names() {
132 if name == "write" {
133 return Err(SeccompError::NotifyWriteSyscall);
134 }
135 }
136 }
137 }
138 }
139
140 Ok(())
141}
142
143#[tracing::instrument(level = "trace", skip(seccomp))]
144pub fn initialize_seccomp(seccomp: &LinuxSeccomp) -> Result<Option<io::RawFd>> {
145 check_seccomp(seccomp)?;
146
147 tracing::trace!(default_action = ?seccomp.default_action(), errno = ?seccomp.default_errno_ret(), "initializing seccomp");
148 let default_action = translate_action(seccomp.default_action(), seccomp.default_errno_ret())?;
149 let mut ctx =
150 ScmpFilterContext::new(default_action).map_err(|err| SeccompError::NewFilter {
151 source: err,
152 default: seccomp.default_action(),
153 })?;
154
155 if let Some(flags) = seccomp.flags() {
156 for flag in flags {
157 match flag {
158 LinuxSeccompFilterFlag::SeccompFilterFlagLog => ctx.set_ctl_log(true),
159 LinuxSeccompFilterFlag::SeccompFilterFlagTsync => ctx.set_ctl_tsync(true),
160 LinuxSeccompFilterFlag::SeccompFilterFlagSpecAllow => ctx.set_ctl_ssb(true),
161 LinuxSeccompFilterFlag::SeccompFilterFlagWaitKillableRecv => {
162 ctx.set_ctl_waitkill(true)
163 }
164 }
165 .map_err(|err| SeccompError::SetFilterFlag {
166 source: err,
167 flag: *flag,
168 })?;
169 }
170 }
171
172 if let Some(architectures) = seccomp.architectures() {
173 for &arch in architectures {
174 tracing::trace!(?arch, "adding architecture");
175 ctx.add_arch(translate_arch(arch))
176 .map_err(|err| SeccompError::AddArch { source: err, arch })?;
177 }
178 }
179
180 ctx.set_ctl_nnp(false)
188 .map_err(|err| SeccompError::SetCtlNnp { source: err })?;
189
190 if let Some(syscalls) = seccomp.syscalls() {
191 for syscall in syscalls {
192 let action = translate_action(syscall.action(), syscall.errno_ret())?;
193 if action == default_action {
194 tracing::warn!(
197 "detect a seccomp action that is the same as the default action: {:?}",
198 syscall
199 );
200 continue;
201 }
202
203 for name in syscall.names() {
204 let sc = match ScmpSyscall::from_name(name) {
205 Ok(x) => x,
206 Err(_) => {
207 tracing::warn!(
210 "failed to resolve syscall, likely kernel doesn't support this. {:?}",
211 name
212 );
213 continue;
214 }
215 };
216 match syscall.args() {
217 Some(args) => {
226 let mut comparators = Vec::<ScmpArgCompare>::with_capacity(args.len());
227 let mut seen = HashSet::new();
228 let mut has_duplicate_index = false;
229
230 for arg in args {
231 let index = arg.index() as u32;
232 let comparator = ScmpArgCompare::new(
233 index,
234 translate_op(arg.op(), arg.value_two()),
235 arg.value(),
236 );
237 if !seen.insert(index) {
238 has_duplicate_index = true;
239 }
240 comparators.push(comparator);
241 }
242
243 if has_duplicate_index {
244 for comparator in &comparators {
245 tracing::trace!(
246 ?name,
247 ?action,
248 ?comparator,
249 "add seccomp conditional rule separately"
250 );
251 ctx.add_rule_conditional(action, sc, std::slice::from_ref(comparator))
252 .map_err(|err| {
253 tracing::error!(
254 "failed to add seccomp action: {:?}. Comparator: {:?} Syscall: {name}",
255 &action,
256 comparator,
257 );
258 SeccompError::AddRule { source: err }
259 })?;
260 }
261 } else {
262 tracing::trace!(
263 ?name,
264 ?action,
265 ?comparators,
266 "add seccomp conditional rule"
267 );
268 ctx.add_rule_conditional(action, sc, &comparators)
269 .map_err(|err| {
270 tracing::error!(
271 "failed to add seccomp action: {:?}. Comparators: {:?} Syscall: {name}",
272 &action,
273 comparators,
274 );
275 SeccompError::AddRule { source: err }
276 })?;
277 }
278 }
279 None => {
280 tracing::trace!(?name, ?action, "add seccomp rule");
281 ctx.add_rule(action, sc).map_err(|err| {
282 tracing::error!(
283 "failed to add seccomp rule: {:?}. Syscall: {name}",
284 &sc
285 );
286 SeccompError::AddRule { source: err }
287 })?;
288 }
289 }
290 }
291 }
292 }
293
294 ctx.load()
299 .map_err(|err| SeccompError::LoadContext { source: err })?;
300
301 let fd = if is_notify(seccomp) {
302 Some(
303 ctx.get_notify_fd()
304 .map_err(|err| SeccompError::GetNotifyId { source: err })?,
305 )
306 } else {
307 None
308 };
309
310 Ok(fd)
311}
312
313pub fn is_notify(seccomp: &LinuxSeccomp) -> bool {
314 seccomp
315 .syscalls()
316 .iter()
317 .flatten()
318 .any(|syscall| syscall.action() == LinuxSeccompAction::ScmpActNotify)
319}
320
#[cfg(test)]
mod tests {
    use std::path;

    use anyhow::{Context, Result};
    use oci_spec::runtime::{
        Arch, LinuxSeccompArg, LinuxSeccompArgBuilder, LinuxSeccompBuilder, LinuxSyscall,
        LinuxSyscallBuilder,
    };
    use serial_test::serial;

    use super::*;
    use crate::test_utils::{self, TestCallbackError};

    /// Builds a profile that allows everything by default, targets the native
    /// architecture and carries the given syscall entries.
    fn allow_by_default(syscalls: Vec<LinuxSyscall>) -> Result<LinuxSeccomp> {
        let profile = LinuxSeccompBuilder::default()
            .default_action(LinuxSeccompAction::ScmpActAllow)
            .architectures(vec![Arch::ScmpArchNative])
            .syscalls(syscalls)
            .build()?;
        Ok(profile)
    }

    /// Builds a single argument comparator.
    fn arg(index: usize, value: u64, op: LinuxSeccompOperator) -> Result<LinuxSeccompArg> {
        let built = LinuxSeccompArgBuilder::default()
            .index(index)
            .value(value)
            .op(op)
            .build()?;
        Ok(built)
    }

    /// Builds a `socket` entry that fails with `EAGAIN` when `args` match.
    fn socket_eagain_rule(args: Vec<LinuxSeccompArg>) -> Result<LinuxSyscall> {
        let rule = LinuxSyscallBuilder::default()
            .names(vec![String::from("socket")])
            .action(LinuxSeccompAction::ScmpActErrno)
            .errno_ret(libc::EAGAIN as u32)
            .args(args)
            .build()?;
        Ok(rule)
    }

    /// Initializes `profile` inside a child process and expects that to succeed.
    fn load_in_child(profile: &LinuxSeccomp) -> Result<()> {
        test_utils::test_in_child_process(|| {
            let _ = prctl::set_no_new_privileges(true);
            initialize_seccomp(profile).expect("failed to initialize seccomp");

            Ok(())
        })?;

        Ok(())
    }

    /// `getcwd` must fail with exactly the errno configured in the profile.
    #[test]
    #[serial]
    fn test_basic() -> Result<()> {
        let expect_error = libc::EAGAIN;

        let getcwd_rule = LinuxSyscallBuilder::default()
            .names(vec![String::from("getcwd")])
            .action(LinuxSeccompAction::ScmpActErrno)
            .errno_ret(expect_error as u32)
            .build()?;
        let seccomp_profile = allow_by_default(vec![getcwd_rule])?;

        test_utils::test_in_child_process(|| {
            let _ = prctl::set_no_new_privileges(true);
            initialize_seccomp(&seccomp_profile).expect("failed to initialize seccomp");

            match nix::unistd::getcwd() {
                Ok(_) => {
                    Err(TestCallbackError::Custom(
                        "getcwd didn't error out as seccomp profile specified".to_string(),
                    ))?;
                }
                Err(errno) => {
                    if errno != nix::errno::Errno::from_raw(expect_error) {
                        Err(TestCallbackError::Custom(format!(
                            "getcwd failed but we didn't get the expected error from seccomp profile: {}",
                            errno
                        )))?;
                    }
                }
            }

            Ok(())
        })?;

        Ok(())
    }

    /// The profile stored in the fixture spec must load without error.
    #[test]
    #[serial]
    fn test_moby() -> Result<()> {
        let fixture_path =
            path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/seccomp/fixture/config.json");
        let spec = oci_spec::runtime::Spec::load(fixture_path)
            .context("Failed to load test spec for seccomp")?;

        let linux = spec.linux().as_ref().unwrap();
        let seccomp_profile = linux.seccomp().as_ref().unwrap();

        load_in_child(seccomp_profile)
    }

    /// A profile containing a notify rule must yield a notify fd.
    #[test]
    #[serial]
    fn test_seccomp_notify() -> Result<()> {
        let notify_rule = LinuxSyscallBuilder::default()
            .names(vec![String::from("getcwd")])
            .action(LinuxSeccompAction::ScmpActNotify)
            .build()?;
        let seccomp_profile = allow_by_default(vec![notify_rule])?;

        test_utils::test_in_child_process(|| {
            let _ = prctl::set_no_new_privileges(true);
            let fd =
                initialize_seccomp(&seccomp_profile).expect("failed to initialize seccomp profile");
            if fd.is_none() {
                Err(TestCallbackError::Custom(
                    "failed to get a seccomp notify fd with notify seccomp profile".to_string(),
                ))?;
            }

            Ok(())
        })?;

        Ok(())
    }

    /// Comparators on different argument indices load successfully.
    #[test]
    #[serial]
    fn test_seccomp_conditional_rule_multiple_distinct_args() -> Result<()> {
        let rule = socket_eagain_rule(vec![
            arg(0_usize, libc::AF_INET as u64, LinuxSeccompOperator::ScmpCmpEq)?,
            arg(
                1_usize,
                libc::SOCK_STREAM as u64,
                LinuxSeccompOperator::ScmpCmpEq,
            )?,
        ])?;
        let seccomp_profile = allow_by_default(vec![rule])?;

        load_in_child(&seccomp_profile)
    }

    /// Two comparators on the same argument index load successfully.
    #[test]
    #[serial]
    fn test_seccomp_conditional_rule_duplicate_arg_index() -> Result<()> {
        let rule = socket_eagain_rule(vec![
            arg(0_usize, libc::AF_INET as u64, LinuxSeccompOperator::ScmpCmpEq)?,
            arg(0_usize, libc::AF_UNIX as u64, LinuxSeccompOperator::ScmpCmpNe)?,
        ])?;
        let seccomp_profile = allow_by_default(vec![rule])?;

        load_in_child(&seccomp_profile)
    }

    /// Two separate entries for the same syscall name load successfully.
    #[test]
    #[serial]
    fn test_seccomp_multiple_syscall_entries_for_same_name() -> Result<()> {
        let rule1 = socket_eagain_rule(vec![
            arg(
                0_usize,
                libc::AF_NETLINK as u64,
                LinuxSeccompOperator::ScmpCmpEq,
            )?,
            arg(
                2_usize,
                libc::NETLINK_AUDIT as u64,
                LinuxSeccompOperator::ScmpCmpNe,
            )?,
        ])?;
        let rule2 = socket_eagain_rule(vec![arg(
            0_usize,
            libc::AF_INET as u64,
            LinuxSeccompOperator::ScmpCmpNe,
        )?])?;

        let profile = allow_by_default(vec![rule1, rule2])?;

        load_in_child(&profile)
    }
}