Skip to main content

memf_linux/
seccomp.rs

1//! Linux seccomp-BPF filter analysis for container security forensics.
2//!
3//! Seccomp (secure computing) profiles restrict the syscalls available to
4//! a process. In containerized environments, seccomp-BPF filters are the
5//! primary syscall-level sandbox. Analyzing these from memory helps detect
6//! container escape attempts -- processes running with no seccomp filter
7//! (unconfined) inside a container are highly suspicious.
8//!
9//! The kernel stores seccomp state in `task_struct.seccomp`:
10//! - `seccomp.mode`: 0 = disabled, 1 = strict, 2 = filter
11//! - `seccomp.filter`: pointer to a chain of `seccomp_filter` structs
12//!   linked via `seccomp_filter.prev`.
13
14use memf_core::object_reader::ObjectReader;
15use memf_format::PhysicalMemoryProvider;
16
17use crate::{ProcessInfo, Result};
18
19/// Seccomp profile information extracted from a process's `task_struct`.
20#[derive(Debug, Clone, serde::Serialize)]
21pub struct SeccompInfo {
22    /// Process ID.
23    pub pid: u64,
24    /// Process command name.
25    pub comm: String,
26    /// Seccomp mode: 0 = disabled, 1 = strict, 2 = filter.
27    pub seccomp_mode: u8,
28    /// Number of chained seccomp-BPF filters.
29    pub filter_count: u32,
30    /// True if the process has no seccomp enforcement at all.
31    /// Suspicious for containerized workloads.
32    pub is_unconfined: bool,
33}
34
/// Maximum number of chained filters to follow (cycle protection).
///
/// `count_filter_chain` uses this as its iteration bound, so a circular or
/// corrupted `seccomp_filter.prev` chain cannot be walked forever. As a
/// consequence, the reported filter count is capped at this value.
const MAX_FILTER_CHAIN: usize = 256;
37
38/// Walk seccomp profile information for each process in the provided list.
39///
40/// For each process, reads `task_struct.seccomp.mode` to determine the
41/// seccomp enforcement level. When mode is 2 (filter), follows the
42/// `seccomp.filter` pointer chain (`seccomp_filter.prev`) to count the
43/// number of stacked BPF filters.
44///
45/// Returns `Ok(Vec::new())` if the required struct/field symbols are
46/// missing from the profile (e.g., older kernel without seccomp support).
47pub fn walk_seccomp_profiles<P: PhysicalMemoryProvider>(
48    reader: &ObjectReader<P>,
49    processes: &[ProcessInfo],
50) -> Result<Vec<SeccompInfo>> {
51    if processes.is_empty() {
52        return Ok(Vec::new());
53    }
54
55    // Verify the required struct fields exist in the symbol table.
56    // If seccomp fields are absent, the kernel may not have seccomp support.
57    let seccomp_offset = match reader.symbols().field_offset("task_struct", "seccomp") {
58        Some(off) => off,
59        None => return Ok(Vec::new()),
60    };
61
62    // Verify the mode field exists; we don't use the offset directly since
63    // read_field resolves it, but its absence means no seccomp support.
64    if reader.symbols().field_offset("seccomp", "mode").is_none() {
65        return Ok(Vec::new());
66    }
67
68    let filter_field_offset = reader.symbols().field_offset("seccomp", "filter");
69    let prev_field_offset = reader.symbols().field_offset("seccomp_filter", "prev");
70
71    let mut results = Vec::with_capacity(processes.len());
72
73    for proc in processes {
74        let seccomp_base = proc.vaddr + seccomp_offset;
75
76        // Read seccomp.mode (stored as int, we read 4 bytes).
77        let mode_raw: u32 = reader
78            .read_field(seccomp_base, "seccomp", "mode")
79            .unwrap_or(0);
80        let seccomp_mode = mode_raw as u8;
81
82        // Count filters in the chain if mode == 2 (filter) and symbols exist.
83        let filter_count = if seccomp_mode == 2 {
84            if let (Some(_filter_off), Some(_prev_off)) = (filter_field_offset, prev_field_offset) {
85                let filter_ptr: u64 = reader
86                    .read_field(seccomp_base, "seccomp", "filter")
87                    .unwrap_or(0);
88                count_filter_chain(reader, filter_ptr)
89            } else {
90                // We know mode is filter but can't walk the chain.
91                0
92            }
93        } else {
94            0
95        };
96
97        let is_unconfined = seccomp_mode == 0;
98
99        results.push(SeccompInfo {
100            pid: proc.pid,
101            comm: proc.comm.clone(),
102            seccomp_mode,
103            filter_count,
104            is_unconfined,
105        });
106    }
107
108    Ok(results)
109}
110
111/// Walk the `seccomp_filter.prev` linked list to count chained filters.
112fn count_filter_chain<P: PhysicalMemoryProvider>(
113    reader: &ObjectReader<P>,
114    first_filter: u64,
115) -> u32 {
116    if first_filter == 0 {
117        return 0;
118    }
119
120    let mut count: u32 = 0;
121    let mut current = first_filter;
122
123    for _ in 0..MAX_FILTER_CHAIN {
124        if current == 0 {
125            break;
126        }
127        count += 1;
128
129        // Read the `prev` pointer to follow the chain.
130        let prev: u64 = reader
131            .read_field(current, "seccomp_filter", "prev")
132            .unwrap_or(0);
133        current = prev;
134    }
135
136    count
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use memf_core::test_builders::{flags, PageTableBuilder};
    use memf_core::vas::{TranslationMode, VirtualAddressSpace};
    use memf_symbols::isf::IsfResolver;
    use memf_symbols::test_builders::IsfBuilder;

    /// Helper: build an ObjectReader from an IsfBuilder and PageTableBuilder.
    fn make_reader(
        isf: &IsfBuilder,
        ptb: PageTableBuilder,
    ) -> ObjectReader<memf_core::test_builders::SyntheticPhysMem> {
        let json = isf.build_json();
        let resolver = IsfResolver::from_value(&json).unwrap();
        let (cr3, mem) = ptb.build();
        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
        ObjectReader::new(vas, Box::new(resolver))
    }

    /// Helper: build a minimal ProcessInfo for testing.
    fn fake_process(pid: u64, comm: &str, vaddr: u64) -> ProcessInfo {
        ProcessInfo {
            pid,
            ppid: 1,
            comm: comm.to_string(),
            state: crate::types::ProcessState::Running,
            vaddr,
            cr3: None,
            start_time: 0,
        }
    }

    #[test]
    fn walk_seccomp_empty() {
        // Empty process list should return empty Vec.
        let isf = IsfBuilder::new();
        let ptb = PageTableBuilder::new();
        let reader = make_reader(&isf, ptb);

        let result = walk_seccomp_profiles(&reader, &[]).unwrap();
        assert!(
            result.is_empty(),
            "expected empty vec for empty process list"
        );
    }

    #[test]
    fn walk_seccomp_missing_symbols() {
        // A profile whose task_struct has no `seccomp` field (e.g. a kernel
        // built without seccomp) must yield an empty result, not an error,
        // even when processes are supplied.
        let isf = IsfBuilder::new()
            .add_struct("task_struct", 9024)
            .add_field("task_struct", "pid", 0, "int");
        let ptb = PageTableBuilder::new();
        let reader = make_reader(&isf, ptb);
        let procs = vec![fake_process(1, "init", 0xFFFF_8000_0010_0000)];

        let result = walk_seccomp_profiles(&reader, &procs).unwrap();
        assert!(
            result.is_empty(),
            "expected empty vec when seccomp symbols are missing"
        );
    }

    #[test]
    fn seccomp_mode_disabled() {
        // A process with seccomp mode 0 should be flagged as unconfined.
        let task_vaddr: u64 = 0xFFFF_8000_0010_0000;
        let task_paddr: u64 = 0x0080_0000;
        // seccomp struct offset within task_struct: we define it at offset 2048
        // seccomp.mode is an int at offset 0 within seccomp struct
        let seccomp_offset: u64 = 2048;

        let isf = IsfBuilder::new()
            .add_struct("task_struct", 9024)
            .add_field("task_struct", "seccomp", seccomp_offset, "seccomp")
            .add_struct("seccomp", 16)
            .add_field("seccomp", "mode", 0, "int")
            .add_field("seccomp", "filter", 8, "pointer");

        let ptb = PageTableBuilder::new()
            .map_4k(task_vaddr, task_paddr, flags::WRITABLE)
            // Write seccomp.mode = 0 (disabled) at task_struct + seccomp_offset + 0
            .write_phys_u64(task_paddr + seccomp_offset, 0u64);

        let reader = make_reader(&isf, ptb);
        let procs = vec![fake_process(100, "nginx", task_vaddr)];

        let result = walk_seccomp_profiles(&reader, &procs).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].pid, 100);
        assert_eq!(result[0].seccomp_mode, 0);
        assert!(result[0].is_unconfined, "mode 0 should be unconfined");
        assert_eq!(result[0].filter_count, 0);
    }

    #[test]
    fn seccomp_mode_filter() {
        // A process with seccomp mode 2 (filter) and one filter in the chain.
        let task_vaddr: u64 = 0xFFFF_8000_0020_0000;
        let task_paddr: u64 = 0x0040_0000;
        let filter_vaddr: u64 = 0xFFFF_8000_0030_0000;
        let filter_paddr: u64 = 0x0060_0000;
        let seccomp_offset: u64 = 2048;

        let isf = IsfBuilder::new()
            .add_struct("task_struct", 9024)
            .add_field("task_struct", "seccomp", seccomp_offset, "seccomp")
            .add_struct("seccomp", 16)
            .add_field("seccomp", "mode", 0, "int")
            .add_field("seccomp", "filter", 8, "pointer")
            .add_struct("seccomp_filter", 16)
            .add_field("seccomp_filter", "prev", 0, "pointer");

        let ptb = PageTableBuilder::new()
            .map_4k(task_vaddr, task_paddr, flags::WRITABLE)
            .map_4k(filter_vaddr, filter_paddr, flags::WRITABLE)
            // seccomp.mode = 2 (filter)
            .write_phys_u64(task_paddr + seccomp_offset, 2u64)
            // seccomp.filter = pointer to filter struct
            .write_phys_u64(task_paddr + seccomp_offset + 8, filter_vaddr)
            // filter.prev = 0 (null, end of chain — single filter)
            .write_phys_u64(filter_paddr, 0u64);

        let reader = make_reader(&isf, ptb);
        let procs = vec![fake_process(200, "containerd", task_vaddr)];

        let result = walk_seccomp_profiles(&reader, &procs).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].pid, 200);
        assert_eq!(result[0].comm, "containerd");
        assert_eq!(result[0].seccomp_mode, 2);
        assert!(!result[0].is_unconfined, "mode 2 should not be unconfined");
        assert_eq!(result[0].filter_count, 1);
    }

    #[test]
    fn seccomp_mode_filter_chain_of_two() {
        // Mode 2 with two stacked filters: the walk must follow `prev` and
        // count both entries before stopping at the null terminator.
        let task_vaddr: u64 = 0xFFFF_8000_0020_0000;
        let task_paddr: u64 = 0x0040_0000;
        let filter_vaddr: u64 = 0xFFFF_8000_0030_0000;
        let filter_paddr: u64 = 0x0060_0000;
        // The second filter lives in the same mapped page as the first.
        let second_filter_delta: u64 = 0x100;
        let seccomp_offset: u64 = 2048;

        let isf = IsfBuilder::new()
            .add_struct("task_struct", 9024)
            .add_field("task_struct", "seccomp", seccomp_offset, "seccomp")
            .add_struct("seccomp", 16)
            .add_field("seccomp", "mode", 0, "int")
            .add_field("seccomp", "filter", 8, "pointer")
            .add_struct("seccomp_filter", 16)
            .add_field("seccomp_filter", "prev", 0, "pointer");

        let ptb = PageTableBuilder::new()
            .map_4k(task_vaddr, task_paddr, flags::WRITABLE)
            .map_4k(filter_vaddr, filter_paddr, flags::WRITABLE)
            // seccomp.mode = 2 (filter)
            .write_phys_u64(task_paddr + seccomp_offset, 2u64)
            // seccomp.filter = pointer to the newest filter
            .write_phys_u64(task_paddr + seccomp_offset + 8, filter_vaddr)
            // newest.prev = pointer to the older filter
            .write_phys_u64(filter_paddr, filter_vaddr + second_filter_delta)
            // older.prev = 0 (null, end of chain)
            .write_phys_u64(filter_paddr + second_filter_delta, 0u64);

        let reader = make_reader(&isf, ptb);
        let procs = vec![fake_process(300, "runc", task_vaddr)];

        let result = walk_seccomp_profiles(&reader, &procs).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].seccomp_mode, 2);
        assert!(!result[0].is_unconfined, "mode 2 should not be unconfined");
        assert_eq!(result[0].filter_count, 2);
    }
}