Skip to main content

memf_linux/
io_uring.rs

1//! Linux io_uring context forensics.
2//!
3//! io_uring provides an asynchronous syscall interface that bypasses
4//! traditional syscall tracing (seccomp, ptrace, auditd). The "curing"
5//! rootkit (2025) demonstrated full C2 via io_uring alone — IORING_OP_SENDMSG
6//! and IORING_OP_RECVMSG allow full network I/O without triggering seccomp
7//! SYSCALL_AUDIT events. This walker enumerates `io_ring_ctx` structures
8//! attached to processes and flags those performing sensitive operations.
9
10use memf_core::object_reader::ObjectReader;
11use memf_format::PhysicalMemoryProvider;
12
13use crate::Result;
14
/// io_uring opcode for sending a message (IORING_OP_SENDMSG, from include/uapi/linux/io_uring.h).
///
/// One of the three network opcodes (with RECVMSG and CONNECT) that this
/// module treats as relevant to seccomp bypass.
pub const IORING_OP_SENDMSG: u8 = 9;
/// io_uring opcode for receiving a message (IORING_OP_RECVMSG).
///
/// Treated as a network opcode alongside SENDMSG and CONNECT.
pub const IORING_OP_RECVMSG: u8 = 10;
/// io_uring opcode for establishing a connection (IORING_OP_CONNECT).
///
/// Treated as a network opcode alongside SENDMSG and RECVMSG.
pub const IORING_OP_CONNECT: u8 = 16;
/// io_uring opcode for opening a file (IORING_OP_OPENAT).
///
/// File operation; the unit tests in this module expect it not to be flagged.
pub const IORING_OP_OPENAT: u8 = 18;
/// io_uring opcode for reading from a file descriptor (IORING_OP_READ).
///
/// File-descriptor I/O; the unit tests in this module expect it not to be flagged.
pub const IORING_OP_READ: u8 = 22;
/// io_uring opcode for writing to a file descriptor (IORING_OP_WRITE).
///
/// File-descriptor I/O; the unit tests in this module expect it not to be flagged.
pub const IORING_OP_WRITE: u8 = 23;
27
/// Information about an io_uring context attached to a process.
///
/// One value describes a single `io_ring_ctx` together with the identity of
/// the process that owns it. The type derives `serde::Serialize` so entries
/// can be emitted as structured output (the unit tests serialize it to JSON).
#[derive(Debug, Clone, serde::Serialize)]
pub struct IoUringEntry {
    /// PID of the owning process.
    pub pid: u32,
    /// Process name.
    pub comm: String,
    /// Virtual address of the `io_ring_ctx` kernel structure.
    pub ctx_addr: u64,
    /// Number of submission queue entries (SQEs) in the ring buffer.
    pub sq_entries: u32,
    /// Number of completion queue entries (CQEs) seen.
    pub cq_entries: u32,
    /// Opcodes observed in the pending SQE ring.
    ///
    /// Values are raw io_uring opcode numbers, comparable against the
    /// `IORING_OP_*` constants defined in this module.
    pub pending_opcodes: Vec<u8>,
    /// True if the context is performing network operations that would
    /// bypass seccomp (SENDMSG / RECVMSG / CONNECT).
    pub bypasses_seccomp: bool,
    /// True if associated process has a strict seccomp filter active.
    // NOTE(review): `classify_io_uring` distinguishes SECCOMP_MODE_STRICT (1)
    // from SECCOMP_MODE_FILTER (2); confirm whether this flag is meant to
    // cover both modes or strict mode only.
    pub seccomp_active: bool,
}
49
/// Classify whether an io_uring context is suspicious.
///
/// This is a re-export: the implementation lives in `crate::heuristics`.
///
/// Returns `true` when the context uses network opcodes AND the owning
/// process has seccomp enabled — a combination indicative of seccomp bypass.
///
/// `seccomp_mode` maps to `SECCOMP_MODE_STRICT = 1`, `SECCOMP_MODE_FILTER = 2`.
/// The unit tests in this module pass `0` to mean "no seccomp" and expect
/// `false` in that case.
pub use crate::heuristics::classify_io_uring;
57
58/// Walk all `io_ring_ctx` structures reachable from each process's
59/// `task_struct->io_uring` field and return forensic entries.
60///
61/// Returns `Ok(vec![])` gracefully when ISF symbols are unavailable.
62pub fn walk_io_uring<P: PhysicalMemoryProvider>(
63    reader: &ObjectReader<P>,
64) -> Result<Vec<IoUringEntry>> {
65    // Check whether the ISF defines the io_uring symbol we need.
66    // If the symbol is absent (older kernels or stripped ISF), return empty.
67    if reader
68        .symbols()
69        .symbol_address("io_uring_task_work")
70        .is_none()
71    {
72        return Ok(vec![]);
73    }
74
75    // Full walk would enumerate init_task->tasks list, read each
76    // task_struct->io_uring pointer, and dereference io_ring_ctx.
77    // Stubbed here — real implementation requires ISF offsets for
78    // io_uring_task and io_ring_ctx which are kernel-version specific.
79    Ok(vec![])
80}
81
#[cfg(test)]
mod tests {
    use super::*;
    use memf_core::{
        object_reader::ObjectReader,
        test_builders::{PageTableBuilder, SyntheticPhysMem},
        vas::{TranslationMode, VirtualAddressSpace},
    };
    use memf_symbols::{isf::IsfResolver, test_builders::IsfBuilder};

    /// Build a reader whose ISF was created without adding any symbols and
    /// whose page table was created without adding any mappings.
    fn make_no_symbol_reader() -> ObjectReader<SyntheticPhysMem> {
        let empty_isf = IsfBuilder::new().build_json();
        let symbols = IsfResolver::from_value(&empty_isf).unwrap();
        let (cr3, phys) = PageTableBuilder::new().build();
        ObjectReader::new(
            VirtualAddressSpace::new(phys, cr3, TranslationMode::X86_64FourLevel),
            Box::new(symbols),
        )
    }

    #[test]
    fn classify_network_opcodes_with_seccomp_suspicious() {
        // SENDMSG from a process in SECCOMP_MODE_FILTER (2) counts as a bypass.
        let flagged = classify_io_uring(&[IORING_OP_SENDMSG], 2);
        assert!(
            flagged,
            "network opcode under seccomp must be flagged as suspicious"
        );
    }

    #[test]
    fn classify_network_opcodes_without_seccomp_not_suspicious() {
        // With seccomp off, network I/O through io_uring is ordinary behaviour.
        let flagged = classify_io_uring(&[IORING_OP_SENDMSG], 0);
        assert!(
            !flagged,
            "network opcode without seccomp must not be flagged"
        );
    }

    #[test]
    fn classify_non_network_opcodes_with_seccomp_not_suspicious() {
        // Plain file-descriptor I/O under seccomp is expected.
        let file_ops = [IORING_OP_READ, IORING_OP_WRITE];
        let flagged = classify_io_uring(&file_ops, 2);
        assert!(
            !flagged,
            "non-network opcodes under seccomp must not be flagged"
        );
    }

    #[test]
    fn walk_io_uring_no_symbol_returns_empty() {
        let reader = make_no_symbol_reader();
        let entries = walk_io_uring(&reader).unwrap();
        assert!(
            entries.is_empty(),
            "missing io_uring symbols must yield empty vec"
        );
    }

    #[test]
    fn classify_connect_opcode_with_seccomp_strict_suspicious() {
        // CONNECT (16) with seccomp mode 1 (STRICT) must be reported.
        let flagged = classify_io_uring(&[IORING_OP_CONNECT], 1);
        assert!(
            flagged,
            "CONNECT under SECCOMP_MODE_STRICT must be flagged"
        );
    }

    #[test]
    fn classify_recvmsg_opcode_with_seccomp_filter_suspicious() {
        // RECVMSG (10) with seccomp mode 2 (FILTER) must be reported.
        let flagged = classify_io_uring(&[IORING_OP_RECVMSG], 2);
        assert!(
            flagged,
            "RECVMSG under SECCOMP_MODE_FILTER must be flagged"
        );
    }

    #[test]
    fn classify_recvmsg_opcode_without_seccomp_not_suspicious() {
        // RECVMSG with seccomp disabled is not a finding.
        let flagged = classify_io_uring(&[IORING_OP_RECVMSG], 0);
        assert!(
            !flagged,
            "RECVMSG without seccomp must not be flagged"
        );
    }

    #[test]
    fn classify_connect_opcode_without_seccomp_not_suspicious() {
        // CONNECT with seccomp disabled is not a finding.
        let flagged = classify_io_uring(&[IORING_OP_CONNECT], 0);
        assert!(
            !flagged,
            "CONNECT without seccomp must not be flagged"
        );
    }

    #[test]
    fn classify_empty_opcodes_with_seccomp_not_suspicious() {
        // An empty opcode list gives nothing to flag, even with seccomp on.
        let flagged = classify_io_uring(&[], 2);
        assert!(
            !flagged,
            "empty opcode list must not be flagged even with seccomp"
        );
    }

    #[test]
    fn classify_openat_opcode_with_seccomp_not_suspicious() {
        // OPENAT (18) is not among the sensitive opcodes.
        let flagged = classify_io_uring(&[IORING_OP_OPENAT], 2);
        assert!(
            !flagged,
            "OPENAT is not a sensitive opcode and must not be flagged"
        );
    }

    #[test]
    fn classify_all_sensitive_opcodes_individually() {
        // Every sensitive opcode, taken alone, must be flagged in both
        // seccomp mode 1 and seccomp mode 2.
        let sensitive = [IORING_OP_SENDMSG, IORING_OP_RECVMSG, IORING_OP_CONNECT];
        for op in sensitive.iter().copied() {
            let under_strict = classify_io_uring(&[op], 1);
            assert!(
                under_strict,
                "opcode {op} must be flagged under seccomp_mode=1"
            );
            let under_filter = classify_io_uring(&[op], 2);
            assert!(
                under_filter,
                "opcode {op} must be flagged under seccomp_mode=2"
            );
        }
    }

    #[test]
    fn walk_io_uring_with_symbol_returns_ok() {
        // With io_uring_task_work defined the walk takes the stubbed path and
        // must still succeed.
        let isf_with_symbol = IsfBuilder::new()
            .add_symbol("io_uring_task_work", 0xFFFF_8000_0010_0000)
            .build_json();
        let symbols = IsfResolver::from_value(&isf_with_symbol).unwrap();
        let (cr3, phys) = PageTableBuilder::new().build();
        let reader = ObjectReader::new(
            VirtualAddressSpace::new(phys, cr3, TranslationMode::X86_64FourLevel),
            Box::new(symbols),
        );

        let outcome = walk_io_uring(&reader);
        assert!(
            outcome.is_ok(),
            "walk_io_uring must not error when symbol is present"
        );
    }

    // Exercises the Debug, Clone and Serialize derives on IoUringEntry.
    #[test]
    fn io_uring_entry_debug_clone_serialize() {
        let original = IoUringEntry {
            pid: 1234,
            comm: "curing".to_string(),
            ctx_addr: 0xFFFF_8800_0001_0000,
            sq_entries: 128,
            cq_entries: 256,
            pending_opcodes: vec![IORING_OP_SENDMSG, IORING_OP_CONNECT],
            bypasses_seccomp: true,
            seccomp_active: true,
        };

        // Debug output is taken from a clone so both derives are covered.
        let copy = original.clone();
        let rendered = format!("{copy:?}");
        assert!(rendered.contains("curing"));

        let serialized = serde_json::to_string(&original).unwrap();
        assert!(serialized.contains("\"pid\":1234"));
        assert!(serialized.contains("\"bypasses_seccomp\":true"));
        assert!(serialized.contains("\"sq_entries\":128"));
    }

    // A single sensitive opcode is enough to flag a mixed opcode list.
    #[test]
    fn classify_io_uring_mixed_opcodes_sensitive_wins() {
        // READ (non-sensitive) next to SENDMSG (sensitive) under seccomp.
        let mixed = [IORING_OP_READ, IORING_OP_SENDMSG];
        let flagged = classify_io_uring(&mixed, 2);
        assert!(
            flagged,
            "mix of sensitive + non-sensitive under seccomp must be flagged"
        );
    }

    // Pins the numeric value of every opcode constant declared by this module.
    #[test]
    fn io_uring_opcode_constants_correct_values() {
        assert_eq!(IORING_OP_SENDMSG, 9u8);
        assert_eq!(IORING_OP_RECVMSG, 10u8);
        assert_eq!(IORING_OP_CONNECT, 16u8);
        assert_eq!(IORING_OP_OPENAT, 18u8);
        assert_eq!(IORING_OP_READ, 22u8);
        assert_eq!(IORING_OP_WRITE, 23u8);
    }
}