Skip to main content

memf_linux/
cmdline.rs

1//! Linux process command line walker.
2//!
3//! Reads process command lines from `mm_struct.arg_start`..`arg_end`
4//! for each process. The argument region contains null-separated argv
5//! strings. Kernel threads (NULL mm) are silently skipped.
6
7use memf_core::object_reader::ObjectReader;
8use memf_format::PhysicalMemoryProvider;
9
10use crate::{CmdlineInfo, Error, Result};
11
12/// Maximum argument region size to read (256 KiB safety limit).
13const MAX_ARG_SIZE: u64 = 256 * 1024;
14
15/// Walk command lines for all processes in the task list.
16pub fn walk_cmdlines<P: PhysicalMemoryProvider>(
17    reader: &ObjectReader<P>,
18) -> Result<Vec<CmdlineInfo>> {
19    let init_task_addr = reader
20        .symbols()
21        .symbol_address("init_task")
22        .ok_or_else(|| Error::MissingKernelSymbol {
23            name: "init_task".into(),
24        })?;
25
26    let tasks_offset = reader
27        .symbols()
28        .field_offset("task_struct", "tasks")
29        .ok_or_else(|| Error::MissingField {
30            struct_name: "task_struct".into(),
31            field_name: "tasks".into(),
32        })?;
33
34    let head_vaddr = init_task_addr + tasks_offset;
35    let task_addrs = reader.walk_list(head_vaddr, "task_struct", "tasks")?;
36
37    let mut cmdlines = Vec::new();
38
39    if let Ok(info) = walk_process_cmdline(reader, init_task_addr) {
40        cmdlines.push(info);
41    }
42
43    for &task_addr in &task_addrs {
44        if let Ok(info) = walk_process_cmdline(reader, task_addr) {
45            cmdlines.push(info);
46        }
47    }
48
49    Ok(cmdlines)
50}
51
52/// Read command line for a single process.
53pub fn walk_process_cmdline<P: PhysicalMemoryProvider>(
54    reader: &ObjectReader<P>,
55    task_addr: u64,
56) -> Result<CmdlineInfo> {
57    let pid: u32 = reader.read_field(task_addr, "task_struct", "pid")?;
58    let comm = reader.read_field_string(task_addr, "task_struct", "comm", 16)?;
59    let mm_ptr: u64 = reader.read_field(task_addr, "task_struct", "mm")?;
60
61    if mm_ptr == 0 {
62        return Err(Error::WalkFailed {
63            walker: "walk_process_cmdline",
64            reason: format!("task {comm} (PID {pid}) has NULL mm (kernel thread)"),
65        });
66    }
67
68    let arg_start: u64 = reader.read_field(mm_ptr, "mm_struct", "arg_start")?;
69    let arg_end: u64 = reader.read_field(mm_ptr, "mm_struct", "arg_end")?;
70
71    if arg_start == 0 || arg_end <= arg_start {
72        return Ok(CmdlineInfo {
73            pid: u64::from(pid),
74            comm,
75            cmdline: String::new(),
76        });
77    }
78
79    let size = (arg_end - arg_start).min(MAX_ARG_SIZE);
80    let data = reader.read_bytes(arg_start, size as usize)?;
81
82    Ok(CmdlineInfo {
83        pid: u64::from(pid),
84        comm,
85        cmdline: parse_arg_region(&data),
86    })
87}
88
/// Parse null-separated argv entries into a single space-joined string.
///
/// `data` is split on NUL bytes and empty entries (leading, trailing or
/// consecutive NULs) are dropped. The remaining entries are joined with a
/// single space.
///
/// Entries that are not valid UTF-8 are decoded lossily: each invalid
/// sequence becomes U+FFFD. This keeps the argument visible in the output
/// instead of collapsing it to an empty string, which would both hide the
/// argument and leave a doubled space in the joined result.
fn parse_arg_region(data: &[u8]) -> String {
    data.split(|&byte| byte == 0)
        .filter(|entry| !entry.is_empty())
        // Borrows when the entry is valid UTF-8, allocates only when a
        // replacement character has to be inserted.
        .map(String::from_utf8_lossy)
        .collect::<Vec<_>>()
        .join(" ")
}
103
#[cfg(test)]
mod tests {
    use super::*;
    use memf_core::object_reader::ObjectReader;
    use memf_core::test_builders::{flags, PageTableBuilder, SyntheticPhysMem};
    use memf_core::vas::{TranslationMode, VirtualAddressSpace};
    use memf_symbols::isf::IsfResolver;
    use memf_symbols::test_builders::IsfBuilder;

    /// Size of every synthetic page built by these tests.
    const PAGE_SIZE: usize = 4096;

    // Field offsets matching the synthetic ISF layout in `make_test_reader`.
    const TASKS_OFF: usize = 16;
    const COMM_OFF: usize = 32;
    const MM_OFF: usize = 48;
    const ARG_START_OFF: usize = 64;
    const ARG_END_OFF: usize = 72;

    // Location of the task page used by the single-task tests.
    const TASK_VADDR: u64 = 0xFFFF_8000_0010_0000;
    const TASK_PADDR: u64 = 0x0080_0000;
    /// Offset inside the task page where single-task tests place `mm_struct`.
    const MM_IN_PAGE: usize = 0x200;

    /// Store `value` little-endian at `offset` in `page`.
    fn put_u64(page: &mut [u8], offset: usize, value: u64) {
        page[offset..offset + 8].copy_from_slice(&value.to_le_bytes());
    }

    /// Build a zeroed page holding one `task_struct` at offset 0.
    fn task_page(pid: u32, next: u64, prev: u64, comm: &[u8], mm: u64) -> Vec<u8> {
        let mut page = vec![0u8; PAGE_SIZE];
        page[0..4].copy_from_slice(&pid.to_le_bytes());
        put_u64(&mut page, TASKS_OFF, next);
        put_u64(&mut page, TASKS_OFF + 8, prev);
        page[COMM_OFF..COMM_OFF + comm.len()].copy_from_slice(comm);
        put_u64(&mut page, MM_OFF, mm);
        page
    }

    /// Task page at `TASK_VADDR` whose `tasks` list points back at itself.
    fn lone_task_page(pid: u32, comm: &[u8], mm: u64) -> Vec<u8> {
        let own_tasks = TASK_VADDR + TASKS_OFF as u64;
        task_page(pid, own_tasks, own_tasks, comm, mm)
    }

    /// Write the `mm_struct` fields used by the walker at `base` in `page`.
    fn put_mm(page: &mut [u8], base: usize, pgd: u64, arg_start: u64, arg_end: u64) {
        put_u64(page, base, pgd);
        put_u64(page, base + ARG_START_OFF, arg_start);
        put_u64(page, base + ARG_END_OFF, arg_end);
    }

    /// Build a reader over a synthetic address space.
    ///
    /// `data` is mapped at `vaddr` -> `paddr`, and `init_task` resolves to
    /// `vaddr`. Each `(vaddr, paddr, bytes)` in `extra_mappings` is mapped
    /// afterwards, in order.
    fn make_test_reader(
        data: &[u8],
        vaddr: u64,
        paddr: u64,
        extra_mappings: &[(u64, u64, &[u8])],
    ) -> ObjectReader<SyntheticPhysMem> {
        let isf = IsfBuilder::new()
            .add_struct("task_struct", 128)
            .add_field("task_struct", "pid", 0, "int")
            .add_field("task_struct", "state", 4, "long")
            .add_field("task_struct", "tasks", 16, "list_head")
            .add_field("task_struct", "comm", 32, "char")
            .add_field("task_struct", "mm", 48, "pointer")
            .add_struct("list_head", 16)
            .add_field("list_head", "next", 0, "pointer")
            .add_field("list_head", "prev", 8, "pointer")
            .add_struct("mm_struct", 128)
            .add_field("mm_struct", "pgd", 0, "pointer")
            .add_field("mm_struct", "arg_start", 64, "unsigned long")
            .add_field("mm_struct", "arg_end", 72, "unsigned long")
            .add_symbol("init_task", vaddr)
            .build_json();
        let resolver = IsfResolver::from_value(&isf).unwrap();

        let mut builder = PageTableBuilder::new()
            .map_4k(vaddr, paddr, flags::WRITABLE)
            .write_phys(paddr, data);
        for &(extra_vaddr, extra_paddr, extra_data) in extra_mappings {
            builder = builder
                .map_4k(extra_vaddr, extra_paddr, flags::WRITABLE)
                .write_phys(extra_paddr, extra_data);
        }

        let (cr3, mem) = builder.build();
        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
        ObjectReader::new(vas, Box::new(resolver))
    }

    #[test]
    fn single_process_cmdline() {
        let arg_vaddr: u64 = 0xFFFF_8000_0020_0000;
        let arg_paddr: u64 = 0x0090_0000;
        let arg_data = b"/usr/sbin/sshd\0-D\0-p\x002222\0";

        let mut page = lone_task_page(100, b"sshd", TASK_VADDR + MM_IN_PAGE as u64);
        put_mm(
            &mut page,
            MM_IN_PAGE,
            0x1000,
            arg_vaddr,
            arg_vaddr + arg_data.len() as u64,
        );

        let reader = make_test_reader(
            &page,
            TASK_VADDR,
            TASK_PADDR,
            &[(arg_vaddr, arg_paddr, arg_data.as_slice())],
        );

        let info = walk_process_cmdline(&reader, TASK_VADDR).unwrap();
        assert_eq!(info.pid, 100);
        assert_eq!(info.comm, "sshd");
        assert_eq!(info.cmdline, "/usr/sbin/sshd -D -p 2222");
    }

    #[test]
    fn kernel_thread_returns_error() {
        let page = lone_task_page(2, b"kthreadd", 0);

        let reader = make_test_reader(&page, TASK_VADDR, TASK_PADDR, &[]);
        let outcome = walk_process_cmdline(&reader, TASK_VADDR);
        assert!(outcome.is_err());
    }

    #[test]
    fn empty_arg_region() {
        let mut page = lone_task_page(50, b"node", TASK_VADDR + MM_IN_PAGE as u64);
        put_mm(&mut page, MM_IN_PAGE, 0x1000, 0, 0);

        let reader = make_test_reader(&page, TASK_VADDR, TASK_PADDR, &[]);
        let info = walk_process_cmdline(&reader, TASK_VADDR).unwrap();
        assert_eq!(info.pid, 50);
        assert_eq!(info.cmdline, "");
    }

    #[test]
    fn walk_cmdlines_skips_kernel_threads() {
        let page = lone_task_page(0, b"swapper/0", 0);

        let reader = make_test_reader(&page, TASK_VADDR, TASK_PADDR, &[]);
        let cmdlines = walk_cmdlines(&reader).unwrap();
        assert!(cmdlines.is_empty());
    }

    #[test]
    fn parse_arg_region_joins_with_spaces() {
        let joined = parse_arg_region(b"python3\0-m\0http.server\x008080\0");
        assert_eq!(joined, "python3 -m http.server 8080");
    }

    #[test]
    fn parse_arg_region_single_arg() {
        let joined = parse_arg_region(b"/bin/bash\0");
        assert_eq!(joined, "/bin/bash");
    }

    #[test]
    fn parse_arg_region_empty() {
        let joined = parse_arg_region(b"");
        assert_eq!(joined, "");
    }

    #[test]
    fn walk_cmdlines_missing_init_task_returns_missing_kernel_symbol() {
        // An ISF with no symbols at all: `init_task` cannot be resolved.
        let isf = IsfBuilder::new().build_json();
        let resolver = IsfResolver::from_value(&isf).unwrap();
        let (cr3, mem) = PageTableBuilder::new().build();
        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
        let reader: ObjectReader<SyntheticPhysMem> = ObjectReader::new(vas, Box::new(resolver));

        let result = walk_cmdlines(&reader);
        assert!(
            matches!(result, Err(crate::Error::MissingKernelSymbol { ref name }) if name == "init_task"),
            "expected MissingKernelSymbol {{name: \"init_task\"}}, got {result:?}"
        );
    }

    #[test]
    fn walk_cmdlines_missing_tasks_field_returns_missing_field() {
        let data = vec![0u8; PAGE_SIZE];

        // `task_struct` is declared without its `tasks` member.
        let isf = IsfBuilder::new()
            .add_struct("task_struct", 128)
            .add_field("task_struct", "pid", 0, "int")
            .add_struct("list_head", 16)
            .add_field("list_head", "next", 0, "pointer")
            .add_field("list_head", "prev", 8, "pointer")
            .add_struct("mm_struct", 128)
            .add_field("mm_struct", "pgd", 0, "pointer")
            .add_field("mm_struct", "arg_start", 64, "unsigned long")
            .add_field("mm_struct", "arg_end", 72, "unsigned long")
            .add_symbol("init_task", TASK_VADDR)
            .build_json();
        let resolver = IsfResolver::from_value(&isf).unwrap();

        let (cr3, mem) = PageTableBuilder::new()
            .map_4k(TASK_VADDR, TASK_PADDR, flags::WRITABLE)
            .write_phys(TASK_PADDR, &data)
            .build();
        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
        let reader: ObjectReader<SyntheticPhysMem> = ObjectReader::new(vas, Box::new(resolver));

        let result = walk_cmdlines(&reader);
        assert!(
            matches!(result, Err(crate::Error::MissingField { ref struct_name, ref field_name }) if struct_name == "task_struct" && field_name == "tasks"),
            "expected MissingField task_struct.tasks, got {result:?}"
        );
    }

    #[test]
    fn walk_process_cmdline_arg_end_before_arg_start_returns_empty() {
        let mut page = lone_task_page(77, b"proc\0", TASK_VADDR + MM_IN_PAGE as u64);
        // arg_end (0x4000) lies below arg_start (0x5000).
        put_mm(&mut page, MM_IN_PAGE, 0x1000, 0x5000, 0x4000);

        let reader = make_test_reader(&page, TASK_VADDR, TASK_PADDR, &[]);
        let info = walk_process_cmdline(&reader, TASK_VADDR).unwrap();
        assert_eq!(info.pid, 77);
        assert_eq!(
            info.cmdline, "",
            "arg_end <= arg_start must produce empty cmdline"
        );
    }

    #[test]
    fn parse_arg_region_consecutive_nulls_filtered() {
        let joined = parse_arg_region(b"arg0\0\0arg2\0");
        assert_eq!(joined, "arg0 arg2");
    }

    #[test]
    fn cmdline_info_clone_eq() {
        let original = CmdlineInfo {
            pid: 1,
            comm: "bash".to_string(),
            cmdline: "bash -c true".to_string(),
        };
        let copy = original.clone();
        assert_eq!(original, copy);
        let dbg = format!("{original:?}");
        assert!(dbg.contains("bash"));
    }

    #[test]
    fn walk_cmdlines_two_processes_both_pushed() {
        let init_vaddr: u64 = 0xFFFF_8000_0040_0000;
        let init_paddr: u64 = 0x0040_0000;
        let task2_vaddr: u64 = 0xFFFF_8000_0041_0000;
        let task2_paddr: u64 = 0x0041_0000;
        let mm1_vaddr: u64 = 0xFFFF_8000_0042_0000;
        let mm1_paddr: u64 = 0x0042_0000;
        let mm2_vaddr: u64 = 0xFFFF_8000_0043_0000;
        let mm2_paddr: u64 = 0x0043_0000;
        let arg1_vaddr: u64 = 0xFFFF_8000_0044_0000;
        let arg1_paddr: u64 = 0x0044_0000;
        let arg2_vaddr: u64 = 0xFFFF_8000_0045_0000;
        let arg2_paddr: u64 = 0x0045_0000;

        let arg1_data = b"/sbin/init\0";
        let arg2_data = b"/bin/sh\0-c\0true\0";

        // Two-entry circular list: init_task <-> task2.
        let init_tasks = init_vaddr + TASKS_OFF as u64;
        let task2_tasks = task2_vaddr + TASKS_OFF as u64;
        let init_page = task_page(1, task2_tasks, init_tasks, b"init", mm1_vaddr);
        let task2_page = task_page(2, init_tasks, task2_tasks, b"sh", mm2_vaddr);

        let mut mm1_page = vec![0u8; PAGE_SIZE];
        put_mm(
            &mut mm1_page,
            0,
            0,
            arg1_vaddr,
            arg1_vaddr + arg1_data.len() as u64,
        );

        let mut mm2_page = vec![0u8; PAGE_SIZE];
        put_mm(
            &mut mm2_page,
            0,
            0,
            arg2_vaddr,
            arg2_vaddr + arg2_data.len() as u64,
        );

        let reader = make_test_reader(
            &init_page,
            init_vaddr,
            init_paddr,
            &[
                (task2_vaddr, task2_paddr, task2_page.as_slice()),
                (mm1_vaddr, mm1_paddr, mm1_page.as_slice()),
                (mm2_vaddr, mm2_paddr, mm2_page.as_slice()),
                (arg1_vaddr, arg1_paddr, arg1_data.as_slice()),
                (arg2_vaddr, arg2_paddr, arg2_data.as_slice()),
            ],
        );

        let result = walk_cmdlines(&reader).unwrap();
        assert_eq!(
            result.len(),
            2,
            "both init_task and task2 should have cmdlines"
        );
        let pids: Vec<u64> = result.iter().map(|r| r.pid).collect();
        assert!(pids.contains(&1), "init_task (pid=1) must be in results");
        assert!(pids.contains(&2), "task2 (pid=2) must be in results");
        let init_cmdline = result.iter().find(|r| r.pid == 1).unwrap();
        assert_eq!(init_cmdline.cmdline, "/sbin/init");
        let sh_cmdline = result.iter().find(|r| r.pid == 2).unwrap();
        assert_eq!(sh_cmdline.cmdline, "/bin/sh -c true");
    }
}