Skip to main content

memf_linux/
bash.rs

1//! Linux bash command history recovery.
2//!
3//! Scans bash process heap memory for `HIST_ENTRY` structures to recover
4//! command history. Works by finding bash processes, walking their VMAs
5//! to locate anonymous RW regions (the heap), then pattern-matching
6//! for valid `HIST_ENTRY` structs (24 bytes: line ptr, timestamp ptr, data ptr).
7
8use memf_core::object_reader::ObjectReader;
9use memf_format::PhysicalMemoryProvider;
10
11use crate::{vma_walker::for_each_task_vma, BashHistoryInfo, Error, Result};
12
/// Maximum number of bytes read from a single heap candidate region
/// (1 MiB safety limit). Regions larger than this are scanned only up to
/// this many bytes from their start.
const MAX_HEAP_SCAN: u64 = 1024 * 1024;

/// Maximum length for a valid command string; passed as the length cap when
/// reading the string a candidate `line` pointer refers to.
const MAX_COMMAND_LEN: usize = 4096;
18
19/// Walk all bash processes and recover command history from their heaps.
20///
21/// Finds processes with `comm == "bash"`, then scans their anonymous
22/// RW VMAs for `HIST_ENTRY` patterns — 24-byte structs where the first
23/// pointer leads to a printable ASCII string and the second leads to
24/// a `#DIGITS` timestamp string.
25pub fn walk_bash_history<P: PhysicalMemoryProvider>(
26    reader: &ObjectReader<P>,
27) -> Result<Vec<BashHistoryInfo>> {
28    let init_task_addr = reader
29        .symbols()
30        .symbol_address("init_task")
31        .ok_or_else(|| Error::MissingKernelSymbol {
32            name: "init_task".into(),
33        })?;
34
35    let tasks_offset = reader
36        .symbols()
37        .field_offset("task_struct", "tasks")
38        .ok_or_else(|| Error::MissingField {
39            struct_name: "task_struct".into(),
40            field_name: "tasks".into(),
41        })?;
42
43    let head_vaddr = init_task_addr + tasks_offset;
44    let task_addrs = reader.walk_list(head_vaddr, "task_struct", "tasks")?;
45
46    let mut results = Vec::new();
47
48    // Include init_task itself
49    scan_process_history(reader, init_task_addr, &mut results);
50
51    for &task_addr in &task_addrs {
52        scan_process_history(reader, task_addr, &mut results);
53    }
54
55    Ok(results)
56}
57
58/// Scan a single process for bash history entries.
59fn scan_process_history<P: PhysicalMemoryProvider>(
60    reader: &ObjectReader<P>,
61    task_addr: u64,
62    out: &mut Vec<BashHistoryInfo>,
63) {
64    let Ok(comm) = reader.read_field_string(task_addr, "task_struct", "comm", 16) else {
65        return;
66    };
67
68    if comm != "bash" {
69        return;
70    }
71
72    let pid: u32 = match reader.read_field(task_addr, "task_struct", "pid") {
73        Ok(v) => v,
74        Err(_) => return,
75    };
76
77    // Collect VMA ranges for pointer validation and identify heap candidates.
78    let mut vma_ranges: Vec<(u64, u64)> = Vec::new();
79    let mut heap_regions: Vec<(u64, u64)> = Vec::new();
80
81    for_each_task_vma(reader, task_addr, &mut |e| {
82        vma_ranges.push((e.start, e.end));
83        // Heap candidate: anonymous (file_ptr == 0), read+write, not exec
84        if e.file_ptr == 0 && e.flags.read && e.flags.write && !e.flags.exec {
85            heap_regions.push((e.start, e.end));
86        }
87    });
88
89    // Scan each heap region for HIST_ENTRY patterns
90    let mut index = 0u64;
91    for &(start, end) in &heap_regions {
92        let size = (end - start).min(MAX_HEAP_SCAN) as usize;
93        let Ok(data) = reader.read_bytes(start, size) else {
94            continue;
95        };
96
97        scan_heap_for_entries(
98            reader,
99            &data,
100            &vma_ranges,
101            u64::from(pid),
102            &comm,
103            &mut index,
104            out,
105        );
106    }
107}
108
109/// Scan a heap region for HIST_ENTRY structs.
110///
111/// HIST_ENTRY layout (24 bytes on 64-bit):
112///   offset 0:  char *line      (pointer to command string)
113///   offset 8:  char *timestamp (pointer to "#DIGITS" string, or NULL)
114///   offset 16: histdata_t *data (usually NULL)
115fn scan_heap_for_entries<P: PhysicalMemoryProvider>(
116    reader: &ObjectReader<P>,
117    data: &[u8],
118    vma_ranges: &[(u64, u64)],
119    pid: u64,
120    comm: &str,
121    index: &mut u64,
122    out: &mut Vec<BashHistoryInfo>,
123) {
124    if data.len() < 24 {
125        return;
126    }
127
128    // Scan at 8-byte alignment for HIST_ENTRY candidates
129    let limit = data.len() - 23;
130    let mut off = 0;
131    while off < limit {
132        let line_ptr = data[off..off + 8].try_into().map_or(0, u64::from_le_bytes);
133        let ts_ptr = data[off + 8..off + 16]
134            .try_into()
135            .map_or(0, u64::from_le_bytes);
136
137        // Quick reject: line_ptr must be non-zero and within a VMA
138        if line_ptr == 0 || !addr_in_vmas(line_ptr, vma_ranges) {
139            off += 8;
140            continue;
141        }
142
143        // ts_ptr must be NULL or within a VMA
144        if ts_ptr != 0 && !addr_in_vmas(ts_ptr, vma_ranges) {
145            off += 8;
146            continue;
147        }
148
149        // Try to read the command string
150        let Ok(line_str) = reader.read_string(line_ptr, MAX_COMMAND_LEN) else {
151            off += 8;
152            continue;
153        };
154
155        if line_str.is_empty() || !is_printable_ascii(line_str.as_bytes()) {
156            off += 8;
157            continue;
158        }
159
160        // Try to read and parse the timestamp
161        let timestamp = if ts_ptr != 0 {
162            reader
163                .read_string(ts_ptr, 32)
164                .ok()
165                .and_then(|s| parse_bash_timestamp(&s))
166        } else {
167            None
168        };
169
170        // Validate timestamp pointer actually looks like a bash timestamp
171        if ts_ptr != 0 && timestamp.is_none() {
172            off += 8;
173            continue;
174        }
175
176        out.push(BashHistoryInfo {
177            pid,
178            comm: comm.to_string(),
179            command: line_str,
180            timestamp,
181            index: *index,
182        });
183        *index += 1;
184
185        // Skip past this HIST_ENTRY (24 bytes)
186        off += 24;
187    }
188}
189
/// Report whether `addr` lies inside at least one of the half-open
/// `[start, end)` intervals in `ranges`.
///
/// Returns `false` for an empty `ranges` slice.
fn addr_in_vmas(addr: u64, ranges: &[(u64, u64)]) -> bool {
    for &(lo, hi) in ranges {
        // Half-open interval: `lo` is included, `hi` is not.
        if (lo..hi).contains(&addr) {
            return true;
        }
    }
    false
}
196
/// Report whether `bytes` is a non-empty run of printable ASCII.
///
/// Accepted bytes are `0x20..=0x7E` plus horizontal tab; every other byte
/// (other control characters, DEL, NUL, anything >= 0x80) makes the result
/// `false`. An empty slice is also rejected.
fn is_printable_ascii(bytes: &[u8]) -> bool {
    if bytes.is_empty() {
        return false;
    }
    bytes
        .iter()
        .all(|byte| matches!(*byte, b'\t' | 0x20..=0x7E))
}
204
/// Parse a bash timestamp string (`#1700000000`) into a Unix timestamp.
///
/// The input must be a `#` followed by one or more ASCII digits and nothing
/// else. Returns `None` when the prefix is missing, when no digits follow it,
/// when any non-digit character is present (including a leading `+` or `-`),
/// or when the value does not fit in an `i64`.
fn parse_bash_timestamp(s: &str) -> Option<i64> {
    let digits = s.strip_prefix('#')?;
    // `str::parse::<i64>` on its own would accept a leading sign ("#+5",
    // "#-5"). This function doubles as the validator that filters heap-scan
    // false positives, so insist on pure digits before parsing.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    digits.parse::<i64>().ok()
}
213
// Unit tests: end-to-end recovery against a synthetic memory image, plus
// direct tests of the pure helper functions and the symbol-lookup error paths.
#[cfg(test)]
mod tests {
    use super::*;
    use memf_core::test_builders::{flags as ptflags, PageTableBuilder, SyntheticPhysMem};
    use memf_core::vas::{TranslationMode, VirtualAddressSpace};
    use memf_symbols::isf::IsfResolver;
    use memf_symbols::test_builders::IsfBuilder;

    /// Build an `ObjectReader` over synthetic physical memory.
    ///
    /// `data` is written at physical address `paddr` and mapped as one 4 KiB
    /// page at `vaddr`, which is also registered as the `init_task` symbol.
    /// Each `(vaddr, paddr, bytes)` tuple in `extra_mappings` is mapped and
    /// written the same way. The symbol table declares the minimal
    /// `task_struct`, `list_head`, `mm_struct` and `vm_area_struct` layouts
    /// used by these tests.
    fn make_test_reader(
        data: &[u8],
        vaddr: u64,
        paddr: u64,
        extra_mappings: &[(u64, u64, &[u8])],
    ) -> ObjectReader<SyntheticPhysMem> {
        let isf = IsfBuilder::new()
            .add_struct("task_struct", 128)
            .add_field("task_struct", "pid", 0, "int")
            .add_field("task_struct", "state", 4, "long")
            .add_field("task_struct", "tasks", 16, "list_head")
            .add_field("task_struct", "comm", 32, "char")
            .add_field("task_struct", "mm", 48, "pointer")
            .add_struct("list_head", 16)
            .add_field("list_head", "next", 0, "pointer")
            .add_field("list_head", "prev", 8, "pointer")
            .add_struct("mm_struct", 128)
            .add_field("mm_struct", "pgd", 0, "pointer")
            .add_field("mm_struct", "mmap", 8, "pointer")
            .add_struct("vm_area_struct", 64)
            .add_field("vm_area_struct", "vm_start", 0, "unsigned long")
            .add_field("vm_area_struct", "vm_end", 8, "unsigned long")
            .add_field("vm_area_struct", "vm_next", 16, "pointer")
            .add_field("vm_area_struct", "vm_flags", 24, "unsigned long")
            .add_field("vm_area_struct", "vm_pgoff", 32, "unsigned long")
            .add_field("vm_area_struct", "vm_file", 40, "pointer")
            .add_symbol("init_task", vaddr)
            .build_json();

        let resolver = IsfResolver::from_value(&isf).unwrap();
        let mut builder = PageTableBuilder::new()
            .map_4k(vaddr, paddr, ptflags::WRITABLE)
            .write_phys(paddr, data);

        for &(ev, ep, edata) in extra_mappings {
            builder = builder
                .map_4k(ev, ep, ptflags::WRITABLE)
                .write_phys(ep, edata);
        }

        let (cr3, mem) = builder.build();
        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
        ObjectReader::new(vas, Box::new(resolver))
    }

    /// Build a synthetic heap page containing HIST_ENTRY structs and strings.
    ///
    /// Layout at `heap_vaddr`:
    ///   0x000: "ls -la\0"
    ///   0x010: "#1700000000\0"
    ///   0x020: "whoami\0"
    ///   0x030: "#1700000001\0"
    ///   0x040: "cat /etc/shadow\0"
    ///   0x050: "#1700000002\0"
    ///   0x100: HIST_ENTRY[0] { line=heap+0, ts=heap+0x10, data=0 }
    ///   0x118: HIST_ENTRY[1] { line=heap+0x20, ts=heap+0x30, data=0 }
    ///   0x130: HIST_ENTRY[2] { line=heap+0x40, ts=heap+0x50, data=0 }
    fn build_heap_with_history(heap_vaddr: u64) -> Vec<u8> {
        let mut heap = vec![0u8; 4096];

        // String pool
        let strings: &[(&[u8], usize)] = &[
            (b"ls -la\0", 0x000),
            (b"#1700000000\0", 0x010),
            (b"whoami\0", 0x020),
            (b"#1700000001\0", 0x030),
            (b"cat /etc/shadow\0", 0x040),
            (b"#1700000002\0", 0x050),
        ];
        for &(s, off) in strings {
            heap[off..off + s.len()].copy_from_slice(s);
        }

        // HIST_ENTRY structs (24 bytes each: line ptr, timestamp ptr, data ptr)
        let entries: &[(u64, u64)] = &[
            (heap_vaddr, heap_vaddr + 0x010),         // ls -la
            (heap_vaddr + 0x020, heap_vaddr + 0x030), // whoami
            (heap_vaddr + 0x040, heap_vaddr + 0x050), // cat /etc/shadow
        ];
        // Entries are packed back to back starting at offset 0x100.
        let mut off = 0x100;
        for &(line_ptr, ts_ptr) in entries {
            heap[off..off + 8].copy_from_slice(&line_ptr.to_le_bytes());
            heap[off + 8..off + 16].copy_from_slice(&ts_ptr.to_le_bytes());
            heap[off + 16..off + 24].copy_from_slice(&0u64.to_le_bytes()); // data = NULL
            off += 24;
        }

        heap
    }

    // Regression guard: an anonymous RW VMA is selected as a heap candidate and
    // all three HIST_ENTRY structs in it are recovered in order.
    // NOTE(review): this fixture contains no file-backed VMA, so the
    // "file-backed regions are skipped" path is not exercised here.
    #[test]
    fn recovers_bash_history_from_heap() {
        let vaddr: u64 = 0xFFFF_8000_0010_0000;
        let paddr: u64 = 0x0080_0000;
        let mut data = vec![0u8; 4096];

        // init_task (PID 42, comm "bash")
        data[0..4].copy_from_slice(&42u32.to_le_bytes());
        let tasks_addr = vaddr + 16;
        data[16..24].copy_from_slice(&tasks_addr.to_le_bytes()); // tasks.next = self
        data[24..32].copy_from_slice(&tasks_addr.to_le_bytes()); // tasks.prev = self
        data[32..36].copy_from_slice(b"bash");
        let mm_addr = vaddr + 0x200;
        data[48..56].copy_from_slice(&mm_addr.to_le_bytes()); // mm

        // mm_struct at +0x200
        data[0x200..0x208].copy_from_slice(&0x1000u64.to_le_bytes()); // pgd
        let vma_addr = vaddr + 0x300;
        data[0x208..0x210].copy_from_slice(&vma_addr.to_le_bytes()); // mmap

        // VMA: anonymous RW heap region in userspace
        let heap_vaddr: u64 = 0x0000_5555_0000_0000;
        let heap_paddr: u64 = 0x0090_0000;
        data[0x300..0x308].copy_from_slice(&heap_vaddr.to_le_bytes()); // vm_start
        data[0x308..0x310].copy_from_slice(&(heap_vaddr + 0x1000).to_le_bytes()); // vm_end
        data[0x310..0x318].copy_from_slice(&0u64.to_le_bytes()); // vm_next = NULL
        data[0x318..0x320].copy_from_slice(&0x3u64.to_le_bytes()); // vm_flags: rw-
        data[0x320..0x328].copy_from_slice(&0u64.to_le_bytes()); // vm_pgoff
        data[0x328..0x330].copy_from_slice(&0u64.to_le_bytes()); // vm_file = NULL (anon)

        let heap = build_heap_with_history(heap_vaddr);

        let reader = make_test_reader(&data, vaddr, paddr, &[(heap_vaddr, heap_paddr, &heap)]);
        let results = walk_bash_history(&reader).unwrap();

        assert_eq!(results.len(), 3);
        assert_eq!(results[0].pid, 42);
        assert_eq!(results[0].comm, "bash");
        assert_eq!(results[0].command, "ls -la");
        assert_eq!(results[0].timestamp, Some(1_700_000_000));
        assert_eq!(results[1].command, "whoami");
        assert_eq!(results[1].timestamp, Some(1_700_000_001));
        assert_eq!(results[2].command, "cat /etc/shadow");
        assert_eq!(results[2].timestamp, Some(1_700_000_002));
    }

    // A task whose comm is not "bash" must contribute no history entries.
    #[test]
    fn skips_non_bash_processes() {
        let vaddr: u64 = 0xFFFF_8000_0010_0000;
        let paddr: u64 = 0x0080_0000;
        let mut data = vec![0u8; 4096];

        // init_task (PID 1, comm "nginx") — not bash
        data[0..4].copy_from_slice(&1u32.to_le_bytes());
        let tasks_addr = vaddr + 16;
        data[16..24].copy_from_slice(&tasks_addr.to_le_bytes());
        data[24..32].copy_from_slice(&tasks_addr.to_le_bytes());
        data[32..37].copy_from_slice(b"nginx");
        let mm_addr = vaddr + 0x200;
        data[48..56].copy_from_slice(&mm_addr.to_le_bytes());

        data[0x200..0x208].copy_from_slice(&0x1000u64.to_le_bytes());
        data[0x208..0x210].copy_from_slice(&0u64.to_le_bytes()); // mmap = NULL

        let reader = make_test_reader(&data, vaddr, paddr, &[]);
        let results = walk_bash_history(&reader).unwrap();

        assert!(results.is_empty());
    }

    // regression guard: mm==0 kernel thread produces no history entries
    #[test]
    fn skips_kernel_threads() {
        let vaddr: u64 = 0xFFFF_8000_0010_0000;
        let paddr: u64 = 0x0080_0000;
        let mut data = vec![0u8; 4096];

        // comm is "bash" but mm = NULL (kernel thread)
        data[0..4].copy_from_slice(&0u32.to_le_bytes());
        let tasks_addr = vaddr + 16;
        data[16..24].copy_from_slice(&tasks_addr.to_le_bytes());
        data[24..32].copy_from_slice(&tasks_addr.to_le_bytes());
        data[32..36].copy_from_slice(b"bash");
        data[48..56].copy_from_slice(&0u64.to_le_bytes()); // mm = NULL

        let reader = make_test_reader(&data, vaddr, paddr, &[]);
        let results = walk_bash_history(&reader).unwrap();

        assert!(results.is_empty());
    }

    // Printable-ASCII validator: accepts text and tabs, rejects empty input,
    // control characters and embedded NUL.
    #[test]
    fn is_printable_ascii_validates() {
        assert!(is_printable_ascii(b"hello world"));
        assert!(is_printable_ascii(b"ls -la /etc"));
        assert!(is_printable_ascii(b"echo\t\"test\""));
        assert!(!is_printable_ascii(b"")); // empty
        assert!(!is_printable_ascii(b"\x01\x02")); // control chars
        assert!(!is_printable_ascii(b"hello\x00world")); // embedded null
    }

    // Well-formed "#DIGITS" strings parse to their numeric value.
    #[test]
    fn parse_bash_timestamp_valid() {
        assert_eq!(parse_bash_timestamp("#1700000000"), Some(1_700_000_000));
        assert_eq!(parse_bash_timestamp("#0"), Some(0));
        assert_eq!(parse_bash_timestamp("#999999999999"), Some(999_999_999_999));
    }

    // Malformed timestamp strings are rejected with `None`.
    #[test]
    fn parse_bash_timestamp_invalid() {
        assert_eq!(parse_bash_timestamp("1700000000"), None); // missing #
        assert_eq!(parse_bash_timestamp("#abc"), None); // not digits
        assert_eq!(parse_bash_timestamp("#"), None); // just hash
        assert_eq!(parse_bash_timestamp(""), None); // empty
    }

    // Without an `init_task` symbol the walk must fail with MissingKernelSymbol.
    #[test]
    fn missing_init_task_symbol() {
        let isf = IsfBuilder::new()
            .add_struct("task_struct", 64)
            .add_field("task_struct", "pid", 0, "int")
            .add_field("task_struct", "tasks", 8, "list_head")
            .add_struct("list_head", 16)
            .add_field("list_head", "next", 0, "pointer")
            .add_field("list_head", "prev", 8, "pointer")
            .build_json();

        let resolver = IsfResolver::from_value(&isf).unwrap();
        let (cr3, mem) = PageTableBuilder::new().build();
        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
        let reader = ObjectReader::new(vas, Box::new(resolver));

        let result = walk_bash_history(&reader);
        assert!(
            matches!(result, Err(crate::Error::MissingKernelSymbol { ref name }) if name == "init_task"),
            "expected MissingKernelSymbol {{name: \"init_task\"}}, got {result:?}"
        );
    }

    // With `init_task` present but `task_struct.tasks` absent from the symbol
    // table, the walk must fail with MissingField for that field.
    #[test]
    fn missing_tasks_field_returns_missing_field() {
        let isf = IsfBuilder::new()
            .add_struct("task_struct", 64)
            .add_field("task_struct", "pid", 0, "int")
            // tasks intentionally omitted
            .add_struct("list_head", 16)
            .add_field("list_head", "next", 0, "pointer")
            .add_field("list_head", "prev", 8, "pointer")
            .add_symbol("init_task", 0xFFFF_8000_0010_0000)
            .build_json();
        let resolver = IsfResolver::from_value(&isf).unwrap();
        let (cr3, mem) = PageTableBuilder::new().build();
        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
        let reader: ObjectReader<SyntheticPhysMem> = ObjectReader::new(vas, Box::new(resolver));
        let result = walk_bash_history(&reader);
        assert!(
            matches!(result, Err(crate::Error::MissingField { ref struct_name, ref field_name }) if struct_name == "task_struct" && field_name == "tasks"),
            "expected MissingField task_struct.tasks, got {result:?}"
        );
    }
}