use memf_core::object_reader::ObjectReader;
use memf_format::PhysicalMemoryProvider;
use crate::{vma_walker::for_each_task_vma, BashHistoryInfo, Error, Result};
/// Upper bound, in bytes, on how much of each candidate heap region is read
/// and scanned for history entries (1 MiB).
const MAX_HEAP_SCAN: u64 = 1024 * 1024;
/// Maximum length passed to `read_string` when reading a candidate command line.
const MAX_COMMAND_LEN: usize = 4096;
pub fn walk_bash_history<P: PhysicalMemoryProvider>(
reader: &ObjectReader<P>,
) -> Result<Vec<BashHistoryInfo>> {
let init_task_addr = reader
.symbols()
.symbol_address("init_task")
.ok_or_else(|| Error::MissingKernelSymbol {
name: "init_task".into(),
})?;
let tasks_offset = reader
.symbols()
.field_offset("task_struct", "tasks")
.ok_or_else(|| Error::MissingField {
struct_name: "task_struct".into(),
field_name: "tasks".into(),
})?;
let head_vaddr = init_task_addr + tasks_offset;
let task_addrs = reader.walk_list(head_vaddr, "task_struct", "tasks")?;
let mut results = Vec::new();
scan_process_history(reader, init_task_addr, &mut results);
for &task_addr in &task_addrs {
scan_process_history(reader, task_addr, &mut results);
}
Ok(results)
}
/// Scans one task for bash history entries and appends them to `out`.
///
/// The task is skipped silently when its `comm` or `pid` cannot be read, or
/// when `comm` is not exactly `"bash"`. This is a best-effort scan: read
/// failures never abort the overall walk.
///
/// Every VMA range is recorded for pointer validation. VMAs that have no
/// backing file and are readable, writable and not executable are treated as
/// candidate heap regions. At most [`MAX_HEAP_SCAN`] bytes of each are scanned.
///
/// * `reader` - object reader used for all memory and symbol access.
/// * `task_addr` - virtual address of the `task_struct` to inspect.
/// * `out` - destination for recovered entries; the `index` of each entry
///   counts up from 0 across all heap regions of this task.
fn scan_process_history<P: PhysicalMemoryProvider>(
    reader: &ObjectReader<P>,
    task_addr: u64,
    out: &mut Vec<BashHistoryInfo>,
) {
    let Ok(comm) = reader.read_field_string(task_addr, "task_struct", "comm", 16) else {
        return;
    };
    if comm != "bash" {
        return;
    }
    let pid: u32 = match reader.read_field(task_addr, "task_struct", "pid") {
        Ok(v) => v,
        Err(_) => return,
    };

    let mut vma_ranges: Vec<(u64, u64)> = Vec::new();
    let mut heap_regions: Vec<(u64, u64)> = Vec::new();
    for_each_task_vma(reader, task_addr, &mut |e| {
        vma_ranges.push((e.start, e.end));
        if e.file_ptr == 0 && e.flags.read && e.flags.write && !e.flags.exec {
            heap_regions.push((e.start, e.end));
        }
    });

    let mut index = 0u64;
    for &(start, end) in &heap_regions {
        // VMA bounds come from the analysed image and may be corrupted; an
        // inverted range (end < start) must not underflow (debug-build panic,
        // release-build wrap-around), so treat it as empty instead.
        let span = end.saturating_sub(start).min(MAX_HEAP_SCAN);
        if span == 0 {
            continue;
        }
        // `span` is capped at MAX_HEAP_SCAN (1 MiB), so the cast cannot truncate.
        let size = span as usize;
        let Ok(data) = reader.read_bytes(start, size) else {
            continue;
        };
        scan_heap_for_entries(
            reader,
            &data,
            &vma_ranges,
            u64::from(pid),
            &comm,
            &mut index,
            out,
        );
    }
}
/// Scans a raw heap snapshot for structures that look like history entries.
///
/// A candidate is three consecutive little-endian 8-byte words, of which only
/// the first two are inspected: a pointer to the command line and a pointer
/// to the timestamp string (presumably mirroring readline's `HIST_ENTRY`
/// layout; confirm against the target bash build). A candidate is accepted
/// when:
///
/// * the line pointer is non-null and falls inside one of `vma_ranges`;
/// * the timestamp pointer is null or falls inside one of `vma_ranges`;
/// * the line can be read, is non-empty and is printable ASCII;
/// * a non-null timestamp pointer leads to a parseable `#<seconds>` string.
///
/// The scan advances 8 bytes after a rejected candidate and 24 bytes after an
/// accepted one. Each accepted entry is pushed to `out` with the current
/// `*index`, which is then incremented.
fn scan_heap_for_entries<P: PhysicalMemoryProvider>(
    reader: &ObjectReader<P>,
    data: &[u8],
    vma_ranges: &[(u64, u64)],
    pid: u64,
    comm: &str,
    index: &mut u64,
    out: &mut Vec<BashHistoryInfo>,
) {
    const WORD: usize = 8;
    const ENTRY_SIZE: usize = 3 * WORD;

    // Decodes the little-endian word starting at `pos`.
    let word_at = |pos: usize| -> u64 {
        data[pos..pos + WORD]
            .try_into()
            .map_or(0, u64::from_le_bytes)
    };

    let mut cursor = 0usize;
    while cursor + ENTRY_SIZE <= data.len() {
        let candidate = cursor;
        // Assume a miss (slide by one word); a hit overrides this at the end.
        cursor = candidate + WORD;

        let line_ptr = word_at(candidate);
        let ts_ptr = word_at(candidate + WORD);

        let line_ptr_plausible = line_ptr != 0 && addr_in_vmas(line_ptr, vma_ranges);
        if !line_ptr_plausible {
            continue;
        }
        let ts_ptr_plausible = ts_ptr == 0 || addr_in_vmas(ts_ptr, vma_ranges);
        if !ts_ptr_plausible {
            continue;
        }

        let command = match reader.read_string(line_ptr, MAX_COMMAND_LEN) {
            Ok(text) => text,
            Err(_) => continue,
        };
        if command.is_empty() || !is_printable_ascii(command.as_bytes()) {
            continue;
        }

        // A null timestamp pointer is allowed; a non-null one must parse.
        let timestamp = if ts_ptr == 0 {
            None
        } else {
            let parsed = reader
                .read_string(ts_ptr, 32)
                .ok()
                .and_then(|raw| parse_bash_timestamp(&raw));
            if parsed.is_none() {
                continue;
            }
            parsed
        };

        out.push(BashHistoryInfo {
            pid,
            comm: comm.to_string(),
            command,
            timestamp,
            index: *index,
        });
        *index += 1;
        // Hit: skip past the whole three-word entry.
        cursor = candidate + ENTRY_SIZE;
    }
}
/// Returns `true` when `addr` lies inside at least one half-open
/// `[start, end)` range in `ranges`.
fn addr_in_vmas(addr: u64, ranges: &[(u64, u64)]) -> bool {
    for &(lo, hi) in ranges {
        if (lo..hi).contains(&addr) {
            return true;
        }
    }
    false
}
/// Returns `true` when `bytes` is non-empty and every byte is either a tab
/// or in the printable ASCII range `0x20..=0x7E`.
fn is_printable_ascii(bytes: &[u8]) -> bool {
    if bytes.is_empty() {
        return false;
    }
    for &byte in bytes {
        let acceptable = matches!(byte, b'\t' | b' '..=b'~');
        if !acceptable {
            return false;
        }
    }
    true
}
/// Parses a bash history timestamp marker of the form `#<decimal seconds>`.
///
/// Returns the seconds value, or `None` when `s` does not start with `#`, has
/// no digits after it, contains anything other than ASCII digits after the
/// `#`, or does not fit in an `i64`.
fn parse_bash_timestamp(s: &str) -> Option<i64> {
    let digits = s.strip_prefix('#')?;
    // `str::parse::<i64>` alone would also accept a leading `+` or `-`.
    // readline writes the timestamp as an unsigned decimal, so a sign means
    // this is not a real marker; reject it to avoid false-positive entries.
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    digits.parse::<i64>().ok()
}
// Unit tests: build synthetic physical memory plus a minimal ISF symbol table,
// then run the walker and its helpers against it.
#[cfg(test)]
mod tests {
use super::*;
use memf_core::test_builders::{flags as ptflags, PageTableBuilder, SyntheticPhysMem};
use memf_core::vas::{TranslationMode, VirtualAddressSpace};
use memf_symbols::isf::IsfResolver;
use memf_symbols::test_builders::IsfBuilder;
// Builds an `ObjectReader` over synthetic memory.
//
// `data` is written at `paddr` and mapped as one 4 KiB page at `vaddr`, which
// is also registered as the `init_task` symbol. Each `(vaddr, paddr, bytes)`
// tuple in `extra_mappings` is mapped and written the same way. The ISF
// describes only the struct fields registered below.
fn make_test_reader(
data: &[u8],
vaddr: u64,
paddr: u64,
extra_mappings: &[(u64, u64, &[u8])],
) -> ObjectReader<SyntheticPhysMem> {
let isf = IsfBuilder::new()
.add_struct("task_struct", 128)
.add_field("task_struct", "pid", 0, "int")
.add_field("task_struct", "state", 4, "long")
.add_field("task_struct", "tasks", 16, "list_head")
.add_field("task_struct", "comm", 32, "char")
.add_field("task_struct", "mm", 48, "pointer")
.add_struct("list_head", 16)
.add_field("list_head", "next", 0, "pointer")
.add_field("list_head", "prev", 8, "pointer")
.add_struct("mm_struct", 128)
.add_field("mm_struct", "pgd", 0, "pointer")
.add_field("mm_struct", "mmap", 8, "pointer")
.add_struct("vm_area_struct", 64)
.add_field("vm_area_struct", "vm_start", 0, "unsigned long")
.add_field("vm_area_struct", "vm_end", 8, "unsigned long")
.add_field("vm_area_struct", "vm_next", 16, "pointer")
.add_field("vm_area_struct", "vm_flags", 24, "unsigned long")
.add_field("vm_area_struct", "vm_pgoff", 32, "unsigned long")
.add_field("vm_area_struct", "vm_file", 40, "pointer")
.add_symbol("init_task", vaddr)
.build_json();
let resolver = IsfResolver::from_value(&isf).unwrap();
let mut builder = PageTableBuilder::new()
.map_4k(vaddr, paddr, ptflags::WRITABLE)
.write_phys(paddr, data);
for &(ev, ep, edata) in extra_mappings {
builder = builder
.map_4k(ev, ep, ptflags::WRITABLE)
.write_phys(ep, edata);
}
let (cr3, mem) = builder.build();
let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
ObjectReader::new(vas, Box::new(resolver))
}
// Builds a 4 KiB heap page holding three history entries.
//
// The NUL-terminated command and `#<seconds>` timestamp strings sit at
// offsets 0x000..0x050. Three 24-byte entries start at offset 0x100, each
// laid out as (line pointer, timestamp pointer, zero word), with pointers
// expressed relative to `heap_vaddr`.
fn build_heap_with_history(heap_vaddr: u64) -> Vec<u8> {
let mut heap = vec![0u8; 4096];
let strings: &[(&[u8], usize)] = &[
(b"ls -la\0", 0x000),
(b"#1700000000\0", 0x010),
(b"whoami\0", 0x020),
(b"#1700000001\0", 0x030),
(b"cat /etc/shadow\0", 0x040),
(b"#1700000002\0", 0x050),
];
for &(s, off) in strings {
heap[off..off + s.len()].copy_from_slice(s);
}
// (line pointer, timestamp pointer) pairs matching the strings above.
let entries: &[(u64, u64)] = &[
(heap_vaddr, heap_vaddr + 0x010), (heap_vaddr + 0x020, heap_vaddr + 0x030), (heap_vaddr + 0x040, heap_vaddr + 0x050), ];
let mut off = 0x100;
for &(line_ptr, ts_ptr) in entries {
heap[off..off + 8].copy_from_slice(&line_ptr.to_le_bytes());
heap[off + 8..off + 16].copy_from_slice(&ts_ptr.to_le_bytes());
// Third word of the entry is left as zero; the scanner does not read it.
heap[off + 16..off + 24].copy_from_slice(&0u64.to_le_bytes()); off += 24;
}
heap
}
// End-to-end: a single `bash` task (pid 42) with one anonymous VMA covering
// the synthetic heap must yield all three commands with their timestamps.
#[test]
fn recovers_bash_history_from_heap() {
let vaddr: u64 = 0xFFFF_8000_0010_0000;
let paddr: u64 = 0x0080_0000;
let mut data = vec![0u8; 4096];
// task_struct.pid (offset 0) = 42.
data[0..4].copy_from_slice(&42u32.to_le_bytes());
let tasks_addr = vaddr + 16;
// tasks.next and tasks.prev both point back at the list head itself, and
// comm (offset 32) is "bash".
data[16..24].copy_from_slice(&tasks_addr.to_le_bytes()); data[24..32].copy_from_slice(&tasks_addr.to_le_bytes()); data[32..36].copy_from_slice(b"bash");
let mm_addr = vaddr + 0x200;
// task_struct.mm (offset 48) -> mm_struct at page offset 0x200.
data[48..56].copy_from_slice(&mm_addr.to_le_bytes());
// mm_struct.pgd = 0x1000; mm_struct.mmap -> vm_area_struct at page offset 0x300.
data[0x200..0x208].copy_from_slice(&0x1000u64.to_le_bytes()); let vma_addr = vaddr + 0x300;
data[0x208..0x210].copy_from_slice(&vma_addr.to_le_bytes());
let heap_vaddr: u64 = 0x0000_5555_0000_0000;
let heap_paddr: u64 = 0x0090_0000;
// vm_area_struct fields in offset order: vm_start = heap base,
// vm_end = heap base + 0x1000, vm_next = 0, vm_flags = 0x3 (presumably
// read|write, so the VMA passes the heap filter), vm_pgoff = 0, vm_file = 0.
data[0x300..0x308].copy_from_slice(&heap_vaddr.to_le_bytes()); data[0x308..0x310].copy_from_slice(&(heap_vaddr + 0x1000).to_le_bytes()); data[0x310..0x318].copy_from_slice(&0u64.to_le_bytes()); data[0x318..0x320].copy_from_slice(&0x3u64.to_le_bytes()); data[0x320..0x328].copy_from_slice(&0u64.to_le_bytes()); data[0x328..0x330].copy_from_slice(&0u64.to_le_bytes());
let heap = build_heap_with_history(heap_vaddr);
let reader = make_test_reader(&data, vaddr, paddr, &[(heap_vaddr, heap_paddr, &heap)]);
let results = walk_bash_history(&reader).unwrap();
assert_eq!(results.len(), 3);
assert_eq!(results[0].pid, 42);
assert_eq!(results[0].comm, "bash");
assert_eq!(results[0].command, "ls -la");
assert_eq!(results[0].timestamp, Some(1_700_000_000));
assert_eq!(results[1].command, "whoami");
assert_eq!(results[1].timestamp, Some(1_700_000_001));
assert_eq!(results[2].command, "cat /etc/shadow");
assert_eq!(results[2].timestamp, Some(1_700_000_002));
}
// A task whose comm is "nginx" must produce no results.
#[test]
fn skips_non_bash_processes() {
let vaddr: u64 = 0xFFFF_8000_0010_0000;
let paddr: u64 = 0x0080_0000;
let mut data = vec![0u8; 4096];
data[0..4].copy_from_slice(&1u32.to_le_bytes());
let tasks_addr = vaddr + 16;
data[16..24].copy_from_slice(&tasks_addr.to_le_bytes());
data[24..32].copy_from_slice(&tasks_addr.to_le_bytes());
data[32..37].copy_from_slice(b"nginx");
let mm_addr = vaddr + 0x200;
data[48..56].copy_from_slice(&mm_addr.to_le_bytes());
data[0x200..0x208].copy_from_slice(&0x1000u64.to_le_bytes());
data[0x208..0x210].copy_from_slice(&0u64.to_le_bytes());
let reader = make_test_reader(&data, vaddr, paddr, &[]);
let results = walk_bash_history(&reader).unwrap();
assert!(results.is_empty());
}
// A task named "bash" with a null `mm` pointer must produce no results.
#[test]
fn skips_kernel_threads() {
let vaddr: u64 = 0xFFFF_8000_0010_0000;
let paddr: u64 = 0x0080_0000;
let mut data = vec![0u8; 4096];
data[0..4].copy_from_slice(&0u32.to_le_bytes());
let tasks_addr = vaddr + 16;
data[16..24].copy_from_slice(&tasks_addr.to_le_bytes());
data[24..32].copy_from_slice(&tasks_addr.to_le_bytes());
data[32..36].copy_from_slice(b"bash");
// task_struct.mm = NULL.
data[48..56].copy_from_slice(&0u64.to_le_bytes());
let reader = make_test_reader(&data, vaddr, paddr, &[]);
let results = walk_bash_history(&reader).unwrap();
assert!(results.is_empty());
}
// Accepts printable text and tabs; rejects empty input, control bytes and
// embedded NULs.
#[test]
fn is_printable_ascii_validates() {
assert!(is_printable_ascii(b"hello world"));
assert!(is_printable_ascii(b"ls -la /etc"));
assert!(is_printable_ascii(b"echo\t\"test\""));
assert!(!is_printable_ascii(b"")); assert!(!is_printable_ascii(b"\x01\x02")); assert!(!is_printable_ascii(b"hello\x00world")); }
// Well-formed `#<seconds>` markers parse to their numeric value.
#[test]
fn parse_bash_timestamp_valid() {
assert_eq!(parse_bash_timestamp("#1700000000"), Some(1_700_000_000));
assert_eq!(parse_bash_timestamp("#0"), Some(0));
assert_eq!(parse_bash_timestamp("#999999999999"), Some(999_999_999_999));
}
// Missing `#`, non-numeric payload, bare `#` and empty input are all rejected.
#[test]
fn parse_bash_timestamp_invalid() {
assert_eq!(parse_bash_timestamp("1700000000"), None); assert_eq!(parse_bash_timestamp("#abc"), None); assert_eq!(parse_bash_timestamp("#"), None); assert_eq!(parse_bash_timestamp(""), None); }
// Without an `init_task` symbol the walker must fail with MissingKernelSymbol.
#[test]
fn missing_init_task_symbol() {
let isf = IsfBuilder::new()
.add_struct("task_struct", 64)
.add_field("task_struct", "pid", 0, "int")
.add_field("task_struct", "tasks", 8, "list_head")
.add_struct("list_head", 16)
.add_field("list_head", "next", 0, "pointer")
.add_field("list_head", "prev", 8, "pointer")
.build_json();
let resolver = IsfResolver::from_value(&isf).unwrap();
let (cr3, mem) = PageTableBuilder::new().build();
let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
let reader = ObjectReader::new(vas, Box::new(resolver));
let result = walk_bash_history(&reader);
assert!(
matches!(result, Err(crate::Error::MissingKernelSymbol { ref name }) if name == "init_task"),
"expected MissingKernelSymbol {{name: \"init_task\"}}, got {result:?}"
);
}
// With `init_task` present but no `task_struct.tasks` field, the walker must
// fail with MissingField before touching memory (no pages are mapped here).
#[test]
fn missing_tasks_field_returns_missing_field() {
let isf = IsfBuilder::new()
.add_struct("task_struct", 64)
.add_field("task_struct", "pid", 0, "int")
.add_struct("list_head", 16)
.add_field("list_head", "next", 0, "pointer")
.add_field("list_head", "prev", 8, "pointer")
.add_symbol("init_task", 0xFFFF_8000_0010_0000)
.build_json();
let resolver = IsfResolver::from_value(&isf).unwrap();
let (cr3, mem) = PageTableBuilder::new().build();
let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
let reader: ObjectReader<SyntheticPhysMem> = ObjectReader::new(vas, Box::new(resolver));
let result = walk_bash_history(&reader);
assert!(
matches!(result, Err(crate::Error::MissingField { ref struct_name, ref field_name }) if struct_name == "task_struct" && field_name == "tasks"),
"expected MissingField task_struct.tasks, got {result:?}"
);
}
}