Skip to main content

memf_linux/
elfinfo.rs

1//! Linux ELF header extraction from process memory.
2//!
3//! Walks process VMAs and checks for the ELF magic (`\x7fELF`) at the
4//! start of file-backed regions. Extracts ELF header fields to identify
5//! loaded binaries and shared libraries.
6
7use memf_core::object_reader::ObjectReader;
8use memf_format::PhysicalMemoryProvider;
9
10use crate::{vma_walker::for_each_task_vma, ElfInfo, ElfType, Error, Result};
11
12/// ELF magic bytes.
13const ELF_MAGIC: [u8; 4] = [0x7f, b'E', b'L', b'F'];
14
15/// Minimum ELF header size (64-bit).
16const ELF64_HEADER_SIZE: usize = 64;
17
18/// Walk all process VMAs and extract ELF headers.
19///
20/// For each process, walks the VMA list and reads the first
21/// [`ELF64_HEADER_SIZE`] bytes from each region. Regions starting
22/// with the ELF magic are parsed and returned.
23pub fn walk_elfinfo<P: PhysicalMemoryProvider>(reader: &ObjectReader<P>) -> Result<Vec<ElfInfo>> {
24    let init_task_addr = reader
25        .symbols()
26        .symbol_address("init_task")
27        .ok_or_else(|| Error::MissingKernelSymbol {
28            name: "init_task".into(),
29        })?;
30
31    let tasks_offset = reader
32        .symbols()
33        .field_offset("task_struct", "tasks")
34        .ok_or_else(|| Error::MissingField {
35            struct_name: "task_struct".into(),
36            field_name: "tasks".into(),
37        })?;
38
39    let head_vaddr = init_task_addr + tasks_offset;
40    let task_addrs = reader.walk_list(head_vaddr, "task_struct", "tasks")?;
41
42    let mut results = Vec::new();
43
44    scan_process_elfs(reader, init_task_addr, &mut results);
45
46    for &task_addr in &task_addrs {
47        scan_process_elfs(reader, task_addr, &mut results);
48    }
49
50    Ok(results)
51}
52
53/// Scan a single process's VMAs for ELF headers.
54fn scan_process_elfs<P: PhysicalMemoryProvider>(
55    reader: &ObjectReader<P>,
56    task_addr: u64,
57    out: &mut Vec<ElfInfo>,
58) {
59    let pid: u32 = match reader.read_field(task_addr, "task_struct", "pid") {
60        Ok(v) => v,
61        Err(_) => return,
62    };
63    let comm = reader
64        .read_field_string(task_addr, "task_struct", "comm", 16)
65        .unwrap_or_default();
66
67    for_each_task_vma(reader, task_addr, &mut |e| {
68        // Read the first 64 bytes and check for ELF magic
69        if let Ok(header_bytes) = reader.read_bytes(e.start, ELF64_HEADER_SIZE) {
70            if let Some(info) = parse_elf64_header(&header_bytes, u64::from(pid), &comm, e.start) {
71                out.push(info);
72            }
73        }
74    });
75}
76
77/// Parse a 64-bit ELF header from raw bytes.
78///
79/// Returns `None` if the magic doesn't match or the header is too short.
80fn parse_elf64_header(bytes: &[u8], pid: u64, comm: &str, vma_start: u64) -> Option<ElfInfo> {
81    if bytes.len() < ELF64_HEADER_SIZE {
82        return None;
83    }
84    if bytes[0..4] != ELF_MAGIC {
85        return None;
86    }
87    // Verify ELFCLASS64 (e_ident[4] == 2)
88    if bytes[4] != 2 {
89        return None;
90    }
91
92    let e_type = bytes[16..18].try_into().map_or(0, u16::from_le_bytes);
93    let e_machine = bytes[18..20].try_into().map_or(0, u16::from_le_bytes);
94    let e_entry = bytes[24..32].try_into().map_or(0, u64::from_le_bytes);
95
96    Some(ElfInfo {
97        pid,
98        comm: comm.to_string(),
99        vma_start,
100        elf_type: ElfType::from_raw(e_type),
101        machine: e_machine,
102        entry_point: e_entry,
103    })
104}
105
106#[cfg(test)]
107mod tests {
108    use super::*;
109    use memf_core::test_builders::{flags as ptflags, PageTableBuilder, SyntheticPhysMem};
110    use memf_core::vas::{TranslationMode, VirtualAddressSpace};
111    use memf_symbols::isf::IsfResolver;
112    use memf_symbols::test_builders::IsfBuilder;
113
114    fn make_test_reader(
115        data: &[u8],
116        vaddr: u64,
117        paddr: u64,
118        extra_mappings: &[(u64, u64, &[u8])],
119    ) -> ObjectReader<SyntheticPhysMem> {
120        let isf = IsfBuilder::new()
121            .add_struct("task_struct", 128)
122            .add_field("task_struct", "pid", 0, "int")
123            .add_field("task_struct", "state", 4, "long")
124            .add_field("task_struct", "tasks", 16, "list_head")
125            .add_field("task_struct", "comm", 32, "char")
126            .add_field("task_struct", "mm", 48, "pointer")
127            .add_struct("list_head", 16)
128            .add_field("list_head", "next", 0, "pointer")
129            .add_field("list_head", "prev", 8, "pointer")
130            .add_struct("mm_struct", 128)
131            .add_field("mm_struct", "pgd", 0, "pointer")
132            .add_field("mm_struct", "mmap", 8, "pointer")
133            .add_struct("vm_area_struct", 64)
134            .add_field("vm_area_struct", "vm_start", 0, "unsigned long")
135            .add_field("vm_area_struct", "vm_end", 8, "unsigned long")
136            .add_field("vm_area_struct", "vm_next", 16, "pointer")
137            .add_field("vm_area_struct", "vm_flags", 24, "unsigned long")
138            .add_field("vm_area_struct", "vm_pgoff", 32, "unsigned long")
139            .add_field("vm_area_struct", "vm_file", 40, "pointer")
140            .add_symbol("init_task", vaddr)
141            .build_json();
142
143        let resolver = IsfResolver::from_value(&isf).unwrap();
144        let mut builder = PageTableBuilder::new()
145            .map_4k(vaddr, paddr, ptflags::WRITABLE)
146            .write_phys(paddr, data);
147
148        for &(ev, ep, edata) in extra_mappings {
149            builder = builder
150                .map_4k(ev, ep, ptflags::WRITABLE)
151                .write_phys(ep, edata);
152        }
153
154        let (cr3, mem) = builder.build();
155        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
156        ObjectReader::new(vas, Box::new(resolver))
157    }
158
159    /// Build a minimal ELF64 header for testing.
160    fn build_elf64_header(elf_type: u16, machine: u16, entry: u64) -> Vec<u8> {
161        let mut hdr = vec![0u8; 4096];
162        // e_ident
163        hdr[0..4].copy_from_slice(&ELF_MAGIC);
164        hdr[4] = 2; // ELFCLASS64
165        hdr[5] = 1; // ELFDATA2LSB
166        hdr[6] = 1; // EV_CURRENT
167                    // e_type (offset 16)
168        hdr[16..18].copy_from_slice(&elf_type.to_le_bytes());
169        // e_machine (offset 18)
170        hdr[18..20].copy_from_slice(&machine.to_le_bytes());
171        // e_version (offset 20)
172        hdr[20..24].copy_from_slice(&1u32.to_le_bytes());
173        // e_entry (offset 24)
174        hdr[24..32].copy_from_slice(&entry.to_le_bytes());
175        hdr
176    }
177
178    // regression guard: VMA with ELF magic at vm_start produces ElfInfo entry
179    #[test]
180    fn detects_elf_in_process_vma() {
181        let vaddr: u64 = 0xFFFF_8000_0010_0000;
182        let paddr: u64 = 0x0080_0000;
183        let mut data = vec![0u8; 4096];
184
185        // init_task (PID 1, "cat")
186        data[0..4].copy_from_slice(&1u32.to_le_bytes());
187        let tasks_addr = vaddr + 16;
188        data[16..24].copy_from_slice(&tasks_addr.to_le_bytes());
189        data[24..32].copy_from_slice(&tasks_addr.to_le_bytes());
190        data[32..35].copy_from_slice(b"cat");
191        let mm_addr = vaddr + 0x200;
192        data[48..56].copy_from_slice(&mm_addr.to_le_bytes());
193
194        // mm_struct
195        data[0x200..0x208].copy_from_slice(&0x1000u64.to_le_bytes()); // pgd
196        let vma_addr = vaddr + 0x300;
197        data[0x208..0x210].copy_from_slice(&vma_addr.to_le_bytes()); // mmap
198
199        // VMA: code segment (r-x, file-backed)
200        let code_vaddr: u64 = 0x0000_5555_0000_0000;
201        let code_paddr: u64 = 0x0090_0000;
202        data[0x300..0x308].copy_from_slice(&code_vaddr.to_le_bytes());
203        data[0x308..0x310].copy_from_slice(&(code_vaddr + 0x1000).to_le_bytes());
204        data[0x310..0x318].copy_from_slice(&0u64.to_le_bytes()); // vm_next = NULL
205        data[0x318..0x320].copy_from_slice(&0x5u64.to_le_bytes()); // r-x
206        data[0x328..0x330].copy_from_slice(&0xABCDu64.to_le_bytes()); // vm_file non-null
207
208        let elf = build_elf64_header(
209            3,  // ET_DYN (PIE executable)
210            62, // EM_X86_64
211            0x0000_5555_0000_1000,
212        );
213
214        let reader = make_test_reader(&data, vaddr, paddr, &[(code_vaddr, code_paddr, &elf)]);
215        let results = walk_elfinfo(&reader).unwrap();
216
217        assert_eq!(results.len(), 1);
218        assert_eq!(results[0].pid, 1);
219        assert_eq!(results[0].comm, "cat");
220        assert_eq!(results[0].elf_type, ElfType::SharedObject);
221        assert_eq!(results[0].machine, 62);
222        assert_eq!(results[0].entry_point, 0x0000_5555_0000_1000);
223    }
224
225    #[test]
226    fn skips_non_elf_regions() {
227        let vaddr: u64 = 0xFFFF_8000_0010_0000;
228        let paddr: u64 = 0x0080_0000;
229        let mut data = vec![0u8; 4096];
230
231        data[0..4].copy_from_slice(&1u32.to_le_bytes());
232        let tasks_addr = vaddr + 16;
233        data[16..24].copy_from_slice(&tasks_addr.to_le_bytes());
234        data[24..32].copy_from_slice(&tasks_addr.to_le_bytes());
235        data[32..36].copy_from_slice(b"test");
236        let mm_addr = vaddr + 0x200;
237        data[48..56].copy_from_slice(&mm_addr.to_le_bytes());
238
239        data[0x200..0x208].copy_from_slice(&0x1000u64.to_le_bytes());
240        let vma_addr = vaddr + 0x300;
241        data[0x208..0x210].copy_from_slice(&vma_addr.to_le_bytes());
242
243        // VMA with non-ELF data
244        let region_vaddr: u64 = 0x0000_5555_0000_0000;
245        let region_paddr: u64 = 0x0090_0000;
246        data[0x300..0x308].copy_from_slice(&region_vaddr.to_le_bytes());
247        data[0x308..0x310].copy_from_slice(&(region_vaddr + 0x1000).to_le_bytes());
248        data[0x310..0x318].copy_from_slice(&0u64.to_le_bytes());
249        data[0x318..0x320].copy_from_slice(&0x5u64.to_le_bytes());
250        data[0x328..0x330].copy_from_slice(&0xABCDu64.to_le_bytes());
251
252        let non_elf = vec![0xFFu8; 4096]; // garbage, not ELF
253
254        let reader = make_test_reader(
255            &data,
256            vaddr,
257            paddr,
258            &[(region_vaddr, region_paddr, &non_elf)],
259        );
260        let results = walk_elfinfo(&reader).unwrap();
261
262        assert!(results.is_empty());
263    }
264
265    #[test]
266    fn parse_elf64_header_validates_magic() {
267        let mut bad = vec![0u8; 64];
268        bad[0..4].copy_from_slice(b"NOPE");
269        assert!(parse_elf64_header(&bad, 1, "test", 0x1000).is_none());
270    }
271
272    #[test]
273    fn parse_elf64_header_too_short() {
274        let short = vec![0x7f, b'E', b'L', b'F']; // only 4 bytes
275        assert!(parse_elf64_header(&short, 1, "test", 0x1000).is_none());
276    }
277
278    #[test]
279    fn missing_init_task_symbol() {
280        let isf = IsfBuilder::new()
281            .add_struct("task_struct", 64)
282            .add_field("task_struct", "pid", 0, "int")
283            .add_field("task_struct", "tasks", 8, "list_head")
284            .add_struct("list_head", 16)
285            .add_field("list_head", "next", 0, "pointer")
286            .add_field("list_head", "prev", 8, "pointer")
287            .build_json();
288
289        let resolver = IsfResolver::from_value(&isf).unwrap();
290        let (cr3, mem) = PageTableBuilder::new().build();
291        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
292        let reader = ObjectReader::new(vas, Box::new(resolver));
293
294        let result = walk_elfinfo(&reader);
295        assert!(
296            matches!(result, Err(crate::Error::MissingKernelSymbol { ref name }) if name == "init_task"),
297            "expected MissingKernelSymbol {{name: \"init_task\"}}, got {result:?}"
298        );
299    }
300
301    #[test]
302    fn missing_tasks_field_returns_missing_field() {
303        let isf = IsfBuilder::new()
304            .add_struct("task_struct", 64)
305            .add_field("task_struct", "pid", 0, "int")
306            // tasks intentionally omitted
307            .add_struct("list_head", 16)
308            .add_field("list_head", "next", 0, "pointer")
309            .add_field("list_head", "prev", 8, "pointer")
310            .add_symbol("init_task", 0xFFFF_8000_0010_0000)
311            .build_json();
312        let resolver = IsfResolver::from_value(&isf).unwrap();
313        let (cr3, mem) = PageTableBuilder::new().build();
314        let vas = VirtualAddressSpace::new(mem, cr3, TranslationMode::X86_64FourLevel);
315        let reader: ObjectReader<SyntheticPhysMem> = ObjectReader::new(vas, Box::new(resolver));
316        let result = walk_elfinfo(&reader);
317        assert!(
318            matches!(result, Err(crate::Error::MissingField { ref struct_name, ref field_name }) if struct_name == "task_struct" && field_name == "tasks"),
319            "expected MissingField task_struct.tasks, got {result:?}"
320        );
321    }
322}