p8n_types/
loader.rs

1// Panopticon - A libre program analysis library for machine code
2// Copyright (C) 2014-2018  The Panopticon Developers
3//
4// This library is free software; you can redistribute it and/or
5// modify it under the terms of the GNU Lesser General Public
6// License as published by the Free Software Foundation; either
7// version 2.1 of the License, or (at your option) any later version.
8//
9// This library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12// Lesser General Public License for more details.
13//
14// You should have received a copy of the GNU Lesser General Public
15// License along with this library; if not, write to the Free Software
16// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17
//! Loader for 32- and 64-bit ELF, PE, and Mach-O files.
19
20use std::fs::File;
21use std::path::Path;
22
23use memmap::{MmapOptions, Mmap};
24use goblin::{self, Hint, pe, elf, mach};
25use goblin::elf::program_header;
26use goblin::pe::section_table::SectionTable;
27
28use {Str, Region, Result};
29
/// CPU architecture a binary file is intended for.
///
/// This is a plain, field-less tag. It is cheap to copy and can be compared
/// and hashed, so callers can branch on it or use it as a map key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Machine {
    /// 8-bit AVR
    Avr,
    /// AMD64
    Amd64,
    /// Intel x86
    Ia32,
}
40
41/// Named pointer inside a region.
42#[derive(Debug, Clone)]
43pub struct Pointer {
44    /// Region name
45    pub segment: Str,
46    /// Pointed-to name
47    pub name: Str,
48    /// Offset from the start of the region
49    pub offset: u64,
50}
51
52/// Binary file meta information. Starting point for disassembler routines.
53#[derive(Debug, Clone)]
54pub struct Content {
55    /// CPU ISA
56    pub machine: Machine,
57    /// Public functions
58    pub entry_points: Vec<Pointer>,
59    /// Regions
60    pub segments: Vec<Region>,
61    /// Imported functions
62    pub symbolic: Vec<Pointer>,
63}
64
65impl Content {
66    /// Load an ELF or PE file from disk and creates a `Project` from it. Returns the `Project` instance and
67    /// the CPU its intended for.
68    pub fn load(path: &Path) -> Result<Self> {
69        Self::load_all(path).and_then(|x| match x.into_iter().next() {
70            Some(x) => Ok(x),
71            None => Err(format!("Not a supported file format").into()),
72        })
73    }
74
75    /// Load all programs inside a ELF or PE file.
76    pub fn load_all(path: &Path) -> Result<Vec<Self>> {
77        let fd = File::open(path)?;
78        let map = unsafe { Mmap::map(&fd)? };
79        let mut magic = [0u8; 16];
80
81        magic.copy_from_slice(&map[0..16]);
82
83        match goblin::peek_bytes(&magic)? {
84            Hint::Unknown(magic) => Err(format!("Tried to load an unknown file. Magic: {}", magic).into()),
85            Hint::Elf(_) => Self::load_elf(&map, &fd, path).map(|x| vec![x]),
86            Hint::PE => Self::load_pe(&map, &fd, path).map(|x| vec![x]),
87            Hint::Mach(_) => Self::load_mach(&map, &fd, path).map(|x| vec![x]),
88            Hint::MachFat(_) => {
89                unimplemented!()
90            }
91            Hint::Archive => {
92                unimplemented!()
93            }
94        }
95    }
96
97    fn load_mach(map: &Mmap, fd: &File, path: &Path) -> Result<Content> {
98        let binary = mach::MachO::parse(&map[..], 0)?;
99        debug!("mach: {:#?}", &binary);
100        let mut base = 0x0;
101        let cputype = binary.header.cputype;
102        let (machine, addr_bits) = match cputype {
103            mach::cputype::CPU_TYPE_X86 => {
104                (Machine::Ia32, 32)
105            }
106            mach::cputype::CPU_TYPE_X86_64 => {
107                (Machine::Amd64, 64)
108            }
109            machine => {
110                return Err(
111                    format!(
112                        "Unsupported machine ({:#x})",
113                        machine,
114                        )
115                    .into()
116                    )
117            }
118        };
119        let mut regs = Vec::default();
120        let mut syms = Vec::default();
121        let mut entries = Vec::default();
122
123        for segment in &*binary.segments {
124            let offset = segment.fileoff as usize;
125            let filesize = segment.filesize as usize;
126            if offset + filesize > map.len() {
127                return Err(
128                    format!(
129                        "Failed to read segment: range {:?} greater than len {}",
130                        offset..offset + filesize,
131                        map.len()
132                        )
133                    .into()
134                    );
135            }
136            let start = segment.vmaddr;
137            let name = segment.name()?;
138
139            debug!(
140                "Load mach segment {:?}: {} bytes segment to {:#x}",
141                name,
142                segment.vmsize,
143                start
144                );
145
146            let reg = if filesize > 0 {
147                Self::load_section(name.to_string().into(), fd, path, filesize, addr_bits, offset, start)?
148            } else {
149                Region::undefined(name.to_string(), addr_bits, None)
150            };
151            regs.push(reg);
152
153            if name == "__TEXT" {
154                base = segment.vmaddr;
155                debug!("Setting vm address base to {:#x}", base);
156            }
157        }
158
159        let entry = binary.entry;
160
161        if entry != 0 {
162            match Self::resolve_reference(entry as u64, "(entry)", &regs) {
163                Some(e) => { entries.push(e); }
164                None => { /* do nothing */ }
165            }
166        }
167
168        for export in binary.exports()? {
169            if export.offset != 0 {
170                debug!("adding: {:?}", &export);
171
172                match Self::resolve_reference(export.offset as u64 + base, &export.name, &regs) {
173                    Some(e) => { entries.push(e); }
174                    None => { /* do nothing */ }
175                }
176            }
177        }
178
179        for import in binary.imports()? {
180            debug!("Import {}: {:#x}", import.name, import.offset);
181
182            match Self::resolve_reference(import.offset as u64, import.name, &regs) {
183                Some(e) => { syms.push(e); }
184                None => { /* do nothing */ }
185            }
186        }
187
188        let c = Content{
189            machine: machine,
190            entry_points: entries,
191            symbolic: syms,
192            segments: regs,
193        };
194        Ok(c)
195    }
196
197    /// Parses an ELF 32/64-bit binary from `bytes` and creates a `Project` from it. Returns the `Project` instance and
198    /// the CPU its intended for.
199    fn load_elf(map: &Mmap, fd: &File, path: &Path) -> Result<Content> {
200        use std::collections::HashSet;
201
202        let binary = elf::Elf::parse(&map[..])?;
203        let mut regs = vec![];
204        let mut entries: Vec<Pointer> = vec![];
205        let mut syms: Vec<Pointer> = vec![];
206
207        debug!("elf: {:#?}", &binary);
208
209        let (machine, addr_bits) = match binary.header.e_machine {
210            elf::header::EM_X86_64 => {
211                (Machine::Amd64, 64)
212            }
213            elf::header::EM_386 => {
214                (Machine::Ia32, 32)
215            }
216            elf::header::EM_AVR => {
217                (Machine::Avr, 22)
218            }
219            machine => return Err(format!("Unsupported machine: {}", machine).into()),
220        };
221
222
223        for (idx, ph) in binary.program_headers.iter().enumerate() {
224            if ph.p_type == program_header::PT_LOAD {
225                debug!("Load ELF {} bytes segment to {:#x}", ph.p_filesz, ph.p_vaddr);
226
227                let reg = Self::load_section(format!("sec{}", idx).into(), fd, path, ph.p_filesz as usize, addr_bits, ph.p_offset as usize, ph.p_vaddr)?;
228                regs.push(reg);
229            }
230        }
231        let mut seen_syms = HashSet::<u64>::new();
232
233        // add dynamic symbol information (non-strippable)
234        for sym in &binary.dynsyms {
235            let name = &binary.dynstrtab[sym.st_name];
236
237            Self::add_elf_symbol(&sym, name, &regs, &mut syms, &mut entries);
238            seen_syms.insert(sym.st_value);
239
240            let name = &binary.dynstrtab[sym.st_name];
241            if !Self::resolve_elf_import_address(&binary.pltrelocs, name, &regs, &binary, &mut syms) {
242                if sym.is_function() {
243                    if !Self::resolve_elf_import_address(&binary.dynrelas, name, &regs, &binary, &mut syms) {
244                        Self::resolve_elf_import_address(&binary.dynrels, name, &regs, &binary, &mut syms);
245                    }
246                }
247            }
248        }
249
250        // add strippable symbol information
251        for sym in &binary.syms {
252            let name = &binary.strtab[sym.st_name];
253            if !seen_syms.contains(&sym.st_value) {
254                Self::add_elf_symbol(&sym, &name, &regs, &mut syms, &mut entries);
255            }
256            seen_syms.insert(sym.st_value);
257        }
258
259        // binary entry point
260        match Self::resolve_reference(binary.entry, "(entry)", &regs) {
261            Some(e) => { entries.push(e); }
262            None => { /* do nothing */ }
263        }
264
265        let c = Content{
266            machine: machine,
267            entry_points: entries,
268            symbolic: syms,
269            segments: regs,
270        };
271        Ok(c)
272    }
273
274    fn add_elf_symbol(sym: &elf::Sym, name: &str, regs: &[Region], syms: &mut Vec<Pointer>, entries: &mut Vec<Pointer>) {
275        let name = name.to_string();
276        let addr = sym.st_value;
277
278        debug!("Symbol: {} @ 0x{:x}: {:?}", name, addr, sym);
279
280        if sym.is_function() {
281            match Self::resolve_reference(addr, &name, &regs) {
282                Some(e) =>
283                    if sym.is_import() { syms.push(e); }
284                    else { entries.push(e); },
285                None => { /* do nothing */ }
286            }
287        }
288    }
289
290    fn resolve_elf_import_address(relocs: &elf::RelocSection, name: &str, regs: &[Region], binary: &elf::Elf, syms: &mut Vec<Pointer>) -> bool {
291        for reloc in relocs.iter() {
292            if let Some(pltsym) = &binary.dynsyms.get(reloc.r_sym) {
293                let pltname = &binary.dynstrtab[pltsym.st_name];
294                if pltname == name {
295                    debug!("Import match {}: {:#x} {:?}", name, reloc.r_offset, pltsym);
296
297                    match Self::resolve_reference(reloc.r_offset as u64, name.into(), &regs) {
298                        Some(e) => { syms.push(e); }
299                        None => { /* do nothing */ }
300                    }
301                    return true;
302                }
303            }
304        }
305        false
306    }
307
308    /// Parses a PE32/PE32+ file from `bytes` and create a project from it.
309    fn load_pe(map: &Mmap, fd: &File, path: &Path) -> Result<Self> {
310        let pe = pe::PE::parse(&map[..])?;
311        let image_base = pe.image_base as u64;
312        let mut regs = vec![];
313        let mut entries = vec![];
314        let entry = (pe.image_base + pe.entry) as u64;
315        let size = fd.metadata()?.len();
316        let address_bits = if pe.is_64 { 64 } else { 32 };
317
318        debug!("loading PE: {:#?}", &pe);
319
320        // alloc. segment for each section
321        for section in &pe.sections {
322            debug!("loading PE section: {:?}", section.name);
323
324            let ret = Self::load_pe_section(section, fd, path, size as usize, image_base, address_bits)?;
325            let addr = section.virtual_address as u64 + pe.image_base as u64;
326
327            if entry >= addr && entry < addr + section.virtual_size as u64 {
328                let ent = Pointer{
329                    segment: ret.name().clone(),
330                    name: "main".into(),
331                    offset: entry,
332                };
333                entries.push(ent);
334            }
335
336            regs.push(ret);
337        }
338
339        debug!("PE file entry at {:#x}", entry);
340
341        // add exported functions as entry points
342        for (i, export) in pe.exports.iter().enumerate() {
343            debug!("adding PE export: {:?}", &export);
344
345            let nam = export.name.map(|x| x.to_string()).unwrap_or(format!("exp{}", i));
346            match Self::resolve_reference(export.rva as u64 + image_base, &nam, &regs) {
347                Some(e) => {
348                    entries.push(e);
349                }
350                None => {
351                    error!("PE export {:?} at {:#x} not mapped", export.name, export.rva);
352                }
353            }
354        }
355
356        let mut syms = vec![];
357
358        // add imports as symbolic functions
359        for import in pe.imports {
360            debug!("adding PE import: {:?}", &import);
361
362            match Self::resolve_reference(import.rva as u64 + image_base, &import.name, &regs) {
363                Some(e) => {
364                    syms.push(e);
365                }
366                None => {
367                    error!("PE import {} at {:#x} not mapped", import.name, import.rva);
368                }
369            }
370        }
371
372        let c = Content{
373            machine: Machine::Ia32,
374            entry_points: entries,
375            symbolic: syms,
376            segments: regs,
377        };
378        Ok(c)
379    }
380
381    fn load_pe_section(sec: &SectionTable, fd: &File, path: &Path, fsize: usize, image_base: u64, address_bits: usize) -> Result<Region> {
382        let voffset = sec.virtual_address as u64 + image_base;
383        let vsize = sec.virtual_size as u64;
384        let offset = sec.pointer_to_raw_data as usize;
385        let size = sec.size_of_raw_data as usize;
386        let name = String::from_utf8(sec.name[..].to_vec())?;
387
388        if size > 0 {
389            if offset + size > fsize {
390                return Err(format!("PE section out of range: {:#x} + {:#x} >= {:#x}",offset,size,fsize).into());
391            }
392
393            // XXX: implemented larger vsize in Region
394            debug!("PE section {} mapped from {:?} to {:?}", name, offset..offset + size, voffset..voffset + size as u64);
395            Self::load_section(name.into(), fd, path, size, address_bits, offset, voffset)
396        } else {
397            debug!("PE section {} mapped to {:?}", name, voffset..voffset + vsize);
398            Ok(Region::undefined(name, address_bits, None))
399        }
400    }
401
402    fn resolve_reference(addr: u64, name: &str, regs: &[Region]) -> Option<Pointer> {
403        for r in regs {
404            if r.in_range(addr..addr + 1) {
405                return Some(Pointer{
406                    segment: r.name().clone(),
407                    name: name.to_string().into(),
408                    offset: addr,
409                });
410            }
411        }
412
413        None
414    }
415
416    fn load_section(name: Str, fd: &File, path: &Path, size: usize, address_bits: usize, file_offset: usize, load_offset: u64) -> Result<Region> {
417        let mmap = unsafe {
418            MmapOptions::new()
419                .len(size)
420                .offset(file_offset)
421                .map(fd)?
422        };
423        Ok(Region::from_mmap(name, address_bits, mmap, path.to_path_buf(), file_offset as u64, load_offset, None))
424    }
425}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads `tests/data/<file_name>` from the crate directory and prints the result.
    ///
    /// Panics (failing the calling test) if the file cannot be loaded.
    fn load_and_print(file_name: &str) {
        use std::path::Path;

        let full_path = format!("{}/tests/data/{}", env!("CARGO_MANIFEST_DIR"), file_name);
        let content = Content::load(Path::new(&full_path)).unwrap();

        println!("{:?}", content);
    }

    #[test]
    fn pe() {
        load_and_print("test.exe");
    }

    #[test]
    fn elf() {
        load_and_print("dynamic-32");
    }

    #[test]
    fn mach() {
        load_and_print("deadbeef.mach");
    }
}