1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
//! Loading executable binaries into Falcon.
//!
//! ```
//! # use falcon::error::*;
//! use falcon::loader::Elf;
//! use falcon::loader::Loader;
//! use std::path::Path;
//!
//! # fn example () -> Result<()> {
//! // Load an elf for analysis
//! let elf = Elf::from_file(Path::new("test_binaries/simple-0/simple-0"))?;
//! // Lift a program from the elf
//! let program = elf.program()?;
//! for function in program.functions() {
//!     println!("0x{:08x}: {}", function.address(), function.name());
//! }
//! # Ok(())
//! # }
//! ```

use crate::architecture::Architecture;
use crate::error::*;
use crate::executor::eval;
use crate::il;
use crate::memory;
use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::fmt;

mod elf;
mod json;
mod pe;
mod symbol;

pub use self::elf::*;
pub use self::json::*;
pub use self::pe::*;
pub use self::symbol::Symbol;

/// A declared entry point for a function.
///
/// Pairs the address at which a function begins with an optional
/// human-readable name. Entries compare and hash by both fields, so they can
/// be stored in sets or used as map keys.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct FunctionEntry {
    /// Address of the function's first instruction.
    address: u64,
    /// Name of the function, when one is known.
    name: Option<String>,
}

impl FunctionEntry {
    /// Create a new `FunctionEntry`.
    ///
    /// `name` is stored as given. When it is `None` the entry is simply
    /// unnamed and [`FunctionEntry::name`] returns `None`; choosing a fallback
    /// name is left to whoever consumes the entry.
    pub fn new(address: u64, name: Option<String>) -> FunctionEntry {
        FunctionEntry { address, name }
    }

    /// Get the address for this `FunctionEntry`.
    pub fn address(&self) -> u64 {
        self.address
    }

    /// Get the name for this `FunctionEntry`, or `None` if it is unnamed.
    pub fn name(&self) -> Option<&str> {
        self.name.as_ref().map(String::as_str)
    }
}

impl fmt::Display for FunctionEntry {
    /// Render the entry as `FunctionEntry(<name> -> 0x<ADDR>)` when it has a
    /// name, or `FunctionEntry(0x<ADDR>)` otherwise. The address is printed
    /// in upper-case hexadecimal without padding.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Some(name) = &self.name {
            return write!(f, "FunctionEntry({} -> 0x{:X})", name, self.address);
        }
        write!(f, "FunctionEntry(0x{:X})", self.address)
    }
}

/// Generic trait for all loaders.
///
/// Implementors supply the memory image, the known function entries, the
/// program entry point, the architecture and the symbols of a binary. The
/// provided methods build on those to lift single functions or whole
/// programs into Falcon IL.
pub trait Loader: fmt::Debug + Send + Sync {
    /// Get a model of the memory contained in the binary
    fn memory(&self) -> Result<memory::backing::Memory>;

    /// Get addresses for known function entries
    fn function_entries(&self) -> Result<Vec<FunctionEntry>>;

    /// The address program execution should begin at
    fn program_entry(&self) -> u64;

    /// Get the architecture of the binary
    fn architecture(&self) -> &dyn Architecture;

    /// Lift just one function from the executable.
    ///
    /// A fresh memory model is requested through [`Loader::memory`] on every
    /// call, and the function starting at `address` is translated with this
    /// loader's architecture-specific translator.
    ///
    /// # Errors
    ///
    /// Fails if the memory model cannot be built or if translation fails.
    fn function(&self, address: u64) -> Result<il::Function> {
        let translator = self.architecture().translator();
        let memory = self.memory()?;
        Ok(translator.translate_function(&memory, address)?)
    }

    /// Cast loader to `Any`
    fn as_any(&self) -> &dyn Any;

    /// Get the symbols for this loader
    fn symbols(&self) -> Vec<Symbol>;

    /// Get the symbols as a hashmap by address.
    ///
    /// When several symbols share an address, only the one that appears last
    /// in [`Loader::symbols`] is kept.
    fn symbols_map(&self) -> HashMap<u64, Symbol> {
        self.symbols()
            .into_iter()
            .map(|symbol| (symbol.address(), symbol))
            .collect()
    }

    /// Lift executable into an il::Program.
    ///
    /// Individual functions which fail to lift are omitted and ignored.
    fn program(&self) -> Result<il::Program> {
        Ok(self.program_verbose()?.0)
    }

    /// Lift executable into an `il::Program`.
    ///
    /// Errors encountered while lifting specific functions are collected, and
    /// returned with the `FunctionEntry` identifying the function. Only
    /// catastrophic errors should cause this function call to fail.
    ///
    /// Entries whose address has no permissions, or is not marked
    /// executable, are skipped without being reported.
    fn program_verbose(
        &self,
    ) -> std::result::Result<(il::Program, Vec<(FunctionEntry, Error)>), Error> {
        // Get our architecture-specific translator
        let translator = self.architecture().translator();

        // Create a mapping of the file memory
        let memory = self.memory()?;

        let mut program = il::Program::new();
        let mut translation_errors: Vec<(FunctionEntry, Error)> = Vec::new();

        for function_entry in self.function_entries()? {
            let address = function_entry.address();

            // Only lift from memory that is marked executable
            let executable = memory
                .permissions(address)
                .map_or(false, |p| p.contains(memory::MemoryPermissions::EXECUTE));
            if !executable {
                continue;
            }

            match translator.translate_function(&memory, address) {
                Ok(mut function) => {
                    function.set_name(function_entry.name().map(|n| n.to_string()));
                    program.add_function(function);
                }
                // The entry is not needed after this point, so hand it over
                // to the error list instead of cloning it.
                Err(e) => translation_errors.push((function_entry, e)),
            }
        }

        Ok((program, translation_errors))
    }

    /// Lift executable into an `il::Program`, while recursively resolving branch
    /// targets into functions.
    ///
    /// program_recursive silently drops any functions that cause lifting
    /// errors. If you care about those, use `program_recursive_verbose`.
    fn program_recursive(&self) -> Result<il::Program> {
        Ok(self.program_recursive_verbose()?.0)
    }

    /// Lift executable into an `il::Program`, while recursively resolving branch
    /// targets into functions.
    ///
    /// Starts from the result of [`Loader::program_verbose`], then repeatedly
    /// scans functions that have not been scanned yet for `Branch` operations
    /// whose target evaluates to a constant. Each such target that is not
    /// already a function in the program is lifted with [`Loader::function`].
    /// This continues until a pass discovers no new addresses.
    ///
    /// Functions that fail to lift during the recursive phase are reported
    /// with an unnamed `FunctionEntry`. Each address is attempted at most
    /// once, and an address that already failed in `program_verbose` is not
    /// retried. Newly found addresses are lifted in ascending order, so the
    /// lifting order and the order of reported errors are reproducible
    /// between runs.
    ///
    /// # Errors
    ///
    /// Fails only if `program_verbose` fails.
    fn program_recursive_verbose(
        &self,
    ) -> std::result::Result<(il::Program, Vec<(FunctionEntry, Error)>), Error> {
        /// Collect every `Branch` target in `function` that evaluates to a
        /// constant representable as a `u64`. Targets that cannot be
        /// evaluated, or whose value does not fit, are skipped.
        fn call_targets(function: &il::Function) -> Vec<u64> {
            let mut targets = Vec::new();
            for block in function.blocks().iter() {
                for instruction in block.instructions().iter() {
                    if let il::Operation::Branch { ref target } = *instruction.operation() {
                        if let Ok(constant) = eval(target) {
                            // Skip, rather than panic on, constants that do
                            // not fit in a u64.
                            if let Some(address) = constant.value_u64() {
                                targets.push(address);
                            }
                        }
                    }
                }
            }
            targets
        }

        let (mut program, mut translation_errors) = self.program_verbose()?;

        // Addresses of functions whose branch targets have been scanned.
        let mut processed: HashSet<u64> = HashSet::new();

        // Addresses we have already tried (and failed) to lift. Seeded with
        // the failures from `program_verbose` so they are not reported twice.
        let mut attempted: HashSet<u64> = translation_errors
            .iter()
            .map(|(entry, _)| entry.address())
            .collect();

        loop {
            let mut pending: Vec<u64> = {
                // The address of every function currently in the program
                let known: HashSet<u64> = program
                    .functions()
                    .into_iter()
                    .map(|function| function.address())
                    .collect();

                // Scan every function which is not currently a member of our
                // processed set, marking it processed as we go, and keep the
                // targets that are neither known nor already attempted.
                let mut discovered: HashSet<u64> = HashSet::new();
                for function in program.functions() {
                    if !processed.insert(function.address()) {
                        continue;
                    }
                    for target in call_targets(function) {
                        if !known.contains(&target) && !attempted.contains(&target) {
                            discovered.insert(target);
                        }
                    }
                }

                discovered.into_iter().collect()
            };

            if pending.is_empty() {
                break;
            }

            // HashSet iteration order is arbitrary; sort for reproducibility.
            pending.sort_unstable();

            // For each address, attempt to lift a function
            for address in pending {
                attempted.insert(address);
                match self.function(address) {
                    Ok(function) => program.add_function(function),
                    Err(e) => {
                        let function_entry = FunctionEntry::new(address, None);
                        translation_errors.push((function_entry, e));
                    }
                }
            }
        }

        Ok((program, translation_errors))
    }
}