haybale 0.7.1

Symbolic execution of LLVM IR, written in Rust
Documentation
use crate::backend::{Backend, SolverRef};
use crate::function_hooks::FunctionHook;
use llvm_ir::module::{GlobalVariable, Linkage};
use llvm_ir::*;
use log::{debug, warn};
use std::cell::Cell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt;
use std::hash::{Hash, Hasher};

/// `GlobalAllocations` is responsible for keeping track of which global variable
/// (and function) names in which modules resolve to which addresses.
///
/// It has to take into account both module-private and public definitions, of
/// both the strong and weak varieties.
///
/// Public definitions are stored in maps keyed by name (or address) alone;
/// module-private definitions are stored in parallel maps which are
/// additionally keyed by the name of the module that owns them.
#[derive(Clone)]
pub(crate) struct GlobalAllocations<'p, B: Backend> {
    /// Map from `Name`s of public global variables and `Function`s, to either
    /// "strong" or "weak" `GlobalAllocation`s.
    /// See notes on [`Definition`](enum.Definition.html).
    allocated_globals: HashMap<Name, Definition<GlobalAllocation<'p, B::BV>>>,
    /// Map from `FunctionHook`s to addresses at which they are allocated.
    /// Currently, `FunctionHook` definitions are always "strong".
    allocated_hooks: HashMap<FunctionHook<'p, B>, B::BV>,
    /// Somewhat a reverse of the above two maps: this is a map from a
    /// (concrete, `u64`) address to the `Callable` which was allocated at that
    /// address (if any). Holds both public LLVM functions and function hooks.
    addr_to_function: HashMap<u64, Callable<'p, B>>,
    /// While `allocated_globals` is for "public" (non-module-private) globals,
    /// this is a similar map for module-private globals.
    /// It maps module names to maps of global names to `GlobalAllocation`s.
    /// Module-private definitions are always strong; they can never be weak.
    module_private_allocated_globals: HashMap<String, HashMap<Name, GlobalAllocation<'p, B::BV>>>,
    /// This is to `module_private_allocated_globals` as `addr_to_function` is
    /// to `allocated_globals`: it maps module names to maps of addresses to
    /// the module-private `Callable` allocated at that address.
    module_private_addr_to_function: HashMap<String, HashMap<u64, Callable<'p, B>>>,
}

/// A single global object (either a global variable or a function) together
/// with the address `V` at which it has been allocated.
#[derive(Clone)]
pub(crate) enum GlobalAllocation<'p, V> {
    GlobalVariable {
        /// The address at which the global variable is allocated
        addr: V,
        /// The initializer associated with the global variable
        initializer: ConstantRef,
        /// Whether the global variable has been initialized yet.
        ///
        /// This is a `Cell` so that it can be updated through the shared
        /// reference handed out by `GlobalAllocations::get_global_allocation()`.
        initialized: Cell<bool>,
    },
    Function {
        /// The prevailing definition of the `Function`
        func: &'p Function,
        /// The `Module` in which the prevailing definition of the `Function` was found
        module: &'p Module,
        /// The address at which the `Function` is allocated
        addr: V,
    },
}

impl<'p, V> GlobalAllocation<'p, V> {
    /// Borrow the address at which this global (variable or function) is
    /// allocated.
    pub fn get_addr(&self) -> &V {
        // Both variants carry an `addr` field of the same type
        match self {
            Self::GlobalVariable { addr, .. } | Self::Function { addr, .. } => addr,
        }
    }

    /// Overwrite the address of this global (variable or function) with
    /// `new_addr`, leaving all other fields untouched.
    fn set_addr(&mut self, new_addr: V) {
        let slot = match self {
            Self::GlobalVariable { addr, .. } | Self::Function { addr, .. } => addr,
        };
        *slot = new_addr;
    }
}

/// Strong and weak definitions.
///
/// Our definitions of "strong" and "weak" are slightly different than the LLVM
/// ones. In the case of multiple definitions of a single name in the same scope:
///   - Two strong definitions is an error
///   - One strong and one weak definition, the strong definition wins
///   - Two weak definitions, one will be chosen arbitrarily
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum Definition<V> {
    /// A definition which may not be overridden by any other definition
    Strong(V),
    /// A definition which may be overridden by a strong definition
    Weak(V),
}

impl<V> Definition<V> {
    /// Borrow the wrapped value, regardless of whether the definition is
    /// strong or weak.
    fn get(&self) -> &V {
        match self {
            // `v` is already a `&V` here, so no further borrow is needed
            Definition::Strong(v) | Definition::Weak(v) => v,
        }
    }

    /// Mutably borrow the wrapped value, regardless of whether the definition
    /// is strong or weak. The strength of the definition is not changed.
    fn get_mut(&mut self) -> &mut V {
        match self {
            // matching on `&mut self` already binds `v` as `&mut V`
            Definition::Strong(v) | Definition::Weak(v) => v,
        }
    }
}

/// Both LLVM `Function`s and `FunctionHook`s can be assigned addresses, and
/// function pointers can point to either.
///
/// `Clone`, `Debug`, `PartialEq`, `Eq`, and `Hash` are implemented by hand for
/// this type rather than derived.
pub(crate) enum Callable<'p, B: Backend> {
    /// A `Function` defined in LLVM IR
    LLVMFunction(&'p Function),
    /// A `FunctionHook` which has been assigned an address
    FunctionHook(FunctionHook<'p, B>),
}

// Hand-written `Clone`; presumably so that no `B: Clone` bound is required
// (which `#[derive(Clone)]` would add).
impl<'p, B: Backend> Clone for Callable<'p, B> {
    fn clone(&self) -> Self {
        match self {
            // a `&'p Function` is `Copy`, so this just copies the reference
            Self::LLVMFunction(function) => Self::LLVMFunction(*function),
            Self::FunctionHook(hook) => {
                let hook_copy = hook.clone();
                Self::FunctionHook(hook_copy)
            },
        }
    }
}

/// LLVM functions are printed with their name; function hooks are printed as
/// an opaque placeholder.
impl<'p, B: Backend> fmt::Debug for Callable<'p, B> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Callable::LLVMFunction(func) = self {
            return write!(f, "<Function {:?}>", &func.name);
        }
        // the only other variant is `FunctionHook`, which has no printable payload
        f.write_str("<FunctionHook>")
    }
}

/// Two `Callable`s are equal if they are the same variant and either name the
/// same LLVM function, or wrap equal `FunctionHook`s.
impl<'p, B: Backend> PartialEq for Callable<'p, B> {
    fn eq(&self, other: &Self) -> bool {
        match self {
            Self::LLVMFunction(lhs) => match other {
                // assume functions are unique by name
                Self::LLVMFunction(rhs) => lhs.name == rhs.name,
                Self::FunctionHook(_) => false,
            },
            Self::FunctionHook(lhs) => match other {
                Self::FunctionHook(rhs) => lhs == rhs,
                Self::LLVMFunction(_) => false,
            },
        }
    }
}

// Our implementation of `PartialEq` satisfies `Eq` under our assumptions:
// namely that functions are unique by name, and provided that equality on
// `FunctionHook` is itself a full equivalence relation.
impl<'p, B: Backend> Eq for Callable<'p, B> {}

/// Hashes the same data that `PartialEq` compares: the function's name for an
/// LLVM function, or the hook itself for a function hook.
impl<'p, B: Backend> Hash for Callable<'p, B> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            Self::FunctionHook(hook) => Hash::hash(hook, state),
            // assume functions are unique by name
            Self::LLVMFunction(function) => Hash::hash(&function.name, state),
        }
    }
}

/// Trait which unifies `GlobalVariable` and `Function`, which are both global objects in LLVM
trait Global {
    /// The LLVM linkage type of this global
    fn get_linkage(&self) -> Linkage;
    /// The name of this global, as an owned `Name`
    fn get_name(&self) -> Name;
}

impl Global for GlobalVariable {
    /// Returns a copy of the variable's `linkage` field.
    fn get_linkage(&self) -> Linkage {
        self.linkage
    }
    /// Returns a clone of the variable's `name` field.
    fn get_name(&self) -> Name {
        self.name.clone()
    }
}

impl Global for Function {
    /// Returns a copy of the function's `linkage` field.
    fn get_linkage(&self) -> Linkage {
        self.linkage
    }
    /// Builds a `Name` from the function's `name` field (the field itself is
    /// not a `Name`, hence the conversion).
    fn get_name(&self) -> Name {
        Name::from(&*self.name)
    }
}

/// Outcome of `GlobalAllocations::allocate_global()`: describes where (if
/// anywhere) the global was recorded.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum AllocationResult {
    /// Allocated the global as public
    Public,
    /// Allocated the global as module-private
    ModulePrivate,
    /// Did not allocate the global (some other definition took precedence,
    /// or the global's linkage type is not supported)
    NoAllocate,
}

impl<'p, B: Backend> GlobalAllocations<'p, B> {
    /// Create a `GlobalAllocations` in which nothing has been allocated yet
    /// (every internal map starts out empty).
    pub fn new() -> Self {
        let allocated_globals = HashMap::default();
        let allocated_hooks = HashMap::default();
        let addr_to_function = HashMap::default();
        let module_private_allocated_globals = HashMap::default();
        let module_private_addr_to_function = HashMap::default();
        Self {
            allocated_globals,
            allocated_hooks,
            addr_to_function,
            module_private_allocated_globals,
            module_private_addr_to_function,
        }
    }

    /// Record that the global variable `var` lives at `addr`.
    ///
    /// `var`: a global variable *definition* (not a declaration), i.e. it must
    /// have an initializer
    ///
    /// `module`: `Module` in which the definition appears
    ///
    /// `addr`: Address at which the global variable should be allocated
    ///
    /// The global variable is recorded as not-yet-initialized;
    /// see notes on `get_global_allocation()`.
    ///
    /// # Panics
    ///
    /// Panics if `var` has no initializer (i.e. is only a declaration).
    pub fn allocate_global_var(
        &mut self,
        var: &'p GlobalVariable,
        module: &'p Module,
        addr: B::BV,
    ) {
        let initializer = var
            .initializer
            .clone()
            .expect("Can't call allocate_global_var() with a global declaration, only a definition");
        // Whether the allocation actually took effect (vs. being shadowed by
        // another definition) is not reported to the caller.
        self.allocate_global(
            var,
            module,
            GlobalAllocation::GlobalVariable {
                addr,
                initializer,
                initialized: Cell::new(false),
            },
        );
    }

    /// Record that the function `func` lives at the given address.
    ///
    /// `func`: a function definition
    ///
    /// `module`: `Module` in which the definition appears
    ///
    /// `addr`: Address at which the function should be allocated, as a
    /// concrete `u64`. This is the key used for address-to-function lookup.
    ///
    /// `addr_bv`: The address as a `B::BV`, stored in the function's
    /// `GlobalAllocation`. (Presumably represents the same value as `addr`;
    /// this method does not check that.)
    ///
    /// Note that we have to pretend to allocate `Function`s so that we can have
    /// pointers to them. (As of this writing, we actually only allocate 64 bits
    /// for every `Function`)
    ///
    /// If another definition of the same name takes precedence, nothing is
    /// recorded for `addr`.
    pub fn allocate_function(
        &mut self,
        func: &'p Function,
        module: &'p Module,
        addr: u64,
        addr_bv: B::BV,
    ) {
        let outcome = self.allocate_global(
            func,
            module,
            GlobalAllocation::Function {
                func,
                module,
                addr: addr_bv,
            },
        );
        // Pick the reverse (address -> function) map matching the scope in
        // which the function was actually allocated
        let reverse_map = match outcome {
            AllocationResult::NoAllocate => return,
            AllocationResult::Public => &mut self.addr_to_function,
            AllocationResult::ModulePrivate => self
                .module_private_addr_to_function
                .entry(module.name.clone())
                .or_default(),
        };
        reverse_map.insert(addr, Callable::LLVMFunction(func));
    }

    /// Record that the function hook `hook` lives at the given address.
    ///
    /// `hook`: a `FunctionHook`
    ///
    /// `addr`: Address at which the function hook should be allocated, as a
    /// concrete `u64`. Pointers with this value will be considered to point to
    /// `hook`.
    ///
    /// `addr_bv`: The address as a `B::BV`; this is what
    /// `get_function_hook_address()` will return for `hook`.
    ///
    /// Note that all function hooks are considered to have global visibility; we
    /// don't at this time support module-private function hooks.
    /// You can still hook module-private functions, but those hooks will apply
    /// to all functions of that name in all modules.
    pub fn allocate_function_hook(&mut self, hook: FunctionHook<'p, B>, addr: u64, addr_bv: B::BV) {
        // the hook is needed in both maps, so one copy has to be made
        let callable = Callable::FunctionHook(hook.clone());
        self.allocated_hooks.insert(hook, addr_bv);
        self.addr_to_function.insert(addr, callable);
    }

    fn allocate_global(
        &mut self,
        global: &'p impl Global,
        module: &'p Module,
        allocation: GlobalAllocation<'p, B::BV>,
    ) -> AllocationResult {
        match global.get_linkage() {
            Linkage::Private | Linkage::Internal => {
                // Module-private global, strong definition
                debug!(
                    "Allocating {:?} (module-private to {:?}) at {:?}",
                    global.get_name(),
                    &module.name,
                    allocation.get_addr()
                );
                match self.module_private_allocated_globals
                    .entry(module.name.clone())
                    .or_default()
                    .entry(global.get_name())
                {
                    Entry::Vacant(entry) => entry.insert(allocation),
                    Entry::Occupied(_) => panic!("Duplicate definitions found for module-private global variable or function {:?} in module {:?}", global.get_name(), &module.name),
                };
                AllocationResult::ModulePrivate
            },
            Linkage::External => {
                // Public global, strong definition
                debug!(
                    "Allocating {:?} (public, strong) at {:?}",
                    global.get_name(),
                    allocation.get_addr()
                );
                match self.allocated_globals.entry(global.get_name()) {
                    Entry::Vacant(entry) => {
                        entry.insert(Definition::Strong(allocation));
                    },
                    Entry::Occupied(mut entry) => {
                        match entry.get() {
                            Definition::Strong(_) => panic!("Duplicate strong definitions found for public global variable or function {:?}", global.get_name()),
                            Definition::Weak(_) => entry.insert(
                                // discard the weak definition in favor of this strong one
                                Definition::Strong(allocation)
                            ),
                        };
                    },
                };
                AllocationResult::Public
            },
            Linkage::AvailableExternally
            | Linkage::LinkOnceAny
            | Linkage::WeakAny
            | Linkage::Common
            | Linkage::ExternalWeak
            | Linkage::LinkOnceODR
            | Linkage::WeakODR => {
                // We treat all of these modes as "Public global, weak definition" under our semantics
                match self.allocated_globals.entry(global.get_name()) {
                    Entry::Vacant(entry) => {
                        debug!(
                            "Allocating {:?} (public, weak) at {:?}",
                            global.get_name(),
                            allocation.get_addr()
                        );
                        entry.insert(Definition::Weak(allocation));
                        AllocationResult::Public
                    },
                    Entry::Occupied(_) => {
                        // don't override an existing definition. If the existing definition
                        // was weak, we arbitrarily choose to leave it rather than replace it
                        // with this weak definition.
                        debug!(
                            "Skipping definition of {:?} (public, weak) as already defined",
                            global.get_name()
                        );
                        AllocationResult::NoAllocate
                    },
                }
            },
            Linkage::Appending => {
                warn!("Global {:?} has 'appending' linkage type, which is not supported. Any attempted use of this global will result in an error.", global.get_name());
                AllocationResult::NoAllocate
            },
            _ => unimplemented!("Linkage type {:?}", global.get_linkage()),
        }
    }

    /// Look up the `GlobalAllocation` for the global variable or function
    /// named `name`, as seen from `module`; returns `None` if there is none.
    /// The `GlobalAllocation` carries the address at which the global was
    /// allocated and, for global variables, whether the variable has been
    /// initialized yet.
    ///
    /// `module`: The `Module` in which the `Name` appeared. Note that modules
    /// may have their own module-private globals with the same name, so the name
    /// alone is not sufficient to identify a unique global.
    ///
    /// If the global variable hasn't been initialized, the caller probably wants
    /// to initialize it. If so, be sure to update the `.initialized` field of
    /// the `GlobalAllocation`.
    pub fn get_global_allocation(
        &self,
        name: &Name,
        module: &Module,
    ) -> Option<&GlobalAllocation<'p, B::BV>> {
        // A module-private definition (if any) has precedence over any public
        // definition that may exist.
        if let Some(module_globals) = self.module_private_allocated_globals.get(&module.name) {
            if let Some(private_allocation) = module_globals.get(name) {
                return Some(private_allocation);
            }
        }
        // No module-private definition; fall back on a public one
        match self.allocated_globals.get(name) {
            Some(definition) => Some(definition.get()),
            None => None,
        }
    }

    /// Get the address at which the given `FunctionHook` has been allocated; or
    /// `None` if not found.
    ///
    /// The returned address is the `addr_bv` which was passed to
    /// `allocate_function_hook()` for this hook (possibly since updated by
    /// `change_solver()`).
    pub fn get_function_hook_address(&self, hook: &FunctionHook<'p, B>) -> Option<&B::BV> {
        self.allocated_hooks.get(hook)
    }

    /// Find the `Callable` (LLVM function or function hook) allocated at
    /// `addr`; returns `None` if no `Callable` was allocated at that address.
    ///
    /// Public functions and hooks are consulted first; only if none is found
    /// at `addr` are the module-private functions of `module` consulted.
    ///
    /// `module`: The `Module` in which the address appeared. Module-private
    /// functions belonging to other modules are not considered.
    pub fn get_func_for_address(&self, addr: u64, module: &Module) -> Option<Callable<'p, B>> {
        if let Some(public_callable) = self.addr_to_function.get(&addr) {
            return Some(public_callable.clone());
        }
        let module_functions = self.module_private_addr_to_function.get(&module.name)?;
        module_functions.get(&addr).cloned()
    }

    /// Adapt the `GlobalAllocations` to a new solver instance, by replacing
    /// every stored address with its counterpart in `new_solver`.
    ///
    /// The new solver instance should have been created (possibly transitively)
    /// via `SolverRef::duplicate()` from the `SolverRef` which the vars in the
    /// `GlobalAllocations` were originally created with (or most recently
    /// changed to). Further, no new variables should have been allocated since
    /// the call to `SolverRef::duplicate()`.
    ///
    /// # Panics
    ///
    /// Panics if `new_solver.match_bv()` fails for any stored address.
    pub fn change_solver(&mut self, new_solver: B::SolverRef) {
        // Translate one address into the new solver
        let rebind = |old_addr: &B::BV| -> B::BV { new_solver.match_bv(old_addr).unwrap() };

        // public globals and functions
        for definition in self.allocated_globals.values_mut() {
            let allocation = definition.get_mut();
            let rebound = rebind(allocation.get_addr());
            allocation.set_addr(rebound);
        }
        // function hooks
        for hook_addr in self.allocated_hooks.values_mut() {
            let rebound = rebind(&*hook_addr);
            *hook_addr = rebound;
        }
        // module-private globals and functions, across all modules
        let private_allocations = self
            .module_private_allocated_globals
            .values_mut()
            .flat_map(|module_globals| module_globals.values_mut());
        for allocation in private_allocations {
            let rebound = rebind(allocation.get_addr());
            allocation.set_addr(rebound);
        }
    }
}