1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
//! This crate provides various analyses of LLVM IR, such as control-flow
//! graphs, dominator trees, control dependence graphs, etc.
//!
//! For a more thorough introduction to the crate and how to get started,
//! see the [crate's README](https://github.com/cdisselkoen/llvm-ir-analysis/blob/main/README.md).
mod call_graph;
mod control_dep_graph;
mod control_flow_graph;
mod dominator_tree;
mod functions_by_type;
pub use crate::call_graph::CallGraph;
pub use crate::control_dep_graph::ControlDependenceGraph;
pub use crate::control_flow_graph::{CFGNode, ControlFlowGraph};
pub use crate::dominator_tree::{DominatorTree, PostDominatorTree};
pub use crate::functions_by_type::FunctionsByType;
use llvm_ir::{Function, Module};
use log::debug;
use std::cell::{Ref, RefCell};
use std::collections::HashMap;
// Re-export the llvm-ir crate so that our consumers can have only one Cargo.toml entry and don't
// have to worry about matching versions.
pub use llvm_ir;
/// Lazily computes, and then caches, a collection of analyses over a single
/// `Module`
pub struct ModuleAnalysis<'m> {
    /// The `llvm-ir` `Module` being analyzed
    module: &'m Module,
    /// Cache holding the module's call graph (empty until first requested)
    call_graph: SimpleCache<CallGraph<'m>>,
    /// Cache holding the module's `FunctionsByType`, which lets you iterate
    /// over the module's functions by type (empty until first requested)
    functions_by_type: SimpleCache<FunctionsByType<'m>>,
    /// The `FunctionAnalysis` for each function, keyed by function name
    fn_analyses: HashMap<&'m str, FunctionAnalysis<'m>>,
}
impl<'m> ModuleAnalysis<'m> {
    /// Build a `ModuleAnalysis` over `module`.
    ///
    /// No analysis is computed here: this only sets up empty caches and one
    /// `FunctionAnalysis` per entry of `module.functions` (keyed by function
    /// name). Each analysis is computed the first time it is asked for.
    pub fn new(module: &'m Module) -> Self {
        let fn_analyses = module
            .functions
            .iter()
            .map(|func| (func.name.as_str(), FunctionAnalysis::new(func)))
            .collect();
        Self {
            module,
            call_graph: SimpleCache::new(),
            functions_by_type: SimpleCache::new(),
            fn_analyses,
        }
    }

    /// The `Module` this `ModuleAnalysis` was constructed from.
    pub fn module(&self) -> &'m Module {
        self.module
    }

    /// The `CallGraph` of the `Module`.
    ///
    /// Computed on first use and cached afterwards. Computing it requires the
    /// module's `FunctionsByType`, which is therefore fetched (and, if
    /// necessary, computed) first.
    pub fn call_graph(&self) -> Ref<CallGraph<'m>> {
        let compute = || {
            let by_type = self.functions_by_type();
            debug!("computing single-module call graph");
            CallGraph::new(std::iter::once(self.module), &by_type)
        };
        self.call_graph.get_or_insert_with(compute)
    }

    /// The `FunctionsByType` of the `Module`.
    ///
    /// Computed on first use and cached afterwards.
    pub fn functions_by_type(&self) -> Ref<FunctionsByType<'m>> {
        let compute = || {
            debug!("computing single-module functions-by-type");
            FunctionsByType::new(std::iter::once(self.module))
        };
        self.functions_by_type.get_or_insert_with(compute)
    }

    /// Look up the `FunctionAnalysis` of the function called `func_name`.
    ///
    /// # Panics
    ///
    /// Panics if the `Module` this `ModuleAnalysis` was created with has no
    /// function of that name.
    pub fn fn_analysis<'s>(&'s self, func_name: &str) -> &'s FunctionAnalysis<'m> {
        match self.fn_analyses.get(func_name) {
            Some(analysis) => analysis,
            None => panic!("Function named {:?} not found in the Module", func_name),
        }
    }
}
/// Analysis over a set of `Module`s: holds one `ModuleAnalysis` per module,
/// plus a handful of analyses spanning all of the modules at once (e.g., a
/// cross-module call graph)
pub struct CrossModuleAnalysis<'m> {
    /// The `llvm-ir` `Module`s being analyzed
    modules: Vec<&'m Module>,
    /// Cache holding the call graph across all modules (empty until first
    /// requested)
    call_graph: SimpleCache<CallGraph<'m>>,
    /// Cache holding the `FunctionsByType` across all modules, which lets you
    /// iterate over functions by type (empty until first requested)
    functions_by_type: SimpleCache<FunctionsByType<'m>>,
    /// The `ModuleAnalysis` for each module, keyed by module name
    module_analyses: HashMap<&'m str, ModuleAnalysis<'m>>,
}
impl<'m> CrossModuleAnalysis<'m> {
    /// Create a new `CrossModuleAnalysis` for the given set of `Module`s.
    ///
    /// No analysis is computed here: this only records the modules and sets
    /// up a `ModuleAnalysis` for each of them. Individual analyses are
    /// computed lazily on demand.
    ///
    /// The per-module analyses are stored in a map keyed by module name, so
    /// if several of the given modules share a name, a later one replaces an
    /// earlier one in that map and only it can be retrieved with
    /// `module_analysis()`. All given modules still take part in `modules()`,
    /// `functions()`, and the cross-module analyses.
    pub fn new(modules: impl IntoIterator<Item = &'m Module>) -> Self {
        let modules: Vec<&'m Module> = modules.into_iter().collect();
        let module_analyses = modules
            .iter()
            .copied()
            .map(|m| (m.name.as_str(), ModuleAnalysis::new(m)))
            .collect();
        Self {
            modules,
            call_graph: SimpleCache::new(),
            functions_by_type: SimpleCache::new(),
            module_analyses,
        }
    }

    /// Iterate over the analyzed `Module`(s), in the order they were given.
    pub fn modules<'s>(&'s self) -> impl Iterator<Item = &'m Module> + 's {
        self.modules.iter().copied()
    }

    /// Iterate over all the `Function`s in the analyzed `Module`(s).
    ///
    /// Functions are yielded module by module, following `modules()`.
    pub fn functions<'s>(&'s self) -> impl Iterator<Item = &'m Function> + 's {
        // `flat_map` is the idiomatic spelling of `.map(..).flatten()`
        self.modules().flat_map(|m| m.functions.iter())
    }

    /// Get the full `CallGraph` for the `Module`(s).
    ///
    /// This will include both cross-module and within-module calls.
    ///
    /// Computed on first use and cached afterwards. Computing it requires the
    /// cross-module `FunctionsByType`, which is fetched (and, if necessary,
    /// computed) first.
    pub fn call_graph(&self) -> Ref<CallGraph<'m>> {
        self.call_graph.get_or_insert_with(|| {
            let functions_by_type = self.functions_by_type();
            debug!("computing multi-module call graph");
            CallGraph::new(self.modules(), &functions_by_type)
        })
    }

    /// Get the `FunctionsByType` for the `Module`(s).
    ///
    /// Computed on first use and cached afterwards.
    pub fn functions_by_type(&self) -> Ref<FunctionsByType<'m>> {
        self.functions_by_type.get_or_insert_with(|| {
            debug!("computing multi-module functions-by-type");
            FunctionsByType::new(self.modules())
        })
    }

    /// Get the `ModuleAnalysis` for the module with the given name.
    ///
    /// # Panics
    ///
    /// Panics if no module of that name exists in the `Module`(s) which the
    /// `CrossModuleAnalysis` was created with.
    pub fn module_analysis<'s>(&'s self, mod_name: &str) -> &'s ModuleAnalysis<'m> {
        self.module_analyses.get(mod_name).unwrap_or_else(|| {
            panic!(
                "Module named {:?} not found in the CrossModuleAnalysis",
                mod_name
            )
        })
    }

    /// Get the `Function` with the given name from the analyzed `Module`(s).
    ///
    /// Returns both the `Function` and the `Module` it was found in, or `None`
    /// if no function was found with that name.
    ///
    /// # Panics
    ///
    /// Panics if more than one of the analyzed modules has a function with
    /// that name, since the lookup would be ambiguous.
    pub fn get_func_by_name(&self, func_name: &str) -> Option<(&'m Function, &'m Module)> {
        // Lazily walk the modules in order, keeping only those that have a
        // function of the requested name.
        let mut matches = self.modules.iter().filter_map(|&module| {
            module
                .get_func_by_name(func_name)
                .map(|func| (func, module))
        });
        let (func, module) = matches.next()?;
        // Any further match means the name is not unique across modules.
        if let Some((_, other)) = matches.next() {
            panic!(
                "Multiple functions found with name {:?}: one in module {:?}, another in module {:?}",
                func_name, &module.name, &other.name
            );
        }
        Some((func, module))
    }
}
/// Lazily computes, and then caches, a collection of analyses over a single
/// `Function`
pub struct FunctionAnalysis<'m> {
    /// The `llvm-ir` `Function` being analyzed
    function: &'m Function,
    /// Cache holding the function's control flow graph
    control_flow_graph: SimpleCache<ControlFlowGraph<'m>>,
    /// Cache holding the function's dominator tree
    dominator_tree: SimpleCache<DominatorTree<'m>>,
    /// Cache holding the function's postdominator tree
    postdominator_tree: SimpleCache<PostDominatorTree<'m>>,
    /// Cache holding the function's control dependence graph
    control_dep_graph: SimpleCache<ControlDependenceGraph<'m>>,
}
impl<'m> FunctionAnalysis<'m> {
    /// Build a `FunctionAnalysis` over `function`.
    ///
    /// No analysis is computed here: every cache starts out empty and is
    /// filled the first time the corresponding analysis is asked for.
    pub fn new(function: &'m Function) -> Self {
        let control_flow_graph = SimpleCache::new();
        let dominator_tree = SimpleCache::new();
        let postdominator_tree = SimpleCache::new();
        let control_dep_graph = SimpleCache::new();
        Self {
            function,
            control_flow_graph,
            dominator_tree,
            postdominator_tree,
            control_dep_graph,
        }
    }

    /// The `ControlFlowGraph` of the function.
    ///
    /// Computed on first use and cached afterwards.
    pub fn control_flow_graph(&self) -> Ref<ControlFlowGraph<'m>> {
        let func = self.function;
        let compute = || {
            debug!("computing control flow graph for {}", &func.name);
            ControlFlowGraph::new(func)
        };
        self.control_flow_graph.get_or_insert_with(compute)
    }

    /// The `DominatorTree` of the function.
    ///
    /// Computed on first use and cached afterwards. It is built from the
    /// function's control flow graph, which is fetched (and, if necessary,
    /// computed) first.
    pub fn dominator_tree(&self) -> Ref<DominatorTree<'m>> {
        let compute = || {
            let graph = self.control_flow_graph();
            debug!("computing dominator tree for {}", &self.function.name);
            DominatorTree::new(&graph)
        };
        self.dominator_tree.get_or_insert_with(compute)
    }

    /// The `PostDominatorTree` of the function.
    ///
    /// Computed on first use and cached afterwards. It is built from the
    /// function's control flow graph, which is fetched (and, if necessary,
    /// computed) first.
    pub fn postdominator_tree(&self) -> Ref<PostDominatorTree<'m>> {
        let compute = || {
            let graph = self.control_flow_graph();
            debug!("computing postdominator tree for {}", &self.function.name);
            PostDominatorTree::new(&graph)
        };
        self.postdominator_tree.get_or_insert_with(compute)
    }

    /// The `ControlDependenceGraph` of the function.
    ///
    /// Computed on first use and cached afterwards. It is built from the
    /// function's control flow graph and postdominator tree, both of which
    /// are fetched (and, if necessary, computed) first, in that order.
    pub fn control_dependence_graph(&self) -> Ref<ControlDependenceGraph<'m>> {
        let compute = || {
            let graph = self.control_flow_graph();
            let postdoms = self.postdominator_tree();
            debug!("computing control dependence graph for {}", &self.function.name);
            ControlDependenceGraph::new(&graph, &postdoms)
        };
        self.control_dep_graph.get_or_insert_with(compute)
    }
}
/// A single-slot cache that is filled lazily, at most once, through a shared
/// reference.
struct SimpleCache<T> {
    /// `None` until the value has been computed, `Some` from then on
    data: RefCell<Option<T>>,
}

impl<T> SimpleCache<T> {
    /// Create an empty cache (nothing computed yet).
    fn new() -> Self {
        let data = RefCell::new(None);
        Self { data }
    }

    /// Return the cached value; if there is none yet, first run `f`, store
    /// its result, and return that.
    ///
    /// `f` runs while the cache is mutably borrowed, so it must not touch
    /// this same cache (doing so panics inside `RefCell`). Using *other*
    /// caches from within `f` is fine.
    fn get_or_insert_with(&self, f: impl FnOnce() -> T) -> Ref<T> {
        // Only an empty cache is ever borrowed mutably. Once the slot is
        // filled this branch is skipped for good, which is what makes it safe
        // for callers to keep earlier `Ref`s alive across later calls.
        // (The shared borrow taken by the condition ends before the body.)
        if self.data.borrow().is_none() {
            let mut slot = self.data.borrow_mut();
            let previous = slot.replace(f());
            debug_assert!(previous.is_none());
        }
        // The slot is filled on every path that reaches this point, so hand
        // out a shared borrow narrowed down to the value itself.
        let populated = self.data.borrow();
        Ref::map(populated, |slot| {
            slot.as_ref().expect("should be populated now")
        })
    }
}