llvm_ir_analysis/
lib.rs

1//! This crate provides various analyses of LLVM IR, such as control-flow
2//! graphs, dominator trees, control dependence graphs, etc.
3//!
4//! For a more thorough introduction to the crate and how to get started,
5//! see the [crate's README](https://github.com/cdisselkoen/llvm-ir-analysis/blob/main/README.md).
6
7mod call_graph;
8mod control_dep_graph;
9mod control_flow_graph;
10mod dominator_tree;
11mod functions_by_type;
12
13pub use crate::call_graph::CallGraph;
14pub use crate::control_dep_graph::ControlDependenceGraph;
15pub use crate::control_flow_graph::{CFGNode, ControlFlowGraph};
16pub use crate::dominator_tree::{DominatorTree, PostDominatorTree};
17pub use crate::functions_by_type::FunctionsByType;
18use llvm_ir::{Function, Module};
19use log::debug;
20use std::cell::{Ref, RefCell};
21use std::collections::HashMap;
22
23// Re-export the llvm-ir crate so that our consumers can have only one Cargo.toml entry and don't
24// have to worry about matching versions.
25pub use llvm_ir;
26
27/// Computes (and caches the results of) various analyses on a given `Module`
28pub struct ModuleAnalysis<'m> {
29    /// Reference to the `llvm-ir` `Module`
30    module: &'m Module,
31    /// Call graph for the module
32    call_graph: SimpleCache<CallGraph<'m>>,
33    /// `FunctionsByType`, which allows you to iterate over the module's
34    /// functions by type
35    functions_by_type: SimpleCache<FunctionsByType<'m>>,
36    /// Map from function name to the `FunctionAnalysis` for that function
37    fn_analyses: HashMap<&'m str, FunctionAnalysis<'m>>,
38}
39
40impl<'m> ModuleAnalysis<'m> {
41    /// Create a new `ModuleAnalysis` for the given `Module`.
42    ///
43    /// This method itself is cheap; individual analyses will be computed lazily
44    /// on demand.
45    pub fn new(module: &'m Module) -> Self {
46        Self {
47            module,
48            call_graph: SimpleCache::new(),
49            functions_by_type: SimpleCache::new(),
50            fn_analyses: module
51                .functions
52                .iter()
53                .map(|f| (f.name.as_str(), FunctionAnalysis::new(f)))
54                .collect(),
55        }
56    }
57
58    /// Get a reference to the `Module` which the `ModuleAnalysis` was created
59    /// with.
60    pub fn module(&self) -> &'m Module {
61        self.module
62    }
63
64    /// Get the `CallGraph` for the `Module`.
65    pub fn call_graph(&self) -> Ref<CallGraph<'m>> {
66        self.call_graph.get_or_insert_with(|| {
67            let functions_by_type = self.functions_by_type();
68            debug!("computing single-module call graph");
69            CallGraph::new(std::iter::once(self.module), &functions_by_type)
70        })
71    }
72
73    /// Get the `FunctionsByType` for the `Module`.
74    pub fn functions_by_type(&self) -> Ref<FunctionsByType<'m>> {
75        self.functions_by_type.get_or_insert_with(|| {
76            debug!("computing single-module functions-by-type");
77            FunctionsByType::new(std::iter::once(self.module))
78        })
79    }
80
81    /// Get the `FunctionAnalysis` for the function with the given name.
82    ///
83    /// Panics if no function of that name exists in the `Module` which the
84    /// `ModuleAnalysis` was created with.
85    pub fn fn_analysis<'s>(&'s self, func_name: &str) -> &'s FunctionAnalysis<'m> {
86        self.fn_analyses
87            .get(func_name)
88            .unwrap_or_else(|| panic!("Function named {:?} not found in the Module", func_name))
89    }
90}
91
92/// Analyzes multiple `Module`s, providing a `ModuleAnalysis` for each; and also
93/// provides a few additional cross-module analyses (e.g., a cross-module call
94/// graph)
95pub struct CrossModuleAnalysis<'m> {
96    /// Reference to the `llvm-ir` `Module`s
97    modules: Vec<&'m Module>,
98    /// Cross-module call graph
99    call_graph: SimpleCache<CallGraph<'m>>,
100    /// `FunctionsByType`, which allows you to iterate over functions by type
101    functions_by_type: SimpleCache<FunctionsByType<'m>>,
102    /// Map from module name to the `ModuleAnalysis` for that module
103    module_analyses: HashMap<&'m str, ModuleAnalysis<'m>>,
104}
105
106impl<'m> CrossModuleAnalysis<'m> {
107    /// Create a new `CrossModuleAnalysis` for the given set of `Module`s.
108    ///
109    /// This method itself is cheap; individual analyses will be computed lazily
110    /// on demand.
111    pub fn new(modules: impl IntoIterator<Item = &'m Module>) -> Self {
112        let modules: Vec<&'m Module> = modules.into_iter().collect();
113        let module_analyses = modules
114            .iter()
115            .copied()
116            .map(|m| (m.name.as_str(), ModuleAnalysis::new(m)))
117            .collect();
118        Self {
119            modules,
120            call_graph: SimpleCache::new(),
121            functions_by_type: SimpleCache::new(),
122            module_analyses,
123        }
124    }
125
126    /// Iterate over the analyzed `Module`(s).
127    pub fn modules<'s>(&'s self) -> impl Iterator<Item = &'m Module> + 's {
128        self.modules.iter().copied()
129    }
130
131    /// Iterate over all the `Function`s in the analyzed `Module`(s).
132    pub fn functions<'s>(&'s self) -> impl Iterator<Item = &'m Function> + 's {
133        self.modules().map(|m| m.functions.iter()).flatten()
134    }
135
136    /// Get the full `CallGraph` for the `Module`(s).
137    ///
138    /// This will include both cross-module and within-module calls.
139    pub fn call_graph(&self) -> Ref<CallGraph<'m>> {
140        self.call_graph.get_or_insert_with(|| {
141            let functions_by_type = self.functions_by_type();
142            debug!("computing multi-module call graph");
143            CallGraph::new(self.modules(), &functions_by_type)
144        })
145    }
146
147    /// Get the `FunctionsByType` for the `Module`(s).
148    pub fn functions_by_type(&self) -> Ref<FunctionsByType<'m>> {
149        self.functions_by_type.get_or_insert_with(|| {
150            debug!("computing multi-module functions-by-type");
151            FunctionsByType::new(self.modules())
152        })
153    }
154
155    /// Get the `ModuleAnalysis` for the module with the given name.
156    ///
157    /// Panics if no module of that name exists in the `Module`(s) which the
158    /// `CrossModuleAnalysis` was created with.
159    pub fn module_analysis<'s>(&'s self, mod_name: &str) -> &'s ModuleAnalysis<'m> {
160        self.module_analyses.get(mod_name).unwrap_or_else(|| {
161            panic!(
162                "Module named {:?} not found in the CrossModuleAnalysis",
163                mod_name
164            )
165        })
166    }
167
168    /// Get the `Function` with the given name from the analyzed `Module`(s).
169    ///
170    /// Returns both the `Function` and the `Module` it was found in, or `None`
171    /// if no function was found with that name.
172    pub fn get_func_by_name(&self, func_name: &str) -> Option<(&'m Function, &'m Module)> {
173        let mut retval = None;
174        for &module in &self.modules {
175            if let Some(func) = module.get_func_by_name(func_name) {
176                match retval {
177                    None => retval = Some((func, module)),
178                    Some((_, retmod)) => panic!("Multiple functions found with name {:?}: one in module {:?}, another in module {:?}", func_name, &retmod.name, &module.name),
179                }
180            }
181        }
182        retval
183    }
184}
185
186/// Computes (and caches the results of) various analyses on a given `Function`
187pub struct FunctionAnalysis<'m> {
188    /// Reference to the `llvm-ir` `Function`
189    function: &'m Function,
190    /// Control flow graph for the function
191    control_flow_graph: SimpleCache<ControlFlowGraph<'m>>,
192    /// Dominator tree for the function
193    dominator_tree: SimpleCache<DominatorTree<'m>>,
194    /// Postdominator tree for the function
195    postdominator_tree: SimpleCache<PostDominatorTree<'m>>,
196    /// Control dependence graph for the function
197    control_dep_graph: SimpleCache<ControlDependenceGraph<'m>>,
198}
199
200impl<'m> FunctionAnalysis<'m> {
201    /// Create a new `FunctionAnalysis` for the given `Function`.
202    ///
203    /// This method itself is cheap; individual analyses will be computed lazily
204    /// on demand.
205    pub fn new(function: &'m Function) -> Self {
206        Self {
207            function,
208            control_flow_graph: SimpleCache::new(),
209            dominator_tree: SimpleCache::new(),
210            postdominator_tree: SimpleCache::new(),
211            control_dep_graph: SimpleCache::new(),
212        }
213    }
214
215    /// Get the `ControlFlowGraph` for the function.
216    pub fn control_flow_graph(&self) -> Ref<ControlFlowGraph<'m>> {
217        self.control_flow_graph.get_or_insert_with(|| {
218            debug!("computing control flow graph for {}", &self.function.name);
219            ControlFlowGraph::new(self.function)
220        })
221    }
222
223    /// Get the `DominatorTree` for the function.
224    pub fn dominator_tree(&self) -> Ref<DominatorTree<'m>> {
225        self.dominator_tree.get_or_insert_with(|| {
226            let cfg = self.control_flow_graph();
227            debug!("computing dominator tree for {}", &self.function.name);
228            DominatorTree::new(&cfg)
229        })
230    }
231
232    /// Get the `PostDominatorTree` for the function.
233    pub fn postdominator_tree(&self) -> Ref<PostDominatorTree<'m>> {
234        self.postdominator_tree.get_or_insert_with(|| {
235            let cfg = self.control_flow_graph();
236            debug!("computing postdominator tree for {}", &self.function.name);
237            PostDominatorTree::new(&cfg)
238        })
239    }
240
241    /// Get the `ControlDependenceGraph` for the function.
242    pub fn control_dependence_graph(&self) -> Ref<ControlDependenceGraph<'m>> {
243        self.control_dep_graph.get_or_insert_with(|| {
244            let cfg = self.control_flow_graph();
245            let postdomtree = self.postdominator_tree();
246            debug!(
247                "computing control dependence graph for {}",
248                &self.function.name
249            );
250            ControlDependenceGraph::new(&cfg, &postdomtree)
251        })
252    }
253}
254
255struct SimpleCache<T> {
256    /// `None` if not computed yet
257    data: RefCell<Option<T>>,
258}
259
260impl<T> SimpleCache<T> {
261    fn new() -> Self {
262        Self {
263            data: RefCell::new(None),
264        }
265    }
266
267    /// Get the cached value, or if no value is cached, compute the value using
268    /// the given closure, then cache that result and return it
269    fn get_or_insert_with(&self, f: impl FnOnce() -> T) -> Ref<T> {
270        // borrow mutably only if it's empty. else don't even try to borrow mutably
271        let need_mutable_borrow = self.data.borrow().is_none();
272        if need_mutable_borrow {
273            let old_val = self.data.borrow_mut().replace(f());
274            debug_assert!(old_val.is_none());
275        }
276        // now, either way, it's populated, so we borrow immutably and return.
277        // future users can also borrow immutably using this function (even
278        // while this borrow is still outstanding), since it won't try to borrow
279        // mutably in the future.
280        Ref::map(self.data.borrow(), |o| {
281            o.as_ref().expect("should be populated now")
282        })
283    }
284}