monster/path_exploration/
control_flow_graph.rs

1//! # Handle control flow graphs
2//!
3//! This module defines and handles control flow graphs.
4//!
5//! There are three different kind of edges:
6//! - trivial edges (`pc = pc + 4;`)
7//!   - any non control flow instruction
8//!   - `beq`: false edge
9//! - pure edges
10//!   - `beq`: true edge
11//!   - `jal`: when link not used (=> `rd` is zero)
12//! - stateful edges
13//!   - `jal`: when link is used (=> `rd` is `ra`)
14//!   - `jalr`
15
16use anyhow::{ensure, Context, Error, Result};
17use byteorder::{ByteOrder, LittleEndian};
18use petgraph::{
19    dot::Dot,
20    graph::{EdgeIndex, NodeIndex},
21    visit::EdgeRef,
22};
23use riscu::{decode, Instruction, Program, Register};
24use std::{fmt, mem::size_of, vec::Vec};
25
26type Edge = (NodeIndex, NodeIndex, Option<ProcedureCallId>);
27
28#[derive(Copy, Clone, Debug, PartialEq)]
29pub enum ProcedureCallId {
30    Call(u64),
31    Return(u64),
32}
33
34pub type Graph = petgraph::Graph<Instruction, Option<ProcedureCallId>>;
35
36pub struct ControlFlowGraph {
37    pub graph: Graph,
38    pub start: NodeIndex,
39    pub exit: NodeIndex,
40}
41
42impl ControlFlowGraph {
43    pub fn build_for(program: &Program) -> Result<ControlFlowGraph> {
44        let mut graph = create_instruction_graph(program)?;
45
46        fn add_edges(graph: &mut Graph, edges: &[Edge]) {
47            edges.iter().for_each(|e| {
48                graph.add_edge(e.0, e.1, e.2);
49            });
50        }
51
52        let edges = compute_edges(&graph, construct_edge_if_trivial);
53        add_edges(&mut graph, &edges);
54
55        let pure_edges = compute_edges(&graph, construct_edge_if_pure);
56        add_edges(&mut graph, &pure_edges);
57
58        let jump_edges = StatefulEdgeBuilder::new().compute_stateful_edges(&graph);
59        add_edges(&mut graph, &jump_edges);
60
61        let exit = fix_exit_ecall(&mut graph)?;
62
63        Ok(ControlFlowGraph {
64            graph,
65            start: NodeIndex::new(0),
66            exit,
67        })
68    }
69}
70
71impl fmt::Display for ControlFlowGraph {
72    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
73        let dot_graph = Dot::with_config(&self.graph, &[]);
74
75        write!(f, "{:?}", dot_graph)
76    }
77}
78
79pub type DataSegment = Vec<u8>;
80
81/// Create a `ControlFlowGraph` from an Program without fixing edges
82fn create_instruction_graph(program: &Program) -> Result<Graph> {
83    ensure!(
84        program.code.content.len() % size_of::<u32>() == 0,
85        "RISC-U instructions are 32 bits, so the length of the binary must be a multiple of 4"
86    );
87
88    let mut g = Graph::new();
89
90    program
91        .code
92        .content
93        .chunks_exact(size_of::<u32>())
94        .map(LittleEndian::read_u32)
95        .try_for_each(|raw| {
96            decode(raw).map(|i| {
97                g.add_node(i);
98            })
99        })
100        .context("Failed to decode instructions of program")?;
101
102    Ok(g)
103}
104
105/// Compute trivial edges
106fn construct_edge_if_trivial(graph: &Graph, idx: NodeIndex) -> Option<Edge> {
107    match graph[idx] {
108        Instruction::Jal(_) | Instruction::Jalr(_) => None,
109        _ if idx.index() + 1 < graph.node_count() => {
110            Some((idx, NodeIndex::new(idx.index() + 1), None))
111        }
112        _ => None,
113    }
114}
115
116/// Compute pure edges
117fn construct_edge_if_pure(graph: &Graph, idx: NodeIndex) -> Option<Edge> {
118    match graph[idx] {
119        Instruction::Jal(i) if i.rd() == Register::Zero => Some((
120            idx,
121            NodeIndex::new((((idx.index() as u64) * 4).wrapping_add(i.imm() as u64) / 4) as usize),
122            None,
123        )),
124        Instruction::Beq(i) => Some((
125            idx,
126            NodeIndex::new((((idx.index() as u64) * 4).wrapping_add(i.imm() as u64) / 4) as usize),
127            None,
128        )),
129        _ => None,
130    }
131}
132
133/// Compute all edges in `graph`
134fn compute_edges<F>(graph: &Graph, f: F) -> Vec<Edge>
135where
136    F: Fn(&Graph, NodeIndex) -> Option<Edge>,
137{
138    graph
139        .node_indices()
140        .filter_map(|idx| f(graph, idx))
141        .collect::<Vec<Edge>>()
142}
143
144/// Compute all return locations in a given function starting at idx.
145fn compute_return_edge_position(graph: &Graph, idx: NodeIndex) -> NodeIndex {
146    match graph[idx] {
147        Instruction::Jalr(_) => idx,
148        Instruction::Jal(i) if i.rd() != Register::Zero => {
149            compute_return_edge_position(graph, NodeIndex::new(idx.index() + 1))
150        }
151        Instruction::Beq(_) => compute_return_edge_position(graph, {
152            // second edge is the true branch edge, which jumps to the end of the loop (Selfie
153            graph
154                .edges(idx)
155                .find(|e| e.target().index() != idx.index() + 1)
156                .expect("all BEQ edges are constructed already")
157                .target()
158        }),
159        _ => compute_return_edge_position(
160            graph,
161            graph
162                .edges(idx)
163                .next()
164                .expect("all trivial edges are constructed already")
165                .target(),
166        ),
167    }
168}
169
170struct StatefulEdgeBuilder {
171    procedure_call_id_seed: u64,
172}
173
174impl StatefulEdgeBuilder {
175    pub fn new() -> Self {
176        Self {
177            procedure_call_id_seed: 0,
178        }
179    }
180
181    /// Calculate stateful edges and return a vector containing them
182    pub fn compute_stateful_edges(&mut self, graph: &Graph) -> Vec<Edge> {
183        graph
184            .node_indices()
185            .filter_map(|idx| self.construct_edge_if_stateful(idx, graph))
186            .flatten()
187            .collect()
188    }
189
190    /// Fix stateful edges and return a vector containing them
191    fn construct_edge_if_stateful(&mut self, idx: NodeIndex, graph: &Graph) -> Option<Vec<Edge>> {
192        match graph[idx] {
193            Instruction::Jal(jtype) if jtype.rd() != Register::Zero => {
194                // jump and link => function call
195                let jump_dest = NodeIndex::new(
196                    (((idx.index() as u64) * 4).wrapping_add(jtype.imm() as u64) / 4) as usize,
197                );
198                let return_dest = NodeIndex::new(idx.index() + 1);
199                let id = self.allocate_procedure_call_id();
200
201                let call_edge = (idx, jump_dest, Some(ProcedureCallId::Call(id)));
202
203                let return_edge = (
204                    compute_return_edge_position(graph, jump_dest),
205                    return_dest,
206                    Some(ProcedureCallId::Return(id)),
207                );
208
209                Some(vec![call_edge, return_edge])
210            }
211            _ => None,
212        }
213    }
214
215    fn allocate_procedure_call_id(&mut self) -> u64 {
216        let id = self.procedure_call_id_seed;
217
218        self.procedure_call_id_seed += 1;
219
220        id
221    }
222}
223
224/// Get exit edge if possible
225fn find_possible_exit_edge(graph: &Graph, idx: NodeIndex) -> Option<EdgeIndex> {
226    let prev_idx = NodeIndex::new(idx.index() - 1);
227    let next_idx = NodeIndex::new(idx.index() + 1);
228    match graph[prev_idx] {
229        Instruction::Addi(a) => {
230            let edge = graph.find_edge(idx, next_idx);
231            if a.imm() == 93 {
232                edge
233            } else {
234                None
235            }
236        }
237        _ => None,
238    }
239}
240
241/// Fix the exit ecall edge
242fn fix_exit_ecall(graph: &mut Graph) -> Result<NodeIndex> {
243    graph
244        .node_indices()
245        .find(|idx| {
246            if let Instruction::Ecall(_) = graph[*idx] {
247                if let Some(edge) = find_possible_exit_edge(graph, *idx) {
248                    graph.remove_edge(edge);
249                    return true;
250                }
251            }
252            false
253        })
254        .ok_or_else(|| Error::msg("Could not find exit ecall in binary"))
255}