use std::{collections::HashSet, fmt::Write, sync::OnceLock};
use crate::{
analysis::cfg::{detect_loops, CfgEdge, CfgEdgeKind, LoopForest, LoopInfo},
assembly::{BasicBlock, FlowType, Operand},
utils::{
escape_dot,
graph::{
algorithms::{self, DominatorTree},
DirectedGraph, EdgeId, GraphBase, NodeId, Predecessors, RootedGraph, Successors,
},
},
Error::GraphError,
Result,
};
#[derive(Debug)]
pub struct ControlFlowGraph<'a> {
graph: DirectedGraph<'a, BasicBlock, CfgEdge>,
entry: NodeId,
exits: Vec<NodeId>,
dominators: OnceLock<DominatorTree>,
dominance_frontiers: OnceLock<Vec<HashSet<NodeId>>>,
loop_forest: OnceLock<LoopForest>,
}
impl ControlFlowGraph<'static> {
pub fn from_basic_blocks(blocks: Vec<BasicBlock>) -> Result<Self> {
if blocks.is_empty() {
return Err(GraphError(
"Cannot create CFG from empty block list".to_string(),
));
}
let block_count = blocks.len();
let mut graph: DirectedGraph<BasicBlock, CfgEdge> =
DirectedGraph::with_capacity(block_count, block_count * 2);
let node_ids: Vec<NodeId> = blocks
.into_iter()
.map(|block| graph.add_node(block))
.collect();
for node_id in &node_ids {
let block = graph.node(*node_id).ok_or_else(|| {
GraphError(format!(
"Internal error: node {} not found in graph",
node_id.index()
))
})?;
let successors = block.successors.clone();
let last_instruction = block.instructions.last();
let flow_type = last_instruction.map(|i| i.flow_type);
for (idx, &succ_idx) in successors.iter().enumerate() {
if succ_idx >= block_count {
return Err(GraphError(format!(
"Block {} has successor index {} which exceeds block count {}",
node_id.index(),
succ_idx,
block_count
)));
}
let target_node = node_ids[succ_idx];
let edge_kind = Self::classify_edge(flow_type, idx, successors.len());
let edge = CfgEdge::new(succ_idx, edge_kind);
graph.add_edge(*node_id, target_node, edge)?;
}
}
let entry = node_ids[0]; let mut exits: Vec<NodeId> = Vec::new();
for &node_id in &node_ids {
let block = graph.node(node_id).ok_or_else(|| {
GraphError(format!(
"Internal error: node {} not found in graph",
node_id.index()
))
})?;
let is_exit = block.successors.is_empty()
|| block
.instructions
.last()
.is_some_and(|i| i.flow_type == FlowType::Return);
if is_exit {
exits.push(node_id);
}
}
Ok(Self {
graph,
entry,
exits,
dominators: OnceLock::new(),
dominance_frontiers: OnceLock::new(),
loop_forest: OnceLock::new(),
})
}
}
impl<'a> ControlFlowGraph<'a> {
pub fn from_blocks_ref(blocks: &'a [BasicBlock]) -> Result<Self> {
if blocks.is_empty() {
return Err(GraphError(
"Cannot create CFG from empty block list".to_string(),
));
}
let block_count = blocks.len();
let mut graph: DirectedGraph<'a, BasicBlock, CfgEdge> =
DirectedGraph::from_nodes_borrowed(blocks);
for (block_idx, block) in blocks.iter().enumerate() {
let node_id = NodeId::new(block_idx);
let last_instruction = block.instructions.last();
let flow_type = last_instruction.map(|i| i.flow_type);
for (idx, &succ_idx) in block.successors.iter().enumerate() {
if succ_idx >= block_count {
return Err(GraphError(format!(
"Block {block_idx} has successor index {succ_idx} which exceeds block count {block_count}"
)));
}
let target_node = NodeId::new(succ_idx);
let edge_kind = Self::classify_edge(flow_type, idx, block.successors.len());
let edge = CfgEdge::new(succ_idx, edge_kind);
graph.add_edge(node_id, target_node, edge)?;
}
}
let entry = NodeId::new(0); let exits: Vec<NodeId> = blocks
.iter()
.enumerate()
.filter(|(_, block)| {
block.successors.is_empty()
|| block
.instructions
.last()
.is_some_and(|i| i.flow_type == FlowType::Return)
})
.map(|(block_idx, _)| NodeId::new(block_idx))
.collect();
Ok(Self {
graph,
entry,
exits,
dominators: OnceLock::new(),
dominance_frontiers: OnceLock::new(),
loop_forest: OnceLock::new(),
})
}
#[must_use]
pub fn into_owned(self) -> ControlFlowGraph<'static> {
ControlFlowGraph {
graph: self.graph.into_owned(),
entry: self.entry,
exits: self.exits,
dominators: self.dominators,
dominance_frontiers: self.dominance_frontiers,
loop_forest: self.loop_forest,
}
}
fn classify_edge(
flow_type: Option<FlowType>,
successor_index: usize,
successor_count: usize,
) -> CfgEdgeKind {
match flow_type {
Some(FlowType::ConditionalBranch) => {
if successor_index == 0 {
CfgEdgeKind::ConditionalTrue
} else {
CfgEdgeKind::ConditionalFalse
}
}
Some(FlowType::Switch) => {
if successor_index == successor_count - 1 && successor_count > 1 {
CfgEdgeKind::Switch { case_value: None }
} else {
let case_value = i32::try_from(successor_index).ok();
CfgEdgeKind::Switch { case_value }
}
}
Some(FlowType::Leave) => CfgEdgeKind::Leave,
Some(FlowType::EndFinally) => CfgEdgeKind::EndFinally,
_ => CfgEdgeKind::Unconditional,
}
}
#[must_use]
pub const fn entry(&self) -> NodeId {
self.entry
}
#[must_use]
pub fn exits(&self) -> &[NodeId] {
&self.exits
}
#[must_use]
pub fn block_count(&self) -> usize {
self.graph.node_count()
}
#[must_use]
pub fn block(&self, node_id: NodeId) -> Option<&BasicBlock> {
self.graph.node(node_id)
}
#[must_use]
pub fn dominators(&self) -> &DominatorTree {
self.dominators
.get_or_init(|| algorithms::compute_dominators(&self.graph, self.entry))
}
#[must_use]
pub fn dominance_frontiers(&self) -> &Vec<HashSet<NodeId>> {
self.dominance_frontiers
.get_or_init(|| algorithms::compute_dominance_frontiers(&self.graph, self.dominators()))
}
#[must_use]
pub fn loop_forest(&self) -> &LoopForest {
self.loop_forest.get_or_init(|| self.detect_loop_forest())
}
#[must_use]
pub fn loops(&self) -> &[LoopInfo] {
self.loop_forest().loops()
}
fn detect_loop_forest(&self) -> LoopForest {
detect_loops(&self.graph, self.dominators())
}
#[must_use]
pub fn has_loops(&self) -> bool {
!self.loop_forest().is_empty()
}
#[must_use]
pub fn innermost_loop(&self, node: NodeId) -> Option<&LoopInfo> {
self.loop_forest().innermost_loop(node)
}
pub fn successors(&self, node_id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
self.graph.successors(node_id)
}
pub fn predecessors(&self, node_id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
self.graph.predecessors(node_id)
}
pub fn outgoing_edges(
&self,
node_id: NodeId,
) -> impl Iterator<Item = (EdgeId, NodeId, &CfgEdge)> + '_ {
self.graph
.outgoing_edges(node_id)
.filter_map(|(edge_id, edge_data)| {
self.graph
.edge_endpoints(edge_id)
.map(|(_, target)| (edge_id, target, edge_data))
})
}
#[must_use]
pub fn reverse_postorder(&self) -> Vec<NodeId> {
algorithms::reverse_postorder(&self.graph, self.entry)
}
#[must_use]
pub fn postorder(&self) -> Vec<NodeId> {
algorithms::postorder(&self.graph, self.entry)
}
pub fn dfs(&self) -> impl Iterator<Item = NodeId> + '_ {
algorithms::dfs(&self.graph, self.entry)
}
pub fn bfs(&self) -> impl Iterator<Item = NodeId> + '_ {
algorithms::bfs(&self.graph, self.entry)
}
#[must_use]
pub fn graph(&self) -> &DirectedGraph<'a, BasicBlock, CfgEdge> {
&self.graph
}
pub fn node_ids(&self) -> impl Iterator<Item = NodeId> + '_ {
self.graph.node_ids()
}
#[must_use]
pub fn dominates(&self, dominator: NodeId, dominated: NodeId) -> bool {
self.dominators().dominates(dominator, dominated)
}
#[must_use]
pub fn idom(&self, node_id: NodeId) -> Option<NodeId> {
self.dominators().immediate_dominator(node_id)
}
#[must_use]
pub fn to_dot(&self, title: Option<&str>) -> String {
let mut dot = String::new();
dot.push_str("digraph CFG {\n");
if let Some(name) = title {
let _ = writeln!(dot, " label=\"CFG: {}\";", escape_dot(name));
}
dot.push_str(" labelloc=t;\n");
dot.push_str(" node [shape=box, fontname=\"Courier\", fontsize=10];\n");
dot.push_str(" edge [fontname=\"Courier\", fontsize=9];\n\n");
for block_idx in 0..self.block_count() {
let node_id = NodeId::new(block_idx);
if let Some(block) = self.block(node_id) {
let is_entry = node_id == self.entry;
let is_exit = self.exits.contains(&node_id);
let node_name = format!("B{block_idx}_{:04X}", block.rva);
let mut label = format!("B{block_idx}_{:04X}", block.rva);
if is_entry {
label.push_str(" (entry)");
}
if is_exit {
label.push_str(" (exit)");
}
label.push_str("\\l");
for instr in &block.instructions {
let _ = write!(label, "{:04X}: {}", instr.rva, escape_dot(instr.mnemonic));
match &instr.operand {
Operand::None => {}
Operand::Immediate(imm) => {
let _ = write!(label, " {}", escape_dot(&format!("{imm:?}")));
}
Operand::Target(addr) => {
let _ = write!(label, " 0x{addr:X}");
}
Operand::Token(tok) => {
let _ = write!(label, " {tok:?}");
}
Operand::Local(idx) => {
let _ = write!(label, " V_{idx}");
}
Operand::Argument(idx) => {
let _ = write!(label, " A_{idx}");
}
Operand::Switch(targets) => {
let _ = write!(label, " [{}]", targets.len());
}
}
label.push_str("\\l"); }
let style = if is_entry {
", style=filled, fillcolor=lightgreen"
} else if is_exit {
", style=filled, fillcolor=lightcoral"
} else {
""
};
let _ = writeln!(dot, " {node_name} [label=\"{label}\"{style}];");
}
}
dot.push('\n');
for block_idx in 0..self.block_count() {
let node_id = NodeId::new(block_idx);
let source_rva = self.block(node_id).map_or(0, |b| b.rva);
let source_name = format!("B{block_idx}_{source_rva:04X}");
for (_, target, edge) in self.outgoing_edges(node_id) {
let target_rva = self.block(target).map_or(0, |b| b.rva);
let target_name = format!("B{}_{target_rva:04X}", target.index());
let edge_label = match edge.kind() {
CfgEdgeKind::Unconditional => String::new(),
CfgEdgeKind::ConditionalTrue => "true".to_string(),
CfgEdgeKind::ConditionalFalse => "false".to_string(),
CfgEdgeKind::Switch { case_value } => {
case_value.map_or("default".to_string(), |v| format!("case {v}"))
}
CfgEdgeKind::ExceptionHandler { .. } => "catch".to_string(),
CfgEdgeKind::Leave => "leave".to_string(),
CfgEdgeKind::EndFinally => "endfinally".to_string(),
};
let color = match edge.kind() {
CfgEdgeKind::Unconditional => "black",
CfgEdgeKind::ConditionalTrue => "green",
CfgEdgeKind::ConditionalFalse => "red",
CfgEdgeKind::Switch { .. } => "blue",
_ => "purple",
};
let _ = writeln!(
dot,
" {source_name} -> {target_name} [label=\"{}\", color={color}];",
escape_dot(&edge_label)
);
}
}
dot.push_str("}\n");
dot
}
}
impl GraphBase for ControlFlowGraph<'_> {
fn node_count(&self) -> usize {
self.graph.node_count()
}
fn node_ids(&self) -> impl Iterator<Item = NodeId> {
self.graph.node_ids()
}
}
impl Successors for ControlFlowGraph<'_> {
fn successors(&self, node: NodeId) -> impl Iterator<Item = NodeId> {
self.graph.successors(node)
}
}
impl Predecessors for ControlFlowGraph<'_> {
fn predecessors(&self, node: NodeId) -> impl Iterator<Item = NodeId> {
self.graph.predecessors(node)
}
}
impl RootedGraph for ControlFlowGraph<'_> {
fn entry(&self) -> NodeId {
self.entry
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::assembly::{Instruction, InstructionCategory, Operand, StackBehavior};
fn make_block(id: usize, successors: Vec<usize>, flow_type: FlowType) -> BasicBlock {
let mut block = BasicBlock::new(id, 0x1000 + (id * 0x10) as u64, id * 0x10);
block.successors = successors;
let instruction = Instruction {
rva: block.rva,
offset: block.offset as u64,
size: 1,
opcode: 0,
prefix: 0,
mnemonic: "test",
category: InstructionCategory::ControlFlow,
flow_type,
operand: Operand::None,
stack_behavior: StackBehavior {
pops: 0,
pushes: 0,
net_effect: 0,
},
branch_targets: vec![],
};
block.instructions.push(instruction);
block
}
#[test]
fn test_cfg_from_empty_blocks() {
let result = ControlFlowGraph::from_basic_blocks(vec![]);
assert!(result.is_err());
}
#[test]
fn test_cfg_single_block() {
let blocks = vec![make_block(0, vec![], FlowType::Return)];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert_eq!(cfg.block_count(), 1);
assert_eq!(cfg.entry(), NodeId::new(0));
assert_eq!(cfg.exits().len(), 1);
assert_eq!(cfg.exits()[0], NodeId::new(0));
}
#[test]
fn test_cfg_linear_blocks() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![2], FlowType::Sequential),
make_block(2, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert_eq!(cfg.block_count(), 3);
assert_eq!(cfg.entry(), NodeId::new(0));
assert_eq!(cfg.exits().len(), 1);
let succ_0: Vec<_> = cfg.successors(NodeId::new(0)).collect();
assert_eq!(succ_0, vec![NodeId::new(1)]);
let succ_1: Vec<_> = cfg.successors(NodeId::new(1)).collect();
assert_eq!(succ_1, vec![NodeId::new(2)]);
let succ_2: Vec<_> = cfg.successors(NodeId::new(2)).collect();
assert!(succ_2.is_empty());
}
#[test]
fn test_cfg_diamond_shape() {
let blocks = vec![
make_block(0, vec![1, 2], FlowType::ConditionalBranch),
make_block(1, vec![3], FlowType::Sequential),
make_block(2, vec![3], FlowType::Sequential),
make_block(3, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert_eq!(cfg.block_count(), 4);
let edges: Vec<_> = cfg.outgoing_edges(NodeId::new(0)).collect();
assert_eq!(edges.len(), 2);
assert_eq!(*edges[0].2.kind(), CfgEdgeKind::ConditionalTrue);
assert_eq!(*edges[1].2.kind(), CfgEdgeKind::ConditionalFalse);
let dominators = cfg.dominators();
assert!(dominators.dominates(NodeId::new(0), NodeId::new(1)));
assert!(dominators.dominates(NodeId::new(0), NodeId::new(2)));
assert!(dominators.dominates(NodeId::new(0), NodeId::new(3)));
assert!(!dominators.dominates(NodeId::new(1), NodeId::new(3)));
assert!(!dominators.dominates(NodeId::new(2), NodeId::new(3)));
}
#[test]
fn test_cfg_with_loop() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![2], FlowType::Sequential),
make_block(2, vec![1, 3], FlowType::ConditionalBranch),
make_block(3, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert_eq!(cfg.block_count(), 4);
let pred_1: Vec<_> = cfg.predecessors(NodeId::new(1)).collect();
assert_eq!(pred_1.len(), 2);
assert!(pred_1.contains(&NodeId::new(0)));
assert!(pred_1.contains(&NodeId::new(2)));
assert!(cfg.dominates(NodeId::new(1), NodeId::new(2)));
}
#[test]
fn test_cfg_traversal_orders() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![2], FlowType::Sequential),
make_block(2, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
let dfs_order: Vec<_> = cfg.dfs().collect();
assert_eq!(dfs_order.len(), 3);
assert_eq!(dfs_order[0], NodeId::new(0));
let bfs_order: Vec<_> = cfg.bfs().collect();
assert_eq!(bfs_order.len(), 3);
assert_eq!(bfs_order[0], NodeId::new(0));
let rpo = cfg.reverse_postorder();
assert_eq!(rpo.len(), 3);
assert_eq!(rpo[0], NodeId::new(0));
assert_eq!(rpo[2], NodeId::new(2));
}
#[test]
fn test_cfg_invalid_successor() {
let blocks = vec![make_block(0, vec![5], FlowType::Sequential)];
let result = ControlFlowGraph::from_basic_blocks(blocks);
assert!(result.is_err());
}
#[test]
fn test_cfg_switch_edges() {
let blocks = vec![
make_block(0, vec![1, 2, 3], FlowType::Switch), make_block(1, vec![4], FlowType::Sequential), make_block(2, vec![4], FlowType::Sequential), make_block(3, vec![4], FlowType::Sequential), make_block(4, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
let edges: Vec<_> = cfg.outgoing_edges(NodeId::new(0)).collect();
assert_eq!(edges.len(), 3);
assert_eq!(
*edges[0].2.kind(),
CfgEdgeKind::Switch {
case_value: Some(0)
}
);
assert_eq!(
*edges[1].2.kind(),
CfgEdgeKind::Switch {
case_value: Some(1)
}
);
assert_eq!(*edges[2].2.kind(), CfgEdgeKind::Switch { case_value: None });
}
#[test]
fn test_cfg_no_loops() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![2], FlowType::Sequential),
make_block(2, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert!(!cfg.has_loops());
assert_eq!(cfg.loops().len(), 0);
}
#[test]
fn test_cfg_simple_loop_detection() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![2], FlowType::Sequential),
make_block(2, vec![1, 3], FlowType::ConditionalBranch),
make_block(3, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert!(cfg.has_loops());
let loops = cfg.loops();
assert_eq!(loops.len(), 1);
let loop0 = &loops[0];
assert_eq!(loop0.header, NodeId::new(1)); assert!(loop0.body.contains(&NodeId::new(1))); assert!(loop0.body.contains(&NodeId::new(2))); assert!(!loop0.body.contains(&NodeId::new(0))); assert!(!loop0.body.contains(&NodeId::new(3))); assert_eq!(loop0.latches.len(), 1);
assert_eq!(loop0.latches[0], NodeId::new(2)); assert_eq!(loop0.depth, 0); }
#[test]
fn test_cfg_nested_loops() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![2], FlowType::Sequential),
make_block(2, vec![3], FlowType::Sequential),
make_block(3, vec![2, 1, 4], FlowType::Switch), make_block(4, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert!(cfg.has_loops());
let loops = cfg.loops();
assert_eq!(loops.len(), 2);
let outer_loop = loops.iter().find(|l| l.header == NodeId::new(1)).unwrap();
let inner_loop = loops.iter().find(|l| l.header == NodeId::new(2)).unwrap();
assert!(inner_loop.depth > outer_loop.depth);
for node in &inner_loop.body {
assert!(outer_loop.body.contains(node));
}
}
#[test]
fn test_cfg_innermost_loop() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![2], FlowType::Sequential),
make_block(2, vec![3], FlowType::Sequential),
make_block(3, vec![2, 1, 4], FlowType::Switch),
make_block(4, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert!(cfg.innermost_loop(NodeId::new(0)).is_none());
assert!(cfg.innermost_loop(NodeId::new(4)).is_none());
let innermost = cfg.innermost_loop(NodeId::new(3)).unwrap();
assert_eq!(innermost.header, NodeId::new(2));
}
#[test]
fn test_cfg_self_loop() {
let blocks = vec![
make_block(0, vec![1], FlowType::Sequential),
make_block(1, vec![1, 2], FlowType::ConditionalBranch), make_block(2, vec![], FlowType::Return),
];
let cfg = ControlFlowGraph::from_basic_blocks(blocks).unwrap();
assert!(cfg.has_loops());
let loops = cfg.loops();
assert_eq!(loops.len(), 1);
let self_loop = &loops[0];
assert_eq!(self_loop.header, NodeId::new(1));
assert_eq!(self_loop.size(), 1); assert_eq!(self_loop.latches.len(), 1);
assert_eq!(self_loop.latches[0], NodeId::new(1)); }
}