use anyhow::Result;
use petgraph::graph::DiGraph;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use crate::storage::{Backend, CfgBlockData};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CfgDiff {
pub function_id: i64,
pub function_name: String,
pub before_snapshot: String,
pub after_snapshot: String,
pub added_blocks: Vec<BlockDiff>,
pub deleted_blocks: Vec<BlockDiff>,
pub modified_blocks: Vec<BlockChange>,
pub added_edges: Vec<EdgeDiff>,
pub deleted_edges: Vec<EdgeDiff>,
pub structural_similarity: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct BlockDiff {
pub block_id: i64,
pub kind: String,
pub terminator: String,
pub source_location: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockChange {
pub block_id: i64,
pub before: BlockDiff,
pub after: BlockDiff,
pub change_type: ChangeType,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ChangeType {
TerminatorChanged { before: String, after: String },
SourceLocationChanged,
BothChanged,
EdgesChanged,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct EdgeDiff {
pub from_block: i64,
pub to_block: i64,
pub edge_type: String,
}
pub fn compute_cfg_diff(
backend: &Backend,
function_id: i64,
before_snapshot: &str,
after_snapshot: &str,
) -> Result<CfgDiff> {
let function_name = backend
.get_entity(function_id)
.map(|e| e.name.clone())
.unwrap_or_else(|| format!("<function_{}>", function_id));
let blocks_before = backend.get_cfg_blocks(function_id)?;
let blocks_after = backend.get_cfg_blocks(function_id)?;
let before_map: HashMap<i64, BlockDiff> = blocks_before
.iter()
.enumerate()
.map(|(idx, b)| {
(
idx as i64,
BlockDiff {
block_id: idx as i64,
kind: b.kind.clone(),
terminator: b.terminator.clone(),
source_location: format!(
"{}:{}:{}-{}:{}",
"",
b.start_line,
b.start_col,
b.end_line,
b.end_col
),
},
)
})
.collect();
let after_map: HashMap<i64, BlockDiff> = blocks_after
.iter()
.enumerate()
.map(|(idx, b)| {
(
idx as i64,
BlockDiff {
block_id: idx as i64,
kind: b.kind.clone(),
terminator: b.terminator.clone(),
source_location: format!(
"{}:{}:{}-{}:{}",
"",
b.start_line,
b.start_col,
b.end_line,
b.end_col
),
},
)
})
.collect();
let before_ids: HashSet<i64> = before_map.keys().copied().collect();
let after_ids: HashSet<i64> = after_map.keys().copied().collect();
let added_block_ids: Vec<_> = after_ids.difference(&before_ids).copied().collect();
let deleted_block_ids: Vec<_> = before_ids.difference(&after_ids).copied().collect();
let common_ids: Vec<_> = before_ids.intersection(&after_ids).copied().collect();
let added_blocks: Vec<BlockDiff> = added_block_ids
.iter()
.filter_map(|id| after_map.get(id).cloned())
.collect();
let deleted_blocks: Vec<BlockDiff> = deleted_block_ids
.iter()
.filter_map(|id| before_map.get(id).cloned())
.collect();
let modified_blocks: Vec<BlockChange> = common_ids
.iter()
.filter_map(|id| {
let before = before_map.get(id)?;
let after = after_map.get(id)?;
if before == after {
return None;
}
let terminator_changed = before.terminator != after.terminator;
let location_changed = before.source_location != after.source_location;
let change_type = match (terminator_changed, location_changed) {
(true, false) => ChangeType::TerminatorChanged {
before: before.terminator.clone(),
after: after.terminator.clone(),
},
(false, true) => ChangeType::SourceLocationChanged,
(true, true) => ChangeType::BothChanged,
(false, false) => return None, };
Some(BlockChange {
block_id: *id,
before: before.clone(),
after: after.clone(),
change_type,
})
})
.collect();
let (added_edges, deleted_edges) = compute_edge_diff(&before_map, &after_map)?;
let total_changes = added_blocks.len()
+ deleted_blocks.len()
+ modified_blocks.len()
+ added_edges.len()
+ deleted_edges.len();
let total_blocks_before = before_map.len().max(1);
let structural_similarity = if total_changes == 0 {
1.0
} else {
let max_changes = total_blocks_before * 2;
let similarity_ratio = 1.0 - (total_changes as f64 / max_changes.max(1) as f64);
similarity_ratio.max(0.0)
};
Ok(CfgDiff {
function_id,
function_name,
before_snapshot: before_snapshot.to_string(),
after_snapshot: after_snapshot.to_string(),
added_blocks,
deleted_blocks,
modified_blocks,
added_edges,
deleted_edges,
structural_similarity,
})
}
fn compute_edge_diff(
before: &HashMap<i64, BlockDiff>,
after: &HashMap<i64, BlockDiff>,
) -> Result<(Vec<EdgeDiff>, Vec<EdgeDiff>)> {
let before_edges = derive_edges(before);
let after_edges = derive_edges(after);
let before_set: HashSet<_> = before_edges.iter().collect();
let after_set: HashSet<_> = after_edges.iter().collect();
let added: Vec<_> = after_set
.difference(&before_set)
.map(|e| (*e).clone())
.collect();
let deleted: Vec<_> = before_set
.difference(&after_set)
.map(|e| (*e).clone())
.collect();
Ok((added, deleted))
}
fn derive_edges(blocks: &HashMap<i64, BlockDiff>) -> Vec<EdgeDiff> {
let mut edges = Vec::new();
let mut block_ids: Vec<_> = blocks.keys().copied().collect();
block_ids.sort();
for (idx, &block_id) in block_ids.iter().enumerate() {
let block = match blocks.get(&block_id) {
Some(b) => b,
None => continue,
};
match block.terminator.as_str() {
"fallthrough" | "goto" => {
if idx + 1 < block_ids.len() {
edges.push(EdgeDiff {
from_block: block_id,
to_block: block_ids[idx + 1],
edge_type: "fallthrough".to_string(),
});
}
}
"conditional" => {
if idx + 1 < block_ids.len() {
edges.push(EdgeDiff {
from_block: block_id,
to_block: block_ids[idx + 1],
edge_type: "true_branch".to_string(),
});
}
if idx + 2 < block_ids.len() {
edges.push(EdgeDiff {
from_block: block_id,
to_block: block_ids[idx + 2],
edge_type: "false_branch".to_string(),
});
}
}
"return" | "panic" => {
}
"call" => {
if idx + 1 < block_ids.len() {
edges.push(EdgeDiff {
from_block: block_id,
to_block: block_ids[idx + 1],
edge_type: "call".to_string(),
});
}
}
_ => {
}
}
}
edges
}
pub fn blocks_to_petgraph(blocks: &[CfgBlockData]) -> DiGraph<i64, ()> {
let mut graph = DiGraph::new();
let mut node_indices = HashMap::new();
for (idx, _block) in blocks.iter().enumerate() {
let block_id = idx as i64;
let node_idx = graph.add_node(block_id);
node_indices.insert(block_id, node_idx);
}
for (idx, block) in blocks.iter().enumerate() {
let from_id = idx as i64;
let from_idx = node_indices[&from_id];
match block.terminator.as_str() {
"fallthrough" | "goto" => {
if idx + 1 < blocks.len() {
let to_idx = node_indices[&((idx + 1) as i64)];
graph.add_edge(from_idx, to_idx, ());
}
}
"conditional" => {
if idx + 1 < blocks.len() {
let to_idx = node_indices[&((idx + 1) as i64)];
graph.add_edge(from_idx, to_idx, ());
}
if idx + 2 < blocks.len() {
let to_idx = node_indices[&((idx + 2) as i64)];
graph.add_edge(from_idx, to_idx, ());
}
}
"call" => {
if idx + 1 < blocks.len() {
let to_idx = node_indices[&((idx + 1) as i64)];
graph.add_edge(from_idx, to_idx, ());
}
}
_ => {
}
}
}
graph
}
#[cfg(test)]
mod tests {
use super::*;
use crate::storage::CfgBlockData;
#[test]
fn test_block_diff_equality() {
let block1 = BlockDiff {
block_id: 1,
kind: "entry".to_string(),
terminator: "fallthrough".to_string(),
source_location: "1:0-10:0".to_string(),
};
let block2 = BlockDiff {
block_id: 1,
kind: "entry".to_string(),
terminator: "fallthrough".to_string(),
source_location: "1:0-10:0".to_string(),
};
assert_eq!(block1, block2);
}
#[test]
fn test_blocks_to_petgraph() {
let blocks = vec![
CfgBlockData {
id: 0,
kind: "entry".to_string(),
terminator: "fallthrough".to_string(),
byte_start: 0,
byte_end: 10,
start_line: 1,
start_col: 0,
end_line: 2,
end_col: 0,
coord_x: 0,
coord_y: 0,
coord_z: 0,
},
CfgBlockData {
id: 1,
kind: "normal".to_string(),
terminator: "return".to_string(),
byte_start: 10,
byte_end: 20,
start_line: 2,
start_col: 0,
end_line: 3,
end_col: 0,
coord_x: 0,
coord_y: 0,
coord_z: 0,
},
];
let graph = blocks_to_petgraph(&blocks);
assert_eq!(graph.node_count(), 2);
assert_eq!(graph.edge_count(), 1);
}
#[test]
fn test_derive_edges() {
let mut blocks = HashMap::new();
blocks.insert(
0,
BlockDiff {
block_id: 0,
kind: "entry".to_string(),
terminator: "fallthrough".to_string(),
source_location: "1:0-5:0".to_string(),
},
);
blocks.insert(
1,
BlockDiff {
block_id: 1,
kind: "normal".to_string(),
terminator: "return".to_string(),
source_location: "5:0-10:0".to_string(),
},
);
let edges = derive_edges(&blocks);
assert_eq!(edges.len(), 1);
assert_eq!(edges[0].from_block, 0);
assert_eq!(edges[0].to_block, 1);
assert_eq!(edges[0].edge_type, "fallthrough");
}
#[test]
fn test_compute_edge_diff() {
let mut before = HashMap::new();
before.insert(
0,
BlockDiff {
block_id: 0,
kind: "entry".to_string(),
terminator: "fallthrough".to_string(),
source_location: "1:0-5:0".to_string(),
},
);
before.insert(
1,
BlockDiff {
block_id: 1,
kind: "normal".to_string(),
terminator: "return".to_string(),
source_location: "5:0-10:0".to_string(),
},
);
let mut after = HashMap::new();
after.insert(
0,
BlockDiff {
block_id: 0,
kind: "entry".to_string(),
terminator: "return".to_string(), source_location: "1:0-5:0".to_string(),
},
);
after.insert(
1,
BlockDiff {
block_id: 1,
kind: "normal".to_string(),
terminator: "return".to_string(),
source_location: "5:0-10:0".to_string(),
},
);
let (added, deleted) = compute_edge_diff(&before, &after).unwrap();
assert_eq!(added.len(), 0);
assert_eq!(deleted.len(), 1);
}
}