use std::time::Instant;
use petgraph::Directed;
use petgraph::algo::isomorphism;
use petgraph::graph::{DefaultIx, Graph, NodeIndex};
use crate::{errors::SqliteGraphError, graph::SqliteGraph, progress::ProgressCallback};
type PgDiGraph = Graph<i64, (), Directed, DefaultIx>;
#[derive(Debug, Clone, Default)]
pub struct SimilarityBounds {
pub max_matches: Option<usize>,
pub timeout_ms: Option<u64>,
pub similarity_threshold: Option<f64>,
}
impl SimilarityBounds {
#[inline]
pub fn new() -> Self {
Self::default()
}
#[inline]
pub fn with_max_matches(mut self, max: usize) -> Self {
self.max_matches = Some(max);
self
}
#[inline]
pub fn with_timeout(mut self, timeout_ms: u64) -> Self {
self.timeout_ms = Some(timeout_ms);
self
}
#[inline]
pub fn with_threshold(mut self, threshold: f64) -> Self {
self.similarity_threshold = Some(threshold);
self
}
#[inline]
pub fn is_similar_enough(&self, score: f64) -> bool {
match self.similarity_threshold {
Some(threshold) => score >= threshold,
None => false,
}
}
#[inline]
pub fn is_bounded(&self) -> bool {
self.max_matches.is_some()
|| self.timeout_ms.is_some()
|| self.similarity_threshold.is_some()
}
}
#[derive(Debug, Clone)]
pub struct SimilarityResult {
pub isomorphic: bool,
pub mcs_similarity: f64,
pub ged_distance: f64,
pub mcs_size: usize,
pub graph1_size: usize,
pub graph2_size: usize,
pub computation_time_ms: u128,
}
impl SimilarityResult {
#[inline]
pub fn is_very_similar(&self) -> bool {
self.mcs_similarity >= 0.8
}
#[inline]
pub fn is_moderately_similar(&self) -> bool {
self.mcs_similarity >= 0.5
}
#[inline]
pub fn similarity_class(&self) -> &'static str {
if self.isomorphic {
"Identical"
} else if self.mcs_similarity >= 0.8 {
"Very Similar"
} else if self.mcs_similarity >= 0.5 {
"Similar"
} else if self.mcs_similarity > 0.0 {
"Different"
} else {
"No Common Structure"
}
}
#[inline]
pub fn has_no_common_structure(&self) -> bool {
self.mcs_similarity == 0.0
}
}
fn graph_to_petgraph(graph: &SqliteGraph) -> Result<PgDiGraph, SqliteGraphError> {
let entity_ids = graph.all_entity_ids()?;
let mut pg = PgDiGraph::new();
let mut id_to_index: std::collections::HashMap<i64, NodeIndex> =
std::collections::HashMap::new();
for &id in &entity_ids {
let idx = pg.add_node(id);
id_to_index.insert(id, idx);
}
for &from_id in &entity_ids {
if let Ok(outgoing) = graph.fetch_outgoing(from_id) {
for &to_id in &outgoing {
if let (Some(&from_idx), Some(&to_idx)) =
(id_to_index.get(&from_id), id_to_index.get(&to_id))
{
pg.add_edge(from_idx, to_idx, ());
}
}
}
}
Ok(pg)
}
fn maximum_common_subgraph(g1: &PgDiGraph, g2: &PgDiGraph, bounds: &SimilarityBounds) -> usize {
let _start_time = Instant::now();
let (pattern, target) = if g1.node_count() <= g2.node_count() {
(g1, g2)
} else {
(g2, g1)
};
let pattern_count = pattern.node_count();
if pattern_count == 0 {
return 0;
}
let target_count = target.node_count();
if pattern_count > target_count {
return 0;
}
if pattern_count == target_count {
let mut node_match = |_: &i64, _: &i64| -> bool { true };
let mut edge_match = |_: &(), _: &()| -> bool { true };
let pattern_ref = &pattern;
let target_ref = ⌖
if isomorphism::is_isomorphic_matching(
&pattern_ref,
&target_ref,
&mut node_match,
&mut edge_match,
) {
return pattern_count; }
}
let _timeout = bounds.timeout_ms.map(std::time::Duration::from_millis);
let mut max_size = 0usize;
let mut matches_checked = 0usize;
let pattern_ref = &pattern;
let target_ref = ⌖
let mut node_match = |_: &i64, _: &i64| -> bool { true };
let mut edge_match = |_: &(), _: &()| -> bool { true };
let iso_iter = isomorphism::subgraph_isomorphisms_iter(
&pattern_ref,
&target_ref,
&mut node_match,
&mut edge_match,
);
if let Some(mut iso_iter) = iso_iter {
let has_match = iso_iter.next().is_some();
if has_match {
max_size = pattern_count;
matches_checked = 1;
}
}
let _ = matches_checked; max_size
}
pub fn structural_similarity(
graph1: &SqliteGraph,
graph2: &SqliteGraph,
bounds: SimilarityBounds,
) -> Result<SimilarityResult, SqliteGraphError> {
let start_time = Instant::now();
let g1 = graph_to_petgraph(graph1)?;
let g2 = graph_to_petgraph(graph2)?;
let g1_size = g1.node_count();
let g2_size = g2.node_count();
if g1_size == 0 && g2_size == 0 {
return Ok(SimilarityResult {
isomorphic: true,
mcs_similarity: 1.0,
ged_distance: 0.0,
mcs_size: 0,
graph1_size: 0,
graph2_size: 0,
computation_time_ms: start_time.elapsed().as_millis(),
});
}
if g1_size == 0 || g2_size == 0 {
return Ok(SimilarityResult {
isomorphic: false,
mcs_similarity: 0.0,
ged_distance: 1.0,
mcs_size: 0,
graph1_size: g1_size,
graph2_size: g2_size,
computation_time_ms: start_time.elapsed().as_millis(),
});
}
let mut node_match = |_: &i64, _: &i64| -> bool { true };
let mut edge_match = |_: &(), _: &()| -> bool { true };
let g1_ref = &g1;
let g2_ref = &g2;
let isomorphic =
isomorphism::is_isomorphic_matching(&g1_ref, &g2_ref, &mut node_match, &mut edge_match);
if isomorphic {
return Ok(SimilarityResult {
isomorphic: true,
mcs_similarity: 1.0,
ged_distance: 0.0,
mcs_size: g1_size,
graph1_size: g1_size,
graph2_size: g2_size,
computation_time_ms: start_time.elapsed().as_millis(),
});
}
let mcs_size = maximum_common_subgraph(&g1, &g2, &bounds);
let max_size = g1_size.max(g2_size);
let mcs_similarity = if max_size > 0 {
mcs_size as f64 / max_size as f64
} else {
0.0
};
let ged_distance = 1.0 - mcs_similarity;
Ok(SimilarityResult {
isomorphic: false,
mcs_similarity,
ged_distance,
mcs_size,
graph1_size: g1_size,
graph2_size: g2_size,
computation_time_ms: start_time.elapsed().as_millis(),
})
}
pub fn structural_similarity_with_progress<F>(
graph1: &SqliteGraph,
graph2: &SqliteGraph,
bounds: SimilarityBounds,
progress: &F,
) -> Result<SimilarityResult, SqliteGraphError>
where
F: ProgressCallback,
{
progress.on_progress(0, Some(5), "Converting graphs to petgraph format");
let start_time = Instant::now();
let g1 = graph_to_petgraph(graph1)?;
let g2 = graph_to_petgraph(graph2)?;
let g1_size = g1.node_count();
let g2_size = g2.node_count();
progress.on_progress(
1,
Some(5),
&format!("Graph sizes: {} and {} nodes", g1_size, g2_size),
);
if g1_size == 0 && g2_size == 0 {
progress.on_progress(2, Some(5), "Both graphs empty - trivially isomorphic");
return Ok(SimilarityResult {
isomorphic: true,
mcs_similarity: 1.0,
ged_distance: 0.0,
mcs_size: 0,
graph1_size: 0,
graph2_size: 0,
computation_time_ms: start_time.elapsed().as_millis(),
});
}
if g1_size == 0 || g2_size == 0 {
progress.on_progress(2, Some(5), "One graph empty - no common structure");
return Ok(SimilarityResult {
isomorphic: false,
mcs_similarity: 0.0,
ged_distance: 1.0,
mcs_size: 0,
graph1_size: g1_size,
graph2_size: g2_size,
computation_time_ms: start_time.elapsed().as_millis(),
});
}
progress.on_progress(2, Some(5), "Checking exact isomorphism...");
let mut node_match = |_: &i64, _: &i64| -> bool { true };
let mut edge_match = |_: &(), _: &()| -> bool { true };
let g1_ref = &g1;
let g2_ref = &g2;
let isomorphic =
isomorphism::is_isomorphic_matching(&g1_ref, &g2_ref, &mut node_match, &mut edge_match);
if isomorphic {
progress.on_progress(3, Some(5), "Graphs are isomorphic!");
return Ok(SimilarityResult {
isomorphic: true,
mcs_similarity: 1.0,
ged_distance: 0.0,
mcs_size: g1_size,
graph1_size: g1_size,
graph2_size: g2_size,
computation_time_ms: start_time.elapsed().as_millis(),
});
}
progress.on_progress(
3,
Some(5),
"Not isomorphic - computing maximum common subgraph...",
);
let mcs_size = maximum_common_subgraph(&g1, &g2, &bounds);
progress.on_progress(4, Some(5), &format!("Found MCS of size {}", mcs_size));
let max_size = g1_size.max(g2_size);
let mcs_similarity = if max_size > 0 {
mcs_size as f64 / max_size as f64
} else {
0.0
};
let ged_distance = 1.0 - mcs_similarity;
progress.on_progress(
5,
Some(5),
&format!("Similarity score: {:.2}", mcs_similarity),
);
progress.on_complete();
Ok(SimilarityResult {
isomorphic: false,
mcs_similarity,
ged_distance,
mcs_size,
graph1_size: g1_size,
graph2_size: g2_size,
computation_time_ms: start_time.elapsed().as_millis(),
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{GraphEdge, GraphEntity};
fn create_test_graph_with_nodes(count: usize) -> SqliteGraph {
let graph = SqliteGraph::open_in_memory().expect("Failed to create graph");
for i in 0..count {
let entity = GraphEntity {
id: 0,
kind: "test".to_string(),
name: format!("test_{}", i),
file_path: Some(format!("test_{}.rs", i)),
data: serde_json::json!({"index": i}),
};
graph
.insert_entity(&entity)
.expect("Failed to insert entity");
}
graph
}
fn get_entity_ids(graph: &SqliteGraph, count: usize) -> Vec<i64> {
graph
.all_entity_ids()
.expect("Failed to get IDs")
.into_iter()
.take(count)
.collect()
}
fn add_edge(graph: &SqliteGraph, from_idx: i64, to_idx: i64) {
let ids: Vec<i64> = graph.all_entity_ids().expect("Failed to get IDs");
let edge = GraphEdge {
id: 0,
from_id: ids[from_idx as usize],
to_id: ids[to_idx as usize],
edge_type: "edge".to_string(),
data: serde_json::json!({}),
};
graph.insert_edge(&edge).ok();
}
fn add_typed_edge(graph: &SqliteGraph, from_idx: i64, to_idx: i64, edge_type: &str) {
let ids: Vec<i64> = graph.all_entity_ids().expect("Failed to get IDs");
let edge = GraphEdge {
id: 0,
from_id: ids[from_idx as usize],
to_id: ids[to_idx as usize],
edge_type: edge_type.to_string(),
data: serde_json::json!({}),
};
graph.insert_edge(&edge).ok();
}
#[test]
fn test_structural_similarity_identical() {
let graph1 = create_test_graph_with_nodes(4);
let graph2 = create_test_graph_with_nodes(4);
for i in 0..3 {
add_edge(&graph1, i, i + 1);
add_edge(&graph2, i, i + 1);
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert!(result.isomorphic);
assert_eq!(result.mcs_similarity, 1.0);
assert_eq!(result.ged_distance, 0.0);
assert_eq!(result.mcs_size, 4);
assert_eq!(result.similarity_class(), "Identical");
}
#[test]
fn test_structural_similarity_isomorphic() {
let graph1 = create_test_graph_with_nodes(3);
let graph2 = create_test_graph_with_nodes(3);
let ids1 = get_entity_ids(&graph1, 3);
let ids2 = get_entity_ids(&graph2, 3);
for (from, to) in &[(0, 1), (1, 2), (2, 0)] {
let edge = GraphEdge {
id: 0,
from_id: ids1[*from],
to_id: ids1[*to],
edge_type: "edge".to_string(),
data: serde_json::json!({}),
};
graph1.insert_edge(&edge).ok();
}
for (from, to) in &[(0, 1), (1, 2), (2, 0)] {
let edge = GraphEdge {
id: 0,
from_id: ids2[*from],
to_id: ids2[*to],
edge_type: "edge".to_string(),
data: serde_json::json!({}),
};
graph2.insert_edge(&edge).ok();
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert!(result.isomorphic);
assert_eq!(result.mcs_similarity, 1.0);
}
#[test]
fn test_structural_similarity_subgraph() {
let graph_large = create_test_graph_with_nodes(5);
let graph_small = create_test_graph_with_nodes(3);
for i in 0..4 {
add_edge(&graph_large, i, i + 1);
}
for i in 0..2 {
add_edge(&graph_small, i, i + 1);
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph_large, &graph_small, bounds).unwrap();
assert!(!result.isomorphic);
assert_eq!(result.mcs_size, 3);
assert!((result.mcs_similarity - 0.6).abs() < 0.01);
}
#[test]
fn test_structural_similarity_completely_different() {
let graph1 = create_test_graph_with_nodes(3);
let graph2 = create_test_graph_with_nodes(3);
for (from, to) in &[(0, 1), (1, 2), (2, 0)] {
add_edge(&graph1, *from, *to);
}
for (from, to) in &[(0, 1), (1, 2)] {
add_edge(&graph2, *from, *to);
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert!(!result.isomorphic);
assert!(result.mcs_similarity < 1.0);
}
#[test]
fn test_structural_similarity_empty_graphs() {
let graph1 = SqliteGraph::open_in_memory().expect("Failed to create graph");
let graph2 = SqliteGraph::open_in_memory().expect("Failed to create graph");
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert!(result.isomorphic);
assert_eq!(result.mcs_similarity, 1.0);
assert_eq!(result.ged_distance, 0.0);
assert_eq!(result.mcs_size, 0);
}
#[test]
fn test_structural_similarity_one_empty() {
let graph1 = SqliteGraph::open_in_memory().expect("Failed to create graph");
let graph2 = create_test_graph_with_nodes(3);
add_edge(&graph2, 0, 1);
add_edge(&graph2, 1, 2);
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert!(!result.isomorphic);
assert_eq!(result.mcs_similarity, 0.0);
assert_eq!(result.ged_distance, 1.0);
assert!(result.has_no_common_structure());
}
#[test]
fn test_similarity_result_helpers() {
let result = SimilarityResult {
isomorphic: false,
mcs_similarity: 0.85,
ged_distance: 0.15,
mcs_size: 5,
graph1_size: 6,
graph2_size: 6,
computation_time_ms: 10,
};
assert!(result.is_very_similar());
assert!(result.is_moderately_similar());
assert_eq!(result.similarity_class(), "Very Similar");
}
#[test]
fn test_similarity_class() {
let identical = SimilarityResult {
isomorphic: true,
mcs_similarity: 1.0,
ged_distance: 0.0,
mcs_size: 5,
graph1_size: 5,
graph2_size: 5,
computation_time_ms: 10,
};
assert_eq!(identical.similarity_class(), "Identical");
let very_similar = SimilarityResult {
isomorphic: false,
mcs_similarity: 0.85,
ged_distance: 0.15,
mcs_size: 4,
graph1_size: 5,
graph2_size: 5,
computation_time_ms: 10,
};
assert_eq!(very_similar.similarity_class(), "Very Similar");
let similar = SimilarityResult {
isomorphic: false,
mcs_similarity: 0.6,
ged_distance: 0.4,
mcs_size: 3,
graph1_size: 5,
graph2_size: 5,
computation_time_ms: 10,
};
assert_eq!(similar.similarity_class(), "Similar");
let different = SimilarityResult {
isomorphic: false,
mcs_similarity: 0.3,
ged_distance: 0.7,
mcs_size: 2,
graph1_size: 5,
graph2_size: 5,
computation_time_ms: 10,
};
assert_eq!(different.similarity_class(), "Different");
let no_common = SimilarityResult {
isomorphic: false,
mcs_similarity: 0.0,
ged_distance: 1.0,
mcs_size: 0,
graph1_size: 5,
graph2_size: 5,
computation_time_ms: 10,
};
assert_eq!(no_common.similarity_class(), "No Common Structure");
}
#[test]
fn test_similarity_bounds_threshold() {
let bounds = SimilarityBounds {
max_matches: None,
timeout_ms: None,
similarity_threshold: Some(0.8),
};
assert!(bounds.is_similar_enough(0.9));
assert!(bounds.is_similar_enough(0.8));
assert!(!bounds.is_similar_enough(0.7));
let no_threshold = SimilarityBounds::default();
assert!(!no_threshold.is_similar_enough(0.9));
}
#[test]
fn test_similarity_with_progress() {
use crate::progress::NoProgress;
let graph1 = create_test_graph_with_nodes(3);
let graph2 = create_test_graph_with_nodes(3);
for i in 0..2 {
add_edge(&graph1, i, i + 1);
add_edge(&graph2, i, i + 1);
}
let progress = NoProgress;
let bounds = SimilarityBounds::default();
let result =
structural_similarity_with_progress(&graph1, &graph2, bounds, &progress).unwrap();
assert!(result.isomorphic);
}
#[test]
fn test_structural_similarity_refactor_verification() {
let original = create_test_graph_with_nodes(4);
for i in 0..3 {
add_edge(&original, i, i + 1);
}
let optimized = create_test_graph_with_nodes(4);
for i in 0..3 {
add_edge(&optimized, i, i + 1);
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&original, &optimized, bounds).unwrap();
assert!(result.isomorphic);
assert_eq!(result.similarity_class(), "Identical");
}
#[test]
fn test_structural_similarity_regression_detection() {
let v1 = create_test_graph_with_nodes(3);
for i in 0..2 {
add_edge(&v1, i, i + 1);
}
let v2 = create_test_graph_with_nodes(4);
add_edge(&v2, 0, 1);
add_edge(&v2, 1, 2);
add_edge(&v2, 2, 3);
add_edge(&v2, 0, 3);
let bounds = SimilarityBounds::default();
let result = structural_similarity(&v1, &v2, bounds).unwrap();
assert!(!result.isomorphic);
assert!(result.mcs_similarity < 1.0);
assert!(result.mcs_size >= 3);
}
#[test]
fn test_mcs_size_larger_pattern() {
let graph_small = create_test_graph_with_nodes(2);
let graph_large = create_test_graph_with_nodes(4);
add_edge(&graph_small, 0, 1);
for i in 0..3 {
add_edge(&graph_large, i, i + 1);
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph_small, &graph_large, bounds).unwrap();
assert!(!result.isomorphic); assert!(result.mcs_size >= 2); }
#[test]
fn test_computation_time_tracking() {
let graph1 = create_test_graph_with_nodes(3);
let graph2 = create_test_graph_with_nodes(3);
for i in 0..2 {
add_edge(&graph1, i, i + 1);
add_edge(&graph2, i, i + 1);
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert!(result.computation_time_ms < 1000);
}
#[test]
fn test_similarity_bounds_builder() {
let bounds = SimilarityBounds::new()
.with_max_matches(100)
.with_timeout(5000)
.with_threshold(0.8);
assert_eq!(bounds.max_matches, Some(100));
assert_eq!(bounds.timeout_ms, Some(5000));
assert_eq!(bounds.similarity_threshold, Some(0.8));
assert!(bounds.is_bounded());
}
#[test]
fn test_similarity_with_timeout() {
let graph1 = create_test_graph_with_nodes(3);
let graph2 = create_test_graph_with_nodes(3);
for i in 0..2 {
add_edge(&graph1, i, i + 1);
add_edge(&graph2, i, i + 1);
}
let bounds = SimilarityBounds {
timeout_ms: Some(100), ..Default::default()
};
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert!(result.isomorphic);
}
#[test]
fn test_graph_sizes_in_result() {
let graph1 = create_test_graph_with_nodes(3);
let graph2 = create_test_graph_with_nodes(5);
for i in 0..2 {
add_edge(&graph1, i, i + 1);
}
for i in 0..4 {
add_edge(&graph2, i, i + 1);
}
let bounds = SimilarityBounds::default();
let result = structural_similarity(&graph1, &graph2, bounds).unwrap();
assert_eq!(result.graph1_size, 3);
assert_eq!(result.graph2_size, 5);
}
#[test]
fn test_ged_distance_calculation() {
let mcs_similarity = 0.7;
let ged_distance = 1.0 - mcs_similarity;
let result = SimilarityResult {
isomorphic: false,
mcs_similarity,
ged_distance,
mcs_size: 7,
graph1_size: 10,
graph2_size: 10,
computation_time_ms: 10,
};
let expected_ged = 1.0 - result.mcs_similarity;
assert!((result.ged_distance - expected_ged).abs() < 0.01);
}
#[test]
fn test_is_moderately_similar() {
let result = SimilarityResult {
isomorphic: false,
mcs_similarity: 0.5,
ged_distance: 0.5,
mcs_size: 5,
graph1_size: 10,
graph2_size: 10,
computation_time_ms: 10,
};
assert!(result.is_moderately_similar());
assert!(!result.is_very_similar());
}
#[test]
fn test_default_bounds() {
let bounds = SimilarityBounds::default();
assert_eq!(bounds.max_matches, None);
assert_eq!(bounds.timeout_ms, None);
assert_eq!(bounds.similarity_threshold, None);
}
}