scirs2_graph/algorithms/
similarity.rs

1//! Graph similarity algorithms
2//!
3//! This module contains algorithms for measuring similarity between nodes and graphs.
4
5use crate::base::{EdgeWeight, Graph, IndexType, Node};
6use crate::error::{GraphError, Result};
7use std::collections::HashSet;
8use std::hash::Hash;
9
10/// Compute the Jaccard similarity between two nodes based on their neighbors
11///
12/// Jaccard similarity is the size of the intersection divided by the size of the union
13/// of the neighbor sets.
14#[allow(dead_code)]
15pub fn jaccard_similarity<N, E, Ix>(graph: &Graph<N, E, Ix>, node1: &N, node2: &N) -> Result<f64>
16where
17    N: Node + Clone + Hash + Eq + std::fmt::Debug,
18    E: EdgeWeight,
19    Ix: IndexType,
20{
21    if !graph.contains_node(node1) || !graph.contains_node(node2) {
22        return Err(GraphError::node_not_found("node"));
23    }
24
25    let neighbors1: HashSet<N> = graph
26        .neighbors(node1)
27        .unwrap_or_default()
28        .into_iter()
29        .collect();
30    let neighbors2: HashSet<N> = graph
31        .neighbors(node2)
32        .unwrap_or_default()
33        .into_iter()
34        .collect();
35
36    if neighbors1.is_empty() && neighbors2.is_empty() {
37        return Ok(1.0); // Both have no neighbors, consider them similar
38    }
39
40    let intersection = neighbors1.intersection(&neighbors2).count();
41    let union = neighbors1.union(&neighbors2).count();
42
43    Ok(intersection as f64 / union as f64)
44}
45
46/// Compute the cosine similarity between two nodes based on their adjacency vectors
47#[allow(dead_code)]
48pub fn cosine_similarity<N, E, Ix>(graph: &Graph<N, E, Ix>, node1: &N, node2: &N) -> Result<f64>
49where
50    N: Node + Clone + Hash + Eq + std::fmt::Debug,
51    E: EdgeWeight + Into<f64>,
52    Ix: IndexType,
53{
54    if !graph.contains_node(node1) || !graph.contains_node(node2) {
55        return Err(GraphError::node_not_found("node"));
56    }
57
58    let nodes: Vec<N> = graph.nodes().into_iter().cloned().collect();
59    let n = nodes.len();
60
61    // Build adjacency vectors
62    let mut vec1 = vec![0.0; n];
63    let mut vec2 = vec![0.0; n];
64
65    for (i, node) in nodes.iter().enumerate() {
66        if let Ok(weight) = graph.edge_weight(node1, node) {
67            vec1[i] = weight.into();
68        }
69        if let Ok(weight) = graph.edge_weight(node2, node) {
70            vec2[i] = weight.into();
71        }
72    }
73
74    // Compute cosine similarity
75    let dot_product: f64 = vec1.iter().zip(&vec2).map(|(a, b)| a * b).sum();
76    let norm1: f64 = vec1.iter().map(|x| x * x).sum::<f64>().sqrt();
77    let norm2: f64 = vec2.iter().map(|x| x * x).sum::<f64>().sqrt();
78
79    if norm1 == 0.0 || norm2 == 0.0 {
80        return Ok(0.0);
81    }
82
83    Ok(dot_product / (norm1 * norm2))
84}
85
86/// Compute the graph edit distance between two graphs
87///
88/// This is a simplified version that counts the number of edge additions/deletions
89/// needed to transform one graph into another.
90#[allow(dead_code)]
91pub fn graph_edit_distance<N, E, Ix>(graph1: &Graph<N, E, Ix>, graph2: &Graph<N, E, Ix>) -> usize
92where
93    N: Node + Clone + Hash + Eq + std::fmt::Debug,
94    E: EdgeWeight,
95    Ix: IndexType,
96{
97    // Get all edges from both graphs
98    let edges1: HashSet<(N, N)> = graph1
99        .edges()
100        .into_iter()
101        .map(|edge| (edge.source, edge.target))
102        .collect();
103
104    let edges2: HashSet<(N, N)> = graph2
105        .edges()
106        .into_iter()
107        .map(|edge| (edge.source, edge.target))
108        .collect();
109
110    // Count edges only in _graph1 (need to be deleted)
111    let edges_to_delete = edges1.difference(&edges2).count();
112
113    // Count edges only in graph2 (need to be added)
114    let edges_to_add = edges2.difference(&edges1).count();
115
116    edges_to_delete + edges_to_add
117}
118
119#[cfg(test)]
120mod tests {
121    use super::*;
122    use crate::error::Result as GraphResult;
123    use crate::generators::create_graph;
124
125    #[test]
126    fn test_jaccard_similarity() -> GraphResult<()> {
127        let mut graph = create_graph::<&str, ()>();
128
129        // Create a graph where A and B share some neighbors
130        graph.add_edge("A", "C", ())?;
131        graph.add_edge("A", "D", ())?;
132        graph.add_edge("B", "C", ())?;
133        graph.add_edge("B", "E", ())?;
134
135        // A's neighbors: {C, D}
136        // B's neighbors: {C, E}
137        // Intersection: {C} (size 1)
138        // Union: {C, D, E} (size 3)
139        // Jaccard similarity: 1/3
140        let similarity = jaccard_similarity(&graph, &"A", &"B")?;
141        assert!((similarity - 1.0 / 3.0).abs() < 1e-6);
142
143        // Node with itself should have similarity 1.0
144        let self_similarity = jaccard_similarity(&graph, &"A", &"A")?;
145        assert_eq!(self_similarity, 1.0);
146
147        Ok(())
148    }
149
150    #[test]
151    fn test_cosine_similarity() -> GraphResult<()> {
152        let mut graph = create_graph::<&str, f64>();
153
154        // Create a weighted graph
155        graph.add_edge("A", "B", 1.0)?;
156        graph.add_edge("A", "C", 2.0)?;
157        graph.add_edge("B", "C", 3.0)?;
158
159        // Test cosine similarity between connected nodes
160        let similarity = cosine_similarity(&graph, &"A", &"B")?;
161        assert!((0.0..=1.0).contains(&similarity));
162
163        // Node with itself should have similarity 1.0
164        let self_similarity = cosine_similarity(&graph, &"A", &"A")?;
165        assert!((self_similarity - 1.0).abs() < 1e-6);
166
167        Ok(())
168    }
169
170    #[test]
171    fn test_graph_edit_distance() -> GraphResult<()> {
172        let mut graph1 = create_graph::<&str, ()>();
173        graph1.add_edge("A", "B", ())?;
174        graph1.add_edge("B", "C", ())?;
175
176        let mut graph2 = create_graph::<&str, ()>();
177        graph2.add_edge("A", "B", ())?;
178        graph2.add_edge("B", "C", ())?;
179        graph2.add_edge("C", "D", ())?;
180
181        // Graph2 has one additional edge
182        let distance = graph_edit_distance(&graph1, &graph2);
183        assert_eq!(distance, 1);
184
185        // Same graph should have distance 0
186        let self_distance = graph_edit_distance(&graph1, &graph1);
187        assert_eq!(self_distance, 0);
188
189        Ok(())
190    }
191}