use crate::graph::GraphRef;
pub fn jaccard<G: GraphRef>(graph: &G, u: usize, v: usize) -> f64 {
let nu = graph.neighbors_ref(u);
let nv = graph.neighbors_ref(v);
let intersection = count_intersection(nu, nv);
let union = nu.len() + nv.len() - intersection;
if union == 0 {
0.0
} else {
intersection as f64 / union as f64
}
}
pub fn cosine<G: GraphRef>(graph: &G, u: usize, v: usize) -> f64 {
let nu = graph.neighbors_ref(u);
let nv = graph.neighbors_ref(v);
let denom = ((nu.len() * nv.len()) as f64).sqrt();
if denom == 0.0 {
0.0
} else {
count_intersection(nu, nv) as f64 / denom
}
}
pub fn overlap<G: GraphRef>(graph: &G, u: usize, v: usize) -> f64 {
let nu = graph.neighbors_ref(u);
let nv = graph.neighbors_ref(v);
let min_size = nu.len().min(nv.len());
if min_size == 0 {
0.0
} else {
count_intersection(nu, nv) as f64 / min_size as f64
}
}
pub fn top_k_similar_jaccard<G: GraphRef>(graph: &G, u: usize, k: usize) -> Vec<(usize, f64)> {
let n = graph.node_count();
let mut scores: Vec<(usize, f64)> = (0..n)
.filter(|&v| v != u)
.map(|v| (v, jaccard(graph, u, v)))
.filter(|&(_, s)| s > 0.0)
.collect();
scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scores.truncate(k);
scores
}
fn count_intersection(a: &[usize], b: &[usize]) -> usize {
if a.is_empty() || b.is_empty() {
return 0;
}
let (smaller, larger) = if a.len() <= b.len() { (a, b) } else { (b, a) };
if smaller.len() <= 16 {
smaller.iter().filter(|x| larger.contains(x)).count()
} else {
let set: std::collections::HashSet<usize> = smaller.iter().copied().collect();
larger.iter().filter(|x| set.contains(x)).count()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::GraphRef;
struct VecGraph {
adj: Vec<Vec<usize>>,
}
impl GraphRef for VecGraph {
fn node_count(&self) -> usize {
self.adj.len()
}
fn neighbors_ref(&self, node: usize) -> &[usize] {
&self.adj[node]
}
}
#[test]
fn jaccard_identical_neighborhoods() {
let g = VecGraph {
adj: vec![vec![1, 2], vec![0, 2], vec![0, 1]],
};
let j = jaccard(&g, 0, 1);
assert!((j - 1.0 / 3.0).abs() < 1e-10);
}
#[test]
fn jaccard_no_overlap() {
let g = VecGraph {
adj: vec![vec![1], vec![0], vec![3], vec![2]],
};
assert_eq!(jaccard(&g, 0, 2), 0.0);
}
#[test]
fn cosine_star() {
let g = VecGraph {
adj: vec![vec![1, 2, 3], vec![0], vec![0], vec![0]],
};
let c = cosine(&g, 1, 2);
assert!((c - 1.0).abs() < 1e-10);
}
#[test]
fn overlap_subset() {
let g = VecGraph {
adj: vec![vec![1, 2, 3], vec![0, 2], vec![0, 1], vec![0]],
};
let o = overlap(&g, 0, 1);
assert!((o - 0.5).abs() < 1e-10);
}
#[test]
fn top_k_returns_sorted() {
let g = VecGraph {
adj: vec![vec![1, 2, 3], vec![0, 2, 3], vec![0, 1], vec![0, 1]],
};
let top = top_k_similar_jaccard(&g, 0, 2);
assert!(top.len() <= 2);
if top.len() >= 2 {
assert!(top[0].1 >= top[1].1);
}
}
#[test]
fn isolated_nodes_zero() {
let g = VecGraph {
adj: vec![vec![], vec![]],
};
assert_eq!(jaccard(&g, 0, 1), 0.0);
assert_eq!(cosine(&g, 0, 1), 0.0);
assert_eq!(overlap(&g, 0, 1), 0.0);
}
}