#![allow(clippy::collapsible_if)]
use crate::AletheiaDB;
use crate::core::error::Result;
use crate::core::id::NodeId;
/// Result of analysing how a node's neighbors split into two groups.
///
/// Produced by `JanusDetector::analyze_node`. All distances are Euclidean
/// distances measured in the space of the analysed vector property.
#[derive(Debug, Clone)]
pub struct BridgeScore {
    /// Separation ratio: `inter_cluster_distance / intra_cluster_spread`,
    /// with a fixed value substituted when the spread is (almost) zero.
    pub total_score: f32,
    /// Distance between the two cluster centroids.
    pub inter_cluster_distance: f32,
    /// Mean distance from each neighbor vector to its assigned centroid.
    pub intra_cluster_spread: f32,
    /// Number of neighbor vectors that took part in the analysis.
    pub neighbor_count: usize,
}

impl BridgeScore {
    /// `total_score` must be strictly greater than this to count as a bridge.
    const MIN_BRIDGE_SCORE: f32 = 1.2;
    /// Minimum number of analysed neighbors required to count as a bridge.
    const MIN_BRIDGE_NEIGHBORS: usize = 4;

    /// Returns `true` when the score exceeds the bridge threshold and enough
    /// neighbors contributed to it.
    ///
    /// A `NaN` score never qualifies, because every comparison with `NaN`
    /// is false.
    pub fn is_bridge(&self) -> bool {
        let has_enough_neighbors = self.neighbor_count >= Self::MIN_BRIDGE_NEIGHBORS;
        let is_well_separated = self.total_score > Self::MIN_BRIDGE_SCORE;
        has_enough_neighbors && is_well_separated
    }
}
/// Analyzer that scores how strongly the neighbors reached through a node's
/// outgoing edges split into two separated groups, based on a vector
/// property stored on those neighbors.
///
/// The detector only borrows the database, so it cannot outlive it (`'a`).
pub struct JanusDetector<'a> {
    /// Database handle used to look up edges, nodes and their properties.
    db: &'a AletheiaDB,
}
impl<'a> JanusDetector<'a> {
    /// Distances and spreads below this value are treated as zero.
    const EPSILON: f32 = 1e-6;
    /// Score reported when the clusters are separated but have no spread,
    /// which would otherwise mean dividing by (almost) zero.
    const SATURATED_SCORE: f32 = 100.0;
    /// Upper bound on 2-means refinement passes.
    const MAX_KMEANS_ITERATIONS: usize = 10;

    /// Creates a detector that reads from `db`.
    pub fn new(db: &'a AletheiaDB) -> Self {
        Self { db }
    }

    /// Scores how cleanly the outgoing neighbors of `node_id` split into two
    /// groups in the space of the vector property named `property`.
    ///
    /// Neighbors that lack the property, or whose property is not a vector,
    /// are ignored. Vectors whose dimension differs from the first collected
    /// vector are ignored as well, because distances between vectors of
    /// different lengths are not meaningful.
    ///
    /// With fewer than two usable vectors the result is an all-zero score
    /// carrying the number of vectors found.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while resolving an edge target or
    /// loading a neighbor node.
    pub fn analyze_node(&self, node_id: NodeId, property: &str) -> Result<BridgeScore> {
        let edges = self.db.get_outgoing_edges(node_id);
        let mut vectors: Vec<Vec<f32>> = Vec::new();
        // Dimension of the first vector seen; later vectors must match it.
        let mut expected_dim: Option<usize> = None;

        for edge_id in edges {
            let neighbor_id = self.db.get_edge_target(edge_id)?;
            let neighbor = self.db.get_node(neighbor_id)?;
            // Kept as two nested `if let`s on purpose; the lint would ask for
            // them to be merged into a single condition.
            #[allow(clippy::collapsible_if)]
            if let Some(prop) = neighbor.get_property(property) {
                if let Some(vec) = prop.as_vector() {
                    let candidate: Vec<f32> = vec.to_vec();
                    let dim = *expected_dim.get_or_insert(candidate.len());
                    if candidate.len() == dim {
                        vectors.push(candidate);
                    }
                }
            }
        }

        let count = vectors.len();
        if count < 2 {
            // Two clusters cannot be formed from fewer than two points.
            return Ok(BridgeScore {
                total_score: 0.0,
                inter_cluster_distance: 0.0,
                intra_cluster_spread: 0.0,
                neighbor_count: count,
            });
        }

        // `count >= 2` guarantees that `kmeans_2` returns two centroids.
        let (centroids, assignments) = self.kmeans_2(&vectors);
        let dist = euclidean_distance_sq(&centroids[0], &centroids[1]).sqrt();

        let mut spread_sum = 0.0_f32;
        for (vec, &cluster_idx) in vectors.iter().zip(&assignments) {
            spread_sum += euclidean_distance_sq(vec, &centroids[cluster_idx]).sqrt();
        }
        let avg_spread = spread_sum / count as f32;

        let score = if avg_spread < Self::EPSILON {
            // No measurable spread: either two tight, distinct groups
            // (saturate the score) or every vector coincides (no bridge).
            if dist > Self::EPSILON {
                Self::SATURATED_SCORE
            } else {
                0.0
            }
        } else {
            dist / avg_spread
        };

        Ok(BridgeScore {
            total_score: score,
            inter_cluster_distance: dist,
            intra_cluster_spread: avg_spread,
            neighbor_count: count,
        })
    }

    /// Splits `data` into two clusters with a small, deterministic k-means.
    ///
    /// Seeding: the first centroid is the first point, the second is the
    /// point farthest from it (the earliest one on ties). Refinement stops
    /// once no assignment changes, or after a fixed number of passes.
    ///
    /// Returns the two centroids and, for each input point, the index
    /// (`0` or `1`) of its cluster. Empty input yields two empty vectors.
    /// A cluster that receives no points keeps its previous centroid.
    fn kmeans_2(&self, data: &[Vec<f32>]) -> (Vec<Vec<f32>>, Vec<usize>) {
        if data.is_empty() {
            return (vec![], vec![]);
        }

        let dim = data[0].len();
        let seed = &data[0];

        // Find the point farthest from the seed. Any real squared distance
        // is >= 0, so starting below zero lets the first candidate win.
        let mut max_dist = -1.0_f32;
        let mut farthest = 0usize;
        for (idx, v) in data.iter().enumerate().skip(1) {
            let d = euclidean_distance_sq(seed, v);
            if d > max_dist {
                max_dist = d;
                farthest = idx;
            }
        }

        let mut centroids = vec![seed.clone(), data[farthest].clone()];
        let mut assignments = vec![0usize; data.len()];

        for _ in 0..Self::MAX_KMEANS_ITERATIONS {
            let mut changed = false;
            let mut sums = vec![vec![0.0_f32; dim]; 2];
            let mut counts = [0usize; 2];

            // Assignment step: attach each point to the nearer centroid
            // (ties go to cluster 1) and accumulate per-cluster sums.
            for (slot, v) in assignments.iter_mut().zip(data) {
                let d0 = euclidean_distance_sq(&centroids[0], v);
                let d1 = euclidean_distance_sq(&centroids[1], v);
                let cluster = if d0 < d1 { 0 } else { 1 };
                if *slot != cluster {
                    changed = true;
                    *slot = cluster;
                }
                // Zipping avoids an out-of-bounds panic should a point be
                // shorter than `dim`.
                for (acc, x) in sums[cluster].iter_mut().zip(v) {
                    *acc += *x;
                }
                counts[cluster] += 1;
            }

            // Update step: move each non-empty cluster's centroid to the
            // mean of its members.
            for ((centroid, sum), &members) in centroids.iter_mut().zip(&sums).zip(&counts) {
                if members > 0 {
                    for (c, s) in centroid.iter_mut().zip(sum) {
                        *c = *s / members as f32;
                    }
                }
            }

            if !changed {
                break;
            }
        }

        (centroids, assignments)
    }
}
/// Squared Euclidean distance between `a` and `b`.
///
/// Components are paired positionally; if the slices differ in length, the
/// surplus components of the longer one are ignored. Callers that need the
/// actual distance take the square root themselves.
fn euclidean_distance_sq(a: &[f32], b: &[f32]) -> f32 {
    a.iter()
        .zip(b)
        .map(|(&lhs, &rhs)| {
            let delta = lhs - rhs;
            delta * delta
        })
        .sum::<f32>()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property::PropertyMapBuilder;
    use crate::index::vector::{DistanceMetric, HnswConfig};

    /// A node linked to two tight, far-apart groups of neighbors (three near
    /// the origin, three near (10, 10)) must be reported as a bridge.
    #[test]
    fn test_janus_dumbbell_bridge() {
        let db = AletheiaDB::new().unwrap();
        db.enable_vector_index("emb", HnswConfig::new(2, DistanceMetric::Euclidean))
            .unwrap();

        // Creates a node carrying a 2-d "emb" vector and returns its id.
        let add_node = |label: &'static str, emb: [f32; 2]| {
            db.create_node(
                label,
                PropertyMapBuilder::new().insert_vector("emb", &emb).build(),
            )
            .unwrap()
        };

        let near_origin = [[0.0, 0.0], [0.1, 0.0], [0.0, 0.1]].map(|emb| add_node("Node", emb));
        let far_corner =
            [[10.0, 10.0], [10.1, 10.0], [10.0, 10.1]].map(|emb| add_node("Node", emb));
        let bridge = add_node("Bridge", [5.0, 5.0]);

        // Link the bridge to the first group, then to the second.
        for group in [near_origin, far_corner] {
            for target in group {
                db.create_edge(bridge, target, "LINKS", Default::default())
                    .unwrap();
            }
        }

        let detector = JanusDetector::new(&db);
        let result = detector.analyze_node(bridge, "emb").unwrap();
        println!("Bridge Score: {:?}", result);

        assert!(
            result.total_score > 5.0,
            "Bridge node should have high score"
        );
        assert!(result.inter_cluster_distance > 10.0);
        assert!(result.is_bridge());
    }

    /// Six neighbors spaced evenly along a short line form no clear split,
    /// so the score must stay below the level seen for a real bridge.
    #[test]
    fn test_janus_homogeneous_cluster() {
        let db = AletheiaDB::new().unwrap();
        db.enable_vector_index("emb", HnswConfig::new(2, DistanceMetric::Euclidean))
            .unwrap();

        // The center itself carries no embedding; only its neighbors do.
        let center = db.create_node("Center", Default::default()).unwrap();
        for step in 0..6 {
            let emb = [step as f32 * 0.1, 0.0];
            let props = PropertyMapBuilder::new().insert_vector("emb", &emb).build();
            let neighbor = db.create_node("Node", props).unwrap();
            db.create_edge(center, neighbor, "LINKS", Default::default())
                .unwrap();
        }

        let detector = JanusDetector::new(&db);
        let result = detector.analyze_node(center, "emb").unwrap();
        println!("Homogeneous Score: {:?}", result);

        assert!(
            result.total_score < 5.0,
            "Homogeneous node should have low score compared to bridge"
        );
    }
}