use crate::AletheiaDB;
use crate::core::error::{Error, Result, VectorError};
use crate::core::id::NodeId;
use crate::core::vector::ops::normalize;
/// One semantic facet discovered among a node's neighborhood vectors.
#[derive(Debug, Clone)]
pub struct Aspect {
    // Mean vector of the underlying cluster, normalized before being returned.
    pub centroid: Vec<f32>,
    // Fraction of the clustered neighbors that fell into this aspect.
    pub weight: f32,
    // Up to three neighbor ids closest to the centroid, nearest first.
    pub exemplars: Vec<NodeId>,
}
/// Contextual aspect analysis over a node's neighborhood: the same node can
/// expose different "facets" depending on which aspect of its context is used.
pub struct Chameleon<'a> {
    // Borrowed database handle; all operations here are read-only queries.
    db: &'a AletheiaDB,
}
impl<'a> Chameleon<'a> {
    /// Binds an analyzer to a database handle.
    pub fn new(db: &'a AletheiaDB) -> Self {
        Self { db }
    }

    /// Clusters the `property` vectors of `node_id`'s outgoing neighbors into
    /// at most `k` aspects, returned heaviest-first.
    ///
    /// Collection is best-effort: neighbors that lack the property, fail to
    /// load, or whose vector dimension disagrees with the first vector seen
    /// are silently skipped. Returns an empty list when nothing is clusterable.
    pub fn analyze_context(
        &self,
        node_id: NodeId,
        property: &str,
        k: usize,
    ) -> Result<Vec<Aspect>> {
        let neighbors = self.get_neighbors(node_id)?;
        if neighbors.is_empty() {
            return Ok(Vec::new());
        }

        // Gather (id, vector) pairs, pinning the dimension to the first
        // non-empty vector encountered.
        let mut points: Vec<(NodeId, Vec<f32>)> = Vec::with_capacity(neighbors.len());
        let mut dim: Option<usize> = None;
        for &nid in &neighbors {
            let vector = match self.get_node_vector(nid, property) {
                Ok(v) => v,
                Err(_) => continue,
            };
            match dim {
                Some(d) if vector.len() == d => points.push((nid, vector)),
                Some(_) => {} // dimension mismatch: drop this neighbor
                None if !vector.is_empty() => {
                    dim = Some(vector.len());
                    points.push((nid, vector));
                }
                None => {} // empty vector cannot establish a dimension
            }
        }
        if points.is_empty() {
            return Ok(Vec::new());
        }

        // Never ask for more clusters than there are points; k == 0 means
        // "no aspects requested".
        let cluster_count = k.min(points.len());
        if cluster_count == 0 {
            return Ok(Vec::new());
        }

        let total = points.len() as f32;
        let mut aspects = Vec::with_capacity(cluster_count);
        for cluster in MiniKMeans::cluster(&points, cluster_count) {
            if cluster.points.is_empty() {
                continue;
            }
            // Rank members by squared distance to the centroid and keep the
            // three closest as exemplars.
            let mut ranked: Vec<(NodeId, f32)> = cluster
                .points
                .iter()
                .map(|&(nid, ref v)| (nid, dist_sq(v, &cluster.centroid)))
                .collect();
            ranked.sort_by(|x, y| x.1.partial_cmp(&y.1).unwrap_or(std::cmp::Ordering::Equal));
            aspects.push(Aspect {
                centroid: normalize(&cluster.centroid),
                weight: cluster.points.len() as f32 / total,
                exemplars: ranked.into_iter().take(3).map(|(nid, _)| nid).collect(),
            });
        }

        // Heaviest (most populous) aspect first.
        aspects.sort_by(|x, y| {
            y.weight
                .partial_cmp(&x.weight)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        Ok(aspects)
    }

    /// Vector search biased toward one facet of the node's context: analyzes
    /// up to 5 aspects, then searches near the chosen aspect's centroid.
    ///
    /// Errors if `aspect_index` is beyond the number of discovered aspects.
    pub fn facet_search(
        &self,
        node_id: NodeId,
        property: &str,
        aspect_index: usize,
        limit: usize,
    ) -> Result<Vec<(NodeId, f32)>> {
        let aspects = self.analyze_context(node_id, property, 5)?;
        let aspect = aspects.get(aspect_index).ok_or_else(|| {
            Error::Vector(VectorError::IndexError(format!(
                "Aspect index {} out of bounds (found {} aspects)",
                aspect_index,
                aspects.len()
            )))
        })?;
        self.db.search_vectors_in(property, &aspect.centroid, limit)
    }

    /// Targets of every outgoing edge of `node_id`; fails on the first edge
    /// that cannot be loaded.
    fn get_neighbors(&self, node_id: NodeId) -> Result<Vec<NodeId>> {
        self.db
            .get_outgoing_edges(node_id)
            .into_iter()
            .map(|edge_id| self.db.get_edge(edge_id).map(|edge| edge.target))
            .collect()
    }

    /// Reads `property` from the node, requiring it to hold a vector value,
    /// and returns an owned copy of that vector.
    fn get_node_vector(&self, node_id: NodeId, property: &str) -> Result<Vec<f32>> {
        let node = self.db.get_node(node_id)?;
        let value = node.properties.get(property).ok_or_else(|| {
            Error::Vector(VectorError::IndexError(format!(
                "Property '{}' missing",
                property
            )))
        })?;
        value
            .as_vector()
            .ok_or_else(|| {
                Error::Vector(VectorError::IndexError(format!(
                    "Property '{}' not a vector",
                    property
                )))
            })
            .map(|v| v.to_vec())
    }
}
/// Internal k-means cluster: a centroid plus the points assigned to it.
struct Cluster {
    centroid: Vec<f32>,
    points: Vec<(NodeId, Vec<f32>)>,
}
/// Minimal Lloyd's-algorithm k-means used for aspect discovery.
struct MiniKMeans;

impl MiniKMeans {
    /// Partitions `data` into at most `k` clusters over at most 20 Lloyd
    /// iterations. Seeding is deterministic: the first `k` input vectors.
    /// All vectors are assumed to share the dimensionality of the first one.
    /// Clusters that end up empty are still returned (callers filter them).
    fn cluster(data: &[(NodeId, Vec<f32>)], k: usize) -> Vec<Cluster> {
        if data.is_empty() || k == 0 {
            return Vec::new();
        }
        let dim = data[0].1.len();
        let mut centroids: Vec<Vec<f32>> =
            data.iter().take(k).map(|(_, v)| v.clone()).collect();
        let mut assignments = vec![0usize; data.len()];

        for _ in 0..20 {
            let mut moved = false;
            let mut sums = vec![vec![0.0f32; dim]; k];
            let mut counts = vec![0usize; k];

            // Assignment step: each point joins its nearest centroid while
            // accumulating per-cluster sums for the update step below.
            for (i, (_, point)) in data.iter().enumerate() {
                let nearest = Self::nearest_centroid(point, &centroids);
                if assignments[i] != nearest {
                    assignments[i] = nearest;
                    moved = true;
                }
                for (acc, component) in sums[nearest].iter_mut().zip(point.iter()) {
                    *acc += component;
                }
                counts[nearest] += 1;
            }

            // Update step: move every non-empty centroid to its members' mean;
            // empty centroids keep their previous position.
            for (c, centroid) in centroids.iter_mut().enumerate() {
                if counts[c] > 0 {
                    for (slot, sum) in centroid.iter_mut().zip(&sums[c]) {
                        *slot = *sum / counts[c] as f32;
                    }
                }
            }

            // Converged once no point changed cluster this iteration.
            if !moved {
                break;
            }
        }

        // Materialize clusters from the final assignment.
        let mut clusters: Vec<Cluster> = centroids
            .into_iter()
            .map(|centroid| Cluster {
                centroid,
                points: Vec::new(),
            })
            .collect();
        for (i, (nid, vector)) in data.iter().enumerate() {
            clusters[assignments[i]].points.push((*nid, vector.clone()));
        }
        clusters
    }

    /// Index of the centroid nearest to `point` (squared Euclidean distance,
    /// first-wins on ties).
    fn nearest_centroid(point: &[f32], centroids: &[Vec<f32>]) -> usize {
        let mut best = 0;
        let mut best_dist = f32::MAX;
        for (idx, candidate) in centroids.iter().enumerate() {
            let d = dist_sq(point, candidate);
            if d < best_dist {
                best_dist = d;
                best = idx;
            }
        }
        best
    }
}
/// Squared Euclidean distance over the overlapping prefix of `a` and `b`
/// (the zip stops at the shorter slice, so extra trailing components of the
/// longer slice are ignored).
fn dist_sq(a: &[f32], b: &[f32]) -> f32 {
    let mut acc = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        let d = x - y;
        acc += d * d;
    }
    acc
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property::PropertyMapBuilder;
    use crate::index::vector::{DistanceMetric, HnswConfig};

    /// Two tight, well-separated clumps (around [0,0] and [10,10]) linked to
    /// a hub node must cluster into exactly two aspects.
    #[test]
    fn test_chameleon_clustering() {
        let db = AletheiaDB::new().unwrap();
        let config = HnswConfig::new(2, DistanceMetric::Euclidean);
        db.enable_vector_index("vec", config).unwrap();
        // The hub itself carries no "vec" property; only neighbors are clustered.
        let center = db
            .create_node("Center", PropertyMapBuilder::new().build())
            .unwrap();
        let n1 = db
            .create_node(
                "A",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[0.0, 0.0])
                    .build(),
            )
            .unwrap();
        let n2 = db
            .create_node(
                "A",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[0.1, 0.1])
                    .build(),
            )
            .unwrap();
        let n3 = db
            .create_node(
                "B",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[10.0, 10.0])
                    .build(),
            )
            .unwrap();
        let n4 = db
            .create_node(
                "B",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[10.1, 10.1])
                    .build(),
            )
            .unwrap();
        // All four neighbors hang off the hub via outgoing LINK edges.
        for n in [n1, n2, n3, n4] {
            db.create_edge(center, n, "LINK", PropertyMapBuilder::new().build())
                .unwrap();
        }
        let chameleon = Chameleon::new(&db);
        let aspects = chameleon.analyze_context(center, "vec", 2).unwrap();
        assert_eq!(aspects.len(), 2);
    }

    /// Two clumps hugging orthogonal axes must split into two roughly equal
    /// aspects whose (normalized) centroids sit near the X or Y axis.
    #[test]
    fn test_chameleon_clustering_orthogonal() {
        let db = AletheiaDB::new().unwrap();
        let config = HnswConfig::new(2, DistanceMetric::Euclidean);
        db.enable_vector_index("vec", config).unwrap();
        let center = db
            .create_node("Center", PropertyMapBuilder::new().build())
            .unwrap();
        let n1 = db
            .create_node(
                "A",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[1.0, 0.0])
                    .build(),
            )
            .unwrap();
        let n2 = db
            .create_node(
                "A",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[0.9, 0.1])
                    .build(),
            )
            .unwrap();
        let n3 = db
            .create_node(
                "B",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[0.0, 1.0])
                    .build(),
            )
            .unwrap();
        let n4 = db
            .create_node(
                "B",
                PropertyMapBuilder::new()
                    .insert_vector("vec", &[0.1, 0.9])
                    .build(),
            )
            .unwrap();
        for n in [n1, n2, n3, n4] {
            db.create_edge(center, n, "LINK", PropertyMapBuilder::new().build())
                .unwrap();
        }
        let chameleon = Chameleon::new(&db);
        let aspects = chameleon.analyze_context(center, "vec", 2).unwrap();
        assert_eq!(aspects.len(), 2);
        // 2 of 4 points per cluster -> each aspect weighs ~0.5.
        assert!((aspects[0].weight - 0.5).abs() < 0.1);
        assert!((aspects[1].weight - 0.5).abs() < 0.1);
        // Cluster order is not guaranteed, so accept either axis for aspect 0.
        let c0 = &aspects[0].centroid;
        let is_x = (c0[0] - 1.0).abs() < 0.2;
        let is_y = (c0[1] - 1.0).abs() < 0.2;
        assert!(
            is_x || is_y,
            "Centroid 0 {:?} should be near X or Y axis",
            c0
        );
    }

    /// Neighbors whose vector dimension disagrees with the first-seen
    /// dimension are skipped rather than crashing the analysis: only the 2-d
    /// vector survives, yielding a single aspect with n1 as its exemplar.
    /// (No vector index is enabled here — analyze_context does not need one.)
    #[test]
    fn test_chameleon_mixed_dimensions_safe() {
        let db = AletheiaDB::new().unwrap();
        let center_props = PropertyMapBuilder::new().build();
        let center = db.create_node("Center", center_props).unwrap();
        let p1 = PropertyMapBuilder::new()
            .insert_vector("vec", &[1.0, 0.0])
            .build();
        let n1 = db.create_node("A", p1).unwrap();
        // 3-d vector: mismatched with n1's 2-d vector, expected to be dropped.
        let p2 = PropertyMapBuilder::new()
            .insert_vector("vec", &[1.0, 0.0, 0.0])
            .build();
        let n2 = db.create_node("B", p2).unwrap();
        let edge_props = PropertyMapBuilder::new().build();
        db.create_edge(center, n1, "LINK", edge_props.clone())
            .unwrap();
        db.create_edge(center, n2, "LINK", edge_props).unwrap();
        let chameleon = Chameleon::new(&db);
        let aspects = chameleon.analyze_context(center, "vec", 2).unwrap();
        assert_eq!(aspects.len(), 1);
        assert_eq!(aspects[0].exemplars[0], n1);
    }
}