use crate::core::error::Result;
use crate::core::id::NodeId;
use crate::core::vector::{cosine_similarity, validate_vector};
use crate::query::traits::GraphView;
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashSet};
#[derive(Debug, Clone, PartialEq)]
struct ScoredCandidate {
node_id: NodeId,
similarity: f32,
}
impl ScoredCandidate {
fn new(node_id: NodeId, similarity: f32) -> Self {
Self {
node_id,
similarity,
}
}
}
impl Eq for ScoredCandidate {}
impl PartialOrd for ScoredCandidate {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for ScoredCandidate {
fn cmp(&self, other: &Self) -> Ordering {
other
.similarity
.partial_cmp(&self.similarity)
.unwrap_or(Ordering::Equal)
}
}
pub fn traverse_and_rank<G: GraphView + ?Sized>(
db: &G,
start: NodeId,
edge_label: &str,
target_embedding: &[f32],
k: usize,
) -> Result<Vec<(NodeId, f32)>> {
validate_vector(target_embedding)?;
let _start_node = db.get_node(start)?;
let edge_ids = db.get_outgoing_edges_with_label(start, edge_label);
let mut top_k_heap = BinaryHeap::with_capacity(k);
let mut visited = HashSet::with_capacity(edge_ids.len().min(k * 2));
for edge_id in edge_ids {
let target_id = db.get_edge_target(edge_id)?;
if visited.contains(&target_id) {
continue;
}
visited.insert(target_id);
let target_node = match db.get_node(target_id) {
Ok(node) => node,
Err(_) => continue, };
let embedding = match target_node.get_property("embedding") {
Some(prop) => match prop.as_vector() {
Some(vec) => vec,
None => continue, },
None => continue, };
match cosine_similarity(target_embedding, embedding) {
Ok(similarity) => {
let candidate = ScoredCandidate::new(target_id, similarity);
if top_k_heap.len() < k {
top_k_heap.push(candidate);
} else if let Some(min_candidate) = top_k_heap.peek() {
if similarity > min_candidate.similarity {
top_k_heap.pop(); top_k_heap.push(candidate); }
}
}
Err(_e) => {
#[cfg(feature = "observability")]
tracing::warn!(
target_id = %target_id,
error = %_e,
"Skipping node in traverse_and_rank due to incompatible embedding"
);
continue;
}
}
}
let mut results: Vec<(NodeId, f32)> = top_k_heap
.into_iter()
.map(|c| (c.node_id, c.similarity))
.collect();
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
Ok(results)
}
pub fn find_similar_as_of<G: GraphView + ?Sized>(
db: &G,
embedding: &[f32],
k: usize,
timestamp: crate::core::temporal::Timestamp,
) -> Result<Vec<(NodeId, f32)>> {
validate_vector(embedding)?;
db.find_similar_as_of(embedding, k, timestamp)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::api::transaction::WriteOps;
use crate::core::error::VectorError;
use crate::core::property::PropertyMapBuilder;
use crate::db::AletheiaDB;
use crate::index::vector::{DistanceMetric, HnswConfig};
fn create_test_db() -> AletheiaDB {
let db = AletheiaDB::new().unwrap();
let config = HnswConfig::new(4, DistanceMetric::Cosine);
db.enable_vector_index("embedding", config)
.expect("Failed to enable vector index");
db
}
fn create_social_graph(db: &AletheiaDB) -> (NodeId, NodeId, NodeId, NodeId) {
let alice = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Alice")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create Alice");
let bob = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Bob")
.insert_vector("embedding", &[0.9f32, 0.1, 0.0, 0.0]) .build(),
)
.expect("Failed to create Bob");
let carol = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Carol")
.insert_vector("embedding", &[0.0f32, 1.0, 0.0, 0.0]) .build(),
)
.expect("Failed to create Carol");
let dave = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Dave")
.insert_vector("embedding", &[0.8f32, 0.2, 0.0, 0.0]) .build(),
)
.expect("Failed to create Dave");
db.create_edge(alice, bob, "KNOWS", PropertyMapBuilder::new().build())
.expect("Failed to create Alice->Bob edge");
db.create_edge(alice, carol, "KNOWS", PropertyMapBuilder::new().build())
.expect("Failed to create Alice->Carol edge");
db.create_edge(alice, dave, "KNOWS", PropertyMapBuilder::new().build())
.expect("Failed to create Alice->Dave edge");
(alice, bob, carol, dave)
}
#[test]
fn test_traverse_and_rank_basic() {
let db = create_test_db();
let (alice, bob, carol, dave) = create_social_graph(&db);
let alice_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results =
traverse_and_rank(&db, alice, "KNOWS", &alice_embedding, 10).expect("Query failed");
assert_eq!(results.len(), 3, "Should return all 3 neighbors");
assert_eq!(results[0].0, bob, "Bob should be most similar");
assert_eq!(results[1].0, dave, "Dave should be second most similar");
assert_eq!(results[2].0, carol, "Carol should be least similar");
assert!(
results[0].1 > results[1].1,
"Scores should be in descending order"
);
assert!(
results[1].1 > results[2].1,
"Scores should be in descending order"
);
for (_, score) in &results {
assert!(
*score >= -1.0 && *score <= 1.0,
"Cosine similarity should be in [-1, 1]"
);
}
}
#[test]
fn test_traverse_and_rank_respects_k_limit() {
let db = create_test_db();
let (alice, bob, _carol, _dave) = create_social_graph(&db);
let alice_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results =
traverse_and_rank(&db, alice, "KNOWS", &alice_embedding, 1).expect("Query failed");
assert_eq!(results.len(), 1, "Should respect k=1 limit");
assert_eq!(results[0].0, bob, "Should return most similar (Bob)");
}
#[test]
fn test_traverse_and_rank_no_neighbors() {
let db = create_test_db();
let isolated = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Isolated")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create isolated node");
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results = traverse_and_rank(&db, isolated, "KNOWS", &query_embedding, 10)
.expect("Query should succeed");
assert_eq!(
results.len(),
0,
"Should return empty results for isolated node"
);
}
#[test]
fn test_traverse_and_rank_node_not_found() {
let db = create_test_db();
let fake_id = NodeId::new(99999).expect("valid id");
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let result = traverse_and_rank(&db, fake_id, "KNOWS", &query_embedding, 10);
assert!(
result.is_err(),
"Should return error for non-existent start node"
);
assert!(
matches!(
result.unwrap_err(),
crate::core::error::Error::Storage(crate::core::error::StorageError::NodeNotFound(
_
))
),
"Should return NodeNotFound error"
);
}
#[test]
fn test_traverse_and_rank_invalid_embedding() {
let db = create_test_db();
let (alice, _bob, _carol, _dave) = create_social_graph(&db);
let nan_embedding = [f32::NAN, 0.0, 0.0, 0.0];
let result = traverse_and_rank(&db, alice, "KNOWS", &nan_embedding, 10);
assert!(result.is_err(), "Should reject NaN embedding");
assert!(
matches!(
result.unwrap_err(),
crate::core::error::Error::Vector(VectorError::ContainsNaN { .. })
),
"Should return ContainsNaN error"
);
let inf_embedding = [f32::INFINITY, 0.0, 0.0, 0.0];
let result = traverse_and_rank(&db, alice, "KNOWS", &inf_embedding, 10);
assert!(result.is_err(), "Should reject Infinity embedding");
assert!(
matches!(
result.unwrap_err(),
crate::core::error::Error::Vector(VectorError::ContainsInfinity { .. })
),
"Should return ContainsInfinity error"
);
}
#[test]
fn test_traverse_and_rank_handles_cycles() {
let db = create_test_db();
let node_a = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "A")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create A");
let node_b = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "B")
.insert_vector("embedding", &[0.9f32, 0.1, 0.0, 0.0])
.build(),
)
.expect("Failed to create B");
let node_c = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "C")
.insert_vector("embedding", &[0.8f32, 0.2, 0.0, 0.0])
.build(),
)
.expect("Failed to create C");
db.create_edge(node_a, node_b, "NEXT", PropertyMapBuilder::new().build())
.expect("Failed to create A->B edge");
db.create_edge(node_b, node_c, "NEXT", PropertyMapBuilder::new().build())
.expect("Failed to create B->C edge");
db.create_edge(node_c, node_a, "NEXT", PropertyMapBuilder::new().build())
.expect("Failed to create C->A edge");
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results = traverse_and_rank(&db, node_a, "NEXT", &query_embedding, 10)
.expect("Query should handle cycles");
assert_eq!(
results.len(),
1,
"Should return only direct neighbors, not cycle back"
);
assert_eq!(results[0].0, node_b, "Should return node B");
}
#[test]
fn test_traverse_and_rank_nodes_without_embeddings() {
let db = create_test_db();
let alice = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Alice")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create Alice");
let bob = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Bob")
.build(),
)
.expect("Failed to create Bob");
let carol = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Carol")
.insert_vector("embedding", &[0.9f32, 0.1, 0.0, 0.0])
.build(),
)
.expect("Failed to create Carol");
db.create_edge(alice, bob, "KNOWS", PropertyMapBuilder::new().build())
.expect("Failed to create Alice->Bob edge");
db.create_edge(alice, carol, "KNOWS", PropertyMapBuilder::new().build())
.expect("Failed to create Alice->Carol edge");
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results = traverse_and_rank(&db, alice, "KNOWS", &query_embedding, 10)
.expect("Query should handle missing embeddings");
assert_eq!(
results.len(),
1,
"Should skip node without embedding and return only Carol"
);
assert_eq!(results[0].0, carol, "Should return Carol (has embedding)");
}
#[test]
fn test_traverse_and_rank_respects_edge_label() {
let db = create_test_db();
let alice = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Alice")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create Alice");
let bob = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Bob")
.insert_vector("embedding", &[0.9f32, 0.1, 0.0, 0.0])
.build(),
)
.expect("Failed to create Bob");
let carol = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Carol")
.insert_vector("embedding", &[0.8f32, 0.2, 0.0, 0.0])
.build(),
)
.expect("Failed to create Carol");
db.create_edge(alice, bob, "KNOWS", PropertyMapBuilder::new().build())
.expect("Failed to create Alice->Bob KNOWS edge");
db.create_edge(
alice,
carol,
"WORKS_WITH",
PropertyMapBuilder::new().build(),
)
.expect("Failed to create Alice->Carol WORKS_WITH edge");
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results =
traverse_and_rank(&db, alice, "KNOWS", &query_embedding, 10).expect("Query failed");
assert_eq!(
results.len(),
1,
"Should only traverse KNOWS edges, not WORKS_WITH"
);
assert_eq!(results[0].0, bob, "Should return Bob (KNOWS edge)");
}
#[test]
fn test_traverse_and_rank_empty_database() {
let db = create_test_db();
let lonely = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Lonely")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create lonely node");
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results = traverse_and_rank(&db, lonely, "KNOWS", &query_embedding, 10)
.expect("Query should succeed on node with no edges");
assert_eq!(
results.len(),
0,
"Should return empty results when node has no outgoing edges"
);
}
#[test]
fn test_traverse_and_rank_self_loop() {
let db = create_test_db();
let narcissist = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Narcissist")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create narcissist node");
db.create_edge(
narcissist,
narcissist,
"LIKES",
PropertyMapBuilder::new().build(),
)
.expect("Failed to create self-loop");
let query_embedding = [0.9f32, 0.1, 0.0, 0.0];
let results = traverse_and_rank(&db, narcissist, "LIKES", &query_embedding, 10)
.expect("Query should handle self-loop");
assert_eq!(
results.len(),
1,
"Should return self as neighbor (self-loop)"
);
assert_eq!(
results[0].0, narcissist,
"Should return self as result of self-loop"
);
}
fn create_temporal_test_db() -> AletheiaDB {
use crate::index::vector::temporal::{
RetentionPolicy, SnapshotStrategy, TemporalVectorConfig,
};
let db = AletheiaDB::new().unwrap();
let hnsw_config = HnswConfig::new(4, DistanceMetric::Cosine);
let temporal_config = TemporalVectorConfig {
snapshot_strategy: SnapshotStrategy::TransactionInterval(1),
retention_policy: RetentionPolicy::KeepN(100),
max_snapshots: 100,
full_snapshot_interval: 5,
hnsw_config: Some(hnsw_config),
};
db.enable_temporal_vector_index("embedding", temporal_config)
.expect("Failed to enable temporal vector index");
db
}
#[test]
fn test_find_similar_as_of_basic() {
let db = create_temporal_test_db();
let alice = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Alice")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create Alice");
let bob = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Bob")
.insert_vector("embedding", &[0.9f32, 0.1, 0.0, 0.0])
.build(),
)
.expect("Failed to create Bob");
let carol = db
.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Carol")
.insert_vector("embedding", &[0.0f32, 1.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create Carol");
use crate::core::temporal::time;
let timestamp = time::now();
let alice_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results =
find_similar_as_of(&db, &alice_embedding, 10, timestamp).expect("Query should succeed");
assert_eq!(results.len(), 3, "Should return all nodes");
assert_eq!(results[0].0, alice, "Alice should be most similar");
assert_eq!(results[1].0, bob, "Bob should be second most similar");
assert_eq!(results[2].0, carol, "Carol should be least similar");
assert!(
results[0].1 >= results[1].1,
"Scores should be in descending order"
);
assert!(
results[1].1 >= results[2].1,
"Scores should be in descending order"
);
}
#[test]
fn test_find_similar_as_of_respects_k_limit() {
let db = create_temporal_test_db();
for i in 0..5 {
let vector = [i as f32 / 5.0, 1.0 - i as f32 / 5.0, 0.0, 0.0];
db.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", format!("Person{}", i))
.insert_vector("embedding", &vector)
.build(),
)
.expect("Failed to create node");
}
use crate::core::temporal::time;
let timestamp = time::now();
let query_embedding = [0.5f32, 0.5, 0.0, 0.0];
let results =
find_similar_as_of(&db, &query_embedding, 2, timestamp).expect("Query should succeed");
assert_eq!(results.len(), 2, "Should respect k=2 limit");
}
#[test]
fn test_find_similar_as_of_temporal_consistency() {
let db = create_temporal_test_db();
let node = db
.create_node(
"Document",
PropertyMapBuilder::new()
.insert("title", "Doc")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create node");
use crate::core::temporal::time;
let timestamp_before = time::now();
std::thread::sleep(std::time::Duration::from_millis(10));
db.write(|tx| {
tx.update_node(
node,
PropertyMapBuilder::new()
.insert_vector("embedding", &[0.0f32, 1.0, 0.0, 0.0])
.build(),
)
})
.expect("Failed to update node");
let timestamp_after = time::now();
let old_query = [1.0f32, 0.0, 0.0, 0.0];
let results_old = find_similar_as_of(&db, &old_query, 10, timestamp_before)
.expect("Query at old timestamp should succeed");
assert_eq!(
results_old.len(),
1,
"Should find one node at old timestamp"
);
assert_eq!(results_old[0].0, node, "Should find the same node");
assert!(
results_old[0].1 > 0.99,
"Old embedding should be very similar to old query"
);
let new_query = [0.0f32, 1.0, 0.0, 0.0];
let results_new = find_similar_as_of(&db, &new_query, 10, timestamp_after)
.expect("Query at new timestamp should succeed");
assert_eq!(
results_new.len(),
1,
"Should find one node at new timestamp"
);
assert_eq!(results_new[0].0, node, "Should find the same node");
assert!(
results_new[0].1 > 0.99,
"New embedding should be very similar to new query"
);
}
#[test]
fn test_find_similar_as_of_invalid_embedding() {
let db = create_temporal_test_db();
use crate::core::temporal::time;
let timestamp = time::now();
let nan_embedding = [f32::NAN, 0.0, 0.0, 0.0];
let result = find_similar_as_of(&db, &nan_embedding, 10, timestamp);
assert!(result.is_err(), "Should reject NaN embedding");
assert!(
matches!(
result.unwrap_err(),
crate::core::error::Error::Vector(VectorError::ContainsNaN { .. })
),
"Should return ContainsNaN error"
);
let inf_embedding = [f32::INFINITY, 0.0, 0.0, 0.0];
let result = find_similar_as_of(&db, &inf_embedding, 10, timestamp);
assert!(result.is_err(), "Should reject Infinity embedding");
assert!(
matches!(
result.unwrap_err(),
crate::core::error::Error::Vector(VectorError::ContainsInfinity { .. })
),
"Should return ContainsInfinity error"
);
}
#[test]
fn test_find_similar_as_of_no_temporal_index() {
let db = create_test_db();
db.create_node(
"Person",
PropertyMapBuilder::new()
.insert("name", "Alice")
.insert_vector("embedding", &[1.0f32, 0.0, 0.0, 0.0])
.build(),
)
.expect("Failed to create node");
use crate::core::temporal::time;
let timestamp = time::now();
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let result = find_similar_as_of(&db, &query_embedding, 10, timestamp);
assert!(
result.is_err(),
"Should return error when temporal index not enabled"
);
assert!(
matches!(
result.unwrap_err(),
crate::core::error::Error::Vector(VectorError::IndexError(_))
),
"Should return IndexError"
);
}
#[test]
fn test_find_similar_as_of_empty_database() {
let db = create_temporal_test_db();
use crate::core::temporal::time;
let timestamp = time::now();
let query_embedding = [1.0f32, 0.0, 0.0, 0.0];
let results = find_similar_as_of(&db, &query_embedding, 10, timestamp)
.expect("Query on empty database should succeed");
assert_eq!(
results.len(),
0,
"Should return empty results for empty database"
);
}
}