use crate::core::error::Result;
use crate::core::hasher::IdentityHasher;
use crate::core::id::NodeId;
use crate::core::temporal::Timestamp;
use crate::core::vector::cosine_similarity;
use crate::query::traits::GraphView;
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
use std::hash::BuildHasherDefault;
const STRUCTURAL_HOP_COST: f32 = 0.1;
const NO_EMBEDDING_COST: f32 = 1.0;
#[derive(Debug, Clone, Copy, PartialEq)]
struct State {
cost: f32,
node: NodeId,
depth: usize,
}
impl Eq for State {}
impl Ord for State {
fn cmp(&self, other: &Self) -> Ordering {
if self.cost.is_nan() {
return Ordering::Less; }
if other.cost.is_nan() {
return Ordering::Greater; }
other
.cost
.partial_cmp(&self.cost)
.unwrap_or(Ordering::Equal)
}
}
impl PartialOrd for State {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
pub struct SemanticPathfinder<'a, G: GraphView + ?Sized> {
db: &'a G,
vector_property: String,
}
impl<'a, G: GraphView + ?Sized> SemanticPathfinder<'a, G> {
pub fn new(db: &'a G, vector_property: &str) -> Self {
Self {
db,
vector_property: vector_property.to_string(),
}
}
pub fn find_path(
&self,
start: NodeId,
end: NodeId,
query_embedding: &[f32],
max_depth: usize,
bidirectional: bool,
) -> Result<Option<Vec<NodeId>>> {
let mut pq = BinaryHeap::new();
let mut dist: HashMap<NodeId, f32, BuildHasherDefault<IdentityHasher>> = HashMap::default();
let mut came_from: HashMap<NodeId, NodeId, BuildHasherDefault<IdentityHasher>> =
HashMap::default();
dist.insert(start, 0.0);
pq.push(State {
cost: 0.0,
node: start,
depth: 0,
});
let mut neighbors = Vec::with_capacity(32);
while let Some(State { cost, node, depth }) = pq.pop() {
if node == end {
return Ok(Some(self.reconstruct_path(came_from, end)));
}
#[allow(clippy::collapsible_if)]
if let Some(&d) = dist.get(&node) {
if cost > d {
continue;
}
}
if depth >= max_depth {
continue;
}
neighbors.clear();
for edge_id in self.db.get_outgoing_edges(node) {
if let Ok(target) = self.db.get_edge_target(edge_id) {
neighbors.push(target);
}
}
if bidirectional {
for edge_id in self.db.get_incoming_edges(node) {
if let Ok(edge) = self.db.get_edge(edge_id) {
neighbors.push(edge.source);
}
}
}
for &target in &neighbors {
let semantic_cost = self.calculate_semantic_cost(target, query_embedding)?;
let new_cost = cost + semantic_cost + STRUCTURAL_HOP_COST;
if new_cost < *dist.get(&target).unwrap_or(&f32::INFINITY) {
dist.insert(target, new_cost);
came_from.insert(target, node);
pq.push(State {
cost: new_cost,
node: target,
depth: depth + 1,
});
}
}
}
Ok(None)
}
pub fn find_path_at_time(
&self,
start: NodeId,
end: NodeId,
query_embedding: &[f32],
time: Timestamp,
max_depth: usize,
bidirectional: bool,
) -> Result<Option<Vec<NodeId>>> {
let mut pq = BinaryHeap::new();
let mut dist: HashMap<NodeId, f32, BuildHasherDefault<IdentityHasher>> = HashMap::default();
let mut came_from: HashMap<NodeId, NodeId, BuildHasherDefault<IdentityHasher>> =
HashMap::default();
if self.db.get_node_at_time(start, time, time).is_err() {
return Ok(None);
}
dist.insert(start, 0.0);
pq.push(State {
cost: 0.0,
node: start,
depth: 0,
});
let mut neighbor_edges = Vec::with_capacity(32);
while let Some(State { cost, node, depth }) = pq.pop() {
if node == end {
return Ok(Some(self.reconstruct_path(came_from, end)));
}
#[allow(clippy::collapsible_if)]
if let Some(&d) = dist.get(&node) {
if cost > d {
continue;
}
}
if depth >= max_depth {
continue;
}
neighbor_edges.clear();
for edge_id in self.db.get_outgoing_edges_at_time(node, time, time) {
neighbor_edges.push((edge_id, true)); }
if bidirectional {
for edge_id in self.db.get_incoming_edges_at_time(node, time, time) {
neighbor_edges.push((edge_id, false)); }
}
for &(edge_id, is_outgoing) in &neighbor_edges {
if let Ok(edge) = self.db.get_edge_at_time(edge_id, time, time) {
let target = if is_outgoing {
edge.target
} else {
edge.source
};
if let Ok(target_node) = self.db.get_node_at_time(target, time, time) {
let target_embedding = target_node
.properties
.get(&self.vector_property)
.and_then(|v| v.as_vector());
let semantic_cost =
self.compute_semantic_cost(target_embedding, query_embedding)?;
let new_cost = cost + semantic_cost + STRUCTURAL_HOP_COST;
if new_cost < *dist.get(&target).unwrap_or(&f32::INFINITY) {
dist.insert(target, new_cost);
came_from.insert(target, node);
pq.push(State {
cost: new_cost,
node: target,
depth: depth + 1,
});
}
}
}
}
}
Ok(None)
}
fn calculate_semantic_cost(&self, node_id: NodeId, query: &[f32]) -> Result<f32> {
let node = self.db.get_node(node_id)?;
let embedding = node
.properties
.get(&self.vector_property)
.and_then(|v| v.as_vector());
self.compute_semantic_cost(embedding, query)
}
fn compute_semantic_cost(&self, embedding: Option<&[f32]>, query: &[f32]) -> Result<f32> {
if let Some(emb) = embedding {
match cosine_similarity(emb, query) {
Ok(sim) => {
Ok(1.0 - sim)
}
Err(crate::core::error::Error::Vector(
crate::core::error::VectorError::DimensionMismatch { .. },
)) => {
Ok(f32::INFINITY)
}
Err(e) => Err(e),
}
} else {
Ok(NO_EMBEDDING_COST)
}
}
fn reconstruct_path(
&self,
came_from: HashMap<NodeId, NodeId, BuildHasherDefault<IdentityHasher>>,
current: NodeId,
) -> Vec<NodeId> {
let mut path = vec![current];
let mut curr = current;
while let Some(&prev) = came_from.get(&curr) {
path.push(prev);
curr = prev;
}
path.reverse();
path
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::api::transaction::WriteOps;
use crate::core::error::Error;
use crate::core::property::PropertyMapBuilder;
use crate::db::AletheiaDB;
use crate::index::vector::{DistanceMetric, HnswConfig};
fn create_test_db() -> AletheiaDB {
let db = AletheiaDB::new().unwrap();
db.vector_index("embedding")
.hnsw(HnswConfig::new(3, DistanceMetric::Cosine))
.enable()
.unwrap();
db
}
#[test]
fn test_semantic_pathfinding_prefers_similar_nodes() {
let db = create_test_db();
let fruit_vec = vec![1.0, 0.0, 0.0];
let tech_vec = vec![0.0, 1.0, 0.0];
let start = db
.create_node(
"Start",
PropertyMapBuilder::new()
.insert_vector("embedding", &[0.5, 0.5, 0.0])
.build(),
)
.unwrap();
let end = db
.create_node(
"End",
PropertyMapBuilder::new()
.insert_vector("embedding", &[0.5, 0.5, 0.0])
.build(),
)
.unwrap();
let apple = db
.create_node(
"Apple",
PropertyMapBuilder::new()
.insert_vector("embedding", &fruit_vec)
.build(),
)
.unwrap();
let laptop = db
.create_node(
"Laptop",
PropertyMapBuilder::new()
.insert_vector("embedding", &tech_vec)
.build(),
)
.unwrap();
db.create_edge(start, apple, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(apple, end, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(start, laptop, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(laptop, end, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
let query = vec![0.9, 0.1, 0.0];
let pathfinder = SemanticPathfinder::new(&db, "embedding");
let path = pathfinder
.find_path(start, end, &query, 10, false)
.unwrap()
.unwrap();
assert_eq!(path.len(), 3);
assert_eq!(path[0], start);
assert_eq!(path[1], apple);
assert_eq!(path[2], end);
}
#[test]
fn test_semantic_pathfinding_time_travel() {
use crate::core::temporal::time;
let db = create_test_db();
let _now = time::now();
let start = db
.create_node("Start", PropertyMapBuilder::new().build())
.unwrap();
let middle = db
.create_node(
"Middle",
PropertyMapBuilder::new()
.insert_vector("embedding", &[1.0, 0.0, 0.0])
.build(),
)
.unwrap();
let end = db
.create_node("End", PropertyMapBuilder::new().build())
.unwrap();
let (_, t_edges) = db
.write_with_timestamp(|tx| {
tx.create_edge(start, middle, "NEXT", PropertyMapBuilder::new().build())?;
tx.create_edge(middle, end, "NEXT", PropertyMapBuilder::new().build())?;
Ok::<_, Error>(())
})
.unwrap();
let t0 = t_edges;
let query = vec![1.0, 0.0, 0.0];
let pathfinder = SemanticPathfinder::new(&db, "embedding");
let path_t0 = pathfinder
.find_path_at_time(start, end, &query, t0, 10, false)
.unwrap();
assert!(path_t0.is_some(), "Path should exist at t0 before deletion");
let (_, t_delete) = db
.write_with_timestamp(|tx| tx.delete_node_cascade(middle))
.unwrap();
let _t1 = t_delete;
assert!(
t_delete > t0,
"Time must advance monotonically for subsequent transactions"
);
let path_t0_after_delete = pathfinder
.find_path_at_time(start, end, &query, t0, 10, false)
.unwrap();
assert!(
path_t0_after_delete.is_some(),
"Temporal adjacency index (enabled by default) should find path through deleted edges"
);
let path = path_t0_after_delete.unwrap();
assert_eq!(path.len(), 3);
assert_eq!(path[0], start);
assert_eq!(path[1], middle);
assert_eq!(path[2], end);
let new_middle = db
.create_node(
"NewMiddle",
PropertyMapBuilder::new()
.insert_vector("embedding", &[1.0, 0.0, 0.0])
.build(),
)
.unwrap();
let (_, t_new_edges) = db
.write_with_timestamp(|tx| {
tx.create_edge(start, new_middle, "NEXT", PropertyMapBuilder::new().build())?;
tx.create_edge(new_middle, end, "NEXT", PropertyMapBuilder::new().build())?;
Ok::<_, Error>(())
})
.unwrap();
let t2 = t_new_edges;
let path_t2 = pathfinder
.find_path_at_time(start, end, &query, t2, 10, false)
.unwrap();
assert!(path_t2.is_some(), "Path should exist at t2");
let path_t0_check = pathfinder
.find_path_at_time(start, end, &query, t0, 10, false)
.unwrap();
assert!(
path_t0_check.is_some(),
"Should find original path at t0 (through middle, not new_middle)"
);
assert_eq!(path_t0_check.as_ref().unwrap()[1], middle);
}
mod sentry_tests {
use super::*;
#[test]
fn test_pathfinding_zero_max_depth() {
let db = create_test_db();
let a = db
.create_node("A", PropertyMapBuilder::new().build())
.unwrap();
let b = db
.create_node("B", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(a, b, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
let query = vec![0.0; 3];
let pathfinder = SemanticPathfinder::new(&db, "embedding");
let path = pathfinder.find_path(a, b, &query, 0, false).unwrap();
assert!(path.is_none(), "Depth 0 should not allow traversal");
}
#[test]
fn test_pathfinding_start_equals_end() {
let db = create_test_db();
let a = db
.create_node("A", PropertyMapBuilder::new().build())
.unwrap();
let query = vec![0.0; 3];
let pathfinder = SemanticPathfinder::new(&db, "embedding");
let path = pathfinder.find_path(a, a, &query, 10, false).unwrap();
assert!(path.is_some());
assert_eq!(path.unwrap(), vec![a]);
}
#[test]
fn test_pathfinding_disconnected() {
let db = create_test_db();
let a = db
.create_node("A", PropertyMapBuilder::new().build())
.unwrap();
let b = db
.create_node("B", PropertyMapBuilder::new().build())
.unwrap();
let query = vec![0.0; 3];
let pathfinder = SemanticPathfinder::new(&db, "embedding");
let path = pathfinder.find_path(a, b, &query, 10, false).unwrap();
assert!(path.is_none());
}
#[test]
fn test_pathfinding_cycle() {
let db = create_test_db();
let a = db
.create_node("A", PropertyMapBuilder::new().build())
.unwrap();
let b = db
.create_node("B", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(a, b, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(b, a, "BACK", PropertyMapBuilder::new().build())
.unwrap();
let query = vec![0.0; 3];
let pathfinder = SemanticPathfinder::new(&db, "embedding");
let c = db
.create_node("C", PropertyMapBuilder::new().build())
.unwrap();
let path = pathfinder.find_path(a, c, &query, 10, false).unwrap();
assert!(path.is_none());
}
#[test]
fn test_calculate_semantic_cost_dimension_mismatch() {
let db = create_test_db();
let a = db
.create_node(
"A",
PropertyMapBuilder::new()
.insert_vector("embedding", &[1.0, 0.0, 0.0])
.build(),
)
.unwrap();
let b = db
.create_node(
"B",
PropertyMapBuilder::new()
.insert_vector("embedding", &[0.0, 1.0, 0.0])
.build(),
)
.unwrap();
db.create_edge(a, b, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
let query = vec![0.0; 4];
let pathfinder = SemanticPathfinder::new(&db, "embedding");
let result = pathfinder.find_path(a, b, &query, 10, false);
assert!(result.is_ok());
assert!(
result.unwrap().is_none(),
"Path should be blocked due to dimension mismatch"
);
}
}
mod sentry_robustness_tests {
use super::*;
use crate::api::transaction::WriteOps;
use crate::core::property::PropertyMapBuilder;
use crate::db::AletheiaDB;
use crate::index::vector::{DistanceMetric, HnswConfig};
fn create_test_db() -> AletheiaDB {
let db = AletheiaDB::new().unwrap();
db.vector_index("embedding")
.hnsw(HnswConfig::new(3, DistanceMetric::Cosine))
.enable()
.unwrap();
db
}
#[test]
fn test_pathfinding_skips_incompatible_dimensions() {
let db = create_test_db();
let start = db
.create_node(
"Start",
PropertyMapBuilder::new()
.insert_vector("embedding", &[1.0, 0.0, 0.0])
.build(),
)
.unwrap();
let end = db
.create_node(
"End",
PropertyMapBuilder::new()
.insert_vector("embedding", &[0.0, 0.0, 1.0])
.build(),
)
.unwrap();
let broken = db
.create_node(
"Broken",
PropertyMapBuilder::new()
.insert_vector("embedding_mixed", &[0.5, 0.5, 0.5, 0.5])
.build(),
)
.unwrap();
let valid = db
.create_node(
"Valid",
PropertyMapBuilder::new()
.insert_vector("embedding_mixed", &[0.5, 0.5, 0.0])
.build(),
)
.unwrap();
db.write(|tx| {
tx.update_node(
start,
PropertyMapBuilder::new()
.insert_vector("embedding_mixed", &[1.0, 0.0, 0.0])
.build(),
)
})
.unwrap();
db.write(|tx| {
tx.update_node(
end,
PropertyMapBuilder::new()
.insert_vector("embedding_mixed", &[0.0, 0.0, 1.0])
.build(),
)
})
.unwrap();
db.create_edge(start, broken, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(broken, end, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(start, valid, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
db.create_edge(valid, end, "NEXT", PropertyMapBuilder::new().build())
.unwrap();
let query = vec![1.0, 0.0, 0.0];
let pathfinder = SemanticPathfinder::new(&db, "embedding_mixed");
let result = pathfinder.find_path(start, end, &query, 10, false);
match result {
Ok(Some(p)) => {
assert_eq!(p, vec![start, valid, end], "Should take valid path");
}
Ok(None) => panic!("Should find a path (returned None)"),
Err(e) => {
panic!(
"Regression: Dimension mismatch error was not suppressed. Expected successful pathfinding skipping invalid node. Error: {}",
e
);
}
}
}
}
}