use std::collections::VecDeque;
pub const VECTOR_GRAPH_FUSION_SCHEMA_VERSION: u32 = 1;
pub const VECTOR_GRAPH_FUSION_COMPARATOR: &str = "vector-graph-fusion:v1";
pub const VECTOR_GRAPH_FUSION_METRIC_FAMILY: &str = "l2-squared-top-k";
pub const VECTOR_GRAPH_FUSION_FRONTIER_LEADERBOARD: &str =
"release/evidence/benchmarks/frontier-leaderboard.json";
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct VectorGraphTopKEntry {
pub node_id: u32,
pub distance_bits: u32,
}
impl VectorGraphTopKEntry {
#[must_use]
fn new(node_id: usize, distance: f32) -> Self {
Self {
node_id: node_id as u32,
distance_bits: distance.to_bits(),
}
}
#[must_use]
pub fn distance(&self) -> f32 {
f32::from_bits(self.distance_bits)
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct VectorGraphFusionEvidence {
pub schema_version: u32,
pub comparator: String,
pub dataset_id: String,
pub metric_family: String,
pub release_floor: String,
pub failure_mode: String,
pub frontier_leaderboard_artifact: String,
pub node_count: u32,
pub dimension: u32,
pub neighbor_k: u32,
pub rank_k: u32,
pub csr_offsets: Vec<u32>,
pub csr_targets: Vec<u32>,
pub direct_top_k: Vec<VectorGraphTopKEntry>,
pub graph_traversal_top_k: Vec<VectorGraphTopKEntry>,
pub graph_reached_count: u32,
pub traversal_parity: bool,
pub top_k_stable: bool,
pub blockers: Vec<String>,
}
pub fn try_vector_graph_fusion_evidence(
vectors: &[f32],
dimension: usize,
neighbor_k: usize,
query: &[f32],
rank_k: usize,
dataset_id: impl Into<String>,
release_floor: impl Into<String>,
failure_mode: impl Into<String>,
) -> Result<VectorGraphFusionEvidence, String> {
let dataset_id = dataset_id.into();
let release_floor = release_floor.into();
let failure_mode = failure_mode.into();
validate_vector_graph_inputs(
vectors,
dimension,
neighbor_k,
query,
rank_k,
&dataset_id,
&release_floor,
&failure_mode,
)?;
let node_count = vectors.len() / dimension;
let (csr_offsets, csr_targets) = build_knn_csr(vectors, dimension, neighbor_k);
let direct_top_k = top_k_for_nodes(vectors, dimension, query, 0..node_count, rank_k);
let reached = traverse_from_seed(
direct_top_k[0].node_id as usize,
node_count,
&csr_offsets,
&csr_targets,
);
let graph_nodes = reached
.iter()
.enumerate()
.filter_map(|(node, reached)| reached.then_some(node));
let graph_traversal_top_k = top_k_for_nodes(vectors, dimension, query, graph_nodes, rank_k);
let graph_reached_count = reached.iter().filter(|seen| **seen).count();
let traversal_parity = graph_reached_count == node_count;
let top_k_stable = direct_top_k == graph_traversal_top_k;
let mut blockers = Vec::new();
if !traversal_parity {
blockers.push(format!(
"graph traversal reached {graph_reached_count}/{node_count} node(s); Fix: increase neighbor_k, add reciprocal edges, or shard the dataset before claiming graph-ranking parity."
));
}
if !top_k_stable {
blockers.push(
"graph traversal top-k differs from direct vector top-k; Fix: preserve candidate recall before using graph ranking evidence."
.to_string(),
);
}
Ok(VectorGraphFusionEvidence {
schema_version: VECTOR_GRAPH_FUSION_SCHEMA_VERSION,
comparator: VECTOR_GRAPH_FUSION_COMPARATOR.to_string(),
dataset_id,
metric_family: VECTOR_GRAPH_FUSION_METRIC_FAMILY.to_string(),
release_floor,
failure_mode,
frontier_leaderboard_artifact: VECTOR_GRAPH_FUSION_FRONTIER_LEADERBOARD.to_string(),
node_count: node_count as u32,
dimension: dimension as u32,
neighbor_k: neighbor_k as u32,
rank_k: rank_k as u32,
csr_offsets,
csr_targets,
direct_top_k,
graph_traversal_top_k,
graph_reached_count: graph_reached_count as u32,
traversal_parity,
top_k_stable,
blockers,
})
}
fn validate_vector_graph_inputs(
vectors: &[f32],
dimension: usize,
neighbor_k: usize,
query: &[f32],
rank_k: usize,
dataset_id: &str,
release_floor: &str,
failure_mode: &str,
) -> Result<(), String> {
if dimension == 0 {
return Err("Fix: vector graph fusion requires dimension > 0.".to_string());
}
if vectors.is_empty() {
return Err("Fix: vector graph fusion requires at least two vector rows.".to_string());
}
if vectors.len() % dimension != 0 {
return Err(format!(
"Fix: vector graph fusion received {} scalar value(s), not divisible by dimension={dimension}.",
vectors.len()
));
}
let node_count = vectors.len() / dimension;
if node_count < 2 {
return Err("Fix: vector graph fusion requires at least two vector rows.".to_string());
}
if node_count > u32::MAX as usize {
return Err(format!(
"Fix: vector graph fusion node_count={node_count} exceeds u32 graph ids; shard the dataset."
));
}
if dimension > u32::MAX as usize {
return Err(format!(
"Fix: vector graph fusion dimension={dimension} exceeds u32 evidence fields; shard the vectors."
));
}
if neighbor_k == 0 || neighbor_k >= node_count {
return Err(format!(
"Fix: vector graph fusion neighbor_k={neighbor_k} must be in 1..node_count for node_count={node_count}."
));
}
if rank_k == 0 || rank_k > node_count {
return Err(format!(
"Fix: vector graph fusion rank_k={rank_k} must be in 1..=node_count for node_count={node_count}."
));
}
if query.len() != dimension {
return Err(format!(
"Fix: vector graph fusion query has {} value(s), expected dimension={dimension}.",
query.len()
));
}
if vectors.iter().chain(query.iter()).any(|value| !value.is_finite()) {
return Err("Fix: vector graph fusion requires finite vector and query values.".to_string());
}
if dataset_id.trim().is_empty() {
return Err("Fix: vector graph fusion dataset_id cannot be blank.".to_string());
}
if release_floor.trim().is_empty() {
return Err("Fix: vector graph fusion release_floor cannot be blank.".to_string());
}
if failure_mode.trim().is_empty() {
return Err("Fix: vector graph fusion failure_mode cannot be blank.".to_string());
}
Ok(())
}
fn build_knn_csr(vectors: &[f32], dimension: usize, neighbor_k: usize) -> (Vec<u32>, Vec<u32>) {
let node_count = vectors.len() / dimension;
let mut offsets = Vec::with_capacity(node_count + 1);
let mut targets = Vec::with_capacity(node_count * neighbor_k);
offsets.push(0);
for src in 0..node_count {
let mut candidates = (0..node_count)
.filter(|dst| *dst != src)
.map(|dst| (squared_l2(row(vectors, dimension, src), row(vectors, dimension, dst)), dst))
.collect::<Vec<_>>();
candidates.sort_by(|left, right| {
left.0
.total_cmp(&right.0)
.then_with(|| left.1.cmp(&right.1))
});
targets.extend(candidates.into_iter().take(neighbor_k).map(|(_, dst)| dst as u32));
offsets.push(targets.len() as u32);
}
(offsets, targets)
}
fn top_k_for_nodes<I>(
vectors: &[f32],
dimension: usize,
query: &[f32],
nodes: I,
rank_k: usize,
) -> Vec<VectorGraphTopKEntry>
where
I: IntoIterator<Item = usize>,
{
let mut scored = nodes
.into_iter()
.map(|node| (squared_l2(row(vectors, dimension, node), query), node))
.collect::<Vec<_>>();
scored.sort_by(|left, right| {
left.0
.total_cmp(&right.0)
.then_with(|| left.1.cmp(&right.1))
});
scored
.into_iter()
.take(rank_k)
.map(|(distance, node)| VectorGraphTopKEntry::new(node, distance))
.collect()
}
fn traverse_from_seed(
seed: usize,
node_count: usize,
csr_offsets: &[u32],
csr_targets: &[u32],
) -> Vec<bool> {
let mut reached = vec![false; node_count];
let mut queue = VecDeque::new();
reached[seed] = true;
queue.push_back(seed);
while let Some(node) = queue.pop_front() {
let start = csr_offsets[node] as usize;
let end = csr_offsets[node + 1] as usize;
for target in &csr_targets[start..end] {
let target = *target as usize;
if !reached[target] {
reached[target] = true;
queue.push_back(target);
}
}
}
reached
}
fn row(vectors: &[f32], dimension: usize, row: usize) -> &[f32] {
let start = row * dimension;
&vectors[start..start + dimension]
}
fn squared_l2(left: &[f32], right: &[f32]) -> f32 {
left.iter()
.zip(right)
.map(|(left, right)| {
let delta = *left - *right;
delta * delta
})
.sum()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn connected_neighbor_graph_preserves_direct_top_k() {
let vectors = [0.0, 1.0, 2.0, 3.0, 4.0];
let evidence = try_vector_graph_fusion_evidence(
&vectors,
1,
2,
&[0.1],
3,
"ann.line.connected",
"release-floor:ann-vector",
"graph-recall-regression",
)
.expect("Fix: connected vector graph fusion fixture should build.");
assert!(evidence.blockers.is_empty(), "{evidence:#?}");
assert!(evidence.traversal_parity);
assert!(evidence.top_k_stable);
assert_eq!(evidence.direct_top_k, evidence.graph_traversal_top_k);
assert_eq!(evidence.direct_top_k[0].node_id, 0);
assert_eq!(evidence.csr_offsets, vec![0, 2, 4, 6, 8, 10]);
assert_eq!(
evidence.frontier_leaderboard_artifact,
VECTOR_GRAPH_FUSION_FRONTIER_LEADERBOARD
);
}
#[test]
fn disconnected_neighbor_graph_records_recall_blocker() {
let vectors = [0.0, 1.0, 100.0, 101.0];
let evidence = try_vector_graph_fusion_evidence(
&vectors,
1,
1,
&[0.0],
2,
"ann.line.disconnected",
"release-floor:ann-vector",
"graph-recall-regression",
)
.expect("Fix: disconnected vector graph fusion fixture should still build evidence.");
assert!(!evidence.traversal_parity);
assert!(evidence.top_k_stable);
assert!(evidence
.blockers
.iter()
.any(|blocker| blocker.contains("graph traversal reached 2/4")));
}
#[test]
fn invalid_vector_shape_is_actionable() {
let error = try_vector_graph_fusion_evidence(
&[0.0, 1.0, 2.0],
2,
1,
&[0.0, 1.0],
1,
"ann.bad",
"release-floor:ann-vector",
"shape-regression",
)
.expect_err("Fix: malformed vector shapes must be rejected.");
assert!(error.contains("not divisible by dimension=2"));
}
}