use ailake_catalog::{decode_centroid, DataFileEntry};
use ailake_core::VectorMetric;
use ailake_vec::{cosine_distance, dot_product, euclidean_distance};
pub struct VectorPruner;
impl VectorPruner {
pub fn prune(
files: Vec<DataFileEntry>,
query: &[f32],
metric: VectorMetric,
threshold: f32,
) -> Vec<DataFileEntry> {
files
.into_iter()
.filter(|entry| {
match decode_centroid(entry, metric) {
Some(centroid) => {
let dist = compute_distance(query, ¢roid.values, metric);
dist - centroid.radius <= threshold
}
None => true, }
})
.collect()
}
}
fn compute_distance(a: &[f32], b: &[f32], metric: VectorMetric) -> f32 {
match metric {
VectorMetric::Cosine => cosine_distance(a, b),
VectorMetric::Euclidean => euclidean_distance(a, b),
VectorMetric::DotProduct => -dot_product(a, b),
}
}
#[cfg(test)]
mod tests {
use super::*;
use ailake_catalog::{make_data_file_entry, VectorIndexInfo};
use ailake_core::VectorMetric;
use ailake_vec::compute_centroid_and_radius;
fn make_entry(path: &str, vecs: &[Vec<f32>], metric: VectorMetric) -> DataFileEntry {
let centroid = compute_centroid_and_radius(vecs, metric);
make_data_file_entry(
path,
vecs.len() as u64,
1024,
¢roid,
VectorIndexInfo {
column: "embedding",
dim: vecs[0].len() as u32,
hnsw_offset: 0,
hnsw_len: 0,
},
)
}
#[test]
fn prunes_far_file() {
let vecs = vec![vec![1.0f32, 0.0, 0.0], vec![0.9, 0.1, 0.0]];
let entry = make_entry("far.parquet", &vecs, VectorMetric::Cosine);
let query = vec![0.0f32, 0.0, 1.0];
let pruned = VectorPruner::prune(vec![entry], &query, VectorMetric::Cosine, 0.1);
assert!(pruned.is_empty(), "far file should be pruned");
}
#[test]
fn keeps_nearby_file() {
let vecs = vec![vec![1.0f32, 0.0, 0.0], vec![0.99, 0.1, 0.0]];
let entry = make_entry("near.parquet", &vecs, VectorMetric::Cosine);
let query = vec![1.0f32, 0.0, 0.0];
let kept = VectorPruner::prune(vec![entry], &query, VectorMetric::Cosine, 0.5);
assert_eq!(kept.len(), 1, "nearby file should be kept");
}
#[test]
fn no_centroid_always_kept() {
let entry = DataFileEntry {
path: "unknown.parquet".into(),
record_count: 10,
file_size_bytes: 512,
centroid_b64: None,
radius: None,
hnsw_offset: None,
hnsw_len: None,
vector_column: None,
vector_dim: None,
extra_vector_indexes: vec![],
index_status: ailake_catalog::IndexStatus::Ready,
};
let query = vec![0.0f32, 0.0, 1.0];
let kept = VectorPruner::prune(vec![entry], &query, VectorMetric::Cosine, 0.0);
assert_eq!(kept.len(), 1);
}
}