1use ailake_catalog::{decode_centroid, DataFileEntry};
2use ailake_core::VectorMetric;
3use ailake_vec::{cosine_distance, dot_product, euclidean_distance};
4
5pub struct VectorPruner;
6
7impl VectorPruner {
8 pub fn prune(
14 files: Vec<DataFileEntry>,
15 query: &[f32],
16 metric: VectorMetric,
17 threshold: f32,
18 ) -> Vec<DataFileEntry> {
19 files
20 .into_iter()
21 .filter(|entry| {
22 match decode_centroid(entry, metric) {
23 Some(centroid) => {
24 let dist = compute_distance(query, ¢roid.values, metric);
25 dist - centroid.radius <= threshold
27 }
28 None => true, }
30 })
31 .collect()
32 }
33}
34
35fn compute_distance(a: &[f32], b: &[f32], metric: VectorMetric) -> f32 {
36 match metric {
37 VectorMetric::Cosine => cosine_distance(a, b),
38 VectorMetric::Euclidean => euclidean_distance(a, b),
39 VectorMetric::DotProduct => -dot_product(a, b),
40 }
41}
42
#[cfg(test)]
mod tests {
    use super::*;
    use ailake_catalog::{make_data_file_entry, VectorIndexInfo};
    use ailake_core::VectorMetric;
    use ailake_vec::compute_centroid_and_radius;

    /// Builds a catalog entry for `vecs`, with the centroid and radius derived
    /// from the vectors under `metric`.
    ///
    /// `vecs` must be non-empty: the vector dimension is read from its first element.
    fn make_entry(path: &str, vecs: &[Vec<f32>], metric: VectorMetric) -> DataFileEntry {
        let centroid = compute_centroid_and_radius(vecs, metric);
        let index_info = VectorIndexInfo {
            column: "embedding",
            dim: vecs[0].len() as u32,
            hnsw_offset: 0,
            hnsw_len: 0,
        };
        make_data_file_entry(path, vecs.len() as u64, 1024, &centroid, index_info)
    }

    #[test]
    fn prunes_far_file() {
        // Both stored vectors have a zero third component, so the query along
        // the third axis is orthogonal to each of them.
        let cluster = [vec![1.0f32, 0.0, 0.0], vec![0.9, 0.1, 0.0]];
        let candidate = make_entry("far.parquet", &cluster, VectorMetric::Cosine);
        let query = [0.0f32, 0.0, 1.0];

        let pruned = VectorPruner::prune(vec![candidate], &query, VectorMetric::Cosine, 0.1);

        assert!(pruned.is_empty(), "far file should be pruned");
    }

    #[test]
    fn keeps_nearby_file() {
        // The query equals the first stored vector.
        let cluster = [vec![1.0f32, 0.0, 0.0], vec![0.99, 0.1, 0.0]];
        let candidate = make_entry("near.parquet", &cluster, VectorMetric::Cosine);
        let query = [1.0f32, 0.0, 0.0];

        let kept = VectorPruner::prune(vec![candidate], &query, VectorMetric::Cosine, 0.5);

        assert_eq!(kept.len(), 1, "nearby file should be kept");
    }

    #[test]
    fn no_centroid_always_kept() {
        // An entry with no vector metadata at all; the pruner is expected to
        // keep it even with a zero threshold.
        let bare_entry = DataFileEntry {
            path: "unknown.parquet".into(),
            record_count: 10,
            file_size_bytes: 512,
            centroid_b64: None,
            radius: None,
            hnsw_offset: None,
            hnsw_len: None,
            vector_column: None,
            vector_dim: None,
            extra_vector_indexes: vec![],
            index_status: ailake_catalog::IndexStatus::Ready,
        };
        let query = [0.0f32, 0.0, 1.0];

        let kept = VectorPruner::prune(vec![bare_entry], &query, VectorMetric::Cosine, 0.0);

        assert_eq!(kept.len(), 1);
    }
}