Skip to main content

ailake_query/
pruner.rs

1use ailake_catalog::{decode_centroid, DataFileEntry};
2use ailake_core::VectorMetric;
3use ailake_vec::{cosine_distance, dot_product, euclidean_distance};
4
5pub struct VectorPruner;
6
7impl VectorPruner {
8    /// Remove files whose centroid is geometrically guaranteed to contain no vectors
9    /// within `threshold` distance of `query`.
10    ///
11    /// Pruning condition: `distance(query, centroid) - radius > threshold`
12    /// Files without centroid metadata are kept (conservative fallback).
13    pub fn prune(
14        files: Vec<DataFileEntry>,
15        query: &[f32],
16        metric: VectorMetric,
17        threshold: f32,
18    ) -> Vec<DataFileEntry> {
19        files
20            .into_iter()
21            .filter(|entry| {
22                match decode_centroid(entry, metric) {
23                    Some(centroid) => {
24                        let dist = compute_distance(query, &centroid.values, metric);
25                        // Keep file if any of its vectors could be within threshold
26                        dist - centroid.radius <= threshold
27                    }
28                    None => true, // no centroid → keep (safe fallback)
29                }
30            })
31            .collect()
32    }
33}
34
35fn compute_distance(a: &[f32], b: &[f32], metric: VectorMetric) -> f32 {
36    match metric {
37        VectorMetric::Cosine => cosine_distance(a, b),
38        VectorMetric::Euclidean => euclidean_distance(a, b),
39        VectorMetric::DotProduct => -dot_product(a, b),
40    }
41}
42
#[cfg(test)]
mod tests {
    use super::*;
    use ailake_catalog::{make_data_file_entry, VectorIndexInfo};
    use ailake_core::VectorMetric;
    use ailake_vec::compute_centroid_and_radius;

    /// Builds a catalog entry for `path` whose centroid metadata is derived from `vecs`.
    ///
    /// `vecs` must be non-empty: the index dimension is read from its first vector.
    fn make_entry(path: &str, vecs: &[Vec<f32>], metric: VectorMetric) -> DataFileEntry {
        let summary = compute_centroid_and_radius(vecs, metric);
        let record_count = vecs.len() as u64;
        let index_info = VectorIndexInfo {
            column: "embedding",
            dim: vecs[0].len() as u32,
            hnsw_offset: 0,
            hnsw_len: 0,
        };
        make_data_file_entry(path, record_count, 1024, &summary, index_info)
    }

    #[test]
    fn prunes_far_file() {
        // The file's vectors cluster around the x axis while the query lies on
        // the z axis, i.e. orthogonal to the centroid, so the file must go.
        let file_vectors = vec![vec![1.0f32, 0.0, 0.0], vec![0.9, 0.1, 0.0]];
        let far_file = make_entry("far.parquet", &file_vectors, VectorMetric::Cosine);
        let query = [0.0f32, 0.0, 1.0];

        let survivors = VectorPruner::prune(vec![far_file], &query, VectorMetric::Cosine, 0.1);

        assert!(survivors.is_empty(), "far file should be pruned");
    }

    #[test]
    fn keeps_nearby_file() {
        // The query coincides with one of the file's own vectors.
        let file_vectors = vec![vec![1.0f32, 0.0, 0.0], vec![0.99, 0.1, 0.0]];
        let near_file = make_entry("near.parquet", &file_vectors, VectorMetric::Cosine);
        let query = [1.0f32, 0.0, 0.0];

        let survivors = VectorPruner::prune(vec![near_file], &query, VectorMetric::Cosine, 0.5);

        assert_eq!(survivors.len(), 1, "nearby file should be kept");
    }

    #[test]
    fn no_centroid_always_kept() {
        // An entry carrying no vector metadata at all must survive pruning,
        // even with a zero threshold.
        let bare_entry = DataFileEntry {
            path: "unknown.parquet".into(),
            record_count: 10,
            file_size_bytes: 512,
            centroid_b64: None,
            radius: None,
            hnsw_offset: None,
            hnsw_len: None,
            vector_column: None,
            vector_dim: None,
            extra_vector_indexes: vec![],
            index_status: ailake_catalog::IndexStatus::Ready,
        };
        let query = [0.0f32, 0.0, 1.0];

        let survivors = VectorPruner::prune(vec![bare_entry], &query, VectorMetric::Cosine, 0.0);

        assert_eq!(survivors.len(), 1);
    }
}