nodedb_vector/sieve/
collection.rs1use std::collections::HashMap;
7
8use crate::error::VectorError;
9use crate::hnsw::graph::HnswIndex;
10use nodedb_types::hnsw::HnswParams;
11use nodedb_types::vector_distance::DistanceMetric;
12
13pub type PredicateSignature = String;
15
16pub struct SieveCollection {
22 subindices: HashMap<PredicateSignature, HnswIndex>,
24 sub_m: usize,
27}
28
29impl SieveCollection {
30 pub fn new(sub_m: usize) -> Self {
33 Self {
34 subindices: HashMap::new(),
35 sub_m,
36 }
37 }
38
39 pub fn build_subindex(
49 &mut self,
50 signature: PredicateSignature,
51 vectors: &[(u32, Vec<f32>)],
52 dim: usize,
53 metric: DistanceMetric,
54 ) -> Result<(), VectorError> {
55 let params = HnswParams {
56 m: self.sub_m,
57 m0: self.sub_m * 2,
58 ef_construction: 200,
59 metric,
60 };
61 let mut index = HnswIndex::new(dim, params);
62 for (_, vec) in vectors {
63 index.insert(vec.clone())?;
64 }
65 self.subindices.insert(signature, index);
66 Ok(())
67 }
68
69 pub fn has(&self, signature: &PredicateSignature) -> bool {
71 self.subindices.contains_key(signature)
72 }
73
74 pub fn get(&self, signature: &PredicateSignature) -> Option<&HnswIndex> {
76 self.subindices.get(signature)
77 }
78
79 pub fn drop(&mut self, signature: &PredicateSignature) {
81 self.subindices.remove(signature);
82 }
83
84 pub fn signatures(&self) -> Vec<&PredicateSignature> {
86 self.subindices.keys().collect()
87 }
88}
89
90pub use crate::hnsw::graph::SearchResult as SubindexSearchResult;
93
94#[cfg(test)]
95mod tests {
96 use super::*;
97
98 fn sample_vectors(n: usize, dim: usize) -> Vec<(u32, Vec<f32>)> {
99 (0..n).map(|i| (i as u32, vec![i as f32; dim])).collect()
100 }
101
102 #[test]
103 fn build_subindex_has_and_get() {
104 let mut coll = SieveCollection::new(8);
105 let vecs = sample_vectors(5, 3);
106 coll.build_subindex("tenant_id=42".to_string(), &vecs, 3, DistanceMetric::L2)
107 .expect("build should succeed");
108
109 assert!(coll.has(&"tenant_id=42".to_string()));
110 let idx = coll.get(&"tenant_id=42".to_string());
111 assert!(idx.is_some());
112 assert_eq!(idx.unwrap().len(), 5);
113 }
114
115 #[test]
116 fn drop_removes_subindex() {
117 let mut coll = SieveCollection::new(8);
118 let vecs = sample_vectors(5, 3);
119 coll.build_subindex("lang=en".to_string(), &vecs, 3, DistanceMetric::Cosine)
120 .expect("build should succeed");
121 assert!(coll.has(&"lang=en".to_string()));
122
123 coll.drop(&"lang=en".to_string());
124 assert!(!coll.has(&"lang=en".to_string()));
125 assert!(coll.get(&"lang=en".to_string()).is_none());
126 }
127
128 #[test]
129 fn signatures_lists_all_keys() {
130 let mut coll = SieveCollection::new(8);
131 let vecs = sample_vectors(3, 2);
132 coll.build_subindex("a".to_string(), &vecs, 2, DistanceMetric::L2)
133 .expect("build a");
134 coll.build_subindex("b".to_string(), &vecs, 2, DistanceMetric::L2)
135 .expect("build b");
136
137 let mut sigs: Vec<String> = coll.signatures().into_iter().cloned().collect();
138 sigs.sort();
139 assert_eq!(sigs, vec!["a".to_string(), "b".to_string()]);
140 }
141
142 #[test]
143 fn search_on_subindex() {
144 let mut coll = SieveCollection::new(8);
145 let vecs = sample_vectors(5, 3);
146 coll.build_subindex("tenant_id=1".to_string(), &vecs, 3, DistanceMetric::L2)
147 .expect("build");
148
149 let idx = coll.get(&"tenant_id=1".to_string()).unwrap();
150 let results = idx.search(&[2.0, 2.0, 2.0], 2, 32);
151 assert!(!results.is_empty());
152 }
153}