nodedb_vector/sieve/
collection.rs1use std::collections::HashMap;
7
8use crate::error::VectorError;
9use crate::hnsw::graph::HnswIndex;
10use nodedb_types::hnsw::HnswParams;
11use nodedb_types::vector_distance::DistanceMetric;
12
/// Key under which a sub-index is stored in a [`SieveCollection`].
///
/// Currently a plain `String`; the collection only hashes and compares it and
/// never parses its contents.
// NOTE(review): presumably a canonical rendering of a filter predicate
// (e.g. `field=value`) — confirm the exact format with the producer.
pub type PredicateSignature = String;
15
16pub struct SieveCollection {
22 subindices: HashMap<PredicateSignature, HnswIndex>,
24 sub_m: usize,
27}
28
29impl SieveCollection {
30 pub fn new(sub_m: usize) -> Self {
33 Self {
34 subindices: HashMap::new(),
35 sub_m,
36 }
37 }
38
39 pub fn build_subindex(
49 &mut self,
50 signature: PredicateSignature,
51 vectors: &[(u32, Vec<f32>)],
52 dim: usize,
53 metric: DistanceMetric,
54 ) -> Result<(), VectorError> {
55 let params = HnswParams {
56 m: self.sub_m,
57 m0: self.sub_m * 2,
58 ef_construction: 200,
59 metric,
60 dtype: nodedb_types::vector_dtype::VectorStorageDtype::F32,
61 };
62 let mut index = HnswIndex::new(dim, params);
63 for (_, vec) in vectors {
64 index.insert(vec.clone())?;
65 }
66 self.subindices.insert(signature, index);
67 Ok(())
68 }
69
70 pub fn has(&self, signature: &PredicateSignature) -> bool {
72 self.subindices.contains_key(signature)
73 }
74
75 pub fn get(&self, signature: &PredicateSignature) -> Option<&HnswIndex> {
77 self.subindices.get(signature)
78 }
79
80 pub fn drop(&mut self, signature: &PredicateSignature) {
82 self.subindices.remove(signature);
83 }
84
85 pub fn signatures(&self) -> Vec<&PredicateSignature> {
87 self.subindices.keys().collect()
88 }
89}
90
91pub use crate::hnsw::graph::SearchResult as SubindexSearchResult;
94
#[cfg(test)]
mod tests {
    use super::*;

    /// Produces `n` vectors of length `dim`; vector `i` has id `i` and every
    /// component equal to `i`.
    fn sample_vectors(n: usize, dim: usize) -> Vec<(u32, Vec<f32>)> {
        let mut out = Vec::with_capacity(n);
        for i in 0..n {
            out.push((i as u32, vec![i as f32; dim]));
        }
        out
    }

    #[test]
    fn build_subindex_has_and_get() {
        let sig = String::from("tenant_id=42");
        let vecs = sample_vectors(5, 3);
        let mut coll = SieveCollection::new(8);
        coll.build_subindex(sig.clone(), &vecs, 3, DistanceMetric::L2)
            .expect("build should succeed");

        assert!(coll.has(&sig));
        let idx = coll.get(&sig);
        assert!(idx.is_some());
        assert_eq!(idx.unwrap().len(), 5);
    }

    #[test]
    fn drop_removes_subindex() {
        let sig = String::from("lang=en");
        let vecs = sample_vectors(5, 3);
        let mut coll = SieveCollection::new(8);
        coll.build_subindex(sig.clone(), &vecs, 3, DistanceMetric::Cosine)
            .expect("build should succeed");
        assert!(coll.has(&sig));

        // After dropping, neither lookup path may see the entry.
        coll.drop(&sig);
        assert!(!coll.has(&sig));
        assert!(coll.get(&sig).is_none());
    }

    #[test]
    fn signatures_lists_all_keys() {
        let vecs = sample_vectors(3, 2);
        let mut coll = SieveCollection::new(8);
        coll.build_subindex(String::from("a"), &vecs, 2, DistanceMetric::L2)
            .expect("build a");
        coll.build_subindex(String::from("b"), &vecs, 2, DistanceMetric::L2)
            .expect("build b");

        // Map iteration order is unspecified, so sort before comparing.
        let mut sigs: Vec<String> = Vec::new();
        for sig in coll.signatures() {
            sigs.push(sig.clone());
        }
        sigs.sort();
        assert_eq!(sigs, vec![String::from("a"), String::from("b")]);
    }

    #[test]
    fn search_on_subindex() {
        let sig = String::from("tenant_id=1");
        let vecs = sample_vectors(5, 3);
        let mut coll = SieveCollection::new(8);
        coll.build_subindex(sig.clone(), &vecs, 3, DistanceMetric::L2)
            .expect("build");

        let idx = coll.get(&sig).unwrap();
        let query = [2.0, 2.0, 2.0];
        let results = idx.search(&query, 2, 32);
        assert!(!results.is_empty());
    }
}
154}