velesdb_core/collection/search/
vector.rs1use crate::collection::types::Collection;
4use crate::error::{Error, Result};
5use crate::index::VectorIndex;
6use crate::point::{Point, SearchResult};
7use crate::quantization::{distance_pq, StorageMode};
8use crate::storage::{PayloadStorage, VectorStorage};
9
10impl Collection {
11 fn search_ids_with_adc_if_pq(&self, query: &[f32], k: usize) -> Vec<(u64, f32)> {
12 let config = self.config.read();
13 let is_pq = matches!(config.storage_mode, StorageMode::ProductQuantization);
14 let higher_is_better = config.metric.higher_is_better();
15 drop(config);
16
17 if !is_pq {
18 return self.index.search(query, k);
19 }
20
21 let candidates_k = k.saturating_mul(8).max(k + 32);
22 let index_results = self.index.search(query, candidates_k);
23
24 let pq_cache = self.pq_cache.read();
25 let quantizer = self.pq_quantizer.read();
26 let Some(quantizer) = quantizer.as_ref() else {
27 return index_results.into_iter().take(k).collect();
28 };
29
30 let mut rescored: Vec<(u64, f32)> = index_results
31 .into_iter()
32 .map(|(id, fallback_score)| {
33 let score = pq_cache
34 .get(&id)
35 .map(|pq_vec| distance_pq(query, pq_vec, &quantizer.codebook))
36 .unwrap_or(fallback_score);
37 (id, score)
38 })
39 .collect();
40
41 rescored.sort_by(|a, b| {
42 if higher_is_better {
43 b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
44 } else {
45 a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)
46 }
47 });
48 rescored.truncate(k);
49 rescored
50 }
51}
52
53impl Collection {
54 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
63 let config = self.config.read();
64
65 if config.metadata_only {
67 return Err(Error::SearchNotSupported(config.name.clone()));
68 }
69
70 if query.len() != config.dimension {
71 return Err(Error::DimensionMismatch {
72 expected: config.dimension,
73 actual: query.len(),
74 });
75 }
76 drop(config);
77
78 let index_results = self.search_ids_with_adc_if_pq(query, k);
80
81 let vector_storage = self.vector_storage.read();
82 let payload_storage = self.payload_storage.read();
83
84 let results: Vec<SearchResult> = index_results
86 .into_iter()
87 .filter_map(|(id, score)| {
88 let vector = vector_storage.retrieve(id).ok().flatten()?;
90 let payload = payload_storage.retrieve(id).ok().flatten();
91
92 let point = Point {
93 id,
94 vector,
95 payload,
96 };
97
98 Some(SearchResult::new(point, score))
99 })
100 .collect();
101
102 Ok(results)
103 }
104
105 pub fn search_with_ef(
114 &self,
115 query: &[f32],
116 k: usize,
117 ef_search: usize,
118 ) -> Result<Vec<SearchResult>> {
119 let config = self.config.read();
120
121 if query.len() != config.dimension {
122 return Err(Error::DimensionMismatch {
123 expected: config.dimension,
124 actual: query.len(),
125 });
126 }
127 drop(config);
128
129 let quality = match ef_search {
131 0..=64 => crate::SearchQuality::Fast,
132 65..=128 => crate::SearchQuality::Balanced,
133 129..=256 => crate::SearchQuality::Accurate,
134 _ => crate::SearchQuality::Perfect,
135 };
136
137 let index_results = self.index.search_with_quality(query, k, quality);
138
139 let vector_storage = self.vector_storage.read();
140 let payload_storage = self.payload_storage.read();
141
142 let results: Vec<SearchResult> = index_results
143 .into_iter()
144 .filter_map(|(id, score)| {
145 let vector = vector_storage.retrieve(id).ok().flatten()?;
146 let payload = payload_storage.retrieve(id).ok().flatten();
147
148 let point = Point {
149 id,
150 vector,
151 payload,
152 };
153
154 Some(SearchResult::new(point, score))
155 })
156 .collect();
157
158 Ok(results)
159 }
160
161 pub fn search_ids(&self, query: &[f32], k: usize) -> Result<Vec<(u64, f32)>> {
179 let config = self.config.read();
180
181 if query.len() != config.dimension {
182 return Err(Error::DimensionMismatch {
183 expected: config.dimension,
184 actual: query.len(),
185 });
186 }
187 drop(config);
188
189 let results = self.search_ids_with_adc_if_pq(query, k);
191 Ok(results)
192 }
193
194 pub fn search_with_filter(
209 &self,
210 query: &[f32],
211 k: usize,
212 filter: &crate::filter::Filter,
213 ) -> Result<Vec<SearchResult>> {
214 let config = self.config.read();
215
216 if query.len() != config.dimension {
217 return Err(Error::DimensionMismatch {
218 expected: config.dimension,
219 actual: query.len(),
220 });
221 }
222 drop(config);
223
224 let candidates_k = k.saturating_mul(4).max(k + 10);
227 let index_results = self.search_ids_with_adc_if_pq(query, candidates_k);
228
229 let vector_storage = self.vector_storage.read();
230 let payload_storage = self.payload_storage.read();
231
232 let mut results: Vec<SearchResult> = index_results
235 .into_iter()
236 .filter_map(|(id, score)| {
237 let vector = vector_storage.retrieve(id).ok().flatten()?;
238 let payload = payload_storage.retrieve(id).ok().flatten();
239
240 let matches = match payload.as_ref() {
243 Some(p) => filter.matches(p),
244 None => filter.matches(&serde_json::Value::Null),
245 };
246 if !matches {
247 return None;
248 }
249
250 let point = Point {
251 id,
252 vector,
253 payload,
254 };
255
256 Some(SearchResult::new(point, score))
257 })
258 .take(k)
259 .collect();
260
261 let config = self.config.read();
265 let higher_is_better = config.metric.higher_is_better();
266 drop(config);
267
268 results.sort_by(|a, b| {
269 if higher_is_better {
270 b.score
272 .partial_cmp(&a.score)
273 .unwrap_or(std::cmp::Ordering::Equal)
274 } else {
275 a.score
277 .partial_cmp(&b.score)
278 .unwrap_or(std::cmp::Ordering::Equal)
279 }
280 });
281
282 Ok(results)
283 }
284}