velesdb_core/collection/search/
text.rs1use super::resolve;
4use super::OrderedFloat;
5use crate::collection::types::Collection;
6use crate::error::Result;
7use crate::point::{Point, SearchResult};
8use crate::storage::{PayloadStorage, VectorStorage};
9use crate::validation::validate_dimension_match;
10
11impl Collection {
12 pub fn text_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>> {
27 let bm25_results = self.text_index.search(query, k);
28
29 let vector_storage = self.vector_storage.read();
30 let payload_storage = self.payload_storage.read();
31
32 Ok(resolve::resolve_id_score_pairs(
33 &bm25_results,
34 bm25_results.len(),
35 &*vector_storage,
36 &*payload_storage,
37 ))
38 }
39
40 pub fn text_search_with_filter(
56 &self,
57 query: &str,
58 k: usize,
59 filter: &crate::filter::Filter,
60 ) -> Result<Vec<SearchResult>> {
61 let candidates_k = k.saturating_mul(4).max(k + 10);
63 let bm25_results = self.text_index.search(query, candidates_k);
64
65 let vector_storage = self.vector_storage.read();
66 let payload_storage = self.payload_storage.read();
67
68 Ok(bm25_results
69 .into_iter()
70 .filter_map(|(id, score)| {
71 let vector = vector_storage.retrieve(id).ok().flatten()?;
72 let payload = payload_storage.retrieve(id).ok().flatten();
73
74 let payload_ref = payload.as_ref()?;
76 if !filter.matches(payload_ref) {
77 return None;
78 }
79
80 let point = Point {
81 id,
82 vector,
83 payload,
84 sparse_vectors: None,
85 };
86
87 Some(SearchResult::new(point, score))
88 })
89 .take(k)
90 .collect())
91 }
92
93 pub fn hybrid_search(
114 &self,
115 vector_query: &[f32],
116 text_query: &str,
117 k: usize,
118 vector_weight: Option<f32>,
119 ) -> Result<Vec<SearchResult>> {
120 use crate::index::VectorIndex;
121
122 let config = self.config.read();
123 validate_dimension_match(config.dimension, vector_query.len())?;
124 let metric = config.metric;
125 drop(config);
126
127 let weight = vector_weight.unwrap_or(0.5).clamp(0.0, 1.0);
128 let text_weight = 1.0 - weight;
129
130 let overfetch_k = k * 2;
131 let raw_vector_results = self.index.search(vector_query, overfetch_k);
132 let vector_results =
133 self.merge_delta(raw_vector_results, vector_query, overfetch_k, metric);
134 let text_results = self.text_index.search(text_query, k * 2);
135
136 let fused_scores =
137 Self::compute_rrf_scores(&vector_results, &text_results, weight, text_weight);
138
139 let scored_ids = Self::top_k_from_scores(fused_scores, k);
140 Ok(self.resolve_scored_ids(&scored_ids))
141 }
142
143 #[allow(clippy::cast_precision_loss)]
145 fn compute_rrf_scores(
146 vector_results: &[crate::scored_result::ScoredResult],
147 text_results: &[(u64, f32)],
148 vector_weight: f32,
149 text_weight: f32,
150 ) -> rustc_hash::FxHashMap<u64, f32> {
151 let mut fused: rustc_hash::FxHashMap<u64, f32> =
152 rustc_hash::FxHashMap::with_capacity_and_hasher(
153 vector_results.len() + text_results.len(),
154 rustc_hash::FxBuildHasher,
155 );
156 for (rank, sr) in vector_results.iter().enumerate() {
157 *fused.entry(sr.id).or_insert(0.0) += vector_weight / (rank as f32 + 60.0);
158 }
159 for (rank, (id, _)) in text_results.iter().enumerate() {
160 *fused.entry(*id).or_insert(0.0) += text_weight / (rank as f32 + 60.0);
161 }
162 fused
163 }
164
165 fn top_k_from_scores(
167 fused_scores: rustc_hash::FxHashMap<u64, f32>,
168 k: usize,
169 ) -> Vec<(u64, f32)> {
170 use std::cmp::Reverse;
171 use std::collections::BinaryHeap;
172
173 let mut heap: BinaryHeap<Reverse<(OrderedFloat, u64)>> = BinaryHeap::with_capacity(k + 1);
174 for (id, score) in fused_scores {
175 heap.push(Reverse((OrderedFloat(score), id)));
176 if heap.len() > k {
177 heap.pop();
178 }
179 }
180 let mut scored: Vec<(u64, f32)> = heap
181 .into_iter()
182 .map(|Reverse((OrderedFloat(s), id))| (id, s))
183 .collect();
184 scored.sort_by(|a, b| b.1.total_cmp(&a.1));
185 scored
186 }
187
188 fn resolve_scored_ids(&self, scored_ids: &[(u64, f32)]) -> Vec<SearchResult> {
190 let vector_storage = self.vector_storage.read();
191 let payload_storage = self.payload_storage.read();
192
193 resolve::resolve_id_score_pairs(
194 scored_ids,
195 scored_ids.len(),
196 &*vector_storage,
197 &*payload_storage,
198 )
199 }
200
201 pub fn hybrid_search_with_filter(
218 &self,
219 vector_query: &[f32],
220 text_query: &str,
221 k: usize,
222 vector_weight: Option<f32>,
223 filter: &crate::filter::Filter,
224 ) -> Result<Vec<SearchResult>> {
225 use crate::index::VectorIndex;
226
227 let config = self.config.read();
228 validate_dimension_match(config.dimension, vector_query.len())?;
229 let metric = config.metric;
230 drop(config);
231
232 let weight = vector_weight.unwrap_or(0.5).clamp(0.0, 1.0);
233 let text_weight = 1.0 - weight;
234 let candidates_k = k.saturating_mul(4).max(k + 10);
235
236 let raw_vector_results = self.index.search(vector_query, candidates_k);
237 let vector_results =
238 self.merge_delta(raw_vector_results, vector_query, candidates_k, metric);
239 let text_results = self.text_index.search(text_query, candidates_k);
240
241 let fused_scores =
242 Self::compute_rrf_scores(&vector_results, &text_results, weight, text_weight);
243
244 let mut scored_ids: Vec<_> = fused_scores.into_iter().collect();
245 scored_ids.sort_by(|a, b| b.1.total_cmp(&a.1));
246
247 Ok(self.resolve_scored_ids_filtered(&scored_ids, filter, k))
248 }
249
250 fn resolve_scored_ids_filtered(
252 &self,
253 scored_ids: &[(u64, f32)],
254 filter: &crate::filter::Filter,
255 k: usize,
256 ) -> Vec<SearchResult> {
257 let vector_storage = self.vector_storage.read();
258 let payload_storage = self.payload_storage.read();
259
260 scored_ids
261 .iter()
262 .filter_map(|&(id, score)| {
263 let vector = vector_storage.retrieve(id).ok().flatten()?;
264 let payload = payload_storage.retrieve(id).ok().flatten();
265 let payload_ref = payload.as_ref()?;
266 if !filter.matches(payload_ref) {
267 return None;
268 }
269 Some(SearchResult::new(
270 Point {
271 id,
272 vector,
273 payload,
274 sparse_vectors: None,
275 },
276 score,
277 ))
278 })
279 .take(k)
280 .collect()
281 }
282}