velesdb_core/collection/vector_collection/search.rs
1//! Search, match, aggregation, and query execution for `VectorCollection`.
2
3use std::collections::HashMap;
4
5use crate::error::Result;
6use crate::point::SearchResult;
7
8use super::VectorCollection;
9
10impl VectorCollection {
11 /// Performs kNN vector search using the HNSW index.
12 ///
13 /// Returns the `k` nearest neighbors ordered by ascending distance.
14 ///
15 /// # Errors
16 ///
17 /// - Returns an error if the query dimension does not match the collection.
18 /// - Returns an error if the HNSW index is not initialized.
19 ///
20 /// # Examples
21 ///
22 /// ```rust,no_run
23 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
24 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
25 /// let results = coll.search(&vec![0.1; 128], 10)?;
26 /// for r in &results {
27 /// println!("id={} score={}", r.point.id, r.score);
28 /// }
29 /// # Ok::<(), velesdb_core::Error>(())
30 /// ```
31 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
32 self.inner.search(query, k)
33 }
34
35 /// Performs full-text BM25 search over indexed payload fields.
36 ///
37 /// Returns up to `k` results ranked by BM25 relevance score.
38 ///
39 /// # Errors
40 ///
41 /// - Returns an error if storage retrieval fails.
42 ///
43 /// # Examples
44 ///
45 /// ```rust,no_run
46 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
47 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
48 /// let results = coll.text_search("machine learning", 5)?;
49 /// # Ok::<(), velesdb_core::Error>(())
50 /// ```
51 pub fn text_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>> {
52 self.inner.text_search(query, k)
53 }
54
55 /// Performs kNN search with an explicit `ef_search` override.
56 ///
57 /// Higher `ef_search` values improve recall at the cost of latency.
58 ///
59 /// # Errors
60 ///
61 /// - Returns an error if the query dimension does not match the collection.
62 pub fn search_with_ef(
63 &self,
64 query: &[f32],
65 k: usize,
66 ef_search: usize,
67 ) -> Result<Vec<SearchResult>> {
68 self.inner.search_with_ef(query, k, ef_search)
69 }
70
71 /// Performs kNN search with a metadata filter applied post-retrieval.
72 ///
73 /// # Errors
74 ///
75 /// - Returns an error if the query dimension does not match the collection.
76 /// - Returns an error if the filter references an unsupported field type.
77 pub fn search_with_filter(
78 &self,
79 query: &[f32],
80 k: usize,
81 filter: &crate::filter::Filter,
82 ) -> Result<Vec<SearchResult>> {
83 self.inner.search_with_filter(query, k, filter)
84 }
85
86 /// Returns [`ScoredResult`] pairs without payload hydration.
87 ///
88 /// Faster than [`search`](Self::search) when only IDs and scores are needed.
89 ///
90 /// # Errors
91 ///
92 /// - Returns an error if the query dimension does not match the collection.
93 pub fn search_ids(
94 &self,
95 query: &[f32],
96 k: usize,
97 ) -> Result<Vec<crate::scored_result::ScoredResult>> {
98 self.inner.search_ids(query, k)
99 }
100
101 /// Full-text search with metadata filter.
102 ///
103 /// # Errors
104 ///
105 /// Returns an error if storage retrieval fails.
106 pub fn text_search_with_filter(
107 &self,
108 query: &str,
109 k: usize,
110 filter: &crate::filter::Filter,
111 ) -> Result<Vec<SearchResult>> {
112 self.inner.text_search_with_filter(query, k, filter)
113 }
114
115 /// Performs hybrid search combining vector kNN and BM25 full-text via RRF fusion.
116 ///
117 /// When `alpha` is `None`, a default blending factor is used. Values closer
118 /// to `1.0` weight vector results more; values closer to `0.0` weight text.
119 ///
120 /// # Errors
121 ///
122 /// - Returns an error if the query dimension does not match the collection.
123 /// - Returns an error if text indexing or storage retrieval fails.
124 ///
125 /// # Examples
126 ///
127 /// ```rust,no_run
128 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
129 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
130 /// let results = coll.hybrid_search(&vec![0.1; 128], "machine learning", 10, Some(0.7))?;
131 /// # Ok::<(), velesdb_core::Error>(())
132 /// ```
133 pub fn hybrid_search(
134 &self,
135 vector: &[f32],
136 text: &str,
137 k: usize,
138 alpha: Option<f32>,
139 ) -> Result<Vec<SearchResult>> {
140 self.inner.hybrid_search(vector, text, k, alpha)
141 }
142
143 /// Performs hybrid search (vector + BM25) with a metadata filter.
144 ///
145 /// # Errors
146 ///
147 /// - Returns an error if the query dimension does not match the collection.
148 /// - Returns an error if text indexing, storage, or filtering fails.
149 pub fn hybrid_search_with_filter(
150 &self,
151 vector: &[f32],
152 text: &str,
153 k: usize,
154 alpha: Option<f32>,
155 filter: &crate::filter::Filter,
156 ) -> Result<Vec<SearchResult>> {
157 self.inner
158 .hybrid_search_with_filter(vector, text, k, alpha, filter)
159 }
160
161 /// Performs batch kNN search with per-query metadata filters.
162 ///
163 /// Each query in `queries` is paired with the filter at the same index in
164 /// `filters`. Pass `None` for queries that should not be filtered.
165 ///
166 /// # Errors
167 ///
168 /// - Returns an error if any query dimension does not match the collection.
169 /// - Returns an error if `queries` and `filters` have different lengths.
170 ///
171 /// # Examples
172 ///
173 /// ```rust,no_run
174 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
175 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
176 /// let q1 = vec![0.1; 128];
177 /// let q2 = vec![0.2; 128];
178 /// let results = coll.search_batch_with_filters(
179 /// &[q1.as_slice(), q2.as_slice()],
180 /// 10,
181 /// &[None, None],
182 /// )?;
183 /// assert_eq!(results.len(), 2);
184 /// # Ok::<(), velesdb_core::Error>(())
185 /// ```
186 pub fn search_batch_with_filters(
187 &self,
188 queries: &[&[f32]],
189 k: usize,
190 filters: &[Option<crate::filter::Filter>],
191 ) -> Result<Vec<Vec<SearchResult>>> {
192 self.inner.search_batch_with_filters(queries, k, filters)
193 }
194
195 /// Performs multi-query search fusing results from multiple query vectors.
196 ///
197 /// # Errors
198 ///
199 /// - Returns an error if any query dimension does not match the collection.
200 /// - Returns an error if the fusion strategy fails.
201 pub fn multi_query_search(
202 &self,
203 queries: &[&[f32]],
204 k: usize,
205 strategy: crate::fusion::FusionStrategy,
206 filter: Option<&crate::filter::Filter>,
207 ) -> Result<Vec<SearchResult>> {
208 self.inner.multi_query_search(queries, k, strategy, filter)
209 }
210
211 /// Performs multi-query search returning only IDs and fused scores.
212 ///
213 /// # Errors
214 ///
215 /// - Returns an error if any query dimension does not match the collection.
216 /// - Returns an error if the fusion strategy fails.
217 pub fn multi_query_search_ids(
218 &self,
219 queries: &[&[f32]],
220 k: usize,
221 strategy: crate::fusion::FusionStrategy,
222 ) -> Result<Vec<(u64, f32)>> {
223 self.inner.multi_query_search_ids(queries, k, strategy)
224 }
225
226 /// Performs sparse-only search on the named index.
227 ///
228 /// # Errors
229 ///
230 /// Returns an error if the named sparse index does not exist.
231 pub fn sparse_search(
232 &self,
233 query: &crate::index::sparse::SparseVector,
234 k: usize,
235 index_name: &str,
236 ) -> Result<Vec<SearchResult>> {
237 let indexes = self.inner.sparse_indexes.read();
238 let index = indexes.get(index_name).ok_or_else(|| {
239 crate::error::Error::Config(format!(
240 "Sparse index '{}' not found",
241 if index_name.is_empty() {
242 "<default>"
243 } else {
244 index_name
245 }
246 ))
247 })?;
248 let results = crate::index::sparse::sparse_search(index, query, k);
249 drop(indexes);
250 Ok(self.inner.resolve_sparse_results(&results, k))
251 }
252
253 /// Performs hybrid dense+sparse search with RRF fusion.
254 ///
255 /// # Errors
256 ///
257 /// Returns an error if dense or sparse search fails, or fusion errors.
258 #[allow(clippy::too_many_arguments)]
259 pub fn hybrid_sparse_search(
260 &self,
261 dense_vector: &[f32],
262 sparse_query: &crate::index::sparse::SparseVector,
263 k: usize,
264 index_name: &str,
265 strategy: &crate::fusion::FusionStrategy,
266 ) -> Result<Vec<SearchResult>> {
267 let candidate_k = k.saturating_mul(2).max(k + 10);
268
269 let (dense_results, sparse_results) = self.inner.execute_both_branches(
270 dense_vector,
271 sparse_query,
272 index_name,
273 candidate_k,
274 None,
275 );
276
277 if dense_results.is_empty() && sparse_results.is_empty() {
278 return Ok(Vec::new());
279 }
280 if dense_results.is_empty() {
281 let scored: Vec<(u64, f32)> = sparse_results
282 .iter()
283 .map(|sd| (sd.doc_id, sd.score))
284 .collect();
285 return Ok(self.inner.resolve_fused_results(&scored, k));
286 }
287 if sparse_results.is_empty() {
288 return Ok(self.inner.resolve_fused_results(&dense_results, k));
289 }
290
291 let sparse_tuples: Vec<(u64, f32)> = sparse_results
292 .iter()
293 .map(|sd| (sd.doc_id, sd.score))
294 .collect();
295
296 let fused = strategy
297 .fuse(vec![dense_results, sparse_tuples])
298 .map_err(|e| crate::error::Error::Config(format!("Fusion error: {e}")))?;
299
300 Ok(self.inner.resolve_fused_results(&fused, k))
301 }
302
303 /// Executes a graph MATCH query against the collection's edge store.
304 ///
305 /// # Errors
306 ///
307 /// - Returns an error if the match clause references an invalid label or property.
308 /// - Returns an error if the edge store is not initialized.
309 pub fn execute_match(
310 &self,
311 match_clause: &crate::velesql::MatchClause,
312 params: &std::collections::HashMap<String, serde_json::Value>,
313 ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
314 self.inner.execute_match(match_clause, params)
315 }
316
317 /// Executes a MATCH query with vector similarity filtering.
318 ///
319 /// # Errors
320 ///
321 /// - Returns an error if the match clause is invalid or the query dimension mismatches.
322 pub fn execute_match_with_similarity(
323 &self,
324 match_clause: &crate::velesql::MatchClause,
325 query_vector: &[f32],
326 threshold: f32,
327 params: &std::collections::HashMap<String, serde_json::Value>,
328 ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
329 self.inner
330 .execute_match_with_similarity(match_clause, query_vector, threshold, params)
331 }
332
333 /// Executes an aggregation query (GROUP BY / COUNT / SUM / AVG / MIN / MAX).
334 ///
335 /// # Errors
336 ///
337 /// - Returns an error if the query is invalid or aggregation computation fails.
338 pub fn execute_aggregate(
339 &self,
340 query: &crate::velesql::Query,
341 params: &std::collections::HashMap<String, serde_json::Value>,
342 ) -> Result<serde_json::Value> {
343 self.inner.execute_aggregate(query, params)
344 }
345
346 /// Executes a parsed `VelesQL` query.
347 ///
348 /// # Errors
349 ///
350 /// - Returns an error if the query references missing fields or execution fails.
351 pub fn execute_query(
352 &self,
353 query: &crate::velesql::Query,
354 params: &HashMap<String, serde_json::Value>,
355 ) -> Result<Vec<SearchResult>> {
356 self.inner.execute_query(query, params)
357 }
358
/// Hands a point to the streaming ingestion channel.
///
/// `Ok(())` signals that the point was accepted (202 semantics).
///
/// # Errors
///
/// Returns a `BackpressureError`:
/// - `BackpressureError::BufferFull` when the channel is at capacity.
/// - `BackpressureError::NotConfigured` when streaming is not active.
#[cfg(feature = "persistence")]
pub fn stream_insert(
    &self,
    point: crate::point::Point,
) -> std::result::Result<(), crate::collection::streaming::BackpressureError> {
    let collection = &self.inner;
    collection.stream_insert(point)
}
375
/// Appends `(id, vector)` entries to the delta buffer when it is active.
///
/// Does nothing while the delta buffer is inactive. Streaming upsert
/// handlers (e.g., the NDJSON stream endpoint) call this after a
/// successful `upsert_bulk` to keep the delta buffer in sync.
#[cfg(feature = "persistence")]
pub fn push_to_delta_if_active(&self, entries: &[(u64, Vec<f32>)]) {
    let collection = &self.inner;
    collection.push_to_delta_if_active(entries);
}
385
/// Reports whether the delta buffer is currently active, i.e. an HNSW
/// rebuild is in progress.
///
/// External callers can consult this to decide whether entries should be
/// snapshotted for the delta buffer ahead of an `upsert_bulk` call.
#[cfg(feature = "persistence")]
#[must_use]
pub fn is_delta_active(&self) -> bool {
    let delta = &self.inner.delta_buffer;
    delta.is_active()
}
394
395 /// Executes a raw VelesQL string, parsing it before execution.
396 ///
397 /// # Errors
398 ///
399 /// - Returns an error if the SQL string cannot be parsed.
400 /// - Returns an error if query execution fails.
401 pub fn execute_query_str(
402 &self,
403 sql: &str,
404 params: &HashMap<String, serde_json::Value>,
405 ) -> Result<Vec<SearchResult>> {
406 self.inner.execute_query_str(sql, params)
407 }
408}