velesdb_core/collection/vector_collection/search.rs
1//! Search, match, aggregation, and query execution for `VectorCollection`.
2
3use std::collections::HashMap;
4
5use crate::error::Result;
6use crate::point::SearchResult;
7
8use super::VectorCollection;
9
10impl VectorCollection {
11 /// Performs kNN vector search using the HNSW index.
12 ///
13 /// Returns the `k` nearest neighbors ordered by ascending distance.
14 ///
15 /// # Errors
16 ///
17 /// - Returns an error if the query dimension does not match the collection.
18 /// - Returns an error if the HNSW index is not initialized.
19 ///
20 /// # Examples
21 ///
22 /// ```rust,no_run
23 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
24 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
25 /// let results = coll.search(&vec![0.1; 128], 10)?;
26 /// for r in &results {
27 /// println!("id={} score={}", r.point.id, r.score);
28 /// }
29 /// # Ok::<(), velesdb_core::Error>(())
30 /// ```
31 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
32 self.inner.search(query, k)
33 }
34
35 /// Performs full-text BM25 search over indexed payload fields.
36 ///
37 /// Returns up to `k` results ranked by BM25 relevance score.
38 ///
39 /// # Errors
40 ///
41 /// - Returns an error if storage retrieval fails.
42 ///
43 /// # Examples
44 ///
45 /// ```rust,no_run
46 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
47 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
48 /// let results = coll.text_search("machine learning", 5)?;
49 /// # Ok::<(), velesdb_core::Error>(())
50 /// ```
51 pub fn text_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>> {
52 self.inner.text_search(query, k)
53 }
54
55 /// Performs kNN search with an explicit `ef_search` override.
56 ///
57 /// Higher `ef_search` values improve recall at the cost of latency.
58 ///
59 /// # Errors
60 ///
61 /// - Returns an error if the query dimension does not match the collection.
62 pub fn search_with_ef(
63 &self,
64 query: &[f32],
65 k: usize,
66 ef_search: usize,
67 ) -> Result<Vec<SearchResult>> {
68 self.inner.search_with_ef(query, k, ef_search)
69 }
70
71 /// Performs kNN search with a specific [`SearchQuality`] profile.
72 ///
73 /// Use this instead of [`search_with_ef`] when you want named quality
74 /// modes like [`SearchQuality::AutoTune`] that compute ef dynamically.
75 ///
76 /// # Errors
77 ///
78 /// - Returns an error if the query dimension does not match the collection.
79 pub fn search_with_quality(
80 &self,
81 query: &[f32],
82 k: usize,
83 quality: crate::SearchQuality,
84 ) -> Result<Vec<SearchResult>> {
85 self.inner.search_with_quality(query, k, quality)
86 }
87
88 /// Performs kNN search with a metadata filter applied post-retrieval.
89 ///
90 /// # Errors
91 ///
92 /// - Returns an error if the query dimension does not match the collection.
93 /// - Returns an error if the filter references an unsupported field type.
94 pub fn search_with_filter(
95 &self,
96 query: &[f32],
97 k: usize,
98 filter: &crate::filter::Filter,
99 ) -> Result<Vec<SearchResult>> {
100 self.inner.search_with_filter(query, k, filter)
101 }
102
103 /// Returns [`ScoredResult`] pairs without payload hydration.
104 ///
105 /// Faster than [`search`](Self::search) when only IDs and scores are needed.
106 ///
107 /// # Errors
108 ///
109 /// - Returns an error if the query dimension does not match the collection.
110 pub fn search_ids(
111 &self,
112 query: &[f32],
113 k: usize,
114 ) -> Result<Vec<crate::scored_result::ScoredResult>> {
115 self.inner.search_ids(query, k)
116 }
117
118 /// Full-text search with metadata filter.
119 ///
120 /// # Errors
121 ///
122 /// Returns an error if storage retrieval fails.
123 pub fn text_search_with_filter(
124 &self,
125 query: &str,
126 k: usize,
127 filter: &crate::filter::Filter,
128 ) -> Result<Vec<SearchResult>> {
129 self.inner.text_search_with_filter(query, k, filter)
130 }
131
132 /// Performs hybrid search combining vector kNN and BM25 full-text via RRF fusion.
133 ///
134 /// When `alpha` is `None`, a default blending factor is used. Values closer
135 /// to `1.0` weight vector results more; values closer to `0.0` weight text.
136 ///
137 /// # Errors
138 ///
139 /// - Returns an error if the query dimension does not match the collection.
140 /// - Returns an error if text indexing or storage retrieval fails.
141 ///
142 /// # Examples
143 ///
144 /// ```rust,no_run
145 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
146 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
147 /// let results = coll.hybrid_search(&vec![0.1; 128], "machine learning", 10, Some(0.7))?;
148 /// # Ok::<(), velesdb_core::Error>(())
149 /// ```
150 pub fn hybrid_search(
151 &self,
152 vector: &[f32],
153 text: &str,
154 k: usize,
155 alpha: Option<f32>,
156 ) -> Result<Vec<SearchResult>> {
157 self.inner.hybrid_search(vector, text, k, alpha)
158 }
159
160 /// Performs hybrid search (vector + BM25) with a metadata filter.
161 ///
162 /// # Errors
163 ///
164 /// - Returns an error if the query dimension does not match the collection.
165 /// - Returns an error if text indexing, storage, or filtering fails.
166 pub fn hybrid_search_with_filter(
167 &self,
168 vector: &[f32],
169 text: &str,
170 k: usize,
171 alpha: Option<f32>,
172 filter: &crate::filter::Filter,
173 ) -> Result<Vec<SearchResult>> {
174 self.inner
175 .hybrid_search_with_filter(vector, text, k, alpha, filter)
176 }
177
178 /// Performs batch kNN search with per-query metadata filters.
179 ///
180 /// Each query in `queries` is paired with the filter at the same index in
181 /// `filters`. Pass `None` for queries that should not be filtered.
182 ///
183 /// # Errors
184 ///
185 /// - Returns an error if any query dimension does not match the collection.
186 /// - Returns an error if `queries` and `filters` have different lengths.
187 ///
188 /// # Examples
189 ///
190 /// ```rust,no_run
191 /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
192 /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
193 /// let q1 = vec![0.1; 128];
194 /// let q2 = vec![0.2; 128];
195 /// let results = coll.search_batch_with_filters(
196 /// &[q1.as_slice(), q2.as_slice()],
197 /// 10,
198 /// &[None, None],
199 /// )?;
200 /// assert_eq!(results.len(), 2);
201 /// # Ok::<(), velesdb_core::Error>(())
202 /// ```
203 pub fn search_batch_with_filters(
204 &self,
205 queries: &[&[f32]],
206 k: usize,
207 filters: &[Option<crate::filter::Filter>],
208 ) -> Result<Vec<Vec<SearchResult>>> {
209 self.inner.search_batch_with_filters(queries, k, filters)
210 }
211
212 /// Performs multi-query search fusing results from multiple query vectors.
213 ///
214 /// # Errors
215 ///
216 /// - Returns an error if any query dimension does not match the collection.
217 /// - Returns an error if the fusion strategy fails.
218 pub fn multi_query_search(
219 &self,
220 queries: &[&[f32]],
221 k: usize,
222 strategy: crate::fusion::FusionStrategy,
223 filter: Option<&crate::filter::Filter>,
224 ) -> Result<Vec<SearchResult>> {
225 self.inner.multi_query_search(queries, k, strategy, filter)
226 }
227
228 /// Performs multi-query search returning only IDs and fused scores.
229 ///
230 /// # Errors
231 ///
232 /// - Returns an error if any query dimension does not match the collection.
233 /// - Returns an error if the fusion strategy fails.
234 pub fn multi_query_search_ids(
235 &self,
236 queries: &[&[f32]],
237 k: usize,
238 strategy: crate::fusion::FusionStrategy,
239 ) -> Result<Vec<(u64, f32)>> {
240 self.inner.multi_query_search_ids(queries, k, strategy)
241 }
242
243 /// Performs sparse-only search on the named index.
244 ///
245 /// # Errors
246 ///
247 /// Returns an error if the named sparse index does not exist.
248 pub fn sparse_search(
249 &self,
250 query: &crate::index::sparse::SparseVector,
251 k: usize,
252 index_name: &str,
253 ) -> Result<Vec<SearchResult>> {
254 let indexes = self.inner.sparse_indexes.read();
255 let index = indexes.get(index_name).ok_or_else(|| {
256 crate::error::Error::Config(format!(
257 "Sparse index '{}' not found",
258 if index_name.is_empty() {
259 "<default>"
260 } else {
261 index_name
262 }
263 ))
264 })?;
265 let results = crate::index::sparse::sparse_search(index, query, k);
266 drop(indexes);
267 Ok(self.inner.resolve_sparse_results(&results, k))
268 }
269
270 /// Performs hybrid dense+sparse search with RRF fusion.
271 ///
272 /// # Errors
273 ///
274 /// Returns an error if dense or sparse search fails, or fusion errors.
275 #[allow(clippy::too_many_arguments)]
276 pub fn hybrid_sparse_search(
277 &self,
278 dense_vector: &[f32],
279 sparse_query: &crate::index::sparse::SparseVector,
280 k: usize,
281 index_name: &str,
282 strategy: &crate::fusion::FusionStrategy,
283 ) -> Result<Vec<SearchResult>> {
284 let candidate_k = k.saturating_mul(2).max(k + 10);
285
286 let (dense_results, sparse_results) = self.inner.execute_both_branches(
287 dense_vector,
288 sparse_query,
289 index_name,
290 candidate_k,
291 None,
292 );
293
294 if dense_results.is_empty() && sparse_results.is_empty() {
295 return Ok(Vec::new());
296 }
297 if dense_results.is_empty() {
298 let scored: Vec<(u64, f32)> = sparse_results
299 .iter()
300 .map(|sd| (sd.doc_id, sd.score))
301 .collect();
302 return Ok(self.inner.resolve_fused_results(&scored, k));
303 }
304 if sparse_results.is_empty() {
305 return Ok(self.inner.resolve_fused_results(&dense_results, k));
306 }
307
308 let sparse_tuples: Vec<(u64, f32)> = sparse_results
309 .iter()
310 .map(|sd| (sd.doc_id, sd.score))
311 .collect();
312
313 let fused = strategy
314 .fuse(vec![dense_results, sparse_tuples])
315 .map_err(|e| crate::error::Error::Config(format!("Fusion error: {e}")))?;
316
317 Ok(self.inner.resolve_fused_results(&fused, k))
318 }
319
320 /// Executes a graph MATCH query against the collection's edge store.
321 ///
322 /// # Errors
323 ///
324 /// - Returns an error if the match clause references an invalid label or property.
325 /// - Returns an error if the edge store is not initialized.
326 pub fn execute_match(
327 &self,
328 match_clause: &crate::velesql::MatchClause,
329 params: &std::collections::HashMap<String, serde_json::Value>,
330 ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
331 self.inner.execute_match(match_clause, params)
332 }
333
334 /// Executes a MATCH query with vector similarity filtering.
335 ///
336 /// # Errors
337 ///
338 /// - Returns an error if the match clause is invalid or the query dimension mismatches.
339 pub fn execute_match_with_similarity(
340 &self,
341 match_clause: &crate::velesql::MatchClause,
342 query_vector: &[f32],
343 threshold: f32,
344 params: &std::collections::HashMap<String, serde_json::Value>,
345 ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
346 self.inner
347 .execute_match_with_similarity(match_clause, query_vector, threshold, params)
348 }
349
350 /// Executes an aggregation query (GROUP BY / COUNT / SUM / AVG / MIN / MAX).
351 ///
352 /// # Errors
353 ///
354 /// - Returns an error if the query is invalid or aggregation computation fails.
355 pub fn execute_aggregate(
356 &self,
357 query: &crate::velesql::Query,
358 params: &std::collections::HashMap<String, serde_json::Value>,
359 ) -> Result<serde_json::Value> {
360 self.inner.execute_aggregate(query, params)
361 }
362
363 /// Executes a parsed `VelesQL` query.
364 ///
365 /// # Errors
366 ///
367 /// - Returns an error if the query references missing fields or execution fails.
368 pub fn execute_query(
369 &self,
370 query: &crate::velesql::Query,
371 params: &HashMap<String, serde_json::Value>,
372 ) -> Result<Vec<SearchResult>> {
373 self.inner.execute_query(query, params)
374 }
375
376 /// Sends a point into the streaming ingestion channel.
377 ///
378 /// Returns `Ok(())` on success (202 semantics). Returns
379 /// `BackpressureError::BufferFull` when the channel is at capacity, or
380 /// `BackpressureError::NotConfigured` if streaming is not active.
381 ///
382 /// # Errors
383 ///
384 /// Returns `BackpressureError` on buffer-full or not-configured.
385 #[cfg(feature = "persistence")]
386 pub fn stream_insert(
387 &self,
388 point: crate::point::Point,
389 ) -> std::result::Result<(), crate::collection::streaming::BackpressureError> {
390 self.inner.stream_insert(point)
391 }
392
393 /// Sends a batch of points into the streaming ingestion channel.
394 ///
395 /// Acquires the ingester lock once for the entire batch, eliminating
396 /// per-point lock overhead. Returns the number of points successfully
397 /// queued. See [`Collection::stream_insert_batch`] for details.
398 ///
399 /// # Errors
400 ///
401 /// Returns `BackpressureError` on buffer-full, drain-dead, or not-configured.
402 #[cfg(feature = "persistence")]
403 pub fn stream_insert_batch(
404 &self,
405 points: Vec<crate::point::Point>,
406 ) -> std::result::Result<usize, crate::collection::streaming::BackpressureError> {
407 self.inner.stream_insert_batch(points)
408 }
409
410 /// Pushes `(id, vector)` entries into the delta buffer if it is active.
411 ///
412 /// No-op when the delta buffer is inactive. This is the public interface
413 /// used by streaming upsert handlers (e.g., NDJSON stream endpoint) to
414 /// keep the delta buffer in sync after a successful `upsert_bulk` call.
415 #[cfg(feature = "persistence")]
416 pub fn push_to_delta_if_active(&self, entries: &[(u64, Vec<f32>)]) {
417 self.inner.push_to_delta_if_active(entries);
418 }
419
420 /// Returns `true` if the delta buffer is currently active (HNSW rebuild
421 /// in progress). External callers can use this to decide whether to
422 /// snapshot entries for delta before a `upsert_bulk` call.
423 #[cfg(feature = "persistence")]
424 #[must_use]
425 pub fn is_delta_active(&self) -> bool {
426 self.inner.delta_buffer.is_active()
427 }
428
429 /// Executes a raw VelesQL string, parsing it before execution.
430 ///
431 /// # Errors
432 ///
433 /// - Returns an error if the SQL string cannot be parsed.
434 /// - Returns an error if query execution fails.
435 pub fn execute_query_str(
436 &self,
437 sql: &str,
438 params: &HashMap<String, serde_json::Value>,
439 ) -> Result<Vec<SearchResult>> {
440 self.inner.execute_query_str(sql, params)
441 }
442}