Skip to main content

velesdb_core/collection/vector_collection/
search.rs

1//! Search, match, aggregation, and query execution for `VectorCollection`.
2
3use std::collections::HashMap;
4
5use crate::error::Result;
6use crate::point::SearchResult;
7
8use super::VectorCollection;
9
10impl VectorCollection {
11    /// Performs kNN vector search using the HNSW index.
12    ///
13    /// Returns the `k` nearest neighbors ordered by ascending distance.
14    ///
15    /// # Errors
16    ///
17    /// - Returns an error if the query dimension does not match the collection.
18    /// - Returns an error if the HNSW index is not initialized.
19    ///
20    /// # Examples
21    ///
22    /// ```rust,no_run
23    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
24    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
25    /// let results = coll.search(&vec![0.1; 128], 10)?;
26    /// for r in &results {
27    ///     println!("id={} score={}", r.point.id, r.score);
28    /// }
29    /// # Ok::<(), velesdb_core::Error>(())
30    /// ```
31    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
32        self.inner.search(query, k)
33    }
34
35    /// Performs full-text BM25 search over indexed payload fields.
36    ///
37    /// Returns up to `k` results ranked by BM25 relevance score.
38    ///
39    /// # Errors
40    ///
41    /// - Returns an error if storage retrieval fails.
42    ///
43    /// # Examples
44    ///
45    /// ```rust,no_run
46    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
47    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
48    /// let results = coll.text_search("machine learning", 5)?;
49    /// # Ok::<(), velesdb_core::Error>(())
50    /// ```
51    pub fn text_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>> {
52        self.inner.text_search(query, k)
53    }
54
55    /// Performs kNN search with an explicit `ef_search` override.
56    ///
57    /// Higher `ef_search` values improve recall at the cost of latency.
58    ///
59    /// # Errors
60    ///
61    /// - Returns an error if the query dimension does not match the collection.
62    pub fn search_with_ef(
63        &self,
64        query: &[f32],
65        k: usize,
66        ef_search: usize,
67    ) -> Result<Vec<SearchResult>> {
68        self.inner.search_with_ef(query, k, ef_search)
69    }
70
71    /// Performs kNN search with a specific [`SearchQuality`] profile.
72    ///
73    /// Use this instead of [`search_with_ef`] when you want named quality
74    /// modes like [`SearchQuality::AutoTune`] that compute ef dynamically.
75    ///
76    /// # Errors
77    ///
78    /// - Returns an error if the query dimension does not match the collection.
79    pub fn search_with_quality(
80        &self,
81        query: &[f32],
82        k: usize,
83        quality: crate::SearchQuality,
84    ) -> Result<Vec<SearchResult>> {
85        self.inner.search_with_quality(query, k, quality)
86    }
87
88    /// Performs kNN search with a metadata filter applied post-retrieval.
89    ///
90    /// # Errors
91    ///
92    /// - Returns an error if the query dimension does not match the collection.
93    /// - Returns an error if the filter references an unsupported field type.
94    pub fn search_with_filter(
95        &self,
96        query: &[f32],
97        k: usize,
98        filter: &crate::filter::Filter,
99    ) -> Result<Vec<SearchResult>> {
100        self.inner.search_with_filter(query, k, filter)
101    }
102
103    /// Returns [`ScoredResult`] pairs without payload hydration.
104    ///
105    /// Faster than [`search`](Self::search) when only IDs and scores are needed.
106    ///
107    /// # Errors
108    ///
109    /// - Returns an error if the query dimension does not match the collection.
110    pub fn search_ids(
111        &self,
112        query: &[f32],
113        k: usize,
114    ) -> Result<Vec<crate::scored_result::ScoredResult>> {
115        self.inner.search_ids(query, k)
116    }
117
118    /// Full-text search with metadata filter.
119    ///
120    /// # Errors
121    ///
122    /// Returns an error if storage retrieval fails.
123    pub fn text_search_with_filter(
124        &self,
125        query: &str,
126        k: usize,
127        filter: &crate::filter::Filter,
128    ) -> Result<Vec<SearchResult>> {
129        self.inner.text_search_with_filter(query, k, filter)
130    }
131
132    /// Performs hybrid search combining vector kNN and BM25 full-text via RRF fusion.
133    ///
134    /// When `alpha` is `None`, a default blending factor is used. Values closer
135    /// to `1.0` weight vector results more; values closer to `0.0` weight text.
136    ///
137    /// # Errors
138    ///
139    /// - Returns an error if the query dimension does not match the collection.
140    /// - Returns an error if text indexing or storage retrieval fails.
141    ///
142    /// # Examples
143    ///
144    /// ```rust,no_run
145    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
146    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
147    /// let results = coll.hybrid_search(&vec![0.1; 128], "machine learning", 10, Some(0.7))?;
148    /// # Ok::<(), velesdb_core::Error>(())
149    /// ```
150    pub fn hybrid_search(
151        &self,
152        vector: &[f32],
153        text: &str,
154        k: usize,
155        alpha: Option<f32>,
156    ) -> Result<Vec<SearchResult>> {
157        self.inner.hybrid_search(vector, text, k, alpha)
158    }
159
160    /// Performs hybrid search (vector + BM25) with a metadata filter.
161    ///
162    /// # Errors
163    ///
164    /// - Returns an error if the query dimension does not match the collection.
165    /// - Returns an error if text indexing, storage, or filtering fails.
166    pub fn hybrid_search_with_filter(
167        &self,
168        vector: &[f32],
169        text: &str,
170        k: usize,
171        alpha: Option<f32>,
172        filter: &crate::filter::Filter,
173    ) -> Result<Vec<SearchResult>> {
174        self.inner
175            .hybrid_search_with_filter(vector, text, k, alpha, filter)
176    }
177
178    /// Performs batch kNN search with per-query metadata filters.
179    ///
180    /// Each query in `queries` is paired with the filter at the same index in
181    /// `filters`. Pass `None` for queries that should not be filtered.
182    ///
183    /// # Errors
184    ///
185    /// - Returns an error if any query dimension does not match the collection.
186    /// - Returns an error if `queries` and `filters` have different lengths.
187    ///
188    /// # Examples
189    ///
190    /// ```rust,no_run
191    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
192    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
193    /// let q1 = vec![0.1; 128];
194    /// let q2 = vec![0.2; 128];
195    /// let results = coll.search_batch_with_filters(
196    ///     &[q1.as_slice(), q2.as_slice()],
197    ///     10,
198    ///     &[None, None],
199    /// )?;
200    /// assert_eq!(results.len(), 2);
201    /// # Ok::<(), velesdb_core::Error>(())
202    /// ```
203    pub fn search_batch_with_filters(
204        &self,
205        queries: &[&[f32]],
206        k: usize,
207        filters: &[Option<crate::filter::Filter>],
208    ) -> Result<Vec<Vec<SearchResult>>> {
209        self.inner.search_batch_with_filters(queries, k, filters)
210    }
211
212    /// Performs multi-query search fusing results from multiple query vectors.
213    ///
214    /// # Errors
215    ///
216    /// - Returns an error if any query dimension does not match the collection.
217    /// - Returns an error if the fusion strategy fails.
218    pub fn multi_query_search(
219        &self,
220        queries: &[&[f32]],
221        k: usize,
222        strategy: crate::fusion::FusionStrategy,
223        filter: Option<&crate::filter::Filter>,
224    ) -> Result<Vec<SearchResult>> {
225        self.inner.multi_query_search(queries, k, strategy, filter)
226    }
227
228    /// Performs multi-query search returning only IDs and fused scores.
229    ///
230    /// # Errors
231    ///
232    /// - Returns an error if any query dimension does not match the collection.
233    /// - Returns an error if the fusion strategy fails.
234    pub fn multi_query_search_ids(
235        &self,
236        queries: &[&[f32]],
237        k: usize,
238        strategy: crate::fusion::FusionStrategy,
239    ) -> Result<Vec<(u64, f32)>> {
240        self.inner.multi_query_search_ids(queries, k, strategy)
241    }
242
243    /// Performs sparse-only search on the named index.
244    ///
245    /// # Errors
246    ///
247    /// Returns an error if the named sparse index does not exist.
248    pub fn sparse_search(
249        &self,
250        query: &crate::index::sparse::SparseVector,
251        k: usize,
252        index_name: &str,
253    ) -> Result<Vec<SearchResult>> {
254        let indexes = self.inner.sparse_indexes.read();
255        let index = indexes.get(index_name).ok_or_else(|| {
256            crate::error::Error::Config(format!(
257                "Sparse index '{}' not found",
258                if index_name.is_empty() {
259                    "<default>"
260                } else {
261                    index_name
262                }
263            ))
264        })?;
265        let results = crate::index::sparse::sparse_search(index, query, k);
266        drop(indexes);
267        Ok(self.inner.resolve_sparse_results(&results, k))
268    }
269
270    /// Performs hybrid dense+sparse search with RRF fusion.
271    ///
272    /// # Errors
273    ///
274    /// Returns an error if dense or sparse search fails, or fusion errors.
275    #[allow(clippy::too_many_arguments)]
276    pub fn hybrid_sparse_search(
277        &self,
278        dense_vector: &[f32],
279        sparse_query: &crate::index::sparse::SparseVector,
280        k: usize,
281        index_name: &str,
282        strategy: &crate::fusion::FusionStrategy,
283    ) -> Result<Vec<SearchResult>> {
284        let candidate_k = k.saturating_mul(2).max(k + 10);
285
286        let (dense_results, sparse_results) = self.inner.execute_both_branches(
287            dense_vector,
288            sparse_query,
289            index_name,
290            candidate_k,
291            None,
292        );
293
294        if dense_results.is_empty() && sparse_results.is_empty() {
295            return Ok(Vec::new());
296        }
297        if dense_results.is_empty() {
298            let scored: Vec<(u64, f32)> = sparse_results
299                .iter()
300                .map(|sd| (sd.doc_id, sd.score))
301                .collect();
302            return Ok(self.inner.resolve_fused_results(&scored, k));
303        }
304        if sparse_results.is_empty() {
305            return Ok(self.inner.resolve_fused_results(&dense_results, k));
306        }
307
308        let sparse_tuples: Vec<(u64, f32)> = sparse_results
309            .iter()
310            .map(|sd| (sd.doc_id, sd.score))
311            .collect();
312
313        let fused = strategy
314            .fuse(vec![dense_results, sparse_tuples])
315            .map_err(|e| crate::error::Error::Config(format!("Fusion error: {e}")))?;
316
317        Ok(self.inner.resolve_fused_results(&fused, k))
318    }
319
320    /// Executes a graph MATCH query against the collection's edge store.
321    ///
322    /// # Errors
323    ///
324    /// - Returns an error if the match clause references an invalid label or property.
325    /// - Returns an error if the edge store is not initialized.
326    pub fn execute_match(
327        &self,
328        match_clause: &crate::velesql::MatchClause,
329        params: &std::collections::HashMap<String, serde_json::Value>,
330    ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
331        self.inner.execute_match(match_clause, params)
332    }
333
334    /// Executes a MATCH query with vector similarity filtering.
335    ///
336    /// # Errors
337    ///
338    /// - Returns an error if the match clause is invalid or the query dimension mismatches.
339    pub fn execute_match_with_similarity(
340        &self,
341        match_clause: &crate::velesql::MatchClause,
342        query_vector: &[f32],
343        threshold: f32,
344        params: &std::collections::HashMap<String, serde_json::Value>,
345    ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
346        self.inner
347            .execute_match_with_similarity(match_clause, query_vector, threshold, params)
348    }
349
350    /// Executes an aggregation query (GROUP BY / COUNT / SUM / AVG / MIN / MAX).
351    ///
352    /// # Errors
353    ///
354    /// - Returns an error if the query is invalid or aggregation computation fails.
355    pub fn execute_aggregate(
356        &self,
357        query: &crate::velesql::Query,
358        params: &std::collections::HashMap<String, serde_json::Value>,
359    ) -> Result<serde_json::Value> {
360        self.inner.execute_aggregate(query, params)
361    }
362
363    /// Executes a parsed `VelesQL` query.
364    ///
365    /// # Errors
366    ///
367    /// - Returns an error if the query references missing fields or execution fails.
368    pub fn execute_query(
369        &self,
370        query: &crate::velesql::Query,
371        params: &HashMap<String, serde_json::Value>,
372    ) -> Result<Vec<SearchResult>> {
373        self.inner.execute_query(query, params)
374    }
375
376    /// Sends a point into the streaming ingestion channel.
377    ///
378    /// Returns `Ok(())` on success (202 semantics). Returns
379    /// `BackpressureError::BufferFull` when the channel is at capacity, or
380    /// `BackpressureError::NotConfigured` if streaming is not active.
381    ///
382    /// # Errors
383    ///
384    /// Returns `BackpressureError` on buffer-full or not-configured.
385    #[cfg(feature = "persistence")]
386    pub fn stream_insert(
387        &self,
388        point: crate::point::Point,
389    ) -> std::result::Result<(), crate::collection::streaming::BackpressureError> {
390        self.inner.stream_insert(point)
391    }
392
393    /// Sends a batch of points into the streaming ingestion channel.
394    ///
395    /// Acquires the ingester lock once for the entire batch, eliminating
396    /// per-point lock overhead. Returns the number of points successfully
397    /// queued. See [`Collection::stream_insert_batch`] for details.
398    ///
399    /// # Errors
400    ///
401    /// Returns `BackpressureError` on buffer-full, drain-dead, or not-configured.
402    #[cfg(feature = "persistence")]
403    pub fn stream_insert_batch(
404        &self,
405        points: Vec<crate::point::Point>,
406    ) -> std::result::Result<usize, crate::collection::streaming::BackpressureError> {
407        self.inner.stream_insert_batch(points)
408    }
409
410    /// Pushes `(id, vector)` entries into the delta buffer if it is active.
411    ///
412    /// No-op when the delta buffer is inactive. This is the public interface
413    /// used by streaming upsert handlers (e.g., NDJSON stream endpoint) to
414    /// keep the delta buffer in sync after a successful `upsert_bulk` call.
415    #[cfg(feature = "persistence")]
416    pub fn push_to_delta_if_active(&self, entries: &[(u64, Vec<f32>)]) {
417        self.inner.push_to_delta_if_active(entries);
418    }
419
420    /// Returns `true` if the delta buffer is currently active (HNSW rebuild
421    /// in progress). External callers can use this to decide whether to
422    /// snapshot entries for delta before a `upsert_bulk` call.
423    #[cfg(feature = "persistence")]
424    #[must_use]
425    pub fn is_delta_active(&self) -> bool {
426        self.inner.delta_buffer.is_active()
427    }
428
429    /// Executes a raw VelesQL string, parsing it before execution.
430    ///
431    /// # Errors
432    ///
433    /// - Returns an error if the SQL string cannot be parsed.
434    /// - Returns an error if query execution fails.
435    pub fn execute_query_str(
436        &self,
437        sql: &str,
438        params: &HashMap<String, serde_json::Value>,
439    ) -> Result<Vec<SearchResult>> {
440        self.inner.execute_query_str(sql, params)
441    }
442}