Skip to main content

velesdb_core/collection/vector_collection/
search.rs

1//! Search, match, aggregation, and query execution for `VectorCollection`.
2
3use std::collections::HashMap;
4
5use crate::error::Result;
6use crate::point::SearchResult;
7
8use super::VectorCollection;
9
10impl VectorCollection {
11    /// Performs kNN vector search using the HNSW index.
12    ///
13    /// Returns the `k` nearest neighbors ordered by ascending distance.
14    ///
15    /// # Errors
16    ///
17    /// - Returns an error if the query dimension does not match the collection.
18    /// - Returns an error if the HNSW index is not initialized.
19    ///
20    /// # Examples
21    ///
22    /// ```rust,no_run
23    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
24    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
25    /// let results = coll.search(&vec![0.1; 128], 10)?;
26    /// for r in &results {
27    ///     println!("id={} score={}", r.point.id, r.score);
28    /// }
29    /// # Ok::<(), velesdb_core::Error>(())
30    /// ```
31    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
32        self.inner.search(query, k)
33    }
34
35    /// Performs full-text BM25 search over indexed payload fields.
36    ///
37    /// Returns up to `k` results ranked by BM25 relevance score.
38    ///
39    /// # Errors
40    ///
41    /// - Returns an error if storage retrieval fails.
42    ///
43    /// # Examples
44    ///
45    /// ```rust,no_run
46    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
47    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
48    /// let results = coll.text_search("machine learning", 5)?;
49    /// # Ok::<(), velesdb_core::Error>(())
50    /// ```
51    pub fn text_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>> {
52        self.inner.text_search(query, k)
53    }
54
55    /// Performs kNN search with an explicit `ef_search` override.
56    ///
57    /// Higher `ef_search` values improve recall at the cost of latency.
58    ///
59    /// # Errors
60    ///
61    /// - Returns an error if the query dimension does not match the collection.
62    pub fn search_with_ef(
63        &self,
64        query: &[f32],
65        k: usize,
66        ef_search: usize,
67    ) -> Result<Vec<SearchResult>> {
68        self.inner.search_with_ef(query, k, ef_search)
69    }
70
71    /// Performs kNN search with a metadata filter applied post-retrieval.
72    ///
73    /// # Errors
74    ///
75    /// - Returns an error if the query dimension does not match the collection.
76    /// - Returns an error if the filter references an unsupported field type.
77    pub fn search_with_filter(
78        &self,
79        query: &[f32],
80        k: usize,
81        filter: &crate::filter::Filter,
82    ) -> Result<Vec<SearchResult>> {
83        self.inner.search_with_filter(query, k, filter)
84    }
85
86    /// Returns [`ScoredResult`] pairs without payload hydration.
87    ///
88    /// Faster than [`search`](Self::search) when only IDs and scores are needed.
89    ///
90    /// # Errors
91    ///
92    /// - Returns an error if the query dimension does not match the collection.
93    pub fn search_ids(
94        &self,
95        query: &[f32],
96        k: usize,
97    ) -> Result<Vec<crate::scored_result::ScoredResult>> {
98        self.inner.search_ids(query, k)
99    }
100
101    /// Full-text search with metadata filter.
102    ///
103    /// # Errors
104    ///
105    /// Returns an error if storage retrieval fails.
106    pub fn text_search_with_filter(
107        &self,
108        query: &str,
109        k: usize,
110        filter: &crate::filter::Filter,
111    ) -> Result<Vec<SearchResult>> {
112        self.inner.text_search_with_filter(query, k, filter)
113    }
114
115    /// Performs hybrid search combining vector kNN and BM25 full-text via RRF fusion.
116    ///
117    /// When `alpha` is `None`, a default blending factor is used. Values closer
118    /// to `1.0` weight vector results more; values closer to `0.0` weight text.
119    ///
120    /// # Errors
121    ///
122    /// - Returns an error if the query dimension does not match the collection.
123    /// - Returns an error if text indexing or storage retrieval fails.
124    ///
125    /// # Examples
126    ///
127    /// ```rust,no_run
128    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
129    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
130    /// let results = coll.hybrid_search(&vec![0.1; 128], "machine learning", 10, Some(0.7))?;
131    /// # Ok::<(), velesdb_core::Error>(())
132    /// ```
133    pub fn hybrid_search(
134        &self,
135        vector: &[f32],
136        text: &str,
137        k: usize,
138        alpha: Option<f32>,
139    ) -> Result<Vec<SearchResult>> {
140        self.inner.hybrid_search(vector, text, k, alpha)
141    }
142
143    /// Performs hybrid search (vector + BM25) with a metadata filter.
144    ///
145    /// # Errors
146    ///
147    /// - Returns an error if the query dimension does not match the collection.
148    /// - Returns an error if text indexing, storage, or filtering fails.
149    pub fn hybrid_search_with_filter(
150        &self,
151        vector: &[f32],
152        text: &str,
153        k: usize,
154        alpha: Option<f32>,
155        filter: &crate::filter::Filter,
156    ) -> Result<Vec<SearchResult>> {
157        self.inner
158            .hybrid_search_with_filter(vector, text, k, alpha, filter)
159    }
160
161    /// Performs batch kNN search with per-query metadata filters.
162    ///
163    /// Each query in `queries` is paired with the filter at the same index in
164    /// `filters`. Pass `None` for queries that should not be filtered.
165    ///
166    /// # Errors
167    ///
168    /// - Returns an error if any query dimension does not match the collection.
169    /// - Returns an error if `queries` and `filters` have different lengths.
170    ///
171    /// # Examples
172    ///
173    /// ```rust,no_run
174    /// # use velesdb_core::{VectorCollection, DistanceMetric, StorageMode};
175    /// # let coll = VectorCollection::create("./data/v".into(), "v", 128, DistanceMetric::Cosine, StorageMode::Full)?;
176    /// let q1 = vec![0.1; 128];
177    /// let q2 = vec![0.2; 128];
178    /// let results = coll.search_batch_with_filters(
179    ///     &[q1.as_slice(), q2.as_slice()],
180    ///     10,
181    ///     &[None, None],
182    /// )?;
183    /// assert_eq!(results.len(), 2);
184    /// # Ok::<(), velesdb_core::Error>(())
185    /// ```
186    pub fn search_batch_with_filters(
187        &self,
188        queries: &[&[f32]],
189        k: usize,
190        filters: &[Option<crate::filter::Filter>],
191    ) -> Result<Vec<Vec<SearchResult>>> {
192        self.inner.search_batch_with_filters(queries, k, filters)
193    }
194
195    /// Performs multi-query search fusing results from multiple query vectors.
196    ///
197    /// # Errors
198    ///
199    /// - Returns an error if any query dimension does not match the collection.
200    /// - Returns an error if the fusion strategy fails.
201    pub fn multi_query_search(
202        &self,
203        queries: &[&[f32]],
204        k: usize,
205        strategy: crate::fusion::FusionStrategy,
206        filter: Option<&crate::filter::Filter>,
207    ) -> Result<Vec<SearchResult>> {
208        self.inner.multi_query_search(queries, k, strategy, filter)
209    }
210
211    /// Performs multi-query search returning only IDs and fused scores.
212    ///
213    /// # Errors
214    ///
215    /// - Returns an error if any query dimension does not match the collection.
216    /// - Returns an error if the fusion strategy fails.
217    pub fn multi_query_search_ids(
218        &self,
219        queries: &[&[f32]],
220        k: usize,
221        strategy: crate::fusion::FusionStrategy,
222    ) -> Result<Vec<(u64, f32)>> {
223        self.inner.multi_query_search_ids(queries, k, strategy)
224    }
225
226    /// Performs sparse-only search on the named index.
227    ///
228    /// # Errors
229    ///
230    /// Returns an error if the named sparse index does not exist.
231    pub fn sparse_search(
232        &self,
233        query: &crate::index::sparse::SparseVector,
234        k: usize,
235        index_name: &str,
236    ) -> Result<Vec<SearchResult>> {
237        let indexes = self.inner.sparse_indexes.read();
238        let index = indexes.get(index_name).ok_or_else(|| {
239            crate::error::Error::Config(format!(
240                "Sparse index '{}' not found",
241                if index_name.is_empty() {
242                    "<default>"
243                } else {
244                    index_name
245                }
246            ))
247        })?;
248        let results = crate::index::sparse::sparse_search(index, query, k);
249        drop(indexes);
250        Ok(self.inner.resolve_sparse_results(&results, k))
251    }
252
253    /// Performs hybrid dense+sparse search with RRF fusion.
254    ///
255    /// # Errors
256    ///
257    /// Returns an error if dense or sparse search fails, or fusion errors.
258    #[allow(clippy::too_many_arguments)]
259    pub fn hybrid_sparse_search(
260        &self,
261        dense_vector: &[f32],
262        sparse_query: &crate::index::sparse::SparseVector,
263        k: usize,
264        index_name: &str,
265        strategy: &crate::fusion::FusionStrategy,
266    ) -> Result<Vec<SearchResult>> {
267        let candidate_k = k.saturating_mul(2).max(k + 10);
268
269        let (dense_results, sparse_results) = self.inner.execute_both_branches(
270            dense_vector,
271            sparse_query,
272            index_name,
273            candidate_k,
274            None,
275        );
276
277        if dense_results.is_empty() && sparse_results.is_empty() {
278            return Ok(Vec::new());
279        }
280        if dense_results.is_empty() {
281            let scored: Vec<(u64, f32)> = sparse_results
282                .iter()
283                .map(|sd| (sd.doc_id, sd.score))
284                .collect();
285            return Ok(self.inner.resolve_fused_results(&scored, k));
286        }
287        if sparse_results.is_empty() {
288            return Ok(self.inner.resolve_fused_results(&dense_results, k));
289        }
290
291        let sparse_tuples: Vec<(u64, f32)> = sparse_results
292            .iter()
293            .map(|sd| (sd.doc_id, sd.score))
294            .collect();
295
296        let fused = strategy
297            .fuse(vec![dense_results, sparse_tuples])
298            .map_err(|e| crate::error::Error::Config(format!("Fusion error: {e}")))?;
299
300        Ok(self.inner.resolve_fused_results(&fused, k))
301    }
302
303    /// Executes a graph MATCH query against the collection's edge store.
304    ///
305    /// # Errors
306    ///
307    /// - Returns an error if the match clause references an invalid label or property.
308    /// - Returns an error if the edge store is not initialized.
309    pub fn execute_match(
310        &self,
311        match_clause: &crate::velesql::MatchClause,
312        params: &std::collections::HashMap<String, serde_json::Value>,
313    ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
314        self.inner.execute_match(match_clause, params)
315    }
316
317    /// Executes a MATCH query with vector similarity filtering.
318    ///
319    /// # Errors
320    ///
321    /// - Returns an error if the match clause is invalid or the query dimension mismatches.
322    pub fn execute_match_with_similarity(
323        &self,
324        match_clause: &crate::velesql::MatchClause,
325        query_vector: &[f32],
326        threshold: f32,
327        params: &std::collections::HashMap<String, serde_json::Value>,
328    ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
329        self.inner
330            .execute_match_with_similarity(match_clause, query_vector, threshold, params)
331    }
332
333    /// Executes an aggregation query (GROUP BY / COUNT / SUM / AVG / MIN / MAX).
334    ///
335    /// # Errors
336    ///
337    /// - Returns an error if the query is invalid or aggregation computation fails.
338    pub fn execute_aggregate(
339        &self,
340        query: &crate::velesql::Query,
341        params: &std::collections::HashMap<String, serde_json::Value>,
342    ) -> Result<serde_json::Value> {
343        self.inner.execute_aggregate(query, params)
344    }
345
346    /// Executes a parsed `VelesQL` query.
347    ///
348    /// # Errors
349    ///
350    /// - Returns an error if the query references missing fields or execution fails.
351    pub fn execute_query(
352        &self,
353        query: &crate::velesql::Query,
354        params: &HashMap<String, serde_json::Value>,
355    ) -> Result<Vec<SearchResult>> {
356        self.inner.execute_query(query, params)
357    }
358
359    /// Sends a point into the streaming ingestion channel.
360    ///
361    /// Returns `Ok(())` on success (202 semantics). Returns
362    /// `BackpressureError::BufferFull` when the channel is at capacity, or
363    /// `BackpressureError::NotConfigured` if streaming is not active.
364    ///
365    /// # Errors
366    ///
367    /// Returns `BackpressureError` on buffer-full or not-configured.
368    #[cfg(feature = "persistence")]
369    pub fn stream_insert(
370        &self,
371        point: crate::point::Point,
372    ) -> std::result::Result<(), crate::collection::streaming::BackpressureError> {
373        self.inner.stream_insert(point)
374    }
375
376    /// Pushes `(id, vector)` entries into the delta buffer if it is active.
377    ///
378    /// No-op when the delta buffer is inactive. This is the public interface
379    /// used by streaming upsert handlers (e.g., NDJSON stream endpoint) to
380    /// keep the delta buffer in sync after a successful `upsert_bulk` call.
381    #[cfg(feature = "persistence")]
382    pub fn push_to_delta_if_active(&self, entries: &[(u64, Vec<f32>)]) {
383        self.inner.push_to_delta_if_active(entries);
384    }
385
386    /// Returns `true` if the delta buffer is currently active (HNSW rebuild
387    /// in progress). External callers can use this to decide whether to
388    /// snapshot entries for delta before a `upsert_bulk` call.
389    #[cfg(feature = "persistence")]
390    #[must_use]
391    pub fn is_delta_active(&self) -> bool {
392        self.inner.delta_buffer.is_active()
393    }
394
395    /// Executes a raw VelesQL string, parsing it before execution.
396    ///
397    /// # Errors
398    ///
399    /// - Returns an error if the SQL string cannot be parsed.
400    /// - Returns an error if query execution fails.
401    pub fn execute_query_str(
402        &self,
403        sql: &str,
404        params: &HashMap<String, serde_json::Value>,
405    ) -> Result<Vec<SearchResult>> {
406        self.inner.execute_query_str(sql, params)
407    }
408}