Skip to main content

velesdb_core/collection/
vector_collection.rs

//! `VectorCollection`: newtype wrapper around `Collection` for vector workloads.
//!
//! This type provides a stable, typed API for vector collections.
//! Internally it delegates entirely to the `Collection` executor to avoid
//! any data synchronisation issues between separate storage layers.
6
7use std::collections::HashMap;
8use std::path::PathBuf;
9
10use crate::collection::types::{Collection, CollectionConfig};
11use crate::distance::DistanceMetric;
12use crate::error::Result;
13use crate::point::{Point, SearchResult};
14use crate::quantization::StorageMode;
15
16/// A vector collection combining HNSW search, payload storage, and full-text search.
17///
18/// `VectorCollection` is a typed newtype over `Collection` that provides
19/// a stable public API for vector workloads. All storage operations delegate
20/// to the single `inner: Collection` instance — no dual-storage desync.
21///
22/// # Examples
23///
24/// ```rust,no_run
25/// use velesdb_core::{VectorCollection, DistanceMetric, Point, StorageMode};
26/// use serde_json::json;
27///
28/// let coll = VectorCollection::create(
29///     "./data/docs".into(),
30///     "docs",
31///     768,
32///     DistanceMetric::Cosine,
33///     StorageMode::Full,
34/// )?;
35///
36/// coll.upsert(vec![
37///     Point::new(1, vec![0.1; 768], Some(json!({"title": "Hello"}))),
38/// ])?;
39///
40/// let results = coll.search(&vec![0.1; 768], 10)?;
41/// # Ok::<(), velesdb_core::Error>(())
42/// ```
43#[derive(Clone)]
44pub struct VectorCollection {
45    /// Single source of truth — all operations delegate here.
46    pub(crate) inner: Collection,
47}
48
49impl VectorCollection {
50    // -------------------------------------------------------------------------
51    // Lifecycle
52    // -------------------------------------------------------------------------
53
54    /// Creates a new `VectorCollection` at the given path.
55    ///
56    /// # Errors
57    ///
58    /// Returns an error if the directory cannot be created or storage fails.
59    pub fn create(
60        path: PathBuf,
61        _name: &str,
62        dimension: usize,
63        metric: DistanceMetric,
64        storage_mode: StorageMode,
65    ) -> Result<Self> {
66        Ok(Self {
67            inner: Collection::create_with_options(path, dimension, metric, storage_mode)?,
68        })
69    }
70
71    /// Opens an existing `VectorCollection` from disk.
72    ///
73    /// # Errors
74    ///
75    /// Returns an error if the config file cannot be read or storage cannot be opened.
76    pub fn open(path: PathBuf) -> Result<Self> {
77        Ok(Self {
78            inner: Collection::open(path)?,
79        })
80    }
81
82    /// Flushes all engines to disk and saves the config.
83    ///
84    /// # Errors
85    ///
86    /// Returns an error if any flush operation fails.
87    pub fn flush(&self) -> Result<()> {
88        self.inner.flush()
89    }
90
91    // -------------------------------------------------------------------------
92    // Collection metadata — all delegate to inner
93    // -------------------------------------------------------------------------
94
95    /// Returns the collection name.
96    #[must_use]
97    pub fn name(&self) -> String {
98        self.inner.config().name
99    }
100
101    /// Returns the vector dimension.
102    #[must_use]
103    pub fn dimension(&self) -> usize {
104        self.inner.config().dimension
105    }
106
107    /// Returns the distance metric.
108    #[must_use]
109    pub fn metric(&self) -> DistanceMetric {
110        self.inner.config().metric
111    }
112
113    /// Returns the storage mode.
114    #[must_use]
115    pub fn storage_mode(&self) -> StorageMode {
116        self.inner.config().storage_mode
117    }
118
119    /// Returns the number of points in the collection.
120    #[must_use]
121    pub fn len(&self) -> usize {
122        self.inner.len()
123    }
124
125    /// Returns `true` if the collection is empty.
126    #[must_use]
127    pub fn is_empty(&self) -> bool {
128        self.inner.is_empty()
129    }
130
131    /// Returns all point IDs.
132    #[must_use]
133    pub fn all_ids(&self) -> Vec<u64> {
134        self.inner.all_ids()
135    }
136
137    /// Returns the current collection config.
138    #[must_use]
139    pub fn config(&self) -> CollectionConfig {
140        self.inner.config()
141    }
142
143    // -------------------------------------------------------------------------
144    // CRUD — all delegate to inner
145    // -------------------------------------------------------------------------
146
147    /// Bulk insert optimized for high-throughput import.
148    ///
149    /// # Errors
150    ///
151    /// Returns an error if any point has a mismatched dimension.
152    pub fn upsert_bulk(&self, points: &[Point]) -> Result<usize> {
153        self.inner.upsert_bulk(points)
154    }
155
156    /// Inserts or updates points.
157    ///
158    /// # Errors
159    ///
160    /// Returns an error if the dimension mismatches or storage fails.
161    pub fn upsert(&self, points: impl IntoIterator<Item = Point>) -> Result<()> {
162        self.inner.upsert(points)
163    }
164
165    /// Retrieves points by IDs.
166    #[must_use]
167    pub fn get(&self, ids: &[u64]) -> Vec<Option<Point>> {
168        self.inner.get(ids)
169    }
170
171    /// Deletes points by IDs.
172    ///
173    /// # Errors
174    ///
175    /// Returns an error if storage operations fail.
176    pub fn delete(&self, ids: &[u64]) -> Result<()> {
177        self.inner.delete(ids)
178    }
179
180    // -------------------------------------------------------------------------
181    // Search — all delegate to inner
182    // -------------------------------------------------------------------------
183
184    /// Performs kNN vector search.
185    /// # Errors
186    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
187        self.inner.search(query, k)
188    }
189
190    /// Performs full-text BM25 search.
191    #[must_use]
192    pub fn text_search(&self, query: &str, k: usize) -> Vec<SearchResult> {
193        self.inner.text_search(query, k)
194    }
195
196    /// kNN search with explicit ef_search override.
197    /// # Errors
198    pub fn search_with_ef(
199        &self,
200        query: &[f32],
201        k: usize,
202        ef_search: usize,
203    ) -> Result<Vec<SearchResult>> {
204        self.inner.search_with_ef(query, k, ef_search)
205    }
206
207    /// kNN search with metadata filter.
208    /// # Errors
209    pub fn search_with_filter(
210        &self,
211        query: &[f32],
212        k: usize,
213        filter: &crate::filter::Filter,
214    ) -> Result<Vec<SearchResult>> {
215        self.inner.search_with_filter(query, k, filter)
216    }
217
218    /// Returns `(id, score)` pairs without payload hydration.
219    /// # Errors
220    pub fn search_ids(&self, query: &[f32], k: usize) -> Result<Vec<(u64, f32)>> {
221        self.inner.search_ids(query, k)
222    }
223
224    /// Full-text search with metadata filter.
225    #[must_use]
226    pub fn text_search_with_filter(
227        &self,
228        query: &str,
229        k: usize,
230        filter: &crate::filter::Filter,
231    ) -> Vec<SearchResult> {
232        self.inner.text_search_with_filter(query, k, filter)
233    }
234
235    /// Hybrid search (vector + BM25 with RRF fusion).
236    /// # Errors
237    pub fn hybrid_search(
238        &self,
239        vector: &[f32],
240        text: &str,
241        k: usize,
242        alpha: Option<f32>,
243    ) -> Result<Vec<SearchResult>> {
244        self.inner.hybrid_search(vector, text, k, alpha)
245    }
246
247    /// Hybrid search with metadata filter.
248    /// # Errors
249    pub fn hybrid_search_with_filter(
250        &self,
251        vector: &[f32],
252        text: &str,
253        k: usize,
254        alpha: Option<f32>,
255        filter: &crate::filter::Filter,
256    ) -> Result<Vec<SearchResult>> {
257        self.inner
258            .hybrid_search_with_filter(vector, text, k, alpha, filter)
259    }
260
261    /// Batch search with per-query filters.
262    /// # Errors
263    pub fn search_batch_with_filters(
264        &self,
265        queries: &[&[f32]],
266        k: usize,
267        filters: &[Option<crate::filter::Filter>],
268    ) -> Result<Vec<Vec<SearchResult>>> {
269        self.inner.search_batch_with_filters(queries, k, filters)
270    }
271
272    /// Multi-query search (multiple vectors fused).
273    /// # Errors
274    pub fn multi_query_search(
275        &self,
276        queries: &[&[f32]],
277        k: usize,
278        strategy: crate::fusion::FusionStrategy,
279        filter: Option<&crate::filter::Filter>,
280    ) -> Result<Vec<SearchResult>> {
281        self.inner.multi_query_search(queries, k, strategy, filter)
282    }
283
284    /// Multi-query search returning only IDs and fused scores.
285    /// # Errors
286    pub fn multi_query_search_ids(
287        &self,
288        queries: &[&[f32]],
289        k: usize,
290        strategy: crate::fusion::FusionStrategy,
291    ) -> Result<Vec<(u64, f32)>> {
292        self.inner.multi_query_search_ids(queries, k, strategy)
293    }
294
295    // -------------------------------------------------------------------------
296    // Data mutations (metadata)
297    // -------------------------------------------------------------------------
298
299    /// Inserts or updates metadata-only points (no vectors).
300    /// # Errors
301    pub fn upsert_metadata(
302        &self,
303        points: impl IntoIterator<Item = crate::point::Point>,
304    ) -> Result<()> {
305        self.inner.upsert_metadata(points)
306    }
307
308    // -------------------------------------------------------------------------
309    // Index management
310    // -------------------------------------------------------------------------
311
312    /// Creates a secondary metadata index on a payload field.
313    /// # Errors
314    pub fn create_index(&self, field: &str) -> Result<()> {
315        self.inner.create_index(field)
316    }
317
318    /// Returns `true` if a secondary index exists on `field`.
319    #[must_use]
320    pub fn has_secondary_index(&self, field: &str) -> bool {
321        self.inner.has_secondary_index(field)
322    }
323
324    /// Creates a property index for O(1) equality lookups.
325    /// # Errors
326    pub fn create_property_index(&self, label: &str, property: &str) -> Result<()> {
327        self.inner.create_property_index(label, property)
328    }
329
330    /// Creates a range index for O(log n) range queries.
331    /// # Errors
332    pub fn create_range_index(&self, label: &str, property: &str) -> Result<()> {
333        self.inner.create_range_index(label, property)
334    }
335
336    /// Returns `true` if a property index exists.
337    #[must_use]
338    pub fn has_property_index(&self, label: &str, property: &str) -> bool {
339        self.inner.has_property_index(label, property)
340    }
341
342    /// Returns `true` if a range index exists.
343    #[must_use]
344    pub fn has_range_index(&self, label: &str, property: &str) -> bool {
345        self.inner.has_range_index(label, property)
346    }
347
348    /// Lists all index definitions on this collection.
349    #[must_use]
350    pub fn list_indexes(&self) -> Vec<crate::collection::IndexInfo> {
351        self.inner.list_indexes()
352    }
353
354    /// Drops an index. Returns `true` if an index was removed.
355    /// # Errors
356    pub fn drop_index(&self, label: &str, property: &str) -> Result<bool> {
357        self.inner.drop_index(label, property)
358    }
359
360    /// Returns total memory usage of all indexes in bytes.
361    #[must_use]
362    pub fn indexes_memory_usage(&self) -> usize {
363        self.inner.indexes_memory_usage()
364    }
365
366    // -------------------------------------------------------------------------
367    // Match (graph pattern)
368    // -------------------------------------------------------------------------
369
370    /// Executes a graph MATCH query.
371    /// # Errors
372    pub fn execute_match(
373        &self,
374        match_clause: &crate::velesql::MatchClause,
375        params: &std::collections::HashMap<String, serde_json::Value>,
376    ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
377        self.inner.execute_match(match_clause, params)
378    }
379
380    /// Executes a MATCH query with vector similarity filtering.
381    /// # Errors
382    pub fn execute_match_with_similarity(
383        &self,
384        match_clause: &crate::velesql::MatchClause,
385        query_vector: &[f32],
386        threshold: f32,
387        params: &std::collections::HashMap<String, serde_json::Value>,
388    ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
389        self.inner
390            .execute_match_with_similarity(match_clause, query_vector, threshold, params)
391    }
392
393    // -------------------------------------------------------------------------
394    // Aggregation
395    // -------------------------------------------------------------------------
396
397    /// Executes an aggregation query (GROUP BY / COUNT / SUM / AVG / MIN / MAX).
398    /// # Errors
399    pub fn execute_aggregate(
400        &self,
401        query: &crate::velesql::Query,
402        params: &std::collections::HashMap<String, serde_json::Value>,
403    ) -> Result<serde_json::Value> {
404        self.inner.execute_aggregate(query, params)
405    }
406
407    // -------------------------------------------------------------------------
408    // Statistics / misc
409    // -------------------------------------------------------------------------
410
411    /// Returns CBO statistics.
412    #[must_use]
413    pub fn get_stats(&self) -> crate::collection::stats::CollectionStats {
414        self.inner.get_stats()
415    }
416
417    /// Returns `true` if the collection is a metadata-only collection.
418    #[must_use]
419    pub fn is_metadata_only(&self) -> bool {
420        self.inner.is_metadata_only()
421    }
422
423    /// Analyzes the collection and returns fresh statistics.
424    /// # Errors
425    pub fn analyze(&self) -> Result<crate::collection::stats::CollectionStats> {
426        self.inner.analyze()
427    }
428
429    // -------------------------------------------------------------------------
430    // Sparse search
431    // -------------------------------------------------------------------------
432
433    /// Performs sparse-only search on the named index.
434    ///
435    /// # Errors
436    ///
437    /// Returns an error if the named sparse index does not exist.
438    pub fn sparse_search(
439        &self,
440        query: &crate::index::sparse::SparseVector,
441        k: usize,
442        index_name: &str,
443    ) -> Result<Vec<SearchResult>> {
444        let indexes = self.inner.sparse_indexes.read();
445        let index = indexes.get(index_name).ok_or_else(|| {
446            crate::error::Error::Config(format!(
447                "Sparse index '{}' not found",
448                if index_name.is_empty() {
449                    "<default>"
450                } else {
451                    index_name
452                }
453            ))
454        })?;
455        let results = crate::index::sparse::sparse_search(index, query, k);
456        drop(indexes);
457        Ok(self.inner.resolve_sparse_results(&results, k))
458    }
459
460    /// Performs hybrid dense+sparse search with RRF fusion.
461    ///
462    /// # Errors
463    ///
464    /// Returns an error if dense or sparse search fails, or fusion errors.
465    #[allow(clippy::too_many_arguments)]
466    pub fn hybrid_sparse_search(
467        &self,
468        dense_vector: &[f32],
469        sparse_query: &crate::index::sparse::SparseVector,
470        k: usize,
471        index_name: &str,
472        strategy: &crate::fusion::FusionStrategy,
473    ) -> Result<Vec<SearchResult>> {
474        let candidate_k = k.saturating_mul(2).max(k + 10);
475
476        let (dense_results, sparse_results) = self.inner.execute_both_branches(
477            dense_vector,
478            sparse_query,
479            index_name,
480            candidate_k,
481            None,
482        );
483
484        if dense_results.is_empty() && sparse_results.is_empty() {
485            return Ok(Vec::new());
486        }
487        if dense_results.is_empty() {
488            let scored: Vec<(u64, f32)> = sparse_results
489                .iter()
490                .map(|sd| (sd.doc_id, sd.score))
491                .collect();
492            return Ok(self.inner.resolve_fused_results(&scored, k));
493        }
494        if sparse_results.is_empty() {
495            return Ok(self.inner.resolve_fused_results(&dense_results, k));
496        }
497
498        let sparse_tuples: Vec<(u64, f32)> = sparse_results
499            .iter()
500            .map(|sd| (sd.doc_id, sd.score))
501            .collect();
502
503        let fused = strategy
504            .fuse(vec![dense_results, sparse_tuples])
505            .map_err(|e| crate::error::Error::Config(format!("Fusion error: {e}")))?;
506
507        Ok(self.inner.resolve_fused_results(&fused, k))
508    }
509
510    // -------------------------------------------------------------------------
511    // VelesQL
512    // -------------------------------------------------------------------------
513
514    /// Executes a `VelesQL` query.
515    /// # Errors
516    pub fn execute_query(
517        &self,
518        query: &crate::velesql::Query,
519        params: &HashMap<String, serde_json::Value>,
520    ) -> Result<Vec<SearchResult>> {
521        self.inner.execute_query(query, params)
522    }
523
524    /// Sends a point into the streaming ingestion channel.
525    ///
526    /// Returns `Ok(())` on success (202 semantics). Returns
527    /// `BackpressureError::BufferFull` when the channel is at capacity, or
528    /// `BackpressureError::NotConfigured` if streaming is not active.
529    ///
530    /// # Errors
531    ///
532    /// Returns `BackpressureError` on buffer-full or not-configured.
533    #[cfg(feature = "persistence")]
534    pub fn stream_insert(
535        &self,
536        point: crate::point::Point,
537    ) -> std::result::Result<(), crate::collection::streaming::BackpressureError> {
538        self.inner.stream_insert(point)
539    }
540
541    /// Pushes `(id, vector)` entries into the delta buffer if it is active.
542    ///
543    /// No-op when the delta buffer is inactive. This is the public interface
544    /// used by streaming upsert handlers (e.g., NDJSON stream endpoint) to
545    /// keep the delta buffer in sync after a successful `upsert_bulk` call.
546    #[cfg(feature = "persistence")]
547    pub fn push_to_delta_if_active(&self, entries: &[(u64, Vec<f32>)]) {
548        self.inner.push_to_delta_if_active(entries);
549    }
550
551    /// Returns `true` if the delta buffer is currently active (HNSW rebuild
552    /// in progress). External callers can use this to decide whether to
553    /// snapshot entries for delta before a `upsert_bulk` call.
554    #[cfg(feature = "persistence")]
555    #[must_use]
556    pub fn is_delta_active(&self) -> bool {
557        self.inner.delta_buffer.is_active()
558    }
559
560    /// Executes a raw VelesQL string.
561    /// # Errors
562    pub fn execute_query_str(
563        &self,
564        sql: &str,
565        params: &HashMap<String, serde_json::Value>,
566    ) -> Result<Vec<SearchResult>> {
567        let query = self
568            .inner
569            .query_cache
570            .parse(sql)
571            .map_err(|e| crate::error::Error::Query(e.to_string()))?;
572        self.inner.execute_query(&query, params)
573    }
574}