velesdb_core/collection/vector_collection.rs
1//! `VectorCollection`: newtype wrapper around `Collection` for vector workloads.
2//!
3//! This type provides a stable, typed API for vector collections.
4//! Internally it delegates 100% to the `Collection` executor to avoid
5//! any data synchronisation issues between separate storage layers.
6
7use std::collections::HashMap;
8use std::path::PathBuf;
9
10use crate::collection::types::{Collection, CollectionConfig};
11use crate::distance::DistanceMetric;
12use crate::error::Result;
13use crate::point::{Point, SearchResult};
14use crate::quantization::StorageMode;
15
16/// A vector collection combining HNSW search, payload storage, and full-text search.
17///
18/// `VectorCollection` is a typed newtype over `Collection` that provides
19/// a stable public API for vector workloads. All storage operations delegate
20/// to the single `inner: Collection` instance — no dual-storage desync.
21///
22/// # Examples
23///
24/// ```rust,no_run
25/// use velesdb_core::{VectorCollection, DistanceMetric, Point, StorageMode};
26/// use serde_json::json;
27///
28/// let coll = VectorCollection::create(
29/// "./data/docs".into(),
30/// "docs",
31/// 768,
32/// DistanceMetric::Cosine,
33/// StorageMode::Full,
34/// )?;
35///
36/// coll.upsert(vec![
37/// Point::new(1, vec![0.1; 768], Some(json!({"title": "Hello"}))),
38/// ])?;
39///
40/// let results = coll.search(&vec![0.1; 768], 10)?;
41/// # Ok::<(), velesdb_core::Error>(())
42/// ```
43#[derive(Clone)]
44pub struct VectorCollection {
45 /// Single source of truth — all operations delegate here.
46 pub(crate) inner: Collection,
47}
48
49impl VectorCollection {
50 // -------------------------------------------------------------------------
51 // Lifecycle
52 // -------------------------------------------------------------------------
53
54 /// Creates a new `VectorCollection` at the given path.
55 ///
56 /// # Errors
57 ///
58 /// Returns an error if the directory cannot be created or storage fails.
59 pub fn create(
60 path: PathBuf,
61 _name: &str,
62 dimension: usize,
63 metric: DistanceMetric,
64 storage_mode: StorageMode,
65 ) -> Result<Self> {
66 Ok(Self {
67 inner: Collection::create_with_options(path, dimension, metric, storage_mode)?,
68 })
69 }
70
71 /// Opens an existing `VectorCollection` from disk.
72 ///
73 /// # Errors
74 ///
75 /// Returns an error if the config file cannot be read or storage cannot be opened.
76 pub fn open(path: PathBuf) -> Result<Self> {
77 Ok(Self {
78 inner: Collection::open(path)?,
79 })
80 }
81
82 /// Flushes all engines to disk and saves the config.
83 ///
84 /// # Errors
85 ///
86 /// Returns an error if any flush operation fails.
87 pub fn flush(&self) -> Result<()> {
88 self.inner.flush()
89 }
90
91 // -------------------------------------------------------------------------
92 // Collection metadata — all delegate to inner
93 // -------------------------------------------------------------------------
94
95 /// Returns the collection name.
96 #[must_use]
97 pub fn name(&self) -> String {
98 self.inner.config().name
99 }
100
101 /// Returns the vector dimension.
102 #[must_use]
103 pub fn dimension(&self) -> usize {
104 self.inner.config().dimension
105 }
106
107 /// Returns the distance metric.
108 #[must_use]
109 pub fn metric(&self) -> DistanceMetric {
110 self.inner.config().metric
111 }
112
113 /// Returns the storage mode.
114 #[must_use]
115 pub fn storage_mode(&self) -> StorageMode {
116 self.inner.config().storage_mode
117 }
118
119 /// Returns the number of points in the collection.
120 #[must_use]
121 pub fn len(&self) -> usize {
122 self.inner.len()
123 }
124
125 /// Returns `true` if the collection is empty.
126 #[must_use]
127 pub fn is_empty(&self) -> bool {
128 self.inner.is_empty()
129 }
130
131 /// Returns all point IDs.
132 #[must_use]
133 pub fn all_ids(&self) -> Vec<u64> {
134 self.inner.all_ids()
135 }
136
137 /// Returns the current collection config.
138 #[must_use]
139 pub fn config(&self) -> CollectionConfig {
140 self.inner.config()
141 }
142
143 // -------------------------------------------------------------------------
144 // CRUD — all delegate to inner
145 // -------------------------------------------------------------------------
146
147 /// Bulk insert optimized for high-throughput import.
148 ///
149 /// # Errors
150 ///
151 /// Returns an error if any point has a mismatched dimension.
152 pub fn upsert_bulk(&self, points: &[Point]) -> Result<usize> {
153 self.inner.upsert_bulk(points)
154 }
155
156 /// Inserts or updates points.
157 ///
158 /// # Errors
159 ///
160 /// Returns an error if the dimension mismatches or storage fails.
161 pub fn upsert(&self, points: impl IntoIterator<Item = Point>) -> Result<()> {
162 self.inner.upsert(points)
163 }
164
165 /// Retrieves points by IDs.
166 #[must_use]
167 pub fn get(&self, ids: &[u64]) -> Vec<Option<Point>> {
168 self.inner.get(ids)
169 }
170
171 /// Deletes points by IDs.
172 ///
173 /// # Errors
174 ///
175 /// Returns an error if storage operations fail.
176 pub fn delete(&self, ids: &[u64]) -> Result<()> {
177 self.inner.delete(ids)
178 }
179
180 // -------------------------------------------------------------------------
181 // Search — all delegate to inner
182 // -------------------------------------------------------------------------
183
184 /// Performs kNN vector search.
185 /// # Errors
186 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<SearchResult>> {
187 self.inner.search(query, k)
188 }
189
190 /// Performs full-text BM25 search.
191 #[must_use]
192 pub fn text_search(&self, query: &str, k: usize) -> Vec<SearchResult> {
193 self.inner.text_search(query, k)
194 }
195
196 /// kNN search with explicit ef_search override.
197 /// # Errors
198 pub fn search_with_ef(
199 &self,
200 query: &[f32],
201 k: usize,
202 ef_search: usize,
203 ) -> Result<Vec<SearchResult>> {
204 self.inner.search_with_ef(query, k, ef_search)
205 }
206
207 /// kNN search with metadata filter.
208 /// # Errors
209 pub fn search_with_filter(
210 &self,
211 query: &[f32],
212 k: usize,
213 filter: &crate::filter::Filter,
214 ) -> Result<Vec<SearchResult>> {
215 self.inner.search_with_filter(query, k, filter)
216 }
217
218 /// Returns `(id, score)` pairs without payload hydration.
219 /// # Errors
220 pub fn search_ids(&self, query: &[f32], k: usize) -> Result<Vec<(u64, f32)>> {
221 self.inner.search_ids(query, k)
222 }
223
224 /// Full-text search with metadata filter.
225 #[must_use]
226 pub fn text_search_with_filter(
227 &self,
228 query: &str,
229 k: usize,
230 filter: &crate::filter::Filter,
231 ) -> Vec<SearchResult> {
232 self.inner.text_search_with_filter(query, k, filter)
233 }
234
235 /// Hybrid search (vector + BM25 with RRF fusion).
236 /// # Errors
237 pub fn hybrid_search(
238 &self,
239 vector: &[f32],
240 text: &str,
241 k: usize,
242 alpha: Option<f32>,
243 ) -> Result<Vec<SearchResult>> {
244 self.inner.hybrid_search(vector, text, k, alpha)
245 }
246
247 /// Hybrid search with metadata filter.
248 /// # Errors
249 pub fn hybrid_search_with_filter(
250 &self,
251 vector: &[f32],
252 text: &str,
253 k: usize,
254 alpha: Option<f32>,
255 filter: &crate::filter::Filter,
256 ) -> Result<Vec<SearchResult>> {
257 self.inner
258 .hybrid_search_with_filter(vector, text, k, alpha, filter)
259 }
260
261 /// Batch search with per-query filters.
262 /// # Errors
263 pub fn search_batch_with_filters(
264 &self,
265 queries: &[&[f32]],
266 k: usize,
267 filters: &[Option<crate::filter::Filter>],
268 ) -> Result<Vec<Vec<SearchResult>>> {
269 self.inner.search_batch_with_filters(queries, k, filters)
270 }
271
272 /// Multi-query search (multiple vectors fused).
273 /// # Errors
274 pub fn multi_query_search(
275 &self,
276 queries: &[&[f32]],
277 k: usize,
278 strategy: crate::fusion::FusionStrategy,
279 filter: Option<&crate::filter::Filter>,
280 ) -> Result<Vec<SearchResult>> {
281 self.inner.multi_query_search(queries, k, strategy, filter)
282 }
283
284 /// Multi-query search returning only IDs and fused scores.
285 /// # Errors
286 pub fn multi_query_search_ids(
287 &self,
288 queries: &[&[f32]],
289 k: usize,
290 strategy: crate::fusion::FusionStrategy,
291 ) -> Result<Vec<(u64, f32)>> {
292 self.inner.multi_query_search_ids(queries, k, strategy)
293 }
294
295 // -------------------------------------------------------------------------
296 // Data mutations (metadata)
297 // -------------------------------------------------------------------------
298
299 /// Inserts or updates metadata-only points (no vectors).
300 /// # Errors
301 pub fn upsert_metadata(
302 &self,
303 points: impl IntoIterator<Item = crate::point::Point>,
304 ) -> Result<()> {
305 self.inner.upsert_metadata(points)
306 }
307
308 // -------------------------------------------------------------------------
309 // Index management
310 // -------------------------------------------------------------------------
311
312 /// Creates a secondary metadata index on a payload field.
313 /// # Errors
314 pub fn create_index(&self, field: &str) -> Result<()> {
315 self.inner.create_index(field)
316 }
317
318 /// Returns `true` if a secondary index exists on `field`.
319 #[must_use]
320 pub fn has_secondary_index(&self, field: &str) -> bool {
321 self.inner.has_secondary_index(field)
322 }
323
324 /// Creates a property index for O(1) equality lookups.
325 /// # Errors
326 pub fn create_property_index(&self, label: &str, property: &str) -> Result<()> {
327 self.inner.create_property_index(label, property)
328 }
329
330 /// Creates a range index for O(log n) range queries.
331 /// # Errors
332 pub fn create_range_index(&self, label: &str, property: &str) -> Result<()> {
333 self.inner.create_range_index(label, property)
334 }
335
336 /// Returns `true` if a property index exists.
337 #[must_use]
338 pub fn has_property_index(&self, label: &str, property: &str) -> bool {
339 self.inner.has_property_index(label, property)
340 }
341
342 /// Returns `true` if a range index exists.
343 #[must_use]
344 pub fn has_range_index(&self, label: &str, property: &str) -> bool {
345 self.inner.has_range_index(label, property)
346 }
347
348 /// Lists all index definitions on this collection.
349 #[must_use]
350 pub fn list_indexes(&self) -> Vec<crate::collection::IndexInfo> {
351 self.inner.list_indexes()
352 }
353
354 /// Drops an index. Returns `true` if an index was removed.
355 /// # Errors
356 pub fn drop_index(&self, label: &str, property: &str) -> Result<bool> {
357 self.inner.drop_index(label, property)
358 }
359
360 /// Returns total memory usage of all indexes in bytes.
361 #[must_use]
362 pub fn indexes_memory_usage(&self) -> usize {
363 self.inner.indexes_memory_usage()
364 }
365
366 // -------------------------------------------------------------------------
367 // Match (graph pattern)
368 // -------------------------------------------------------------------------
369
370 /// Executes a graph MATCH query.
371 /// # Errors
372 pub fn execute_match(
373 &self,
374 match_clause: &crate::velesql::MatchClause,
375 params: &std::collections::HashMap<String, serde_json::Value>,
376 ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
377 self.inner.execute_match(match_clause, params)
378 }
379
380 /// Executes a MATCH query with vector similarity filtering.
381 /// # Errors
382 pub fn execute_match_with_similarity(
383 &self,
384 match_clause: &crate::velesql::MatchClause,
385 query_vector: &[f32],
386 threshold: f32,
387 params: &std::collections::HashMap<String, serde_json::Value>,
388 ) -> crate::error::Result<Vec<crate::collection::search::query::match_exec::MatchResult>> {
389 self.inner
390 .execute_match_with_similarity(match_clause, query_vector, threshold, params)
391 }
392
393 // -------------------------------------------------------------------------
394 // Aggregation
395 // -------------------------------------------------------------------------
396
397 /// Executes an aggregation query (GROUP BY / COUNT / SUM / AVG / MIN / MAX).
398 /// # Errors
399 pub fn execute_aggregate(
400 &self,
401 query: &crate::velesql::Query,
402 params: &std::collections::HashMap<String, serde_json::Value>,
403 ) -> Result<serde_json::Value> {
404 self.inner.execute_aggregate(query, params)
405 }
406
407 // -------------------------------------------------------------------------
408 // Statistics / misc
409 // -------------------------------------------------------------------------
410
411 /// Returns CBO statistics.
412 #[must_use]
413 pub fn get_stats(&self) -> crate::collection::stats::CollectionStats {
414 self.inner.get_stats()
415 }
416
417 /// Returns `true` if the collection is a metadata-only collection.
418 #[must_use]
419 pub fn is_metadata_only(&self) -> bool {
420 self.inner.is_metadata_only()
421 }
422
423 /// Analyzes the collection and returns fresh statistics.
424 /// # Errors
425 pub fn analyze(&self) -> Result<crate::collection::stats::CollectionStats> {
426 self.inner.analyze()
427 }
428
429 // -------------------------------------------------------------------------
430 // Sparse search
431 // -------------------------------------------------------------------------
432
433 /// Performs sparse-only search on the named index.
434 ///
435 /// # Errors
436 ///
437 /// Returns an error if the named sparse index does not exist.
438 pub fn sparse_search(
439 &self,
440 query: &crate::index::sparse::SparseVector,
441 k: usize,
442 index_name: &str,
443 ) -> Result<Vec<SearchResult>> {
444 let indexes = self.inner.sparse_indexes.read();
445 let index = indexes.get(index_name).ok_or_else(|| {
446 crate::error::Error::Config(format!(
447 "Sparse index '{}' not found",
448 if index_name.is_empty() {
449 "<default>"
450 } else {
451 index_name
452 }
453 ))
454 })?;
455 let results = crate::index::sparse::sparse_search(index, query, k);
456 drop(indexes);
457 Ok(self.inner.resolve_sparse_results(&results, k))
458 }
459
460 /// Performs hybrid dense+sparse search with RRF fusion.
461 ///
462 /// # Errors
463 ///
464 /// Returns an error if dense or sparse search fails, or fusion errors.
465 #[allow(clippy::too_many_arguments)]
466 pub fn hybrid_sparse_search(
467 &self,
468 dense_vector: &[f32],
469 sparse_query: &crate::index::sparse::SparseVector,
470 k: usize,
471 index_name: &str,
472 strategy: &crate::fusion::FusionStrategy,
473 ) -> Result<Vec<SearchResult>> {
474 let candidate_k = k.saturating_mul(2).max(k + 10);
475
476 let (dense_results, sparse_results) = self.inner.execute_both_branches(
477 dense_vector,
478 sparse_query,
479 index_name,
480 candidate_k,
481 None,
482 );
483
484 if dense_results.is_empty() && sparse_results.is_empty() {
485 return Ok(Vec::new());
486 }
487 if dense_results.is_empty() {
488 let scored: Vec<(u64, f32)> = sparse_results
489 .iter()
490 .map(|sd| (sd.doc_id, sd.score))
491 .collect();
492 return Ok(self.inner.resolve_fused_results(&scored, k));
493 }
494 if sparse_results.is_empty() {
495 return Ok(self.inner.resolve_fused_results(&dense_results, k));
496 }
497
498 let sparse_tuples: Vec<(u64, f32)> = sparse_results
499 .iter()
500 .map(|sd| (sd.doc_id, sd.score))
501 .collect();
502
503 let fused = strategy
504 .fuse(vec![dense_results, sparse_tuples])
505 .map_err(|e| crate::error::Error::Config(format!("Fusion error: {e}")))?;
506
507 Ok(self.inner.resolve_fused_results(&fused, k))
508 }
509
510 // -------------------------------------------------------------------------
511 // VelesQL
512 // -------------------------------------------------------------------------
513
514 /// Executes a `VelesQL` query.
515 /// # Errors
516 pub fn execute_query(
517 &self,
518 query: &crate::velesql::Query,
519 params: &HashMap<String, serde_json::Value>,
520 ) -> Result<Vec<SearchResult>> {
521 self.inner.execute_query(query, params)
522 }
523
524 /// Sends a point into the streaming ingestion channel.
525 ///
526 /// Returns `Ok(())` on success (202 semantics). Returns
527 /// `BackpressureError::BufferFull` when the channel is at capacity, or
528 /// `BackpressureError::NotConfigured` if streaming is not active.
529 ///
530 /// # Errors
531 ///
532 /// Returns `BackpressureError` on buffer-full or not-configured.
533 #[cfg(feature = "persistence")]
534 pub fn stream_insert(
535 &self,
536 point: crate::point::Point,
537 ) -> std::result::Result<(), crate::collection::streaming::BackpressureError> {
538 self.inner.stream_insert(point)
539 }
540
541 /// Pushes `(id, vector)` entries into the delta buffer if it is active.
542 ///
543 /// No-op when the delta buffer is inactive. This is the public interface
544 /// used by streaming upsert handlers (e.g., NDJSON stream endpoint) to
545 /// keep the delta buffer in sync after a successful `upsert_bulk` call.
546 #[cfg(feature = "persistence")]
547 pub fn push_to_delta_if_active(&self, entries: &[(u64, Vec<f32>)]) {
548 self.inner.push_to_delta_if_active(entries);
549 }
550
551 /// Returns `true` if the delta buffer is currently active (HNSW rebuild
552 /// in progress). External callers can use this to decide whether to
553 /// snapshot entries for delta before a `upsert_bulk` call.
554 #[cfg(feature = "persistence")]
555 #[must_use]
556 pub fn is_delta_active(&self) -> bool {
557 self.inner.delta_buffer.is_active()
558 }
559
560 /// Executes a raw VelesQL string.
561 /// # Errors
562 pub fn execute_query_str(
563 &self,
564 sql: &str,
565 params: &HashMap<String, serde_json::Value>,
566 ) -> Result<Vec<SearchResult>> {
567 let query = self
568 .inner
569 .query_cache
570 .parse(sql)
571 .map_err(|e| crate::error::Error::Query(e.to_string()))?;
572 self.inner.execute_query(&query, params)
573 }
574}