velesdb_core/
lib.rs

1//! # `VelesDB` Core
2//!
3//! High-performance vector database engine written in Rust.
4//!
5//! `VelesDB` is a local-first vector database designed for semantic search,
6//! recommendation systems, and RAG (Retrieval-Augmented Generation) applications.
7//!
8//! ## Features
9//!
10//! - **Blazing Fast**: HNSW index with explicit SIMD (4x faster)
11//! - **5 Distance Metrics**: Cosine, Euclidean, Dot Product, Hamming, Jaccard
12//! - **Hybrid Search**: Vector + BM25 full-text with RRF fusion
13//! - **Quantization**: SQ8 (4x) and Binary (32x) memory compression
14//! - **Persistent Storage**: Memory-mapped files for efficient disk access
15//!
16//! ## Quick Start
17//!
18//! ```rust,ignore
19//! use velesdb_core::{Database, DistanceMetric, Point, StorageMode};
20//! use serde_json::json;
21//!
22//! // Create a new database
23//! let db = Database::open("./data")?;
24//!
25//! // Create a collection (all 5 metrics available)
26//! db.create_collection("documents", 768, DistanceMetric::Cosine)?;
27//! // Or with quantization: DistanceMetric::Hamming + StorageMode::Binary
28//!
29//! let collection = db.get_collection("documents").unwrap();
30//!
31//! // Insert vectors (upsert takes ownership)
32//! collection.upsert(vec![
33//!     Point::new(1, vec![0.1; 768], Some(json!({"title": "Hello World"}))),
34//! ])?;
35//!
//! // Search for similar vectors (`query_vector` is a 768-dimensional query embedding supplied by the caller)
37//! let results = collection.search(&query_vector, 10)?;
38//!
39//! // Hybrid search (vector + text)
40//! let hybrid = collection.hybrid_search(&query_vector, "hello", 5, Some(0.7))?;
41//! ```
42
43#![warn(missing_docs)]
44#![warn(clippy::all)]
45#![warn(clippy::pedantic)]
46#![allow(clippy::module_name_repetitions)]
47// SOTA 2026 performance code - allow common numeric casts
48#![allow(clippy::cast_possible_truncation)]
49#![allow(clippy::cast_precision_loss)]
50#![allow(clippy::cast_possible_wrap)]
51#![allow(clippy::cast_sign_loss)]
52#![allow(clippy::cast_lossless)]
53// Documentation style preferences
54#![allow(clippy::doc_markdown)]
55// Code style preferences
56#![allow(clippy::single_match_else)]
57#![allow(clippy::manual_let_else)]
58#![allow(clippy::unused_self)]
59#![allow(clippy::uninlined_format_args)]
60#![allow(clippy::wildcard_imports)]
61#![allow(clippy::ptr_as_ptr)]
62#![allow(clippy::implicit_hasher)]
63#![allow(clippy::unnecessary_cast)]
64#![allow(clippy::collapsible_if)]
65#![allow(clippy::used_underscore_binding)]
66#![allow(clippy::manual_assert)]
67#![allow(clippy::assertions_on_constants)]
68#![allow(clippy::missing_errors_doc)]
69#![allow(clippy::unused_async)]
70
71pub mod alloc_guard;
72#[cfg(test)]
73mod alloc_guard_tests;
74pub mod cache;
75pub mod collection;
76pub mod column_store;
77#[cfg(test)]
78mod column_store_tests;
79pub mod compression;
80pub mod config;
81#[cfg(test)]
82mod config_tests;
83pub mod distance;
84#[cfg(test)]
85mod distance_tests;
86pub mod error;
87#[cfg(test)]
88mod error_tests;
89pub mod filter;
90#[cfg(test)]
91mod filter_like_tests;
92#[cfg(test)]
93mod filter_tests;
94pub mod fusion;
95pub mod gpu;
96#[cfg(test)]
97mod gpu_tests;
98pub mod half_precision;
99#[cfg(test)]
100mod half_precision_tests;
101pub mod index;
102pub mod metrics;
103#[cfg(test)]
104mod metrics_tests;
105pub mod perf_optimizations;
106pub mod point;
107#[cfg(test)]
108mod point_tests;
109pub mod quantization;
110#[cfg(test)]
111mod quantization_tests;
112pub mod simd;
113pub mod simd_avx512;
114#[cfg(test)]
115mod simd_avx512_tests;
116pub mod simd_dispatch;
117pub mod simd_explicit;
118#[cfg(test)]
119mod simd_explicit_tests;
120pub mod simd_native;
121pub mod storage;
122pub mod vector_ref;
123pub mod velesql;
124
125pub use index::{HnswIndex, HnswParams, SearchQuality, VectorIndex};
126
127pub use collection::{Collection, CollectionType};
128pub use distance::DistanceMetric;
129pub use error::{Error, Result};
130pub use filter::{Condition, Filter};
131pub use point::{Point, SearchResult};
132pub use quantization::{
133    cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
134    dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
135    BinaryQuantizedVector, QuantizedVector, StorageMode,
136};
137
138pub use column_store::{ColumnStore, ColumnType, ColumnValue, StringId, StringTable, TypedColumn};
139pub use config::{
140    ConfigError, HnswConfig, LimitsConfig, LoggingConfig, QuantizationConfig, SearchConfig,
141    SearchMode, ServerConfig, StorageConfig, VelesConfig,
142};
143pub use fusion::{FusionError, FusionStrategy};
144pub use metrics::{
145    average_metrics, compute_latency_percentiles, hit_rate, mean_average_precision, mrr, ndcg_at_k,
146    precision_at_k, recall_at_k, LatencyStats,
147};
148
/// Database instance managing collections and storage.
///
/// A `Database` pairs a data directory on disk with an in-memory registry of
/// the collections currently known to this instance. Each collection lives in
/// the sub-directory `data_dir/<collection name>`.
pub struct Database {
    /// Path to the data directory; collection directories are created beneath it.
    data_dir: std::path::PathBuf,
    /// Collections managed by this database, keyed by collection name and
    /// guarded by a read-write lock so the registry can be used through `&self`.
    collections: parking_lot::RwLock<std::collections::HashMap<String, Collection>>,
}
156
157impl Database {
158    /// Opens or creates a database at the specified path.
159    ///
160    /// # Arguments
161    ///
162    /// * `path` - Path to the data directory
163    ///
164    /// # Errors
165    ///
166    /// Returns an error if the directory cannot be created or accessed.
167    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
168        let data_dir = path.as_ref().to_path_buf();
169        std::fs::create_dir_all(&data_dir)?;
170
171        Ok(Self {
172            data_dir,
173            collections: parking_lot::RwLock::new(std::collections::HashMap::new()),
174        })
175    }
176
177    /// Creates a new collection with the specified parameters.
178    ///
179    /// # Arguments
180    ///
181    /// * `name` - Unique name for the collection
182    /// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
183    /// * `metric` - Distance metric to use for similarity calculations
184    ///
185    /// # Errors
186    ///
187    /// Returns an error if a collection with the same name already exists.
188    pub fn create_collection(
189        &self,
190        name: &str,
191        dimension: usize,
192        metric: DistanceMetric,
193    ) -> Result<()> {
194        self.create_collection_with_options(name, dimension, metric, StorageMode::default())
195    }
196
197    /// Creates a new collection with custom storage options.
198    ///
199    /// # Arguments
200    ///
201    /// * `name` - Unique name for the collection
202    /// * `dimension` - Vector dimension
203    /// * `metric` - Distance metric
204    /// * `storage_mode` - Vector storage mode (Full, SQ8, Binary)
205    ///
206    /// # Errors
207    ///
208    /// Returns an error if a collection with the same name already exists.
209    pub fn create_collection_with_options(
210        &self,
211        name: &str,
212        dimension: usize,
213        metric: DistanceMetric,
214        storage_mode: StorageMode,
215    ) -> Result<()> {
216        let mut collections = self.collections.write();
217
218        if collections.contains_key(name) {
219            return Err(Error::CollectionExists(name.to_string()));
220        }
221
222        let collection_path = self.data_dir.join(name);
223        let collection =
224            Collection::create_with_options(collection_path, dimension, metric, storage_mode)?;
225        collections.insert(name.to_string(), collection);
226
227        Ok(())
228    }
229
230    /// Gets a reference to a collection by name.
231    ///
232    /// # Arguments
233    ///
234    /// * `name` - Name of the collection
235    ///
236    /// # Returns
237    ///
238    /// Returns `None` if the collection does not exist.
239    pub fn get_collection(&self, name: &str) -> Option<Collection> {
240        self.collections.read().get(name).cloned()
241    }
242
243    /// Lists all collection names in the database.
244    pub fn list_collections(&self) -> Vec<String> {
245        self.collections.read().keys().cloned().collect()
246    }
247
248    /// Deletes a collection by name.
249    ///
250    /// # Arguments
251    ///
252    /// * `name` - Name of the collection to delete
253    ///
254    /// # Errors
255    ///
256    /// Returns an error if the collection does not exist.
257    pub fn delete_collection(&self, name: &str) -> Result<()> {
258        let mut collections = self.collections.write();
259
260        if collections.remove(name).is_none() {
261            return Err(Error::CollectionNotFound(name.to_string()));
262        }
263
264        let collection_path = self.data_dir.join(name);
265        if collection_path.exists() {
266            std::fs::remove_dir_all(collection_path)?;
267        }
268
269        Ok(())
270    }
271
272    /// Creates a new collection with a specific type (Vector or `MetadataOnly`).
273    ///
274    /// # Arguments
275    ///
276    /// * `name` - Unique name for the collection
277    /// * `collection_type` - Type of collection to create
278    ///
279    /// # Errors
280    ///
281    /// Returns an error if a collection with the same name already exists.
282    ///
283    /// # Examples
284    ///
285    /// ```rust,ignore
286    /// use velesdb_core::{Database, CollectionType, DistanceMetric, StorageMode};
287    ///
288    /// let db = Database::open("./data")?;
289    ///
290    /// // Create a metadata-only collection
291    /// db.create_collection_typed("products", CollectionType::MetadataOnly)?;
292    ///
293    /// // Create a vector collection
294    /// db.create_collection_typed("embeddings", CollectionType::Vector {
295    ///     dimension: 768,
296    ///     metric: DistanceMetric::Cosine,
297    ///     storage_mode: StorageMode::Full,
298    /// })?;
299    /// ```
300    pub fn create_collection_typed(
301        &self,
302        name: &str,
303        collection_type: &CollectionType,
304    ) -> Result<()> {
305        let mut collections = self.collections.write();
306
307        if collections.contains_key(name) {
308            return Err(Error::CollectionExists(name.to_string()));
309        }
310
311        let collection_path = self.data_dir.join(name);
312        let collection = Collection::create_typed(collection_path, name, collection_type)?;
313        collections.insert(name.to_string(), collection);
314
315        Ok(())
316    }
317
318    /// Loads existing collections from disk.
319    ///
320    /// Call this after opening a database to load previously created collections.
321    ///
322    /// # Errors
323    ///
324    /// Returns an error if collection directories cannot be read.
325    pub fn load_collections(&self) -> Result<()> {
326        let mut collections = self.collections.write();
327
328        for entry in std::fs::read_dir(&self.data_dir)? {
329            let entry = entry?;
330            let path = entry.path();
331
332            if path.is_dir() {
333                let config_path = path.join("config.json");
334                if config_path.exists() {
335                    let name = path
336                        .file_name()
337                        .and_then(|n| n.to_str())
338                        .unwrap_or("unknown")
339                        .to_string();
340
341                    if let std::collections::hash_map::Entry::Vacant(entry) =
342                        collections.entry(name)
343                    {
344                        match Collection::open(path) {
345                            Ok(collection) => {
346                                entry.insert(collection);
347                            }
348                            Err(err) => {
349                                eprintln!("Warning: Failed to load collection: {err}");
350                            }
351                        }
352                    }
353                }
354            }
355        }
356
357        Ok(())
358    }
359}
360
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Opens a database inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is handed back alongside the database so the
    /// directory stays alive for as long as the test keeps the guard bound.
    fn scratch_db() -> (TempDir, Database) {
        let guard = tempdir().unwrap();
        let database = Database::open(guard.path()).unwrap();
        (guard, database)
    }

    /// A freshly opened database has no collections.
    #[test]
    fn test_database_open() {
        let (_guard, database) = scratch_db();
        assert!(database.list_collections().is_empty());
    }

    /// Creating a collection makes its name show up in the listing.
    #[test]
    fn test_create_collection() {
        let (_guard, database) = scratch_db();

        database
            .create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        assert_eq!(database.list_collections(), ["test"]);
    }

    /// Creating the same collection name twice is rejected.
    #[test]
    fn test_duplicate_collection_error() {
        let (_guard, database) = scratch_db();

        database
            .create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let second_attempt = database.create_collection("test", 768, DistanceMetric::Cosine);
        assert!(second_attempt.is_err());
    }

    /// Lookup yields `None` for unknown names and the stored configuration
    /// for known ones.
    #[test]
    fn test_get_collection() {
        let (_guard, database) = scratch_db();

        // Nothing has been created yet, so the lookup must miss.
        assert!(database.get_collection("nonexistent").is_none());

        database
            .create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        // After creation the lookup must hit.
        let found = database.get_collection("test");
        assert!(found.is_some());

        let config = found.unwrap().config();
        assert_eq!(config.dimension, 768);
        assert_eq!(config.metric, DistanceMetric::Cosine);
    }

    /// Deleting a collection removes it from both listing and lookup.
    #[test]
    fn test_delete_collection() {
        let (_guard, database) = scratch_db();

        database
            .create_collection("to_delete", 768, DistanceMetric::Cosine)
            .unwrap();
        assert_eq!(database.list_collections().len(), 1);

        database.delete_collection("to_delete").unwrap();

        assert!(database.list_collections().is_empty());
        assert!(database.get_collection("to_delete").is_none());
    }

    /// Deleting an unknown collection reports an error.
    #[test]
    fn test_delete_nonexistent_collection() {
        let (_guard, database) = scratch_db();

        let outcome = database.delete_collection("nonexistent");
        assert!(outcome.is_err());
    }

    /// Several collections with different dimensions and metrics coexist.
    #[test]
    fn test_multiple_collections() {
        let (_guard, database) = scratch_db();

        let specs = [
            ("coll1", 128, DistanceMetric::Cosine),
            ("coll2", 256, DistanceMetric::Euclidean),
            ("coll3", 768, DistanceMetric::DotProduct),
        ];
        for (name, dimension, metric) in specs {
            database.create_collection(name, dimension, metric).unwrap();
        }

        let names = database.list_collections();
        assert_eq!(names.len(), 3);
        for expected in ["coll1", "coll2", "coll3"] {
            assert!(names.iter().any(|candidate| candidate == expected));
        }
    }
}