velesdb_core/
lib.rs

1//! # `VelesDB` Core
2//!
3//! High-performance vector database engine written in Rust.
4//!
5//! `VelesDB` is a local-first vector database designed for semantic search,
6//! recommendation systems, and RAG (Retrieval-Augmented Generation) applications.
7//!
8//! ## Features
9//!
10//! - **Blazing Fast**: HNSW index with explicit SIMD (4x faster)
11//! - **5 Distance Metrics**: Cosine, Euclidean, Dot Product, Hamming, Jaccard
12//! - **Hybrid Search**: Vector + BM25 full-text with RRF fusion
13//! - **Quantization**: SQ8 (4x) and Binary (32x) memory compression
14//! - **Persistent Storage**: Memory-mapped files for efficient disk access
15//!
16//! ## Quick Start
17//!
18//! ```rust,ignore
19//! use velesdb_core::{Database, DistanceMetric, Point, StorageMode};
20//! use serde_json::json;
21//!
22//! // Create a new database
23//! let db = Database::open("./data")?;
24//!
25//! // Create a collection (all 5 metrics available)
26//! db.create_collection("documents", 768, DistanceMetric::Cosine)?;
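//! // To choose the storage mode explicitly (e.g. binary quantization), use:
//! // db.create_collection_with_options(
//! //     "documents_bin", 768, DistanceMetric::Hamming, StorageMode::Binary)?;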
28//!
29//! let collection = db.get_collection("documents").unwrap();
30//!
31//! // Insert vectors (upsert takes ownership)
32//! collection.upsert(vec![
33//!     Point::new(1, vec![0.1; 768], Some(json!({"title": "Hello World"}))),
34//! ])?;
35//!
//! // Search for similar vectors (the query must match the collection dimension, 768 here)
//! let query_vector = vec![0.1; 768];
//! let results = collection.search(&query_vector, 10)?;
38//!
39//! // Hybrid search (vector + text)
40//! let hybrid = collection.hybrid_search(&query_vector, "hello", 5, Some(0.7))?;
41//! ```
42
43#![warn(missing_docs)]
44#![warn(clippy::all)]
45#![warn(clippy::pedantic)]
46#![allow(clippy::module_name_repetitions)]
47
48pub mod collection;
49pub mod column_store;
50pub mod distance;
51pub mod error;
52pub mod filter;
53pub mod half_precision;
54pub mod index;
55pub mod metrics;
56pub mod perf_optimizations;
57pub mod point;
58pub mod quantization;
59pub mod simd;
60pub mod simd_avx512;
61pub mod simd_explicit;
62pub mod storage;
63pub mod velesql;
64
65pub use index::{HnswIndex, HnswParams, SearchQuality, VectorIndex};
66
67pub use collection::Collection;
68pub use distance::DistanceMetric;
69pub use error::{Error, Result};
70pub use filter::{Condition, Filter};
71pub use point::Point;
72pub use quantization::{
73    cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
74    dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
75    BinaryQuantizedVector, QuantizedVector, StorageMode,
76};
77
78pub use column_store::{ColumnStore, ColumnType, ColumnValue, StringId, StringTable, TypedColumn};
79pub use metrics::{
80    average_metrics, compute_latency_percentiles, hit_rate, mean_average_precision, mrr, ndcg_at_k,
81    precision_at_k, recall_at_k, LatencyStats,
82};
83
84/// Database instance managing collections and storage.
85pub struct Database {
86    /// Path to the data directory
87    data_dir: std::path::PathBuf,
88    /// Collections managed by this database
89    collections: parking_lot::RwLock<std::collections::HashMap<String, Collection>>,
90}
91
92impl Database {
93    /// Opens or creates a database at the specified path.
94    ///
95    /// # Arguments
96    ///
97    /// * `path` - Path to the data directory
98    ///
99    /// # Errors
100    ///
101    /// Returns an error if the directory cannot be created or accessed.
102    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
103        let data_dir = path.as_ref().to_path_buf();
104        std::fs::create_dir_all(&data_dir)?;
105
106        Ok(Self {
107            data_dir,
108            collections: parking_lot::RwLock::new(std::collections::HashMap::new()),
109        })
110    }
111
112    /// Creates a new collection with the specified parameters.
113    ///
114    /// # Arguments
115    ///
116    /// * `name` - Unique name for the collection
117    /// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
118    /// * `metric` - Distance metric to use for similarity calculations
119    ///
120    /// # Errors
121    ///
122    /// Returns an error if a collection with the same name already exists.
123    pub fn create_collection(
124        &self,
125        name: &str,
126        dimension: usize,
127        metric: DistanceMetric,
128    ) -> Result<()> {
129        self.create_collection_with_options(name, dimension, metric, StorageMode::default())
130    }
131
132    /// Creates a new collection with custom storage options.
133    ///
134    /// # Arguments
135    ///
136    /// * `name` - Unique name for the collection
137    /// * `dimension` - Vector dimension
138    /// * `metric` - Distance metric
139    /// * `storage_mode` - Vector storage mode (Full, SQ8, Binary)
140    ///
141    /// # Errors
142    ///
143    /// Returns an error if a collection with the same name already exists.
144    pub fn create_collection_with_options(
145        &self,
146        name: &str,
147        dimension: usize,
148        metric: DistanceMetric,
149        storage_mode: StorageMode,
150    ) -> Result<()> {
151        let mut collections = self.collections.write();
152
153        if collections.contains_key(name) {
154            return Err(Error::CollectionExists(name.to_string()));
155        }
156
157        let collection_path = self.data_dir.join(name);
158        let collection =
159            Collection::create_with_options(collection_path, dimension, metric, storage_mode)?;
160        collections.insert(name.to_string(), collection);
161
162        Ok(())
163    }
164
165    /// Gets a reference to a collection by name.
166    ///
167    /// # Arguments
168    ///
169    /// * `name` - Name of the collection
170    ///
171    /// # Returns
172    ///
173    /// Returns `None` if the collection does not exist.
174    pub fn get_collection(&self, name: &str) -> Option<Collection> {
175        self.collections.read().get(name).cloned()
176    }
177
178    /// Lists all collection names in the database.
179    pub fn list_collections(&self) -> Vec<String> {
180        self.collections.read().keys().cloned().collect()
181    }
182
183    /// Deletes a collection by name.
184    ///
185    /// # Arguments
186    ///
187    /// * `name` - Name of the collection to delete
188    ///
189    /// # Errors
190    ///
191    /// Returns an error if the collection does not exist.
192    pub fn delete_collection(&self, name: &str) -> Result<()> {
193        let mut collections = self.collections.write();
194
195        if collections.remove(name).is_none() {
196            return Err(Error::CollectionNotFound(name.to_string()));
197        }
198
199        let collection_path = self.data_dir.join(name);
200        if collection_path.exists() {
201            std::fs::remove_dir_all(collection_path)?;
202        }
203
204        Ok(())
205    }
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Opens a database inside a fresh temporary directory.
    ///
    /// The directory guard is returned together with the database so each
    /// test keeps it alive until the test finishes.
    fn scratch_db() -> (tempfile::TempDir, Database) {
        let dir = tempdir().unwrap();
        let db = Database::open(dir.path()).unwrap();
        (dir, db)
    }

    #[test]
    fn test_database_open() {
        let (_dir, db) = scratch_db();

        // A freshly opened database has no collections registered.
        assert!(db.list_collections().is_empty());
    }

    #[test]
    fn test_create_collection() {
        let (_dir, db) = scratch_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let names = db.list_collections();
        assert_eq!(names, vec!["test"]);
    }

    #[test]
    fn test_duplicate_collection_error() {
        let (_dir, db) = scratch_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        // Creating the same name a second time must fail.
        assert!(db
            .create_collection("test", 768, DistanceMetric::Cosine)
            .is_err());
    }

    #[test]
    fn test_get_collection() {
        let (_dir, db) = scratch_db();

        // Unknown names yield `None`.
        assert!(db.get_collection("nonexistent").is_none());

        // A created collection can be fetched and reports its configuration.
        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let fetched = db.get_collection("test");
        assert!(fetched.is_some());

        let cfg = fetched.unwrap().config();
        assert_eq!(cfg.dimension, 768);
        assert_eq!(cfg.metric, DistanceMetric::Cosine);
    }

    #[test]
    fn test_delete_collection() {
        let (_dir, db) = scratch_db();

        db.create_collection("to_delete", 768, DistanceMetric::Cosine)
            .unwrap();
        assert_eq!(db.list_collections().len(), 1);

        // After deletion the collection is neither listed nor retrievable.
        db.delete_collection("to_delete").unwrap();
        assert!(db.list_collections().is_empty());
        assert!(db.get_collection("to_delete").is_none());
    }

    #[test]
    fn test_delete_nonexistent_collection() {
        let (_dir, db) = scratch_db();

        assert!(db.delete_collection("nonexistent").is_err());
    }

    #[test]
    fn test_multiple_collections() {
        let (_dir, db) = scratch_db();

        let specs = [
            ("coll1", 128, DistanceMetric::Cosine),
            ("coll2", 256, DistanceMetric::Euclidean),
            ("coll3", 768, DistanceMetric::DotProduct),
        ];
        for (name, dimension, metric) in specs {
            db.create_collection(name, dimension, metric).unwrap();
        }

        let names = db.list_collections();
        assert_eq!(names.len(), 3);
        for expected in ["coll1", "coll2", "coll3"] {
            assert!(names.contains(&expected.to_string()));
        }
    }
}