velesdb_core/
lib.rs

1//! # `VelesDB` Core
2//!
3//! High-performance vector database engine written in Rust.
4//!
5//! `VelesDB` is a local-first vector database designed for semantic search,
6//! recommendation systems, and RAG (Retrieval-Augmented Generation) applications.
7//!
8//! ## Features
9//!
10//! - **Blazing Fast**: HNSW index with SIMD-optimized distance calculations
11//! - **Persistent Storage**: Memory-mapped files for efficient disk access
12//! - **Simple API**: Easy-to-use interface for vector operations
13//!
14//! ## Quick Start
15//!
//! ```rust,ignore
//! use velesdb_core::{Database, Collection, DistanceMetric, Point};
//!
//! // Open (or create) a database
//! let db = Database::open("./data")?;
//!
//! // Create a collection, then fetch a handle to it
//! // (`create_collection` returns `Result<()>`, not the collection itself)
//! db.create_collection("documents", 768, DistanceMetric::Cosine)?;
//! let collection: Collection = db
//!     .get_collection("documents")
//!     .expect("collection was just created");
//!
//! // Insert vectors
//! collection.upsert(vec![
//!     Point::new(1, vec![0.1, 0.2, ...], json!({"title": "Hello World"})),
//! ])?;
//!
//! // Search for similar vectors
//! let results = collection.search(&query_vector, 10)?;
//! ```
33
34#![warn(missing_docs)]
35#![warn(clippy::all)]
36#![warn(clippy::pedantic)]
37#![allow(clippy::module_name_repetitions)]
38
39pub mod collection;
40pub mod column_store;
41pub mod distance;
42pub mod error;
43pub mod filter;
44pub mod half_precision;
45pub mod index;
46pub mod metrics;
47pub mod perf_optimizations;
48pub mod point;
49pub mod quantization;
50pub mod simd;
51pub mod simd_avx512;
52pub mod simd_explicit;
53pub mod storage;
54pub mod velesql;
55
56pub use index::{HnswIndex, HnswParams, SearchQuality, VectorIndex};
57
58pub use collection::Collection;
59pub use distance::DistanceMetric;
60pub use error::{Error, Result};
61pub use filter::{Condition, Filter};
62pub use point::Point;
63pub use quantization::{
64    cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
65    dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
66    BinaryQuantizedVector, QuantizedVector, StorageMode,
67};
68
69pub use column_store::{ColumnStore, ColumnType, ColumnValue, StringId, StringTable, TypedColumn};
70pub use metrics::{
71    average_metrics, compute_latency_percentiles, hit_rate, mean_average_precision, mrr, ndcg_at_k,
72    precision_at_k, recall_at_k, LatencyStats,
73};
74
75/// Database instance managing collections and storage.
76pub struct Database {
77    /// Path to the data directory
78    data_dir: std::path::PathBuf,
79    /// Collections managed by this database
80    collections: parking_lot::RwLock<std::collections::HashMap<String, Collection>>,
81}
82
83impl Database {
84    /// Opens or creates a database at the specified path.
85    ///
86    /// # Arguments
87    ///
88    /// * `path` - Path to the data directory
89    ///
90    /// # Errors
91    ///
92    /// Returns an error if the directory cannot be created or accessed.
93    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
94        let data_dir = path.as_ref().to_path_buf();
95        std::fs::create_dir_all(&data_dir)?;
96
97        Ok(Self {
98            data_dir,
99            collections: parking_lot::RwLock::new(std::collections::HashMap::new()),
100        })
101    }
102
103    /// Creates a new collection with the specified parameters.
104    ///
105    /// # Arguments
106    ///
107    /// * `name` - Unique name for the collection
108    /// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
109    /// * `metric` - Distance metric to use for similarity calculations
110    ///
111    /// # Errors
112    ///
113    /// Returns an error if a collection with the same name already exists.
114    pub fn create_collection(
115        &self,
116        name: &str,
117        dimension: usize,
118        metric: DistanceMetric,
119    ) -> Result<()> {
120        self.create_collection_with_options(name, dimension, metric, StorageMode::default())
121    }
122
123    /// Creates a new collection with custom storage options.
124    ///
125    /// # Arguments
126    ///
127    /// * `name` - Unique name for the collection
128    /// * `dimension` - Vector dimension
129    /// * `metric` - Distance metric
130    /// * `storage_mode` - Vector storage mode (Full, SQ8, Binary)
131    ///
132    /// # Errors
133    ///
134    /// Returns an error if a collection with the same name already exists.
135    pub fn create_collection_with_options(
136        &self,
137        name: &str,
138        dimension: usize,
139        metric: DistanceMetric,
140        storage_mode: StorageMode,
141    ) -> Result<()> {
142        let mut collections = self.collections.write();
143
144        if collections.contains_key(name) {
145            return Err(Error::CollectionExists(name.to_string()));
146        }
147
148        let collection_path = self.data_dir.join(name);
149        let collection =
150            Collection::create_with_options(collection_path, dimension, metric, storage_mode)?;
151        collections.insert(name.to_string(), collection);
152
153        Ok(())
154    }
155
156    /// Gets a reference to a collection by name.
157    ///
158    /// # Arguments
159    ///
160    /// * `name` - Name of the collection
161    ///
162    /// # Returns
163    ///
164    /// Returns `None` if the collection does not exist.
165    pub fn get_collection(&self, name: &str) -> Option<Collection> {
166        self.collections.read().get(name).cloned()
167    }
168
169    /// Lists all collection names in the database.
170    pub fn list_collections(&self) -> Vec<String> {
171        self.collections.read().keys().cloned().collect()
172    }
173
174    /// Deletes a collection by name.
175    ///
176    /// # Arguments
177    ///
178    /// * `name` - Name of the collection to delete
179    ///
180    /// # Errors
181    ///
182    /// Returns an error if the collection does not exist.
183    pub fn delete_collection(&self, name: &str) -> Result<()> {
184        let mut collections = self.collections.write();
185
186        if collections.remove(name).is_none() {
187            return Err(Error::CollectionNotFound(name.to_string()));
188        }
189
190        let collection_path = self.data_dir.join(name);
191        if collection_path.exists() {
192            std::fs::remove_dir_all(collection_path)?;
193        }
194
195        Ok(())
196    }
197}
198
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Opens a database inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned too so the directory stays alive for the
    /// duration of the calling test.
    fn scratch_db() -> (tempfile::TempDir, Database) {
        let guard = tempdir().unwrap();
        let db = Database::open(guard.path()).unwrap();
        (guard, db)
    }

    /// A freshly opened database has no collections.
    #[test]
    fn test_database_open() {
        let (_guard, db) = scratch_db();

        assert!(db.list_collections().is_empty());
    }

    /// A created collection shows up in the listing.
    #[test]
    fn test_create_collection() {
        let (_guard, db) = scratch_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let names = db.list_collections();
        assert_eq!(names, vec!["test"]);
    }

    /// Creating the same collection twice fails the second time.
    #[test]
    fn test_duplicate_collection_error() {
        let (_guard, db) = scratch_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let second_attempt = db.create_collection("test", 768, DistanceMetric::Cosine);
        assert!(second_attempt.is_err());
    }

    /// Lookup returns `None` for unknown names and the configured collection otherwise.
    #[test]
    fn test_get_collection() {
        let (_guard, db) = scratch_db();

        // Unknown name: nothing to return
        assert!(db.get_collection("nonexistent").is_none());

        // Known name: the collection carries the parameters it was created with
        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let found = db.get_collection("test");
        assert!(found.is_some());

        let config = found.unwrap().config();
        assert_eq!(config.dimension, 768);
        assert_eq!(config.metric, DistanceMetric::Cosine);
    }

    /// Deleting a collection removes it from both listing and lookup.
    #[test]
    fn test_delete_collection() {
        let (_guard, db) = scratch_db();

        db.create_collection("to_delete", 768, DistanceMetric::Cosine)
            .unwrap();
        assert_eq!(db.list_collections().len(), 1);

        db.delete_collection("to_delete").unwrap();

        assert!(db.list_collections().is_empty());
        assert!(db.get_collection("to_delete").is_none());
    }

    /// Deleting an unknown collection is an error.
    #[test]
    fn test_delete_nonexistent_collection() {
        let (_guard, db) = scratch_db();

        let outcome = db.delete_collection("nonexistent");
        assert!(outcome.is_err());
    }

    /// Several collections with different parameters can coexist.
    #[test]
    fn test_multiple_collections() {
        let (_guard, db) = scratch_db();

        let specs = [
            ("coll1", 128, DistanceMetric::Cosine),
            ("coll2", 256, DistanceMetric::Euclidean),
            ("coll3", 768, DistanceMetric::DotProduct),
        ];
        for (name, dimension, metric) in specs {
            db.create_collection(name, dimension, metric).unwrap();
        }

        let names = db.list_collections();
        assert_eq!(names.len(), 3);
        for expected in ["coll1", "coll2", "coll3"] {
            assert!(names.contains(&expected.to_string()));
        }
    }
}