velesdb_core/
lib.rs

1//! # `VelesDB` Core
2//!
3//! High-performance vector database engine written in Rust.
4//!
5//! `VelesDB` is a local-first vector database designed for semantic search,
6//! recommendation systems, and RAG (Retrieval-Augmented Generation) applications.
7//!
8//! ## Features
9//!
10//! - **Blazing Fast**: HNSW index with SIMD-optimized distance calculations
11//! - **Persistent Storage**: Memory-mapped files for efficient disk access
12//! - **Simple API**: Easy-to-use interface for vector operations
13//!
14//! ## Quick Start
15//!
16//! ```rust,ignore
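//! use velesdb_core::{Database, DistanceMetric, Point};
//!
//! // Create a new database
//! let db = Database::open("./data")?;
//!
//! // Create a collection, then fetch a handle to it
//! // (`create_collection` returns `Result<()>`, not the collection itself)
//! db.create_collection("documents", 768, DistanceMetric::Cosine)?;
//! let collection = db
//!     .get_collection("documents")
//!     .expect("collection was just created");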
24//!
25//! // Insert vectors
26//! collection.upsert(vec![
27//!     Point::new(1, vec![0.1, 0.2, ...], json!({"title": "Hello World"})),
28//! ])?;
29//!
30//! // Search for similar vectors
31//! let results = collection.search(&query_vector, 10)?;
32//! ```
33
34#![warn(missing_docs)]
35#![warn(clippy::all)]
36#![warn(clippy::pedantic)]
37#![allow(clippy::module_name_repetitions)]
38
39pub mod collection;
40pub mod column_store;
41pub mod distance;
42pub mod error;
43pub mod filter;
44pub mod half_precision;
45pub mod index;
46pub mod metrics;
47pub mod perf_optimizations;
48pub mod point;
49pub mod quantization;
50pub mod simd;
51pub mod simd_avx512;
52pub mod simd_explicit;
53pub mod storage;
54pub mod velesql;
55
56pub use index::{HnswIndex, HnswParams, SearchQuality, VectorIndex};
57
58pub use collection::Collection;
59pub use distance::DistanceMetric;
60pub use error::{Error, Result};
61pub use filter::{Condition, Filter};
62pub use point::Point;
63pub use quantization::{BinaryQuantizedVector, QuantizedVector, StorageMode};
64
65pub use column_store::{ColumnStore, ColumnType, ColumnValue, StringId, StringTable, TypedColumn};
66pub use metrics::{
67    average_metrics, compute_latency_percentiles, hit_rate, mean_average_precision, mrr, ndcg_at_k,
68    precision_at_k, recall_at_k, LatencyStats,
69};
70
71/// Database instance managing collections and storage.
72pub struct Database {
73    /// Path to the data directory
74    data_dir: std::path::PathBuf,
75    /// Collections managed by this database
76    collections: parking_lot::RwLock<std::collections::HashMap<String, Collection>>,
77}
78
79impl Database {
80    /// Opens or creates a database at the specified path.
81    ///
82    /// # Arguments
83    ///
84    /// * `path` - Path to the data directory
85    ///
86    /// # Errors
87    ///
88    /// Returns an error if the directory cannot be created or accessed.
89    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
90        let data_dir = path.as_ref().to_path_buf();
91        std::fs::create_dir_all(&data_dir)?;
92
93        Ok(Self {
94            data_dir,
95            collections: parking_lot::RwLock::new(std::collections::HashMap::new()),
96        })
97    }
98
99    /// Creates a new collection with the specified parameters.
100    ///
101    /// # Arguments
102    ///
103    /// * `name` - Unique name for the collection
104    /// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
105    /// * `metric` - Distance metric to use for similarity calculations
106    ///
107    /// # Errors
108    ///
109    /// Returns an error if a collection with the same name already exists.
110    pub fn create_collection(
111        &self,
112        name: &str,
113        dimension: usize,
114        metric: DistanceMetric,
115    ) -> Result<()> {
116        let mut collections = self.collections.write();
117
118        if collections.contains_key(name) {
119            return Err(Error::CollectionExists(name.to_string()));
120        }
121
122        let collection_path = self.data_dir.join(name);
123        let collection = Collection::create(collection_path, dimension, metric)?;
124        collections.insert(name.to_string(), collection);
125
126        Ok(())
127    }
128
129    /// Gets a reference to a collection by name.
130    ///
131    /// # Arguments
132    ///
133    /// * `name` - Name of the collection
134    ///
135    /// # Returns
136    ///
137    /// Returns `None` if the collection does not exist.
138    pub fn get_collection(&self, name: &str) -> Option<Collection> {
139        self.collections.read().get(name).cloned()
140    }
141
142    /// Lists all collection names in the database.
143    pub fn list_collections(&self) -> Vec<String> {
144        self.collections.read().keys().cloned().collect()
145    }
146
147    /// Deletes a collection by name.
148    ///
149    /// # Arguments
150    ///
151    /// * `name` - Name of the collection to delete
152    ///
153    /// # Errors
154    ///
155    /// Returns an error if the collection does not exist.
156    pub fn delete_collection(&self, name: &str) -> Result<()> {
157        let mut collections = self.collections.write();
158
159        if collections.remove(name).is_none() {
160            return Err(Error::CollectionNotFound(name.to_string()));
161        }
162
163        let collection_path = self.data_dir.join(name);
164        if collection_path.exists() {
165            std::fs::remove_dir_all(collection_path)?;
166        }
167
168        Ok(())
169    }
170}
171
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Opens a database in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned alongside the database so the directory
    /// stays alive for as long as the test holds on to it.
    fn open_temp_db() -> (tempfile::TempDir, Database) {
        let dir = tempdir().unwrap();
        let db = Database::open(dir.path()).unwrap();
        (dir, db)
    }

    #[test]
    fn test_database_open() {
        let (_dir, db) = open_temp_db();
        assert!(db.list_collections().is_empty());
    }

    #[test]
    fn test_create_collection() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        assert_eq!(db.list_collections(), vec!["test"]);
    }

    #[test]
    fn test_duplicate_collection_error() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        // Pin the exact variant and payload, not just "some error".
        let result = db.create_collection("test", 768, DistanceMetric::Cosine);
        assert!(matches!(
            &result,
            Err(Error::CollectionExists(name)) if name == "test"
        ));

        // The failed attempt must not disturb the existing registration.
        assert_eq!(db.list_collections(), vec!["test"]);
    }

    #[test]
    fn test_get_collection() {
        let (_dir, db) = open_temp_db();

        // Non-existent collection returns None
        assert!(db.get_collection("nonexistent").is_none());

        // Create and retrieve collection
        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let collection = db.get_collection("test");
        assert!(collection.is_some());

        let config = collection.unwrap().config();
        assert_eq!(config.dimension, 768);
        assert_eq!(config.metric, DistanceMetric::Cosine);
    }

    #[test]
    fn test_delete_collection() {
        let (dir, db) = open_temp_db();

        db.create_collection("to_delete", 768, DistanceMetric::Cosine)
            .unwrap();
        assert_eq!(db.list_collections().len(), 1);

        // Delete the collection
        db.delete_collection("to_delete").unwrap();
        assert!(db.list_collections().is_empty());
        assert!(db.get_collection("to_delete").is_none());

        // The on-disk directory must be gone as well, not just the registry entry.
        assert!(!dir.path().join("to_delete").exists());
    }

    #[test]
    fn test_delete_nonexistent_collection() {
        let (_dir, db) = open_temp_db();

        let result = db.delete_collection("nonexistent");
        assert!(matches!(
            &result,
            Err(Error::CollectionNotFound(name)) if name == "nonexistent"
        ));
    }

    #[test]
    fn test_multiple_collections() {
        let (_dir, db) = open_temp_db();

        db.create_collection("coll1", 128, DistanceMetric::Cosine)
            .unwrap();
        db.create_collection("coll2", 256, DistanceMetric::Euclidean)
            .unwrap();
        db.create_collection("coll3", 768, DistanceMetric::DotProduct)
            .unwrap();

        // Listing order is unspecified, so sort before comparing the exact set.
        let mut names = db.list_collections();
        names.sort();
        assert_eq!(names, vec!["coll1", "coll2", "coll3"]);
    }
}