velesdb_core/
lib.rs

1//! # `VelesDB` Core
2//!
3//! High-performance vector database engine written in Rust.
4//!
5//! `VelesDB` is a local-first vector database designed for semantic search,
6//! recommendation systems, and RAG (Retrieval-Augmented Generation) applications.
7//!
8//! ## Features
9//!
10//! - **Blazing Fast**: HNSW index with SIMD-optimized distance calculations
11//! - **Persistent Storage**: Memory-mapped files for efficient disk access
12//! - **Simple API**: Easy-to-use interface for vector operations
13//!
14//! ## Quick Start
15//!
16//! ```rust,ignore
//! use velesdb_core::{Database, DistanceMetric, Point};
18//!
19//! // Create a new database
20//! let db = Database::open("./data")?;
21//!
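//! // Create a collection, then fetch a handle to it
//! db.create_collection("documents", 768, DistanceMetric::Cosine)?;
//! let collection = db
//!     .get_collection("documents")
//!     .expect("collection was just created");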
24//!
25//! // Insert vectors
26//! collection.upsert(vec![
27//!     Point::new(1, vec![0.1, 0.2, ...], json!({"title": "Hello World"})),
28//! ])?;
29//!
30//! // Search for similar vectors
31//! let results = collection.search(&query_vector, 10)?;
32//! ```
33
34#![warn(missing_docs)]
35#![warn(clippy::all)]
36#![warn(clippy::pedantic)]
37#![allow(clippy::module_name_repetitions)]
38
39pub mod collection;
40pub mod column_store;
41pub mod distance;
42pub mod error;
43pub mod filter;
44pub mod index;
45pub mod point;
46pub mod quantization;
47pub mod simd;
48pub mod simd_explicit;
49pub mod storage;
50pub mod velesql;
51
52pub use index::{HnswIndex, HnswParams, SearchQuality, VectorIndex};
53
54pub use collection::Collection;
55pub use distance::DistanceMetric;
56pub use error::{Error, Result};
57pub use filter::{Condition, Filter};
58pub use point::Point;
59pub use quantization::{BinaryQuantizedVector, QuantizedVector, StorageMode};
60
61pub use column_store::{ColumnStore, ColumnType, ColumnValue, StringId, StringTable, TypedColumn};
62
/// Database instance managing collections and storage.
///
/// Holds the root data directory together with an in-memory registry of
/// collections, keyed by name and guarded by a read-write lock.
pub struct Database {
    /// Root directory on disk; a collection's path is `data_dir` joined with its name.
    data_dir: std::path::PathBuf,
    /// Collections currently registered with this instance, keyed by collection name.
    collections: parking_lot::RwLock<std::collections::HashMap<String, Collection>>,
}
70
71impl Database {
72    /// Opens or creates a database at the specified path.
73    ///
74    /// # Arguments
75    ///
76    /// * `path` - Path to the data directory
77    ///
78    /// # Errors
79    ///
80    /// Returns an error if the directory cannot be created or accessed.
81    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
82        let data_dir = path.as_ref().to_path_buf();
83        std::fs::create_dir_all(&data_dir)?;
84
85        Ok(Self {
86            data_dir,
87            collections: parking_lot::RwLock::new(std::collections::HashMap::new()),
88        })
89    }
90
91    /// Creates a new collection with the specified parameters.
92    ///
93    /// # Arguments
94    ///
95    /// * `name` - Unique name for the collection
96    /// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
97    /// * `metric` - Distance metric to use for similarity calculations
98    ///
99    /// # Errors
100    ///
101    /// Returns an error if a collection with the same name already exists.
102    pub fn create_collection(
103        &self,
104        name: &str,
105        dimension: usize,
106        metric: DistanceMetric,
107    ) -> Result<()> {
108        let mut collections = self.collections.write();
109
110        if collections.contains_key(name) {
111            return Err(Error::CollectionExists(name.to_string()));
112        }
113
114        let collection_path = self.data_dir.join(name);
115        let collection = Collection::create(collection_path, dimension, metric)?;
116        collections.insert(name.to_string(), collection);
117
118        Ok(())
119    }
120
121    /// Gets a reference to a collection by name.
122    ///
123    /// # Arguments
124    ///
125    /// * `name` - Name of the collection
126    ///
127    /// # Returns
128    ///
129    /// Returns `None` if the collection does not exist.
130    pub fn get_collection(&self, name: &str) -> Option<Collection> {
131        self.collections.read().get(name).cloned()
132    }
133
134    /// Lists all collection names in the database.
135    pub fn list_collections(&self) -> Vec<String> {
136        self.collections.read().keys().cloned().collect()
137    }
138
139    /// Deletes a collection by name.
140    ///
141    /// # Arguments
142    ///
143    /// * `name` - Name of the collection to delete
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if the collection does not exist.
148    pub fn delete_collection(&self, name: &str) -> Result<()> {
149        let mut collections = self.collections.write();
150
151        if collections.remove(name).is_none() {
152            return Err(Error::CollectionNotFound(name.to_string()));
153        }
154
155        let collection_path = self.data_dir.join(name);
156        if collection_path.exists() {
157            std::fs::remove_dir_all(collection_path)?;
158        }
159
160        Ok(())
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Opens a database inside a fresh temporary directory.
    ///
    /// The directory guard is returned alongside the database so it is not
    /// dropped while the test is still using the directory.
    fn open_temp_db() -> (tempfile::TempDir, Database) {
        let dir = tempdir().unwrap();
        let db = Database::open(dir.path()).unwrap();
        (dir, db)
    }

    #[test]
    fn test_database_open() {
        let (_dir, db) = open_temp_db();
        assert!(db.list_collections().is_empty());
    }

    #[test]
    fn test_create_collection() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        assert_eq!(db.list_collections(), vec!["test"]);
    }

    #[test]
    fn test_duplicate_collection_error() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        // Assert the specific variant so an unrelated failure (e.g. an I/O
        // error) cannot make this test pass by accident.
        let result = db.create_collection("test", 768, DistanceMetric::Cosine);
        assert!(matches!(
            &result,
            Err(Error::CollectionExists(name)) if name == "test"
        ));

        // The failed attempt must not have disturbed the existing collection.
        assert_eq!(db.list_collections(), vec!["test"]);
    }

    #[test]
    fn test_get_collection() {
        let (_dir, db) = open_temp_db();

        // Non-existent collection returns None
        assert!(db.get_collection("nonexistent").is_none());

        // Create and retrieve collection
        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let collection = db
            .get_collection("test")
            .expect("collection was just created");

        let config = collection.config();
        assert_eq!(config.dimension, 768);
        assert_eq!(config.metric, DistanceMetric::Cosine);
    }

    #[test]
    fn test_delete_collection() {
        let (dir, db) = open_temp_db();

        db.create_collection("to_delete", 768, DistanceMetric::Cosine)
            .unwrap();
        assert_eq!(db.list_collections().len(), 1);

        // Delete the collection
        db.delete_collection("to_delete").unwrap();
        assert!(db.list_collections().is_empty());
        assert!(db.get_collection("to_delete").is_none());

        // Nothing may be left behind on disk either.
        assert!(!dir.path().join("to_delete").exists());
    }

    #[test]
    fn test_delete_nonexistent_collection() {
        let (_dir, db) = open_temp_db();

        let result = db.delete_collection("nonexistent");
        assert!(matches!(
            &result,
            Err(Error::CollectionNotFound(name)) if name == "nonexistent"
        ));
    }

    #[test]
    fn test_multiple_collections() {
        let (_dir, db) = open_temp_db();

        db.create_collection("coll1", 128, DistanceMetric::Cosine)
            .unwrap();
        db.create_collection("coll2", 256, DistanceMetric::Euclidean)
            .unwrap();
        db.create_collection("coll3", 768, DistanceMetric::DotProduct)
            .unwrap();

        // Listing order is unspecified, so compare after sorting.
        let mut collections = db.list_collections();
        collections.sort_unstable();
        assert_eq!(collections, vec!["coll1", "coll2", "coll3"]);
    }
}