velesdb_core/
lib.rs

1//! # `VelesDB` Core
2//!
3//! High-performance vector database engine written in Rust.
4//!
5//! `VelesDB` is a local-first vector database designed for semantic search,
6//! recommendation systems, and RAG (Retrieval-Augmented Generation) applications.
7//!
8//! ## Features
9//!
10//! - **Blazing Fast**: HNSW index with SIMD-optimized distance calculations
11//! - **Persistent Storage**: Memory-mapped files for efficient disk access
12//! - **Simple API**: Easy-to-use interface for vector operations
13//!
14//! ## Quick Start
15//!
16//! ```rust,ignore
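//! use velesdb_core::{Collection, Database, DistanceMetric, Point};
//!
//! // Open (or create) a database
//! let db = Database::open("./data")?;
//!
//! // Create a collection, then fetch a handle to it
//! // (`create_collection` returns `Result<()>`, not the collection itself)
//! db.create_collection("documents", 768, DistanceMetric::Cosine)?;
//! let collection = db
//!     .get_collection("documents")
//!     .expect("collection was just created");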
24//!
25//! // Insert vectors
26//! collection.upsert(vec![
27//!     Point::new(1, vec![0.1, 0.2, ...], json!({"title": "Hello World"})),
28//! ])?;
29//!
30//! // Search for similar vectors
31//! let results = collection.search(&query_vector, 10)?;
32//! ```
33
34#![warn(missing_docs)]
35#![warn(clippy::all)]
36#![warn(clippy::pedantic)]
37#![allow(clippy::module_name_repetitions)]
38
39pub mod collection;
40pub mod column_store;
41pub mod distance;
42pub mod error;
43pub mod filter;
44pub mod half_precision;
45pub mod index;
46pub mod point;
47pub mod quantization;
48pub mod simd;
49pub mod simd_avx512;
50pub mod simd_explicit;
51pub mod storage;
52pub mod velesql;
53
54pub use index::{HnswIndex, HnswParams, SearchQuality, VectorIndex};
55
56pub use collection::Collection;
57pub use distance::DistanceMetric;
58pub use error::{Error, Result};
59pub use filter::{Condition, Filter};
60pub use point::Point;
61pub use quantization::{BinaryQuantizedVector, QuantizedVector, StorageMode};
62
63pub use column_store::{ColumnStore, ColumnType, ColumnValue, StringId, StringTable, TypedColumn};
64
65/// Database instance managing collections and storage.
66pub struct Database {
67    /// Path to the data directory
68    data_dir: std::path::PathBuf,
69    /// Collections managed by this database
70    collections: parking_lot::RwLock<std::collections::HashMap<String, Collection>>,
71}
72
73impl Database {
74    /// Opens or creates a database at the specified path.
75    ///
76    /// # Arguments
77    ///
78    /// * `path` - Path to the data directory
79    ///
80    /// # Errors
81    ///
82    /// Returns an error if the directory cannot be created or accessed.
83    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
84        let data_dir = path.as_ref().to_path_buf();
85        std::fs::create_dir_all(&data_dir)?;
86
87        Ok(Self {
88            data_dir,
89            collections: parking_lot::RwLock::new(std::collections::HashMap::new()),
90        })
91    }
92
93    /// Creates a new collection with the specified parameters.
94    ///
95    /// # Arguments
96    ///
97    /// * `name` - Unique name for the collection
98    /// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
99    /// * `metric` - Distance metric to use for similarity calculations
100    ///
101    /// # Errors
102    ///
103    /// Returns an error if a collection with the same name already exists.
104    pub fn create_collection(
105        &self,
106        name: &str,
107        dimension: usize,
108        metric: DistanceMetric,
109    ) -> Result<()> {
110        let mut collections = self.collections.write();
111
112        if collections.contains_key(name) {
113            return Err(Error::CollectionExists(name.to_string()));
114        }
115
116        let collection_path = self.data_dir.join(name);
117        let collection = Collection::create(collection_path, dimension, metric)?;
118        collections.insert(name.to_string(), collection);
119
120        Ok(())
121    }
122
123    /// Gets a reference to a collection by name.
124    ///
125    /// # Arguments
126    ///
127    /// * `name` - Name of the collection
128    ///
129    /// # Returns
130    ///
131    /// Returns `None` if the collection does not exist.
132    pub fn get_collection(&self, name: &str) -> Option<Collection> {
133        self.collections.read().get(name).cloned()
134    }
135
136    /// Lists all collection names in the database.
137    pub fn list_collections(&self) -> Vec<String> {
138        self.collections.read().keys().cloned().collect()
139    }
140
141    /// Deletes a collection by name.
142    ///
143    /// # Arguments
144    ///
145    /// * `name` - Name of the collection to delete
146    ///
147    /// # Errors
148    ///
149    /// Returns an error if the collection does not exist.
150    pub fn delete_collection(&self, name: &str) -> Result<()> {
151        let mut collections = self.collections.write();
152
153        if collections.remove(name).is_none() {
154            return Err(Error::CollectionNotFound(name.to_string()));
155        }
156
157        let collection_path = self.data_dir.join(name);
158        if collection_path.exists() {
159            std::fs::remove_dir_all(collection_path)?;
160        }
161
162        Ok(())
163    }
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Opens a fresh database inside a new temporary directory.
    ///
    /// The `TempDir` guard is returned with the database so the directory
    /// stays alive for the duration of the test.
    fn open_temp_db() -> (tempfile::TempDir, Database) {
        let dir = tempdir().unwrap();
        let db = Database::open(dir.path()).unwrap();
        (dir, db)
    }

    #[test]
    fn test_database_open() {
        let (_dir, db) = open_temp_db();
        assert!(db.list_collections().is_empty());
    }

    #[test]
    fn test_create_collection() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        assert_eq!(db.list_collections(), vec!["test"]);
    }

    #[test]
    fn test_duplicate_collection_error() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        // Pin the exact variant: a bare `is_err()` would also pass on an
        // unrelated failure such as an I/O error.
        let result = db.create_collection("test", 768, DistanceMetric::Cosine);
        assert!(
            matches!(&result, Err(Error::CollectionExists(name)) if name == "test"),
            "expected Error::CollectionExists(\"test\")"
        );

        // The failed attempt must not register a second entry.
        assert_eq!(db.list_collections().len(), 1);
    }

    #[test]
    fn test_get_collection() {
        let (_dir, db) = open_temp_db();

        // Non-existent collection returns None
        assert!(db.get_collection("nonexistent").is_none());

        // Create and retrieve collection
        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let collection = db
            .get_collection("test")
            .expect("collection was just created");

        let config = collection.config();
        assert_eq!(config.dimension, 768);
        assert_eq!(config.metric, DistanceMetric::Cosine);
    }

    #[test]
    fn test_delete_collection() {
        let (dir, db) = open_temp_db();

        db.create_collection("to_delete", 768, DistanceMetric::Cosine)
            .unwrap();
        assert_eq!(db.list_collections().len(), 1);

        // Delete the collection
        db.delete_collection("to_delete").unwrap();
        assert!(db.list_collections().is_empty());
        assert!(db.get_collection("to_delete").is_none());

        // The on-disk directory must be gone as well, not just the registry entry.
        assert!(!dir.path().join("to_delete").exists());
    }

    #[test]
    fn test_delete_nonexistent_collection() {
        let (_dir, db) = open_temp_db();

        let result = db.delete_collection("nonexistent");
        assert!(
            matches!(&result, Err(Error::CollectionNotFound(name)) if name == "nonexistent"),
            "expected Error::CollectionNotFound(\"nonexistent\")"
        );
    }

    #[test]
    fn test_multiple_collections() {
        let (_dir, db) = open_temp_db();

        db.create_collection("coll1", 128, DistanceMetric::Cosine)
            .unwrap();
        db.create_collection("coll2", 256, DistanceMetric::Euclidean)
            .unwrap();
        db.create_collection("coll3", 768, DistanceMetric::DotProduct)
            .unwrap();

        let collections = db.list_collections();
        assert_eq!(collections.len(), 3);
        for expected in ["coll1", "coll2", "coll3"] {
            assert!(
                collections.iter().any(|name| name == expected),
                "missing collection {expected}"
            );
        }
    }
}