velesdb_core/
lib.rs

1//! # `VelesDB` Core
2//!
3//! High-performance vector database engine written in Rust.
4//!
5//! `VelesDB` is a local-first vector database designed for semantic search,
6//! recommendation systems, and RAG (Retrieval-Augmented Generation) applications.
7//!
8//! ## Features
9//!
10//! - **Blazing Fast**: HNSW index with explicit SIMD (4x faster)
11//! - **5 Distance Metrics**: Cosine, Euclidean, Dot Product, Hamming, Jaccard
12//! - **Hybrid Search**: Vector + BM25 full-text with RRF fusion
13//! - **Quantization**: SQ8 (4x) and Binary (32x) memory compression
14//! - **Persistent Storage**: Memory-mapped files for efficient disk access
15//!
16//! ## Quick Start
17//!
18//! ```rust,ignore
19//! use velesdb_core::{Database, DistanceMetric, Point, StorageMode};
20//! use serde_json::json;
21//!
22//! // Create a new database
23//! let db = Database::open("./data")?;
24//!
25//! // Create a collection (all 5 metrics available)
26//! db.create_collection("documents", 768, DistanceMetric::Cosine)?;
27//! // Or with quantization: create_collection_with_options(.., DistanceMetric::Hamming, StorageMode::Binary)
28//!
29//! let collection = db.get_collection("documents").unwrap();
30//!
31//! // Insert vectors (upsert takes ownership)
32//! collection.upsert(vec![
33//!     Point::new(1, vec![0.1; 768], Some(json!({"title": "Hello World"}))),
34//! ])?;
35//!
36//! // Search for similar vectors
37//! let results = collection.search(&query_vector, 10)?;
38//!
39//! // Hybrid search (vector + text)
40//! let hybrid = collection.hybrid_search(&query_vector, "hello", 5, Some(0.7))?;
41//! ```
42
43#![warn(missing_docs)]
44#![warn(clippy::all)]
45#![warn(clippy::pedantic)]
46#![allow(clippy::module_name_repetitions)]
47
48pub mod collection;
49pub mod column_store;
50pub mod config;
51pub mod distance;
52pub mod error;
53pub mod filter;
54pub mod gpu;
55pub mod half_precision;
56pub mod index;
57pub mod metrics;
58pub mod perf_optimizations;
59pub mod point;
60pub mod quantization;
61pub mod simd;
62pub mod simd_avx512;
63pub mod simd_dispatch;
64pub mod simd_explicit;
65pub mod storage;
66pub mod vector_ref;
67pub mod velesql;
68
69pub use index::{HnswIndex, HnswParams, SearchQuality, VectorIndex};
70
71pub use collection::Collection;
72pub use distance::DistanceMetric;
73pub use error::{Error, Result};
74pub use filter::{Condition, Filter};
75pub use point::Point;
76pub use quantization::{
77    cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
78    dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
79    BinaryQuantizedVector, QuantizedVector, StorageMode,
80};
81
82pub use column_store::{ColumnStore, ColumnType, ColumnValue, StringId, StringTable, TypedColumn};
83pub use config::{
84    ConfigError, HnswConfig, LimitsConfig, LoggingConfig, QuantizationConfig, SearchConfig,
85    SearchMode, ServerConfig, StorageConfig, VelesConfig,
86};
87pub use metrics::{
88    average_metrics, compute_latency_percentiles, hit_rate, mean_average_precision, mrr, ndcg_at_k,
89    precision_at_k, recall_at_k, LatencyStats,
90};
91
92/// Database instance managing collections and storage.
93pub struct Database {
94    /// Path to the data directory
95    data_dir: std::path::PathBuf,
96    /// Collections managed by this database
97    collections: parking_lot::RwLock<std::collections::HashMap<String, Collection>>,
98}
99
100impl Database {
101    /// Opens or creates a database at the specified path.
102    ///
103    /// # Arguments
104    ///
105    /// * `path` - Path to the data directory
106    ///
107    /// # Errors
108    ///
109    /// Returns an error if the directory cannot be created or accessed.
110    pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
111        let data_dir = path.as_ref().to_path_buf();
112        std::fs::create_dir_all(&data_dir)?;
113
114        Ok(Self {
115            data_dir,
116            collections: parking_lot::RwLock::new(std::collections::HashMap::new()),
117        })
118    }
119
120    /// Creates a new collection with the specified parameters.
121    ///
122    /// # Arguments
123    ///
124    /// * `name` - Unique name for the collection
125    /// * `dimension` - Vector dimension (e.g., 768 for many embedding models)
126    /// * `metric` - Distance metric to use for similarity calculations
127    ///
128    /// # Errors
129    ///
130    /// Returns an error if a collection with the same name already exists.
131    pub fn create_collection(
132        &self,
133        name: &str,
134        dimension: usize,
135        metric: DistanceMetric,
136    ) -> Result<()> {
137        self.create_collection_with_options(name, dimension, metric, StorageMode::default())
138    }
139
140    /// Creates a new collection with custom storage options.
141    ///
142    /// # Arguments
143    ///
144    /// * `name` - Unique name for the collection
145    /// * `dimension` - Vector dimension
146    /// * `metric` - Distance metric
147    /// * `storage_mode` - Vector storage mode (Full, SQ8, Binary)
148    ///
149    /// # Errors
150    ///
151    /// Returns an error if a collection with the same name already exists.
152    pub fn create_collection_with_options(
153        &self,
154        name: &str,
155        dimension: usize,
156        metric: DistanceMetric,
157        storage_mode: StorageMode,
158    ) -> Result<()> {
159        let mut collections = self.collections.write();
160
161        if collections.contains_key(name) {
162            return Err(Error::CollectionExists(name.to_string()));
163        }
164
165        let collection_path = self.data_dir.join(name);
166        let collection =
167            Collection::create_with_options(collection_path, dimension, metric, storage_mode)?;
168        collections.insert(name.to_string(), collection);
169
170        Ok(())
171    }
172
173    /// Gets a reference to a collection by name.
174    ///
175    /// # Arguments
176    ///
177    /// * `name` - Name of the collection
178    ///
179    /// # Returns
180    ///
181    /// Returns `None` if the collection does not exist.
182    pub fn get_collection(&self, name: &str) -> Option<Collection> {
183        self.collections.read().get(name).cloned()
184    }
185
186    /// Lists all collection names in the database.
187    pub fn list_collections(&self) -> Vec<String> {
188        self.collections.read().keys().cloned().collect()
189    }
190
191    /// Deletes a collection by name.
192    ///
193    /// # Arguments
194    ///
195    /// * `name` - Name of the collection to delete
196    ///
197    /// # Errors
198    ///
199    /// Returns an error if the collection does not exist.
200    pub fn delete_collection(&self, name: &str) -> Result<()> {
201        let mut collections = self.collections.write();
202
203        if collections.remove(name).is_none() {
204            return Err(Error::CollectionNotFound(name.to_string()));
205        }
206
207        let collection_path = self.data_dir.join(name);
208        if collection_path.exists() {
209            std::fs::remove_dir_all(collection_path)?;
210        }
211
212        Ok(())
213    }
214}
215
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Opens a database inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned alongside the database so the directory
    /// stays alive for the duration of the calling test.
    fn open_temp_db() -> (tempfile::TempDir, Database) {
        let dir = tempdir().unwrap();
        let db = Database::open(dir.path()).unwrap();
        (dir, db)
    }

    #[test]
    fn test_database_open() {
        let (_dir, db) = open_temp_db();
        assert!(db.list_collections().is_empty());
    }

    #[test]
    fn test_create_collection() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        assert_eq!(db.list_collections(), vec!["test"]);
    }

    #[test]
    fn test_duplicate_collection_error() {
        let (_dir, db) = open_temp_db();

        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        // Check the specific variant: a bare `is_err()` would also accept an
        // unrelated failure such as an I/O error.
        let result = db.create_collection("test", 768, DistanceMetric::Cosine);
        assert!(
            matches!(&result, Err(Error::CollectionExists(name)) if name == "test"),
            "expected Error::CollectionExists(\"test\")"
        );

        // The failed attempt must not disturb the existing registration.
        assert_eq!(db.list_collections(), vec!["test"]);
    }

    #[test]
    fn test_get_collection() {
        let (_dir, db) = open_temp_db();

        // Non-existent collection returns None
        assert!(db.get_collection("nonexistent").is_none());

        // Create and retrieve collection
        db.create_collection("test", 768, DistanceMetric::Cosine)
            .unwrap();

        let collection = db.get_collection("test");
        assert!(collection.is_some());

        let config = collection.unwrap().config();
        assert_eq!(config.dimension, 768);
        assert_eq!(config.metric, DistanceMetric::Cosine);
    }

    #[test]
    fn test_delete_collection() {
        let (dir, db) = open_temp_db();

        db.create_collection("to_delete", 768, DistanceMetric::Cosine)
            .unwrap();
        assert_eq!(db.list_collections().len(), 1);

        // Delete the collection
        db.delete_collection("to_delete").unwrap();
        assert!(db.list_collections().is_empty());
        assert!(db.get_collection("to_delete").is_none());

        // The on-disk directory must be gone as well, not just the map entry.
        assert!(!dir.path().join("to_delete").exists());
    }

    #[test]
    fn test_delete_nonexistent_collection() {
        let (_dir, db) = open_temp_db();

        let result = db.delete_collection("nonexistent");
        assert!(
            matches!(&result, Err(Error::CollectionNotFound(name)) if name == "nonexistent"),
            "expected Error::CollectionNotFound(\"nonexistent\")"
        );
    }

    #[test]
    fn test_multiple_collections() {
        let (_dir, db) = open_temp_db();

        db.create_collection("coll1", 128, DistanceMetric::Cosine)
            .unwrap();
        db.create_collection("coll2", 256, DistanceMetric::Euclidean)
            .unwrap();
        db.create_collection("coll3", 768, DistanceMetric::DotProduct)
            .unwrap();

        // Listing order is unspecified, so sort before comparing the full set.
        let mut collections = db.list_collections();
        collections.sort_unstable();
        assert_eq!(collections, ["coll1", "coll2", "coll3"]);
    }
}