ruvector_core/
lib.rs

1//! # Ruvector Core
2//!
3//! High-performance Rust-native vector database with HNSW indexing and SIMD-optimized operations.
4//!
5//! ## Working Features (Tested & Benchmarked)
6//!
7//! - **HNSW Indexing**: Approximate nearest neighbor search with O(log n) complexity
8//! - **SIMD Distance**: SimSIMD-powered distance calculations (~16M ops/sec for 512-dim)
9//! - **Quantization**: Scalar (4x) and binary (32x) compression with distance support
10//! - **Persistence**: REDB-based storage with config persistence
11//! - **Search**: ~2.5K queries/sec on 10K vectors (benchmarked)
12//!
13//! ## ⚠️ Experimental/Incomplete Features - READ BEFORE USE
14//!
15//! - **AgenticDB**: ⚠️⚠️⚠️ **CRITICAL WARNING** ⚠️⚠️⚠️
16//!   - Uses PLACEHOLDER hash-based embeddings, NOT real semantic embeddings
17//!   - "dog" and "cat" will NOT be similar (different characters)
18//!   - "dog" and "god" WILL be similar (same characters) - **This is wrong!**
19//!   - **MUST integrate real embedding model for production** (ONNX, Candle, or API)
20//!   - See [`agenticdb`] module docs and `/examples/onnx-embeddings` for integration
21//! - **Advanced Features**: Conformal prediction, hybrid search - functional but less tested
22//!
23//! ## What This Is NOT
24//!
25//! - This is NOT a complete RAG solution - you need external embedding models
26//! - Examples use mock embeddings for demonstration only
27
28#![warn(missing_docs)]
29#![warn(clippy::all)]
30
31pub mod advanced_features;
32
33// AgenticDB requires storage feature
34#[cfg(feature = "storage")]
35pub mod agenticdb;
36
37pub mod distance;
38pub mod embeddings;
39pub mod error;
40pub mod index;
41pub mod quantization;
42
43// Storage backends - conditional compilation based on features
44#[cfg(feature = "storage")]
45pub mod storage;
46
47#[cfg(not(feature = "storage"))]
48pub mod storage_memory;
49
50#[cfg(not(feature = "storage"))]
51pub use storage_memory as storage;
52
53pub mod types;
54pub mod vector_db;
55
56// Performance optimization modules
57pub mod arena;
58pub mod cache_optimized;
59#[cfg(all(feature = "parallel", not(target_arch = "wasm32")))]
60pub mod lockfree;
61pub mod simd_intrinsics;
62
63/// Advanced techniques: hypergraphs, learned indexes, neural hashing, TDA (Phase 6)
64pub mod advanced;
65
66// Re-exports
67pub use advanced_features::{
68    ConformalConfig, ConformalPredictor, EnhancedPQ, FilterExpression, FilterStrategy,
69    FilteredSearch, HybridConfig, HybridSearch, MMRConfig, MMRSearch, PQConfig, PredictionSet,
70    BM25,
71};
72
73#[cfg(feature = "storage")]
74pub use agenticdb::AgenticDB;
75
76pub use embeddings::{EmbeddingProvider, HashEmbedding, BoxedEmbeddingProvider};
77#[cfg(feature = "api-embeddings")]
78pub use embeddings::ApiEmbedding;
79
80#[cfg(feature = "real-embeddings")]
81pub use embeddings::CandleEmbedding;
82
// Build-time reminder about AgenticDB's placeholder embeddings.
//
// Compiled only when the `storage` feature is enabled. The anonymous const
// below declares a unit constant marked `#[deprecated]` and then references
// it; that reference is what makes rustc report the `deprecated` lint, with
// the note text attached, while this crate is being compiled.
//
// NOTE(review): `deprecated` is a warn-by-default lint (a warning, not a
// note), so a build that denies warnings would presumably fail here when
// `storage` is on — confirm this is acceptable for CI.
#[cfg(feature = "storage")]
const _: () = {
    #[deprecated(
        since = "0.1.0",
        note = "AgenticDB uses placeholder hash-based embeddings. For semantic search, integrate a real embedding model (ONNX, Candle, or API). See /examples/onnx-embeddings for production setup."
    )]
    const AGENTICDB_EMBEDDING_WARNING: () = ();

    // Yield the deprecated constant as the block's `()` value: naming it is
    // the "use" that triggers the lint.
    AGENTICDB_EMBEDDING_WARNING
};
94
95pub use error::{Result, RuvectorError};
96pub use types::{DistanceMetric, SearchQuery, SearchResult, VectorEntry, VectorId};
97pub use vector_db::VectorDB;
98
#[cfg(test)]
mod tests {
    /// Smoke-tests the package version that Cargo bakes in at compile time.
    ///
    /// The check is purely structural (`MAJOR.MINOR.PATCH` with numeric
    /// components) rather than pinned to a particular release line, so a
    /// version bump does not require touching this test.
    #[test]
    fn test_version() {
        let version = env!("CARGO_PKG_VERSION");
        assert!(!version.is_empty(), "Version should not be empty");

        // Drop any pre-release (`-alpha.1`) or build-metadata (`+build5`)
        // suffix; only the numeric core is validated.
        let core = version
            .split(|c: char| c == '-' || c == '+')
            .next()
            .unwrap_or(version);

        let components: Vec<&str> = core.split('.').collect();
        assert_eq!(
            components.len(),
            3,
            "Version core should be MAJOR.MINOR.PATCH, got {}",
            version
        );
        for component in components {
            assert!(
                component.parse::<u64>().is_ok(),
                "Version component {:?} in {} should be numeric",
                component,
                version
            );
        }
    }
}