Skip to main content

ruvector_core/
lib.rs

1//! # Ruvector Core
2//!
3//! High-performance Rust-native vector database with HNSW indexing and SIMD-optimized operations.
4//!
5//! ## Working Features (Tested & Benchmarked)
6//!
7//! - **HNSW Indexing**: Approximate nearest neighbor search with O(log n) complexity
8//! - **SIMD Distance**: SimSIMD-powered distance calculations (~16M ops/sec for 512-dim)
9//! - **Quantization**: Scalar (4x), Int4 (8x), Product (8-16x), and Binary (32x) compression with distance support
10//! - **Persistence**: REDB-based storage with config persistence
11//! - **Search**: ~2.5K queries/sec on 10K vectors (benchmarked)
12//!
13//! ## ⚠️ Experimental/Incomplete Features - READ BEFORE USE
14//!
15//! - **AgenticDB**: ⚠️⚠️⚠️ **CRITICAL WARNING** ⚠️⚠️⚠️
16//!   - Uses PLACEHOLDER hash-based embeddings by default, NOT real semantic embeddings
17//!   - "dog" and "cat" will NOT be similar (different characters)
18//!   - "dog" and "god" WILL be similar (same characters) - **This is wrong!**
19//!   - **Use `OnnxEmbedding` for production** (feature: `onnx-embeddings`)
20//!   - See [`embeddings::OnnxEmbedding`] for real semantic embeddings
21//! - **Advanced Features**: Conformal prediction, hybrid search - functional but less tested
22//!
23//! ## What This Is NOT
24//!
25//! - This is NOT a complete RAG solution - you need external embedding models
26//! - Examples use mock embeddings for demonstration only
27
28#![allow(missing_docs)]
29#![warn(clippy::all)]
30#![allow(clippy::incompatible_msrv)]
31
32pub mod advanced_features;
33
34// AgenticDB requires storage feature
35#[cfg(feature = "storage")]
36pub mod agenticdb;
37
38pub mod distance;
39pub mod embeddings;
40pub mod error;
41pub mod index;
42pub mod quantization;
43
44// Storage backends - conditional compilation based on features
45#[cfg(feature = "storage")]
46pub mod storage;
47
48#[cfg(not(feature = "storage"))]
49pub mod storage_memory;
50
51#[cfg(not(feature = "storage"))]
52pub use storage_memory as storage;
53
54pub mod types;
55pub mod vector_db;
56
57// Performance optimization modules
58pub mod arena;
59pub mod cache_optimized;
60#[cfg(all(feature = "parallel", not(target_arch = "wasm32")))]
61pub mod lockfree;
62pub mod simd_intrinsics;
63
64/// Unified Memory Pool and Paging System (ADR-006)
65///
66/// High-performance paged memory management for LLM inference:
67/// - 2MB page-granular allocation with best-fit strategy
68/// - Reference-counted pinning with RAII guards
69/// - LRU eviction with hysteresis for thrash prevention
70/// - Multi-tenant isolation with Hot/Warm/Cold residency tiers
71pub mod memory;
72
73/// Advanced techniques: hypergraphs, learned indexes, neural hashing, TDA (Phase 6)
74pub mod advanced;
75
76// Re-exports
77pub use advanced_features::{
78    ConformalConfig, ConformalPredictor, EnhancedPQ, FilterExpression, FilterStrategy,
79    FilteredSearch, FusionConfig, FusionStrategy, HybridConfig, HybridSearch, MMRConfig,
80    MMRSearch, PQConfig, PredictionSet, ScoredDoc, SparseIndex, SparseVector, BM25,
81    fuse_rankings,
82};
83
84#[cfg(feature = "storage")]
85pub use agenticdb::{
86    AgenticDB, PolicyAction, PolicyEntry, PolicyMemoryStore, SessionStateIndex, SessionTurn,
87    WitnessEntry, WitnessLog,
88};
89
90#[cfg(feature = "api-embeddings")]
91pub use embeddings::ApiEmbedding;
92pub use embeddings::{BoxedEmbeddingProvider, EmbeddingProvider, HashEmbedding};
93
94#[cfg(feature = "real-embeddings")]
95pub use embeddings::CandleEmbedding;
96
97#[cfg(feature = "onnx-embeddings")]
98pub use embeddings::OnnxEmbedding;
99
// Marker recording the AgenticDB embedding limitation; only compiled when the
// `storage` feature (which gates AgenticDB) is enabled.
//
// NOTE(review): `#[allow(deprecated)]` on this item covers the reference to the
// deprecated marker inside the block, so as written the deprecation message is
// not expected to be reported during a build. Confirm whether the warning is
// meant to be visible before relying on it.
#[cfg(feature = "storage")]
#[allow(deprecated, clippy::let_unit_value)]
const _: () = {
    #[deprecated(
        since = "0.1.0",
        note = "AgenticDB uses placeholder hash-based embeddings. For semantic search, use OnnxEmbedding (feature: onnx-embeddings) or ApiEmbedding. See ADR-114 for details."
    )]
    const AGENTICDB_EMBEDDING_WARNING: () = ();

    // Reference the marker so it counts as used; its unit value is also the
    // value of the enclosing anonymous constant.
    AGENTICDB_EMBEDDING_WARNING
};
111
112pub use error::{Result, RuvectorError};
113pub use types::{DistanceMetric, SearchQuery, SearchResult, VectorEntry, VectorId};
114pub use vector_db::VectorDB;
115
116// Quantization types (ADR-001)
117pub use quantization::{
118    BinaryQuantized, Int4Quantized, ProductQuantized, QuantizedVector, ScalarQuantized,
119};
120
121// Memory management types (ADR-001)
122pub use arena::{Arena, ArenaVec, BatchVectorAllocator, CacheAlignedVec, CACHE_LINE_SIZE};
123
124// Lock-free structures (requires parallel feature)
125#[cfg(all(feature = "parallel", not(target_arch = "wasm32")))]
126pub use lockfree::{
127    AtomicVectorPool, BatchItem, BatchResult, LockFreeBatchProcessor, LockFreeCounter,
128    LockFreeStats, LockFreeWorkQueue, ObjectPool, PooledObject, PooledVector, StatsSnapshot,
129    VectorPoolStats,
130};
131
132// Cache-optimized storage
133pub use cache_optimized::SoAVectorStorage;
134
#[cfg(test)]
mod tests {
    /// Checks that the package version supplied by Cargo is non-empty and belongs
    /// to one of the release lines this test accepts.
    ///
    /// The version comes from `CARGO_PKG_VERSION` via `env!`, so it is resolved at
    /// compile time and follows the manifest instead of a single pinned string.
    #[test]
    fn test_version() {
        // Accepted release lines: 2.x (the current major version) and 0.1.x.
        const ACCEPTED_PREFIXES: [&str; 2] = ["2.", "0.1."];

        let version = env!("CARGO_PKG_VERSION");
        assert!(!version.is_empty(), "Version should not be empty");

        let on_accepted_line = ACCEPTED_PREFIXES
            .iter()
            .any(|&prefix| version.starts_with(prefix));
        assert!(
            on_accepted_line,
            "Version should be 2.x or 0.1.x, got: {}",
            version
        );
    }
}