edgevec/lib.rs
1//! # EdgeVec
2//!
3//! High-performance embedded vector database for Browser, Node, and Edge.
4//!
5//! ## Current Status
6//!
7//! **PHASE 3: Implementation (Week 7 Complete)**
8//!
9//! **Status:** Week 7 Complete — Persistence Hardened
10//!
11//! Core vector storage, HNSW graph indexing, and full durability (WAL + Snapshots) are implemented and verified.
12//!
13//! ## Implemented Features
14//!
15//! - **HNSW Graph**: Full insertion and search implementation with heuristic optimization.
16//! - **Vector Storage**: Contiguous memory layout for fast access.
17//! - **Scalar Quantization (SQ8)**: 4x memory reduction (f32 -> u8) with high accuracy.
18//! - **Durability**: Write-Ahead Log (WAL) with CRC32 checksums, crash recovery, and atomic snapshots.
19//! - **Metrics**: L2 (Euclidean), Cosine, and Dot Product distance functions.
20//!
21//! ## Development Protocol
22//!
23//! `EdgeVec` follows a military-grade development protocol:
24//!
25//! 1. **Architecture Phase** — Design docs must be approved before planning
26//! 2. **Planning Phase** — Roadmap must be approved before coding
27//! 3. **Implementation Phase** — Weekly tasks must be approved before coding
28//! 4. **All gates require `HOSTILE_REVIEWER` approval**
29//!
30//! ## Example
31//!
32//! ```rust
33//! use edgevec::{HnswConfig, HnswIndex, Metric, VectorStorage};
34//!
35//! // 1. Create Config
36//! let config = HnswConfig::new(128);
37//!
38//! // 2. Initialize Storage and Index
39//! let mut storage = VectorStorage::new(&config, None);
40//! let mut index = HnswIndex::new(config, &storage).expect("failed to create index");
41//!
42//! // 3. Insert Vectors
43//! let vector = vec![0.5; 128];
44//! let id = index.insert(&vector, &mut storage).expect("failed to insert");
45//!
46//! // 4. Search
47//! let query = vec![0.5; 128];
48//! let results = index.search(&query, 10, &storage).expect("failed to search");
49//!
50//! assert!(!results.is_empty());
51//! assert_eq!(results[0].vector_id, id);
52//! ```
53//!
54//! ## Persistence Example
55//!
56//! ```rust,no_run
57//! use edgevec::{HnswConfig, HnswIndex, VectorStorage};
58//! use edgevec::persistence::{write_snapshot, read_snapshot, MemoryBackend};
59//!
60//! // Create index and storage
61//! let config = HnswConfig::new(128);
62//! let mut storage = VectorStorage::new(&config, None);
63//! let mut index = HnswIndex::new(config, &storage).expect("failed to create");
64//!
65//! // Save snapshot using storage backend
66//! let mut backend = MemoryBackend::new();
67//! write_snapshot(&index, &storage, &mut backend).expect("failed to save");
68//!
69//! // Load snapshot
70//! let (loaded_index, loaded_storage) = read_snapshot(&backend).expect("failed to load");
71//! ```
72//!
73//! ## Next Steps (Phase 5)
74//!
75//! 1. **Documentation**: Finalize API docs.
76//! 2. **NPM Package**: Release to npm registry.
77//! 3. **Performance**: Final tuning and benchmarks.
78//!
79//! ## Documentation
80//!
81//! - [Genesis Workflow](docs/GENESIS_WORKFLOW.md)
82//! - [Agent Commands](.cursor/commands/README.md)
83//! - [Supreme Rules](.cursorrules)
84
85#![doc = include_str!("../README.md")]
86#![deny(missing_docs)]
87#![deny(clippy::all)]
88#![warn(clippy::pedantic)]
89#![allow(clippy::module_name_repetitions)]
90#![allow(clippy::doc_markdown)]
91#![allow(clippy::pub_underscore_fields)]
92#![allow(clippy::too_many_lines)]
93#![allow(clippy::uninlined_format_args)]
94#![allow(clippy::missing_panics_doc)]
95#![allow(clippy::cast_possible_truncation)]
96#![allow(clippy::cast_precision_loss)]
97#![allow(clippy::missing_errors_doc)]
98#![allow(clippy::redundant_closure_for_method_calls)]
99#![allow(clippy::wildcard_imports)]
100#![allow(clippy::map_unwrap_or)]
101#![allow(clippy::must_use_candidate)]
102
103/// Persistence and file format definitions.
104pub mod persistence;
105
106/// Unified error handling.
107pub mod error;
108
109/// Batch insertion API.
110pub mod batch;
111
112/// HNSW Graph implementation.
113pub mod hnsw;
114
115/// Distance metrics.
116pub mod metric;
117
118/// Vector storage.
119pub mod storage;
120
121/// WASM bindings.
122pub mod wasm;
123
124/// Quantization support.
125pub mod quantization;
126
127/// SIMD capability detection and runtime optimization.
128pub mod simd;
129
130/// Metadata storage for vector annotations.
131pub mod metadata;
132
133/// Filter expression parsing and evaluation.
134pub mod filter;
135
136/// Flat (brute-force) index for binary vectors.
137pub mod flat;
138
139/// Index implementations (FlatIndex, etc.).
140pub mod index;
141
142/// Sparse vector support for hybrid search.
143#[cfg(feature = "sparse")]
144pub mod sparse;
145
146// =============================================================================
147// Index Type Selection
148// =============================================================================
149
150/// Index type for vector search.
151///
152/// EdgeVec supports two index types with different performance characteristics:
153///
154/// | Index Type | Insert | Search (1M) | Recall | Best For |
155/// |------------|--------|-------------|--------|----------|
156/// | **Flat** | O(1) ~1μs | O(n) ~5-10ms | 100% (exact) | Real-time apps, <1M vectors |
157/// | **HNSW** | O(log n) ~2ms | O(log n) ~2ms | 90-95% | Large datasets, batch insert |
158///
159/// # Example (Rust)
160///
161/// ```rust
162/// use edgevec::{IndexType, HnswConfig, BinaryFlatIndex};
163///
164/// // Create a flat index for insert-heavy workloads
165/// let flat = BinaryFlatIndex::new(1024);
166///
167/// // Create an HNSW index for large-scale search
168/// let config = HnswConfig::new(1024);
169/// let index_type = IndexType::Hnsw(config);
170/// ```
171#[derive(Debug, Clone)]
172pub enum IndexType {
173 /// Brute force search (O(1) insert, O(n) search).
174 ///
175 /// Use for:
176 /// - Insert-heavy workloads (semantic caching)
177 /// - Datasets < 1M vectors
178 /// - When 100% recall (exact search) is required
179 /// - When insert latency is critical (~1μs vs ~2ms for HNSW)
180 Flat,
181
182 /// HNSW graph index (O(log n) insert, O(log n) search).
183 ///
184 /// Use for:
185 /// - Large datasets (>1M vectors)
186 /// - Read-heavy workloads
187 /// - When approximate nearest neighbors is acceptable
188 Hnsw(HnswConfig),
189}
190
191impl IndexType {
192 /// Create a Flat index type.
193 #[must_use]
194 pub fn flat() -> Self {
195 IndexType::Flat
196 }
197
198 /// Create an HNSW index type with configuration for given dimensions.
199 ///
200 /// Uses default HNSW parameters (M=12, M0=24, ef_construction=100, ef_search=50).
201 #[must_use]
202 pub fn hnsw(dimensions: u32) -> Self {
203 IndexType::Hnsw(HnswConfig::new(dimensions))
204 }
205
206 /// Create an HNSW index type with custom configuration.
207 #[must_use]
208 pub fn hnsw_with_config(config: HnswConfig) -> Self {
209 IndexType::Hnsw(config)
210 }
211
212 /// Check if this is a Flat index.
213 #[must_use]
214 pub fn is_flat(&self) -> bool {
215 matches!(self, IndexType::Flat)
216 }
217
218 /// Check if this is an HNSW index.
219 #[must_use]
220 pub fn is_hnsw(&self) -> bool {
221 matches!(self, IndexType::Hnsw(_))
222 }
223}
224
225/// Hybrid search combining dense and sparse retrieval.
226#[cfg(feature = "sparse")]
227pub mod hybrid;
228
229pub use batch::BatchInsertable;
230pub use error::BatchError;
231pub use flat::{BinaryFlatIndex, BinaryFlatIndexError, BinaryFlatSearchResult};
232pub use hnsw::{BatchDeleteError, BatchDeleteResult, HnswConfig, HnswIndex, SearchResult};
233pub use metric::Metric;
234
// `IndexType` is defined directly in this crate root (see above), so it is
// already public as `edgevec::IndexType` and needs no `pub use` re-export.
237pub use persistence::ChunkedWriter;
238pub use quantization::{BinaryQuantizer, QuantizedVector, QuantizerConfig, ScalarQuantizer};
239pub use simd::{
240 capabilities, detect_neon, select_backend, warn_if_suboptimal, SimdBackend, SimdCapabilities,
241};
242pub use storage::VectorStorage;
243
244pub use index::{
245 DistanceMetric, FlatIndex, FlatIndexConfig, FlatIndexError, FlatIndexHeader, FlatSearchResult,
246 FLAT_INDEX_MAGIC, FLAT_INDEX_VERSION,
247};
248
249#[cfg(feature = "sparse")]
250pub use sparse::{SparseError, SparseVector};
251
252/// The crate version string.
253pub const VERSION: &str = env!("CARGO_PKG_VERSION");
254
255/// Returns the crate version string.
256///
257/// # Returns
258///
259/// The crate version string.
260///
261/// # Example
262///
263/// ```rust
264/// let version = edgevec::version();
265/// assert!(!version.is_empty());
266/// ```
267#[must_use]
268pub fn version() -> &'static str {
269 VERSION
270}
271
#[cfg(test)]
mod tests {
    use super::version;

    /// The version string reported by the crate must never be blank.
    #[test]
    fn test_version_not_empty() {
        let reported = version();
        assert!(!reported.is_empty());
    }
}