Skip to main content

velesdb_core/quantization/
mod.rs

//! Scalar (SQ8), Binary, Product (PQ) and RaBitQ quantization for memory-efficient vector storage.
2//!
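//! This module implements quantization strategies to reduce memory usage:
//!
//! - **SQ8** (`scalar`): 8-bit scalar quantization.
//! - **Binary** (`binary`): 1-bit binary quantization.
//! - **PQ** (`pq`): product quantization.
//! - **RaBitQ** (`rabitq`): binary quantization with scalar correction.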
4//!
5//! ## Benefits
6//!
7//! | Metric | f32 | SQ8 | Binary |
8//! |--------|-----|-----|--------|
9//! | RAM/vector (768d) | 3 KB | 770 bytes | 96 bytes |
10//! | Cache efficiency | Baseline | ~4x better | ~32x better |
11//! | Recall loss | 0% | ~0.5-1% | ~5-10% |
12
13use serde::{Deserialize, Serialize};
14
15mod binary;
16mod pq;
17mod rabitq;
18mod scalar;
19
20// Re-export binary quantization
21pub use binary::BinaryQuantizedVector;
22#[allow(unused_imports)]
23pub(crate) use pq::distance_pq_l2;
24#[cfg(feature = "persistence")]
25pub use pq::train_opq;
26pub use pq::{PQCodebook, PQVector, ProductQuantizer};
27
28// Re-export RaBitQ quantization
29pub use rabitq::{RaBitQCorrection, RaBitQIndex, RaBitQVector};
30
31// Re-export scalar quantization
32pub use scalar::{
33    cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
34    dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
35    QuantizedVector,
36};
37
38/// Storage mode for vectors.
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
40#[serde(rename_all = "lowercase")]
41#[non_exhaustive]
42pub enum StorageMode {
43    /// Full precision f32 storage (default).
44    #[default]
45    Full,
46    /// 8-bit scalar quantization for 4x memory reduction.
47    SQ8,
48    /// 1-bit binary quantization for 32x memory reduction.
49    /// Best for edge/IoT devices with limited RAM.
50    Binary,
51    /// Product Quantization (PQ) for aggressive lossy compression (8x-16x typical).
52    ProductQuantization,
53    /// `RaBitQ` binary quantization for 32x compression with scalar correction.
54    RaBitQ,
55}