Skip to main content

velesdb_core/quantization/
mod.rs

//! Scalar Quantization (SQ8) and Binary Quantization for memory-efficient vector storage.
//!
//! This module implements quantization strategies to reduce memory usage:
//! scalar (SQ8), binary, product quantization (PQ), and `RaBitQ`.
//!
//! ## Benefits
//!
//! | Metric | f32 | SQ8 | Binary |
//! |--------|-----|-----|--------|
//! | RAM/vector (768d) | 3 KB | 770 bytes | 96 bytes |
//! | Cache efficiency | Baseline | ~4x better | ~32x better |
//! | Recall loss | 0% | ~0.5-1% | ~5-10% |
12
13use std::io;
14
15use serde::{Deserialize, Serialize};
16
17mod binary;
18mod pq;
19pub(crate) mod pq_kmeans;
20pub(crate) mod pq_opq;
21mod rabitq;
22mod scalar;
23
24// Re-export binary quantization
25pub use binary::BinaryQuantizedVector;
26#[allow(unused_imports)]
27pub(crate) use pq::distance_pq_l2;
28pub use pq::{PQCodebook, PQVector, ProductQuantizer};
29#[cfg(feature = "persistence")]
30pub use pq_opq::train_opq;
31
32// Re-export RaBitQ quantization
33pub use rabitq::{RaBitQCorrection, RaBitQIndex, RaBitQVector};
34
35// Re-export scalar quantization
36pub use scalar::{
37    cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
38    dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
39    QuantizedVector,
40};
41
/// Trait for serializing and deserializing quantized vectors to/from bytes.
///
/// Provides a uniform interface for byte-level serialization across
/// different quantization strategies (SQ8, Binary).
///
/// The `Sized` bound is required because [`QuantizationCodec::from_bytes`]
/// returns `Self` by value.
pub trait QuantizationCodec: Sized {
    /// Serializes the quantized vector to a byte representation.
    fn to_bytes(&self) -> Vec<u8>;

    /// Deserializes a quantized vector from bytes.
    ///
    /// # Errors
    ///
    /// Returns an error if the byte slice is too short or contains invalid data.
    fn from_bytes(bytes: &[u8]) -> io::Result<Self>;
}
57
58/// Storage mode for vectors.
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
60#[serde(rename_all = "lowercase")]
61#[non_exhaustive]
62pub enum StorageMode {
63    /// Full precision f32 storage (default).
64    #[default]
65    Full,
66    /// 8-bit scalar quantization for 4x memory reduction.
67    SQ8,
68    /// 1-bit binary quantization for 32x memory reduction.
69    /// Best for edge/IoT devices with limited RAM.
70    Binary,
71    /// Product Quantization (PQ) for aggressive lossy compression (8x-16x typical).
72    ProductQuantization,
73    /// `RaBitQ` binary quantization for 32x compression with scalar correction.
74    RaBitQ,
75}