velesdb_core/quantization/mod.rs
//! Scalar Quantization (SQ8) and Binary Quantization for memory-efficient vector storage.
//!
//! This module implements quantization strategies to reduce memory usage:
//!
//! - **SQ8** (`scalar`): 8-bit scalar quantization.
//! - **Binary** (`binary`): 1-bit binary quantization.
//! - **PQ** (`pq`): product quantization.
//! - **RaBitQ** (`rabitq`): binary quantization with scalar correction.
//!
//! ## Benefits
//!
//! | Metric | f32 | SQ8 | Binary |
//! |--------|-----|-----|--------|
//! | RAM/vector (768d) | 3 KB | 770 bytes | 96 bytes |
//! | Cache efficiency | Baseline | ~4x better | ~32x better |
//! | Recall loss | 0% | ~0.5-1% | ~5-10% |
12
13use std::io;
14
15use serde::{Deserialize, Serialize};
16
17mod binary;
18mod pq;
19pub(crate) mod pq_kmeans;
20pub(crate) mod pq_opq;
21mod rabitq;
22mod scalar;
23
24// Re-export binary quantization
25pub use binary::BinaryQuantizedVector;
26#[allow(unused_imports)]
27pub(crate) use pq::distance_pq_l2;
28pub use pq::{PQCodebook, PQVector, ProductQuantizer};
29#[cfg(feature = "persistence")]
30pub use pq_opq::train_opq;
31
32// Re-export RaBitQ quantization
33pub use rabitq::{RaBitQCorrection, RaBitQIndex, RaBitQVector};
34
35// Re-export scalar quantization
36pub use scalar::{
37 cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
38 dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
39 QuantizedVector,
40};
41
/// Trait for serializing and deserializing quantized vectors to/from bytes.
///
/// Provides a uniform interface for byte-level serialization across
/// different quantization strategies (e.g. SQ8, Binary). The trait has no
/// default methods, so the exact byte layout is defined by each implementor.
///
/// The `Sized` bound is required because [`QuantizationCodec::from_bytes`]
/// returns `Self` by value.
pub trait QuantizationCodec: Sized {
    /// Serializes the quantized vector to an owned byte buffer.
    fn to_bytes(&self) -> Vec<u8>;

    /// Deserializes a quantized vector from a borrowed byte slice.
    ///
    /// # Errors
    ///
    /// Returns an [`io::Error`] if the byte slice is too short or contains invalid data.
    fn from_bytes(bytes: &[u8]) -> io::Result<Self>;
}
57
58/// Storage mode for vectors.
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
60#[serde(rename_all = "lowercase")]
61#[non_exhaustive]
62pub enum StorageMode {
63 /// Full precision f32 storage (default).
64 #[default]
65 Full,
66 /// 8-bit scalar quantization for 4x memory reduction.
67 SQ8,
68 /// 1-bit binary quantization for 32x memory reduction.
69 /// Best for edge/IoT devices with limited RAM.
70 Binary,
71 /// Product Quantization (PQ) for aggressive lossy compression (8x-16x typical).
72 ProductQuantization,
73 /// `RaBitQ` binary quantization for 32x compression with scalar correction.
74 RaBitQ,
75}