Skip to main content

velesdb_core/quantization/
mod.rs

//! Scalar Quantization (SQ8) and Binary Quantization for memory-efficient vector storage.
//!
//! This module implements quantization strategies (SQ8, Binary, PQ, `RaBitQ`)
//! that reduce memory usage.
//!
//! ## Benefits
//!
//! | Metric | f32 | SQ8 | Binary |
//! |--------|-----|-----|--------|
//! | RAM/vector (768d) | 3 KB | 770 bytes | 96 bytes |
//! | Cache efficiency | Baseline | ~4x better | ~32x better |
//! | Recall loss | 0% | ~0.5-1% | ~5-10% |
//!
//! ## Engine integration status
//!
//! The figures above describe the quantization primitives themselves. In the
//! collection query path: `RaBitQ` (binary traversal backend) and PQ (ADC
//! rescoring) are wired end-to-end. Persistence across reopens covers
//! TRAIN-QUANTIZER-produced artifacts (`rabitq.idx`, `codebook.pq`); a PQ
//! quantizer trained lazily from inserts (no TRAIN statement) is in-memory
//! only and retrains after a restart. SQ8/Binary collection modes currently
//! maintain caches that no search path consumes — collection search stays
//! full-precision f32 for those modes. See `docs/guides/QUANTIZATION.md`.
23
24use std::io;
25
26use serde::{Deserialize, Serialize};
27
28/// Validate that a flat row-major rotation matrix has exactly `dimension^2`
29/// elements, returning [`crate::error::Error::IndexCorrupted`] otherwise.
30///
31/// Shared by the PQ (OPQ) and `RaBitQ` load-time validators so the unchecked
32/// `matrix[i * d + j]` indexing in their rotation kernels stays in bounds.
33pub(crate) fn validate_rotation_len(
34    len: usize,
35    dimension: usize,
36    label: &str,
37) -> Result<(), crate::error::Error> {
38    // `checked_mul`: `dimension` is attacker-controlled post-deserialize; a wrapping
39    // `dimension * dimension` (esp. on 32-bit targets) could yield a small `expected`
40    // that a tampered `len` matches, false-passing the shape check that the unchecked
41    // `matrix[i * d + j]` indexing relies on.
42    let Some(expected) = dimension.checked_mul(dimension) else {
43        return Err(crate::error::Error::IndexCorrupted(format!(
44            "{label} rotation dimension {dimension} squared overflows usize"
45        )));
46    };
47    if len != expected {
48        return Err(crate::error::Error::IndexCorrupted(format!(
49            "{label} rotation has {len} elements, expected dimension^2 = {expected}"
50        )));
51    }
52    Ok(())
53}
54
55mod binary;
56pub(crate) mod codec_helpers;
57mod pq;
58pub(crate) mod pq_kmeans;
59pub(crate) mod pq_opq;
60#[cfg(feature = "persistence")]
61mod pq_persistence;
62mod rabitq;
63pub(crate) mod rabitq_store;
64mod scalar;
65
66// Re-export binary quantization
67pub use binary::BinaryQuantizedVector;
68#[allow(unused_imports)] // Called from vector.rs search path (persistence-gated).
69pub(crate) use pq::distance_pq_l2;
70#[allow(unused_imports)] // Called from vector.rs search path (persistence-gated).
71pub(crate) use pq::pq_adc_batch_rescore;
72pub use pq::{PQCodebook, PQVector, ProductQuantizer};
73#[cfg(feature = "persistence")]
74pub use pq_opq::train_opq;
75
76// Re-export RaBitQ quantization
77#[cfg(feature = "persistence")]
78pub(crate) use rabitq::PreparedQuery;
79pub use rabitq::{RaBitQCorrection, RaBitQIndex, RaBitQVector};
80#[cfg(feature = "persistence")]
81pub(crate) use rabitq_store::RaBitQVectorStore;
82
83// Re-export scalar quantization
84pub use scalar::{
85    cosine_similarity_quantized, cosine_similarity_quantized_simd, dot_product_quantized,
86    dot_product_quantized_simd, euclidean_squared_quantized, euclidean_squared_quantized_simd,
87    QuantizedVector,
88};
89
/// Byte-level codec implemented by quantized vector types.
///
/// Gives the different quantization strategies (SQ8, Binary) one shared
/// contract for turning a quantized vector into bytes and rebuilding it
/// from them.
pub trait QuantizationCodec: Sized {
    /// Encodes this quantized vector as an owned byte buffer.
    fn to_bytes(&self) -> Vec<u8>;

    /// Rebuilds a quantized vector from `bytes`.
    ///
    /// # Errors
    ///
    /// Returns an error when `bytes` is too short or holds invalid data.
    fn from_bytes(bytes: &[u8]) -> io::Result<Self>;
}
105
/// Canonical name of each [`StorageMode`] variant, listed in declaration order.
///
/// This slice is the single source of truth for the storage-mode name set
/// exported to downstream crates and bindings (Python
/// `velesdb.STORAGE_MODES`, the integrations security guard). Every entry
/// equals the matching variant's
/// [`canonical_name`](StorageMode::canonical_name). A unit test checks that
/// the slice stays exhaustive, so adding a variant without extending it
/// fails CI.
pub const STORAGE_MODE_NAMES: &[&str] = &[
    "full",
    "sq8",
    "binary",
    "pq",
    "rabitq",
];
114
115/// Storage mode for vectors.
116#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
117#[serde(rename_all = "lowercase")]
118#[non_exhaustive]
119pub enum StorageMode {
120    /// Full precision f32 storage (default).
121    #[default]
122    Full,
123    /// 8-bit scalar quantization for 4x memory reduction.
124    SQ8,
125    /// 1-bit binary quantization for 32x memory reduction.
126    /// Best for edge/IoT devices with limited RAM.
127    Binary,
128    /// Product Quantization (PQ) for aggressive lossy compression (8x-16x typical).
129    ProductQuantization,
130    /// `RaBitQ` binary quantization for 32x compression with scalar correction.
131    RaBitQ,
132}
133
134impl StorageMode {
135    /// Returns the canonical lowercase name for this storage mode.
136    ///
137    /// This is the single source of truth for string representations,
138    /// used by [`std::fmt::Display`], [`std::str::FromStr`], and downstream crates.
139    #[must_use]
140    pub const fn canonical_name(self) -> &'static str {
141        match self {
142            Self::Full => "full",
143            Self::SQ8 => "sq8",
144            Self::Binary => "binary",
145            Self::ProductQuantization => "pq",
146            Self::RaBitQ => "rabitq",
147        }
148    }
149
150    /// Parses a storage mode string with alias support.
151    ///
152    /// Accepted aliases (case-insensitive):
153    /// - `full`, `f32` -> `Full`
154    /// - `sq8`, `int8` -> `SQ8`
155    /// - `binary`, `bit` -> `Binary`
156    /// - `pq`, `product_quantization` -> `ProductQuantization`
157    /// - `rabitq` -> `RaBitQ`
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use velesdb_core::StorageMode;
163    ///
164    /// assert_eq!(StorageMode::parse_alias("sq8"), Some(StorageMode::SQ8));
165    /// assert_eq!(StorageMode::parse_alias("INT8"), Some(StorageMode::SQ8));
166    /// assert_eq!(StorageMode::parse_alias("unknown"), None);
167    /// ```
168    #[must_use]
169    pub fn parse_alias(value: &str) -> Option<Self> {
170        match value.trim().to_lowercase().as_str() {
171            "full" | "f32" => Some(Self::Full),
172            "sq8" | "int8" => Some(Self::SQ8),
173            "binary" | "bit" => Some(Self::Binary),
174            "pq" | "product_quantization" => Some(Self::ProductQuantization),
175            "rabitq" => Some(Self::RaBitQ),
176            _ => None,
177        }
178    }
179}
180
181impl std::fmt::Display for StorageMode {
182    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
183        f.write_str(self.canonical_name())
184    }
185}
186
187impl std::str::FromStr for StorageMode {
188    type Err = String;
189
190    fn from_str(s: &str) -> Result<Self, Self::Err> {
191        Self::parse_alias(s).ok_or_else(|| {
192            format!(
193                "Unknown storage mode '{s}'. Valid options: full, f32, sq8, int8, binary, bit, pq, product_quantization, rabitq"
194            )
195        })
196    }
197}
198
#[cfg(test)]
mod storage_mode_parsing_tests {
    use super::{StorageMode, STORAGE_MODE_NAMES};

    /// Every variant, in declaration order.
    const ALL_MODES: [StorageMode; 5] = [
        StorageMode::Full,
        StorageMode::SQ8,
        StorageMode::Binary,
        StorageMode::ProductQuantization,
        StorageMode::RaBitQ,
    ];

    /// Declaration-order position of `mode`.
    ///
    /// The `match` deliberately has no wildcard arm: adding a variant breaks
    /// compilation here until it is listed, which forces these tests (and the
    /// exported name list they check) to be revisited.
    fn ordinal(mode: StorageMode) -> usize {
        match mode {
            StorageMode::Full => 0,
            StorageMode::SQ8 => 1,
            StorageMode::Binary => 2,
            StorageMode::ProductQuantization => 3,
            StorageMode::RaBitQ => 4,
        }
    }

    /// Parses `text` through `FromStr`, panicking when it is rejected.
    fn parsed(text: &str) -> StorageMode {
        text.parse::<StorageMode>().unwrap()
    }

    #[test]
    fn storage_mode_names_is_exhaustive_and_canonical() {
        assert_eq!(ALL_MODES.len(), STORAGE_MODE_NAMES.len());
        let pairs = ALL_MODES.iter().zip(STORAGE_MODE_NAMES.iter());
        for (index, (mode, name)) in pairs.enumerate() {
            assert_eq!(ordinal(*mode), index);
            assert_eq!(*name, mode.canonical_name());
        }
    }

    #[test]
    fn test_parse_all_canonical_names() {
        let cases = [
            ("full", StorageMode::Full),
            ("sq8", StorageMode::SQ8),
            ("binary", StorageMode::Binary),
            ("pq", StorageMode::ProductQuantization),
            ("rabitq", StorageMode::RaBitQ),
        ];
        for (text, expected) in cases {
            assert_eq!(parsed(text), expected);
        }
    }

    #[test]
    fn test_parse_aliases() {
        let cases = [
            ("f32", StorageMode::Full),
            ("int8", StorageMode::SQ8),
            ("bit", StorageMode::Binary),
            ("product_quantization", StorageMode::ProductQuantization),
        ];
        for (text, expected) in cases {
            assert_eq!(parsed(text), expected);
        }
    }

    #[test]
    fn test_parse_case_insensitive() {
        let cases = [
            ("SQ8", StorageMode::SQ8),
            ("FULL", StorageMode::Full),
            ("RaBitQ", StorageMode::RaBitQ),
        ];
        for (text, expected) in cases {
            assert_eq!(parsed(text), expected);
        }
    }

    #[test]
    fn test_parse_unknown_returns_error() {
        for text in ["unknown", ""] {
            assert!(text.parse::<StorageMode>().is_err());
        }
    }

    #[test]
    fn test_canonical_name_roundtrip() {
        for mode in ALL_MODES {
            assert_eq!(parsed(mode.canonical_name()), mode);
        }
    }

    #[test]
    fn test_display_uses_canonical_name() {
        let cases = [
            (StorageMode::Full, "full"),
            (StorageMode::SQ8, "sq8"),
            (StorageMode::Binary, "binary"),
            (StorageMode::ProductQuantization, "pq"),
            (StorageMode::RaBitQ, "rabitq"),
        ];
        for (mode, text) in cases {
            assert_eq!(mode.to_string(), text);
        }
    }
}