// ruvector_router_core/quantization.rs

//! Quantization techniques for memory compression

use crate::error::{Result, VectorDbError};
use crate::types::QuantizationType;
use serde::{Deserialize, Serialize};

7/// Quantized vector representation
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub enum QuantizedVector {
10    /// No quantization - full precision float32
11    None(Vec<f32>),
12    /// Scalar quantization to int8
13    Scalar {
14        /// Quantized values
15        data: Vec<u8>,
16        /// Minimum value for dequantization
17        min: f32,
18        /// Scale factor for dequantization
19        scale: f32,
20    },
21    /// Product quantization
22    Product {
23        /// Codebook indices
24        codes: Vec<u8>,
25        /// Number of subspaces
26        subspaces: usize,
27    },
28    /// Binary quantization (1 bit per dimension)
29    Binary {
30        /// Packed binary data
31        data: Vec<u8>,
32        /// Threshold value
33        threshold: f32,
34    },
35}
36
37/// Quantize a vector using specified quantization type
38pub fn quantize(vector: &[f32], qtype: QuantizationType) -> Result<QuantizedVector> {
39    match qtype {
40        QuantizationType::None => Ok(QuantizedVector::None(vector.to_vec())),
41        QuantizationType::Scalar => Ok(scalar_quantize(vector)),
42        QuantizationType::Product { subspaces, k } => product_quantize(vector, subspaces, k),
43        QuantizationType::Binary => Ok(binary_quantize(vector)),
44    }
45}
46
47/// Dequantize a quantized vector back to float32
48pub fn dequantize(quantized: &QuantizedVector) -> Vec<f32> {
49    match quantized {
50        QuantizedVector::None(v) => v.clone(),
51        QuantizedVector::Scalar { data, min, scale } => scalar_dequantize(data, *min, *scale),
52        QuantizedVector::Product { codes, subspaces } => {
53            // Placeholder - would need codebooks stored separately
54            vec![0.0; codes.len() * (codes.len() / subspaces)]
55        }
56        QuantizedVector::Binary { data, threshold } => binary_dequantize(data, *threshold),
57    }
58}
59
60/// Scalar quantization to int8
61fn scalar_quantize(vector: &[f32]) -> QuantizedVector {
62    let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
63    let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
64
65    let scale = if max > min { 255.0 / (max - min) } else { 1.0 };
66
67    let data: Vec<u8> = vector
68        .iter()
69        .map(|&v| ((v - min) * scale).clamp(0.0, 255.0) as u8)
70        .collect();
71
72    QuantizedVector::Scalar { data, min, scale }
73}
74
75/// Dequantize scalar quantized vector
/// Invert `scalar_quantize`: recover approximate floats from u8 codes.
fn scalar_dequantize(data: &[u8], min: f32, scale: f32) -> Vec<f32> {
    let mut out = Vec::with_capacity(data.len());
    for &code in data {
        out.push(code as f32 / scale + min);
    }
    out
}
79
80/// Product quantization (simplified version)
81fn product_quantize(vector: &[f32], subspaces: usize, _k: usize) -> Result<QuantizedVector> {
82    if !vector.len().is_multiple_of(subspaces) {
83        return Err(VectorDbError::Quantization(
84            "Vector length must be divisible by number of subspaces".to_string(),
85        ));
86    }
87
88    // Simplified: just store subspace indices
89    // In production, this would involve k-means clustering per subspace
90    let subspace_dim = vector.len() / subspaces;
91    let codes: Vec<u8> = (0..subspaces)
92        .map(|i| {
93            let start = i * subspace_dim;
94            let subvec = &vector[start..start + subspace_dim];
95            // Placeholder: hash to a code (0-255)
96            (subvec.iter().sum::<f32>() as u32 % 256) as u8
97        })
98        .collect();
99
100    Ok(QuantizedVector::Product { codes, subspaces })
101}
102
103/// Binary quantization (1 bit per dimension)
104fn binary_quantize(vector: &[f32]) -> QuantizedVector {
105    let threshold = vector.iter().sum::<f32>() / vector.len() as f32;
106
107    let num_bytes = vector.len().div_ceil(8);
108    let mut data = vec![0u8; num_bytes];
109
110    for (i, &val) in vector.iter().enumerate() {
111        if val > threshold {
112            let byte_idx = i / 8;
113            let bit_idx = i % 8;
114            data[byte_idx] |= 1 << bit_idx;
115        }
116    }
117
118    QuantizedVector::Binary { data, threshold }
119}
120
121/// Dequantize binary quantized vector
/// Expand packed bits back to floats: set bits become `threshold + 1.0`,
/// clear bits `threshold - 1.0`.
///
/// The output length is always `data.len() * 8`: padding bits from
/// quantization are expanded too, so it may exceed the original dimension.
fn binary_dequantize(data: &[u8], threshold: f32) -> Vec<f32> {
    data.iter()
        .flat_map(|&byte| {
            (0..8).map(move |bit_idx| {
                if (byte >> bit_idx) & 1 == 1 {
                    threshold + 1.0
                } else {
                    threshold - 1.0
                }
            })
        })
        .collect()
}
138
139/// Calculate memory savings from quantization
140pub fn calculate_compression_ratio(original_dims: usize, qtype: QuantizationType) -> f32 {
141    let original_bytes = original_dims * 4; // float32 = 4 bytes
142    let quantized_bytes = match qtype {
143        QuantizationType::None => original_bytes,
144        QuantizationType::Scalar => original_dims + 8, // u8 per dim + min + scale
145        QuantizationType::Product { subspaces, .. } => subspaces + 4, // u8 per subspace + overhead
146        QuantizationType::Binary => original_dims.div_ceil(8) + 4, // 1 bit per dim + threshold
147    };
148
149    original_bytes as f32 / quantized_bytes as f32
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// A scalar round-trip should stay within quantization error (~range/255).
    #[test]
    fn test_scalar_quantization() {
        let vector = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let roundtrip = dequantize(&scalar_quantize(&vector));

        for (orig, deq) in vector.iter().zip(roundtrip.iter()) {
            assert!((orig - deq).abs() < 0.1);
        }
    }

    /// Binary quantization must yield the Binary variant with packed bytes.
    #[test]
    fn test_binary_quantization() {
        let vector = vec![1.0, 5.0, 2.0, 8.0, 3.0];

        let QuantizedVector::Binary { data, .. } = binary_quantize(&vector) else {
            panic!("Expected binary quantization");
        };
        assert!(!data.is_empty());
    }

    /// Sanity-check advertised compression factors for 384 dimensions.
    #[test]
    fn test_compression_ratio() {
        // Scalar should approach 4x (u8 vs f32 per dimension).
        assert!(calculate_compression_ratio(384, QuantizationType::Scalar) > 3.0);
        // Binary should approach 32x (one bit vs 32 bits per dimension).
        assert!(calculate_compression_ratio(384, QuantizationType::Binary) > 20.0);
    }
}