// lnmp_quant/vector.rs

use crate::scheme::QuantScheme;
use serde::{Deserialize, Serialize};

4/// Quantized embedding vector with compression parameters
5#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
6pub struct QuantizedVector {
7    /// Vector dimension (number of elements)
8    pub dim: u32,
9
10    /// Quantization scheme used
11    pub scheme: QuantScheme,
12
13    /// Scaling factor for dequantization
14    pub scale: f32,
15
16    /// Zero-point offset for optimal range utilization
17    pub zero_point: i8,
18
19    /// Minimum value in the original data (for reconstruction)
20    pub min_val: f32,
21
22    /// Packed quantized data (actual byte representation)
23    pub data: Vec<u8>,
24}
25
26impl QuantizedVector {
27    /// Creates a new quantized vector
28    pub fn new(
29        dim: u32,
30        scheme: QuantScheme,
31        scale: f32,
32        zero_point: i8,
33        min_val: f32,
34        data: Vec<u8>,
35    ) -> Self {
36        Self {
37            dim,
38            scheme,
39            scale,
40            zero_point,
41            min_val,
42            data,
43        }
44    }
45
46    /// Returns the size in bytes (excluding metadata)
47    pub fn data_size(&self) -> usize {
48        self.data.len()
49    }
50
51    /// Returns the compression ratio compared to F32 representation
52    pub fn compression_ratio(&self) -> f32 {
53        let original_size = self.dim as f32 * 4.0; // F32 = 4 bytes
54        let compressed_size = self.data.len() as f32;
55        original_size / compressed_size
56    }
57}
58
#[cfg(test)]
mod tests {
    use super::*;

    /// The constructor must store each argument unchanged, and `data_size`
    /// must report the payload length.
    #[test]
    fn test_quantized_vector_creation() {
        let qv = QuantizedVector::new(128, QuantScheme::QInt8, 0.01, 0, 0.0, vec![0u8; 128]);

        assert_eq!(qv.dim, 128);
        assert_eq!(qv.scheme, QuantScheme::QInt8);
        assert_eq!(qv.scale, 0.01);
        assert_eq!(qv.zero_point, 0);
        assert_eq!(qv.min_val, 0.0);
        assert_eq!(qv.data_size(), 128);
    }

    /// One byte per element against four bytes per `f32` gives a ratio of 4.
    #[test]
    fn test_compression_ratio() {
        let qv = QuantizedVector::new(512, QuantScheme::QInt8, 0.01, 0, 0.0, vec![0u8; 512]);

        // F32:   512 * 4 = 2048 bytes
        // QInt8: 512 * 1 =  512 bytes
        // Ratio: 2048 / 512 = 4.0 (exactly representable, so `assert_eq!` is safe)
        assert_eq!(qv.compression_ratio(), 4.0);
    }
}