semantic_memory/
quantize.rs1use crate::error::MemoryError;
10
/// An 8-bit quantized vector together with the affine parameters needed to
/// map its codes back to `f32`.
///
/// Each code `q` in [`data`](Self::data) represents the real value
/// `(q - zero_point) * scale`.
///
/// `PartialEq` is derived so that values can be compared directly, e.g. when
/// checking that packing and unpacking round-trips. `Eq` is not derived
/// because `scale` is an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub struct QuantizedVector {
    /// Quantized codes, one per vector component.
    pub data: Vec<i8>,
    /// Real-valued distance between two adjacent codes.
    pub scale: f32,
    /// Code that represents the real value `0.0`.
    pub zero_point: i8,
}
21
/// Converts fixed-length `f32` vectors to and from 8-bit quantized form.
///
/// The only state is the component count that input vectors are checked
/// against.
#[derive(Debug, Clone)]
pub struct Quantizer {
    /// Number of components every input vector must have.
    dimensions: usize,
}
27
28impl Quantizer {
29 pub fn new(dimensions: usize) -> Self {
31 Self { dimensions }
32 }
33
34 pub fn dimensions(&self) -> usize {
36 self.dimensions
37 }
38
39 pub fn quantize(&self, vector: &[f32]) -> Result<QuantizedVector, MemoryError> {
44 if vector.len() != self.dimensions {
45 return Err(MemoryError::QuantizationError(format!(
46 "expected {} dimensions, got {}",
47 self.dimensions,
48 vector.len()
49 )));
50 }
51
52 let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
53 let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
54
55 if (max - min).abs() < f32::EPSILON {
57 return Ok(QuantizedVector {
58 data: vec![0i8; self.dimensions],
59 scale: 1.0,
60 zero_point: 0,
61 });
62 }
63
64 let scale = (max - min) / 254.0;
66 let zero_point_f = -127.0 - (min / scale);
67 let zero_point = zero_point_f.round().clamp(-127.0, 127.0) as i8;
68
69 let data: Vec<i8> = vector
70 .iter()
71 .map(|&v| {
72 let q = (v / scale + zero_point as f32).round();
73 q.clamp(-127.0, 127.0) as i8
74 })
75 .collect();
76
77 Ok(QuantizedVector {
78 data,
79 scale,
80 zero_point,
81 })
82 }
83
84 pub fn dequantize(&self, qv: &QuantizedVector) -> Vec<f32> {
86 qv.data
87 .iter()
88 .map(|&q| (q as f32 - qv.zero_point as f32) * qv.scale)
89 .collect()
90 }
91}
92
93pub fn pack_quantized(qv: &QuantizedVector) -> Vec<u8> {
98 let mut buf = Vec::with_capacity(5 + qv.data.len());
99 buf.extend_from_slice(&qv.scale.to_le_bytes());
100 buf.push(qv.zero_point as u8);
101 let data_bytes: &[u8] = bytemuck::cast_slice(&qv.data);
103 buf.extend_from_slice(data_bytes);
104 buf
105}
106
107pub fn unpack_quantized(bytes: &[u8], dimensions: usize) -> Result<QuantizedVector, MemoryError> {
109 let expected_len = 5 + dimensions;
110 if bytes.len() != expected_len {
111 return Err(MemoryError::QuantizationError(format!(
112 "expected {} bytes for {} dimensions, got {}",
113 expected_len, dimensions, bytes.len()
114 )));
115 }
116 let scale = f32::from_le_bytes(bytes[0..4].try_into().unwrap());
117 let zero_point = bytes[4] as i8;
118 let data: Vec<i8> = bytes[5..].iter().map(|&b| b as i8).collect();
119 Ok(QuantizedVector {
120 data,
121 scale,
122 zero_point,
123 })
124}