semantic_memory/
quantize.rs1use crate::error::MemoryError;
10
/// An 8-bit quantized vector together with the parameters needed to decode it.
///
/// A stored byte `q` stands for the real value `(q - zero_point) * scale`.
/// `PartialEq` is derived so that values can be compared directly, e.g. when
/// checking that a packed vector unpacks to what was stored.
#[derive(Debug, Clone, PartialEq)]
pub struct QuantizedVector {
    /// Quantized components, one signed byte per dimension.
    pub data: Vec<i8>,
    /// Real-valued size of one quantization step.
    pub scale: f32,
    /// Quantized value that decodes to `0.0`.
    pub zero_point: i8,
}
21
22#[derive(Debug, Clone)]
24pub struct Quantizer {
25 dimensions: usize,
26}
27
28impl Quantizer {
29 pub fn new(dimensions: usize) -> Self {
31 Self { dimensions }
32 }
33
34 pub fn dimensions(&self) -> usize {
36 self.dimensions
37 }
38
39 pub fn quantize(&self, vector: &[f32]) -> Result<QuantizedVector, MemoryError> {
44 if vector.len() != self.dimensions {
45 return Err(MemoryError::QuantizationError(format!(
46 "expected {} dimensions, got {}",
47 self.dimensions,
48 vector.len()
49 )));
50 }
51
52 let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
53 let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
54
55 if (max - min).abs() < f32::EPSILON {
57 return Ok(QuantizedVector {
58 data: vec![0i8; self.dimensions],
59 scale: 1.0,
60 zero_point: 0,
61 });
62 }
63
64 let scale = (max - min) / 255.0;
65 if !scale.is_finite() || scale <= 0.0 {
66 return Err(MemoryError::QuantizationError(
67 "computed non-finite quantization scale".into(),
68 ));
69 }
70 let zero_point_f = -128.0 - (min / scale);
71 let zero_point = zero_point_f.round().clamp(-128.0, 127.0) as i8;
72
73 let data: Vec<i8> = vector
74 .iter()
75 .map(|&v| {
76 let q = (v / scale + zero_point as f32).round();
77 q.clamp(-128.0, 127.0) as i8
78 })
79 .collect();
80
81 Ok(QuantizedVector {
82 data,
83 scale,
84 zero_point,
85 })
86 }
87
88 pub fn dequantize(&self, qv: &QuantizedVector) -> Vec<f32> {
90 qv.data
91 .iter()
92 .map(|&q| (q as f32 - qv.zero_point as f32) * qv.scale)
93 .collect()
94 }
95}
96
97pub fn pack_quantized(qv: &QuantizedVector) -> Vec<u8> {
102 let mut buf = Vec::with_capacity(5 + qv.data.len());
103 buf.extend_from_slice(&qv.scale.to_le_bytes());
104 buf.push(qv.zero_point.to_ne_bytes()[0]);
105 buf.extend(qv.data.iter().map(|value| value.to_ne_bytes()[0]));
106 buf
107}
108
109pub fn unpack_quantized(bytes: &[u8], dimensions: usize) -> Result<QuantizedVector, MemoryError> {
111 let expected_len = 5 + dimensions;
112 if bytes.len() != expected_len {
113 return Err(MemoryError::QuantizationError(format!(
114 "expected {} bytes for {} dimensions, got {}",
115 expected_len,
116 dimensions,
117 bytes.len()
118 )));
119 }
120 let scale_bytes: [u8; 4] = bytes[0..4]
121 .try_into()
122 .map_err(|e| MemoryError::QuantizationError(format!("invalid scale bytes: {e}")))?;
123 let scale = f32::from_le_bytes(scale_bytes);
124 let zero_point = i8::from_ne_bytes([bytes[4]]);
125 let data: Vec<i8> = bytes[5..].iter().map(|&b| i8::from_ne_bytes([b])).collect();
126 Ok(QuantizedVector {
127 data,
128 scale,
129 zero_point,
130 })
131}