1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
//! Scalar quantization (SQ8) for f32 → i8 vector compression.
//!
//! Per-vector affine quantization: each vector gets its own scale and zero_point,
//! computed from its min/max values. This gives 4x memory reduction with <0.5% cosine
//! similarity error on normalized embedding vectors.
//!
//! This module is independent of the HNSW backend and can be used with brute-force too.
use crate::error::MemoryError;
/// An int8-compressed vector bundled with the affine parameters needed to decode it.
///
/// A component is reconstructed as `(data[i] - zero_point) * scale`.
#[derive(Clone, Debug)]
pub struct QuantizedVector {
    /// The quantized components, stored as signed 8-bit integers.
    pub data: Vec<i8>,
    /// Multiplier applied during decoding:
    /// `original[i] ≈ (data[i] - zero_point) * scale`.
    pub scale: f32,
    /// Offset subtracted from each stored component before scaling.
    /// Being an `i8`, it is always within [-128, 127].
    pub zero_point: i8,
}
/// Converts `f32` vectors of one fixed dimensionality to int8, calibrating
/// every vector independently.
#[derive(Clone, Debug)]
pub struct Quantizer {
    /// Number of components each input vector is required to have.
    dimensions: usize,
}
impl Quantizer {
    /// Create a new quantizer for vectors of the given dimensionality.
    pub fn new(dimensions: usize) -> Self {
        Self { dimensions }
    }

    /// The configured dimensionality.
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }

    /// Quantize a single f32 vector to int8 with per-vector affine calibration.
    ///
    /// The vector's value range is mapped onto the full i8 range [-128, 127]
    /// (256 discrete levels). The calibration range is always widened to
    /// contain 0.0: this guarantees that the zero point fits in an `i8`, so
    /// vectors whose components are all positive or all negative (including
    /// constant vectors) reconstruct correctly instead of saturating.
    ///
    /// A vector whose widened range is smaller than `f32::EPSILON` (all
    /// components are effectively zero, or the vector is empty) is encoded as
    /// all zeros with `scale = 1.0` and `zero_point = 0`.
    ///
    /// # Errors
    ///
    /// Returns `MemoryError::QuantizationError` when
    /// * `vector.len()` differs from the configured dimensionality,
    /// * any component is NaN, or
    /// * the value range is not finite (infinite components, or a span that
    ///   overflows `f32`).
    pub fn quantize(&self, vector: &[f32]) -> Result<QuantizedVector, MemoryError> {
        if vector.len() != self.dimensions {
            return Err(MemoryError::QuantizationError(format!(
                "expected {} dimensions, got {}",
                self.dimensions,
                vector.len()
            )));
        }

        // `f32::min`/`f32::max` skip NaN operands, so a NaN component would
        // otherwise slip through calibration and be cast to an arbitrary code.
        if vector.iter().any(|v| v.is_nan()) {
            return Err(MemoryError::QuantizationError(
                "cannot quantize a vector containing NaN".into(),
            ));
        }

        // Seeding both folds with 0.0 widens the range to include zero, which
        // keeps `-128 - min / scale` inside [-128, 127] for every input.
        let min = vector.iter().copied().fold(0.0_f32, f32::min);
        let max = vector.iter().copied().fold(0.0_f32, f32::max);
        let range = max - min;

        // Degenerate range: nothing to calibrate against, store plain zeros.
        if range < f32::EPSILON {
            return Ok(QuantizedVector {
                data: vec![0i8; self.dimensions],
                scale: 1.0,
                zero_point: 0,
            });
        }

        let scale = range / 255.0;
        if !scale.is_finite() {
            return Err(MemoryError::QuantizationError(
                "computed non-finite quantization scale".into(),
            ));
        }

        // The clamp only guards against floating-point rounding at the edges;
        // mathematically the value is already within the i8 range.
        let zero_point = (-128.0 - min / scale).round().clamp(-128.0, 127.0) as i8;
        let offset = f32::from(zero_point);

        let data: Vec<i8> = vector
            .iter()
            .map(|&v| (v / scale + offset).round().clamp(-128.0, 127.0) as i8)
            .collect();

        Ok(QuantizedVector {
            data,
            scale,
            zero_point,
        })
    }

    /// Dequantize back to f32 (approximate reconstruction).
    ///
    /// Each component is decoded as `(q - zero_point) * scale`.
    pub fn dequantize(&self, qv: &QuantizedVector) -> Vec<f32> {
        let offset = f32::from(qv.zero_point);
        qv.data
            .iter()
            .map(|&q| (f32::from(q) - offset) * qv.scale)
            .collect()
    }
}
/// Serialize a `QuantizedVector` into the flat byte layout used for SQLite storage.
///
/// Layout: `[scale: f32 little-endian (4 bytes)][zero_point: 1 byte][data: 1 byte per component]`,
/// i.e. `4 + 1 + dims` bytes in total. Signed bytes are stored as their
/// two's-complement bit pattern.
pub fn pack_quantized(qv: &QuantizedVector) -> Vec<u8> {
    let scale_bytes = qv.scale.to_le_bytes();
    let mut packed = Vec::with_capacity(scale_bytes.len() + 1 + qv.data.len());

    packed.extend_from_slice(&scale_bytes);
    // The zero point and the components share one encoding, so emit them in a single pass.
    for signed in std::iter::once(&qv.zero_point).chain(qv.data.iter()) {
        packed.push(signed.to_le_bytes()[0]);
    }

    packed
}
/// Unpack bytes from SQLite into a QuantizedVector.
///
/// Expects the layout `[scale: f32 LE][zero_point: i8][data: i8 × dimensions]`,
/// i.e. exactly `5 + dimensions` bytes.
///
/// # Errors
///
/// Returns `MemoryError::QuantizationError` when `bytes.len()` is not
/// `5 + dimensions`, or when `dimensions` is so large that this length cannot
/// be represented in a `usize`.
pub fn unpack_quantized(bytes: &[u8], dimensions: usize) -> Result<QuantizedVector, MemoryError> {
    // Bytes preceding the payload: 4 for the scale plus 1 for the zero point.
    const HEADER_LEN: usize = 5;

    // Checked addition: a huge `dimensions` must yield an error rather than an
    // overflow panic (debug) or a wrapped length that defeats the check (release).
    let expected_len = HEADER_LEN.checked_add(dimensions).ok_or_else(|| {
        MemoryError::QuantizationError(format!(
            "dimension count {dimensions} is too large for a packed vector"
        ))
    })?;
    if bytes.len() != expected_len {
        return Err(MemoryError::QuantizationError(format!(
            "expected {} bytes for {} dimensions, got {}",
            expected_len,
            dimensions,
            bytes.len()
        )));
    }

    // The length check above guarantees at least HEADER_LEN bytes are present.
    let (header, payload) = bytes.split_at(HEADER_LEN);
    let scale = f32::from_le_bytes([header[0], header[1], header[2], header[3]]);
    let zero_point = i8::from_le_bytes([header[4]]);
    let data: Vec<i8> = payload.iter().map(|&b| i8::from_le_bytes([b])).collect();

    Ok(QuantizedVector {
        data,
        scale,
        zero_point,
    })
}