velesdb_core/quantization/
scalar.rs1use std::io;
8
9use super::codec_helpers::{serialize_with_header, validate_and_split_header};
10use super::QuantizationCodec;
11
12#[derive(Debug, Clone)]
17pub struct QuantizedVector {
18 pub data: Vec<u8>,
20 pub min: f32,
22 pub max: f32,
24}
25
26impl QuantizedVector {
27 #[must_use]
33 pub fn from_f32(vector: &[f32]) -> Self {
34 debug_assert!(!vector.is_empty(), "Cannot quantize empty vector");
36
37 let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
38 let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
39
40 let range = max - min;
41 let data = if range < f32::EPSILON {
42 vec![128u8; vector.len()]
44 } else {
45 let scale = 255.0 / range;
46 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
50 vector
51 .iter()
52 .map(|&v| {
53 let normalized = (v - min) * scale;
54 normalized.round().clamp(0.0, 255.0) as u8
55 })
56 .collect()
57 };
58
59 Self { data, min, max }
60 }
61
62 #[must_use]
66 pub fn to_f32(&self) -> Vec<f32> {
67 let range = self.max - self.min;
68 if range < f32::EPSILON {
69 vec![self.min; self.data.len()]
71 } else {
72 let scale = range / 255.0;
73 self.data
74 .iter()
75 .map(|&v| f32::from(v) * scale + self.min)
76 .collect()
77 }
78 }
79
80 #[must_use]
82 pub fn dimension(&self) -> usize {
83 self.data.len()
84 }
85
86 #[must_use]
88 pub fn memory_size(&self) -> usize {
89 self.data.len() + 8 }
91}
92
93const SQ8_HEADER_SIZE: usize = 8;
95
96impl QuantizationCodec for QuantizedVector {
97 fn to_bytes(&self) -> Vec<u8> {
98 let mut header = [0u8; SQ8_HEADER_SIZE];
99 header[..4].copy_from_slice(&self.min.to_le_bytes());
100 header[4..].copy_from_slice(&self.max.to_le_bytes());
101 serialize_with_header(&header, &self.data)
102 }
103
104 fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
105 let (header, payload) =
106 validate_and_split_header(bytes, SQ8_HEADER_SIZE, "QuantizedVector")?;
107
108 let min = f32::from_le_bytes([header[0], header[1], header[2], header[3]]);
109 let max = f32::from_le_bytes([header[4], header[5], header[6], header[7]]);
110 let data = payload.to_vec();
111
112 Ok(Self { data, min, max })
113 }
114}
115
116struct DequantParams {
125 scale: f32,
126 offset: f32,
127}
128
129fn dequant_params(quantized: &QuantizedVector) -> Option<DequantParams> {
134 let range = quantized.max - quantized.min;
135 if range < f32::EPSILON {
136 return None;
137 }
138 Some(DequantParams {
139 scale: range / 255.0,
140 offset: quantized.min,
141 })
142}
143
144#[must_use]
152pub fn dot_product_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
153 debug_assert_eq!(
154 query.len(),
155 quantized.data.len(),
156 "Dimension mismatch in dot_product_quantized"
157 );
158
159 let Some(params) = dequant_params(quantized) else {
160 return query.iter().sum::<f32>() * quantized.min;
162 };
163
164 query
166 .iter()
167 .zip(quantized.data.iter())
168 .map(|(&q, &v)| q * (f32::from(v) * params.scale + params.offset))
169 .sum()
170}
171
172#[must_use]
174pub fn euclidean_squared_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
175 debug_assert_eq!(
176 query.len(),
177 quantized.data.len(),
178 "Dimension mismatch in euclidean_squared_quantized"
179 );
180
181 let Some(params) = dequant_params(quantized) else {
182 let value = quantized.min;
183 return query.iter().map(|&q| (q - value).powi(2)).sum();
184 };
185
186 query
187 .iter()
188 .zip(quantized.data.iter())
189 .map(|(&q, &v)| {
190 let dequantized = f32::from(v) * params.scale + params.offset;
191 (q - dequantized).powi(2)
192 })
193 .sum()
194}
195
196#[must_use]
204pub fn cosine_similarity_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
205 cosine_from_dot(dot_product_quantized(query, quantized), query, quantized)
206}
207
208fn cosine_from_dot(dot: f32, query: &[f32], quantized: &QuantizedVector) -> f32 {
214 use crate::simd_native;
215
216 let query_norm = simd_native::norm_native(query);
217
218 let quantized_norm = quantized_vector_norm(quantized);
220
221 if query_norm < f32::EPSILON || quantized_norm < f32::EPSILON {
222 return 0.0;
223 }
224
225 dot / (query_norm * quantized_norm)
226}
227
228#[inline]
233fn quantized_vector_norm(quantized: &QuantizedVector) -> f32 {
234 let Some(params) = dequant_params(quantized) else {
235 let value = quantized.min;
236 #[allow(clippy::cast_precision_loss)]
237 return value.abs() * (quantized.data.len() as f32).sqrt();
238 };
239
240 let len = quantized.data.len();
241 let chunks = len / 4;
242 let remainder = len % 4;
243
244 let mut sum0: f32 = 0.0;
245 let mut sum1: f32 = 0.0;
246 let mut sum2: f32 = 0.0;
247 let mut sum3: f32 = 0.0;
248
249 for i in 0..chunks {
250 let base = i * 4;
251 let d0 = f32::from(quantized.data[base]) * params.scale + params.offset;
252 let d1 = f32::from(quantized.data[base + 1]) * params.scale + params.offset;
253 let d2 = f32::from(quantized.data[base + 2]) * params.scale + params.offset;
254 let d3 = f32::from(quantized.data[base + 3]) * params.scale + params.offset;
255 sum0 += d0 * d0;
256 sum1 += d1 * d1;
257 sum2 += d2 * d2;
258 sum3 += d3 * d3;
259 }
260
261 let base = chunks * 4;
262 for i in 0..remainder {
263 let d = f32::from(quantized.data[base + i]) * params.scale + params.offset;
264 sum0 += d * d;
265 }
266
267 (sum0 + sum1 + sum2 + sum3).sqrt()
268}
269
270#[must_use]
282pub fn dot_product_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
283 debug_assert_eq!(
284 query.len(),
285 quantized.data.len(),
286 "Dimension mismatch in dot_product_quantized_simd"
287 );
288
289 let Some(params) = dequant_params(quantized) else {
290 return query.iter().sum::<f32>() * quantized.min;
291 };
292
293 dot_product_dequant_unrolled_8(query, &quantized.data, params.scale, params.offset)
294}
295
296#[inline]
298fn dot_product_dequant_unrolled_8(query: &[f32], data: &[u8], scale: f32, offset: f32) -> f32 {
299 let len = query.len();
300 let chunks = len / 8;
301 let remainder = len % 8;
302
303 let mut sum = 0.0f32;
304
305 for i in 0..chunks {
306 let base = i * 8;
307 for j in 0..8 {
308 let dequant = f32::from(data[base + j]) * scale + offset;
309 sum += query[base + j] * dequant;
310 }
311 }
312
313 let base = chunks * 8;
314 for i in 0..remainder {
315 let dequant = f32::from(data[base + i]) * scale + offset;
316 sum += query[base + i] * dequant;
317 }
318
319 sum
320}
321
322#[must_use]
324pub fn euclidean_squared_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
325 debug_assert_eq!(
326 query.len(),
327 quantized.data.len(),
328 "Dimension mismatch in euclidean_squared_quantized_simd"
329 );
330
331 let Some(params) = dequant_params(quantized) else {
332 let value = quantized.min;
333 return query.iter().map(|&q| (q - value).powi(2)).sum();
334 };
335
336 let len = query.len();
338 let chunks = len / 4;
339 let remainder = len % 4;
340 let mut sum = 0.0f32;
341
342 for i in 0..chunks {
343 let base = i * 4;
344 let d0 = f32::from(quantized.data[base]) * params.scale + params.offset;
345 let d1 = f32::from(quantized.data[base + 1]) * params.scale + params.offset;
346 let d2 = f32::from(quantized.data[base + 2]) * params.scale + params.offset;
347 let d3 = f32::from(quantized.data[base + 3]) * params.scale + params.offset;
348
349 let diff0 = query[base] - d0;
350 let diff1 = query[base + 1] - d1;
351 let diff2 = query[base + 2] - d2;
352 let diff3 = query[base + 3] - d3;
353
354 sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
355 }
356
357 let base = chunks * 4;
358 for i in 0..remainder {
359 let dequant = f32::from(quantized.data[base + i]) * params.scale + params.offset;
360 let diff = query[base + i] - dequant;
361 sum += diff * diff;
362 }
363
364 sum
365}
366
367#[must_use]
372pub fn cosine_similarity_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
373 cosine_from_dot(
374 dot_product_quantized_simd(query, quantized),
375 query,
376 quantized,
377 )
378}