velesdb_core/
quantization.rs1use serde::{Deserialize, Serialize};
15use std::io;
16
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
19#[serde(rename_all = "lowercase")]
20pub enum StorageMode {
21 #[default]
23 Full,
24 SQ8,
26 Binary,
29}
30
/// A vector compressed to one bit per dimension.
///
/// Bit `i` is stored in byte `i / 8` at bit position `i % 8` (least-significant bit
/// first) and is set when the source component was `>= 0.0`.
#[derive(Debug, Clone)]
pub struct BinaryQuantizedVector {
    /// Packed sign bits; `dimension.div_ceil(8)` bytes when built by this type's constructors.
    pub data: Vec<u8>,
    /// Number of meaningful bits in `data`.
    dimension: usize,
}

impl BinaryQuantizedVector {
    /// Quantizes `vector` to one bit per component (`1` when the component is `>= 0.0`).
    ///
    /// NaN components compare false against `0.0` and therefore map to `0`.
    ///
    /// # Panics
    ///
    /// Panics if `vector` is empty.
    #[must_use]
    pub fn from_f32(vector: &[f32]) -> Self {
        assert!(!vector.is_empty(), "Cannot quantize empty vector");

        // Each group of up to eight components becomes one byte, LSB first.
        let data = vector
            .chunks(8)
            .map(|group| {
                group.iter().enumerate().fold(0u8, |byte, (bit, &value)| {
                    if value >= 0.0 {
                        byte | (1 << bit)
                    } else {
                        byte
                    }
                })
            })
            .collect();

        Self {
            data,
            dimension: vector.len(),
        }
    }

    /// Returns the number of dimensions (bits) represented by this vector.
    #[must_use]
    pub fn dimension(&self) -> usize {
        self.dimension
    }

    /// Returns the size of the packed bit storage in bytes.
    #[must_use]
    pub fn memory_size(&self) -> usize {
        self.data.len()
    }

    /// Unpacks the stored bits into one `bool` per dimension.
    ///
    /// # Panics
    ///
    /// Panics if the public `data` field has been shrunk below `dimension.div_ceil(8)` bytes.
    #[must_use]
    pub fn get_bits(&self) -> Vec<bool> {
        (0..self.dimension)
            .map(|i| (self.data[i / 8] >> (i % 8)) & 1 == 1)
            .collect()
    }

    /// Counts the bit positions in which `self` and `other` differ.
    ///
    /// The dimension check is a `debug_assert!`; when debug assertions are disabled the
    /// comparison silently stops at the shorter of the two byte buffers.
    #[must_use]
    pub fn hamming_distance(&self, other: &Self) -> u32 {
        debug_assert_eq!(
            self.dimension, other.dimension,
            "Dimension mismatch in hamming_distance"
        );

        self.data
            .iter()
            .zip(other.data.iter())
            .map(|(&a, &b)| (a ^ b).count_ones())
            .sum()
    }

    /// Returns `1.0 - hamming_distance / dimension`.
    ///
    /// A zero-dimension vector (which `from_bytes` can produce) yields `1.0` rather than
    /// the NaN that `0 / 0` would give.
    #[must_use]
    #[allow(clippy::cast_precision_loss)]
    pub fn hamming_similarity(&self, other: &Self) -> f32 {
        let distance = self.hamming_distance(other);
        if self.dimension == 0 {
            return 1.0;
        }
        1.0 - (distance as f32 / self.dimension as f32)
    }

    /// Serializes as a little-endian `u32` dimension followed by the packed bytes.
    #[must_use]
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(4 + self.data.len());
        // The header is a 32-bit field; a dimension above `u32::MAX` would be truncated.
        #[allow(clippy::cast_possible_truncation)]
        bytes.extend_from_slice(&(self.dimension as u32).to_le_bytes());
        bytes.extend_from_slice(&self.data);
        bytes
    }

    /// Decodes a vector from the layout written by [`Self::to_bytes`].
    ///
    /// Bytes after the encoded payload are ignored. Padding bits in the final payload byte
    /// (those at or beyond `dimension`) are cleared so that they can never contribute to
    /// [`Self::hamming_distance`].
    ///
    /// # Errors
    ///
    /// Returns [`io::ErrorKind::InvalidData`] when `bytes` is shorter than the 4-byte header
    /// or shorter than the payload length the header announces.
    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
        if bytes.len() < 4 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Not enough bytes for BinaryQuantizedVector header",
            ));
        }

        let dimension = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize;
        let expected_data_len = dimension.div_ceil(8);

        if bytes.len() < 4 + expected_data_len {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "Not enough bytes for BinaryQuantizedVector data: expected {}, got {}",
                    4 + expected_data_len,
                    bytes.len()
                ),
            ));
        }

        let mut data = bytes[4..4 + expected_data_len].to_vec();

        // Untrusted input may carry garbage above the last meaningful bit; zero it.
        let used_bits = dimension % 8;
        if used_bits != 0 {
            if let Some(last) = data.last_mut() {
                *last &= (1u8 << used_bits) - 1;
            }
        }

        Ok(Self { data, dimension })
    }
}
190
/// A vector stored with 8-bit scalar quantization.
///
/// Each component is mapped linearly from `[min, max]` onto the byte range `0..=255`.
#[derive(Debug, Clone)]
pub struct QuantizedVector {
    /// One quantized byte per dimension.
    pub data: Vec<u8>,
    /// Smallest component of the source vector (dequantization offset).
    pub min: f32,
    /// Largest component of the source vector.
    pub max: f32,
}

impl QuantizedVector {
    /// Quantizes `vector` to one byte per component.
    ///
    /// When `max - min` is below `f32::EPSILON` the vector is treated as constant and every
    /// byte is set to `128`.
    ///
    /// # Panics
    ///
    /// Panics if `vector` is empty.
    #[must_use]
    pub fn from_f32(vector: &[f32]) -> Self {
        assert!(!vector.is_empty(), "Cannot quantize empty vector");

        // One pass tracking both extremes.
        let (min, max) = vector.iter().fold(
            (f32::INFINITY, f32::NEG_INFINITY),
            |(lo, hi), &v| (lo.min(v), hi.max(v)),
        );

        let span = max - min;
        if span < f32::EPSILON {
            return Self {
                data: vec![128u8; vector.len()],
                min,
                max,
            };
        }

        let levels_per_unit = 255.0 / span;
        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
        let data = vector
            .iter()
            .map(|&v| ((v - min) * levels_per_unit).round().clamp(0.0, 255.0) as u8)
            .collect();

        Self { data, min, max }
    }

    /// Reconstructs approximate `f32` components from the stored bytes.
    ///
    /// A constant vector (`max - min < f32::EPSILON`) is reconstructed as `min` repeated.
    #[must_use]
    pub fn to_f32(&self) -> Vec<f32> {
        let span = self.max - self.min;
        if span < f32::EPSILON {
            return vec![self.min; self.data.len()];
        }

        let step = span / 255.0;
        self.data
            .iter()
            .map(|&code| f32::from(code) * step + self.min)
            .collect()
    }

    /// Returns the number of dimensions (one byte each).
    #[must_use]
    pub fn dimension(&self) -> usize {
        self.data.len()
    }

    /// Returns the payload size in bytes: the quantized data plus the two `f32` bounds.
    #[must_use]
    pub fn memory_size(&self) -> usize {
        2 * std::mem::size_of::<f32>() + self.data.len()
    }

    /// Serializes as little-endian `min`, little-endian `max`, then the quantized bytes.
    #[must_use]
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut encoded = Vec::with_capacity(8 + self.data.len());
        for bound in &[self.min, self.max] {
            encoded.extend_from_slice(&bound.to_le_bytes());
        }
        encoded.extend_from_slice(&self.data);
        encoded
    }

    /// Decodes a vector from the layout written by [`Self::to_bytes`].
    ///
    /// Everything after the 8-byte header is taken as quantized data.
    ///
    /// # Errors
    ///
    /// Returns [`io::ErrorKind::InvalidData`] when `bytes` is shorter than the 8-byte header.
    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
        match bytes {
            &[m0, m1, m2, m3, x0, x1, x2, x3, ref payload @ ..] => Ok(Self {
                data: payload.to_vec(),
                min: f32::from_le_bytes([m0, m1, m2, m3]),
                max: f32::from_le_bytes([x0, x1, x2, x3]),
            }),
            _ => Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Not enough bytes for QuantizedVector header",
            )),
        }
    }
}
303
304#[must_use]
308pub fn dot_product_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
309 debug_assert_eq!(
310 query.len(),
311 quantized.data.len(),
312 "Dimension mismatch in dot_product_quantized"
313 );
314
315 let range = quantized.max - quantized.min;
316 if range < f32::EPSILON {
317 let value = quantized.min;
319 return query.iter().sum::<f32>() * value;
320 }
321
322 let scale = range / 255.0;
323 let offset = quantized.min;
324
325 query
327 .iter()
328 .zip(quantized.data.iter())
329 .map(|(&q, &v)| q * (f32::from(v) * scale + offset))
330 .sum()
331}
332
333#[must_use]
335pub fn euclidean_squared_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
336 debug_assert_eq!(
337 query.len(),
338 quantized.data.len(),
339 "Dimension mismatch in euclidean_squared_quantized"
340 );
341
342 let range = quantized.max - quantized.min;
343 if range < f32::EPSILON {
344 let value = quantized.min;
346 return query.iter().map(|&q| (q - value).powi(2)).sum();
347 }
348
349 let scale = range / 255.0;
350 let offset = quantized.min;
351
352 query
353 .iter()
354 .zip(quantized.data.iter())
355 .map(|(&q, &v)| {
356 let dequantized = f32::from(v) * scale + offset;
357 (q - dequantized).powi(2)
358 })
359 .sum()
360}
361
362#[must_use]
366pub fn cosine_similarity_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
367 let dot = dot_product_quantized(query, quantized);
368
369 let query_norm: f32 = query.iter().map(|&x| x * x).sum::<f32>().sqrt();
371
372 let reconstructed = quantized.to_f32();
374 let quantized_norm: f32 = reconstructed.iter().map(|&x| x * x).sum::<f32>().sqrt();
375
376 if query_norm < f32::EPSILON || quantized_norm < f32::EPSILON {
377 return 0.0;
378 }
379
380 dot / (query_norm * quantized_norm)
381}
382
383#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
388#[allow(unused_imports)]
389use std::arch::x86_64::*;
390
391#[must_use]
396pub fn dot_product_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
397 debug_assert_eq!(
398 query.len(),
399 quantized.data.len(),
400 "Dimension mismatch in dot_product_quantized_simd"
401 );
402
403 let range = quantized.max - quantized.min;
404 if range < f32::EPSILON {
405 let value = quantized.min;
406 return query.iter().sum::<f32>() * value;
407 }
408
409 let scale = range / 255.0;
410 let offset = quantized.min;
411
412 #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
413 {
414 simd_dot_product_avx2(query, &quantized.data, scale, offset)
415 }
416
417 #[cfg(not(all(target_arch = "x86_64", target_feature = "avx2")))]
418 {
419 query
421 .iter()
422 .zip(quantized.data.iter())
423 .map(|(&q, &v)| q * (f32::from(v) * scale + offset))
424 .sum()
425 }
426}
427
/// Dot product of `query` with `data` dequantized as `byte * scale + offset`.
///
/// Only compiled for `x86_64` with the `avx2` target feature. The body is plain scalar
/// arithmetic (no intrinsics) that accumulates strictly in index order starting from `0.0`.
///
/// # Panics
///
/// Panics if `data` is shorter than `query`.
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
#[inline]
fn simd_dot_product_avx2(query: &[f32], data: &[u8], scale: f32, offset: f32) -> f32 {
    // A single up-front bounds check replaces the per-element indexing checks.
    let codes = &data[..query.len()];

    query
        .iter()
        .zip(codes)
        .fold(0.0f32, |acc, (&q, &code)| {
            acc + q * (f32::from(code) * scale + offset)
        })
}
456
457#[must_use]
459pub fn euclidean_squared_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
460 debug_assert_eq!(
461 query.len(),
462 quantized.data.len(),
463 "Dimension mismatch in euclidean_squared_quantized_simd"
464 );
465
466 let range = quantized.max - quantized.min;
467 if range < f32::EPSILON {
468 let value = quantized.min;
469 return query.iter().map(|&q| (q - value).powi(2)).sum();
470 }
471
472 let scale = range / 255.0;
473 let offset = quantized.min;
474
475 let len = query.len();
477 let chunks = len / 4;
478 let remainder = len % 4;
479 let mut sum = 0.0f32;
480
481 for i in 0..chunks {
482 let base = i * 4;
483 let d0 = f32::from(quantized.data[base]) * scale + offset;
484 let d1 = f32::from(quantized.data[base + 1]) * scale + offset;
485 let d2 = f32::from(quantized.data[base + 2]) * scale + offset;
486 let d3 = f32::from(quantized.data[base + 3]) * scale + offset;
487
488 let diff0 = query[base] - d0;
489 let diff1 = query[base + 1] - d1;
490 let diff2 = query[base + 2] - d2;
491 let diff3 = query[base + 3] - d3;
492
493 sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
494 }
495
496 let base = chunks * 4;
497 for i in 0..remainder {
498 let dequant = f32::from(quantized.data[base + i]) * scale + offset;
499 let diff = query[base + i] - dequant;
500 sum += diff * diff;
501 }
502
503 sum
504}
505
506#[must_use]
510pub fn cosine_similarity_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
511 let dot = dot_product_quantized_simd(query, quantized);
512
513 let query_norm_sq: f32 = query.iter().map(|&x| x * x).sum();
515
516 let range = quantized.max - quantized.min;
518 let scale = if range < f32::EPSILON {
519 0.0
520 } else {
521 range / 255.0
522 };
523 let offset = quantized.min;
524
525 let quantized_norm_sq: f32 = quantized
526 .data
527 .iter()
528 .map(|&v| {
529 let dequant = f32::from(v) * scale + offset;
530 dequant * dequant
531 })
532 .sum();
533
534 let denom = (query_norm_sq * quantized_norm_sq).sqrt();
535 if denom < f32::EPSILON {
536 return 0.0;
537 }
538
539 dot / denom
540}