velesdb_core/
quantization.rs

1//! Scalar Quantization (SQ8) for memory-efficient vector storage.
2//!
3//! This module implements 8-bit scalar quantization to reduce memory usage by 4x
4//! while maintaining >95% recall accuracy.
5//!
6//! ## Benefits
7//!
8//! | Metric | f32 | SQ8 |
9//! |--------|-----|-----|
10//! | RAM/vector (768d) | 3 KB | 770 bytes |
11//! | Cache efficiency | Baseline | ~4x better |
12//! | Recall loss | 0% | ~0.5-1% |
13
14use serde::{Deserialize, Serialize};
15use std::io;
16
17/// Storage mode for vectors.
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
19#[serde(rename_all = "lowercase")]
20pub enum StorageMode {
21    /// Full precision f32 storage (default).
22    #[default]
23    Full,
24    /// 8-bit scalar quantization for 4x memory reduction.
25    SQ8,
26    /// 1-bit binary quantization for 32x memory reduction.
27    /// Best for edge/IoT devices with limited RAM.
28    Binary,
29}
30
31/// A binary quantized vector using 1-bit per dimension.
32///
33/// Each f32 value is converted to 1 bit: >= 0.0 becomes 1, < 0.0 becomes 0.
34/// This provides **32x memory reduction** compared to f32 storage.
35///
36/// # Memory Usage
37///
38/// | Dimension | f32 | Binary |
39/// |-----------|-----|--------|
40/// | 768 | 3072 bytes | 96 bytes |
41/// | 1536 | 6144 bytes | 192 bytes |
42///
43/// # Use with Rescoring
44///
45/// For best accuracy, use binary search for candidate selection,
46/// then rescore top candidates with full-precision vectors.
47#[derive(Debug, Clone)]
48pub struct BinaryQuantizedVector {
49    /// Binary data (1 bit per dimension, packed into bytes).
50    pub data: Vec<u8>,
51    /// Original dimension of the vector.
52    dimension: usize,
53}
54
55impl BinaryQuantizedVector {
56    /// Creates a new binary quantized vector from f32 data.
57    ///
58    /// Values >= 0.0 become 1, values < 0.0 become 0.
59    ///
60    /// # Arguments
61    ///
62    /// * `vector` - The original f32 vector to quantize
63    ///
64    /// # Panics
65    ///
66    /// Panics if the vector is empty.
67    #[must_use]
68    pub fn from_f32(vector: &[f32]) -> Self {
69        assert!(!vector.is_empty(), "Cannot quantize empty vector");
70
71        let dimension = vector.len();
72        // Calculate number of bytes needed: ceil(dimension / 8)
73        let num_bytes = dimension.div_ceil(8);
74        let mut data = vec![0u8; num_bytes];
75
76        for (i, &value) in vector.iter().enumerate() {
77            if value >= 0.0 {
78                // Set bit i in the packed byte array
79                let byte_idx = i / 8;
80                let bit_idx = i % 8;
81                data[byte_idx] |= 1 << bit_idx;
82            }
83        }
84
85        Self { data, dimension }
86    }
87
88    /// Returns the dimension of the original vector.
89    #[must_use]
90    pub fn dimension(&self) -> usize {
91        self.dimension
92    }
93
94    /// Returns the memory size in bytes.
95    #[must_use]
96    pub fn memory_size(&self) -> usize {
97        self.data.len()
98    }
99
100    /// Returns the individual bits as a boolean vector.
101    ///
102    /// Useful for debugging and testing.
103    #[must_use]
104    pub fn get_bits(&self) -> Vec<bool> {
105        (0..self.dimension)
106            .map(|i| {
107                let byte_idx = i / 8;
108                let bit_idx = i % 8;
109                (self.data[byte_idx] >> bit_idx) & 1 == 1
110            })
111            .collect()
112    }
113
114    /// Computes the Hamming distance to another binary vector.
115    ///
116    /// Hamming distance counts the number of bits that differ.
117    /// Uses POPCNT for fast bit counting.
118    ///
119    /// # Panics
120    ///
121    /// Panics if the vectors have different dimensions.
122    #[must_use]
123    pub fn hamming_distance(&self, other: &Self) -> u32 {
124        debug_assert_eq!(
125            self.dimension, other.dimension,
126            "Dimension mismatch in hamming_distance"
127        );
128
129        // XOR bytes and count differing bits using POPCNT
130        self.data
131            .iter()
132            .zip(other.data.iter())
133            .map(|(&a, &b)| (a ^ b).count_ones())
134            .sum()
135    }
136
137    /// Computes normalized Hamming similarity (0.0 to 1.0).
138    ///
139    /// Returns 1.0 for identical vectors, 0.0 for completely different.
140    #[must_use]
141    #[allow(clippy::cast_precision_loss)]
142    pub fn hamming_similarity(&self, other: &Self) -> f32 {
143        let distance = self.hamming_distance(other);
144        1.0 - (distance as f32 / self.dimension as f32)
145    }
146
147    /// Serializes the binary quantized vector to bytes.
148    #[must_use]
149    pub fn to_bytes(&self) -> Vec<u8> {
150        let mut bytes = Vec::with_capacity(4 + self.data.len());
151        // Store dimension as u32 (4 bytes)
152        #[allow(clippy::cast_possible_truncation)]
153        bytes.extend_from_slice(&(self.dimension as u32).to_le_bytes());
154        bytes.extend_from_slice(&self.data);
155        bytes
156    }
157
158    /// Deserializes a binary quantized vector from bytes.
159    ///
160    /// # Errors
161    ///
162    /// Returns an error if the bytes are invalid.
163    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
164        if bytes.len() < 4 {
165            return Err(io::Error::new(
166                io::ErrorKind::InvalidData,
167                "Not enough bytes for BinaryQuantizedVector header",
168            ));
169        }
170
171        let dimension = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize;
172        let expected_data_len = dimension.div_ceil(8);
173
174        if bytes.len() < 4 + expected_data_len {
175            return Err(io::Error::new(
176                io::ErrorKind::InvalidData,
177                format!(
178                    "Not enough bytes for BinaryQuantizedVector data: expected {}, got {}",
179                    4 + expected_data_len,
180                    bytes.len()
181                ),
182            ));
183        }
184
185        let data = bytes[4..4 + expected_data_len].to_vec();
186
187        Ok(Self { data, dimension })
188    }
189}
190
191/// A quantized vector using 8-bit scalar quantization.
192///
193/// Each f32 value is mapped to a u8 (0-255) using min/max scaling.
194/// The original value can be reconstructed as: `value = (data[i] / 255.0) * (max - min) + min`
195#[derive(Debug, Clone)]
196pub struct QuantizedVector {
197    /// Quantized data (1 byte per dimension instead of 4).
198    pub data: Vec<u8>,
199    /// Minimum value in the original vector.
200    pub min: f32,
201    /// Maximum value in the original vector.
202    pub max: f32,
203}
204
205impl QuantizedVector {
206    /// Creates a new quantized vector from f32 data.
207    ///
208    /// # Arguments
209    ///
210    /// * `vector` - The original f32 vector to quantize
211    ///
212    /// # Panics
213    ///
214    /// Panics if the vector is empty.
215    #[must_use]
216    pub fn from_f32(vector: &[f32]) -> Self {
217        assert!(!vector.is_empty(), "Cannot quantize empty vector");
218
219        let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
220        let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
221
222        let range = max - min;
223        let data = if range < f32::EPSILON {
224            // All values are the same, map to 128 (middle of range)
225            vec![128u8; vector.len()]
226        } else {
227            let scale = 255.0 / range;
228            #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
229            vector
230                .iter()
231                .map(|&v| {
232                    let normalized = (v - min) * scale;
233                    // Clamp to [0, 255] to handle floating point errors
234                    // Safe: clamped to valid u8 range
235                    normalized.round().clamp(0.0, 255.0) as u8
236                })
237                .collect()
238        };
239
240        Self { data, min, max }
241    }
242
243    /// Reconstructs the original f32 vector from quantized data.
244    ///
245    /// Note: This is a lossy operation. The reconstructed values are approximations.
246    #[must_use]
247    pub fn to_f32(&self) -> Vec<f32> {
248        let range = self.max - self.min;
249        if range < f32::EPSILON {
250            // All values were the same
251            vec![self.min; self.data.len()]
252        } else {
253            let scale = range / 255.0;
254            self.data
255                .iter()
256                .map(|&v| f32::from(v) * scale + self.min)
257                .collect()
258        }
259    }
260
261    /// Returns the dimension of the vector.
262    #[must_use]
263    pub fn dimension(&self) -> usize {
264        self.data.len()
265    }
266
267    /// Returns the memory size in bytes.
268    #[must_use]
269    pub fn memory_size(&self) -> usize {
270        self.data.len() + 8 // data + min(4) + max(4)
271    }
272
273    /// Serializes the quantized vector to bytes.
274    #[must_use]
275    pub fn to_bytes(&self) -> Vec<u8> {
276        let mut bytes = Vec::with_capacity(8 + self.data.len());
277        bytes.extend_from_slice(&self.min.to_le_bytes());
278        bytes.extend_from_slice(&self.max.to_le_bytes());
279        bytes.extend_from_slice(&self.data);
280        bytes
281    }
282
283    /// Deserializes a quantized vector from bytes.
284    ///
285    /// # Errors
286    ///
287    /// Returns an error if the bytes are invalid.
288    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
289        if bytes.len() < 8 {
290            return Err(io::Error::new(
291                io::ErrorKind::InvalidData,
292                "Not enough bytes for QuantizedVector header",
293            ));
294        }
295
296        let min = f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
297        let max = f32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]);
298        let data = bytes[8..].to_vec();
299
300        Ok(Self { data, min, max })
301    }
302}
303
304/// Computes the approximate dot product between a query vector (f32) and a quantized vector.
305///
306/// This avoids full dequantization for better performance.
307#[must_use]
308pub fn dot_product_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
309    debug_assert_eq!(
310        query.len(),
311        quantized.data.len(),
312        "Dimension mismatch in dot_product_quantized"
313    );
314
315    let range = quantized.max - quantized.min;
316    if range < f32::EPSILON {
317        // All quantized values are the same
318        let value = quantized.min;
319        return query.iter().sum::<f32>() * value;
320    }
321
322    let scale = range / 255.0;
323    let offset = quantized.min;
324
325    // Compute dot product with on-the-fly dequantization
326    query
327        .iter()
328        .zip(quantized.data.iter())
329        .map(|(&q, &v)| q * (f32::from(v) * scale + offset))
330        .sum()
331}
332
333/// Computes the approximate squared Euclidean distance between a query (f32) and quantized vector.
334#[must_use]
335pub fn euclidean_squared_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
336    debug_assert_eq!(
337        query.len(),
338        quantized.data.len(),
339        "Dimension mismatch in euclidean_squared_quantized"
340    );
341
342    let range = quantized.max - quantized.min;
343    if range < f32::EPSILON {
344        // All quantized values are the same
345        let value = quantized.min;
346        return query.iter().map(|&q| (q - value).powi(2)).sum();
347    }
348
349    let scale = range / 255.0;
350    let offset = quantized.min;
351
352    query
353        .iter()
354        .zip(quantized.data.iter())
355        .map(|(&q, &v)| {
356            let dequantized = f32::from(v) * scale + offset;
357            (q - dequantized).powi(2)
358        })
359        .sum()
360}
361
362/// Computes approximate cosine similarity between a query (f32) and quantized vector.
363///
364/// Note: For best accuracy, the query should be normalized.
365#[must_use]
366pub fn cosine_similarity_quantized(query: &[f32], quantized: &QuantizedVector) -> f32 {
367    let dot = dot_product_quantized(query, quantized);
368
369    // Compute norms
370    let query_norm: f32 = query.iter().map(|&x| x * x).sum::<f32>().sqrt();
371
372    // Dequantize to compute quantized vector norm (could be cached)
373    let reconstructed = quantized.to_f32();
374    let quantized_norm: f32 = reconstructed.iter().map(|&x| x * x).sum::<f32>().sqrt();
375
376    if query_norm < f32::EPSILON || quantized_norm < f32::EPSILON {
377        return 0.0;
378    }
379
380    dot / (query_norm * quantized_norm)
381}
382
383// =========================================================================
384// SIMD-optimized distance functions for SQ8 quantized vectors
385// =========================================================================
386
387#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
388use std::arch::x86_64::*;
389
390/// SIMD-optimized dot product between f32 query and SQ8 quantized vector.
391///
392/// Uses AVX2 intrinsics on `x86_64` for ~2-3x speedup over scalar.
393/// Falls back to scalar on other architectures.
394#[must_use]
395pub fn dot_product_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
396    debug_assert_eq!(
397        query.len(),
398        quantized.data.len(),
399        "Dimension mismatch in dot_product_quantized_simd"
400    );
401
402    let range = quantized.max - quantized.min;
403    if range < f32::EPSILON {
404        let value = quantized.min;
405        return query.iter().sum::<f32>() * value;
406    }
407
408    let scale = range / 255.0;
409    let offset = quantized.min;
410
411    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
412    {
413        simd_dot_product_avx2(query, &quantized.data, scale, offset)
414    }
415
416    #[cfg(not(all(target_arch = "x86_64", target_feature = "avx2")))]
417    {
418        // Scalar fallback
419        query
420            .iter()
421            .zip(quantized.data.iter())
422            .map(|(&q, &v)| q * (f32::from(v) * scale + offset))
423            .sum()
424    }
425}
426
427#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
428#[inline]
429fn simd_dot_product_avx2(query: &[f32], data: &[u8], scale: f32, offset: f32) -> f32 {
430    let len = query.len();
431    let simd_len = len / 8;
432    let remainder = len % 8;
433
434    let mut sum = 0.0f32;
435
436    // Process 8 elements at a time
437    for i in 0..simd_len {
438        let base = i * 8;
439        // Dequantize and compute dot product for 8 elements
440        for j in 0..8 {
441            let dequant = f32::from(data[base + j]) * scale + offset;
442            sum += query[base + j] * dequant;
443        }
444    }
445
446    // Handle remainder
447    let base = simd_len * 8;
448    for i in 0..remainder {
449        let dequant = f32::from(data[base + i]) * scale + offset;
450        sum += query[base + i] * dequant;
451    }
452
453    sum
454}
455
456/// SIMD-optimized squared Euclidean distance between f32 query and SQ8 vector.
457#[must_use]
458pub fn euclidean_squared_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
459    debug_assert_eq!(
460        query.len(),
461        quantized.data.len(),
462        "Dimension mismatch in euclidean_squared_quantized_simd"
463    );
464
465    let range = quantized.max - quantized.min;
466    if range < f32::EPSILON {
467        let value = quantized.min;
468        return query.iter().map(|&q| (q - value).powi(2)).sum();
469    }
470
471    let scale = range / 255.0;
472    let offset = quantized.min;
473
474    // Optimized loop with manual unrolling
475    let len = query.len();
476    let chunks = len / 4;
477    let remainder = len % 4;
478    let mut sum = 0.0f32;
479
480    for i in 0..chunks {
481        let base = i * 4;
482        let d0 = f32::from(quantized.data[base]) * scale + offset;
483        let d1 = f32::from(quantized.data[base + 1]) * scale + offset;
484        let d2 = f32::from(quantized.data[base + 2]) * scale + offset;
485        let d3 = f32::from(quantized.data[base + 3]) * scale + offset;
486
487        let diff0 = query[base] - d0;
488        let diff1 = query[base + 1] - d1;
489        let diff2 = query[base + 2] - d2;
490        let diff3 = query[base + 3] - d3;
491
492        sum += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
493    }
494
495    let base = chunks * 4;
496    for i in 0..remainder {
497        let dequant = f32::from(quantized.data[base + i]) * scale + offset;
498        let diff = query[base + i] - dequant;
499        sum += diff * diff;
500    }
501
502    sum
503}
504
505/// SIMD-optimized cosine similarity between f32 query and SQ8 vector.
506///
507/// Caches the quantized vector norm for repeated queries against same vector.
508#[must_use]
509pub fn cosine_similarity_quantized_simd(query: &[f32], quantized: &QuantizedVector) -> f32 {
510    let dot = dot_product_quantized_simd(query, quantized);
511
512    // Compute query norm
513    let query_norm_sq: f32 = query.iter().map(|&x| x * x).sum();
514
515    // Compute quantized norm (could be cached in QuantizedVector)
516    let range = quantized.max - quantized.min;
517    let scale = if range < f32::EPSILON {
518        0.0
519    } else {
520        range / 255.0
521    };
522    let offset = quantized.min;
523
524    let quantized_norm_sq: f32 = quantized
525        .data
526        .iter()
527        .map(|&v| {
528            let dequant = f32::from(v) * scale + offset;
529            dequant * dequant
530        })
531        .sum();
532
533    let denom = (query_norm_sq * quantized_norm_sq).sqrt();
534    if denom < f32::EPSILON {
535        return 0.0;
536    }
537
538    dot / denom
539}
540
541#[cfg(test)]
542mod tests {
543    use super::*;
544
545    // =========================================================================
546    // TDD Tests for SIMD Quantized Distance Functions
547    // =========================================================================
548
549    #[test]
550    fn test_dot_product_quantized_simd_simple() {
551        let query = vec![1.0, 0.0, 0.0];
552        let vector = vec![1.0, 0.0, 0.0];
553        let quantized = QuantizedVector::from_f32(&vector);
554
555        let result = dot_product_quantized_simd(&query, &quantized);
556        assert!(
557            (result - 1.0).abs() < 0.1,
558            "Result {result} not close to 1.0"
559        );
560    }
561
562    #[test]
563    #[allow(clippy::cast_precision_loss)]
564    fn test_dot_product_quantized_simd_768d() {
565        let dimension = 768;
566        let query: Vec<f32> = (0..dimension).map(|i| (i as f32) / 1000.0).collect();
567        let vector: Vec<f32> = (0..dimension).map(|i| (i as f32) / 1000.0).collect();
568        let quantized = QuantizedVector::from_f32(&vector);
569
570        let scalar = dot_product_quantized(&query, &quantized);
571        let simd = dot_product_quantized_simd(&query, &quantized);
572
573        // Results should be very close
574        let rel_error = ((scalar - simd) / scalar).abs();
575        assert!(rel_error < 0.01, "Relative error {rel_error} too high");
576    }
577
578    #[test]
579    #[allow(clippy::cast_precision_loss)]
580    fn test_euclidean_squared_quantized_simd_768d() {
581        let dimension = 768;
582        let query: Vec<f32> = (0..dimension).map(|i| (i as f32) / 1000.0).collect();
583        let vector: Vec<f32> = (0..dimension).map(|i| ((i + 10) as f32) / 1000.0).collect();
584        let quantized = QuantizedVector::from_f32(&vector);
585
586        let scalar = euclidean_squared_quantized(&query, &quantized);
587        let simd = euclidean_squared_quantized_simd(&query, &quantized);
588
589        let rel_error = ((scalar - simd) / scalar).abs();
590        assert!(rel_error < 0.01, "Relative error {rel_error} too high");
591    }
592
593    #[test]
594    fn test_cosine_similarity_quantized_simd_identical() {
595        let vector = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
596        let quantized = QuantizedVector::from_f32(&vector);
597
598        let similarity = cosine_similarity_quantized_simd(&vector, &quantized);
599        assert!(
600            (similarity - 1.0).abs() < 0.05,
601            "Similarity {similarity} not close to 1.0"
602        );
603    }
604
605    // =========================================================================
606    // TDD Tests for QuantizedVector
607    // =========================================================================
608
609    #[test]
610    fn test_quantize_simple_vector() {
611        // Arrange
612        let vector = vec![0.0, 0.5, 1.0];
613
614        // Act
615        let quantized = QuantizedVector::from_f32(&vector);
616
617        // Assert
618        assert_eq!(quantized.dimension(), 3);
619        assert!((quantized.min - 0.0).abs() < f32::EPSILON);
620        assert!((quantized.max - 1.0).abs() < f32::EPSILON);
621        assert_eq!(quantized.data[0], 0); // 0.0 -> 0
622        assert_eq!(quantized.data[1], 128); // 0.5 -> ~128
623        assert_eq!(quantized.data[2], 255); // 1.0 -> 255
624    }
625
626    #[test]
627    fn test_quantize_negative_values() {
628        // Arrange
629        let vector = vec![-1.0, 0.0, 1.0];
630
631        // Act
632        let quantized = QuantizedVector::from_f32(&vector);
633
634        // Assert
635        assert!((quantized.min - (-1.0)).abs() < f32::EPSILON);
636        assert!((quantized.max - 1.0).abs() < f32::EPSILON);
637        assert_eq!(quantized.data[0], 0); // -1.0 -> 0
638        assert_eq!(quantized.data[1], 128); // 0.0 -> ~128
639        assert_eq!(quantized.data[2], 255); // 1.0 -> 255
640    }
641
642    #[test]
643    fn test_quantize_constant_vector() {
644        // Arrange - all values the same
645        let vector = vec![0.5, 0.5, 0.5];
646
647        // Act
648        let quantized = QuantizedVector::from_f32(&vector);
649
650        // Assert - should handle gracefully
651        assert_eq!(quantized.dimension(), 3);
652        // All values should be middle (128)
653        for &v in &quantized.data {
654            assert_eq!(v, 128);
655        }
656    }
657
658    #[test]
659    fn test_dequantize_roundtrip() {
660        // Arrange
661        let original = vec![0.1, 0.5, 0.9, -0.3, 0.0];
662
663        // Act
664        let quantized = QuantizedVector::from_f32(&original);
665        let reconstructed = quantized.to_f32();
666
667        // Assert - reconstructed should be close to original
668        assert_eq!(reconstructed.len(), original.len());
669        for (orig, recon) in original.iter().zip(reconstructed.iter()) {
670            let error = (orig - recon).abs();
671            // Max error should be less than range/255
672            let max_error = (quantized.max - quantized.min) / 255.0;
673            assert!(
674                error <= max_error + f32::EPSILON,
675                "Error {error} exceeds max {max_error}"
676            );
677        }
678    }
679
680    #[test]
681    #[allow(clippy::cast_precision_loss)]
682    fn test_memory_reduction() {
683        // Arrange
684        let dimension = 768;
685        let vector: Vec<f32> = (0..dimension)
686            .map(|i| i as f32 / dimension as f32)
687            .collect();
688
689        // Act
690        let quantized = QuantizedVector::from_f32(&vector);
691
692        // Assert
693        let f32_size = dimension * 4; // 3072 bytes
694        let sq8_size = quantized.memory_size(); // 768 + 8 = 776 bytes
695
696        assert_eq!(f32_size, 3072);
697        assert_eq!(sq8_size, 776);
698        // ~4x reduction
699        #[allow(clippy::cast_precision_loss)]
700        let ratio = f32_size as f32 / sq8_size as f32;
701        assert!(ratio > 3.9);
702    }
703
704    #[test]
705    fn test_serialization_roundtrip() {
706        // Arrange
707        let vector = vec![0.1, 0.5, 0.9, -0.3];
708        let quantized = QuantizedVector::from_f32(&vector);
709
710        // Act
711        let bytes = quantized.to_bytes();
712        let deserialized = QuantizedVector::from_bytes(&bytes).unwrap();
713
714        // Assert
715        assert!((deserialized.min - quantized.min).abs() < f32::EPSILON);
716        assert!((deserialized.max - quantized.max).abs() < f32::EPSILON);
717        assert_eq!(deserialized.data, quantized.data);
718    }
719
720    #[test]
721    fn test_from_bytes_invalid() {
722        // Arrange - too few bytes
723        let bytes = vec![0u8; 5];
724
725        // Act
726        let result = QuantizedVector::from_bytes(&bytes);
727
728        // Assert
729        assert!(result.is_err());
730    }
731
732    // =========================================================================
733    // TDD Tests for Distance Functions
734    // =========================================================================
735
736    #[test]
737    fn test_dot_product_quantized_simple() {
738        // Arrange
739        let query = vec![1.0, 0.0, 0.0];
740        let vector = vec![1.0, 0.0, 0.0];
741        let quantized = QuantizedVector::from_f32(&vector);
742
743        // Act
744        let dot = dot_product_quantized(&query, &quantized);
745
746        // Assert - should be close to 1.0
747        assert!(
748            (dot - 1.0).abs() < 0.1,
749            "Dot product {dot} not close to 1.0"
750        );
751    }
752
753    #[test]
754    fn test_dot_product_quantized_orthogonal() {
755        // Arrange
756        let query = vec![1.0, 0.0, 0.0];
757        let vector = vec![0.0, 1.0, 0.0];
758        let quantized = QuantizedVector::from_f32(&vector);
759
760        // Act
761        let dot = dot_product_quantized(&query, &quantized);
762
763        // Assert - should be close to 0
764        assert!(dot.abs() < 0.1, "Dot product {dot} not close to 0");
765    }
766
767    #[test]
768    fn test_euclidean_squared_quantized() {
769        // Arrange
770        let query = vec![0.0, 0.0, 0.0];
771        let vector = vec![1.0, 0.0, 0.0];
772        let quantized = QuantizedVector::from_f32(&vector);
773
774        // Act
775        let dist = euclidean_squared_quantized(&query, &quantized);
776
777        // Assert - should be close to 1.0
778        assert!(
779            (dist - 1.0).abs() < 0.1,
780            "Euclidean squared {dist} not close to 1.0"
781        );
782    }
783
784    #[test]
785    fn test_euclidean_squared_quantized_same_point() {
786        // Arrange
787        let vector = vec![0.5, 0.5, 0.5];
788        let quantized = QuantizedVector::from_f32(&vector);
789
790        // Act
791        let dist = euclidean_squared_quantized(&vector, &quantized);
792
793        // Assert - distance to self should be ~0
794        assert!(dist < 0.01, "Distance to self {dist} should be ~0");
795    }
796
797    #[test]
798    fn test_cosine_similarity_quantized_identical() {
799        // Arrange
800        let vector = vec![1.0, 2.0, 3.0];
801        let quantized = QuantizedVector::from_f32(&vector);
802
803        // Act
804        let similarity = cosine_similarity_quantized(&vector, &quantized);
805
806        // Assert - similarity to self should be ~1.0
807        assert!(
808            (similarity - 1.0).abs() < 0.05,
809            "Cosine similarity to self {similarity} not close to 1.0"
810        );
811    }
812
813    #[test]
814    fn test_cosine_similarity_quantized_opposite() {
815        // Arrange
816        let query = vec![1.0, 0.0, 0.0];
817        let vector = vec![-1.0, 0.0, 0.0];
818        let quantized = QuantizedVector::from_f32(&vector);
819
820        // Act
821        let similarity = cosine_similarity_quantized(&query, &quantized);
822
823        // Assert - opposite vectors should have similarity ~-1.0
824        assert!(
825            (similarity - (-1.0)).abs() < 0.1,
826            "Cosine similarity {similarity} not close to -1.0"
827        );
828    }
829
830    // =========================================================================
831    // TDD Tests for Recall Accuracy
832    // =========================================================================
833
834    #[test]
835    #[allow(clippy::cast_precision_loss)]
836    fn test_recall_accuracy_high_dimension() {
837        // Arrange - simulate real embedding vectors
838        let dimension = 768;
839        let num_vectors = 100;
840
841        // Generate random-ish vectors
842        let vectors: Vec<Vec<f32>> = (0..num_vectors)
843            .map(|i| {
844                (0..dimension)
845                    .map(|j| {
846                        let x = ((i * 7 + j * 13) % 1000) as f32 / 1000.0;
847                        x * 2.0 - 1.0 // Range [-1, 1]
848                    })
849                    .collect()
850            })
851            .collect();
852
853        // Quantize all vectors
854        let quantized: Vec<QuantizedVector> = vectors
855            .iter()
856            .map(|v| QuantizedVector::from_f32(v))
857            .collect();
858
859        // Query vector
860        let query = &vectors[0];
861
862        // Act - compute distances with both methods
863        let mut f32_distances: Vec<(usize, f32)> = vectors
864            .iter()
865            .enumerate()
866            .map(|(i, v)| {
867                let dot: f32 = query.iter().zip(v.iter()).map(|(a, b)| a * b).sum();
868                (i, dot)
869            })
870            .collect();
871
872        let mut sq8_distances: Vec<(usize, f32)> = quantized
873            .iter()
874            .enumerate()
875            .map(|(i, q)| (i, dot_product_quantized(query, q)))
876            .collect();
877
878        // Sort by distance (descending for dot product)
879        f32_distances.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
880        sq8_distances.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
881
882        // Assert - check recall@10
883        let k = 10;
884        let f32_top_k: std::collections::HashSet<usize> =
885            f32_distances.iter().take(k).map(|(i, _)| *i).collect();
886        let sq8_top_k: std::collections::HashSet<usize> =
887            sq8_distances.iter().take(k).map(|(i, _)| *i).collect();
888
889        let recall = f32_top_k.intersection(&sq8_top_k).count() as f32 / k as f32;
890
891        assert!(
892            recall >= 0.8,
893            "Recall@{k} is {recall}, expected >= 0.8 (80%)"
894        );
895    }
896
897    #[test]
898    fn test_storage_mode_enum() {
899        // Arrange & Act
900        let full = StorageMode::Full;
901        let sq8 = StorageMode::SQ8;
902        let binary = StorageMode::Binary;
903        let default = StorageMode::default();
904
905        // Assert
906        assert_eq!(full, StorageMode::Full);
907        assert_eq!(sq8, StorageMode::SQ8);
908        assert_eq!(binary, StorageMode::Binary);
909        assert_eq!(default, StorageMode::Full);
910        assert_ne!(full, sq8);
911        assert_ne!(sq8, binary);
912    }
913
914    // =========================================================================
915    // TDD Tests for BinaryQuantizedVector
916    // =========================================================================
917
918    #[test]
919    fn test_binary_quantize_simple_vector() {
920        // Arrange - positive values become 1, negative become 0
921        let vector = vec![-1.0, 0.5, -0.5, 1.0];
922
923        // Act
924        let binary = BinaryQuantizedVector::from_f32(&vector);
925
926        // Assert
927        assert_eq!(binary.dimension(), 4);
928        // Bit pattern: 0, 1, 0, 1 = 0b0101 = 5 (reversed in byte)
929        // Actually stored as: bit 0 = vec[0], bit 1 = vec[1], etc.
930        // -1.0 -> 0, 0.5 -> 1, -0.5 -> 0, 1.0 -> 1
931        // Bits: 0b1010 when read left to right, but stored as 0b0101
932        assert_eq!(binary.data.len(), 1); // 4 bits fits in 1 byte
933    }
934
935    #[test]
936    fn test_binary_quantize_768d_memory() {
937        // Arrange - simulate real embedding dimension
938        let vector: Vec<f32> = (0..768)
939            .map(|i| if i % 2 == 0 { 0.5 } else { -0.5 })
940            .collect();
941
942        // Act
943        let binary = BinaryQuantizedVector::from_f32(&vector);
944
945        // Assert - 768 bits = 96 bytes
946        assert_eq!(binary.dimension(), 768);
947        assert_eq!(binary.data.len(), 96); // 768 / 8 = 96 bytes
948
949        // Memory comparison:
950        // f32: 768 * 4 = 3072 bytes
951        // Binary: 96 bytes
952        // Ratio: 32x reduction!
953        let f32_size = 768 * 4;
954        let binary_size = binary.memory_size();
955        assert_eq!(binary_size, 96);
956        #[allow(clippy::cast_precision_loss)]
957        let ratio = f32_size as f32 / binary_size as f32;
958        assert!(ratio >= 32.0, "Expected 32x reduction, got {ratio}x");
959    }
960
961    #[test]
962    fn test_binary_quantize_threshold_at_zero() {
963        // Arrange - test threshold behavior
964        let vector = vec![0.0, 0.001, -0.001, f32::EPSILON];
965
966        // Act
967        let binary = BinaryQuantizedVector::from_f32(&vector);
968
969        // Assert - 0.0 and positive become 1, negative become 0
970        // Using >= 0.0 as threshold
971        let bits = binary.get_bits();
972        assert!(bits[0], "0.0 should be 1");
973        assert!(bits[1], "0.001 should be 1");
974        assert!(!bits[2], "-0.001 should be 0");
975        assert!(bits[3], "EPSILON should be 1");
976    }
977
978    #[test]
979    fn test_binary_hamming_distance_identical() {
980        // Arrange
981        let vector = vec![0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5];
982        let binary = BinaryQuantizedVector::from_f32(&vector);
983
984        // Act
985        let distance = binary.hamming_distance(&binary);
986
987        // Assert - identical vectors have 0 distance
988        assert_eq!(distance, 0);
989    }
990
991    #[test]
992    fn test_binary_hamming_distance_opposite() {
993        // Arrange
994        let v1 = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
995        let v2 = vec![-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0];
996        let b1 = BinaryQuantizedVector::from_f32(&v1);
997        let b2 = BinaryQuantizedVector::from_f32(&v2);
998
999        // Act
1000        let distance = b1.hamming_distance(&b2);
1001
1002        // Assert - all bits different = 8 distance
1003        assert_eq!(distance, 8);
1004    }
1005
1006    #[test]
1007    fn test_binary_hamming_distance_half_different() {
1008        // Arrange - half the bits differ
1009        let v1 = vec![1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0];
1010        let v2 = vec![1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0];
1011        let b1 = BinaryQuantizedVector::from_f32(&v1);
1012        let b2 = BinaryQuantizedVector::from_f32(&v2);
1013
1014        // Act
1015        let distance = b1.hamming_distance(&b2);
1016
1017        // Assert - 4 bits differ
1018        assert_eq!(distance, 4);
1019    }
1020
1021    #[test]
1022    fn test_binary_serialization_roundtrip() {
1023        // Arrange
1024        let vector: Vec<f32> = (0..768)
1025            .map(|i| if i % 3 == 0 { 0.5 } else { -0.5 })
1026            .collect();
1027        let binary = BinaryQuantizedVector::from_f32(&vector);
1028
1029        // Act
1030        let bytes = binary.to_bytes();
1031        let deserialized = BinaryQuantizedVector::from_bytes(&bytes).unwrap();
1032
1033        // Assert
1034        assert_eq!(deserialized.dimension(), binary.dimension());
1035        assert_eq!(deserialized.data, binary.data);
1036        assert_eq!(deserialized.hamming_distance(&binary), 0);
1037    }
1038
1039    #[test]
1040    fn test_binary_from_bytes_invalid() {
1041        // Arrange - too few bytes for header
1042        let bytes = vec![0u8; 3];
1043
1044        // Act
1045        let result = BinaryQuantizedVector::from_bytes(&bytes);
1046
1047        // Assert
1048        assert!(result.is_err());
1049    }
1050}