oxify_vector/quantization.rs

//! Vector Quantization for Memory Optimization
//!
//! Provides scalar (8-bit), 4-bit, binary (1-bit), and FP16 (half-precision) quantization.
//!
//! ## Scalar Quantization (8-bit)
//! Compresses float32 vectors to int8/uint8, reducing memory usage by ~4x.
//!
//! **Benefits:**
//! - **Memory**: 4x reduction (float32 → uint8)
//! - **Speed**: Faster distance computations with SIMD
//! - **Scalability**: Fit 4x more vectors in memory
//!
//! **Trade-offs:**
//! - Small accuracy loss (~1-2% recall degradation)
//! - One-time quantization cost during build
//!
//! ## 4-bit Quantization
//! Compresses float32 vectors to 4-bit, reducing memory usage by ~8x.
//!
//! **Benefits:**
//! - **Memory**: 8x reduction (float32 → 4-bit)
//! - **Speed**: Fast distance computations with nibble packing
//! - **Scalability**: Fit 8x more vectors in memory
//! - **Sweet Spot**: Best balance between memory and accuracy
//!
//! **Trade-offs:**
//! - Moderate accuracy loss (~2-4% recall degradation)
//! - Nibble packing/unpacking overhead
//!
//! ## Binary Quantization (1-bit)
//! Compresses float32 vectors to 1-bit, reducing memory usage by ~32x.
//!
//! **Benefits:**
//! - **Memory**: 32x reduction (float32 → 1-bit)
//! - **Speed**: Extremely fast Hamming distance with bitwise operations
//! - **Scalability**: Fit 32x more vectors in memory
//!
//! **Trade-offs:**
//! - Higher accuracy loss (~5-10% recall degradation)
//! - Best for high-dimensional vectors (>128 dims)
//!
//! ## FP16 (Half-Precision) Quantization
//! Compresses float32 vectors to float16 (16-bit), reducing memory usage by 2x.
//!
//! **Benefits:**
//! - **Memory**: 2x reduction (float32 → float16)
//! - **Accuracy**: Minimal accuracy loss (<0.1% recall degradation)
//! - **Speed**: No quantization overhead, direct float16 operations
//! - **Hardware Support**: Native support on modern CPUs/GPUs
//!
//! **Trade-offs:**
//! - Lower compression ratio than 8-bit/4-bit/binary quantization
//! - Requires FP16 hardware support for maximum performance
//!
//! ## Example
//!
//! ```rust
//! use oxify_vector::quantization::{ScalarQuantizer, QuantizationConfig};
//!
//! let config = QuantizationConfig::default();
//! let mut quantizer = ScalarQuantizer::new(config);
//!
//! // Fit quantizer to data
//! let vectors = vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]];
//! quantizer.fit(&vectors).unwrap();
//!
//! // Quantize a vector
//! let quantized = quantizer.quantize(&[1.5, 2.5, 3.5]);
//! assert_eq!(quantized.len(), 3);
//!
//! // Dequantize back to floats
//! let dequantized = quantizer.dequantize(&quantized);
//! assert_eq!(dequantized.len(), 3);
//! ```
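//!
//! A minimal sketch of the binary path with Hamming distance (same crate types
//! as below; the exact distance depends on the thresholds fitted from the data):
//!
//! ```rust
//! use oxify_vector::quantization::{BinaryQuantizer, BinaryQuantizationConfig};
//!
//! let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
//! quantizer.fit(&[vec![0.0, 1.0, 2.0], vec![2.0, 1.0, 0.0]]).unwrap();
//!
//! let a = quantizer.quantize(&[0.0, 1.0, 2.0]);
//! let b = quantizer.quantize(&[2.0, 1.0, 0.0]);
//! assert!(quantizer.hamming_distance(&a, &b) <= 3); // at most 3 bits can differ
//! ```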

use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};

use crate::simd::quantized_manhattan_distance_simd;

/// Quantization configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizationConfig {
    /// Number of bits per value (currently only 8-bit supported)
    pub bits: u8,
    /// Whether to use signed quantization (int8) or unsigned (uint8)
    pub signed: bool,
}

impl Default for QuantizationConfig {
    fn default() -> Self {
        Self {
            bits: 8,
            signed: false, // uint8 by default
        }
    }
}

/// Scalar quantizer for compressing float32 vectors
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalarQuantizer {
    config: QuantizationConfig,
    /// Per-dimension minimum values
    min_vals: Vec<f32>,
    /// Per-dimension maximum values
    max_vals: Vec<f32>,
    /// Per-dimension scale factors
    scales: Vec<f32>,
    /// Number of dimensions
    dimensions: usize,
    /// Whether the quantizer has been fitted
    is_fitted: bool,
}

impl ScalarQuantizer {
    /// Create a new scalar quantizer
    pub fn new(config: QuantizationConfig) -> Self {
        Self {
            config,
            min_vals: Vec::new(),
            max_vals: Vec::new(),
            scales: Vec::new(),
            dimensions: 0,
            is_fitted: false,
        }
    }

    /// Fit quantizer to training data
    ///
    /// Computes per-dimension min/max values for quantization.
    pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot fit quantizer on empty data"));
        }

        let dim = vectors[0].len();
        if vectors.iter().any(|v| v.len() != dim) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }

        self.dimensions = dim;
        self.min_vals = vec![f32::INFINITY; dim];
        self.max_vals = vec![f32::NEG_INFINITY; dim];

        // Compute per-dimension min/max
        for vector in vectors {
            for (i, &val) in vector.iter().enumerate() {
                self.min_vals[i] = self.min_vals[i].min(val);
                self.max_vals[i] = self.max_vals[i].max(val);
            }
        }

        // Compute scale factors
        self.scales = Vec::with_capacity(dim);
        let max_quant_val = if self.config.signed { 127.0 } else { 255.0 };

        for i in 0..dim {
            let range = self.max_vals[i] - self.min_vals[i];
            // Avoid division by zero for constant dimensions
            self.scales.push(if range > 1e-10 {
                max_quant_val / range
            } else {
                1.0
            });
        }

        self.is_fitted = true;
        Ok(())
    }

    /// Quantize a float32 vector to uint8/int8
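    ///
    /// A minimal sketch of the min-max mapping: values at the fitted minimum
    /// map to 0 and values at the fitted maximum map to 255 (unsigned default).
    ///
    /// ```rust
    /// use oxify_vector::quantization::{ScalarQuantizer, QuantizationConfig};
    ///
    /// let mut q = ScalarQuantizer::new(QuantizationConfig::default());
    /// q.fit(&[vec![0.0], vec![10.0]]).unwrap();
    /// assert_eq!(q.quantize(&[0.0]), vec![0]);
    /// assert_eq!(q.quantize(&[10.0]), vec![255]);
    /// ```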
    pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");

        vector
            .iter()
            .enumerate()
            .map(|(i, &val)| {
                // Clip to [min, max]
                let clipped = val.max(self.min_vals[i]).min(self.max_vals[i]);
                // Scale to [0, 255] (or [0, 127] when signed)
                let scaled = (clipped - self.min_vals[i]) * self.scales[i];
                // Round and convert to u8
                scaled.round().clamp(0.0, 255.0) as u8
            })
            .collect()
    }

    /// Dequantize a uint8/int8 vector back to float32
    pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(
            quantized.len(),
            self.dimensions,
            "Quantized vector dimension mismatch"
        );

        quantized
            .iter()
            .enumerate()
            .map(|(i, &val)| {
                // Convert back to float and rescale
                let scaled = val as f32 / self.scales[i];
                scaled + self.min_vals[i]
            })
            .collect()
    }

    /// Quantize multiple vectors
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
        vectors.iter().map(|v| self.quantize(v)).collect()
    }

    /// Dequantize multiple vectors
    pub fn dequantize_batch(&self, quantized: &[Vec<u8>]) -> Vec<Vec<f32>> {
        quantized.iter().map(|v| self.dequantize(v)).collect()
    }

    /// Compute approximate distance between quantized vectors
    ///
    /// This is faster than dequantizing and computing distance on float32.
    /// Uses SIMD-optimized Manhattan distance for maximum performance.
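    ///
    /// A small sketch, assuming the SIMD kernel computes plain Manhattan
    /// distance over the raw u8 codes (no fit is required; only the lengths
    /// must match):
    ///
    /// ```rust
    /// # use oxify_vector::quantization::{ScalarQuantizer, QuantizationConfig};
    /// let q = ScalarQuantizer::new(QuantizationConfig::default());
    /// assert_eq!(q.quantized_distance(&[0, 10], &[3, 14]), 7.0); // |0-3| + |10-14|
    /// ```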
    pub fn quantized_distance(&self, a: &[u8], b: &[u8]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vector dimension mismatch");

        // Use SIMD-optimized Manhattan distance on quantized values
        quantized_manhattan_distance_simd(a, b) as f32
    }

    /// Get compression ratio
    pub fn compression_ratio(&self) -> f32 {
        // float32 → uint8 = 4x compression
        4.0
    }

    /// Get memory savings as a fraction of the original size
    pub fn memory_savings(&self) -> f32 {
        // 75% memory savings (1/4 of original size)
        0.75
    }

    /// Check if quantizer is fitted
    pub fn is_fitted(&self) -> bool {
        self.is_fitted
    }

    /// Get number of dimensions
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}

/// Quantized vector index for memory-efficient search
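///
/// A minimal end-to-end sketch (IDs and values are illustrative; the result
/// assumes the Manhattan-style quantized distance defined above):
///
/// ```rust
/// use oxify_vector::quantization::{QuantizedVectorIndex, QuantizationConfig};
///
/// let mut index = QuantizedVectorIndex::new(QuantizationConfig::default());
/// index
///     .build(&[
///         ("a".to_string(), vec![0.0, 0.0]),
///         ("b".to_string(), vec![1.0, 1.0]),
///     ])
///     .unwrap();
///
/// let results = index.search(&[0.1, 0.1], 1).unwrap();
/// assert_eq!(results[0].0, "a");
/// ```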
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedVectorIndex {
    quantizer: ScalarQuantizer,
    /// Quantized vectors
    quantized_vectors: Vec<Vec<u8>>,
    /// Entity IDs
    entity_ids: Vec<String>,
}

impl QuantizedVectorIndex {
    /// Create a new quantized index
    pub fn new(config: QuantizationConfig) -> Self {
        Self {
            quantizer: ScalarQuantizer::new(config),
            quantized_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Build index from float32 vectors
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot build index from empty vectors"));
        }

        // Extract float vectors for fitting
        let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();

        // Fit quantizer
        self.quantizer.fit(&float_vecs)?;

        // Quantize all vectors
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        self.quantized_vectors = self.quantizer.quantize_batch(&float_vecs);

        Ok(())
    }

    /// Search for k nearest neighbors
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if !self.quantizer.is_fitted() {
            return Err(anyhow!("Index not built"));
        }

        // Quantize query
        let quantized_query = self.quantizer.quantize(query);

        // Compute distances to all vectors
        let mut distances: Vec<(usize, f32)> = self
            .quantized_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.quantized_distance(&quantized_query, v)))
            .collect();

        // Sort by distance (ascending)
        distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

        // Return top-k
        Ok(distances
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
            .collect())
    }

    /// Get index statistics
    pub fn stats(&self) -> QuantizedIndexStats {
        let num_vectors = self.quantized_vectors.len();
        let dimensions = self.quantizer.dimensions();
        let original_bytes = num_vectors * dimensions * 4; // float32
        let quantized_bytes = num_vectors * dimensions; // uint8

        QuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            quantized_bytes,
        }
    }
}

/// Statistics for quantized index
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedIndexStats {
    pub num_vectors: usize,
    pub dimensions: usize,
    pub compression_ratio: f32,
    pub memory_savings: f32,
    pub original_bytes: usize,
    pub quantized_bytes: usize,
}

// ============================================================================
// Binary Quantization (1-bit per dimension)
// ============================================================================

/// Binary quantization configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizationConfig {
    /// Use mean-based thresholding (vs zero-based)
    pub use_mean_threshold: bool,
}

impl Default for BinaryQuantizationConfig {
    fn default() -> Self {
        Self {
            use_mean_threshold: true,
        }
    }
}

/// Binary quantizer for extreme memory compression (32x reduction)
///
/// Converts float32 vectors to 1-bit by thresholding each dimension.
/// Uses efficient bit-packing and Hamming distance for similarity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizer {
    config: BinaryQuantizationConfig,
    /// Per-dimension thresholds (mean or zero)
    thresholds: Vec<f32>,
    /// Number of dimensions
    dimensions: usize,
    /// Whether the quantizer has been fitted
    is_fitted: bool,
}

impl BinaryQuantizer {
    /// Create a new binary quantizer
    pub fn new(config: BinaryQuantizationConfig) -> Self {
        Self {
            config,
            thresholds: Vec::new(),
            dimensions: 0,
            is_fitted: false,
        }
    }

    /// Fit quantizer to training data
    ///
    /// Computes per-dimension thresholds (mean or zero).
    pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot fit quantizer on empty data"));
        }

        let dim = vectors[0].len();
        if vectors.iter().any(|v| v.len() != dim) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }

        self.dimensions = dim;

        if self.config.use_mean_threshold {
            // Compute per-dimension means as thresholds
            self.thresholds = vec![0.0; dim];
            for vector in vectors {
                for (i, &val) in vector.iter().enumerate() {
                    self.thresholds[i] += val;
                }
            }
            let count = vectors.len() as f32;
            for threshold in &mut self.thresholds {
                *threshold /= count;
            }
        } else {
            // Use zero threshold
            self.thresholds = vec![0.0; dim];
        }

        self.is_fitted = true;
        Ok(())
    }

    /// Quantize a float32 vector to binary (packed as u8 array)
    ///
    /// Each bit represents whether the value is above the threshold.
    /// Bits are packed into u8 bytes (8 bits per byte).
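    ///
    /// A small packing sketch with zero thresholds (bit `i % 8` of byte
    /// `i / 8` is set when `vector[i]` exceeds its threshold):
    ///
    /// ```rust
    /// use oxify_vector::quantization::{BinaryQuantizer, BinaryQuantizationConfig};
    ///
    /// let mut q = BinaryQuantizer::new(BinaryQuantizationConfig {
    ///     use_mean_threshold: false,
    /// });
    /// q.fit(&[vec![0.0, 0.0, 0.0]]).unwrap();
    /// // -1.0 → 0, 2.0 → 1, 3.0 → 1, packed little-endian within the byte
    /// assert_eq!(q.quantize(&[-1.0, 2.0, 3.0]), vec![0b0000_0110]);
    /// ```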
    pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");

        // Calculate number of bytes needed (ceiling division)
        let num_bytes = self.dimensions.div_ceil(8);
        let mut binary = vec![0u8; num_bytes];

        for (i, &val) in vector.iter().enumerate() {
            if val > self.thresholds[i] {
                let byte_idx = i / 8;
                let bit_idx = i % 8;
                binary[byte_idx] |= 1u8 << bit_idx;
            }
        }

        binary
    }

    /// Dequantize a binary vector back to float32 (approximate)
    ///
    /// Reconstructs as threshold ± 1.0 based on bit values.
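    ///
    /// A reconstruction sketch with zero thresholds (set bits map to +1.0,
    /// cleared bits to -1.0):
    ///
    /// ```rust
    /// # use oxify_vector::quantization::{BinaryQuantizer, BinaryQuantizationConfig};
    /// let mut q = BinaryQuantizer::new(BinaryQuantizationConfig {
    ///     use_mean_threshold: false,
    /// });
    /// q.fit(&[vec![0.0, 0.0, 0.0]]).unwrap();
    /// assert_eq!(q.dequantize(&[0b0000_0001]), vec![1.0, -1.0, -1.0]);
    /// ```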
    pub fn dequantize(&self, binary: &[u8]) -> Vec<f32> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        let expected_bytes = self.dimensions.div_ceil(8);
        assert_eq!(binary.len(), expected_bytes, "Binary vector size mismatch");

        let mut vector = Vec::with_capacity(self.dimensions);
        for i in 0..self.dimensions {
            let byte_idx = i / 8;
            let bit_idx = i % 8;
            let bit_set = (binary[byte_idx] >> bit_idx) & 1 == 1;

            // Reconstruct as threshold ± 1.0
            let val = if bit_set {
                self.thresholds[i] + 1.0
            } else {
                self.thresholds[i] - 1.0
            };
            vector.push(val);
        }

        vector
    }

    /// Quantize multiple vectors
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
        vectors.iter().map(|v| self.quantize(v)).collect()
    }

    /// Dequantize multiple vectors
    pub fn dequantize_batch(&self, binary: &[Vec<u8>]) -> Vec<Vec<f32>> {
        binary.iter().map(|v| self.dequantize(v)).collect()
    }

    /// Compute Hamming distance between binary vectors
    ///
    /// Hamming distance = number of differing bits (very fast with XOR + popcount).
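    ///
    /// A one-byte sketch (no fit required; only the lengths must match):
    ///
    /// ```rust
    /// # use oxify_vector::quantization::{BinaryQuantizer, BinaryQuantizationConfig};
    /// let q = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    /// // 0b1010 ^ 0b0110 = 0b1100 → two differing bits
    /// assert_eq!(q.hamming_distance(&[0b1010], &[0b0110]), 2);
    /// ```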
    #[inline]
    pub fn hamming_distance(&self, a: &[u8], b: &[u8]) -> u32 {
        assert_eq!(a.len(), b.len(), "Binary vector size mismatch");

        a.iter()
            .zip(b.iter())
            .map(|(&x, &y)| (x ^ y).count_ones())
            .sum()
    }

    /// Compute normalized Hamming similarity (0.0 to 1.0)
    ///
    /// Similarity = 1 - (hamming_distance / num_bits)
    #[inline]
    pub fn hamming_similarity(&self, a: &[u8], b: &[u8]) -> f32 {
        let distance = self.hamming_distance(a, b);
        1.0 - (distance as f32 / self.dimensions as f32)
    }

    /// Get compression ratio
    pub fn compression_ratio(&self) -> f32 {
        // float32 (4 bytes) → 1 bit (1/8 byte) = 32x compression
        32.0
    }

    /// Get memory savings as a fraction of the original size
    pub fn memory_savings(&self) -> f32 {
        // 96.875% memory savings (1/32 of original size)
        0.96875
    }

    /// Check if quantizer is fitted
    pub fn is_fitted(&self) -> bool {
        self.is_fitted
    }

    /// Get number of dimensions
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}

/// Binary quantized vector index for extreme memory efficiency
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizedIndex {
    quantizer: BinaryQuantizer,
    /// Binary quantized vectors
    binary_vectors: Vec<Vec<u8>>,
    /// Entity IDs
    entity_ids: Vec<String>,
}

impl BinaryQuantizedIndex {
    /// Create a new binary quantized index
    pub fn new(config: BinaryQuantizationConfig) -> Self {
        Self {
            quantizer: BinaryQuantizer::new(config),
            binary_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Build index from float32 vectors
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot build index from empty vectors"));
        }

        // Extract float vectors for fitting
        let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();

        // Fit quantizer
        self.quantizer.fit(&float_vecs)?;

        // Quantize all vectors
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        self.binary_vectors = self.quantizer.quantize_batch(&float_vecs);

        Ok(())
    }

    /// Search for k nearest neighbors using Hamming similarity
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if !self.quantizer.is_fitted() {
            return Err(anyhow!("Index not built"));
        }

        // Quantize query
        let binary_query = self.quantizer.quantize(query);

        // Compute similarities to all vectors
        let mut similarities: Vec<(usize, f32)> = self
            .binary_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.hamming_similarity(&binary_query, v)))
            .collect();

        // Sort by similarity (descending - higher is better)
        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        // Return top-k
        Ok(similarities
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, sim)| (self.entity_ids[*idx].clone(), *sim))
            .collect())
    }

    /// Get index statistics
    pub fn stats(&self) -> BinaryQuantizedIndexStats {
        let num_vectors = self.binary_vectors.len();
        let dimensions = self.quantizer.dimensions();
        let original_bytes = num_vectors * dimensions * 4; // float32
        let binary_bytes = num_vectors * dimensions.div_ceil(8); // 1 bit per dim, packed

        BinaryQuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            binary_bytes,
        }
    }
}

/// Statistics for binary quantized index
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizedIndexStats {
    pub num_vectors: usize,
    pub dimensions: usize,
    pub compression_ratio: f32,
    pub memory_savings: f32,
    pub original_bytes: usize,
    pub binary_bytes: usize,
}

// ============================================================================
// 4-bit Quantization (Nibble packing)
// ============================================================================

/// 4-bit quantizer for balanced memory/accuracy trade-off (8x compression)
///
/// Converts float32 vectors to 4-bit (16 levels) by per-dimension min-max scaling.
/// Packs two 4-bit values into each u8 byte (high and low nibbles).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizer {
    /// Per-dimension minimum values
    min_vals: Vec<f32>,
    /// Per-dimension maximum values
    max_vals: Vec<f32>,
    /// Per-dimension scale factors
    scales: Vec<f32>,
    /// Number of dimensions
    dimensions: usize,
    /// Whether the quantizer has been fitted
    is_fitted: bool,
}

impl FourBitQuantizer {
    /// Create a new 4-bit quantizer
    pub fn new() -> Self {
        Self {
            min_vals: Vec::new(),
            max_vals: Vec::new(),
            scales: Vec::new(),
            dimensions: 0,
            is_fitted: false,
        }
    }

    /// Fit quantizer to training data
    ///
    /// Computes per-dimension min/max values for 4-bit quantization.
    pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot fit quantizer on empty data"));
        }

        let dim = vectors[0].len();
        if vectors.iter().any(|v| v.len() != dim) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }

        self.dimensions = dim;
        self.min_vals = vec![f32::INFINITY; dim];
        self.max_vals = vec![f32::NEG_INFINITY; dim];

        // Compute per-dimension min/max
        for vector in vectors {
            for (i, &val) in vector.iter().enumerate() {
                self.min_vals[i] = self.min_vals[i].min(val);
                self.max_vals[i] = self.max_vals[i].max(val);
            }
        }

        // Compute scale factors for 4-bit (0-15)
        self.scales = Vec::with_capacity(dim);
        for i in 0..dim {
            let range = self.max_vals[i] - self.min_vals[i];
            // Avoid division by zero for constant dimensions
            self.scales
                .push(if range > 1e-10 { 15.0 / range } else { 1.0 });
        }

        self.is_fitted = true;
        Ok(())
    }

    /// Quantize a float32 vector to 4-bit (packed as u8 array)
    ///
    /// Two 4-bit values are packed into each u8: even-indexed dimensions go in
    /// the low nibble (bits 0-3), odd-indexed dimensions in the high nibble (bits 4-7).
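    ///
    /// A small packing sketch (fitting on the range [0, 15] makes values map
    /// directly to nibbles):
    ///
    /// ```rust
    /// use oxify_vector::quantization::FourBitQuantizer;
    ///
    /// let mut q = FourBitQuantizer::new();
    /// q.fit(&[vec![0.0, 0.0], vec![15.0, 15.0]]).unwrap();
    /// // 0.0 → nibble 0x0 (low), 15.0 → nibble 0xF (high) → byte 0xF0
    /// assert_eq!(q.quantize(&[0.0, 15.0]), vec![0xF0]);
    /// ```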
    pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");

        // Calculate number of bytes needed (2 values per byte, ceiling division)
        let num_bytes = self.dimensions.div_ceil(2);
        let mut quantized = vec![0u8; num_bytes];

        for (i, &val) in vector.iter().enumerate() {
            // Clip to [min, max]
            let clipped = val.max(self.min_vals[i]).min(self.max_vals[i]);
            // Scale to [0, 15]
            let scaled = (clipped - self.min_vals[i]) * self.scales[i];
            let nibble = scaled.round().clamp(0.0, 15.0) as u8;

            let byte_idx = i / 2;
            if i % 2 == 0 {
                // Even index: store in low nibble (bits 0-3)
                quantized[byte_idx] |= nibble;
            } else {
                // Odd index: store in high nibble (bits 4-7)
                quantized[byte_idx] |= nibble << 4;
            }
        }

        quantized
    }

    /// Dequantize a 4-bit vector back to float32
    pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
        assert!(self.is_fitted, "Quantizer must be fitted before use");
        let expected_bytes = self.dimensions.div_ceil(2);
        assert_eq!(
            quantized.len(),
            expected_bytes,
            "Quantized vector size mismatch"
        );

        let mut vector = Vec::with_capacity(self.dimensions);
        for i in 0..self.dimensions {
            let byte_idx = i / 2;
            let nibble = if i % 2 == 0 {
                // Even index: extract low nibble
                quantized[byte_idx] & 0x0F
            } else {
                // Odd index: extract high nibble
                (quantized[byte_idx] >> 4) & 0x0F
            };

            // Convert back to float and rescale
            let scaled = nibble as f32 / self.scales[i];
            vector.push(scaled + self.min_vals[i]);
        }

        vector
    }

    /// Quantize multiple vectors
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
        vectors.iter().map(|v| self.quantize(v)).collect()
    }

    /// Dequantize multiple vectors
    pub fn dequantize_batch(&self, quantized: &[Vec<u8>]) -> Vec<Vec<f32>> {
        quantized.iter().map(|v| self.dequantize(v)).collect()
    }

    /// Compute approximate distance between 4-bit quantized vectors
    ///
    /// Uses Manhattan distance on nibble values for speed.
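    ///
    /// A small sketch on packed nibbles (fit on [0, 15] so the nibbles equal
    /// the input values):
    ///
    /// ```rust
    /// # use oxify_vector::quantization::FourBitQuantizer;
    /// let mut q = FourBitQuantizer::new();
    /// q.fit(&[vec![0.0, 0.0], vec![15.0, 15.0]]).unwrap();
    /// let a = q.quantize(&[0.0, 15.0]); // nibbles 0 and 15
    /// let b = q.quantize(&[1.0, 14.0]); // nibbles 1 and 14
    /// assert_eq!(q.quantized_distance(&a, &b), 2.0); // |0-1| + |15-14|
    /// ```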
    #[inline]
    pub fn quantized_distance(&self, a: &[u8], b: &[u8]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vector size mismatch");

        let mut distance = 0.0f32;
        for i in 0..self.dimensions {
            let byte_idx = i / 2;
            let nibble_a = if i % 2 == 0 {
                a[byte_idx] & 0x0F
            } else {
                (a[byte_idx] >> 4) & 0x0F
            };
            let nibble_b = if i % 2 == 0 {
                b[byte_idx] & 0x0F
            } else {
                (b[byte_idx] >> 4) & 0x0F
            };

            distance += (nibble_a as i32 - nibble_b as i32).abs() as f32;
        }

        distance
    }

    /// Get compression ratio
    pub fn compression_ratio(&self) -> f32 {
        // float32 (4 bytes) → 4-bit (0.5 byte) = 8x compression
        8.0
    }

    /// Get memory savings as a fraction of the original size
    pub fn memory_savings(&self) -> f32 {
        // 87.5% memory savings (1/8 of original size)
        0.875
    }

    /// Check if quantizer is fitted
    pub fn is_fitted(&self) -> bool {
        self.is_fitted
    }

    /// Get number of dimensions
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}

impl Default for FourBitQuantizer {
    fn default() -> Self {
        Self::new()
    }
}

/// 4-bit quantized vector index for balanced memory efficiency
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizedIndex {
    quantizer: FourBitQuantizer,
    /// 4-bit quantized vectors
    quantized_vectors: Vec<Vec<u8>>,
    /// Entity IDs
    entity_ids: Vec<String>,
}

impl FourBitQuantizedIndex {
    /// Create a new 4-bit quantized index
    pub fn new() -> Self {
        Self {
            quantizer: FourBitQuantizer::new(),
            quantized_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Build index from float32 vectors
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot build index from empty vectors"));
        }

        // Extract float vectors for fitting
        let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();

        // Fit quantizer
        self.quantizer.fit(&float_vecs)?;

        // Quantize all vectors
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        self.quantized_vectors = self.quantizer.quantize_batch(&float_vecs);

        Ok(())
    }

    /// Search for k nearest neighbors
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if !self.quantizer.is_fitted() {
            return Err(anyhow!("Index not built"));
        }

        // Quantize query
        let quantized_query = self.quantizer.quantize(query);

        // Compute distances to all vectors
        let mut distances: Vec<(usize, f32)> = self
            .quantized_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.quantized_distance(&quantized_query, v)))
            .collect();

        // Sort by distance (ascending - lower is better)
        distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

        // Return top-k
        Ok(distances
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
            .collect())
    }

    /// Get index statistics
    pub fn stats(&self) -> FourBitQuantizedIndexStats {
        let num_vectors = self.quantized_vectors.len();
        let dimensions = self.quantizer.dimensions();
        let original_bytes = num_vectors * dimensions * 4; // float32
        let quantized_bytes = num_vectors * dimensions.div_ceil(2); // 4-bit (2 per byte)

        FourBitQuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            quantized_bytes,
        }
    }
}

impl Default for FourBitQuantizedIndex {
    fn default() -> Self {
        Self::new()
    }
}

/// Statistics for 4-bit quantized index
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizedIndexStats {
    pub num_vectors: usize,
    pub dimensions: usize,
    pub compression_ratio: f32,
    pub memory_savings: f32,
    pub original_bytes: usize,
    pub quantized_bytes: usize,
}

// ============================================================================
// FP16 (Half-Precision Float) Quantization
// ============================================================================

#[cfg(feature = "fp16")]
use half::f16;

/// FP16 quantizer for high-accuracy memory reduction (2x compression)
///
/// Converts float32 vectors to float16 (16-bit IEEE 754 half-precision).
/// Provides 2x memory reduction with minimal accuracy loss.
///
/// **When to use FP16:**
/// - Need minimal accuracy loss (< 0.1% recall degradation)
/// - Have modern hardware with FP16 support
/// - Want simple conversion without fitting/calibration
/// - Prefer direct float operations over quantized integer math
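///
/// A round-trip sketch (marked `ignore` since it needs the `fp16` feature
/// enabled; the chosen values are exactly representable in f16, so they
/// survive the round trip unchanged):
///
/// ```rust,ignore
/// use oxify_vector::quantization::Fp16Quantizer;
///
/// let q = Fp16Quantizer::new();
/// let bits = q.quantize(&[1.0, 0.5, -2.0]);
/// assert_eq!(q.dequantize(&bits), vec![1.0, 0.5, -2.0]);
/// ```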
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16Quantizer {
    /// Number of dimensions
    dimensions: usize,
}

#[cfg(feature = "fp16")]
impl Fp16Quantizer {
    /// Create a new FP16 quantizer
    pub fn new() -> Self {
        Self { dimensions: 0 }
    }

    /// Set dimensions (no fitting required for FP16)
    pub fn set_dimensions(&mut self, dimensions: usize) {
        self.dimensions = dimensions;
    }

    /// Convert float32 vector to float16 (packed as u16 array)
    ///
    /// Each f32 is converted to f16 and stored as u16 bits.
    pub fn quantize(&self, vector: &[f32]) -> Vec<u16> {
        // If dimensions not set, allow any size; otherwise verify match
        if self.dimensions > 0 {
            assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
        }

        vector
            .iter()
            .map(|&val| f16::from_f32(val).to_bits())
            .collect()
    }

    /// Convert float16 (u16 bits) back to float32
    pub fn dequantize(&self, quantized: &[u16]) -> Vec<f32> {
        // If dimensions set, verify match; otherwise allow any size
        if self.dimensions > 0 {
            assert_eq!(
                quantized.len(),
                self.dimensions,
                "Quantized vector dimension mismatch"
            );
        }

        quantized
            .iter()
            .map(|&bits| f16::from_bits(bits).to_f32())
            .collect()
    }

    /// Quantize multiple vectors
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u16>> {
        vectors.iter().map(|v| self.quantize(v)).collect()
    }

    /// Dequantize multiple vectors
    pub fn dequantize_batch(&self, quantized: &[Vec<u16>]) -> Vec<Vec<f32>> {
        quantized.iter().map(|v| self.dequantize(v)).collect()
    }

    /// Compute Euclidean distance between FP16 vectors (via conversion to f32)
    ///
    /// This converts back to f32 for computation. For production use,
    /// consider using SIMD FP16 operations on supported hardware.
    #[inline]
    pub fn fp16_distance(&self, a: &[u16], b: &[u16]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vector dimension mismatch");

        let mut distance = 0.0f32;
        for (&a_bits, &b_bits) in a.iter().zip(b.iter()) {
            let a_val = f16::from_bits(a_bits).to_f32();
            let b_val = f16::from_bits(b_bits).to_f32();
            let diff = a_val - b_val;
            distance += diff * diff;
        }

        distance.sqrt()
    }

    /// Get compression ratio
    pub fn compression_ratio(&self) -> f32 {
        // float32 (4 bytes) → float16 (2 bytes) = 2x compression
        2.0
    }

    /// Get memory savings as a fraction of the original size
    pub fn memory_savings(&self) -> f32 {
        // 50% memory savings (1/2 of original size)
        0.5
    }

    /// Get number of dimensions
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}

#[cfg(feature = "fp16")]
impl Default for Fp16Quantizer {
    fn default() -> Self {
        Self::new()
    }
}

/// FP16 quantized vector index for high-accuracy memory efficiency
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16QuantizedIndex {
    quantizer: Fp16Quantizer,
    /// FP16 quantized vectors (stored as u16 bits)
    fp16_vectors: Vec<Vec<u16>>,
    /// Entity IDs
    entity_ids: Vec<String>,
}

#[cfg(feature = "fp16")]
impl Fp16QuantizedIndex {
    /// Create a new FP16 quantized index
    pub fn new() -> Self {
        Self {
            quantizer: Fp16Quantizer::new(),
            fp16_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Build index from float32 vectors
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        if vectors.is_empty() {
            return Err(anyhow!("Cannot build index from empty vectors"));
        }

        // Set dimensions from first vector
        let dimensions = vectors[0].1.len();
        self.quantizer.set_dimensions(dimensions);

        // Quantize all vectors
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();
        self.fp16_vectors = self.quantizer.quantize_batch(&float_vecs);

        Ok(())
    }

    /// Search for k nearest neighbors using Euclidean distance
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if self.fp16_vectors.is_empty() {
            return Err(anyhow!("Index not built"));
        }

        // Quantize query
        let fp16_query = self.quantizer.quantize(query);

        // Compute distances to all vectors
        let mut distances: Vec<(usize, f32)> = self
            .fp16_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.fp16_distance(&fp16_query, v)))
            .collect();

        // Sort by distance (ascending - lower is better)
        distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

        // Return top-k
        Ok(distances
            .iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
            .collect())
    }

    /// Get index statistics
    pub fn stats(&self) -> Fp16QuantizedIndexStats {
        let num_vectors = self.fp16_vectors.len();
        let dimensions = self.quantizer.dimensions();
        let original_bytes = num_vectors * dimensions * 4; // float32
        let fp16_bytes = num_vectors * dimensions * 2; // float16

        Fp16QuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            fp16_bytes,
        }
    }
}

#[cfg(feature = "fp16")]
impl Default for Fp16QuantizedIndex {
    fn default() -> Self {
        Self::new()
    }
}

/// Statistics for FP16 quantized index
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16QuantizedIndexStats {
    pub num_vectors: usize,
    pub dimensions: usize,
    pub compression_ratio: f32,
    pub memory_savings: f32,
    pub original_bytes: usize,
    pub fp16_bytes: usize,
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_quantizer_fit() {
        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        assert!(quantizer.fit(&vectors).is_ok());
        assert!(quantizer.is_fitted());
        assert_eq!(quantizer.dimensions(), 3);
    }

    #[test]
    fn test_quantize_dequantize() {
        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let vector = vec![1.0, 2.0, 3.0];
        let quantized = quantizer.quantize(&vector);
        let dequantized = quantizer.dequantize(&quantized);

        // Check dimensions
        assert_eq!(quantized.len(), 3);
        assert_eq!(dequantized.len(), 3);

        // Check approximate reconstruction
        for (orig, deq) in vector.iter().zip(dequantized.iter()) {
            assert!((orig - deq).abs() < 0.1); // Small error due to quantization
        }
    }

    #[test]
    fn test_quantize_batch() {
        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let quantized = quantizer.quantize_batch(&vectors);
        assert_eq!(quantized.len(), 3);
        assert_eq!(quantized[0].len(), 3);
    }

    #[test]
    fn test_quantized_distance() {
        let vectors = vec![
            vec![0.0, 0.0, 0.0],
            vec![1.0, 1.0, 1.0],
            vec![2.0, 2.0, 2.0],
        ];

        let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let a = quantizer.quantize(&vectors[0]);
        let b = quantizer.quantize(&vectors[1]);
        let c = quantizer.quantize(&vectors[2]);

        let dist_ab = quantizer.quantized_distance(&a, &b);
        let dist_ac = quantizer.quantized_distance(&a, &c);

        // Distance to farther vector should be larger
        assert!(dist_ac > dist_ab);
    }

    #[test]
    fn test_compression_ratio() {
        let quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        assert_eq!(quantizer.compression_ratio(), 4.0);
        assert_eq!(quantizer.memory_savings(), 0.75);
    }

    #[test]
    fn test_quantized_index_build() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
            ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
            ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
        ];

        let mut index = QuantizedVectorIndex::new(QuantizationConfig::default());
        assert!(index.build(&vectors).is_ok());
    }

    #[test]
    fn test_quantized_index_search() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 0.0, 0.0]),
            ("doc2".to_string(), vec![1.0, 1.0, 1.0]),
            ("doc3".to_string(), vec![2.0, 2.0, 2.0]),
        ];

        let mut index = QuantizedVectorIndex::new(QuantizationConfig::default());
        index.build(&vectors).unwrap();

        // Search for nearest to doc2
        let query = vec![1.0, 1.0, 1.0];
        let results = index.search(&query, 2).unwrap();

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, "doc2"); // Closest should be doc2
    }

    #[test]
    fn test_quantized_index_stats() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0; 768]),
            ("doc2".to_string(), vec![1.0; 768]),
            ("doc3".to_string(), vec![2.0; 768]),
        ];

        let mut index = QuantizedVectorIndex::new(QuantizationConfig::default());
        index.build(&vectors).unwrap();

        let stats = index.stats();
        assert_eq!(stats.num_vectors, 3);
        assert_eq!(stats.dimensions, 768);
        assert_eq!(stats.original_bytes, 3 * 768 * 4); // 3 vectors * 768 dims * 4 bytes
        assert_eq!(stats.quantized_bytes, 3 * 768); // 3 vectors * 768 dims * 1 byte
        assert_eq!(stats.compression_ratio, 4.0);
    }

    #[test]
    fn test_fit_empty_vectors() {
        let vectors: Vec<Vec<f32>> = vec![];
        let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        assert!(quantizer.fit(&vectors).is_err());
    }

    #[test]
    #[should_panic(expected = "Quantizer must be fitted")]
    fn test_quantize_unfitted() {
        let quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        quantizer.quantize(&[1.0, 2.0, 3.0]);
    }

    #[test]
    #[should_panic(expected = "Vector dimension mismatch")]
    fn test_quantize_dimension_mismatch() {
        let vectors = vec![vec![0.0, 1.0, 2.0]];
        let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        // Try to quantize vector with wrong dimension
        quantizer.quantize(&[1.0, 2.0]); // 2D instead of 3D
    }
    // ========================================================================
    // Binary Quantization Tests
    // ========================================================================

    #[test]
    fn test_binary_quantizer_fit() {
        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        assert!(quantizer.fit(&vectors).is_ok());
        assert!(quantizer.is_fitted());
        assert_eq!(quantizer.dimensions(), 3);
    }

    #[test]
    fn test_binary_quantize_dequantize() {
        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let vector = vec![1.0, 2.0, 3.0];
        let binary = quantizer.quantize(&vector);
        let dequantized = quantizer.dequantize(&binary);

        // Check dimensions
        assert_eq!(binary.len(), 1); // 3 bits fit in 1 byte
        assert_eq!(dequantized.len(), 3);
    }

    #[test]
    fn test_binary_quantize_large_vector() {
        // Test with vector larger than 8 dimensions (multiple bytes)
        let vectors: Vec<Vec<f32>> = (0..10)
            .map(|_| (0..128).map(|i| i as f32).collect())
            .collect();

        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let vector: Vec<f32> = (0..128).map(|i| i as f32).collect();
        let binary = quantizer.quantize(&vector);

        // 128 bits = 16 bytes
        assert_eq!(binary.len(), 16);
    }

    #[test]
    fn test_binary_hamming_distance() {
        let vectors = vec![
            vec![0.0, 0.0, 0.0, 0.0],
            vec![1.0, 1.0, 1.0, 1.0],
            vec![2.0, 2.0, 2.0, 2.0],
        ];

        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let a = quantizer.quantize(&vectors[0]);
        let b = quantizer.quantize(&vectors[1]);
        let c = quantizer.quantize(&vectors[2]);

        let dist_ab = quantizer.hamming_distance(&a, &b);
        let dist_ac = quantizer.hamming_distance(&a, &c);

        // Check that distances make sense
        assert!(dist_ab <= 4); // Max 4 bits can differ
        assert!(dist_ac <= 4);
    }

    #[test]
    fn test_binary_hamming_similarity() {
        let vectors = vec![
            vec![0.0, 0.0, 0.0, 0.0],
            vec![1.0, 1.0, 1.0, 1.0],
            vec![2.0, 2.0, 2.0, 2.0],
        ];

        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let a = quantizer.quantize(&vectors[0]);
        let b = quantizer.quantize(&vectors[1]);

        let sim = quantizer.hamming_similarity(&a, &b);

        // Similarity should be between 0.0 and 1.0
        assert!((0.0..=1.0).contains(&sim));
    }

    #[test]
    fn test_binary_compression_ratio() {
        let quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        assert_eq!(quantizer.compression_ratio(), 32.0);
        assert_eq!(quantizer.memory_savings(), 0.96875);
    }

    #[test]
    fn test_binary_quantize_batch() {
        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let binary = quantizer.quantize_batch(&vectors);
        assert_eq!(binary.len(), 3);
        assert_eq!(binary[0].len(), 1); // 3 bits fit in 1 byte
    }

    #[test]
    fn test_binary_quantized_index_build() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
            ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
            ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
        ];

        let mut index = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
        assert!(index.build(&vectors).is_ok());
    }

    #[test]
    fn test_binary_quantized_index_search() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 0.0, 0.0]),
            ("doc2".to_string(), vec![1.0, 1.0, 1.0]),
            ("doc3".to_string(), vec![2.0, 2.0, 2.0]),
        ];

        let mut index = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
        index.build(&vectors).unwrap();

        // Search for nearest to doc2
        let query = vec![1.0, 1.0, 1.0];
        let results = index.search(&query, 2).unwrap();

        assert_eq!(results.len(), 2);
        // Results should be sorted by similarity (descending)
        assert!(results[0].1 >= results[1].1);
    }

    #[test]
    fn test_binary_quantized_index_stats() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0; 768]),
            ("doc2".to_string(), vec![1.0; 768]),
            ("doc3".to_string(), vec![2.0; 768]),
        ];

        let mut index = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
        index.build(&vectors).unwrap();

        let stats = index.stats();
        assert_eq!(stats.num_vectors, 3);
        assert_eq!(stats.dimensions, 768);
        assert_eq!(stats.original_bytes, 3 * 768 * 4); // 3 vectors * 768 dims * 4 bytes
        assert_eq!(stats.binary_bytes, 3 * 96); // 3 vectors * 96 bytes (768 bits / 8)
        assert_eq!(stats.compression_ratio, 32.0);
    }

    #[test]
    fn test_binary_fit_empty_vectors() {
        let vectors: Vec<Vec<f32>> = vec![];
        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        assert!(quantizer.fit(&vectors).is_err());
    }

    #[test]
    #[should_panic(expected = "Quantizer must be fitted")]
    fn test_binary_quantize_unfitted() {
        let quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.quantize(&[1.0, 2.0, 3.0]);
    }

    #[test]
    #[should_panic(expected = "Vector dimension mismatch")]
    fn test_binary_quantize_dimension_mismatch() {
        let vectors = vec![vec![0.0, 1.0, 2.0]];
        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        // Try to quantize vector with wrong dimension
        quantizer.quantize(&[1.0, 2.0]); // 2D instead of 3D
    }

    #[test]
    fn test_binary_zero_threshold() {
        let vectors = vec![vec![-1.0, 0.0, 1.0], vec![-2.0, 0.0, 2.0]];

        // Use zero threshold instead of mean threshold
        let config = BinaryQuantizationConfig {
            use_mean_threshold: false,
        };

        let mut quantizer = BinaryQuantizer::new(config);
        quantizer.fit(&vectors).unwrap();

        let vector = vec![-1.0, 0.0, 1.0]; // Below, equal, above zero
        let binary = quantizer.quantize(&vector);

        // Check bit pattern: -1.0 → 0, 0.0 → 0, 1.0 → 1
        // Bits: 0, 0, 1 (in little-endian bit order within byte)
        // So byte should be: 0b00000100 = 4
        assert_eq!(binary[0] & 0b00000111, 0b00000100);
    }

    #[test]
    fn test_binary_identical_vectors() {
        let vectors = vec![vec![1.0, 2.0, 3.0], vec![1.0, 2.0, 3.0]];

        let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
        quantizer.fit(&vectors).unwrap();

        let a = quantizer.quantize(&vectors[0]);
        let b = quantizer.quantize(&vectors[1]);

        // Identical vectors should have zero Hamming distance
        let dist = quantizer.hamming_distance(&a, &b);
        assert_eq!(dist, 0);

        // And similarity of 1.0
        let sim = quantizer.hamming_similarity(&a, &b);
        assert_eq!(sim, 1.0);
    }
1558    // ========================================================================
1559    // 4-bit Quantization Tests
1560    // ========================================================================
1561
1562    #[test]
1563    fn test_fourbit_quantizer_fit() {
1564        let vectors = vec![
1565            vec![0.0, 1.0, 2.0],
1566            vec![1.0, 2.0, 3.0],
1567            vec![2.0, 3.0, 4.0],
1568        ];
1569
1570        let mut quantizer = FourBitQuantizer::new();
1571        assert!(quantizer.fit(&vectors).is_ok());
1572        assert!(quantizer.is_fitted());
1573        assert_eq!(quantizer.dimensions(), 3);
1574    }
1575
1576    #[test]
1577    fn test_fourbit_quantize_dequantize() {
1578        let vectors = vec![
1579            vec![0.0, 1.0, 2.0],
1580            vec![1.0, 2.0, 3.0],
1581            vec![2.0, 3.0, 4.0],
1582        ];
1583
1584        let mut quantizer = FourBitQuantizer::new();
1585        quantizer.fit(&vectors).unwrap();
1586
1587        let vector = vec![1.0, 2.0, 3.0];
1588        let quantized = quantizer.quantize(&vector);
1589        let dequantized = quantizer.dequantize(&quantized);
1590
1591        // Check dimensions
1592        assert_eq!(quantized.len(), 2); // 3 values fit in 2 bytes (1.5 bytes, rounded up)
1593        assert_eq!(dequantized.len(), 3);
1594
1595        // Check approximate reconstruction (4-bit has 16 levels)
1596        for (orig, deq) in vector.iter().zip(dequantized.iter()) {
1597            assert!((orig - deq).abs() < 0.3); // Moderate error due to 4-bit quantization
1598        }
1599    }
1600
1601    #[test]
1602    fn test_fourbit_quantize_large_vector() {
1603        // Test with vector larger than 2 dimensions (multiple bytes)
1604        let vectors: Vec<Vec<f32>> = (0..10)
1605            .map(|_| (0..100).map(|i| i as f32).collect())
1606            .collect();
1607
1608        let mut quantizer = FourBitQuantizer::new();
1609        quantizer.fit(&vectors).unwrap();
1610
1611        let vector: Vec<f32> = (0..100).map(|i| i as f32).collect();
1612        let quantized = quantizer.quantize(&vector);
1613
1614        // 100 values = 50 bytes (2 values per byte)
1615        assert_eq!(quantized.len(), 50);
1616    }
1617
1618    #[test]
1619    fn test_fourbit_odd_dimensions() {
1620        // Test with odd number of dimensions
1621        let vectors = vec![
1622            vec![0.0, 1.0, 2.0, 3.0, 4.0], // 5 dimensions
1623            vec![1.0, 2.0, 3.0, 4.0, 5.0],
1624        ];
1625
1626        let mut quantizer = FourBitQuantizer::new();
1627        quantizer.fit(&vectors).unwrap();
1628
1629        let vector = vec![1.5, 2.5, 3.5, 4.5, 5.5];
1630        let quantized = quantizer.quantize(&vector);
1631
1632        // 5 values need 3 bytes (2.5 bytes, rounded up)
1633        assert_eq!(quantized.len(), 3);
1634    }
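
    // Illustrative sketch (not part of the original suite): with two 4-bit
    // values per byte, a d-dimensional vector packs into ceil(d / 2) bytes.
    // This is the arithmetic behind the length assertions above; the
    // packed_len helper is hypothetical, not part of the module's API.
    #[test]
    fn test_fourbit_packed_len_sketch() {
        fn packed_len(dims: usize) -> usize {
            (dims + 1) / 2 // integer ceil of dims / 2
        }

        assert_eq!(packed_len(3), 2); // 1.5 bytes rounds up to 2
        assert_eq!(packed_len(5), 3); // 2.5 bytes rounds up to 3
        assert_eq!(packed_len(100), 50); // even counts pack exactly
    }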

    #[test]
    fn test_fourbit_nibble_packing() {
        // Use vectors spanning the full 4-bit range to ensure proper scaling
        let vectors = vec![vec![0.0, 0.0], vec![15.0, 15.0]];

        let mut quantizer = FourBitQuantizer::new();
        quantizer.fit(&vectors).unwrap();

        // Test that nibbles are correctly packed:
        // first value (0) in the low nibble, second value (15) in the high nibble
        let vector = vec![0.0, 15.0];
        let quantized = quantizer.quantize(&vector);

        assert_eq!(quantized.len(), 1); // 2 values in 1 byte
        // 0 maps to nibble 0 (0b0000), 15 maps to nibble 15 (0b1111),
        // so the byte is 0b11110000 = 0xF0 = 240
        assert_eq!(quantized[0], 0xF0);
    }
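
    // Illustrative sketch (not part of the original suite): the 0xF0 byte
    // above follows from low-nibble-first packing. The layout is taken from
    // the comments in the test above; only std Rust is used.
    #[test]
    fn test_nibble_pack_unpack_sketch() {
        let (first, second) = (0u8, 15u8); // two 4-bit codes, each in 0..=15

        // Pack: first value in the low nibble, second in the high nibble.
        let byte = (second << 4) | (first & 0x0F);
        assert_eq!(byte, 0xF0);

        // Unpack: mask off the low nibble, shift down the high nibble.
        assert_eq!(byte & 0x0F, first);
        assert_eq!(byte >> 4, second);
    }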

    #[test]
    fn test_fourbit_compression_ratio() {
        let quantizer = FourBitQuantizer::new();
        assert_eq!(quantizer.compression_ratio(), 8.0);
        assert_eq!(quantizer.memory_savings(), 0.875);
    }

    #[test]
    fn test_fourbit_quantize_batch() {
        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let mut quantizer = FourBitQuantizer::new();
        quantizer.fit(&vectors).unwrap();

        let quantized = quantizer.quantize_batch(&vectors);
        assert_eq!(quantized.len(), 3);
        assert_eq!(quantized[0].len(), 2); // 3 values need 2 bytes
    }

    #[test]
    fn test_fourbit_quantized_distance() {
        let vectors = vec![
            vec![0.0, 0.0, 0.0],
            vec![1.0, 1.0, 1.0],
            vec![2.0, 2.0, 2.0],
        ];

        let mut quantizer = FourBitQuantizer::new();
        quantizer.fit(&vectors).unwrap();

        let a = quantizer.quantize(&vectors[0]);
        let b = quantizer.quantize(&vectors[1]);
        let c = quantizer.quantize(&vectors[2]);

        let dist_ab = quantizer.quantized_distance(&a, &b);
        let dist_ac = quantizer.quantized_distance(&a, &c);

        // Distance to the farther vector should be larger
        assert!(dist_ac > dist_ab);
    }
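
    // Illustrative sketch (not part of the original suite): a distance can be
    // computed on 4-bit codes without dequantizing, e.g. an L1 (Manhattan)
    // sum over code differences, as hinted by the module's SIMD import.
    // Whether quantized_distance uses exactly this metric is an assumption;
    // the monotonicity checked above only requires that farther vectors
    // produce larger values.
    #[test]
    fn test_nibble_code_distance_sketch() {
        // Hypothetical unpacked 4-bit codes for three vectors.
        let a = [0u8, 0, 0];
        let b = [5u8, 5, 5];
        let c = [10u8, 10, 10];

        // L1 distance over the raw codes.
        fn l1(x: &[u8], y: &[u8]) -> u32 {
            x.iter()
                .zip(y.iter())
                .map(|(p, q)| (i32::from(*p) - i32::from(*q)).unsigned_abs())
                .sum()
        }

        assert!(l1(&a, &c) > l1(&a, &b)); // farther codes → larger distance
    }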

    #[test]
    fn test_fourbit_quantized_index_build() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
            ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
            ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
        ];

        let mut index = FourBitQuantizedIndex::new();
        assert!(index.build(&vectors).is_ok());
    }

    #[test]
    fn test_fourbit_quantized_index_search() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 0.0, 0.0]),
            ("doc2".to_string(), vec![1.0, 1.0, 1.0]),
            ("doc3".to_string(), vec![2.0, 2.0, 2.0]),
        ];

        let mut index = FourBitQuantizedIndex::new();
        index.build(&vectors).unwrap();

        // Search for nearest to doc2
        let query = vec![1.0, 1.0, 1.0];
        let results = index.search(&query, 2).unwrap();

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, "doc2"); // Closest should be doc2
    }

    #[test]
    fn test_fourbit_quantized_index_stats() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0; 768]),
            ("doc2".to_string(), vec![1.0; 768]),
            ("doc3".to_string(), vec![2.0; 768]),
        ];

        let mut index = FourBitQuantizedIndex::new();
        index.build(&vectors).unwrap();

        let stats = index.stats();
        assert_eq!(stats.num_vectors, 3);
        assert_eq!(stats.dimensions, 768);
        assert_eq!(stats.original_bytes, 3 * 768 * 4); // 3 vectors * 768 dims * 4 bytes
        assert_eq!(stats.quantized_bytes, 3 * 384); // 3 vectors * 384 bytes (768 / 2)
        assert_eq!(stats.compression_ratio, 8.0);
    }
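
    // Illustrative sketch (not part of the original suite): the byte counts
    // asserted above are plain arithmetic. For n vectors of d dimensions,
    // f32 storage costs n * d * 4 bytes, while 4-bit storage costs
    // n * ceil(d / 2) bytes, an 8x reduction for even d.
    #[test]
    fn test_fourbit_stats_arithmetic_sketch() {
        let (n, d) = (3usize, 768usize);

        let original_bytes = n * d * 4; // 3 * 768 * 4 = 9216
        let quantized_bytes = n * ((d + 1) / 2); // 3 * 384 = 1152

        assert_eq!(original_bytes, 9216);
        assert_eq!(quantized_bytes, 1152);
        assert_eq!(original_bytes / quantized_bytes, 8); // 8x compression
    }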

    #[test]
    fn test_fourbit_fit_empty_vectors() {
        let vectors: Vec<Vec<f32>> = vec![];
        let mut quantizer = FourBitQuantizer::new();
        assert!(quantizer.fit(&vectors).is_err());
    }

    #[test]
    #[should_panic(expected = "Quantizer must be fitted")]
    fn test_fourbit_quantize_unfitted() {
        let quantizer = FourBitQuantizer::new();
        quantizer.quantize(&[1.0, 2.0, 3.0]);
    }

    #[test]
    #[should_panic(expected = "Vector dimension mismatch")]
    fn test_fourbit_quantize_dimension_mismatch() {
        let vectors = vec![vec![0.0, 1.0, 2.0]];
        let mut quantizer = FourBitQuantizer::new();
        quantizer.fit(&vectors).unwrap();

        // Try to quantize a vector with the wrong dimension
        quantizer.quantize(&[1.0, 2.0]); // 2D instead of 3D
    }

    // ========================================================================
    // FP16 (Half-Precision Float) Tests
    // ========================================================================

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_quantizer_basic() {
        let mut quantizer = Fp16Quantizer::new();
        quantizer.set_dimensions(3);
        assert_eq!(quantizer.dimensions(), 3);
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_quantize_dequantize() {
        let quantizer = Fp16Quantizer::new();

        let vector = vec![1.0, 2.0, 3.0];
        let quantized = quantizer.quantize(&vector);
        let dequantized = quantizer.dequantize(&quantized);

        // Check dimensions
        assert_eq!(quantized.len(), 3);
        assert_eq!(dequantized.len(), 3);

        // Check high-precision reconstruction (FP16 round-trips these values
        // almost exactly)
        for (orig, deq) in vector.iter().zip(dequantized.iter()) {
            assert!((orig - deq).abs() < 0.001); // Tiny error from FP16 precision
        }
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_quantize_large_vector() {
        let quantizer = Fp16Quantizer::new();

        let vector: Vec<f32> = (0..768).map(|i| i as f32 * 0.1).collect();
        let quantized = quantizer.quantize(&vector);

        // 768 f32 values → 768 u16 values
        assert_eq!(quantized.len(), 768);
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_quantize_batch() {
        let quantizer = Fp16Quantizer::new();

        let vectors = vec![
            vec![0.0, 1.0, 2.0],
            vec![1.0, 2.0, 3.0],
            vec![2.0, 3.0, 4.0],
        ];

        let quantized = quantizer.quantize_batch(&vectors);
        assert_eq!(quantized.len(), 3);
        assert_eq!(quantized[0].len(), 3);
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_distance() {
        let quantizer = Fp16Quantizer::new();

        let v0 = vec![0.0, 0.0, 0.0];
        let v1 = vec![1.0, 1.0, 1.0];
        let v2 = vec![2.0, 2.0, 2.0];

        let a = quantizer.quantize(&v0);
        let b = quantizer.quantize(&v1);
        let c = quantizer.quantize(&v2);

        let dist_ab = quantizer.fp16_distance(&a, &b);
        let dist_ac = quantizer.fp16_distance(&a, &c);

        // Distance to the farther vector should be larger
        assert!(dist_ac > dist_ab);
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_compression_ratio() {
        let quantizer = Fp16Quantizer::new();
        assert_eq!(quantizer.compression_ratio(), 2.0);
        assert_eq!(quantizer.memory_savings(), 0.5);
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_quantized_index_build() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
            ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
            ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
        ];

        let mut index = Fp16QuantizedIndex::new();
        assert!(index.build(&vectors).is_ok());
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_quantized_index_search() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0, 0.0, 0.0]),
            ("doc2".to_string(), vec![1.0, 1.0, 1.0]),
            ("doc3".to_string(), vec![2.0, 2.0, 2.0]),
        ];

        let mut index = Fp16QuantizedIndex::new();
        index.build(&vectors).unwrap();

        // Search for nearest to doc2
        let query = vec![1.0, 1.0, 1.0];
        let results = index.search(&query, 2).unwrap();

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, "doc2"); // Closest should be doc2
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_quantized_index_stats() {
        let vectors = vec![
            ("doc1".to_string(), vec![0.0; 768]),
            ("doc2".to_string(), vec![1.0; 768]),
            ("doc3".to_string(), vec![2.0; 768]),
        ];

        let mut index = Fp16QuantizedIndex::new();
        index.build(&vectors).unwrap();

        let stats = index.stats();
        assert_eq!(stats.num_vectors, 3);
        assert_eq!(stats.dimensions, 768);
        assert_eq!(stats.original_bytes, 3 * 768 * 4); // 3 vectors * 768 dims * 4 bytes
        assert_eq!(stats.fp16_bytes, 3 * 768 * 2); // 3 vectors * 768 dims * 2 bytes
        assert_eq!(stats.compression_ratio, 2.0);
    }

    #[test]
    #[cfg(feature = "fp16")]
    fn test_fp16_high_precision() {
        let quantizer = Fp16Quantizer::new();

        // Test a spread of values to confirm FP16 maintains good precision
        let test_values = vec![
            vec![0.1, 0.2, 0.3],
            vec![1.5, 2.5, 3.5],
            vec![100.0, 200.0, 300.0],
            vec![-1.0, -2.0, -3.0],
        ];

        for vector in &test_values {
            let quantized = quantizer.quantize(vector);
            let dequantized = quantizer.dequantize(&quantized);

            for (orig, deq) in vector.iter().zip(dequantized.iter()) {
                // FP16 keeps relative error below ~0.05% (2^-11) for normal
                // values, well under the 0.1% budget asserted here
                let relative_error = ((orig - deq) / orig).abs();
                assert!(relative_error < 0.001 || orig.abs() < 0.01);
            }
        }
    }
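
    // Illustrative sketch (not part of the original suite): the 0.1% budget
    // above follows from f16's 11-bit significand, whose round-to-nearest
    // relative error is at most 2^-11 ≈ 0.000488. This sketch assumes the
    // `half` crate (a common choice behind an fp16 feature); it is not
    // necessarily what Fp16Quantizer uses internally.
    #[test]
    #[cfg(feature = "fp16")]
    fn test_f16_roundtrip_error_bound_sketch() {
        use half::f16;

        for &x in &[0.1f32, 1.5, 100.0, -3.0] {
            let roundtrip = f16::from_f32(x).to_f32();
            // A single f32 → f16 → f32 round trip stays within the f16
            // unit roundoff for normal (non-subnormal, finite) values.
            let relative_error = ((x - roundtrip) / x).abs();
            assert!(relative_error <= 1.0 / 2048.0);
        }
    }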

    #[test]
    #[cfg(feature = "fp16")]
    #[should_panic(expected = "Vector dimension mismatch")]
    fn test_fp16_quantize_dimension_mismatch() {
        let mut quantizer = Fp16Quantizer::new();
        quantizer.set_dimensions(3); // Explicitly set dimensions to 3

        let vector1 = vec![1.0, 2.0, 3.0];
        quantizer.quantize(&vector1); // Should succeed

        // Try to quantize a vector with the wrong dimension
        quantizer.quantize(&[1.0, 2.0]); // 2D instead of 3D - should panic
    }
}