1use anyhow::{anyhow, Result};
77use serde::{Deserialize, Serialize};
78
79use crate::simd::quantized_manhattan_distance_simd;
80
/// Settings consumed by [`ScalarQuantizer`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizationConfig {
    /// Requested code width in bits (the `Default` impl uses 8).
    ///
    /// NOTE(review): no code visible in this file reads this field; confirm
    /// whether it is honoured elsewhere.
    pub bits: u8,
    /// Selects the highest quantization level used by `ScalarQuantizer::fit`:
    /// 127 when `true`, 255 when `false`.
    pub signed: bool,
}
89
90impl Default for QuantizationConfig {
91 fn default() -> Self {
92 Self {
93 bits: 8,
94 signed: false, }
96 }
97}
98
/// Per-dimension min/max scalar quantizer that maps each `f32` component to
/// one `u8` code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalarQuantizer {
    /// Configuration supplied at construction time.
    config: QuantizationConfig,
    /// Smallest value seen per dimension during `fit`.
    min_vals: Vec<f32>,
    /// Largest value seen per dimension during `fit`.
    max_vals: Vec<f32>,
    /// Per-dimension multiplier from "offset above the minimum" to code value.
    scales: Vec<f32>,
    /// Vector length learned by `fit` (0 before fitting).
    dimensions: usize,
    /// Set to `true` once `fit` has succeeded.
    is_fitted: bool,
}
114
115impl ScalarQuantizer {
116 pub fn new(config: QuantizationConfig) -> Self {
118 Self {
119 config,
120 min_vals: Vec::new(),
121 max_vals: Vec::new(),
122 scales: Vec::new(),
123 dimensions: 0,
124 is_fitted: false,
125 }
126 }
127
128 pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
132 if vectors.is_empty() {
133 return Err(anyhow!("Cannot fit quantizer on empty data"));
134 }
135
136 let dim = vectors[0].len();
137 if vectors.iter().any(|v| v.len() != dim) {
138 return Err(anyhow!("All vectors must have the same dimension"));
139 }
140
141 self.dimensions = dim;
142 self.min_vals = vec![f32::INFINITY; dim];
143 self.max_vals = vec![f32::NEG_INFINITY; dim];
144
145 for vector in vectors {
147 for (i, &val) in vector.iter().enumerate() {
148 self.min_vals[i] = self.min_vals[i].min(val);
149 self.max_vals[i] = self.max_vals[i].max(val);
150 }
151 }
152
153 self.scales = Vec::with_capacity(dim);
155 let max_quant_val = if self.config.signed { 127.0 } else { 255.0 };
156
157 for i in 0..dim {
158 let range = self.max_vals[i] - self.min_vals[i];
159 self.scales.push(if range > 1e-10 {
161 max_quant_val / range
162 } else {
163 1.0
164 });
165 }
166
167 self.is_fitted = true;
168 Ok(())
169 }
170
171 pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
173 assert!(self.is_fitted, "Quantizer must be fitted before use");
174 assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
175
176 vector
177 .iter()
178 .enumerate()
179 .map(|(i, &val)| {
180 let clipped = val.max(self.min_vals[i]).min(self.max_vals[i]);
182 let scaled = (clipped - self.min_vals[i]) * self.scales[i];
184 scaled.round().clamp(0.0, 255.0) as u8
186 })
187 .collect()
188 }
189
190 pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
192 assert!(self.is_fitted, "Quantizer must be fitted before use");
193 assert_eq!(
194 quantized.len(),
195 self.dimensions,
196 "Quantized vector dimension mismatch"
197 );
198
199 quantized
200 .iter()
201 .enumerate()
202 .map(|(i, &val)| {
203 let scaled = val as f32 / self.scales[i];
205 scaled + self.min_vals[i]
206 })
207 .collect()
208 }
209
210 pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
212 vectors.iter().map(|v| self.quantize(v)).collect()
213 }
214
215 pub fn dequantize_batch(&self, quantized: &[Vec<u8>]) -> Vec<Vec<f32>> {
217 quantized.iter().map(|v| self.dequantize(v)).collect()
218 }
219
220 pub fn quantized_distance(&self, a: &[u8], b: &[u8]) -> f32 {
225 assert_eq!(a.len(), b.len(), "Vector dimension mismatch");
226
227 quantized_manhattan_distance_simd(a, b) as f32
229 }
230
231 pub fn compression_ratio(&self) -> f32 {
233 4.0
235 }
236
237 pub fn memory_savings(&self) -> f32 {
239 0.75
241 }
242
243 pub fn is_fitted(&self) -> bool {
245 self.is_fitted
246 }
247
248 pub fn dimensions(&self) -> usize {
250 self.dimensions
251 }
252}
253
/// Brute-force search index over vectors stored as [`ScalarQuantizer`] codes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedVectorIndex {
    /// Quantizer fitted on the vectors passed to `build`.
    quantizer: ScalarQuantizer,
    /// One code vector per indexed entity, parallel to `entity_ids`.
    quantized_vectors: Vec<Vec<u8>>,
    /// Entity identifiers, parallel to `quantized_vectors`.
    entity_ids: Vec<String>,
}
263
264impl QuantizedVectorIndex {
265 pub fn new(config: QuantizationConfig) -> Self {
267 Self {
268 quantizer: ScalarQuantizer::new(config),
269 quantized_vectors: Vec::new(),
270 entity_ids: Vec::new(),
271 }
272 }
273
274 pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
276 if vectors.is_empty() {
277 return Err(anyhow!("Cannot build index from empty vectors"));
278 }
279
280 let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();
282
283 self.quantizer.fit(&float_vecs)?;
285
286 self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
288 self.quantized_vectors = self.quantizer.quantize_batch(&float_vecs);
289
290 Ok(())
291 }
292
293 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
295 if !self.quantizer.is_fitted() {
296 return Err(anyhow!("Index not built"));
297 }
298
299 let quantized_query = self.quantizer.quantize(query);
301
302 let mut distances: Vec<(usize, f32)> = self
304 .quantized_vectors
305 .iter()
306 .enumerate()
307 .map(|(i, v)| (i, self.quantizer.quantized_distance(&quantized_query, v)))
308 .collect();
309
310 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
312
313 Ok(distances
315 .iter()
316 .take(k.min(self.entity_ids.len()))
317 .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
318 .collect())
319 }
320
321 pub fn stats(&self) -> QuantizedIndexStats {
323 let num_vectors = self.quantized_vectors.len();
324 let dimensions = self.quantizer.dimensions();
325 let original_bytes = num_vectors * dimensions * 4; let quantized_bytes = num_vectors * dimensions; QuantizedIndexStats {
329 num_vectors,
330 dimensions,
331 compression_ratio: self.quantizer.compression_ratio(),
332 memory_savings: self.quantizer.memory_savings(),
333 original_bytes,
334 quantized_bytes,
335 }
336 }
337}
338
/// Size summary produced by `QuantizedVectorIndex::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedIndexStats {
    /// Number of stored code vectors.
    pub num_vectors: usize,
    /// Dimensionality reported by the quantizer.
    pub dimensions: usize,
    /// Value of `ScalarQuantizer::compression_ratio`.
    pub compression_ratio: f32,
    /// Value of `ScalarQuantizer::memory_savings`.
    pub memory_savings: f32,
    /// `num_vectors * dimensions * 4`.
    pub original_bytes: usize,
    /// `num_vectors * dimensions`.
    pub quantized_bytes: usize,
}
349
/// Settings consumed by [`BinaryQuantizer`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizationConfig {
    /// When `true`, `BinaryQuantizer::fit` uses the per-dimension mean of the
    /// training vectors as threshold; when `false`, every threshold is 0.0.
    pub use_mean_threshold: bool,
}
360
361impl Default for BinaryQuantizationConfig {
362 fn default() -> Self {
363 Self {
364 use_mean_threshold: true,
365 }
366 }
367}
368
/// One-bit-per-dimension quantizer: each component becomes 1 when it is
/// strictly above its threshold and 0 otherwise.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizer {
    /// Configuration supplied at construction time.
    config: BinaryQuantizationConfig,
    /// Per-dimension threshold computed by `fit`.
    thresholds: Vec<f32>,
    /// Vector length learned by `fit` (0 before fitting).
    dimensions: usize,
    /// Set to `true` once `fit` has succeeded.
    is_fitted: bool,
}
383
384impl BinaryQuantizer {
385 pub fn new(config: BinaryQuantizationConfig) -> Self {
387 Self {
388 config,
389 thresholds: Vec::new(),
390 dimensions: 0,
391 is_fitted: false,
392 }
393 }
394
395 pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
399 if vectors.is_empty() {
400 return Err(anyhow!("Cannot fit quantizer on empty data"));
401 }
402
403 let dim = vectors[0].len();
404 if vectors.iter().any(|v| v.len() != dim) {
405 return Err(anyhow!("All vectors must have the same dimension"));
406 }
407
408 self.dimensions = dim;
409
410 if self.config.use_mean_threshold {
411 self.thresholds = vec![0.0; dim];
413 for vector in vectors {
414 for (i, &val) in vector.iter().enumerate() {
415 self.thresholds[i] += val;
416 }
417 }
418 let count = vectors.len() as f32;
419 for threshold in &mut self.thresholds {
420 *threshold /= count;
421 }
422 } else {
423 self.thresholds = vec![0.0; dim];
425 }
426
427 self.is_fitted = true;
428 Ok(())
429 }
430
431 pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
436 assert!(self.is_fitted, "Quantizer must be fitted before use");
437 assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
438
439 let num_bytes = self.dimensions.div_ceil(8);
441 let mut binary = vec![0u8; num_bytes];
442
443 for (i, &val) in vector.iter().enumerate() {
444 if val > self.thresholds[i] {
445 let byte_idx = i / 8;
446 let bit_idx = i % 8;
447 binary[byte_idx] |= 1u8 << bit_idx;
448 }
449 }
450
451 binary
452 }
453
454 pub fn dequantize(&self, binary: &[u8]) -> Vec<f32> {
458 assert!(self.is_fitted, "Quantizer must be fitted before use");
459 let expected_bytes = self.dimensions.div_ceil(8);
460 assert_eq!(binary.len(), expected_bytes, "Binary vector size mismatch");
461
462 let mut vector = Vec::with_capacity(self.dimensions);
463 for i in 0..self.dimensions {
464 let byte_idx = i / 8;
465 let bit_idx = i % 8;
466 let bit_set = (binary[byte_idx] >> bit_idx) & 1 == 1;
467
468 let val = if bit_set {
470 self.thresholds[i] + 1.0
471 } else {
472 self.thresholds[i] - 1.0
473 };
474 vector.push(val);
475 }
476
477 vector
478 }
479
480 pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
482 vectors.iter().map(|v| self.quantize(v)).collect()
483 }
484
485 pub fn dequantize_batch(&self, binary: &[Vec<u8>]) -> Vec<Vec<f32>> {
487 binary.iter().map(|v| self.dequantize(v)).collect()
488 }
489
490 #[inline]
494 pub fn hamming_distance(&self, a: &[u8], b: &[u8]) -> u32 {
495 assert_eq!(a.len(), b.len(), "Binary vector size mismatch");
496
497 a.iter()
498 .zip(b.iter())
499 .map(|(&x, &y)| (x ^ y).count_ones())
500 .sum()
501 }
502
503 #[inline]
507 pub fn hamming_similarity(&self, a: &[u8], b: &[u8]) -> f32 {
508 let distance = self.hamming_distance(a, b);
509 1.0 - (distance as f32 / self.dimensions as f32)
510 }
511
512 pub fn compression_ratio(&self) -> f32 {
514 32.0
516 }
517
518 pub fn memory_savings(&self) -> f32 {
520 0.96875
522 }
523
524 pub fn is_fitted(&self) -> bool {
526 self.is_fitted
527 }
528
529 pub fn dimensions(&self) -> usize {
531 self.dimensions
532 }
533}
534
/// Brute-force search index over vectors stored as [`BinaryQuantizer`] bits.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizedIndex {
    /// Quantizer fitted on the vectors passed to `build`.
    quantizer: BinaryQuantizer,
    /// One packed bit vector per indexed entity, parallel to `entity_ids`.
    binary_vectors: Vec<Vec<u8>>,
    /// Entity identifiers, parallel to `binary_vectors`.
    entity_ids: Vec<String>,
}
544
545impl BinaryQuantizedIndex {
546 pub fn new(config: BinaryQuantizationConfig) -> Self {
548 Self {
549 quantizer: BinaryQuantizer::new(config),
550 binary_vectors: Vec::new(),
551 entity_ids: Vec::new(),
552 }
553 }
554
555 pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
557 if vectors.is_empty() {
558 return Err(anyhow!("Cannot build index from empty vectors"));
559 }
560
561 let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();
563
564 self.quantizer.fit(&float_vecs)?;
566
567 self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
569 self.binary_vectors = self.quantizer.quantize_batch(&float_vecs);
570
571 Ok(())
572 }
573
574 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
576 if !self.quantizer.is_fitted() {
577 return Err(anyhow!("Index not built"));
578 }
579
580 let binary_query = self.quantizer.quantize(query);
582
583 let mut similarities: Vec<(usize, f32)> = self
585 .binary_vectors
586 .iter()
587 .enumerate()
588 .map(|(i, v)| (i, self.quantizer.hamming_similarity(&binary_query, v)))
589 .collect();
590
591 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
593
594 Ok(similarities
596 .iter()
597 .take(k.min(self.entity_ids.len()))
598 .map(|(idx, sim)| (self.entity_ids[*idx].clone(), *sim))
599 .collect())
600 }
601
602 pub fn stats(&self) -> BinaryQuantizedIndexStats {
604 let num_vectors = self.binary_vectors.len();
605 let dimensions = self.quantizer.dimensions();
606 let original_bytes = num_vectors * dimensions * 4; let binary_bytes = num_vectors * dimensions.div_ceil(8); BinaryQuantizedIndexStats {
610 num_vectors,
611 dimensions,
612 compression_ratio: self.quantizer.compression_ratio(),
613 memory_savings: self.quantizer.memory_savings(),
614 original_bytes,
615 binary_bytes,
616 }
617 }
618}
619
/// Size summary produced by `BinaryQuantizedIndex::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryQuantizedIndexStats {
    /// Number of stored packed vectors.
    pub num_vectors: usize,
    /// Dimensionality reported by the quantizer.
    pub dimensions: usize,
    /// Value of `BinaryQuantizer::compression_ratio`.
    pub compression_ratio: f32,
    /// Value of `BinaryQuantizer::memory_savings`.
    pub memory_savings: f32,
    /// `num_vectors * dimensions * 4`.
    pub original_bytes: usize,
    /// `num_vectors * ceil(dimensions / 8)`.
    pub binary_bytes: usize,
}
630
/// Per-dimension min/max quantizer that maps each component to a 4-bit code
/// (0..=15) and packs two codes per byte.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizer {
    /// Smallest value seen per dimension during `fit`.
    min_vals: Vec<f32>,
    /// Largest value seen per dimension during `fit`.
    max_vals: Vec<f32>,
    /// Per-dimension multiplier from "offset above the minimum" to code value.
    scales: Vec<f32>,
    /// Vector length learned by `fit` (0 before fitting).
    dimensions: usize,
    /// Set to `true` once `fit` has succeeded.
    is_fitted: bool,
}
652
653impl FourBitQuantizer {
654 pub fn new() -> Self {
656 Self {
657 min_vals: Vec::new(),
658 max_vals: Vec::new(),
659 scales: Vec::new(),
660 dimensions: 0,
661 is_fitted: false,
662 }
663 }
664
665 pub fn fit(&mut self, vectors: &[Vec<f32>]) -> Result<()> {
669 if vectors.is_empty() {
670 return Err(anyhow!("Cannot fit quantizer on empty data"));
671 }
672
673 let dim = vectors[0].len();
674 if vectors.iter().any(|v| v.len() != dim) {
675 return Err(anyhow!("All vectors must have the same dimension"));
676 }
677
678 self.dimensions = dim;
679 self.min_vals = vec![f32::INFINITY; dim];
680 self.max_vals = vec![f32::NEG_INFINITY; dim];
681
682 for vector in vectors {
684 for (i, &val) in vector.iter().enumerate() {
685 self.min_vals[i] = self.min_vals[i].min(val);
686 self.max_vals[i] = self.max_vals[i].max(val);
687 }
688 }
689
690 self.scales = Vec::with_capacity(dim);
692 for i in 0..dim {
693 let range = self.max_vals[i] - self.min_vals[i];
694 self.scales
696 .push(if range > 1e-10 { 15.0 / range } else { 1.0 });
697 }
698
699 self.is_fitted = true;
700 Ok(())
701 }
702
703 pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
707 assert!(self.is_fitted, "Quantizer must be fitted before use");
708 assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
709
710 let num_bytes = self.dimensions.div_ceil(2);
712 let mut quantized = vec![0u8; num_bytes];
713
714 for (i, &val) in vector.iter().enumerate() {
715 let clipped = val.max(self.min_vals[i]).min(self.max_vals[i]);
717 let scaled = (clipped - self.min_vals[i]) * self.scales[i];
719 let nibble = scaled.round().clamp(0.0, 15.0) as u8;
720
721 let byte_idx = i / 2;
722 if i % 2 == 0 {
723 quantized[byte_idx] |= nibble;
725 } else {
726 quantized[byte_idx] |= nibble << 4;
728 }
729 }
730
731 quantized
732 }
733
734 pub fn dequantize(&self, quantized: &[u8]) -> Vec<f32> {
736 assert!(self.is_fitted, "Quantizer must be fitted before use");
737 let expected_bytes = self.dimensions.div_ceil(2);
738 assert_eq!(
739 quantized.len(),
740 expected_bytes,
741 "Quantized vector size mismatch"
742 );
743
744 let mut vector = Vec::with_capacity(self.dimensions);
745 for i in 0..self.dimensions {
746 let byte_idx = i / 2;
747 let nibble = if i % 2 == 0 {
748 quantized[byte_idx] & 0x0F
750 } else {
751 (quantized[byte_idx] >> 4) & 0x0F
753 };
754
755 let scaled = nibble as f32 / self.scales[i];
757 vector.push(scaled + self.min_vals[i]);
758 }
759
760 vector
761 }
762
763 pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u8>> {
765 vectors.iter().map(|v| self.quantize(v)).collect()
766 }
767
768 pub fn dequantize_batch(&self, quantized: &[Vec<u8>]) -> Vec<Vec<f32>> {
770 quantized.iter().map(|v| self.dequantize(v)).collect()
771 }
772
773 #[inline]
777 pub fn quantized_distance(&self, a: &[u8], b: &[u8]) -> f32 {
778 assert_eq!(a.len(), b.len(), "Vector size mismatch");
779
780 let mut distance = 0.0f32;
781 for i in 0..self.dimensions {
782 let byte_idx = i / 2;
783 let nibble_a = if i % 2 == 0 {
784 a[byte_idx] & 0x0F
785 } else {
786 (a[byte_idx] >> 4) & 0x0F
787 };
788 let nibble_b = if i % 2 == 0 {
789 b[byte_idx] & 0x0F
790 } else {
791 (b[byte_idx] >> 4) & 0x0F
792 };
793
794 distance += (nibble_a as i32 - nibble_b as i32).abs() as f32;
795 }
796
797 distance
798 }
799
800 pub fn compression_ratio(&self) -> f32 {
802 8.0
804 }
805
806 pub fn memory_savings(&self) -> f32 {
808 0.875
810 }
811
812 pub fn is_fitted(&self) -> bool {
814 self.is_fitted
815 }
816
817 pub fn dimensions(&self) -> usize {
819 self.dimensions
820 }
821}
822
823impl Default for FourBitQuantizer {
824 fn default() -> Self {
825 Self::new()
826 }
827}
828
/// Brute-force search index over vectors stored as [`FourBitQuantizer`] codes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizedIndex {
    /// Quantizer fitted on the vectors passed to `build`.
    quantizer: FourBitQuantizer,
    /// One packed code vector per indexed entity, parallel to `entity_ids`.
    quantized_vectors: Vec<Vec<u8>>,
    /// Entity identifiers, parallel to `quantized_vectors`.
    entity_ids: Vec<String>,
}
838
839impl FourBitQuantizedIndex {
840 pub fn new() -> Self {
842 Self {
843 quantizer: FourBitQuantizer::new(),
844 quantized_vectors: Vec::new(),
845 entity_ids: Vec::new(),
846 }
847 }
848
849 pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
851 if vectors.is_empty() {
852 return Err(anyhow!("Cannot build index from empty vectors"));
853 }
854
855 let float_vecs: Vec<Vec<f32>> = vectors.iter().map(|(_, v)| v.clone()).collect();
857
858 self.quantizer.fit(&float_vecs)?;
860
861 self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
863 self.quantized_vectors = self.quantizer.quantize_batch(&float_vecs);
864
865 Ok(())
866 }
867
868 pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
870 if !self.quantizer.is_fitted() {
871 return Err(anyhow!("Index not built"));
872 }
873
874 let quantized_query = self.quantizer.quantize(query);
876
877 let mut distances: Vec<(usize, f32)> = self
879 .quantized_vectors
880 .iter()
881 .enumerate()
882 .map(|(i, v)| (i, self.quantizer.quantized_distance(&quantized_query, v)))
883 .collect();
884
885 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
887
888 Ok(distances
890 .iter()
891 .take(k.min(self.entity_ids.len()))
892 .map(|(idx, dist)| (self.entity_ids[*idx].clone(), *dist))
893 .collect())
894 }
895
896 pub fn stats(&self) -> FourBitQuantizedIndexStats {
898 let num_vectors = self.quantized_vectors.len();
899 let dimensions = self.quantizer.dimensions();
900 let original_bytes = num_vectors * dimensions * 4; let quantized_bytes = num_vectors * dimensions.div_ceil(2); FourBitQuantizedIndexStats {
904 num_vectors,
905 dimensions,
906 compression_ratio: self.quantizer.compression_ratio(),
907 memory_savings: self.quantizer.memory_savings(),
908 original_bytes,
909 quantized_bytes,
910 }
911 }
912}
913
914impl Default for FourBitQuantizedIndex {
915 fn default() -> Self {
916 Self::new()
917 }
918}
919
/// Size summary produced by `FourBitQuantizedIndex::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FourBitQuantizedIndexStats {
    /// Number of stored packed vectors.
    pub num_vectors: usize,
    /// Dimensionality reported by the quantizer.
    pub dimensions: usize,
    /// Value of `FourBitQuantizer::compression_ratio`.
    pub compression_ratio: f32,
    /// Value of `FourBitQuantizer::memory_savings`.
    pub memory_savings: f32,
    /// `num_vectors * dimensions * 4`.
    pub original_bytes: usize,
    /// `num_vectors * ceil(dimensions / 2)`.
    pub quantized_bytes: usize,
}
930
931#[cfg(feature = "fp16")]
936use half::f16;
937
/// Converts `f32` vectors to half-precision bit patterns and back.
///
/// Only compiled when the `fp16` feature is enabled.
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16Quantizer {
    /// Expected vector length; 0 disables the length checks in
    /// `quantize` and `dequantize`.
    dimensions: usize,
}
954
#[cfg(feature = "fp16")]
impl Fp16Quantizer {
    /// Creates a quantizer with no dimension constraint (`dimensions == 0`).
    pub fn new() -> Self {
        Fp16Quantizer { dimensions: 0 }
    }

    /// Sets the vector length enforced by `quantize` and `dequantize`.
    ///
    /// A value of 0 turns the checks off.
    pub fn set_dimensions(&mut self, dimensions: usize) {
        self.dimensions = dimensions;
    }

    /// Converts every component to its `f16` bit pattern.
    ///
    /// # Panics
    ///
    /// Panics if a non-zero dimension is configured and `vector` has a
    /// different length.
    pub fn quantize(&self, vector: &[f32]) -> Vec<u16> {
        if self.dimensions > 0 {
            assert_eq!(vector.len(), self.dimensions, "Vector dimension mismatch");
        }

        let mut bits = Vec::with_capacity(vector.len());
        for &x in vector {
            bits.push(f16::from_f32(x).to_bits());
        }
        bits
    }

    /// Converts `f16` bit patterns back to `f32` values.
    ///
    /// # Panics
    ///
    /// Panics if a non-zero dimension is configured and `quantized` has a
    /// different length.
    pub fn dequantize(&self, quantized: &[u16]) -> Vec<f32> {
        if self.dimensions > 0 {
            assert_eq!(
                quantized.len(),
                self.dimensions,
                "Quantized vector dimension mismatch"
            );
        }

        let mut values = Vec::with_capacity(quantized.len());
        for &raw in quantized {
            values.push(f16::from_bits(raw).to_f32());
        }
        values
    }

    /// Applies [`Fp16Quantizer::quantize`] to every vector, in order.
    pub fn quantize_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<u16>> {
        let mut out = Vec::with_capacity(vectors.len());
        for v in vectors {
            out.push(self.quantize(v));
        }
        out
    }

    /// Applies [`Fp16Quantizer::dequantize`] to every bit-pattern vector, in order.
    pub fn dequantize_batch(&self, quantized: &[Vec<u16>]) -> Vec<Vec<f32>> {
        let mut out = Vec::with_capacity(quantized.len());
        for bits in quantized {
            out.push(self.dequantize(bits));
        }
        out
    }

    /// Euclidean distance between two vectors of `f16` bit patterns, with the
    /// arithmetic carried out in `f32`.
    ///
    /// # Panics
    ///
    /// Panics if `a` and `b` differ in length.
    #[inline]
    pub fn fp16_distance(&self, a: &[u16], b: &[u16]) -> f32 {
        assert_eq!(a.len(), b.len(), "Vector dimension mismatch");

        let squared_sum = a.iter().zip(b).fold(0.0f32, |acc, (&lhs, &rhs)| {
            let delta = f16::from_bits(lhs).to_f32() - f16::from_bits(rhs).to_f32();
            acc + delta * delta
        });

        squared_sum.sqrt()
    }

    /// Nominal size ratio between `f32` and 16-bit storage (always 2.0).
    pub fn compression_ratio(&self) -> f32 {
        2.0_f32
    }

    /// Nominal fraction of memory saved versus `f32` storage (always 0.5).
    pub fn memory_savings(&self) -> f32 {
        0.5_f32
    }

    /// Currently configured vector length (0 when unconstrained).
    pub fn dimensions(&self) -> usize {
        self.dimensions
    }
}
1045
#[cfg(feature = "fp16")]
impl Default for Fp16Quantizer {
    /// Equivalent to [`Fp16Quantizer::new`].
    fn default() -> Self {
        Fp16Quantizer::new()
    }
}
1052
/// Brute-force search index over vectors stored as `f16` bit patterns.
///
/// Only compiled when the `fp16` feature is enabled.
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16QuantizedIndex {
    /// Converter configured with the dimensionality seen by `build`.
    quantizer: Fp16Quantizer,
    /// One bit-pattern vector per indexed entity, parallel to `entity_ids`.
    fp16_vectors: Vec<Vec<u16>>,
    /// Entity identifiers, parallel to `fp16_vectors`.
    entity_ids: Vec<String>,
}
1063
#[cfg(feature = "fp16")]
impl Fp16QuantizedIndex {
    /// Creates an empty, unbuilt index.
    pub fn new() -> Self {
        Self {
            quantizer: Fp16Quantizer::new(),
            fp16_vectors: Vec::new(),
            entity_ids: Vec::new(),
        }
    }

    /// Stores half-precision copies of `vectors` together with their ids.
    ///
    /// Any previously built content is replaced.
    ///
    /// # Errors
    ///
    /// Fails when `vectors` is empty or when the vectors do not all share the
    /// length of the first one. Validation happens before any state is
    /// modified, so the index is left unchanged on error.
    pub fn build(&mut self, vectors: &[(String, Vec<f32>)]) -> Result<()> {
        let Some((_, first)) = vectors.first() else {
            return Err(anyhow!("Cannot build index from empty vectors"));
        };

        let dimensions = first.len();
        if vectors.iter().any(|(_, v)| v.len() != dimensions) {
            return Err(anyhow!("All vectors must have the same dimension"));
        }

        self.quantizer.set_dimensions(dimensions);
        self.entity_ids = vectors.iter().map(|(id, _)| id.clone()).collect();
        // Convert straight from the borrowed input; no intermediate f32 copy.
        self.fp16_vectors = vectors
            .iter()
            .map(|(_, v)| self.quantizer.quantize(v))
            .collect();

        Ok(())
    }

    /// Returns up to `k` `(entity_id, distance)` pairs, closest first.
    ///
    /// Distance is `Fp16Quantizer::fp16_distance` between the converted query
    /// and each stored vector.
    ///
    /// # Errors
    ///
    /// Fails when the index holds no vectors, or when `query` does not have
    /// the dimensionality the index was built with. The latter used to panic.
    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
        if self.fp16_vectors.is_empty() {
            return Err(anyhow!("Index not built"));
        }

        let expected = self.quantizer.dimensions();
        if query.len() != expected {
            return Err(anyhow!(
                "Query dimension mismatch: expected {}, got {}",
                expected,
                query.len()
            ));
        }

        let fp16_query = self.quantizer.quantize(query);

        let mut distances: Vec<(usize, f32)> = self
            .fp16_vectors
            .iter()
            .enumerate()
            .map(|(i, v)| (i, self.quantizer.fp16_distance(&fp16_query, v)))
            .collect();

        // Distances can be NaN here (e.g. inf - inf), and a comparator that
        // treats NaN as equal to everything is not a total order. `total_cmp`
        // is, and it sorts positive NaN after every finite distance.
        distances.sort_by(|a, b| a.1.total_cmp(&b.1));

        Ok(distances
            .into_iter()
            .take(k.min(self.entity_ids.len()))
            .map(|(idx, dist)| (self.entity_ids[idx].clone(), dist))
            .collect())
    }

    /// Reports index size figures.
    ///
    /// `original_bytes` assumes 4 bytes per component and `fp16_bytes`
    /// 2 bytes per component.
    pub fn stats(&self) -> Fp16QuantizedIndexStats {
        let num_vectors = self.fp16_vectors.len();
        let dimensions = self.quantizer.dimensions();
        let original_bytes = num_vectors * dimensions * 4;
        let fp16_bytes = num_vectors * dimensions * 2;

        Fp16QuantizedIndexStats {
            num_vectors,
            dimensions,
            compression_ratio: self.quantizer.compression_ratio(),
            memory_savings: self.quantizer.memory_savings(),
            original_bytes,
            fp16_bytes,
        }
    }
}
1138
#[cfg(feature = "fp16")]
impl Default for Fp16QuantizedIndex {
    /// Equivalent to [`Fp16QuantizedIndex::new`].
    fn default() -> Self {
        Fp16QuantizedIndex::new()
    }
}
1145
/// Size summary produced by `Fp16QuantizedIndex::stats`.
#[cfg(feature = "fp16")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Fp16QuantizedIndexStats {
    /// Number of stored vectors.
    pub num_vectors: usize,
    /// Dimensionality reported by the quantizer.
    pub dimensions: usize,
    /// Value of `Fp16Quantizer::compression_ratio`.
    pub compression_ratio: f32,
    /// Value of `Fp16Quantizer::memory_savings`.
    pub memory_savings: f32,
    /// `num_vectors * dimensions * 4`.
    pub original_bytes: usize,
    /// `num_vectors * dimensions * 2`.
    pub fp16_bytes: usize,
}
1157
1158#[cfg(test)]
1159mod tests {
1160 use super::*;
1161
1162 #[test]
1163 fn test_quantizer_fit() {
1164 let vectors = vec![
1165 vec![0.0, 1.0, 2.0],
1166 vec![1.0, 2.0, 3.0],
1167 vec![2.0, 3.0, 4.0],
1168 ];
1169
1170 let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1171 assert!(quantizer.fit(&vectors).is_ok());
1172 assert!(quantizer.is_fitted());
1173 assert_eq!(quantizer.dimensions(), 3);
1174 }
1175
1176 #[test]
1177 fn test_quantize_dequantize() {
1178 let vectors = vec![
1179 vec![0.0, 1.0, 2.0],
1180 vec![1.0, 2.0, 3.0],
1181 vec![2.0, 3.0, 4.0],
1182 ];
1183
1184 let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1185 quantizer.fit(&vectors).unwrap();
1186
1187 let vector = vec![1.0, 2.0, 3.0];
1188 let quantized = quantizer.quantize(&vector);
1189 let dequantized = quantizer.dequantize(&quantized);
1190
1191 assert_eq!(quantized.len(), 3);
1193 assert_eq!(dequantized.len(), 3);
1194
1195 for (orig, deq) in vector.iter().zip(dequantized.iter()) {
1197 assert!((orig - deq).abs() < 0.1); }
1199 }
1200
1201 #[test]
1202 fn test_quantize_batch() {
1203 let vectors = vec![
1204 vec![0.0, 1.0, 2.0],
1205 vec![1.0, 2.0, 3.0],
1206 vec![2.0, 3.0, 4.0],
1207 ];
1208
1209 let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1210 quantizer.fit(&vectors).unwrap();
1211
1212 let quantized = quantizer.quantize_batch(&vectors);
1213 assert_eq!(quantized.len(), 3);
1214 assert_eq!(quantized[0].len(), 3);
1215 }
1216
1217 #[test]
1218 fn test_quantized_distance() {
1219 let vectors = vec![
1220 vec![0.0, 0.0, 0.0],
1221 vec![1.0, 1.0, 1.0],
1222 vec![2.0, 2.0, 2.0],
1223 ];
1224
1225 let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1226 quantizer.fit(&vectors).unwrap();
1227
1228 let a = quantizer.quantize(&vectors[0]);
1229 let b = quantizer.quantize(&vectors[1]);
1230 let c = quantizer.quantize(&vectors[2]);
1231
1232 let dist_ab = quantizer.quantized_distance(&a, &b);
1233 let dist_ac = quantizer.quantized_distance(&a, &c);
1234
1235 assert!(dist_ac > dist_ab);
1237 }
1238
1239 #[test]
1240 fn test_compression_ratio() {
1241 let quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1242 assert_eq!(quantizer.compression_ratio(), 4.0);
1243 assert_eq!(quantizer.memory_savings(), 0.75);
1244 }
1245
1246 #[test]
1247 fn test_quantized_index_build() {
1248 let vectors = vec![
1249 ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
1250 ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
1251 ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
1252 ];
1253
1254 let mut index = QuantizedVectorIndex::new(QuantizationConfig::default());
1255 assert!(index.build(&vectors).is_ok());
1256 }
1257
1258 #[test]
1259 fn test_quantized_index_search() {
1260 let vectors = vec![
1261 ("doc1".to_string(), vec![0.0, 0.0, 0.0]),
1262 ("doc2".to_string(), vec![1.0, 1.0, 1.0]),
1263 ("doc3".to_string(), vec![2.0, 2.0, 2.0]),
1264 ];
1265
1266 let mut index = QuantizedVectorIndex::new(QuantizationConfig::default());
1267 index.build(&vectors).unwrap();
1268
1269 let query = vec![1.0, 1.0, 1.0];
1271 let results = index.search(&query, 2).unwrap();
1272
1273 assert_eq!(results.len(), 2);
1274 assert_eq!(results[0].0, "doc2"); }
1276
1277 #[test]
1278 fn test_quantized_index_stats() {
1279 let vectors = vec![
1280 ("doc1".to_string(), vec![0.0; 768]),
1281 ("doc2".to_string(), vec![1.0; 768]),
1282 ("doc3".to_string(), vec![2.0; 768]),
1283 ];
1284
1285 let mut index = QuantizedVectorIndex::new(QuantizationConfig::default());
1286 index.build(&vectors).unwrap();
1287
1288 let stats = index.stats();
1289 assert_eq!(stats.num_vectors, 3);
1290 assert_eq!(stats.dimensions, 768);
1291 assert_eq!(stats.original_bytes, 3 * 768 * 4); assert_eq!(stats.quantized_bytes, 3 * 768); assert_eq!(stats.compression_ratio, 4.0);
1294 }
1295
1296 #[test]
1297 fn test_fit_empty_vectors() {
1298 let vectors: Vec<Vec<f32>> = vec![];
1299 let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1300 assert!(quantizer.fit(&vectors).is_err());
1301 }
1302
1303 #[test]
1304 #[should_panic(expected = "Quantizer must be fitted")]
1305 fn test_quantize_unfitted() {
1306 let quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1307 quantizer.quantize(&[1.0, 2.0, 3.0]);
1308 }
1309
1310 #[test]
1311 #[should_panic(expected = "Vector dimension mismatch")]
1312 fn test_quantize_dimension_mismatch() {
1313 let vectors = vec![vec![0.0, 1.0, 2.0]];
1314 let mut quantizer = ScalarQuantizer::new(QuantizationConfig::default());
1315 quantizer.fit(&vectors).unwrap();
1316
1317 quantizer.quantize(&[1.0, 2.0]); }
1320
1321 #[test]
1326 fn test_binary_quantizer_fit() {
1327 let vectors = vec![
1328 vec![0.0, 1.0, 2.0],
1329 vec![1.0, 2.0, 3.0],
1330 vec![2.0, 3.0, 4.0],
1331 ];
1332
1333 let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
1334 assert!(quantizer.fit(&vectors).is_ok());
1335 assert!(quantizer.is_fitted());
1336 assert_eq!(quantizer.dimensions(), 3);
1337 }
1338
1339 #[test]
1340 fn test_binary_quantize_dequantize() {
1341 let vectors = vec![
1342 vec![0.0, 1.0, 2.0],
1343 vec![1.0, 2.0, 3.0],
1344 vec![2.0, 3.0, 4.0],
1345 ];
1346
1347 let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
1348 quantizer.fit(&vectors).unwrap();
1349
1350 let vector = vec![1.0, 2.0, 3.0];
1351 let binary = quantizer.quantize(&vector);
1352 let dequantized = quantizer.dequantize(&binary);
1353
1354 assert_eq!(binary.len(), 1); assert_eq!(dequantized.len(), 3);
1357 }
1358
1359 #[test]
1360 fn test_binary_quantize_large_vector() {
1361 let vectors: Vec<Vec<f32>> = (0..10)
1363 .map(|_| (0..128).map(|i| i as f32).collect())
1364 .collect();
1365
1366 let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
1367 quantizer.fit(&vectors).unwrap();
1368
1369 let vector: Vec<f32> = (0..128).map(|i| i as f32).collect();
1370 let binary = quantizer.quantize(&vector);
1371
1372 assert_eq!(binary.len(), 16);
1374 }
1375
1376 #[test]
1377 fn test_binary_hamming_distance() {
1378 let vectors = vec![
1379 vec![0.0, 0.0, 0.0, 0.0],
1380 vec![1.0, 1.0, 1.0, 1.0],
1381 vec![2.0, 2.0, 2.0, 2.0],
1382 ];
1383
1384 let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
1385 quantizer.fit(&vectors).unwrap();
1386
1387 let a = quantizer.quantize(&vectors[0]);
1388 let b = quantizer.quantize(&vectors[1]);
1389 let c = quantizer.quantize(&vectors[2]);
1390
1391 let dist_ab = quantizer.hamming_distance(&a, &b);
1392 let dist_ac = quantizer.hamming_distance(&a, &c);
1393
1394 assert!(dist_ab <= 4); assert!(dist_ac <= 4);
1397 }
1398
1399 #[test]
1400 fn test_binary_hamming_similarity() {
1401 let vectors = vec![
1402 vec![0.0, 0.0, 0.0, 0.0],
1403 vec![1.0, 1.0, 1.0, 1.0],
1404 vec![2.0, 2.0, 2.0, 2.0],
1405 ];
1406
1407 let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
1408 quantizer.fit(&vectors).unwrap();
1409
1410 let a = quantizer.quantize(&vectors[0]);
1411 let b = quantizer.quantize(&vectors[1]);
1412
1413 let sim = quantizer.hamming_similarity(&a, &b);
1414
1415 assert!((0.0..=1.0).contains(&sim));
1417 }
1418
1419 #[test]
1420 fn test_binary_compression_ratio() {
1421 let quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
1422 assert_eq!(quantizer.compression_ratio(), 32.0);
1423 assert_eq!(quantizer.memory_savings(), 0.96875);
1424 }
1425
1426 #[test]
1427 fn test_binary_quantize_batch() {
1428 let vectors = vec![
1429 vec![0.0, 1.0, 2.0],
1430 vec![1.0, 2.0, 3.0],
1431 vec![2.0, 3.0, 4.0],
1432 ];
1433
1434 let mut quantizer = BinaryQuantizer::new(BinaryQuantizationConfig::default());
1435 quantizer.fit(&vectors).unwrap();
1436
1437 let binary = quantizer.quantize_batch(&vectors);
1438 assert_eq!(binary.len(), 3);
1439 assert_eq!(binary[0].len(), 1); }
1441
1442 #[test]
1443 fn test_binary_quantized_index_build() {
1444 let vectors = vec![
1445 ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
1446 ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
1447 ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
1448 ];
1449
1450 let mut index = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
1451 assert!(index.build(&vectors).is_ok());
1452 }
1453
1454 #[test]
1455 fn test_binary_quantized_index_search() {
1456 let vectors = vec![
1457 ("doc1".to_string(), vec![0.0, 0.0, 0.0]),
1458 ("doc2".to_string(), vec![1.0, 1.0, 1.0]),
1459 ("doc3".to_string(), vec![2.0, 2.0, 2.0]),
1460 ];
1461
1462 let mut index = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
1463 index.build(&vectors).unwrap();
1464
1465 let query = vec![1.0, 1.0, 1.0];
1467 let results = index.search(&query, 2).unwrap();
1468
1469 assert_eq!(results.len(), 2);
1470 assert!(results[0].1 >= results[1].1);
1472 }
1473
/// Checks the size bookkeeping reported by `stats()` for three 768-dimensional vectors.
#[test]
fn test_binary_quantized_index_stats() {
    let mut index = BinaryQuantizedIndex::new(BinaryQuantizationConfig::default());
    let corpus = vec![
        (String::from("doc1"), vec![0.0; 768]),
        (String::from("doc2"), vec![1.0; 768]),
        (String::from("doc3"), vec![2.0; 768]),
    ];
    index.build(&corpus).unwrap();

    let report = index.stats();
    assert_eq!(report.num_vectors, 3);
    assert_eq!(report.dimensions, 768);
    // Expected sizes: 4 bytes per original component, 96 bytes per binary code.
    assert_eq!(report.original_bytes, 3 * 768 * 4);
    assert_eq!(report.binary_bytes, 3 * 96);
    assert_eq!(report.compression_ratio, 32.0);
}
1492
/// Fitting a binary quantizer on an empty training set must return an error.
#[test]
fn test_binary_fit_empty_vectors() {
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    let no_data: Vec<Vec<f32>> = Vec::new();

    let outcome = bq.fit(&no_data);
    assert!(outcome.is_err());
}
1499
/// Quantizing before `fit` has been called is expected to panic.
#[test]
#[should_panic(expected = "Quantizer must be fitted")]
fn test_binary_quantize_unfitted() {
    let unfitted = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    let input = [1.0_f32, 2.0, 3.0];
    unfitted.quantize(&input);
}
1506
/// A quantizer fitted on 3-dimensional data is expected to panic on a 2-dimensional input.
#[test]
#[should_panic(expected = "Vector dimension mismatch")]
fn test_binary_quantize_dimension_mismatch() {
    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    let training = vec![vec![0.0, 1.0, 2.0]];
    bq.fit(&training).unwrap();

    // Two components instead of three.
    let too_short = [1.0_f32, 2.0];
    bq.quantize(&too_short);
}
1517
/// With `use_mean_threshold` disabled, quantizing `[-1, 0, 1]` is expected to set
/// only bit 2 among the three lowest bits of the first byte.
#[test]
fn test_binary_zero_threshold() {
    let training = vec![vec![-1.0, 0.0, 1.0], vec![-2.0, 0.0, 2.0]];

    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig {
        use_mean_threshold: false,
    });
    bq.fit(&training).unwrap();

    let sample = vec![-1.0, 0.0, 1.0];
    let code = bq.quantize(&sample);

    // Mask off everything above the three bits that correspond to the input.
    let low_bits = code[0] & 0b0000_0111;
    assert_eq!(low_bits, 0b0000_0100);
}
1538
/// Two identical vectors must quantize to codes with Hamming distance 0 and similarity 1.
#[test]
fn test_binary_identical_vectors() {
    let sample = vec![1.0, 2.0, 3.0];
    let training = vec![sample.clone(), sample];

    let mut bq = BinaryQuantizer::new(BinaryQuantizationConfig::default());
    bq.fit(&training).unwrap();

    let first = bq.quantize(&training[0]);
    let second = bq.quantize(&training[1]);

    let distance = bq.hamming_distance(&first, &second);
    assert_eq!(distance, 0);

    let similarity = bq.hamming_similarity(&first, &second);
    assert_eq!(similarity, 1.0);
}
1557
/// Fits a four-bit quantizer on three 3-dimensional vectors and checks that it
/// reports itself as fitted with the right dimensionality.
#[test]
fn test_fourbit_quantizer_fit() {
    let vectors = vec![
        vec![0.0, 1.0, 2.0],
        vec![1.0, 2.0, 3.0],
        vec![2.0, 3.0, 4.0],
    ];

    let mut quantizer = FourBitQuantizer::new();
    // Unwrap instead of `assert!(.. .is_ok())` so a failure reports the underlying error.
    quantizer
        .fit(&vectors)
        .expect("fitting a four-bit quantizer on valid vectors should succeed");
    assert!(quantizer.is_fitted());
    assert_eq!(quantizer.dimensions(), 3);
}
1575
/// Round-trips a vector through four-bit quantization and bounds the reconstruction error.
#[test]
fn test_fourbit_quantize_dequantize() {
    // Rows are [r, r + 1, r + 2] for r in 0..3.
    let training: Vec<Vec<f32>> = (0..3)
        .map(|row| (0..3).map(|col| (row + col) as f32).collect())
        .collect();

    let mut fbq = FourBitQuantizer::new();
    fbq.fit(&training).unwrap();

    let sample = vec![1.0, 2.0, 3.0];
    let packed = fbq.quantize(&sample);
    let restored = fbq.dequantize(&packed);

    // The test expects three values to occupy two bytes.
    assert_eq!(packed.len(), 2);
    assert_eq!(restored.len(), 3);

    // Each reconstructed component must be within 0.3 of the original.
    for (idx, expected) in sample.iter().enumerate() {
        assert!((expected - restored[idx]).abs() < 0.3);
    }
}
1600
/// Quantizes a 100-dimensional ramp and expects 50 bytes of output.
#[test]
fn test_fourbit_quantize_large_vector() {
    // 0.0, 1.0, ..., 99.0; the training set is ten copies of this ramp.
    let ramp: Vec<f32> = (0..100).map(|step| step as f32).collect();
    let training: Vec<Vec<f32>> = vec![ramp.clone(); 10];

    let mut fbq = FourBitQuantizer::new();
    fbq.fit(&training).unwrap();

    let packed = fbq.quantize(&ramp);

    assert_eq!(packed.len(), 50);
}
1617
/// A 5-dimensional vector is expected to quantize to 3 bytes.
#[test]
fn test_fourbit_odd_dimensions() {
    let mut fbq = FourBitQuantizer::new();
    let training = vec![
        vec![0.0, 1.0, 2.0, 3.0, 4.0],
        vec![1.0, 2.0, 3.0, 4.0, 5.0],
    ];
    fbq.fit(&training).unwrap();

    let sample = vec![1.5, 2.5, 3.5, 4.5, 5.5];
    let packed = fbq.quantize(&sample);

    assert_eq!(packed.len(), 3);
}
1635
/// Checks how two four-bit values are packed into a single byte.
#[test]
fn test_fourbit_nibble_packing() {
    // Both dimensions are fitted on the range 0..=15.
    let training = vec![vec![0.0; 2], vec![15.0; 2]];

    let mut fbq = FourBitQuantizer::new();
    fbq.fit(&training).unwrap();

    let sample = vec![0.0, 15.0];
    let packed = fbq.quantize(&sample);

    // The test expects one byte whose high nibble is 0xF and low nibble is 0x0.
    assert_eq!(packed.len(), 1);
    assert_eq!(packed[0], 0xF0);
}
1654
/// The four-bit quantizer must report an 8x compression ratio and 87.5% memory savings.
#[test]
fn test_fourbit_compression_ratio() {
    let fbq = FourBitQuantizer::new();

    let ratio = fbq.compression_ratio();
    assert_eq!(ratio, 8.0);

    let savings = fbq.memory_savings();
    assert_eq!(savings, 0.875);
}
1661
/// Batch-quantizes the training set with the four-bit quantizer and checks the output shape.
#[test]
fn test_fourbit_quantize_batch() {
    // Rows are [r, r + 1, r + 2] for r in 0..3.
    let training: Vec<Vec<f32>> = (0..3)
        .map(|row| (0..3).map(|col| (row + col) as f32).collect())
        .collect();

    let mut fbq = FourBitQuantizer::new();
    fbq.fit(&training).unwrap();

    let packed = fbq.quantize_batch(&training);
    // One code per input vector; three values are expected to occupy two bytes.
    assert_eq!(packed.len(), 3);
    assert_eq!(packed[0].len(), 2);
}
1677
/// The quantized distance from `[0, 0, 0]` to `[2, 2, 2]` must exceed the one to `[1, 1, 1]`.
#[test]
fn test_fourbit_quantized_distance() {
    let training = vec![vec![0.0; 3], vec![1.0; 3], vec![2.0; 3]];

    let mut fbq = FourBitQuantizer::new();
    fbq.fit(&training).unwrap();

    let origin = fbq.quantize(&training[0]);
    let near = fbq.quantize(&training[1]);
    let far = fbq.quantize(&training[2]);

    let near_dist = fbq.quantized_distance(&origin, &near);
    let far_dist = fbq.quantized_distance(&origin, &far);

    assert!(far_dist > near_dist);
}
1699
/// Builds a four-bit quantized index from three labelled 3-dimensional vectors
/// and requires the build to succeed.
#[test]
fn test_fourbit_quantized_index_build() {
    let vectors = vec![
        ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
        ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
        ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
    ];

    let mut index = FourBitQuantizedIndex::new();
    // Unwrap instead of `assert!(.. .is_ok())` so a failure reports the underlying error.
    index
        .build(&vectors)
        .expect("building a four-bit quantized index from valid vectors should succeed");
}
1711
/// Searching with a query equal to `doc2` must return two hits with `doc2` ranked first.
#[test]
fn test_fourbit_quantized_index_search() {
    let corpus = vec![
        (String::from("doc1"), vec![0.0; 3]),
        (String::from("doc2"), vec![1.0; 3]),
        (String::from("doc3"), vec![2.0; 3]),
    ];

    let mut index = FourBitQuantizedIndex::new();
    index.build(&corpus).unwrap();

    let probe = vec![1.0, 1.0, 1.0];
    let hits = index.search(&probe, 2).unwrap();

    assert_eq!(hits.len(), 2);
    // The top hit is the document identical to the query.
    assert_eq!(hits[0].0, "doc2");
}
1730
/// Checks the size bookkeeping reported by `stats()` for three 768-dimensional vectors.
#[test]
fn test_fourbit_quantized_index_stats() {
    let mut index = FourBitQuantizedIndex::new();
    let corpus = vec![
        (String::from("doc1"), vec![0.0; 768]),
        (String::from("doc2"), vec![1.0; 768]),
        (String::from("doc3"), vec![2.0; 768]),
    ];
    index.build(&corpus).unwrap();

    let report = index.stats();
    assert_eq!(report.num_vectors, 3);
    assert_eq!(report.dimensions, 768);
    // Expected sizes: 4 bytes per original component, 384 bytes per quantized vector.
    assert_eq!(report.original_bytes, 3 * 768 * 4);
    assert_eq!(report.quantized_bytes, 3 * 384);
    assert_eq!(report.compression_ratio, 8.0);
}
1749
/// Fitting a four-bit quantizer on an empty training set must return an error.
#[test]
fn test_fourbit_fit_empty_vectors() {
    let mut fbq = FourBitQuantizer::new();
    let no_data: Vec<Vec<f32>> = Vec::new();

    let outcome = fbq.fit(&no_data);
    assert!(outcome.is_err());
}
1756
/// Quantizing before `fit` has been called is expected to panic.
#[test]
#[should_panic(expected = "Quantizer must be fitted")]
fn test_fourbit_quantize_unfitted() {
    let unfitted = FourBitQuantizer::new();
    let input = [1.0_f32, 2.0, 3.0];
    unfitted.quantize(&input);
}
1763
/// A quantizer fitted on 3-dimensional data is expected to panic on a 2-dimensional input.
#[test]
#[should_panic(expected = "Vector dimension mismatch")]
fn test_fourbit_quantize_dimension_mismatch() {
    let mut fbq = FourBitQuantizer::new();
    let training = vec![vec![0.0, 1.0, 2.0]];
    fbq.fit(&training).unwrap();

    // Two components instead of three.
    let too_short = [1.0_f32, 2.0];
    fbq.quantize(&too_short);
}
1774
/// `set_dimensions` must be reflected by `dimensions()`.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantizer_basic() {
    let mut fp16 = Fp16Quantizer::new();
    fp16.set_dimensions(3);

    let reported = fp16.dimensions();
    assert_eq!(reported, 3);
}
1786
/// Round-trips a small vector through FP16 and bounds the absolute reconstruction error.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantize_dequantize() {
    let fp16 = Fp16Quantizer::new();

    let sample = vec![1.0, 2.0, 3.0];
    let halves = fp16.quantize(&sample);
    let restored = fp16.dequantize(&halves);

    // Length is preserved in both directions.
    assert_eq!(halves.len(), 3);
    assert_eq!(restored.len(), 3);

    // Each reconstructed component must be within 0.001 of the original.
    for (idx, expected) in sample.iter().enumerate() {
        assert!((expected - restored[idx]).abs() < 0.001);
    }
}
1805
/// Quantizing a 768-dimensional vector must yield 768 output elements.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantize_large_vector() {
    // 0.0, 0.1, 0.2, ... for 768 components.
    let ramp: Vec<f32> = (0..768).map(|step| step as f32 * 0.1).collect();

    let fp16 = Fp16Quantizer::new();
    let halves = fp16.quantize(&ramp);

    assert_eq!(halves.len(), 768);
}
1817
/// Batch-quantizes three 3-dimensional vectors to FP16 and checks the output shape.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantize_batch() {
    // Rows are [r, r + 1, r + 2] for r in 0..3.
    let inputs: Vec<Vec<f32>> = (0..3)
        .map(|row| (0..3).map(|col| (row + col) as f32).collect())
        .collect();

    let fp16 = Fp16Quantizer::new();
    let halves = fp16.quantize_batch(&inputs);

    assert_eq!(halves.len(), 3);
    assert_eq!(halves[0].len(), 3);
}
1833
/// The FP16 distance from `[0, 0, 0]` to `[2, 2, 2]` must exceed the one to `[1, 1, 1]`.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_distance() {
    let fp16 = Fp16Quantizer::new();

    let zeros = vec![0.0; 3];
    let ones = vec![1.0; 3];
    let twos = vec![2.0; 3];

    let origin = fp16.quantize(&zeros);
    let near = fp16.quantize(&ones);
    let far = fp16.quantize(&twos);

    let near_dist = fp16.fp16_distance(&origin, &near);
    let far_dist = fp16.fp16_distance(&origin, &far);

    assert!(far_dist > near_dist);
}
1853
/// The FP16 quantizer must report a 2x compression ratio and 50% memory savings.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_compression_ratio() {
    let fp16 = Fp16Quantizer::new();

    let ratio = fp16.compression_ratio();
    assert_eq!(ratio, 2.0);

    let savings = fp16.memory_savings();
    assert_eq!(savings, 0.5);
}
1861
/// Builds an FP16 quantized index from three labelled 3-dimensional vectors
/// and requires the build to succeed.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantized_index_build() {
    let vectors = vec![
        ("doc1".to_string(), vec![0.0, 1.0, 2.0]),
        ("doc2".to_string(), vec![1.0, 2.0, 3.0]),
        ("doc3".to_string(), vec![2.0, 3.0, 4.0]),
    ];

    let mut index = Fp16QuantizedIndex::new();
    // Unwrap instead of `assert!(.. .is_ok())` so a failure reports the underlying error.
    index
        .build(&vectors)
        .expect("building an FP16 quantized index from valid vectors should succeed");
}
1874
/// Searching with a query equal to `doc2` must return two hits with `doc2` ranked first.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantized_index_search() {
    let corpus = vec![
        (String::from("doc1"), vec![0.0; 3]),
        (String::from("doc2"), vec![1.0; 3]),
        (String::from("doc3"), vec![2.0; 3]),
    ];

    let mut index = Fp16QuantizedIndex::new();
    index.build(&corpus).unwrap();

    let probe = vec![1.0, 1.0, 1.0];
    let hits = index.search(&probe, 2).unwrap();

    assert_eq!(hits.len(), 2);
    // The top hit is the document identical to the query.
    assert_eq!(hits[0].0, "doc2");
}
1894
/// Checks the size bookkeeping reported by `stats()` for three 768-dimensional vectors.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_quantized_index_stats() {
    let mut index = Fp16QuantizedIndex::new();
    let corpus = vec![
        (String::from("doc1"), vec![0.0; 768]),
        (String::from("doc2"), vec![1.0; 768]),
        (String::from("doc3"), vec![2.0; 768]),
    ];
    index.build(&corpus).unwrap();

    let report = index.stats();
    assert_eq!(report.num_vectors, 3);
    assert_eq!(report.dimensions, 768);
    // Expected sizes: 4 bytes per original component, 2 bytes per FP16 component.
    assert_eq!(report.original_bytes, 3 * 768 * 4);
    assert_eq!(report.fp16_bytes, 3 * 768 * 2);
    assert_eq!(report.compression_ratio, 2.0);
}
1914
/// Round-trips vectors of several magnitudes and signs through FP16 and bounds
/// the relative error of every component.
#[test]
#[cfg(feature = "fp16")]
fn test_fp16_high_precision() {
    let fp16 = Fp16Quantizer::new();

    let cases: [[f32; 3]; 4] = [
        [0.1, 0.2, 0.3],
        [1.5, 2.5, 3.5],
        [100.0, 200.0, 300.0],
        [-1.0, -2.0, -3.0],
    ];

    for case in cases.iter() {
        let halves = fp16.quantize(&case[..]);
        let restored = fp16.dequantize(&halves);

        for (&expected, &actual) in case.iter().zip(restored.iter()) {
            let rel_err = ((expected - actual) / expected).abs();
            // Components with magnitude below 0.01 are exempt from the relative-error bound.
            assert!(rel_err < 0.001 || expected.abs() < 0.01);
        }
    }
}
1939
/// With dimensions set to 3, a 3-component input is quantized first and a
/// 2-component input is then expected to panic.
#[test]
#[cfg(feature = "fp16")]
#[should_panic(expected = "Vector dimension mismatch")]
fn test_fp16_quantize_dimension_mismatch() {
    let mut fp16 = Fp16Quantizer::new();
    fp16.set_dimensions(3);

    // Matching length: this call comes before the mismatching one.
    let matching = vec![1.0, 2.0, 3.0];
    fp16.quantize(&matching);

    // Two components instead of three.
    let too_short = [1.0_f32, 2.0];
    fp16.quantize(&too_short);
}
1953}