use crate::{Vector, VectorData, VectorError};
use half::f16;
use std::io::{Read, Write};
use zstd;

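/// Compression strategies available for stored vectors: lossless zstd over the
/// serialized bytes, lossy scalar/product quantization, PCA projection, and an
/// adaptive mode that analyzes sample vectors before choosing one of the above.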
#[derive(Debug, Clone, Default)]
pub enum CompressionMethod {
    #[default]
    None,
    Zstd {
        level: i32,
    },
    Quantization {
        bits: u8,
    },
    ProductQuantization {
        subvectors: usize,
        codebook_size: usize,
    },
    Pca {
        components: usize,
    },
    Adaptive {
        quality_level: AdaptiveQuality,
        analysis_samples: usize,
    },
}

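/// Speed/ratio trade-off used when the adaptive compressor selects a method.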
#[derive(Debug, Clone)]
pub enum AdaptiveQuality {
    Fast,
    Balanced,
    BestRatio,
}

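/// Common interface implemented by every compressor in this module.
/// `compression_ratio` is the estimated compressed/original size (1.0 = no reduction).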
pub trait VectorCompressor: Send + Sync {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError>;
    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError>;
    fn compression_ratio(&self) -> f32;
}

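/// Lossless compressor: serializes the vector to bytes and applies zstd.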
pub struct ZstdCompressor {
    level: i32,
}

impl ZstdCompressor {
    pub fn new(level: i32) -> Self {
        Self {
            level: level.clamp(1, 22),
        }
    }
}

impl VectorCompressor for ZstdCompressor {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        let bytes = vector_to_bytes(vector)?;
        zstd::encode_all(&bytes[..], self.level)
            .map_err(|e| VectorError::CompressionError(e.to_string()))
    }

    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
        let decompressed =
            zstd::decode_all(data).map_err(|e| VectorError::CompressionError(e.to_string()))?;
        bytes_to_vector(&decompressed, dimensions)
    }

    fn compression_ratio(&self) -> f32 {
        match self.level {
            1..=3 => 0.7,
            4..=9 => 0.5,
            10..=15 => 0.4,
            16..=22 => 0.3,
            _ => 1.0,
        }
    }
}

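/// Lossy scalar quantizer: maps each float to a `bits`-wide integer over the
/// trained `[min_val, max_val]` range.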
pub struct ScalarQuantizer {
    bits: u8,
    min_val: f32,
    max_val: f32,
}

impl ScalarQuantizer {
    pub fn new(bits: u8) -> Self {
        Self {
            bits: bits.clamp(1, 16),
            min_val: 0.0,
            max_val: 1.0,
        }
    }

    pub fn with_range(bits: u8, min_val: f32, max_val: f32) -> Self {
        Self {
            bits: bits.clamp(1, 16),
            min_val,
            max_val,
        }
    }

    pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
        if vectors.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No vectors to train on".to_string(),
            ));
        }

        let mut min = f32::INFINITY;
        let mut max = f32::NEG_INFINITY;

        for vector in vectors {
            match &vector.values {
                VectorData::F32(v) => {
                    for &val in v {
                        min = min.min(val);
                        max = max.max(val);
                    }
                }
                VectorData::F64(v) => {
                    for &val in v {
                        min = min.min(val as f32);
                        max = max.max(val as f32);
                    }
                }
                _ => {}
            }
        }

        self.min_val = min;
        self.max_val = max;
        Ok(())
    }

    fn quantize_value(&self, value: f32) -> u16 {
        let normalized = ((value - self.min_val) / (self.max_val - self.min_val)).clamp(0.0, 1.0);
        let max_quant_val = (1u32 << self.bits) - 1;
        (normalized * max_quant_val as f32).round() as u16
    }

    fn dequantize_value(&self, quantized: u16) -> f32 {
        let max_quant_val = (1u32 << self.bits) - 1;
        let normalized = quantized as f32 / max_quant_val as f32;
        normalized * (self.max_val - self.min_val) + self.min_val
    }
}

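// Serialized layout: 1 byte `bits`, 4 bytes `min_val` (LE f32), 4 bytes `max_val`
// (LE f32), followed by one u8 code per value (bits <= 8) or one little-endian
// u16 code per value (bits > 8).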
impl VectorCompressor for ScalarQuantizer {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        let values = match &vector.values {
            VectorData::F32(v) => v.clone(),
            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
            _ => {
                return Err(VectorError::UnsupportedOperation(
                    "Quantization only supports float vectors".to_string(),
                ))
            }
        };

        let mut compressed = Vec::new();

        compressed.write_all(&self.bits.to_le_bytes())?;
        compressed.write_all(&self.min_val.to_le_bytes())?;
        compressed.write_all(&self.max_val.to_le_bytes())?;

        if self.bits <= 8 {
            for val in values {
                let quantized = self.quantize_value(val) as u8;
                compressed.push(quantized);
            }
        } else {
            for val in values {
                let quantized = self.quantize_value(val);
                compressed.write_all(&quantized.to_le_bytes())?;
            }
        }

        Ok(compressed)
    }

    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
        let mut cursor = std::io::Cursor::new(data);

        let mut bits_buf = [0u8; 1];
        cursor.read_exact(&mut bits_buf)?;
        let bits = bits_buf[0];

        let mut min_buf = [0u8; 4];
        cursor.read_exact(&mut min_buf)?;
        let min_val = f32::from_le_bytes(min_buf);

        let mut max_buf = [0u8; 4];
        cursor.read_exact(&mut max_buf)?;
        let max_val = f32::from_le_bytes(max_buf);

        let quantizer = ScalarQuantizer {
            bits,
            min_val,
            max_val,
        };

        let mut values = Vec::with_capacity(dimensions);

        if bits <= 8 {
            let mut buf = [0u8; 1];
            for _ in 0..dimensions {
                cursor.read_exact(&mut buf)?;
                let quantized = buf[0] as u16;
                values.push(quantizer.dequantize_value(quantized));
            }
        } else {
            let mut buf = [0u8; 2];
            for _ in 0..dimensions {
                cursor.read_exact(&mut buf)?;
                let quantized = u16::from_le_bytes(buf);
                values.push(quantizer.dequantize_value(quantized));
            }
        }

        Ok(Vector::new(values))
    }

    fn compression_ratio(&self) -> f32 {
        self.bits as f32 / 32.0
    }
}

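/// Lossy PCA compressor: projects vectors onto the top `components` principal
/// directions learned from training data and reconstructs them on decompression.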
pub struct PcaCompressor {
    components: usize,
    mean: Vec<f32>,
    components_matrix: Vec<Vec<f32>>,
    explained_variance_ratio: Vec<f32>,
}

impl PcaCompressor {
    pub fn new(components: usize) -> Self {
        Self {
            components,
            mean: Vec::new(),
            components_matrix: Vec::new(),
            explained_variance_ratio: Vec::new(),
        }
    }

    pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
        if vectors.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No vectors to train on".to_string(),
            ));
        }

        let data: Vec<Vec<f32>> = vectors
            .iter()
            .map(|v| match &v.values {
                VectorData::F32(vals) => Ok(vals.clone()),
                VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
                _ => Err(VectorError::UnsupportedOperation(
                    "PCA only supports float vectors".to_string(),
                )),
            })
            .collect::<Result<Vec<_>, _>>()?;

        let n_samples = data.len();
        let n_features = data[0].len();

        self.mean = vec![0.0; n_features];
        for sample in &data {
            for (i, &val) in sample.iter().enumerate() {
                self.mean[i] += val;
            }
        }
        for val in &mut self.mean {
            *val /= n_samples as f32;
        }

        self.components_matrix = Vec::with_capacity(self.components);

        use nalgebra::DMatrix;

        // Build the mean-centered data matrix and its covariance.
        let mut data_matrix = DMatrix::<f32>::zeros(n_samples, n_features);
        for (i, sample) in data.iter().enumerate() {
            for (j, &val) in sample.iter().enumerate() {
                data_matrix[(i, j)] = val - self.mean[j];
            }
        }

        let covariance = data_matrix.transpose() * &data_matrix / (n_samples as f32 - 1.0);

        let svd = covariance.svd(true, true);

        if let Some(u) = svd.u {
            let num_components = self.components.min(u.ncols());

            let singular_values = &svd.singular_values;
            let total_variance: f32 = singular_values.iter().sum();
            let mut explained_variance = Vec::with_capacity(num_components);

            for i in 0..num_components {
                let component: Vec<f32> = u.column(i).iter().cloned().collect();
                self.components_matrix.push(component);

                let variance_ratio = singular_values[i] / total_variance;
                explained_variance.push(variance_ratio);
            }

            self.explained_variance_ratio = explained_variance;
        } else {
            return Err(VectorError::CompressionError(
                "SVD decomposition failed for PCA".to_string(),
            ));
        }

        Ok(())
    }

    fn project(&self, vector: &[f32]) -> Vec<f32> {
        let mut centered = vector.to_vec();
        for (i, val) in centered.iter_mut().enumerate() {
            *val -= self.mean.get(i).unwrap_or(&0.0);
        }

        let mut projected = vec![0.0; self.components];
        for (i, component) in self.components_matrix.iter().enumerate() {
            let mut dot = 0.0;
            for (j, &val) in centered.iter().enumerate() {
                dot += val * component.get(j).unwrap_or(&0.0);
            }
            projected[i] = dot;
        }

        projected
    }

    fn reconstruct(&self, projected: &[f32]) -> Vec<f32> {
        let n_features = self.mean.len();
        let mut reconstructed = self.mean.clone();

        for (i, &coeff) in projected.iter().enumerate() {
            if let Some(component) = self.components_matrix.get(i) {
                for (j, &comp_val) in component.iter().enumerate() {
                    if j < n_features {
                        reconstructed[j] += coeff * comp_val;
                    }
                }
            }
        }

        reconstructed
    }

    pub fn explained_variance_ratio(&self) -> &[f32] {
        &self.explained_variance_ratio
    }

    pub fn total_explained_variance(&self) -> f32 {
        self.explained_variance_ratio.iter().sum()
    }
}

impl VectorCompressor for PcaCompressor {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        let values = match &vector.values {
            VectorData::F32(v) => v.clone(),
            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
            _ => {
                return Err(VectorError::UnsupportedOperation(
                    "PCA only supports float vectors".to_string(),
                ))
            }
        };

        let projected = self.project(&values);

        let mut compressed = Vec::new();
        compressed.write_all(&(self.components as u32).to_le_bytes())?;

        for val in projected {
            compressed.write_all(&val.to_le_bytes())?;
        }

        Ok(compressed)
    }

    fn decompress(&self, data: &[u8], _dimensions: usize) -> Result<Vector, VectorError> {
        let mut cursor = std::io::Cursor::new(data);

        let mut components_buf = [0u8; 4];
        cursor.read_exact(&mut components_buf)?;
        let components = u32::from_le_bytes(components_buf) as usize;

        let mut projected = Vec::with_capacity(components);
        let mut val_buf = [0u8; 4];

        for _ in 0..components {
            cursor.read_exact(&mut val_buf)?;
            projected.push(f32::from_le_bytes(val_buf));
        }

        let reconstructed = self.reconstruct(&projected);
        Ok(Vector::new(reconstructed))
    }

    fn compression_ratio(&self) -> f32 {
        if self.mean.is_empty() {
            1.0
        } else {
            self.components as f32 / self.mean.len() as f32
        }
    }
}

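/// Product quantizer: splits each vector into `subvectors` contiguous chunks and
/// replaces each chunk with the index of its nearest centroid in a per-chunk
/// codebook trained with a small k-means loop.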
pub struct ProductQuantizer {
    subvectors: usize,
    codebook_size: usize,
    codebooks: Vec<Vec<Vec<f32>>>,
    subvector_dim: usize,
}

impl ProductQuantizer {
    pub fn new(subvectors: usize, codebook_size: usize) -> Self {
        Self {
            subvectors,
            codebook_size,
            codebooks: Vec::new(),
            subvector_dim: 0,
        }
    }

    pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
        if vectors.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No training data provided for Product Quantization".to_string(),
            ));
        }

        let vector_dim = vectors[0].dimensions;
        if vector_dim % self.subvectors != 0 {
            return Err(VectorError::InvalidDimensions(format!(
                "Vector dimension {} is not divisible by number of subvectors {}",
                vector_dim, self.subvectors
            )));
        }

        self.subvector_dim = vector_dim / self.subvectors;
        self.codebooks = Vec::with_capacity(self.subvectors);

        let training_data: Result<Vec<Vec<f32>>, _> = vectors
            .iter()
            .map(|v| match &v.values {
                VectorData::F32(vals) => Ok(vals.clone()),
                VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
                _ => Err(VectorError::UnsupportedOperation(
                    "Product quantization only supports float vectors".to_string(),
                )),
            })
            .collect();

        let training_data = training_data?;

        for subvec_idx in 0..self.subvectors {
            let start_dim = subvec_idx * self.subvector_dim;
            let end_dim = start_dim + self.subvector_dim;

            let subvectors: Vec<Vec<f32>> = training_data
                .iter()
                .map(|v| v[start_dim..end_dim].to_vec())
                .collect();

            let codebook = self.train_codebook(&subvectors)?;
            self.codebooks.push(codebook);
        }

        Ok(())
    }

    fn train_codebook(&self, subvectors: &[Vec<f32>]) -> Result<Vec<Vec<f32>>, VectorError> {
        use scirs2_core::random::Random;
        let mut rng = Random::seed(42);

        if subvectors.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No subvectors to train codebook".to_string(),
            ));
        }

        let dim = subvectors[0].len();
        let mut centroids = Vec::with_capacity(self.codebook_size);

        for _ in 0..self.codebook_size {
            let mut centroid = vec![0.0; dim];
            for val in &mut centroid {
                *val = rng.gen_range(-1.0..1.0);
            }
            centroids.push(centroid);
        }

        for _ in 0..10 {
            let mut assignments = vec![0; subvectors.len()];

            for (i, subvec) in subvectors.iter().enumerate() {
                let mut best_dist = f32::INFINITY;
                let mut best_centroid = 0;

                for (j, centroid) in centroids.iter().enumerate() {
                    let dist = euclidean_distance(subvec, centroid);
                    if dist < best_dist {
                        best_dist = dist;
                        best_centroid = j;
                    }
                }
                assignments[i] = best_centroid;
            }

            for (j, centroid) in centroids.iter_mut().enumerate() {
                let assigned_points: Vec<&Vec<f32>> = subvectors
                    .iter()
                    .enumerate()
                    .filter(|(i, _)| assignments[*i] == j)
                    .map(|(_, v)| v)
                    .collect();

                if !assigned_points.is_empty() {
                    for (d, centroid_val) in centroid.iter_mut().enumerate() {
                        *centroid_val = assigned_points.iter().map(|p| p[d]).sum::<f32>()
                            / assigned_points.len() as f32;
                    }
                }
            }
        }

        Ok(centroids)
    }

    fn quantize_vector(&self, vector: &[f32]) -> Result<Vec<u8>, VectorError> {
        if vector.len() != self.subvectors * self.subvector_dim {
            return Err(VectorError::InvalidDimensions(format!(
                "Vector dimension {} doesn't match expected {}",
                vector.len(),
                self.subvectors * self.subvector_dim
            )));
        }

        let mut codes = Vec::with_capacity(self.subvectors);

        for subvec_idx in 0..self.subvectors {
            let start_dim = subvec_idx * self.subvector_dim;
            let end_dim = start_dim + self.subvector_dim;
            let subvector = &vector[start_dim..end_dim];

            let codebook = &self.codebooks[subvec_idx];
            let mut best_dist = f32::INFINITY;
            let mut best_code = 0u8;

            for (code, centroid) in codebook.iter().enumerate() {
                let dist = euclidean_distance(subvector, centroid);
                if dist < best_dist {
                    best_dist = dist;
                    best_code = code as u8;
                }
            }

            codes.push(best_code);
        }

        Ok(codes)
    }

    fn dequantize_codes(&self, codes: &[u8]) -> Result<Vec<f32>, VectorError> {
        if codes.len() != self.subvectors {
            return Err(VectorError::InvalidDimensions(format!(
                "Code length {} doesn't match expected {}",
                codes.len(),
                self.subvectors
            )));
        }

        let mut reconstructed = Vec::with_capacity(self.subvectors * self.subvector_dim);

        for (subvec_idx, &code) in codes.iter().enumerate() {
            let codebook = &self.codebooks[subvec_idx];
            if (code as usize) < codebook.len() {
                reconstructed.extend_from_slice(&codebook[code as usize]);
            } else {
                return Err(VectorError::InvalidDimensions(format!(
                    "Invalid code {} for codebook of size {}",
                    code,
                    codebook.len()
                )));
            }
        }

        Ok(reconstructed)
    }
}

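/// Euclidean (L2) distance between two equal-length slices.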
fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    a.iter()
        .zip(b.iter())
        .map(|(&x, &y)| (x - y).powi(2))
        .sum::<f32>()
        .sqrt()
}

impl VectorCompressor for ProductQuantizer {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        let values = match &vector.values {
            VectorData::F32(v) => v.clone(),
            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
            _ => {
                return Err(VectorError::UnsupportedOperation(
                    "Product quantization only supports float vectors".to_string(),
                ))
            }
        };

        let codes = self.quantize_vector(&values)?;

        let mut compressed = Vec::new();
        compressed.write_all(&(self.subvectors as u32).to_le_bytes())?;
        compressed.write_all(&(self.codebook_size as u32).to_le_bytes())?;
        compressed.write_all(&(self.subvector_dim as u32).to_le_bytes())?;

        compressed.extend_from_slice(&codes);

        Ok(compressed)
    }

    fn decompress(&self, data: &[u8], _dimensions: usize) -> Result<Vector, VectorError> {
        if data.len() < 12 {
            return Err(VectorError::InvalidData(
                "Invalid compressed data format".to_string(),
            ));
        }

        let subvectors = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
        let codebook_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
        let subvector_dim = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize;

        if subvectors != self.subvectors
            || codebook_size != self.codebook_size
            || subvector_dim != self.subvector_dim
        {
            return Err(VectorError::InvalidData(
                "Metadata mismatch in compressed data".to_string(),
            ));
        }

        let codes = &data[12..];
        if codes.len() != subvectors {
            return Err(VectorError::InvalidData("Invalid code length".to_string()));
        }

        let values = self.dequantize_codes(codes)?;
        Ok(Vector::new(values))
    }

    fn compression_ratio(&self) -> f32 {
        if self.subvector_dim == 0 {
            // Untrained: no reduction to report yet.
            return 1.0;
        }
        // One u8 code per subvector versus a 32-bit float per original dimension.
        (8.0 * self.subvectors as f32) / (32.0 * self.subvectors as f32 * self.subvector_dim as f32)
    }
}

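/// Summary statistics computed over a sample of vectors, used by
/// `AdaptiveCompressor` to pick a compression method and estimate its ratio.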
#[derive(Debug, Clone)]
pub struct VectorAnalysis {
    pub sparsity: f32,
    pub range: f32,
    pub mean: f32,
    pub std_dev: f32,
    pub entropy: f32,
    pub dominant_patterns: Vec<f32>,
    pub recommended_method: CompressionMethod,
    pub expected_ratio: f32,
}

impl VectorAnalysis {
    pub fn analyze(vectors: &[Vector], quality: &AdaptiveQuality) -> Result<Self, VectorError> {
        if vectors.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No vectors to analyze".to_string(),
            ));
        }

        let mut all_values = Vec::new();
        let mut dimensions = 0;

        for vector in vectors {
            let values = match &vector.values {
                VectorData::F32(v) => v.clone(),
                VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
                VectorData::F16(v) => v.iter().map(|&x| f16::from_bits(x).to_f32()).collect(),
                VectorData::I8(v) => v.iter().map(|&x| x as f32).collect(),
                VectorData::Binary(_) => {
                    return Ok(Self::binary_analysis(vectors.len()));
                }
            };
            if dimensions == 0 {
                dimensions = values.len();
            }
            all_values.extend(values);
        }

        if all_values.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No values to analyze".to_string(),
            ));
        }

        let min_val = all_values.iter().copied().fold(f32::INFINITY, f32::min);
        let max_val = all_values.iter().copied().fold(f32::NEG_INFINITY, f32::max);
        let range = max_val - min_val;
        let mean = all_values.iter().sum::<f32>() / all_values.len() as f32;

        let variance =
            all_values.iter().map(|&x| (x - mean).powi(2)).sum::<f32>() / all_values.len() as f32;
        let std_dev = variance.sqrt();

        let epsilon = std_dev * 0.01;
        let near_zero_count = all_values.iter().filter(|&&x| x.abs() < epsilon).count();
        let sparsity = near_zero_count as f32 / all_values.len() as f32;

        let entropy = Self::calculate_entropy(&all_values);

        let dominant_patterns = Self::find_dominant_patterns(&all_values);

        let (recommended_method, expected_ratio) =
            Self::select_optimal_method(sparsity, range, std_dev, entropy, dimensions, quality);

        Ok(Self {
            sparsity,
            range,
            mean,
            std_dev,
            entropy,
            dominant_patterns,
            recommended_method,
            expected_ratio,
        })
    }

    fn binary_analysis(_vector_count: usize) -> Self {
        Self {
            sparsity: 0.0,
            range: 1.0,
            mean: 0.5,
            std_dev: 0.5,
            entropy: 1.0,
            dominant_patterns: vec![0.0, 1.0],
            recommended_method: CompressionMethod::Zstd { level: 1 },
            expected_ratio: 0.125,
        }
    }

    fn calculate_entropy(values: &[f32]) -> f32 {
        let mut histogram = std::collections::HashMap::new();
        let bins = 64;

        if values.is_empty() {
            return 0.0;
        }

        let min_val = values.iter().copied().fold(f32::INFINITY, f32::min);
        let max_val = values.iter().copied().fold(f32::NEG_INFINITY, f32::max);
        let range = max_val - min_val;

        if range == 0.0 {
            return 0.0;
        }

        for &value in values {
            let bin = ((value - min_val) / range * (bins - 1) as f32) as usize;
            let bin = bin.min(bins - 1);
            *histogram.entry(bin).or_insert(0) += 1;
        }

        let total = values.len() as f32;
        let mut entropy = 0.0;

        for count in histogram.values() {
            let probability = *count as f32 / total;
            if probability > 0.0 {
                entropy -= probability * probability.log2();
            }
        }

        entropy
    }

    fn find_dominant_patterns(values: &[f32]) -> Vec<f32> {
        let mut value_counts = std::collections::HashMap::new();

        for &value in values {
            let quantized = (value * 1000.0).round() / 1000.0;
            *value_counts.entry(quantized.to_bits()).or_insert(0) += 1;
        }

        let mut patterns: Vec<_> = value_counts.into_iter().collect();
        patterns.sort_by(|a, b| b.1.cmp(&a.1));

        patterns
            .into_iter()
            .take(5)
            .map(|(bits, _)| f32::from_bits(bits))
            .collect()
    }

    fn select_optimal_method(
        sparsity: f32,
        range: f32,
        std_dev: f32,
        entropy: f32,
        dimensions: usize,
        quality: &AdaptiveQuality,
    ) -> (CompressionMethod, f32) {
        if sparsity > 0.7 {
            return match quality {
                AdaptiveQuality::Fast => (CompressionMethod::Zstd { level: 1 }, 0.3),
                AdaptiveQuality::Balanced => (CompressionMethod::Zstd { level: 6 }, 0.2),
                AdaptiveQuality::BestRatio => (CompressionMethod::Zstd { level: 19 }, 0.15),
            };
        }

        if entropy < 2.0 {
            return match quality {
                AdaptiveQuality::Fast => (CompressionMethod::Zstd { level: 3 }, 0.4),
                AdaptiveQuality::Balanced => (CompressionMethod::Zstd { level: 9 }, 0.3),
                AdaptiveQuality::BestRatio => (CompressionMethod::Zstd { level: 22 }, 0.2),
            };
        }

        if range < 2.0 && std_dev < 0.5 {
            return match quality {
                AdaptiveQuality::Fast => (CompressionMethod::Quantization { bits: 8 }, 0.25),
                AdaptiveQuality::Balanced => (CompressionMethod::Quantization { bits: 6 }, 0.1875),
                AdaptiveQuality::BestRatio => (CompressionMethod::Quantization { bits: 4 }, 0.125),
            };
        }

        if dimensions > 128 {
            let components = match quality {
                AdaptiveQuality::Fast => dimensions * 7 / 10,
                AdaptiveQuality::Balanced => dimensions / 2,
                AdaptiveQuality::BestRatio => dimensions / 3,
            };
            return (
                CompressionMethod::Pca { components },
                components as f32 / dimensions as f32,
            );
        }

        match quality {
            AdaptiveQuality::Fast => (CompressionMethod::Zstd { level: 3 }, 0.6),
            AdaptiveQuality::Balanced => (CompressionMethod::Zstd { level: 6 }, 0.5),
            AdaptiveQuality::BestRatio => (CompressionMethod::Zstd { level: 12 }, 0.4),
        }
    }
}

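/// Compressor that analyzes sample vectors and delegates to the method
/// recommended by `VectorAnalysis`, switching methods when the data changes.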
pub struct AdaptiveCompressor {
    quality_level: AdaptiveQuality,
    analysis_samples: usize,
    current_method: Option<Box<dyn VectorCompressor>>,
    analysis_cache: Option<VectorAnalysis>,
    performance_metrics: CompressionMetrics,
}

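/// Bookkeeping counters for an `AdaptiveCompressor` instance.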
#[derive(Debug, Clone)]
pub struct CompressionMetrics {
    pub vectors_compressed: usize,
    pub total_original_size: usize,
    pub total_compressed_size: usize,
    pub compression_time_ms: f64,
    pub decompression_time_ms: f64,
    pub current_ratio: f32,
    pub method_switches: usize,
}

impl Default for CompressionMetrics {
    fn default() -> Self {
        Self {
            vectors_compressed: 0,
            total_original_size: 0,
            total_compressed_size: 0,
            compression_time_ms: 0.0,
            decompression_time_ms: 0.0,
            current_ratio: 1.0,
            method_switches: 0,
        }
    }
}

impl AdaptiveCompressor {
    pub fn new(quality_level: AdaptiveQuality, analysis_samples: usize) -> Self {
        Self {
            quality_level,
            analysis_samples: analysis_samples.max(10),
            current_method: None,
            analysis_cache: None,
            performance_metrics: CompressionMetrics::default(),
        }
    }

    pub fn with_fast_quality() -> Self {
        Self::new(AdaptiveQuality::Fast, 50)
    }

    pub fn with_balanced_quality() -> Self {
        Self::new(AdaptiveQuality::Balanced, 100)
    }

    pub fn with_best_ratio() -> Self {
        Self::new(AdaptiveQuality::BestRatio, 200)
    }

    pub fn optimize_for_vectors(&mut self, sample_vectors: &[Vector]) -> Result<(), VectorError> {
        if sample_vectors.is_empty() {
            return Ok(());
        }

        let start_time = std::time::Instant::now();

        let samples_to_analyze = sample_vectors.len().min(self.analysis_samples);
        let analysis_vectors = &sample_vectors[..samples_to_analyze];

        let analysis = VectorAnalysis::analyze(analysis_vectors, &self.quality_level)?;

        let should_switch = match (&self.current_method, &self.analysis_cache) {
            (Some(_), Some(cached)) => {
                !methods_equivalent(&cached.recommended_method, &analysis.recommended_method)
            }
            _ => true,
        };

        if should_switch {
            self.current_method = Some(create_compressor(&analysis.recommended_method));
            self.performance_metrics.method_switches += 1;
        }

        self.analysis_cache = Some(analysis);

        let analysis_time = start_time.elapsed().as_secs_f64() * 1000.0;
        tracing::debug!("Adaptive compression analysis took {:.2}ms", analysis_time);

        Ok(())
    }

    fn train_compressor(
        &self,
        _compressor: &mut dyn VectorCompressor,
        _vectors: &[Vector],
    ) -> Result<(), VectorError> {
        // Currently a no-op: selected compressors are used with their default state.
        Ok(())
    }

    pub fn get_metrics(&self) -> &CompressionMetrics {
        &self.performance_metrics
    }

    pub fn get_analysis(&self) -> Option<&VectorAnalysis> {
        self.analysis_cache.as_ref()
    }

    pub fn adaptive_reanalysis(&mut self, recent_vectors: &[Vector]) -> Result<bool, VectorError> {
        if recent_vectors.len() < self.analysis_samples / 4 {
            return Ok(false);
        }

        let old_method = self
            .analysis_cache
            .as_ref()
            .map(|a| a.recommended_method.clone());

        self.optimize_for_vectors(recent_vectors)?;

        let method_changed = match (old_method, &self.analysis_cache) {
            (Some(old), Some(new)) => !methods_equivalent(&old, &new.recommended_method),
            _ => false,
        };

        Ok(method_changed)
    }
}

impl VectorCompressor for AdaptiveCompressor {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        if let Some(compressor) = &self.current_method {
            let start = std::time::Instant::now();
            let result = compressor.compress(vector);
            let _compression_time = start.elapsed().as_secs_f64() * 1000.0;

            result
        } else {
            let no_op = NoOpCompressor;
            no_op.compress(vector)
        }
    }

    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
        if let Some(compressor) = &self.current_method {
            let start = std::time::Instant::now();
            let result = compressor.decompress(data, dimensions);
            let _decompression_time = start.elapsed().as_secs_f64() * 1000.0;

            result
        } else {
            let no_op = NoOpCompressor;
            no_op.decompress(data, dimensions)
        }
    }

    fn compression_ratio(&self) -> f32 {
        if let Some(compressor) = &self.current_method {
            compressor.compression_ratio()
        } else {
            1.0
        }
    }
}

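/// Returns true when two methods are close enough that switching between them is
/// not worthwhile (zstd levels within 2 of each other, equal quantization bits,
/// PCA component counts within roughly 10%).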
fn methods_equivalent(method1: &CompressionMethod, method2: &CompressionMethod) -> bool {
    match (method1, method2) {
        (CompressionMethod::None, CompressionMethod::None) => true,
        (CompressionMethod::Zstd { level: l1 }, CompressionMethod::Zstd { level: l2 }) => {
            (l1 - l2).abs() <= 2
        }
        (
            CompressionMethod::Quantization { bits: b1 },
            CompressionMethod::Quantization { bits: b2 },
        ) => b1 == b2,
        (CompressionMethod::Pca { components: c1 }, CompressionMethod::Pca { components: c2 }) => {
            ((*c1 as i32) - (*c2 as i32)).abs() <= (*c1 as i32) / 10
        }
        _ => false,
    }
}

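/// Builds a boxed compressor for the given method. Trainable compressors
/// (quantization, PCA, product quantization) are returned in their untrained state.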
pub fn create_compressor(method: &CompressionMethod) -> Box<dyn VectorCompressor> {
    match method {
        CompressionMethod::None => Box::new(NoOpCompressor),
        CompressionMethod::Zstd { level } => Box::new(ZstdCompressor::new(*level)),
        CompressionMethod::Quantization { bits } => Box::new(ScalarQuantizer::new(*bits)),
        CompressionMethod::Pca { components } => Box::new(PcaCompressor::new(*components)),
        CompressionMethod::ProductQuantization {
            subvectors,
            codebook_size,
        } => Box::new(ProductQuantizer::new(*subvectors, *codebook_size)),
        CompressionMethod::Adaptive {
            quality_level,
            analysis_samples,
        } => Box::new(AdaptiveCompressor::new(
            quality_level.clone(),
            *analysis_samples,
        )),
    }
}

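/// Pass-through compressor used when no method has been selected.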
struct NoOpCompressor;

impl VectorCompressor for NoOpCompressor {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        vector_to_bytes(vector)
    }

    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
        bytes_to_vector(data, dimensions)
    }

    fn compression_ratio(&self) -> f32 {
        1.0
    }
}

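// Serialization helpers: a 1-byte type tag (0 = F32, 1 = F64, 2 = F16, 3 = I8,
// 4 = Binary) followed by the raw little-endian values.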
fn vector_to_bytes(vector: &Vector) -> Result<Vec<u8>, VectorError> {
    let mut bytes = Vec::new();

    let type_byte = match &vector.values {
        VectorData::F32(_) => 0u8,
        VectorData::F64(_) => 1u8,
        VectorData::F16(_) => 2u8,
        VectorData::I8(_) => 3u8,
        VectorData::Binary(_) => 4u8,
    };
    bytes.push(type_byte);

    match &vector.values {
        VectorData::F32(v) => {
            for val in v {
                bytes.write_all(&val.to_le_bytes())?;
            }
        }
        VectorData::F64(v) => {
            for val in v {
                bytes.write_all(&val.to_le_bytes())?;
            }
        }
        VectorData::F16(v) => {
            for val in v {
                bytes.write_all(&val.to_le_bytes())?;
            }
        }
        VectorData::I8(v) => {
            for &val in v {
                bytes.push(val as u8);
            }
        }
        VectorData::Binary(v) => {
            bytes.extend_from_slice(v);
        }
    }

    Ok(bytes)
}

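/// Inverse of `vector_to_bytes`; `dimensions` tells how many values to read.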
fn bytes_to_vector(data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
    if data.is_empty() {
        return Err(VectorError::InvalidDimensions("Empty data".to_string()));
    }

    let type_byte = data[0];
    let data = &data[1..];

    match type_byte {
        0 => {
            let mut values = Vec::with_capacity(dimensions);
            let mut cursor = std::io::Cursor::new(data);
            let mut buf = [0u8; 4];

            for _ in 0..dimensions {
                cursor.read_exact(&mut buf)?;
                values.push(f32::from_le_bytes(buf));
            }
            Ok(Vector::new(values))
        }
        1 => {
            let mut values = Vec::with_capacity(dimensions);
            let mut cursor = std::io::Cursor::new(data);
            let mut buf = [0u8; 8];

            for _ in 0..dimensions {
                cursor.read_exact(&mut buf)?;
                values.push(f64::from_le_bytes(buf));
            }
            Ok(Vector::f64(values))
        }
        2 => {
            let mut values = Vec::with_capacity(dimensions);
            let mut cursor = std::io::Cursor::new(data);
            let mut buf = [0u8; 2];

            for _ in 0..dimensions {
                cursor.read_exact(&mut buf)?;
                values.push(u16::from_le_bytes(buf));
            }
            Ok(Vector::f16(values))
        }
        3 => Ok(Vector::i8(
            data[..dimensions].iter().map(|&b| b as i8).collect(),
        )),
        4 => Ok(Vector::binary(data[..dimensions].to_vec())),
        _ => Err(VectorError::InvalidData(format!(
            "Unknown vector type: {type_byte}"
        ))),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_zstd_compression() {
        let vector = Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let compressor = ZstdCompressor::new(3);

        let compressed = compressor.compress(&vector).unwrap();
        let decompressed = compressor.decompress(&compressed, 5).unwrap();

        let orig = vector.as_f32();
        let dec = decompressed.as_f32();
        assert_eq!(orig.len(), dec.len());
        for (a, b) in orig.iter().zip(dec.iter()) {
            assert!((a - b).abs() < 1e-6);
        }
    }

    #[test]
    fn test_scalar_quantization() {
        let vector = Vector::new(vec![0.1, 0.5, 0.9, 0.3, 0.7]);
        let mut quantizer = ScalarQuantizer::new(8);
        quantizer.train(std::slice::from_ref(&vector)).unwrap();

        let compressed = quantizer.compress(&vector).unwrap();
        let decompressed = quantizer.decompress(&compressed, 5).unwrap();

        assert!(compressed.len() < 20);

        let orig = vector.as_f32();
        let dec = decompressed.as_f32();
        assert_eq!(orig.len(), dec.len());
        for (a, b) in orig.iter().zip(dec.iter()) {
            assert!((a - b).abs() < 0.01);
        }
    }

    #[test]
    fn test_pca_compression() {
        let vectors = vec![
            Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]),
            Vector::new(vec![2.0, 3.0, 4.0, 5.0, 6.0]),
            Vector::new(vec![3.0, 4.0, 5.0, 6.0, 7.0]),
        ];

        let mut pca = PcaCompressor::new(3);
        pca.train(&vectors).unwrap();

        let compressed = pca.compress(&vectors[0]).unwrap();
        let decompressed = pca.decompress(&compressed, 5).unwrap();

        let dec = decompressed.as_f32();
        assert_eq!(dec.len(), 5);
    }

    #[test]
    fn test_adaptive_compression_sparse_data() {
        let vectors = vec![
            Vector::new(vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0]),
            Vector::new(vec![0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0]),
            Vector::new(vec![1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0]),
        ];

        let mut adaptive = AdaptiveCompressor::with_balanced_quality();
        adaptive.optimize_for_vectors(&vectors).unwrap();

        let analysis = adaptive.get_analysis().unwrap();
        assert!(analysis.sparsity > 0.5);

        let compressed = adaptive.compress(&vectors[0]).unwrap();
        let decompressed = adaptive.decompress(&compressed, 10).unwrap();

        let orig = vectors[0].as_f32();
        let dec = decompressed.as_f32();
        assert_eq!(orig.len(), dec.len());
    }

    #[test]
    fn test_adaptive_compression_quantizable_data() {
        let vectors = vec![
            Vector::new(vec![0.1, 0.2, 0.3, 0.4, 0.5]),
            Vector::new(vec![0.2, 0.3, 0.4, 0.5, 0.6]),
            Vector::new(vec![0.3, 0.4, 0.5, 0.6, 0.7]),
        ];

        let mut adaptive = AdaptiveCompressor::with_balanced_quality();
        adaptive.optimize_for_vectors(&vectors).unwrap();

        let analysis = adaptive.get_analysis().unwrap();
        assert!(analysis.range < 1.0);

        let compressed = adaptive.compress(&vectors[0]).unwrap();
        let decompressed = adaptive.decompress(&compressed, 5).unwrap();

        let orig = vectors[0].as_f32();
        let dec = decompressed.as_f32();
        assert_eq!(orig.len(), dec.len());

        assert!(adaptive.compression_ratio() < 0.5);
    }

    #[test]
    fn test_adaptive_compression_high_dimensional() {
        let mut vectors = Vec::new();
        for i in 0..10 {
            let mut data = vec![0.0; 200];
            for (j, item) in data.iter_mut().enumerate().take(200) {
                *item = (i * j) as f32 * 0.01;
            }
            vectors.push(Vector::new(data));
        }

        let mut adaptive = AdaptiveCompressor::with_best_ratio();
        adaptive.optimize_for_vectors(&vectors).unwrap();

        let analysis = adaptive.get_analysis().unwrap();
        match &analysis.recommended_method {
            CompressionMethod::Pca { components } => {
                assert!(*components < 200);
            }
            _ => {
                assert!(matches!(
                    analysis.recommended_method,
                    CompressionMethod::Pca { .. }
                        | CompressionMethod::Quantization { .. }
                        | CompressionMethod::Zstd { .. }
                ));
            }
        }

        let original = &vectors[0];
        println!("Original vector length: {}", original.dimensions);
        println!("Recommended method: {:?}", analysis.recommended_method);

        let compressed = adaptive.compress(original).unwrap();
        println!("Compressed size: {} bytes", compressed.len());

        assert!(!compressed.is_empty());
        assert!(compressed.len() < original.dimensions * 4);

        match &analysis.recommended_method {
            CompressionMethod::Pca { components } => {
                assert!(*components < original.dimensions);
                println!(
                    "PCA compression: {} → {} components",
                    original.dimensions, components
                );
            }
            _ => {
                let decompressed = adaptive
                    .decompress(&compressed, original.dimensions)
                    .unwrap();
                let dec = decompressed.as_f32();
                let orig = original.as_f32();
                assert_eq!(dec.len(), orig.len());
            }
        }
    }

    #[test]
    fn test_adaptive_compression_method_switching() {
        let mut adaptive = AdaptiveCompressor::with_fast_quality();

        let sparse_vectors = vec![
            Vector::new(vec![0.0, 0.0, 1.0, 0.0, 0.0]),
            Vector::new(vec![0.0, 2.0, 0.0, 0.0, 0.0]),
        ];
        adaptive.optimize_for_vectors(&sparse_vectors).unwrap();
        let initial_switches = adaptive.get_metrics().method_switches;

        let dense_vectors = vec![
            Vector::new(vec![0.1, 0.2, 0.3, 0.4, 0.5]),
            Vector::new(vec![0.2, 0.3, 0.4, 0.5, 0.6]),
        ];
        adaptive.optimize_for_vectors(&dense_vectors).unwrap();

        assert!(adaptive.get_metrics().method_switches > initial_switches);
    }

    #[test]
    fn test_vector_analysis() {
        let vectors = vec![
            Vector::new(vec![1.0, 2.0, 3.0]),
            Vector::new(vec![2.0, 3.0, 4.0]),
            Vector::new(vec![3.0, 4.0, 5.0]),
        ];

        let analysis = VectorAnalysis::analyze(&vectors, &AdaptiveQuality::Balanced).unwrap();

        assert!(analysis.mean > 0.0);
        assert!(analysis.std_dev > 0.0);
        assert!(analysis.range > 0.0);
        assert!(analysis.entropy >= 0.0);
        assert!(!analysis.dominant_patterns.is_empty());
        assert!(analysis.expected_ratio > 0.0 && analysis.expected_ratio <= 1.0);
    }

    #[test]
    fn test_compression_method_equivalence() {
        assert!(methods_equivalent(
            &CompressionMethod::Zstd { level: 5 },
            &CompressionMethod::Zstd { level: 6 }
        ));

        assert!(!methods_equivalent(
            &CompressionMethod::Zstd { level: 1 },
            &CompressionMethod::Zstd { level: 10 }
        ));

        assert!(methods_equivalent(
            &CompressionMethod::Quantization { bits: 8 },
            &CompressionMethod::Quantization { bits: 8 }
        ));

        assert!(!methods_equivalent(
            &CompressionMethod::Zstd { level: 5 },
            &CompressionMethod::Quantization { bits: 8 }
        ));
    }

    #[test]
    fn test_adaptive_compressor_convenience_constructors() {
        let fast = AdaptiveCompressor::with_fast_quality();
        assert!(matches!(fast.quality_level, AdaptiveQuality::Fast));

        let balanced = AdaptiveCompressor::with_balanced_quality();
        assert!(matches!(balanced.quality_level, AdaptiveQuality::Balanced));

        let best = AdaptiveCompressor::with_best_ratio();
        assert!(matches!(best.quality_level, AdaptiveQuality::BestRatio));
    }

    #[test]
    fn test_product_quantization() {
        let vectors = vec![
            Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]),
            Vector::new(vec![2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]),
            Vector::new(vec![3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]),
            Vector::new(vec![1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5]),
        ];

        let mut pq = ProductQuantizer::new(4, 4);

        pq.train(&vectors).unwrap();

        let original = &vectors[0];
        let compressed = pq.compress(original).unwrap();
        let decompressed = pq.decompress(&compressed, 8).unwrap();

        assert_eq!(decompressed.dimensions, original.dimensions);

        let ratio = pq.compression_ratio();
        assert!(
            ratio > 0.0 && ratio < 1.0,
            "Compression ratio should be between 0 and 1, got {ratio}"
        );

        for vector in &vectors {
            let compressed = pq.compress(vector).unwrap();
            let decompressed = pq.decompress(&compressed, vector.dimensions).unwrap();
            assert_eq!(decompressed.dimensions, vector.dimensions);
        }
    }

    #[test]
    fn test_product_quantization_invalid_dimensions() {
        let vectors = vec![Vector::new(vec![1.0, 2.0, 3.0])];

        let mut pq = ProductQuantizer::new(4, 4);
        let result = pq.train(&vectors);

        assert!(result.is_err());
        assert!(
            matches!(result, Err(VectorError::InvalidDimensions(_))),
            "Expected InvalidDimensions error"
        );
    }
}