1use crate::{Vector, VectorData, VectorError};
2use half::f16;
3use std::io::{Read, Write};
4
/// Strategy used to compress vectors for storage.
///
/// `None` stores raw bytes; the other variants trade CPU time and/or
/// accuracy for a smaller footprint.
#[derive(Debug, Clone, Default)]
pub enum CompressionMethod {
    /// No compression; vectors are serialized as-is.
    #[default]
    None,
    /// Lossless zstd entropy coding of the raw vector bytes.
    Zstd {
        // zstd level; clamped to 1..=22 by `ZstdCompressor::new`.
        level: i32,
    },
    /// Lossy uniform scalar quantization.
    Quantization {
        // Bits per component; clamped to 1..=16 by `ScalarQuantizer::new`.
        bits: u8,
    },
    /// Lossy product quantization: split into `subvectors` chunks, each
    /// encoded as an index into a learned codebook.
    ProductQuantization {
        subvectors: usize,
        codebook_size: usize,
    },
    /// Lossy dimensionality reduction via principal component analysis.
    Pca {
        components: usize,
    },
    /// Chooses one of the above automatically from sample statistics.
    Adaptive {
        quality_level: AdaptiveQuality,
        analysis_samples: usize,
    },
}
27
/// Speed/size trade-off presets for adaptive compression.
#[derive(Debug, Clone)]
pub enum AdaptiveQuality {
    /// Prefer low analysis and compression latency.
    Fast,
    /// Balance latency against compression ratio.
    Balanced,
    /// Prefer the smallest output, whatever the cost.
    BestRatio,
}
34
/// Common interface for all vector compression back-ends.
///
/// Implementations must be `Send + Sync` so they can be shared behind a
/// boxed trait object (see `create_compressor`).
pub trait VectorCompressor: Send + Sync {
    /// Serializes and compresses `vector` into an opaque byte buffer.
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError>;
    /// Reverses `compress`; `dimensions` is the expected component count.
    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError>;
    /// Rough expected compressed/original size ratio (1.0 = no savings).
    fn compression_ratio(&self) -> f32;
}
40
/// Lossless compressor backed by the zstd algorithm.
pub struct ZstdCompressor {
    // zstd level, always kept within the valid 1..=22 range.
    level: i32,
}

impl ZstdCompressor {
    /// Creates a compressor, clamping `level` into zstd's 1..=22 range.
    pub fn new(level: i32) -> Self {
        let level = level.clamp(1, 22);
        Self { level }
    }
}
52
53impl VectorCompressor for ZstdCompressor {
54 fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
55 let bytes = vector_to_bytes(vector)?;
56 oxiarc_zstd::encode_all(&bytes, self.level)
57 .map_err(|e| VectorError::CompressionError(e.to_string()))
58 }
59
60 fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
61 let decompressed = oxiarc_zstd::decode_all(data)
62 .map_err(|e| VectorError::CompressionError(e.to_string()))?;
63 bytes_to_vector(&decompressed, dimensions)
64 }
65
66 fn compression_ratio(&self) -> f32 {
67 match self.level {
69 1..=3 => 0.7,
70 4..=9 => 0.5,
71 10..=15 => 0.4,
72 16..=22 => 0.3,
73 _ => 1.0,
74 }
75 }
76}
77
/// Lossy scalar quantizer: maps each component onto a uniform integer grid
/// spanning a trained (or supplied) min/max range.
pub struct ScalarQuantizer {
    // Bits per quantized value, kept within 1..=16.
    bits: u8,
    // Lower bound of the representable range.
    min_val: f32,
    // Upper bound of the representable range.
    max_val: f32,
}
83
84impl ScalarQuantizer {
85 pub fn new(bits: u8) -> Self {
86 Self {
87 bits: bits.clamp(1, 16),
88 min_val: 0.0,
89 max_val: 1.0,
90 }
91 }
92
93 pub fn with_range(bits: u8, min_val: f32, max_val: f32) -> Self {
94 Self {
95 bits: bits.clamp(1, 16),
96 min_val,
97 max_val,
98 }
99 }
100
101 pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
102 if vectors.is_empty() {
103 return Err(VectorError::InvalidDimensions(
104 "No vectors to train on".to_string(),
105 ));
106 }
107
108 let mut min = f32::INFINITY;
109 let mut max = f32::NEG_INFINITY;
110
111 for vector in vectors {
112 match &vector.values {
113 VectorData::F32(v) => {
114 for &val in v {
115 min = min.min(val);
116 max = max.max(val);
117 }
118 }
119 VectorData::F64(v) => {
120 for &val in v {
121 min = min.min(val as f32);
122 max = max.max(val as f32);
123 }
124 }
125 _ => {}
126 }
127 }
128
129 self.min_val = min;
130 self.max_val = max;
131 Ok(())
132 }
133
134 fn quantize_value(&self, value: f32) -> u16 {
135 let normalized = ((value - self.min_val) / (self.max_val - self.min_val)).clamp(0.0, 1.0);
136 let max_quant_val = (1u32 << self.bits) - 1;
137 (normalized * max_quant_val as f32).round() as u16
138 }
139
140 fn dequantize_value(&self, quantized: u16) -> f32 {
141 let max_quant_val = (1u32 << self.bits) - 1;
142 let normalized = quantized as f32 / max_quant_val as f32;
143 normalized * (self.max_val - self.min_val) + self.min_val
144 }
145}
146
147impl VectorCompressor for ScalarQuantizer {
148 fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
149 let values = match &vector.values {
150 VectorData::F32(v) => v.clone(),
151 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
152 _ => {
153 return Err(VectorError::UnsupportedOperation(
154 "Quantization only supports float vectors".to_string(),
155 ))
156 }
157 };
158
159 let mut compressed = Vec::new();
160
161 compressed.write_all(&self.bits.to_le_bytes())?;
163 compressed.write_all(&self.min_val.to_le_bytes())?;
164 compressed.write_all(&self.max_val.to_le_bytes())?;
165
166 if self.bits <= 8 {
168 for val in values {
169 let quantized = self.quantize_value(val) as u8;
170 compressed.push(quantized);
171 }
172 } else {
173 for val in values {
174 let quantized = self.quantize_value(val);
175 compressed.write_all(&quantized.to_le_bytes())?;
176 }
177 }
178
179 Ok(compressed)
180 }
181
182 fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
183 let mut cursor = std::io::Cursor::new(data);
184
185 let mut bits_buf = [0u8; 1];
187 cursor.read_exact(&mut bits_buf)?;
188 let bits = bits_buf[0];
189
190 let mut min_buf = [0u8; 4];
191 cursor.read_exact(&mut min_buf)?;
192 let min_val = f32::from_le_bytes(min_buf);
193
194 let mut max_buf = [0u8; 4];
195 cursor.read_exact(&mut max_buf)?;
196 let max_val = f32::from_le_bytes(max_buf);
197
198 let quantizer = ScalarQuantizer {
200 bits,
201 min_val,
202 max_val,
203 };
204
205 let mut values = Vec::with_capacity(dimensions);
207
208 if bits <= 8 {
209 let mut buf = [0u8; 1];
210 for _ in 0..dimensions {
211 cursor.read_exact(&mut buf)?;
212 let quantized = buf[0] as u16;
213 values.push(quantizer.dequantize_value(quantized));
214 }
215 } else {
216 let mut buf = [0u8; 2];
217 for _ in 0..dimensions {
218 cursor.read_exact(&mut buf)?;
219 let quantized = u16::from_le_bytes(buf);
220 values.push(quantizer.dequantize_value(quantized));
221 }
222 }
223
224 Ok(Vector::new(values))
225 }
226
227 fn compression_ratio(&self) -> f32 {
228 self.bits as f32 / 32.0
230 }
231}
232
/// Lossy compressor that projects vectors onto their leading principal
/// components; must be trained before use.
pub struct PcaCompressor {
    // Target number of principal components to keep.
    components: usize,
    // Per-feature mean of the training data (empty until trained).
    mean: Vec<f32>,
    // One learned component vector per kept dimension.
    components_matrix: Vec<Vec<f32>>,
    // Fraction of total variance captured by each kept component.
    explained_variance_ratio: Vec<f32>,
}
239
240impl PcaCompressor {
241 pub fn new(components: usize) -> Self {
242 Self {
243 components,
244 mean: Vec::new(),
245 components_matrix: Vec::new(),
246 explained_variance_ratio: Vec::new(),
247 }
248 }
249
250 pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
251 if vectors.is_empty() {
252 return Err(VectorError::InvalidDimensions(
253 "No vectors to train on".to_string(),
254 ));
255 }
256
257 let data: Vec<Vec<f32>> = vectors
259 .iter()
260 .map(|v| match &v.values {
261 VectorData::F32(vals) => Ok(vals.clone()),
262 VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
263 _ => Err(VectorError::UnsupportedOperation(
264 "PCA only supports float vectors".to_string(),
265 )),
266 })
267 .collect::<Result<Vec<_>, _>>()?;
268
269 let n_samples = data.len();
270 let n_features = data[0].len();
271
272 self.mean = vec![0.0; n_features];
274 for sample in &data {
275 for (i, &val) in sample.iter().enumerate() {
276 self.mean[i] += val;
277 }
278 }
279 for val in &mut self.mean {
280 *val /= n_samples as f32;
281 }
282
283 let mut centered = data.clone();
285 for sample in &mut centered {
286 for (i, val) in sample.iter_mut().enumerate() {
287 *val -= self.mean[i];
288 }
289 }
290
291 self.components_matrix = Vec::with_capacity(self.components);
293
294 use nalgebra::DMatrix;
296
297 let training_data: Result<Vec<Vec<f32>>, _> = vectors
299 .iter()
300 .map(|v| match &v.values {
301 VectorData::F32(vals) => Ok(vals.clone()),
302 VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
303 _ => Err(VectorError::UnsupportedOperation(
304 "PCA only supports float vectors".to_string(),
305 )),
306 })
307 .collect();
308
309 let training_data = training_data?;
310 let n_samples = training_data.len();
311 if n_samples == 0 {
312 return Err(VectorError::InvalidDimensions(
313 "No training data provided for PCA".to_string(),
314 ));
315 }
316
317 let mut data_matrix = DMatrix::<f32>::zeros(n_samples, n_features);
319 for (i, sample) in training_data.iter().enumerate() {
320 for (j, &val) in sample.iter().enumerate() {
321 data_matrix[(i, j)] = val - self.mean[j];
322 }
323 }
324
325 let covariance = data_matrix.transpose() * &data_matrix / (n_samples as f32 - 1.0);
327
328 let svd = covariance.svd(true, true);
330
331 if let Some(u) = svd.u {
332 let num_components = self.components.min(u.ncols());
334
335 let singular_values = &svd.singular_values;
337 let total_variance: f32 = singular_values.iter().sum();
338 let mut explained_variance = Vec::with_capacity(num_components);
339
340 for i in 0..num_components {
341 let component: Vec<f32> = u.column(i).iter().cloned().collect();
342 self.components_matrix.push(component);
343
344 let variance_ratio = singular_values[i] / total_variance;
345 explained_variance.push(variance_ratio);
346 }
347
348 self.explained_variance_ratio = explained_variance;
350 } else {
351 return Err(VectorError::CompressionError(
352 "SVD decomposition failed for PCA".to_string(),
353 ));
354 }
355
356 Ok(())
357 }
358
359 fn project(&self, vector: &[f32]) -> Vec<f32> {
360 let mut centered = vector.to_vec();
361 for (i, val) in centered.iter_mut().enumerate() {
362 *val -= self.mean.get(i).unwrap_or(&0.0);
363 }
364
365 let mut projected = vec![0.0; self.components];
366 for (i, component) in self.components_matrix.iter().enumerate() {
367 let mut dot = 0.0;
368 for (j, &val) in centered.iter().enumerate() {
369 dot += val * component.get(j).unwrap_or(&0.0);
370 }
371 projected[i] = dot;
372 }
373
374 projected
375 }
376
377 fn reconstruct(&self, projected: &[f32]) -> Vec<f32> {
378 let n_features = self.mean.len();
379 let mut reconstructed = self.mean.clone();
380
381 for (i, &coeff) in projected.iter().enumerate() {
382 if let Some(component) = self.components_matrix.get(i) {
383 for (j, &comp_val) in component.iter().enumerate() {
384 if j < n_features {
385 reconstructed[j] += coeff * comp_val;
386 }
387 }
388 }
389 }
390
391 reconstructed
392 }
393
394 pub fn explained_variance_ratio(&self) -> &[f32] {
396 &self.explained_variance_ratio
397 }
398
399 pub fn total_explained_variance(&self) -> f32 {
401 self.explained_variance_ratio.iter().sum()
402 }
403}
404
405impl VectorCompressor for PcaCompressor {
406 fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
407 let values = match &vector.values {
408 VectorData::F32(v) => v.clone(),
409 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
410 _ => {
411 return Err(VectorError::UnsupportedOperation(
412 "PCA only supports float vectors".to_string(),
413 ))
414 }
415 };
416
417 let projected = self.project(&values);
418
419 let mut compressed = Vec::new();
420 compressed.write_all(&(self.components as u32).to_le_bytes())?;
422
423 for val in projected {
425 compressed.write_all(&val.to_le_bytes())?;
426 }
427
428 Ok(compressed)
429 }
430
431 fn decompress(&self, data: &[u8], _dimensions: usize) -> Result<Vector, VectorError> {
432 let mut cursor = std::io::Cursor::new(data);
433
434 let mut components_buf = [0u8; 4];
436 cursor.read_exact(&mut components_buf)?;
437 let components = u32::from_le_bytes(components_buf) as usize;
438
439 let mut projected = Vec::with_capacity(components);
441 let mut val_buf = [0u8; 4];
442
443 for _ in 0..components {
444 cursor.read_exact(&mut val_buf)?;
445 projected.push(f32::from_le_bytes(val_buf));
446 }
447
448 let reconstructed = self.reconstruct(&projected);
449 Ok(Vector::new(reconstructed))
450 }
451
452 fn compression_ratio(&self) -> f32 {
453 if self.mean.is_empty() {
455 1.0
456 } else {
457 self.components as f32 / self.mean.len() as f32
458 }
459 }
460}
461
/// Product quantization compressor: splits vectors into chunks, each
/// encoded as a one-byte index into a learned k-means codebook.
pub struct ProductQuantizer {
    // Number of chunks each vector is split into.
    subvectors: usize,
    // Centroids per codebook (codes are stored as u8).
    codebook_size: usize,
    // One codebook (list of centroids) per subvector position.
    codebooks: Vec<Vec<Vec<f32>>>,
    // Dimensionality of each chunk; 0 until trained.
    subvector_dim: usize,
}
469
470impl ProductQuantizer {
471 pub fn new(subvectors: usize, codebook_size: usize) -> Self {
472 Self {
473 subvectors,
474 codebook_size,
475 codebooks: Vec::new(),
476 subvector_dim: 0,
477 }
478 }
479
480 pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
481 if vectors.is_empty() {
482 return Err(VectorError::InvalidDimensions(
483 "No training data provided for Product Quantization".to_string(),
484 ));
485 }
486
487 let vector_dim = vectors[0].dimensions;
489 if vector_dim % self.subvectors != 0 {
490 return Err(VectorError::InvalidDimensions(format!(
491 "Vector dimension {} is not divisible by number of subvectors {}",
492 vector_dim, self.subvectors
493 )));
494 }
495
496 self.subvector_dim = vector_dim / self.subvectors;
497 self.codebooks = Vec::with_capacity(self.subvectors);
498
499 let training_data: Result<Vec<Vec<f32>>, _> = vectors
501 .iter()
502 .map(|v| match &v.values {
503 VectorData::F32(vals) => Ok(vals.clone()),
504 VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
505 _ => Err(VectorError::UnsupportedOperation(
506 "Product quantization only supports float vectors".to_string(),
507 )),
508 })
509 .collect();
510
511 let training_data = training_data?;
512
513 for subvec_idx in 0..self.subvectors {
515 let start_dim = subvec_idx * self.subvector_dim;
516 let end_dim = start_dim + self.subvector_dim;
517
518 let subvectors: Vec<Vec<f32>> = training_data
520 .iter()
521 .map(|v| v[start_dim..end_dim].to_vec())
522 .collect();
523
524 let codebook = self.train_codebook(&subvectors)?;
526 self.codebooks.push(codebook);
527 }
528
529 Ok(())
530 }
531
532 fn train_codebook(&self, subvectors: &[Vec<f32>]) -> Result<Vec<Vec<f32>>, VectorError> {
533 use scirs2_core::random::Random;
534 let mut rng = Random::seed(42);
535
536 if subvectors.is_empty() {
537 return Err(VectorError::InvalidDimensions(
538 "No subvectors to train codebook".to_string(),
539 ));
540 }
541
542 let dim = subvectors[0].len();
543 let mut centroids = Vec::with_capacity(self.codebook_size);
544
545 for _ in 0..self.codebook_size {
547 let mut centroid = vec![0.0; dim];
548 for val in &mut centroid {
549 *val = rng.gen_range(-1.0..1.0);
550 }
551 centroids.push(centroid);
552 }
553
554 for _ in 0..10 {
556 let mut assignments = vec![0; subvectors.len()];
558
559 for (i, subvec) in subvectors.iter().enumerate() {
561 let mut best_dist = f32::INFINITY;
562 let mut best_centroid = 0;
563
564 for (j, centroid) in centroids.iter().enumerate() {
565 let dist = euclidean_distance(subvec, centroid);
566 if dist < best_dist {
567 best_dist = dist;
568 best_centroid = j;
569 }
570 }
571 assignments[i] = best_centroid;
572 }
573
574 for (j, centroid) in centroids.iter_mut().enumerate() {
576 let assigned_points: Vec<&Vec<f32>> = subvectors
577 .iter()
578 .enumerate()
579 .filter(|(i, _)| assignments[*i] == j)
580 .map(|(_, v)| v)
581 .collect();
582
583 if !assigned_points.is_empty() {
584 for (d, centroid_val) in centroid.iter_mut().enumerate() {
585 *centroid_val = assigned_points.iter().map(|p| p[d]).sum::<f32>()
586 / assigned_points.len() as f32;
587 }
588 }
589 }
590 }
591
592 Ok(centroids)
593 }
594
595 fn quantize_vector(&self, vector: &[f32]) -> Result<Vec<u8>, VectorError> {
596 if vector.len() != self.subvectors * self.subvector_dim {
597 return Err(VectorError::InvalidDimensions(format!(
598 "Vector dimension {} doesn't match expected {}",
599 vector.len(),
600 self.subvectors * self.subvector_dim
601 )));
602 }
603
604 let mut codes = Vec::with_capacity(self.subvectors);
605
606 for subvec_idx in 0..self.subvectors {
607 let start_dim = subvec_idx * self.subvector_dim;
608 let end_dim = start_dim + self.subvector_dim;
609 let subvector = &vector[start_dim..end_dim];
610
611 let codebook = &self.codebooks[subvec_idx];
612 let mut best_dist = f32::INFINITY;
613 let mut best_code = 0u8;
614
615 for (code, centroid) in codebook.iter().enumerate() {
616 let dist = euclidean_distance(subvector, centroid);
617 if dist < best_dist {
618 best_dist = dist;
619 best_code = code as u8;
620 }
621 }
622
623 codes.push(best_code);
624 }
625
626 Ok(codes)
627 }
628
629 fn dequantize_codes(&self, codes: &[u8]) -> Result<Vec<f32>, VectorError> {
630 if codes.len() != self.subvectors {
631 return Err(VectorError::InvalidDimensions(format!(
632 "Code length {} doesn't match expected {}",
633 codes.len(),
634 self.subvectors
635 )));
636 }
637
638 let mut reconstructed = Vec::with_capacity(self.subvectors * self.subvector_dim);
639
640 for (subvec_idx, &code) in codes.iter().enumerate() {
641 let codebook = &self.codebooks[subvec_idx];
642 if (code as usize) < codebook.len() {
643 reconstructed.extend_from_slice(&codebook[code as usize]);
644 } else {
645 return Err(VectorError::InvalidDimensions(format!(
646 "Invalid code {} for codebook of size {}",
647 code,
648 codebook.len()
649 )));
650 }
651 }
652
653 Ok(reconstructed)
654 }
655}
656
/// Euclidean (L2) distance between two slices.
///
/// If the slices differ in length, the extra tail of the longer one is
/// ignored (`zip` stops at the shorter input).
fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    let mut sum_sq = 0.0f32;
    for (x, y) in a.iter().zip(b.iter()) {
        let d = x - y;
        sum_sq += d * d;
    }
    sum_sq.sqrt()
}
665
666impl VectorCompressor for ProductQuantizer {
667 fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
668 let values = match &vector.values {
669 VectorData::F32(v) => v.clone(),
670 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
671 _ => {
672 return Err(VectorError::UnsupportedOperation(
673 "Product quantization only supports float vectors".to_string(),
674 ))
675 }
676 };
677
678 let codes = self.quantize_vector(&values)?;
679
680 let mut compressed = Vec::new();
681 compressed.write_all(&(self.subvectors as u32).to_le_bytes())?;
683 compressed.write_all(&(self.codebook_size as u32).to_le_bytes())?;
684 compressed.write_all(&(self.subvector_dim as u32).to_le_bytes())?;
685
686 compressed.extend_from_slice(&codes);
688
689 Ok(compressed)
690 }
691
692 fn decompress(&self, data: &[u8], _dimensions: usize) -> Result<Vector, VectorError> {
693 if data.len() < 12 {
694 return Err(VectorError::InvalidData(
696 "Invalid compressed data format".to_string(),
697 ));
698 }
699
700 let subvectors = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
702 let codebook_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
703 let subvector_dim = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize;
704
705 if subvectors != self.subvectors
706 || codebook_size != self.codebook_size
707 || subvector_dim != self.subvector_dim
708 {
709 return Err(VectorError::InvalidData(
710 "Metadata mismatch in compressed data".to_string(),
711 ));
712 }
713
714 let codes = &data[12..];
715 if codes.len() != subvectors {
716 return Err(VectorError::InvalidData("Invalid code length".to_string()));
717 }
718
719 let values = self.dequantize_codes(codes)?;
720 Ok(Vector::new(values))
721 }
722
723 fn compression_ratio(&self) -> f32 {
724 (8.0 * self.subvectors as f32) / (32.0 * self.subvectors as f32 * self.subvector_dim as f32)
726 }
727}
728
/// Statistical profile of a set of vectors, plus the compression method
/// recommended from those statistics.
#[derive(Debug, Clone)]
pub struct VectorAnalysis {
    // Fraction of values that are near zero (threshold scales with std_dev).
    pub sparsity: f32,
    // max - min over all observed values.
    pub range: f32,
    // Mean of all observed values.
    pub mean: f32,
    // Standard deviation of all observed values.
    pub std_dev: f32,
    // Shannon entropy (bits) of a 64-bin value histogram.
    pub entropy: f32,
    // Up to five most frequent values (rounded to 3 decimals).
    pub dominant_patterns: Vec<f32>,
    // Method chosen by the selection heuristics.
    pub recommended_method: CompressionMethod,
    // Estimated compressed/original size ratio for that method.
    pub expected_ratio: f32,
}
741
impl VectorAnalysis {
    /// Profiles `vectors` and picks the compression method expected to
    /// work best at the requested quality level.
    ///
    /// Binary vectors short-circuit to a canned analysis. Errors when
    /// `vectors` is empty or yields no values.
    pub fn analyze(vectors: &[Vector], quality: &AdaptiveQuality) -> Result<Self, VectorError> {
        if vectors.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No vectors to analyze".to_string(),
            ));
        }

        // Pool every component value; `dimensions` comes from the first
        // non-empty vector seen.
        let mut all_values = Vec::new();
        let mut dimensions = 0;

        for vector in vectors {
            let values = match &vector.values {
                VectorData::F32(v) => v.clone(),
                VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
                VectorData::F16(v) => v.iter().map(|&x| f16::from_bits(x).to_f32()).collect(),
                VectorData::I8(v) => v.iter().map(|&x| x as f32).collect(),
                VectorData::Binary(_) => {
                    // Binary data has fixed, known statistics.
                    return Ok(Self::binary_analysis(vectors.len()));
                }
            };
            if dimensions == 0 {
                dimensions = values.len();
            }
            all_values.extend(values);
        }

        if all_values.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No values to analyze".to_string(),
            ));
        }

        // Basic moments over the pooled values.
        let min_val = all_values.iter().copied().fold(f32::INFINITY, f32::min);
        let max_val = all_values.iter().copied().fold(f32::NEG_INFINITY, f32::max);
        let range = max_val - min_val;
        let mean = all_values.iter().sum::<f32>() / all_values.len() as f32;

        let variance =
            all_values.iter().map(|&x| (x - mean).powi(2)).sum::<f32>() / all_values.len() as f32;
        let std_dev = variance.sqrt();

        // "Near zero" is measured relative to the spread of the data.
        let epsilon = std_dev * 0.01;
        let near_zero_count = all_values.iter().filter(|&&x| x.abs() < epsilon).count();
        let sparsity = near_zero_count as f32 / all_values.len() as f32;

        let entropy = Self::calculate_entropy(&all_values);

        let dominant_patterns = Self::find_dominant_patterns(&all_values);

        let (recommended_method, expected_ratio) =
            Self::select_optimal_method(sparsity, range, std_dev, entropy, dimensions, quality);

        Ok(Self {
            sparsity,
            range,
            mean,
            std_dev,
            entropy,
            dominant_patterns,
            recommended_method,
            expected_ratio,
        })
    }

    /// Fixed analysis used for binary vectors (values are 0/1 bits); the
    /// vector count is currently unused.
    fn binary_analysis(_vector_count: usize) -> Self {
        Self {
            sparsity: 0.0,
            range: 1.0,
            mean: 0.5,
            std_dev: 0.5,
            entropy: 1.0,
            dominant_patterns: vec![0.0, 1.0],
            recommended_method: CompressionMethod::Zstd { level: 1 },
            expected_ratio: 0.125, // 1 bit per value vs 8
        }
    }

    /// Shannon entropy (in bits) of a 64-bin histogram of `values`.
    /// Returns 0 for empty or constant input.
    fn calculate_entropy(values: &[f32]) -> f32 {
        let mut histogram = std::collections::HashMap::new();
        let bins = 64;
        if values.is_empty() {
            return 0.0;
        }

        let min_val = values.iter().copied().fold(f32::INFINITY, f32::min);
        let max_val = values.iter().copied().fold(f32::NEG_INFINITY, f32::max);
        let range = max_val - min_val;

        if range == 0.0 {
            // All values identical: zero entropy by definition.
            return 0.0;
        }

        for &value in values {
            let bin = ((value - min_val) / range * (bins - 1) as f32) as usize;
            let bin = bin.min(bins - 1);
            *histogram.entry(bin).or_insert(0) += 1;
        }

        let total = values.len() as f32;
        let mut entropy = 0.0;

        for count in histogram.values() {
            let probability = *count as f32 / total;
            if probability > 0.0 {
                entropy -= probability * probability.log2();
            }
        }

        entropy
    }

    /// Returns up to five of the most frequent values, after rounding to
    /// three decimal places to merge near-duplicates.
    fn find_dominant_patterns(values: &[f32]) -> Vec<f32> {
        let mut value_counts = std::collections::HashMap::new();

        for &value in values {
            // Round to 3 decimals; key on the bit pattern because f32
            // itself is not hashable.
            let quantized = (value * 1000.0).round() / 1000.0;
            *value_counts.entry(quantized.to_bits()).or_insert(0) += 1;
        }

        let mut patterns: Vec<_> = value_counts.into_iter().collect();
        // Most frequent first.
        patterns.sort_by(|a, b| b.1.cmp(&a.1));
        patterns
            .into_iter()
            .take(5)
            .map(|(bits, _)| f32::from_bits(bits))
            .collect()
    }

    /// Heuristic decision tree mapping the measured statistics to a
    /// compression method plus its expected size ratio.
    fn select_optimal_method(
        sparsity: f32,
        range: f32,
        std_dev: f32,
        entropy: f32,
        dimensions: usize,
        quality: &AdaptiveQuality,
    ) -> (CompressionMethod, f32) {
        // Mostly near-zero data: use lossless zstd.
        if sparsity > 0.7 {
            return match quality {
                AdaptiveQuality::Fast => (CompressionMethod::Zstd { level: 1 }, 0.3),
                AdaptiveQuality::Balanced => (CompressionMethod::Zstd { level: 6 }, 0.2),
                AdaptiveQuality::BestRatio => (CompressionMethod::Zstd { level: 19 }, 0.15),
            };
        }

        // Low-entropy (repetitive) data also favors entropy coding.
        if entropy < 2.0 {
            return match quality {
                AdaptiveQuality::Fast => (CompressionMethod::Zstd { level: 3 }, 0.4),
                AdaptiveQuality::Balanced => (CompressionMethod::Zstd { level: 9 }, 0.3),
                AdaptiveQuality::BestRatio => (CompressionMethod::Zstd { level: 22 }, 0.2),
            };
        }

        // Narrow, low-variance ranges suit scalar quantization.
        if range < 2.0 && std_dev < 0.5 {
            return match quality {
                AdaptiveQuality::Fast => (CompressionMethod::Quantization { bits: 8 }, 0.25),
                AdaptiveQuality::Balanced => (CompressionMethod::Quantization { bits: 6 }, 0.1875),
                AdaptiveQuality::BestRatio => (CompressionMethod::Quantization { bits: 4 }, 0.125),
            };
        }

        // High-dimensional data: reduce dimensionality with PCA.
        if dimensions > 128 {
            let components = match quality {
                AdaptiveQuality::Fast => dimensions * 7 / 10, // keep 70%
                AdaptiveQuality::Balanced => dimensions / 2,  // keep 50%
                AdaptiveQuality::BestRatio => dimensions / 3, // keep ~33%
            };
            return (
                CompressionMethod::Pca { components },
                components as f32 / dimensions as f32,
            );
        }

        // Fallback: general-purpose zstd.
        match quality {
            AdaptiveQuality::Fast => (CompressionMethod::Zstd { level: 3 }, 0.6),
            AdaptiveQuality::Balanced => (CompressionMethod::Zstd { level: 6 }, 0.5),
            AdaptiveQuality::BestRatio => (CompressionMethod::Zstd { level: 12 }, 0.4),
        }
    }
}
940
/// Compressor that profiles incoming vectors and delegates to whichever
/// concrete method the analysis recommends.
pub struct AdaptiveCompressor {
    // Speed/ratio preference used during analysis.
    quality_level: AdaptiveQuality,
    // Max number of sample vectors profiled per analysis (min 10).
    analysis_samples: usize,
    // Active delegate; None until the first successful analysis.
    current_method: Option<Box<dyn VectorCompressor>>,
    // Result of the most recent analysis.
    analysis_cache: Option<VectorAnalysis>,
    // Counters (currently only `method_switches` is updated here).
    performance_metrics: CompressionMetrics,
}
949
/// Cumulative statistics about adaptive compression activity.
///
/// NOTE(review): apart from `method_switches`, none of these counters are
/// updated anywhere in this file yet — confirm intended usage.
#[derive(Debug, Clone)]
pub struct CompressionMetrics {
    pub vectors_compressed: usize,
    pub total_original_size: usize,
    pub total_compressed_size: usize,
    pub compression_time_ms: f64,
    pub decompression_time_ms: f64,
    pub current_ratio: f32,
    pub method_switches: usize,
}
960
impl Default for CompressionMetrics {
    // Manual impl (instead of `#[derive(Default)]`) so `current_ratio`
    // starts at the neutral value 1.0 rather than 0.0.
    fn default() -> Self {
        Self {
            vectors_compressed: 0,
            total_original_size: 0,
            total_compressed_size: 0,
            compression_time_ms: 0.0,
            decompression_time_ms: 0.0,
            current_ratio: 1.0,
            method_switches: 0,
        }
    }
}
974
975impl AdaptiveCompressor {
976 pub fn new(quality_level: AdaptiveQuality, analysis_samples: usize) -> Self {
977 Self {
978 quality_level,
979 analysis_samples: analysis_samples.max(10), current_method: None,
981 analysis_cache: None,
982 performance_metrics: CompressionMetrics::default(),
983 }
984 }
985
986 pub fn with_fast_quality() -> Self {
987 Self::new(AdaptiveQuality::Fast, 50)
988 }
989
990 pub fn with_balanced_quality() -> Self {
991 Self::new(AdaptiveQuality::Balanced, 100)
992 }
993
994 pub fn with_best_ratio() -> Self {
995 Self::new(AdaptiveQuality::BestRatio, 200)
996 }
997
998 pub fn optimize_for_vectors(&mut self, sample_vectors: &[Vector]) -> Result<(), VectorError> {
1000 if sample_vectors.is_empty() {
1001 return Ok(());
1002 }
1003
1004 let start_time = std::time::Instant::now();
1005
1006 let samples_to_analyze = sample_vectors.len().min(self.analysis_samples);
1008 let analysis_vectors = &sample_vectors[..samples_to_analyze];
1009
1010 let analysis = VectorAnalysis::analyze(analysis_vectors, &self.quality_level)?;
1011
1012 let should_switch = match (&self.current_method, &self.analysis_cache) {
1014 (Some(_), Some(cached)) => {
1015 !methods_equivalent(&cached.recommended_method, &analysis.recommended_method)
1017 }
1018 _ => true, };
1020
1021 if should_switch {
1022 self.current_method = Some(create_compressor(&analysis.recommended_method));
1023 self.performance_metrics.method_switches += 1;
1024 }
1025
1026 if self.current_method.is_some() {
1028 }
1031
1032 self.analysis_cache = Some(analysis);
1033
1034 let analysis_time = start_time.elapsed().as_secs_f64() * 1000.0;
1035 tracing::debug!("Adaptive compression analysis took {:.2}ms", analysis_time);
1036
1037 Ok(())
1038 }
1039
1040 fn train_compressor(
1041 &self,
1042 _compressor: &mut dyn VectorCompressor,
1043 _vectors: &[Vector],
1044 ) -> Result<(), VectorError> {
1045 Ok(())
1048 }
1049
1050 pub fn get_metrics(&self) -> &CompressionMetrics {
1051 &self.performance_metrics
1052 }
1053
1054 pub fn get_analysis(&self) -> Option<&VectorAnalysis> {
1055 self.analysis_cache.as_ref()
1056 }
1057
1058 pub fn adaptive_reanalysis(&mut self, recent_vectors: &[Vector]) -> Result<bool, VectorError> {
1060 if recent_vectors.len() < self.analysis_samples / 4 {
1061 return Ok(false); }
1063
1064 let old_method = self
1065 .analysis_cache
1066 .as_ref()
1067 .map(|a| a.recommended_method.clone());
1068
1069 self.optimize_for_vectors(recent_vectors)?;
1070
1071 let method_changed = match (old_method, &self.analysis_cache) {
1072 (Some(old), Some(new)) => !methods_equivalent(&old, &new.recommended_method),
1073 _ => false,
1074 };
1075
1076 Ok(method_changed)
1077 }
1078}
1079
1080impl VectorCompressor for AdaptiveCompressor {
1081 fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
1082 if let Some(compressor) = &self.current_method {
1083 let start = std::time::Instant::now();
1084 let result = compressor.compress(vector);
1085 let _compression_time = start.elapsed().as_secs_f64() * 1000.0;
1086
1087 result
1091 } else {
1092 let no_op = NoOpCompressor;
1094 no_op.compress(vector)
1095 }
1096 }
1097
1098 fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
1099 if let Some(compressor) = &self.current_method {
1100 let start = std::time::Instant::now();
1101 let result = compressor.decompress(data, dimensions);
1102 let _decompression_time = start.elapsed().as_secs_f64() * 1000.0;
1103
1104 result
1107 } else {
1108 let no_op = NoOpCompressor;
1110 no_op.decompress(data, dimensions)
1111 }
1112 }
1113
1114 fn compression_ratio(&self) -> f32 {
1115 if let Some(compressor) = &self.current_method {
1116 compressor.compression_ratio()
1117 } else {
1118 1.0
1119 }
1120 }
1121}
1122
1123fn methods_equivalent(method1: &CompressionMethod, method2: &CompressionMethod) -> bool {
1124 match (method1, method2) {
1125 (CompressionMethod::None, CompressionMethod::None) => true,
1126 (CompressionMethod::Zstd { level: l1 }, CompressionMethod::Zstd { level: l2 }) => {
1127 (l1 - l2).abs() <= 2 }
1129 (
1130 CompressionMethod::Quantization { bits: b1 },
1131 CompressionMethod::Quantization { bits: b2 },
1132 ) => b1 == b2,
1133 (CompressionMethod::Pca { components: c1 }, CompressionMethod::Pca { components: c2 }) => {
1134 ((*c1 as i32) - (*c2 as i32)).abs() <= (*c1 as i32) / 10 }
1136 _ => false,
1137 }
1138}
1139
/// Builds a boxed compressor for the given method description.
///
/// Quantization, PQ, and PCA compressors come back untrained; callers
/// that need data-dependent parameters must train them before use.
pub fn create_compressor(method: &CompressionMethod) -> Box<dyn VectorCompressor> {
    match method {
        CompressionMethod::None => Box::new(NoOpCompressor),
        CompressionMethod::Zstd { level } => Box::new(ZstdCompressor::new(*level)),
        CompressionMethod::Quantization { bits } => Box::new(ScalarQuantizer::new(*bits)),
        CompressionMethod::Pca { components } => Box::new(PcaCompressor::new(*components)),
        CompressionMethod::ProductQuantization {
            subvectors,
            codebook_size,
        } => Box::new(ProductQuantizer::new(*subvectors, *codebook_size)),
        CompressionMethod::Adaptive {
            quality_level,
            analysis_samples,
        } => Box::new(AdaptiveCompressor::new(
            quality_level.clone(),
            *analysis_samples,
        )),
    }
}
1159
/// Pass-through "compressor": plain serialization with no size reduction.
struct NoOpCompressor;

impl VectorCompressor for NoOpCompressor {
    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        vector_to_bytes(vector)
    }

    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
        bytes_to_vector(data, dimensions)
    }

    // Identity: output is input-sized (plus a 1-byte type tag).
    fn compression_ratio(&self) -> f32 {
        1.0
    }
}
1175
1176fn vector_to_bytes(vector: &Vector) -> Result<Vec<u8>, VectorError> {
1177 let mut bytes = Vec::new();
1178
1179 let type_byte = match &vector.values {
1181 VectorData::F32(_) => 0u8,
1182 VectorData::F64(_) => 1u8,
1183 VectorData::F16(_) => 2u8,
1184 VectorData::I8(_) => 3u8,
1185 VectorData::Binary(_) => 4u8,
1186 };
1187 bytes.push(type_byte);
1188
1189 match &vector.values {
1190 VectorData::F32(v) => {
1191 for val in v {
1192 bytes.write_all(&val.to_le_bytes())?;
1193 }
1194 }
1195 VectorData::F64(v) => {
1196 for val in v {
1197 bytes.write_all(&val.to_le_bytes())?;
1198 }
1199 }
1200 VectorData::F16(v) => {
1201 for val in v {
1202 bytes.write_all(&val.to_le_bytes())?;
1203 }
1204 }
1205 VectorData::I8(v) => {
1206 for &val in v {
1207 bytes.push(val as u8);
1208 }
1209 }
1210 VectorData::Binary(v) => {
1211 bytes.extend_from_slice(v);
1212 }
1213 }
1214
1215 Ok(bytes)
1216}
1217
1218fn bytes_to_vector(data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
1219 if data.is_empty() {
1220 return Err(VectorError::InvalidDimensions("Empty data".to_string()));
1221 }
1222
1223 let type_byte = data[0];
1224 let data = &data[1..];
1225
1226 match type_byte {
1227 0 => {
1228 let mut values = Vec::with_capacity(dimensions);
1230 let mut cursor = std::io::Cursor::new(data);
1231 let mut buf = [0u8; 4];
1232
1233 for _ in 0..dimensions {
1234 cursor.read_exact(&mut buf)?;
1235 values.push(f32::from_le_bytes(buf));
1236 }
1237 Ok(Vector::new(values))
1238 }
1239 1 => {
1240 let mut values = Vec::with_capacity(dimensions);
1242 let mut cursor = std::io::Cursor::new(data);
1243 let mut buf = [0u8; 8];
1244
1245 for _ in 0..dimensions {
1246 cursor.read_exact(&mut buf)?;
1247 values.push(f64::from_le_bytes(buf));
1248 }
1249 Ok(Vector::f64(values))
1250 }
1251 2 => {
1252 let mut values = Vec::with_capacity(dimensions);
1254 let mut cursor = std::io::Cursor::new(data);
1255 let mut buf = [0u8; 2];
1256
1257 for _ in 0..dimensions {
1258 cursor.read_exact(&mut buf)?;
1259 values.push(u16::from_le_bytes(buf));
1260 }
1261 Ok(Vector::f16(values))
1262 }
1263 3 => {
1264 Ok(Vector::i8(
1266 data[..dimensions].iter().map(|&b| b as i8).collect(),
1267 ))
1268 }
1269 4 => {
1270 Ok(Vector::binary(data[..dimensions].to_vec()))
1272 }
1273 _ => Err(VectorError::InvalidData(format!(
1274 "Unknown vector type: {type_byte}"
1275 ))),
1276 }
1277}
1278
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_zstd_compression() {
        // Round-trip a small f32 vector through zstd at a moderate level.
        let input = Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let codec = ZstdCompressor::new(3);

        let packed = codec.compress(&input).unwrap();
        let restored = codec.decompress(&packed, 5).unwrap();

        let expected = input.as_f32();
        let actual = restored.as_f32();
        assert_eq!(expected.len(), actual.len());
        // Zstd is lossless; allow only float round-trip noise.
        for (want, got) in expected.iter().zip(actual.iter()) {
            assert!((want - got).abs() < 1e-6);
        }
    }

    #[test]
    fn test_scalar_quantization() {
        // 8-bit quantization should shrink the payload and stay close in value.
        let input = Vector::new(vec![0.1, 0.5, 0.9, 0.3, 0.7]);
        let mut quantizer = ScalarQuantizer::new(8);
        quantizer.train(std::slice::from_ref(&input)).unwrap();

        let packed = quantizer.compress(&input).unwrap();
        let restored = quantizer.decompress(&packed, 5).unwrap();

        // Five 8-bit codes are well under the 20-byte raw f32 footprint.
        assert!(packed.len() < 20);

        let expected = input.as_f32();
        let actual = restored.as_f32();
        assert_eq!(expected.len(), actual.len());
        for (want, got) in expected.iter().zip(actual.iter()) {
            assert!((want - got).abs() < 0.01);
        }
    }

    #[test]
    fn test_pca_compression() {
        // Train PCA on correlated vectors, reducing 5 dims to 3 components.
        let training = vec![
            Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]),
            Vector::new(vec![2.0, 3.0, 4.0, 5.0, 6.0]),
            Vector::new(vec![3.0, 4.0, 5.0, 6.0, 7.0]),
        ];

        let mut pca = PcaCompressor::new(3);
        pca.train(&training).unwrap();

        let packed = pca.compress(&training[0]).unwrap();
        let restored = pca.decompress(&packed, 5).unwrap();

        // Decompression projects back into the original dimensionality.
        assert_eq!(restored.as_f32().len(), 5);
    }

    #[test]
    fn test_adaptive_compression_sparse_data() {
        // Mostly-zero vectors should be flagged as sparse by the analyzer.
        let samples = vec![
            Vector::new(vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0]),
            Vector::new(vec![0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 1.0]),
            Vector::new(vec![1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0]),
        ];

        let mut adaptive = AdaptiveCompressor::with_balanced_quality();
        adaptive.optimize_for_vectors(&samples).unwrap();

        let analysis = adaptive.get_analysis().unwrap();
        assert!(analysis.sparsity > 0.5);

        let packed = adaptive.compress(&samples[0]).unwrap();
        let restored = adaptive.decompress(&packed, 10).unwrap();

        assert_eq!(samples[0].as_f32().len(), restored.as_f32().len());
    }

    #[test]
    fn test_adaptive_compression_quantizable_data() {
        // Narrow-range data should look quantization-friendly.
        let samples = vec![
            Vector::new(vec![0.1, 0.2, 0.3, 0.4, 0.5]),
            Vector::new(vec![0.2, 0.3, 0.4, 0.5, 0.6]),
            Vector::new(vec![0.3, 0.4, 0.5, 0.6, 0.7]),
        ];

        let mut adaptive = AdaptiveCompressor::with_balanced_quality();
        adaptive.optimize_for_vectors(&samples).unwrap();

        let analysis = adaptive.get_analysis().unwrap();
        assert!(analysis.range < 1.0);

        let packed = adaptive.compress(&samples[0]).unwrap();
        let restored = adaptive.decompress(&packed, 5).unwrap();

        assert_eq!(samples[0].as_f32().len(), restored.as_f32().len());

        // Quantization should beat a 2:1 ratio comfortably.
        assert!(adaptive.compression_ratio() < 0.5);
    }

    #[test]
    fn test_adaptive_compression_high_dimensional() {
        // Ten correlated 200-dimensional vectors; best-ratio mode should
        // select an aggressive method (PCA, quantization, or zstd).
        let mut samples = Vec::new();
        for i in 0..10 {
            let data: Vec<f32> = (0..200).map(|j| (i * j) as f32 * 0.01).collect();
            samples.push(Vector::new(data));
        }

        let mut adaptive = AdaptiveCompressor::with_best_ratio();
        adaptive.optimize_for_vectors(&samples).unwrap();

        let analysis = adaptive.get_analysis().unwrap();
        if let CompressionMethod::Pca { components } = &analysis.recommended_method {
            assert!(*components < 200);
        } else {
            assert!(matches!(
                analysis.recommended_method,
                CompressionMethod::Pca { .. }
                    | CompressionMethod::Quantization { .. }
                    | CompressionMethod::Zstd { .. }
            ));
        }

        let original = &samples[0];
        println!("Original vector length: {}", original.dimensions);
        println!("Recommended method: {:?}", analysis.recommended_method);

        let packed = adaptive.compress(original).unwrap();
        println!("Compressed size: {} bytes", packed.len());

        assert!(!packed.is_empty());
        // Must undercut the raw f32 footprint (4 bytes per dimension).
        assert!(packed.len() < original.dimensions * 4);

        match &analysis.recommended_method {
            CompressionMethod::Pca { components } => {
                assert!(*components < original.dimensions);
                println!(
                    "PCA compression: {} → {} components",
                    original.dimensions, components
                );
            }
            _ => {
                // Non-PCA methods must round-trip at full dimensionality.
                let restored = adaptive
                    .decompress(&packed, original.dimensions)
                    .unwrap();
                assert_eq!(restored.as_f32().len(), original.as_f32().len());
            }
        }
    }

    #[test]
    fn test_adaptive_compression_method_switching() {
        // Retraining on data with a different profile should bump the
        // method-switch counter.
        let mut adaptive = AdaptiveCompressor::with_fast_quality();

        let sparse = vec![
            Vector::new(vec![0.0, 0.0, 1.0, 0.0, 0.0]),
            Vector::new(vec![0.0, 2.0, 0.0, 0.0, 0.0]),
        ];
        adaptive.optimize_for_vectors(&sparse).unwrap();
        let switches_before = adaptive.get_metrics().method_switches;

        let dense = vec![
            Vector::new(vec![0.1, 0.2, 0.3, 0.4, 0.5]),
            Vector::new(vec![0.2, 0.3, 0.4, 0.5, 0.6]),
        ];
        adaptive.optimize_for_vectors(&dense).unwrap();

        assert!(adaptive.get_metrics().method_switches > switches_before);
    }

    #[test]
    fn test_vector_analysis() {
        // Basic statistics must come out sane for a simple dataset.
        let samples = vec![
            Vector::new(vec![1.0, 2.0, 3.0]),
            Vector::new(vec![2.0, 3.0, 4.0]),
            Vector::new(vec![3.0, 4.0, 5.0]),
        ];

        let analysis = VectorAnalysis::analyze(&samples, &AdaptiveQuality::Balanced).unwrap();

        assert!(analysis.mean > 0.0);
        assert!(analysis.std_dev > 0.0);
        assert!(analysis.range > 0.0);
        assert!(analysis.entropy >= 0.0);
        assert!(!analysis.dominant_patterns.is_empty());
        assert!(analysis.expected_ratio > 0.0 && analysis.expected_ratio <= 1.0);
    }

    #[test]
    fn test_compression_method_equivalence() {
        // Adjacent zstd levels are considered interchangeable...
        assert!(methods_equivalent(
            &CompressionMethod::Zstd { level: 5 },
            &CompressionMethod::Zstd { level: 6 }
        ));
        // ...but widely separated levels are not.
        assert!(!methods_equivalent(
            &CompressionMethod::Zstd { level: 1 },
            &CompressionMethod::Zstd { level: 10 }
        ));
        // Identical quantization settings match.
        assert!(methods_equivalent(
            &CompressionMethod::Quantization { bits: 8 },
            &CompressionMethod::Quantization { bits: 8 }
        ));
        // Different method families never match.
        assert!(!methods_equivalent(
            &CompressionMethod::Zstd { level: 5 },
            &CompressionMethod::Quantization { bits: 8 }
        ));
    }

    #[test]
    fn test_adaptive_compressor_convenience_constructors() {
        // Each convenience constructor sets the matching quality level.
        let fast = AdaptiveCompressor::with_fast_quality();
        assert!(matches!(fast.quality_level, AdaptiveQuality::Fast));

        let balanced = AdaptiveCompressor::with_balanced_quality();
        assert!(matches!(balanced.quality_level, AdaptiveQuality::Balanced));

        let best = AdaptiveCompressor::with_best_ratio();
        assert!(matches!(best.quality_level, AdaptiveQuality::BestRatio));
    }

    #[test]
    fn test_product_quantization() {
        // 8-dim vectors split into 4 subvectors with a 4-entry codebook.
        let training = vec![
            Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]),
            Vector::new(vec![2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]),
            Vector::new(vec![3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]),
            Vector::new(vec![1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5]),
        ];

        let mut pq = ProductQuantizer::new(4, 4);
        pq.train(&training).unwrap();

        let original = &training[0];
        let packed = pq.compress(original).unwrap();
        let restored = pq.decompress(&packed, 8).unwrap();
        assert_eq!(restored.dimensions, original.dimensions);

        let ratio = pq.compression_ratio();
        assert!(
            ratio > 0.0 && ratio < 1.0,
            "Compression ratio should be between 0 and 1, got {ratio}"
        );

        // Every training vector must survive a round trip dimensionally.
        for sample in &training {
            let packed = pq.compress(sample).unwrap();
            let restored = pq.decompress(&packed, sample.dimensions).unwrap();
            assert_eq!(restored.dimensions, sample.dimensions);
        }
    }

    #[test]
    fn test_product_quantization_invalid_dimensions() {
        // 3 dimensions cannot be split into 4 subvectors.
        let training = vec![Vector::new(vec![1.0, 2.0, 3.0])];

        let mut pq = ProductQuantizer::new(4, 4);
        let result = pq.train(&training);

        assert!(matches!(result, Err(VectorError::InvalidDimensions(_))));
    }
}