use crate::{
    compression::{create_compressor, CompressionMethod, VectorCompressor},
    Vector, VectorError,
};
use anyhow::Result;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};

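/// Contextual hints (data domain, access pattern, quality target, and
/// resource limits) used to steer adaptive compression decisions.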
#[derive(Debug, Clone)]
pub struct CompressionContext {
    pub domain: VectorDomain,
    pub access_frequency: AccessFrequency,
    pub quality_requirement: QualityRequirement,
    pub resource_constraints: ResourceConstraints,
    pub temporal_patterns: TemporalPatterns,
}

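/// Broad categories of vector data with distinct statistical signatures.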
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum VectorDomain {
    TextEmbeddings,
    ImageFeatures,
    AudioFeatures,
    KnowledgeGraph,
    TimeSeriesData,
    Unknown,
}

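/// How often the compressed vectors are expected to be read back.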
#[derive(Debug, Clone)]
pub enum AccessFrequency {
    VeryHigh,
    High,
    Moderate,
    Low,
    Archive,
}

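/// Acceptable trade-off between reconstruction fidelity and compression strength.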
#[derive(Debug, Clone)]
pub enum QualityRequirement {
    Lossless,
    HighQuality,
    Balanced,
    Compressed,
    Aggressive,
}

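/// Budget limits the compressor must stay within.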
#[derive(Debug, Clone)]
pub struct ResourceConstraints {
    pub cpu_usage_limit: f32,
    pub memory_usage_limit: f32,
    pub compression_time_limit: Duration,
    pub decompression_time_limit: Duration,
}

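/// Multiplicative factors describing time-dependent load on the system.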
#[derive(Debug, Clone)]
pub struct TemporalPatterns {
    pub time_of_day_factor: f32,
    pub load_factor: f32,
    pub seasonal_factor: f32,
}

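/// Statistical fingerprint of a vector (or of a batch, averaged), used to
/// recommend a compression method.
///
/// A minimal sketch of the API, mirroring the unit tests at the bottom of
/// this file:
///
/// ```ignore
/// let stats = VectorStats::from_vector(&Vector::new(vec![1.0, 2.0, 3.0]))?;
/// assert_eq!(stats.dimensions, 3);
/// ```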
#[derive(Debug, Clone)]
pub struct VectorStats {
    pub dimensions: usize,
    pub mean: f32,
    pub std_dev: f32,
    pub min_val: f32,
    pub max_val: f32,
    pub entropy: f32,
    pub sparsity: f32,
    pub correlation: f32,
    pub intrinsic_dimension: f32,
    pub clustering_tendency: f32,
    pub temporal_stability: f32,
    pub domain_affinity: VectorDomain,
}

impl Default for CompressionContext {
    fn default() -> Self {
        Self {
            domain: VectorDomain::Unknown,
            access_frequency: AccessFrequency::Moderate,
            quality_requirement: QualityRequirement::Balanced,
            resource_constraints: ResourceConstraints {
                cpu_usage_limit: 0.7,
                memory_usage_limit: 0.8,
                compression_time_limit: Duration::from_millis(100),
                decompression_time_limit: Duration::from_millis(50),
            },
            temporal_patterns: TemporalPatterns {
                time_of_day_factor: 1.0,
                load_factor: 1.0,
                seasonal_factor: 1.0,
            },
        }
    }
}

impl VectorStats {
    pub fn from_vector(vector: &Vector) -> Result<Self, VectorError> {
        Self::from_vector_with_context(vector, &CompressionContext::default())
    }

    pub fn from_vector_with_context(
        vector: &Vector,
        context: &CompressionContext,
    ) -> Result<Self, VectorError> {
        let values = vector.as_f32();
        let n = values.len();

        if n == 0 {
            return Err(VectorError::InvalidDimensions("Empty vector".to_string()));
        }

        let sum: f32 = values.iter().sum();
        let mean = sum / n as f32;

        let variance: f32 = values.iter().map(|x| (x - mean).powi(2)).sum::<f32>() / n as f32;
        let std_dev = variance.sqrt();

        let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));

        // Histogram resolution is tuned per domain: finer bins for image
        // features, coarser bins for sparse knowledge-graph embeddings.
        let bin_count = match context.domain {
            VectorDomain::TextEmbeddings => 128,
            VectorDomain::ImageFeatures => 256,
            VectorDomain::KnowledgeGraph => 64,
            _ => 256,
        };

        let mut histogram = vec![0u32; bin_count];
        let range = max_val - min_val;
        if range > 0.0 {
            for val in &values {
                let bucket = ((val - min_val) / range * (bin_count - 1) as f32)
                    .clamp(0.0, (bin_count - 1) as f32) as usize;
                histogram[bucket] += 1;
            }
        }

        // Shannon entropy (in bits) of the binned value distribution.
        let entropy: f32 = histogram
            .iter()
            .filter(|&&count| count > 0)
            .map(|&count| {
                let p = count as f32 / n as f32;
                -p * p.log2()
            })
            .sum();

        // Values within a tenth of a standard deviation of zero count as sparse.
        let threshold = std_dev * 0.1;
        let sparse_count = values.iter().filter(|&&x| x.abs() < threshold).count();
        let sparsity = sparse_count as f32 / n as f32;

        let correlation = Self::calculate_enhanced_correlation(&values);
        let intrinsic_dimension = Self::estimate_intrinsic_dimension(&values);
        let clustering_tendency = Self::calculate_hopkins_statistic(&values);

        // Placeholder: a single snapshot cannot observe drift over time.
        let temporal_stability = 1.0;

        let domain_affinity = Self::detect_domain(&values, entropy, sparsity, correlation);

        Ok(VectorStats {
            dimensions: n,
            mean,
            std_dev,
            min_val,
            max_val,
            entropy,
            sparsity,
            correlation,
            intrinsic_dimension,
            clustering_tendency,
            temporal_stability,
            domain_affinity,
        })
    }

    fn calculate_enhanced_correlation(values: &[f32]) -> f32 {
        let n = values.len();
        if n <= 1 {
            return 0.0;
        }

        // Average absolute lag-1 autocorrelation over sliding windows of
        // several sizes, so both short- and longer-range structure register.
        let window_sizes = [5, 10, 20].iter().map(|&w| w.min(n / 2).max(2));
        let mut total_corr = 0.0;
        let mut total_count = 0;

        for window_size in window_sizes {
            if window_size >= n {
                continue;
            }

            for i in 0..(n - window_size) {
                let window1 = &values[i..i + window_size];
                let window2 = &values[i + 1..i + window_size + 1];

                let mean1: f32 = window1.iter().sum::<f32>() / window_size as f32;
                let mean2: f32 = window2.iter().sum::<f32>() / window_size as f32;

                let covariance: f32 = window1
                    .iter()
                    .zip(window2)
                    .map(|(a, b)| (a - mean1) * (b - mean2))
                    .sum();
                let var1: f32 = window1.iter().map(|x| (x - mean1).powi(2)).sum();
                let var2: f32 = window2.iter().map(|x| (x - mean2).powi(2)).sum();

                if var1 > 0.0 && var2 > 0.0 {
                    let corr = covariance / (var1.sqrt() * var2.sqrt());
                    total_corr += corr.abs();
                    total_count += 1;
                }
            }
        }

        if total_count > 0 {
            total_corr / total_count as f32
        } else {
            0.0
        }
    }

    fn estimate_intrinsic_dimension(values: &[f32]) -> f32 {
        let n = values.len();
        if n < 10 {
            return n as f32;
        }

        // Subsample evenly to bound the O(k^2) pair-distance work below.
        let sample_size = n.min(100);
        let step = n / sample_size;
        let sampled: Vec<f32> = (0..sample_size).map(|i| values[i * step]).collect();

        // Correlation-dimension estimate: count point pairs within growing
        // radii, then fit the slope of log(count) against log(radius).
        let mut log_radii = Vec::new();
        let mut log_counts = Vec::new();

        let max_val = sampled.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
        let min_val = sampled.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let range = max_val - min_val;

        if range <= 0.0 {
            return 1.0;
        }

        for radius_factor in [0.001, 0.01, 0.1, 0.5] {
            let radius = range * radius_factor;
            let mut count = 0;

            for i in 0..sampled.len() {
                for j in (i + 1)..sampled.len() {
                    if (sampled[i] - sampled[j]).abs() < radius {
                        count += 1;
                    }
                }
            }

            if count > 0 {
                log_radii.push(radius.ln());
                log_counts.push((count as f32).ln());
            }
        }

        if log_radii.len() < 2 {
            return n as f32;
        }

        // Least-squares slope of the log-log curve.
        let mean_log_r: f32 = log_radii.iter().sum::<f32>() / log_radii.len() as f32;
        let mean_log_c: f32 = log_counts.iter().sum::<f32>() / log_counts.len() as f32;

        let numerator: f32 = log_radii
            .iter()
            .zip(&log_counts)
            .map(|(r, c)| (r - mean_log_r) * (c - mean_log_c))
            .sum();
        let denominator: f32 = log_radii.iter().map(|r| (r - mean_log_r).powi(2)).sum();

        if denominator > 0.0 {
            let slope = numerator / denominator;
            slope.abs().clamp(1.0, n as f32)
        } else {
            n as f32
        }
    }

    fn calculate_hopkins_statistic(values: &[f32]) -> f32 {
        let n = values.len();
        if n < 10 {
            return 0.5; // Too few points to distinguish clustering from noise.
        }

        let sample_size = (n / 10).clamp(5, 50);
        let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));

        if max_val <= min_val {
            return 0.5;
        }

        // Sum of nearest-neighbor distances from sampled data points (W) and
        // from uniformly random points (U); clustered data pushes U/(W+U)
        // toward 1.0, uniform data toward 0.5.
        let mut w_sum = 0.0;
        let mut u_sum = 0.0;

        for i in 0..sample_size {
            let idx = (i * n / sample_size) % n;
            let point = values[idx];

            let mut min_dist = f32::INFINITY;
            for &other in values {
                if other != point {
                    let dist = (point - other).abs();
                    min_dist = min_dist.min(dist);
                }
            }
            w_sum += min_dist;
        }

        // Deterministic linear-congruential generator seeded via hashing, so
        // the statistic is reproducible without an external RNG dependency.
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        42u64.hash(&mut hasher);
        let mut rng_state = hasher.finish();

        for _ in 0..sample_size {
            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
            let random_point = min_val + (max_val - min_val) * (rng_state as f32 / u64::MAX as f32);

            let mut min_dist = f32::INFINITY;
            for &data_point in values {
                let dist = (random_point - data_point).abs();
                min_dist = min_dist.min(dist);
            }
            u_sum += min_dist;
        }

        if w_sum + u_sum > 0.0 {
            u_sum / (w_sum + u_sum)
        } else {
            0.5
        }
    }

    fn detect_domain(
        _values: &[f32],
        entropy: f32,
        sparsity: f32,
        correlation: f32,
    ) -> VectorDomain {
        if entropy > 6.0
            && entropy < 8.0
            && sparsity < 0.3
            && correlation > 0.2
            && correlation < 0.6
        {
            return VectorDomain::TextEmbeddings;
        }

        if entropy > 7.0 && correlation < 0.3 {
            return VectorDomain::ImageFeatures;
        }

        if entropy < 6.0 && sparsity > 0.4 {
            return VectorDomain::KnowledgeGraph;
        }

        if correlation > 0.7 && entropy > 5.0 && entropy < 7.0 {
            return VectorDomain::TimeSeriesData;
        }

        VectorDomain::Unknown
    }

    pub fn from_vectors(vectors: &[Vector]) -> Result<Self, VectorError> {
        Self::from_vectors_with_context(vectors, &CompressionContext::default())
    }

    pub fn from_vectors_with_context(
        vectors: &[Vector],
        context: &CompressionContext,
    ) -> Result<Self, VectorError> {
        if vectors.is_empty() {
            return Err(VectorError::InvalidDimensions(
                "No vectors provided".to_string(),
            ));
        }

        let individual_stats: Result<Vec<_>, _> = vectors
            .iter()
            .map(|v| Self::from_vector_with_context(v, context))
            .collect();
        let stats = individual_stats?;

        let n = stats.len() as f32;

        // Average the per-vector statistics; assumes all vectors share the
        // dimensionality of the first.
        Ok(VectorStats {
            dimensions: stats[0].dimensions,
            mean: stats.iter().map(|s| s.mean).sum::<f32>() / n,
            std_dev: stats.iter().map(|s| s.std_dev).sum::<f32>() / n,
            min_val: stats
                .iter()
                .map(|s| s.min_val)
                .fold(f32::INFINITY, |a, b| a.min(b)),
            max_val: stats
                .iter()
                .map(|s| s.max_val)
                .fold(f32::NEG_INFINITY, |a, b| a.max(b)),
            entropy: stats.iter().map(|s| s.entropy).sum::<f32>() / n,
            sparsity: stats.iter().map(|s| s.sparsity).sum::<f32>() / n,
            correlation: stats.iter().map(|s| s.correlation).sum::<f32>() / n,
            intrinsic_dimension: stats.iter().map(|s| s.intrinsic_dimension).sum::<f32>() / n,
            clustering_tendency: stats.iter().map(|s| s.clustering_tendency).sum::<f32>() / n,
            temporal_stability: stats.iter().map(|s| s.temporal_stability).sum::<f32>() / n,
            domain_affinity: Self::aggregate_domain_affinity(&stats),
        })
    }

    fn aggregate_domain_affinity(stats: &[VectorStats]) -> VectorDomain {
        // Majority vote across the per-vector domain guesses.
        let mut domain_counts = HashMap::new();

        for stat in stats {
            *domain_counts
                .entry(stat.domain_affinity.clone())
                .or_insert(0) += 1;
        }

        domain_counts
            .into_iter()
            .max_by_key(|(_, count)| *count)
            .map(|(domain, _)| domain)
            .unwrap_or(VectorDomain::Unknown)
    }
}

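/// Rolling performance record for a single compression method, smoothed with
/// an exponential moving average on each `update`.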
#[derive(Debug, Clone)]
pub struct CompressionMetrics {
    pub method: CompressionMethod,
    pub compression_ratio: f32,
    pub compression_time: Duration,
    pub decompression_time: Duration,
    pub reconstruction_error: f32,
    pub usage_count: u64,
    pub avg_performance_score: f32,
}

impl CompressionMetrics {
    pub fn new(method: CompressionMethod) -> Self {
        Self {
            method,
            compression_ratio: 1.0,
            compression_time: Duration::ZERO,
            decompression_time: Duration::ZERO,
            reconstruction_error: 0.0,
            usage_count: 0,
            avg_performance_score: 0.0,
        }
    }

    pub fn calculate_score(&self, priorities: &CompressionPriorities) -> f32 {
        // `compression_ratio` is compressed/original size, so smaller is
        // better; invert it (capped at 0.9) so the score rewards stronger
        // compression rather than weaker.
        let ratio_score = (1.0 - self.compression_ratio).clamp(0.0, 0.9);
        let speed_score = 1.0 / (1.0 + self.compression_time.as_millis() as f32 / 1000.0);
        let accuracy_score = 1.0 / (1.0 + self.reconstruction_error);

        priorities.compression_weight * ratio_score
            + priorities.speed_weight * speed_score
            + priorities.accuracy_weight * accuracy_score
    }

    pub fn update(
        &mut self,
        compression_ratio: f32,
        comp_time: Duration,
        decomp_time: Duration,
        error: f32,
        priorities: &CompressionPriorities,
    ) {
        // Exponential moving average: each new measurement shifts the running
        // values by a factor of alpha.
        let alpha = 0.1;
        self.compression_ratio = self.compression_ratio * (1.0 - alpha) + compression_ratio * alpha;
        self.compression_time = Duration::from_nanos(
            (self.compression_time.as_nanos() as f32 * (1.0 - alpha)
                + comp_time.as_nanos() as f32 * alpha) as u64,
        );
        self.decompression_time = Duration::from_nanos(
            (self.decompression_time.as_nanos() as f32 * (1.0 - alpha)
                + decomp_time.as_nanos() as f32 * alpha) as u64,
        );
        self.reconstruction_error = self.reconstruction_error * (1.0 - alpha) + error * alpha;
        self.usage_count += 1;

        self.avg_performance_score = self.calculate_score(priorities);
    }
}

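/// Relative weights for compression ratio, speed, and accuracy when scoring
/// methods; the defaults sum to 1.0.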
#[derive(Debug, Clone)]
pub struct CompressionPriorities {
    pub compression_weight: f32,
    pub speed_weight: f32,
    pub accuracy_weight: f32,
}

impl Default for CompressionPriorities {
    fn default() -> Self {
        Self {
            compression_weight: 0.4,
            speed_weight: 0.3,
            accuracy_weight: 0.3,
        }
    }
}

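/// Ladder of compression methods ordered from mild to aggressive, each gated
/// by a threshold ratio.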
#[derive(Debug, Clone)]
pub struct MultiLevelCompression {
    pub levels: Vec<CompressionMethod>,
    pub thresholds: Vec<f32>,
}

impl Default for MultiLevelCompression {
    fn default() -> Self {
        Self::new()
    }
}

impl MultiLevelCompression {
    pub fn new() -> Self {
        Self {
            levels: vec![
                CompressionMethod::None,
                CompressionMethod::Quantization { bits: 16 },
                CompressionMethod::Quantization { bits: 8 },
                // Placeholder: a usable PCA level needs a positive component count.
                CompressionMethod::Pca { components: 0 },
                CompressionMethod::Zstd { level: 3 },
            ],
            thresholds: vec![0.0, 0.1, 0.3, 0.6, 0.8],
        }
    }

    pub fn select_level(&self, required_compression: f32) -> &CompressionMethod {
        // Walk the thresholds in order and return the first level whose
        // threshold covers the required ratio; fall back to the last (most
        // aggressive) level.
        for (i, &threshold) in self.thresholds.iter().enumerate() {
            if required_compression <= threshold {
                return &self.levels[i];
            }
        }
        self.levels
            .last()
            .expect("compression levels should not be empty")
    }
}

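/// Chooses compression methods from measured vector statistics and refines
/// its choices over time from per-method performance metrics.
///
/// A minimal usage sketch (mirrors the unit tests at the bottom of this file):
///
/// ```ignore
/// let mut compressor = AdaptiveCompressor::new();
/// let vector = Vector::new(vec![1.0, 2.0, 3.0, 4.0]);
/// let compressed = compressor.compress_adaptive(&vector)?;
/// let best = compressor.get_best_method();
/// ```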
pub struct AdaptiveCompressor {
    /// Scoring weights used to rank methods.
    priorities: CompressionPriorities,
    /// Rolling performance metrics keyed by method debug string.
    metrics: Arc<RwLock<HashMap<String, CompressionMetrics>>>,
    /// Method ladder used by `compress_multi_level`.
    multi_level: MultiLevelCompression,
    /// Reserved cache for compressor instances (see `get_or_create_compressor`).
    compressor_cache: Arc<RwLock<HashMap<String, Box<dyn VectorCompressor + Send + Sync>>>>,
    /// Cached vector statistics with their computation time, for TTL expiry.
    stats_cache: Arc<RwLock<HashMap<String, (VectorStats, Instant)>>>,
    /// Probability of trying an alternative method (epsilon-greedy).
    exploration_rate: f32,
    /// How long cached statistics stay valid.
    cache_ttl: Duration,
}

impl AdaptiveCompressor {
    pub fn new() -> Self {
        Self::new_with_priorities(CompressionPriorities::default())
    }

    pub fn new_with_priorities(priorities: CompressionPriorities) -> Self {
        Self {
            priorities,
            metrics: Arc::new(RwLock::new(HashMap::new())),
            multi_level: MultiLevelCompression::new(),
            compressor_cache: Arc::new(RwLock::new(HashMap::new())),
            stats_cache: Arc::new(RwLock::new(HashMap::new())),
            exploration_rate: 0.1,
            cache_ttl: Duration::from_secs(3600),
        }
    }

    pub fn analyze_and_recommend(
        &mut self,
        vectors: &[Vector],
    ) -> Result<CompressionMethod, VectorError> {
        let stats = VectorStats::from_vectors(vectors)?;
        let stats_key = self.generate_stats_key(&stats);

        // Reuse a cached recommendation if equivalent stats were seen recently.
        {
            let cache = self
                .stats_cache
                .read()
                .expect("rwlock should not be poisoned");
            if let Some((cached_stats, timestamp)) = cache.get(&stats_key) {
                if timestamp.elapsed() < self.cache_ttl {
                    return Ok(self.recommend_from_stats(cached_stats));
                }
            }
        }

        // Cache the fresh statistics for future calls.
        {
            let mut cache = self
                .stats_cache
                .write()
                .expect("rwlock should not be poisoned");
            cache.insert(stats_key, (stats.clone(), Instant::now()));
        }

        Ok(self.recommend_from_stats(&stats))
    }

    fn recommend_from_stats(&self, stats: &VectorStats) -> CompressionMethod {
        // Highly sparse vectors tolerate aggressive quantization.
        if stats.sparsity > 0.7 {
            return CompressionMethod::Quantization { bits: 4 };
        }

        // Strong correlation across dimensions suits dimensionality reduction.
        if stats.correlation > 0.6 && stats.dimensions > 20 {
            let components = (stats.dimensions as f32 * 0.7) as usize;
            return CompressionMethod::Pca { components };
        }

        // Low entropy compresses well with a general-purpose entropy coder.
        if stats.entropy < 4.0 {
            return CompressionMethod::Zstd { level: 9 };
        }

        // High relative variance calls for more quantization precision.
        if stats.std_dev > stats.mean.abs() {
            return CompressionMethod::Quantization { bits: 12 };
        }

        CompressionMethod::Quantization { bits: 8 }
    }

    pub fn compress_adaptive(&mut self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
        let stats = VectorStats::from_vector(vector)?;
        let method = self.recommend_from_stats(&stats);

        // Epsilon-greedy exploration: occasionally try an alternative method
        // so the metrics cover more of the search space.
        if self.should_explore() {
            let alternative = self.get_alternative_method(&method);
            return self.compress_with_method(vector, &alternative);
        }

        self.compress_with_method(vector, &method)
    }

    pub fn compress_with_method(
        &mut self,
        vector: &Vector,
        method: &CompressionMethod,
    ) -> Result<Vec<u8>, VectorError> {
        let method_key = format!("{method:?}");
        let compressor = self.get_or_create_compressor(method)?;

        let start_time = Instant::now();
        let compressed = compressor.compress(vector)?;
        let compression_time = start_time.elapsed();

        // Round-trip immediately to measure reconstruction quality.
        let decompressed = compressor.decompress(&compressed, vector.dimensions)?;
        let error = self.calculate_reconstruction_error(vector, &decompressed)?;

        // Ratio of compressed bytes to raw f32 bytes (4 bytes per dimension).
        let compression_ratio = compressed.len() as f32 / (vector.dimensions * 4) as f32;

        {
            let mut metrics = self.metrics.write().expect("rwlock should not be poisoned");
            let metric = metrics
                .entry(method_key)
                .or_insert_with(|| CompressionMetrics::new(method.clone()));
            metric.update(
                compression_ratio,
                compression_time,
                Duration::ZERO,
                error,
                &self.priorities,
            );
        }

        Ok(compressed)
    }

    pub fn compress_multi_level(
        &mut self,
        vector: &Vector,
        target_ratio: f32,
    ) -> Result<Vec<u8>, VectorError> {
        let mut current_vector = vector.clone();
        let mut compression_steps = Vec::new();
        let mut total_ratio = 1.0;

        // Apply up to three cascaded methods until the target ratio is met.
        while total_ratio > target_ratio && compression_steps.len() < 3 {
            let remaining_ratio = target_ratio / total_ratio;
            let method = self.multi_level.select_level(remaining_ratio);

            let compressor = self.get_or_create_compressor(method)?;
            let compressed = compressor.compress(&current_vector)?;

            let step_ratio = compressed.len() as f32 / (current_vector.dimensions * 4) as f32;
            total_ratio *= step_ratio;

            compression_steps.push((method.clone(), compressed.clone()));

            // If more compression is still needed, feed the round-tripped
            // vector into the next stage.
            if total_ratio > target_ratio {
                current_vector = compressor.decompress(&compressed, current_vector.dimensions)?;
            }
        }

        self.serialize_multi_level_result(compression_steps)
    }

    pub fn get_best_method(&self) -> CompressionMethod {
        let metrics = self.metrics.read().expect("rwlock should not be poisoned");
        let best = metrics.values().max_by(|a, b| {
            a.avg_performance_score
                .partial_cmp(&b.avg_performance_score)
                .expect("performance scores should be comparable")
        });

        best.map(|m| m.method.clone())
            .unwrap_or(CompressionMethod::Quantization { bits: 8 })
    }

    pub fn get_performance_stats(&self) -> HashMap<String, CompressionMetrics> {
        self.metrics
            .read()
            .expect("rwlock should not be poisoned")
            .clone()
    }

    pub fn update_priorities(&mut self, priorities: CompressionPriorities) {
        self.priorities = priorities;

        // Re-score existing metrics under the new weights.
        let mut metrics = self.metrics.write().expect("rwlock should not be poisoned");
        for metric in metrics.values_mut() {
            metric.avg_performance_score = metric.calculate_score(&self.priorities);
        }
    }

    pub fn reset(&mut self) {
        self.metrics
            .write()
            .expect("rwlock should not be poisoned")
            .clear();
        self.compressor_cache
            .write()
            .expect("rwlock should not be poisoned")
            .clear();
        self.stats_cache
            .write()
            .expect("rwlock should not be poisoned")
            .clear();
    }

    fn get_or_create_compressor(
        &self,
        method: &CompressionMethod,
    ) -> Result<Box<dyn VectorCompressor>, VectorError> {
        // A boxed trait object cannot be cloned out of the shared cache, so a
        // fresh compressor is created on every call. Actually caching here
        // would mean storing `Arc<dyn VectorCompressor + Send + Sync>` in
        // `compressor_cache` and handing out clones of the `Arc`.
        Ok(create_compressor(method))
    }

    fn calculate_reconstruction_error(
        &self,
        original: &Vector,
        reconstructed: &Vector,
    ) -> Result<f32, VectorError> {
        let orig_values = original.as_f32();
        let recon_values = reconstructed.as_f32();

        if orig_values.len() != recon_values.len() {
            return Err(VectorError::InvalidDimensions(
                "Dimension mismatch".to_string(),
            ));
        }

        let mse: f32 = orig_values
            .iter()
            .zip(recon_values.iter())
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f32>()
            / orig_values.len() as f32;

        // Root-mean-square error.
        Ok(mse.sqrt())
    }

    fn generate_stats_key(&self, stats: &VectorStats) -> String {
        format!(
            "{}_{:.2}_{:.2}_{:.2}_{:.2}",
            stats.dimensions, stats.entropy, stats.sparsity, stats.correlation, stats.std_dev
        )
    }

    fn should_explore(&self) -> bool {
        // Hash the current instant for a cheap non-deterministic sample; a
        // fixed RNG seed here would make every exploration decision identical.
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        Instant::now().hash(&mut hasher);
        let sample = (hasher.finish() % 10_000) as f32 / 10_000.0;
        sample < self.exploration_rate
    }

    fn get_alternative_method(&self, current: &CompressionMethod) -> CompressionMethod {
        match current {
            CompressionMethod::None => CompressionMethod::Quantization { bits: 8 },
            CompressionMethod::Quantization { bits } => {
                if *bits > 8 {
                    CompressionMethod::Quantization { bits: bits - 2 }
                } else {
                    CompressionMethod::Pca { components: 16 }
                }
            }
            CompressionMethod::Pca { components: _ } => CompressionMethod::Zstd { level: 6 },
            CompressionMethod::Zstd { level } => {
                if *level < 15 {
                    CompressionMethod::Zstd { level: level + 3 }
                } else {
                    CompressionMethod::Quantization { bits: 4 }
                }
            }
            _ => CompressionMethod::None,
        }
    }

    fn serialize_multi_level_result(
        &self,
        steps: Vec<(CompressionMethod, Vec<u8>)>,
    ) -> Result<Vec<u8>, VectorError> {
        use std::io::Write;

        let mut result = Vec::new();

        // Layout: [step count: u32 LE], then for each step
        // [method id: u8][payload length: u32 LE][payload bytes].
        result.write_all(&(steps.len() as u32).to_le_bytes())?;

        for (method, data) in steps {
            let method_id = match method {
                CompressionMethod::None => 0u8,
                CompressionMethod::Zstd { .. } => 1u8,
                CompressionMethod::Quantization { .. } => 2u8,
                CompressionMethod::Pca { .. } => 3u8,
                CompressionMethod::ProductQuantization { .. } => 4u8,
                CompressionMethod::Adaptive { .. } => 5u8,
            };
            result.push(method_id);

            result.write_all(&(data.len() as u32).to_le_bytes())?;
            result.extend_from_slice(&data);
        }

        Ok(result)
    }
}

impl Default for AdaptiveCompressor {
    fn default() -> Self {
        Self::new()
    }
}

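/// Preset compression priorities keyed by vector domain.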
pub struct CompressionProfiles {
    profiles: HashMap<VectorDomain, CompressionPriorities>,
}

impl Default for CompressionProfiles {
    fn default() -> Self {
        Self::new()
    }
}

impl CompressionProfiles {
    pub fn new() -> Self {
        let mut profiles = HashMap::new();

        // Text embeddings: latency-sensitive similarity search favors speed.
        profiles.insert(
            VectorDomain::TextEmbeddings,
            CompressionPriorities {
                compression_weight: 0.3,
                speed_weight: 0.4,
                accuracy_weight: 0.3,
            },
        );

        // Image features: large vectors make ratio the dominant concern.
        profiles.insert(
            VectorDomain::ImageFeatures,
            CompressionPriorities {
                compression_weight: 0.5,
                speed_weight: 0.2,
                accuracy_weight: 0.3,
            },
        );

        // Knowledge graphs: relational structure makes accuracy paramount.
        profiles.insert(
            VectorDomain::KnowledgeGraph,
            CompressionPriorities {
                compression_weight: 0.2,
                speed_weight: 0.3,
                accuracy_weight: 0.5,
            },
        );

        // Time series: streaming workloads lean toward speed.
        profiles.insert(
            VectorDomain::TimeSeriesData,
            CompressionPriorities {
                compression_weight: 0.3,
                speed_weight: 0.4,
                accuracy_weight: 0.3,
            },
        );

        // Audio features: balanced, with a slight tilt toward ratio.
        profiles.insert(
            VectorDomain::AudioFeatures,
            CompressionPriorities {
                compression_weight: 0.4,
                speed_weight: 0.3,
                accuracy_weight: 0.3,
            },
        );

        Self { profiles }
    }

    pub fn get_profile(&self, domain: &VectorDomain) -> CompressionPriorities {
        self.profiles.get(domain).cloned().unwrap_or_default()
    }

    pub fn update_profile(&mut self, domain: VectorDomain, priorities: CompressionPriorities) {
        self.profiles.insert(domain, priorities);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_vector_stats() {
        let vector = Vector::new(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let stats = VectorStats::from_vector(&vector).unwrap();

        assert_eq!(stats.dimensions, 5);
        assert_eq!(stats.mean, 3.0);
        assert!(stats.std_dev > 0.0);
    }

    #[test]
    fn test_adaptive_compression() {
        let vectors = vec![
            Vector::new(vec![1.0, 2.0, 3.0, 4.0]),
            Vector::new(vec![2.0, 3.0, 4.0, 5.0]),
            Vector::new(vec![3.0, 4.0, 5.0, 6.0]),
        ];

        let mut compressor = AdaptiveCompressor::new();
        let recommended = compressor.analyze_and_recommend(&vectors).unwrap();

        assert!(!matches!(recommended, CompressionMethod::None));
    }

    #[test]
    fn test_compression_metrics() {
        let method = CompressionMethod::Quantization { bits: 8 };
        let mut metrics = CompressionMetrics::new(method);
        let priorities = CompressionPriorities::default();

        metrics.update(
            0.5,
            Duration::from_millis(10),
            Duration::from_millis(5),
            0.01,
            &priorities,
        );

        assert!(metrics.avg_performance_score > 0.0);
        assert_eq!(metrics.usage_count, 1);
    }

    #[test]
    fn test_multi_level_compression() {
        let mut compressor = AdaptiveCompressor::new();
        // Repetitive values compress well, so the target ratio is reachable.
        let values: Vec<f32> = (0..256).map(|i| (i % 16) as f32).collect();
        let vector = Vector::new(values);

        let compressed = compressor.compress_multi_level(&vector, 0.1).unwrap();

        println!(
            "Compressed size: {} bytes, original size: {} bytes",
            compressed.len(),
            vector.dimensions * 4
        );
        assert!(compressed.len() < vector.dimensions * 4);
        assert!(compressed.len() < 900);
    }

    #[test]
    fn test_stats_aggregation() {
        let vectors = vec![
            Vector::new(vec![1.0, 2.0]),
            Vector::new(vec![3.0, 4.0]),
            Vector::new(vec![5.0, 6.0]),
        ];

        let stats = VectorStats::from_vectors(&vectors).unwrap();
        assert_eq!(stats.dimensions, 2);
        assert!(stats.mean > 0.0);
    }
}