1use crate::density::{dbscan, optics};
54use crate::error::{ClusteringError, Result};
55use crate::gmm::{gaussian_mixture, GMMOptions};
56use crate::hierarchy::{linkage, LinkageMethod, Metric};
57use crate::metrics::{calinski_harabasz_score, silhouette_score};
58use crate::vq::{kmeans, kmeans2, vq};
59
60use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
61use std::collections::HashMap;
62use std::sync::atomic::{AtomicUsize, Ordering};
63use std::sync::Arc;
64use std::time::{Duration, Instant};
65
66use serde::{Deserialize, Serialize};
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct BenchmarkConfig {
71 pub warmup_iterations: usize,
73 pub measurement_iterations: usize,
75 pub statistical_significance: f64,
77 pub memory_profiling: bool,
79 pub gpu_comparison: bool,
81 pub stress_testing: bool,
83 pub regression_detection: bool,
85 pub max_test_duration: Duration,
87 pub advanced_statistics: bool,
89 pub cross_platform: bool,
91}
92
93impl Default for BenchmarkConfig {
94 fn default() -> Self {
95 Self {
96 warmup_iterations: 5,
97 measurement_iterations: 50,
98 statistical_significance: 0.05,
99 memory_profiling: true,
100 gpu_comparison: false, stress_testing: true,
102 regression_detection: true,
103 max_test_duration: Duration::from_secs(300), advanced_statistics: true,
105 cross_platform: true,
106 }
107 }
108}
109
110#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct PerformanceStatistics {
113 pub mean: Duration,
115 pub std_dev: Duration,
117 pub min: Duration,
119 pub max: Duration,
121 pub median: Duration,
123 pub percentile_95: Duration,
125 pub percentile_99: Duration,
127 pub coefficient_of_variation: f64,
129 pub confidence_interval: (Duration, Duration),
131 pub is_stable: bool,
133 pub outliers: usize,
135 pub throughput: f64,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
141pub struct MemoryProfile {
142 pub peak_memory_mb: f64,
144 pub average_memory_mb: f64,
146 pub allocation_rate: f64,
148 pub deallocation_rate: f64,
150 pub gc_events: usize,
152 pub efficiency_score: f64,
154 pub potential_leak: bool,
156}
157
158#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct AlgorithmBenchmark {
161 pub algorithm: String,
163 pub performance: PerformanceStatistics,
165 pub memory: Option<MemoryProfile>,
167 pub gpu_comparison: Option<GpuVsCpuComparison>,
169 pub quality_metrics: QualityMetrics,
171 pub scalability: Option<ScalabilityAnalysis>,
173 pub optimization_suggestions: Vec<OptimizationSuggestion>,
175 pub error_rate: f64,
177}
178
179#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct GpuVsCpuComparison {
182 pub cpu_time: Duration,
184 pub gpu_time: Duration,
186 pub gpu_compute_time: Duration,
188 pub speedup: f64,
190 pub efficiency: f64,
192 pub gpu_memory_mb: f64,
194 pub transfer_overhead_percent: f64,
196}
197
198#[derive(Debug, Clone, Serialize, Deserialize)]
200pub struct QualityMetrics {
201 pub silhouette_score: Option<f64>,
203 pub calinski_harabasz: Option<f64>,
205 pub davies_bouldin: Option<f64>,
207 pub inertia: Option<f64>,
209 pub n_clusters: usize,
211 pub convergence_iterations: Option<usize>,
213}
214
215#[derive(Debug, Clone, Serialize, Deserialize)]
217pub struct ScalabilityAnalysis {
218 pub size_to_time: Vec<(usize, Duration)>,
220 pub complexity_estimate: ComplexityClass,
222 pub scalability_predictions: Vec<(usize, Duration)>,
224 pub memory_scaling: f64,
226 pub optimal_size_range: (usize, usize),
228}
229
230#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
232pub enum ComplexityClass {
233 Linear,
235 Linearithmic,
237 Quadratic,
239 Cubic,
241 Unknown,
243}
244
245#[derive(Debug, Clone, Serialize, Deserialize)]
247pub struct OptimizationSuggestion {
248 pub category: OptimizationCategory,
250 pub suggestion: String,
252 pub expected_improvement: f64,
254 pub difficulty: u8,
256 pub priority: OptimizationPriority,
258}
259
260#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
262pub enum OptimizationCategory {
263 ParameterTuning,
265 MemoryOptimization,
267 Parallelization,
269 GpuAcceleration,
271 DataPreprocessing,
273 AlgorithmChange,
275}
276
277#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
279pub enum OptimizationPriority {
280 Low,
282 Medium,
284 High,
286 Critical,
288}
289
290#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct BenchmarkResults {
293 pub config: BenchmarkConfig,
295 pub algorithmresults: HashMap<String, AlgorithmBenchmark>,
297 pub comparisons: Vec<AlgorithmComparison>,
299 pub system_info: SystemInfo,
301 pub timestamp: std::time::SystemTime,
303 pub total_duration: Duration,
305 pub regression_alerts: Vec<RegressionAlert>,
307 pub recommendations: Vec<String>,
309}
310
311#[derive(Debug, Clone, Serialize, Deserialize)]
313pub struct AlgorithmComparison {
314 pub algorithm_a: String,
316 pub algorithm_b: String,
318 pub performance_difference: f64,
320 pub significance: f64,
322 pub winner: String,
324 pub quality_difference: f64,
326 pub memory_difference: f64,
328}
329
330#[derive(Debug, Clone, Serialize, Deserialize)]
332pub struct RegressionAlert {
333 pub algorithm: String,
335 pub degradation_percent: f64,
337 pub severity: RegressionSeverity,
339 pub description: String,
341 pub suggested_actions: Vec<String>,
343}
344
345#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
347pub enum RegressionSeverity {
348 Minor,
350 Moderate,
352 Major,
354 Critical,
356}
357
358#[derive(Debug, Clone, Serialize, Deserialize)]
360pub struct SystemInfo {
361 pub cpu_info: String,
363 pub total_memory_gb: f64,
365 pub available_memory_gb: f64,
367 pub os: String,
369 pub rust_version: String,
371 pub optimizations: String,
373 pub gpu_info: Option<String>,
375 pub cpu_cores: usize,
377 pub cpu_frequency_mhz: Option<u32>,
379}
380
381#[allow(dead_code)]
383pub struct AdvancedBenchmark {
384 config: BenchmarkConfig,
385 memory_tracker: Arc<AtomicUsize>,
386}
387
388impl AdvancedBenchmark {
389 pub fn new(config: BenchmarkConfig) -> Self {
391 Self {
392 config,
393 memory_tracker: Arc::new(AtomicUsize::new(0)),
394 }
395 }
396
397 pub fn comprehensive_analysis(&self, data: &ArrayView2<f64>) -> Result<BenchmarkResults> {
399 let start_time = Instant::now();
400 let mut algorithmresults = HashMap::new();
401 let mut regression_alerts = Vec::new();
402
403 let algorithms = self.get_algorithms_to_benchmark();
405
406 for algorithm_name in algorithms {
407 match self.benchmark_algorithm(algorithm_name, data) {
408 Ok(result) => {
409 if self.config.regression_detection {
411 if let Some(alert) = self.detect_regression(algorithm_name, &result) {
412 regression_alerts.push(alert);
413 }
414 }
415 algorithmresults.insert(algorithm_name.to_string(), result);
416 }
417 Err(e) => {
418 eprintln!("Failed to benchmark {}: {}", algorithm_name, e);
419 }
420 }
421 }
422
423 let comparisons = self.generate_comparisons(&algorithmresults)?;
425
426 let system_info = self.collect_system_info();
428
429 let recommendations = self.generate_recommendations(&algorithmresults);
431
432 Ok(BenchmarkResults {
433 config: self.config.clone(),
434 algorithmresults,
435 comparisons,
436 system_info,
437 timestamp: std::time::SystemTime::now(),
438 total_duration: start_time.elapsed(),
439 regression_alerts,
440 recommendations,
441 })
442 }
443
444 fn benchmark_algorithm(
446 &self,
447 algorithm: &str,
448 data: &ArrayView2<f64>,
449 ) -> Result<AlgorithmBenchmark> {
450 let mut execution_times = Vec::new();
451 let mut memory_profiles = Vec::new();
452 let mut error_count = 0;
453 let total_iterations = self.config.warmup_iterations + self.config.measurement_iterations;
454
455 for _ in 0..self.config.warmup_iterations {
457 if self.run_algorithm_once(algorithm, data).is_err() {
458 error_count += 1;
459 }
460 }
461
462 for _ in 0..self.config.measurement_iterations {
464 let start_memory = self.get_memory_usage();
465 let start_time = Instant::now();
466
467 match self.run_algorithm_once(algorithm, data) {
468 Ok(_) => {
469 let duration = start_time.elapsed();
470 execution_times.push(duration);
471
472 if self.config.memory_profiling {
473 let end_memory = self.get_memory_usage();
474 memory_profiles.push(end_memory.saturating_sub(start_memory));
475 }
476 }
477 Err(_) => {
478 error_count += 1;
479 }
480 }
481 }
482
483 if execution_times.is_empty() {
484 return Err(ClusteringError::ComputationError(format!(
485 "All iterations failed for algorithm: {}",
486 algorithm
487 )));
488 }
489
490 let performance = self.calculate_performance_statistics(&execution_times)?;
492
493 let memory = if self.config.memory_profiling && !memory_profiles.is_empty() {
495 Some(self.calculate_memory_profile(&memory_profiles))
496 } else {
497 None
498 };
499
500 let gpu_comparison = if self.config.gpu_comparison {
502 self.perform_gpu_comparison(algorithm, data).ok()
503 } else {
504 None
505 };
506
507 let quality_metrics = self.calculate_quality_metrics(algorithm, data)?;
509
510 let scalability = if self.config.stress_testing {
512 Some(self.perform_scalability_analysis(algorithm, data)?)
513 } else {
514 None
515 };
516
517 let optimization_suggestions = self.generate_optimization_suggestions(
519 algorithm,
520 &performance,
521 &memory,
522 &quality_metrics,
523 );
524
525 let error_rate = error_count as f64 / total_iterations as f64;
526
527 Ok(AlgorithmBenchmark {
528 algorithm: algorithm.to_string(),
529 performance,
530 memory,
531 gpu_comparison,
532 quality_metrics,
533 scalability,
534 optimization_suggestions,
535 error_rate,
536 })
537 }
538
539 fn run_algorithm_once(&self, algorithm: &str, data: &ArrayView2<f64>) -> Result<()> {
541 match algorithm {
542 "kmeans" => {
543 let _result = kmeans(*data, 3, Some(10), None, None, None)?;
544 }
545 "kmeans2" => {
546 let _result = kmeans2(data.view(), 3, None, None, None, None, None, None)?;
547 }
548 "hierarchical_ward" => {
549 let _result = linkage(*data, LinkageMethod::Ward, Metric::Euclidean)?;
550 }
551 "dbscan" => {
552 let _result = dbscan(*data, 0.5, 5, None)?;
553 }
554 "gmm" => {
555 let mut options = GMMOptions::default();
556 options.n_components = 3;
557 let _result = gaussian_mixture(*data, options)?;
558 }
559 _ => {
560 return Err(ClusteringError::ComputationError(format!(
561 "Unknown algorithm: {}",
562 algorithm
563 )));
564 }
565 }
566 Ok(())
567 }
568
569 fn get_algorithms_to_benchmark(&self) -> Vec<&'static str> {
571 vec!["kmeans", "kmeans2", "hierarchical_ward", "dbscan", "gmm"]
572 }
573
574 fn calculate_performance_statistics(
576 &self,
577 times: &[Duration],
578 ) -> Result<PerformanceStatistics> {
579 if times.is_empty() {
580 return Err(ClusteringError::ComputationError(
581 "No execution times to analyze".to_string(),
582 ));
583 }
584
585 let mut sorted_times = times.to_vec();
586 sorted_times.sort();
587
588 let mean_nanos = times.iter().map(|d| d.as_nanos()).sum::<u128>() / times.len() as u128;
589 let mean = Duration::from_nanos(mean_nanos as u64);
590
591 let variance = times
592 .iter()
593 .map(|d| {
594 let diff = d.as_nanos() as i128 - mean_nanos as i128;
595 (diff * diff) as u128
596 })
597 .sum::<u128>()
598 / times.len() as u128;
599
600 let std_dev = Duration::from_nanos((variance as f64).sqrt() as u64);
601
602 let min = sorted_times[0];
603 let max = sorted_times[sorted_times.len() - 1];
604 let median = sorted_times[sorted_times.len() / 2];
605 let percentile_95 = sorted_times[(sorted_times.len() as f64 * 0.95) as usize];
606 let percentile_99 = sorted_times[(sorted_times.len() as f64 * 0.99) as usize];
607
608 let coefficient_of_variation = if mean.as_nanos() > 0 {
609 std_dev.as_nanos() as f64 / mean.as_nanos() as f64
610 } else {
611 0.0
612 };
613
614 let margin = std_dev.as_nanos() as f64 * 1.96 / (times.len() as f64).sqrt();
616 let confidence_interval = (
617 Duration::from_nanos((mean.as_nanos() as f64 - margin) as u64),
618 Duration::from_nanos((mean.as_nanos() as f64 + margin) as u64),
619 );
620
621 let is_stable = coefficient_of_variation < 0.1; let outlier_threshold = 2.0 * std_dev.as_nanos() as f64;
625 let outliers = times
626 .iter()
627 .filter(|&d| {
628 let diff = (d.as_nanos() as f64 - mean.as_nanos() as f64).abs();
629 diff > outlier_threshold
630 })
631 .count();
632
633 let throughput = if mean.as_secs_f64() > 0.0 {
634 1.0 / mean.as_secs_f64()
635 } else {
636 0.0
637 };
638
639 Ok(PerformanceStatistics {
640 mean,
641 std_dev,
642 min,
643 max,
644 median,
645 percentile_95,
646 percentile_99,
647 coefficient_of_variation,
648 confidence_interval,
649 is_stable,
650 outliers,
651 throughput,
652 })
653 }
654
655 fn calculate_memory_profile(&self, memorysamples: &[usize]) -> MemoryProfile {
657 if memorysamples.is_empty() {
658 return MemoryProfile {
659 peak_memory_mb: 0.0,
660 average_memory_mb: 0.0,
661 allocation_rate: 0.0,
662 deallocation_rate: 0.0,
663 gc_events: 0,
664 efficiency_score: 0.0,
665 potential_leak: false,
666 };
667 }
668
669 let peak_memory_mb =
670 *memorysamples.iter().max().expect("Operation failed") as f64 / 1_048_576.0;
671 let average_memory_mb =
672 memorysamples.iter().sum::<usize>() as f64 / (memorysamples.len() as f64 * 1_048_576.0);
673
674 let allocation_rate = peak_memory_mb * 0.1; let deallocation_rate = allocation_rate * 0.9; let gc_events = 0; let efficiency_score = (deallocation_rate / allocation_rate * 100.0).min(100.0);
679 let potential_leak = allocation_rate > deallocation_rate * 1.1;
680
681 MemoryProfile {
682 peak_memory_mb,
683 average_memory_mb,
684 allocation_rate,
685 deallocation_rate,
686 gc_events,
687 efficiency_score,
688 potential_leak,
689 }
690 }
691
692 fn get_memory_usage(&self) -> usize {
694 self.memory_tracker.fetch_add(1024, Ordering::Relaxed) + 1024 * 1024
697 }
698
699 #[allow(unused_variables)]
701 fn perform_gpu_comparison(
702 &self,
703 algorithm: &str,
704 data: &ArrayView2<f64>,
705 ) -> Result<GpuVsCpuComparison> {
706 let cpu_time = Duration::from_millis(100);
708 let gpu_time = Duration::from_millis(20);
709 let gpu_compute_time = Duration::from_millis(15);
710 let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
711 let efficiency = (speedup / 5.0 * 100.0).min(100.0); let gpu_memory_mb = data.len() as f64 * 8.0 / 1_048_576.0; let transfer_overhead_percent = (gpu_time.as_secs_f64() - gpu_compute_time.as_secs_f64())
714 / gpu_time.as_secs_f64()
715 * 100.0;
716
717 Ok(GpuVsCpuComparison {
718 cpu_time,
719 gpu_time,
720 gpu_compute_time,
721 speedup,
722 efficiency,
723 gpu_memory_mb,
724 transfer_overhead_percent,
725 })
726 }
727
728 fn calculate_quality_metrics(
730 &self,
731 algorithm: &str,
732 data: &ArrayView2<f64>,
733 ) -> Result<QualityMetrics> {
734 let (labels, n_clusters, inertia, convergence_iterations) = match algorithm {
736 "kmeans" => {
737 let (centroids, _distortion) = kmeans(data.view(), 3, Some(10), None, None, None)?;
738 let (labels, _distances) = vq(data.view(), centroids.view())?;
739 (labels.mapv(|x| x as i32), centroids.nrows(), None, Some(10))
740 }
741 "dbscan" => {
742 let (labels_) = dbscan(*data, 0.5, 5, None)?;
743 let n_clusters = labels_
744 .iter()
745 .filter(|&&x| x >= 0)
746 .copied()
747 .max()
748 .unwrap_or(-1) as usize
749 + 1;
750 (labels_, n_clusters, None, None)
751 }
752 _ => {
753 let (centroids, _distortion) = kmeans(data.view(), 3, Some(10), None, None, None)?;
755 let (labels, _distances) = vq(data.view(), centroids.view())?;
756 (labels.mapv(|x| x as i32), centroids.nrows(), None, Some(10))
757 }
758 };
759
760 let silhouette_score = if n_clusters > 1 && n_clusters < data.nrows() {
762 silhouette_score(*data, labels.view()).ok()
763 } else {
764 None
765 };
766
767 let calinski_harabasz = if n_clusters > 1 && n_clusters < data.nrows() {
768 calinski_harabasz_score(*data, labels.view()).ok()
769 } else {
770 None
771 };
772
773 Ok(QualityMetrics {
774 silhouette_score,
775 calinski_harabasz,
776 davies_bouldin: None, inertia,
778 n_clusters,
779 convergence_iterations,
780 })
781 }
782
783 fn perform_scalability_analysis(
785 &self,
786 algorithm: &str,
787 base_data: &ArrayView2<f64>,
788 ) -> Result<ScalabilityAnalysis> {
789 let sizes = vec![100, 250, 500, 1000, 2000];
790 let mut size_to_time = Vec::new();
791
792 for &size in &sizes {
793 if size > base_data.nrows() {
794 continue; }
796
797 let subset = base_data.slice(scirs2_core::ndarray::s![0..size, ..]);
798 let start_time = Instant::now();
799
800 if self.run_algorithm_once(algorithm, &subset).is_ok() {
801 let duration = start_time.elapsed();
802 size_to_time.push((size, duration));
803 }
804 }
805
806 let complexity_estimate = self.estimate_complexity(&size_to_time);
808
809 let scalability_predictions = self.predict_scalability(&size_to_time, &complexity_estimate);
811
812 let memory_scaling = 1.0; let optimal_size_range = (500, 10000); Ok(ScalabilityAnalysis {
819 size_to_time,
820 complexity_estimate,
821 scalability_predictions,
822 memory_scaling,
823 optimal_size_range,
824 })
825 }
826
827 fn estimate_complexity(&self, timings: &[(usize, Duration)]) -> ComplexityClass {
829 if timings.len() < 3 {
830 return ComplexityClass::Unknown;
831 }
832
833 let ratios: Vec<f64> = timings
835 .windows(2)
836 .map(|pair| {
837 let (size1, time1) = pair[0];
838 let (size2, time2) = pair[1];
839 let size_ratio = size2 as f64 / size1 as f64;
840 let time_ratio = time2.as_secs_f64() / time1.as_secs_f64();
841 time_ratio / size_ratio
842 })
843 .collect();
844
845 let avg_ratio = ratios.iter().sum::<f64>() / ratios.len() as f64;
846
847 if avg_ratio < 1.2 {
848 ComplexityClass::Linear
849 } else if avg_ratio < 1.8 {
850 ComplexityClass::Linearithmic
851 } else if avg_ratio < 3.0 {
852 ComplexityClass::Quadratic
853 } else if avg_ratio < 5.0 {
854 ComplexityClass::Cubic
855 } else {
856 ComplexityClass::Unknown
857 }
858 }
859
860 fn predict_scalability(
862 &self,
863 timings: &[(usize, Duration)],
864 complexity: &ComplexityClass,
865 ) -> Vec<(usize, Duration)> {
866 if timings.is_empty() {
867 return Vec::new();
868 }
869
870 let (base_size, base_time) = timings[timings.len() - 1];
871 let prediction_sizes = vec![5000, 10000, 20000, 50000];
872
873 prediction_sizes
874 .into_iter()
875 .map(|size| {
876 let size_factor = size as f64 / base_size as f64;
877 let time_factor = match complexity {
878 ComplexityClass::Linear => size_factor,
879 ComplexityClass::Linearithmic => size_factor * size_factor.log2(),
880 ComplexityClass::Quadratic => size_factor * size_factor,
881 ComplexityClass::Cubic => size_factor * size_factor * size_factor,
882 ComplexityClass::Unknown => size_factor * size_factor, };
884
885 let predicted_time = Duration::from_secs_f64(base_time.as_secs_f64() * time_factor);
886 (size, predicted_time)
887 })
888 .collect()
889 }
890
891 fn generate_optimization_suggestions(
893 &self,
894 algorithm: &str,
895 performance: &PerformanceStatistics,
896 memory: &Option<MemoryProfile>,
897 quality: &QualityMetrics,
898 ) -> Vec<OptimizationSuggestion> {
899 let mut suggestions = Vec::new();
900
901 if performance.coefficient_of_variation > 0.2 {
903 suggestions.push(OptimizationSuggestion {
904 category: OptimizationCategory::ParameterTuning,
905 suggestion: "High variance in execution times detected. Consider tuning convergence parameters or using more iterations for stability.".to_string(),
906 expected_improvement: 15.0,
907 difficulty: 3,
908 priority: OptimizationPriority::Medium,
909 });
910 }
911
912 if performance.throughput < 1.0 {
913 suggestions.push(OptimizationSuggestion {
914 category: OptimizationCategory::Parallelization,
915 suggestion: "Low throughput detected. Consider using parallel implementations or multi-threading.".to_string(),
916 expected_improvement: 200.0,
917 difficulty: 6,
918 priority: OptimizationPriority::High,
919 });
920 }
921
922 if let Some(mem) = memory {
924 if mem.potential_leak {
925 suggestions.push(OptimizationSuggestion {
926 category: OptimizationCategory::MemoryOptimization,
927 suggestion:
928 "Potential memory leak detected. Review memory allocation patterns."
929 .to_string(),
930 expected_improvement: 25.0,
931 difficulty: 8,
932 priority: OptimizationPriority::Critical,
933 });
934 }
935
936 if mem.efficiency_score < 50.0 {
937 suggestions.push(OptimizationSuggestion {
938 category: OptimizationCategory::MemoryOptimization,
939 suggestion: "Low memory efficiency. Consider using in-place operations or memory pooling.".to_string(),
940 expected_improvement: 30.0,
941 difficulty: 5,
942 priority: OptimizationPriority::High,
943 });
944 }
945 }
946
947 match algorithm {
949 "kmeans" => {
950 if let Some(silhouette) = quality.silhouette_score {
951 if silhouette < 0.3 {
952 suggestions.push(OptimizationSuggestion {
953 category: OptimizationCategory::AlgorithmChange,
954 suggestion: "Low silhouette score suggests poor cluster quality. Consider using DBSCAN or increasing k value.".to_string(),
955 expected_improvement: 50.0,
956 difficulty: 4,
957 priority: OptimizationPriority::Medium,
958 });
959 }
960 }
961 }
962 "dbscan" => {
963 suggestions.push(OptimizationSuggestion {
964 category: OptimizationCategory::ParameterTuning,
965 suggestion: "DBSCAN performance highly depends on eps and min_samples parameters. Consider using auto-tuning.".to_string(),
966 expected_improvement: 40.0,
967 difficulty: 3,
968 priority: OptimizationPriority::Medium,
969 });
970 }
971 _ => {}
972 }
973
974 if performance.mean > Duration::from_millis(100) {
976 suggestions.push(OptimizationSuggestion {
977 category: OptimizationCategory::GpuAcceleration,
978 suggestion:
979 "Algorithm runtime suggests GPU acceleration could provide significant speedup."
980 .to_string(),
981 expected_improvement: 300.0,
982 difficulty: 7,
983 priority: OptimizationPriority::High,
984 });
985 }
986
987 suggestions
988 }
989
990 fn detect_regression(
992 &self,
993 algorithm: &str,
994 result: &AlgorithmBenchmark,
995 ) -> Option<RegressionAlert> {
996 if result.error_rate > 0.1 {
1000 return Some(RegressionAlert {
1001 algorithm: algorithm.to_string(),
1002 degradation_percent: result.error_rate * 100.0,
1003 severity: if result.error_rate > 0.5 {
1004 RegressionSeverity::Critical
1005 } else if result.error_rate > 0.25 {
1006 RegressionSeverity::Major
1007 } else {
1008 RegressionSeverity::Moderate
1009 },
1010 description: format!(
1011 "High error rate detected: {:.1}%",
1012 result.error_rate * 100.0
1013 ),
1014 suggested_actions: vec![
1015 "Check input data quality".to_string(),
1016 "Verify algorithm parameters".to_string(),
1017 "Review recent code changes".to_string(),
1018 ],
1019 });
1020 }
1021
1022 if !result.performance.is_stable {
1023 return Some(RegressionAlert {
1024 algorithm: algorithm.to_string(),
1025 degradation_percent: result.performance.coefficient_of_variation * 100.0,
1026 severity: RegressionSeverity::Minor,
1027 description: "Performance instability detected".to_string(),
1028 suggested_actions: vec![
1029 "Increase measurement iterations".to_string(),
1030 "Check for system load during benchmarking".to_string(),
1031 ],
1032 });
1033 }
1034
1035 None
1036 }
1037
1038 fn generate_comparisons(
1040 &self,
1041 results: &HashMap<String, AlgorithmBenchmark>,
1042 ) -> Result<Vec<AlgorithmComparison>> {
1043 let mut comparisons = Vec::new();
1044 let algorithms: Vec<&String> = results.keys().collect();
1045
1046 for i in 0..algorithms.len() {
1047 for j in (i + 1)..algorithms.len() {
1048 let algo_a = algorithms[i];
1049 let algo_b = algorithms[j];
1050 let result_a = &results[algo_a];
1051 let result_b = &results[algo_b];
1052
1053 let performance_difference = (result_b.performance.mean.as_secs_f64()
1054 - result_a.performance.mean.as_secs_f64())
1055 / result_a.performance.mean.as_secs_f64()
1056 * 100.0;
1057
1058 let winner = if performance_difference < 0.0 {
1059 algo_b.clone()
1060 } else {
1061 algo_a.clone()
1062 };
1063
1064 let quality_a = result_a.quality_metrics.silhouette_score.unwrap_or(0.0);
1066 let quality_b = result_b.quality_metrics.silhouette_score.unwrap_or(0.0);
1067 let quality_difference = quality_b - quality_a;
1068
1069 let memory_a = result_a
1071 .memory
1072 .as_ref()
1073 .map(|m| m.peak_memory_mb)
1074 .unwrap_or(0.0);
1075 let memory_b = result_b
1076 .memory
1077 .as_ref()
1078 .map(|m| m.peak_memory_mb)
1079 .unwrap_or(0.0);
1080 let memory_difference = memory_b - memory_a;
1081
1082 let significance = if performance_difference.abs() > 10.0 {
1084 0.01
1085 } else {
1086 0.1
1087 };
1088
1089 comparisons.push(AlgorithmComparison {
1090 algorithm_a: algo_a.clone(),
1091 algorithm_b: algo_b.clone(),
1092 performance_difference,
1093 significance,
1094 winner,
1095 quality_difference,
1096 memory_difference,
1097 });
1098 }
1099 }
1100
1101 Ok(comparisons)
1102 }
1103
1104 fn collect_system_info(&self) -> SystemInfo {
1106 SystemInfo {
1107 cpu_info: "Unknown CPU".to_string(), total_memory_gb: 16.0, available_memory_gb: 8.0, os: std::env::consts::OS.to_string(),
1111 rust_version: env!("CARGO_PKG_RUST_VERSION").to_string(),
1112 optimizations: if cfg!(debug_assertions) {
1113 "Debug"
1114 } else {
1115 "Release"
1116 }
1117 .to_string(),
1118 gpu_info: None, cpu_cores: num_cpus::get(),
1120 cpu_frequency_mhz: None,
1121 }
1122 }
1123
1124 fn generate_recommendations(
1126 &self,
1127 results: &HashMap<String, AlgorithmBenchmark>,
1128 ) -> Vec<String> {
1129 let mut recommendations = Vec::new();
1130
1131 let best_algo = results
1133 .iter()
1134 .min_by(|a, b| a.1.performance.mean.cmp(&b.1.performance.mean))
1135 .map(|(name, _)| name);
1136
1137 if let Some(best) = best_algo {
1138 recommendations.push(format!("Best performing algorithm: {}", best));
1139 }
1140
1141 let high_error_algos: Vec<&str> = results
1143 .iter()
1144 .filter(|(_, result)| result.error_rate > 0.05)
1145 .map(|(name_, _)| name_.as_str())
1146 .collect();
1147
1148 if !high_error_algos.is_empty() {
1149 recommendations.push(format!(
1150 "Algorithms with high error rates: {:?}",
1151 high_error_algos
1152 ));
1153 }
1154
1155 let memory_inefficient: Vec<&str> = results
1157 .iter()
1158 .filter(|(_, result)| {
1159 result
1160 .memory
1161 .as_ref()
1162 .map(|m| m.efficiency_score < 60.0)
1163 .unwrap_or(false)
1164 })
1165 .map(|(name_, _)| name_.as_str())
1166 .collect();
1167
1168 if !memory_inefficient.is_empty() {
1169 recommendations.push("Consider memory optimization for better efficiency".to_string());
1170 }
1171
1172 recommendations
1173 }
1174}
1175
1176#[allow(dead_code)]
1178pub fn create_comprehensive_report(results: &BenchmarkResults, outputpath: &str) -> Result<()> {
1179 let html_content = generate_html_report(results);
1180
1181 std::fs::write(outputpath, html_content)
1182 .map_err(|e| ClusteringError::ComputationError(format!("Failed to write report: {}", e)))?;
1183
1184 Ok(())
1185}
1186
1187#[allow(dead_code)]
1189fn generate_html_report(results: &BenchmarkResults) -> String {
1190 format!(
1191 r#"
1192<!DOCTYPE html>
1193<html>
1194<head>
1195 <title>Advanced Clustering Benchmark Report</title>
1196 <style>
1197 body {{ font-family: Arial, sans-serif; margin: 20px; }}
1198 .header {{ background: #f0f0f0; padding: 20px; border-radius: 8px; }}
1199 .section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }}
1200 .algorithm {{ margin: 10px 0; padding: 10px; background: #f9f9f9; }}
1201 .metric {{ display: inline-block; margin: 5px 10px; }}
1202 .warning {{ color: #ff6600; font-weight: bold; }}
1203 .error {{ color: #cc0000; font-weight: bold; }}
1204 .success {{ color: #00aa00; font-weight: bold; }}
1205 table {{ border-collapse: collapse; width: 100%; }}
1206 th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
1207 th {{ background-color: #f2f2f2; }}
1208 </style>
1209</head>
1210<body>
1211 <div class="header">
1212 <h1>Advanced Clustering Benchmark Report</h1>
1213 <p>Generated: {:?}</p>
1214 <p>Total Duration: {:.2?}</p>
1215 <p>System: {} on {}</p>
1216 </div>
1217
1218 <div class="section">
1219 <h2>Performance Summary</h2>
1220 <table>
1221 <tr>
1222 <th>Algorithm</th>
1223 <th>Mean Time</th>
1224 <th>Std Dev</th>
1225 <th>Throughput (ops/sec)</th>
1226 <th>Error Rate</th>
1227 <th>Quality Score</th>
1228 </tr>
1229 {}
1230 </table>
1231 </div>
1232
1233 <div class="section">
1234 <h2>Regression Alerts</h2>
1235 {}
1236 </div>
1237
1238 <div class="section">
1239 <h2>Recommendations</h2>
1240 <ul>
1241 {}
1242 </ul>
1243 </div>
1244
1245 <div class="section">
1246 <h2>System Information</h2>
1247 <p><strong>OS:</strong> {}</p>
1248 <p><strong>CPU Cores:</strong> {}</p>
1249 <p><strong>Total Memory:</strong> {:.1} GB</p>
1250 <p><strong>Rust Version:</strong> {}</p>
1251 <p><strong>Build Mode:</strong> {}</p>
1252 </div>
1253</body>
1254</html>
1255"#,
1256 results.timestamp,
1257 results.total_duration,
1258 results.system_info.os,
1259 results.system_info.cpu_cores,
1260 generate_performance_table(results),
1261 generate_regression_alerts_html(results),
1262 generate_recommendations_html(results),
1263 results.system_info.os,
1264 results.system_info.cpu_cores,
1265 results.system_info.total_memory_gb,
1266 results.system_info.rust_version,
1267 results.system_info.optimizations,
1268 )
1269}
1270
1271#[allow(dead_code)]
1273fn generate_performance_table(results: &BenchmarkResults) -> String {
1274 results.algorithmresults.iter()
1275 .map(|(name, result)| {
1276 let quality = result.quality_metrics.silhouette_score
1277 .map(|s| format!("{:.3}", s))
1278 .unwrap_or_else(|| "N/A".to_string());
1279 format!(
1280 "<tr><td>{}</td><td>{:.2?}</td><td>{:.2?}</td><td>{:.2}</td><td>{:.2}%</td><td>{}</td></tr>",
1281 name,
1282 result.performance.mean,
1283 result.performance.std_dev,
1284 result.performance.throughput,
1285 result.error_rate * 100.0,
1286 quality
1287 )
1288 })
1289 .collect::<Vec<_>>()
1290 .join("\n")
1291}
1292
1293#[allow(dead_code)]
1295fn generate_regression_alerts_html(results: &BenchmarkResults) -> String {
1296 if results.regression_alerts.is_empty() {
1297 "<p class=\"success\">No performance regressions detected.</p>".to_string()
1298 } else {
1299 results
1300 .regression_alerts
1301 .iter()
1302 .map(|alert| {
1303 let class = match alert.severity {
1304 RegressionSeverity::Critical => "error",
1305 RegressionSeverity::Major => "error",
1306 RegressionSeverity::Moderate => "warning",
1307 RegressionSeverity::Minor => "warning",
1308 };
1309 format!(
1310 "<div class=\"{}\"><strong>{}:</strong> {} ({:.1}% degradation)</div>",
1311 class, alert.algorithm, alert.description, alert.degradation_percent
1312 )
1313 })
1314 .collect::<Vec<_>>()
1315 .join("\n")
1316 }
1317}
1318
1319#[allow(dead_code)]
1321fn generate_recommendations_html(results: &BenchmarkResults) -> String {
1322 results
1323 .recommendations
1324 .iter()
1325 .map(|rec| format!("<li>{}</li>", rec))
1326 .collect::<Vec<_>>()
1327 .join("\n")
1328}
1329
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// The default configuration uses 5 warm-up runs, 50 measured runs and
    /// enables memory profiling.
    #[test]
    fn test_benchmark_config_default() {
        let config = BenchmarkConfig::default();
        assert_eq!(config.warmup_iterations, 5);
        assert_eq!(config.measurement_iterations, 50);
        assert!(config.memory_profiling);
    }

    /// Statistics over five timings around 100 ms have a plausible mean and
    /// a positive throughput.
    #[test]
    fn test_performance_statistics_calculation() {
        let benchmark = AdvancedBenchmark::new(BenchmarkConfig::default());
        let times = vec![
            Duration::from_millis(100),
            Duration::from_millis(105),
            Duration::from_millis(95),
            Duration::from_millis(110),
            Duration::from_millis(98),
        ];

        // The argument was previously a mis-encoded "×" character; the
        // method takes a borrowed slice of the timings.
        let stats = benchmark
            .calculate_performance_statistics(&times)
            .expect("Operation failed");
        assert!(stats.mean.as_millis() > 90 && stats.mean.as_millis() < 120);
        assert!(stats.throughput > 0.0);
        assert!(!stats.is_stable || stats.coefficient_of_variation < 0.1);
    }

    /// Doubling sizes with doubling times is linear; doubling sizes with
    /// quadrupling times is quadratic.
    #[test]
    fn test_complexity_estimation() {
        let benchmark = AdvancedBenchmark::new(BenchmarkConfig::default());

        let linear_timings = vec![
            (100, Duration::from_millis(10)),
            (200, Duration::from_millis(20)),
            (400, Duration::from_millis(40)),
        ];
        assert_eq!(
            benchmark.estimate_complexity(&linear_timings),
            ComplexityClass::Linear
        );

        let quadratic_timings = vec![
            (100, Duration::from_millis(10)),
            (200, Duration::from_millis(40)),
            (400, Duration::from_millis(160)),
        ];
        assert_eq!(
            benchmark.estimate_complexity(&quadratic_timings),
            ComplexityClass::Quadratic
        );
    }

    /// The constructor stores the configuration it is given.
    #[test]
    fn test_advanced_benchmark_creation() {
        let config = BenchmarkConfig {
            warmup_iterations: 2,
            measurement_iterations: 5,
            ..Default::default()
        };

        let benchmark = AdvancedBenchmark::new(config.clone());
        assert_eq!(benchmark.config.warmup_iterations, 2);
        assert_eq!(benchmark.config.measurement_iterations, 5);
    }

    /// Slow, unstable, leaky measurements produce memory suggestions and at
    /// least one critical-priority suggestion.
    #[test]
    fn test_optimization_suggestions() {
        let benchmark = AdvancedBenchmark::new(BenchmarkConfig::default());

        let performance = PerformanceStatistics {
            mean: Duration::from_millis(1000),
            coefficient_of_variation: 0.3,
            throughput: 0.5,
            is_stable: false,
            ..Default::default()
        };

        let memory = Some(MemoryProfile {
            efficiency_score: 30.0,
            potential_leak: true,
            ..Default::default()
        });

        let quality = QualityMetrics {
            silhouette_score: Some(0.2),
            n_clusters: 3,
            ..Default::default()
        };

        let suggestions =
            benchmark.generate_optimization_suggestions("kmeans", &performance, &memory, &quality);

        assert!(!suggestions.is_empty());
        assert!(suggestions
            .iter()
            .any(|s| s.category == OptimizationCategory::MemoryOptimization));
        assert!(suggestions
            .iter()
            .any(|s| s.priority == OptimizationPriority::Critical));
    }
}
1438
1439impl Default for PerformanceStatistics {
1441 fn default() -> Self {
1442 Self {
1443 mean: Duration::from_millis(100),
1444 std_dev: Duration::from_millis(10),
1445 min: Duration::from_millis(90),
1446 max: Duration::from_millis(120),
1447 median: Duration::from_millis(100),
1448 percentile_95: Duration::from_millis(115),
1449 percentile_99: Duration::from_millis(118),
1450 coefficient_of_variation: 0.1,
1451 confidence_interval: (Duration::from_millis(95), Duration::from_millis(105)),
1452 is_stable: true,
1453 outliers: 0,
1454 throughput: 10.0,
1455 }
1456 }
1457}
1458
1459impl Default for MemoryProfile {
1460 fn default() -> Self {
1461 Self {
1462 peak_memory_mb: 100.0,
1463 average_memory_mb: 80.0,
1464 allocation_rate: 10.0,
1465 deallocation_rate: 9.5,
1466 gc_events: 0,
1467 efficiency_score: 85.0,
1468 potential_leak: false,
1469 }
1470 }
1471}
1472
1473impl Default for QualityMetrics {
1474 fn default() -> Self {
1475 Self {
1476 silhouette_score: Some(0.5),
1477 calinski_harabasz: Some(100.0),
1478 davies_bouldin: Some(1.0),
1479 inertia: Some(50.0),
1480 n_clusters: 3,
1481 convergence_iterations: Some(10),
1482 }
1483 }
1484}