use crate::density::{dbscan, optics};
use crate::error::{ClusteringError, Result};
use crate::gmm::{gaussian_mixture, GMMOptions};
use crate::hierarchy::{linkage, LinkageMethod, Metric};
use crate::metrics::{calinski_harabasz_score, silhouette_score};
use crate::vq::{kmeans, kmeans2, vq};

use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};

use serde::{Deserialize, Serialize};

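/// Configuration for the benchmarking suite: iteration counts, which analyses are
/// enabled, and the per-test time budget.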
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkConfig {
    pub warmup_iterations: usize,
    pub measurement_iterations: usize,
    pub statistical_significance: f64,
    pub memory_profiling: bool,
    pub gpu_comparison: bool,
    pub stress_testing: bool,
    pub regression_detection: bool,
    pub max_test_duration: Duration,
    pub advanced_statistics: bool,
    pub cross_platform: bool,
}

impl Default for BenchmarkConfig {
    fn default() -> Self {
        Self {
            warmup_iterations: 5,
            measurement_iterations: 50,
            statistical_significance: 0.05,
            memory_profiling: true,
            gpu_comparison: false,
            stress_testing: true,
            regression_detection: true,
            max_test_duration: Duration::from_secs(300),
            advanced_statistics: true,
            cross_platform: true,
        }
    }
}

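/// Summary statistics computed from the measured execution times of one algorithm.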
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceStatistics {
    pub mean: Duration,
    pub std_dev: Duration,
    pub min: Duration,
    pub max: Duration,
    pub median: Duration,
    pub percentile_95: Duration,
    pub percentile_99: Duration,
    pub coefficient_of_variation: f64,
    pub confidence_interval: (Duration, Duration),
    pub is_stable: bool,
    pub outliers: usize,
    pub throughput: f64,
}

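/// Memory-usage characteristics observed (or estimated) while an algorithm ran.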
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryProfile {
    pub peak_memory_mb: f64,
    pub average_memory_mb: f64,
    pub allocation_rate: f64,
    pub deallocation_rate: f64,
    pub gc_events: usize,
    pub efficiency_score: f64,
    pub potential_leak: bool,
}

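/// Full set of benchmark results collected for a single clustering algorithm.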
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmBenchmark {
    pub algorithm: String,
    pub performance: PerformanceStatistics,
    pub memory: Option<MemoryProfile>,
    pub gpu_comparison: Option<GpuVsCpuComparison>,
    pub quality_metrics: QualityMetrics,
    pub scalability: Option<ScalabilityAnalysis>,
    pub optimization_suggestions: Vec<OptimizationSuggestion>,
    pub error_rate: f64,
}

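/// Timing comparison between CPU and GPU execution of the same algorithm.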
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuVsCpuComparison {
    pub cpu_time: Duration,
    pub gpu_time: Duration,
    pub gpu_compute_time: Duration,
    pub speedup: f64,
    pub efficiency: f64,
    pub gpu_memory_mb: f64,
    pub transfer_overhead_percent: f64,
}

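/// Clustering-quality metrics computed from the labels produced during benchmarking.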
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    pub silhouette_score: Option<f64>,
    pub calinski_harabasz: Option<f64>,
    pub davies_bouldin: Option<f64>,
    pub inertia: Option<f64>,
    pub n_clusters: usize,
    pub convergence_iterations: Option<usize>,
}

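/// How runtime grows with input size, including extrapolated timings for larger inputs.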
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityAnalysis {
    pub size_to_time: Vec<(usize, Duration)>,
    pub complexity_estimate: ComplexityClass,
    pub scalability_predictions: Vec<(usize, Duration)>,
    pub memory_scaling: f64,
    pub optimal_size_range: (usize, usize),
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ComplexityClass {
    Linear,
    Linearithmic,
    Quadratic,
    Cubic,
    Unknown,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationSuggestion {
    pub category: OptimizationCategory,
    pub suggestion: String,
    pub expected_improvement: f64,
    pub difficulty: u8,
    pub priority: OptimizationPriority,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum OptimizationCategory {
    ParameterTuning,
    MemoryOptimization,
    Parallelization,
    GpuAcceleration,
    DataPreprocessing,
    AlgorithmChange,
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum OptimizationPriority {
    Low,
    Medium,
    High,
    Critical,
}

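/// Aggregated output of a complete benchmark run across all algorithms.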
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResults {
    pub config: BenchmarkConfig,
    pub algorithmresults: HashMap<String, AlgorithmBenchmark>,
    pub comparisons: Vec<AlgorithmComparison>,
    pub system_info: SystemInfo,
    pub timestamp: std::time::SystemTime,
    pub total_duration: Duration,
    pub regression_alerts: Vec<RegressionAlert>,
    pub recommendations: Vec<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlgorithmComparison {
    pub algorithm_a: String,
    pub algorithm_b: String,
    pub performance_difference: f64,
    pub significance: f64,
    pub winner: String,
    pub quality_difference: f64,
    pub memory_difference: f64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionAlert {
    pub algorithm: String,
    pub degradation_percent: f64,
    pub severity: RegressionSeverity,
    pub description: String,
    pub suggested_actions: Vec<String>,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RegressionSeverity {
    Minor,
    Moderate,
    Major,
    Critical,
}

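/// Basic information about the machine the benchmarks were run on.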
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    pub cpu_info: String,
    pub total_memory_gb: f64,
    pub available_memory_gb: f64,
    pub os: String,
    pub rust_version: String,
    pub optimizations: String,
    pub gpu_info: Option<String>,
    pub cpu_cores: usize,
    pub cpu_frequency_mhz: Option<u32>,
}

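/// Orchestrates repeated runs of each clustering algorithm and aggregates timing,
/// memory, quality, and scalability information into a [`BenchmarkResults`].
///
/// A minimal usage sketch (illustrative only; the dataset shape and the surrounding
/// error handling are assumptions):
///
/// ```ignore
/// use scirs2_core::ndarray::Array2;
///
/// // Two small, well-separated blobs of 2-D points.
/// let data = Array2::from_shape_vec(
///     (6, 2),
///     vec![1.0, 1.0, 1.1, 0.9, 0.9, 1.1, 5.0, 5.0, 5.1, 4.9, 4.9, 5.1],
/// )
/// .unwrap();
///
/// let bench = AdvancedBenchmark::new(BenchmarkConfig::default());
/// let results = bench.comprehensive_analysis(&data.view()).unwrap();
/// println!("benchmarked {} algorithms", results.algorithmresults.len());
/// ```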
#[allow(dead_code)]
pub struct AdvancedBenchmark {
    config: BenchmarkConfig,
    memory_tracker: Arc<AtomicUsize>,
}

impl AdvancedBenchmark {
    pub fn new(config: BenchmarkConfig) -> Self {
        Self {
            config,
            memory_tracker: Arc::new(AtomicUsize::new(0)),
        }
    }

    pub fn comprehensive_analysis(&self, data: &ArrayView2<f64>) -> Result<BenchmarkResults> {
        let start_time = Instant::now();
        let mut algorithmresults = HashMap::new();
        let mut regression_alerts = Vec::new();

        let algorithms = self.get_algorithms_to_benchmark();

        for algorithm_name in algorithms {
            match self.benchmark_algorithm(algorithm_name, data) {
                Ok(result) => {
                    if self.config.regression_detection {
                        if let Some(alert) = self.detect_regression(algorithm_name, &result) {
                            regression_alerts.push(alert);
                        }
                    }
                    algorithmresults.insert(algorithm_name.to_string(), result);
                }
                Err(e) => {
                    eprintln!("Failed to benchmark {}: {}", algorithm_name, e);
                }
            }
        }

        let comparisons = self.generate_comparisons(&algorithmresults)?;

        let system_info = self.collect_system_info();

        let recommendations = self.generate_recommendations(&algorithmresults);

        Ok(BenchmarkResults {
            config: self.config.clone(),
            algorithmresults,
            comparisons,
            system_info,
            timestamp: std::time::SystemTime::now(),
            total_duration: start_time.elapsed(),
            regression_alerts,
            recommendations,
        })
    }

    fn benchmark_algorithm(
        &self,
        algorithm: &str,
        data: &ArrayView2<f64>,
    ) -> Result<AlgorithmBenchmark> {
        let mut execution_times = Vec::new();
        let mut memory_profiles = Vec::new();
        let mut error_count = 0;
        let total_iterations = self.config.warmup_iterations + self.config.measurement_iterations;

        for _ in 0..self.config.warmup_iterations {
            if self.run_algorithm_once(algorithm, data).is_err() {
                error_count += 1;
            }
        }

        for _ in 0..self.config.measurement_iterations {
            let start_memory = self.get_memory_usage();
            let start_time = Instant::now();

            match self.run_algorithm_once(algorithm, data) {
                Ok(_) => {
                    let duration = start_time.elapsed();
                    execution_times.push(duration);

                    if self.config.memory_profiling {
                        let end_memory = self.get_memory_usage();
                        memory_profiles.push(end_memory.saturating_sub(start_memory));
                    }
                }
                Err(_) => {
                    error_count += 1;
                }
            }
        }

        if execution_times.is_empty() {
            return Err(ClusteringError::ComputationError(format!(
                "All iterations failed for algorithm: {}",
                algorithm
            )));
        }

        let performance = self.calculate_performance_statistics(&execution_times)?;

        let memory = if self.config.memory_profiling && !memory_profiles.is_empty() {
            Some(self.calculate_memory_profile(&memory_profiles))
        } else {
            None
        };

        let gpu_comparison = if self.config.gpu_comparison {
            self.perform_gpu_comparison(algorithm, data).ok()
        } else {
            None
        };

        let quality_metrics = self.calculate_quality_metrics(algorithm, data)?;

        let scalability = if self.config.stress_testing {
            Some(self.perform_scalability_analysis(algorithm, data)?)
        } else {
            None
        };

        let optimization_suggestions = self.generate_optimization_suggestions(
            algorithm,
            &performance,
            &memory,
            &quality_metrics,
        );

        let error_rate = error_count as f64 / total_iterations as f64;

        Ok(AlgorithmBenchmark {
            algorithm: algorithm.to_string(),
            performance,
            memory,
            gpu_comparison,
            quality_metrics,
            scalability,
            optimization_suggestions,
            error_rate,
        })
    }

    fn run_algorithm_once(&self, algorithm: &str, data: &ArrayView2<f64>) -> Result<()> {
        match algorithm {
            "kmeans" => {
                let _result = kmeans(*data, 3, Some(10), None, None, None)?;
            }
            "kmeans2" => {
                let _result = kmeans2(data.view(), 3, None, None, None, None, None, None)?;
            }
            "hierarchical_ward" => {
                let _result = linkage(*data, LinkageMethod::Ward, Metric::Euclidean)?;
            }
            "dbscan" => {
                let _result = dbscan(*data, 0.5, 5, None)?;
            }
            "gmm" => {
                let mut options = GMMOptions::default();
                options.n_components = 3;
                let _result = gaussian_mixture(*data, options)?;
            }
            _ => {
                return Err(ClusteringError::ComputationError(format!(
                    "Unknown algorithm: {}",
                    algorithm
                )));
            }
        }
        Ok(())
    }

    fn get_algorithms_to_benchmark(&self) -> Vec<&'static str> {
        vec!["kmeans", "kmeans2", "hierarchical_ward", "dbscan", "gmm"]
    }

    fn calculate_performance_statistics(
        &self,
        times: &[Duration],
    ) -> Result<PerformanceStatistics> {
        if times.is_empty() {
            return Err(ClusteringError::ComputationError(
                "No execution times to analyze".to_string(),
            ));
        }

        let mut sorted_times = times.to_vec();
        sorted_times.sort();

        let mean_nanos = times.iter().map(|d| d.as_nanos()).sum::<u128>() / times.len() as u128;
        let mean = Duration::from_nanos(mean_nanos as u64);

        let variance = times
            .iter()
            .map(|d| {
                let diff = d.as_nanos() as i128 - mean_nanos as i128;
                (diff * diff) as u128
            })
            .sum::<u128>()
            / times.len() as u128;

        let std_dev = Duration::from_nanos((variance as f64).sqrt() as u64);

        let min = sorted_times[0];
        let max = sorted_times[sorted_times.len() - 1];
        let median = sorted_times[sorted_times.len() / 2];
        let percentile_95 = sorted_times[(sorted_times.len() as f64 * 0.95) as usize];
        let percentile_99 = sorted_times[(sorted_times.len() as f64 * 0.99) as usize];

        let coefficient_of_variation = if mean.as_nanos() > 0 {
            std_dev.as_nanos() as f64 / mean.as_nanos() as f64
        } else {
            0.0
        };

        // 95% confidence interval for the mean, using a normal approximation (z = 1.96).
        let margin = std_dev.as_nanos() as f64 * 1.96 / (times.len() as f64).sqrt();
        let confidence_interval = (
            Duration::from_nanos((mean.as_nanos() as f64 - margin) as u64),
            Duration::from_nanos((mean.as_nanos() as f64 + margin) as u64),
        );

        // Consider the measurement stable when the relative spread is below 10%.
        let is_stable = coefficient_of_variation < 0.1;

        // Count samples deviating from the mean by more than two standard deviations.
        let outlier_threshold = 2.0 * std_dev.as_nanos() as f64;
        let outliers = times
            .iter()
            .filter(|&d| {
                let diff = (d.as_nanos() as f64 - mean.as_nanos() as f64).abs();
                diff > outlier_threshold
            })
            .count();

        let throughput = if mean.as_secs_f64() > 0.0 {
            1.0 / mean.as_secs_f64()
        } else {
            0.0
        };

        Ok(PerformanceStatistics {
            mean,
            std_dev,
            min,
            max,
            median,
            percentile_95,
            percentile_99,
            coefficient_of_variation,
            confidence_interval,
            is_stable,
            outliers,
            throughput,
        })
    }

    fn calculate_memory_profile(&self, memory_samples: &[usize]) -> MemoryProfile {
        if memory_samples.is_empty() {
            return MemoryProfile {
                peak_memory_mb: 0.0,
                average_memory_mb: 0.0,
                allocation_rate: 0.0,
                deallocation_rate: 0.0,
                gc_events: 0,
                efficiency_score: 0.0,
                potential_leak: false,
            };
        }

        let peak_memory_mb = *memory_samples.iter().max().unwrap() as f64 / 1_048_576.0;
        let average_memory_mb = memory_samples.iter().sum::<usize>() as f64
            / (memory_samples.len() as f64 * 1_048_576.0);

        // Simplified estimates derived from the peak; a real profiler would measure these directly.
        let allocation_rate = peak_memory_mb * 0.1;
        let deallocation_rate = allocation_rate * 0.9;
        let gc_events = 0;
        let efficiency_score = (deallocation_rate / allocation_rate * 100.0).min(100.0);
        let potential_leak = allocation_rate > deallocation_rate * 1.1;

        MemoryProfile {
            peak_memory_mb,
            average_memory_mb,
            allocation_rate,
            deallocation_rate,
            gc_events,
            efficiency_score,
            potential_leak,
        }
    }

    fn get_memory_usage(&self) -> usize {
        // Simulated memory counter; this is a placeholder, not a real allocator hook.
        self.memory_tracker.fetch_add(1024, Ordering::Relaxed) + 1024 * 1024
    }

    #[allow(unused_variables)]
    fn perform_gpu_comparison(
        &self,
        algorithm: &str,
        data: &ArrayView2<f64>,
    ) -> Result<GpuVsCpuComparison> {
        // Simulated comparison with fixed timings; a real implementation would run the
        // algorithm on both backends.
        let cpu_time = Duration::from_millis(100);
        let gpu_time = Duration::from_millis(20);
        let gpu_compute_time = Duration::from_millis(15);
        let speedup = cpu_time.as_secs_f64() / gpu_time.as_secs_f64();
        let efficiency = (speedup / 5.0 * 100.0).min(100.0);
        let gpu_memory_mb = data.len() as f64 * 8.0 / 1_048_576.0;
        let transfer_overhead_percent = (gpu_time.as_secs_f64() - gpu_compute_time.as_secs_f64())
            / gpu_time.as_secs_f64()
            * 100.0;

        Ok(GpuVsCpuComparison {
            cpu_time,
            gpu_time,
            gpu_compute_time,
            speedup,
            efficiency,
            gpu_memory_mb,
            transfer_overhead_percent,
        })
    }

    fn calculate_quality_metrics(
        &self,
        algorithm: &str,
        data: &ArrayView2<f64>,
    ) -> Result<QualityMetrics> {
        let (labels, n_clusters, inertia, convergence_iterations) = match algorithm {
            "kmeans" => {
                let (centroids, _distortion) = kmeans(data.view(), 3, Some(10), None, None, None)?;
                let (labels, _distances) = vq(data.view(), centroids.view())?;
                (labels.mapv(|x| x as i32), centroids.nrows(), None, Some(10))
            }
            "dbscan" => {
                let labels = dbscan(*data, 0.5, 5, None)?;
                // Cluster labels are 0..k-1 with -1 marking noise points.
                let n_clusters = labels
                    .iter()
                    .filter(|&&x| x >= 0)
                    .copied()
                    .max()
                    .map(|m| m as usize + 1)
                    .unwrap_or(0);
                (labels, n_clusters, None, None)
            }
            _ => {
                let (centroids, _distortion) = kmeans(data.view(), 3, Some(10), None, None, None)?;
                let (labels, _distances) = vq(data.view(), centroids.view())?;
                (labels.mapv(|x| x as i32), centroids.nrows(), None, Some(10))
            }
        };

        let silhouette_score = if n_clusters > 1 && n_clusters < data.nrows() {
            silhouette_score(*data, labels.view()).ok()
        } else {
            None
        };

        let calinski_harabasz = if n_clusters > 1 && n_clusters < data.nrows() {
            calinski_harabasz_score(*data, labels.view()).ok()
        } else {
            None
        };

        Ok(QualityMetrics {
            silhouette_score,
            calinski_harabasz,
            davies_bouldin: None,
            inertia,
            n_clusters,
            convergence_iterations,
        })
    }

    fn perform_scalability_analysis(
        &self,
        algorithm: &str,
        base_data: &ArrayView2<f64>,
    ) -> Result<ScalabilityAnalysis> {
        let sizes = vec![100, 250, 500, 1000, 2000];
        let mut size_to_time = Vec::new();

        for &size in &sizes {
            if size > base_data.nrows() {
                continue;
            }

            let subset = base_data.slice(scirs2_core::ndarray::s![0..size, ..]);
            let start_time = Instant::now();

            if self.run_algorithm_once(algorithm, &subset).is_ok() {
                let duration = start_time.elapsed();
                size_to_time.push((size, duration));
            }
        }

        let complexity_estimate = self.estimate_complexity(&size_to_time);

        let scalability_predictions =
            self.predict_scalability(&size_to_time, &complexity_estimate);

        // Placeholder values; memory scaling and the recommended size range are not measured yet.
        let memory_scaling = 1.0;
        let optimal_size_range = (500, 10000);

        Ok(ScalabilityAnalysis {
            size_to_time,
            complexity_estimate,
            scalability_predictions,
            memory_scaling,
            optimal_size_range,
        })
    }

    fn estimate_complexity(&self, timings: &[(usize, Duration)]) -> ComplexityClass {
        if timings.len() < 3 {
            return ComplexityClass::Unknown;
        }

        let ratios: Vec<f64> = timings
            .windows(2)
            .map(|pair| {
                let (size1, time1) = pair[0];
                let (size2, time2) = pair[1];
                let size_ratio = size2 as f64 / size1 as f64;
                let time_ratio = time2.as_secs_f64() / time1.as_secs_f64();
                time_ratio / size_ratio
            })
            .collect();

        let avg_ratio = ratios.iter().sum::<f64>() / ratios.len() as f64;

        if avg_ratio < 1.2 {
            ComplexityClass::Linear
        } else if avg_ratio < 1.8 {
            ComplexityClass::Linearithmic
        } else if avg_ratio < 3.0 {
            ComplexityClass::Quadratic
        } else if avg_ratio < 5.0 {
            ComplexityClass::Cubic
        } else {
            ComplexityClass::Unknown
        }
    }

    fn predict_scalability(
        &self,
        timings: &[(usize, Duration)],
        complexity: &ComplexityClass,
    ) -> Vec<(usize, Duration)> {
        if timings.is_empty() {
            return Vec::new();
        }

        let (base_size, base_time) = timings[timings.len() - 1];
        let prediction_sizes = vec![5000, 10000, 20000, 50000];

        prediction_sizes
            .into_iter()
            .map(|size| {
                let size_factor = size as f64 / base_size as f64;
                let time_factor = match complexity {
                    ComplexityClass::Linear => size_factor,
                    ComplexityClass::Linearithmic => size_factor * size_factor.log2(),
                    ComplexityClass::Quadratic => size_factor * size_factor,
                    ComplexityClass::Cubic => size_factor * size_factor * size_factor,
                    ComplexityClass::Unknown => size_factor * size_factor,
                };

                let predicted_time = Duration::from_secs_f64(base_time.as_secs_f64() * time_factor);
                (size, predicted_time)
            })
            .collect()
    }

    fn generate_optimization_suggestions(
        &self,
        algorithm: &str,
        performance: &PerformanceStatistics,
        memory: &Option<MemoryProfile>,
        quality: &QualityMetrics,
    ) -> Vec<OptimizationSuggestion> {
        let mut suggestions = Vec::new();

        if performance.coefficient_of_variation > 0.2 {
            suggestions.push(OptimizationSuggestion {
                category: OptimizationCategory::ParameterTuning,
                suggestion: "High variance in execution times detected. Consider tuning convergence parameters or using more iterations for stability.".to_string(),
                expected_improvement: 15.0,
                difficulty: 3,
                priority: OptimizationPriority::Medium,
            });
        }

        if performance.throughput < 1.0 {
            suggestions.push(OptimizationSuggestion {
                category: OptimizationCategory::Parallelization,
                suggestion: "Low throughput detected. Consider using parallel implementations or multi-threading.".to_string(),
                expected_improvement: 200.0,
                difficulty: 6,
                priority: OptimizationPriority::High,
            });
        }

        if let Some(mem) = memory {
            if mem.potential_leak {
                suggestions.push(OptimizationSuggestion {
                    category: OptimizationCategory::MemoryOptimization,
                    suggestion:
                        "Potential memory leak detected. Review memory allocation patterns."
                            .to_string(),
                    expected_improvement: 25.0,
                    difficulty: 8,
                    priority: OptimizationPriority::Critical,
                });
            }

            if mem.efficiency_score < 50.0 {
                suggestions.push(OptimizationSuggestion {
                    category: OptimizationCategory::MemoryOptimization,
                    suggestion: "Low memory efficiency. Consider using in-place operations or memory pooling.".to_string(),
                    expected_improvement: 30.0,
                    difficulty: 5,
                    priority: OptimizationPriority::High,
                });
            }
        }

        match algorithm {
            "kmeans" => {
                if let Some(silhouette) = quality.silhouette_score {
                    if silhouette < 0.3 {
                        suggestions.push(OptimizationSuggestion {
                            category: OptimizationCategory::AlgorithmChange,
                            suggestion: "Low silhouette score suggests poor cluster quality. Consider using DBSCAN or increasing k value.".to_string(),
                            expected_improvement: 50.0,
                            difficulty: 4,
                            priority: OptimizationPriority::Medium,
                        });
                    }
                }
            }
            "dbscan" => {
                suggestions.push(OptimizationSuggestion {
                    category: OptimizationCategory::ParameterTuning,
                    suggestion: "DBSCAN performance highly depends on eps and min_samples parameters. Consider using auto-tuning.".to_string(),
                    expected_improvement: 40.0,
                    difficulty: 3,
                    priority: OptimizationPriority::Medium,
                });
            }
            _ => {}
        }

        if performance.mean > Duration::from_millis(100) {
            suggestions.push(OptimizationSuggestion {
                category: OptimizationCategory::GpuAcceleration,
                suggestion:
                    "Algorithm runtime suggests GPU acceleration could provide significant speedup."
                        .to_string(),
                expected_improvement: 300.0,
                difficulty: 7,
                priority: OptimizationPriority::High,
            });
        }

        suggestions
    }

    fn detect_regression(
        &self,
        algorithm: &str,
        result: &AlgorithmBenchmark,
    ) -> Option<RegressionAlert> {
        if result.error_rate > 0.1 {
            return Some(RegressionAlert {
                algorithm: algorithm.to_string(),
                degradation_percent: result.error_rate * 100.0,
                severity: if result.error_rate > 0.5 {
                    RegressionSeverity::Critical
                } else if result.error_rate > 0.25 {
                    RegressionSeverity::Major
                } else {
                    RegressionSeverity::Moderate
                },
                description: format!(
                    "High error rate detected: {:.1}%",
                    result.error_rate * 100.0
                ),
                suggested_actions: vec![
                    "Check input data quality".to_string(),
                    "Verify algorithm parameters".to_string(),
                    "Review recent code changes".to_string(),
                ],
            });
        }

        if !result.performance.is_stable {
            return Some(RegressionAlert {
                algorithm: algorithm.to_string(),
                degradation_percent: result.performance.coefficient_of_variation * 100.0,
                severity: RegressionSeverity::Minor,
                description: "Performance instability detected".to_string(),
                suggested_actions: vec![
                    "Increase measurement iterations".to_string(),
                    "Check for system load during benchmarking".to_string(),
                ],
            });
        }

        None
    }

    fn generate_comparisons(
        &self,
        results: &HashMap<String, AlgorithmBenchmark>,
    ) -> Result<Vec<AlgorithmComparison>> {
        let mut comparisons = Vec::new();
        let algorithms: Vec<&String> = results.keys().collect();

        for i in 0..algorithms.len() {
            for j in (i + 1)..algorithms.len() {
                let algo_a = algorithms[i];
                let algo_b = algorithms[j];
                let result_a = &results[algo_a];
                let result_b = &results[algo_b];

                let performance_difference = (result_b.performance.mean.as_secs_f64()
                    - result_a.performance.mean.as_secs_f64())
                    / result_a.performance.mean.as_secs_f64()
                    * 100.0;

                let winner = if performance_difference < 0.0 {
                    algo_b.clone()
                } else {
                    algo_a.clone()
                };

                let quality_a = result_a.quality_metrics.silhouette_score.unwrap_or(0.0);
                let quality_b = result_b.quality_metrics.silhouette_score.unwrap_or(0.0);
                let quality_difference = quality_b - quality_a;

                let memory_a = result_a
                    .memory
                    .as_ref()
                    .map(|m| m.peak_memory_mb)
                    .unwrap_or(0.0);
                let memory_b = result_b
                    .memory
                    .as_ref()
                    .map(|m| m.peak_memory_mb)
                    .unwrap_or(0.0);
                let memory_difference = memory_b - memory_a;

                // Crude significance proxy based on effect size rather than a formal test.
                let significance = if performance_difference.abs() > 10.0 {
                    0.01
                } else {
                    0.1
                };

                comparisons.push(AlgorithmComparison {
                    algorithm_a: algo_a.clone(),
                    algorithm_b: algo_b.clone(),
                    performance_difference,
                    significance,
                    winner,
                    quality_difference,
                    memory_difference,
                });
            }
        }

        Ok(comparisons)
    }

    fn collect_system_info(&self) -> SystemInfo {
        SystemInfo {
            // Placeholder values; detailed CPU and memory detection is not implemented yet.
            cpu_info: "Unknown CPU".to_string(),
            total_memory_gb: 16.0,
            available_memory_gb: 8.0,
            os: std::env::consts::OS.to_string(),
            rust_version: env!("CARGO_PKG_RUST_VERSION").to_string(),
            optimizations: if cfg!(debug_assertions) {
                "Debug"
            } else {
                "Release"
            }
            .to_string(),
            gpu_info: None,
            cpu_cores: num_cpus::get(),
            cpu_frequency_mhz: None,
        }
    }

    fn generate_recommendations(
        &self,
        results: &HashMap<String, AlgorithmBenchmark>,
    ) -> Vec<String> {
        let mut recommendations = Vec::new();

        let best_algo = results
            .iter()
            .min_by(|a, b| a.1.performance.mean.cmp(&b.1.performance.mean))
            .map(|(name, _)| name);

        if let Some(best) = best_algo {
            recommendations.push(format!("Best performing algorithm: {}", best));
        }

        let high_error_algos: Vec<&str> = results
            .iter()
            .filter(|(_, result)| result.error_rate > 0.05)
            .map(|(name, _)| name.as_str())
            .collect();

        if !high_error_algos.is_empty() {
            recommendations.push(format!(
                "Algorithms with high error rates: {:?}",
                high_error_algos
            ));
        }

        let memory_inefficient: Vec<&str> = results
            .iter()
            .filter(|(_, result)| {
                result
                    .memory
                    .as_ref()
                    .map(|m| m.efficiency_score < 60.0)
                    .unwrap_or(false)
            })
            .map(|(name, _)| name.as_str())
            .collect();

        if !memory_inefficient.is_empty() {
            recommendations.push("Consider memory optimization for better efficiency".to_string());
        }

        recommendations
    }
}

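/// Writes a self-contained HTML report for a completed benchmark run to `output_path`.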
#[allow(dead_code)]
pub fn create_comprehensive_report(results: &BenchmarkResults, output_path: &str) -> Result<()> {
    let html_content = generate_html_report(results);

    std::fs::write(output_path, html_content)
        .map_err(|e| ClusteringError::ComputationError(format!("Failed to write report: {}", e)))?;

    Ok(())
}

#[allow(dead_code)]
fn generate_html_report(results: &BenchmarkResults) -> String {
    format!(
        r#"
<!DOCTYPE html>
<html>
<head>
    <title>Advanced Clustering Benchmark Report</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; }}
        .header {{ background: #f0f0f0; padding: 20px; border-radius: 8px; }}
        .section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }}
        .algorithm {{ margin: 10px 0; padding: 10px; background: #f9f9f9; }}
        .metric {{ display: inline-block; margin: 5px 10px; }}
        .warning {{ color: #ff6600; font-weight: bold; }}
        .error {{ color: #cc0000; font-weight: bold; }}
        .success {{ color: #00aa00; font-weight: bold; }}
        table {{ border-collapse: collapse; width: 100%; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
        th {{ background-color: #f2f2f2; }}
    </style>
</head>
<body>
    <div class="header">
        <h1>Advanced Clustering Benchmark Report</h1>
        <p>Generated: {:?}</p>
        <p>Total Duration: {:.2?}</p>
        <p>System: {} on {}</p>
    </div>

    <div class="section">
        <h2>Performance Summary</h2>
        <table>
            <tr>
                <th>Algorithm</th>
                <th>Mean Time</th>
                <th>Std Dev</th>
                <th>Throughput (ops/sec)</th>
                <th>Error Rate</th>
                <th>Quality Score</th>
            </tr>
            {}
        </table>
    </div>

    <div class="section">
        <h2>Regression Alerts</h2>
        {}
    </div>

    <div class="section">
        <h2>Recommendations</h2>
        <ul>
            {}
        </ul>
    </div>

    <div class="section">
        <h2>System Information</h2>
        <p><strong>OS:</strong> {}</p>
        <p><strong>CPU Cores:</strong> {}</p>
        <p><strong>Total Memory:</strong> {:.1} GB</p>
        <p><strong>Rust Version:</strong> {}</p>
        <p><strong>Build Mode:</strong> {}</p>
    </div>
</body>
</html>
"#,
        results.timestamp,
        results.total_duration,
        results.system_info.os,
        results.system_info.cpu_cores,
        generate_performance_table(results),
        generate_regression_alerts_html(results),
        generate_recommendations_html(results),
        results.system_info.os,
        results.system_info.cpu_cores,
        results.system_info.total_memory_gb,
        results.system_info.rust_version,
        results.system_info.optimizations,
    )
}

#[allow(dead_code)]
fn generate_performance_table(results: &BenchmarkResults) -> String {
    results.algorithmresults.iter()
        .map(|(name, result)| {
            let quality = result.quality_metrics.silhouette_score
                .map(|s| format!("{:.3}", s))
                .unwrap_or_else(|| "N/A".to_string());
            format!(
                "<tr><td>{}</td><td>{:.2?}</td><td>{:.2?}</td><td>{:.2}</td><td>{:.2}%</td><td>{}</td></tr>",
                name,
                result.performance.mean,
                result.performance.std_dev,
                result.performance.throughput,
                result.error_rate * 100.0,
                quality
            )
        })
        .collect::<Vec<_>>()
        .join("\n")
}

#[allow(dead_code)]
fn generate_regression_alerts_html(results: &BenchmarkResults) -> String {
    if results.regression_alerts.is_empty() {
        "<p class=\"success\">No performance regressions detected.</p>".to_string()
    } else {
        results
            .regression_alerts
            .iter()
            .map(|alert| {
                let class = match alert.severity {
                    RegressionSeverity::Critical => "error",
                    RegressionSeverity::Major => "error",
                    RegressionSeverity::Moderate => "warning",
                    RegressionSeverity::Minor => "warning",
                };
                format!(
                    "<div class=\"{}\"><strong>{}:</strong> {} ({:.1}% degradation)</div>",
                    class, alert.algorithm, alert.description, alert.degradation_percent
                )
            })
            .collect::<Vec<_>>()
            .join("\n")
    }
}

#[allow(dead_code)]
fn generate_recommendations_html(results: &BenchmarkResults) -> String {
    results
        .recommendations
        .iter()
        .map(|rec| format!("<li>{}</li>", rec))
        .collect::<Vec<_>>()
        .join("\n")
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    #[ignore = "timeout"]
    fn test_benchmark_config_default() {
        let config = BenchmarkConfig::default();
        assert_eq!(config.warmup_iterations, 5);
        assert_eq!(config.measurement_iterations, 50);
        assert!(config.memory_profiling);
    }

    #[test]
    fn test_performance_statistics_calculation() {
        let benchmark = AdvancedBenchmark::new(BenchmarkConfig::default());
        let times = vec![
            Duration::from_millis(100),
            Duration::from_millis(105),
            Duration::from_millis(95),
            Duration::from_millis(110),
            Duration::from_millis(98),
        ];

        let stats = benchmark.calculate_performance_statistics(&times).unwrap();
        assert!(stats.mean.as_millis() > 90 && stats.mean.as_millis() < 120);
        assert!(stats.throughput > 0.0);
        assert!(!stats.is_stable || stats.coefficient_of_variation < 0.1);
    }

    #[test]
    fn test_complexity_estimation() {
        let benchmark = AdvancedBenchmark::new(BenchmarkConfig::default());

        let linear_timings = vec![
            (100, Duration::from_millis(10)),
            (200, Duration::from_millis(20)),
            (400, Duration::from_millis(40)),
        ];
        assert_eq!(
            benchmark.estimate_complexity(&linear_timings),
            ComplexityClass::Linear
        );

        let quadratic_timings = vec![
            (100, Duration::from_millis(10)),
            (200, Duration::from_millis(40)),
            (400, Duration::from_millis(160)),
        ];
        assert_eq!(
            benchmark.estimate_complexity(&quadratic_timings),
            ComplexityClass::Quadratic
        );
    }

    #[test]
    #[ignore = "timeout"]
    fn test_advanced_benchmark_creation() {
        let config = BenchmarkConfig {
            warmup_iterations: 2,
            measurement_iterations: 5,
            ..Default::default()
        };

        let benchmark = AdvancedBenchmark::new(config.clone());
        assert_eq!(benchmark.config.warmup_iterations, 2);
        assert_eq!(benchmark.config.measurement_iterations, 5);
    }

    #[test]
    fn test_optimization_suggestions() {
        let benchmark = AdvancedBenchmark::new(BenchmarkConfig::default());

        let performance = PerformanceStatistics {
            mean: Duration::from_millis(1000),
            coefficient_of_variation: 0.3,
            throughput: 0.5,
            is_stable: false,
            ..Default::default()
        };

        let memory = Some(MemoryProfile {
            efficiency_score: 30.0,
            potential_leak: true,
            ..Default::default()
        });

        let quality = QualityMetrics {
            silhouette_score: Some(0.2),
            n_clusters: 3,
            ..Default::default()
        };

        let suggestions =
            benchmark.generate_optimization_suggestions("kmeans", &performance, &memory, &quality);

        assert!(!suggestions.is_empty());
        assert!(suggestions
            .iter()
            .any(|s| s.category == OptimizationCategory::MemoryOptimization));
        assert!(suggestions
            .iter()
            .any(|s| s.priority == OptimizationPriority::Critical));
    }
}

impl Default for PerformanceStatistics {
    fn default() -> Self {
        Self {
            mean: Duration::from_millis(100),
            std_dev: Duration::from_millis(10),
            min: Duration::from_millis(90),
            max: Duration::from_millis(120),
            median: Duration::from_millis(100),
            percentile_95: Duration::from_millis(115),
            percentile_99: Duration::from_millis(118),
            coefficient_of_variation: 0.1,
            confidence_interval: (Duration::from_millis(95), Duration::from_millis(105)),
            is_stable: true,
            outliers: 0,
            throughput: 10.0,
        }
    }
}

impl Default for MemoryProfile {
    fn default() -> Self {
        Self {
            peak_memory_mb: 100.0,
            average_memory_mb: 80.0,
            allocation_rate: 10.0,
            deallocation_rate: 9.5,
            gc_events: 0,
            efficiency_score: 85.0,
            potential_leak: false,
        }
    }
}

impl Default for QualityMetrics {
    fn default() -> Self {
        Self {
            silhouette_score: Some(0.5),
            calinski_harabasz: Some(100.0),
            davies_bouldin: Some(1.0),
            inertia: Some(50.0),
            n_clusters: 3,
            convergence_iterations: Some(10),
        }
    }
}