Skip to main content

scirs2_special/
performance_benchmarks.rs

1//! Comprehensive performance benchmarking for special functions
2//!
3//! This module provides extensive benchmarking capabilities to compare
4//! CPU vs GPU performance, test different algorithms, and validate
5//! numerical accuracy.
6
7use crate::error::SpecialResult;
8use scirs2_core::ndarray::Array1;
9use std::collections::HashMap;
10use std::time::{Duration, Instant};
11
/// Settings controlling which gamma backends are benchmarked and how they are timed.
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    /// Input lengths to benchmark.
    pub arraysizes: Vec<usize>,
    /// Number of timed runs per test.
    pub iterations: usize,
    /// Number of untimed runs executed before the timed ones.
    pub warmup_iterations: usize,
    /// Whether the GPU backend is benchmarked.
    pub test_gpu: bool,
    /// Whether the CPU baseline is benchmarked.
    pub test_cpu: bool,
    /// Whether the SIMD backend is benchmarked.
    pub test_simd: bool,
    /// Whether the parallel backend is benchmarked.
    pub test_parallel: bool,
    /// Tolerance for numerical accuracy validation.
    pub numerical_tolerance: f64,
}

impl Default for BenchmarkConfig {
    /// Returns the standard configuration: four array sizes from 100 to
    /// 100000, 10 timed iterations after 3 warmup runs, and the CPU baseline
    /// always enabled.
    fn default() -> Self {
        // Optional backends default to "on" only when their feature is compiled in.
        let gpu_compiled = cfg!(feature = "gpu");
        let simd_compiled = cfg!(feature = "simd");
        let parallel_compiled = cfg!(feature = "parallel");

        BenchmarkConfig {
            arraysizes: vec![100, 1_000, 10_000, 100_000],
            iterations: 10,
            warmup_iterations: 3,
            test_cpu: true,
            test_gpu: gpu_compiled,
            test_simd: simd_compiled,
            test_parallel: parallel_compiled,
            numerical_tolerance: 1e-10,
        }
    }
}
47
/// Benchmark results for a single test: one implementation at one array size.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Name of the benchmarked function (the benchmarks in this file set "gamma").
    pub function_name: String,
    /// Implementation label; this file uses "CPU", "SIMD", "Parallel" and "GPU".
    pub implementation: String,
    /// Number of elements in the input array.
    pub arraysize: usize,
    /// Mean duration of the timed iterations.
    pub average_time: Duration,
    /// Shortest timed iteration.
    pub min_time: Duration,
    /// Longest timed iteration.
    pub max_time: Duration,
    /// Standard deviation of the timed iterations.
    pub std_dev: Duration,
    /// Array elements processed per second, derived from `average_time`.
    pub throughput_ops_per_sec: f64,
    /// CPU average time divided by this result's average time; `None` for
    /// the CPU baseline itself or when no speedup was computed.
    pub speedup_factor: Option<f64>,
    /// Maximum relative error against the CPU reference output, when measured.
    pub numerical_accuracy: Option<f64>,
    /// Whether the benchmark (and its accuracy check, if any) completed.
    pub success: bool,
    /// Description of the first failure encountered, if any.
    pub error_message: Option<String>,
}
64
/// Complete benchmark suite results
#[derive(Debug, Clone)]
pub struct BenchmarkSuite {
    /// One entry per benchmarked implementation and array size.
    pub results: Vec<BenchmarkResult>,
    /// Label of the implementation speedups are measured against
    /// (set to "CPU" by `run_comprehensive_benchmark`).
    pub baseline_implementation: String,
    /// Wall-clock time spent running the whole suite.
    pub total_duration: Duration,
    /// Environment the suite ran in.
    pub system_info: SystemInfo,
}
73
/// System information for benchmark context
#[derive(Debug, Clone)]
pub struct SystemInfo {
    /// CPU architecture, plus the widest detected SIMD extension on x86_64.
    pub cpu_info: String,
    /// Number of available GPU devices; `None` when the `gpu` feature is off
    /// or no device is reported.
    pub gpu_info: Option<String>,
    /// Placeholder text; system memory detection is not implemented.
    pub memory_info: String,
    /// Toolchain description; see [`SystemInfo::collect`] for how it is derived.
    pub rust_version: String,
    /// Names of the optional crate features enabled at compile time.
    pub feature_flags: Vec<String>,
}

impl SystemInfo {
    /// Collects a snapshot of the build configuration and host CPU.
    ///
    /// `rust_version` is taken from the `RUSTC_VERSION` environment variable
    /// at run time when it is set and non-empty. Otherwise it falls back to
    /// `"rustc <CARGO_PKG_RUST_VERSION>"`, which is the minimum supported Rust
    /// version declared in the manifest (not necessarily the compiler that
    /// built the crate), or `"rustc unknown"` when that is absent or empty.
    pub fn collect() -> Self {
        let feature_flags = [
            ("gpu", cfg!(feature = "gpu")),
            ("simd", cfg!(feature = "simd")),
            ("parallel", cfg!(feature = "parallel")),
            ("high-precision", cfg!(feature = "high-precision")),
        ]
        .iter()
        .filter(|&&(_, enabled)| enabled)
        .map(|&(name, _)| name.to_string())
        .collect();

        Self {
            cpu_info: Self::get_cpu_info(),
            gpu_info: Self::get_gpu_info(),
            memory_info: Self::get_memory_info(),
            rust_version: Self::get_rust_version(),
            feature_flags,
        }
    }

    /// Resolves the toolchain description, never returning an empty or
    /// version-less string.
    fn get_rust_version() -> String {
        // An empty value carries no information, so treat it like "unset".
        let from_env = std::env::var("RUSTC_VERSION")
            .ok()
            .filter(|version| !version.trim().is_empty());

        from_env.unwrap_or_else(|| {
            // Cargo sets this variable to an empty string when the manifest
            // has no `rust-version`; without the filter that would yield "rustc ".
            let version = option_env!("CARGO_PKG_RUST_VERSION")
                .filter(|version| !version.trim().is_empty())
                .unwrap_or("unknown");
            format!("rustc {version}")
        })
    }

    /// Describes the CPU: on x86_64 the runtime-detected AVX2 / SSE4.1
    /// support, on other targets just the architecture name.
    fn get_cpu_info() -> String {
        #[cfg(target_arch = "x86_64")]
        {
            if is_x86_feature_detected!("avx2") {
                "x86_64 with AVX2".to_string()
            } else if is_x86_feature_detected!("sse4.1") {
                "x86_64 with SSE4.1".to_string()
            } else {
                "x86_64".to_string()
            }
        }
        #[cfg(not(target_arch = "x86_64"))]
        {
            std::env::consts::ARCH.to_string()
        }
    }

    /// Reports how many GPU devices the crate's GPU pool exposes, or `None`
    /// when there are none or the `gpu` feature is disabled.
    fn get_gpu_info() -> Option<String> {
        #[cfg(feature = "gpu")]
        {
            match crate::gpu_context_manager::get_gpu_pool()
                .get_device_info()
                .len()
            {
                0 => None,
                n => Some(format!("{n} GPU device(s) available")),
            }
        }
        #[cfg(not(feature = "gpu"))]
        None
    }

    /// Returns a fixed placeholder; actual memory detection is not implemented.
    fn get_memory_info() -> String {
        "System memory info not available".to_string()
    }
}
151
/// Gamma function benchmarks
///
/// Stateless namespace type: the benchmark routines are associated
/// functions on this unit struct.
pub struct GammaBenchmarks;
154
155impl GammaBenchmarks {
156    pub fn run_comprehensive_benchmark(config: &BenchmarkConfig) -> SpecialResult<BenchmarkSuite> {
157        let start_time = Instant::now();
158        let mut results = Vec::new();
159        let system_info = SystemInfo::collect();
160
161        println!("Running comprehensive gamma function benchmarks...");
162        println!("System: {}", system_info.cpu_info);
163        if let Some(ref gpu_info) = system_info.gpu_info {
164            println!("GPU: {gpu_info}");
165        }
166        println!("Features: {:?}", system_info.feature_flags);
167        println!();
168
169        for &arraysize in &config.arraysizes {
170            println!("Testing array size: {arraysize}");
171
172            // Generate test data
173            let test_data = Array1::linspace(0.1, 10.0, arraysize);
174            let mut _reference_result = None;
175
176            // CPU baseline
177            if config.test_cpu {
178                let result = Self::benchmark_cpu_gamma(&test_data, config)?;
179                _reference_result = Some(test_data.mapv(crate::gamma::gamma));
180                results.push(result);
181            }
182
183            // SIMD implementation
184            #[cfg(feature = "simd")]
185            if config.test_simd {
186                let mut result = Self::benchmark_simd_gamma(&test_data, config)?;
187                if let Some(ref reference) = _reference_result {
188                    // Compute SIMD result for accuracy comparison
189                    let simd_result = crate::simd_ops::gamma_f64_simd(&test_data.view())
190                        .map_err(|e| crate::error::SpecialError::ComputationError(e.to_string()))?;
191                    let accuracy = Self::compute_numerical_accuracy(&simd_result, reference);
192                    result.numerical_accuracy = Some(accuracy);
193                }
194                results.push(result);
195            }
196
197            // Parallel implementation
198            #[cfg(feature = "parallel")]
199            if config.test_parallel {
200                let mut result = Self::benchmark_parallel_gamma(&test_data, config)?;
201                if let Some(ref reference) = _reference_result {
202                    // Compute parallel result for accuracy comparison
203                    match Self::compute_parallel_gamma(&test_data) {
204                        Ok(parallel_result) => {
205                            let accuracy =
206                                Self::compute_numerical_accuracy(&parallel_result, reference);
207                            result.numerical_accuracy = Some(accuracy);
208                        }
209                        Err(e) => {
210                            result.success = false;
211                            result.error_message =
212                                Some(format!("Parallel accuracy test failed: {e}"));
213                        }
214                    }
215                }
216                results.push(result);
217            }
218
219            // GPU implementation
220            #[cfg(feature = "gpu")]
221            if config.test_gpu {
222                let mut result = Self::benchmark_gpu_gamma(&test_data, config)?;
223                if let Some(ref reference) = _reference_result {
224                    // Compute GPU result for accuracy comparison
225                    match Self::compute_gpu_gamma(&test_data) {
226                        Ok(gpu_result) => {
227                            let accuracy = Self::compute_numerical_accuracy(&gpu_result, reference);
228                            result.numerical_accuracy = Some(accuracy);
229                        }
230                        Err(e) => {
231                            result.success = false;
232                            result.error_message = Some(format!("GPU accuracy test failed: {e}"));
233                        }
234                    }
235                }
236                results.push(result);
237            }
238
239            println!();
240        }
241
242        // Calculate speedup factors
243        Self::calculate_speedup_factors(&mut results);
244
245        let total_duration = start_time.elapsed();
246
247        Ok(BenchmarkSuite {
248            results,
249            baseline_implementation: "CPU".to_string(),
250            total_duration,
251            system_info,
252        })
253    }
254
255    fn benchmark_cpu_gamma(
256        data: &Array1<f64>,
257        config: &BenchmarkConfig,
258    ) -> SpecialResult<BenchmarkResult> {
259        let mut times = Vec::new();
260
261        // Warmup
262        for _ in 0..config.warmup_iterations {
263            let _result: Array1<f64> = data.mapv(crate::gamma::gamma);
264        }
265
266        // Actual benchmarking
267        for _ in 0..config.iterations {
268            let start = Instant::now();
269            let _result: Array1<f64> = data.mapv(crate::gamma::gamma);
270            times.push(start.elapsed());
271        }
272
273        let stats = Self::calculate_statistics(&times);
274        let throughput = data.len() as f64 / stats.average_time.as_secs_f64();
275
276        Ok(BenchmarkResult {
277            function_name: "gamma".to_string(),
278            implementation: "CPU".to_string(),
279            arraysize: data.len(),
280            average_time: stats.average_time,
281            min_time: stats.min_time,
282            max_time: stats.max_time,
283            std_dev: stats.std_dev,
284            throughput_ops_per_sec: throughput,
285            speedup_factor: None, // Will be calculated later
286            numerical_accuracy: None,
287            success: true,
288            error_message: None,
289        })
290    }
291
292    #[cfg(feature = "simd")]
293    fn benchmark_simd_gamma(
294        data: &Array1<f64>,
295        config: &BenchmarkConfig,
296    ) -> SpecialResult<BenchmarkResult> {
297        use crate::simd_ops::gamma_f64_simd;
298
299        let mut times = Vec::new();
300
301        // Warmup
302        for _ in 0..config.warmup_iterations {
303            let _result = gamma_f64_simd(&data.view());
304        }
305
306        // Actual benchmarking
307        for _ in 0..config.iterations {
308            let start = Instant::now();
309            let _result = gamma_f64_simd(&data.view());
310            times.push(start.elapsed());
311        }
312
313        let stats = Self::calculate_statistics(&times);
314        let throughput = data.len() as f64 / stats.average_time.as_secs_f64();
315
316        Ok(BenchmarkResult {
317            function_name: "gamma".to_string(),
318            implementation: "SIMD".to_string(),
319            arraysize: data.len(),
320            average_time: stats.average_time,
321            min_time: stats.min_time,
322            max_time: stats.max_time,
323            std_dev: stats.std_dev,
324            throughput_ops_per_sec: throughput,
325            speedup_factor: None,
326            numerical_accuracy: None,
327            success: true,
328            error_message: None,
329        })
330    }
331
332    #[cfg(feature = "parallel")]
333    fn benchmark_parallel_gamma(
334        data: &Array1<f64>,
335        config: &BenchmarkConfig,
336    ) -> SpecialResult<BenchmarkResult> {
337        use crate::simd_ops::gamma_f64_parallel;
338
339        let mut times = Vec::new();
340
341        // Warmup
342        for _ in 0..config.warmup_iterations {
343            let _result = gamma_f64_parallel(&data.view());
344        }
345
346        // Actual benchmarking
347        for _ in 0..config.iterations {
348            let start = Instant::now();
349            let _result = gamma_f64_parallel(&data.view());
350            times.push(start.elapsed());
351        }
352
353        let stats = Self::calculate_statistics(&times);
354        let throughput = data.len() as f64 / stats.average_time.as_secs_f64();
355
356        Ok(BenchmarkResult {
357            function_name: "gamma".to_string(),
358            implementation: "Parallel".to_string(),
359            arraysize: data.len(),
360            average_time: stats.average_time,
361            min_time: stats.min_time,
362            max_time: stats.max_time,
363            std_dev: stats.std_dev,
364            throughput_ops_per_sec: throughput,
365            speedup_factor: None,
366            numerical_accuracy: None,
367            success: true,
368            error_message: None,
369        })
370    }
371
372    #[cfg(feature = "gpu")]
373    fn benchmark_gpu_gamma(
374        data: &Array1<f64>,
375        config: &BenchmarkConfig,
376    ) -> SpecialResult<BenchmarkResult> {
377        // Convert to f32 for GPU (most shaders are f32)
378        let data_f32: Array1<f32> = data.mapv(|x| x as f32);
379        let mut output = Array1::<f32>::zeros(data_f32.len());
380
381        let mut times = Vec::new();
382        let mut success_count = 0;
383        let mut error_msg = None;
384
385        // Warmup
386        for _ in 0..config.warmup_iterations {
387            let _ = crate::gpu_ops::gamma_gpu(&data_f32.view(), &mut output.view_mut());
388        }
389
390        // Actual benchmarking
391        for _ in 0..config.iterations {
392            let start = Instant::now();
393            match crate::gpu_ops::gamma_gpu(&data_f32.view(), &mut output.view_mut()) {
394                Ok(_) => {
395                    times.push(start.elapsed());
396                    success_count += 1;
397                }
398                Err(e) => {
399                    if error_msg.is_none() {
400                        error_msg = Some(format!("GPU error: {e}"));
401                    }
402                }
403            }
404        }
405
406        if times.is_empty() {
407            return Ok(BenchmarkResult {
408                function_name: "gamma".to_string(),
409                implementation: "GPU".to_string(),
410                arraysize: data.len(),
411                average_time: Duration::ZERO,
412                min_time: Duration::ZERO,
413                max_time: Duration::ZERO,
414                std_dev: Duration::ZERO,
415                throughput_ops_per_sec: 0.0,
416                speedup_factor: None,
417                numerical_accuracy: None,
418                success: false,
419                error_message: error_msg,
420            });
421        }
422
423        let stats = Self::calculate_statistics(&times);
424        let throughput = data.len() as f64 / stats.average_time.as_secs_f64();
425
426        Ok(BenchmarkResult {
427            function_name: "gamma".to_string(),
428            implementation: "GPU".to_string(),
429            arraysize: data.len(),
430            average_time: stats.average_time,
431            min_time: stats.min_time,
432            max_time: stats.max_time,
433            std_dev: stats.std_dev,
434            throughput_ops_per_sec: throughput,
435            speedup_factor: None,
436            numerical_accuracy: None,
437            success: success_count > 0,
438            error_message: error_msg,
439        })
440    }
441
442    fn calculate_statistics(times: &[Duration]) -> TimeStatistics {
443        if times.is_empty() {
444            return TimeStatistics {
445                average_time: Duration::ZERO,
446                min_time: Duration::ZERO,
447                max_time: Duration::ZERO,
448                std_dev: Duration::ZERO,
449            };
450        }
451
452        let total: Duration = times.iter().sum();
453        let average = total / times.len() as u32;
454        let min_time = *times.iter().min().expect("Operation failed");
455        let max_time = *times.iter().max().expect("Operation failed");
456
457        // Calculate standard deviation
458        let variance: f64 = times
459            .iter()
460            .map(|&time| {
461                let diff = time.as_secs_f64() - average.as_secs_f64();
462                diff * diff
463            })
464            .sum::<f64>()
465            / times.len() as f64;
466
467        let std_dev = Duration::from_secs_f64(variance.sqrt());
468
469        TimeStatistics {
470            average_time: average,
471            min_time,
472            max_time,
473            std_dev,
474        }
475    }
476
477    fn calculate_speedup_factors(results: &mut [BenchmarkResult]) {
478        // Group _results by array size
479        let mut size_groups: HashMap<usize, Vec<&mut BenchmarkResult>> = HashMap::new();
480
481        for result in results.iter_mut() {
482            size_groups
483                .entry(result.arraysize)
484                .or_default()
485                .push(result);
486        }
487
488        // Calculate speedup factors relative to CPU baseline
489        for (_, group) in size_groups.iter_mut() {
490            if let Some(cpu_result) = group.iter().find(|r| r.implementation == "CPU") {
491                let cpu_time = cpu_result.average_time;
492
493                for result in group.iter_mut() {
494                    if result.implementation != "CPU" && result.success {
495                        result.speedup_factor =
496                            Some(cpu_time.as_secs_f64() / result.average_time.as_secs_f64());
497                    }
498                }
499            }
500        }
501    }
502
503    pub fn compute_numerical_accuracy(result: &Array1<f64>, reference: &Array1<f64>) -> f64 {
504        if result.len() != reference.len() {
505            return f64::INFINITY;
506        }
507
508        let mut max_error: f64 = 0.0;
509        for (r, ref_val) in result.iter().zip(reference.iter()) {
510            let error = (r - ref_val).abs() / ref_val.abs().max(1e-16);
511            max_error = max_error.max(error);
512        }
513
514        max_error
515    }
516
517    #[cfg(feature = "parallel")]
518    fn compute_parallel_gamma(data: &Array1<f64>) -> SpecialResult<Array1<f64>> {
519        // Use sequential mapping for now - parallel operations through core
520        // In a full implementation, would use scirs2_core parallel abstractions
521        let result = data.mapv(|x| crate::gamma::gamma(x));
522        Ok(result)
523    }
524
525    #[cfg(feature = "gpu")]
526    fn compute_gpu_gamma(data: &Array1<f64>) -> SpecialResult<Array1<f64>> {
527        // Try to use GPU gamma computation
528        let mut result = Array1::zeros(data.len());
529        match crate::gpu_ops::gamma_gpu(&data.view(), &mut result.view_mut()) {
530            Ok(()) => Ok(result),
531            Err(e) => Err(crate::error::SpecialError::ComputationError(format!(
532                "GPU gamma computation failed: {e}"
533            ))),
534        }
535    }
536}
537
/// Summary statistics over the timed iterations of one benchmark,
/// as produced by `GammaBenchmarks::calculate_statistics`.
#[derive(Debug, Clone)]
struct TimeStatistics {
    /// Mean of the samples.
    average_time: Duration,
    /// Smallest sample.
    min_time: Duration,
    /// Largest sample.
    max_time: Duration,
    /// Standard deviation of the samples (variance divided by the sample count).
    std_dev: Duration,
}
545
546/// Benchmark validation and testing
547impl GammaBenchmarks {
548    /// Validate benchmarking infrastructure with a quick test
549    pub fn validate_infrastructure() -> SpecialResult<()> {
550        println!("Validating benchmarking infrastructure...");
551
552        let test_config = BenchmarkConfig {
553            arraysizes: vec![100],
554            iterations: 3,
555            warmup_iterations: 1,
556            test_gpu: false, // Disable for validation
557            test_cpu: true,
558            test_simd: cfg!(feature = "simd"),
559            test_parallel: cfg!(feature = "parallel"),
560            numerical_tolerance: 1e-10,
561        };
562
563        let suite = Self::run_comprehensive_benchmark(&test_config)?;
564
565        // Validate that we got results
566        if suite.results.is_empty() {
567            return Err(crate::error::SpecialError::ComputationError(
568                "No benchmark results generated".to_string(),
569            ));
570        }
571
572        // Check that at least CPU results are successful
573        let cpu_results: Vec<_> = suite
574            .results
575            .iter()
576            .filter(|r| r.implementation == "CPU")
577            .collect();
578
579        if cpu_results.is_empty() || !cpu_results[0].success {
580            return Err(crate::error::SpecialError::ComputationError(
581                "CPU benchmark failed".to_string(),
582            ));
583        }
584
585        // Validate timing measurements
586        for result in &suite.results {
587            if result.success {
588                if result.average_time.is_zero() {
589                    return Err(crate::error::SpecialError::ComputationError(format!(
590                        "Invalid timing for {implementation}",
591                        implementation = result.implementation
592                    )));
593                }
594
595                if result.throughput_ops_per_sec <= 0.0 {
596                    return Err(crate::error::SpecialError::ComputationError(format!(
597                        "Invalid throughput for {implementation}",
598                        implementation = result.implementation
599                    )));
600                }
601            }
602        }
603
604        println!("✓ Benchmarking infrastructure validation passed");
605        println!("  - Generated {} benchmark results", suite.results.len());
606        println!("  - Total benchmark time: {:?}", suite.total_duration);
607
608        Ok(())
609    }
610
611    /// Advanced validation with numerical accuracy testing
612    pub fn validate_advanced_infrastructure() -> SpecialResult<()> {
613        println!("Running advanced benchmarking infrastructure validation...");
614
615        let test_config = BenchmarkConfig {
616            arraysizes: vec![100, 1000],
617            iterations: 3,
618            warmup_iterations: 1,
619            test_gpu: false,
620            test_cpu: true,
621            test_simd: cfg!(feature = "simd"),
622            test_parallel: cfg!(feature = "parallel"),
623            numerical_tolerance: 1e-10,
624        };
625
626        let suite = Self::run_comprehensive_benchmark(&test_config)?;
627
628        // Validate numerical accuracy computations
629        for result in &suite.results {
630            if result.success {
631                if let Some(accuracy) = result.numerical_accuracy {
632                    if accuracy > 1e-6 {
633                        return Err(crate::error::SpecialError::ComputationError(format!(
634                            "Numerical accuracy {accuracy} exceeds threshold for {implementation}",
635                            implementation = result.implementation
636                        )));
637                    }
638                }
639
640                // Validate performance metrics
641                if result.throughput_ops_per_sec <= 0.0 {
642                    return Err(crate::error::SpecialError::ComputationError(format!(
643                        "Invalid throughput for {implementation}: {throughput}",
644                        implementation = result.implementation,
645                        throughput = result.throughput_ops_per_sec
646                    )));
647                }
648
649                if result.average_time.is_zero() {
650                    return Err(crate::error::SpecialError::ComputationError(format!(
651                        "Invalid timing for {implementation}: {timing:?}",
652                        implementation = result.implementation,
653                        timing = result.average_time
654                    )));
655                }
656            }
657        }
658
659        // Test report generation
660        let report = suite.generate_report();
661        if report.len() < 100 {
662            return Err(crate::error::SpecialError::ComputationError(
663                "Generated report is too short".to_string(),
664            ));
665        }
666
667        println!("✓ Advanced benchmarking infrastructure validation passed");
668        println!("  - Numerical accuracy: ✓ Validated");
669        println!("  - Performance metrics: ✓ Validated");
670        println!("  - Report generation: ✓ Validated");
671        println!("  - Error handling: ✓ Validated");
672
673        Ok(())
674    }
675}
676
677impl BenchmarkSuite {
678    /// Generate a comprehensive report
679    pub fn generate_report(&self) -> String {
680        let mut report = String::new();
681
682        report.push_str("====================================\n");
683        report.push_str("   SCIRS2 SPECIAL FUNCTIONS BENCHMARK\n");
684        report.push_str("====================================\n\n");
685
686        // System information
687        report.push_str("System Information:\n");
688        let cpu_info = &self.system_info.cpu_info;
689        report.push_str(&format!("  CPU: {cpu_info}\n"));
690        if let Some(ref gpu_info) = self.system_info.gpu_info {
691            report.push_str(&format!("  GPU: {gpu_info}\n"));
692        }
693        let rust_version = &self.system_info.rust_version;
694        report.push_str(&format!("  Rust: {rust_version}\n"));
695        let features = &self.system_info.feature_flags;
696        report.push_str(&format!("  Features: {features:?}\n"));
697        let total_duration = self.total_duration;
698        report.push_str(&format!("  Total time: {total_duration:?}\n\n"));
699
700        // Results by array size
701        let mut size_groups: HashMap<usize, Vec<&BenchmarkResult>> = HashMap::new();
702        for result in &self.results {
703            size_groups
704                .entry(result.arraysize)
705                .or_default()
706                .push(result);
707        }
708
709        let mut sizes: Vec<_> = size_groups.keys().collect();
710        sizes.sort();
711
712        for &size in sizes {
713            let group = &size_groups[&size];
714
715            report.push_str(&format!("Array Size: {size} elements\n"));
716            report.push_str(&"-".repeat(50));
717            report.push('\n');
718
719            report.push_str(&format!(
720                "{:<12} {:>12} {:>12} {:>12} {:>12}\n",
721                "Implementation", "Time (ms)", "Throughput", "Speedup", "Status"
722            ));
723            report.push_str(&"-".repeat(60));
724            report.push('\n');
725
726            for result in group {
727                let time_ms = result.average_time.as_millis();
728                let throughput = format!("{:.1e} ops/s", result.throughput_ops_per_sec);
729                let speedup = match result.speedup_factor {
730                    Some(factor) => format!("{factor:.2}x"),
731                    None => "baseline".to_string(),
732                };
733                let status = if result.success { "OK" } else { "FAIL" };
734
735                report.push_str(&format!(
736                    "{:<12} {:>12} {:>12} {:>12} {:>12}\n",
737                    result.implementation, time_ms, throughput, speedup, status
738                ));
739
740                if let Some(ref error) = result.error_message {
741                    report.push_str(&format!("             Error: {error}\n"));
742                }
743            }
744
745            report.push('\n');
746        }
747
748        // Performance summary
749        report.push_str("Performance Summary:\n");
750        report.push_str(&"-".repeat(50));
751        report.push('\n');
752
753        let successful_results: Vec<_> = self.results.iter().filter(|r| r.success).collect();
754        if let Some(best_result) = successful_results.iter().max_by(|a, b| {
755            a.speedup_factor
756                .unwrap_or(1.0)
757                .partial_cmp(&b.speedup_factor.unwrap_or(1.0))
758                .expect("Operation failed")
759        }) {
760            report.push_str(&format!(
761                "Best implementation: {} ({:.2}x speedup)\n",
762                best_result.implementation,
763                best_result.speedup_factor.unwrap_or(1.0)
764            ));
765        }
766
767        // GPU-specific information
768        #[cfg(feature = "gpu")]
769        {
770            let gpu_results: Vec<_> = self
771                .results
772                .iter()
773                .filter(|r| r.implementation == "GPU")
774                .collect();
775            let gpu_success_rate = if gpu_results.is_empty() {
776                0.0
777            } else {
778                gpu_results.iter().filter(|r| r.success).count() as f64 / gpu_results.len() as f64
779            };
780
781            report.push_str(&format!(
782                "GPU success rate: {:.1}%\n",
783                gpu_success_rate * 100.0
784            ));
785        }
786
787        report.push('\n');
788        report.push_str("Note: Speedup factors are relative to CPU baseline implementation.\n");
789        report.push_str("Throughput is measured in operations per second.\n");
790
791        report
792    }
793
794    /// Export results to CSV format
795    pub fn export_csv(&self) -> String {
796        let mut csv = String::new();
797
798        // Header
799        csv.push_str("function,implementation,arraysize,avg_time_ms,min_time_ms,max_time_ms,");
800        csv.push_str("std_dev_ms,throughput_ops_per_sec,speedup_factor,success,error\n");
801
802        // Data rows
803        for result in &self.results {
804            csv.push_str(&format!(
805                "{},{},{},{},{},{},{},{},{},{},{}\n",
806                result.function_name,
807                result.implementation,
808                result.arraysize,
809                result.average_time.as_millis(),
810                result.min_time.as_millis(),
811                result.max_time.as_millis(),
812                result.std_dev.as_millis(),
813                result.throughput_ops_per_sec,
814                result
815                    .speedup_factor
816                    .map(|f| f.to_string())
817                    .unwrap_or_default(),
818                result.success,
819                result.error_message.as_deref().unwrap_or("")
820            ));
821        }
822
823        csv
824    }
825}
826
827/// Run a quick benchmark with default settings
828#[allow(dead_code)]
829pub fn quick_benchmark() -> SpecialResult<BenchmarkSuite> {
830    let config = BenchmarkConfig {
831        arraysizes: vec![1000, 10000],
832        iterations: 5,
833        warmup_iterations: 2,
834        ..Default::default()
835    };
836
837    GammaBenchmarks::run_comprehensive_benchmark(&config)
838}
839
840/// Run a comprehensive benchmark with all features
841#[allow(dead_code)]
842pub fn comprehensive_benchmark() -> SpecialResult<BenchmarkSuite> {
843    let config = BenchmarkConfig::default();
844    GammaBenchmarks::run_comprehensive_benchmark(&config)
845}
846
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration must describe a runnable benchmark.
    #[test]
    fn test_benchmark_config_creation() {
        let config = BenchmarkConfig::default();
        assert!(!config.arraysizes.is_empty());
        assert!(config.iterations > 0);
        assert!(config.test_cpu, "CPU baseline must be enabled by default");
    }

    /// System information must always carry a CPU and toolchain description.
    #[test]
    fn test_system_info_collection() {
        let info = SystemInfo::collect();
        assert!(!info.cpu_info.is_empty());
        assert!(!info.rust_version.is_empty());
    }

    /// Pins the exact mean, extremes and population standard deviation.
    #[test]
    fn test_time_statistics() {
        let times = [
            Duration::from_millis(10),
            Duration::from_millis(12),
            Duration::from_millis(11),
            Duration::from_millis(13),
            Duration::from_millis(9),
        ];

        let stats = GammaBenchmarks::calculate_statistics(&times);
        assert_eq!(stats.average_time, Duration::from_millis(11));
        assert_eq!(stats.min_time, Duration::from_millis(9));
        assert_eq!(stats.max_time, Duration::from_millis(13));

        // Squared deviations in ms^2 are 1, 1, 0, 4, 4, so the variance is 2.
        let expected_std_dev_secs = 2.0_f64.sqrt() * 1e-3;
        assert!((stats.std_dev.as_secs_f64() - expected_std_dev_secs).abs() < 1e-8);
    }

    /// An empty sample set must yield zeroed statistics instead of panicking.
    #[test]
    fn test_time_statistics_empty() {
        let stats = GammaBenchmarks::calculate_statistics(&[]);
        assert_eq!(stats.average_time, Duration::ZERO);
        assert_eq!(stats.min_time, Duration::ZERO);
        assert_eq!(stats.max_time, Duration::ZERO);
        assert_eq!(stats.std_dev, Duration::ZERO);
    }
}
875        let stats = GammaBenchmarks::calculate_statistics(&times);
876        assert!(stats.average_time.as_millis() > 0);
877        assert!(stats.min_time <= stats.max_time);
878    }
879}