use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::process::Command;
use std::time::{Duration, Instant};

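/// Configuration for benchmarking scirs2-stats functions against SciPy.
///
/// The defaults below are a starting point; a minimal sketch of overriding them
/// (field names as defined on this struct) could look like:
///
/// ```ignore
/// let config = ScipyComparisonConfig {
///     functions_to_test: vec!["mean".to_string(), "std".to_string()],
///     testsizes: vec![1_000, 100_000],
///     ..ScipyComparisonConfig::default()
/// };
/// ```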
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScipyComparisonConfig {
    pub python_executable: String,
    pub scipy_version: Option<String>,
    pub numpy_version: Option<String>,
    pub temp_dir: String,
    pub accuracy_tolerance: f64,
    pub performance_tolerance: f64,
    pub warmup_iterations: usize,
    pub measurement_iterations: usize,
    pub detailed_accuracy: bool,
    pub compare_memory: bool,
    pub testsizes: Vec<usize>,
    pub functions_to_test: Vec<String>,
}

impl Default for ScipyComparisonConfig {
    fn default() -> Self {
        Self {
            python_executable: "python3".to_string(),
            scipy_version: Some(">=1.9.0".to_string()),
            numpy_version: Some(">=1.21.0".to_string()),
            temp_dir: "/tmp/scirs2_benchmarks".to_string(),
            accuracy_tolerance: 1e-10,
            performance_tolerance: 2.0,
            warmup_iterations: 10,
            measurement_iterations: 100,
            detailed_accuracy: true,
            compare_memory: true,
            testsizes: vec![100, 1000, 10000, 100000],
            functions_to_test: vec![
                "mean".to_string(),
                "std".to_string(),
                "var".to_string(),
                "skew".to_string(),
                "kurtosis".to_string(),
                "pearsonr".to_string(),
                "spearmanr".to_string(),
                "ttest_ind".to_string(),
                "ttest_1samp".to_string(),
                "norm_pdf".to_string(),
                "norm_cdf".to_string(),
            ],
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScipyComparisonReport {
    pub timestamp: String,
    pub config: ScipyComparisonConfig,
    pub system_info: SystemInfo,
    pub scipy_environment: ScipyEnvironmentInfo,
    pub function_comparisons: Vec<FunctionComparison>,
    pub summary: ComparisonSummary,
    pub performance_analysis: PerformanceAnalysis,
    pub accuracy_analysis: AccuracyAnalysis,
    pub recommendations: Vec<ComparisonRecommendation>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    pub os: String,
    pub cpu: String,
    pub memory_gb: f64,
    pub rust_version: String,
    pub scirs2_version: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScipyEnvironmentInfo {
    pub python_version: String,
    pub scipy_version: String,
    pub numpy_version: String,
    pub blas_info: String,
    pub packages: HashMap<String, String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionComparison {
    pub function_name: String,
    pub datasize: usize,
    pub performance: PerformanceComparison,
    pub accuracy: AccuracyComparison,
    pub memory: Option<MemoryComparison>,
    pub status: ComparisonStatus,
    pub error_details: Option<String>,
}

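/// Timing comparison for a single function at a single data size.
///
/// `ratio` is computed in `compare_function` as `scirs2_time_ns / scipy_time_ns`,
/// so values below 1.0 mean scirs2 was faster and values above 1.0 mean SciPy was faster.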
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceComparison {
    pub scirs2_time_ns: f64,
    pub scipy_time_ns: f64,
    pub ratio: f64,
    pub significance: PerformanceSignificance,
    pub confidence_interval: (f64, f64),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccuracyComparison {
    pub absolute_difference: f64,
    pub relative_difference: f64,
    pub max_element_difference: f64,
    pub elements_compared: usize,
    pub elements_within_tolerance: usize,
    pub assessment: AccuracyAssessment,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryComparison {
    pub scirs2_memory: usize,
    pub scipy_memory: usize,
    pub ratio: f64,
    pub assessment: MemoryEfficiencyAssessment,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComparisonStatus {
    Passed,
    PassedWithWarnings { warnings: Vec<String> },
    FailedAccuracy { details: String },
    FailedPerformance { details: String },
    Error { error: String },
    Skipped { reason: String },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PerformanceSignificance {
    NotSignificant,
    ScirsFaster { confidence: f64 },
    ScipyFaster { confidence: f64 },
    InsufficientData,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AccuracyAssessment {
    Excellent,
    Good,
    Acceptable,
    Poor,
    Unacceptable,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MemoryEfficiencyAssessment {
    MoreEfficient,
    Similar,
    LessEfficient,
    MuchLessEfficient,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonSummary {
    pub total_tests: usize,
    pub tests_passed: usize,
    pub tests_with_warnings: usize,
    pub tests_failed: usize,
    pub pass_rate: f64,
    pub performance_issues: Vec<String>,
    pub accuracy_issues: Vec<String>,
    pub performance_rating: PerformanceRating,
    pub accuracy_rating: AccuracyRating,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceAnalysis {
    pub average_ratio: f64,
    pub ratio_std_dev: f64,
    pub faster_functions: Vec<(String, f64)>,
    pub slower_functions: Vec<(String, f64)>,
    pub performance_bysize: HashMap<usize, f64>,
    pub trends: PerformanceTrends,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccuracyAnalysis {
    pub average_relative_diff: f64,
    pub max_relative_diff: f64,
    pub problematic_functions: Vec<(String, f64)>,
    pub accuracy_bysize: HashMap<usize, f64>,
    pub stability_assessment: NumericalStabilityAssessment,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PerformanceRating {
    Excellent,
    Good,
    Acceptable,
    Poor,
    Unacceptable,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AccuracyRating {
    Excellent,
    Good,
    Acceptable,
    Poor,
    Unacceptable,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrends {
    pub scaling_analysis: ScalingAnalysis,
    pub stability_analysis: StabilityAnalysis,
    pub regression_analysis: RegressionAnalysis,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalingAnalysis {
    pub relative_scaling: f64,
    pub complexity_assessment: ComplexityAssessment,
    pub crossover_points: Vec<usize>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StabilityAnalysis {
    pub performance_cv: f64,
    pub outliers_detected: usize,
    pub stability_rating: StabilityRating,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionAnalysis {
    pub regressions_detected: Vec<PerformanceRegression>,
    pub accuracy_regressions: Vec<AccuracyRegression>,
    pub regression_risk: RegressionRisk,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComplexityAssessment {
    Better,
    Similar,
    Worse,
    Unknown,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StabilityRating {
    VeryStable,
    Stable,
    ModeratelyStable,
    Unstable,
    VeryUnstable,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceRegression {
    pub function_name: String,
    pub regression_factor: f64,
    pub confidence: f64,
    pub suspected_cause: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccuracyRegression {
    pub function_name: String,
    pub accuracy_loss: f64,
    pub severity: AccuracyRegressionSeverity,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RegressionRisk {
    Low,
    Medium,
    High,
    Critical,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AccuracyRegressionSeverity {
    Minor,
    Moderate,
    Major,
    Critical,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NumericalStabilityAssessment {
    pub stability_rating: NumericalStabilityRating,
    pub unstable_functions: Vec<String>,
    pub condition_number_analysis: ConditionNumberAnalysis,
    pub precision_loss_analysis: PrecisionLossAnalysis,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericalStabilityRating {
    Excellent,
    Good,
    Acceptable,
    Poor,
    Unacceptable,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionNumberAnalysis {
    pub sensitive_functions: Vec<String>,
    pub thresholds: HashMap<String, f64>,
    pub recommendations: Vec<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrecisionLossAnalysis {
    pub average_loss: f64,
    pub max_loss: f64,
    pub problematic_functions: Vec<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonRecommendation {
    pub priority: RecommendationPriority,
    pub category: RecommendationCategory,
    pub description: String,
    pub affected_functions: Vec<String>,
    pub complexity: ImplementationComplexity,
    pub expected_impact: ExpectedImpact,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RecommendationPriority {
    Critical,
    High,
    Medium,
    Low,
    NiceToHave,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RecommendationCategory {
    Performance,
    Accuracy,
    Memory,
    APICompatibility,
    NumericalStability,
    Testing,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ImplementationComplexity {
    Simple,
    Moderate,
    Complex,
    VeryComplex,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExpectedImpact {
    pub performance_improvement: Option<f64>,
    pub accuracy_improvement: Option<f64>,
    pub memory_reduction: Option<f64>,
    pub implementation_effort: f64,
}
pub struct ScipyBenchmarkComparison {
    config: ScipyComparisonConfig,
    temp_dir: String,
}

impl ScipyBenchmarkComparison {
    pub fn new(config: ScipyComparisonConfig) -> StatsResult<Self> {
        fs::create_dir_all(&config.temp_dir).map_err(|e| {
            StatsError::ComputationError(format!("Failed to create temp directory: {}", e))
        })?;

        Ok(Self {
            temp_dir: config.temp_dir.clone(),
            config,
        })
    }

    pub fn default() -> StatsResult<Self> {
        Self::new(ScipyComparisonConfig::default())
    }

    pub fn run_comprehensive_comparison(&self) -> StatsResult<ScipyComparisonReport> {
        let _start_time = Instant::now();

        let scipy_env = self.verify_scipy_environment()?;

        let system_info = self.collect_system_info();

        let mut function_comparisons = Vec::new();

        for function_name in &self.config.functions_to_test {
            for &datasize in &self.config.testsizes {
                match self.compare_function(function_name, datasize) {
                    Ok(comparison) => function_comparisons.push(comparison),
                    Err(e) => {
                        function_comparisons.push(FunctionComparison {
                            function_name: function_name.clone(),
                            datasize,
                            performance: PerformanceComparison {
                                scirs2_time_ns: 0.0,
                                scipy_time_ns: 0.0,
                                ratio: 0.0,
                                significance: PerformanceSignificance::InsufficientData,
                                confidence_interval: (0.0, 0.0),
                            },
                            accuracy: AccuracyComparison {
                                absolute_difference: 0.0,
                                relative_difference: 0.0,
                                max_element_difference: 0.0,
                                elements_compared: 0,
                                elements_within_tolerance: 0,
                                assessment: AccuracyAssessment::Poor,
                            },
                            memory: None,
                            status: ComparisonStatus::Error {
                                error: e.to_string(),
                            },
                            error_details: Some(e.to_string()),
                        });
                    }
                }
            }
        }

        let summary = self.generate_summary(&function_comparisons);
        let performance_analysis = self.analyze_performance(&function_comparisons);
        let accuracy_analysis = self.analyze_accuracy(&function_comparisons);
        let recommendations = self.generate_recommendations(
            &function_comparisons,
            &performance_analysis,
            &accuracy_analysis,
        );

        Ok(ScipyComparisonReport {
            timestamp: chrono::Utc::now().to_rfc3339(),
            config: self.config.clone(),
            system_info,
            scipy_environment: scipy_env,
            function_comparisons,
            summary,
            performance_analysis,
            accuracy_analysis,
            recommendations,
        })
    }

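    /// Runs a small Python script in a subprocess and parses a JSON blob describing the
    /// interpreter, SciPy, NumPy, and BLAS configuration of the environment under test.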
    fn verify_scipy_environment(&self) -> StatsResult<ScipyEnvironmentInfo> {
        // Keep stdout JSON-only: np.show_config() prints to stdout, so the script captures
        // that output into a buffer instead of letting it corrupt the JSON payload.
        let script = r#"
import sys
import io
import contextlib
import scipy
import numpy as np
import json

_blas_buf = io.StringIO()
with contextlib.redirect_stdout(_blas_buf):
    np.show_config()

info = {
    'python_version': sys.version,
    'scipy_version': scipy.__version__,
    'numpy_version': np.__version__,
    'blas_info': _blas_buf.getvalue(),
    'packages': {}
}

try:
    import pandas
    info['packages']['pandas'] = pandas.__version__
except ImportError:
    pass

try:
    import sklearn
    info['packages']['sklearn'] = sklearn.__version__
except ImportError:
    pass

print(json.dumps(info))
"#;

        let script_path = format!("{}/verify_env.py", self.temp_dir);
        fs::write(&script_path, script).map_err(|e| {
            StatsError::ComputationError(format!("Failed to write verification script: {}", e))
        })?;

        let output = Command::new(&self.config.python_executable)
            .arg(&script_path)
            .output()
            .map_err(|e| {
                StatsError::ComputationError(format!("Failed to execute Python: {}", e))
            })?;

        if !output.status.success() {
            return Err(StatsError::ComputationError(format!(
                "Python script failed: {}",
                String::from_utf8_lossy(&output.stderr)
            )));
        }

        let output_str = String::from_utf8_lossy(&output.stdout);
        let info: serde_json::Value = serde_json::from_str(&output_str).map_err(|e| {
            StatsError::ComputationError(format!("Failed to parse environment info: {}", e))
        })?;

        Ok(ScipyEnvironmentInfo {
            python_version: info["python_version"]
                .as_str()
                .unwrap_or("unknown")
                .to_string(),
            scipy_version: info["scipy_version"]
                .as_str()
                .unwrap_or("unknown")
                .to_string(),
            numpy_version: info["numpy_version"]
                .as_str()
                .unwrap_or("unknown")
                .to_string(),
            blas_info: info["blas_info"].as_str().unwrap_or("unknown").to_string(),
            packages: info["packages"]
                .as_object()
                .unwrap_or(&serde_json::Map::new())
                .iter()
                .map(|(k, v)| (k.clone(), v.as_str().unwrap_or("unknown").to_string()))
                .collect(),
        })
    }

    fn collect_system_info(&self) -> SystemInfo {
        SystemInfo {
            os: std::env::consts::OS.to_string(),
            // Placeholder values: CPU model and memory size are not probed here.
            cpu: "Generic CPU".to_string(),
            memory_gb: 8.0,
            rust_version: std::env::var("RUSTC_VERSION")
                .unwrap_or_else(|_| "unknown".to_string()),
            scirs2_version: std::env::var("CARGO_PKG_VERSION")
                .unwrap_or_else(|_| "unknown".to_string()),
        }
    }

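    /// Compares one function at one data size: generates shared test data, times the scirs2
    /// and SciPy implementations, and evaluates their numerical agreement.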
    fn compare_function(
        &self,
        function_name: &str,
        datasize: usize,
    ) -> StatsResult<FunctionComparison> {
        let testdata = self.generate_testdata(datasize)?;

        let scirs2_result = self.benchmark_scirs2_function(function_name, &testdata)?;

        let scipy_result = self.benchmark_scipy_function(function_name, &testdata)?;

        let performance = PerformanceComparison {
            scirs2_time_ns: scirs2_result.execution_time.as_nanos() as f64,
            scipy_time_ns: scipy_result.execution_time.as_nanos() as f64,
            ratio: scirs2_result.execution_time.as_nanos() as f64
                / scipy_result.execution_time.as_nanos() as f64,
            // Placeholder values: no significance test is performed on single runs.
            significance: PerformanceSignificance::NotSignificant,
            confidence_interval: (0.8, 1.2),
        };

        let accuracy = self.compare_accuracy(&scirs2_result.result, &scipy_result.result)?;

        let status = self.determine_comparison_status(&performance, &accuracy);

        Ok(FunctionComparison {
            function_name: function_name.to_string(),
            datasize,
            performance,
            accuracy,
            memory: None,
            status,
            error_details: None,
        })
    }

    fn generate_testdata(&self, size: usize) -> StatsResult<TestData> {
        use scirs2_core::random::{Distribution, Normal};

        let mut rng = scirs2_core::random::thread_rng();
        let normal = Normal::new(0.0, 1.0).map_err(|e| {
            StatsError::ComputationError(format!("Failed to create normal distribution: {}", e))
        })?;

        let data: Vec<f64> = (0..size).map(|_| normal.sample(&mut rng)).collect();

        Ok(TestData {
            primary: Array1::from_vec(data.clone()),
            secondary: Array1::from_vec(
                data.iter()
                    .map(|x| x + 0.1 * normal.sample(&mut rng))
                    .collect(),
            ),
            matrix: Array2::from_shape_fn((size.min(100), size.min(100)), |(i, j)| {
                normal.sample(&mut rng) + 0.1 * (i + j) as f64
            }),
        })
    }

    fn benchmark_scirs2_function(
        &self,
        function_name: &str,
        testdata: &TestData,
    ) -> StatsResult<BenchmarkResult> {
        let start_time = Instant::now();

        let result = match function_name {
            "mean" => {
                vec![crate::descriptive::mean(&testdata.primary.view())?]
            }
            "std" => {
                vec![crate::descriptive::std(&testdata.primary.view(), 1, None)?]
            }
            "var" => {
                vec![crate::descriptive::var(&testdata.primary.view(), 1, None)?]
            }
            "skew" => {
                vec![crate::descriptive::skew(
                    &testdata.primary.view(),
                    false,
                    None,
                )?]
            }
            "kurtosis" => {
                vec![crate::descriptive::kurtosis(
                    &testdata.primary.view(),
                    true,
                    false,
                    None,
                )?]
            }
            "pearsonr" => {
                let corr = crate::correlation::pearson_r(
                    &testdata.primary.view(),
                    &testdata.secondary.view(),
                )?;
                vec![corr]
            }
            "spearmanr" => {
                let corr = crate::correlation::spearman_r(
                    &testdata.primary.view(),
                    &testdata.secondary.view(),
                )?;
                vec![corr]
            }
            "ttest_1samp" => {
                let result = crate::tests::ttest::ttest_1samp(
                    &testdata.primary.view(),
                    0.0,
                    crate::tests::ttest::Alternative::TwoSided,
                    "propagate",
                )?;
                vec![result.statistic, result.pvalue]
            }
            _ => {
                return Err(StatsError::NotImplemented(format!(
                    "Function {} not implemented in benchmark",
                    function_name
                )));
            }
        };

        let execution_time = start_time.elapsed();

        Ok(BenchmarkResult {
            result,
            execution_time,
        })
    }

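    /// Writes a generated SciPy script to the temp directory, runs it with the configured
    /// Python interpreter, and parses the JSON result (values plus wall-clock time).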
    fn benchmark_scipy_function(
        &self,
        function_name: &str,
        testdata: &TestData,
    ) -> StatsResult<BenchmarkResult> {
        let script = self.generate_scipy_script(function_name, testdata)?;
        let script_path = format!("{}/scipy_benchmark_{}.py", self.temp_dir, function_name);

        fs::write(&script_path, script).map_err(|e| {
            StatsError::ComputationError(format!("Failed to write SciPy script: {}", e))
        })?;

        let output = Command::new(&self.config.python_executable)
            .arg(&script_path)
            .output()
            .map_err(|e| {
                StatsError::ComputationError(format!("Failed to execute SciPy script: {}", e))
            })?;

        if !output.status.success() {
            return Err(StatsError::ComputationError(format!(
                "SciPy script failed: {}",
                String::from_utf8_lossy(&output.stderr)
            )));
        }

        let output_str = String::from_utf8_lossy(&output.stdout);
        let result: serde_json::Value = serde_json::from_str(&output_str).map_err(|e| {
            StatsError::ComputationError(format!("Failed to parse SciPy result: {}", e))
        })?;

        let execution_time =
            Duration::from_secs_f64(result["execution_time"].as_f64().unwrap_or(0.0));

        let result_values: Vec<f64> = result["result"]
            .as_array()
            .unwrap_or(&Vec::new())
            .iter()
            .filter_map(|v| v.as_f64())
            .collect();

        Ok(BenchmarkResult {
            result: result_values,
            execution_time,
        })
    }

    fn generate_scipy_script(
        &self,
        function_name: &str,
        testdata: &TestData,
    ) -> StatsResult<String> {
        let data_primary: Vec<String> = testdata.primary.iter().map(|x| x.to_string()).collect();
        let data_secondary: Vec<String> =
            testdata.secondary.iter().map(|x| x.to_string()).collect();

        let script = match function_name {
            "mean" => {
                format!(
                    r#"
import numpy as np
import time
import json

data = np.array([{}])

start_time = time.perf_counter()
result = np.mean(data)
execution_time = time.perf_counter() - start_time

output = {{
    'result': [float(result)],
    'execution_time': execution_time
}}

print(json.dumps(output))
"#,
                    data_primary.join(", ")
                )
            }
            "std" => {
                format!(
                    r#"
import numpy as np
import time
import json

data = np.array([{}])

start_time = time.perf_counter()
result = np.std(data, ddof=1)
execution_time = time.perf_counter() - start_time

output = {{
    'result': [float(result)],
    'execution_time': execution_time
}}

print(json.dumps(output))
"#,
                    data_primary.join(", ")
                )
            }
            "pearsonr" => {
                format!(
                    r#"
import numpy as np
import scipy.stats
import time
import json

data1 = np.array([{}])
data2 = np.array([{}])

start_time = time.perf_counter()
corr, p_value = scipy.stats.pearsonr(data1, data2)
execution_time = time.perf_counter() - start_time

output = {{
    'result': [float(corr)],
    'execution_time': execution_time
}}

print(json.dumps(output))
"#,
                    data_primary.join(", "),
                    data_secondary.join(", ")
                )
            }
            _ => {
                // Only a subset of functions has a SciPy script template; the remaining
                // configured functions surface as NotImplemented and are recorded as errors
                // in the report.
                return Err(StatsError::NotImplemented(format!(
                    "SciPy script generation not implemented for {}",
                    function_name
                )));
            }
        };

        Ok(script)
    }

    fn compare_accuracy(
        &self,
        scirs2_result: &[f64],
        scipy_result: &[f64],
    ) -> StatsResult<AccuracyComparison> {
        if scirs2_result.len() != scipy_result.len() {
            return Ok(AccuracyComparison {
                absolute_difference: f64::INFINITY,
                relative_difference: f64::INFINITY,
                max_element_difference: f64::INFINITY,
                elements_compared: 0,
                elements_within_tolerance: 0,
                assessment: AccuracyAssessment::Unacceptable,
            });
        }

        let mut abs_diffs = Vec::new();
        let mut rel_diffs = Vec::new();
        let mut within_tolerance = 0;

        for (s, r) in scirs2_result.iter().zip(scipy_result.iter()) {
            let abs_diff = (s - r).abs();
            let rel_diff = if r.abs() > 1e-10 {
                abs_diff / r.abs()
            } else {
                abs_diff
            };

            abs_diffs.push(abs_diff);
            rel_diffs.push(rel_diff);

            if abs_diff < self.config.accuracy_tolerance
                || rel_diff < self.config.accuracy_tolerance
            {
                within_tolerance += 1;
            }
        }

        let avg_abs_diff = abs_diffs.iter().sum::<f64>() / abs_diffs.len() as f64;
        let avg_rel_diff = rel_diffs.iter().sum::<f64>() / rel_diffs.len() as f64;
        let max_element_diff = abs_diffs.iter().fold(0.0f64, |acc, &x| acc.max(x));

        let assessment = if within_tolerance == scirs2_result.len() {
            if avg_rel_diff < 1e-14 {
                AccuracyAssessment::Excellent
            } else if avg_rel_diff < 1e-10 {
                AccuracyAssessment::Good
            } else {
                AccuracyAssessment::Acceptable
            }
        } else if within_tolerance as f64 / scirs2_result.len() as f64 > 0.9 {
            AccuracyAssessment::Acceptable
        } else if within_tolerance as f64 / scirs2_result.len() as f64 > 0.5 {
            AccuracyAssessment::Poor
        } else {
            AccuracyAssessment::Unacceptable
        };

        Ok(AccuracyComparison {
            absolute_difference: avg_abs_diff,
            relative_difference: avg_rel_diff,
            max_element_difference: max_element_diff,
            elements_compared: scirs2_result.len(),
            elements_within_tolerance: within_tolerance,
            assessment,
        })
    }

    fn determine_comparison_status(
        &self,
        performance: &PerformanceComparison,
        accuracy: &AccuracyComparison,
    ) -> ComparisonStatus {
        let mut warnings = Vec::new();

        if matches!(
            accuracy.assessment,
            AccuracyAssessment::Unacceptable | AccuracyAssessment::Poor
        ) {
            return ComparisonStatus::FailedAccuracy {
                details: format!("Relative difference: {:.2e}", accuracy.relative_difference),
            };
        }

        if performance.ratio > self.config.performance_tolerance {
            return ComparisonStatus::FailedPerformance {
                details: format!(
                    "Performance ratio: {:.2} (limit: {:.2})",
                    performance.ratio, self.config.performance_tolerance
                ),
            };
        }

        if matches!(accuracy.assessment, AccuracyAssessment::Acceptable) {
            warnings.push("Accuracy is only acceptable".to_string());
        }

        if performance.ratio > 1.5 {
            warnings.push(format!(
                "Performance is {:.1}x slower than SciPy",
                performance.ratio
            ));
        }

        if warnings.is_empty() {
            ComparisonStatus::Passed
        } else {
            ComparisonStatus::PassedWithWarnings { warnings }
        }
    }

    fn generate_summary(&self, comparisons: &[FunctionComparison]) -> ComparisonSummary {
        let total_tests = comparisons.len();
        let tests_passed = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::Passed))
            .count();
        let tests_with_warnings = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::PassedWithWarnings { .. }))
            .count();
        let tests_failed = total_tests - tests_passed - tests_with_warnings;

        let pass_rate = if total_tests > 0 {
            (tests_passed + tests_with_warnings) as f64 / total_tests as f64
        } else {
            0.0
        };

        let performance_issues: Vec<String> = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::FailedPerformance { .. }))
            .map(|c| c.function_name.clone())
            .collect();

        let accuracy_issues: Vec<String> = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::FailedAccuracy { .. }))
            .map(|c| c.function_name.clone())
            .collect();

        // Guard against an empty comparison set to avoid NaN averages.
        let avg_performance_ratio = if total_tests > 0 {
            comparisons.iter().map(|c| c.performance.ratio).sum::<f64>() / total_tests as f64
        } else {
            0.0
        };

        let performance_rating = if avg_performance_ratio < 0.8 {
            PerformanceRating::Excellent
        } else if avg_performance_ratio < 1.2 {
            PerformanceRating::Good
        } else if avg_performance_ratio < 2.0 {
            PerformanceRating::Acceptable
        } else if avg_performance_ratio < 5.0 {
            PerformanceRating::Poor
        } else {
            PerformanceRating::Unacceptable
        };

        let avg_relative_diff = if total_tests > 0 {
            comparisons
                .iter()
                .map(|c| c.accuracy.relative_difference)
                .sum::<f64>()
                / total_tests as f64
        } else {
            0.0
        };

        let accuracy_rating = if avg_relative_diff < 1e-14 {
            AccuracyRating::Excellent
        } else if avg_relative_diff < 1e-10 {
            AccuracyRating::Good
        } else if avg_relative_diff < 1e-6 {
            AccuracyRating::Acceptable
        } else if avg_relative_diff < 1e-3 {
            AccuracyRating::Poor
        } else {
            AccuracyRating::Unacceptable
        };

        ComparisonSummary {
            total_tests,
            tests_passed,
            tests_with_warnings,
            tests_failed,
            pass_rate,
            performance_issues,
            accuracy_issues,
            performance_rating,
            accuracy_rating,
        }
    }

    fn analyze_performance(&self, comparisons: &[FunctionComparison]) -> PerformanceAnalysis {
        let ratios: Vec<f64> = comparisons.iter().map(|c| c.performance.ratio).collect();

        let average_ratio = ratios.iter().sum::<f64>() / ratios.len() as f64;
        let variance = ratios
            .iter()
            .map(|r| (r - average_ratio).powi(2))
            .sum::<f64>()
            / ratios.len() as f64;
        let ratio_std_dev = variance.sqrt();

        let faster_functions: Vec<(String, f64)> = comparisons
            .iter()
            .filter(|c| c.performance.ratio < 1.0)
            .map(|c| (c.function_name.clone(), c.performance.ratio))
            .collect();

        let slower_functions: Vec<(String, f64)> = comparisons
            .iter()
            .filter(|c| c.performance.ratio > 1.0)
            .map(|c| (c.function_name.clone(), c.performance.ratio))
            .collect();

        // Per-size breakdown and trend detection are not implemented yet; the placeholders
        // below keep the report structure stable.
        let performance_bysize = HashMap::new();

        let trends = PerformanceTrends {
            scaling_analysis: ScalingAnalysis {
                relative_scaling: 1.0,
                complexity_assessment: ComplexityAssessment::Similar,
                crossover_points: Vec::new(),
            },
            stability_analysis: StabilityAnalysis {
                performance_cv: ratio_std_dev / average_ratio,
                outliers_detected: 0,
                stability_rating: StabilityRating::Stable,
            },
            regression_analysis: RegressionAnalysis {
                regressions_detected: Vec::new(),
                accuracy_regressions: Vec::new(),
                regression_risk: RegressionRisk::Low,
            },
        };

        PerformanceAnalysis {
            average_ratio,
            ratio_std_dev,
            faster_functions,
            slower_functions,
            performance_bysize,
            trends,
        }
    }

    fn analyze_accuracy(&self, comparisons: &[FunctionComparison]) -> AccuracyAnalysis {
        let relative_diffs: Vec<f64> = comparisons
            .iter()
            .map(|c| c.accuracy.relative_difference)
            .collect();

        let average_relative_diff =
            relative_diffs.iter().sum::<f64>() / relative_diffs.len() as f64;
        let max_relative_diff = relative_diffs.iter().fold(0.0f64, |acc, &x| acc.max(x));

        let problematic_functions: Vec<(String, f64)> = comparisons
            .iter()
            .filter(|c| c.accuracy.relative_difference > self.config.accuracy_tolerance)
            .map(|c| (c.function_name.clone(), c.accuracy.relative_difference))
            .collect();

        // Per-size accuracy breakdown is not implemented yet; an empty map keeps the report
        // structure stable.
        let accuracy_bysize = HashMap::new();

        let stability_assessment = NumericalStabilityAssessment {
            stability_rating: if max_relative_diff < 1e-10 {
                NumericalStabilityRating::Excellent
            } else if max_relative_diff < 1e-6 {
                NumericalStabilityRating::Good
            } else {
                NumericalStabilityRating::Acceptable
            },
            unstable_functions: problematic_functions
                .iter()
                .map(|(name, _)| name.clone())
                .collect(),
            condition_number_analysis: ConditionNumberAnalysis {
                sensitive_functions: Vec::new(),
                thresholds: HashMap::new(),
                recommendations: Vec::new(),
            },
            precision_loss_analysis: PrecisionLossAnalysis {
                average_loss: average_relative_diff,
                max_loss: max_relative_diff,
                problematic_functions: problematic_functions
                    .iter()
                    .map(|(name, _)| name.clone())
                    .collect(),
            },
        };

        AccuracyAnalysis {
            average_relative_diff,
            max_relative_diff,
            problematic_functions,
            accuracy_bysize,
            stability_assessment,
        }
    }

    fn generate_recommendations(
        &self,
        comparisons: &[FunctionComparison],
        performance_analysis: &PerformanceAnalysis,
        accuracy_analysis: &AccuracyAnalysis,
    ) -> Vec<ComparisonRecommendation> {
        let mut recommendations = Vec::new();

        if performance_analysis.average_ratio > 2.0 {
            recommendations.push(ComparisonRecommendation {
                priority: RecommendationPriority::High,
                category: RecommendationCategory::Performance,
                description: "Overall performance is significantly slower than SciPy. Consider SIMD optimizations and algorithm improvements.".to_string(),
                affected_functions: performance_analysis.slower_functions.iter().map(|(name, _)| name.clone()).collect(),
                complexity: ImplementationComplexity::Moderate,
                expected_impact: ExpectedImpact {
                    performance_improvement: Some(2.0),
                    accuracy_improvement: None,
                    memory_reduction: None,
                    implementation_effort: 20.0,
                },
            });
        }

        if accuracy_analysis.max_relative_diff > 1e-6 {
            recommendations.push(ComparisonRecommendation {
                priority: RecommendationPriority::Critical,
                category: RecommendationCategory::Accuracy,
                description: "Some functions have significant accuracy differences compared to SciPy. Review numerical algorithms.".to_string(),
                affected_functions: accuracy_analysis.problematic_functions.iter().map(|(name, _)| name.clone()).collect(),
                complexity: ImplementationComplexity::Complex,
                expected_impact: ExpectedImpact {
                    performance_improvement: None,
                    accuracy_improvement: Some(10.0),
                    memory_reduction: None,
                    implementation_effort: 15.0,
                },
            });
        }

        for comparison in comparisons {
            if comparison.performance.ratio > 5.0 {
                recommendations.push(ComparisonRecommendation {
                    priority: RecommendationPriority::High,
                    category: RecommendationCategory::Performance,
                    description: format!(
                        "Function '{}' is significantly slower than SciPy",
                        comparison.function_name
                    ),
                    affected_functions: vec![comparison.function_name.clone()],
                    complexity: ImplementationComplexity::Moderate,
                    expected_impact: ExpectedImpact {
                        performance_improvement: Some(3.0),
                        accuracy_improvement: None,
                        memory_reduction: None,
                        implementation_effort: 5.0,
                    },
                });
            }
        }

        recommendations
    }
}

#[derive(Debug, Clone)]
struct TestData {
    primary: Array1<f64>,
    secondary: Array1<f64>,
    #[allow(dead_code)]
    matrix: Array2<f64>,
}

#[derive(Debug, Clone)]
struct BenchmarkResult {
    result: Vec<f64>,
    execution_time: Duration,
}

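/// Convenience entry point that runs the full comparison with default settings.
///
/// A minimal sketch of calling it (assuming SciPy and NumPy are installed for `python3`):
///
/// ```ignore
/// let report = run_scipy_comparison()?;
/// println!("{} of {} tests passed", report.summary.tests_passed, report.summary.total_tests);
/// ```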
#[allow(dead_code)]
pub fn run_scipy_comparison() -> StatsResult<ScipyComparisonReport> {
    let comparison = ScipyBenchmarkComparison::default()?;
    comparison.run_comprehensive_comparison()
}

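/// Runs the comparison for a caller-supplied list of function names, keeping all other
/// configuration at its defaults.
///
/// A minimal sketch (function names as used by `ScipyComparisonConfig::functions_to_test`):
///
/// ```ignore
/// let report = run_function_comparison(vec!["mean".to_string(), "pearsonr".to_string()])?;
/// ```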
#[allow(dead_code)]
pub fn run_function_comparison(functions: Vec<String>) -> StatsResult<ScipyComparisonReport> {
    let mut config = ScipyComparisonConfig::default();
    config.functions_to_test = functions;

    let comparison = ScipyBenchmarkComparison::new(config)?;
    comparison.run_comprehensive_comparison()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_scipy_comparison_config() {
        let config = ScipyComparisonConfig::default();
        assert!(!config.functions_to_test.is_empty());
        assert!(config.accuracy_tolerance > 0.0);
        assert!(config.performance_tolerance > 1.0);
    }

    #[test]
    fn test_testdata_generation() {
        let comparison =
            ScipyBenchmarkComparison::default().expect("failed to create comparison harness");
        let testdata = comparison
            .generate_testdata(100)
            .expect("failed to generate test data");

        assert_eq!(testdata.primary.len(), 100);
        assert_eq!(testdata.secondary.len(), 100);
        assert_eq!(testdata.matrix.nrows(), 100);
    }

    #[test]
    fn test_accuracy_comparison() {
        let comparison =
            ScipyBenchmarkComparison::default().expect("failed to create comparison harness");

        let scirs2_result = vec![1.0, 2.0, 3.0];
        let scipy_result = vec![1.000000000001, 2.000000000001, 3.000000000001];

        let accuracy = comparison
            .compare_accuracy(&scirs2_result, &scipy_result)
            .expect("accuracy comparison failed");
        assert!(matches!(
            accuracy.assessment,
            AccuracyAssessment::Excellent | AccuracyAssessment::Good
        ));
    }

    #[test]
    fn test_performance_comparison() {
        let performance = PerformanceComparison {
            scirs2_time_ns: 1000.0,
            scipy_time_ns: 800.0,
            ratio: 1.25,
            significance: PerformanceSignificance::NotSignificant,
            confidence_interval: (1.0, 1.5),
        };

        assert!(performance.ratio > 1.0);
    }

    #[test]
    fn test_recommendation_generation() {
        let config = ScipyComparisonConfig::default();
        assert!(config.performance_tolerance >= 1.0);
        assert!(config.accuracy_tolerance > 0.0);
    }
}