Skip to main content

scirs2_stats/
scipy_benchmark_comparison.rs

//! SciPy benchmark comparison framework for scirs2-stats v1.0.0
//!
//! This module provides comprehensive benchmarking against SciPy to validate
//! performance, accuracy, and API compatibility. It includes automated test
//! generation, statistical validation, and performance regression detection.
6
7use crate::error::{StatsError, StatsResult};
8use scirs2_core::ndarray::{Array1, Array2};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fs;
12use std::process::Command;
13use std::time::{Duration, Instant};
14
15/// Configuration for SciPy comparison benchmarks
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct ScipyComparisonConfig {
18    /// Python executable path
19    pub python_executable: String,
20    /// SciPy version requirement
21    pub scipy_version: Option<String>,
22    /// NumPy version requirement
23    pub numpy_version: Option<String>,
24    /// Temporary directory for test scripts
25    pub temp_dir: String,
26    /// Accuracy tolerance for numerical comparisons
27    pub accuracy_tolerance: f64,
28    /// Performance tolerance (ratio to SciPy)
29    pub performance_tolerance: f64,
30    /// Number of warmup iterations
31    pub warmup_iterations: usize,
32    /// Number of measurement iterations
33    pub measurement_iterations: usize,
34    /// Enable detailed accuracy analysis
35    pub detailed_accuracy: bool,
36    /// Enable memory usage comparison
37    pub compare_memory: bool,
38    /// Test data sizes
39    pub testsizes: Vec<usize>,
40    /// Functions to benchmark
41    pub functions_to_test: Vec<String>,
42}
43
44impl Default for ScipyComparisonConfig {
45    fn default() -> Self {
46        Self {
47            python_executable: "python3".to_string(),
48            scipy_version: Some(">=1.9.0".to_string()),
49            numpy_version: Some(">=1.21.0".to_string()),
50            temp_dir: {
51                let mut p = std::env::temp_dir();
52                p.push("scirs2_benchmarks");
53                p.to_string_lossy().into_owned()
54            },
55            accuracy_tolerance: 1e-10,
56            performance_tolerance: 2.0, // Allow 2x slower than SciPy
57            warmup_iterations: 10,
58            measurement_iterations: 100,
59            detailed_accuracy: true,
60            compare_memory: true,
61            testsizes: vec![100, 1000, 10000, 100000],
62            functions_to_test: vec![
63                "mean".to_string(),
64                "std".to_string(),
65                "var".to_string(),
66                "skew".to_string(),
67                "kurtosis".to_string(),
68                "pearsonr".to_string(),
69                "spearmanr".to_string(),
70                "ttest_ind".to_string(),
71                "ttest_1samp".to_string(),
72                "norm_pdf".to_string(),
73                "norm_cdf".to_string(),
74            ],
75        }
76    }
77}
78
79/// Results of SciPy comparison benchmarking
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct ScipyComparisonReport {
82    /// Timestamp of the comparison
83    pub timestamp: String,
84    /// Configuration used
85    pub config: ScipyComparisonConfig,
86    /// System information
87    pub system_info: SystemInfo,
88    /// SciPy environment info
89    pub scipy_environment: ScipyEnvironmentInfo,
90    /// Individual function comparison results
91    pub function_comparisons: Vec<FunctionComparison>,
92    /// Overall summary statistics
93    pub summary: ComparisonSummary,
94    /// Performance analysis
95    pub performance_analysis: PerformanceAnalysis,
96    /// Accuracy analysis
97    pub accuracy_analysis: AccuracyAnalysis,
98    /// Recommendations
99    pub recommendations: Vec<ComparisonRecommendation>,
100}
101
102/// System information for comparison context
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct SystemInfo {
105    /// Operating system
106    pub os: String,
107    /// CPU information
108    pub cpu: String,
109    /// Memory information
110    pub memory_gb: f64,
111    /// Rust version
112    pub rust_version: String,
113    /// scirs2-stats version
114    pub scirs2_version: String,
115}
116
117/// SciPy environment information
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct ScipyEnvironmentInfo {
120    /// Python version
121    pub python_version: String,
122    /// SciPy version
123    pub scipy_version: String,
124    /// NumPy version
125    pub numpy_version: String,
126    /// BLAS/LAPACK information
127    pub blas_info: String,
128    /// Available Python packages
129    pub packages: HashMap<String, String>,
130}
131
132/// Comparison results for a single function
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct FunctionComparison {
135    /// Function name
136    pub function_name: String,
137    /// Test data size
138    pub datasize: usize,
139    /// Performance comparison
140    pub performance: PerformanceComparison,
141    /// Accuracy comparison
142    pub accuracy: AccuracyComparison,
143    /// Memory usage comparison
144    pub memory: Option<MemoryComparison>,
145    /// Test status
146    pub status: ComparisonStatus,
147    /// Error details if failed
148    pub error_details: Option<String>,
149}
150
151/// Performance comparison between scirs2-stats and SciPy
152#[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct PerformanceComparison {
154    /// scirs2-stats execution time (nanoseconds)
155    pub scirs2_time_ns: f64,
156    /// SciPy execution time (nanoseconds)
157    pub scipy_time_ns: f64,
158    /// Performance ratio (scirs2/scipy)
159    pub ratio: f64,
160    /// Statistical significance of difference
161    pub significance: PerformanceSignificance,
162    /// Confidence interval for ratio
163    pub confidence_interval: (f64, f64),
164}
165
166/// Accuracy comparison between implementations
167#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct AccuracyComparison {
169    /// Absolute difference
170    pub absolute_difference: f64,
171    /// Relative difference
172    pub relative_difference: f64,
173    /// Maximum element-wise difference
174    pub max_element_difference: f64,
175    /// Number of elements compared
176    pub elements_compared: usize,
177    /// Elements within tolerance
178    pub elements_within_tolerance: usize,
179    /// Accuracy assessment
180    pub assessment: AccuracyAssessment,
181}
182
183/// Memory usage comparison
184#[derive(Debug, Clone, Serialize, Deserialize)]
185pub struct MemoryComparison {
186    /// scirs2-stats memory usage (bytes)
187    pub scirs2_memory: usize,
188    /// SciPy memory usage (bytes)
189    pub scipy_memory: usize,
190    /// Memory ratio (scirs2/scipy)
191    pub ratio: f64,
192    /// Memory efficiency assessment
193    pub assessment: MemoryEfficiencyAssessment,
194}
195
196/// Status of comparison test
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub enum ComparisonStatus {
199    /// Test passed all checks
200    Passed,
201    /// Test passed with warnings
202    PassedWithWarnings { warnings: Vec<String> },
203    /// Test failed accuracy requirements
204    FailedAccuracy { details: String },
205    /// Test failed performance requirements
206    FailedPerformance { details: String },
207    /// Test encountered execution error
208    Error { error: String },
209    /// Test was skipped
210    Skipped { reason: String },
211}
212
213/// Performance significance assessment
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub enum PerformanceSignificance {
216    /// No significant difference
217    NotSignificant,
218    /// scirs2-stats significantly faster
219    ScirsFaster { confidence: f64 },
220    /// SciPy significantly faster
221    ScipyFaster { confidence: f64 },
222    /// Insufficient data for assessment
223    InsufficientData,
224}
225
226/// Accuracy assessment categories
227#[derive(Debug, Clone, Serialize, Deserialize)]
228pub enum AccuracyAssessment {
229    /// Excellent accuracy (within machine precision)
230    Excellent,
231    /// Good accuracy (within specified tolerance)
232    Good,
233    /// Acceptable accuracy (small differences)
234    Acceptable,
235    /// Poor accuracy (significant differences)
236    Poor,
237    /// Unacceptable accuracy (large differences)
238    Unacceptable,
239}
240
241/// Memory efficiency assessment
242#[derive(Debug, Clone, Serialize, Deserialize)]
243pub enum MemoryEfficiencyAssessment {
244    /// More memory efficient than SciPy
245    MoreEfficient,
246    /// Similar memory usage to SciPy
247    Similar,
248    /// Less memory efficient than SciPy
249    LessEfficient,
250    /// Significantly less memory efficient
251    MuchLessEfficient,
252}
253
254/// Overall comparison summary
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct ComparisonSummary {
257    /// Total tests run
258    pub total_tests: usize,
259    /// Tests passed
260    pub tests_passed: usize,
261    /// Tests with warnings
262    pub tests_with_warnings: usize,
263    /// Tests failed
264    pub tests_failed: usize,
265    /// Overall pass rate
266    pub pass_rate: f64,
267    /// Functions with performance issues
268    pub performance_issues: Vec<String>,
269    /// Functions with accuracy issues
270    pub accuracy_issues: Vec<String>,
271    /// Overall performance rating
272    pub performance_rating: PerformanceRating,
273    /// Overall accuracy rating
274    pub accuracy_rating: AccuracyRating,
275}
276
277/// Performance analysis across all tests
278#[derive(Debug, Clone, Serialize, Deserialize)]
279pub struct PerformanceAnalysis {
280    /// Average performance ratio
281    pub average_ratio: f64,
282    /// Performance ratio standard deviation
283    pub ratio_std_dev: f64,
284    /// Functions faster than SciPy
285    pub faster_functions: Vec<(String, f64)>,
286    /// Functions slower than SciPy
287    pub slower_functions: Vec<(String, f64)>,
288    /// Performance by data size
289    pub performance_bysize: HashMap<usize, f64>,
290    /// Performance trends
291    pub trends: PerformanceTrends,
292}
293
294/// Accuracy analysis across all tests
295#[derive(Debug, Clone, Serialize, Deserialize)]
296pub struct AccuracyAnalysis {
297    /// Average relative difference
298    pub average_relative_diff: f64,
299    /// Maximum relative difference
300    pub max_relative_diff: f64,
301    /// Functions with accuracy issues
302    pub problematic_functions: Vec<(String, f64)>,
303    /// Accuracy by data size
304    pub accuracy_bysize: HashMap<usize, f64>,
305    /// Numerical stability assessment
306    pub stability_assessment: NumericalStabilityAssessment,
307}
308
309/// Performance rating categories
310#[derive(Debug, Clone, Serialize, Deserialize)]
311pub enum PerformanceRating {
312    /// Excellent performance (consistently faster)
313    Excellent,
314    /// Good performance (mostly competitive)
315    Good,
316    /// Acceptable performance (within tolerance)
317    Acceptable,
318    /// Poor performance (consistently slower)
319    Poor,
320    /// Unacceptable performance (significantly slower)
321    Unacceptable,
322}
323
324/// Accuracy rating categories
325#[derive(Debug, Clone, Serialize, Deserialize)]
326pub enum AccuracyRating {
327    /// Excellent accuracy (machine precision)
328    Excellent,
329    /// Good accuracy (high precision)
330    Good,
331    /// Acceptable accuracy (within tolerance)
332    Acceptable,
333    /// Poor accuracy (noticeable differences)
334    Poor,
335    /// Unacceptable accuracy (significant errors)
336    Unacceptable,
337}
338
339/// Performance trends analysis
340#[derive(Debug, Clone, Serialize, Deserialize)]
341pub struct PerformanceTrends {
342    /// Performance scaling with data size
343    pub scaling_analysis: ScalingAnalysis,
344    /// Performance stability over multiple runs
345    pub stability_analysis: StabilityAnalysis,
346    /// Performance regression detection
347    pub regression_analysis: RegressionAnalysis,
348}
349
350/// Scaling analysis results
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct ScalingAnalysis {
353    /// Scaling factor relative to SciPy
354    pub relative_scaling: f64,
355    /// Complexity assessment
356    pub complexity_assessment: ComplexityAssessment,
357    /// Crossover points where performance changes
358    pub crossover_points: Vec<usize>,
359}
360
361/// Stability analysis results
362#[derive(Debug, Clone, Serialize, Deserialize)]
363pub struct StabilityAnalysis {
364    /// Coefficient of variation for performance
365    pub performance_cv: f64,
366    /// Performance outliers detected
367    pub outliers_detected: usize,
368    /// Stability rating
369    pub stability_rating: StabilityRating,
370}
371
372/// Regression analysis results
373#[derive(Debug, Clone, Serialize, Deserialize)]
374pub struct RegressionAnalysis {
375    /// Performance regressions detected
376    pub regressions_detected: Vec<PerformanceRegression>,
377    /// Accuracy regressions detected
378    pub accuracy_regressions: Vec<AccuracyRegression>,
379    /// Overall regression risk
380    pub regression_risk: RegressionRisk,
381}
382
383/// Complexity assessment
384#[derive(Debug, Clone, Serialize, Deserialize)]
385pub enum ComplexityAssessment {
386    /// Better complexity than SciPy
387    Better,
388    /// Similar complexity to SciPy
389    Similar,
390    /// Worse complexity than SciPy
391    Worse,
392    /// Unknown complexity relationship
393    Unknown,
394}
395
396/// Stability rating
397#[derive(Debug, Clone, Serialize, Deserialize)]
398pub enum StabilityRating {
399    /// Very stable performance
400    VeryStable,
401    /// Stable performance
402    Stable,
403    /// Moderately stable
404    ModeratelyStable,
405    /// Unstable performance
406    Unstable,
407    /// Very unstable performance
408    VeryUnstable,
409}
410
411/// Performance regression
412#[derive(Debug, Clone, Serialize, Deserialize)]
413pub struct PerformanceRegression {
414    /// Function affected
415    pub function_name: String,
416    /// Regression magnitude
417    pub regression_factor: f64,
418    /// Confidence in detection
419    pub confidence: f64,
420    /// Suspected cause
421    pub suspected_cause: String,
422}
423
424/// Accuracy regression
425#[derive(Debug, Clone, Serialize, Deserialize)]
426pub struct AccuracyRegression {
427    /// Function affected
428    pub function_name: String,
429    /// Accuracy degradation
430    pub accuracy_loss: f64,
431    /// Severity assessment
432    pub severity: AccuracyRegressionSeverity,
433}
434
435/// Regression risk assessment
436#[derive(Debug, Clone, Serialize, Deserialize)]
437pub enum RegressionRisk {
438    /// Low risk of regressions
439    Low,
440    /// Medium risk of regressions
441    Medium,
442    /// High risk of regressions
443    High,
444    /// Critical risk of regressions
445    Critical,
446}
447
448/// Accuracy regression severity
449#[derive(Debug, Clone, Serialize, Deserialize)]
450pub enum AccuracyRegressionSeverity {
451    /// Minor accuracy loss
452    Minor,
453    /// Moderate accuracy loss
454    Moderate,
455    /// Major accuracy loss
456    Major,
457    /// Critical accuracy loss
458    Critical,
459}
460
461/// Numerical stability assessment
462#[derive(Debug, Clone, Serialize, Deserialize)]
463pub struct NumericalStabilityAssessment {
464    /// Overall stability rating
465    pub stability_rating: NumericalStabilityRating,
466    /// Functions with stability issues
467    pub unstable_functions: Vec<String>,
468    /// Condition number analysis
469    pub condition_number_analysis: ConditionNumberAnalysis,
470    /// Precision loss analysis
471    pub precision_loss_analysis: PrecisionLossAnalysis,
472}
473
474/// Numerical stability rating
475#[derive(Debug, Clone, Serialize, Deserialize)]
476pub enum NumericalStabilityRating {
477    /// Excellent numerical stability
478    Excellent,
479    /// Good numerical stability
480    Good,
481    /// Acceptable numerical stability
482    Acceptable,
483    /// Poor numerical stability
484    Poor,
485    /// Unacceptable numerical stability
486    Unacceptable,
487}
488
489/// Condition number analysis
490#[derive(Debug, Clone, Serialize, Deserialize)]
491pub struct ConditionNumberAnalysis {
492    /// Functions sensitive to condition number
493    pub sensitive_functions: Vec<String>,
494    /// Condition number thresholds
495    pub thresholds: HashMap<String, f64>,
496    /// Stability recommendations
497    pub recommendations: Vec<String>,
498}
499
500/// Precision loss analysis
501#[derive(Debug, Clone, Serialize, Deserialize)]
502pub struct PrecisionLossAnalysis {
503    /// Average precision loss
504    pub average_loss: f64,
505    /// Maximum precision loss
506    pub max_loss: f64,
507    /// Functions with significant loss
508    pub problematic_functions: Vec<String>,
509}
510
511/// Comparison recommendation
512#[derive(Debug, Clone, Serialize, Deserialize)]
513pub struct ComparisonRecommendation {
514    /// Recommendation priority
515    pub priority: RecommendationPriority,
516    /// Category of recommendation
517    pub category: RecommendationCategory,
518    /// Recommendation description
519    pub description: String,
520    /// Affected functions
521    pub affected_functions: Vec<String>,
522    /// Implementation complexity
523    pub complexity: ImplementationComplexity,
524    /// Expected impact
525    pub expected_impact: ExpectedImpact,
526}
527
528/// Recommendation priority levels
529#[derive(Debug, Clone, Serialize, Deserialize)]
530pub enum RecommendationPriority {
531    /// Critical priority
532    Critical,
533    /// High priority
534    High,
535    /// Medium priority
536    Medium,
537    /// Low priority
538    Low,
539    /// Nice to have
540    NiceToHave,
541}
542
543/// Recommendation categories
544#[derive(Debug, Clone, Serialize, Deserialize)]
545pub enum RecommendationCategory {
546    /// Performance optimization
547    Performance,
548    /// Accuracy improvement
549    Accuracy,
550    /// Memory optimization
551    Memory,
552    /// API compatibility
553    APICompatibility,
554    /// Numerical stability
555    NumericalStability,
556    /// Testing enhancement
557    Testing,
558}
559
560/// Implementation complexity assessment
561#[derive(Debug, Clone, Serialize, Deserialize)]
562pub enum ImplementationComplexity {
563    /// Simple to implement
564    Simple,
565    /// Moderate complexity
566    Moderate,
567    /// Complex implementation
568    Complex,
569    /// Very complex implementation
570    VeryComplex,
571}
572
573/// Expected impact of recommendation
574#[derive(Debug, Clone, Serialize, Deserialize)]
575pub struct ExpectedImpact {
576    /// Performance improvement factor
577    pub performance_improvement: Option<f64>,
578    /// Accuracy improvement
579    pub accuracy_improvement: Option<f64>,
580    /// Memory reduction factor
581    pub memory_reduction: Option<f64>,
582    /// Implementation effort (person-days)
583    pub implementation_effort: f64,
584}
585
586/// Main SciPy comparison framework
587pub struct ScipyBenchmarkComparison {
588    config: ScipyComparisonConfig,
589    temp_dir: String,
590}
591
592impl ScipyBenchmarkComparison {
593    /// Create new SciPy comparison framework
594    pub fn new(config: ScipyComparisonConfig) -> StatsResult<Self> {
595        // Create temporary directory
596        fs::create_dir_all(&config.temp_dir).map_err(|e| {
597            StatsError::ComputationError(format!("Failed to create temp directory: {}", e))
598        })?;
599
600        Ok(Self {
601            temp_dir: config.temp_dir.clone(),
602            config,
603        })
604    }
605
606    /// Create with default configuration
607    pub fn default() -> StatsResult<Self> {
608        Self::new(ScipyComparisonConfig::default())
609    }
610
611    /// Run comprehensive comparison benchmarks
612    pub fn run_comprehensive_comparison(&self) -> StatsResult<ScipyComparisonReport> {
613        let _start_time = Instant::now();
614
615        // Verify SciPy environment
616        let scipy_env = self.verify_scipy_environment()?;
617
618        // Collect system information
619        let system_info = self.collect_system_info();
620
621        // Run function comparisons
622        let mut function_comparisons = Vec::new();
623
624        for function_name in &self.config.functions_to_test {
625            for &datasize in &self.config.testsizes {
626                match self.compare_function(function_name, datasize) {
627                    Ok(comparison) => function_comparisons.push(comparison),
628                    Err(e) => {
629                        function_comparisons.push(FunctionComparison {
630                            function_name: function_name.clone(),
631                            datasize,
632                            performance: PerformanceComparison {
633                                scirs2_time_ns: 0.0,
634                                scipy_time_ns: 0.0,
635                                ratio: 0.0,
636                                significance: PerformanceSignificance::InsufficientData,
637                                confidence_interval: (0.0, 0.0),
638                            },
639                            accuracy: AccuracyComparison {
640                                absolute_difference: 0.0,
641                                relative_difference: 0.0,
642                                max_element_difference: 0.0,
643                                elements_compared: 0,
644                                elements_within_tolerance: 0,
645                                assessment: AccuracyAssessment::Poor,
646                            },
647                            memory: None,
648                            status: ComparisonStatus::Error {
649                                error: e.to_string(),
650                            },
651                            error_details: Some(e.to_string()),
652                        });
653                    }
654                }
655            }
656        }
657
658        // Analyze results
659        let summary = self.generate_summary(&function_comparisons);
660        let performance_analysis = self.analyze_performance(&function_comparisons);
661        let accuracy_analysis = self.analyze_accuracy(&function_comparisons);
662        let recommendations = self.generate_recommendations(
663            &function_comparisons,
664            &performance_analysis,
665            &accuracy_analysis,
666        );
667
668        Ok(ScipyComparisonReport {
669            timestamp: chrono::Utc::now().to_rfc3339(),
670            config: self.config.clone(),
671            system_info,
672            scipy_environment: scipy_env,
673            function_comparisons,
674            summary,
675            performance_analysis,
676            accuracy_analysis,
677            recommendations,
678        })
679    }
680
681    /// Verify SciPy environment is available and compatible
682    fn verify_scipy_environment(&self) -> StatsResult<ScipyEnvironmentInfo> {
683        let script = r#"
684import sys
685import scipy
686import numpy as np
687import json
688
689info = {
690    'python_version': sys.version,
691    'scipy_version': scipy.__version__,
692    'numpy_version': np.__version__,
693    'blas_info': str(np.__config__.show()),
694    'packages': {}
695}
696
697try:
698    import pandas
699    info['packages']['pandas'] = pandas.__version__
700except ImportError:
701    pass
702
703try:
704    import sklearn
705    info['packages']['sklearn'] = sklearn.__version__
706except ImportError:
707    pass
708
709print(json.dumps(info))
710"#;
711
712        let script_path = format!("{}/verify_env.py", self.temp_dir);
713        fs::write(&script_path, script).map_err(|e| {
714            StatsError::ComputationError(format!("Failed to write verification script: {}", e))
715        })?;
716
717        let output = Command::new(&self.config.python_executable)
718            .arg(&script_path)
719            .output()
720            .map_err(|e| {
721                StatsError::ComputationError(format!("Failed to execute Python: {}", e))
722            })?;
723
724        if !output.status.success() {
725            return Err(StatsError::ComputationError(format!(
726                "Python script failed: {}",
727                String::from_utf8_lossy(&output.stderr)
728            )));
729        }
730
731        let output_str = String::from_utf8_lossy(&output.stdout);
732        let info: serde_json::Value = serde_json::from_str(&output_str).map_err(|e| {
733            StatsError::ComputationError(format!("Failed to parse environment info: {}", e))
734        })?;
735
736        Ok(ScipyEnvironmentInfo {
737            python_version: info["python_version"]
738                .as_str()
739                .unwrap_or("unknown")
740                .to_string(),
741            scipy_version: info["scipy_version"]
742                .as_str()
743                .unwrap_or("unknown")
744                .to_string(),
745            numpy_version: info["numpy_version"]
746                .as_str()
747                .unwrap_or("unknown")
748                .to_string(),
749            blas_info: info["blas_info"].as_str().unwrap_or("unknown").to_string(),
750            packages: info["packages"]
751                .as_object()
752                .unwrap_or(&serde_json::Map::new())
753                .iter()
754                .map(|(k, v)| (k.clone(), v.as_str().unwrap_or("unknown").to_string()))
755                .collect(),
756        })
757    }
758
759    /// Collect system information
760    fn collect_system_info(&self) -> SystemInfo {
761        SystemInfo {
762            os: std::env::consts::OS.to_string(),
763            cpu: "Generic CPU".to_string(), // Would use proper CPU detection
764            memory_gb: 8.0,                 // Placeholder
765            rust_version: std::env::var("RUSTC_VERSION").unwrap_or_else(|_| "unknown".to_string()),
766            scirs2_version: std::env::var("CARGO_PKG_VERSION")
767                .unwrap_or_else(|_| "unknown".to_string()),
768        }
769    }
770
771    /// Compare a single function between scirs2-stats and SciPy
772    fn compare_function(
773        &self,
774        function_name: &str,
775        datasize: usize,
776    ) -> StatsResult<FunctionComparison> {
777        // Generate test data
778        let testdata = self.generate_testdata(datasize)?;
779
780        // Benchmark scirs2-stats function
781        let scirs2_result = self.benchmark_scirs2_function(function_name, &testdata)?;
782
783        // Benchmark SciPy function
784        let scipy_result = self.benchmark_scipy_function(function_name, &testdata)?;
785
786        // Compare performance
787        let performance = PerformanceComparison {
788            scirs2_time_ns: scirs2_result.execution_time.as_nanos() as f64,
789            scipy_time_ns: scipy_result.execution_time.as_nanos() as f64,
790            ratio: scirs2_result.execution_time.as_nanos() as f64
791                / scipy_result.execution_time.as_nanos() as f64,
792            significance: PerformanceSignificance::NotSignificant, // Simplified
793            confidence_interval: (0.8, 1.2),                       // Placeholder
794        };
795
796        // Compare accuracy
797        let accuracy = self.compare_accuracy(&scirs2_result.result, &scipy_result.result)?;
798
799        // Determine status
800        let status = self.determine_comparison_status(&performance, &accuracy);
801
802        Ok(FunctionComparison {
803            function_name: function_name.to_string(),
804            datasize,
805            performance,
806            accuracy,
807            memory: None, // Would implement memory comparison
808            status,
809            error_details: None,
810        })
811    }
812
813    /// Generate test data for benchmarking
814    fn generate_testdata(&self, size: usize) -> StatsResult<TestData> {
815        use scirs2_core::random::{Distribution, Normal};
816
817        let mut rng = scirs2_core::random::thread_rng();
818        let normal = Normal::new(0.0, 1.0).map_err(|e| {
819            StatsError::ComputationError(format!("Failed to create normal distribution: {}", e))
820        })?;
821
822        let data: Vec<f64> = (0..size).map(|_| normal.sample(&mut rng)).collect();
823
824        Ok(TestData {
825            primary: Array1::from_vec(data.clone()),
826            secondary: Array1::from_vec(
827                data.iter()
828                    .map(|x| x + 0.1 * normal.sample(&mut rng))
829                    .collect(),
830            ),
831            matrix: Array2::from_shape_fn((size.min(100), size.min(100)), |(i, j)| {
832                normal.sample(&mut rng) + 0.1 * (i + j) as f64
833            }),
834        })
835    }
836
837    /// Benchmark scirs2-stats function
838    fn benchmark_scirs2_function(
839        &self,
840        function_name: &str,
841        testdata: &TestData,
842    ) -> StatsResult<BenchmarkResult> {
843        let start_time = Instant::now();
844
845        let result = match function_name {
846            "mean" => {
847                vec![crate::descriptive::mean(&testdata.primary.view())?]
848            }
849            "std" => {
850                vec![crate::descriptive::std(&testdata.primary.view(), 1, None)?]
851            }
852            "var" => {
853                vec![crate::descriptive::var(&testdata.primary.view(), 1, None)?]
854            }
855            "skew" => {
856                vec![crate::descriptive::skew(
857                    &testdata.primary.view(),
858                    false,
859                    None,
860                )?]
861            }
862            "kurtosis" => {
863                vec![crate::descriptive::kurtosis(
864                    &testdata.primary.view(),
865                    true,
866                    false,
867                    None,
868                )?]
869            }
870            "pearsonr" => {
871                let corr = crate::correlation::pearson_r(
872                    &testdata.primary.view(),
873                    &testdata.secondary.view(),
874                )?;
875                vec![corr]
876            }
877            "spearmanr" => {
878                let corr = crate::correlation::spearman_r(
879                    &testdata.primary.view(),
880                    &testdata.secondary.view(),
881                )?;
882                vec![corr]
883            }
884            "ttest_1samp" => {
885                let result = crate::tests::ttest::ttest_1samp(
886                    &testdata.primary.view(),
887                    0.0,
888                    crate::tests::ttest::Alternative::TwoSided,
889                    "propagate",
890                )?;
891                vec![result.statistic, result.pvalue]
892            }
893            _ => {
894                return Err(StatsError::NotImplemented(format!(
895                    "Function {} not implemented in benchmark",
896                    function_name
897                )));
898            }
899        };
900
901        let execution_time = start_time.elapsed();
902
903        Ok(BenchmarkResult {
904            result,
905            execution_time,
906        })
907    }
908
909    /// Benchmark SciPy function
910    fn benchmark_scipy_function(
911        &self,
912        function_name: &str,
913        testdata: &TestData,
914    ) -> StatsResult<BenchmarkResult> {
915        let script = self.generate_scipy_script(function_name, testdata)?;
916        let script_path = format!("{}/scipy_benchmark_{}.py", self.temp_dir, function_name);
917
918        fs::write(&script_path, script).map_err(|e| {
919            StatsError::ComputationError(format!("Failed to write SciPy script: {}", e))
920        })?;
921
922        let output = Command::new(&self.config.python_executable)
923            .arg(&script_path)
924            .output()
925            .map_err(|e| {
926                StatsError::ComputationError(format!("Failed to execute SciPy script: {}", e))
927            })?;
928
929        if !output.status.success() {
930            return Err(StatsError::ComputationError(format!(
931                "SciPy script failed: {}",
932                String::from_utf8_lossy(&output.stderr)
933            )));
934        }
935
936        let output_str = String::from_utf8_lossy(&output.stdout);
937        let result: serde_json::Value = serde_json::from_str(&output_str).map_err(|e| {
938            StatsError::ComputationError(format!("Failed to parse SciPy result: {}", e))
939        })?;
940
941        let execution_time =
942            Duration::from_secs_f64(result["execution_time"].as_f64().unwrap_or(0.0));
943
944        let result_values: Vec<f64> = result["result"]
945            .as_array()
946            .unwrap_or(&Vec::new())
947            .iter()
948            .filter_map(|v| v.as_f64())
949            .collect();
950
951        Ok(BenchmarkResult {
952            result: result_values,
953            execution_time,
954        })
955    }
956
957    /// Generate SciPy benchmark script
958    fn generate_scipy_script(
959        &self,
960        function_name: &str,
961        testdata: &TestData,
962    ) -> StatsResult<String> {
963        let data_primary: Vec<String> = testdata.primary.iter().map(|x| x.to_string()).collect();
964        let data_secondary: Vec<String> =
965            testdata.secondary.iter().map(|x| x.to_string()).collect();
966
967        let script = match function_name {
968            "mean" => {
969                format!(
970                    r#"
971import numpy as np
972import time
973import json
974
975data = np.array([{}])
976
977start_time = time.perf_counter()
978result = np.mean(data)
979execution_time = time.perf_counter() - start_time
980
981output = {{
982    'result': [float(result)],
983    'execution_time': execution_time
984}}
985
986print(json.dumps(output))
987"#,
988                    data_primary.join(", ")
989                )
990            }
991            "std" => {
992                format!(
993                    r#"
994import numpy as np
995import time
996import json
997
998data = np.array([{}])
999
1000start_time = time.perf_counter()
1001result = np.std(data, ddof=1)
1002execution_time = time.perf_counter() - start_time
1003
1004output = {{
1005    'result': [float(result)],
1006    'execution_time': execution_time
1007}}
1008
1009print(json.dumps(output))
1010"#,
1011                    data_primary.join(", ")
1012                )
1013            }
1014            "pearsonr" => {
1015                format!(
1016                    r#"
1017import numpy as np
1018import scipy.stats
1019import time
1020import json
1021
1022data1 = np.array([{}])
1023data2 = np.array([{}])
1024
1025start_time = time.perf_counter()
1026corr, p_value = scipy.stats.pearsonr(data1, data2)
1027execution_time = time.perf_counter() - start_time
1028
1029output = {{
1030    'result': [float(corr)],
1031    'execution_time': execution_time
1032}}
1033
1034print(json.dumps(output))
1035"#,
1036                    data_primary.join(", "),
1037                    data_secondary.join(", ")
1038                )
1039            }
1040            _ => {
1041                return Err(StatsError::NotImplemented(format!(
1042                    "SciPy script generation not implemented for {}",
1043                    function_name
1044                )));
1045            }
1046        };
1047
1048        Ok(script)
1049    }
1050
1051    /// Compare accuracy between results
1052    fn compare_accuracy(
1053        &self,
1054        scirs2_result: &[f64],
1055        scipy_result: &[f64],
1056    ) -> StatsResult<AccuracyComparison> {
1057        if scirs2_result.len() != scipy_result.len() {
1058            return Ok(AccuracyComparison {
1059                absolute_difference: f64::INFINITY,
1060                relative_difference: f64::INFINITY,
1061                max_element_difference: f64::INFINITY,
1062                elements_compared: 0,
1063                elements_within_tolerance: 0,
1064                assessment: AccuracyAssessment::Unacceptable,
1065            });
1066        }
1067
1068        let mut abs_diffs = Vec::new();
1069        let mut rel_diffs = Vec::new();
1070        let mut within_tolerance = 0;
1071
1072        for (s, r) in scirs2_result.iter().zip(scipy_result.iter()) {
1073            let abs_diff = (s - r).abs();
1074            let rel_diff = if r.abs() > 1e-10 {
1075                abs_diff / r.abs()
1076            } else {
1077                abs_diff
1078            };
1079
1080            abs_diffs.push(abs_diff);
1081            rel_diffs.push(rel_diff);
1082
1083            if abs_diff < self.config.accuracy_tolerance
1084                || rel_diff < self.config.accuracy_tolerance
1085            {
1086                within_tolerance += 1;
1087            }
1088        }
1089
1090        let avg_abs_diff = abs_diffs.iter().sum::<f64>() / abs_diffs.len() as f64;
1091        let avg_rel_diff = rel_diffs.iter().sum::<f64>() / rel_diffs.len() as f64;
1092        let max_element_diff = abs_diffs.iter().fold(0.0f64, |acc, &x| acc.max(x));
1093
1094        let assessment = if within_tolerance == scirs2_result.len() {
1095            if avg_rel_diff < 1e-14 {
1096                AccuracyAssessment::Excellent
1097            } else if avg_rel_diff < 1e-10 {
1098                AccuracyAssessment::Good
1099            } else {
1100                AccuracyAssessment::Acceptable
1101            }
1102        } else if within_tolerance as f64 / scirs2_result.len() as f64 > 0.9 {
1103            AccuracyAssessment::Acceptable
1104        } else if within_tolerance as f64 / scirs2_result.len() as f64 > 0.5 {
1105            AccuracyAssessment::Poor
1106        } else {
1107            AccuracyAssessment::Unacceptable
1108        };
1109
1110        Ok(AccuracyComparison {
1111            absolute_difference: avg_abs_diff,
1112            relative_difference: avg_rel_diff,
1113            max_element_difference: max_element_diff,
1114            elements_compared: scirs2_result.len(),
1115            elements_within_tolerance: within_tolerance,
1116            assessment,
1117        })
1118    }
1119
1120    /// Determine comparison status
1121    fn determine_comparison_status(
1122        &self,
1123        performance: &PerformanceComparison,
1124        accuracy: &AccuracyComparison,
1125    ) -> ComparisonStatus {
1126        let mut warnings = Vec::new();
1127
1128        // Check accuracy
1129        if matches!(
1130            accuracy.assessment,
1131            AccuracyAssessment::Unacceptable | AccuracyAssessment::Poor
1132        ) {
1133            return ComparisonStatus::FailedAccuracy {
1134                details: format!("Relative difference: {:.2e}", accuracy.relative_difference),
1135            };
1136        }
1137
1138        // Check performance
1139        if performance.ratio > self.config.performance_tolerance {
1140            return ComparisonStatus::FailedPerformance {
1141                details: format!(
1142                    "Performance ratio: {:.2} (limit: {:.2})",
1143                    performance.ratio, self.config.performance_tolerance
1144                ),
1145            };
1146        }
1147
1148        // Check for warnings
1149        if matches!(accuracy.assessment, AccuracyAssessment::Acceptable) {
1150            warnings.push("Accuracy is only acceptable".to_string());
1151        }
1152
1153        if performance.ratio > 1.5 {
1154            warnings.push(format!(
1155                "Performance is {:.1}x slower than SciPy",
1156                performance.ratio
1157            ));
1158        }
1159
1160        if warnings.is_empty() {
1161            ComparisonStatus::Passed
1162        } else {
1163            ComparisonStatus::PassedWithWarnings { warnings }
1164        }
1165    }
1166
1167    /// Generate comparison summary
1168    fn generate_summary(&self, comparisons: &[FunctionComparison]) -> ComparisonSummary {
1169        let total_tests = comparisons.len();
1170        let tests_passed = comparisons
1171            .iter()
1172            .filter(|c| matches!(c.status, ComparisonStatus::Passed))
1173            .count();
1174        let tests_with_warnings = comparisons
1175            .iter()
1176            .filter(|c| matches!(c.status, ComparisonStatus::PassedWithWarnings { .. }))
1177            .count();
1178        let tests_failed = total_tests - tests_passed - tests_with_warnings;
1179
1180        let pass_rate = if total_tests > 0 {
1181            (tests_passed + tests_with_warnings) as f64 / total_tests as f64
1182        } else {
1183            0.0
1184        };
1185
1186        let performance_issues: Vec<String> = comparisons
1187            .iter()
1188            .filter(|c| matches!(c.status, ComparisonStatus::FailedPerformance { .. }))
1189            .map(|c| c.function_name.clone())
1190            .collect();
1191
1192        let accuracy_issues: Vec<String> = comparisons
1193            .iter()
1194            .filter(|c| matches!(c.status, ComparisonStatus::FailedAccuracy { .. }))
1195            .map(|c| c.function_name.clone())
1196            .collect();
1197
1198        let avg_performance_ratio =
1199            comparisons.iter().map(|c| c.performance.ratio).sum::<f64>() / comparisons.len() as f64;
1200
1201        let performance_rating = if avg_performance_ratio < 0.8 {
1202            PerformanceRating::Excellent
1203        } else if avg_performance_ratio < 1.2 {
1204            PerformanceRating::Good
1205        } else if avg_performance_ratio < 2.0 {
1206            PerformanceRating::Acceptable
1207        } else if avg_performance_ratio < 5.0 {
1208            PerformanceRating::Poor
1209        } else {
1210            PerformanceRating::Unacceptable
1211        };
1212
1213        let avg_relative_diff = comparisons
1214            .iter()
1215            .map(|c| c.accuracy.relative_difference)
1216            .sum::<f64>()
1217            / comparisons.len() as f64;
1218
1219        let accuracy_rating = if avg_relative_diff < 1e-14 {
1220            AccuracyRating::Excellent
1221        } else if avg_relative_diff < 1e-10 {
1222            AccuracyRating::Good
1223        } else if avg_relative_diff < 1e-6 {
1224            AccuracyRating::Acceptable
1225        } else if avg_relative_diff < 1e-3 {
1226            AccuracyRating::Poor
1227        } else {
1228            AccuracyRating::Unacceptable
1229        };
1230
1231        ComparisonSummary {
1232            total_tests,
1233            tests_passed,
1234            tests_with_warnings,
1235            tests_failed,
1236            pass_rate,
1237            performance_issues,
1238            accuracy_issues,
1239            performance_rating,
1240            accuracy_rating,
1241        }
1242    }
1243
1244    /// Analyze performance across all comparisons
1245    fn analyze_performance(&self, comparisons: &[FunctionComparison]) -> PerformanceAnalysis {
1246        let ratios: Vec<f64> = comparisons.iter().map(|c| c.performance.ratio).collect();
1247
1248        let average_ratio = ratios.iter().sum::<f64>() / ratios.len() as f64;
1249        let variance = ratios
1250            .iter()
1251            .map(|r| (r - average_ratio).powi(2))
1252            .sum::<f64>()
1253            / ratios.len() as f64;
1254        let ratio_std_dev = variance.sqrt();
1255
1256        let faster_functions: Vec<(String, f64)> = comparisons
1257            .iter()
1258            .filter(|c| c.performance.ratio < 1.0)
1259            .map(|c| (c.function_name.clone(), c.performance.ratio))
1260            .collect();
1261
1262        let slower_functions: Vec<(String, f64)> = comparisons
1263            .iter()
1264            .filter(|c| c.performance.ratio > 1.0)
1265            .map(|c| (c.function_name.clone(), c.performance.ratio))
1266            .collect();
1267
1268        let performance_bysize = HashMap::new(); // Would implement proper analysis
1269
1270        let trends = PerformanceTrends {
1271            scaling_analysis: ScalingAnalysis {
1272                relative_scaling: 1.0, // Placeholder
1273                complexity_assessment: ComplexityAssessment::Similar,
1274                crossover_points: Vec::new(),
1275            },
1276            stability_analysis: StabilityAnalysis {
1277                performance_cv: ratio_std_dev / average_ratio,
1278                outliers_detected: 0, // Would implement outlier detection
1279                stability_rating: StabilityRating::Stable,
1280            },
1281            regression_analysis: RegressionAnalysis {
1282                regressions_detected: Vec::new(),
1283                accuracy_regressions: Vec::new(),
1284                regression_risk: RegressionRisk::Low,
1285            },
1286        };
1287
1288        PerformanceAnalysis {
1289            average_ratio,
1290            ratio_std_dev,
1291            faster_functions,
1292            slower_functions,
1293            performance_bysize,
1294            trends,
1295        }
1296    }
1297
1298    /// Analyze accuracy across all comparisons
1299    fn analyze_accuracy(&self, comparisons: &[FunctionComparison]) -> AccuracyAnalysis {
1300        let relative_diffs: Vec<f64> = comparisons
1301            .iter()
1302            .map(|c| c.accuracy.relative_difference)
1303            .collect();
1304
1305        let average_relative_diff =
1306            relative_diffs.iter().sum::<f64>() / relative_diffs.len() as f64;
1307        let max_relative_diff = relative_diffs.iter().fold(0.0f64, |acc, &x| acc.max(x));
1308
1309        let problematic_functions: Vec<(String, f64)> = comparisons
1310            .iter()
1311            .filter(|c| c.accuracy.relative_difference > self.config.accuracy_tolerance)
1312            .map(|c| (c.function_name.clone(), c.accuracy.relative_difference))
1313            .collect();
1314
1315        let accuracy_bysize = HashMap::new(); // Would implement proper analysis
1316
1317        let stability_assessment = NumericalStabilityAssessment {
1318            stability_rating: if max_relative_diff < 1e-10 {
1319                NumericalStabilityRating::Excellent
1320            } else if max_relative_diff < 1e-6 {
1321                NumericalStabilityRating::Good
1322            } else {
1323                NumericalStabilityRating::Acceptable
1324            },
1325            unstable_functions: problematic_functions
1326                .iter()
1327                .map(|(name, _)| name.clone())
1328                .collect(),
1329            condition_number_analysis: ConditionNumberAnalysis {
1330                sensitive_functions: Vec::new(),
1331                thresholds: HashMap::new(),
1332                recommendations: Vec::new(),
1333            },
1334            precision_loss_analysis: PrecisionLossAnalysis {
1335                average_loss: average_relative_diff,
1336                max_loss: max_relative_diff,
1337                problematic_functions: problematic_functions
1338                    .iter()
1339                    .map(|(name, _)| name.clone())
1340                    .collect(),
1341            },
1342        };
1343
1344        AccuracyAnalysis {
1345            average_relative_diff,
1346            max_relative_diff,
1347            problematic_functions,
1348            accuracy_bysize,
1349            stability_assessment,
1350        }
1351    }
1352
1353    /// Generate recommendations based on analysis
1354    fn generate_recommendations(
1355        &self,
1356        comparisons: &[FunctionComparison],
1357        performance_analysis: &PerformanceAnalysis,
1358        accuracy_analysis: &AccuracyAnalysis,
1359    ) -> Vec<ComparisonRecommendation> {
1360        let mut recommendations = Vec::new();
1361
1362        // Performance recommendations
1363        if performance_analysis.average_ratio > 2.0 {
1364            recommendations.push(ComparisonRecommendation {
1365                priority: RecommendationPriority::High,
1366                category: RecommendationCategory::Performance,
1367                description: "Overall performance is significantly slower than SciPy. Consider SIMD optimizations and algorithm improvements.".to_string(),
1368                affected_functions: performance_analysis.slower_functions.iter().map(|(name, _)| name.clone()).collect(),
1369                complexity: ImplementationComplexity::Moderate,
1370                expected_impact: ExpectedImpact {
1371                    performance_improvement: Some(2.0),
1372                    accuracy_improvement: None,
1373                    memory_reduction: None,
1374                    implementation_effort: 20.0,
1375                },
1376            });
1377        }
1378
1379        // Accuracy recommendations
1380        if accuracy_analysis.max_relative_diff > 1e-6 {
1381            recommendations.push(ComparisonRecommendation {
1382                priority: RecommendationPriority::Critical,
1383                category: RecommendationCategory::Accuracy,
1384                description: "Some functions have significant accuracy differences compared to SciPy. Review numerical algorithms.".to_string(),
1385                affected_functions: accuracy_analysis.problematic_functions.iter().map(|(name, _)| name.clone()).collect(),
1386                complexity: ImplementationComplexity::Complex,
1387                expected_impact: ExpectedImpact {
1388                    performance_improvement: None,
1389                    accuracy_improvement: Some(10.0),
1390                    memory_reduction: None,
1391                    implementation_effort: 15.0,
1392                },
1393            });
1394        }
1395
1396        // Function-specific recommendations
1397        for comparison in comparisons {
1398            if comparison.performance.ratio > 5.0 {
1399                recommendations.push(ComparisonRecommendation {
1400                    priority: RecommendationPriority::High,
1401                    category: RecommendationCategory::Performance,
1402                    description: format!(
1403                        "Function '{}' is significantly slower than SciPy",
1404                        comparison.function_name
1405                    ),
1406                    affected_functions: vec![comparison.function_name.clone()],
1407                    complexity: ImplementationComplexity::Moderate,
1408                    expected_impact: ExpectedImpact {
1409                        performance_improvement: Some(3.0),
1410                        accuracy_improvement: None,
1411                        memory_reduction: None,
1412                        implementation_effort: 5.0,
1413                    },
1414                });
1415            }
1416        }
1417
1418        recommendations
1419    }
1420}
1421
1422/// Test data structure for benchmarking
1423#[derive(Debug, Clone)]
1424struct TestData {
1425    primary: Array1<f64>,
1426    secondary: Array1<f64>,
1427    #[allow(dead_code)]
1428    matrix: Array2<f64>,
1429}
1430
/// Outcome of running one implementation once.
#[derive(Clone, Debug)]
struct BenchmarkResult {
    /// Numeric values produced by the function under test.
    result: Vec<f64>,
    /// Time the measured call took.
    execution_time: Duration,
}
1437
1438/// Convenience function to run SciPy comparison
1439#[allow(dead_code)]
1440pub fn run_scipy_comparison() -> StatsResult<ScipyComparisonReport> {
1441    let comparison = ScipyBenchmarkComparison::default()?;
1442    comparison.run_comprehensive_comparison()
1443}
1444
1445/// Run comparison for specific functions
1446#[allow(dead_code)]
1447pub fn run_function_comparison(functions: Vec<String>) -> StatsResult<ScipyComparisonReport> {
1448    let mut config = ScipyComparisonConfig::default();
1449    config.functions_to_test = functions;
1450
1451    let comparison = ScipyBenchmarkComparison::new(config)?;
1452    comparison.run_comprehensive_comparison()
1453}
1454
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration lists functions and has usable tolerances.
    #[test]
    fn test_scipy_comparison_config() {
        let defaults = ScipyComparisonConfig::default();
        let has_functions = !defaults.functions_to_test.is_empty();
        assert!(has_functions);
        assert!(defaults.accuracy_tolerance > 0.0);
        assert!(defaults.performance_tolerance > 1.0);
    }

    /// Generated test data has the requested number of samples in each part.
    #[test]
    fn test_testdata_generation() {
        let size = 100;
        let harness = ScipyBenchmarkComparison::default().expect("Operation failed");
        let generated = harness.generate_testdata(size).expect("Operation failed");

        assert_eq!(generated.primary.len(), size);
        assert_eq!(generated.secondary.len(), size);
        assert_eq!(generated.matrix.nrows(), size);
    }

    /// Results that differ by roughly 1e-12 are rated `Excellent` or `Good`.
    #[test]
    fn test_accuracy_comparison() {
        let harness = ScipyBenchmarkComparison::default().expect("Operation failed");

        // Each reference value is offset from ours by about 1e-12.
        let ours = [1.0, 2.0, 3.0];
        let reference = [1.000000000001, 2.000000000001, 3.000000000001];

        let outcome = harness
            .compare_accuracy(&ours, &reference)
            .expect("Operation failed");
        let rated_well = matches!(
            outcome.assessment,
            AccuracyAssessment::Excellent | AccuracyAssessment::Good
        );
        assert!(rated_well);
    }

    /// A `PerformanceComparison` can be built by hand and read back.
    #[test]
    fn test_performance_comparison() {
        let sample = PerformanceComparison {
            scirs2_time_ns: 1000.0,
            scipy_time_ns: 800.0,
            ratio: 1.25,
            significance: PerformanceSignificance::NotSignificant,
            confidence_interval: (1.0, 1.5),
        };

        // A ratio above one means scirs2 was the slower implementation.
        assert!(sample.ratio > 1.0);
    }

    /// Sanity-checks the default tolerances that the recommendation logic reads.
    #[test]
    fn test_recommendation_generation() {
        let defaults = ScipyComparisonConfig::default();
        assert!(defaults.performance_tolerance >= 1.0);
        assert!(defaults.accuracy_tolerance > 0.0);
    }
}