scirs2_stats/scipy_benchmark_comparison.rs

//! SciPy benchmark comparison framework for scirs2-stats v1.0.0
//!
//! This module provides comprehensive benchmarking against SciPy to validate
//! performance, accuracy, and API compatibility. It includes automated test
//! generation, statistical validation, and performance regression detection.
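//!
//! # Example
//!
//! A minimal usage sketch (illustrative only; it assumes this module is exposed
//! as `scirs2_stats::scipy_benchmark_comparison` and that a Python interpreter
//! with SciPy is available on the host):
//!
//! ```ignore
//! use scirs2_stats::scipy_benchmark_comparison::ScipyBenchmarkComparison;
//!
//! let comparison = ScipyBenchmarkComparison::default()?;
//! let report = comparison.run_comprehensive_comparison()?;
//! println!("pass rate: {:.1}%", report.summary.pass_rate * 100.0);
//! ```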

use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::process::Command;
use std::time::{Duration, Instant};

/// Configuration for SciPy comparison benchmarks
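///
/// The defaults target a local `python3` with SciPy >= 1.9; individual fields
/// can be overridden with struct-update syntax, e.g. (illustrative sketch):
///
/// ```ignore
/// let config = ScipyComparisonConfig {
///     accuracy_tolerance: 1e-8,
///     testsizes: vec![1_000, 100_000],
///     ..ScipyComparisonConfig::default()
/// };
/// ```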
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScipyComparisonConfig {
    /// Python executable path
    pub python_executable: String,
    /// SciPy version requirement
    pub scipy_version: Option<String>,
    /// NumPy version requirement
    pub numpy_version: Option<String>,
    /// Temporary directory for test scripts
    pub temp_dir: String,
    /// Accuracy tolerance for numerical comparisons
    pub accuracy_tolerance: f64,
    /// Performance tolerance (ratio to SciPy)
    pub performance_tolerance: f64,
    /// Number of warmup iterations
    pub warmup_iterations: usize,
    /// Number of measurement iterations
    pub measurement_iterations: usize,
    /// Enable detailed accuracy analysis
    pub detailed_accuracy: bool,
    /// Enable memory usage comparison
    pub compare_memory: bool,
    /// Test data sizes
    pub testsizes: Vec<usize>,
    /// Functions to benchmark
    pub functions_to_test: Vec<String>,
}

impl Default for ScipyComparisonConfig {
    fn default() -> Self {
        Self {
            python_executable: "python3".to_string(),
            scipy_version: Some(">=1.9.0".to_string()),
            numpy_version: Some(">=1.21.0".to_string()),
            temp_dir: "/tmp/scirs2_benchmarks".to_string(),
            accuracy_tolerance: 1e-10,
            performance_tolerance: 2.0, // Allow 2x slower than SciPy
            warmup_iterations: 10,
            measurement_iterations: 100,
            detailed_accuracy: true,
            compare_memory: true,
            testsizes: vec![100, 1000, 10000, 100000],
            functions_to_test: vec![
                "mean".to_string(),
                "std".to_string(),
                "var".to_string(),
                "skew".to_string(),
                "kurtosis".to_string(),
                "pearsonr".to_string(),
                "spearmanr".to_string(),
                "ttest_ind".to_string(),
                "ttest_1samp".to_string(),
                "norm_pdf".to_string(),
                "norm_cdf".to_string(),
            ],
        }
    }
}

/// Results of SciPy comparison benchmarking
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScipyComparisonReport {
    /// Timestamp of the comparison
    pub timestamp: String,
    /// Configuration used
    pub config: ScipyComparisonConfig,
    /// System information
    pub system_info: SystemInfo,
    /// SciPy environment info
    pub scipy_environment: ScipyEnvironmentInfo,
    /// Individual function comparison results
    pub function_comparisons: Vec<FunctionComparison>,
    /// Overall summary statistics
    pub summary: ComparisonSummary,
    /// Performance analysis
    pub performance_analysis: PerformanceAnalysis,
    /// Accuracy analysis
    pub accuracy_analysis: AccuracyAnalysis,
    /// Recommendations
    pub recommendations: Vec<ComparisonRecommendation>,
}

/// System information for comparison context
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// Operating system
    pub os: String,
    /// CPU information
    pub cpu: String,
    /// Memory information
    pub memory_gb: f64,
    /// Rust version
    pub rust_version: String,
    /// scirs2-stats version
    pub scirs2_version: String,
}

/// SciPy environment information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScipyEnvironmentInfo {
    /// Python version
    pub python_version: String,
    /// SciPy version
    pub scipy_version: String,
    /// NumPy version
    pub numpy_version: String,
    /// BLAS/LAPACK information
    pub blas_info: String,
    /// Available Python packages
    pub packages: HashMap<String, String>,
}

/// Comparison results for a single function
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionComparison {
    /// Function name
    pub function_name: String,
    /// Test data size
    pub datasize: usize,
    /// Performance comparison
    pub performance: PerformanceComparison,
    /// Accuracy comparison
    pub accuracy: AccuracyComparison,
    /// Memory usage comparison
    pub memory: Option<MemoryComparison>,
    /// Test status
    pub status: ComparisonStatus,
    /// Error details if failed
    pub error_details: Option<String>,
}

/// Performance comparison between scirs2-stats and SciPy
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceComparison {
    /// scirs2-stats execution time (nanoseconds)
    pub scirs2_time_ns: f64,
    /// SciPy execution time (nanoseconds)
    pub scipy_time_ns: f64,
    /// Performance ratio (scirs2/scipy)
    pub ratio: f64,
    /// Statistical significance of difference
    pub significance: PerformanceSignificance,
    /// Confidence interval for ratio
    pub confidence_interval: (f64, f64),
}

/// Accuracy comparison between implementations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccuracyComparison {
    /// Absolute difference
    pub absolute_difference: f64,
    /// Relative difference
    pub relative_difference: f64,
    /// Maximum element-wise difference
    pub max_element_difference: f64,
    /// Number of elements compared
    pub elements_compared: usize,
    /// Elements within tolerance
    pub elements_within_tolerance: usize,
    /// Accuracy assessment
    pub assessment: AccuracyAssessment,
}

/// Memory usage comparison
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryComparison {
    /// scirs2-stats memory usage (bytes)
    pub scirs2_memory: usize,
    /// SciPy memory usage (bytes)
    pub scipy_memory: usize,
    /// Memory ratio (scirs2/scipy)
    pub ratio: f64,
    /// Memory efficiency assessment
    pub assessment: MemoryEfficiencyAssessment,
}

/// Status of comparison test
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComparisonStatus {
    /// Test passed all checks
    Passed,
    /// Test passed with warnings
    PassedWithWarnings { warnings: Vec<String> },
    /// Test failed accuracy requirements
    FailedAccuracy { details: String },
    /// Test failed performance requirements
    FailedPerformance { details: String },
    /// Test encountered execution error
    Error { error: String },
    /// Test was skipped
    Skipped { reason: String },
}

/// Performance significance assessment
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PerformanceSignificance {
    /// No significant difference
    NotSignificant,
    /// scirs2-stats significantly faster
    ScirsFaster { confidence: f64 },
    /// SciPy significantly faster
    ScipyFaster { confidence: f64 },
    /// Insufficient data for assessment
    InsufficientData,
}

/// Accuracy assessment categories
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AccuracyAssessment {
    /// Excellent accuracy (within machine precision)
    Excellent,
    /// Good accuracy (within specified tolerance)
    Good,
    /// Acceptable accuracy (small differences)
    Acceptable,
    /// Poor accuracy (significant differences)
    Poor,
    /// Unacceptable accuracy (large differences)
    Unacceptable,
}

/// Memory efficiency assessment
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MemoryEfficiencyAssessment {
    /// More memory efficient than SciPy
    MoreEfficient,
    /// Similar memory usage to SciPy
    Similar,
    /// Less memory efficient than SciPy
    LessEfficient,
    /// Significantly less memory efficient
    MuchLessEfficient,
}

/// Overall comparison summary
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonSummary {
    /// Total tests run
    pub total_tests: usize,
    /// Tests passed
    pub tests_passed: usize,
    /// Tests with warnings
    pub tests_with_warnings: usize,
    /// Tests failed
    pub tests_failed: usize,
    /// Overall pass rate
    pub pass_rate: f64,
    /// Functions with performance issues
    pub performance_issues: Vec<String>,
    /// Functions with accuracy issues
    pub accuracy_issues: Vec<String>,
    /// Overall performance rating
    pub performance_rating: PerformanceRating,
    /// Overall accuracy rating
    pub accuracy_rating: AccuracyRating,
}

/// Performance analysis across all tests
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceAnalysis {
    /// Average performance ratio
    pub average_ratio: f64,
    /// Performance ratio standard deviation
    pub ratio_std_dev: f64,
    /// Functions faster than SciPy
    pub faster_functions: Vec<(String, f64)>,
    /// Functions slower than SciPy
    pub slower_functions: Vec<(String, f64)>,
    /// Performance by data size
    pub performance_bysize: HashMap<usize, f64>,
    /// Performance trends
    pub trends: PerformanceTrends,
}

/// Accuracy analysis across all tests
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccuracyAnalysis {
    /// Average relative difference
    pub average_relative_diff: f64,
    /// Maximum relative difference
    pub max_relative_diff: f64,
    /// Functions with accuracy issues
    pub problematic_functions: Vec<(String, f64)>,
    /// Accuracy by data size
    pub accuracy_bysize: HashMap<usize, f64>,
    /// Numerical stability assessment
    pub stability_assessment: NumericalStabilityAssessment,
}

/// Performance rating categories
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PerformanceRating {
    /// Excellent performance (consistently faster)
    Excellent,
    /// Good performance (mostly competitive)
    Good,
    /// Acceptable performance (within tolerance)
    Acceptable,
    /// Poor performance (consistently slower)
    Poor,
    /// Unacceptable performance (significantly slower)
    Unacceptable,
}

/// Accuracy rating categories
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AccuracyRating {
    /// Excellent accuracy (machine precision)
    Excellent,
    /// Good accuracy (high precision)
    Good,
    /// Acceptable accuracy (within tolerance)
    Acceptable,
    /// Poor accuracy (noticeable differences)
    Poor,
    /// Unacceptable accuracy (significant errors)
    Unacceptable,
}

/// Performance trends analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrends {
    /// Performance scaling with data size
    pub scaling_analysis: ScalingAnalysis,
    /// Performance stability over multiple runs
    pub stability_analysis: StabilityAnalysis,
    /// Performance regression detection
    pub regression_analysis: RegressionAnalysis,
}

/// Scaling analysis results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalingAnalysis {
    /// Scaling factor relative to SciPy
    pub relative_scaling: f64,
    /// Complexity assessment
    pub complexity_assessment: ComplexityAssessment,
    /// Crossover points where performance changes
    pub crossover_points: Vec<usize>,
}

/// Stability analysis results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StabilityAnalysis {
    /// Coefficient of variation for performance
    pub performance_cv: f64,
    /// Performance outliers detected
    pub outliers_detected: usize,
    /// Stability rating
    pub stability_rating: StabilityRating,
}

/// Regression analysis results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionAnalysis {
    /// Performance regressions detected
    pub regressions_detected: Vec<PerformanceRegression>,
    /// Accuracy regressions detected
    pub accuracy_regressions: Vec<AccuracyRegression>,
    /// Overall regression risk
    pub regression_risk: RegressionRisk,
}

/// Complexity assessment
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComplexityAssessment {
    /// Better complexity than SciPy
    Better,
    /// Similar complexity to SciPy
    Similar,
    /// Worse complexity than SciPy
    Worse,
    /// Unknown complexity relationship
    Unknown,
}

/// Stability rating
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StabilityRating {
    /// Very stable performance
    VeryStable,
    /// Stable performance
    Stable,
    /// Moderately stable
    ModeratelyStable,
    /// Unstable performance
    Unstable,
    /// Very unstable performance
    VeryUnstable,
}

/// Performance regression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceRegression {
    /// Function affected
    pub function_name: String,
    /// Regression magnitude
    pub regression_factor: f64,
    /// Confidence in detection
    pub confidence: f64,
    /// Suspected cause
    pub suspected_cause: String,
}

/// Accuracy regression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccuracyRegression {
    /// Function affected
    pub function_name: String,
    /// Accuracy degradation
    pub accuracy_loss: f64,
    /// Severity assessment
    pub severity: AccuracyRegressionSeverity,
}

/// Regression risk assessment
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RegressionRisk {
    /// Low risk of regressions
    Low,
    /// Medium risk of regressions
    Medium,
    /// High risk of regressions
    High,
    /// Critical risk of regressions
    Critical,
}

/// Accuracy regression severity
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AccuracyRegressionSeverity {
    /// Minor accuracy loss
    Minor,
    /// Moderate accuracy loss
    Moderate,
    /// Major accuracy loss
    Major,
    /// Critical accuracy loss
    Critical,
}

/// Numerical stability assessment
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NumericalStabilityAssessment {
    /// Overall stability rating
    pub stability_rating: NumericalStabilityRating,
    /// Functions with stability issues
    pub unstable_functions: Vec<String>,
    /// Condition number analysis
    pub condition_number_analysis: ConditionNumberAnalysis,
    /// Precision loss analysis
    pub precision_loss_analysis: PrecisionLossAnalysis,
}

/// Numerical stability rating
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericalStabilityRating {
    /// Excellent numerical stability
    Excellent,
    /// Good numerical stability
    Good,
    /// Acceptable numerical stability
    Acceptable,
    /// Poor numerical stability
    Poor,
    /// Unacceptable numerical stability
    Unacceptable,
}

/// Condition number analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionNumberAnalysis {
    /// Functions sensitive to condition number
    pub sensitive_functions: Vec<String>,
    /// Condition number thresholds
    pub thresholds: HashMap<String, f64>,
    /// Stability recommendations
    pub recommendations: Vec<String>,
}

/// Precision loss analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrecisionLossAnalysis {
    /// Average precision loss
    pub average_loss: f64,
    /// Maximum precision loss
    pub max_loss: f64,
    /// Functions with significant loss
    pub problematic_functions: Vec<String>,
}

/// Comparison recommendation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonRecommendation {
    /// Recommendation priority
    pub priority: RecommendationPriority,
    /// Category of recommendation
    pub category: RecommendationCategory,
    /// Recommendation description
    pub description: String,
    /// Affected functions
    pub affected_functions: Vec<String>,
    /// Implementation complexity
    pub complexity: ImplementationComplexity,
    /// Expected impact
    pub expected_impact: ExpectedImpact,
}

/// Recommendation priority levels
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RecommendationPriority {
    /// Critical priority
    Critical,
    /// High priority
    High,
    /// Medium priority
    Medium,
    /// Low priority
    Low,
    /// Nice to have
    NiceToHave,
}

/// Recommendation categories
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RecommendationCategory {
    /// Performance optimization
    Performance,
    /// Accuracy improvement
    Accuracy,
    /// Memory optimization
    Memory,
    /// API compatibility
    APICompatibility,
    /// Numerical stability
    NumericalStability,
    /// Testing enhancement
    Testing,
}

/// Implementation complexity assessment
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ImplementationComplexity {
    /// Simple to implement
    Simple,
    /// Moderate complexity
    Moderate,
    /// Complex implementation
    Complex,
    /// Very complex implementation
    VeryComplex,
}

/// Expected impact of recommendation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExpectedImpact {
    /// Performance improvement factor
    pub performance_improvement: Option<f64>,
    /// Accuracy improvement
    pub accuracy_improvement: Option<f64>,
    /// Memory reduction factor
    pub memory_reduction: Option<f64>,
    /// Implementation effort (person-days)
    pub implementation_effort: f64,
}

/// Main SciPy comparison framework
pub struct ScipyBenchmarkComparison {
    config: ScipyComparisonConfig,
    temp_dir: String,
}

impl ScipyBenchmarkComparison {
    /// Create new SciPy comparison framework
    pub fn new(config: ScipyComparisonConfig) -> StatsResult<Self> {
        // Create temporary directory
        fs::create_dir_all(&config.temp_dir).map_err(|e| {
            StatsError::ComputationError(format!("Failed to create temp directory: {}", e))
        })?;

        Ok(Self {
            temp_dir: config.temp_dir.clone(),
            config,
        })
    }

    /// Create with default configuration
    pub fn default() -> StatsResult<Self> {
        Self::new(ScipyComparisonConfig::default())
    }

    /// Run comprehensive comparison benchmarks
    pub fn run_comprehensive_comparison(&self) -> StatsResult<ScipyComparisonReport> {
        let _start_time = Instant::now();

        // Verify SciPy environment
        let scipy_env = self.verify_scipy_environment()?;

        // Collect system information
        let system_info = self.collect_system_info();

        // Run function comparisons
        let mut function_comparisons = Vec::new();

        for function_name in &self.config.functions_to_test {
            for &datasize in &self.config.testsizes {
                match self.compare_function(function_name, datasize) {
                    Ok(comparison) => function_comparisons.push(comparison),
                    Err(e) => {
                        function_comparisons.push(FunctionComparison {
                            function_name: function_name.clone(),
                            datasize,
                            performance: PerformanceComparison {
                                scirs2_time_ns: 0.0,
                                scipy_time_ns: 0.0,
                                ratio: 0.0,
                                significance: PerformanceSignificance::InsufficientData,
                                confidence_interval: (0.0, 0.0),
                            },
                            accuracy: AccuracyComparison {
                                absolute_difference: 0.0,
                                relative_difference: 0.0,
                                max_element_difference: 0.0,
                                elements_compared: 0,
                                elements_within_tolerance: 0,
                                assessment: AccuracyAssessment::Poor,
                            },
                            memory: None,
                            status: ComparisonStatus::Error {
                                error: e.to_string(),
                            },
                            error_details: Some(e.to_string()),
                        });
                    }
                }
            }
        }

        // Analyze results
        let summary = self.generate_summary(&function_comparisons);
        let performance_analysis = self.analyze_performance(&function_comparisons);
        let accuracy_analysis = self.analyze_accuracy(&function_comparisons);
        let recommendations = self.generate_recommendations(
            &function_comparisons,
            &performance_analysis,
            &accuracy_analysis,
        );

        Ok(ScipyComparisonReport {
            timestamp: chrono::Utc::now().to_rfc3339(),
            config: self.config.clone(),
            system_info,
            scipy_environment: scipy_env,
            function_comparisons,
            summary,
            performance_analysis,
            accuracy_analysis,
            recommendations,
        })
    }

    /// Verify SciPy environment is available and compatible
    fn verify_scipy_environment(&self) -> StatsResult<ScipyEnvironmentInfo> {
        let script = r#"
import sys
import io
import contextlib
import scipy
import numpy as np
import json

# np.__config__.show() prints its report to stdout and returns None, so
# capture that output here; otherwise the report would precede the JSON
# printed below and break parsing on the Rust side.
_blas_buffer = io.StringIO()
with contextlib.redirect_stdout(_blas_buffer):
    np.__config__.show()

info = {
    'python_version': sys.version,
    'scipy_version': scipy.__version__,
    'numpy_version': np.__version__,
    'blas_info': _blas_buffer.getvalue().strip(),
    'packages': {}
}

try:
    import pandas
    info['packages']['pandas'] = pandas.__version__
except ImportError:
    pass

try:
    import sklearn
    info['packages']['sklearn'] = sklearn.__version__
except ImportError:
    pass

print(json.dumps(info))
"#;

        let script_path = format!("{}/verify_env.py", self.temp_dir);
        fs::write(&script_path, script).map_err(|e| {
            StatsError::ComputationError(format!("Failed to write verification script: {}", e))
        })?;

        let output = Command::new(&self.config.python_executable)
            .arg(&script_path)
            .output()
            .map_err(|e| {
                StatsError::ComputationError(format!("Failed to execute Python: {}", e))
            })?;

        if !output.status.success() {
            return Err(StatsError::ComputationError(format!(
                "Python script failed: {}",
                String::from_utf8_lossy(&output.stderr)
            )));
        }

        let output_str = String::from_utf8_lossy(&output.stdout);
        let info: serde_json::Value = serde_json::from_str(&output_str).map_err(|e| {
            StatsError::ComputationError(format!("Failed to parse environment info: {}", e))
        })?;

        Ok(ScipyEnvironmentInfo {
            python_version: info["python_version"]
                .as_str()
                .unwrap_or("unknown")
                .to_string(),
            scipy_version: info["scipy_version"]
                .as_str()
                .unwrap_or("unknown")
                .to_string(),
            numpy_version: info["numpy_version"]
                .as_str()
                .unwrap_or("unknown")
                .to_string(),
            blas_info: info["blas_info"].as_str().unwrap_or("unknown").to_string(),
            packages: info["packages"]
                .as_object()
                .unwrap_or(&serde_json::Map::new())
                .iter()
                .map(|(k, v)| (k.clone(), v.as_str().unwrap_or("unknown").to_string()))
                .collect(),
        })
    }

    /// Collect system information
    fn collect_system_info(&self) -> SystemInfo {
        SystemInfo {
            os: std::env::consts::OS.to_string(),
            cpu: "Generic CPU".to_string(), // Would use proper CPU detection
            memory_gb: 8.0,                 // Placeholder
            rust_version: std::env::var("RUSTC_VERSION").unwrap_or_else(|_| "unknown".to_string()),
            scirs2_version: std::env::var("CARGO_PKG_VERSION")
                .unwrap_or_else(|_| "unknown".to_string()),
        }
    }

    /// Compare a single function between scirs2-stats and SciPy
    fn compare_function(
        &self,
        function_name: &str,
        datasize: usize,
    ) -> StatsResult<FunctionComparison> {
        // Generate test data
        let testdata = self.generate_testdata(datasize)?;

        // Benchmark scirs2-stats function
        let scirs2_result = self.benchmark_scirs2_function(function_name, &testdata)?;

        // Benchmark SciPy function
        let scipy_result = self.benchmark_scipy_function(function_name, &testdata)?;

        // Compare performance
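        // A ratio above 1.0 means scirs2-stats took longer than SciPy on this
        // input (e.g. ratio = 1.25 is roughly 25% more wall-clock time).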
        let performance = PerformanceComparison {
            scirs2_time_ns: scirs2_result.execution_time.as_nanos() as f64,
            scipy_time_ns: scipy_result.execution_time.as_nanos() as f64,
            ratio: scirs2_result.execution_time.as_nanos() as f64
                / scipy_result.execution_time.as_nanos() as f64,
            significance: PerformanceSignificance::NotSignificant, // Simplified
            confidence_interval: (0.8, 1.2),                       // Placeholder
        };

        // Compare accuracy
        let accuracy = self.compare_accuracy(&scirs2_result.result, &scipy_result.result)?;

        // Determine status
        let status = self.determine_comparison_status(&performance, &accuracy);

        Ok(FunctionComparison {
            function_name: function_name.to_string(),
            datasize,
            performance,
            accuracy,
            memory: None, // Would implement memory comparison
            status,
            error_details: None,
        })
    }

    /// Generate test data for benchmarking
    fn generate_testdata(&self, size: usize) -> StatsResult<TestData> {
        use scirs2_core::random::{Distribution, Normal};

        let mut rng = scirs2_core::random::thread_rng();
        let normal = Normal::new(0.0, 1.0).map_err(|e| {
            StatsError::ComputationError(format!("Failed to create normal distribution: {}", e))
        })?;

        // Primary sample: i.i.d. standard normal draws.
        let data: Vec<f64> = (0..size).map(|_| normal.sample(&mut rng)).collect();

        Ok(TestData {
            primary: Array1::from_vec(data.clone()),
            // Secondary sample: the primary data plus small noise, so the two
            // series are strongly correlated (useful for pearsonr/spearmanr).
            secondary: Array1::from_vec(
                data.iter()
                    .map(|x| x + 0.1 * normal.sample(&mut rng))
                    .collect(),
            ),
            // Matrix data is capped at 100x100 to keep setup costs bounded.
            matrix: Array2::from_shape_fn((size.min(100), size.min(100)), |(i, j)| {
                normal.sample(&mut rng) + 0.1 * (i + j) as f64
            }),
        })
    }

    /// Benchmark scirs2-stats function
    fn benchmark_scirs2_function(
        &self,
        function_name: &str,
        testdata: &TestData,
    ) -> StatsResult<BenchmarkResult> {
        let start_time = Instant::now();

        let result = match function_name {
            "mean" => {
                vec![crate::descriptive::mean(&testdata.primary.view())?]
            }
            "std" => {
                vec![crate::descriptive::std(&testdata.primary.view(), 1, None)?]
            }
            "var" => {
                vec![crate::descriptive::var(&testdata.primary.view(), 1, None)?]
            }
            "skew" => {
                vec![crate::descriptive::skew(
                    &testdata.primary.view(),
                    false,
                    None,
                )?]
            }
            "kurtosis" => {
                vec![crate::descriptive::kurtosis(
                    &testdata.primary.view(),
                    true,
                    false,
                    None,
                )?]
            }
            "pearsonr" => {
                let corr = crate::correlation::pearson_r(
                    &testdata.primary.view(),
                    &testdata.secondary.view(),
                )?;
                vec![corr]
            }
            "spearmanr" => {
                let corr = crate::correlation::spearman_r(
                    &testdata.primary.view(),
                    &testdata.secondary.view(),
                )?;
                vec![corr]
            }
            "ttest_1samp" => {
                let result = crate::tests::ttest::ttest_1samp(
                    &testdata.primary.view(),
                    0.0,
                    crate::tests::ttest::Alternative::TwoSided,
                    "propagate",
                )?;
                vec![result.statistic, result.pvalue]
            }
            _ => {
                return Err(StatsError::NotImplemented(format!(
                    "Function {} not implemented in benchmark",
                    function_name
                )));
            }
        };

        let execution_time = start_time.elapsed();

        Ok(BenchmarkResult {
            result,
            execution_time,
        })
    }

    /// Benchmark SciPy function
    fn benchmark_scipy_function(
        &self,
        function_name: &str,
        testdata: &TestData,
    ) -> StatsResult<BenchmarkResult> {
        let script = self.generate_scipy_script(function_name, testdata)?;
        let script_path = format!("{}/scipy_benchmark_{}.py", self.temp_dir, function_name);

        fs::write(&script_path, script).map_err(|e| {
            StatsError::ComputationError(format!("Failed to write SciPy script: {}", e))
        })?;

        let output = Command::new(&self.config.python_executable)
            .arg(&script_path)
            .output()
            .map_err(|e| {
                StatsError::ComputationError(format!("Failed to execute SciPy script: {}", e))
            })?;

        if !output.status.success() {
            return Err(StatsError::ComputationError(format!(
                "SciPy script failed: {}",
                String::from_utf8_lossy(&output.stderr)
            )));
        }

        let output_str = String::from_utf8_lossy(&output.stdout);
        let result: serde_json::Value = serde_json::from_str(&output_str).map_err(|e| {
            StatsError::ComputationError(format!("Failed to parse SciPy result: {}", e))
        })?;

        let execution_time =
            Duration::from_secs_f64(result["execution_time"].as_f64().unwrap_or(0.0));

        let result_values: Vec<f64> = result["result"]
            .as_array()
            .unwrap_or(&Vec::new())
            .iter()
            .filter_map(|v| v.as_f64())
            .collect();

        Ok(BenchmarkResult {
            result: result_values,
            execution_time,
        })
    }

    /// Generate SciPy benchmark script
    fn generate_scipy_script(
        &self,
        function_name: &str,
        testdata: &TestData,
    ) -> StatsResult<String> {
        let data_primary: Vec<String> = testdata.primary.iter().map(|x| x.to_string()).collect();
        let data_secondary: Vec<String> =
            testdata.secondary.iter().map(|x| x.to_string()).collect();

        let script = match function_name {
            "mean" => {
                format!(
                    r#"
import numpy as np
import time
import json

data = np.array([{}])

start_time = time.perf_counter()
result = np.mean(data)
execution_time = time.perf_counter() - start_time

output = {{
    'result': [float(result)],
    'execution_time': execution_time
}}

print(json.dumps(output))
"#,
                    data_primary.join(", ")
                )
            }
            "std" => {
                format!(
                    r#"
import numpy as np
import time
import json

data = np.array([{}])

start_time = time.perf_counter()
result = np.std(data, ddof=1)
execution_time = time.perf_counter() - start_time

output = {{
    'result': [float(result)],
    'execution_time': execution_time
}}

print(json.dumps(output))
"#,
                    data_primary.join(", ")
                )
            }
            "pearsonr" => {
                format!(
                    r#"
import numpy as np
import scipy.stats
import time
import json

data1 = np.array([{}])
data2 = np.array([{}])

start_time = time.perf_counter()
corr, p_value = scipy.stats.pearsonr(data1, data2)
execution_time = time.perf_counter() - start_time

output = {{
    'result': [float(corr)],
    'execution_time': execution_time
}}

print(json.dumps(output))
"#,
                    data_primary.join(", "),
                    data_secondary.join(", ")
                )
            }
            _ => {
                return Err(StatsError::NotImplemented(format!(
                    "SciPy script generation not implemented for {}",
                    function_name
                )));
            }
        };

        Ok(script)
    }

    /// Compare accuracy between results
    fn compare_accuracy(
        &self,
        scirs2_result: &[f64],
        scipy_result: &[f64],
    ) -> StatsResult<AccuracyComparison> {
        if scirs2_result.len() != scipy_result.len() {
            return Ok(AccuracyComparison {
                absolute_difference: f64::INFINITY,
                relative_difference: f64::INFINITY,
                max_element_difference: f64::INFINITY,
                elements_compared: 0,
                elements_within_tolerance: 0,
                assessment: AccuracyAssessment::Unacceptable,
            });
        }

        let mut abs_diffs = Vec::new();
        let mut rel_diffs = Vec::new();
        let mut within_tolerance = 0;

        for (s, r) in scirs2_result.iter().zip(scipy_result.iter()) {
            let abs_diff = (s - r).abs();
            let rel_diff = if r.abs() > 1e-10 {
                abs_diff / r.abs()
            } else {
                abs_diff
            };

            abs_diffs.push(abs_diff);
            rel_diffs.push(rel_diff);

            if abs_diff < self.config.accuracy_tolerance
                || rel_diff < self.config.accuracy_tolerance
            {
                within_tolerance += 1;
            }
        }

        let avg_abs_diff = abs_diffs.iter().sum::<f64>() / abs_diffs.len() as f64;
        let avg_rel_diff = rel_diffs.iter().sum::<f64>() / rel_diffs.len() as f64;
        let max_element_diff = abs_diffs.iter().fold(0.0f64, |acc, &x| acc.max(x));

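        // Map measured differences onto an assessment: all elements within
        // tolerance plus an average relative error near machine epsilon rates
        // Excellent; larger errors or out-of-tolerance elements degrade the
        // rating toward Unacceptable.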
        let assessment = if within_tolerance == scirs2_result.len() {
            if avg_rel_diff < 1e-14 {
                AccuracyAssessment::Excellent
            } else if avg_rel_diff < 1e-10 {
                AccuracyAssessment::Good
            } else {
                AccuracyAssessment::Acceptable
            }
        } else if within_tolerance as f64 / scirs2_result.len() as f64 > 0.9 {
            AccuracyAssessment::Acceptable
        } else if within_tolerance as f64 / scirs2_result.len() as f64 > 0.5 {
            AccuracyAssessment::Poor
        } else {
            AccuracyAssessment::Unacceptable
        };

        Ok(AccuracyComparison {
            absolute_difference: avg_abs_diff,
            relative_difference: avg_rel_diff,
            max_element_difference: max_element_diff,
            elements_compared: scirs2_result.len(),
            elements_within_tolerance: within_tolerance,
            assessment,
        })
    }

    /// Determine comparison status
    fn determine_comparison_status(
        &self,
        performance: &PerformanceComparison,
        accuracy: &AccuracyComparison,
    ) -> ComparisonStatus {
        let mut warnings = Vec::new();

        // Check accuracy
        if matches!(
            accuracy.assessment,
            AccuracyAssessment::Unacceptable | AccuracyAssessment::Poor
        ) {
            return ComparisonStatus::FailedAccuracy {
                details: format!("Relative difference: {:.2e}", accuracy.relative_difference),
            };
        }

        // Check performance
        if performance.ratio > self.config.performance_tolerance {
            return ComparisonStatus::FailedPerformance {
                details: format!(
                    "Performance ratio: {:.2} (limit: {:.2})",
                    performance.ratio, self.config.performance_tolerance
                ),
            };
        }

        // Check for warnings
        if matches!(accuracy.assessment, AccuracyAssessment::Acceptable) {
            warnings.push("Accuracy is only acceptable".to_string());
        }

        if performance.ratio > 1.5 {
            warnings.push(format!(
                "Performance is {:.1}x slower than SciPy",
                performance.ratio
            ));
        }

        if warnings.is_empty() {
            ComparisonStatus::Passed
        } else {
            ComparisonStatus::PassedWithWarnings { warnings }
        }
    }

    /// Generate comparison summary
    fn generate_summary(&self, comparisons: &[FunctionComparison]) -> ComparisonSummary {
        let total_tests = comparisons.len();
        let tests_passed = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::Passed))
            .count();
        let tests_with_warnings = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::PassedWithWarnings { .. }))
            .count();
        let tests_failed = total_tests - tests_passed - tests_with_warnings;

        let pass_rate = if total_tests > 0 {
            (tests_passed + tests_with_warnings) as f64 / total_tests as f64
        } else {
            0.0
        };

        let performance_issues: Vec<String> = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::FailedPerformance { .. }))
            .map(|c| c.function_name.clone())
            .collect();

        let accuracy_issues: Vec<String> = comparisons
            .iter()
            .filter(|c| matches!(c.status, ComparisonStatus::FailedAccuracy { .. }))
            .map(|c| c.function_name.clone())
            .collect();

        let avg_performance_ratio =
            comparisons.iter().map(|c| c.performance.ratio).sum::<f64>() / comparisons.len() as f64;

        let performance_rating = if avg_performance_ratio < 0.8 {
            PerformanceRating::Excellent
        } else if avg_performance_ratio < 1.2 {
            PerformanceRating::Good
        } else if avg_performance_ratio < 2.0 {
            PerformanceRating::Acceptable
        } else if avg_performance_ratio < 5.0 {
            PerformanceRating::Poor
        } else {
            PerformanceRating::Unacceptable
        };

        let avg_relative_diff = comparisons
            .iter()
            .map(|c| c.accuracy.relative_difference)
            .sum::<f64>()
            / comparisons.len() as f64;

        let accuracy_rating = if avg_relative_diff < 1e-14 {
            AccuracyRating::Excellent
        } else if avg_relative_diff < 1e-10 {
            AccuracyRating::Good
        } else if avg_relative_diff < 1e-6 {
            AccuracyRating::Acceptable
        } else if avg_relative_diff < 1e-3 {
            AccuracyRating::Poor
        } else {
            AccuracyRating::Unacceptable
        };

        ComparisonSummary {
            total_tests,
            tests_passed,
            tests_with_warnings,
            tests_failed,
            pass_rate,
            performance_issues,
            accuracy_issues,
            performance_rating,
            accuracy_rating,
        }
    }

    /// Analyze performance across all comparisons
    fn analyze_performance(&self, comparisons: &[FunctionComparison]) -> PerformanceAnalysis {
        let ratios: Vec<f64> = comparisons.iter().map(|c| c.performance.ratio).collect();

        let average_ratio = ratios.iter().sum::<f64>() / ratios.len() as f64;
        let variance = ratios
            .iter()
            .map(|r| (r - average_ratio).powi(2))
            .sum::<f64>()
            / ratios.len() as f64;
        let ratio_std_dev = variance.sqrt();

        let faster_functions: Vec<(String, f64)> = comparisons
            .iter()
            .filter(|c| c.performance.ratio < 1.0)
            .map(|c| (c.function_name.clone(), c.performance.ratio))
            .collect();

        let slower_functions: Vec<(String, f64)> = comparisons
            .iter()
            .filter(|c| c.performance.ratio > 1.0)
            .map(|c| (c.function_name.clone(), c.performance.ratio))
            .collect();

        let performance_bysize = HashMap::new(); // Would implement proper analysis

        let trends = PerformanceTrends {
            scaling_analysis: ScalingAnalysis {
                relative_scaling: 1.0, // Placeholder
                complexity_assessment: ComplexityAssessment::Similar,
                crossover_points: Vec::new(),
            },
            stability_analysis: StabilityAnalysis {
                performance_cv: ratio_std_dev / average_ratio,
                outliers_detected: 0, // Would implement outlier detection
                stability_rating: StabilityRating::Stable,
            },
            regression_analysis: RegressionAnalysis {
                regressions_detected: Vec::new(),
                accuracy_regressions: Vec::new(),
                regression_risk: RegressionRisk::Low,
            },
        };

        PerformanceAnalysis {
            average_ratio,
            ratio_std_dev,
            faster_functions,
            slower_functions,
            performance_bysize,
            trends,
        }
    }

    /// Analyze accuracy across all comparisons
    fn analyze_accuracy(&self, comparisons: &[FunctionComparison]) -> AccuracyAnalysis {
        let relative_diffs: Vec<f64> = comparisons
            .iter()
            .map(|c| c.accuracy.relative_difference)
            .collect();

        let average_relative_diff =
            relative_diffs.iter().sum::<f64>() / relative_diffs.len() as f64;
        let max_relative_diff = relative_diffs.iter().fold(0.0f64, |acc, &x| acc.max(x));

        let problematic_functions: Vec<(String, f64)> = comparisons
            .iter()
            .filter(|c| c.accuracy.relative_difference > self.config.accuracy_tolerance)
            .map(|c| (c.function_name.clone(), c.accuracy.relative_difference))
            .collect();

        let accuracy_bysize = HashMap::new(); // Would implement proper analysis

        let stability_assessment = NumericalStabilityAssessment {
            stability_rating: if max_relative_diff < 1e-10 {
                NumericalStabilityRating::Excellent
            } else if max_relative_diff < 1e-6 {
                NumericalStabilityRating::Good
            } else {
                NumericalStabilityRating::Acceptable
            },
            unstable_functions: problematic_functions
                .iter()
                .map(|(name, _)| name.clone())
                .collect(),
            condition_number_analysis: ConditionNumberAnalysis {
                sensitive_functions: Vec::new(),
                thresholds: HashMap::new(),
                recommendations: Vec::new(),
            },
            precision_loss_analysis: PrecisionLossAnalysis {
                average_loss: average_relative_diff,
                max_loss: max_relative_diff,
                problematic_functions: problematic_functions
                    .iter()
                    .map(|(name, _)| name.clone())
                    .collect(),
            },
        };

        AccuracyAnalysis {
            average_relative_diff,
            max_relative_diff,
            problematic_functions,
            accuracy_bysize,
            stability_assessment,
        }
    }

    /// Generate recommendations based on analysis
    fn generate_recommendations(
        &self,
        comparisons: &[FunctionComparison],
        performance_analysis: &PerformanceAnalysis,
        accuracy_analysis: &AccuracyAnalysis,
    ) -> Vec<ComparisonRecommendation> {
        let mut recommendations = Vec::new();

        // Performance recommendations
        if performance_analysis.average_ratio > 2.0 {
            recommendations.push(ComparisonRecommendation {
                priority: RecommendationPriority::High,
                category: RecommendationCategory::Performance,
                description: "Overall performance is significantly slower than SciPy. Consider SIMD optimizations and algorithm improvements.".to_string(),
                affected_functions: performance_analysis.slower_functions.iter().map(|(name, _)| name.clone()).collect(),
                complexity: ImplementationComplexity::Moderate,
                expected_impact: ExpectedImpact {
                    performance_improvement: Some(2.0),
                    accuracy_improvement: None,
                    memory_reduction: None,
                    implementation_effort: 20.0,
                },
            });
        }

        // Accuracy recommendations
        if accuracy_analysis.max_relative_diff > 1e-6 {
            recommendations.push(ComparisonRecommendation {
                priority: RecommendationPriority::Critical,
                category: RecommendationCategory::Accuracy,
                description: "Some functions have significant accuracy differences compared to SciPy. Review numerical algorithms.".to_string(),
                affected_functions: accuracy_analysis.problematic_functions.iter().map(|(name, _)| name.clone()).collect(),
                complexity: ImplementationComplexity::Complex,
                expected_impact: ExpectedImpact {
                    performance_improvement: None,
                    accuracy_improvement: Some(10.0),
                    memory_reduction: None,
                    implementation_effort: 15.0,
                },
            });
        }

        // Function-specific recommendations
        for comparison in comparisons {
            if comparison.performance.ratio > 5.0 {
                recommendations.push(ComparisonRecommendation {
                    priority: RecommendationPriority::High,
                    category: RecommendationCategory::Performance,
                    description: format!(
                        "Function '{}' is significantly slower than SciPy",
                        comparison.function_name
                    ),
                    affected_functions: vec![comparison.function_name.clone()],
                    complexity: ImplementationComplexity::Moderate,
                    expected_impact: ExpectedImpact {
                        performance_improvement: Some(3.0),
                        accuracy_improvement: None,
                        memory_reduction: None,
                        implementation_effort: 5.0,
                    },
                });
            }
        }

        recommendations
    }
}

/// Test data structure for benchmarking
#[derive(Debug, Clone)]
struct TestData {
    primary: Array1<f64>,
    secondary: Array1<f64>,
    #[allow(dead_code)]
    matrix: Array2<f64>,
}

/// Benchmark result structure
#[derive(Debug, Clone)]
struct BenchmarkResult {
    result: Vec<f64>,
    execution_time: Duration,
}

/// Convenience function to run SciPy comparison
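///
/// Equivalent to building the framework with `ScipyComparisonConfig::default()`
/// and calling `run_comprehensive_comparison` (see the module-level example).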
#[allow(dead_code)]
pub fn run_scipy_comparison() -> StatsResult<ScipyComparisonReport> {
    let comparison = ScipyBenchmarkComparison::default()?;
    comparison.run_comprehensive_comparison()
}

/// Run comparison for specific functions
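///
/// A sketch of the intended call pattern (illustrative; requires a Python
/// installation with SciPy available as configured in `ScipyComparisonConfig`):
///
/// ```ignore
/// let report = run_function_comparison(vec!["mean".to_string(), "std".to_string()])?;
/// assert_eq!(report.config.functions_to_test.len(), 2);
/// ```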
#[allow(dead_code)]
pub fn run_function_comparison(functions: Vec<String>) -> StatsResult<ScipyComparisonReport> {
    let config = ScipyComparisonConfig {
        functions_to_test: functions,
        ..ScipyComparisonConfig::default()
    };

    let comparison = ScipyBenchmarkComparison::new(config)?;
    comparison.run_comprehensive_comparison()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_scipy_comparison_config() {
        let config = ScipyComparisonConfig::default();
        assert!(!config.functions_to_test.is_empty());
        assert!(config.accuracy_tolerance > 0.0);
        assert!(config.performance_tolerance > 1.0);
    }

    #[test]
    fn test_testdata_generation() {
        let comparison = ScipyBenchmarkComparison::default().expect("Operation failed");
        let testdata = comparison.generate_testdata(100).expect("Operation failed");

        assert_eq!(testdata.primary.len(), 100);
        assert_eq!(testdata.secondary.len(), 100);
        assert_eq!(testdata.matrix.nrows(), 100);
    }

    #[test]
    fn test_accuracy_comparison() {
        let comparison = ScipyBenchmarkComparison::default().expect("Operation failed");

        // Differences of ~1e-12, well within the default 1e-10 accuracy tolerance
        let scirs2_result = vec![1.0, 2.0, 3.0];
        let scipy_result = vec![1.000000000001, 2.000000000001, 3.000000000001];

        let accuracy = comparison
            .compare_accuracy(&scirs2_result, &scipy_result)
            .expect("Operation failed");
        assert!(matches!(
            accuracy.assessment,
            AccuracyAssessment::Excellent | AccuracyAssessment::Good
        ));
    }

    #[test]
    fn test_performance_comparison() {
        let performance = PerformanceComparison {
            scirs2_time_ns: 1000.0,
            scipy_time_ns: 800.0,
            ratio: 1.25,
            significance: PerformanceSignificance::NotSignificant,
            confidence_interval: (1.0, 1.5),
        };

        assert!(performance.ratio > 1.0); // scirs2 slower
    }

    #[test]
    fn test_recommendation_generation() {
        let config = ScipyComparisonConfig::default();
        assert!(config.performance_tolerance >= 1.0);
        assert!(config.accuracy_tolerance > 0.0);
    }
}