use crate::error::{StatsError, StatsResult};
use crate::numerical_stability_analyzer::{
NumericalStabilityAnalyzer, StabilityAnalysisResult, StabilityConfig,
};
use crate::propertybased_validation::{
ComprehensivePropertyTestSuite, PropertyTestConfig, PropertyTestResult,
};
use crate::scipy_benchmark_framework::{BenchmarkConfig, BenchmarkResult, ScipyBenchmarkFramework};
use scirs2_core::ndarray::{Array1, ArrayView1};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Instant;
/// Validation driver that combines three frameworks — SciPy benchmarking,
/// property-based testing and numerical stability analysis — and remembers the
/// outcome for every function it has validated.
#[derive(Debug)]
pub struct ComprehensiveValidationSuite {
    /// Benchmarks an implementation against a reference; only invoked when a
    /// reference function is supplied to `validate_function`.
    benchmark_framework: ScipyBenchmarkFramework,
    /// Property-based tests, looked up by function name.
    property_test_suite: ComprehensivePropertyTestSuite,
    /// Numerical stability analysis of the implementation under test.
    stability_analyzer: NumericalStabilityAnalyzer,
    /// Configuration the suite was built with.
    config: ValidationSuiteConfig,
    /// Most recent validation result per function name; the source of data for
    /// `generate_comprehensive_report`.
    cached_results: HashMap<String, ComprehensiveValidationResult>,
}
/// Settings for [`ComprehensiveValidationSuite`].
#[derive(Debug, Clone)]
pub struct ValidationSuiteConfig {
    /// Configuration handed to the SciPy benchmark framework.
    pub benchmark_config: BenchmarkConfig,
    /// Configuration handed to the property-based test suite; its `seed` also
    /// seeds the random data used for stability analysis.
    pub property_config: PropertyTestConfig,
    /// Configuration handed to the numerical stability analyzer.
    pub stability_config: StabilityConfig,
    /// When `false`, cross-framework agreement is not computed and a neutral
    /// placeholder analysis (confidence 0.5) is reported instead.
    pub enable_cross_validation: bool,
    /// Regression-detection switch.
    /// NOTE(review): not read anywhere in this file — confirm where it is used.
    pub enable_regression_detection: bool,
    /// Readiness threshold expressed as a fraction (default 0.85). It is
    /// scaled by 100 before being compared with a function's readiness score.
    pub production_readiness_threshold: f64,
}
/// Everything learned about a single function during one validation run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComprehensiveValidationResult {
    /// Name the function was validated under (also the cache key).
    pub function_name: String,
    /// Benchmark outcomes; empty when no reference implementation was given.
    pub benchmark_results: Vec<BenchmarkResult>,
    /// Outcomes of the property-based tests.
    pub property_results: Vec<PropertyTestResult>,
    /// Outcome of the numerical stability analysis.
    pub stability_result: StabilityAnalysisResult,
    /// Coarse verdict derived from the averaged framework scores.
    pub overall_status: ValidationStatus,
    /// Readiness score, criteria and blockers for production use.
    pub production_readiness: ProductionReadinessAssessment,
    /// Agreement between the three frameworks.
    pub cross_validation: CrossValidationAnalysis,
    /// Wall-clock time the validation run took.
    pub validation_time: std::time::Duration,
    /// UTC timestamp taken when the result was assembled.
    pub validated_at: chrono::DateTime<chrono::Utc>,
}
/// Coarse validation verdict.
///
/// The suite assigns it from the average of the available framework scores
/// (each on a 0–1 scale); the cut-offs are listed per variant.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ValidationStatus {
    /// Average score of at least 0.9.
    FullyValidated,
    /// Average score of at least 0.75.
    MostlyValidated,
    /// Average score of at least 0.5.
    PartiallyValidated,
    /// Average score of at least 0.25.
    PoorlyValidated,
    /// Average score below 0.25 (or not comparable, e.g. NaN).
    ValidationFailed,
}
/// Production-readiness verdict for a single function.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductionReadinessAssessment {
    /// `true` when `readiness_score` reaches the configured threshold
    /// (threshold fraction × 100).
    pub is_production_ready: bool,
    /// Accumulated readiness points on a 0–100 scale.
    pub readiness_score: f64,
    /// Per-criterion pass/fail flags.
    pub readiness_criteria: ReadinessCriteria,
    /// Problems standing in the way of production use.
    pub production_blockers: Vec<ProductionBlocker>,
    /// Suggested improvements.
    pub recommendations: Vec<ProductionRecommendation>,
}
/// Individual pass/fail flags behind a readiness assessment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadinessCriteria {
    /// Every benchmark result passed its accuracy tolerance.
    pub accuracy_ready: bool,
    /// Every benchmark result earned an acceptable performance grade.
    pub performance_ready: bool,
    /// The stability grade is in the accepted range.
    pub stability_ready: bool,
    /// Error handling is considered adequate.
    pub error_handling_ready: bool,
    /// Documentation is considered adequate.
    pub documentation_ready: bool,
}
/// A concrete problem that prevents production use.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductionBlocker {
    /// Area the problem belongs to.
    pub blocker_type: BlockerType,
    /// Human-readable explanation of the problem.
    pub description: String,
    /// How serious the problem is.
    pub severity: BlockerSeverity,
    /// Estimated effort needed to resolve it.
    pub resolution_effort: ResolutionEffort,
}
/// Area a [`ProductionBlocker`] belongs to.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum BlockerType {
    /// Results do not meet accuracy requirements.
    Accuracy,
    /// Run-time performance is insufficient.
    Performance,
    /// Numerical stability is insufficient.
    Stability,
    /// Problem with the public API.
    API,
    /// Problem with error handling.
    ErrorHandling,
    /// Problem with documentation.
    Documentation,
}
/// Severity of a [`ProductionBlocker`].
///
/// The derived ordering follows declaration order, so `Critical` compares as
/// the smallest value and `Low` as the largest; an ascending sort therefore
/// lists the most severe blockers first.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum BlockerSeverity {
    /// Most severe level.
    Critical,
    /// Second most severe level.
    High,
    /// Intermediate level.
    Medium,
    /// Least severe level.
    Low,
}
/// Rough estimate of the work needed to resolve a [`ProductionBlocker`],
/// listed from least to most effort.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ResolutionEffort {
    /// Smallest effort level.
    Minimal,
    /// Low effort.
    Low,
    /// Moderate effort.
    Medium,
    /// High effort.
    High,
    /// Largest effort level.
    VeryHigh,
}
/// A suggested improvement for a validated function.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductionRecommendation {
    /// Area the suggestion applies to.
    pub area: RecommendationArea,
    /// Human-readable description of the suggestion.
    pub recommendation: String,
    /// How urgently the suggestion should be acted on.
    pub priority: RecommendationPriority,
    /// Expected impact of following the suggestion.
    /// NOTE(review): the unit/scale is not established in this file — confirm.
    pub expected_impact: f64,
}
/// Area a [`ProductionRecommendation`] applies to.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum RecommendationArea {
    /// The algorithm itself.
    Algorithm,
    /// Run-time performance.
    Performance,
    /// Error handling.
    ErrorHandling,
    /// Test coverage or test quality.
    Testing,
    /// Documentation.
    Documentation,
}
/// Priority of a [`ProductionRecommendation`].
///
/// The derived ordering follows declaration order, so `Critical` compares as
/// the smallest value; an ascending sort lists the most urgent items first.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum RecommendationPriority {
    /// Most urgent level.
    Critical,
    /// Second most urgent level.
    High,
    /// Intermediate level.
    Medium,
    /// Least urgent level.
    Low,
}
/// Agreement between the three validation frameworks for one function.
///
/// As computed by the suite, each pairwise "correlation" is
/// `1 - |score_a - score_b|` on normalised scores, i.e. a closeness measure
/// rather than a statistical correlation coefficient.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossValidationAnalysis {
    /// Closeness of the benchmark score and the stability score.
    pub benchmark_stability_correlation: f64,
    /// Closeness of the property-test score and the benchmark score.
    pub property_benchmark_correlation: f64,
    /// Closeness of the property-test score and the stability score.
    pub property_stability_correlation: f64,
    /// Mean of the three pairwise values above.
    pub framework_agreement: f64,
    /// Confidence in the validation; equal to `framework_agreement` when
    /// cross-validation runs, 0.5 when it is disabled.
    pub validation_confidence: f64,
}
/// Aggregate report over every function the suite has validated so far.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComprehensiveValidationReport {
    /// Number of functions with a cached validation result.
    pub total_functions: usize,
    /// Number of those functions assessed as production ready.
    pub production_ready_functions: usize,
    /// `total_functions - production_ready_functions`.
    pub functions_needing_improvement: usize,
    /// Averaged scores across all functions.
    pub validation_summary: ValidationSummary,
    /// The individual per-function results the report is built from.
    pub function_results: Vec<ComprehensiveValidationResult>,
    /// Reliability and agreement figures for the frameworks themselves.
    pub framework_analysis: FrameworkAnalysis,
    /// Library-wide readiness verdict.
    pub overall_production_readiness: OverallProductionReadiness,
    /// UTC timestamp taken when the report was generated.
    pub generated_at: chrono::DateTime<chrono::Utc>,
}
/// Scores averaged over all validated functions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationSummary {
    /// Mean per-function percentage (0–100) of benchmarks whose status is a
    /// full pass.
    pub average_benchmark_score: f64,
    /// Average pass rate of the property-based tests.
    pub average_property_pass_rate: f64,
    /// Mean of the per-function stability scores.
    pub average_stability_score: f64,
    /// Mean of `average_benchmark_score` and `average_stability_score`.
    pub overall_validation_score: f64,
}
/// Reliability figures for the validation frameworks themselves.
///
/// The report generator in this file fills the three reliability fields with
/// fixed constants (0.9, 0.85 and 0.8); only `inter_framework_agreement` is
/// derived from the cached results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FrameworkAnalysis {
    /// Reliability attributed to the benchmark framework.
    pub benchmark_reliability: f64,
    /// Reliability attributed to the property-based tests.
    pub property_test_reliability: f64,
    /// Reliability attributed to the stability analysis.
    pub stability_reliability: f64,
    /// Mean `framework_agreement` across all validated functions.
    pub inter_framework_agreement: f64,
}
/// Library-wide production-readiness verdict.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OverallProductionReadiness {
    /// Whether the share of production-ready functions clears the configured
    /// readiness threshold.
    pub is_production_ready: bool,
    /// Share of production-ready functions as a percentage (0–100).
    pub production_ready_percentage: f64,
    /// Blockers of critical severity.
    pub critical_blockers: Vec<ProductionBlocker>,
    /// Most important recommendations.
    pub top_recommendations: Vec<ProductionRecommendation>,
}
impl Default for ValidationSuiteConfig {
    /// Default settings: every sub-framework uses its own default
    /// configuration, cross-validation and regression detection are switched
    /// on, and the readiness threshold is 0.85.
    fn default() -> Self {
        let benchmark_config = BenchmarkConfig::default();
        let property_config = PropertyTestConfig::default();
        let stability_config = StabilityConfig::default();

        Self {
            benchmark_config,
            property_config,
            stability_config,
            enable_cross_validation: true,
            enable_regression_detection: true,
            production_readiness_threshold: 0.85,
        }
    }
}
impl ComprehensiveValidationSuite {
/// Builds a suite from `config`.
///
/// Each sub-framework is constructed from a clone of its own section of
/// `config`; the complete configuration is then stored on the suite and the
/// result cache starts out empty.
pub fn new(config: ValidationSuiteConfig) -> Self {
    let benchmark_framework = ScipyBenchmarkFramework::new(config.benchmark_config.clone());
    let property_test_suite = ComprehensivePropertyTestSuite::new(config.property_config.clone());
    let stability_analyzer = NumericalStabilityAnalyzer::new(config.stability_config.clone());

    Self {
        benchmark_framework,
        property_test_suite,
        stability_analyzer,
        config,
        cached_results: HashMap::new(),
    }
}
/// Creates a suite that uses `ValidationSuiteConfig::default()`.
///
/// This is an inherent method, not an implementation of the `Default` trait.
pub fn default() -> Self {
    let config = ValidationSuiteConfig::default();
    Self::new(config)
}
/// Runs every validation framework against one function and caches the result.
///
/// # Arguments
/// * `function_name` - name used for property-test lookup, reporting and as
///   the cache key.
/// * `scirs2_impl` - implementation under test.
/// * `scipy_reference` - optional reference implementation; benchmarking is
///   skipped (empty benchmark list) when it is `None`.
///
/// # Errors
/// Propagates any error returned by the benchmark framework, the property
/// test suite, test-data generation or the stability analyzer.
pub fn validate_function<F, G>(
    &mut self,
    function_name: &str,
    scirs2_impl: F,
    scipy_reference: Option<G>,
) -> StatsResult<ComprehensiveValidationResult>
where
    F: Fn(&ArrayView1<f64>) -> StatsResult<f64> + Clone,
    G: Fn(&ArrayView1<f64>) -> f64,
{
    let started = Instant::now();

    // 1. Benchmark against the reference, if one was provided.
    let benchmark_results = match scipy_reference {
        Some(reference) => self.benchmark_framework.benchmark_function(
            function_name,
            scirs2_impl.clone(),
            reference,
        )?,
        None => Vec::new(),
    };

    // 2. Property-based tests registered under this function name.
    let property_results = self.property_test_suite.test_function(function_name)?;

    // 3. Stability analysis on 1000 generated samples.
    let testdata = self.generate_testdata(1000)?;
    let stability_result = self.stability_analyzer.analyze_function(
        function_name,
        scirs2_impl,
        &testdata.view(),
    )?;

    // 4. Agreement between the frameworks, or a neutral placeholder when
    //    cross-validation is switched off.
    let cross_validation = if !self.config.enable_cross_validation {
        CrossValidationAnalysis {
            benchmark_stability_correlation: 0.0,
            property_benchmark_correlation: 0.0,
            property_stability_correlation: 0.0,
            framework_agreement: 0.0,
            validation_confidence: 0.5,
        }
    } else {
        self.perform_cross_validation(&benchmark_results, &property_results, &stability_result)
    };

    // 5. Verdicts derived from the collected results.
    let overall_status =
        self.determine_overall_status(&benchmark_results, &property_results, &stability_result);
    let production_readiness = self.assess_production_readiness(
        &benchmark_results,
        &property_results,
        &stability_result,
        &cross_validation,
    );

    let validation_time = started.elapsed();
    let result = ComprehensiveValidationResult {
        function_name: function_name.to_string(),
        benchmark_results,
        property_results,
        stability_result,
        overall_status,
        production_readiness,
        cross_validation,
        validation_time,
        validated_at: chrono::Utc::now(),
    };

    // Keep a copy for later reporting; a previous entry with the same name is
    // replaced.
    let cache_key = function_name.to_string();
    self.cached_results.insert(cache_key, result.clone());
    Ok(result)
}
/// Produces `size` samples drawn from a normal distribution with mean 0 and
/// standard deviation 1.
///
/// The generator is seeded from the property-test configuration's `seed`.
///
/// # Errors
/// Returns `StatsError::InvalidInput` if the distribution cannot be
/// constructed.
fn generate_testdata(&self, size: usize) -> StatsResult<Array1<f64>> {
    use scirs2_core::random::prelude::*;
    use scirs2_core::random::{Distribution, Normal};

    let mut rng = StdRng::seed_from_u64(self.config.property_config.seed);
    let standard_normal = match Normal::new(0.0, 1.0) {
        Ok(distribution) => distribution,
        Err(e) => {
            return Err(StatsError::InvalidInput(format!(
                "Distribution error: {}",
                e
            )))
        }
    };

    // Fill the array front to back so samples are consumed in index order.
    let mut samples: Array1<f64> = Array1::zeros(size);
    samples
        .iter_mut()
        .for_each(|slot| *slot = standard_normal.sample(&mut rng));
    Ok(samples)
}
fn perform_cross_validation(
&self,
benchmark_results: &[BenchmarkResult],
property_results: &[PropertyTestResult],
stability_result: &StabilityAnalysisResult,
) -> CrossValidationAnalysis {
let benchmark_score = if !benchmark_results.is_empty() {
benchmark_results
.iter()
.map(|r| match r.status {
crate::scipy_benchmark_framework::BenchmarkStatus::Pass => 1.0,
crate::scipy_benchmark_framework::BenchmarkStatus::AccuracyPass => 0.7,
crate::scipy_benchmark_framework::BenchmarkStatus::PerformancePass => 0.7,
crate::scipy_benchmark_framework::BenchmarkStatus::Fail => 0.0,
crate::scipy_benchmark_framework::BenchmarkStatus::Error => 0.0,
})
.sum::<f64>()
/ benchmark_results.len() as f64
} else {
0.5
};
let property_score = if !property_results.is_empty() {
property_results
.iter()
.map(|r| r.test_cases_passed as f64 / r.test_cases_run.max(1) as f64)
.sum::<f64>()
/ property_results.len() as f64
} else {
0.5
};
let stability_score = stability_result.stability_score / 100.0;
let benchmark_stability_correlation = 1.0 - (benchmark_score - stability_score).abs();
let property_benchmark_correlation = 1.0 - (property_score - benchmark_score).abs();
let property_stability_correlation = 1.0 - (property_score - stability_score).abs();
let framework_agreement = (benchmark_stability_correlation
+ property_benchmark_correlation
+ property_stability_correlation)
/ 3.0;
let validation_confidence = framework_agreement;
CrossValidationAnalysis {
benchmark_stability_correlation,
property_benchmark_correlation,
property_stability_correlation,
framework_agreement,
validation_confidence,
}
}
fn determine_overall_status(
&self,
benchmark_results: &[BenchmarkResult],
property_results: &[PropertyTestResult],
stability_result: &StabilityAnalysisResult,
) -> ValidationStatus {
let mut validation_scores = Vec::new();
if !benchmark_results.is_empty() {
let benchmark_pass_rate = benchmark_results
.iter()
.filter(|r| {
matches!(
r.status,
crate::scipy_benchmark_framework::BenchmarkStatus::Pass
)
})
.count() as f64
/ benchmark_results.len() as f64;
validation_scores.push(benchmark_pass_rate);
}
if !property_results.is_empty() {
let property_pass_rate = property_results
.iter()
.map(|r| r.test_cases_passed as f64 / r.test_cases_run.max(1) as f64)
.sum::<f64>()
/ property_results.len() as f64;
validation_scores.push(property_pass_rate);
}
validation_scores.push(stability_result.stability_score / 100.0);
let average_score = validation_scores.iter().sum::<f64>() / validation_scores.len() as f64;
if average_score >= 0.9 {
ValidationStatus::FullyValidated
} else if average_score >= 0.75 {
ValidationStatus::MostlyValidated
} else if average_score >= 0.5 {
ValidationStatus::PartiallyValidated
} else if average_score >= 0.25 {
ValidationStatus::PoorlyValidated
} else {
ValidationStatus::ValidationFailed
}
}
fn assess_production_readiness(
&self,
benchmark_results: &[BenchmarkResult],
property_results: &[PropertyTestResult],
stability_result: &StabilityAnalysisResult,
cross_validation: &CrossValidationAnalysis,
) -> ProductionReadinessAssessment {
let mut readiness_score = 0.0;
let mut production_blockers = Vec::new();
let recommendations = Vec::new();
let accuracy_ready = benchmark_results
.iter()
.all(|r| r.accuracy.passes_tolerance);
if accuracy_ready {
readiness_score += 25.0;
} else {
production_blockers.push(ProductionBlocker {
blocker_type: BlockerType::Accuracy,
description: "Accuracy does not meet tolerance requirements".to_string(),
severity: BlockerSeverity::Critical,
resolution_effort: ResolutionEffort::Medium,
});
}
let performance_ready = benchmark_results.iter().all(|r| {
matches!(
r.performance.performance_grade,
crate::scipy_benchmark_framework::PerformanceGrade::A
| crate::scipy_benchmark_framework::PerformanceGrade::B
| crate::scipy_benchmark_framework::PerformanceGrade::C
)
});
if performance_ready {
readiness_score += 20.0;
}
let stability_ready = matches!(
stability_result.stability_grade,
crate::numerical_stability_analyzer::StabilityGrade::Excellent
| crate::numerical_stability_analyzer::StabilityGrade::Good
);
if stability_ready {
readiness_score += 25.0;
}
let property_ready = property_results.iter().all(|r| {
matches!(
r.status,
crate::propertybased_validation::PropertyTestStatus::Pass
)
});
if property_ready {
readiness_score += 20.0;
}
if cross_validation.validation_confidence > 0.8 {
readiness_score += 10.0;
}
let is_production_ready =
readiness_score >= (self.config.production_readiness_threshold * 100.0);
let readiness_criteria = ReadinessCriteria {
accuracy_ready,
performance_ready,
stability_ready,
error_handling_ready: true, documentation_ready: true, };
ProductionReadinessAssessment {
is_production_ready,
readiness_score,
readiness_criteria,
production_blockers,
recommendations,
}
}
pub fn generate_comprehensive_report(&self) -> ComprehensiveValidationReport {
let function_results: Vec<_> = self.cached_results.values().cloned().collect();
let total_functions = function_results.len();
let production_ready_functions = function_results
.iter()
.filter(|r| r.production_readiness.is_production_ready)
.count();
let functions_needing_improvement = total_functions - production_ready_functions;
let average_benchmark_score = if total_functions > 0 {
function_results
.iter()
.map(|r| {
r.benchmark_results
.iter()
.map(|b| {
if matches!(
b.status,
crate::scipy_benchmark_framework::BenchmarkStatus::Pass
) {
100.0
} else {
0.0
}
})
.sum::<f64>()
/ r.benchmark_results.len().max(1) as f64
})
.sum::<f64>()
/ total_functions as f64
} else {
0.0
};
let average_stability_score = if total_functions > 0 {
function_results
.iter()
.map(|r| r.stability_result.stability_score)
.sum::<f64>()
/ total_functions as f64
} else {
0.0
};
let validation_summary = ValidationSummary {
average_benchmark_score,
average_property_pass_rate: 0.0, average_stability_score,
overall_validation_score: (average_benchmark_score + average_stability_score) / 2.0,
};
let framework_analysis = FrameworkAnalysis {
benchmark_reliability: 0.9, property_test_reliability: 0.85,
stability_reliability: 0.8,
inter_framework_agreement: function_results
.iter()
.map(|r| r.cross_validation.framework_agreement)
.sum::<f64>()
/ total_functions.max(1) as f64,
};
let overall_production_readiness = OverallProductionReadiness {
is_production_ready: production_ready_functions as f64 / total_functions.max(1) as f64
> self.config.production_readiness_threshold,
production_ready_percentage: production_ready_functions as f64
/ total_functions.max(1) as f64
* 100.0,
critical_blockers: Vec::new(), top_recommendations: Vec::new(), };
ComprehensiveValidationReport {
total_functions,
production_ready_functions,
functions_needing_improvement,
validation_summary,
function_results,
framework_analysis,
overall_production_readiness,
generated_at: chrono::Utc::now(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::descriptive::mean;
    use crate::numerical_stability_analyzer::{
        ConditionNumberAnalysis, ConditioningClass, EdgeCaseRobustness, ErrorPropagationAnalysis,
        OverflowRisk, PrecisionAnalysis, StabilityGrade,
    };

    /// Stability result for a well-behaved function: grade `Excellent`,
    /// stability score 95, no recommendations.
    fn excellent_stability_result() -> StabilityAnalysisResult {
        let condition_analysis = ConditionNumberAnalysis {
            condition_number: 1.0,
            conditioning_class: ConditioningClass::WellConditioned,
            accuracy_loss_digits: 0.0,
            input_sensitivity: 0.0,
        };
        let error_propagation = ErrorPropagationAnalysis {
            forward_error_bound: 0.0,
            backward_error_bound: 0.0,
            error_amplification: 1.0,
            rounding_error_stability: 1.0,
        };
        let edge_case_robustness = EdgeCaseRobustness {
            handles_infinity: true,
            handles_nan: true,
            handles_zero: true,
            handles_large_values: true,
            handles_small_values: true,
            edge_case_success_rate: 1.0,
        };
        let precision_analysis = PrecisionAnalysis {
            precision_loss_bits: 0.0,
            relative_precision: 1.0,
            cancellation_errors: vec![],
            overflow_underflow_risk: OverflowRisk::None,
        };

        StabilityAnalysisResult {
            function_name: "test".to_string(),
            stability_grade: StabilityGrade::Excellent,
            condition_analysis,
            error_propagation,
            edge_case_robustness,
            precision_analysis,
            recommendations: vec![],
            stability_score: 95.0,
        }
    }

    /// The inherent `default()` constructor applies the default threshold.
    #[test]
    fn test_comprehensive_validation_suite_creation() {
        let suite = ComprehensiveValidationSuite::default();
        assert_eq!(suite.config.production_readiness_threshold, 0.85);
    }

    /// With no benchmark or property results, a stability score of 95 alone
    /// yields an average of 0.95, i.e. `FullyValidated`.
    #[test]
    fn test_validation_status_determination() {
        let suite = ComprehensiveValidationSuite::default();
        let stability_result = excellent_stability_result();

        let status = suite.determine_overall_status(&[], &[], &stability_result);

        assert_eq!(status, ValidationStatus::FullyValidated);
    }

    /// End-to-end validation of `mean` against a hand-written reference.
    /// Ignored by default; run explicitly with `--ignored`.
    #[test]
    #[ignore]
    fn test_mean_comprehensive_validation() {
        let benchmark_config = BenchmarkConfig {
            testsizes: vec![100],
            performance_iterations: 5,
            warmup_iterations: 1,
            ..Default::default()
        };
        let property_config = PropertyTestConfig {
            test_cases_per_property: 10,
            ..Default::default()
        };
        let mut suite = ComprehensiveValidationSuite::new(ValidationSuiteConfig {
            benchmark_config,
            property_config,
            ..Default::default()
        });

        let scipy_mean = |data: &ArrayView1<f64>| -> f64 { data.sum() / data.len() as f64 };
        let result = suite
            .validate_function("mean", |data| mean(data), Some(scipy_mean))
            .expect("Operation failed");

        assert_eq!(result.function_name, "mean");
        assert!(matches!(
            result.overall_status,
            ValidationStatus::FullyValidated | ValidationStatus::MostlyValidated
        ));
        assert!(result.validation_time.as_secs() < 60);
    }
}