use crate::vibe::{VIBEError, ValidationResult};
use rand::prelude::*;
use rand::rngs::StdRng;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalConfig {
pub bootstrap_samples: usize,
pub confidence_level: f32,
pub significance_threshold: f32,
pub enable_robust_statistics: bool,
pub enable_bayesian_methods: bool,
}
impl Default for StatisticalConfig {
fn default() -> Self {
Self {
bootstrap_samples: 1000,
confidence_level: 0.95,
significance_threshold: 0.05,
enable_robust_statistics: true,
enable_bayesian_methods: false,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalResult {
pub p_value: f32,
pub is_significant: bool,
pub effect_size: f32,
pub confidence_interval: (f32, f32),
pub test_power: f32,
pub method: StatisticalMethod,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StatisticalMethod {
BootstrapResampling,
TTest,
MannWhitneyU,
BayesianInference,
PermutationTest,
}
pub struct StatisticalEngine {
config: StatisticalConfig,
rng: StdRng,
}
impl StatisticalEngine {
pub fn new(config: StatisticalConfig) -> Self {
Self {
config,
rng: StdRng::from_entropy(),
}
}
pub fn default() -> Self {
Self::new(StatisticalConfig::default())
}
pub fn bootstrap_significance_test(
&mut self,
validation_results: &[ValidationResult],
baseline_score: f32,
) -> Result<StatisticalResult, VIBEError> {
if validation_results.is_empty() {
return Err(VIBEError::ValidationError(
"No validation results for statistical testing".to_string(),
));
}
let scores: Vec<f32> = validation_results
.iter()
.map(|result| result.overall_score / 100.0)
.collect();
let bootstrap_distribution = self.bootstrap_resampling(&scores)?;
let p_value = self.calculate_p_value(baseline_score, &bootstrap_distribution);
let effect_size = self.calculate_effect_size(&scores, baseline_score);
let confidence_interval = self.calculate_confidence_interval(&bootstrap_distribution);
let test_power = self.calculate_power(&scores, baseline_score);
Ok(StatisticalResult {
p_value,
is_significant: p_value < self.config.significance_threshold,
effect_size,
confidence_interval,
test_power,
method: StatisticalMethod::BootstrapResampling,
})
}
pub fn multi_model_consistency_test(
&mut self,
model_scores: &HashMap<String, f32>,
) -> Result<StatisticalResult, VIBEError> {
if model_scores.is_empty() {
return Err(VIBEError::ValidationError(
"No model scores for consistency testing".to_string(),
));
}
let scores: Vec<f32> = model_scores.values().copied().collect();
let mean_score = scores.iter().sum::<f32>() / scores.len() as f32;
let variance = scores
.iter()
.map(|score| (score - mean_score).powi(2))
.sum::<f32>()
/ scores.len() as f32;
let consistency_score = 1.0 - variance.min(1.0);
let p_value = variance;
Ok(StatisticalResult {
p_value,
is_significant: variance < 0.1, effect_size: consistency_score,
confidence_interval: (mean_score - variance.sqrt(), mean_score + variance.sqrt()),
test_power: 0.8, method: StatisticalMethod::BootstrapResampling,
})
}
pub fn cross_validation_stability_test(
&mut self,
validation_folds: &[Vec<ValidationResult>],
) -> Result<StatisticalResult, VIBEError> {
if validation_folds.is_empty() {
return Err(VIBEError::ValidationError(
"No validation folds for stability testing".to_string(),
));
}
let fold_means: Vec<f32> = validation_folds
.iter()
.map(|fold| {
fold.iter()
.map(|result| result.overall_score / 100.0)
.sum::<f32>()
/ fold.len() as f32
})
.collect();
let overall_mean = fold_means.iter().sum::<f32>() / fold_means.len() as f32;
let variance = fold_means
.iter()
.map(|mean| (mean - overall_mean).powi(2))
.sum::<f32>()
/ fold_means.len() as f32;
let stability_score = 1.0 - variance.min(1.0);
Ok(StatisticalResult {
p_value: variance,
is_significant: variance < 0.05, effect_size: stability_score,
confidence_interval: (overall_mean - variance.sqrt(), overall_mean + variance.sqrt()),
test_power: 0.8,
method: StatisticalMethod::BootstrapResampling,
})
}
pub fn cross_cultural_significance_test(
&mut self,
cultural_validation_results: &HashMap<String, Vec<ValidationResult>>,
) -> Result<StatisticalResult, VIBEError> {
if cultural_validation_results.is_empty() {
return Err(VIBEError::ValidationError(
"No cultural validation results for testing".to_string(),
));
}
let cultural_means: HashMap<String, f32> = cultural_validation_results
.iter()
.map(|(culture, results)| {
let mean = results
.iter()
.map(|result| result.overall_score / 100.0)
.sum::<f32>()
/ results.len() as f32;
(culture.clone(), mean)
})
.collect();
let cultural_scores: Vec<f32> = cultural_means.values().copied().collect();
let overall_mean = cultural_scores.iter().sum::<f32>() / cultural_scores.len() as f32;
let variance = cultural_scores
.iter()
.map(|score| (score - overall_mean).powi(2))
.sum::<f32>()
/ cultural_scores.len() as f32;
let cultural_consistency = 1.0 - variance.min(1.0);
Ok(StatisticalResult {
p_value: variance,
is_significant: variance < 0.1, effect_size: cultural_consistency,
confidence_interval: (overall_mean - variance.sqrt(), overall_mean + variance.sqrt()),
test_power: 0.7,
method: StatisticalMethod::BootstrapResampling,
})
}
fn bootstrap_resampling(&mut self, scores: &[f32]) -> Result<Vec<f32>, VIBEError> {
let n = scores.len();
let mut bootstrap_means = Vec::with_capacity(self.config.bootstrap_samples);
for _ in 0..self.config.bootstrap_samples {
let sample: Vec<f32> = (0..n)
.map(|_| scores[self.rng.gen_range(0..n)])
.collect();
let mean = sample.iter().sum::<f32>() / sample.len() as f32;
bootstrap_means.push(mean);
}
bootstrap_means.sort_by(|a, b| a.partial_cmp(b).unwrap());
Ok(bootstrap_means)
}
fn calculate_p_value(&self, baseline: f32, bootstrap_distribution: &[f32]) -> f32 {
let extreme_count = bootstrap_distribution
.iter()
.filter(|&score| score >= baseline)
.count();
extreme_count as f32 / bootstrap_distribution.len() as f32
}
fn calculate_effect_size(&self, scores: &[f32], baseline: f32) -> f32 {
let sample_mean = scores.iter().sum::<f32>() / scores.len() as f32;
let sample_std = self.calculate_standard_deviation(scores);
if sample_std == 0.0 {
return 0.0;
}
(sample_mean - baseline) / sample_std
}
fn calculate_confidence_interval(&self, bootstrap_distribution: &[f32]) -> (f32, f32) {
let n = bootstrap_distribution.len();
let lower_index = ((1.0 - self.config.confidence_level) / 2.0 * n as f32) as usize;
let upper_index = n - lower_index - 1;
(
bootstrap_distribution[lower_index],
bootstrap_distribution[upper_index],
)
}
fn calculate_power(&self, scores: </f32>, baseline: f32) -> f32 {
let effect_size = self.calculate_effect_size(scores, baseline);
let n = scores.len();
if n == 0 {
return 0.0;
}
let power = 1.0 - (-effect_size.abs() * (n as f32).sqrt()).exp();
power.min(1.0).max(0.0)
}
fn calculate_standard_deviation(&self, data: &[f32]) -> f32 {
if data.is_empty() {
return 0.0;
}
let mean = data.iter().sum::<f32>() / data.len() as f32;
let variance = data
.iter()
.map(|value| (value - mean).powi(2))
.sum::<f32>()
/ data.len() as f32;
variance.sqrt()
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BayesianAnalyzer {
prior_alpha: f32,
prior_beta: f32,
}
impl BayesianAnalyzer {
pub fn new(prior_alpha: f32, prior_beta: f32) -> Self {
Self {
prior_alpha,
prior_beta,
}
}
pub fn bayesian_inference(
&self,
successes: usize,
total: usize,
) -> (f32, f32) {
let posterior_alpha = self.prior_alpha + successes as f32;
let posterior_beta = self.prior_beta + (total - successes) as f32;
let posterior_mean = posterior_alpha / (posterior_alpha + posterior_beta);
let posterior_variance = (posterior_alpha * posterior_beta)
/ ((posterior_alpha + posterior_beta).powi(2)
* (posterior_alpha + posterior_beta + 1.0));
let credible_interval = (
posterior_mean - 1.96 * posterior_variance.sqrt(),
posterior_mean + 1.96 * posterior_variance.sqrt(),
);
(posterior_mean, credible_interval.1)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::vibe::ValidationResult;
#[test]
fn test_statistical_engine_creation() {
let engine = StatisticalEngine::default();
assert_eq!(engine.config.bootstrap_samples, 1000);
assert_eq!(engine.config.confidence_level, 0.95);
}
#[test]
fn test_bootstrap_significance_test() {
let mut engine = StatisticalEngine::default();
let validation_results = vec![
ValidationResult {
overall_score: 85.0,
platform_scores: HashMap::new(),
confidence_interval: None,
status: crate::vibe::validation::ValidationStatus::Validated,
detailed_results: HashMap::new(),
validation_time_ms: 100,
issues: Vec::new(),
recommendations: Vec::new(),
timestamp: chrono::Utc::now(),
protocol_id: uuid::Uuid::new_v4(),
},
ValidationResult {
overall_score: 90.0,
platform_scores: HashMap::new(),
confidence_interval: None,
status: crate::vibe::validation::ValidationStatus::Validated,
detailed_results: HashMap::new(),
validation_time_ms: 100,
issues: Vec::new(),
recommendations: Vec::new(),
timestamp: chrono::Utc::now(),
protocol_id: uuid::Uuid::new_v4(),
},
];
let result = engine
.bootstrap_significance_test(&validation_results, 0.7)
.unwrap();
assert!(result.p_value >= 0.0);
assert!(result.p_value <= 1.0);
}
#[test]
fn test_bayesian_inference() {
let analyzer = BayesianAnalyzer::new(1.0, 1.0); let (mean, _) = analyzer.bayesian_inference(80, 100);
assert!(mean > 0.7 && mean < 0.9);
}
}