use scirs2_core::ndarray::{Array1, Array2};
use std::collections::HashMap;
use super::{config::NoiseType, performance::DDPerformanceAnalysis, sequences::DDSequence};
use crate::DeviceResult;
#[cfg(feature = "scirs2")]
use scirs2_stats::{mean, std};
#[cfg(not(feature = "scirs2"))]
use super::fallback_scirs2::{inv, mean, std, trace};
use scirs2_core::random::prelude::*;
/// Aggregate result returned by `DDStatisticalAnalyzer::perform_statistical_analysis`.
#[derive(Debug, Clone)]
pub struct DDStatisticalAnalysis {
/// Summary statistics computed over the generated sample data.
pub basic_statistics: BasicStatistics,
/// Multivariate, time-series, Bayesian and non-parametric results.
pub advanced_statistics: AdvancedStatistics,
/// Machine-learning derived insights; the analyzer in this file always stores `Some` here.
pub ml_insights: Option<MLInsights>,
/// Uncertainty breakdown together with a sensitivity analysis.
pub uncertainty_analysis: UncertaintyAnalysis,
}
/// Summary statistics of a flat sample, as produced by
/// `DDStatisticalAnalyzer::calculate_basic_statistics`.
#[derive(Debug, Clone)]
pub struct BasicStatistics {
/// Arithmetic mean of all elements.
pub mean: f64,
/// Square root of `variance`.
pub std_deviation: f64,
/// Sample variance (sum of squared deviations divided by `n - 1`).
pub variance: f64,
/// Mean of the cubed standardized deviations `((x - mean) / std_deviation)^3`.
pub skewness: f64,
/// Mean of the fourth-power standardized deviations minus 3 (excess kurtosis).
pub kurtosis: f64,
/// Middle sorted element, or the midpoint of the two middle elements for an even count.
pub median: f64,
/// Difference between the sorted elements at indices `3n/4` and `n/4`.
pub iqr: f64,
}
/// Container for the more involved analyses, as produced by
/// `DDStatisticalAnalyzer::perform_advanced_analysis`.
#[derive(Debug, Clone)]
pub struct AdvancedStatistics {
/// PCA, factor, cluster and discriminant analysis results.
pub multivariate_analysis: MultivariateAnalysis,
/// Time-series results; the analyzer in this file currently leaves this as `None`.
pub time_series_analysis: Option<TimeSeriesAnalysis>,
/// Bayesian results; the analyzer in this file currently leaves this as `None`.
pub bayesian_analysis: Option<BayesianAnalysis>,
/// Rank-based, permutation, bootstrap and KDE results.
pub non_parametric_analysis: NonParametricAnalysis,
}
/// Bundle of multivariate analysis results.
///
/// The analyzer in this file fills every member with hard-coded values that do
/// not depend on the input data.
#[derive(Debug, Clone)]
pub struct MultivariateAnalysis {
/// Principal component analysis results.
pub pca_results: PCAResults,
/// Factor analysis results.
pub factor_analysis: FactorAnalysisResults,
/// Cluster analysis results.
pub cluster_analysis: ClusterAnalysisResults,
/// Discriminant analysis results.
pub discriminant_analysis: DiscriminantAnalysisResults,
}
/// Results of a principal component analysis (PCA).
#[derive(Debug, Clone)]
pub struct PCAResults {
/// Component matrix. NOTE(review): whether components are stored as rows or
/// columns is not established in this file — confirm with consumers.
pub components: Array2<f64>,
/// Explained-variance ratio, one entry per component.
pub explained_variance_ratio: Array1<f64>,
/// Cumulative variance; presumably the running sum of
/// `explained_variance_ratio` — confirm.
pub cumulative_variance: Array1<f64>,
/// Number of components to retain.
pub n_components_retain: usize,
}
/// Results of a factor analysis.
#[derive(Debug, Clone)]
pub struct FactorAnalysisResults {
/// Factor loading matrix. NOTE(review): axis convention is not established
/// in this file.
pub loadings: Array2<f64>,
/// Communality values; presumably one per observed variable — confirm.
pub communalities: Array1<f64>,
/// Specific (unique) variance values; presumably one per observed variable — confirm.
pub specific_variances: Array1<f64>,
/// Factor score matrix. NOTE(review): axis convention is not established in
/// this file.
pub factor_scores: Array2<f64>,
}
/// Results of a cluster analysis.
#[derive(Debug, Clone)]
pub struct ClusterAnalysisResults {
/// Integer cluster labels; presumably one per observation — confirm.
pub cluster_labels: Array1<i32>,
/// Cluster center matrix; presumably one row per cluster — confirm.
pub cluster_centers: Array2<f64>,
/// Silhouette scores. NOTE(review): whether these are per cluster or per
/// observation is not established in this file.
pub silhouette_scores: Array1<f64>,
/// Inertia of the clustering; presumably the within-cluster sum of squares — confirm.
pub inertia: f64,
}
/// Results of a discriminant analysis.
#[derive(Debug, Clone)]
pub struct DiscriminantAnalysisResults {
/// Linear discriminant matrix. NOTE(review): axis convention is not
/// established in this file.
pub linear_discriminants: Array2<f64>,
/// Classification accuracy; presumably a fraction in `[0, 1]` — confirm.
pub classification_accuracy: f64,
/// Cross-validation score.
pub cv_score: f64,
/// Feature importance values; presumably one per feature — confirm.
pub feature_importance: Array1<f64>,
}
/// Bundle of time-series analysis results.
///
/// Not constructed anywhere in this file; the analyzer here sets
/// `AdvancedStatistics::time_series_analysis` to `None`.
#[derive(Debug, Clone)]
pub struct TimeSeriesAnalysis {
/// Trend analysis results.
pub trend_analysis: TrendAnalysis,
/// Seasonality analysis results.
pub seasonality_analysis: SeasonalityAnalysis,
/// Stationarity test results.
pub stationarity_tests: StationarityTests,
/// Autocorrelation analysis results.
pub autocorrelation: AutocorrelationAnalysis,
}
/// Trend description of a series. Not constructed in this file.
#[derive(Debug, Clone)]
pub struct TrendAnalysis {
/// Slope of a linear trend; presumably from a linear fit — confirm.
pub linear_slope: f64,
/// p-value associated with the trend.
pub trend_p_value: f64,
/// R-squared value; presumably of the linear fit — confirm.
pub r_squared: f64,
/// Qualitative classification of the trend.
pub trend_direction: TrendDirection,
}
/// Qualitative classification of a trend, used by `TrendAnalysis::trend_direction`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TrendDirection {
/// The series is trending upwards.
Increasing,
/// The series is trending downwards.
Decreasing,
/// No appreciable trend.
Stable,
/// A trend that is not linear.
NonLinear,
}
/// Seasonality description of a series. Not constructed in this file.
#[derive(Debug, Clone)]
pub struct SeasonalityAnalysis {
/// Seasonal period, if any; presumably `None` when no seasonality was found
/// and measured in samples otherwise — confirm.
pub seasonal_period: Option<usize>,
/// Strength of the seasonal component.
pub seasonal_strength: f64,
/// Decomposition of the series into trend, seasonal and residual parts.
pub seasonal_decomposition: SeasonalDecomposition,
}
/// Components of a seasonal decomposition. Not constructed in this file.
///
/// NOTE(review): whether the decomposition is additive or multiplicative is
/// not established in this file.
#[derive(Debug, Clone)]
pub struct SeasonalDecomposition {
/// Trend component.
pub trend: Array1<f64>,
/// Seasonal component.
pub seasonal: Array1<f64>,
/// Residual component.
pub residual: Array1<f64>,
}
/// Collection of stationarity test results. Not constructed in this file.
#[derive(Debug, Clone)]
pub struct StationarityTests {
/// Presumably the Augmented Dickey-Fuller test — confirm.
pub adf_test: StationarityTest,
/// Presumably the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test — confirm.
pub kpss_test: StationarityTest,
/// Presumably the Phillips-Perron test — confirm.
pub pp_test: StationarityTest,
}
/// Outcome of a single stationarity test. Not constructed in this file.
#[derive(Debug, Clone)]
pub struct StationarityTest {
/// Test statistic.
pub statistic: f64,
/// p-value of the test.
pub p_value: f64,
/// Critical values keyed by name. NOTE(review): the key format (e.g.
/// significance level labels) is not established in this file.
pub critical_values: HashMap<String, f64>,
/// Whether the test judged the series stationary.
pub is_stationary: bool,
}
/// Autocorrelation description of a series. Not constructed in this file.
#[derive(Debug, Clone)]
pub struct AutocorrelationAnalysis {
/// Autocorrelation function values; presumably indexed by lag — confirm.
pub acf: Array1<f64>,
/// Partial autocorrelation function values; presumably indexed by lag — confirm.
pub pacf: Array1<f64>,
/// Lags flagged as significant.
pub significant_lags: Vec<usize>,
/// Ljung-Box test result.
pub ljung_box_test: LjungBoxTest,
}
/// Outcome of a Ljung-Box test. Not constructed in this file.
#[derive(Debug, Clone)]
pub struct LjungBoxTest {
/// Test statistic.
pub statistic: f64,
/// p-value of the test.
pub p_value: f64,
/// Presumably the degrees of freedom of the test — confirm.
pub df: usize,
/// Whether the test indicates autocorrelation.
pub has_autocorrelation: bool,
}
/// Bundle of Bayesian analysis results.
///
/// Not constructed anywhere in this file; the analyzer here sets
/// `AdvancedStatistics::bayesian_analysis` to `None`.
#[derive(Debug, Clone)]
pub struct BayesianAnalysis {
/// Posterior distributions keyed by name; presumably parameter names — confirm.
pub posterior_distributions: HashMap<String, PosteriorDistribution>,
/// Credible intervals keyed by name; presumably `(lower, upper)` — confirm.
pub credible_intervals: HashMap<String, (f64, f64)>,
/// Bayes factors keyed by name.
pub bayes_factors: HashMap<String, f64>,
/// Model evidence. NOTE(review): linear vs. log scale is not established in
/// this file.
pub model_evidence: f64,
}
/// Sample-based description of a posterior distribution. Not constructed in this file.
#[derive(Debug, Clone)]
pub struct PosteriorDistribution {
/// Posterior samples.
pub samples: Array1<f64>,
/// Posterior mean.
pub mean: f64,
/// Posterior standard deviation.
pub std: f64,
/// Presumably the highest-density interval as `(lower, upper)` — confirm.
pub hdi: (f64, f64),
}
/// Bundle of non-parametric analysis results.
///
/// The analyzer in this file fills every member with hard-coded values that do
/// not depend on the input data.
#[derive(Debug, Clone)]
pub struct NonParametricAnalysis {
/// Rank-based correlation and test results.
pub rank_statistics: RankStatistics,
/// Permutation test results.
pub permutation_tests: PermutationTests,
/// Bootstrap analysis results.
pub bootstrap_analysis: BootstrapAnalysis,
/// Kernel density estimation results.
pub kde_analysis: KDEAnalysis,
}
/// Rank-based statistics.
#[derive(Debug, Clone)]
pub struct RankStatistics {
/// Spearman rank correlation coefficient.
pub spearman_correlation: f64,
/// Kendall tau coefficient.
pub kendall_tau: f64,
/// Mann-Whitney U test result.
pub mann_whitney_u: MannWhitneyTest,
/// Wilcoxon test result.
pub wilcoxon_test: WilcoxonTest,
}
/// Outcome of a Mann-Whitney U test.
#[derive(Debug, Clone)]
pub struct MannWhitneyTest {
/// U statistic.
pub u_statistic: f64,
/// p-value of the test.
pub p_value: f64,
/// Effect size. NOTE(review): the effect-size measure used is not
/// established in this file.
pub effect_size: f64,
}
/// Outcome of a Wilcoxon test.
///
/// NOTE(review): signed-rank vs. rank-sum variant is not established in this file.
#[derive(Debug, Clone)]
pub struct WilcoxonTest {
/// Test statistic.
pub statistic: f64,
/// p-value of the test.
pub p_value: f64,
/// Effect size. NOTE(review): the effect-size measure used is not
/// established in this file.
pub effect_size: f64,
}
/// Results of permutation tests.
///
/// The analyzer in this file leaves both maps empty and reports 1000 permutations.
#[derive(Debug, Clone)]
pub struct PermutationTests {
/// p-values keyed by name; presumably the test or metric name — confirm.
pub p_values: HashMap<String, f64>,
/// Effect sizes keyed by name; presumably the same keys as `p_values` — confirm.
pub effect_sizes: HashMap<String, f64>,
/// Number of permutations.
pub n_permutations: usize,
}
/// Results of a bootstrap analysis.
///
/// The analyzer in this file leaves all maps empty and reports 1000 bootstrap
/// resamples.
#[derive(Debug, Clone)]
pub struct BootstrapAnalysis {
/// Confidence intervals keyed by name; presumably `(lower, upper)` — confirm.
pub confidence_intervals: HashMap<String, (f64, f64)>,
/// Bias estimates keyed by name.
pub bias: HashMap<String, f64>,
/// Standard errors keyed by name.
pub standard_errors: HashMap<String, f64>,
/// Number of bootstrap resamples.
pub n_bootstrap: usize,
}
/// Results of a kernel density estimation (KDE).
#[derive(Debug, Clone)]
pub struct KDEAnalysis {
/// Density estimates; presumably evaluated at `grid_points` — confirm.
pub density_estimates: Array1<f64>,
/// Kernel bandwidth.
pub bandwidth: f64,
/// Grid points of the estimate.
pub grid_points: Array1<f64>,
/// Kernel name; the analyzer in this file uses `"gaussian"`.
pub kernel_type: String,
}
/// Machine-learning derived insights, as produced by
/// `DDStatisticalAnalyzer::extract_ml_insights`.
///
/// The analyzer in this file fills every member with hard-coded values that do
/// not depend on the input data.
#[derive(Debug, Clone)]
pub struct MLInsights {
/// Feature importance values; presumably one per feature — confirm.
pub feature_importance: Array1<f64>,
/// Anomaly detection results.
pub anomaly_detection: AnomalyDetectionResults,
/// Predictive modeling results.
pub predictive_modeling: PredictiveModelingResults,
/// Dimensionality reduction results.
pub dimensionality_reduction: DimensionalityReduction,
}
/// Results of anomaly detection.
#[derive(Debug, Clone)]
pub struct AnomalyDetectionResults {
/// Anomaly scores; presumably one per sample — confirm.
pub anomaly_scores: Array1<f64>,
/// Score threshold. NOTE(review): whether anomalies lie above or below the
/// threshold is not established in this file.
pub threshold: f64,
/// Indices flagged as anomalies; presumably sample indices — confirm.
pub anomalies: Vec<usize>,
/// Isolation forest results.
pub isolation_forest: IsolationForestResults,
}
/// Results of an isolation forest.
#[derive(Debug, Clone)]
pub struct IsolationForestResults {
/// Isolation forest scores; presumably one per sample — confirm.
pub scores: Array1<f64>,
/// Path lengths; presumably the average path length per sample — confirm.
pub path_lengths: Array1<f64>,
/// Contamination parameter; presumably the assumed fraction of anomalies — confirm.
pub contamination: f64,
}
/// Results of predictive modeling.
#[derive(Debug, Clone)]
pub struct PredictiveModelingResults {
/// Performance metrics keyed by metric name; left empty by the analyzer in this file.
pub performance_metrics: HashMap<String, f64>,
/// Cross-validation scores; presumably one per fold — confirm.
pub cv_scores: Array1<f64>,
/// Learning curve data.
pub learning_curves: LearningCurves,
/// Model interpretability data.
pub interpretability: ModelInterpretability,
}
/// Learning curve data.
///
/// The analyzer in this file supplies three arrays of equal length (five
/// entries each).
#[derive(Debug, Clone)]
pub struct LearningCurves {
/// Training set sizes.
pub training_sizes: Array1<f64>,
/// Training scores; presumably aligned element-wise with `training_sizes` — confirm.
pub training_scores: Array1<f64>,
/// Validation scores; presumably aligned element-wise with `training_sizes` — confirm.
pub validation_scores: Array1<f64>,
}
/// Model interpretability data.
#[derive(Debug, Clone)]
pub struct ModelInterpretability {
/// SHAP value matrix; presumably samples by features — confirm.
pub shap_values: Array2<f64>,
/// Feature attribution values; presumably one per feature — confirm.
pub feature_attributions: Array1<f64>,
/// Partial dependence data keyed by name; presumably feature name mapped to
/// `(grid, response)` — confirm. Left empty by the analyzer in this file.
pub partial_dependence: HashMap<String, (Array1<f64>, Array1<f64>)>,
}
/// Bundle of dimensionality reduction results.
#[derive(Debug, Clone)]
pub struct DimensionalityReduction {
/// t-SNE results.
pub tsne_results: TSNEResults,
/// UMAP results.
pub umap_results: UMAPResults,
/// Manifold learning results.
pub manifold_learning: ManifoldLearningResults,
}
/// Results of a t-SNE embedding.
#[derive(Debug, Clone)]
pub struct TSNEResults {
/// Embedding matrix; presumably one row per sample — confirm.
pub embedding: Array2<f64>,
/// Perplexity parameter.
pub perplexity: f64,
/// KL divergence value; presumably the final optimization objective — confirm.
pub kl_divergence: f64,
}
/// Results of a UMAP embedding.
#[derive(Debug, Clone)]
pub struct UMAPResults {
/// Embedding matrix; presumably one row per sample — confirm.
pub embedding: Array2<f64>,
/// Number-of-neighbors parameter.
pub n_neighbors: usize,
/// Minimum-distance parameter.
pub min_dist: f64,
}
/// Results of manifold learning.
#[derive(Debug, Clone)]
pub struct ManifoldLearningResults {
/// Intrinsic dimension of the data.
pub intrinsic_dimension: usize,
/// Local linearity values; presumably one per sample — confirm.
pub local_linearity: Array1<f64>,
/// Embedding matrix; presumably one row per sample — confirm.
pub embedding: Array2<f64>,
}
/// Uncertainty breakdown, as produced by
/// `DDStatisticalAnalyzer::quantify_uncertainty`.
///
/// The analyzer in this file fills every member with hard-coded values that do
/// not depend on the input data.
#[derive(Debug, Clone)]
pub struct UncertaintyAnalysis {
/// Aleatory uncertainty.
pub aleatory_uncertainty: f64,
/// Epistemic uncertainty.
pub epistemic_uncertainty: f64,
/// Total uncertainty; with the analyzer's fixed values this equals the sum of
/// the aleatory and epistemic parts.
pub total_uncertainty: f64,
/// Uncertainty contribution keyed by source name; the analyzer in this file
/// uses the keys `measurement_noise`, `model_uncertainty` and
/// `parameter_uncertainty`.
pub uncertainty_sources: HashMap<String, f64>,
/// Sensitivity analysis results.
pub sensitivity_analysis: SensitivityAnalysis,
}
/// Results of a sensitivity analysis.
#[derive(Debug, Clone)]
pub struct SensitivityAnalysis {
/// First-order sensitivity indices; presumably one per input parameter — confirm.
pub first_order_indices: Array1<f64>,
/// Total sensitivity indices; presumably one per input parameter — confirm.
pub total_indices: Array1<f64>,
/// Interaction effect matrix; presumably parameters by parameters — confirm.
pub interaction_effects: Array2<f64>,
/// Morris screening results.
pub morris_screening: MorrisScreeningResults,
}
/// Results of a Morris screening.
#[derive(Debug, Clone)]
pub struct MorrisScreeningResults {
/// Elementary effects matrix; presumably trajectories by parameters — confirm.
pub elementary_effects: Array2<f64>,
/// Presumably the mean of the elementary effects per parameter — confirm.
pub mu: Array1<f64>,
/// Presumably the standard deviation of the elementary effects per parameter — confirm.
pub sigma: Array1<f64>,
/// Presumably the mean of the absolute elementary effects per parameter — confirm.
pub mu_star: Array1<f64>,
}
/// Stateless entry point for the statistical analysis; all functionality is
/// exposed through associated functions, so no instance data is carried.
pub struct DDStatisticalAnalyzer;
impl DDStatisticalAnalyzer {
/// Runs the complete statistical pipeline for one sequence/performance pair.
///
/// Sample data is generated first and then handed, in turn, to the
/// basic-statistics, advanced-analysis, ML-insight and uncertainty steps.
/// The ML insights are always wrapped in `Some`.
///
/// # Errors
///
/// The first step that fails aborts the pipeline and its error is returned
/// unchanged.
pub fn perform_statistical_analysis(
    sequence: &DDSequence,
    performance_analysis: &DDPerformanceAnalysis,
) -> DeviceResult<DDStatisticalAnalysis> {
    let samples = Self::generate_sample_data(sequence, performance_analysis)?;

    // Field initializers are evaluated top to bottom, so the steps run (and
    // short-circuit on error) in exactly the order they are listed here.
    Ok(DDStatisticalAnalysis {
        basic_statistics: Self::calculate_basic_statistics(&samples)?,
        advanced_statistics: Self::perform_advanced_analysis(&samples)?,
        ml_insights: Some(Self::extract_ml_insights(&samples)?),
        uncertainty_analysis: Self::quantify_uncertainty(&samples)?,
    })
}
/// Builds the sample matrix that the analysis steps operate on.
///
/// The matrix has 100 rows and one column per entry in
/// `performance_analysis.metrics`; every cell is an independent random `f64`
/// drawn from the thread-local generator. The sequence argument is currently
/// unused.
///
/// # Errors
///
/// Never returns `Err` as currently written.
fn generate_sample_data(
    _sequence: &DDSequence,
    performance_analysis: &DDPerformanceAnalysis,
) -> DeviceResult<Array2<f64>> {
    const SAMPLE_COUNT: usize = 100;

    let feature_count = performance_analysis.metrics.len();
    let mut samples = Array2::<f64>::zeros((SAMPLE_COUNT, feature_count));
    // One draw per cell, visiting the cells in row-major order.
    samples
        .iter_mut()
        .for_each(|cell| *cell = thread_rng().random::<f64>());
    Ok(samples)
}
/// Computes summary statistics over every element of `data`, treated as one
/// flat sample.
///
/// The reported quantities are:
/// - `mean`: arithmetic mean,
/// - `variance` / `std_deviation`: sample variance (denominator `n - 1`) and
///   its square root,
/// - `skewness`: mean of the cubed standardized deviations,
/// - `kurtosis`: mean of the fourth-power standardized deviations minus 3
///   (excess kurtosis),
/// - `median`: middle sorted element, or the midpoint of the two middle
///   elements for an even count,
/// - `iqr`: difference between the sorted elements at indices `3n/4` and `n/4`.
///
/// Degenerate inputs never panic. An empty `data` (for example a matrix with
/// zero columns) yields `NaN` for every field. A single-element sample yields
/// `NaN` for the variance-derived fields, and a constant sample yields `NaN`
/// skewness and kurtosis, all following IEEE-754 `0.0 / 0.0`.
///
/// # Errors
///
/// Never returns `Err` as currently written.
fn calculate_basic_statistics(data: &Array2<f64>) -> DeviceResult<BasicStatistics> {
    let flat_data: Vec<f64> = data.iter().copied().collect();

    // Without this guard the median lookup below would index `0 / 2 - 1`,
    // which underflows and panics.
    if flat_data.is_empty() {
        return Ok(BasicStatistics {
            mean: f64::NAN,
            std_deviation: f64::NAN,
            variance: f64::NAN,
            skewness: f64::NAN,
            kurtosis: f64::NAN,
            median: f64::NAN,
            iqr: f64::NAN,
        });
    }

    let count = flat_data.len();
    let n = count as f64;

    // Moments are accumulated in the original element order (before sorting)
    // so the floating-point sums are not affected by the sort below.
    let mean = flat_data.iter().sum::<f64>() / n;
    let variance = flat_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0);
    let std_deviation = variance.sqrt();
    let standardized_moment = |order: i32| {
        flat_data
            .iter()
            .map(|x| ((x - mean) / std_deviation).powi(order))
            .sum::<f64>()
            / n
    };
    let skewness = standardized_moment(3);
    let kurtosis = standardized_moment(4) - 3.0;

    // `total_cmp` is a genuine total order even when NaN is present; a
    // comparator that is not a total order may make the sort panic.
    let mut sorted_data = flat_data;
    sorted_data.sort_unstable_by(|a, b| a.total_cmp(b));

    let median = if count % 2 == 0 {
        f64::midpoint(sorted_data[count / 2 - 1], sorted_data[count / 2])
    } else {
        sorted_data[count / 2]
    };
    let q1 = sorted_data[count / 4];
    let q3 = sorted_data[3 * count / 4];
    let iqr = q3 - q1;

    Ok(BasicStatistics {
        mean,
        std_deviation,
        variance,
        skewness,
        kurtosis,
        median,
        iqr,
    })
}
/// Assembles the advanced-statistics section of the report.
///
/// Every value is a fixed constant; the `_data` argument is currently unused.
/// The time-series and Bayesian sections are left as `None`.
///
/// # Errors
///
/// Never returns `Err` as currently written.
fn perform_advanced_analysis(_data: &Array2<f64>) -> DeviceResult<AdvancedStatistics> {
    // --- Multivariate section -------------------------------------------
    let pca_results = PCAResults {
        components: Array2::eye(2),
        explained_variance_ratio: Array1::from_vec(vec![0.8, 0.2]),
        cumulative_variance: Array1::from_vec(vec![0.8, 1.0]),
        n_components_retain: 2,
    };
    let factor_analysis = FactorAnalysisResults {
        loadings: Array2::eye(2),
        communalities: Array1::from_vec(vec![0.8, 0.9]),
        specific_variances: Array1::from_vec(vec![0.2, 0.1]),
        factor_scores: Array2::zeros((10, 2)),
    };
    let cluster_analysis = ClusterAnalysisResults {
        cluster_labels: Array1::zeros(10),
        cluster_centers: Array2::zeros((3, 2)),
        silhouette_scores: Array1::from_vec(vec![0.8, 0.7, 0.9]),
        inertia: 10.0,
    };
    let discriminant_analysis = DiscriminantAnalysisResults {
        linear_discriminants: Array2::eye(2),
        classification_accuracy: 0.95,
        cv_score: 0.93,
        feature_importance: Array1::from_vec(vec![0.7, 0.3]),
    };

    // --- Non-parametric section -----------------------------------------
    let rank_statistics = RankStatistics {
        spearman_correlation: 0.8,
        kendall_tau: 0.7,
        mann_whitney_u: MannWhitneyTest {
            u_statistic: 50.0,
            p_value: 0.05,
            effect_size: 0.5,
        },
        wilcoxon_test: WilcoxonTest {
            statistic: 25.0,
            p_value: 0.03,
            effect_size: 0.6,
        },
    };
    let permutation_tests = PermutationTests {
        p_values: HashMap::new(),
        effect_sizes: HashMap::new(),
        n_permutations: 1000,
    };
    let bootstrap_analysis = BootstrapAnalysis {
        confidence_intervals: HashMap::new(),
        bias: HashMap::new(),
        standard_errors: HashMap::new(),
        n_bootstrap: 1000,
    };
    let kde_analysis = KDEAnalysis {
        density_estimates: Array1::zeros(100),
        bandwidth: 0.1,
        grid_points: Array1::zeros(100),
        kernel_type: String::from("gaussian"),
    };

    Ok(AdvancedStatistics {
        multivariate_analysis: MultivariateAnalysis {
            pca_results,
            factor_analysis,
            cluster_analysis,
            discriminant_analysis,
        },
        time_series_analysis: None,
        bayesian_analysis: None,
        non_parametric_analysis: NonParametricAnalysis {
            rank_statistics,
            permutation_tests,
            bootstrap_analysis,
            kde_analysis,
        },
    })
}
/// Assembles the machine-learning section of the report.
///
/// Every value is a fixed constant; the `_data` argument is currently unused.
///
/// # Errors
///
/// Never returns `Err` as currently written.
fn extract_ml_insights(_data: &Array2<f64>) -> DeviceResult<MLInsights> {
    let anomaly_detection = AnomalyDetectionResults {
        anomaly_scores: Array1::zeros(100),
        threshold: 0.5,
        anomalies: vec![5, 15, 87],
        isolation_forest: IsolationForestResults {
            scores: Array1::zeros(100),
            path_lengths: Array1::zeros(100),
            contamination: 0.1,
        },
    };

    let learning_curves = LearningCurves {
        training_sizes: Array1::from_vec(vec![10.0, 25.0, 50.0, 75.0, 100.0]),
        training_scores: Array1::from_vec(vec![0.8, 0.85, 0.9, 0.92, 0.93]),
        validation_scores: Array1::from_vec(vec![0.75, 0.82, 0.87, 0.89, 0.90]),
    };
    let interpretability = ModelInterpretability {
        shap_values: Array2::zeros((100, 3)),
        feature_attributions: Array1::from_vec(vec![0.4, 0.35, 0.25]),
        partial_dependence: HashMap::new(),
    };
    let predictive_modeling = PredictiveModelingResults {
        performance_metrics: HashMap::new(),
        cv_scores: Array1::from_vec(vec![0.9, 0.85, 0.92, 0.88, 0.90]),
        learning_curves,
        interpretability,
    };

    let dimensionality_reduction = DimensionalityReduction {
        tsne_results: TSNEResults {
            embedding: Array2::zeros((100, 2)),
            perplexity: 30.0,
            kl_divergence: 1.5,
        },
        umap_results: UMAPResults {
            embedding: Array2::zeros((100, 2)),
            n_neighbors: 15,
            min_dist: 0.1,
        },
        manifold_learning: ManifoldLearningResults {
            intrinsic_dimension: 2,
            local_linearity: Array1::zeros(100),
            embedding: Array2::zeros((100, 2)),
        },
    };

    Ok(MLInsights {
        feature_importance: Array1::from_vec(vec![0.5, 0.3, 0.2]),
        anomaly_detection,
        predictive_modeling,
        dimensionality_reduction,
    })
}
/// Assembles the uncertainty section of the report.
///
/// Every value is a fixed constant; the `_data` argument is currently unused.
/// Three named uncertainty sources are reported alongside a four-parameter
/// sensitivity analysis.
///
/// # Errors
///
/// Never returns `Err` as currently written.
fn quantify_uncertainty(_data: &Array2<f64>) -> DeviceResult<UncertaintyAnalysis> {
    let uncertainty_sources = HashMap::from([
        ("measurement_noise".to_string(), 0.3),
        ("model_uncertainty".to_string(), 0.2),
        ("parameter_uncertainty".to_string(), 0.1),
    ]);

    let morris_screening = MorrisScreeningResults {
        elementary_effects: Array2::zeros((100, 4)),
        mu: Array1::from_vec(vec![0.2, 0.15, 0.1, 0.05]),
        sigma: Array1::from_vec(vec![0.1, 0.08, 0.06, 0.04]),
        mu_star: Array1::from_vec(vec![0.25, 0.18, 0.12, 0.08]),
    };
    let sensitivity_analysis = SensitivityAnalysis {
        first_order_indices: Array1::from_vec(vec![0.4, 0.3, 0.2, 0.1]),
        total_indices: Array1::from_vec(vec![0.5, 0.4, 0.25, 0.15]),
        interaction_effects: Array2::zeros((4, 4)),
        morris_screening,
    };

    Ok(UncertaintyAnalysis {
        aleatory_uncertainty: 0.3,
        epistemic_uncertainty: 0.2,
        total_uncertainty: 0.5,
        uncertainty_sources,
        sensitivity_analysis,
    })
}
}