1use scirs2_core::ndarray::{Array1, Array2};
4use std::collections::HashMap;
5
6use super::{config::NoiseType, performance::DDPerformanceAnalysis, sequences::DDSequence};
7use crate::DeviceResult;
8
9#[cfg(feature = "scirs2")]
11use scirs2_stats::{mean, std};
12
13#[cfg(not(feature = "scirs2"))]
14use super::fallback_scirs2::{inv, mean, std, trace};
15use scirs2_core::random::prelude::*;
16
/// Aggregate result returned by `DDStatisticalAnalyzer::perform_statistical_analysis`.
///
/// Bundles the outputs of the four analysis stages that function runs over one
/// shared sample matrix.
#[derive(Debug, Clone)]
pub struct DDStatisticalAnalysis {
    /// Descriptive statistics produced by `calculate_basic_statistics`.
    pub basic_statistics: BasicStatistics,
    /// Output of `perform_advanced_analysis`.
    pub advanced_statistics: AdvancedStatistics,
    /// Output of `extract_ml_insights`; the analyzer in this file always stores `Some`.
    pub ml_insights: Option<MLInsights>,
    /// Output of `quantify_uncertainty`.
    pub uncertainty_analysis: UncertaintyAnalysis,
}
29
/// Descriptive statistics of a single flat sample.
///
/// In this file the values are computed by
/// `DDStatisticalAnalyzer::calculate_basic_statistics` over every element of the
/// sample matrix, ignoring its row/column structure.
#[derive(Debug, Clone)]
pub struct BasicStatistics {
    /// Arithmetic mean of all elements.
    pub mean: f64,
    /// Square root of `variance`.
    pub std_deviation: f64,
    /// Sample variance (sum of squared deviations divided by `n - 1`).
    pub variance: f64,
    /// Mean of the cubed deviations after standardizing by `std_deviation`.
    pub skewness: f64,
    /// Mean of the fourth-power standardized deviations minus 3 (excess kurtosis).
    pub kurtosis: f64,
    /// Middle value of the sorted data (midpoint of the two central values for an even count).
    pub median: f64,
    /// Difference between the sorted elements at indices `3n/4` and `n/4`.
    pub iqr: f64,
}
48
/// Container for the higher-level analyses attached to a statistical report.
///
/// `DDStatisticalAnalyzer::perform_advanced_analysis` is the only producer visible
/// in this file; it leaves both optional members as `None`.
#[derive(Debug, Clone)]
pub struct AdvancedStatistics {
    /// Multivariate results (PCA, factor, cluster and discriminant analysis).
    pub multivariate_analysis: MultivariateAnalysis,
    /// Time-series diagnostics; `None` when not computed.
    pub time_series_analysis: Option<TimeSeriesAnalysis>,
    /// Bayesian results; `None` when not computed.
    pub bayesian_analysis: Option<BayesianAnalysis>,
    /// Rank-based, permutation, bootstrap and KDE results.
    pub non_parametric_analysis: NonParametricAnalysis,
}
61
/// Groups the four multivariate result sets.
///
/// Populated with fixed placeholder values by
/// `DDStatisticalAnalyzer::perform_advanced_analysis` in this file.
#[derive(Debug, Clone)]
pub struct MultivariateAnalysis {
    /// Principal component analysis results.
    pub pca_results: PCAResults,
    /// Factor analysis results.
    pub factor_analysis: FactorAnalysisResults,
    /// Cluster analysis results.
    pub cluster_analysis: ClusterAnalysisResults,
    /// Discriminant analysis results.
    pub discriminant_analysis: DiscriminantAnalysisResults,
}
74
/// Principal component analysis (PCA) results, as named by the type.
///
/// The only producer in this file (`perform_advanced_analysis`) fills it with
/// fixed placeholder values rather than values derived from data.
#[derive(Debug, Clone)]
pub struct PCAResults {
    /// Component matrix (row/column orientation is not established here — TODO confirm).
    pub components: Array2<f64>,
    /// Share of variance attributed to each component, per the field name.
    pub explained_variance_ratio: Array1<f64>,
    /// Running total of the explained variance (presumably over `explained_variance_ratio` — confirm).
    pub cumulative_variance: Array1<f64>,
    /// Number of components to keep.
    pub n_components_retain: usize,
}
87
/// Factor analysis results, as named by the type.
///
/// Filled with fixed placeholder values by `perform_advanced_analysis` in this file.
#[derive(Debug, Clone)]
pub struct FactorAnalysisResults {
    /// Factor loading matrix (orientation not established here — TODO confirm).
    pub loadings: Array2<f64>,
    /// Communality values (presumably one per variable — confirm).
    pub communalities: Array1<f64>,
    /// Specific (unique) variance values (presumably one per variable — confirm).
    pub specific_variances: Array1<f64>,
    /// Factor score matrix (presumably observations by factors — confirm).
    pub factor_scores: Array2<f64>,
}
100
/// Cluster analysis results, as named by the type.
///
/// Filled with fixed placeholder values by `perform_advanced_analysis` in this file.
#[derive(Debug, Clone)]
pub struct ClusterAnalysisResults {
    /// Integer cluster label per entry (presumably one per observation — confirm).
    pub cluster_labels: Array1<i32>,
    /// Cluster centre coordinates (presumably clusters by features — confirm).
    pub cluster_centers: Array2<f64>,
    /// Silhouette scores (whether per cluster or per observation is not established here).
    pub silhouette_scores: Array1<f64>,
    /// Inertia value reported for the clustering.
    pub inertia: f64,
}
113
/// Discriminant analysis results, as named by the type.
///
/// Filled with fixed placeholder values by `perform_advanced_analysis` in this file.
#[derive(Debug, Clone)]
pub struct DiscriminantAnalysisResults {
    /// Linear discriminant matrix (orientation not established here — TODO confirm).
    pub linear_discriminants: Array2<f64>,
    /// Classification accuracy (presumably a fraction in `[0, 1]` — confirm).
    pub classification_accuracy: f64,
    /// Cross-validation score.
    pub cv_score: f64,
    /// Importance weight per feature.
    pub feature_importance: Array1<f64>,
}
126
/// Container for time-series diagnostics.
///
/// Not constructed anywhere in the visible code: `perform_advanced_analysis`
/// leaves `AdvancedStatistics::time_series_analysis` as `None`.
#[derive(Debug, Clone)]
pub struct TimeSeriesAnalysis {
    /// Trend fit summary.
    pub trend_analysis: TrendAnalysis,
    /// Seasonality summary and decomposition.
    pub seasonality_analysis: SeasonalityAnalysis,
    /// Results of the stationarity tests.
    pub stationarity_tests: StationarityTests,
    /// Autocorrelation diagnostics.
    pub autocorrelation: AutocorrelationAnalysis,
}
139
/// Summary of a trend fit. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct TrendAnalysis {
    /// Slope of the linear trend (units not established here).
    pub linear_slope: f64,
    /// p-value associated with the trend.
    pub trend_p_value: f64,
    /// Coefficient of determination of the fit.
    pub r_squared: f64,
    /// Categorical label for the trend.
    pub trend_direction: TrendDirection,
}
152
/// Categorical label for a trend.
///
/// No code in view assigns these variants, so the criteria that separate them
/// are not established in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TrendDirection {
    /// Values trend upward.
    Increasing,
    /// Values trend downward.
    Decreasing,
    /// No notable upward or downward trend.
    Stable,
    /// The trend is not described by a straight line.
    NonLinear,
}
161
/// Seasonality summary. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct SeasonalityAnalysis {
    /// Detected seasonal period, or `None` when there is none (units presumably samples — confirm).
    pub seasonal_period: Option<usize>,
    /// Strength of the seasonal component (scale not established here).
    pub seasonal_strength: f64,
    /// Trend/seasonal/residual split of the series.
    pub seasonal_decomposition: SeasonalDecomposition,
}
172
/// Three-way decomposition of a series. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct SeasonalDecomposition {
    /// Trend component.
    pub trend: Array1<f64>,
    /// Seasonal component.
    pub seasonal: Array1<f64>,
    /// Residual component (whether the model is additive or multiplicative is not established here).
    pub residual: Array1<f64>,
}
183
/// Results of three stationarity tests. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct StationarityTests {
    /// ADF test result (presumably Augmented Dickey-Fuller — confirm).
    pub adf_test: StationarityTest,
    /// KPSS test result.
    pub kpss_test: StationarityTest,
    /// PP test result (presumably Phillips-Perron — confirm).
    pub pp_test: StationarityTest,
}
194
/// Outcome of a single stationarity test. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct StationarityTest {
    /// Test statistic.
    pub statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Critical values keyed by a string label (presumably significance levels — TODO confirm).
    pub critical_values: HashMap<String, f64>,
    /// Whether the series was judged stationary.
    pub is_stationary: bool,
}
207
/// Autocorrelation diagnostics. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct AutocorrelationAnalysis {
    /// Autocorrelation function values (presumably indexed by lag — confirm).
    pub acf: Array1<f64>,
    /// Partial autocorrelation function values (presumably indexed by lag — confirm).
    pub pacf: Array1<f64>,
    /// Lags flagged as significant.
    pub significant_lags: Vec<usize>,
    /// Ljung-Box test result.
    pub ljung_box_test: LjungBoxTest,
}
220
/// Outcome of a Ljung-Box test. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct LjungBoxTest {
    /// Test statistic.
    pub statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Degrees of freedom.
    pub df: usize,
    /// Whether autocorrelation was detected.
    pub has_autocorrelation: bool,
}
233
/// Container for Bayesian results.
///
/// Not constructed anywhere in the visible code: `perform_advanced_analysis`
/// leaves `AdvancedStatistics::bayesian_analysis` as `None`.
#[derive(Debug, Clone)]
pub struct BayesianAnalysis {
    /// Posterior summaries keyed by a string name (presumably the parameter name — confirm).
    pub posterior_distributions: HashMap<String, PosteriorDistribution>,
    /// `(f64, f64)` interval per key (presumably `(lower, upper)` — confirm).
    pub credible_intervals: HashMap<String, (f64, f64)>,
    /// Bayes factor per key.
    pub bayes_factors: HashMap<String, f64>,
    /// Model evidence value (whether on a log scale is not established here).
    pub model_evidence: f64,
}
246
/// Summary of one posterior distribution. No code in view constructs this type.
#[derive(Debug, Clone)]
pub struct PosteriorDistribution {
    /// Posterior samples.
    pub samples: Array1<f64>,
    /// Posterior mean.
    pub mean: f64,
    /// Posterior standard deviation.
    pub std: f64,
    /// HDI bounds (presumably highest-density interval as `(lower, upper)` — confirm).
    pub hdi: (f64, f64),
}
259
/// Groups the non-parametric result sets.
///
/// Filled with fixed placeholder values by `perform_advanced_analysis` in this file.
#[derive(Debug, Clone)]
pub struct NonParametricAnalysis {
    /// Rank-based correlations and tests.
    pub rank_statistics: RankStatistics,
    /// Permutation test results.
    pub permutation_tests: PermutationTests,
    /// Bootstrap results.
    pub bootstrap_analysis: BootstrapAnalysis,
    /// Kernel density estimation results.
    pub kde_analysis: KDEAnalysis,
}
272
/// Rank-based statistics.
///
/// Filled with fixed placeholder values by `perform_advanced_analysis` in this
/// file; which variables are being compared is not established here.
#[derive(Debug, Clone)]
pub struct RankStatistics {
    /// Spearman rank correlation coefficient.
    pub spearman_correlation: f64,
    /// Kendall tau coefficient.
    pub kendall_tau: f64,
    /// Mann-Whitney U test result.
    pub mann_whitney_u: MannWhitneyTest,
    /// Wilcoxon test result.
    pub wilcoxon_test: WilcoxonTest,
}
285
/// Outcome of a Mann-Whitney U test.
///
/// Filled with fixed placeholder values by `perform_advanced_analysis` in this file.
#[derive(Debug, Clone)]
pub struct MannWhitneyTest {
    /// U statistic.
    pub u_statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Effect size (which measure is used is not established here).
    pub effect_size: f64,
}
296
/// Outcome of a Wilcoxon test.
///
/// Filled with fixed placeholder values by `perform_advanced_analysis` in this file.
#[derive(Debug, Clone)]
pub struct WilcoxonTest {
    /// Test statistic.
    pub statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Effect size (which measure is used is not established here).
    pub effect_size: f64,
}
307
/// Permutation test results.
///
/// `perform_advanced_analysis` creates this with empty maps and
/// `n_permutations` set to 1000.
#[derive(Debug, Clone)]
pub struct PermutationTests {
    /// p-value per string key (presumably the test or statistic name — confirm).
    pub p_values: HashMap<String, f64>,
    /// Effect size per string key.
    pub effect_sizes: HashMap<String, f64>,
    /// Number of permutations.
    pub n_permutations: usize,
}
318
/// Bootstrap results.
///
/// `perform_advanced_analysis` creates this with empty maps and `n_bootstrap`
/// set to 1000.
#[derive(Debug, Clone)]
pub struct BootstrapAnalysis {
    /// `(f64, f64)` interval per string key (presumably `(lower, upper)` — confirm).
    pub confidence_intervals: HashMap<String, (f64, f64)>,
    /// Bias estimate per string key.
    pub bias: HashMap<String, f64>,
    /// Standard error per string key.
    pub standard_errors: HashMap<String, f64>,
    /// Number of bootstrap resamples.
    pub n_bootstrap: usize,
}
331
/// Kernel density estimation (KDE) results.
///
/// `perform_advanced_analysis` creates this with zero-filled arrays of length
/// 100, bandwidth 0.1 and kernel type `"gaussian"`.
#[derive(Debug, Clone)]
pub struct KDEAnalysis {
    /// Density values (presumably one per entry of `grid_points` — confirm).
    pub density_estimates: Array1<f64>,
    /// Kernel bandwidth.
    pub bandwidth: f64,
    /// Grid positions for the density values.
    pub grid_points: Array1<f64>,
    /// Kernel name as a free-form string.
    pub kernel_type: String,
}
344
/// Machine-learning style insights attached to a statistical report.
///
/// Filled with fixed placeholder values by
/// `DDStatisticalAnalyzer::extract_ml_insights` in this file.
#[derive(Debug, Clone)]
pub struct MLInsights {
    /// Importance weight per feature.
    pub feature_importance: Array1<f64>,
    /// Anomaly detection results.
    pub anomaly_detection: AnomalyDetectionResults,
    /// Predictive modelling results.
    pub predictive_modeling: PredictiveModelingResults,
    /// Dimensionality reduction results.
    pub dimensionality_reduction: DimensionalityReduction,
}
357
/// Anomaly detection results.
///
/// Filled with fixed placeholder values by `extract_ml_insights` in this file.
#[derive(Debug, Clone)]
pub struct AnomalyDetectionResults {
    /// Anomaly score per entry (presumably one per sample — confirm).
    pub anomaly_scores: Array1<f64>,
    /// Score threshold (the comparison direction is not established here).
    pub threshold: f64,
    /// Indices flagged as anomalous (presumably sample indices — confirm).
    pub anomalies: Vec<usize>,
    /// Isolation forest details.
    pub isolation_forest: IsolationForestResults,
}
370
/// Isolation forest details.
///
/// Filled with fixed placeholder values by `extract_ml_insights` in this file.
#[derive(Debug, Clone)]
pub struct IsolationForestResults {
    /// Score per entry (presumably one per sample — confirm).
    pub scores: Array1<f64>,
    /// Path length per entry (presumably an average over trees — confirm).
    pub path_lengths: Array1<f64>,
    /// Contamination setting (presumably the assumed anomaly fraction — confirm).
    pub contamination: f64,
}
381
/// Predictive modelling results.
///
/// `extract_ml_insights` creates this with an empty metrics map and fixed
/// placeholder values for the other members.
#[derive(Debug, Clone)]
pub struct PredictiveModelingResults {
    /// Metric value per string key (presumably the metric name — confirm).
    pub performance_metrics: HashMap<String, f64>,
    /// Cross-validation scores (presumably one per fold — confirm).
    pub cv_scores: Array1<f64>,
    /// Learning curve data.
    pub learning_curves: LearningCurves,
    /// Model interpretability data.
    pub interpretability: ModelInterpretability,
}
394
/// Learning curve data.
///
/// `extract_ml_insights` fills the three arrays with five placeholder entries each.
#[derive(Debug, Clone)]
pub struct LearningCurves {
    /// Training-set sizes (stored as `f64`).
    pub training_sizes: Array1<f64>,
    /// Training scores (presumably aligned with `training_sizes` — confirm).
    pub training_scores: Array1<f64>,
    /// Validation scores (presumably aligned with `training_sizes` — confirm).
    pub validation_scores: Array1<f64>,
}
405
/// Model interpretability data.
///
/// `extract_ml_insights` creates this with a zero-filled 100x3 matrix, three
/// placeholder attributions and an empty partial-dependence map.
#[derive(Debug, Clone)]
pub struct ModelInterpretability {
    /// SHAP value matrix (presumably samples by features — confirm).
    pub shap_values: Array2<f64>,
    /// Attribution per feature.
    pub feature_attributions: Array1<f64>,
    /// Pair of arrays per string key (presumably `(grid, response)` per feature name — confirm).
    pub partial_dependence: HashMap<String, (Array1<f64>, Array1<f64>)>,
}
416
/// Groups the dimensionality-reduction result sets.
///
/// Filled with fixed placeholder values by `extract_ml_insights` in this file.
#[derive(Debug, Clone)]
pub struct DimensionalityReduction {
    /// t-SNE results.
    pub tsne_results: TSNEResults,
    /// UMAP results.
    pub umap_results: UMAPResults,
    /// Manifold learning results.
    pub manifold_learning: ManifoldLearningResults,
}
427
/// t-SNE results.
///
/// `extract_ml_insights` creates this with a zero-filled 100x2 embedding,
/// perplexity 30.0 and KL divergence 1.5.
#[derive(Debug, Clone)]
pub struct TSNEResults {
    /// Embedding matrix (presumably samples by embedding dimensions — confirm).
    pub embedding: Array2<f64>,
    /// Perplexity setting.
    pub perplexity: f64,
    /// KL divergence value.
    pub kl_divergence: f64,
}
438
/// UMAP results.
///
/// `extract_ml_insights` creates this with a zero-filled 100x2 embedding,
/// 15 neighbours and a minimum distance of 0.1.
#[derive(Debug, Clone)]
pub struct UMAPResults {
    /// Embedding matrix (presumably samples by embedding dimensions — confirm).
    pub embedding: Array2<f64>,
    /// Neighbour-count setting.
    pub n_neighbors: usize,
    /// Minimum-distance setting.
    pub min_dist: f64,
}
449
/// Manifold learning results.
///
/// `extract_ml_insights` creates this with intrinsic dimension 2 and zero-filled
/// arrays (length 100 and 100x2).
#[derive(Debug, Clone)]
pub struct ManifoldLearningResults {
    /// Intrinsic dimension value.
    pub intrinsic_dimension: usize,
    /// Local linearity value per entry (presumably one per sample — confirm).
    pub local_linearity: Array1<f64>,
    /// Embedding matrix (presumably samples by embedding dimensions — confirm).
    pub embedding: Array2<f64>,
}
460
/// Uncertainty breakdown attached to a statistical report.
///
/// Filled with fixed placeholder values by
/// `DDStatisticalAnalyzer::quantify_uncertainty` in this file.
#[derive(Debug, Clone)]
pub struct UncertaintyAnalysis {
    /// Aleatory uncertainty component.
    pub aleatory_uncertainty: f64,
    /// Epistemic uncertainty component.
    pub epistemic_uncertainty: f64,
    /// Total uncertainty (how it combines the two components is not established here).
    pub total_uncertainty: f64,
    /// Contribution per named source; `quantify_uncertainty` uses the keys
    /// `"measurement_noise"`, `"model_uncertainty"` and `"parameter_uncertainty"`.
    pub uncertainty_sources: HashMap<String, f64>,
    /// Sensitivity analysis results.
    pub sensitivity_analysis: SensitivityAnalysis,
}
475
/// Sensitivity analysis results.
///
/// `quantify_uncertainty` fills this with placeholder values for four entries
/// (presumably four parameters — confirm).
#[derive(Debug, Clone)]
pub struct SensitivityAnalysis {
    /// First-order sensitivity index per entry (presumably Sobol-type — confirm).
    pub first_order_indices: Array1<f64>,
    /// Total sensitivity index per entry.
    pub total_indices: Array1<f64>,
    /// Pairwise interaction effect matrix.
    pub interaction_effects: Array2<f64>,
    /// Morris screening results.
    pub morris_screening: MorrisScreeningResults,
}
488
/// Morris screening results.
///
/// `quantify_uncertainty` creates this with a zero-filled 100x4 matrix and
/// four placeholder entries in each vector.
#[derive(Debug, Clone)]
pub struct MorrisScreeningResults {
    /// Elementary effect matrix (presumably trajectories by parameters — confirm).
    pub elementary_effects: Array2<f64>,
    /// `mu` per entry (presumably the mean elementary effect — confirm).
    pub mu: Array1<f64>,
    /// `sigma` per entry (presumably the standard deviation of the elementary effects — confirm).
    pub sigma: Array1<f64>,
    /// `mu*` per entry (presumably the mean absolute elementary effect — confirm).
    pub mu_star: Array1<f64>,
}
501
/// Stateless entry point for the statistical analysis routines.
///
/// The type carries no data; its functionality is exposed through associated
/// functions that take no `self`.
pub struct DDStatisticalAnalyzer;
504
505impl DDStatisticalAnalyzer {
506 pub fn perform_statistical_analysis(
508 sequence: &DDSequence,
509 performance_analysis: &DDPerformanceAnalysis,
510 ) -> DeviceResult<DDStatisticalAnalysis> {
511 let sample_data = Self::generate_sample_data(sequence, performance_analysis)?;
513
514 let basic_statistics = Self::calculate_basic_statistics(&sample_data)?;
515 let advanced_statistics = Self::perform_advanced_analysis(&sample_data)?;
516 let ml_insights = Self::extract_ml_insights(&sample_data)?;
517 let uncertainty_analysis = Self::quantify_uncertainty(&sample_data)?;
518
519 Ok(DDStatisticalAnalysis {
520 basic_statistics,
521 advanced_statistics,
522 ml_insights: Some(ml_insights),
523 uncertainty_analysis,
524 })
525 }
526
527 fn generate_sample_data(
529 _sequence: &DDSequence,
530 performance_analysis: &DDPerformanceAnalysis,
531 ) -> DeviceResult<Array2<f64>> {
532 let n_samples = 100;
534 let n_features = performance_analysis.metrics.len();
535 let mut data = Array2::zeros((n_samples, n_features));
536
537 for i in 0..n_samples {
539 for j in 0..n_features {
540 data[[i, j]] = thread_rng().gen::<f64>();
541 }
542 }
543
544 Ok(data)
545 }
546
547 fn calculate_basic_statistics(data: &Array2<f64>) -> DeviceResult<BasicStatistics> {
549 let flat_data = data.iter().copied().collect::<Vec<f64>>();
550 let n = flat_data.len() as f64;
551
552 let mean = flat_data.iter().sum::<f64>() / n;
553 let variance = flat_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0);
554 let std_deviation = variance.sqrt();
555
556 let skewness = flat_data
558 .iter()
559 .map(|x| ((x - mean) / std_deviation).powi(3))
560 .sum::<f64>()
561 / n;
562
563 let kurtosis = flat_data
564 .iter()
565 .map(|x| ((x - mean) / std_deviation).powi(4))
566 .sum::<f64>()
567 / n
568 - 3.0;
569
570 let mut sorted_data = flat_data;
572 sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
573
574 let median = if sorted_data.len() % 2 == 0 {
575 f64::midpoint(
576 sorted_data[sorted_data.len() / 2 - 1],
577 sorted_data[sorted_data.len() / 2],
578 )
579 } else {
580 sorted_data[sorted_data.len() / 2]
581 };
582
583 let q1 = sorted_data[sorted_data.len() / 4];
584 let q3 = sorted_data[3 * sorted_data.len() / 4];
585 let iqr = q3 - q1;
586
587 Ok(BasicStatistics {
588 mean,
589 std_deviation,
590 variance,
591 skewness,
592 kurtosis,
593 median,
594 iqr,
595 })
596 }
597
598 fn perform_advanced_analysis(_data: &Array2<f64>) -> DeviceResult<AdvancedStatistics> {
600 let multivariate_analysis = MultivariateAnalysis {
602 pca_results: PCAResults {
603 components: Array2::eye(2),
604 explained_variance_ratio: Array1::from_vec(vec![0.8, 0.2]),
605 cumulative_variance: Array1::from_vec(vec![0.8, 1.0]),
606 n_components_retain: 2,
607 },
608 factor_analysis: FactorAnalysisResults {
609 loadings: Array2::eye(2),
610 communalities: Array1::from_vec(vec![0.8, 0.9]),
611 specific_variances: Array1::from_vec(vec![0.2, 0.1]),
612 factor_scores: Array2::zeros((10, 2)),
613 },
614 cluster_analysis: ClusterAnalysisResults {
615 cluster_labels: Array1::zeros(10),
616 cluster_centers: Array2::zeros((3, 2)),
617 silhouette_scores: Array1::from_vec(vec![0.8, 0.7, 0.9]),
618 inertia: 10.0,
619 },
620 discriminant_analysis: DiscriminantAnalysisResults {
621 linear_discriminants: Array2::eye(2),
622 classification_accuracy: 0.95,
623 cv_score: 0.93,
624 feature_importance: Array1::from_vec(vec![0.7, 0.3]),
625 },
626 };
627
628 Ok(AdvancedStatistics {
629 multivariate_analysis,
630 time_series_analysis: None,
631 bayesian_analysis: None,
632 non_parametric_analysis: NonParametricAnalysis {
633 rank_statistics: RankStatistics {
634 spearman_correlation: 0.8,
635 kendall_tau: 0.7,
636 mann_whitney_u: MannWhitneyTest {
637 u_statistic: 50.0,
638 p_value: 0.05,
639 effect_size: 0.5,
640 },
641 wilcoxon_test: WilcoxonTest {
642 statistic: 25.0,
643 p_value: 0.03,
644 effect_size: 0.6,
645 },
646 },
647 permutation_tests: PermutationTests {
648 p_values: HashMap::new(),
649 effect_sizes: HashMap::new(),
650 n_permutations: 1000,
651 },
652 bootstrap_analysis: BootstrapAnalysis {
653 confidence_intervals: HashMap::new(),
654 bias: HashMap::new(),
655 standard_errors: HashMap::new(),
656 n_bootstrap: 1000,
657 },
658 kde_analysis: KDEAnalysis {
659 density_estimates: Array1::zeros(100),
660 bandwidth: 0.1,
661 grid_points: Array1::zeros(100),
662 kernel_type: "gaussian".to_string(),
663 },
664 },
665 })
666 }
667
668 fn extract_ml_insights(_data: &Array2<f64>) -> DeviceResult<MLInsights> {
670 Ok(MLInsights {
671 feature_importance: Array1::from_vec(vec![0.5, 0.3, 0.2]),
672 anomaly_detection: AnomalyDetectionResults {
673 anomaly_scores: Array1::zeros(100),
674 threshold: 0.5,
675 anomalies: vec![5, 15, 87],
676 isolation_forest: IsolationForestResults {
677 scores: Array1::zeros(100),
678 path_lengths: Array1::zeros(100),
679 contamination: 0.1,
680 },
681 },
682 predictive_modeling: PredictiveModelingResults {
683 performance_metrics: HashMap::new(),
684 cv_scores: Array1::from_vec(vec![0.9, 0.85, 0.92, 0.88, 0.90]),
685 learning_curves: LearningCurves {
686 training_sizes: Array1::from_vec(vec![10.0, 25.0, 50.0, 75.0, 100.0]),
687 training_scores: Array1::from_vec(vec![0.8, 0.85, 0.9, 0.92, 0.93]),
688 validation_scores: Array1::from_vec(vec![0.75, 0.82, 0.87, 0.89, 0.90]),
689 },
690 interpretability: ModelInterpretability {
691 shap_values: Array2::zeros((100, 3)),
692 feature_attributions: Array1::from_vec(vec![0.4, 0.35, 0.25]),
693 partial_dependence: HashMap::new(),
694 },
695 },
696 dimensionality_reduction: DimensionalityReduction {
697 tsne_results: TSNEResults {
698 embedding: Array2::zeros((100, 2)),
699 perplexity: 30.0,
700 kl_divergence: 1.5,
701 },
702 umap_results: UMAPResults {
703 embedding: Array2::zeros((100, 2)),
704 n_neighbors: 15,
705 min_dist: 0.1,
706 },
707 manifold_learning: ManifoldLearningResults {
708 intrinsic_dimension: 2,
709 local_linearity: Array1::zeros(100),
710 embedding: Array2::zeros((100, 2)),
711 },
712 },
713 })
714 }
715
716 fn quantify_uncertainty(_data: &Array2<f64>) -> DeviceResult<UncertaintyAnalysis> {
718 let mut uncertainty_sources = HashMap::new();
719 uncertainty_sources.insert("measurement_noise".to_string(), 0.3);
720 uncertainty_sources.insert("model_uncertainty".to_string(), 0.2);
721 uncertainty_sources.insert("parameter_uncertainty".to_string(), 0.1);
722
723 Ok(UncertaintyAnalysis {
724 aleatory_uncertainty: 0.3,
725 epistemic_uncertainty: 0.2,
726 total_uncertainty: 0.5,
727 uncertainty_sources,
728 sensitivity_analysis: SensitivityAnalysis {
729 first_order_indices: Array1::from_vec(vec![0.4, 0.3, 0.2, 0.1]),
730 total_indices: Array1::from_vec(vec![0.5, 0.4, 0.25, 0.15]),
731 interaction_effects: Array2::zeros((4, 4)),
732 morris_screening: MorrisScreeningResults {
733 elementary_effects: Array2::zeros((100, 4)),
734 mu: Array1::from_vec(vec![0.2, 0.15, 0.1, 0.05]),
735 sigma: Array1::from_vec(vec![0.1, 0.08, 0.06, 0.04]),
736 mu_star: Array1::from_vec(vec![0.25, 0.18, 0.12, 0.08]),
737 },
738 },
739 })
740 }
741}