1use scirs2_core::ndarray::{Array1, Array2};
4use std::collections::HashMap;
5
6use super::{config::NoiseType, performance::DDPerformanceAnalysis, sequences::DDSequence};
7use crate::DeviceResult;
8
9#[cfg(feature = "scirs2")]
11use scirs2_stats::{mean, std};
12
13#[cfg(not(feature = "scirs2"))]
14use super::fallback_scirs2::{inv, mean, std, trace};
15use scirs2_core::random::prelude::*;
16
17#[derive(Debug, Clone)]
19pub struct DDStatisticalAnalysis {
20 pub basic_statistics: BasicStatistics,
22 pub advanced_statistics: AdvancedStatistics,
24 pub ml_insights: Option<MLInsights>,
26 pub uncertainty_analysis: UncertaintyAnalysis,
28}
29
/// Descriptive statistics of a one-dimensional sample.
#[derive(Clone, Debug)]
pub struct BasicStatistics {
    /// Arithmetic mean.
    pub mean: f64,
    /// Standard deviation (square root of `variance`).
    pub std_deviation: f64,
    /// Variance of the sample.
    pub variance: f64,
    /// Skewness (third standardized moment).
    pub skewness: f64,
    /// Kurtosis; the analyzer in this file stores the excess value
    /// (fourth standardized moment minus 3).
    pub kurtosis: f64,
    /// Median of the sample.
    pub median: f64,
    /// Interquartile range (third quartile minus first quartile).
    pub iqr: f64,
}
48
49#[derive(Debug, Clone)]
51pub struct AdvancedStatistics {
52 pub multivariate_analysis: MultivariateAnalysis,
54 pub time_series_analysis: Option<TimeSeriesAnalysis>,
56 pub bayesian_analysis: Option<BayesianAnalysis>,
58 pub non_parametric_analysis: NonParametricAnalysis,
60}
61
62#[derive(Debug, Clone)]
64pub struct MultivariateAnalysis {
65 pub pca_results: PCAResults,
67 pub factor_analysis: FactorAnalysisResults,
69 pub cluster_analysis: ClusterAnalysisResults,
71 pub discriminant_analysis: DiscriminantAnalysisResults,
73}
74
75#[derive(Debug, Clone)]
77pub struct PCAResults {
78 pub components: Array2<f64>,
80 pub explained_variance_ratio: Array1<f64>,
82 pub cumulative_variance: Array1<f64>,
84 pub n_components_retain: usize,
86}
87
88#[derive(Debug, Clone)]
90pub struct FactorAnalysisResults {
91 pub loadings: Array2<f64>,
93 pub communalities: Array1<f64>,
95 pub specific_variances: Array1<f64>,
97 pub factor_scores: Array2<f64>,
99}
100
101#[derive(Debug, Clone)]
103pub struct ClusterAnalysisResults {
104 pub cluster_labels: Array1<i32>,
106 pub cluster_centers: Array2<f64>,
108 pub silhouette_scores: Array1<f64>,
110 pub inertia: f64,
112}
113
114#[derive(Debug, Clone)]
116pub struct DiscriminantAnalysisResults {
117 pub linear_discriminants: Array2<f64>,
119 pub classification_accuracy: f64,
121 pub cv_score: f64,
123 pub feature_importance: Array1<f64>,
125}
126
127#[derive(Debug, Clone)]
129pub struct TimeSeriesAnalysis {
130 pub trend_analysis: TrendAnalysis,
132 pub seasonality_analysis: SeasonalityAnalysis,
134 pub stationarity_tests: StationarityTests,
136 pub autocorrelation: AutocorrelationAnalysis,
138}
139
140#[derive(Debug, Clone)]
142pub struct TrendAnalysis {
143 pub linear_slope: f64,
145 pub trend_p_value: f64,
147 pub r_squared: f64,
149 pub trend_direction: TrendDirection,
151}
152
/// Qualitative classification of a trend.
#[derive(Clone, Debug, PartialEq)]
pub enum TrendDirection {
    /// The series trends upwards.
    Increasing,
    /// The series trends downwards.
    Decreasing,
    /// No notable trend.
    Stable,
    /// A trend that is not linear.
    NonLinear,
}
161
162#[derive(Debug, Clone)]
164pub struct SeasonalityAnalysis {
165 pub seasonal_period: Option<usize>,
167 pub seasonal_strength: f64,
169 pub seasonal_decomposition: SeasonalDecomposition,
171}
172
173#[derive(Debug, Clone)]
175pub struct SeasonalDecomposition {
176 pub trend: Array1<f64>,
178 pub seasonal: Array1<f64>,
180 pub residual: Array1<f64>,
182}
183
184#[derive(Debug, Clone)]
186pub struct StationarityTests {
187 pub adf_test: StationarityTest,
189 pub kpss_test: StationarityTest,
191 pub pp_test: StationarityTest,
193}
194
/// Outcome of a single stationarity test.
#[derive(Clone, Debug)]
pub struct StationarityTest {
    /// Test statistic.
    pub statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Critical values keyed by a string label (the key format is not fixed
    /// by this file).
    pub critical_values: HashMap<String, f64>,
    /// Whether the series was judged stationary.
    pub is_stationary: bool,
}
207
208#[derive(Debug, Clone)]
210pub struct AutocorrelationAnalysis {
211 pub acf: Array1<f64>,
213 pub pacf: Array1<f64>,
215 pub significant_lags: Vec<usize>,
217 pub ljung_box_test: LjungBoxTest,
219}
220
/// Outcome of a Ljung-Box test.
#[derive(Clone, Debug)]
pub struct LjungBoxTest {
    /// Test statistic.
    pub statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Degrees of freedom.
    pub df: usize,
    /// Whether autocorrelation was detected.
    pub has_autocorrelation: bool,
}
233
234#[derive(Debug, Clone)]
236pub struct BayesianAnalysis {
237 pub posterior_distributions: HashMap<String, PosteriorDistribution>,
239 pub credible_intervals: HashMap<String, (f64, f64)>,
241 pub bayes_factors: HashMap<String, f64>,
243 pub model_evidence: f64,
245}
246
247#[derive(Debug, Clone)]
249pub struct PosteriorDistribution {
250 pub samples: Array1<f64>,
252 pub mean: f64,
254 pub std: f64,
256 pub hdi: (f64, f64),
258}
259
260#[derive(Debug, Clone)]
262pub struct NonParametricAnalysis {
263 pub rank_statistics: RankStatistics,
265 pub permutation_tests: PermutationTests,
267 pub bootstrap_analysis: BootstrapAnalysis,
269 pub kde_analysis: KDEAnalysis,
271}
272
273#[derive(Debug, Clone)]
275pub struct RankStatistics {
276 pub spearman_correlation: f64,
278 pub kendall_tau: f64,
280 pub mann_whitney_u: MannWhitneyTest,
282 pub wilcoxon_test: WilcoxonTest,
284}
285
/// Outcome of a Mann-Whitney U test.
#[derive(Clone, Debug)]
pub struct MannWhitneyTest {
    /// U statistic.
    pub u_statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Effect size.
    pub effect_size: f64,
}
296
/// Outcome of a Wilcoxon test.
#[derive(Clone, Debug)]
pub struct WilcoxonTest {
    /// Test statistic.
    pub statistic: f64,
    /// p-value of the test.
    pub p_value: f64,
    /// Effect size.
    pub effect_size: f64,
}
307
/// Permutation test results.
#[derive(Clone, Debug)]
pub struct PermutationTests {
    /// p-values keyed by name.
    pub p_values: HashMap<String, f64>,
    /// Effect sizes keyed by name.
    pub effect_sizes: HashMap<String, f64>,
    /// Number of permutations.
    pub n_permutations: usize,
}
318
/// Bootstrap analysis results.
#[derive(Clone, Debug)]
pub struct BootstrapAnalysis {
    /// Confidence intervals keyed by name; the tuple is presumably
    /// `(lower, upper)` — confirm with the producer.
    pub confidence_intervals: HashMap<String, (f64, f64)>,
    /// Bias estimates keyed by name.
    pub bias: HashMap<String, f64>,
    /// Standard errors keyed by name.
    pub standard_errors: HashMap<String, f64>,
    /// Number of bootstrap resamples.
    pub n_bootstrap: usize,
}
331
332#[derive(Debug, Clone)]
334pub struct KDEAnalysis {
335 pub density_estimates: Array1<f64>,
337 pub bandwidth: f64,
339 pub grid_points: Array1<f64>,
341 pub kernel_type: String,
343}
344
345#[derive(Debug, Clone)]
347pub struct MLInsights {
348 pub feature_importance: Array1<f64>,
350 pub anomaly_detection: AnomalyDetectionResults,
352 pub predictive_modeling: PredictiveModelingResults,
354 pub dimensionality_reduction: DimensionalityReduction,
356}
357
358#[derive(Debug, Clone)]
360pub struct AnomalyDetectionResults {
361 pub anomaly_scores: Array1<f64>,
363 pub threshold: f64,
365 pub anomalies: Vec<usize>,
367 pub isolation_forest: IsolationForestResults,
369}
370
371#[derive(Debug, Clone)]
373pub struct IsolationForestResults {
374 pub scores: Array1<f64>,
376 pub path_lengths: Array1<f64>,
378 pub contamination: f64,
380}
381
382#[derive(Debug, Clone)]
384pub struct PredictiveModelingResults {
385 pub performance_metrics: HashMap<String, f64>,
387 pub cv_scores: Array1<f64>,
389 pub learning_curves: LearningCurves,
391 pub interpretability: ModelInterpretability,
393}
394
395#[derive(Debug, Clone)]
397pub struct LearningCurves {
398 pub training_sizes: Array1<f64>,
400 pub training_scores: Array1<f64>,
402 pub validation_scores: Array1<f64>,
404}
405
406#[derive(Debug, Clone)]
408pub struct ModelInterpretability {
409 pub shap_values: Array2<f64>,
411 pub feature_attributions: Array1<f64>,
413 pub partial_dependence: HashMap<String, (Array1<f64>, Array1<f64>)>,
415}
416
417#[derive(Debug, Clone)]
419pub struct DimensionalityReduction {
420 pub tsne_results: TSNEResults,
422 pub umap_results: UMAPResults,
424 pub manifold_learning: ManifoldLearningResults,
426}
427
428#[derive(Debug, Clone)]
430pub struct TSNEResults {
431 pub embedding: Array2<f64>,
433 pub perplexity: f64,
435 pub kl_divergence: f64,
437}
438
439#[derive(Debug, Clone)]
441pub struct UMAPResults {
442 pub embedding: Array2<f64>,
444 pub n_neighbors: usize,
446 pub min_dist: f64,
448}
449
450#[derive(Debug, Clone)]
452pub struct ManifoldLearningResults {
453 pub intrinsic_dimension: usize,
455 pub local_linearity: Array1<f64>,
457 pub embedding: Array2<f64>,
459}
460
461#[derive(Debug, Clone)]
463pub struct UncertaintyAnalysis {
464 pub aleatory_uncertainty: f64,
466 pub epistemic_uncertainty: f64,
468 pub total_uncertainty: f64,
470 pub uncertainty_sources: HashMap<String, f64>,
472 pub sensitivity_analysis: SensitivityAnalysis,
474}
475
476#[derive(Debug, Clone)]
478pub struct SensitivityAnalysis {
479 pub first_order_indices: Array1<f64>,
481 pub total_indices: Array1<f64>,
483 pub interaction_effects: Array2<f64>,
485 pub morris_screening: MorrisScreeningResults,
487}
488
489#[derive(Debug, Clone)]
491pub struct MorrisScreeningResults {
492 pub elementary_effects: Array2<f64>,
494 pub mu: Array1<f64>,
496 pub sigma: Array1<f64>,
498 pub mu_star: Array1<f64>,
500}
501
502pub struct DDStatisticalAnalyzer;
504
505impl DDStatisticalAnalyzer {
506 pub fn perform_statistical_analysis(
508 sequence: &DDSequence,
509 performance_analysis: &DDPerformanceAnalysis,
510 ) -> DeviceResult<DDStatisticalAnalysis> {
511 let sample_data = Self::generate_sample_data(sequence, performance_analysis)?;
513
514 let basic_statistics = Self::calculate_basic_statistics(&sample_data)?;
515 let advanced_statistics = Self::perform_advanced_analysis(&sample_data)?;
516 let ml_insights = Self::extract_ml_insights(&sample_data)?;
517 let uncertainty_analysis = Self::quantify_uncertainty(&sample_data)?;
518
519 Ok(DDStatisticalAnalysis {
520 basic_statistics,
521 advanced_statistics,
522 ml_insights: Some(ml_insights),
523 uncertainty_analysis,
524 })
525 }
526
527 fn generate_sample_data(
529 _sequence: &DDSequence,
530 performance_analysis: &DDPerformanceAnalysis,
531 ) -> DeviceResult<Array2<f64>> {
532 let n_samples = 100;
534 let n_features = performance_analysis.metrics.len();
535 let mut data = Array2::zeros((n_samples, n_features));
536
537 for i in 0..n_samples {
539 for j in 0..n_features {
540 data[[i, j]] = thread_rng().gen::<f64>();
541 }
542 }
543
544 Ok(data)
545 }
546
547 fn calculate_basic_statistics(data: &Array2<f64>) -> DeviceResult<BasicStatistics> {
549 let flat_data = data.iter().cloned().collect::<Vec<f64>>();
550 let n = flat_data.len() as f64;
551
552 let mean = flat_data.iter().sum::<f64>() / n;
553 let variance = flat_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0);
554 let std_deviation = variance.sqrt();
555
556 let skewness = flat_data
558 .iter()
559 .map(|x| ((x - mean) / std_deviation).powi(3))
560 .sum::<f64>()
561 / n;
562
563 let kurtosis = flat_data
564 .iter()
565 .map(|x| ((x - mean) / std_deviation).powi(4))
566 .sum::<f64>()
567 / n
568 - 3.0;
569
570 let mut sorted_data = flat_data.clone();
572 sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap());
573
574 let median = if sorted_data.len() % 2 == 0 {
575 (sorted_data[sorted_data.len() / 2 - 1] + sorted_data[sorted_data.len() / 2]) / 2.0
576 } else {
577 sorted_data[sorted_data.len() / 2]
578 };
579
580 let q1 = sorted_data[sorted_data.len() / 4];
581 let q3 = sorted_data[3 * sorted_data.len() / 4];
582 let iqr = q3 - q1;
583
584 Ok(BasicStatistics {
585 mean,
586 std_deviation,
587 variance,
588 skewness,
589 kurtosis,
590 median,
591 iqr,
592 })
593 }
594
595 fn perform_advanced_analysis(_data: &Array2<f64>) -> DeviceResult<AdvancedStatistics> {
597 let multivariate_analysis = MultivariateAnalysis {
599 pca_results: PCAResults {
600 components: Array2::eye(2),
601 explained_variance_ratio: Array1::from_vec(vec![0.8, 0.2]),
602 cumulative_variance: Array1::from_vec(vec![0.8, 1.0]),
603 n_components_retain: 2,
604 },
605 factor_analysis: FactorAnalysisResults {
606 loadings: Array2::eye(2),
607 communalities: Array1::from_vec(vec![0.8, 0.9]),
608 specific_variances: Array1::from_vec(vec![0.2, 0.1]),
609 factor_scores: Array2::zeros((10, 2)),
610 },
611 cluster_analysis: ClusterAnalysisResults {
612 cluster_labels: Array1::zeros(10),
613 cluster_centers: Array2::zeros((3, 2)),
614 silhouette_scores: Array1::from_vec(vec![0.8, 0.7, 0.9]),
615 inertia: 10.0,
616 },
617 discriminant_analysis: DiscriminantAnalysisResults {
618 linear_discriminants: Array2::eye(2),
619 classification_accuracy: 0.95,
620 cv_score: 0.93,
621 feature_importance: Array1::from_vec(vec![0.7, 0.3]),
622 },
623 };
624
625 Ok(AdvancedStatistics {
626 multivariate_analysis,
627 time_series_analysis: None,
628 bayesian_analysis: None,
629 non_parametric_analysis: NonParametricAnalysis {
630 rank_statistics: RankStatistics {
631 spearman_correlation: 0.8,
632 kendall_tau: 0.7,
633 mann_whitney_u: MannWhitneyTest {
634 u_statistic: 50.0,
635 p_value: 0.05,
636 effect_size: 0.5,
637 },
638 wilcoxon_test: WilcoxonTest {
639 statistic: 25.0,
640 p_value: 0.03,
641 effect_size: 0.6,
642 },
643 },
644 permutation_tests: PermutationTests {
645 p_values: HashMap::new(),
646 effect_sizes: HashMap::new(),
647 n_permutations: 1000,
648 },
649 bootstrap_analysis: BootstrapAnalysis {
650 confidence_intervals: HashMap::new(),
651 bias: HashMap::new(),
652 standard_errors: HashMap::new(),
653 n_bootstrap: 1000,
654 },
655 kde_analysis: KDEAnalysis {
656 density_estimates: Array1::zeros(100),
657 bandwidth: 0.1,
658 grid_points: Array1::zeros(100),
659 kernel_type: "gaussian".to_string(),
660 },
661 },
662 })
663 }
664
665 fn extract_ml_insights(_data: &Array2<f64>) -> DeviceResult<MLInsights> {
667 Ok(MLInsights {
668 feature_importance: Array1::from_vec(vec![0.5, 0.3, 0.2]),
669 anomaly_detection: AnomalyDetectionResults {
670 anomaly_scores: Array1::zeros(100),
671 threshold: 0.5,
672 anomalies: vec![5, 15, 87],
673 isolation_forest: IsolationForestResults {
674 scores: Array1::zeros(100),
675 path_lengths: Array1::zeros(100),
676 contamination: 0.1,
677 },
678 },
679 predictive_modeling: PredictiveModelingResults {
680 performance_metrics: HashMap::new(),
681 cv_scores: Array1::from_vec(vec![0.9, 0.85, 0.92, 0.88, 0.90]),
682 learning_curves: LearningCurves {
683 training_sizes: Array1::from_vec(vec![10.0, 25.0, 50.0, 75.0, 100.0]),
684 training_scores: Array1::from_vec(vec![0.8, 0.85, 0.9, 0.92, 0.93]),
685 validation_scores: Array1::from_vec(vec![0.75, 0.82, 0.87, 0.89, 0.90]),
686 },
687 interpretability: ModelInterpretability {
688 shap_values: Array2::zeros((100, 3)),
689 feature_attributions: Array1::from_vec(vec![0.4, 0.35, 0.25]),
690 partial_dependence: HashMap::new(),
691 },
692 },
693 dimensionality_reduction: DimensionalityReduction {
694 tsne_results: TSNEResults {
695 embedding: Array2::zeros((100, 2)),
696 perplexity: 30.0,
697 kl_divergence: 1.5,
698 },
699 umap_results: UMAPResults {
700 embedding: Array2::zeros((100, 2)),
701 n_neighbors: 15,
702 min_dist: 0.1,
703 },
704 manifold_learning: ManifoldLearningResults {
705 intrinsic_dimension: 2,
706 local_linearity: Array1::zeros(100),
707 embedding: Array2::zeros((100, 2)),
708 },
709 },
710 })
711 }
712
713 fn quantify_uncertainty(_data: &Array2<f64>) -> DeviceResult<UncertaintyAnalysis> {
715 let mut uncertainty_sources = HashMap::new();
716 uncertainty_sources.insert("measurement_noise".to_string(), 0.3);
717 uncertainty_sources.insert("model_uncertainty".to_string(), 0.2);
718 uncertainty_sources.insert("parameter_uncertainty".to_string(), 0.1);
719
720 Ok(UncertaintyAnalysis {
721 aleatory_uncertainty: 0.3,
722 epistemic_uncertainty: 0.2,
723 total_uncertainty: 0.5,
724 uncertainty_sources,
725 sensitivity_analysis: SensitivityAnalysis {
726 first_order_indices: Array1::from_vec(vec![0.4, 0.3, 0.2, 0.1]),
727 total_indices: Array1::from_vec(vec![0.5, 0.4, 0.25, 0.15]),
728 interaction_effects: Array2::zeros((4, 4)),
729 morris_screening: MorrisScreeningResults {
730 elementary_effects: Array2::zeros((100, 4)),
731 mu: Array1::from_vec(vec![0.2, 0.15, 0.1, 0.05]),
732 sigma: Array1::from_vec(vec![0.1, 0.08, 0.06, 0.04]),
733 mu_star: Array1::from_vec(vec![0.25, 0.18, 0.12, 0.08]),
734 },
735 },
736 })
737 }
738}