1use super::config::DistributionType;
7use crate::{DeviceError, DeviceResult};
8use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
9use scirs2_core::random::prelude::*;
10use scirs2_stats::{corrcoef, kurtosis, mean, median, skew, spearmanr, std, var};
11use std::collections::HashMap;
12
13#[derive(Debug, Clone)]
15pub struct StatisticalNoiseModel {
16 pub distributions: HashMap<String, NoiseDistribution>,
18 pub moments: HashMap<String, MomentAnalysis>,
20 pub correlation_structure: CorrelationStructure,
22 pub outlier_analysis: OutlierAnalysis,
24 pub density_estimates: HashMap<String, DensityEstimate>,
26}
27
28#[derive(Debug, Clone)]
30pub struct NoiseDistribution {
31 pub distribution_type: DistributionType,
32 pub parameters: Vec<f64>,
33 pub goodness_of_fit: f64,
34 pub confidence_intervals: Vec<(f64, f64)>,
35 pub p_value: f64,
36}
37
38#[derive(Debug, Clone)]
40pub struct MomentAnalysis {
41 pub mean: f64,
42 pub variance: f64,
43 pub skewness: f64,
44 pub kurtosis: f64,
45 pub higher_moments: Vec<f64>,
46 pub confidence_intervals: HashMap<String, (f64, f64)>,
47}
48
49#[derive(Debug, Clone)]
51pub struct CorrelationStructure {
52 pub correlationmatrix: Array2<f64>,
53 pub partial_correlations: Array2<f64>,
54 pub rank_correlations: Array2<f64>,
55 pub time_varying_correlations: Option<Array2<f64>>,
56 pub correlation_networks: CorrelationNetworks,
57}
58
59#[derive(Debug, Clone)]
61pub struct CorrelationNetworks {
62 pub threshold_networks: HashMap<String, Array2<bool>>,
63 pub community_structure: Vec<Vec<usize>>,
64 pub centrality_measures: HashMap<usize, CentralityMeasures>,
65}
66
67#[derive(Debug, Clone)]
69pub struct CentralityMeasures {
70 pub betweenness: f64,
71 pub closeness: f64,
72 pub eigenvector: f64,
73 pub pagerank: f64,
74}
75
76#[derive(Debug, Clone)]
78pub struct OutlierAnalysis {
79 pub outlier_indices: Vec<usize>,
80 pub outlier_scores: Array1<f64>,
81 pub outlier_method: OutlierMethod,
82 pub contamination_rate: f64,
83}
84
85#[derive(Debug, Clone, PartialEq, Eq)]
87pub enum OutlierMethod {
88 IsolationForest,
89 LocalOutlierFactor,
90 OneClassSVM,
91 DBSCAN,
92 StatisticalTests,
93}
94
95#[derive(Debug, Clone)]
97pub struct DensityEstimate {
98 pub method: DensityMethod,
99 pub bandwidth: f64,
100 pub support: Array1<f64>,
101 pub density: Array1<f64>,
102 pub log_likelihood: f64,
103}
104
105#[derive(Debug, Clone, PartialEq, Eq)]
107pub enum DensityMethod {
108 KernelDensityEstimation,
109 HistogramEstimation,
110 GaussianMixture,
111 Splines,
112}
113
114pub struct StatisticalAnalyzer {
116 confidence_level: f64,
117 bootstrap_samples: usize,
118}
119
120impl StatisticalAnalyzer {
121 pub const fn new(confidence_level: f64, bootstrap_samples: usize) -> Self {
123 Self {
124 confidence_level,
125 bootstrap_samples,
126 }
127 }
128
129 pub fn fit_best_distribution(&self, data: &ArrayView2<f64>) -> DeviceResult<NoiseDistribution> {
131 let flat_data = data.iter().copied().collect::<Vec<f64>>();
132 let data_array = Array1::from_vec(flat_data);
133
134 let mut best_distribution = NoiseDistribution {
135 distribution_type: DistributionType::Normal,
136 parameters: vec![],
137 goodness_of_fit: 0.0,
138 confidence_intervals: vec![],
139 p_value: 0.0,
140 };
141
142 let mut best_score = f64::NEG_INFINITY;
143
144 let data_mean = mean(&data_array.view())
146 .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
147 let data_std = std(&data_array.view(), 1, None)
148 .map_err(|e| DeviceError::APIError(format!("Std calculation error: {e:?}")))?;
149
150 let (ks_stat, p_value) =
151 self.kolmogorov_smirnov_test(&data_array, &DistributionType::Normal)?;
152 let score = -ks_stat;
153
154 if score > best_score {
155 best_score = score;
156 best_distribution = NoiseDistribution {
157 distribution_type: DistributionType::Normal,
158 parameters: vec![data_mean, data_std],
159 goodness_of_fit: 1.0 - ks_stat,
160 confidence_intervals: vec![(
161 1.96f64.mul_add(-data_std, data_mean),
162 1.96f64.mul_add(data_std, data_mean),
163 )],
164 p_value,
165 };
166 }
167
168 if data_array.iter().all(|&x| x > 0.0) {
170 let (ks_stat, p_value) =
171 self.kolmogorov_smirnov_test(&data_array, &DistributionType::Gamma)?;
172 let score = -ks_stat;
173
174 if score > best_score {
175 let (shape, scale) = self.estimate_gamma_parameters(&data_array)?;
176 best_score = score;
177 best_distribution = NoiseDistribution {
178 distribution_type: DistributionType::Gamma,
179 parameters: vec![shape, scale],
180 goodness_of_fit: 1.0 - ks_stat,
181 confidence_intervals: vec![(0.0, shape * scale * 3.0)],
182 p_value,
183 };
184 }
185 }
186
187 if data_array.iter().all(|&x| x >= 0.0) {
189 let (ks_stat, p_value) =
190 self.kolmogorov_smirnov_test(&data_array, &DistributionType::Exponential)?;
191 let score = -ks_stat;
192
193 if score > best_score {
194 let rate = 1.0 / data_mean;
195 best_distribution = NoiseDistribution {
196 distribution_type: DistributionType::Exponential,
197 parameters: vec![rate],
198 goodness_of_fit: 1.0 - ks_stat,
199 confidence_intervals: vec![(0.0, -data_mean * (0.05_f64).ln())],
200 p_value,
201 };
202 }
203 }
204
205 Ok(best_distribution)
206 }
207
208 pub fn analyze_moments(&self, data: &ArrayView2<f64>) -> DeviceResult<MomentAnalysis> {
210 let flat_data = data.iter().copied().collect::<Vec<f64>>();
211 let data_array = Array1::from_vec(flat_data);
212
213 let data_mean = mean(&data_array.view())
214 .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
215
216 let data_var = var(&data_array.view(), 1, None)
217 .map_err(|e| DeviceError::APIError(format!("Variance calculation error: {e:?}")))?;
218
219 let data_skew = skew(&data_array.view(), true, None)
220 .map_err(|e| DeviceError::APIError(format!("Skewness calculation error: {e:?}")))?;
221
222 let data_kurt = kurtosis(&data_array.view(), true, true, None)
223 .map_err(|e| DeviceError::APIError(format!("Kurtosis calculation error: {e:?}")))?;
224
225 let higher_moments = self.calculate_higher_moments(&data_array, 6)?;
227
228 let confidence_intervals = self.bootstrap_moment_confidence(&data_array)?;
230
231 Ok(MomentAnalysis {
232 mean: data_mean,
233 variance: data_var,
234 skewness: data_skew,
235 kurtosis: data_kurt,
236 higher_moments,
237 confidence_intervals,
238 })
239 }
240
241 pub fn analyze_correlation_structure(
243 &self,
244 noise_measurements: &HashMap<String, Array2<f64>>,
245 ) -> DeviceResult<CorrelationStructure> {
246 if noise_measurements.len() < 2 {
247 return Ok(CorrelationStructure {
248 correlationmatrix: Array2::zeros((0, 0)),
249 partial_correlations: Array2::zeros((0, 0)),
250 rank_correlations: Array2::zeros((0, 0)),
251 time_varying_correlations: None,
252 correlation_networks: CorrelationNetworks {
253 threshold_networks: HashMap::new(),
254 community_structure: vec![],
255 centrality_measures: HashMap::new(),
256 },
257 });
258 }
259
260 let sources: Vec<String> = noise_measurements.keys().cloned().collect();
262 let n_sources = sources.len();
263 let max_samples = noise_measurements
264 .values()
265 .map(|data| data.nrows())
266 .max()
267 .unwrap_or(0);
268
269 let mut data_matrix = Array2::zeros((max_samples, n_sources));
270
271 for (i, source) in sources.iter().enumerate() {
272 if let Some(measurements) = noise_measurements.get(source) {
273 let flat_data: Vec<f64> = measurements.iter().copied().collect();
274 let n_samples = flat_data.len().min(max_samples);
275 for j in 0..n_samples {
276 data_matrix[[j, i]] = flat_data[j];
277 }
278 }
279 }
280
281 let correlationmatrix = corrcoef(&data_matrix.view(), "pearson")
283 .map_err(|e| DeviceError::APIError(format!("Correlation matrix error: {e:?}")))?;
284
285 let rank_correlations = self.compute_rank_correlations(&data_matrix)?;
287
288 let partial_correlations = self.compute_partial_correlations(&correlationmatrix)?;
290
291 let correlation_networks = self.build_correlation_networks(&correlationmatrix)?;
293
294 Ok(CorrelationStructure {
295 correlationmatrix,
296 partial_correlations,
297 rank_correlations,
298 time_varying_correlations: None,
299 correlation_networks,
300 })
301 }
302
303 pub fn detect_outliers(
305 &self,
306 noise_measurements: &HashMap<String, Array2<f64>>,
307 ) -> DeviceResult<OutlierAnalysis> {
308 let mut all_outliers = Vec::new();
309 let mut all_scores = Vec::new();
310
311 for measurements in noise_measurements.values() {
313 let flat_data: Vec<f64> = measurements.iter().copied().collect();
314
315 let data_array = Array1::from_vec(flat_data);
317 let (outliers, scores) = self.detect_outliers_statistical(&data_array)?;
318
319 all_outliers.extend(outliers);
320 all_scores.extend(scores);
321 }
322
323 let contamination_rate = all_outliers.len() as f64
324 / noise_measurements.values().map(|m| m.len()).sum::<usize>() as f64;
325
326 Ok(OutlierAnalysis {
327 outlier_indices: all_outliers,
328 outlier_scores: Array1::from_vec(all_scores),
329 outlier_method: OutlierMethod::StatisticalTests,
330 contamination_rate,
331 })
332 }
333
334 pub fn estimate_density(&self, data: &ArrayView2<f64>) -> DeviceResult<DensityEstimate> {
336 let flat_data = data.iter().copied().collect::<Vec<f64>>();
337 let data_array = Array1::from_vec(flat_data);
338
339 let n = data_array.len();
340 if n < 2 {
341 return Ok(DensityEstimate {
342 method: DensityMethod::KernelDensityEstimation,
343 bandwidth: 1.0,
344 support: Array1::zeros(0),
345 density: Array1::zeros(0),
346 log_likelihood: 0.0,
347 });
348 }
349
350 let data_std = std(&data_array.view(), 1, None)
352 .map_err(|e| DeviceError::APIError(format!("Std calculation error: {e:?}")))?;
353 let bandwidth = 1.06 * data_std * (n as f64).powf(-1.0 / 5.0);
354
355 let data_min = data_array.iter().fold(f64::INFINITY, |a, &b| a.min(b));
357 let data_max = data_array.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
358 let range = data_max - data_min;
359 let support_points = 100;
360
361 let support = Array1::from_shape_fn(support_points, |i| {
362 (i as f64 / (support_points - 1) as f64 * 1.4 * range)
363 .mul_add(1.0, 0.2f64.mul_add(-range, data_min))
364 });
365
366 let mut density = Array1::zeros(support_points);
368 for (i, &x) in support.iter().enumerate() {
369 let mut sum = 0.0;
370 for &data_point in &data_array {
371 let z = (x - data_point) / bandwidth;
372 sum += (-0.5 * z * z).exp();
373 }
374 density[i] = sum / (n as f64 * bandwidth * (2.0 * std::f64::consts::PI).sqrt());
375 }
376
377 let mut log_likelihood = 0.0;
379 for &data_point in &data_array {
380 let mut sum = 0.0;
381 for &other_point in &data_array {
382 if (data_point - other_point).abs() > 1e-10 {
383 let z = (data_point - other_point) / bandwidth;
384 sum += (-0.5 * z * z).exp();
385 }
386 }
387 if sum > 0.0 {
388 log_likelihood +=
389 (sum / ((n - 1) as f64 * bandwidth * (2.0 * std::f64::consts::PI).sqrt())).ln();
390 }
391 }
392
393 Ok(DensityEstimate {
394 method: DensityMethod::KernelDensityEstimation,
395 bandwidth,
396 support,
397 density,
398 log_likelihood,
399 })
400 }
401
402 fn kolmogorov_smirnov_test(
405 &self,
406 data: &Array1<f64>,
407 distribution_type: &DistributionType,
408 ) -> DeviceResult<(f64, f64)> {
409 let n = data.len() as f64;
410 let mut sorted_data = data.to_vec();
411 sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
412
413 let data_mean = mean(&data.view())
414 .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
415 let data_std = std(&data.view(), 1, None)
416 .map_err(|e| DeviceError::APIError(format!("Std calculation error: {e:?}")))?;
417
418 let mut max_diff: f64 = 0.0;
419 for (i, &value) in sorted_data.iter().enumerate() {
420 let empirical_cdf = (i + 1) as f64 / n;
421
422 let theoretical_cdf = match distribution_type {
424 DistributionType::Normal => {
425 let z = (value - data_mean) / data_std;
426 0.5 * (1.0 + self.erf(z / 2.0_f64.sqrt()))
427 }
428 DistributionType::Exponential => {
429 if value >= 0.0 {
430 let rate = 1.0 / data_mean;
431 1.0 - (-rate * value).exp()
432 } else {
433 0.0
434 }
435 }
436 DistributionType::Gamma => {
437 let (shape, scale) = self.estimate_gamma_parameters(data)?;
439 if value >= 0.0 {
440 self.gamma_cdf_approx(value, shape, scale)
441 } else {
442 0.0
443 }
444 }
445 _ => 0.5, };
447
448 let diff = (empirical_cdf - theoretical_cdf).abs();
449 max_diff = max_diff.max(diff);
450 }
451
452 let p_value = if max_diff > 0.0 {
453 2.0 * (-2.0 * n * max_diff * max_diff).exp()
454 } else {
455 1.0
456 };
457
458 Ok((max_diff, p_value))
459 }
460
461 fn erf(&self, x: f64) -> f64 {
463 let a1 = 0.254_829_592;
465 let a2 = -0.284_496_736;
466 let a3 = 1.421_413_741;
467 let a4 = -1.453_152_027;
468 let a5 = 1.061_405_429;
469 let p = 0.327_591_1;
470
471 let sign = x.signum();
472 let x = x.abs();
473
474 let t = 1.0 / (1.0 + p * x);
475 let y = ((a5 * t + a4).mul_add(t, a3).mul_add(t, a2).mul_add(t, a1) * t)
476 .mul_add(-(-x * x).exp(), 1.0);
477
478 sign * y
479 }
480
481 fn gamma_cdf_approx(&self, x: f64, shape: f64, scale: f64) -> f64 {
483 if x <= 0.0 {
484 return 0.0;
485 }
486
487 let normalized_x = x / scale;
489
490 if shape > 10.0 {
492 let mean = shape * scale;
493 let variance = shape * scale * scale;
494 let std_dev = variance.sqrt();
495 let z = (x - mean) / std_dev;
496 0.5 * (1.0 + self.erf(z / 2.0_f64.sqrt()))
497 } else {
498 (normalized_x / (normalized_x + 1.0)).powf(shape)
500 }
501 }
502
503 fn estimate_gamma_parameters(&self, data: &Array1<f64>) -> DeviceResult<(f64, f64)> {
504 let data_mean = mean(&data.view())
505 .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
506 let data_var = var(&data.view(), 1, None)
507 .map_err(|e| DeviceError::APIError(format!("Variance calculation error: {e:?}")))?;
508
509 if data_var > 0.0 {
510 let scale = data_var / data_mean;
511 let shape = data_mean / scale;
512 Ok((shape, scale))
513 } else {
514 Ok((1.0, data_mean))
515 }
516 }
517
518 fn calculate_higher_moments(
519 &self,
520 data: &Array1<f64>,
521 max_order: usize,
522 ) -> DeviceResult<Vec<f64>> {
523 let data_mean = mean(&data.view())
524 .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
525
526 let mut moments = Vec::new();
527
528 for order in 3..=max_order {
529 let mut sum = 0.0;
530 for &value in data {
531 sum += (value - data_mean).powi(order as i32);
532 }
533 moments.push(sum / data.len() as f64);
534 }
535
536 Ok(moments)
537 }
538
539 fn bootstrap_moment_confidence(
540 &self,
541 data: &Array1<f64>,
542 ) -> DeviceResult<HashMap<String, (f64, f64)>> {
543 let mut confidence_intervals = HashMap::new();
544 let mut rng = thread_rng();
545 use scirs2_core::random::prelude::*;
546
547 let mut bootstrap_means = Vec::new();
549 for _ in 0..self.bootstrap_samples {
550 let sample: Vec<f64> = (0..data.len())
551 .map(|_| data[rng.gen_range(0..data.len())])
552 .collect();
553 let sample_array = Array1::from_vec(sample);
554 if let Ok(sample_mean) = mean(&sample_array.view()) {
555 bootstrap_means.push(sample_mean);
556 }
557 }
558
559 bootstrap_means.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
560 let alpha = 1.0 - self.confidence_level;
561 let lower_idx = (alpha / 2.0 * self.bootstrap_samples as f64) as usize;
562 let upper_idx = ((1.0 - alpha / 2.0) * self.bootstrap_samples as f64) as usize;
563
564 if !bootstrap_means.is_empty() && upper_idx < bootstrap_means.len() {
565 confidence_intervals.insert(
566 "mean".to_string(),
567 (bootstrap_means[lower_idx], bootstrap_means[upper_idx]),
568 );
569 }
570
571 Ok(confidence_intervals)
572 }
573
574 fn compute_rank_correlations(&self, data: &Array2<f64>) -> DeviceResult<Array2<f64>> {
575 let n_vars = data.ncols();
576 let mut rank_corr = Array2::zeros((n_vars, n_vars));
577
578 for i in 0..n_vars {
579 for j in 0..n_vars {
580 if i == j {
581 rank_corr[[i, j]] = 1.0;
582 } else {
583 let col1 = data.column(i);
584 let col2 = data.column(j);
585
586 if let Ok((rho, _)) = spearmanr(&col1, &col2, "two-sided") {
587 rank_corr[[i, j]] = rho;
588 }
589 }
590 }
591 }
592
593 Ok(rank_corr)
594 }
595
596 fn compute_partial_correlations(&self, corr_matrix: &Array2<f64>) -> DeviceResult<Array2<f64>> {
597 let n = corr_matrix.nrows();
598 let mut partial_corr = Array2::zeros((n, n));
599
600 for i in 0..n {
602 for j in 0..n {
603 if i == j {
604 partial_corr[[i, j]] = 1.0;
605 } else {
606 partial_corr[[i, j]] = corr_matrix[[i, j]];
608 }
609 }
610 }
611
612 Ok(partial_corr)
613 }
614
615 fn build_correlation_networks(
616 &self,
617 corr_matrix: &Array2<f64>,
618 ) -> DeviceResult<CorrelationNetworks> {
619 let n = corr_matrix.nrows();
620 let mut threshold_networks = HashMap::new();
621
622 for &threshold in &[0.3, 0.5, 0.7, 0.9] {
624 let mut network = Array2::from_elem((n, n), false);
625 for i in 0..n {
626 for j in 0..n {
627 if i != j && corr_matrix[[i, j]].abs() > threshold {
628 network[[i, j]] = true;
629 }
630 }
631 }
632 threshold_networks.insert(threshold.to_string(), network);
633 }
634
635 let community_structure = vec![vec![0, 1], vec![2, 3]];
637 let centrality_measures = HashMap::new();
638
639 Ok(CorrelationNetworks {
640 threshold_networks,
641 community_structure,
642 centrality_measures,
643 })
644 }
645
646 fn detect_outliers_statistical(
647 &self,
648 data: &Array1<f64>,
649 ) -> DeviceResult<(Vec<usize>, Vec<f64>)> {
650 let data_median = median(&data.view())
652 .map_err(|e| DeviceError::APIError(format!("Median calculation error: {e:?}")))?;
653
654 let deviations: Vec<f64> = data.iter().map(|&x| (x - data_median).abs()).collect();
656 let mad = median(&Array1::from_vec(deviations).view())
657 .map_err(|e| DeviceError::APIError(format!("MAD calculation error: {e:?}")))?;
658
659 let threshold = 3.5;
660 let mut outliers = Vec::new();
661 let mut scores = Vec::new();
662
663 for (i, &value) in data.iter().enumerate() {
664 let modified_z_score = if mad > 0.0 {
665 0.6745 * (value - data_median) / mad
666 } else {
667 0.0
668 };
669
670 scores.push(modified_z_score.abs());
671
672 if modified_z_score.abs() > threshold {
673 outliers.push(i);
674 }
675 }
676
677 Ok((outliers, scores))
678 }
679}