quantrs2_device/noise_modeling_scirs2/statistical.rs

//! Statistical noise analysis using SciRS2
//!
//! This module provides comprehensive statistical characterization of quantum noise,
//! including distributional analysis, moment analysis, correlation analysis, and outlier detection.

use super::config::DistributionType;
use crate::{DeviceError, DeviceResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use scirs2_core::random::prelude::*;
use scirs2_stats::{corrcoef, kurtosis, mean, median, skew, spearmanr, std, var};
use std::collections::HashMap;

/// Statistical noise characterization
#[derive(Debug, Clone)]
pub struct StatisticalNoiseModel {
    /// Distributional analysis for each noise source
    pub distributions: HashMap<String, NoiseDistribution>,
    /// Higher-order moment analysis
    pub moments: HashMap<String, MomentAnalysis>,
    /// Correlation structure between noise sources
    pub correlation_structure: CorrelationStructure,
    /// Outlier detection and analysis
    pub outlier_analysis: OutlierAnalysis,
    /// Non-parametric density estimates
    pub density_estimates: HashMap<String, DensityEstimate>,
}

/// Noise distribution analysis result
#[derive(Debug, Clone)]
pub struct NoiseDistribution {
    /// Family of the fitted distribution
    pub distribution_type: DistributionType,
    /// Fitted parameters (e.g. [mean, std] for Normal, [shape, scale] for Gamma)
    pub parameters: Vec<f64>,
    /// Goodness of fit, computed as 1 - KS statistic (higher is better)
    pub goodness_of_fit: f64,
    /// Confidence intervals of the fitted distribution
    pub confidence_intervals: Vec<(f64, f64)>,
    /// p-value of the Kolmogorov-Smirnov test
    pub p_value: f64,
}

/// Statistical moment analysis
#[derive(Debug, Clone)]
pub struct MomentAnalysis {
    pub mean: f64,
    pub variance: f64,
    pub skewness: f64,
    pub kurtosis: f64,
    /// Central moments of order 3 through 6
    pub higher_moments: Vec<f64>,
    /// Bootstrap confidence intervals, keyed by moment name
    pub confidence_intervals: HashMap<String, (f64, f64)>,
}

/// Correlation structure between noise sources
#[derive(Debug, Clone)]
pub struct CorrelationStructure {
    /// Pearson correlation matrix
    pub correlation_matrix: Array2<f64>,
    pub partial_correlations: Array2<f64>,
    /// Spearman rank correlation matrix
    pub rank_correlations: Array2<f64>,
    pub time_varying_correlations: Option<Array2<f64>>,
    pub correlation_networks: CorrelationNetworks,
}

/// Correlation network analysis
#[derive(Debug, Clone)]
pub struct CorrelationNetworks {
    pub threshold_networks: HashMap<String, Array2<bool>>,
    pub community_structure: Vec<Vec<usize>>,
    pub centrality_measures: HashMap<usize, CentralityMeasures>,
}

/// Network centrality measures
#[derive(Debug, Clone)]
pub struct CentralityMeasures {
    pub betweenness: f64,
    pub closeness: f64,
    pub eigenvector: f64,
    pub pagerank: f64,
}

/// Outlier detection results
#[derive(Debug, Clone)]
pub struct OutlierAnalysis {
    /// Indices of flagged points in the concatenated, flattened data
    pub outlier_indices: Vec<usize>,
    /// Outlier score magnitude for every point
    pub outlier_scores: Array1<f64>,
    pub outlier_method: OutlierMethod,
    /// Fraction of points flagged as outliers
    pub contamination_rate: f64,
}

/// Outlier detection methods
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OutlierMethod {
    IsolationForest,
    LocalOutlierFactor,
    OneClassSVM,
    DBSCAN,
    StatisticalTests,
}

/// Non-parametric density estimation
#[derive(Debug, Clone)]
pub struct DensityEstimate {
    pub method: DensityMethod,
    /// Kernel bandwidth used for the estimate
    pub bandwidth: f64,
    /// Grid points where the density was evaluated
    pub support: Array1<f64>,
    /// Estimated density at each support point
    pub density: Array1<f64>,
    /// Leave-one-out log-likelihood of the estimate
    pub log_likelihood: f64,
}

/// Density estimation methods
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DensityMethod {
    KernelDensityEstimation,
    HistogramEstimation,
    GaussianMixture,
    Splines,
}

/// Statistical analysis engine
pub struct StatisticalAnalyzer {
    /// Confidence level for interval estimates (e.g. 0.95)
    confidence_level: f64,
    /// Number of bootstrap resamples
    bootstrap_samples: usize,
}

impl StatisticalAnalyzer {
    /// Create a new statistical analyzer
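    ///
    /// # Example
    ///
    /// A minimal sketch with hypothetical settings: 95% confidence level and
    /// 1000 bootstrap resamples.
    ///
    /// ```ignore
    /// let analyzer = StatisticalAnalyzer::new(0.95, 1000);
    /// ```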
    pub const fn new(confidence_level: f64, bootstrap_samples: usize) -> Self {
        Self {
            confidence_level,
            bootstrap_samples,
        }
    }

    /// Fit the best candidate distribution to the data, ranking candidates by
    /// a Kolmogorov-Smirnov goodness-of-fit statistic
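    ///
    /// A minimal usage sketch (the `samples` vector is hypothetical):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::Array2;
    ///
    /// let data = Array2::from_shape_vec((samples.len(), 1), samples).unwrap();
    /// let fit = analyzer.fit_best_distribution(&data.view())?;
    /// println!("{:?}: p = {:.3}", fit.distribution_type, fit.p_value);
    /// ```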
    pub fn fit_best_distribution(&self, data: &ArrayView2<f64>) -> DeviceResult<NoiseDistribution> {
        let flat_data = data.iter().copied().collect::<Vec<f64>>();
        let data_array = Array1::from_vec(flat_data);

        let mut best_distribution = NoiseDistribution {
            distribution_type: DistributionType::Normal,
            parameters: vec![],
            goodness_of_fit: 0.0,
            confidence_intervals: vec![],
            p_value: 0.0,
        };

        // Candidates are ranked by the negated KS statistic: smaller D wins.
        let mut best_score = f64::NEG_INFINITY;

        // Candidate 1: normal distribution, parameterized by the sample moments
        let data_mean = mean(&data_array.view())
            .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
        let data_std = std(&data_array.view(), 1, None)
            .map_err(|e| DeviceError::APIError(format!("Std calculation error: {e:?}")))?;

        let (ks_stat, p_value) =
            self.kolmogorov_smirnov_test(&data_array, &DistributionType::Normal)?;
        let score = -ks_stat;

        if score > best_score {
            best_score = score;
            best_distribution = NoiseDistribution {
                distribution_type: DistributionType::Normal,
                parameters: vec![data_mean, data_std],
                goodness_of_fit: 1.0 - ks_stat,
                // Central 95% interval of the fitted normal
                confidence_intervals: vec![(
                    data_mean - 1.96 * data_std,
                    data_mean + 1.96 * data_std,
                )],
                p_value,
            };
        }

        // Candidate 2: gamma distribution, applicable only to strictly positive data
        if data_array.iter().all(|&x| x > 0.0) {
            let (ks_stat, p_value) =
                self.kolmogorov_smirnov_test(&data_array, &DistributionType::Gamma)?;
            let score = -ks_stat;

            if score > best_score {
                let (shape, scale) = self.estimate_gamma_parameters(&data_array)?;
                best_score = score;
                best_distribution = NoiseDistribution {
                    distribution_type: DistributionType::Gamma,
                    parameters: vec![shape, scale],
                    goodness_of_fit: 1.0 - ks_stat,
                    // Rough upper bound: three times the fitted mean
                    confidence_intervals: vec![(0.0, shape * scale * 3.0)],
                    p_value,
                };
            }
        }

        // Candidate 3: exponential distribution for non-negative data.
        // This is the last candidate, so `best_score` needs no further update.
        if data_array.iter().all(|&x| x >= 0.0) {
            let (ks_stat, p_value) =
                self.kolmogorov_smirnov_test(&data_array, &DistributionType::Exponential)?;
            let score = -ks_stat;

            if score > best_score {
                let rate = 1.0 / data_mean;
                best_distribution = NoiseDistribution {
                    distribution_type: DistributionType::Exponential,
                    parameters: vec![rate],
                    goodness_of_fit: 1.0 - ks_stat,
                    // (0, 95th percentile): F^-1(0.95) = -ln(0.05) / rate
                    confidence_intervals: vec![(0.0, -data_mean * (0.05_f64).ln())],
                    p_value,
                };
            }
        }

        Ok(best_distribution)
    }

    /// Analyze statistical moments of the data
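    ///
    /// Alongside mean, variance, skewness, and kurtosis, this returns the
    /// central moments of order 3 through 6 and bootstrap confidence
    /// intervals. A minimal sketch:
    ///
    /// ```ignore
    /// let moments = analyzer.analyze_moments(&data.view())?;
    /// println!("mean = {}, variance = {}", moments.mean, moments.variance);
    /// ```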
    pub fn analyze_moments(&self, data: &ArrayView2<f64>) -> DeviceResult<MomentAnalysis> {
        let flat_data = data.iter().copied().collect::<Vec<f64>>();
        let data_array = Array1::from_vec(flat_data);

        let data_mean = mean(&data_array.view())
            .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;

        let data_var = var(&data_array.view(), 1, None)
            .map_err(|e| DeviceError::APIError(format!("Variance calculation error: {e:?}")))?;

        let data_skew = skew(&data_array.view(), true, None)
            .map_err(|e| DeviceError::APIError(format!("Skewness calculation error: {e:?}")))?;

        let data_kurt = kurtosis(&data_array.view(), true, true, None)
            .map_err(|e| DeviceError::APIError(format!("Kurtosis calculation error: {e:?}")))?;

        // Calculate central moments of order 3..=6
        let higher_moments = self.calculate_higher_moments(&data_array, 6)?;

        // Calculate confidence intervals using bootstrap
        let confidence_intervals = self.bootstrap_moment_confidence(&data_array)?;

        Ok(MomentAnalysis {
            mean: data_mean,
            variance: data_var,
            skewness: data_skew,
            kurtosis: data_kurt,
            higher_moments,
            confidence_intervals,
        })
    }

    /// Analyze correlation structure between multiple noise sources
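    ///
    /// Expects one measurement matrix per named noise source. A minimal,
    /// hypothetical sketch:
    ///
    /// ```ignore
    /// let mut sources = HashMap::new();
    /// sources.insert("qubit0_dephasing".to_string(), measurements0);
    /// sources.insert("qubit1_dephasing".to_string(), measurements1);
    /// let structure = analyzer.analyze_correlation_structure(&sources)?;
    /// ```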
    pub fn analyze_correlation_structure(
        &self,
        noise_measurements: &HashMap<String, Array2<f64>>,
    ) -> DeviceResult<CorrelationStructure> {
        // With fewer than two sources there is nothing to correlate.
        if noise_measurements.len() < 2 {
            return Ok(CorrelationStructure {
                correlation_matrix: Array2::zeros((0, 0)),
                partial_correlations: Array2::zeros((0, 0)),
                rank_correlations: Array2::zeros((0, 0)),
                time_varying_correlations: None,
                correlation_networks: CorrelationNetworks {
                    threshold_networks: HashMap::new(),
                    community_structure: vec![],
                    centrality_measures: HashMap::new(),
                },
            });
        }

        // Collect all data into a samples-by-sources matrix. Each source is
        // flattened, so size the rows by the longest flattened source; shorter
        // sources are zero-padded, which can bias the correlation estimates.
        let sources: Vec<String> = noise_measurements.keys().cloned().collect();
        let n_sources = sources.len();
        let max_samples = noise_measurements
            .values()
            .map(|data| data.len())
            .max()
            .unwrap_or(0);

        let mut data_matrix = Array2::zeros((max_samples, n_sources));

        for (i, source) in sources.iter().enumerate() {
            if let Some(measurements) = noise_measurements.get(source) {
                let flat_data: Vec<f64> = measurements.iter().copied().collect();
                let n_samples = flat_data.len().min(max_samples);
                for j in 0..n_samples {
                    data_matrix[[j, i]] = flat_data[j];
                }
            }
        }

        // Compute the Pearson correlation matrix
        let correlation_matrix = corrcoef(&data_matrix.view(), "pearson")
            .map_err(|e| DeviceError::APIError(format!("Correlation matrix error: {e:?}")))?;

        // Compute rank (Spearman) correlations
        let rank_correlations = self.compute_rank_correlations(&data_matrix)?;

        // Compute partial correlations
        let partial_correlations = self.compute_partial_correlations(&correlation_matrix)?;

        // Build correlation networks
        let correlation_networks = self.build_correlation_networks(&correlation_matrix)?;

        Ok(CorrelationStructure {
            correlation_matrix,
            partial_correlations,
            rank_correlations,
            time_varying_correlations: None,
            correlation_networks,
        })
    }

    /// Detect outliers in the data using a modified z-score test
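    ///
    /// Each source is scored independently with the modified z-score
    /// `M_i = 0.6745 * (x_i - median) / MAD`; points with `|M_i| > 3.5` are
    /// flagged, following the threshold recommended by Iglewicz and Hoaglin.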
    pub fn detect_outliers(
        &self,
        noise_measurements: &HashMap<String, Array2<f64>>,
    ) -> DeviceResult<OutlierAnalysis> {
        let mut all_outliers = Vec::new();
        let mut all_scores = Vec::new();
        // Offset so per-source indices address the concatenated data.
        let mut offset = 0;

        // Score each source independently
        for measurements in noise_measurements.values() {
            let flat_data: Vec<f64> = measurements.iter().copied().collect();
            let n_points = flat_data.len();

            // Use the statistical method (modified z-score)
            let data_array = Array1::from_vec(flat_data);
            let (outliers, scores) = self.detect_outliers_statistical(&data_array)?;

            all_outliers.extend(outliers.into_iter().map(|idx| idx + offset));
            all_scores.extend(scores);
            offset += n_points;
        }

        let contamination_rate = all_outliers.len() as f64
            / noise_measurements.values().map(|m| m.len()).sum::<usize>() as f64;

        Ok(OutlierAnalysis {
            outlier_indices: all_outliers,
            outlier_scores: Array1::from_vec(all_scores),
            outlier_method: OutlierMethod::StatisticalTests,
            contamination_rate,
        })
    }

    /// Estimate probability density non-parametrically using kernel density estimation
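    ///
    /// A minimal sketch; the returned `support` and `density` arrays can be
    /// plotted directly:
    ///
    /// ```ignore
    /// let kde = analyzer.estimate_density(&data.view())?;
    /// for (x, f) in kde.support.iter().zip(kde.density.iter()) {
    ///     println!("{x}\t{f}");
    /// }
    /// ```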
    pub fn estimate_density(&self, data: &ArrayView2<f64>) -> DeviceResult<DensityEstimate> {
        let flat_data = data.iter().copied().collect::<Vec<f64>>();
        let data_array = Array1::from_vec(flat_data);

        let n = data_array.len();
        if n < 2 {
            // Not enough data for a meaningful estimate; return an empty one.
            return Ok(DensityEstimate {
                method: DensityMethod::KernelDensityEstimation,
                bandwidth: 1.0,
                support: Array1::zeros(0),
                density: Array1::zeros(0),
                log_likelihood: 0.0,
            });
        }

        // Silverman's rule of thumb for bandwidth selection: h = 1.06 * sigma * n^(-1/5)
        let data_std = std(&data_array.view(), 1, None)
            .map_err(|e| DeviceError::APIError(format!("Std calculation error: {e:?}")))?;
        let bandwidth = 1.06 * data_std * (n as f64).powf(-1.0 / 5.0);

        // Evaluate the density on a uniform grid extending 20% beyond the data range
        let data_min = data_array.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let data_max = data_array.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
        let range = data_max - data_min;
        let support_points = 100;

        let start = data_min - 0.2 * range;
        let step = 1.4 * range / (support_points - 1) as f64;
        let support = Array1::from_shape_fn(support_points, |i| start + i as f64 * step);

        // Gaussian-kernel KDE: f(x) = (1 / (n * h)) * sum_i phi((x - x_i) / h)
        let mut density = Array1::zeros(support_points);
        for (i, &x) in support.iter().enumerate() {
            let mut sum = 0.0;
            for &data_point in &data_array {
                let z = (x - data_point) / bandwidth;
                sum += (-0.5 * z * z).exp();
            }
            density[i] = sum / (n as f64 * bandwidth * (2.0 * std::f64::consts::PI).sqrt());
        }

        // Leave-one-out log-likelihood (useful for bandwidth cross-validation);
        // the tolerance also skips exact duplicates.
        let mut log_likelihood = 0.0;
        for &data_point in &data_array {
            let mut sum = 0.0;
            for &other_point in &data_array {
                if (data_point - other_point).abs() > 1e-10 {
                    let z = (data_point - other_point) / bandwidth;
                    sum += (-0.5 * z * z).exp();
                }
            }
            if sum > 0.0 {
                log_likelihood +=
                    (sum / ((n - 1) as f64 * bandwidth * (2.0 * std::f64::consts::PI).sqrt())).ln();
            }
        }

        Ok(DensityEstimate {
            method: DensityMethod::KernelDensityEstimation,
            bandwidth,
            support,
            density,
            log_likelihood,
        })
    }

    // Helper methods

    /// One-sample Kolmogorov-Smirnov test: returns the statistic
    /// `D = sup_x |F_n(x) - F(x)|` and the asymptotic p-value
    /// approximation `p ~ 2 exp(-2 n D^2)`.
    fn kolmogorov_smirnov_test(
        &self,
        data: &Array1<f64>,
        distribution_type: &DistributionType,
    ) -> DeviceResult<(f64, f64)> {
        let n = data.len() as f64;
        let mut sorted_data = data.to_vec();
        sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        let data_mean = mean(&data.view())
            .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
        let data_std = std(&data.view(), 1, None)
            .map_err(|e| DeviceError::APIError(format!("Std calculation error: {e:?}")))?;

        // The gamma parameters are constant over the loop, so estimate them once.
        let gamma_params = match distribution_type {
            DistributionType::Gamma => Some(self.estimate_gamma_parameters(data)?),
            _ => None,
        };

        let mut max_diff: f64 = 0.0;
        for (i, &value) in sorted_data.iter().enumerate() {
            // Calculate the theoretical CDF for the candidate distribution
            let theoretical_cdf = match distribution_type {
                DistributionType::Normal => {
                    let z = (value - data_mean) / data_std;
                    0.5 * (1.0 + self.erf(z / 2.0_f64.sqrt()))
                }
                DistributionType::Exponential => {
                    if value >= 0.0 {
                        let rate = 1.0 / data_mean;
                        1.0 - (-rate * value).exp()
                    } else {
                        0.0
                    }
                }
                DistributionType::Gamma => {
                    // Simplified gamma CDF approximation
                    let (shape, scale) = gamma_params.expect("gamma parameters estimated above");
                    if value >= 0.0 {
                        self.gamma_cdf_approx(value, shape, scale)
                    } else {
                        0.0
                    }
                }
                _ => 0.5, // Default fallback
            };

            // The empirical CDF steps at each order statistic, so check the
            // deviation on both sides of the step.
            let d_plus = (i + 1) as f64 / n - theoretical_cdf;
            let d_minus = theoretical_cdf - i as f64 / n;
            max_diff = max_diff.max(d_plus.max(d_minus));
        }

        // Asymptotic approximation, clamped to a valid probability.
        let p_value = if max_diff > 0.0 {
            (2.0 * (-2.0 * n * max_diff * max_diff).exp()).min(1.0)
        } else {
            1.0
        };

        Ok((max_diff, p_value))
    }

    /// Error function approximation (Abramowitz and Stegun 7.1.26),
    /// accurate to about 1.5e-7 in absolute error
    fn erf(&self, x: f64) -> f64 {
        let a1 = 0.254_829_592;
        let a2 = -0.284_496_736;
        let a3 = 1.421_413_741;
        let a4 = -1.453_152_027;
        let a5 = 1.061_405_429;
        let p = 0.327_591_1;

        // erf(-x) = -erf(x): evaluate on |x| and restore the sign.
        let sign = x.signum();
        let x = x.abs();

        let t = 1.0 / (1.0 + p * x);
        // Horner evaluation of the degree-5 polynomial in t
        let poly = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t;
        let y = 1.0 - poly * (-x * x).exp();

        sign * y
    }

    /// Gamma CDF approximation
    fn gamma_cdf_approx(&self, x: f64, shape: f64, scale: f64) -> f64 {
        if x <= 0.0 {
            return 0.0;
        }

        if shape > 10.0 {
            // For large shape the gamma distribution is close to normal
            // (mean = shape * scale, variance = shape * scale^2).
            let mean = shape * scale;
            let variance = shape * scale * scale;
            let std_dev = variance.sqrt();
            let z = (x - mean) / std_dev;
            0.5 * (1.0 + self.erf(z / 2.0_f64.sqrt()))
        } else {
            // Crude closed-form stand-in for the regularized incomplete
            // gamma function; adequate only for ranking candidate fits.
            let normalized_x = x / scale;
            (normalized_x / (normalized_x + 1.0)).powf(shape)
        }
    }

    /// Method-of-moments estimate of the gamma parameters:
    /// `shape = mean^2 / var`, `scale = var / mean`.
    fn estimate_gamma_parameters(&self, data: &Array1<f64>) -> DeviceResult<(f64, f64)> {
        let data_mean = mean(&data.view())
            .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;
        let data_var = var(&data.view(), 1, None)
            .map_err(|e| DeviceError::APIError(format!("Variance calculation error: {e:?}")))?;

        if data_var > 0.0 {
            let scale = data_var / data_mean;
            let shape = data_mean / scale;
            Ok((shape, scale))
        } else {
            // Degenerate (zero-variance) data: fall back to unit shape.
            Ok((1.0, data_mean))
        }
    }

    /// Central moments `E[(X - mean)^k]` for `k = 3..=max_order`
    fn calculate_higher_moments(
        &self,
        data: &Array1<f64>,
        max_order: usize,
    ) -> DeviceResult<Vec<f64>> {
        let data_mean = mean(&data.view())
            .map_err(|e| DeviceError::APIError(format!("Mean calculation error: {e:?}")))?;

        let mut moments = Vec::new();

        for order in 3..=max_order {
            let mut sum = 0.0;
            for &value in data {
                sum += (value - data_mean).powi(order as i32);
            }
            moments.push(sum / data.len() as f64);
        }

        Ok(moments)
    }

    /// Percentile-bootstrap confidence intervals for the sample moments
    fn bootstrap_moment_confidence(
        &self,
        data: &Array1<f64>,
    ) -> DeviceResult<HashMap<String, (f64, f64)>> {
        let mut confidence_intervals = HashMap::new();
        let mut rng = thread_rng();

        // Bootstrap for the mean: resample with replacement and collect
        // the resampled means.
        let mut bootstrap_means = Vec::new();
        for _ in 0..self.bootstrap_samples {
            let sample: Vec<f64> = (0..data.len())
                .map(|_| data[rng.gen_range(0..data.len())])
                .collect();
            let sample_array = Array1::from_vec(sample);
            if let Ok(sample_mean) = mean(&sample_array.view()) {
                bootstrap_means.push(sample_mean);
            }
        }

        bootstrap_means.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let alpha = 1.0 - self.confidence_level;
        let lower_idx = (alpha / 2.0 * self.bootstrap_samples as f64) as usize;
        let upper_idx = ((1.0 - alpha / 2.0) * self.bootstrap_samples as f64) as usize;

        if !bootstrap_means.is_empty() && upper_idx < bootstrap_means.len() {
            confidence_intervals.insert(
                "mean".to_string(),
                (bootstrap_means[lower_idx], bootstrap_means[upper_idx]),
            );
        }

        Ok(confidence_intervals)
    }

    /// Spearman rank correlations between all pairs of columns
    fn compute_rank_correlations(&self, data: &Array2<f64>) -> DeviceResult<Array2<f64>> {
        let n_vars = data.ncols();
        let mut rank_corr = Array2::zeros((n_vars, n_vars));

        for i in 0..n_vars {
            for j in 0..n_vars {
                if i == j {
                    rank_corr[[i, j]] = 1.0;
                } else {
                    let col1 = data.column(i);
                    let col2 = data.column(j);

                    // On failure the entry is left at 0.0.
                    if let Ok((rho, _)) = spearmanr(&col1, &col2, "two-sided") {
                        rank_corr[[i, j]] = rho;
                    }
                }
            }
        }

        Ok(rank_corr)
    }

    fn compute_partial_correlations(&self, corr_matrix: &Array2<f64>) -> DeviceResult<Array2<f64>> {
        let n = corr_matrix.nrows();
        let mut partial_corr = Array2::zeros((n, n));

        // Simplified partial correlation computation: the full version would
        // invert the correlation matrix and normalize the precision entries.
        for i in 0..n {
            for j in 0..n {
                if i == j {
                    partial_corr[[i, j]] = 1.0;
                } else {
                    partial_corr[[i, j]] = corr_matrix[[i, j]];
                }
            }
        }

        Ok(partial_corr)
    }

    fn build_correlation_networks(
        &self,
        corr_matrix: &Array2<f64>,
    ) -> DeviceResult<CorrelationNetworks> {
        let n = corr_matrix.nrows();
        let mut threshold_networks = HashMap::new();

        // Create networks at different correlation thresholds
        for &threshold in &[0.3, 0.5, 0.7, 0.9] {
            let mut network = Array2::from_elem((n, n), false);
            for i in 0..n {
                for j in 0..n {
                    if i != j && corr_matrix[[i, j]].abs() > threshold {
                        network[[i, j]] = true;
                    }
                }
            }
            threshold_networks.insert(threshold.to_string(), network);
        }

        // Placeholder community structure (a single community spanning all
        // nodes) and empty centrality measures until proper graph analysis
        // is wired in.
        let community_structure = vec![(0..n).collect()];
        let centrality_measures = HashMap::new();

        Ok(CorrelationNetworks {
            threshold_networks,
            community_structure,
            centrality_measures,
        })
    }

    /// Modified z-score outlier test. The factor 0.6745 is roughly the 0.75
    /// normal quantile, which scales the MAD to be consistent with the
    /// standard deviation for normally distributed data.
    fn detect_outliers_statistical(
        &self,
        data: &Array1<f64>,
    ) -> DeviceResult<(Vec<usize>, Vec<f64>)> {
        let data_median = median(&data.view())
            .map_err(|e| DeviceError::APIError(format!("Median calculation error: {e:?}")))?;

        // Median absolute deviation
        let deviations: Vec<f64> = data.iter().map(|&x| (x - data_median).abs()).collect();
        let mad = median(&Array1::from_vec(deviations).view())
            .map_err(|e| DeviceError::APIError(format!("MAD calculation error: {e:?}")))?;

        let threshold = 3.5;
        let mut outliers = Vec::new();
        let mut scores = Vec::new();

        for (i, &value) in data.iter().enumerate() {
            let modified_z_score = if mad > 0.0 {
                0.6745 * (value - data_median) / mad
            } else {
                // A MAD of zero (at least half the data identical) gives no
                // spread to score against, so nothing is flagged.
                0.0
            };

            scores.push(modified_z_score.abs());

            if modified_z_score.abs() > threshold {
                outliers.push(i);
            }
        }

        Ok((outliers, scores))
    }
}
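
#[cfg(test)]
mod tests {
    use super::*;

    // Illustrative smoke tests sketching expected behavior of the pure-math
    // helpers; the tolerances are assumptions, and the outlier test assumes
    // the scirs2_stats `median` routine behaves as used above.

    #[test]
    fn erf_matches_known_values() {
        let analyzer = StatisticalAnalyzer::new(0.95, 100);
        // erf(0) = 0 and erf(1) ~ 0.8427007929; the Abramowitz-Stegun
        // approximation is accurate to roughly 1.5e-7.
        assert!(analyzer.erf(0.0).abs() < 1e-6);
        assert!((analyzer.erf(1.0) - 0.842_700_792_9).abs() < 1e-6);
        // Odd symmetry: erf(-x) = -erf(x).
        assert!((analyzer.erf(-1.0) + analyzer.erf(1.0)).abs() < 1e-12);
    }

    #[test]
    fn modified_z_score_flags_an_obvious_outlier() {
        let analyzer = StatisticalAnalyzer::new(0.95, 100);
        // Twenty well-behaved points plus one extreme value at index 20.
        let mut values: Vec<f64> = (0..20).map(|i| i as f64 * 0.1).collect();
        values.push(1000.0);
        let (outliers, scores) = analyzer
            .detect_outliers_statistical(&Array1::from_vec(values))
            .expect("outlier detection should succeed");
        assert_eq!(outliers, vec![20]);
        assert_eq!(scores.len(), 21);
    }
}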