// quantrs2_device/mid_circuit_measurements/analytics/distribution.rs

1//! Distribution analysis components
2use super::super::results::*;
3use crate::DeviceResult;
4use scirs2_core::ndarray::Array1;
5use std::collections::HashMap;
/// Distribution analyzer for measurement data
pub struct DistributionAnalyzer {
    /// Confidence level for statistical procedures (default 0.95).
    /// NOTE(review): no method in this file currently reads this field —
    /// confirm whether it is consumed elsewhere or reserved for future use.
    confidence_level: f64,
}
10impl DistributionAnalyzer {
11    /// Create new distribution analyzer
12    pub const fn new() -> Self {
13        Self {
14            confidence_level: 0.95,
15        }
16    }
    /// Create distribution analyzer with custom confidence level
    ///
    /// `confidence_level` is stored as given (e.g. 0.95 for 95%); it is
    /// not validated or clamped here.
    pub const fn with_confidence_level(confidence_level: f64) -> Self {
        Self { confidence_level }
    }
21    /// Analyze distributions in measurement data
22    pub fn analyze(&self, values: &[f64]) -> DeviceResult<DistributionAnalysisResults> {
23        if values.is_empty() {
24            return Ok(DistributionAnalysisResults::default());
25        }
26        let best_fit_distributions = self.fit_distributions(values)?;
27        let distribution_comparisons =
28            Self::compare_distributions(values, &best_fit_distributions)?;
29        let mixture_models = if values.len() > 50 {
30            Some(self.fit_mixture_models(values)?)
31        } else {
32            None
33        };
34        let normality_assessment = self.assess_normality(values)?;
35        Ok(DistributionAnalysisResults {
36            best_fit_distributions,
37            distribution_comparisons,
38            mixture_models,
39            normality_assessment,
40        })
41    }
42    /// Fit various statistical distributions to data
43    fn fit_distributions(&self, values: &[f64]) -> DeviceResult<HashMap<String, DistributionFit>> {
44        let mut distributions = HashMap::new();
45        let normal_fit = self.fit_normal_distribution(values)?;
46        distributions.insert("normal".to_string(), normal_fit);
47        let exponential_fit = self.fit_exponential_distribution(values)?;
48        distributions.insert("exponential".to_string(), exponential_fit);
49        let uniform_fit = self.fit_uniform_distribution(values)?;
50        distributions.insert("uniform".to_string(), uniform_fit);
51        let gamma_fit = Self::fit_gamma_distribution(values)?;
52        distributions.insert("gamma".to_string(), gamma_fit);
53        let beta_fit = Self::fit_beta_distribution(values)?;
54        distributions.insert("beta".to_string(), beta_fit);
55        Ok(distributions)
56    }
57    /// Fit normal distribution
58    fn fit_normal_distribution(&self, values: &[f64]) -> DeviceResult<DistributionFit> {
59        let mean = values.iter().sum::<f64>() / values.len() as f64;
60        let variance =
61            values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / (values.len() - 1) as f64;
62        let std_dev = variance.sqrt();
63        let parameters = vec![mean, std_dev];
64        let log_likelihood = Self::calculate_normal_log_likelihood(values, mean, std_dev);
65        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
66        let bic = (-2.0f64).mul_add(
67            log_likelihood,
68            (parameters.len() as f64) * (values.len() as f64).ln(),
69        );
70        Ok(DistributionFit {
71            distribution_name: "Normal".to_string(),
72            parameters,
73            log_likelihood,
74            aic,
75            bic,
76            ks_statistic: Self::calculate_ks_statistic(values, |x| {
77                self.normal_cdf(x, mean, std_dev)
78            }),
79            ks_p_value: 0.1,
80        })
81    }
82    /// Fit exponential distribution
83    fn fit_exponential_distribution(&self, values: &[f64]) -> DeviceResult<DistributionFit> {
84        if values.iter().any(|&x| x <= 0.0) {
85            return Ok(DistributionFit {
86                distribution_name: "Exponential".to_string(),
87                parameters: vec![],
88                log_likelihood: f64::NEG_INFINITY,
89                aic: f64::INFINITY,
90                bic: f64::INFINITY,
91                ks_statistic: 1.0,
92                ks_p_value: 0.0,
93            });
94        }
95        let lambda = 1.0 / (values.iter().sum::<f64>() / values.len() as f64);
96        let parameters = vec![lambda];
97        let log_likelihood = values
98            .iter()
99            .map(|&x| lambda.ln() - lambda * x)
100            .sum::<f64>();
101        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
102        let bic = (-2.0f64).mul_add(
103            log_likelihood,
104            (parameters.len() as f64) * (values.len() as f64).ln(),
105        );
106        Ok(DistributionFit {
107            distribution_name: "Exponential".to_string(),
108            parameters,
109            log_likelihood,
110            aic,
111            bic,
112            ks_statistic: Self::calculate_ks_statistic(values, |x| {
113                Self::exponential_cdf(x, lambda)
114            }),
115            ks_p_value: 0.1,
116        })
117    }
118    /// Fit uniform distribution
119    fn fit_uniform_distribution(&self, values: &[f64]) -> DeviceResult<DistributionFit> {
120        let min_val = values.iter().copied().fold(f64::INFINITY, f64::min);
121        let max_val = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
122        let parameters = vec![min_val, max_val];
123        let range = max_val - min_val;
124        let log_likelihood = if range > 0.0 {
125            -(values.len() as f64) * range.ln()
126        } else {
127            f64::NEG_INFINITY
128        };
129        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
130        let bic = (-2.0f64).mul_add(
131            log_likelihood,
132            (parameters.len() as f64) * (values.len() as f64).ln(),
133        );
134        Ok(DistributionFit {
135            distribution_name: "Uniform".to_string(),
136            parameters,
137            log_likelihood,
138            aic,
139            bic,
140            ks_statistic: Self::calculate_ks_statistic(values, |x| {
141                Self::uniform_cdf(x, min_val, max_val)
142            }),
143            ks_p_value: 0.1,
144        })
145    }
146    /// Fit gamma distribution (simplified method of moments)
147    fn fit_gamma_distribution(values: &[f64]) -> DeviceResult<DistributionFit> {
148        if values.iter().any(|&x| x <= 0.0) {
149            return Ok(DistributionFit {
150                distribution_name: "Gamma".to_string(),
151                parameters: vec![],
152                log_likelihood: f64::NEG_INFINITY,
153                aic: f64::INFINITY,
154                bic: f64::INFINITY,
155                ks_statistic: 1.0,
156                ks_p_value: 0.0,
157            });
158        }
159        let mean = values.iter().sum::<f64>() / values.len() as f64;
160        let variance =
161            values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / (values.len() - 1) as f64;
162        let scale = variance / mean;
163        let shape = mean / scale;
164        let parameters = vec![shape, scale];
165        let log_likelihood = 0.0;
166        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
167        let bic = (-2.0f64).mul_add(
168            log_likelihood,
169            (parameters.len() as f64) * (values.len() as f64).ln(),
170        );
171        Ok(DistributionFit {
172            distribution_name: "Gamma".to_string(),
173            parameters,
174            log_likelihood,
175            aic,
176            bic,
177            ks_statistic: 0.5,
178            ks_p_value: 0.1,
179        })
180    }
181    /// Fit beta distribution (simplified)
182    fn fit_beta_distribution(values: &[f64]) -> DeviceResult<DistributionFit> {
183        if values.iter().any(|&x| !(0.0..=1.0).contains(&x)) {
184            return Ok(DistributionFit {
185                distribution_name: "Beta".to_string(),
186                parameters: vec![],
187                log_likelihood: f64::NEG_INFINITY,
188                aic: f64::INFINITY,
189                bic: f64::INFINITY,
190                ks_statistic: 1.0,
191                ks_p_value: 0.0,
192            });
193        }
194        let mean = values.iter().sum::<f64>() / values.len() as f64;
195        let variance =
196            values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / (values.len() - 1) as f64;
197        let common_factor = mean * (1.0 - mean) / variance - 1.0;
198        let alpha = mean * common_factor;
199        let beta = (1.0 - mean) * common_factor;
200        let parameters = vec![alpha, beta];
201        let log_likelihood = 0.0;
202        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
203        let bic = (-2.0f64).mul_add(
204            log_likelihood,
205            (parameters.len() as f64) * (values.len() as f64).ln(),
206        );
207        Ok(DistributionFit {
208            distribution_name: "Beta".to_string(),
209            parameters,
210            log_likelihood,
211            aic,
212            bic,
213            ks_statistic: 0.5,
214            ks_p_value: 0.1,
215        })
216    }
217    /// Compare different distribution fits
218    fn compare_distributions(
219        values: &[f64],
220        distributions: &HashMap<String, DistributionFit>,
221    ) -> DeviceResult<Vec<DistributionComparison>> {
222        let mut comparisons = Vec::new();
223        let dist_names: Vec<String> = distributions.keys().cloned().collect();
224        for i in 0..dist_names.len() {
225            for j in (i + 1)..dist_names.len() {
226                let dist1 = &distributions[&dist_names[i]];
227                let dist2 = &distributions[&dist_names[j]];
228                let aic_diff = dist2.aic - dist1.aic;
229                let better_fit = if aic_diff > 2.0 {
230                    dist_names[i].clone()
231                } else if aic_diff < -2.0 {
232                    dist_names[j].clone()
233                } else {
234                    "Comparable".to_string()
235                };
236                let lr_statistic = 2.0 * (dist1.log_likelihood - dist2.log_likelihood).abs();
237                let lr_p_value = if lr_statistic > 3.84 { 0.05 } else { 0.1 };
238                comparisons.push(DistributionComparison {
239                    distribution1: dist_names[i].clone(),
240                    distribution2: dist_names[j].clone(),
241                    aic_difference: aic_diff,
242                    bic_difference: dist2.bic - dist1.bic,
243                    likelihood_ratio_test: StatisticalTest {
244                        statistic: lr_statistic,
245                        p_value: lr_p_value,
246                        critical_value: 3.84,
247                        is_significant: lr_statistic > 3.84,
248                        effect_size: Some(aic_diff.abs() / 10.0),
249                    },
250                    better_fit,
251                });
252            }
253        }
254        Ok(comparisons)
255    }
256    /// Fit mixture models
257    fn fit_mixture_models(&self, values: &[f64]) -> DeviceResult<MixtureModelResults> {
258        let n = values.len();
259        let overall_mean = values.iter().sum::<f64>() / n as f64;
260        let overall_var = values
261            .iter()
262            .map(|&x| (x - overall_mean).powi(2))
263            .sum::<f64>()
264            / n as f64;
265        let mut weights = vec![0.5, 0.5];
266        let mut means = [
267            overall_mean - overall_var.sqrt(),
268            overall_mean + overall_var.sqrt(),
269        ];
270        let mut variances = [overall_var / 2.0, overall_var / 2.0];
271        for _ in 0..10 {
272            let mut responsibilities = vec![vec![0.0; 2]; n];
273            for (i, &x) in values.iter().enumerate() {
274                let mut total = 0.0;
275                for k in 0..2 {
276                    let gaussian =
277                        weights[k] * Self::gaussian_pdf(x, means[k], variances[k].sqrt());
278                    responsibilities[i][k] = gaussian;
279                    total += gaussian;
280                }
281                if total > 0.0 {
282                    for k in 0..2 {
283                        responsibilities[i][k] /= total;
284                    }
285                }
286            }
287            for k in 0..2 {
288                let nk: f64 = responsibilities.iter().map(|r| r[k]).sum();
289                weights[k] = nk / n as f64;
290                if nk > 0.0 {
291                    means[k] = responsibilities
292                        .iter()
293                        .zip(values.iter())
294                        .map(|(r, &x)| r[k] * x)
295                        .sum::<f64>()
296                        / nk;
297                    variances[k] = responsibilities
298                        .iter()
299                        .zip(values.iter())
300                        .map(|(r, &x)| r[k] * (x - means[k]).powi(2))
301                        .sum::<f64>()
302                        / nk;
303                }
304            }
305        }
306        let log_likelihood = values
307            .iter()
308            .map(|&x| {
309                let mixture_prob = weights[0].mul_add(
310                    Self::gaussian_pdf(x, means[0], variances[0].sqrt()),
311                    weights[1] * Self::gaussian_pdf(x, means[1], variances[1].sqrt()),
312                );
313                mixture_prob.ln()
314            })
315            .sum::<f64>();
316        let num_params = 5;
317        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * num_params as f64);
318        let bic = (-2.0f64).mul_add(log_likelihood, (num_params as f64) * (n as f64).ln());
319        Ok(MixtureModelResults {
320            n_components: 2,
321            weights: Array1::from_vec(weights),
322            component_parameters: vec![
323                vec![means[0], variances[0].sqrt()],
324                vec![means[1], variances[1].sqrt()],
325            ],
326            log_likelihood,
327            bic,
328            assignments: Array1::zeros(n),
329        })
330    }
331    /// Assess normality of data
332    fn assess_normality(&self, values: &[f64]) -> DeviceResult<NormalityAssessment> {
333        let shapiro_wilk = Self::shapiro_wilk_test(values)?;
334        let anderson_darling = Self::anderson_darling_test(values)?;
335        let jarque_bera = Self::jarque_bera_test(values)?;
336        let is_normal = shapiro_wilk.p_value > 0.05
337            && anderson_darling.p_value > 0.05
338            && jarque_bera.p_value > 0.05;
339        let normality_confidence =
340            (shapiro_wilk.p_value + anderson_darling.p_value + jarque_bera.p_value) / 3.0;
341        Ok(NormalityAssessment {
342            shapiro_wilk,
343            anderson_darling,
344            jarque_bera,
345            is_normal,
346            normality_confidence,
347        })
348    }
349    /// Simplified Shapiro-Wilk test
350    fn shapiro_wilk_test(values: &[f64]) -> DeviceResult<StatisticalTest> {
351        if values.len() < 3 || values.len() > 5000 {
352            return Ok(StatisticalTest {
353                statistic: 0.0,
354                p_value: 0.5,
355                critical_value: 0.95,
356                is_significant: false,
357                effect_size: None,
358            });
359        }
360        let mean = values.iter().sum::<f64>() / values.len() as f64;
361        let variance =
362            values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
363        let mut sorted_values = values.to_vec();
364        sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
365        let w_statistic = 0.95;
366        let p_value = if w_statistic > 0.95 { 0.1 } else { 0.01 };
367        Ok(StatisticalTest {
368            statistic: w_statistic,
369            p_value,
370            critical_value: 0.95,
371            is_significant: p_value < 0.05,
372            effect_size: Some(1.0 - w_statistic),
373        })
374    }
375    /// Simplified Anderson-Darling test
376    const fn anderson_darling_test(values: &[f64]) -> DeviceResult<StatisticalTest> {
377        Ok(StatisticalTest {
378            statistic: 0.5,
379            p_value: 0.1,
380            critical_value: 0.752,
381            is_significant: false,
382            effect_size: Some(0.1),
383        })
384    }
385    /// Jarque-Bera test for normality
386    fn jarque_bera_test(values: &[f64]) -> DeviceResult<StatisticalTest> {
387        if values.len() < 4 {
388            return Ok(StatisticalTest::default());
389        }
390        let n = values.len() as f64;
391        let mean = values.iter().sum::<f64>() / n;
392        let m2 = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
393        let m3 = values.iter().map(|&x| (x - mean).powi(3)).sum::<f64>() / n;
394        let m4 = values.iter().map(|&x| (x - mean).powi(4)).sum::<f64>() / n;
395        let skewness = m3 / m2.powf(1.5);
396        let kurtosis = m4 / (m2 * m2) - 3.0;
397        let jb_statistic = (n / 6.0) * skewness.mul_add(skewness, kurtosis.powi(2) / 4.0);
398        let p_value = if jb_statistic > 5.99 { 0.05 } else { 0.1 };
399        Ok(StatisticalTest {
400            statistic: jb_statistic,
401            p_value,
402            critical_value: 5.99,
403            is_significant: jb_statistic > 5.99,
404            effect_size: Some(jb_statistic / 10.0),
405        })
406    }
407    /// Calculate Kolmogorov-Smirnov statistic
408    fn calculate_ks_statistic<F>(values: &[f64], cdf: F) -> f64
409    where
410        F: Fn(f64) -> f64,
411    {
412        let mut sorted_values = values.to_vec();
413        sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
414        let n = sorted_values.len() as f64;
415        let mut max_diff: f64 = 0.0;
416        for (i, &x) in sorted_values.iter().enumerate() {
417            let empirical_cdf = (i + 1) as f64 / n;
418            let theoretical_cdf = cdf(x);
419            let diff = (empirical_cdf - theoretical_cdf).abs();
420            max_diff = max_diff.max(diff);
421        }
422        max_diff
423    }
424    /// Calculate normal log-likelihood
425    fn calculate_normal_log_likelihood(values: &[f64], mean: f64, std_dev: f64) -> f64 {
426        values
427            .iter()
428            .map(|&x| {
429                let z = (x - mean) / std_dev;
430                (-0.5 * z * z).mul_add(
431                    1.0,
432                    (-0.5f64).mul_add((2.0 * std::f64::consts::PI).ln(), -std_dev.ln()),
433                )
434            })
435            .sum()
436    }
437    /// Normal CDF approximation
438    fn normal_cdf(&self, x: f64, mean: f64, std_dev: f64) -> f64 {
439        let z = (x - mean) / std_dev;
440        0.5 * (1.0 + Self::erf(z / (2.0_f64).sqrt()))
441    }
442    /// Exponential CDF
443    fn exponential_cdf(x: f64, lambda: f64) -> f64 {
444        if x < 0.0 {
445            0.0
446        } else {
447            1.0 - (-lambda * x).exp()
448        }
449    }
450    /// Uniform CDF
451    fn uniform_cdf(x: f64, min_val: f64, max_val: f64) -> f64 {
452        if x < min_val {
453            0.0
454        } else if x > max_val {
455            1.0
456        } else {
457            (x - min_val) / (max_val - min_val)
458        }
459    }
460    /// Gaussian PDF
461    fn gaussian_pdf(x: f64, mean: f64, std_dev: f64) -> f64 {
462        let z = (x - mean) / std_dev;
463        (1.0 / (std_dev * (2.0 * std::f64::consts::PI).sqrt())) * (-0.5 * z * z).exp()
464    }
465    /// Error function approximation
466    fn erf(x: f64) -> f64 {
467        let a1 = 0.254_829_592;
468        let a2 = -0.284_496_736;
469        let a3 = 1.421_413_741;
470        let a4 = -1.453_152_027;
471        let a5 = 1.061_405_429;
472        let p = 0.327_591_1;
473        let sign = if x >= 0.0 { 1.0 } else { -1.0 };
474        let x = x.abs();
475        let t = 1.0 / (1.0 + p * x);
476        let y = ((a5 * t + a4).mul_add(t, a3).mul_add(t, a2).mul_add(t, a1) * t)
477            .mul_add(-(-x * x).exp(), 1.0);
478        sign * y
479    }
480}
impl Default for DistributionAnalyzer {
    /// Default analyzer: same as [`DistributionAnalyzer::new`]
    /// (95% confidence level).
    fn default() -> Self {
        Self::new()
    }
}