use super::super::results::*;
use crate::DeviceResult;
use scirs2_core::ndarray::Array1;
use std::collections::HashMap;
/// Fits candidate probability distributions to a sample, compares the fits,
/// and assesses normality.
pub struct DistributionAnalyzer {
    /// Confidence level for statistical decisions (currently reserved; the
    /// fitting routines below do not yet consult it).
    confidence_level: f64,
}
impl DistributionAnalyzer {
    /// Creates an analyzer with the default 95% confidence level.
    pub const fn new() -> Self {
        Self {
            confidence_level: 0.95,
        }
    }
    /// Creates an analyzer with a caller-specified confidence level.
    pub const fn with_confidence_level(confidence_level: f64) -> Self {
        Self { confidence_level }
    }
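    /// Runs the full analysis pipeline: per-distribution fits, pairwise model
    /// comparisons, an optional two-component Gaussian mixture (only when
    /// more than 50 samples are available), and a battery of normality tests.
    ///
    /// A minimal usage sketch; `ignore`d because the result types come from
    /// the sibling `results` module and are not constructed here:
    ///
    /// ```ignore
    /// let analyzer = DistributionAnalyzer::new();
    /// let samples: Vec<f64> = vec![0.12, 0.31, 0.27, 0.45, 0.22];
    /// let report = analyzer.analyze(&samples)?;
    /// println!("normal? {}", report.normality_assessment.is_normal);
    /// ```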
    pub fn analyze(&self, values: &[f64]) -> DeviceResult<DistributionAnalysisResults> {
        if values.is_empty() {
            return Ok(DistributionAnalysisResults::default());
        }
        let best_fit_distributions = self.fit_distributions(values)?;
        let distribution_comparisons =
            Self::compare_distributions(values, &best_fit_distributions)?;
        // Mixture modelling needs a reasonably large sample to be stable.
        let mixture_models = if values.len() > 50 {
            Some(self.fit_mixture_models(values)?)
        } else {
            None
        };
        let normality_assessment = self.assess_normality(values)?;
        Ok(DistributionAnalysisResults {
            best_fit_distributions,
            distribution_comparisons,
            mixture_models,
            normality_assessment,
        })
    }
    /// Fits each candidate distribution and returns the fits keyed by name.
    fn fit_distributions(&self, values: &[f64]) -> DeviceResult<HashMap<String, DistributionFit>> {
        let mut distributions = HashMap::new();
        let normal_fit = self.fit_normal_distribution(values)?;
        distributions.insert("normal".to_string(), normal_fit);
        let exponential_fit = self.fit_exponential_distribution(values)?;
        distributions.insert("exponential".to_string(), exponential_fit);
        let uniform_fit = self.fit_uniform_distribution(values)?;
        distributions.insert("uniform".to_string(), uniform_fit);
        let gamma_fit = Self::fit_gamma_distribution(values)?;
        distributions.insert("gamma".to_string(), gamma_fit);
        let beta_fit = Self::fit_beta_distribution(values)?;
        distributions.insert("beta".to_string(), beta_fit);
        Ok(distributions)
    }
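    /// Fits a normal distribution from the sample mean and the
    /// Bessel-corrected sample standard deviation. The model-selection scores
    /// follow the usual definitions,
    ///
    ///   AIC = 2k - 2 ln L,    BIC = k ln n - 2 ln L,
    ///
    /// with k parameters and n observations; the same scores are used by the
    /// other `fit_*` methods below.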
    fn fit_normal_distribution(&self, values: &[f64]) -> DeviceResult<DistributionFit> {
        let mean = values.iter().sum::<f64>() / values.len() as f64;
        // Guard the Bessel correction against single-element samples.
        let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
            / values.len().saturating_sub(1).max(1) as f64;
        let std_dev = variance.sqrt();
        let parameters = vec![mean, std_dev];
        let log_likelihood = Self::calculate_normal_log_likelihood(values, mean, std_dev);
        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
        let bic = (-2.0f64).mul_add(
            log_likelihood,
            (parameters.len() as f64) * (values.len() as f64).ln(),
        );
        Ok(DistributionFit {
            distribution_name: "Normal".to_string(),
            parameters,
            log_likelihood,
            aic,
            bic,
            ks_statistic: Self::calculate_ks_statistic(values, |x| {
                Self::normal_cdf(x, mean, std_dev)
            }),
            // Placeholder p-value; a proper value would come from the KS
            // distribution of the statistic above.
            ks_p_value: 0.1,
        })
    }
    /// Fits an exponential distribution; the maximum-likelihood rate is the
    /// reciprocal of the sample mean.
    fn fit_exponential_distribution(&self, values: &[f64]) -> DeviceResult<DistributionFit> {
        // Non-positive samples make the rate estimate degenerate, so report
        // a worst-case fit instead of estimating parameters.
        if values.iter().any(|&x| x <= 0.0) {
            return Ok(DistributionFit {
                distribution_name: "Exponential".to_string(),
                parameters: vec![],
                log_likelihood: f64::NEG_INFINITY,
                aic: f64::INFINITY,
                bic: f64::INFINITY,
                ks_statistic: 1.0,
                ks_p_value: 0.0,
            });
        }
        let lambda = 1.0 / (values.iter().sum::<f64>() / values.len() as f64);
        let parameters = vec![lambda];
        // ln f(x) = ln(lambda) - lambda * x for x >= 0.
        let log_likelihood = values
            .iter()
            .map(|&x| lambda.ln() - lambda * x)
            .sum::<f64>();
        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
        let bic = (-2.0f64).mul_add(
            log_likelihood,
            (parameters.len() as f64) * (values.len() as f64).ln(),
        );
        Ok(DistributionFit {
            distribution_name: "Exponential".to_string(),
            parameters,
            log_likelihood,
            aic,
            bic,
            ks_statistic: Self::calculate_ks_statistic(values, |x| {
                Self::exponential_cdf(x, lambda)
            }),
            // Placeholder p-value, as in the other fits.
            ks_p_value: 0.1,
        })
    }
    /// Fits a uniform distribution on the observed sample range.
    fn fit_uniform_distribution(&self, values: &[f64]) -> DeviceResult<DistributionFit> {
        let min_val = values.iter().copied().fold(f64::INFINITY, f64::min);
        let max_val = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        let parameters = vec![min_val, max_val];
        let range = max_val - min_val;
        // Each observation contributes -ln(range); a zero-width range is
        // treated as an unusable fit.
        let log_likelihood = if range > 0.0 {
            -(values.len() as f64) * range.ln()
        } else {
            f64::NEG_INFINITY
        };
        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
        let bic = (-2.0f64).mul_add(
            log_likelihood,
            (parameters.len() as f64) * (values.len() as f64).ln(),
        );
        Ok(DistributionFit {
            distribution_name: "Uniform".to_string(),
            parameters,
            log_likelihood,
            aic,
            bic,
            ks_statistic: Self::calculate_ks_statistic(values, |x| {
                Self::uniform_cdf(x, min_val, max_val)
            }),
            // Placeholder p-value, as in the other fits.
            ks_p_value: 0.1,
        })
    }
    /// Fits a gamma distribution via the method of moments:
    /// shape = mean^2 / variance, scale = variance / mean.
    fn fit_gamma_distribution(values: &[f64]) -> DeviceResult<DistributionFit> {
        // The gamma distribution is supported on positive reals only.
        if values.iter().any(|&x| x <= 0.0) {
            return Ok(DistributionFit {
                distribution_name: "Gamma".to_string(),
                parameters: vec![],
                log_likelihood: f64::NEG_INFINITY,
                aic: f64::INFINITY,
                bic: f64::INFINITY,
                ks_statistic: 1.0,
                ks_p_value: 0.0,
            });
        }
        let mean = values.iter().sum::<f64>() / values.len() as f64;
        let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
            / values.len().saturating_sub(1).max(1) as f64;
        let scale = variance / mean;
        let shape = mean / scale;
        let parameters = vec![shape, scale];
        // Placeholder: evaluating the gamma log-likelihood needs
        // ln Gamma(shape), which is not implemented here.
        let log_likelihood = 0.0;
        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
        let bic = (-2.0f64).mul_add(
            log_likelihood,
            (parameters.len() as f64) * (values.len() as f64).ln(),
        );
        Ok(DistributionFit {
            distribution_name: "Gamma".to_string(),
            parameters,
            log_likelihood,
            aic,
            bic,
            // Placeholder goodness-of-fit values.
            ks_statistic: 0.5,
            ks_p_value: 0.1,
        })
    }
    /// Fits a beta distribution via the method of moments: with
    /// c = mean (1 - mean) / variance - 1, alpha = mean * c and
    /// beta = (1 - mean) * c.
    fn fit_beta_distribution(values: &[f64]) -> DeviceResult<DistributionFit> {
        // The beta distribution is supported on [0, 1] only.
        if values.iter().any(|&x| !(0.0..=1.0).contains(&x)) {
            return Ok(DistributionFit {
                distribution_name: "Beta".to_string(),
                parameters: vec![],
                log_likelihood: f64::NEG_INFINITY,
                aic: f64::INFINITY,
                bic: f64::INFINITY,
                ks_statistic: 1.0,
                ks_p_value: 0.0,
            });
        }
        let mean = values.iter().sum::<f64>() / values.len() as f64;
        let variance = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
            / values.len().saturating_sub(1).max(1) as f64;
        let common_factor = mean * (1.0 - mean) / variance - 1.0;
        let alpha = mean * common_factor;
        let beta = (1.0 - mean) * common_factor;
        let parameters = vec![alpha, beta];
        // Placeholder: the beta log-likelihood needs the beta function, which
        // is not implemented here.
        let log_likelihood = 0.0;
        let aic = (-2.0f64).mul_add(log_likelihood, 2.0 * parameters.len() as f64);
        let bic = (-2.0f64).mul_add(
            log_likelihood,
            (parameters.len() as f64) * (values.len() as f64).ln(),
        );
        Ok(DistributionFit {
            distribution_name: "Beta".to_string(),
            parameters,
            log_likelihood,
            aic,
            bic,
            // Placeholder goodness-of-fit values.
            ks_statistic: 0.5,
            ks_p_value: 0.1,
        })
    }
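    /// Compares every pair of fitted distributions. A fit is declared better
    /// when its AIC is lower by more than 2, a common rule of thumb; smaller
    /// gaps are reported as "Comparable". Note that the likelihood-ratio test
    /// attached to each comparison is, strictly speaking, only valid for
    /// nested models; here it serves as a rough screening statistic, with
    /// 3.84 being the 5% critical value of chi-squared with one degree of
    /// freedom.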
    fn compare_distributions(
        _values: &[f64],
        distributions: &HashMap<String, DistributionFit>,
    ) -> DeviceResult<Vec<DistributionComparison>> {
        let mut comparisons = Vec::new();
        let dist_names: Vec<String> = distributions.keys().cloned().collect();
        for i in 0..dist_names.len() {
            for j in (i + 1)..dist_names.len() {
                let dist1 = &distributions[&dist_names[i]];
                let dist2 = &distributions[&dist_names[j]];
                // Positive difference means dist1 has the lower (better) AIC.
                let aic_diff = dist2.aic - dist1.aic;
                let better_fit = if aic_diff > 2.0 {
                    dist_names[i].clone()
                } else if aic_diff < -2.0 {
                    dist_names[j].clone()
                } else {
                    "Comparable".to_string()
                };
                let lr_statistic = 2.0 * (dist1.log_likelihood - dist2.log_likelihood).abs();
                // Coarse placeholder p-value keyed to the chi-squared(1)
                // critical value at the 5% level.
                let lr_p_value = if lr_statistic > 3.84 { 0.05 } else { 0.1 };
                comparisons.push(DistributionComparison {
                    distribution1: dist_names[i].clone(),
                    distribution2: dist_names[j].clone(),
                    aic_difference: aic_diff,
                    bic_difference: dist2.bic - dist1.bic,
                    likelihood_ratio_test: StatisticalTest {
                        statistic: lr_statistic,
                        p_value: lr_p_value,
                        critical_value: 3.84,
                        is_significant: lr_statistic > 3.84,
                        // Heuristic effect size derived from the AIC gap.
                        effect_size: Some(aic_diff.abs() / 10.0),
                    },
                    better_fit,
                });
            }
        }
        Ok(comparisons)
    }
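    /// Fits a two-component Gaussian mixture with a fixed-iteration EM loop.
    /// E-step: responsibilities
    ///   r_ik = w_k N(x_i | mu_k, sigma_k^2) / sum_j w_j N(x_i | mu_j, sigma_j^2).
    /// M-step: with N_k = sum_i r_ik,
    ///   w_k = N_k / n,
    ///   mu_k = sum_i r_ik x_i / N_k,
    ///   sigma_k^2 = sum_i r_ik (x_i - mu_k)^2 / N_k.
    /// The components are seeded one standard deviation on either side of the
    /// overall mean so EM does not start from a symmetric (degenerate)
    /// configuration.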
    fn fit_mixture_models(&self, values: &[f64]) -> DeviceResult<MixtureModelResults> {
        let n = values.len();
        let overall_mean = values.iter().sum::<f64>() / n as f64;
        let overall_var = values
            .iter()
            .map(|&x| (x - overall_mean).powi(2))
            .sum::<f64>()
            / n as f64;
        // Initialize two components on either side of the overall mean.
        let mut weights = vec![0.5, 0.5];
        let mut means = [
            overall_mean - overall_var.sqrt(),
            overall_mean + overall_var.sqrt(),
        ];
        let mut variances = [overall_var / 2.0, overall_var / 2.0];
        // Fixed number of EM iterations; enough for a rough two-component fit.
        for _ in 0..10 {
            // E-step: compute responsibilities.
            let mut responsibilities = vec![vec![0.0; 2]; n];
            for (i, &x) in values.iter().enumerate() {
                let mut total = 0.0;
                for k in 0..2 {
                    let gaussian =
                        weights[k] * Self::gaussian_pdf(x, means[k], variances[k].sqrt());
                    responsibilities[i][k] = gaussian;
                    total += gaussian;
                }
                if total > 0.0 {
                    for k in 0..2 {
                        responsibilities[i][k] /= total;
                    }
                }
            }
            // M-step: update weights, means, and variances.
            for k in 0..2 {
                let nk: f64 = responsibilities.iter().map(|r| r[k]).sum();
                weights[k] = nk / n as f64;
                if nk > 0.0 {
                    means[k] = responsibilities
                        .iter()
                        .zip(values.iter())
                        .map(|(r, &x)| r[k] * x)
                        .sum::<f64>()
                        / nk;
                    variances[k] = responsibilities
                        .iter()
                        .zip(values.iter())
                        .map(|(r, &x)| r[k] * (x - means[k]).powi(2))
                        .sum::<f64>()
                        / nk;
                }
            }
        }
        let log_likelihood = values
            .iter()
            .map(|&x| {
                let mixture_prob = weights[0].mul_add(
                    Self::gaussian_pdf(x, means[0], variances[0].sqrt()),
                    weights[1] * Self::gaussian_pdf(x, means[1], variances[1].sqrt()),
                );
                mixture_prob.ln()
            })
            .sum::<f64>();
        // Two means, two variances, and one free mixing weight.
        let num_params = 5;
        let bic = (-2.0f64).mul_add(log_likelihood, (num_params as f64) * (n as f64).ln());
        Ok(MixtureModelResults {
            n_components: 2,
            weights: Array1::from_vec(weights),
            component_parameters: vec![
                vec![means[0], variances[0].sqrt()],
                vec![means[1], variances[1].sqrt()],
            ],
            log_likelihood,
            bic,
            // Placeholder: hard assignments are not derived from the final
            // responsibilities here.
            assignments: Array1::zeros(n),
        })
    }
    /// Combines three normality tests; the sample is treated as normal only
    /// when none of them rejects at the 5% level.
    fn assess_normality(&self, values: &[f64]) -> DeviceResult<NormalityAssessment> {
        let shapiro_wilk = Self::shapiro_wilk_test(values)?;
        let anderson_darling = Self::anderson_darling_test(values)?;
        let jarque_bera = Self::jarque_bera_test(values)?;
        let is_normal = shapiro_wilk.p_value > 0.05
            && anderson_darling.p_value > 0.05
            && jarque_bera.p_value > 0.05;
        // Crude aggregate confidence: the mean of the three p-values.
        let normality_confidence =
            (shapiro_wilk.p_value + anderson_darling.p_value + jarque_bera.p_value) / 3.0;
        Ok(NormalityAssessment {
            shapiro_wilk,
            anderson_darling,
            jarque_bera,
            is_normal,
            normality_confidence,
        })
    }
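    /// Placeholder Shapiro-Wilk test. A full implementation needs the
    /// order-statistic coefficients of the W statistic, so this stub returns
    /// a fixed W of 0.95 and only validates the sample-size range (the test
    /// is conventionally applied to samples of 3 to 5000 observations).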
    fn shapiro_wilk_test(values: &[f64]) -> DeviceResult<StatisticalTest> {
        if values.len() < 3 || values.len() > 5000 {
            return Ok(StatisticalTest {
                statistic: 0.0,
                p_value: 0.5,
                critical_value: 0.95,
                is_significant: false,
                effect_size: None,
            });
        }
        // Stub statistic; see the doc comment above.
        let w_statistic = 0.95;
        let p_value = if w_statistic >= 0.95 { 0.1 } else { 0.01 };
        Ok(StatisticalTest {
            statistic: w_statistic,
            p_value,
            critical_value: 0.95,
            is_significant: p_value < 0.05,
            effect_size: Some(1.0 - w_statistic),
        })
    }
    /// Placeholder Anderson-Darling test: returns a fixed, non-significant
    /// result (0.752 is a commonly tabulated 5% critical value for the
    /// normality case with estimated parameters).
    const fn anderson_darling_test(_values: &[f64]) -> DeviceResult<StatisticalTest> {
        Ok(StatisticalTest {
            statistic: 0.5,
            p_value: 0.1,
            critical_value: 0.752,
            is_significant: false,
            effect_size: Some(0.1),
        })
    }
    /// Jarque-Bera normality test: JB = (n / 6) * (S^2 + K^2 / 4), where S is
    /// the sample skewness and K the excess kurtosis. Under normality JB is
    /// asymptotically chi-squared with 2 degrees of freedom (5% critical
    /// value 5.99).
    fn jarque_bera_test(values: &[f64]) -> DeviceResult<StatisticalTest> {
        // The third and fourth moments are meaningless for tiny samples.
        if values.len() < 4 {
            return Ok(StatisticalTest::default());
        }
        let n = values.len() as f64;
        let mean = values.iter().sum::<f64>() / n;
        let m2 = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
        let m3 = values.iter().map(|&x| (x - mean).powi(3)).sum::<f64>() / n;
        let m4 = values.iter().map(|&x| (x - mean).powi(4)).sum::<f64>() / n;
        let skewness = m3 / m2.powf(1.5);
        let kurtosis = m4 / (m2 * m2) - 3.0;
        let jb_statistic = (n / 6.0) * skewness.mul_add(skewness, kurtosis.powi(2) / 4.0);
        // Coarse placeholder p-value keyed to the critical value.
        let p_value = if jb_statistic > 5.99 { 0.05 } else { 0.1 };
        Ok(StatisticalTest {
            statistic: jb_statistic,
            p_value,
            critical_value: 5.99,
            is_significant: jb_statistic > 5.99,
            effect_size: Some(jb_statistic / 10.0),
        })
    }
    /// Kolmogorov-Smirnov statistic D = sup_x |F_n(x) - F(x)| between the
    /// empirical CDF of `values` and a theoretical CDF.
    fn calculate_ks_statistic<F>(values: &[f64], cdf: F) -> f64
    where
        F: Fn(f64) -> f64,
    {
        let mut sorted_values = values.to_vec();
        sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let n = sorted_values.len() as f64;
        let mut max_diff: f64 = 0.0;
        for (i, &x) in sorted_values.iter().enumerate() {
            let theoretical_cdf = cdf(x);
            // The empirical CDF jumps at each order statistic, so check the
            // deviation both just below (i/n) and just above ((i+1)/n) it.
            let d_plus = (i + 1) as f64 / n - theoretical_cdf;
            let d_minus = theoretical_cdf - i as f64 / n;
            max_diff = max_diff.max(d_plus.max(d_minus));
        }
        max_diff
    }
    /// Exact log-likelihood of `values` under N(mean, std_dev^2): each x
    /// contributes -0.5 z^2 - ln(std_dev) - 0.5 ln(2 pi), z = (x - mean) / std_dev.
    fn calculate_normal_log_likelihood(values: &[f64], mean: f64, std_dev: f64) -> f64 {
        let ln_norm = -std_dev.ln() - 0.5 * (2.0 * std::f64::consts::PI).ln();
        values
            .iter()
            .map(|&x| {
                let z = (x - mean) / std_dev;
                -0.5 * z * z + ln_norm
            })
            .sum()
    }
    /// Normal CDF via the error function: Phi(z) = 0.5 * (1 + erf(z / sqrt(2))).
    fn normal_cdf(x: f64, mean: f64, std_dev: f64) -> f64 {
        let z = (x - mean) / std_dev;
        0.5 * (1.0 + Self::erf(z / std::f64::consts::SQRT_2))
    }
    /// Exponential CDF: F(x) = 1 - exp(-lambda x) for x >= 0, else 0.
    fn exponential_cdf(x: f64, lambda: f64) -> f64 {
        if x < 0.0 {
            0.0
        } else {
            1.0 - (-lambda * x).exp()
        }
    }
    /// Uniform CDF on [min_val, max_val], clamped to 0 and 1 outside the range.
    fn uniform_cdf(x: f64, min_val: f64, max_val: f64) -> f64 {
        if x < min_val {
            0.0
        } else if x > max_val {
            1.0
        } else {
            (x - min_val) / (max_val - min_val)
        }
    }
    /// Normal density N(x | mean, std_dev^2).
    fn gaussian_pdf(x: f64, mean: f64, std_dev: f64) -> f64 {
        let z = (x - mean) / std_dev;
        (1.0 / (std_dev * (2.0 * std::f64::consts::PI).sqrt())) * (-0.5 * z * z).exp()
    }
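    /// Error function via the Abramowitz & Stegun 7.1.26 rational
    /// approximation; the maximum absolute error is about 1.5e-7, which is
    /// ample for the CDF comparisons above.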
    fn erf(x: f64) -> f64 {
        // Coefficients from Abramowitz & Stegun, eq. 7.1.26.
        let a1 = 0.254_829_592;
        let a2 = -0.284_496_736;
        let a3 = 1.421_413_741;
        let a4 = -1.453_152_027;
        let a5 = 1.061_405_429;
        let p = 0.327_591_1;
        // erf is odd, so evaluate on |x| and restore the sign at the end.
        let sign = if x >= 0.0 { 1.0 } else { -1.0 };
        let x = x.abs();
        let t = 1.0 / (1.0 + p * x);
        // Horner evaluation of the degree-5 polynomial in t.
        let poly = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t;
        let y = 1.0 - poly * (-x * x).exp();
        sign * y
    }
}
impl Default for DistributionAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}
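// A few sanity tests sketched against the closed-form helpers above. They
// only exercise the pure numeric routines in this module, so they hold
// regardless of how the surrounding result types evolve.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn ks_statistic_matches_hand_computation() {
        // For samples {0.25, 0.5, 0.75} against U(0, 1), the largest gap
        // between the empirical and theoretical CDFs is 0.25.
        let values = [0.25, 0.5, 0.75];
        let d = DistributionAnalyzer::calculate_ks_statistic(&values, |x| {
            DistributionAnalyzer::uniform_cdf(x, 0.0, 1.0)
        });
        assert!((d - 0.25).abs() < 1e-12);
    }

    #[test]
    fn exponential_cdf_known_values() {
        // F(x) = 0 for negative x; F(1) = 1 - e^{-1} for lambda = 1.
        assert_eq!(DistributionAnalyzer::exponential_cdf(-1.0, 1.0), 0.0);
        let f1 = DistributionAnalyzer::exponential_cdf(1.0, 1.0);
        assert!((f1 - (1.0 - (-1.0f64).exp())).abs() < 1e-12);
    }

    #[test]
    fn erf_approximation_is_accurate() {
        // erf(1) = 0.8427007929...; the approximation is good to ~1.5e-7.
        assert!((DistributionAnalyzer::erf(1.0) - 0.842_700_793).abs() < 1e-6);
        // erf is odd by construction.
        let sum = DistributionAnalyzer::erf(-1.0) + DistributionAnalyzer::erf(1.0);
        assert!(sum.abs() < 1e-12);
    }

    #[test]
    fn standard_normal_log_likelihood_of_zero() {
        // A single observation at the mean of N(0, 1) contributes
        // -0.5 * ln(2 pi) ~= -0.9189.
        let ll = DistributionAnalyzer::calculate_normal_log_likelihood(&[0.0], 0.0, 1.0);
        assert!((ll + 0.5 * (2.0 * std::f64::consts::PI).ln()).abs() < 1e-12);
    }
}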