// lawkit_core/laws/poisson/result.rs
use crate::common::risk::RiskLevel;
use crate::error::{BenfError, Result};
use std::collections::HashMap;
/// Result of analyzing a dataset of event counts against a Poisson
/// distribution, with lambda estimated from the sample mean.
#[derive(Debug, Clone)]
pub struct PoissonResult {
    pub dataset_name: String,
    pub numbers_analyzed: usize,
    pub risk_level: RiskLevel,

    // Fitted parameter and summary statistics.
    pub lambda: f64, // MLE of the rate; equals sample_mean
    pub sample_mean: f64,
    pub sample_variance: f64,
    pub variance_ratio: f64, // variance / mean; 1.0 for an ideal Poisson process
    // Goodness-of-fit test results.
    pub chi_square_statistic: f64,
    pub chi_square_p_value: f64,
    pub kolmogorov_smirnov_statistic: f64,
    pub kolmogorov_smirnov_p_value: f64,
    pub goodness_of_fit_score: f64, // composite score in [0, 1]
    pub poisson_quality: f64,       // composite quality score in [0, 1]
    pub distribution_assessment: PoissonAssessment,

    // Observed vs theoretical frequency tables (key = event count).
    pub frequency_distribution: HashMap<u32, u32>,
    pub expected_frequencies: HashMap<u32, f64>,
    // Rare-event summary: counts >= ceil(lambda + 3 * sqrt(lambda)).
    pub rare_events_threshold: u32,
    pub rare_events_count: usize,
    // Point probabilities under the fitted distribution.
    pub probability_zero: f64,
    pub probability_one: f64,
    pub probability_two_or_more: f64,
    pub confidence_interval_lambda: (f64, f64), // approximate 95% CI (z = 1.96)
    // Time-based process diagnostics; `new` always leaves these as None
    // (it receives counts only, no timestamps).
    pub mean_time_between_events: Option<f64>,
    pub exponential_fit_quality: Option<f64>,
    pub is_homogeneous_process: Option<bool>,
}
46
/// Qualitative verdict on how well the data matches a Poisson distribution,
/// ordered from best fit to no fit.
#[derive(Debug, Clone, PartialEq)]
pub enum PoissonAssessment {
    Excellent,
    Good,
    Moderate,
    Poor,
    NonPoisson,
}
56
57impl PoissonResult {
58 pub fn new(dataset_name: String, numbers: &[f64]) -> Result<Self> {
59 if numbers.len() < 10 {
60 return Err(BenfError::InsufficientData(numbers.len()));
61 }
62
63 let mut event_counts: Vec<u32> = Vec::new();
65 for &num in numbers {
66 if num < 0.0 || num.fract() != 0.0 {
67 return Err(BenfError::ParseError(
68 "ポアソン分布分析には非負整数値が必要です".to_string(),
69 ));
70 }
71 event_counts.push(num as u32);
72 }
73
74 let sample_mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
76 let sample_variance = calculate_variance(numbers, sample_mean);
77 let variance_ratio = sample_variance / sample_mean;
78 let lambda = sample_mean; let frequency_distribution = create_frequency_distribution(&event_counts);
82 let expected_frequencies =
83 calculate_expected_frequencies(lambda, numbers.len(), &frequency_distribution);
84
85 let (chi_square_statistic, chi_square_p_value) =
87 chi_square_goodness_of_fit_test(&frequency_distribution, &expected_frequencies);
88
89 let (ks_statistic, ks_p_value) = kolmogorov_smirnov_poisson_test(&event_counts, lambda);
90
91 let goodness_of_fit_score =
93 calculate_goodness_of_fit_score(variance_ratio, chi_square_p_value, ks_p_value);
94 let poisson_quality = calculate_poisson_quality_score(variance_ratio, sample_mean);
95 let distribution_assessment =
96 assess_poisson_distribution(goodness_of_fit_score, variance_ratio);
97
98 let risk_level = determine_risk_level(goodness_of_fit_score, &distribution_assessment);
100
101 let rare_events_threshold = calculate_rare_events_threshold(lambda);
103 let rare_events_count = event_counts
104 .iter()
105 .filter(|&&x| x >= rare_events_threshold)
106 .count();
107
108 let probability_zero = poisson_probability(0, lambda);
110 let probability_one = poisson_probability(1, lambda);
111 let probability_two_or_more = 1.0 - probability_zero - probability_one;
112
113 let confidence_interval_lambda =
115 calculate_lambda_confidence_interval(sample_mean, numbers.len());
116
117 Ok(PoissonResult {
118 dataset_name,
119 numbers_analyzed: numbers.len(),
120 risk_level,
121 lambda,
122 sample_mean,
123 sample_variance,
124 variance_ratio,
125 chi_square_statistic,
126 chi_square_p_value,
127 kolmogorov_smirnov_statistic: ks_statistic,
128 kolmogorov_smirnov_p_value: ks_p_value,
129 goodness_of_fit_score,
130 poisson_quality,
131 distribution_assessment,
132 frequency_distribution,
133 expected_frequencies,
134 rare_events_threshold,
135 rare_events_count,
136 probability_zero,
137 probability_one,
138 probability_two_or_more,
139 confidence_interval_lambda,
140 mean_time_between_events: None, exponential_fit_quality: None,
142 is_homogeneous_process: None,
143 })
144 }
145}
146
/// Unbiased sample variance (divides by n - 1) around the supplied mean.
/// Callers guarantee at least two elements, so the divisor is never zero.
fn calculate_variance(numbers: &[f64], mean: f64) -> f64 {
    let total: f64 = numbers
        .iter()
        .map(|&x| {
            let d = x - mean;
            d * d
        })
        .sum();
    total / (numbers.len() - 1) as f64
}
152
/// Tallies how many times each event count occurs (count -> frequency).
fn create_frequency_distribution(event_counts: &[u32]) -> HashMap<u32, u32> {
    event_counts.iter().fold(HashMap::new(), |mut freq, &count| {
        *freq.entry(count).or_default() += 1;
        freq
    })
}
161
162fn calculate_expected_frequencies(
164 lambda: f64,
165 sample_size: usize,
166 observed: &HashMap<u32, u32>,
167) -> HashMap<u32, f64> {
168 let mut expected = HashMap::new();
169 let max_value = *observed.keys().max().unwrap_or(&0);
170
171 for k in 0..=max_value {
172 let probability = poisson_probability(k, lambda);
173 expected.insert(k, probability * sample_size as f64);
174 }
175
176 expected
177}
178
179fn poisson_probability(k: u32, lambda: f64) -> f64 {
181 if lambda <= 0.0 {
182 return if k == 0 { 1.0 } else { 0.0 };
183 }
184
185 let ln_prob = k as f64 * lambda.ln() - lambda - ln_factorial(k);
186 ln_prob.exp()
187}
188
/// Natural logarithm of n!, used for Poisson log-probabilities.
///
/// Uses exact summation (ln n! = sum of ln i) for n <= 20 and Stirling's
/// series with the 1/(12n) correction above that. The previous version
/// switched to the bare Stirling formula at n > 10, which has relative
/// error ~1/(12n) (about 0.008 at n = 11) and a visible discontinuity at
/// the switch point; the correction term keeps the error below ~3e-7 at
/// the new threshold.
fn ln_factorial(n: u32) -> f64 {
    if n <= 1 {
        return 0.0; // 0! = 1! = 1
    }
    if n <= 20 {
        // Exact: ln(n!) = sum_{i=2}^{n} ln(i)
        (2..=n).map(|i| (i as f64).ln()).sum()
    } else {
        // Stirling: ln(n!) ~= n ln n - n + 0.5 ln(2*pi*n) + 1/(12n)
        let x = n as f64;
        x * x.ln() - x + 0.5 * (2.0 * std::f64::consts::PI * x).ln() + 1.0 / (12.0 * x)
    }
}
204
205fn chi_square_goodness_of_fit_test(
207 observed: &HashMap<u32, u32>,
208 expected: &HashMap<u32, f64>,
209) -> (f64, f64) {
210 let mut chi_square = 0.0;
211 let mut degrees_of_freedom: i32 = 0;
212
213 for (&k, &obs_freq) in observed {
214 if let Some(&exp_freq) = expected.get(&k) {
215 if exp_freq >= 5.0 {
216 let diff = obs_freq as f64 - exp_freq;
218 chi_square += (diff * diff) / exp_freq;
219 degrees_of_freedom += 1;
220 }
221 }
222 }
223
224 degrees_of_freedom = degrees_of_freedom.saturating_sub(2); let p_value = estimate_chi_square_p_value(chi_square, degrees_of_freedom);
228
229 (chi_square, p_value)
230}
231
232fn kolmogorov_smirnov_poisson_test(event_counts: &[u32], lambda: f64) -> (f64, f64) {
234 let mut sorted_counts = event_counts.to_vec();
235 sorted_counts.sort();
236
237 let n = sorted_counts.len() as f64;
238 let mut max_diff: f64 = 0.0;
239
240 for (i, &k) in sorted_counts.iter().enumerate() {
241 let theoretical_cdf = poisson_cdf(k, lambda);
243
244 let empirical_cdf = (i + 1) as f64 / n;
246
247 let diff = (theoretical_cdf - empirical_cdf).abs();
248 max_diff = max_diff.max(diff);
249 }
250
251 let critical_value = 1.36 / n.sqrt();
253 let p_value = if max_diff > critical_value { 0.01 } else { 0.1 };
254
255 (max_diff, p_value)
256}
257
258fn poisson_cdf(k: u32, lambda: f64) -> f64 {
260 let mut cdf = 0.0;
261 for i in 0..=k {
262 cdf += poisson_probability(i, lambda);
263 }
264 cdf
265}
266
/// Composite goodness-of-fit score in [0, 1]: 60% weight on how close the
/// variance/mean ratio is to the Poisson ideal of 1.0, 40% on the average
/// of the two test p-values.
fn calculate_goodness_of_fit_score(variance_ratio: f64, chi_square_p: f64, ks_p: f64) -> f64 {
    // Non-positive ratio means the variance estimate is unusable: score 0.
    let variance_score = match variance_ratio {
        r if r > 0.0 => (1.0 / (1.0 + (r - 1.0).abs())).max(0.0),
        _ => 0.0,
    };

    let p_value_score = (chi_square_p + ks_p) / 2.0;

    (variance_score * 0.6 + p_value_score * 0.4).min(1.0)
}
283
/// Composite quality score in [0, 1]: 80% weight on the variance/mean
/// ratio's closeness to 1.0 (with a steeper penalty than the fit score),
/// 20% on whether the mean lies in a range where Poisson analysis is
/// well-behaved (0.1 to 10 events).
fn calculate_poisson_quality_score(variance_ratio: f64, mean: f64) -> f64 {
    let ratio_quality = if variance_ratio <= 0.0 {
        0.0
    } else {
        (1.0 / (1.0 + 2.0 * (variance_ratio - 1.0).abs())).max(0.0)
    };

    // Full marks inside [0.1, 10]; linear falloff below, hyperbolic above.
    let mean_quality = if mean > 10.0 {
        (10.0 / mean).min(1.0)
    } else if mean >= 0.1 {
        1.0
    } else {
        mean / 0.1
    };

    (ratio_quality * 0.8 + mean_quality * 0.2).min(1.0)
}
304
305fn assess_poisson_distribution(_goodness_score: f64, variance_ratio: f64) -> PoissonAssessment {
307 let ratio_deviation = (variance_ratio - 1.0).abs();
308
309 match (_goodness_score, ratio_deviation) {
310 (s, d) if s > 0.8 && d < 0.2 => PoissonAssessment::Excellent,
311 (s, d) if s > 0.6 && d < 0.5 => PoissonAssessment::Good,
312 (s, d) if s > 0.4 && d < 1.0 => PoissonAssessment::Moderate,
313 (s, d) if s > 0.2 && d < 2.0 => PoissonAssessment::Poor,
314 _ => PoissonAssessment::NonPoisson,
315 }
316}
317
318fn determine_risk_level(_goodness_score: f64, assessment: &PoissonAssessment) -> RiskLevel {
320 match assessment {
321 PoissonAssessment::Excellent => RiskLevel::Low,
322 PoissonAssessment::Good => RiskLevel::Low,
323 PoissonAssessment::Moderate => RiskLevel::Medium,
324 PoissonAssessment::Poor => RiskLevel::High,
325 PoissonAssessment::NonPoisson => RiskLevel::Critical,
326 }
327}
328
/// Threshold above which a count is considered rare: mean plus three
/// standard deviations (for Poisson, sigma = sqrt(lambda)), rounded up.
fn calculate_rare_events_threshold(lambda: f64) -> u32 {
    let sigma = lambda.sqrt();
    (lambda + 3.0 * sigma).ceil() as u32
}
334
/// Approximate 95% Wald confidence interval for lambda. For a Poisson
/// sample the variance of the mean is lambda / n, so the standard error is
/// sqrt(mean / n); the lower bound is clamped at zero since a rate cannot
/// be negative.
fn calculate_lambda_confidence_interval(sample_mean: f64, sample_size: usize) -> (f64, f64) {
    let std_error = (sample_mean / sample_size as f64).sqrt();
    let half_width = 1.96 * std_error; // z for a two-sided 95% interval
    let lower = (sample_mean - half_width).max(0.0);
    let upper = sample_mean + half_width;
    (lower, upper)
}
343
344fn estimate_chi_square_p_value(chi_square: f64, df: i32) -> f64 {
346 if df <= 0 {
347 return 1.0;
348 }
349
350 let critical_values = match df {
352 1 => vec![(3.84, 0.05), (6.64, 0.01), (10.83, 0.001)],
353 2 => vec![(5.99, 0.05), (9.21, 0.01), (13.82, 0.001)],
354 3 => vec![(7.81, 0.05), (11.34, 0.01), (16.27, 0.001)],
355 _ => vec![(9.49, 0.05), (13.28, 0.01), (18.47, 0.001)], };
357
358 for (critical, alpha) in critical_values {
359 if chi_square < critical {
360 return 1.0 - alpha;
361 }
362 }
363
364 0.001
365}