lawkit_core/laws/poisson/
result.rs

1use crate::common::risk::RiskLevel;
2use crate::error::{BenfError, Result};
3use std::collections::HashMap;
4
5/// ポアソン分布分析結果
6#[derive(Debug, Clone)]
7pub struct PoissonResult {
8    pub dataset_name: String,
9    pub numbers_analyzed: usize,
10    pub risk_level: RiskLevel,
11
12    // ポアソン分布パラメータ
13    pub lambda: f64,          // 平均発生率(λ)
14    pub sample_mean: f64,     // 標本平均
15    pub sample_variance: f64, // 標本分散
16    pub variance_ratio: f64,  // 分散/平均比(1に近いほどポアソン分布に適合)
17
18    // 適合度検定
19    pub chi_square_statistic: f64,         // カイ二乗検定統計量
20    pub chi_square_p_value: f64,           // カイ二乗検定p値
21    pub kolmogorov_smirnov_statistic: f64, // KS検定統計量
22    pub kolmogorov_smirnov_p_value: f64,   // KS検定p値
23
24    // 適合度評価
25    pub goodness_of_fit_score: f64, // 適合度総合スコア(0-1)
26    pub poisson_quality: f64,       // ポアソン性品質スコア
27    pub distribution_assessment: PoissonAssessment,
28
29    // 発生頻度分析
30    pub frequency_distribution: HashMap<u32, u32>, // 値 -> 出現回数
31    pub expected_frequencies: HashMap<u32, f64>,   // 理論期待値
32    pub rare_events_threshold: u32,                // 稀少事象の閾値
33    pub rare_events_count: usize,                  // 稀少事象の数
34
35    // 予測・推定
36    pub probability_zero: f64,                  // 発生確率0の確率
37    pub probability_one: f64,                   // 発生確率1の確率
38    pub probability_two_or_more: f64,           // 2回以上発生の確率
39    pub confidence_interval_lambda: (f64, f64), // λの95%信頼区間
40
41    // 時系列特性(時間間隔データの場合)
42    pub mean_time_between_events: Option<f64>, // 平均発生間隔
43    pub exponential_fit_quality: Option<f64>,  // 指数分布適合度
44    pub is_homogeneous_process: Option<bool>,  // 斉次過程かどうか
45}
46
47/// ポアソン分布適合度評価
48#[derive(Debug, Clone, PartialEq)]
49pub enum PoissonAssessment {
50    Excellent,  // 優秀なポアソン適合
51    Good,       // 良好なポアソン適合
52    Moderate,   // 中程度のポアソン適合
53    Poor,       // 不十分なポアソン適合
54    NonPoisson, // ポアソン分布でない
55}
56
57impl PoissonResult {
58    pub fn new(dataset_name: String, numbers: &[f64]) -> Result<Self> {
59        if numbers.len() < 10 {
60            return Err(BenfError::InsufficientData(numbers.len()));
61        }
62
63        // 非負整数値チェック
64        let mut event_counts: Vec<u32> = Vec::new();
65        for &num in numbers {
66            if num < 0.0 || num.fract() != 0.0 {
67                return Err(BenfError::ParseError(
68                    "ポアソン分布分析には非負整数値が必要です".to_string(),
69                ));
70            }
71            event_counts.push(num as u32);
72        }
73
74        // 基本統計計算
75        let sample_mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
76        let sample_variance = calculate_variance(numbers, sample_mean);
77        let variance_ratio = sample_variance / sample_mean;
78        let lambda = sample_mean; // MLE推定値
79
80        // 頻度分布作成
81        let frequency_distribution = create_frequency_distribution(&event_counts);
82        let expected_frequencies =
83            calculate_expected_frequencies(lambda, numbers.len(), &frequency_distribution);
84
85        // 適合度検定
86        let (chi_square_statistic, chi_square_p_value) =
87            chi_square_goodness_of_fit_test(&frequency_distribution, &expected_frequencies);
88
89        let (ks_statistic, ks_p_value) = kolmogorov_smirnov_poisson_test(&event_counts, lambda);
90
91        // 適合度評価
92        let goodness_of_fit_score =
93            calculate_goodness_of_fit_score(variance_ratio, chi_square_p_value, ks_p_value);
94        let poisson_quality = calculate_poisson_quality_score(variance_ratio, sample_mean);
95        let distribution_assessment =
96            assess_poisson_distribution(goodness_of_fit_score, variance_ratio);
97
98        // リスク評価
99        let risk_level = determine_risk_level(goodness_of_fit_score, &distribution_assessment);
100
101        // 稀少事象分析
102        let rare_events_threshold = calculate_rare_events_threshold(lambda);
103        let rare_events_count = event_counts
104            .iter()
105            .filter(|&&x| x >= rare_events_threshold)
106            .count();
107
108        // 確率計算
109        let probability_zero = poisson_probability(0, lambda);
110        let probability_one = poisson_probability(1, lambda);
111        let probability_two_or_more = 1.0 - probability_zero - probability_one;
112
113        // 信頼区間
114        let confidence_interval_lambda =
115            calculate_lambda_confidence_interval(sample_mean, numbers.len());
116
117        Ok(PoissonResult {
118            dataset_name,
119            numbers_analyzed: numbers.len(),
120            risk_level,
121            lambda,
122            sample_mean,
123            sample_variance,
124            variance_ratio,
125            chi_square_statistic,
126            chi_square_p_value,
127            kolmogorov_smirnov_statistic: ks_statistic,
128            kolmogorov_smirnov_p_value: ks_p_value,
129            goodness_of_fit_score,
130            poisson_quality,
131            distribution_assessment,
132            frequency_distribution,
133            expected_frequencies,
134            rare_events_threshold,
135            rare_events_count,
136            probability_zero,
137            probability_one,
138            probability_two_or_more,
139            confidence_interval_lambda,
140            mean_time_between_events: None, // 時系列分析は将来実装
141            exponential_fit_quality: None,
142            is_homogeneous_process: None,
143        })
144    }
145}
146
147/// 分散計算
148fn calculate_variance(numbers: &[f64], mean: f64) -> f64 {
149    let sum_squared_diff: f64 = numbers.iter().map(|&x| (x - mean).powi(2)).sum();
150    sum_squared_diff / (numbers.len() - 1) as f64
151}
152
153/// 頻度分布作成
154fn create_frequency_distribution(event_counts: &[u32]) -> HashMap<u32, u32> {
155    let mut freq_dist = HashMap::new();
156    for &count in event_counts {
157        *freq_dist.entry(count).or_insert(0) += 1;
158    }
159    freq_dist
160}
161
162/// 期待頻度計算
163fn calculate_expected_frequencies(
164    lambda: f64,
165    sample_size: usize,
166    observed: &HashMap<u32, u32>,
167) -> HashMap<u32, f64> {
168    let mut expected = HashMap::new();
169    let max_value = *observed.keys().max().unwrap_or(&0);
170
171    for k in 0..=max_value {
172        let probability = poisson_probability(k, lambda);
173        expected.insert(k, probability * sample_size as f64);
174    }
175
176    expected
177}
178
179/// ポアソン確率計算 P(X = k) = (λ^k * e^(-λ)) / k!
180fn poisson_probability(k: u32, lambda: f64) -> f64 {
181    if lambda <= 0.0 {
182        return if k == 0 { 1.0 } else { 0.0 };
183    }
184
185    let ln_prob = k as f64 * lambda.ln() - lambda - ln_factorial(k);
186    ln_prob.exp()
187}
188
189/// 対数階乗計算(数値安定性のため)
190fn ln_factorial(n: u32) -> f64 {
191    if n <= 1 {
192        return 0.0;
193    }
194
195    // Stirlingの近似 ln(n!) ≈ n*ln(n) - n + 0.5*ln(2*π*n)
196    if n > 10 {
197        let n_f = n as f64;
198        n_f * n_f.ln() - n_f + 0.5 * (2.0 * std::f64::consts::PI * n_f).ln()
199    } else {
200        // 小さな値は直接計算
201        (2..=n).map(|i| (i as f64).ln()).sum()
202    }
203}
204
205/// カイ二乗適合度検定
206fn chi_square_goodness_of_fit_test(
207    observed: &HashMap<u32, u32>,
208    expected: &HashMap<u32, f64>,
209) -> (f64, f64) {
210    let mut chi_square = 0.0;
211    let mut degrees_of_freedom: i32 = 0;
212
213    for (&k, &obs_freq) in observed {
214        if let Some(&exp_freq) = expected.get(&k) {
215            if exp_freq >= 5.0 {
216                // 期待度数5以上の階級のみ使用
217                let diff = obs_freq as f64 - exp_freq;
218                chi_square += (diff * diff) / exp_freq;
219                degrees_of_freedom += 1;
220            }
221        }
222    }
223
224    degrees_of_freedom = degrees_of_freedom.saturating_sub(2); // パラメータ数(λ)を考慮
225
226    // 簡易p値推定
227    let p_value = estimate_chi_square_p_value(chi_square, degrees_of_freedom);
228
229    (chi_square, p_value)
230}
231
232/// KS検定(ポアソン分布用)
233fn kolmogorov_smirnov_poisson_test(event_counts: &[u32], lambda: f64) -> (f64, f64) {
234    let mut sorted_counts = event_counts.to_vec();
235    sorted_counts.sort();
236
237    let n = sorted_counts.len() as f64;
238    let mut max_diff: f64 = 0.0;
239
240    for (i, &k) in sorted_counts.iter().enumerate() {
241        // 理論累積分布関数
242        let theoretical_cdf = poisson_cdf(k, lambda);
243
244        // 経験累積分布関数
245        let empirical_cdf = (i + 1) as f64 / n;
246
247        let diff = (theoretical_cdf - empirical_cdf).abs();
248        max_diff = max_diff.max(diff);
249    }
250
251    // 簡易p値推定
252    let critical_value = 1.36 / n.sqrt();
253    let p_value = if max_diff > critical_value { 0.01 } else { 0.1 };
254
255    (max_diff, p_value)
256}
257
258/// ポアソン分布累積分布関数
259fn poisson_cdf(k: u32, lambda: f64) -> f64 {
260    let mut cdf = 0.0;
261    for i in 0..=k {
262        cdf += poisson_probability(i, lambda);
263    }
264    cdf
265}
266
267/// 適合度スコア計算
268fn calculate_goodness_of_fit_score(variance_ratio: f64, chi_square_p: f64, ks_p: f64) -> f64 {
269    // 分散/平均比が1に近いほど高スコア
270    let variance_score = if variance_ratio > 0.0 {
271        let ratio_deviation = (variance_ratio - 1.0).abs();
272        (1.0 / (1.0 + ratio_deviation)).max(0.0)
273    } else {
274        0.0
275    };
276
277    // p値が高いほど高スコア
278    let p_value_score = (chi_square_p + ks_p) / 2.0;
279
280    // 総合スコア
281    (variance_score * 0.6 + p_value_score * 0.4).min(1.0)
282}
283
284/// ポアソン品質スコア計算
285fn calculate_poisson_quality_score(variance_ratio: f64, mean: f64) -> f64 {
286    let ratio_quality = if variance_ratio > 0.0 {
287        let deviation = (variance_ratio - 1.0).abs();
288        (1.0 / (1.0 + 2.0 * deviation)).max(0.0)
289    } else {
290        0.0
291    };
292
293    // 平均が適度な値(0.1-10)の場合にボーナス
294    let mean_quality = if (0.1..=10.0).contains(&mean) {
295        1.0
296    } else if mean > 10.0 {
297        (10.0 / mean).min(1.0)
298    } else {
299        mean / 0.1
300    };
301
302    (ratio_quality * 0.8 + mean_quality * 0.2).min(1.0)
303}
304
305/// ポアソン分布評価
306fn assess_poisson_distribution(_goodness_score: f64, variance_ratio: f64) -> PoissonAssessment {
307    let ratio_deviation = (variance_ratio - 1.0).abs();
308
309    match (_goodness_score, ratio_deviation) {
310        (s, d) if s > 0.8 && d < 0.2 => PoissonAssessment::Excellent,
311        (s, d) if s > 0.6 && d < 0.5 => PoissonAssessment::Good,
312        (s, d) if s > 0.4 && d < 1.0 => PoissonAssessment::Moderate,
313        (s, d) if s > 0.2 && d < 2.0 => PoissonAssessment::Poor,
314        _ => PoissonAssessment::NonPoisson,
315    }
316}
317
318/// リスク評価
319fn determine_risk_level(_goodness_score: f64, assessment: &PoissonAssessment) -> RiskLevel {
320    match assessment {
321        PoissonAssessment::Excellent => RiskLevel::Low,
322        PoissonAssessment::Good => RiskLevel::Low,
323        PoissonAssessment::Moderate => RiskLevel::Medium,
324        PoissonAssessment::Poor => RiskLevel::High,
325        PoissonAssessment::NonPoisson => RiskLevel::Critical,
326    }
327}
328
329/// 稀少事象閾値計算
330fn calculate_rare_events_threshold(lambda: f64) -> u32 {
331    // λ + 3√λ を閾値とする(99.7%以上の事象を稀少とする)
332    (lambda + 3.0 * lambda.sqrt()).ceil() as u32
333}
334
335/// λの信頼区間計算
336fn calculate_lambda_confidence_interval(sample_mean: f64, sample_size: usize) -> (f64, f64) {
337    // 大標本近似: λ ± 1.96 * √(λ/n)
338    let std_error = (sample_mean / sample_size as f64).sqrt();
339    let margin = 1.96 * std_error;
340
341    ((sample_mean - margin).max(0.0), sample_mean + margin)
342}
343
344/// カイ二乗分布p値簡易推定
345fn estimate_chi_square_p_value(chi_square: f64, df: i32) -> f64 {
346    if df <= 0 {
347        return 1.0;
348    }
349
350    // 簡易推定(正確な計算には特殊関数が必要)
351    let critical_values = match df {
352        1 => vec![(3.84, 0.05), (6.64, 0.01), (10.83, 0.001)],
353        2 => vec![(5.99, 0.05), (9.21, 0.01), (13.82, 0.001)],
354        3 => vec![(7.81, 0.05), (11.34, 0.01), (16.27, 0.001)],
355        _ => vec![(9.49, 0.05), (13.28, 0.01), (18.47, 0.001)], // df=4の値を近似
356    };
357
358    for (critical, alpha) in critical_values {
359        if chi_square < critical {
360            return 1.0 - alpha;
361        }
362    }
363
364    0.001
365}