lawkit_core/laws/normal/
result.rs

1use crate::{
2    common::risk::RiskLevel,
3    error::{BenfError, Result},
4};
5
6/// 正規分布解析結果
7#[derive(Debug, Clone)]
8pub struct NormalResult {
9    pub dataset_name: String,
10    pub numbers_analyzed: usize,
11    pub risk_level: RiskLevel,
12
13    // 分布パラメータ
14    pub mean: f64,     // 平均値
15    pub std_dev: f64,  // 標準偏差
16    pub variance: f64, // 分散
17    pub skewness: f64, // 歪度(左右の偏り)
18    pub kurtosis: f64, // 尖度(分布の尖り)
19
20    // 正規性検定結果
21    pub shapiro_wilk_statistic: f64,       // Shapiro-Wilk検定統計量
22    pub shapiro_wilk_p_value: f64,         // Shapiro-Wilk p値
23    pub anderson_darling_statistic: f64,   // Anderson-Darling検定統計量
24    pub anderson_darling_p_value: f64,     // Anderson-Darling p値
25    pub kolmogorov_smirnov_statistic: f64, // Kolmogorov-Smirnov検定統計量
26    pub kolmogorov_smirnov_p_value: f64,   // Kolmogorov-Smirnov p値
27
28    // 適合度評価
29    pub normality_score: f64,      // 正規性総合スコア(0-1)
30    pub qq_correlation: f64,       // Q-Q plot相関係数
31    pub distribution_quality: f64, // 分布品質スコア
32
33    // 異常値検出
34    pub outliers_z_score: Vec<(usize, f64, f64)>, // (インデックス, 値, Z-score)
35    pub outliers_modified_z: Vec<(usize, f64, f64)>, // (インデックス, 値, 修正Z-score)
36    pub outliers_iqr: Vec<(usize, f64)>,          // (インデックス, 値)
37
38    // 信頼区間・範囲
39    pub mean_confidence_interval: (f64, f64), // 平均の95%信頼区間
40    pub prediction_interval_95: (f64, f64),   // 95%予測区間
41    pub three_sigma_limits: (f64, f64),       // 3σ限界
42
43    // 品質管理指標
44    pub within_1_sigma_percent: f64, // 1σ以内の割合
45    pub within_2_sigma_percent: f64, // 2σ以内の割合
46    pub within_3_sigma_percent: f64, // 3σ以内の割合
47}
48
49impl NormalResult {
50    pub fn new(dataset_name: String, numbers: &[f64]) -> Result<Self> {
51        if numbers.len() < 8 {
52            return Err(BenfError::InsufficientData(numbers.len()));
53        }
54
55        let numbers_analyzed = numbers.len();
56
57        // 基本統計量計算
58        let mean = calculate_mean(numbers);
59        let variance = calculate_variance(numbers, mean);
60        let std_dev = variance.sqrt();
61        let skewness = calculate_skewness(numbers, mean, std_dev);
62        let kurtosis = calculate_kurtosis(numbers, mean, std_dev);
63
64        // 正規性検定
65        let shapiro_result = shapiro_wilk_test(numbers);
66        let anderson_result = anderson_darling_test(numbers, mean, std_dev);
67        let ks_result = kolmogorov_smirnov_test(numbers, mean, std_dev);
68
69        // 適合度評価
70        let qq_correlation = calculate_qq_correlation(numbers, mean, std_dev);
71        let normality_score = calculate_normality_score(
72            shapiro_result.1,
73            anderson_result.1,
74            ks_result.1,
75            qq_correlation,
76        );
77        let distribution_quality =
78            calculate_distribution_quality(skewness, kurtosis, normality_score);
79
80        // 異常値検出
81        let outliers_z_score = detect_outliers_z_score(numbers, mean, std_dev);
82        let outliers_modified_z = detect_outliers_modified_z_score(numbers);
83        let outliers_iqr = detect_outliers_iqr(numbers);
84
85        // 信頼区間・範囲計算
86        let mean_confidence_interval = calculate_mean_confidence_interval(numbers, mean, std_dev);
87        let prediction_interval_95 = (mean - 1.96 * std_dev, mean + 1.96 * std_dev);
88        let three_sigma_limits = (mean - 3.0 * std_dev, mean + 3.0 * std_dev);
89
90        // 品質管理指標
91        let within_1_sigma_percent = calculate_within_sigma_percent(numbers, mean, std_dev, 1.0);
92        let within_2_sigma_percent = calculate_within_sigma_percent(numbers, mean, std_dev, 2.0);
93        let within_3_sigma_percent = calculate_within_sigma_percent(numbers, mean, std_dev, 3.0);
94
95        // リスクレベル判定
96        let risk_level =
97            determine_risk_level(normality_score, &outliers_z_score, skewness, kurtosis);
98
99        Ok(NormalResult {
100            dataset_name,
101            numbers_analyzed,
102            risk_level,
103            mean,
104            std_dev,
105            variance,
106            skewness,
107            kurtosis,
108            shapiro_wilk_statistic: shapiro_result.0,
109            shapiro_wilk_p_value: shapiro_result.1,
110            anderson_darling_statistic: anderson_result.0,
111            anderson_darling_p_value: anderson_result.1,
112            kolmogorov_smirnov_statistic: ks_result.0,
113            kolmogorov_smirnov_p_value: ks_result.1,
114            normality_score,
115            qq_correlation,
116            distribution_quality,
117            outliers_z_score,
118            outliers_modified_z,
119            outliers_iqr,
120            mean_confidence_interval,
121            prediction_interval_95,
122            three_sigma_limits,
123            within_1_sigma_percent,
124            within_2_sigma_percent,
125            within_3_sigma_percent,
126        })
127    }
128}
129
130/// 平均値計算
131fn calculate_mean(numbers: &[f64]) -> f64 {
132    numbers.iter().sum::<f64>() / numbers.len() as f64
133}
134
135/// 分散計算
136fn calculate_variance(numbers: &[f64], mean: f64) -> f64 {
137    let sum_squared_diff: f64 = numbers.iter().map(|&x| (x - mean).powi(2)).sum();
138    sum_squared_diff / (numbers.len() - 1) as f64
139}
140
141/// 歪度計算(標準化第3次モーメント)
142fn calculate_skewness(numbers: &[f64], mean: f64, std_dev: f64) -> f64 {
143    let n = numbers.len() as f64;
144    let sum_cubed: f64 = numbers
145        .iter()
146        .map(|&x| ((x - mean) / std_dev).powi(3))
147        .sum();
148
149    sum_cubed / n
150}
151
152/// 尖度計算(標準化第4次モーメント - 3)
153fn calculate_kurtosis(numbers: &[f64], mean: f64, std_dev: f64) -> f64 {
154    let n = numbers.len() as f64;
155    let sum_fourth: f64 = numbers
156        .iter()
157        .map(|&x| ((x - mean) / std_dev).powi(4))
158        .sum();
159
160    (sum_fourth / n) - 3.0 // 正規分布の尖度3を基準とした超過尖度
161}
162
163/// Shapiro-Wilk検定(簡易版)
164fn shapiro_wilk_test(numbers: &[f64]) -> (f64, f64) {
165    let n = numbers.len();
166    if !(8..=5000).contains(&n) {
167        return (0.0, 1.0); // 適用範囲外
168    }
169
170    let mut sorted = numbers.to_vec();
171    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
172
173    // 簡易実装: 分位数相関係数を使用
174    let qq_corr = calculate_qq_correlation(
175        &sorted,
176        calculate_mean(&sorted),
177        calculate_variance(&sorted, calculate_mean(&sorted)).sqrt(),
178    );
179    let w_statistic = qq_corr * qq_corr;
180
181    // 簡易p値推定(厳密な計算は複雑な積分が必要)
182    let p_value = if w_statistic > 0.95 {
183        0.5
184    } else if w_statistic > 0.90 {
185        0.1
186    } else if w_statistic > 0.80 {
187        0.01
188    } else {
189        0.001
190    };
191
192    (w_statistic, p_value)
193}
194
195/// Anderson-Darling検定(簡易版)
196fn anderson_darling_test(numbers: &[f64], mean: f64, std_dev: f64) -> (f64, f64) {
197    let mut sorted = numbers.to_vec();
198    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
199
200    let n = sorted.len() as f64;
201    let mut a_squared = 0.0;
202
203    for (i, &x) in sorted.iter().enumerate() {
204        let z = (x - mean) / std_dev;
205        let phi = standard_normal_cdf(z);
206        let phi_complement = 1.0 - standard_normal_cdf(-z);
207
208        if phi > 0.0 && phi < 1.0 && phi_complement > 0.0 {
209            a_squared += (2.0 * (i + 1) as f64 - 1.0) * (phi.ln() + phi_complement.ln()) / n;
210        }
211    }
212
213    a_squared = -n - a_squared;
214
215    // 簡易p値推定
216    let p_value = if a_squared < 0.5 {
217        0.5
218    } else if a_squared < 1.0 {
219        0.1
220    } else if a_squared < 2.0 {
221        0.01
222    } else {
223        0.001
224    };
225
226    (a_squared, p_value)
227}
228
229/// Kolmogorov-Smirnov検定(簡易版)
230fn kolmogorov_smirnov_test(numbers: &[f64], mean: f64, std_dev: f64) -> (f64, f64) {
231    let mut sorted = numbers.to_vec();
232    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
233
234    let n = sorted.len() as f64;
235    let mut max_diff: f64 = 0.0;
236
237    for (i, &x) in sorted.iter().enumerate() {
238        let z = (x - mean) / std_dev;
239        let expected_cdf = standard_normal_cdf(z);
240        let empirical_cdf = (i + 1) as f64 / n;
241
242        let diff = (expected_cdf - empirical_cdf).abs();
243        max_diff = max_diff.max(diff);
244    }
245
246    // 簡易p値推定
247    let ks_critical = 1.36 / n.sqrt(); // α=0.05での臨界値
248    let p_value = if max_diff < ks_critical * 0.5 {
249        0.5
250    } else if max_diff < ks_critical {
251        0.1
252    } else if max_diff < ks_critical * 1.5 {
253        0.01
254    } else {
255        0.001
256    };
257
258    (max_diff, p_value)
259}
260
261/// 標準正規分布の累積分布関数(簡易実装)
262fn standard_normal_cdf(z: f64) -> f64 {
263    if z > 6.0 {
264        return 1.0;
265    }
266    if z < -6.0 {
267        return 0.0;
268    }
269
270    // Box-Muller変換の逆関数による近似
271    let t = 1.0 / (1.0 + 0.2316419 * z.abs());
272    let d = 0.3989423 * (-z * z / 2.0).exp();
273    let probability =
274        d * t * (0.3193815 + t * (-0.3565638 + t * (1.7814779 + t * (-1.8212560 + t * 1.3302744))));
275
276    if z >= 0.0 {
277        1.0 - probability
278    } else {
279        probability
280    }
281}
282
283/// Q-Q plot相関係数計算
284fn calculate_qq_correlation(numbers: &[f64], mean: f64, std_dev: f64) -> f64 {
285    let mut sorted = numbers.to_vec();
286    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
287
288    let n = sorted.len();
289    let mut theoretical_quantiles = Vec::new();
290
291    for i in 0..n {
292        let p = (i + 1) as f64 / (n + 1) as f64;
293        let z = inverse_standard_normal(p);
294        theoretical_quantiles.push(mean + std_dev * z);
295    }
296
297    pearson_correlation(&sorted, &theoretical_quantiles)
298}
299
300/// 標準正規分布の逆関数(簡易実装)
301fn inverse_standard_normal(p: f64) -> f64 {
302    if p <= 0.0 {
303        return -6.0;
304    }
305    if p >= 1.0 {
306        return 6.0;
307    }
308
309    // Beasley-Springer-Moro algorithm の簡易版
310    let a = p - 0.5;
311    if a.abs() < 0.42 {
312        let r = a * a;
313        return a
314            * (((2.5066282 + r * 0.3001648) + r * 0.0010805)
315                / ((1.0 + r * 1.6372227) + r * 0.0312753));
316    }
317
318    let r = if a < 0.0 { p } else { 1.0 - p };
319    let s = (-r.ln()).sqrt();
320    let t = s
321        - ((2.515517 + s * 0.802853) + s * s * 0.010328)
322            / ((1.0 + s * 1.432788) + s * s * 0.189269);
323
324    if a < 0.0 {
325        -t
326    } else {
327        t
328    }
329}
330
331/// ピアソン相関係数計算
332fn pearson_correlation(x: &[f64], y: &[f64]) -> f64 {
333    if x.len() != y.len() {
334        return 0.0;
335    }
336
337    let n = x.len() as f64;
338    let mean_x = x.iter().sum::<f64>() / n;
339    let mean_y = y.iter().sum::<f64>() / n;
340
341    let mut numerator = 0.0;
342    let mut sum_x_sq = 0.0;
343    let mut sum_y_sq = 0.0;
344
345    for (&xi, &yi) in x.iter().zip(y.iter()) {
346        let dx = xi - mean_x;
347        let dy = yi - mean_y;
348        numerator += dx * dy;
349        sum_x_sq += dx * dx;
350        sum_y_sq += dy * dy;
351    }
352
353    let denominator = (sum_x_sq * sum_y_sq).sqrt();
354    if denominator == 0.0 {
355        0.0
356    } else {
357        numerator / denominator
358    }
359}
360
361/// 正規性総合スコア計算
362fn calculate_normality_score(sw_p: f64, ad_p: f64, ks_p: f64, qq_corr: f64) -> f64 {
363    // p値は高いほど正規分布に近い、相関係数も高いほど良い
364    let p_score = (sw_p * 0.4 + ad_p * 0.3 + ks_p * 0.3).min(1.0);
365    let corr_score = qq_corr.abs();
366
367    (p_score * 0.6 + corr_score * 0.4).clamp(0.0, 1.0)
368}
369
370/// 分布品質スコア計算
371fn calculate_distribution_quality(skewness: f64, kurtosis: f64, normality_score: f64) -> f64 {
372    // 歪度・尖度が0に近いほど良い(正規分布では歪度=0, 超過尖度=0)
373    let skew_score = 1.0 - (skewness.abs() / 2.0).min(1.0);
374    let kurt_score = 1.0 - (kurtosis.abs() / 3.0).min(1.0);
375
376    (normality_score * 0.5 + skew_score * 0.25 + kurt_score * 0.25).clamp(0.0, 1.0)
377}
378
379/// Z-scoreによる異常値検出
380fn detect_outliers_z_score(numbers: &[f64], mean: f64, std_dev: f64) -> Vec<(usize, f64, f64)> {
381    let mut outliers = Vec::new();
382    let threshold = 2.5; // 2.5σ以上を異常値とする
383
384    for (i, &value) in numbers.iter().enumerate() {
385        let z_score = (value - mean) / std_dev;
386        if z_score.abs() > threshold {
387            outliers.push((i, value, z_score));
388        }
389    }
390
391    outliers
392}
393
394/// 修正Z-scoreによる異常値検出
395fn detect_outliers_modified_z_score(numbers: &[f64]) -> Vec<(usize, f64, f64)> {
396    let median = calculate_median(numbers);
397    let mad = calculate_mad(numbers, median);
398    let mut outliers = Vec::new();
399    let threshold = 3.5; // 修正Z-scoreの閾値
400
401    for (i, &value) in numbers.iter().enumerate() {
402        let modified_z = 0.6745 * (value - median) / mad;
403        if modified_z.abs() > threshold {
404            outliers.push((i, value, modified_z));
405        }
406    }
407
408    outliers
409}
410
411/// IQR法による異常値検出
412fn detect_outliers_iqr(numbers: &[f64]) -> Vec<(usize, f64)> {
413    let mut sorted = numbers.to_vec();
414    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
415
416    let n = sorted.len();
417    let q1 = sorted[n / 4];
418    let q3 = sorted[3 * n / 4];
419    let iqr = q3 - q1;
420    let lower_bound = q1 - 1.5 * iqr;
421    let upper_bound = q3 + 1.5 * iqr;
422
423    let mut outliers = Vec::new();
424    for (i, &value) in numbers.iter().enumerate() {
425        if value < lower_bound || value > upper_bound {
426            outliers.push((i, value));
427        }
428    }
429
430    outliers
431}
432
433/// 中央値計算
434fn calculate_median(numbers: &[f64]) -> f64 {
435    let mut sorted = numbers.to_vec();
436    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
437    let n = sorted.len();
438
439    if n % 2 == 0 {
440        (sorted[n / 2 - 1] + sorted[n / 2]) / 2.0
441    } else {
442        sorted[n / 2]
443    }
444}
445
446/// MAD(Median Absolute Deviation)計算
447fn calculate_mad(numbers: &[f64], median: f64) -> f64 {
448    let deviations: Vec<f64> = numbers.iter().map(|&x| (x - median).abs()).collect();
449    calculate_median(&deviations)
450}
451
452/// 平均の信頼区間計算
453fn calculate_mean_confidence_interval(numbers: &[f64], mean: f64, std_dev: f64) -> (f64, f64) {
454    let n = numbers.len() as f64;
455    let se = std_dev / n.sqrt();
456    let t_critical = 1.96; // 大標本近似でt分布≈標準正規分布
457
458    (mean - t_critical * se, mean + t_critical * se)
459}
460
461/// σ範囲内データの割合計算
462fn calculate_within_sigma_percent(numbers: &[f64], mean: f64, std_dev: f64, sigma: f64) -> f64 {
463    let lower = mean - sigma * std_dev;
464    let upper = mean + sigma * std_dev;
465
466    let within_count = numbers
467        .iter()
468        .filter(|&&x| x >= lower && x <= upper)
469        .count();
470
471    (within_count as f64 / numbers.len() as f64) * 100.0
472}
473
474/// リスクレベル判定
475fn determine_risk_level(
476    normality_score: f64,
477    outliers: &[(usize, f64, f64)],
478    skewness: f64,
479    kurtosis: f64,
480) -> RiskLevel {
481    let outlier_ratio = outliers.len() as f64 / 100.0; // 仮の分母、実際は総データ数
482
483    if normality_score > 0.8 && outlier_ratio < 0.05 && skewness.abs() < 0.5 && kurtosis.abs() < 0.5
484    {
485        RiskLevel::Low
486    } else if normality_score > 0.6
487        && outlier_ratio < 0.1
488        && skewness.abs() < 1.0
489        && kurtosis.abs() < 1.0
490    {
491        RiskLevel::Medium
492    } else if normality_score > 0.3 && outlier_ratio < 0.2 {
493        RiskLevel::High
494    } else {
495        RiskLevel::Critical
496    }
497}
498
499#[cfg(test)]
500mod tests {
501    use super::*;
502
503    #[test]
504    fn test_normal_distribution() {
505        // 標準正規分布に近いデータ
506        let numbers = vec![
507            0.0, 0.5, -0.3, 1.2, -0.8, 0.2, -0.1, 0.9, -0.6, 0.7, -0.4, 0.3, 1.1, -0.9, 0.6, -0.2,
508            0.8, -0.5, 0.1, 0.4,
509        ];
510
511        let result = NormalResult::new("test".to_string(), &numbers).unwrap();
512
513        assert_eq!(result.numbers_analyzed, 20);
514        assert!((result.mean - 0.0).abs() < 0.3); // 平均は0に近い
515        assert!(result.std_dev > 0.0);
516        assert!(result.normality_score > 0.0);
517    }
518
519    #[test]
520    fn test_insufficient_data() {
521        let numbers = vec![1.0, 2.0, 3.0]; // 8個未満
522        let result = NormalResult::new("test".to_string(), &numbers);
523
524        assert!(result.is_err());
525    }
526
527    #[test]
528    fn test_outlier_detection() {
529        let numbers = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 100.0]; // 100.0が外れ値
530        let result = NormalResult::new("outlier_test".to_string(), &numbers).unwrap();
531
532        assert!(!result.outliers_z_score.is_empty());
533    }
534}