use super::result::PoissonResult;
use crate::error::Result;

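/// Analyze how well a dataset follows a Poisson distribution and return the
/// aggregated `PoissonResult`.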
pub fn analyze_poisson_distribution(numbers: &[f64], dataset_name: &str) -> Result<PoissonResult> {
    PoissonResult::new(dataset_name.to_string(), numbers)
}

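/// Run a single Poisson goodness-of-fit test, or a simple average-based
/// combination of all of them, against the distribution fitted to `numbers`.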
pub fn test_poisson_fit(numbers: &[f64], test_type: PoissonTest) -> Result<PoissonTestResult> {
    let result = PoissonResult::new("poisson_test".to_string(), numbers)?;

    match test_type {
        PoissonTest::ChiSquare => Ok(PoissonTestResult {
            test_name: "Chi-Square Goodness of Fit".to_string(),
            statistic: result.chi_square_statistic,
            p_value: result.chi_square_p_value,
            critical_value: 0.05,
            is_poisson: result.chi_square_p_value > 0.05,
            parameter_lambda: result.lambda,
        }),
        PoissonTest::KolmogorovSmirnov => Ok(PoissonTestResult {
            test_name: "Kolmogorov-Smirnov".to_string(),
            statistic: result.kolmogorov_smirnov_statistic,
            p_value: result.kolmogorov_smirnov_p_value,
            critical_value: 0.05,
            is_poisson: result.kolmogorov_smirnov_p_value > 0.05,
            parameter_lambda: result.lambda,
        }),
        PoissonTest::VarianceTest => {
            let test_statistic = result.variance_ratio;
            let p_value = variance_mean_ratio_p_value(test_statistic, numbers.len());

            Ok(PoissonTestResult {
                test_name: "Variance-to-Mean Ratio Test".to_string(),
                statistic: test_statistic,
                p_value,
                critical_value: 0.05,
                is_poisson: p_value > 0.05,
                parameter_lambda: result.lambda,
            })
        }
        PoissonTest::All => {
            let overall_p = (result.chi_square_p_value + result.kolmogorov_smirnov_p_value) / 2.0;
            let variance_p = variance_mean_ratio_p_value(result.variance_ratio, numbers.len());
            let combined_p = (overall_p + variance_p) / 2.0;

            Ok(PoissonTestResult {
                test_name: "Combined Poisson Tests".to_string(),
                statistic: result.goodness_of_fit_score,
                p_value: combined_p,
                critical_value: 0.05,
                is_poisson: combined_p > 0.05,
                parameter_lambda: result.lambda,
            })
        }
    }
}

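/// Predict P(X = k) and the cumulative probability for k = 0..=max_events
/// under a Poisson distribution with rate `lambda`.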
pub fn predict_event_probabilities(lambda: f64, max_events: u32) -> EventProbabilityResult {
    let mut probabilities = Vec::new();
    let mut cumulative = 0.0;

    for k in 0..=max_events {
        let prob = poisson_probability(k, lambda);
        cumulative += prob;

        probabilities.push(EventProbability {
            event_count: k,
            probability: prob,
            cumulative_probability: cumulative,
        });
    }

    EventProbabilityResult {
        lambda,
        max_events,
        probabilities,
        tail_probability: 1.0 - cumulative,
        most_likely_count: find_mode(lambda),
        expected_value: lambda,
        variance: lambda,
    }
}

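/// Flag observations above the 95%, 99%, and 99.9% Poisson quantiles and
/// summarize how many such rare counts occur and whether they cluster.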
pub fn analyze_rare_events(numbers: &[f64], lambda: f64) -> RareEventAnalysis {
    let event_counts: Vec<u32> = numbers.iter().map(|&x| x as u32).collect();

    let threshold_95 = poisson_quantile(0.95, lambda);
    let threshold_99 = poisson_quantile(0.99, lambda);
    let threshold_999 = poisson_quantile(0.999, lambda);

    let rare_95 = event_counts.iter().filter(|&&x| x >= threshold_95).count();
    let rare_99 = event_counts.iter().filter(|&&x| x >= threshold_99).count();
    let rare_999 = event_counts.iter().filter(|&&x| x >= threshold_999).count();

    let extreme_events: Vec<ExtremeEvent> = event_counts
        .iter()
        .enumerate()
        .filter(|&(_, &count)| count >= threshold_99)
        .map(|(index, &count)| ExtremeEvent {
            index,
            event_count: count,
            probability: poisson_probability(count, lambda),
            rarity_level: if count >= threshold_999 {
                RarityLevel::ExtremelyRare
            } else if count >= threshold_99 {
                RarityLevel::VeryRare
            } else {
                RarityLevel::Rare
            },
        })
        .collect();

    RareEventAnalysis {
        lambda,
        total_observations: numbers.len(),
        threshold_95,
        threshold_99,
        threshold_999,
        rare_events_95: rare_95,
        rare_events_99: rare_99,
        rare_events_999: rare_999,
        extreme_events,
        expected_rare_99: (numbers.len() as f64 * 0.01) as usize,
        clustering_detected: detect_clustering(&event_counts, threshold_99),
    }
}

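/// Check whether inter-arrival times are consistent with a homogeneous
/// Poisson process: exponential fit, memorylessness, and rate homogeneity.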
pub fn analyze_time_intervals(intervals: &[f64]) -> Result<TimeIntervalAnalysis> {
    if intervals.len() < 5 {
        return Err(crate::error::BenfError::InsufficientData(intervals.len()));
    }

    let mean_interval = intervals.iter().sum::<f64>() / intervals.len() as f64;
    let lambda_estimate = 1.0 / mean_interval;

    let exponential_fit = test_exponential_fit(intervals, mean_interval);
    let memoryless_test = test_memoryless_property(intervals);
    let homogeneity_test = test_homogeneity(intervals);

    Ok(TimeIntervalAnalysis {
        mean_interval,
        lambda_estimate,
        exponential_fit_p_value: exponential_fit,
        memoryless_p_value: memoryless_test,
        homogeneity_p_value: homogeneity_test,
        is_poisson_process: exponential_fit > 0.05
            && memoryless_test > 0.05
            && homogeneity_test > 0.05,
        confidence_interval_lambda: calculate_lambda_ci_from_intervals(
            lambda_estimate,
            intervals.len(),
        ),
    })
}

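/// Poisson probability mass function P(X = k) = lambda^k * e^(-lambda) / k!,
/// evaluated in log space to avoid overflow for large k.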
fn poisson_probability(k: u32, lambda: f64) -> f64 {
    if lambda <= 0.0 {
        return if k == 0 { 1.0 } else { 0.0 };
    }

    let ln_prob = k as f64 * lambda.ln() - lambda - ln_factorial(k);
    ln_prob.exp()
}

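/// ln(n!): exact summation of logs for small n, Stirling's approximation
/// n*ln(n) - n + 0.5*ln(2*pi*n) for n > 10.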
fn ln_factorial(n: u32) -> f64 {
    if n <= 1 {
        return 0.0;
    }

    if n > 10 {
        let n_f = n as f64;
        n_f * n_f.ln() - n_f + 0.5 * (2.0 * std::f64::consts::PI * n_f).ln()
    } else {
        (2..=n).map(|i| (i as f64).ln()).sum()
    }
}

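/// Coarse p-value for the variance-to-mean ratio (index of dispersion).
/// The standardized statistic is mapped onto fixed significance bands rather
/// than an exact reference distribution.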
fn variance_mean_ratio_p_value(ratio: f64, sample_size: usize) -> f64 {
    let test_statistic = (ratio - 1.0) * (sample_size as f64).sqrt();
    let abs_stat = test_statistic.abs();

    if abs_stat > 2.58 {
        0.01
    } else if abs_stat > 1.96 {
        0.05
    } else if abs_stat > 1.64 {
        0.1
    } else {
        0.5
    }
}

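/// Smallest k at which the Poisson CDF reaches `p`; the search is capped to
/// guard against non-convergence when `p` is very close to 1.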
fn poisson_quantile(p: f64, lambda: f64) -> u32 {
    let mut k = 0;
    let mut cumulative = 0.0;

    while cumulative < p {
        cumulative += poisson_probability(k, lambda);
        if cumulative < p {
            k += 1;
        }

        if k > 1000 {
            break;
        }
    }

    k
}

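/// Mode of the Poisson distribution: floor(lambda), or floor(lambda) + 1
/// when that count is more probable.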
fn find_mode(lambda: f64) -> u32 {
    let floor_lambda = lambda.floor() as u32;
    let prob_floor = poisson_probability(floor_lambda, lambda);
    let prob_floor_plus1 = poisson_probability(floor_lambda + 1, lambda);

    if prob_floor >= prob_floor_plus1 {
        floor_lambda
    } else {
        floor_lambda + 1
    }
}

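/// Simple clustering heuristic: true when two or more counts at or above the
/// threshold occur consecutively.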
fn detect_clustering(event_counts: &[u32], threshold: u32) -> bool {
    let mut consecutive_rare = 0;
    let mut max_consecutive = 0;

    for &count in event_counts {
        if count >= threshold {
            consecutive_rare += 1;
            max_consecutive = max_consecutive.max(consecutive_rare);
        } else {
            consecutive_rare = 0;
        }
    }

    max_consecutive >= 2
}

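/// Kolmogorov-Smirnov style comparison of the intervals against an
/// exponential distribution with the observed mean; returns a coarse p-value.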
fn test_exponential_fit(intervals: &[f64], mean_interval: f64) -> f64 {
    let mut sorted_intervals = intervals.to_vec();
    sorted_intervals.sort_by(|a, b| a.partial_cmp(b).unwrap());

    let n = sorted_intervals.len() as f64;
    let mut max_diff: f64 = 0.0;

    for (i, &interval) in sorted_intervals.iter().enumerate() {
        let theoretical_cdf = 1.0 - (-interval / mean_interval).exp();
        let empirical_cdf = (i + 1) as f64 / n;
        let diff = (theoretical_cdf - empirical_cdf).abs();
        max_diff = max_diff.max(diff);
    }

    let critical = 1.36 / n.sqrt();
    if max_diff > critical {
        0.01
    } else {
        0.1
    }
}

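/// Memorylessness heuristic based on the lag-1 covariance of consecutive
/// intervals; returns a coarse p-value.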
fn test_memoryless_property(intervals: &[f64]) -> f64 {
    if intervals.len() < 3 {
        return 1.0;
    }

    let mut correlation_sum = 0.0;
    let mean = intervals.iter().sum::<f64>() / intervals.len() as f64;

    for i in 0..intervals.len() - 1 {
        correlation_sum += (intervals[i] - mean) * (intervals[i + 1] - mean);
    }

    let abs_correlation = correlation_sum.abs() / intervals.len() as f64;
    if abs_correlation < mean * 0.1 {
        0.1
    } else {
        0.01
    }
}

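/// Homogeneity heuristic: compare the mean interval of the first and second
/// halves of the series; returns a coarse p-value.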
fn test_homogeneity(intervals: &[f64]) -> f64 {
    // Guard against an empty half when fewer than two intervals are supplied.
    if intervals.len() < 2 {
        return 1.0;
    }

    let mid = intervals.len() / 2;
    let first_half_mean = intervals[..mid].iter().sum::<f64>() / mid as f64;
    let second_half_mean = intervals[mid..].iter().sum::<f64>() / (intervals.len() - mid) as f64;

    let ratio = first_half_mean / second_half_mean;

    if (ratio - 1.0).abs() < 0.2 {
        0.1
    } else {
        0.01
    }
}

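/// Approximate 95% confidence interval for lambda using a normal
/// approximation with standard error lambda / sqrt(n).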
fn calculate_lambda_ci_from_intervals(lambda: f64, n: usize) -> (f64, f64) {
    let std_error = lambda / (n as f64).sqrt();
    let margin = 1.96 * std_error;
    ((lambda - margin).max(0.0), lambda + margin)
}

#[derive(Debug, Clone)]
pub enum PoissonTest {
    ChiSquare,
    KolmogorovSmirnov,
    VarianceTest,
    All,
}

#[derive(Debug, Clone)]
pub struct PoissonTestResult {
    pub test_name: String,
    pub statistic: f64,
    pub p_value: f64,
    pub critical_value: f64,
    pub is_poisson: bool,
    pub parameter_lambda: f64,
}

#[derive(Debug, Clone)]
pub struct EventProbability {
    pub event_count: u32,
    pub probability: f64,
    pub cumulative_probability: f64,
}

#[derive(Debug, Clone)]
pub struct EventProbabilityResult {
    pub lambda: f64,
    pub max_events: u32,
    pub probabilities: Vec<EventProbability>,
    pub tail_probability: f64,
    pub most_likely_count: u32,
    pub expected_value: f64,
    pub variance: f64,
}

#[derive(Debug, Clone, PartialEq)]
pub enum RarityLevel {
    Rare,
    VeryRare,
    ExtremelyRare,
}

#[derive(Debug, Clone)]
pub struct ExtremeEvent {
    pub index: usize,
    pub event_count: u32,
    pub probability: f64,
    pub rarity_level: RarityLevel,
}

#[derive(Debug, Clone)]
pub struct RareEventAnalysis {
    pub lambda: f64,
    pub total_observations: usize,
    pub threshold_95: u32,
    pub threshold_99: u32,
    pub threshold_999: u32,
    pub rare_events_95: usize,
    pub rare_events_99: usize,
    pub rare_events_999: usize,
    pub extreme_events: Vec<ExtremeEvent>,
    pub expected_rare_99: usize,
    pub clustering_detected: bool,
}

#[derive(Debug, Clone)]
pub struct TimeIntervalAnalysis {
    pub mean_interval: f64,
    pub lambda_estimate: f64,
    pub exponential_fit_p_value: f64,
    pub memoryless_p_value: f64,
    pub homogeneity_p_value: f64,
    pub is_poisson_process: bool,
    pub confidence_interval_lambda: (f64, f64),
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_poisson_probability() {
        let lambda = 2.0;
        let prob_0 = poisson_probability(0, lambda);
        let prob_1 = poisson_probability(1, lambda);
        let prob_2 = poisson_probability(2, lambda);

        assert!((prob_0 - 0.135).abs() < 0.01);
        assert!((prob_1 - 0.271).abs() < 0.01);
        assert!((prob_2 - 0.271).abs() < 0.01);
    }

    #[test]
    fn test_poisson_analysis() {
        let numbers = vec![0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 1.0];
        let result = analyze_poisson_distribution(&numbers, "test").unwrap();

        assert_eq!(result.numbers_analyzed, 10);
        assert!(result.lambda > 0.0);
        assert!(result.sample_mean > 0.0);
    }

    #[test]
    fn test_event_probability_prediction() {
        let lambda = 1.5;
        let result = predict_event_probabilities(lambda, 5);

        assert_eq!(result.lambda, lambda);
        assert_eq!(result.expected_value, lambda);
        assert_eq!(result.variance, lambda);
        assert_eq!(result.probabilities.len(), 6);
    }

    #[test]
    fn test_poisson_tests() {
        let numbers = vec![0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0];

        let chi_result = test_poisson_fit(&numbers, PoissonTest::ChiSquare).unwrap();
        assert_eq!(chi_result.test_name, "Chi-Square Goodness of Fit");
        assert!(chi_result.parameter_lambda > 0.0);

        let ks_result = test_poisson_fit(&numbers, PoissonTest::KolmogorovSmirnov).unwrap();
        assert_eq!(ks_result.test_name, "Kolmogorov-Smirnov");
    }
}
473}