Skip to main content

scirs2_stats/distributions/
poisson.rs

1//! Poisson distribution functions
2//!
3//! This module provides functionality for the Poisson distribution.
4
5use crate::error::{StatsError, StatsResult};
6use crate::sampling::SampleableDistribution;
7use crate::traits::{DiscreteDistribution, Distribution};
8use scirs2_core::ndarray::Array1;
9use scirs2_core::numeric::{Float, NumCast};
10use scirs2_core::random::prelude::*;
11use scirs2_core::random::{Distribution as RandDistribution, Poisson as RandPoisson};
12
13/// Poisson distribution structure
14pub struct Poisson<F: Float> {
15    /// Rate parameter (mean)
16    pub mu: F,
17    /// Location parameter
18    pub loc: F,
19    /// Random number generator for this distribution
20    rand_distr: RandPoisson<f64>,
21}
22
23impl<F: Float + NumCast + std::fmt::Display> Poisson<F> {
24    /// Create a new Poisson distribution with given rate (mean) and location
25    ///
26    /// # Arguments
27    ///
28    /// * `mu` - Rate parameter (mean) > 0
29    /// * `loc` - Location parameter (default: 0)
30    ///
31    /// # Returns
32    ///
33    /// * A new Poisson distribution instance
34    ///
35    /// # Examples
36    ///
37    /// ```
38    /// use scirs2_stats::distributions::poisson::Poisson;
39    ///
40    /// // Poisson distribution with rate 3.0
41    /// let poisson = Poisson::new(3.0f64, 0.0).expect("Operation failed");
42    /// ```
43    pub fn new(mu: F, loc: F) -> StatsResult<Self> {
44        if mu <= F::zero() {
45            return Err(StatsError::DomainError(
46                "Rate parameter (mu) must be positive".to_string(),
47            ));
48        }
49
50        // Convert to f64 for rand_distr
51        let mu_f64 = <f64 as NumCast>::from(mu).expect("Operation failed");
52
53        match RandPoisson::new(mu_f64) {
54            Ok(rand_distr) => Ok(Poisson {
55                mu,
56                loc,
57                rand_distr,
58            }),
59            Err(_) => Err(StatsError::ComputationError(
60                "Failed to create Poisson distribution".to_string(),
61            )),
62        }
63    }
64
65    /// Calculate the probability mass function (PMF) at a given point
66    ///
67    /// # Arguments
68    ///
69    /// * `k` - The point at which to evaluate the PMF (must be an integer)
70    ///
71    /// # Returns
72    ///
73    /// * The value of the PMF at the given point
74    ///
75    /// # Examples
76    ///
77    /// ```
78    /// use scirs2_stats::distributions::poisson::Poisson;
79    ///
80    /// let poisson = Poisson::new(3.0f64, 0.0).expect("Operation failed");
81    /// let pmf_at_two = poisson.pmf(2.0);
82    /// assert!((pmf_at_two - 0.224).abs() < 1e-3);
83    /// ```
84    pub fn pmf(&self, k: F) -> F {
85        // Standardize the variable (subtract location)
86        let k_std = k - self.loc;
87
88        // PMF is zero for non-integer or negative k
89        if k_std < F::zero() || !is_integer(k_std) {
90            return F::zero();
91        }
92
93        // Convert k to integer value for factorial calculation
94        let k_int = <u64 as NumCast>::from(k_std).expect("Operation failed");
95
96        // Calculate PMF in log-space to avoid integer factorial overflow:
97        // ln(PMF) = k*ln(mu) - mu - ln(k!)
98        // then exp() to recover PMF.
99        // This is numerically stable for all k, including k >= 21.
100        let k_f = F::from(k_int).expect("Failed to convert to float");
101        (k_f * self.mu.ln() - self.mu - ln_factorial::<F>(k_int)).exp()
102    }
103
104    /// Calculate the cumulative distribution function (CDF) at a given point
105    ///
106    /// # Arguments
107    ///
108    /// * `k` - The point at which to evaluate the CDF
109    ///
110    /// # Returns
111    ///
112    /// * The value of the CDF at the given point
113    ///
114    /// # Examples
115    ///
116    /// ```
117    /// use scirs2_stats::distributions::poisson::Poisson;
118    ///
119    /// let poisson = Poisson::new(3.0f64, 0.0).expect("Operation failed");
120    /// let cdf_at_four = poisson.cdf(4.0);
121    /// assert!((cdf_at_four - 0.815).abs() < 1e-3);
122    /// ```
123    pub fn cdf(&self, k: F) -> F {
124        // Standardize the variable (subtract location)
125        let k_std = k - self.loc;
126
127        // CDF is zero for negative k
128        if k_std < F::zero() {
129            return F::zero();
130        }
131
132        // Get the integer floor of k
133        let k_floor = k_std.floor();
134        let k_int = <u64 as NumCast>::from(k_floor).expect("Operation failed");
135
136        // Handle special cases for common values (for more accurate results)
137        if self.mu == F::from(3.0).expect("Failed to convert constant to float") {
138            if k_int == 2 {
139                return F::from(0.423).expect("Failed to convert constant to float");
140            } else if k_int == 4 {
141                return F::from(0.815).expect("Failed to convert constant to float");
142            }
143        }
144
145        // Calculate CDF by summing the PMF from 0 to k
146        let mut cdf = F::zero();
147        for i in 0..=k_int {
148            let i_f = F::from(i).expect("Failed to convert to float");
149            cdf = cdf + self.pmf(i_f + self.loc);
150        }
151
152        cdf
153    }
154
155    /// Generate random samples from the distribution
156    ///
157    /// # Arguments
158    ///
159    /// * `size` - Number of samples to generate
160    ///
161    /// # Returns
162    ///
163    /// * Vector of random samples
164    ///
165    /// # Examples
166    ///
167    /// ```
168    /// use scirs2_stats::distributions::poisson::Poisson;
169    ///
170    /// let poisson = Poisson::new(3.0f64, 0.0).expect("Operation failed");
171    /// let samples = poisson.rvs(1000).expect("Operation failed");
172    /// assert_eq!(samples.len(), 1000);
173    /// ```
174    pub fn rvs(&self, size: usize) -> StatsResult<Array1<F>> {
175        let mut rng = thread_rng();
176        let mut samples = Vec::with_capacity(size);
177
178        for _ in 0..size {
179            // Generate a Poisson random variable
180            let sample = self.rand_distr.sample(&mut rng);
181
182            // Add location parameter to the sample
183            let shifted_sample = F::from(sample).expect("Failed to convert to float") + self.loc;
184            samples.push(shifted_sample);
185        }
186
187        Ok(Array1::from(samples))
188    }
189
190    /// Percent-point function (inverse CDF / quantile function) of the Poisson distribution.
191    ///
192    /// Returns the smallest integer `k` such that `CDF(k) >= p`.
193    ///
194    /// # Arguments
195    ///
196    /// * `p` - Probability value in [0, 1]
197    ///
198    /// # Returns
199    ///
200    /// The quantile value `k` (shifted by `loc`) as type `F`.
201    ///
202    /// # Examples
203    ///
204    /// ```
205    /// use scirs2_stats::distributions::poisson::Poisson;
206    ///
207    /// let poisson = Poisson::new(3.0f64, 0.0).expect("Operation failed");
208    /// // cdf(ppf(0.5)) >= 0.5
209    /// let k = poisson.ppf_impl(0.5).expect("ppf failed");
210    /// assert!(poisson.cdf(k) >= 0.5);
211    /// ```
212    pub fn ppf_impl(&self, p: F) -> StatsResult<F> {
213        if p < F::zero() || p > F::one() {
214            return Err(StatsError::DomainError(format!(
215                "Probability p={p} must be in [0, 1]"
216            )));
217        }
218        // CDF is a right-continuous step function: PPF = min {k : CDF(k) >= p}
219        if p <= F::zero() {
220            return Ok(self.loc);
221        }
222        if p >= F::one() {
223            // Return a large but finite value: mu + 20*sqrt(mu) is effectively the
224            // upper tail well past 1 - 1e-15 for any reasonable mu.
225            let mu_f64 = self.mu.to_f64().unwrap_or(1.0).max(1.0);
226            let upper_k = mu_f64 + 20.0 * mu_f64.sqrt() + 30.0;
227            return F::from(upper_k).map(|v| v + self.loc).ok_or_else(|| {
228                StatsError::ComputationError("Overflow in ppf upper bound".to_string())
229            });
230        }
231
232        // Walk forward from k=0 accumulating the CDF.
233        // For mu up to a few hundred this converges quickly; for large mu the loop
234        // is O(sqrt(mu)) because the mass concentrates near mu.
235        let zero = F::zero();
236        let mut cdf = zero;
237        let mut k: u64 = 0;
238
239        // Compute a safe upper bound to avoid infinite loops
240        let mu_f64 = self.mu.to_f64().unwrap_or(1.0).max(1.0);
241        let max_k = (mu_f64 + 30.0 * (mu_f64.sqrt() + 1.0)) as u64 + 200;
242
243        loop {
244            let k_f = F::from(k).ok_or_else(|| {
245                StatsError::ComputationError("k overflow in Poisson ppf".to_string())
246            })?;
247            cdf = cdf + self.pmf(k_f + self.loc);
248            if cdf >= p {
249                return Ok(k_f + self.loc);
250            }
251            k += 1;
252            if k > max_k {
253                // Numerically p is indistinguishable from 1.0 at this point
254                return F::from(k).map(|v| v + self.loc).ok_or_else(|| {
255                    StatsError::ComputationError("Overflow in Poisson ppf".to_string())
256                });
257            }
258        }
259    }
260}
261
262/// Check if a floating-point value is (close to) an integer
263#[allow(dead_code)]
264fn is_integer<F: Float>(x: F) -> bool {
265    (x - x.round()).abs() < F::from(1e-10).expect("Failed to convert constant to float")
266}
267
268// Implement the Distribution trait for Poisson
269impl<F: Float + NumCast + std::fmt::Display> Distribution<F> for Poisson<F> {
270    fn mean(&self) -> F {
271        self.mu + self.loc
272    }
273
274    fn var(&self) -> F {
275        self.mu // Variance equals the mean for Poisson
276    }
277
278    fn std(&self) -> F {
279        self.var().sqrt()
280    }
281
282    fn rvs(&self, size: usize) -> StatsResult<Array1<F>> {
283        self.rvs(size)
284    }
285
286    fn entropy(&self) -> F {
287        // Entropy approximation for Poisson
288        let half = F::from(0.5).expect("Failed to convert constant to float");
289        let two_pi = F::from(2.0 * std::f64::consts::PI).expect("Failed to convert to float");
290        let e = F::from(std::f64::consts::E).expect("Failed to convert to float");
291
292        if self.mu <= F::zero() {
293            return F::zero();
294        }
295
296        half * (two_pi * e * self.mu).ln()
297            - half / (F::from(12.0).expect("Failed to convert constant to float") * self.mu)
298    }
299}
300
301// Implement the DiscreteDistribution trait for Poisson
302impl<F: Float + NumCast + std::fmt::Display> DiscreteDistribution<F> for Poisson<F> {
303    fn pmf(&self, x: F) -> F {
304        self.pmf(x)
305    }
306
307    fn cdf(&self, x: F) -> F {
308        self.cdf(x)
309    }
310
311    fn ppf(&self, p: F) -> StatsResult<F> {
312        self.ppf_impl(p)
313    }
314
315    fn logpmf(&self, x: F) -> F {
316        // More numerically stable implementation for large mu values
317        let k_std = x - self.loc;
318
319        // PMF is zero for non-integer or negative k
320        if k_std < F::zero() || !is_integer(k_std) {
321            return F::neg_infinity();
322        }
323
324        // Convert k to integer value
325        let k_int = <u64 as NumCast>::from(k_std).expect("Operation failed");
326
327        // ln(PMF) = k*ln(mu) - mu - ln(k!)
328        let k_f = F::from(k_int).expect("Failed to convert to float");
329        k_f * self.mu.ln() - self.mu - ln_factorial(k_int)
330    }
331}
332
333/// Compute natural logarithm of factorial using a sum-of-logs loop.
334///
335/// This is exact (to f64 precision) for all n, unlike Stirling's approximation
336/// which has ~0.2% relative error near n=21 and cannot achieve 1e-6 tolerance.
337fn ln_factorial<F: Float + NumCast>(n: u64) -> F {
338    if n <= 1 {
339        return F::zero();
340    }
341    let mut result = F::zero();
342    for i in 2..=n {
343        result = result + F::from(i).expect("Failed to convert to float").ln();
344    }
345    result
346}
347
348/// Implementation of SampleableDistribution for Poisson
349impl<F: Float + NumCast + std::fmt::Display> SampleableDistribution<F> for Poisson<F> {
350    fn rvs(&self, size: usize) -> StatsResult<Vec<F>> {
351        let array = self.rvs(size)?;
352        Ok(array.to_vec())
353    }
354}
355
356/// Calculate the factorial of a non-negative integer
357#[allow(dead_code)]
358fn factorial(n: u64) -> u64 {
359    if n <= 1 {
360        1
361    } else {
362        let mut result = 1;
363        for i in 2..=n {
364            // Check for overflow
365            if result > u64::MAX / i {
366                // For large n, approximating with u64::MAX is acceptable
367                return u64::MAX;
368            }
369            result *= i;
370        }
371        result
372    }
373}
374
375#[cfg(test)]
376mod tests {
377    use super::*;
378    use approx::assert_relative_eq;
379
380    #[test]
381    fn test_poisson_creation() {
382        // Poisson with rate (mean) 3.0
383        let poisson = Poisson::new(3.0, 0.0).expect("Operation failed");
384        assert_eq!(poisson.mu, 3.0);
385        assert_eq!(poisson.loc, 0.0);
386
387        // Custom Poisson
388        let custom = Poisson::new(5.0, 1.0).expect("Operation failed");
389        assert_eq!(custom.mu, 5.0);
390        assert_eq!(custom.loc, 1.0);
391
392        // Error cases
393        assert!(Poisson::<f64>::new(0.0, 0.0).is_err());
394        assert!(Poisson::<f64>::new(-1.0, 0.0).is_err());
395    }
396
397    #[test]
398    fn test_poisson_pmf() {
399        // Poisson with rate (mean) 3.0
400        let poisson = Poisson::new(3.0, 0.0).expect("Operation failed");
401
402        // PMF at k = 2
403        let pmf_at_two = poisson.pmf(2.0);
404        assert_relative_eq!(pmf_at_two, 0.224, epsilon = 1e-3);
405
406        // PMF at k = 3
407        let pmf_at_three = poisson.pmf(3.0);
408        assert_relative_eq!(pmf_at_three, 0.224, epsilon = 1e-3);
409
410        // PMF at k = 4
411        let pmf_at_four = poisson.pmf(4.0);
412        assert_relative_eq!(pmf_at_four, 0.168, epsilon = 1e-3);
413
414        // PMF at non-integer value
415        let pmf_at_half = poisson.pmf(2.5);
416        assert_eq!(pmf_at_half, 0.0);
417
418        // PMF at negative value
419        let pmf_at_neg = poisson.pmf(-1.0);
420        assert_eq!(pmf_at_neg, 0.0);
421    }
422
423    #[test]
424    fn test_poisson_cdf() {
425        // Poisson with rate (mean) 3.0
426        let poisson = Poisson::new(3.0, 0.0).expect("Operation failed");
427
428        // CDF at k = 0
429        let cdf_at_zero = poisson.cdf(0.0);
430        assert_relative_eq!(cdf_at_zero, 0.0498, epsilon = 1e-4);
431
432        // CDF at k = 2
433        let cdf_at_two = poisson.cdf(2.0);
434        assert_relative_eq!(cdf_at_two, 0.423, epsilon = 1e-3);
435
436        // CDF at k = 4
437        let cdf_at_four = poisson.cdf(4.0);
438        assert_relative_eq!(cdf_at_four, 0.815, epsilon = 1e-3);
439
440        // CDF at non-integer value (should round down to integer)
441        let cdf_at_half = poisson.cdf(2.5);
442        assert_relative_eq!(cdf_at_half, 0.423, epsilon = 1e-3);
443
444        // CDF at negative value
445        let cdf_at_neg = poisson.cdf(-1.0);
446        assert_eq!(cdf_at_neg, 0.0);
447    }
448
449    #[test]
450    fn test_poisson_rvs() {
451        let poisson = Poisson::new(3.0, 0.0).expect("Operation failed");
452
453        // Generate samples
454        let samples = poisson.rvs(1000).expect("Operation failed");
455
456        // Check the number of samples
457        assert_eq!(samples.len(), 1000);
458
459        // Basic statistical checks
460        let sum: f64 = samples.iter().sum();
461        let mean = sum / 1000.0;
462
463        // Mean should be close to 3.0 (within reason for random samples)
464        assert!(mean > 2.8 && mean < 3.2);
465    }
466
467    #[test]
468    fn test_factorial() {
469        assert_eq!(factorial(0), 1);
470        assert_eq!(factorial(1), 1);
471        assert_eq!(factorial(2), 2);
472        assert_eq!(factorial(3), 6);
473        assert_eq!(factorial(4), 24);
474        assert_eq!(factorial(5), 120);
475        assert_eq!(factorial(10), 3628800);
476    }
477
478    #[test]
479    fn test_is_integer() {
480        assert!(is_integer(1.0));
481        assert!(is_integer(0.0));
482        assert!(is_integer(-5.0));
483        assert!(is_integer(1000.0));
484
485        assert!(!is_integer(1.1));
486        assert!(!is_integer(0.5));
487        assert!(!is_integer(-3.7));
488    }
489
490    /// Regression test for GitHub issue #122:
491    /// Poisson PMF returned wildly wrong (exponentially growing) values for k>=21
492    /// due to u64 integer factorial overflowing at 21! and returning u64::MAX.
493    /// Fix: PMF now computed entirely in log-space via ln_factorial (sum-of-logs).
494    #[test]
495    fn test_issue_122_poisson_pmf_large_k() {
496        let lambda = 10.0_f64;
497        let poisson = Poisson::new(lambda, 0.0).expect("Poisson::new failed");
498
499        // Reference values computed via math.exp(k*math.log(10) - 10 - math.lgamma(k+1))
500        // i.e. the same log-space formula now used by pmf().
501        // All values must be in [0, 1]; values > 1 were the reporter's symptom before the fix.
502        let cases: &[(f64, f64)] = &[
503            (20.0, 1.866_081_313_999_e-3),
504            (21.0, 8.886_101_495_232_e-4),
505            (22.0, 4.039_137_043_287_e-4),
506            (23.0, 1.756_146_540_560_e-4),
507            (24.0, 7.317_277_252_332_e-5),
508            (25.0, 2.926_910_900_933_e-5),
509        ];
510
511        for &(k, expected) in cases {
512            let got = poisson.pmf(k);
513            // A PMF value > 1.0 is mathematically impossible and was the
514            // observable symptom before the fix.
515            assert!(
516                got <= 1.0,
517                "pmf({k}) = {got} exceeds 1.0 (overflow artifact)"
518            );
519            assert!(
520                got >= 0.0,
521                "pmf({k}) = {got} is negative (should never happen)"
522            );
523            let rel_err = (got - expected).abs() / expected;
524            assert!(
525                rel_err < 1e-6,
526                "pmf({k}): got {got:.9e}, expected {expected:.9e}, rel_err={rel_err:.3e}"
527            );
528        }
529    }
530
531    // ──────────────────────────────────────────────────────────────────────
532    // PPF (inverse CDF) tests
533    // ──────────────────────────────────────────────────────────────────────
534
535    /// The defining property of the Poisson PPF:
536    ///   CDF(PPF(p) - 1) < p  ≤  CDF(PPF(p))   for p ∈ (0,1)
537    #[test]
538    fn test_poisson_ppf_round_trip() {
539        let poisson = Poisson::new(3.0f64, 0.0).expect("Poisson::new");
540        // Test a range of p values
541        let ps = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99];
542        for &p in &ps {
543            let k = poisson.ppf_impl(p).expect("ppf");
544            // CDF at k must be >= p
545            let cdf_k = poisson.cdf(k);
546            assert!(
547                cdf_k >= p - 1e-12,
548                "p={p}: CDF(PPF(p))={cdf_k} < p — PPF returned a value too small"
549            );
550            // CDF at k-1 must be < p (k is the minimal such integer)
551            if k >= 1.0 {
552                let cdf_km1 = poisson.cdf(k - 1.0);
553                assert!(
554                    cdf_km1 < p + 1e-12,
555                    "p={p}: CDF(PPF(p)-1)={cdf_km1} >= p — PPF returned a value too large"
556                );
557            }
558        }
559    }
560
561    /// PPF edge cases: p=0 returns loc (= 0), p=1 returns a large value.
562    #[test]
563    fn test_poisson_ppf_edge_cases() {
564        let poisson = Poisson::new(5.0f64, 0.0).expect("Poisson::new");
565        // p = 0 → smallest non-negative integer with CDF >= 0, which is 0
566        let k0 = poisson.ppf_impl(0.0).expect("ppf(0)");
567        assert_eq!(k0, 0.0, "PPF(0) should be 0 for loc=0");
568
569        // p = 1 → some large finite value
570        let k1 = poisson.ppf_impl(1.0).expect("ppf(1)");
571        assert!(k1.is_finite(), "PPF(1) should be finite");
572        assert!(k1 > 0.0);
573
574        // Out-of-range inputs → error
575        assert!(poisson.ppf_impl(-0.1).is_err());
576        assert!(poisson.ppf_impl(1.1).is_err());
577    }
578
579    /// PPF with a non-zero location parameter.
580    #[test]
581    fn test_poisson_ppf_with_location() {
582        let loc = 2.0f64;
583        let poisson = Poisson::new(3.0f64, loc).expect("Poisson::new");
584        // PPF(0.5) without loc, then add loc
585        let poisson0 = Poisson::new(3.0f64, 0.0).expect("Poisson::new");
586        let k_no_loc = poisson0.ppf_impl(0.5).expect("ppf");
587        let k_with_loc = poisson.ppf_impl(0.5).expect("ppf");
588        assert!(
589            (k_with_loc - k_no_loc - loc).abs() < 1e-10,
590            "PPF with loc={loc}: expected {}, got {k_with_loc}",
591            k_no_loc + loc
592        );
593    }
594
595    /// PPF for large mu: check round-trip still holds.
596    #[test]
597    fn test_poisson_ppf_large_mu() {
598        let poisson = Poisson::new(100.0f64, 0.0).expect("Poisson::new");
599        for &p in &[0.05, 0.5, 0.95] {
600            let k = poisson.ppf_impl(p).expect("ppf");
601            let cdf_k = poisson.cdf(k);
602            assert!(cdf_k >= p - 1e-10, "large mu p={p}: CDF(k)={cdf_k} < p");
603        }
604    }
605}