ngboost_rs/dist/
poisson.rs

1use crate::dist::{Distribution, DistributionMethods, RegressionDistn};
2use crate::scores::{CRPScore, LogScore, Scorable};
3use ndarray::{array, Array1, Array2, Array3};
4use rand::prelude::*;
5use statrs::distribution::{Discrete, DiscreteCDF, Poisson as PoissonDist};
6
7/// The Poisson distribution.
8#[derive(Debug, Clone)]
9pub struct Poisson {
10    pub rate: Array1<f64>,
11    _params: Array2<f64>,
12}
13
14impl Distribution for Poisson {
15    fn from_params(params: &Array2<f64>) -> Self {
16        let rate = params.column(0).mapv(f64::exp);
17        Poisson {
18            rate,
19            _params: params.clone(),
20        }
21    }
22
23    fn fit(y: &Array1<f64>) -> Array1<f64> {
24        let mean = y.mean().unwrap_or(1.0).max(1e-6);
25        array![mean.ln()]
26    }
27
28    fn n_params(&self) -> usize {
29        1
30    }
31
32    fn predict(&self) -> Array1<f64> {
33        self.rate.clone()
34    }
35
36    fn params(&self) -> &Array2<f64> {
37        &self._params
38    }
39}
40
41impl RegressionDistn for Poisson {}
42
43impl DistributionMethods for Poisson {
44    fn mean(&self) -> Array1<f64> {
45        // Mean of Poisson is rate (lambda)
46        self.rate.clone()
47    }
48
49    fn variance(&self) -> Array1<f64> {
50        // Variance of Poisson is also rate (lambda)
51        self.rate.clone()
52    }
53
54    fn std(&self) -> Array1<f64> {
55        self.rate.mapv(f64::sqrt)
56    }
57
58    fn pdf(&self, y: &Array1<f64>) -> Array1<f64> {
59        // PMF for Poisson (discrete, so we call it pdf for interface consistency)
60        let mut result = Array1::zeros(y.len());
61        for i in 0..y.len() {
62            let y_int = y[i].round() as u64;
63            if y[i] >= 0.0 {
64                if let Ok(d) = PoissonDist::new(self.rate[i]) {
65                    result[i] = d.pmf(y_int);
66                }
67            }
68        }
69        result
70    }
71
72    fn logpdf(&self, y: &Array1<f64>) -> Array1<f64> {
73        // Log PMF for Poisson
74        let mut result = Array1::zeros(y.len());
75        for i in 0..y.len() {
76            let y_int = y[i].round() as u64;
77            if y[i] >= 0.0 {
78                if let Ok(d) = PoissonDist::new(self.rate[i]) {
79                    result[i] = d.ln_pmf(y_int);
80                }
81            } else {
82                result[i] = f64::NEG_INFINITY;
83            }
84        }
85        result
86    }
87
88    fn cdf(&self, y: &Array1<f64>) -> Array1<f64> {
89        // CDF for Poisson: P(X <= y)
90        let mut result = Array1::zeros(y.len());
91        for i in 0..y.len() {
92            if y[i] < 0.0 {
93                result[i] = 0.0;
94            } else {
95                let y_floor = y[i].floor() as u64;
96                if let Ok(d) = PoissonDist::new(self.rate[i]) {
97                    result[i] = d.cdf(y_floor);
98                }
99            }
100        }
101        result
102    }
103
104    fn ppf(&self, q: &Array1<f64>) -> Array1<f64> {
105        // Inverse CDF (quantile function) for Poisson
106        // Returns the smallest integer k such that CDF(k) >= q
107        let mut result = Array1::zeros(q.len());
108        for i in 0..q.len() {
109            let q_clamped = q[i].clamp(0.0, 1.0 - 1e-15);
110            if let Ok(d) = PoissonDist::new(self.rate[i]) {
111                // Use inverse_cdf which returns the smallest k where P(X <= k) >= q
112                result[i] = d.inverse_cdf(q_clamped) as f64;
113            }
114        }
115        result
116    }
117
118    fn sample(&self, n_samples: usize) -> Array2<f64> {
119        let n_obs = self.rate.len();
120        let mut samples = Array2::zeros((n_samples, n_obs));
121        let mut rng = rand::rng();
122
123        for i in 0..n_obs {
124            let rate = self.rate[i];
125            for s in 0..n_samples {
126                // Use Knuth's algorithm for Poisson sampling
127                // This is more reliable than inverse CDF for the statrs implementation
128                let l = (-rate).exp();
129                let mut k = 0u64;
130                let mut p = 1.0_f64;
131
132                loop {
133                    k += 1;
134                    let u: f64 = rng.random();
135                    p *= u;
136                    if p <= l {
137                        break;
138                    }
139                }
140                samples[[s, i]] = (k - 1) as f64;
141            }
142        }
143        samples
144    }
145
146    fn median(&self) -> Array1<f64> {
147        // Median of Poisson is approximately rate - ln(2) + 1/(3*rate) for large rate
148        // For exact, use ppf(0.5)
149        let q = Array1::from_elem(self.rate.len(), 0.5);
150        self.ppf(&q)
151    }
152
153    fn mode(&self) -> Array1<f64> {
154        // Mode of Poisson is floor(rate)
155        // For integer rate, both floor(rate) and floor(rate)-1 are modes
156        self.rate.mapv(|r| r.floor())
157    }
158}
159
160impl Poisson {
161    /// Returns the probability mass function (PMF) at y.
162    /// This is an alias for pdf() but with a more appropriate name for discrete distributions.
163    pub fn pmf(&self, y: &Array1<f64>) -> Array1<f64> {
164        self.pdf(y)
165    }
166
167    /// Returns the log probability mass function at y.
168    pub fn ln_pmf(&self, y: &Array1<f64>) -> Array1<f64> {
169        self.logpdf(y)
170    }
171}
172
173impl Scorable<LogScore> for Poisson {
174    fn score(&self, y: &Array1<f64>) -> Array1<f64> {
175        let mut scores = Array1::zeros(y.len());
176        for (i, &y_i) in y.iter().enumerate() {
177            if let Ok(d) = PoissonDist::new(self.rate[i]) {
178                scores[i] = -d.ln_pmf(y_i.round() as u64);
179            } else {
180                scores[i] = f64::MAX;
181            }
182        }
183        scores
184    }
185
186    fn d_score(&self, y: &Array1<f64>) -> Array2<f64> {
187        let n_obs = y.len();
188        let mut d_params = Array2::zeros((n_obs, 1));
189
190        // d/d(log(rate)) of -log_pmf
191        // log_pmf = -rate + y*log(rate) - log(y!)
192        // d(-log_pmf)/d(log(rate)) = rate - y
193        let grad = &self.rate - y;
194
195        d_params.column_mut(0).assign(&grad);
196        d_params
197    }
198
199    fn metric(&self) -> Array3<f64> {
200        // Fisher Information for Poisson: rate
201        let n_obs = self.rate.len();
202        let mut fi = Array3::zeros((n_obs, 1, 1));
203
204        for i in 0..n_obs {
205            fi[[i, 0, 0]] = self.rate[i];
206        }
207
208        fi
209    }
210}
211
212impl Scorable<CRPScore> for Poisson {
213    fn score(&self, y: &Array1<f64>) -> Array1<f64> {
214        // CRPS for Poisson distribution (discrete CRPS)
215        // For discrete distributions:
216        // CRPS = E|X - y| - 0.5 * E|X - X'|
217        // where X, X' are independent draws from the Poisson distribution
218        //
219        // For Poisson with rate λ, there's a closed-form expression:
220        // CRPS = y * (2*F(y) - 1) - λ * (2*F(y-1) - 1) + 2*λ*f(floor(y)) - λ*exp(-2λ)*I_0(2λ)
221        // where F is CDF, f is PMF, and I_0 is modified Bessel function
222        //
223        // Simpler approximation using the identity:
224        // CRPS = 2 * Σ_{k=0}^{∞} (F(k) - 1{y ≤ k}) * (F(k) - 1{y ≤ k-1})
225        //      = Σ_{k=0}^{∞} (F(k) - 1{y ≤ k})²
226        //
227        // We compute this by summing over k values with significant probability mass
228
229        let mut scores = Array1::zeros(y.len());
230
231        for i in 0..y.len() {
232            let lambda = self.rate[i];
233            let y_i = y[i].round() as i64; // Round to nearest integer
234
235            if let Ok(d) = PoissonDist::new(lambda) {
236                // Compute CRPS using the discrete formula
237                // CRPS = Σ_{k=0}^{∞} (F(k) - 1{y ≤ k})²
238                // We truncate the sum when probability mass becomes negligible
239
240                let mut crps = 0.0;
241
242                // Determine range to sum over (cover most of the probability mass)
243                // Use mean ± 6*std for Poisson (covers >99.99% of mass)
244                let std_dev = lambda.sqrt();
245                let k_max = ((lambda + 6.0 * std_dev).ceil() as i64).max(y_i + 10);
246
247                for k in 0..=k_max {
248                    let f_k = d.cdf(k as u64);
249                    let indicator = if y_i <= k { 1.0 } else { 0.0 };
250                    let diff = f_k - indicator;
251                    crps += diff * diff;
252                }
253
254                scores[i] = crps;
255            } else {
256                scores[i] = f64::MAX;
257            }
258        }
259        scores
260    }
261
262    fn d_score(&self, y: &Array1<f64>) -> Array2<f64> {
263        // Gradient of discrete CRPS w.r.t. log(rate)
264        // Using numerical differentiation or analytical form
265        //
266        // d(CRPS)/d(log(λ)) = λ * d(CRPS)/d(λ)
267        //
268        // For Poisson CRPS, the gradient involves:
269        // d(F(k))/d(λ) = -f(k) + f(k-1) for k >= 1, and -f(0) for k=0
270        // where f is the PMF
271
272        let n_obs = y.len();
273        let mut d_params = Array2::zeros((n_obs, 1));
274
275        for i in 0..n_obs {
276            let lambda = self.rate[i];
277            let y_i = y[i].round() as i64;
278
279            if let Ok(d) = PoissonDist::new(lambda) {
280                let mut d_crps = 0.0;
281
282                let std_dev = lambda.sqrt();
283                let k_max = ((lambda + 6.0 * std_dev).ceil() as i64).max(y_i + 10);
284
285                for k in 0..=k_max {
286                    let f_k = d.cdf(k as u64);
287                    let indicator = if y_i <= k { 1.0 } else { 0.0 };
288                    let diff = f_k - indicator;
289
290                    // d(F(k))/d(λ) for Poisson:
291                    // F(k) = Γ(k+1, λ) / k! = Q(k+1, λ) (regularized incomplete gamma)
292                    // d(F(k))/d(λ) = -f(k) where f(k) is the PMF
293                    let pmf_k = d.pmf(k as u64);
294                    let df_dlambda = -pmf_k;
295
296                    d_crps += 2.0 * diff * df_dlambda;
297                }
298
299                // Convert to d/d(log(λ))
300                d_params[[i, 0]] = lambda * d_crps;
301            }
302        }
303
304        d_params
305    }
306
307    fn metric(&self) -> Array3<f64> {
308        // Metric for discrete CRPS
309        // Using an approximation based on the Poisson variance structure
310        let n_obs = self.rate.len();
311        let mut fi = Array3::zeros((n_obs, 1, 1));
312
313        // For Poisson CRPS, the metric scales with rate
314        // Approximation: metric ≈ 2 * sqrt(rate) / sqrt(pi)
315        for i in 0..n_obs {
316            let lambda = self.rate[i];
317            fi[[i, 0, 0]] = 2.0 * lambda.sqrt() / std::f64::consts::PI.sqrt();
318        }
319
320        fi
321    }
322}
323
324#[cfg(test)]
325mod tests {
326    use super::*;
327    use approx::assert_relative_eq;
328
329    #[test]
330    fn test_poisson_distribution_methods() {
331        let params = Array2::from_shape_vec((2, 1), vec![1.0_f64.ln(), 5.0_f64.ln()]).unwrap();
332        let dist = Poisson::from_params(&params);
333
334        // Test mean: rate
335        let mean = dist.mean();
336        assert_relative_eq!(mean[0], 1.0, epsilon = 1e-10);
337        assert_relative_eq!(mean[1], 5.0, epsilon = 1e-10);
338
339        // Test variance: rate
340        let var = dist.variance();
341        assert_relative_eq!(var[0], 1.0, epsilon = 1e-10);
342        assert_relative_eq!(var[1], 5.0, epsilon = 1e-10);
343
344        // Test mode: floor(rate) - but for integer rates, mode = rate
345        // Due to floating point, exp(ln(5)) ≈ 5 but floor might give 4
346        let mode = dist.mode();
347        assert_relative_eq!(mode[0], 1.0, epsilon = 1e-10);
348        // Mode for Poisson(5) is 4 or 5 (both valid), we use floor which may give 4
349        assert!(mode[1] == 4.0 || mode[1] == 5.0);
350    }
351
352    #[test]
353    fn test_poisson_cdf_ppf() {
354        let params = Array2::from_shape_vec((1, 1), vec![5.0_f64.ln()]).unwrap();
355        let dist = Poisson::from_params(&params);
356
357        // CDF at mean (5) should be around 0.616 for Poisson(5)
358        let y = Array1::from_vec(vec![5.0]);
359        let cdf = dist.cdf(&y);
360        assert!(cdf[0] > 0.5 && cdf[0] < 0.7);
361
362        // PPF at 0.5 should be around median
363        let q = Array1::from_vec(vec![0.5]);
364        let ppf = dist.ppf(&q);
365        assert!(ppf[0] >= 4.0 && ppf[0] <= 5.0);
366    }
367
368    #[test]
369    fn test_poisson_pmf() {
370        let params = Array2::from_shape_vec((1, 1), vec![2.0_f64.ln()]).unwrap();
371        let dist = Poisson::from_params(&params);
372
373        // PMF at 2 for Poisson(2): exp(-2) * 2^2 / 2! = exp(-2) * 2 ≈ 0.271
374        let y = Array1::from_vec(vec![2.0]);
375        let pmf = dist.pmf(&y);
376        let expected = (-2.0_f64).exp() * 4.0 / 2.0;
377        assert_relative_eq!(pmf[0], expected, epsilon = 1e-10);
378
379        // PMF at 0 for Poisson(2): exp(-2) ≈ 0.135
380        let y = Array1::from_vec(vec![0.0]);
381        let pmf = dist.pmf(&y);
382        assert_relative_eq!(pmf[0], (-2.0_f64).exp(), epsilon = 1e-10);
383    }
384
385    #[test]
386    fn test_poisson_sample() {
387        let params = Array2::from_shape_vec((1, 1), vec![3.0_f64.ln()]).unwrap();
388        let dist = Poisson::from_params(&params);
389
390        let samples = dist.sample(1000);
391        assert_eq!(samples.shape(), &[1000, 1]);
392
393        // All samples should be non-negative integers
394        assert!(samples.iter().all(|&x| x >= 0.0 && x.fract() == 0.0));
395
396        // Check that sample mean is close to rate = 3
397        let sample_mean: f64 = samples.column(0).iter().sum::<f64>() / samples.nrows() as f64;
398        assert!((sample_mean - 3.0).abs() < 0.5);
399    }
400
401    #[test]
402    fn test_poisson_fit() {
403        let y = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
404        let params = Poisson::fit(&y);
405        assert_eq!(params.len(), 1);
406        // Mean of y is 3.0, so log(rate) should be log(3)
407        assert_relative_eq!(params[0], 3.0_f64.ln(), epsilon = 1e-10);
408    }
409
410    #[test]
411    fn test_poisson_logscore() {
412        let params = Array2::from_shape_vec((1, 1), vec![2.0_f64.ln()]).unwrap();
413        let dist = Poisson::from_params(&params);
414
415        let y = Array1::from_vec(vec![2.0]);
416        let score = Scorable::<LogScore>::score(&dist, &y);
417
418        // Score should be finite and positive
419        assert!(score[0].is_finite());
420        assert!(score[0] > 0.0);
421
422        // Score should equal -ln(pmf(2))
423        let expected = -dist.ln_pmf(&y)[0];
424        assert_relative_eq!(score[0], expected, epsilon = 1e-10);
425    }
426
427    #[test]
428    fn test_poisson_d_score() {
429        let params = Array2::from_shape_vec((1, 1), vec![2.0_f64.ln()]).unwrap();
430        let dist = Poisson::from_params(&params);
431
432        let y = Array1::from_vec(vec![2.0]);
433        let d_score = Scorable::<LogScore>::d_score(&dist, &y);
434
435        // d(-log_pmf)/d(log(rate)) = rate - y = 2 - 2 = 0
436        assert_relative_eq!(d_score[[0, 0]], 0.0, epsilon = 1e-10);
437
438        // For y = 1, d_score = 2 - 1 = 1
439        let y = Array1::from_vec(vec![1.0]);
440        let d_score = Scorable::<LogScore>::d_score(&dist, &y);
441        assert_relative_eq!(d_score[[0, 0]], 1.0, epsilon = 1e-10);
442    }
443
444    #[test]
445    fn test_poisson_metric() {
446        let params = Array2::from_shape_vec((1, 1), vec![3.0_f64.ln()]).unwrap();
447        let dist = Poisson::from_params(&params);
448
449        let metric = Scorable::<LogScore>::metric(&dist);
450        // Fisher Information for Poisson is rate
451        assert_relative_eq!(metric[[0, 0, 0]], 3.0, epsilon = 1e-10);
452    }
453
454    #[test]
455    fn test_poisson_median() {
456        let params = Array2::from_shape_vec((1, 1), vec![10.0_f64.ln()]).unwrap();
457        let dist = Poisson::from_params(&params);
458
459        // Median for Poisson(10) is around 10
460        let median = dist.median();
461        assert!(median[0] >= 9.0 && median[0] <= 10.0);
462    }
463
464    #[test]
465    fn test_poisson_interval() {
466        let params = Array2::from_shape_vec((1, 1), vec![5.0_f64.ln()]).unwrap();
467        let dist = Poisson::from_params(&params);
468
469        let (lower, upper) = dist.interval(0.1);
470        // 90% interval for Poisson(5) should roughly contain most probability
471        assert!(lower[0] >= 0.0);
472        assert!(upper[0] > lower[0]);
473        assert!(lower[0] <= 5.0);
474        assert!(upper[0] >= 5.0);
475    }
476
477    #[test]
478    fn test_poisson_cdf_bounds() {
479        let params = Array2::from_shape_vec((1, 1), vec![5.0_f64.ln()]).unwrap();
480        let dist = Poisson::from_params(&params);
481
482        // CDF at very large value should be close to 1
483        let y = Array1::from_vec(vec![100.0]);
484        let cdf = dist.cdf(&y);
485        assert!(cdf[0] > 0.999);
486
487        // CDF at negative value should be 0
488        let y_neg = Array1::from_vec(vec![-1.0]);
489        let cdf_neg = dist.cdf(&y_neg);
490        assert_relative_eq!(cdf_neg[0], 0.0, epsilon = 1e-10);
491    }
492
493    #[test]
494    fn test_poisson_crpscore() {
495        let params = Array2::from_shape_vec((1, 1), vec![5.0_f64.ln()]).unwrap(); // rate = 5
496        let dist = Poisson::from_params(&params);
497
498        let y = Array1::from_vec(vec![5.0]);
499        let score = Scorable::<CRPScore>::score(&dist, &y);
500
501        // CRPS should be finite and non-negative
502        assert!(score[0].is_finite());
503        assert!(score[0] >= 0.0);
504
505        // CRPS at the mean should be relatively small
506        assert!(score[0] < 1.0);
507    }
508
509    #[test]
510    fn test_poisson_crpscore_extreme() {
511        let params = Array2::from_shape_vec((1, 1), vec![5.0_f64.ln()]).unwrap(); // rate = 5
512        let dist = Poisson::from_params(&params);
513
514        // CRPS at extreme values should be larger
515        let y_low = Array1::from_vec(vec![0.0]);
516        let y_high = Array1::from_vec(vec![15.0]);
517        let y_mean = Array1::from_vec(vec![5.0]);
518
519        let score_low = Scorable::<CRPScore>::score(&dist, &y_low);
520        let score_high = Scorable::<CRPScore>::score(&dist, &y_high);
521        let score_mean = Scorable::<CRPScore>::score(&dist, &y_mean);
522
523        // Scores at extremes should be larger than at the mean
524        assert!(score_low[0] > score_mean[0]);
525        assert!(score_high[0] > score_mean[0]);
526    }
527
528    #[test]
529    fn test_poisson_crpscore_d_score() {
530        let params = Array2::from_shape_vec((1, 1), vec![3.0_f64.ln()]).unwrap();
531        let dist = Poisson::from_params(&params);
532
533        let y = Array1::from_vec(vec![3.0]);
534        let d_score = Scorable::<CRPScore>::d_score(&dist, &y);
535
536        // Gradient should be finite
537        assert!(d_score[[0, 0]].is_finite());
538    }
539
540    #[test]
541    fn test_poisson_crpscore_metric() {
542        let params = Array2::from_shape_vec((1, 1), vec![5.0_f64.ln()]).unwrap();
543        let dist = Poisson::from_params(&params);
544
545        let metric = Scorable::<CRPScore>::metric(&dist);
546
547        // Metric should be positive
548        assert!(metric[[0, 0, 0]] > 0.0);
549    }
550}