Skip to main content

ndarray_glm/response/
poisson.rs

1//! Model for Poisson regression
2
3#[cfg(feature = "stats")]
4use crate::response::Response;
5use crate::{
6    error::{RegressionError, RegressionResult},
7    glm::{DispersionType, Glm},
8    link::Link,
9    math::prod_log,
10    num::Float,
11    response::Yval,
12};
13use num_traits::{ToPrimitive, Unsigned};
14#[cfg(feature = "stats")]
15use statrs::distribution::Poisson as PoisDist;
16use std::marker::PhantomData;
17
18/// Poisson regression over an unsigned integer type.
19pub struct Poisson<L = link::Log>
20where
21    L: Link<Poisson<L>>,
22{
23    _link: PhantomData<L>,
24}
25
26/// Poisson variables can be any unsigned integer.
27impl<U, L> Yval<Poisson<L>> for U
28where
29    U: Unsigned + ToPrimitive + ToString + Copy,
30    L: Link<Poisson<L>>,
31{
32    fn into_float<F: Float>(self) -> RegressionResult<F, F> {
33        F::from(self).ok_or_else(|| RegressionError::InvalidY(self.to_string()))
34    }
35}
36// TODO: A floating point response for Poisson might also be do-able.
37
#[cfg(feature = "stats")]
impl<L> Response for Poisson<L>
where
    L: Link<Poisson<L>>,
{
    type DistributionType = PoisDist;

    /// Builds the Poisson distribution whose rate is the predicted mean `mu`.
    ///
    /// The dispersion argument `_phi` is not used.
    fn get_distribution(mu: f64, _phi: f64) -> Self::DistributionType {
        // NOTE: Constructing the distribution would panic for mu <= 0. The canonical Poisson
        // model shouldn't produce such a mean, but the rate is clamped to the smallest positive
        // f64 anyway, which guards against it without giving up any precision.
        let rate = f64::max(mu, f64::MIN_POSITIVE);
        PoisDist::new(rate).unwrap()
    }
}
51
52impl<L> Glm for Poisson<L>
53where
54    L: Link<Poisson<L>>,
55{
56    type Link = L;
57    const DISPERSED: DispersionType = DispersionType::NoDispersion;
58
59    /// The logarithm of the partition function for Poisson is the exponential of the natural
60    /// parameter, which is the logarithm of the mean.
61    fn log_partition<F: Float>(nat_par: F) -> F {
62        num_traits::Float::exp(nat_par)
63    }
64
65    /// The variance of a Poisson variable is equal to its mean.
66    fn variance<F: Float>(mean: F) -> F {
67        mean
68    }
69
70    /// The saturation likelihood of the Poisson distribution is non-trivial.
71    /// It is equal to y * (log(y) - 1). We aren't including the normalization term B = -log(y!).
72    fn log_like_sat<F: Float>(y: F) -> F {
73        prod_log(y) - y
74    }
75}
76
77pub(crate) mod link {
78    //! Link functions for Poisson regression
79    use super::Poisson;
80    use crate::{
81        link::{Canonical, Link},
82        num::Float,
83    };
84
85    /// The canonical link function of the Poisson response is the logarithm.
86    pub struct Log {}
87    impl Canonical for Log {}
88    impl Link<Poisson<Log>> for Log {
89        fn func<F: Float>(y: F) -> F {
90            num_traits::Float::ln(y)
91        }
92        fn func_inv<F: Float>(lin_pred: F) -> F {
93            num_traits::Float::exp(lin_pred)
94        }
95    }
96}
97
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{error::RegressionResult, model::ModelBuilder};
    use approx::assert_abs_diff_eq;
    use ndarray::{Array1, array};

    /// Fits a small Poisson regression with the default link and compares the fitted
    /// coefficients against the expected values (0, ln 2, -ln 2).
    #[test]
    fn poisson_reg() -> RegressionResult<(), f64> {
        let ln2 = f64::ln(2.);
        let beta = array![0., ln2, -ln2];
        let data_x = array![[1., 0.], [1., 1.], [0., 1.], [0., 1.]];
        let data_y: Array1<u32> = array![2, 1, 0, 1];
        let model = ModelBuilder::<Poisson>::data(&data_y, &data_x).build()?;
        let fit = model.fit_options().max_iter(10).fit()?;
        // The tolerance is single-precision epsilon, widened losslessly to f64.
        assert_abs_diff_eq!(beta, fit.result, epsilon = f64::from(f32::EPSILON));
        Ok(())
    }

    /// Confirms closure of the log link explicitly, in both directions.
    #[test]
    fn log_closure() {
        use super::link::Log;
        use crate::link::TestLink;
        // Linear-predictor values, spanning large negative to moderately positive inputs.
        let x = array![-500., -50., -2.0, -0.2, 0., 0.5, 20.];
        Log::check_closure(&x);
        // Response-scale values. Zero is included on purpose: its logarithm is negative
        // infinity, which is the boundary case for this link.
        let y = array![0., 1e-5, 0.25, 0.5, 0.8, 0.9999, 1.0];
        Log::check_closure_y(&y);
    }
}
131}