ndarray_glm/response/
linear.rs

1//! Functions for solving linear regression
2
3#[cfg(feature = "stats")]
4use crate::response::Response;
5use crate::{
6    error::{RegressionError, RegressionResult},
7    glm::{DispersionType, Glm},
8    link::Link,
9    num::Float,
10    response::Yval,
11};
12use num_traits::ToPrimitive;
13#[cfg(feature = "stats")]
14use statrs::distribution::Normal;
15use std::marker::PhantomData;
16
17/// Linear regression with constant variance (Ordinary least squares).
18pub struct Linear<L = link::Id>
19where
20    L: Link<Linear<L>>,
21{
22    _link: PhantomData<L>,
23}
24
25/// Allow all floating point types in the linear model.
26impl<Y, L> Yval<Linear<L>> for Y
27where
28    Y: Float + ToPrimitive + ToString,
29    L: Link<Linear<L>>,
30{
31    fn into_float<F: Float>(self) -> RegressionResult<F, F> {
32        F::from(self).ok_or_else(|| RegressionError::InvalidY(self.to_string()))
33    }
34}
35
#[cfg(feature = "stats")]
impl<L> Response for Linear<L>
where
    L: Link<Linear<L>>,
{
    type DistributionType = Normal;

    /// Builds the normal distribution with mean `mu` and standard deviation `sqrt(phi)`.
    ///
    /// `phi` is clipped from below at `f64::MIN_POSITIVE` before the square root is taken, so
    /// the smallest possible sigma is on the order of 1e-154 rather than zero.
    ///
    /// # Panics
    ///
    /// Panics if `Normal::new` rejects the resulting parameters.
    fn get_distribution(mu: f64, phi: f64) -> Self::DistributionType {
        // TODO: Returning an error would be preferable to unwrap()-ing the distribution, since
        // a sigma of zero can occur (e.g. in an underspecified model). The statrs errors are
        // not unified, so a simple #[from] on our error enum is not enough and each
        // implementation would need its own map_err. Clipping sidesteps the problem for now.
        let clipped_phi = f64::max(phi, f64::MIN_POSITIVE);
        let sigma = clipped_phi.sqrt();
        Normal::new(mu, sigma).unwrap()
    }
}
53
54impl<L> Glm for Linear<L>
55where
56    L: Link<Linear<L>>,
57{
58    type Link = L;
59    const DISPERSED: DispersionType = DispersionType::FreeDispersion;
60
61    /// Logarithm of the partition function in terms of the natural parameter,
62    /// which is mu for OLS.
63    fn log_partition<F: Float>(nat_par: F) -> F {
64        let half = F::from(0.5).unwrap();
65        half * nat_par * nat_par
66    }
67
68    /// variance is not a function of the mean in OLS regression.
69    fn variance<F: Float>(_mean: F) -> F {
70        F::one()
71    }
72
73    /// The saturated model likelihood is 0.5*y^2 for each observation. Note
74    /// that if a sum of squares were used for the log-likelihood, this would be
75    /// zero.
76    fn log_like_sat<F: Float>(y: F) -> F {
77        // Only for linear regression does this identity hold.
78        Self::log_partition(y)
79    }
80}
81
82pub(crate) mod link {
83    //! Link functions for linear regression.
84    use super::*;
85    use crate::link::{Canonical, Link};
86
87    /// The identity link function, which is canonical for linear regression.
88    pub struct Id;
89    /// The identity is the canonical link function.
90    impl Canonical for Id {}
91    impl Link<Linear> for Id {
92        #[inline]
93        fn func<F: Float>(y: F) -> F {
94            y
95        }
96        #[inline]
97        fn func_inv<F: Float>(lin_pred: F) -> F {
98            lin_pred
99        }
100    }
101}
102
#[cfg(test)]
mod tests {
    use super::{link::Id, Linear};
    use crate::{error::RegressionResult, model::ModelBuilder};
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    #[test]
    // Closure check for the identity link. It should hold trivially, because both the link
    // and the natural parameter are the identity in the linear case.
    fn id_closure() {
        use crate::link::TestLink;
        // A mix of values spanning several orders of magnitude, chosen to avoid any obvious
        // exploitable pattern.
        let samples = crate::array![
            -1e5, -100., -13., -2.0, -1.0, -0.025, -0.001, 0., 0.001, 0.04, 1.0, 2.5, 17., 128.,
            1e5
        ];
        Id::check_closure(&samples);
        Id::check_closure_y(&samples);
    }

    #[test]
    fn lin_reg() -> RegressionResult<(), f64> {
        let beta = array![0.3, 1.2, -0.5];
        let data_x = array![[-0.1, 0.2], [0.7, 0.5], [3.2, 0.1]];
        // Alternative design whose second column is the negative of the first:
        // let data_x = array![[-0.1, 0.1], [0.7, -0.7], [3.2, -3.2]];

        // Response for one row, computed from `beta` as intercept plus two covariate terms.
        let response_at =
            |row: usize| beta[0] + beta[1] * data_x[[row, 0]] + beta[2] * data_x[[row, 1]];
        let data_y = array![response_at(0), response_at(1), response_at(2)];

        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit_options().max_iter(10).fit()?;
        // The default tolerance is too tight for this comparison, so a looser one is used.
        assert_abs_diff_eq!(beta, fit.result, epsilon = 64.0 * f64::EPSILON);
        let _lr: f64 = fit.lr_test();
        Ok(())
    }
}
ndarray_glm/response/linear.rs

ndarray_glm/response/
linear.rs