Skip to main content

ndarray_glm/
glm.rs

1//! Trait defining a generalized linear model for common functionality.
2//! Models are fit such that $`<\mathbf{y}> = g^{-1}(\mathbf{X}*\mathbf{\beta})`$ where g is the
3//! link function.
4
5use crate::irls::IrlsStep;
6use crate::link::{Link, Transform};
7use crate::{
8    data::Dataset,
9    error::RegressionResult,
10    fit::{Fit, options::FitOptions},
11    irls::Irls,
12    model::Model,
13    num::Float,
14};
15use itertools::process_results;
16use ndarray::{Array1, Array2, ArrayRef2};
17use ndarray_linalg::SolveH;
18
/// Whether the model's response has a free dispersion parameter (e.g. linear) or if it is fixed to
/// one (e.g. logistic)
pub enum DispersionType {
    /// The dispersion parameter is free and estimated from the data (e.g. a linear model).
    FreeDispersion,
    /// The dispersion parameter is fixed to one (e.g. a logistic model).
    NoDispersion,
}
25
26/// Trait describing generalized linear model that enables the IRLS algorithm
27/// for fitting.
28pub trait Glm: Sized {
29    /// The link function type of the GLM instantiation. Implementations specify
30    /// this manually so that the provided methods can be called in this trait
31    /// without necessitating a trait parameter.
32    type Link: Link<Self>;
33
34    /// Registers whether the dispersion is fixed at one (e.g. logistic) or free (e.g. linear)
35    const DISPERSED: DispersionType;
36
37    /// The link function which maps the expected value of the response variable
38    /// to the linear predictor.
39    fn link<F: Float>(y: Array1<F>) -> Array1<F> {
40        y.mapv(Self::Link::func)
41    }
42
43    /// The inverse of the link function which maps the linear predictors to the
44    /// expected value of the prediction.
45    fn mean<F: Float>(lin_pred: &Array1<F>) -> Array1<F> {
46        lin_pred.mapv(Self::Link::func_inv)
47    }
48
49    /// The logarithm of the partition function $`A(\eta)`$ in terms of the natural parameter.
50    /// This can be used to calculate the normalized likelihood.
51    fn log_partition<F: Float>(nat_par: F) -> F;
52
53    /// The variance function $`V(\mu)`$, the second derivative of the log-partition function
54    /// with respect to the natural parameter:
55    ///
56    /// ```math
57    /// V(\mu) = \left.\frac{\partial^2 A(\eta)}{\partial \eta^2}\right|_{\eta = \eta(\mu)}
58    /// ```
59    ///
60    /// This is unique to each response family and does not depend on the link function.
61    fn variance<F: Float>(mean: F) -> F;
62
63    /// Get the full adjusted variance diagonal from the linear predictors directly
64    fn adjusted_variance_diag<F: Float>(lin_pred: &Array1<F>) -> Array1<F> {
65        // The prediction of y given the current model.
66        let predictor: Array1<F> = Self::mean(lin_pred);
67
68        // The variances predicted by the model.
69        let var_diag: Array1<F> = predictor.mapv(Self::variance);
70
71        Self::Link::adjust_variance(var_diag, lin_pred)
72    }
73
74    /// Returns the likelihood function summed over all observations.
75    fn log_like<F>(data: &Dataset<F>, regressors: &Array1<F>) -> F
76    where
77        F: Float,
78    {
79        // the total likelihood prior to regularization
80        let terms = Self::log_like_terms(data, regressors);
81        let weighted_terms = data.apply_total_weights(terms);
82        weighted_terms.sum()
83    }
84
85    /// Per-observation log-likelihood as a function of the response $`y`$ and natural
86    /// parameter $`\eta`$:
87    ///
88    /// ```math
89    /// l(y, \eta) = y\eta - A(\eta)
90    /// ```
91    ///
92    /// Terms depending only on $`y`$ are dropped. The dispersion parameter is taken to be 1, as
93    /// it does not affect the IRLS steps. The default implementation can be overwritten for
94    /// performance or numerical accuracy, but should be mathematically equivalent.
95    fn log_like_natural<F>(y: F, nat: F) -> F
96    where
97        F: Float,
98    {
99        // subtracting the saturated likelihood to keep the likelihood closer to
100        // zero, but this can complicate some fit statistics. In addition to
101        // causing some null likelihood tests to fail as written, it would make
102        // the current deviance calculation incorrect.
103        y * nat - Self::log_partition(nat)
104    }
105
106    /// Returns the likelihood of a saturated model where every observation can
107    /// be fit exactly.
108    fn log_like_sat<F>(y: F) -> F
109    where
110        F: Float;
111
112    /// Returns the log-likelihood contributions for each observable given the regressor values.
113    /// It's assumed that these regresssors are in the same standardization scale as the dataset.
114    fn log_like_terms<F>(data: &Dataset<F>, regressors: &Array1<F>) -> Array1<F>
115    where
116        F: Float,
117    {
118        let lin_pred = data.linear_predictor_std(regressors);
119        let nat_par = Self::Link::nat_param(lin_pred);
120        // the likelihood prior to regularization
121        ndarray::Zip::from(&data.y)
122            .and(&nat_par)
123            .map_collect(|&y, &eta| Self::log_like_natural(y, eta))
124    }
125
126    /// Provide an initial guess for the parameters. This can be overridden
127    /// but this should provide a decent general starting point. The y data is
128    /// averaged with its mean to prevent infinities resulting from application
129    /// of the link function:
130    /// X * beta_0 ~ g(0.5*(y + y_avg))
131    /// This is equivalent to minimizing half the sum of squared differences
132    /// between X*beta and g(0.5*(y + y_avg)).
133    fn init_guess<F>(data: &Dataset<F>) -> Array1<F>
134    where
135        F: Float,
136        ArrayRef2<F>: SolveH<F>,
137    {
138        let y_bar: F = data.y.mean().unwrap_or_else(F::zero);
139        let mu_y: Array1<F> = data.y.mapv(|y| F::half() * (y + y_bar));
140        let link_y = mu_y.mapv(Self::Link::func);
141        // Compensate for linear offsets if they are present
142        let link_y: Array1<F> = if let Some(off) = &data.linear_offset {
143            &link_y - off
144        } else {
145            link_y
146        };
147        let x_mat: Array2<F> = data.x_conj().dot(&data.x);
148        let init_guess: Array1<F> = x_mat
149            .solveh_into(data.x_conj().dot(&link_y))
150            .unwrap_or_else(|err| {
151                eprintln!("WARNING: failed to get initial guess for IRLS. Will begin at zero.");
152                eprintln!("{err}");
153                Array1::<F>::zeros(data.x.ncols())
154            });
155        init_guess
156    }
157
158    /// Do the regression and return a result. Returns object holding fit result.
159    fn regression<F>(
160        model: &Model<Self, F>,
161        options: FitOptions<F>,
162    ) -> RegressionResult<Fit<'_, Self, F>, F>
163    where
164        F: Float,
165        Self: Sized,
166    {
167        let initial: Array1<F> = options
168            .init_guess
169            .clone()
170            // The guess is given in external space, so it should be mapped to the internal before
171            // passing to IRLS.
172            .map(|b| model.data.transform_beta(b))
173            .unwrap_or_else(|| Self::init_guess(&model.data));
174
175        let mut irls: Irls<Self, F> = Irls::new(model, initial, options);
176
177        // Collect the best guess along the way so we don't have to loop back through to make sure
178        // we've got the optimum. The full history is stored in the IRLS structure.
179        let optimum: IrlsStep<F> = process_results(irls.by_ref(), |iter| {
180            iter.max_by(|a, b| a.like.total_cmp(&b.like))
181        })?
182        .unwrap_or_else(|| irls.make_last_step());
183
184        Ok(Fit::new(&model.data, optimum, irls))
185    }
186}
187
#[cfg(test)]
mod tests {
    use crate::{Linear, ModelBuilder};
    use anyhow::Result;
    use ndarray::{Array1, Array2};

    /// A trivially-optimized model should still produce at least one iteration result.
    #[test]
    fn returns_at_least_one() -> Result<()> {
        let y = Array1::<f64>::zeros(4);
        let x = Array2::<f64>::zeros((4, 2));
        let model = ModelBuilder::<Linear>::data(&y, &x).build()?;
        let fit = model.fit_options().l2_reg(1e-6).fit()?;
        // The result must be exactly zero, not merely approximately so.
        assert_eq!(&fit.result, Array1::<f64>::zeros(3));
        Ok(())
    }
}
205}