// ndarray_glm/glm.rs
//! Trait defining a generalized linear model for common functionality.
//! Models are fit such that $`\langle\mathbf{y}\rangle = g^{-1}(\mathbf{X}\mathbf{\beta})`$ where $`g`$ is the
//! link function.
4
5use crate::irls::IrlsStep;
6use crate::link::{Link, Transform};
7use crate::{
8 data::Dataset,
9 error::RegressionResult,
10 fit::{Fit, options::FitOptions},
11 irls::Irls,
12 model::Model,
13 num::Float,
14};
15use itertools::process_results;
16use ndarray::{Array1, Array2, ArrayRef2};
17use ndarray_linalg::SolveH;
18
/// Whether the model's response has a free dispersion parameter (e.g. linear) or if it is fixed to
/// one (e.g. logistic).
// Derives added so the flag can be printed, copied, and compared by downstream
// code; this is backward-compatible with all existing uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DispersionType {
    /// The dispersion parameter is estimated from the data (e.g. linear regression).
    FreeDispersion,
    /// The dispersion parameter is fixed to one (e.g. logistic regression).
    NoDispersion,
}
25
/// Trait describing generalized linear model that enables the IRLS algorithm
/// for fitting.
///
/// An implementor defines its response family through [`Glm::log_partition`],
/// [`Glm::variance`], and [`Glm::log_like_sat`], and selects a link function
/// through the associated [`Glm::Link`] type. The provided methods build on
/// these to supply the quantities used by the IRLS fitting loop.
pub trait Glm: Sized {
    /// The link function type of the GLM instantiation. Implementations specify
    /// this manually so that the provided methods can be called in this trait
    /// without necessitating a trait parameter.
    type Link: Link<Self>;

    /// Registers whether the dispersion is fixed at one (e.g. logistic) or free (e.g. linear)
    const DISPERSED: DispersionType;

    /// The link function which maps the expected value of the response variable
    /// to the linear predictor, applied element-wise.
    ///
    /// NOTE(review): takes `y` by value, whereas [`Glm::mean`] borrows its
    /// argument; both simply map the link function over the array.
    fn link<F: Float>(y: Array1<F>) -> Array1<F> {
        y.mapv(Self::Link::func)
    }

    /// The inverse of the link function which maps the linear predictors to the
    /// expected value of the prediction, applied element-wise.
    fn mean<F: Float>(lin_pred: &Array1<F>) -> Array1<F> {
        lin_pred.mapv(Self::Link::func_inv)
    }

    /// The logarithm of the partition function $`A(\eta)`$ in terms of the natural parameter.
    /// This can be used to calculate the normalized likelihood.
    fn log_partition<F: Float>(nat_par: F) -> F;

    /// The variance function $`V(\mu)`$, the second derivative of the log-partition function
    /// with respect to the natural parameter:
    ///
    /// ```math
    /// V(\mu) = \left.\frac{\partial^2 A(\eta)}{\partial \eta^2}\right|_{\eta = \eta(\mu)}
    /// ```
    ///
    /// This is unique to each response family and does not depend on the link function.
    fn variance<F: Float>(mean: F) -> F;

    /// Get the full adjusted variance diagonal from the linear predictors directly.
    ///
    /// Maps the linear predictors to means, evaluates the variance function at
    /// each mean, then delegates any link-dependent correction to
    /// `Self::Link::adjust_variance`.
    fn adjusted_variance_diag<F: Float>(lin_pred: &Array1<F>) -> Array1<F> {
        // The prediction of y given the current model.
        let predictor: Array1<F> = Self::mean(lin_pred);

        // The variances predicted by the model.
        let var_diag: Array1<F> = predictor.mapv(Self::variance);

        Self::Link::adjust_variance(var_diag, lin_pred)
    }

    /// Returns the likelihood function summed over all observations.
    ///
    /// Each per-observation term from [`Glm::log_like_terms`] is weighted via
    /// `Dataset::apply_total_weights` before the sum. Regularization is not
    /// included here.
    fn log_like<F>(data: &Dataset<F>, regressors: &Array1<F>) -> F
    where
        F: Float,
    {
        // the total likelihood prior to regularization
        let terms = Self::log_like_terms(data, regressors);
        let weighted_terms = data.apply_total_weights(terms);
        weighted_terms.sum()
    }

    /// Per-observation log-likelihood as a function of the response $`y`$ and natural
    /// parameter $`\eta`$:
    ///
    /// ```math
    /// l(y, \eta) = y\eta - A(\eta)
    /// ```
    ///
    /// Terms depending only on $`y`$ are dropped. The dispersion parameter is taken to be 1, as
    /// it does not affect the IRLS steps. The default implementation can be overwritten for
    /// performance or numerical accuracy, but should be mathematically equivalent.
    fn log_like_natural<F>(y: F, nat: F) -> F
    where
        F: Float,
    {
        // NOTE: the saturated likelihood is deliberately NOT subtracted here.
        // Subtracting it would keep the likelihood closer to zero, but it
        // would complicate some fit statistics: some null likelihood tests
        // would fail as written and the current deviance calculation would
        // become incorrect.
        y * nat - Self::log_partition(nat)
    }

    /// Returns the likelihood of a saturated model where every observation can
    /// be fit exactly.
    fn log_like_sat<F>(y: F) -> F
    where
        F: Float;

    /// Returns the log-likelihood contributions for each observable given the regressor values.
    /// It's assumed that these regressors are in the same standardization scale as the dataset.
    fn log_like_terms<F>(data: &Dataset<F>, regressors: &Array1<F>) -> Array1<F>
    where
        F: Float,
    {
        let lin_pred = data.linear_predictor_std(regressors);
        // Map the linear predictors to the natural parameter of the family.
        let nat_par = Self::Link::nat_param(lin_pred);
        // the likelihood prior to regularization, evaluated element-wise over
        // the paired (response, natural parameter) arrays
        ndarray::Zip::from(&data.y)
            .and(&nat_par)
            .map_collect(|&y, &eta| Self::log_like_natural(y, eta))
    }

    /// Provide an initial guess for the parameters. This can be overridden
    /// but this should provide a decent general starting point. The y data is
    /// averaged with its mean to prevent infinities resulting from application
    /// of the link function:
    /// X * beta_0 ~ g(0.5*(y + y_avg))
    /// This is equivalent to minimizing half the sum of squared differences
    /// between X*beta and g(0.5*(y + y_avg)).
    ///
    /// If the linear solve fails, a warning is printed to stderr and the zero
    /// vector is returned instead.
    fn init_guess<F>(data: &Dataset<F>) -> Array1<F>
    where
        F: Float,
        ArrayRef2<F>: SolveH<F>,
    {
        // An empty response has no mean; fall back to zero in that case.
        let y_bar: F = data.y.mean().unwrap_or_else(F::zero);
        // Average each observation with the sample mean so the link is not
        // applied at the extremes of the response, where some links diverge.
        let mu_y: Array1<F> = data.y.mapv(|y| F::half() * (y + y_bar));
        let link_y = mu_y.mapv(Self::Link::func);
        // Compensate for linear offsets if they are present
        let link_y: Array1<F> = if let Some(off) = &data.linear_offset {
            &link_y - off
        } else {
            link_y
        };
        // Normal equations: solve (x_conj . X) beta = x_conj . g(mu_y) with a
        // symmetric/Hermitian solver. (x_conj() is presumably the conjugate
        // transpose of X — TODO confirm against Dataset.)
        let x_mat: Array2<F> = data.x_conj().dot(&data.x);
        let init_guess: Array1<F> = x_mat
            .solveh_into(data.x_conj().dot(&link_y))
            .unwrap_or_else(|err| {
                eprintln!("WARNING: failed to get initial guess for IRLS. Will begin at zero.");
                eprintln!("{err}");
                Array1::<F>::zeros(data.x.ncols())
            });
        init_guess
    }

    /// Do the regression and return a result. Returns object holding fit result.
    ///
    /// # Errors
    /// Propagates any error produced by an IRLS iteration step.
    fn regression<F>(
        model: &Model<Self, F>,
        options: FitOptions<F>,
    ) -> RegressionResult<Fit<'_, Self, F>, F>
    where
        F: Float,
        // NOTE(review): redundant with the `Glm: Sized` supertrait bound.
        Self: Sized,
    {
        let initial: Array1<F> = options
            .init_guess
            .clone()
            // The guess is given in external space, so it should be mapped to the internal before
            // passing to IRLS.
            .map(|b| model.data.transform_beta(b))
            .unwrap_or_else(|| Self::init_guess(&model.data));

        let mut irls: Irls<Self, F> = Irls::new(model, initial, options);

        // Collect the best guess along the way so we don't have to loop back through to make sure
        // we've got the optimum. The full history is stored in the IRLS structure.
        // `process_results` short-circuits on the first Err yielded by an
        // iteration; otherwise the step with the largest likelihood is kept.
        let optimum: IrlsStep<F> = process_results(irls.by_ref(), |iter| {
            iter.max_by(|a, b| a.like.total_cmp(&b.like))
        })?
        // If the iteration yielded no steps at all, fall back to a final step.
        .unwrap_or_else(|| irls.make_last_step());

        Ok(Fit::new(&model.data, optimum, irls))
    }
}
187
#[cfg(test)]
mod tests {
    use crate::{Linear, ModelBuilder};
    use anyhow::Result;
    use ndarray::{Array1, Array2};

    /// Start with a trivially optimized model and make sure that the iteration returns a value
    #[test]
    fn returns_at_least_one() -> Result<()> {
        // An all-zero response with all-zero covariates is optimal at the
        // origin, so the fit should still produce a usable result.
        let n_obs = 4;
        let n_cov = 2;
        let y = Array1::<f64>::zeros(n_obs);
        let x = Array2::<f64>::zeros((n_obs, n_cov));
        let model = ModelBuilder::<Linear>::data(&y, &x).build()?;
        let fit = model.fit_options().l2_reg(1e-6).fit()?;
        // Every coefficient (intercept included, hence n_cov + 1) must be
        // exactly zero — not merely approximately.
        let expected = Array1::<f64>::zeros(n_cov + 1);
        assert_eq!(&fit.result, expected);
        Ok(())
    }
}
205}