ndarray_glm/fit.rs
//! Stores the fit results of the IRLS regression and provides functions that
//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.
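//!
//! A minimal usage sketch (not compiled here), following the patterns used in the tests at the
//! bottom of this file; paths are given as they are used within this crate and may differ for
//! downstream users:
//!
//! ```ignore
//! use ndarray::{array, Axis};
//! use ndarray_glm::{model::ModelBuilder, Logistic};
//!
//! let y = array![true, false, true, true, false];
//! let x = array![0.1, -0.4, 0.7, 0.3, -0.2].insert_axis(Axis(1));
//! // Build and fit a logistic model, then query the resulting `Fit` for statistics.
//! let fit = ModelBuilder::<Logistic>::data(&y, &x).build()?.fit()?;
//! let lr = fit.lr_test(); // likelihood-ratio statistic against the null model
//! let aic = fit.aic();
//! ```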

pub mod options;
use crate::{
    error::RegressionResult,
    glm::{DispersionType, Glm},
    irls::Irls,
    link::{Link, Transform},
    model::{Dataset, Model},
    num::Float,
    regularization::IrlsReg,
    Linear,
};
use ndarray::{array, Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2};
use ndarray_linalg::InverseInto;
use options::FitOptions;
use std::{
    cell::{Ref, RefCell},
    marker::PhantomData,
};

/// The result of a successful GLM fit.
pub struct Fit<'a, M, F>
where
    M: Glm,
    F: Float,
{
    model: PhantomData<M>,
    /// The data and model specification used in the fit.
    data: &'a Dataset<F>,
    /// Whether the intercept covariate is used
    use_intercept: bool,
    /// The parameter values that maximize the likelihood as given by the IRLS regression.
    pub result: Array1<F>,
    /// The options used for this fit.
    pub options: FitOptions<F>,
    /// The value of the likelihood function for the fit result.
    pub model_like: F,
    /// The regularizer of the fit
    reg: Box<dyn IrlsReg<F>>,
    /// The number of overall iterations taken in the IRLS.
    pub n_iter: usize,
    /// The number of data points
    n_data: usize,
    /// The number of parameters
    n_par: usize,
    /// The estimated covariance matrix of the parameters. Since the calculation
    /// requires a matrix inversion, it is computed only when needed and the
    /// value is cached. Access through the `covariance()` function.
    cov: RefCell<Option<Array2<F>>>,
    /// The likelihood and parameters for the null model.
    null_model: RefCell<Option<(F, Array1<F>)>>,
}

impl<'a, M, F> Fit<'a, M, F>
where
    M: Glm,
    F: 'static + Float,
{
    /// Returns the Akaike information criterion for the model fit.
    // TODO: Should an effective number of parameters that takes regularization
    // into account be considered?
    pub fn aic(&self) -> F {
        F::from(2 * self.n_par).unwrap() - F::from(2.).unwrap() * self.model_like
    }

    /// Returns the Bayesian information criterion for the model fit.
    // TODO: Also consider the effect of regularization on this statistic.
    // TODO: Wikipedia suggests that the variance should be included in the number
    // of parameters for multiple linear regression. Should an additional
    // parameter be included for the dispersion parameter? This question does
    // not affect the difference between two models fit with the methodology in
    // this package.
    pub fn bic(&self) -> F {
        let logn = num_traits::Float::ln(F::from(self.data.y.len()).unwrap());
        logn * F::from(self.n_par).unwrap() - F::from(2.).unwrap() * self.model_like
    }

    /// The covariance matrix estimated by the Fisher information and the dispersion parameter (for
    /// families with a free scale). The matrix is cached to avoid repeating the potentially
    /// expensive matrix inversion.
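    ///
    /// A sketch of extracting parameter standard errors from this matrix (assuming an existing
    /// `fit` for which the covariance calculation succeeds):
    ///
    /// ```ignore
    /// let cov = fit.covariance()?;
    /// // The standard errors are the square roots of the diagonal elements.
    /// let std_err = cov.diag().mapv(f64::sqrt);
    /// ```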
    pub fn covariance(&self) -> RegressionResult<Ref<Array2<F>>> {
        if self.cov.borrow().is_none() {
            if self.data.weights.is_some() {
                // NOTE: Perhaps it is just the fisher matrix that must be updated.
                unimplemented!(
                    "The covariance calculation must take into account weights/correlations."
                );
            }
            let fisher_reg = self.fisher(&self.result);
            // The covariance must be multiplied by the dispersion parameter.
            // For logistic/poisson regression, this is identically 1.
            // For linear/gamma regression it is estimated from the data.
            let phi: F = self.dispersion();
            // NOTE: invh/invh_into() are bugged and incorrect!
            let unscaled_cov: Array2<F> = fisher_reg.inv_into()?;
            let cov = unscaled_cov * phi;
            *self.cov.borrow_mut() = Some(cov);
        }
        Ok(Ref::map(self.cov.borrow(), |x| x.as_ref().unwrap()))
    }

    /// Returns the deviance of the fit: twice the difference between the
    /// saturated likelihood and the model likelihood. Asymptotically this fits
    /// a chi-squared distribution with `self.ndf()` degrees of freedom.
    /// Note that the regularized likelihood is used here.
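    ///
    /// For families with a free dispersion parameter, the deviance, dispersion, and residual
    /// degrees of freedom are related as sketched below (compare the
    /// `deviance_dispersion_eq_linear` test at the bottom of this file):
    ///
    /// ```ignore
    /// // D = phi * ndf for a free-dispersion family such as linear regression
    /// let expected = fit.dispersion() * fit.ndf() as f64;
    /// assert!((fit.deviance() - expected).abs() < 1e-8);
    /// ```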
    // TODO: This is likely sensitive to regularization because the saturated
    // model is not regularized but the model likelihood is. Perhaps this can be
    // accounted for with an effective number of degrees of freedom.
    pub fn deviance(&self) -> F {
        // Note that this must change if the GLM likelihood subtracts the
        // saturated one already.
        F::from(2.).unwrap() * (self.data.y.mapv(M::log_like_sat).sum() - self.model_like)
    }

    /// The dispersion parameter (typically denoted `phi`) which relates the variance of the `y`
    /// values with the variance of the response distribution: `Var[y] = phi * Var[mu]`.
    /// Identically one for logistic, binomial, and Poisson regression.
    /// For others (linear, gamma) the dispersion parameter is estimated from the data.
    /// This is equal to the total deviance divided by the degrees of freedom. For OLS linear
    /// regression this is equal to the sum of `(y_i - mu_i)^2 / (n-p)`, an estimate of `sigma^2`;
    /// with no covariates it is equal to the sample variance.
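    ///
    /// A sketch for an OLS linear fit (assuming an existing `fit` of the `Linear` family),
    /// illustrating the `sigma^2` estimate described above:
    ///
    /// ```ignore
    /// // The residual sum of squares divided by (n - p) estimates sigma^2.
    /// let sigma_sq = fit.resid_sum_sq() / fit.ndf() as f64;
    /// assert!((fit.dispersion() - sigma_sq).abs() < 1e-8);
    /// ```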
    pub fn dispersion(&self) -> F {
        use DispersionType::*;
        match M::DISPERSED {
            FreeDispersion => {
                let ndf: F = F::from(self.ndf()).unwrap();
                let dev = self.deviance();
                dev / ndf
            }
            NoDispersion => F::one(),
        }
    }

    /// Returns the errors in the response variables for the data passed as an
    /// argument given the current model fit.
    fn errors(&self, data: &Dataset<F>) -> Array1<F> {
        &data.y - &self.predict(&data.x, data.linear_offset.as_ref())
    }

    #[deprecated(since = "0.0.10", note = "use predict() instead")]
    pub fn expectation<S>(
        &self,
        data_x: &ArrayBase<S, Ix2>,
        lin_off: Option<&Array1<F>>,
    ) -> Array1<F>
    where
        S: Data<Elem = F>,
    {
        self.predict(data_x, lin_off)
    }

    /// Returns the fisher information (the negative hessian of the likelihood)
    /// at the parameter values given. The regularization is included.
    pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
        let lin_pred: Array1<F> = self.data.linear_predictor(params);
        let mu: Array1<F> = M::mean(&lin_pred);
        let var_diag: Array1<F> = mu.mapv_into(M::variance);
        // adjust the variance for non-canonical link functions
        let eta_d = M::Link::d_nat_param(&lin_pred);
        let adj_var: Array1<F> = &eta_d * &var_diag * eta_d;
        // calculate the fisher matrix
        let fisher: Array2<F> = (&self.data.x.t() * &adj_var).dot(&self.data.x);
        // Regularize the fisher matrix
        self.reg.as_ref().irls_mat(fisher, params)
    }

    /// Perform a likelihood-ratio test, returning the statistic -2*ln(L_0/L)
    /// where L_0 is the likelihood of the best-fit null model (with no
    /// parameters but the intercept) and L is the likelihood of the fit result.
    /// The number of degrees of freedom of this statistic, equal to the number
    /// of parameters fixed to zero to form the null model, is `test_ndf()`. By
    /// Wilks' theorem this statistic is asymptotically chi-squared distributed
    /// with this number of degrees of freedom.
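    ///
    /// A usage sketch (assuming an existing `fit`); turning the statistic into a p-value requires
    /// a chi-squared CDF from elsewhere and is not shown here:
    ///
    /// ```ignore
    /// let stat = fit.lr_test();
    /// let dof = fit.test_ndf();
    /// // compare `stat` to a chi-squared distribution with `dof` degrees of freedom
    /// ```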
    // TODO: Should the effective number of degrees of freedom due to
    // regularization be taken into account? Should the degrees of freedom be a
    // float?
    pub fn lr_test(&self) -> F {
        // The model likelihood should include regularization terms and there
        // shouldn't be any in the null model with all non-intercept parameters
        // set to zero.
        let null_like = self.null_like();
        F::from(-2.).unwrap() * (null_like - self.model_like)
    }

    /// Perform a likelihood-ratio test against a general alternative model, not
    /// necessarily a null model. The alternative model is regularized the same
    /// way that the regression resulting in this fit was. The degrees of
    /// freedom cannot be generally inferred.
    pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
        let alt_like = M::log_like(self.data, alternative);
        let alt_like_reg = alt_like + self.reg.likelihood(alternative);
        F::from(2.).unwrap() * (self.model_like - alt_like_reg)
    }

    /// Returns the residual degrees of freedom in the model, i.e. the number
    /// of data points minus the number of parameters. Not to be confused with
    /// `test_ndf()`, the degrees of freedom in the statistical tests of the
    /// fit.
    pub fn ndf(&self) -> usize {
        self.n_data - self.n_par
    }

    pub(crate) fn new(data: &'a Dataset<F>, use_intercept: bool, irls: Irls<M, F>) -> Self {
        let Irls {
            guess: result,
            options,
            reg,
            n_iter,
            last_like_data: data_like,
            ..
        } = irls;
        assert_eq!(
            data_like,
            M::log_like(data, &result),
            "Unregularized likelihoods should match exactly."
        );
        // Cache some of these variables that will be used often.
        let n_par = result.len();
        let n_data = data.y.len();
        let model_like = data_like + reg.likelihood(&result);
        Self {
            model: PhantomData,
            data,
            use_intercept,
            result,
            options,
            model_like,
            reg,
            n_iter,
            n_data,
            n_par,
            cov: RefCell::new(None),
            null_model: RefCell::new(None),
        }
    }

    /// Returns the likelihood given the null model, which fixes all parameters
    /// to zero except the intercept (if it is used). A total of `test_ndf()`
    /// parameters are constrained.
    pub fn null_like(&self) -> F {
        let (null_like, _) = self.null_model_fit();
        null_like
    }

    /// Return the likelihood and intercept for the null model. Since this can
    /// require an additional regression, the values are cached.
    fn null_model_fit(&self) -> (F, Array1<F>) {
        // TODO: make a result instead of allowing a potential panic in the borrow.
        if self.null_model.borrow().is_none() {
            let (null_like, null_intercept): (F, Array1<F>) = match &self.data.linear_offset {
                None => {
                    // If there is no linear offset, the natural parameter is
                    // identical for all observations so it is sufficient to
                    // calculate the null likelihood for a single point with y equal
                    // to the average.
                    // The average y
                    let y_bar: F = self
                        .data
                        .y
                        .mean()
                        .expect("Should be able to take average of y values");
                    // This approach assumes that the likelihood is in the natural
                    // exponential form as calculated by Glm::log_like_natural(). If that
                    // function is overridden and the values differ significantly, this
                    // approach will give incorrect results. If the likelihood has terms
                    // non-linear in y, then the likelihood must be calculated for every
                    // point rather than averaged.
                    // If the intercept is allowed to maximize the likelihood, the natural
                    // parameter is equal to the link of the expectation. Otherwise it is
                    // the transformation function of zero.
                    let intercept: F = if self.use_intercept {
                        M::Link::func(y_bar)
                    } else {
                        F::zero()
                    };
                    // This is a length-one array. This works because the
                    // likelihood contribution is the same for all observations.
                    let nat_par = M::Link::nat_param(array![intercept]);
                    // The null likelihood per observation
                    let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
                    // Multiply the per-observation likelihood by the number of data points, since every term is the same.
                    let null_like_total = F::from(self.n_data).unwrap() * null_like_one;
                    let null_params: Array1<F> = {
                        let mut par = Array1::<F>::zeros(self.n_par);
                        par[0] = intercept;
                        par
                    };
                    (null_like_total, null_params)
                }
                Some(off) => {
                    if self.use_intercept {
                        // If there are linear offsets and the intercept is allowed
                        // to be free, there is not a major simplification and the
                        // model needs to be re-fit.
                        // The X data is a single column of ones. Since this model
                        // isn't being created by the ModelBuilder, the X data
                        // has to be padded with ones by hand.
                        let data_x_null = Array2::<F>::ones((self.n_data, 1));
                        let null_model = Model {
                            model: std::marker::PhantomData::<M>,
                            data: Dataset::<F> {
                                y: self.data.y.clone(),
                                x: data_x_null,
                                linear_offset: Some(off.clone()),
                                weights: self.data.weights.clone(),
                                hat: RefCell::new(None),
                            },
                            // If we are in this branch it is because an intercept is needed.
                            use_intercept: true,
                        };
                        // TODO: Make this function return an error, although it's
                        // difficult to imagine this case happening.
                        // TODO: Should the tolerance of this fit be stricter?
                        // The intercept should not be regularized
                        let null_fit = null_model
                            .fit_options()
                            // There shouldn't be too much trouble fitting this
                            // single-parameter fit, but there shouldn't be harm in
                            // using the same maximum as in the original model.
                            .max_iter(self.options.max_iter)
                            .fit()
                            .expect("Could not fit null model!");
                        let null_params: Array1<F> = {
                            let mut par = Array1::<F>::zeros(self.n_par);
                            // there is only one parameter in this fit.
                            par[0] = null_fit.result[0];
                            par
                        };
                        (null_fit.model_like, null_params)
                    } else {
                        // If the intercept is fixed to zero, then no minimization is
                        // required. The natural parameters are directly known in terms
                        // of the linear offset. The likelihood must still be summed
                        // over all observations, since they have different offsets.
                        let nat_par = M::Link::nat_param(off.clone());
                        let null_like = ndarray::Zip::from(&self.data.y)
                            .and(&nat_par)
                            .map_collect(|&y, &eta| M::log_like_natural(y, eta))
                            .sum();
                        let null_params = Array1::<F>::zeros(self.n_par);
                        (null_like, null_params)
                    }
                }
            };
            *self.null_model.borrow_mut() = Some((null_like, null_intercept));
        }
        self.null_model
            .borrow()
            .as_ref()
            .expect("the null model should be cached now")
            .clone()
    }

    /// Returns the expected value of Y given the input data X. This data need
    /// not be the training data, so an option for linear offsets is provided.
    /// Panics if the number of covariates in the data matrix is not consistent
    /// with the training set. The data matrix may need to be padded by ones if
    /// it is not part of a Model. The `utility::one_pad()` function facilitates
    /// this.
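    ///
    /// A sketch of predicting on new covariate data, mirroring the `deviance_linear` test below;
    /// `new_x` stands in for a two-dimensional array of covariates without the intercept column,
    /// which `utility::one_pad()` prepends:
    ///
    /// ```ignore
    /// let y_pred = fit.predict(&one_pad(new_x.view()), None);
    /// ```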
    pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
    where
        S: Data<Elem = F>,
    {
        let lin_pred: Array1<F> = data_x.dot(&self.result);
        let lin_pred: Array1<F> = if let Some(off) = &lin_off {
            lin_pred + *off
        } else {
            lin_pred
        };
        lin_pred.mapv_into(M::Link::func_inv)
    }

    /// Return the deviance residuals for each point in the training data.
    /// Equal to `sign(y-E[y|x])*sqrt(-2*(L[y|x] - L_sat[y]))`.
    /// This is usually a better choice for non-linear models.
    /// NaNs might be possible if L[y|x] > L_sat[y] due to floating-point operations. These are
    /// not checked or clipped right now.
    pub fn resid_dev(&self) -> Array1<F> {
        let signs = self.resid_resp().mapv_into(F::signum);
        let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result);
        let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
        let neg_two = F::from(-2.).unwrap();
        let ll_diff = (ll_terms - ll_sat) * neg_two;
        let dev: Array1<F> = ll_diff.mapv_into(num_traits::Float::sqrt);
        signs * dev
    }

    /// Return the standardized deviance residuals, also known as the "internally studentized
    /// deviance residuals". This is generally applicable for outlier detection, although the
    /// influence of each point on the fit is only approximately accounted for.
    /// `d / sqrt(phi * (1 - h))` where `d` is the deviance residual, phi is the dispersion (e.g.
    /// sigma^2 for linear regression, 1 for logistic regression), and h is the leverage.
    pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>> {
        let dev = self.resid_dev();
        let phi = self.dispersion();
        let hat: Array1<F> = self.data.leverage()?;
        let omh: Array1<F> = -hat + F::one();
        let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
        Ok(dev / denom)
    }

    /// Return the partial residuals.
    pub fn resid_part(&self) -> Array1<F> {
        let x_mean = self.data.x.mean_axis(Axis(0)).expect("empty dataset");
        let x_centered = &self.data.x - x_mean.insert_axis(Axis(0));
        self.resid_work() + x_centered.dot(&self.result)
    }

    /// Return the Pearson residuals for each point in the training data.
    /// This is equal to `(y - E[y])/sqrt(V(E[y]))`, where V is the variance function.
    /// These are not scaled by the sample standard deviation for families with a free dispersion
    /// parameter like linear regression.
    pub fn resid_pear(&self) -> Array1<F> {
        let mu: Array1<F> = self.predict(&self.data.x, self.data.linear_offset.as_ref());
        let residuals = &self.data.y - &mu;
        let var_diag: Array1<F> = mu.mapv_into(M::variance);
        let std: Array1<F> = var_diag.mapv_into(num_traits::Float::sqrt);
        residuals / std
    }

    /// Return the standardized Pearson residuals for every observation.
    /// Also known as the "internally studentized Pearson residuals".
    /// (y - E[y]) / (sqrt(Var[y] * (1 - h))) where h is a vector representing the leverage for
    /// each observation.
    pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>> {
        let pearson = self.resid_pear();
        let phi = self.dispersion();
        let hat = self.data.leverage()?;
        let omh = -hat + F::one();
        let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
        Ok(pearson / denom)
    }

    /// Return the response residuals, or fitting deviation, for each data point in the fit; that
    /// is, the difference y - E[y|x] where the expectation value is the y value predicted by the
    /// model given x.
    pub fn resid_resp(&self) -> Array1<F> {
        self.errors(self.data)
    }

    /// Return the studentized residuals, which are the changes in the fit likelihood resulting
    /// from leaving each observation out. This is a robust and general method for outlier
    /// detection, although a one-step approximation is used to avoid re-fitting the model
    /// completely for each observation.
    /// If the linear errors are standard normally distributed then this statistic should follow a
    /// t-distribution with `self.ndf() - 1` degrees of freedom.
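    ///
    /// A sketch of flagging potential outliers with this statistic (assuming an existing `fit`);
    /// the cutoff of 3 is only an illustration, not a recommendation:
    ///
    /// ```ignore
    /// let student = fit.resid_student()?;
    /// let flagged: Vec<usize> = student
    ///     .iter()
    ///     .enumerate()
    ///     .filter(|(_, r)| r.abs() > 3.)
    ///     .map(|(i, _)| i)
    ///     .collect();
    /// ```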
    pub fn resid_student(&self) -> RegressionResult<Array1<F>> {
        let r_dev = self.resid_dev();
        let r_pear = self.resid_pear();
        let signs = r_pear.mapv(F::signum);
        let r_dev_sq = r_dev.mapv_into(|x| x * x);
        let r_pear_sq = r_pear.mapv_into(|x| x * x);
        let hat = self.data.leverage()?;
        let omh = -hat.clone() + F::one();
        let sum_quad = &r_dev_sq + hat * r_pear_sq / &omh;
        let sum_quad_scaled = match M::DISPERSED {
            // The dispersion is corrected for the contribution from each current point.
            // This is an approximation; the exact solution would perform a fit at each point.
            DispersionType::FreeDispersion => {
                let dev = self.deviance();
                let dof = F::from(self.ndf() - 1).unwrap();
                let phi_i: Array1<F> = (-r_dev_sq / &omh + dev) / dof;
                sum_quad / phi_i
            }
            DispersionType::NoDispersion => sum_quad,
        };
        Ok(signs * sum_quad_scaled.mapv_into(num_traits::Float::sqrt))
    }

    /// Returns the working residuals `d\eta/d\mu * (y - E[y|x])`.
    /// This should be equal to the response residuals divided by the variance function (as
    /// opposed to the square root of the variance as in the Pearson residuals).
    pub fn resid_work(&self) -> Array1<F> {
        let lin_pred: Array1<F> = self.data.linear_predictor(&self.result);
        let mu: Array1<F> = lin_pred.mapv(M::Link::func_inv);
        let resid_response: Array1<F> = &self.data.y - &mu;
        let d_eta: Array1<F> = M::Link::d_nat_param(&lin_pred);
        d_eta * resid_response
    }

    /// Returns the score function (the gradient of the likelihood) at the
    /// parameter values given. It should be zero within FPE at the minimized
    /// result.
    pub fn score(&self, params: &Array1<F>) -> Array1<F> {
        // This represents the predictions given the input parameters, not the
        // fit parameters.
        let lin_pred: Array1<F> = self.data.linear_predictor(params);
        let mu: Array1<F> = M::mean(&lin_pred);
        let resid_response = &self.data.y - mu;
        // adjust for non-canonical link functions.
        let eta_d = M::Link::d_nat_param(&lin_pred);
        let resid_working = eta_d * resid_response;
        let score_unreg = self.data.x.t().dot(&resid_working);
        self.reg.as_ref().gradient(score_unreg, params)
    }

    /// Returns the score test statistic. This statistic is asymptotically
    /// chi-squared distributed with `test_ndf()` degrees of freedom.
    pub fn score_test(&self) -> RegressionResult<F> {
        let (_, null_params) = self.null_model_fit();
        self.score_test_against(null_params)
    }

    /// Returns the score test statistic compared to another set of model
    /// parameters, not necessarily a null model. The degrees of freedom cannot
    /// be generally inferred.
    pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F> {
        let score_alt = self.score(&alternative);
        let fisher_alt = self.fisher(&alternative);
        // This is not the same as the cached covariance matrix since it is
        // evaluated at the null parameters.
        // NOTE: invh/invh_into() are bugged and incorrect!
        let inv_fisher_alt = fisher_alt.inv_into()?;
        Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
    }

    /// The degrees of freedom for the likelihood ratio test, the score test,
    /// and the Wald test. Not to be confused with `ndf()`, the degrees of
    /// freedom in the model fit.
    pub fn test_ndf(&self) -> usize {
        if self.use_intercept {
            self.n_par - 1
        } else {
            self.n_par
        }
    }

    /// Returns the Wald test statistic compared to a null model with only an
    /// intercept (if one is used). This statistic is asymptotically chi-squared
    /// distributed with `test_ndf()` degrees of freedom.
    pub fn wald_test(&self) -> F {
        // The null parameters are all zero except for a possible intercept term
        // which optimizes the null model.
        let (_, null_params) = self.null_model_fit();
        self.wald_test_against(&null_params)
    }

    /// Returns the Wald test statistic compared to another specified model fit
    /// instead of the null model. The degrees of freedom cannot be generally
    /// inferred.
    pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
        let d_params: Array1<F> = &self.result - alternative;
        let fisher_alt: Array2<F> = self.fisher(alternative);
        d_params.t().dot(&fisher_alt.dot(&d_params))
    }

    /// Returns the signed square root of the Wald test statistic for each
    /// parameter. Since it does not account for covariance between the
    /// parameters it may not be accurate.
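    ///
    /// A sketch of using these z-scores with a rough normal approximation (assuming an existing
    /// `fit`; the 1.96 cutoff corresponds to a ~5% two-sided level):
    ///
    /// ```ignore
    /// let z = fit.wald_z()?;
    /// let significant: Vec<bool> = z.iter().map(|zi| zi.abs() > 1.96).collect();
    /// ```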
    pub fn wald_z(&self) -> RegressionResult<Array1<F>> {
        let par_cov = self.covariance()?;
        let par_variances: ArrayView1<F> = par_cov.diag();
        Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
    }
}

/// Specialized functions for OLS.
impl<'a, F> Fit<'a, Linear, F>
where
    F: 'static + Float,
{
    /// Returns the coefficient of multiple correlation, R^2.
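    ///
    /// A sketch of the relationship to the residual sum of squares; `total_sum_sq` here stands in
    /// for the total sum of squares of `y` about its mean, computed separately:
    ///
    /// ```ignore
    /// // R^2 = 1 - RSS / TSS
    /// let r_sq = 1. - fit.resid_sum_sq() / total_sum_sq;
    /// assert!((fit.r_sq() - r_sq).abs() < 1e-12);
    /// ```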
    pub fn r_sq(&self) -> F {
        let y_avg: F = self.data.y.mean().expect("Data should be non-empty");
        let total_sum_sq: F = self.data.y.mapv(|y| y - y_avg).mapv(|dy| dy * dy).sum();
        (total_sum_sq - self.resid_sum_sq()) / total_sum_sq
    }

    /// Returns the residual sum of squares, i.e. the sum of the squared residuals.
    pub fn resid_sum_sq(&self) -> F {
        self.resid_resp().mapv_into(|r| r * r).sum()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        model::ModelBuilder,
        utility::{one_pad, standardize},
        Linear, Logistic,
    };
    use anyhow::Result;
    use approx::assert_abs_diff_eq;
    use ndarray::Axis;

    /// Checks that the test statistics are invariant to whether the data is standardized.
    #[test]
    fn standardization_invariance() -> Result<()> {
        let data_y = array![true, false, false, true, true, true, true, false, true];
        let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
        let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
        let data_x_std = standardize(data_x.clone());
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off.clone())
            .build()?;
        let fit = model.fit()?;
        let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x_std)
            .linear_offset(lin_off)
            .build()?;
        let fit_std = model_std.fit()?;
        let lr = fit.lr_test();
        let lr_std = fit_std.lr_test();
        assert_abs_diff_eq!(lr, lr_std);
        eprintln!("about to try score test");
        assert_abs_diff_eq!(
            fit.score_test()?,
            fit_std.score_test()?,
            epsilon = f32::EPSILON as f64
        );
        eprintln!("about to try wald test");
        assert_abs_diff_eq!(
            fit.wald_test(),
            fit_std.wald_test(),
            epsilon = 4.0 * f64::EPSILON
        );
        assert_abs_diff_eq!(fit.aic(), fit_std.aic());
        assert_abs_diff_eq!(fit.bic(), fit_std.bic());
        eprintln!("about to try deviance");
        assert_abs_diff_eq!(fit.deviance(), fit_std.deviance());
        // The Wald Z-score of the intercept term is not invariant under a
        // linear transformation of the data, but the parameter part seems to
        // be, at least for single-component data.
        assert_abs_diff_eq!(
            fit.wald_z()?[1],
            fit_std.wald_z()?[1],
            epsilon = 0.01 * f32::EPSILON as f64
        );

        Ok(())
    }

    #[test]
    fn null_model() -> Result<()> {
        let data_y = array![true, false, false, true, true];
        let data_x: Array2<f64> = array![[], [], [], [], []];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        dbg!(fit.n_iter);
        dbg!(&fit.result);
        // with no offsets, the result should be the link function of the mean.
        assert_abs_diff_eq!(
            fit.result[0],
            <Logistic as Glm>::Link::func(0.6),
            epsilon = 4.0 * f64::EPSILON
        );
        let empty_null_like = fit.null_like();
        assert_eq!(fit.test_ndf(), 0);
        dbg!(&fit.model_like);
        let lr = fit.lr_test();
        // Since there is no covariate data, the null likelihood should be identical to
        // the fit likelihood, so the likelihood ratio test should yield zero.
        assert_abs_diff_eq!(lr, 0., epsilon = 4. * f64::EPSILON);

        // Check that the assertions still hold if linear offsets are included.
        let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off)
            .build()?;
        let fit_off = model.fit()?;
        let empty_model_like_off = fit_off.model_like;
        let empty_null_like_off = fit_off.null_like();
        // these two assertions should be equivalent
        assert_abs_diff_eq!(fit_off.lr_test(), 0.);
        assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);

        // check consistency with data provided
        let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
        let fit_with = model.fit()?;
        dbg!(&fit_with.result);
        // The null likelihood of the model with parameters should be the same
        // as the likelihood of the model with only the intercept.
        assert_abs_diff_eq!(empty_null_like, fit_with.null_like());

        Ok(())
    }

    #[test]
    fn null_like_logistic() -> Result<()> {
        // 6 true and 4 false for y_bar = 0.6.
        let data_y = array![true, true, true, true, true, true, false, false, false, false];
        let ybar: f64 = 0.6;
        let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let target_null_like = fit
            .data
            .y
            .mapv(|y| {
                let eta = (ybar / (1. - ybar)).ln();
                y * eta - eta.exp().ln_1p()
            })
            .sum();
        assert_abs_diff_eq!(fit.null_like(), target_null_like);
        Ok(())
    }

    // Check that the deviance is equal to the sum of square deviations for a linear model
    #[test]
    fn deviance_linear() -> Result<()> {
        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        // The predicted values of Y given the model.
        let pred_y = fit.predict(&one_pad(data_x.view()), None);
        let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
        assert_abs_diff_eq!(fit.deviance(), target_dev);
        Ok(())
    }

    // Check that the deviance and dispersion parameter are equal up to a factor of the number of
    // degrees of freedom for a linear model.
    #[test]
    fn deviance_dispersion_eq_linear() -> Result<()> {
        let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
        let data_x = array![
            [0.4, 0.2],
            [0.1, 0.4],
            [-0.1, 0.3],
            [0.5, 0.7],
            [0.4, 0.1],
            [-0.2, -0.3],
            [0.4, -0.1]
        ];
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let dev = fit.deviance();
        let disp = fit.dispersion();
        let ndf = fit.ndf() as f64;
        assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
        Ok(())
    }

    // Check that the residuals for a linear model are all consistent.
    #[test]
    fn residuals_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let response = fit.resid_resp();
        let pearson = fit.resid_pear();
        let deviance = fit.resid_dev();
        assert_abs_diff_eq!(response, pearson);
        assert_abs_diff_eq!(response, deviance);
        let pearson_std = fit.resid_pear_std()?;
        let deviance_std = fit.resid_dev_std()?;
        let _student = fit.resid_student()?;
        assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);

        // NOTE: Studentization can't be checked directly because the method used is an
        // approximation. Another approach will be needed to give exact values.
        // let orig_dev = fit.deviance();
        // let n_data = data_y.len();
        // // Check that the leave-one-out stats hold literally
        // let mut loo_dev: Vec<f64> = Vec::new();
        // for i in 0..n_data {
        //     let ya = data_y.slice(s![0..i]);
        //     let yb = data_y.slice(s![i + 1..]);
        //     let xa = data_x.slice(s![0..i, ..]);
        //     let xb = data_x.slice(s![i + 1.., ..]);
        //     let y_loo = concatenate![Axis(0), ya, yb];
        //     let x_loo = concatenate![Axis(0), xa, xb];
        //     let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo).build()?;
        //     let fit_i = model_i.fit()?;
        //     let yi = data_y[i];
        //     let xi = data_x.slice(s![i..i + 1, ..]);
        //     let xi = crate::utility::one_pad(xi);
        //     let yi_pred: f64 = fit_i.predict(&xi, None)[0];
        //     let disp_i = fit_i.dispersion();
        //     let pear_loo = (yi - yi_pred) / disp_i.sqrt();
        //     let dev_i = fit_i.deviance();
        //     let d_dev = 2. * (orig_dev - dev_i);
        //     loo_dev.push(d_dev.sqrt() * (yi - yi_pred).signum());
        // }
        // let loo_dev: Array1<f64> = loo_dev.into();
        // This is off from 1 by a constant factor that depends on the data.
        // This is only approximately true:
        // assert_abs_diff_eq!(student, loo_dev);
        Ok(())
    }

    // check the null likelihood for the case where it can be computed exactly.
    #[test]
    fn null_like_linear() -> Result<()> {
        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
        let ybar: f64 = data_y.mean().unwrap();
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        // let target_null_like = data_y.mapv(|y| -0.5 * (y - ybar) * (y - ybar)).sum();
        let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
        // With the saturated likelihood subtracted the null likelihood should
        // just be the sum of squared differences from the mean.
        // let target_null_like = 0.;
        // dbg!(target_null_like);
        let fit_null_like = fit.null_like();
        assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
        assert_eq!(fit.test_ndf(), 1);
        assert_abs_diff_eq!(
            fit_null_like,
            target_null_like,
            epsilon = 4.0 * f64::EPSILON
        );
        Ok(())
    }

    // check the null likelihood where there is no dependence on the X data.
    #[test]
    fn null_like_logistic_nodep() -> Result<()> {
        let data_y = array![true, true, false, false, true, false, false, true];
        let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let lr = fit.lr_test();
        assert_abs_diff_eq!(lr, 0.);
        Ok(())
    }
    // TODO: Test that the statistics behave sensibly under regularization. The
    // likelihood ratio test should yield a smaller value.

    // Test the basic caching functions.
    #[test]
    fn cached_computations() -> Result<()> {
        let data_y = array![true, true, false, true, true, false, false, false, true];
        let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let _null_like = fit.null_like();
        let _null_like = fit.null_like();
        let _cov = fit.covariance()?;
        let _wald = fit.wald_z();
        Ok(())
    }

    // Check the consistency of the various statistical tests for linear
    // regression, where they should be the most comparable.
    #[test]
    fn linear_stat_tests() -> Result<()> {
        let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
        let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let lr = fit.lr_test();
        let wald = fit.wald_test();
        let score = fit.score_test()?;
        assert_abs_diff_eq!(lr, wald, epsilon = 32.0 * f64::EPSILON);
        assert_abs_diff_eq!(lr, score, epsilon = 32.0 * f64::EPSILON);
        Ok(())
    }
}