ndarray_glm/fit.rs
//! Stores the fit results of the IRLS regression and provides functions that
//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.

pub mod options;
use crate::{
    error::RegressionResult,
    glm::{DispersionType, Glm},
    irls::{Irls, IrlsStep},
    link::{Link, Transform},
    model::{Dataset, Model},
    num::Float,
    regularization::IrlsReg,
    Linear,
};
use ndarray::{array, Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2};
use ndarray_linalg::InverseInto;
use options::FitOptions;
use std::{
    cell::{Ref, RefCell},
    marker::PhantomData,
};

/// The result of a successful GLM fit.
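///
/// # Example
///
/// A minimal sketch of how a `Fit` is obtained (not run here; it assumes the crate-root
/// re-exports of `Logistic` and `ModelBuilder` that are also used in this module's tests):
///
/// ```no_run
/// use ndarray::array;
/// use ndarray_glm::{Logistic, ModelBuilder};
///
/// let y = array![true, false, true, true];
/// let x = array![[0.1, 0.3], [0.4, -0.1], [0.2, 0.5], [0.7, 0.1]];
/// // Build the model and run the IRLS fit; `fit` borrows the model's dataset.
/// let model = ModelBuilder::<Logistic>::data(&y, &x).build().unwrap();
/// let fit = model.fit().unwrap();
/// println!("coefficients: {}", fit.result);
/// ```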
pub struct Fit<'a, M, F>
where
    M: Glm,
    F: Float,
{
    model: PhantomData<M>,
    /// The data and model specification used in the fit.
    data: &'a Dataset<F>,
    /// Whether the intercept covariate is used
    use_intercept: bool,
    /// The parameter values that maximize the likelihood as given by the IRLS regression.
    pub result: Array1<F>,
    /// The options used for this fit.
    pub options: FitOptions<F>,
    /// The value of the likelihood function for the fit result.
    pub model_like: F,
    /// The regularizer of the fit
    reg: Box<dyn IrlsReg<F>>,
    /// The number of overall iterations taken in the IRLS.
    pub n_iter: usize,
    /// The history of guesses and likelihoods over the IRLS iterations.
    pub history: Vec<IrlsStep<F>>,
    /// The number of parameters
    n_par: usize,
    /// The unscaled covariance matrix of the parameters, i.e. the inverse of the Fisher
    /// information. Since the calculation requires a matrix inversion, it is computed only when
    /// needed and the value is cached.
    cov_unscaled: RefCell<Option<Array2<F>>>,
    /// The hat matrix of the data and fit. Since the calculation requires a matrix inversion of
    /// the fisher information, it is computed only when needed and the value is cached. Access
    /// through the `hat()` function.
    hat: RefCell<Option<Array2<F>>>,
    /// The likelihood and parameters for the null model.
    null_model: RefCell<Option<(F, Array1<F>)>>,
}

impl<'a, M, F> Fit<'a, M, F>
where
    M: Glm,
    F: 'static + Float,
{
    /// Returns the Akaike information criterion for the model fit. It is defined only up to an
    /// additive constant, so only differences in AIC are meaningful.
    // TODO: Should an effective number of parameters that takes regularization
    // into account be considered?
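    /// # Example
    ///
    /// A sketch (not run here) of comparing two nested fits by their AIC difference; the
    /// `x_full`/`x_reduced` names are purely illustrative:
    ///
    /// ```no_run
    /// use ndarray::array;
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![0.3, -0.2, 0.5, 0.7];
    /// let x_full = array![[0.6, 0.1], [2.1, -0.3], [0.4, 0.2], [-3.2, 0.5]];
    /// let x_reduced = array![[0.6], [2.1], [0.4], [-3.2]];
    /// let model_full = ModelBuilder::<Linear>::data(&y, &x_full).build().unwrap();
    /// let fit_full = model_full.fit().unwrap();
    /// let model_red = ModelBuilder::<Linear>::data(&y, &x_reduced).build().unwrap();
    /// let fit_red = model_red.fit().unwrap();
    /// // Only the difference in AIC between the two models is meaningful.
    /// let delta_aic = fit_full.aic() - fit_red.aic();
    /// ```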
    pub fn aic(&self) -> F {
        let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
        let sum_log_weights = self.data.freq_sum(&log_weights);
        // NOTE: This is now the unregularized deviance.
        self.deviance() + F::two() * self.rank() - F::two() * sum_log_weights
    }

    /// Returns the Bayesian information criterion for the model fit.
    // TODO: Also consider the effect of regularization on this statistic.
    // TODO: Wikipedia suggests that the variance should be included in the number
    // of parameters for multiple linear regression. Should an additional
    // parameter be included for the dispersion parameter? This question does
    // not affect the difference between two models fit with the methodology in
    // this package.
    pub fn bic(&self) -> F {
        let logn = num_traits::Float::ln(self.data.n_obs());
        logn * self.rank() - F::two() * self.model_like
    }

    /// The covariance matrix estimated by the Fisher information and the dispersion parameter (for
    /// families with a free scale). The inverted Fisher matrix is cached to avoid repeating the
    /// potentially expensive matrix inversion.
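    ///
    /// # Example
    ///
    /// A sketch (not run here) of extracting approximate standard errors from the diagonal of
    /// the covariance matrix, given some previously obtained `fit`:
    ///
    /// ```no_run
    /// # use ndarray::array;
    /// # use ndarray_glm::{Logistic, ModelBuilder};
    /// # let y = array![true, false, true, true, false];
    /// # let x = array![[0.1], [0.4], [0.2], [0.7], [-0.3]];
    /// # let model = ModelBuilder::<Logistic>::data(&y, &x).build().unwrap();
    /// # let fit = model.fit().unwrap();
    /// let cov = fit.covariance().unwrap();
    /// // Standard errors of the parameters are the square roots of the diagonal entries.
    /// let std_errs = cov.diag().mapv(f64::sqrt);
    /// ```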
    pub fn covariance(&self) -> RegressionResult<Array2<F>> {
        // The covariance must be multiplied by the dispersion parameter.
        // For logistic/poisson regression, this is identically 1.
        // For linear/gamma regression it is estimated from the data.
        let phi: F = self.dispersion();
        // NOTE: invh/invh_into() are bugged and incorrect!
        let unscaled_cov: Array2<F> = self.fisher_inv()?.to_owned();
        let cov = unscaled_cov * phi;
        Ok(cov)
    }

    /// Returns the deviance of the fit: twice the difference between the
    /// saturated likelihood and the model likelihood. Asymptotically this fits
    /// a chi-squared distribution with `self.ndf()` degrees of freedom.
    /// Note that the unregularized likelihood is used here.
    pub fn deviance(&self) -> F {
        let terms = self.deviance_terms();
        self.data.freq_sum(&terms)
    }

    /// Returns the contribution to the deviance from each observation. The total deviance should
    /// be the sum of all of these. Variance weights are already included, but not frequency
    /// weights.
    fn deviance_terms(&self) -> Array1<F> {
        let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result);
        let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
        let terms = (ll_sat - ll_terms) * F::two();
        self.data.apply_var_weights(terms)
    }

    /// Returns the self-excluded deviance terms, i.e. the deviance of an observation as if the
    /// model were fit without it. This is a one-step approximation.
    fn deviance_terms_loo(&self) -> RegressionResult<Array1<F>> {
        let dev_terms = self.deviance_terms();
        let pear_sq = self.resid_pear().mapv(|r| r * r);
        let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
        let result = dev_terms + &hat_rat * (&hat_rat + F::two()) * pear_sq;
        Ok(result)
    }

    /// The dispersion parameter (typically denoted `phi`) which relates the variance of the `y`
    /// values to the variance of the response distribution: `Var[y] = phi * V[mu]`.
    /// Identically one for logistic, binomial, and Poisson regression.
    /// For others (linear, gamma) the dispersion parameter is estimated from the data.
    /// This is equal to the total deviance divided by the degrees of freedom. For OLS linear
    /// regression this is equal to the sum of `(y_i - mu_i)^2 / (n - p)`, an estimate of `sigma^2`;
    /// with no covariates it is equal to the sample variance.
    pub fn dispersion(&self) -> F {
        use DispersionType::*;
        match M::DISPERSED {
            FreeDispersion => {
                let dev = self.deviance();
                let p = self.rank();
                let n_eff = self.data.n_eff();
                let scaling = if p >= n_eff {
                    // This is the overparameterized regime, which is checked directly instead of
                    // allowing negative values. It's not clear what conditions result in this when
                    // p < N.
                    F::zero()
                } else {
                    (F::one() - p / n_eff) * self.data.sum_weights()
                };
                dev / scaling
            }
            NoDispersion => F::one(),
        }
    }
158
159 /// Return the dispersion terms with the observation(s) at each point excluded from the fit.
160 fn dispersion_loo(&self) -> RegressionResult<Array1<F>> {
161 use DispersionType::*;
162 match M::DISPERSED {
163 FreeDispersion => {
164 let pear_sq = self.resid_pear().mapv(|r| r * r);
165 let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
166 let terms = self.deviance_terms() + hat_rat * pear_sq;
167 // Don't apply total weights since the variance weights are already
168 // included in the residual terms. However, we do need the frequency weights.
169 let terms = self.data.apply_freq_weights(terms);
170 let total: Array1<F> = -terms + self.deviance();
171 let scaled_total: Array1<F> = match &self.data.weights {
172 None => match &self.data.freqs {
173 Some(f) => total / -(f - self.ndf()),
174 None => total / (self.ndf() - F::one()),
175 },
176 Some(w) => {
177 let v1 = self.data.freq_sum(w);
178 let w2 = w * w;
179 let v2 = self.data.freq_sum(&w2);
180 // The subtracted out terms need the frequency terms as well
181 let f_w = self.data.apply_freq_weights(w.clone());
182 let f_w2 = self.data.apply_freq_weights(w2);
183 // the modifed sums from leaving out the ith observation
184 let v1p = -f_w + v1;
185 let v2p = -f_w2 + v2;
186 let p = self.rank();
187 let scale = &v1p - v2p / &v1p * p;
188 total / scale
189 }
190 };
191 Ok(scaled_total)
192 }
193 NoDispersion => Ok(Array1::<F>::ones(self.data.y.len())),
194 }
195 }
196
197 /// Returns the errors in the response variables for the data passed as an
198 /// argument given the current model fit.
199 fn errors(&self, data: &Dataset<F>) -> Array1<F> {
200 &data.y - &self.predict(&data.x, data.linear_offset.as_ref())
201 }
202
203 #[deprecated(since = "0.0.10", note = "use predict() instead")]
204 pub fn expectation<S>(
205 &self,
206 data_x: &ArrayBase<S, Ix2>,
207 lin_off: Option<&Array1<F>>,
208 ) -> Array1<F>
209 where
210 S: Data<Elem = F>,
211 {
212 self.predict(data_x, lin_off)
213 }
214
215 /// Returns the fisher information (the negative hessian of the likelihood)
216 /// at the parameter values given. The regularization is included.
217 pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
218 let lin_pred: Array1<F> = self.data.linear_predictor(params);
219 let adj_var: Array1<F> = M::adjusted_variance_diag(&lin_pred);
220 // calculate the fisher matrix
221 let fisher: Array2<F> = (self.data.x_conj() * &adj_var).dot(&self.data.x);
222 // Regularize the fisher matrix
223 self.reg.as_ref().irls_mat(fisher, params)
224 }
225
226 /// The inverse of the (regularized) fisher information matrix. This is used in some other
227 /// calculations (like the covariance and hat matrices) so it is cached.
228 fn fisher_inv(&self) -> RegressionResult<Ref<'_, Array2<F>>> {
229 if self.cov_unscaled.borrow().is_none() {
230 let fisher_reg = self.fisher(&self.result);
231 // NOTE: invh/invh_into() are bugged and incorrect!
232 let unscaled_cov: Array2<F> = fisher_reg.inv_into()?;
233 *self.cov_unscaled.borrow_mut() = Some(unscaled_cov);
234 }
235 Ok(Ref::map(self.cov_unscaled.borrow(), |x| {
236 x.as_ref().unwrap()
237 }))
238 }
239
240 /// Returns the hat matrix of fit, also known as the "projection" or "influence" matrix.
241 /// The convention used corresponds to H = dE[y]/dy and is orthogonal to the response
242 /// residuals. This version is not symmetric.
243 pub fn hat(&self) -> RegressionResult<Ref<'_, Array2<F>>> {
244 if self.hat.borrow().is_none() {
245 let lin_pred = self.data.linear_predictor(&self.result);
246 // Apply the eta' terms manually instead of calling adjusted_variance_diag, because the
247 // adjusted variance method applies 2 powers to the variance, while we want one power
248 // to the variance and one to the weights.
249 // let adj_var = M::adjusted_variance_diag(&lin_pred);
250
251 let mu = M::mean(&lin_pred);
252 let var = mu.mapv_into(M::variance);
253 let eta_d = M::Link::d_nat_param(&lin_pred);
254
255 let fisher_inv = self.fisher_inv()?;
256
257 // the GLM variance and the data weights are put on different sides in this convention
258 let left = (var * &eta_d).insert_axis(Axis(1)) * &self.data.x;
259 let right = self.data.x_conj() * &eta_d;
260 let result = left.dot(&fisher_inv.dot(&right));
261
262 *self.hat.borrow_mut() = Some(result);
263 }
264 let borrowed: Ref<Option<Array2<F>>> = self.hat.borrow();
265 Ok(Ref::map(borrowed, |x| x.as_ref().unwrap()))
266 }
267
268 /// A matrix where each row corresponds to the contribution to the coefficients incurred by
269 /// including the observation in that row. This is inexact for nonlinear models, as a one-step
270 /// approximation is used.
271 /// To approximate the coeficients that would result from excluding the ith observation, the
272 /// ith row of this matrix should be subtracted from the fit result.
273 pub fn infl_coef(&self) -> RegressionResult<Array2<F>> {
274 let lin_pred = self.data.linear_predictor(&self.result);
275 let resid_resp = self.resid_resp();
276 let omh = -self.leverage()? + F::one();
277 let resid_adj = M::Link::adjust_errors(resid_resp, &lin_pred) / omh;
278 let xte = self.data.x_conj() * resid_adj;
279 let fisher_inv = self.fisher_inv()?;
280 let delta_b = xte.t().dot(&*fisher_inv);
281 Ok(delta_b)
282 }
283
284 /// Returns the leverage for each observation. This is given by the diagonal of the projection
285 /// matrix and indicates the sensitivity of each prediction to its corresponding observation.
286 pub fn leverage(&self) -> RegressionResult<Array1<F>> {
287 let hat = self.hat()?;
288 Ok(hat.diag().to_owned())
289 }
290
291 /// Returns exact coefficients from leaving each observation out, one-at-a-time.
292 /// This is a much more expensive operation than the original regression because a new one is
293 /// performed for each observation.
294 pub fn loo_exact(&self) -> RegressionResult<Array2<F>> {
295 let loo_coef: Array2<F> = self.infl_coef()?;
296 // NOTE: could also use the result itself as the initial guess
297 let loo_initial = &self.result - loo_coef;
298 let mut loo_result = loo_initial.clone();
299 let n_obs = self.data.y.len();
300 for i in 0..n_obs {
301 let data_i: Dataset<F> = {
302 let mut data_i: Dataset<F> = self.data.clone();
303 let mut freqs: Array1<F> = data_i.freqs.unwrap_or(Array1::<F>::ones(n_obs));
304 freqs[i] = F::zero();
305 data_i.freqs = Some(freqs);
306 data_i
307 };
308 let model_i = Model {
309 model: PhantomData::<M>,
310 data: data_i,
311 use_intercept: self.use_intercept,
312 };
313 let options = {
314 let mut options = self.options.clone();
315 options.init_guess = Some(self.result.clone());
316 options
317 };
318 let fit_i = model_i.with_options(options).fit()?;
319 loo_result.row_mut(i).assign(&fit_i.result);
320 }
321 Ok(loo_result)
322 }
323
324 /// Perform a likelihood-ratio test, returning the statistic -2*ln(L_0/L)
325 /// where L_0 is the likelihood of the best-fit null model (with no
326 /// parameters but the intercept) and L is the likelihood of the fit result.
327 /// The number of degrees of freedom of this statistic, equal to the number
328 /// of parameters fixed to zero to form the null model, is `test_ndf()`. By
329 /// Wilks' theorem this statistic is asymptotically chi-squared distributed
330 /// with this number of degrees of freedom.
331 // TODO: Should the effective number of degrees of freedom due to
332 // regularization be taken into account?
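    /// # Example
    ///
    /// A sketch (not run here) of using the statistic together with `test_ndf()`; converting it
    /// to a p-value would require an external chi-squared CDF, which is not shown:
    ///
    /// ```no_run
    /// # use ndarray::array;
    /// # use ndarray_glm::{Logistic, ModelBuilder};
    /// # let y = array![true, false, true, true, false];
    /// # let x = array![[0.1], [0.4], [0.2], [0.7], [-0.3]];
    /// # let model = ModelBuilder::<Logistic>::data(&y, &x).build().unwrap();
    /// # let fit = model.fit().unwrap();
    /// let stat = fit.lr_test();
    /// let ndf = fit.test_ndf();
    /// // Under the null hypothesis, `stat` is asymptotically chi-squared with `ndf` dof.
    /// println!("LR statistic: {} with {} degrees of freedom", stat, ndf);
    /// ```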
    pub fn lr_test(&self) -> F {
        // The model likelihood should include regularization terms and there
        // shouldn't be any in the null model with all non-intercept parameters
        // set to zero.
        let null_like = self.null_like();
        F::from(-2.).unwrap() * (null_like - self.model_like)
    }

    /// Perform a likelihood-ratio test against a general alternative model, not
    /// necessarily a null model. The alternative model is regularized the same
    /// way that the regression resulting in this fit was. The degrees of
    /// freedom cannot be generally inferred.
    pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
        let alt_like = M::log_like(self.data, alternative);
        let alt_like_reg = alt_like + self.reg.likelihood(alternative);
        F::two() * (self.model_like - alt_like_reg)
    }

    /// Returns the residual degrees of freedom in the model, i.e. the number
    /// of data points minus the number of parameters. Not to be confused with
    /// `test_ndf()`, the degrees of freedom in the statistical tests of the
    /// fit parameters.
    pub fn ndf(&self) -> F {
        self.data.n_obs() - self.rank()
    }
358
359 pub(crate) fn new(
360 data: &'a Dataset<F>,
361 use_intercept: bool,
362 irls: Irls<M, F>,
363 history: Vec<IrlsStep<F>>,
364 ) -> Self {
365 let Irls {
366 guess: result,
367 options,
368 reg,
369 n_iter,
370 last_like_data: data_like,
371 ..
372 } = irls;
373 assert_eq!(
374 data_like,
375 M::log_like(data, &result),
376 "Unregularized likelihoods should match exactly."
377 );
378 assert_eq!(
379 n_iter,
380 history.len(),
381 "Number of iterations should match history length"
382 );
383 assert_eq!(
384 result,
385 history.last().unwrap().guess,
386 "Last guess should be the same"
387 );
388 // Cache some of these variables that will be used often.
389 let n_par = result.len();
390 let model_like = data_like + reg.likelihood(&result);
391 assert_eq!(
392 model_like,
393 history.last().unwrap().like,
394 "Last data likelihood should be the same"
395 );
396 Self {
397 model: PhantomData,
398 data,
399 use_intercept,
400 result,
401 options,
402 model_like,
403 reg,
404 n_iter,
405 history,
406 n_par,
407 cov_unscaled: RefCell::new(None),
408 hat: RefCell::new(None),
409 null_model: RefCell::new(None),
410 }
411 }
412
413 /// Returns the likelihood given the null model, which fixes all parameters
414 /// to zero except the intercept (if it is used). A total of `test_ndf()`
415 /// parameters are constrained.
416 pub fn null_like(&self) -> F {
417 let (null_like, _) = self.null_model_fit();
418 null_like
419 }
420
421 /// Return the likelihood and intercept for the null model. Since this can
422 /// require an additional regression, the values are cached.
423 fn null_model_fit(&self) -> (F, Array1<F>) {
424 // TODO: make a result instead of allowing a potential panic in the borrow.
425 if self.null_model.borrow().is_none() {
426 let (null_like, null_intercept): (F, Array1<F>) = match &self.data.linear_offset {
427 None => {
428 // If there is no linear offset, the natural parameter is
429 // identical for all observations so it is sufficient to
430 // calculate the null likelihood for a single point with y equal
431 // to the average.
432 // The average y
433 let y_bar: F = self.data.weighted_sum(&self.data.y) / self.data.sum_weights();
434 // This approach assumes that the likelihood is in the natural
435 // exponential form as calculated by Glm::log_like_natural(). If that
436 // function is overridden and the values differ significantly, this
437 // approach will give incorrect results. If the likelihood has terms
438 // non-linear in y, then the likelihood must be calculated for every
439 // point rather than averaged.
440 // If the intercept is allowed to maximize the likelihood, the natural
441 // parameter is equal to the link of the expectation. Otherwise it is
442 // the transformation function of zero.
443 let intercept: F = if self.use_intercept {
444 M::Link::func(y_bar)
445 } else {
446 F::zero()
447 };
448 // this is a length-one array. This works because the
449 // likelihood contribution is the same for all observations.
450 let nat_par = M::Link::nat_param(array![intercept]);
451 // The null likelihood per observation
452 let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
453 // just multiply the average likelihood by the number of data points, since every term is the same.
454 let null_like_total = self.data.sum_weights() * null_like_one;
455 let null_params: Array1<F> = {
456 let mut par = Array1::<F>::zeros(self.n_par);
457 par[0] = intercept;
458 par
459 };
460 (null_like_total, null_params)
461 }
                Some(off) => {
                    if self.use_intercept {
                        // If there are linear offsets and the intercept is allowed
                        // to be free, there is not a major simplification and the
                        // model needs to be re-fit.
                        // the X data is a single column of ones. Since this model
                        // isn't being created by the ModelBuilder, the X data
                        // has to be automatically padded with ones.
                        let data_x_null = Array2::<F>::ones((self.data.y.len(), 1));
                        let null_model = Model {
                            model: std::marker::PhantomData::<M>,
                            data: Dataset::<F> {
                                y: self.data.y.clone(),
                                x: data_x_null,
                                linear_offset: Some(off.clone()),
                                weights: self.data.weights.clone(),
                                freqs: self.data.freqs.clone(),
                            },
                            // If we are in this branch it is because an intercept is needed.
                            use_intercept: true,
                        };
                        // TODO: Make this function return an error, although it's
                        // difficult to imagine this case happening.
                        // TODO: Should the tolerance of this fit be stricter?
                        // The intercept should not be regularized
                        let null_fit = null_model
                            .fit_options()
                            // There shouldn't be too much trouble fitting this
                            // single-parameter fit, but there shouldn't be harm in
                            // using the same maximum as in the original model.
                            .max_iter(self.options.max_iter)
                            .fit()
                            .expect("Could not fit null model!");
                        let null_params: Array1<F> = {
                            let mut par = Array1::<F>::zeros(self.n_par);
                            // there is only one parameter in this fit.
                            par[0] = null_fit.result[0];
                            par
                        };
                        (null_fit.model_like, null_params)
                    } else {
                        // If the intercept is fixed to zero, then no minimization is
                        // required. The natural parameters are directly known in terms
                        // of the linear offset. The likelihood must still be summed
                        // over all observations, since they have different offsets.
                        let nat_par = M::Link::nat_param(off.clone());
                        let null_like_terms = ndarray::Zip::from(&self.data.y)
                            .and(&nat_par)
                            .map_collect(|&y, &eta| M::log_like_natural(y, eta));
                        let null_like = self.data.weighted_sum(&null_like_terms);
                        let null_params = Array1::<F>::zeros(self.n_par);
                        (null_like, null_params)
                    }
                }
            };
            *self.null_model.borrow_mut() = Some((null_like, null_intercept));
        }
        self.null_model
            .borrow()
            .as_ref()
            .expect("the null model should be cached now")
            .clone()
    }

    /// Returns the expected value of Y given the input data X. This data need
    /// not be the training data, so an option for linear offsets is provided.
    /// Panics if the number of covariates in the data matrix is not consistent
    /// with the training set. The data matrix may need to be padded by ones if
    /// it is not part of a Model. The `utility::one_pad()` function facilitates
    /// this.
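    ///
    /// # Example
    ///
    /// A sketch (not run here) of predicting on new covariates; `one_pad` is used because the
    /// raw matrix is not padded with an intercept column by a `Model` (this assumes `one_pad`
    /// is reachable at `ndarray_glm::utility::one_pad`, as referenced above):
    ///
    /// ```no_run
    /// # use ndarray::array;
    /// # use ndarray_glm::{Linear, ModelBuilder};
    /// use ndarray_glm::utility::one_pad;
    /// # let y = array![0.3, -0.2, 0.5, 0.7];
    /// # let x = array![[0.6], [2.1], [0.4], [-3.2]];
    /// # let model = ModelBuilder::<Linear>::data(&y, &x).build().unwrap();
    /// # let fit = model.fit().unwrap();
    /// let x_new = array![[0.1], [0.9]];
    /// // Pad with a column of ones to account for the intercept term.
    /// let y_pred = fit.predict(&one_pad(x_new.view()), None);
    /// ```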
    pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
    where
        S: Data<Elem = F>,
    {
        let lin_pred: Array1<F> = data_x.dot(&self.result);
        let lin_pred: Array1<F> = if let Some(off) = &lin_off {
            lin_pred + *off
        } else {
            lin_pred
        };
        lin_pred.mapv_into(M::Link::func_inv)
    }

    /// Returns the rank of the model (i.e. the number of parameters)
    fn rank(&self) -> F {
        F::from(self.n_par).unwrap()
    }

    /// Return the deviance residuals for each point in the training data.
    /// Equal to `sign(y - E[y|x]) * sqrt(-2*(L[y|x] - L_sat[y]))`.
    /// This is usually a better choice than the response residuals for non-linear models.
    /// NaNs might be possible if L[y|x] > L_sat[y] due to floating-point operations. These are
    /// not checked or clipped right now.
    pub fn resid_dev(&self) -> Array1<F> {
        let signs = self.resid_resp().mapv_into(F::signum);
        let ll_diff = self.deviance_terms();
        let dev: Array1<F> = ll_diff.mapv_into(num_traits::Float::sqrt);
        signs * dev
    }
561
562 /// Return the standardized deviance residuals, also known as the "internally studentized
563 /// deviance residuals". This is generally applicable for outlier detection, although the
564 /// influence of each point on the fit is only approximately accounted for.
565 /// `d / sqrt(phi * (1 - h))` where `d` is the deviance residual, phi is the dispersion (e.g.
566 /// sigma^2 for linear regression, 1 for logistic regression), and h is the leverage.
567 pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>> {
568 let dev = self.resid_dev();
569 let phi = self.dispersion();
570 let hat: Array1<F> = self.leverage()?;
571 let omh: Array1<F> = -hat + F::one();
572 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
573 Ok(dev / denom)
574 }
575
576 /// Return the partial residuals.
577 pub fn resid_part(&self) -> Array1<F> {
578 let x_mean = self.data.x.mean_axis(Axis(0)).expect("empty dataset");
579 let x_centered = &self.data.x - x_mean.insert_axis(Axis(0));
580 self.resid_work() + x_centered.dot(&self.result)
581 }
582
583 /// Return the Pearson residuals for each point in the training data.
584 /// This is equal to `(y - E[y])/sqrt(V(E[y]))`, where V is the variance function.
585 /// These are not scaled by the sample standard deviation for families with a free dispersion
586 /// parameter like linear regression.
587 pub fn resid_pear(&self) -> Array1<F> {
588 let mu: Array1<F> = self.predict(&self.data.x, self.data.linear_offset.as_ref());
589 let residuals = &self.data.y - μ
590 let inv_var_diag: Array1<F> = mu.mapv_into(M::variance).mapv_into(F::recip);
591 // the variance weights are the reciprocal of the corresponding variance
592 let scales = self
593 .data
594 .apply_var_weights(inv_var_diag)
595 .mapv_into(num_traits::Float::sqrt);
596 scales * residuals
597 }
598
599 /// Return the standardized Pearson residuals for every observation.
600 /// Also known as the "internally studentized Pearson residuals".
601 /// (y - E[y]) / (sqrt(Var[y] * (1 - h))) where h is a vector representing the leverage for
602 /// each observation. These are meant to have a variance of 1.
603 pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>> {
604 let pearson = self.resid_pear();
605 let phi = self.dispersion();
606 let hat = self.leverage()?;
607 let omh = -hat + F::one();
608 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
609 Ok(pearson / denom)
610 }
611
612 /// Return the response residuals, or fitting deviation, for each data point in the fit; that
613 /// is, the difference y - E[y|x] where the expectation value is the y value predicted by the
614 /// model given x.
615 pub fn resid_resp(&self) -> Array1<F> {
616 self.errors(self.data)
617 }
618
619 /// Return the studentized residuals, which are the deviance residuals at each point computed
620 /// as if the fit is leaving out the corresponding observation. For families with a free
621 /// dispersion parameter, the deviance is normalized by the one-step approximation to the
622 /// dispersion.
623 /// This is a robust and general method for outlier detection, although a one-step
624 /// approximation is used to avoid re-fitting the model completely for each observation.
625 /// If the linear errors are standard normally distributed then this statistic should follow a
626 /// t-distribution with `self.ndf() - 1` degrees of freedom.
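    ///
    /// # Example
    ///
    /// A sketch (not run here) of screening for outliers with the studentized residuals; the
    /// cutoff of 3 is an arbitrary illustrative threshold, not a recommendation of this crate:
    ///
    /// ```no_run
    /// # use ndarray::array;
    /// # use ndarray_glm::{Linear, ModelBuilder};
    /// # let y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
    /// # let x = array![[0.4], [0.1], [0.3], [-0.1], [0.5], [0.6]];
    /// # let model = ModelBuilder::<Linear>::data(&y, &x).build().unwrap();
    /// # let fit = model.fit().unwrap();
    /// let student = fit.resid_student().unwrap();
    /// // Collect the indices of observations with unusually large studentized residuals.
    /// let suspects: Vec<usize> = student
    ///     .iter()
    ///     .enumerate()
    ///     .filter(|(_, r)| r.abs() > 3.0)
    ///     .map(|(i, _)| i)
    ///     .collect();
    /// ```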
    pub fn resid_student(&self) -> RegressionResult<Array1<F>> {
        let signs = self.resid_resp().mapv(F::signum);
        let dev_terms_loo: Array1<F> = self.deviance_terms_loo()?;
        // NOTE: This match could also be handled internally in dispersion_loo()
        let dev_terms_scaled = match M::DISPERSED {
            // The dispersion is corrected for the contribution from each current point.
            // This is an approximation; the exact solution would perform a fit at each point.
            DispersionType::FreeDispersion => dev_terms_loo / self.dispersion_loo()?,
            DispersionType::NoDispersion => dev_terms_loo,
        };
        Ok(signs * dev_terms_scaled.mapv_into(num_traits::Float::sqrt))
    }

    /// Returns the working residuals `dg(\mu)/d\mu * (y - E[y|x])`.
    /// This should be equal to the response residuals divided by the variance function (as
    /// opposed to the square root of the variance as in the Pearson residuals).
    pub fn resid_work(&self) -> Array1<F> {
        let lin_pred: Array1<F> = self.data.linear_predictor(&self.result);
        let mu: Array1<F> = lin_pred.mapv(M::Link::func_inv);
        let resid_response: Array1<F> = &self.data.y - &mu;
        let var: Array1<F> = mu.mapv(M::variance);
        // adjust for non-canonical link functions; we want a total factor of 1/eta'
        let (adj_response, adj_var) =
            M::Link::adjust_errors_variance(resid_response, var, &lin_pred);
        adj_response / adj_var
    }

    /// Returns the score function (the gradient of the likelihood) at the
    /// parameter values given. It should be zero within floating-point error at the
    /// optimized result.
    pub fn score(&self, params: &Array1<F>) -> Array1<F> {
        // This represents the predictions given the input parameters, not the
        // fit parameters.
        let lin_pred: Array1<F> = self.data.linear_predictor(params);
        let mu: Array1<F> = M::mean(&lin_pred);
        let resid_response = &self.data.y - mu;
        let resid_working = M::Link::adjust_errors(resid_response, &lin_pred);
        let score_unreg = self.data.x_conj().dot(&resid_working);
        self.reg.as_ref().gradient(score_unreg, params)
    }
667
668 /// Returns the score test statistic. This statistic is asymptotically
669 /// chi-squared distributed with `test_ndf()` degrees of freedom.
670 pub fn score_test(&self) -> RegressionResult<F> {
671 let (_, null_params) = self.null_model_fit();
672 self.score_test_against(null_params)
673 }
674
675 /// Returns the score test statistic compared to another set of model
676 /// parameters, not necessarily a null model. The degrees of freedom cannot
677 /// be generally inferred.
678 pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F> {
679 let score_alt = self.score(&alternative);
680 let fisher_alt = self.fisher(&alternative);
681 // The is not the same as the cached covariance matrix since it is
682 // evaluated at the null parameters.
683 // NOTE: invh/invh_into() are bugged and incorrect!
684 let inv_fisher_alt = fisher_alt.inv_into()?;
685 Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
686 }
687
688 /// The degrees of freedom for the likelihood ratio test, the score test,
689 /// and the Wald test. Not to be confused with `ndf()`, the degrees of
690 /// freedom in the model fit.
691 pub fn test_ndf(&self) -> usize {
692 if self.use_intercept {
693 self.n_par - 1
694 } else {
695 self.n_par
696 }
697 }
698
699 /// Returns the Wald test statistic compared to a null model with only an
700 /// intercept (if one is used). This statistic is asymptotically chi-squared
701 /// distributed with `test_ndf()` degrees of freedom.
702 pub fn wald_test(&self) -> F {
703 // The null parameters are all zero except for a possible intercept term
704 // which optimizes the null model.
705 let (_, null_params) = self.null_model_fit();
706 self.wald_test_against(&null_params)
707 }
708
709 /// Returns the Wald test statistic compared to another specified model fit
710 /// instead of the null model. The degrees of freedom cannot be generally
711 /// inferred.
712 pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
713 let d_params: Array1<F> = &self.result - alternative;
714 let fisher_alt: Array2<F> = self.fisher(alternative);
715 d_params.t().dot(&fisher_alt.dot(&d_params))
716 }
717
718 /// Returns the signed square root of the Wald test statistic for each
719 /// parameter. Since it does not account for covariance between the
720 /// parameters it may not be accurate.
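    ///
    /// # Example
    ///
    /// A sketch (not run here); the z-scores are ordered like the parameters, with the intercept
    /// first when one is used:
    ///
    /// ```no_run
    /// # use ndarray::array;
    /// # use ndarray_glm::{Logistic, ModelBuilder};
    /// # let y = array![true, false, true, true, false];
    /// # let x = array![[0.1], [0.4], [0.2], [0.7], [-0.3]];
    /// # let model = ModelBuilder::<Logistic>::data(&y, &x).build().unwrap();
    /// # let fit = model.fit().unwrap();
    /// let z = fit.wald_z().unwrap();
    /// println!("intercept z = {}, slope z = {}", z[0], z[1]);
    /// ```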
    pub fn wald_z(&self) -> RegressionResult<Array1<F>> {
        let par_cov = self.covariance()?;
        let par_variances: ArrayView1<F> = par_cov.diag();
        Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
    }
}

/// Specialized functions for OLS.
impl<'a, F> Fit<'a, Linear, F>
where
    F: 'static + Float,
{
    /// Returns the coefficient of determination, R^2.
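    ///
    /// # Example
    ///
    /// A sketch (not run here) for an ordinary linear regression fit:
    ///
    /// ```no_run
    /// # use ndarray::array;
    /// # use ndarray_glm::{Linear, ModelBuilder};
    /// # let y = array![0.3, -0.2, 0.5, 0.7];
    /// # let x = array![[0.6], [2.1], [0.4], [-3.2]];
    /// # let model = ModelBuilder::<Linear>::data(&y, &x).build().unwrap();
    /// # let fit = model.fit().unwrap();
    /// // Fraction of the variance in y explained by the linear model.
    /// let r2 = fit.r_sq();
    /// assert!(r2 <= 1.0);
    /// ```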
    pub fn r_sq(&self) -> F {
        let y_avg: F = self.data.y.mean().expect("Data should be non-empty");
        let total_sum_sq: F = self.data.y.mapv(|y| y - y_avg).mapv(|dy| dy * dy).sum();
        (total_sum_sq - self.resid_sum_sq()) / total_sum_sq
    }

    /// Returns the residual sum of squares, i.e. the sum of the squared residuals.
    pub fn resid_sum_sq(&self) -> F {
        self.resid_resp().mapv_into(|r| r * r).sum()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        model::ModelBuilder,
        utility::{one_pad, standardize},
        Linear, Logistic,
    };
    use anyhow::Result;
    use approx::assert_abs_diff_eq;
    use ndarray::Axis;
    use ndarray::{concatenate, s};

    /// Checks that the test statistics are invariant under standardization of the data.
    #[test]
    fn standardization_invariance() -> Result<()> {
        let data_y = array![true, false, false, true, true, true, true, false, true];
        let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
        let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
        let data_x_std = standardize(data_x.clone());
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off.clone())
            .build()?;
        let fit = model.fit()?;
        let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x_std)
            .linear_offset(lin_off)
            .build()?;
        let fit_std = model_std.fit()?;
        let lr = fit.lr_test();
        let lr_std = fit_std.lr_test();
        assert_abs_diff_eq!(lr, lr_std);
        eprintln!("about to try score test");
        assert_abs_diff_eq!(
            fit.score_test()?,
            fit_std.score_test()?,
            epsilon = f32::EPSILON as f64
        );
        eprintln!("about to try wald test");
        assert_abs_diff_eq!(
            fit.wald_test(),
            fit_std.wald_test(),
            epsilon = 4.0 * f64::EPSILON
        );
        assert_abs_diff_eq!(fit.aic(), fit_std.aic());
        assert_abs_diff_eq!(fit.bic(), fit_std.bic());
        eprintln!("about to try deviance");
        assert_abs_diff_eq!(fit.deviance(), fit_std.deviance());
        // The Wald Z-score of the intercept term is not invariant under a
        // linear transformation of the data, but the parameter part seems to
        // be, at least for single-component data.
        assert_abs_diff_eq!(
            fit.wald_z()?[1],
            fit_std.wald_z()?[1],
            epsilon = 0.01 * f32::EPSILON as f64
        );

        Ok(())
    }

    #[test]
    fn null_model() -> Result<()> {
        let data_y = array![true, false, false, true, true];
        let data_x: Array2<f64> = array![[], [], [], [], []];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        dbg!(fit.n_iter);
        dbg!(&fit.result);
        // with no offsets, the result should be the link function of the mean.
        assert_abs_diff_eq!(
            fit.result[0],
            <Logistic as Glm>::Link::func(0.6),
            epsilon = 4.0 * f64::EPSILON
        );
        let empty_null_like = fit.null_like();
        assert_eq!(fit.test_ndf(), 0);
        dbg!(&fit.model_like);
        let lr = fit.lr_test();
        // Since there is no data, the null likelihood should be identical to
        // the fit likelihood, so the likelihood ratio test should yield zero.
        assert_abs_diff_eq!(lr, 0., epsilon = 4. * f64::EPSILON);

        // Check that the assertions still hold if linear offsets are included.
        let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off)
            .build()?;
        let fit_off = model.fit()?;
        let empty_model_like_off = fit_off.model_like;
        let empty_null_like_off = fit_off.null_like();
        // these two assertions should be equivalent
        assert_abs_diff_eq!(fit_off.lr_test(), 0.);
        assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);

        // check consistency with data provided
        let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
        let fit_with = model.fit()?;
        dbg!(&fit_with.result);
        // The null likelihood of the model with parameters should be the same
        // as the likelihood of the model with only the intercept.
        assert_abs_diff_eq!(empty_null_like, fit_with.null_like());

        Ok(())
    }

    #[test]
    fn null_like_logistic() -> Result<()> {
        // 6 true and 4 false for y_bar = 0.6.
        let data_y = array![true, true, true, true, true, true, false, false, false, false];
        let ybar: f64 = 0.6;
        let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let target_null_like = fit
            .data
            .y
            .mapv(|y| {
                let eta = (ybar / (1. - ybar)).ln();
                y * eta - eta.exp().ln_1p()
            })
            .sum();
        assert_abs_diff_eq!(fit.null_like(), target_null_like);
        Ok(())
    }

    // Check that the deviance is equal to the sum of square deviations for a linear model
    #[test]
    fn deviance_linear() -> Result<()> {
        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        // The predicted values of Y given the model.
        let pred_y = fit.predict(&one_pad(data_x.view()), None);
        let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
        assert_abs_diff_eq!(fit.deviance(), target_dev, epsilon = 4. * f64::EPSILON);
        Ok(())
    }

    // Check that the deviance and dispersion parameter differ by a factor of the residual
    // degrees of freedom for a linear model.
    #[test]
    fn deviance_dispersion_eq_linear() -> Result<()> {
        let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
        let data_x = array![
            [0.4, 0.2],
            [0.1, 0.4],
            [-0.1, 0.3],
            [0.5, 0.7],
            [0.4, 0.1],
            [-0.2, -0.3],
            [0.4, -0.1]
        ];
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let dev = fit.deviance();
        let disp = fit.dispersion();
        let ndf = fit.ndf();
        assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
        Ok(())
    }

    // Check that the residuals for a linear model are all consistent.
    #[test]
    fn residuals_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![0.8, 1.2, 0.9, 0.8, 1.1, 0.9];
        // the implied variances from the weights
        let wgt_sigmas = weights.map(|w: &f64| 1. / w.sqrt());
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;
        let response = fit.resid_resp();
        let resp_scaled = &response / wgt_sigmas;
        let pearson = fit.resid_pear();
        let deviance = fit.resid_dev();
        assert_abs_diff_eq!(resp_scaled, pearson);
        assert_abs_diff_eq!(resp_scaled, deviance);
        let pearson_std = fit.resid_pear_std()?;
        let deviance_std = fit.resid_dev_std()?;
        assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);
        // The externally-studentized residuals aren't expected to match the internally-studentized
        // ones.
        let dev_terms_loo = fit.deviance_terms_loo()?;
        let disp_terms_loo = fit.dispersion_loo()?;
        let student = fit.resid_student()?;

        // NOTE: Studentization can't be checked directly in general because the method used is a
        // one-step approximation, however it should be exact in the linear OLS case.
        let n_data = data_y.len();
        // Check that the leave-one-out stats hold literally
        let mut loo_diff: Vec<f64> = Vec::new();
        let mut loo_dev_res: Vec<f64> = Vec::new();
        let mut loo_disp: Vec<f64> = Vec::new();
        for i in 0..n_data {
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            let yi = data_y[i];
            let xi = data_x.slice(s![i..i + 1, ..]);
            let xi = crate::utility::one_pad(xi);
            let wi = weights[i];
            let yi_pred: f64 = fit_i.predict(&xi, None)[0];
            let disp_i = fit_i.dispersion();
            let var_i = disp_i / wi;
            let diff_i = yi - yi_pred;
            let res_dev_i = diff_i / var_i.sqrt();
            loo_diff.push(wi * diff_i * diff_i);
            loo_disp.push(disp_i);
            loo_dev_res.push(res_dev_i);
        }
        let loo_diff: Array1<f64> = loo_diff.into();
        let loo_disp: Array1<f64> = loo_disp.into();
        let loo_dev_res: Array1<f64> = loo_dev_res.into();
        assert_abs_diff_eq!(dev_terms_loo, loo_diff, epsilon = 8. * f64::EPSILON);
        assert_abs_diff_eq!(disp_terms_loo, loo_disp, epsilon = 8. * f64::EPSILON);
        assert_abs_diff_eq!(student, loo_dev_res, epsilon = 8. * f64::EPSILON);
        Ok(())
    }

    // check the null likelihood for the case where it can be computed exactly.
    #[test]
    fn null_like_linear() -> Result<()> {
        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
        let ybar: f64 = data_y.mean().unwrap();
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        // let target_null_like = data_y.mapv(|y| -0.5 * (y - ybar) * (y - ybar)).sum();
        let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
        // With the saturated likelihood subtracted the null likelihood should
        // just be the sum of squared differences from the mean.
        // let target_null_like = 0.;
        // dbg!(target_null_like);
        let fit_null_like = fit.null_like();
        assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
        assert_eq!(fit.test_ndf(), 1);
        assert_abs_diff_eq!(
            fit_null_like,
            target_null_like,
            epsilon = 4.0 * f64::EPSILON
        );
        Ok(())
    }

    // check the leave-one-out one-step approximation for the linear model
    #[test]
    fn loo_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![1.0, 1.2, 0.8, 1.1, 1.0, 0.7];
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;

        let loo_coef: Array2<f64> = fit.infl_coef()?;
        let loo_results = &fit.result - loo_coef;
        let n_data = data_y.len();
        for i in 0..n_data {
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            assert_abs_diff_eq!(
                loo_results.row(i),
                &fit_i.result,
                epsilon = f32::EPSILON as f64
            );
        }
        Ok(())
    }

    // check the null likelihood where there is no dependence on the X data.
    #[test]
    fn null_like_logistic_nodep() -> Result<()> {
        let data_y = array![true, true, false, false, true, false, false, true];
        let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let lr = fit.lr_test();
        assert_abs_diff_eq!(lr, 0.);
        Ok(())
    }
    // TODO: Test that the statistics behave sensibly under regularization. The
    // likelihood ratio test should yield a smaller value.

    // Test the basic caching functions.
    #[test]
    fn cached_computations() -> Result<()> {
        let data_y = array![true, true, false, true, true, false, false, false, true];
        let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let _null_like = fit.null_like();
        let _null_like = fit.null_like();
        let _cov = fit.covariance()?;
        let _wald = fit.wald_z();
        Ok(())
    }

    // Check the consistency of the various statistical tests for linear
    // regression, where they should be the most comparable.
    #[test]
    fn linear_stat_tests() -> Result<()> {
        let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
        let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let lr = fit.lr_test();
        let wald = fit.wald_test();
        let score = fit.score_test()?;
        assert_abs_diff_eq!(lr, wald, epsilon = 32.0 * f64::EPSILON);
        assert_abs_diff_eq!(lr, score, epsilon = 32.0 * f64::EPSILON);
        Ok(())
    }
}
1083}