ndarray_glm/fit.rs
1//! Stores the fit results of the IRLS regression and provides functions that
2//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.
3
4pub mod options;
5use crate::{
6 Linear,
7 error::RegressionResult,
8 glm::{DispersionType, Glm},
9 irls::{Irls, IrlsStep},
10 link::{Link, Transform},
11 model::{Dataset, Model},
12 num::Float,
13 regularization::IrlsReg,
14};
15use ndarray::{Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2, array};
16use ndarray_linalg::InverseInto;
17use options::FitOptions;
18use std::{
19 cell::{Ref, RefCell},
20 marker::PhantomData,
21};
22
/// The result of a successful GLM fit.
24pub struct Fit<'a, M, F>
25where
26 M: Glm,
27 F: Float,
28{
29 model: PhantomData<M>,
30 /// The data and model specification used in the fit.
31 data: &'a Dataset<F>,
32 /// Whether the intercept covariate is used
33 use_intercept: bool,
34 /// The parameter values that maximize the likelihood as given by the IRLS regression.
35 pub result: Array1<F>,
36 /// The options used for this fit.
37 pub options: FitOptions<F>,
38 /// The value of the likelihood function for the fit result.
39 pub model_like: F,
40 /// The regularizer of the fit
41 reg: Box<dyn IrlsReg<F>>,
42 /// The number of overall iterations taken in the IRLS.
43 pub n_iter: usize,
44 /// The history of guesses and likelihoods over the IRLS iterations.
45 pub history: Vec<IrlsStep<F>>,
46 /// The number of parameters
47 n_par: usize,
48 /// The unscaled covariance matrix of the parameters, i.e. the inverse of the Fisher
49 /// information. Since the calculation requires a matrix inversion, it is computed only when
50 /// needed and the value is cached.
51 cov_unscaled: RefCell<Option<Array2<F>>>,
52 /// The hat matrix of the data and fit. Since the calculation requires a matrix inversion of
53 /// the fisher information, it is computed only when needed and the value is cached. Access
54 /// through the `hat()` function.
55 hat: RefCell<Option<Array2<F>>>,
56 /// The likelihood and parameters for the null model.
57 null_model: RefCell<Option<(F, Array1<F>)>>,
58}
59
60impl<'a, M, F> Fit<'a, M, F>
61where
62 M: Glm,
63 F: 'static + Float,
64{
65 /// Returns the Akaike information criterion for the model fit.
66 ///
67 /// ```math
68 /// \text{AIC} = D + 2K - 2\sum_{i} \ln w_{i}
69 /// ```
70 ///
71 /// where $`D`$ is the deviance, $`K`$ is the number of parameters, and $`w_i`$ are the
72 /// variance weights.
 /// This is defined only up to an additive constant, so only differences in AIC are meaningful.
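 ///
 /// A minimal usage sketch (the data and variable names are illustrative assumptions;
 /// the builder calls mirror the test module below):
 ///
 /// ```ignore
 /// let model_a = ModelBuilder::<Linear>::data(&data_y, &data_x_full).build()?;
 /// let fit_a = model_a.fit()?;
 /// let model_b = ModelBuilder::<Linear>::data(&data_y, &data_x_reduced).build()?;
 /// let fit_b = model_b.fit()?;
 /// // Only the difference in AIC between fits to the same data is meaningful;
 /// // the lower value indicates the preferred model.
 /// let delta_aic = fit_a.aic() - fit_b.aic();
 /// ```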
74 pub fn aic(&self) -> F {
75 let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
76 let sum_log_weights = self.data.freq_sum(&log_weights);
77 // NOTE: This is now the unregularized deviance.
78 self.deviance() + F::two() * self.rank() - F::two() * sum_log_weights
79 }
80
81 /// Returns the Bayesian information criterion for the model fit.
82 ///
83 /// ```math
 /// \text{BIC} = K \ln(n) - 2l - 2\sum_{i} \ln w_{i}
85 /// ```
86 ///
87 /// where $`K`$ is the number of parameters, $`n`$ is the number of observations, and $`l`$ is
88 /// the log-likelihood (including the variance weight normalization terms).
89 // TODO: Also consider the effect of regularization on this statistic.
 // TODO: Wikipedia suggests that the variance should be included in the number
91 // of parameters for multiple linear regression. Should an additional
92 // parameter be included for the dispersion parameter? This question does
93 // not affect the difference between two models fit with the methodology in
94 // this package.
95 pub fn bic(&self) -> F {
96 let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
97 let sum_log_weights = self.data.freq_sum(&log_weights);
98 let logn = num_traits::Float::ln(self.data.n_obs());
99 logn * self.rank() - F::two() * self.model_like - F::two() * sum_log_weights
100 }
101
102 /// The Cook's distance for each observation, which measures how much the predicted values
103 /// change when leaving out each observation.
104 ///
105 /// ```math
106 /// C_i = \frac{r_i^2 \, h_i}{K \, \hat\phi \, (1 - h_i)^2}
107 /// ```
108 ///
109 /// where $`r_i`$ is the Pearson residual, $`h_i`$ is the leverage, $`K`$ is the rank
110 /// (number of parameters), and $`\hat\phi`$ is the estimated dispersion.
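 ///
 /// A short sketch of flagging influential points, where `fit` is a `Fit` as returned
 /// by `Model::fit` (the `4/N` cutoff is a common heuristic, not something defined by
 /// this crate):
 ///
 /// ```ignore
 /// let cooks = fit.cooks()?;
 /// let n = cooks.len() as f64;
 /// // Indices of observations whose removal would move the fit the most.
 /// let influential: Vec<usize> = cooks
 ///     .iter()
 ///     .enumerate()
 ///     .filter(|(_, &c)| c > 4. / n)
 ///     .map(|(i, _)| i)
 ///     .collect();
 /// ```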
111 pub fn cooks(&self) -> RegressionResult<Array1<F>> {
112 let hat = self.leverage()?;
113 let pear_sq = self.resid_pear().mapv(|r| r * r);
114 let h_terms: Array1<F> = hat.mapv_into(|h| {
115 let omh = F::one() - h;
116 h / (omh * omh)
117 });
118 let denom: F = self.rank() * self.dispersion();
119 Ok(pear_sq * h_terms / denom)
120 }
121
122 /// The covariance matrix of the parameter estimates:
123 ///
124 /// ```math
125 /// \text{Cov}[\hat{\boldsymbol\beta}] = \hat\phi \, (\mathbf{X}^\mathsf{T}\mathbf{WSX})^{-1}
126 /// ```
127 ///
128 /// where $`\hat\phi`$ is the estimated dispersion, $`\mathbf{W}`$ is the weight matrix, and
129 /// $`\mathbf{S}`$ is the diagonal variance matrix. The Fisher matrix inverse is cached.
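 ///
 /// A minimal sketch of extracting standard errors from an existing fit (`fit` is
 /// assumed to be a `Fit` over `f64` data):
 ///
 /// ```ignore
 /// let cov = fit.covariance()?;
 /// // Standard errors are the square roots of the diagonal of the covariance matrix.
 /// let std_err = cov.diag().mapv(f64::sqrt);
 /// ```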
130 pub fn covariance(&self) -> RegressionResult<Array2<F>> {
131 // The covariance must be multiplied by the dispersion parameter.
132 // For logistic/poisson regression, this is identically 1.
133 // For linear/gamma regression it is estimated from the data.
134 let phi: F = self.dispersion();
135 // NOTE: invh/invh_into() are bugged and incorrect!
136 let unscaled_cov: Array2<F> = self.fisher_inv()?.to_owned();
137 let cov = unscaled_cov * phi;
138 Ok(cov)
139 }
140
141 /// Returns the deviance of the fit:
142 ///
143 /// ```math
144 /// D = -2 \left[ l(\hat{\boldsymbol\beta}) - l_\text{sat} \right]
145 /// ```
146 ///
147 /// Asymptotically $`\chi^2`$-distributed with [`ndf()`](Self::ndf) degrees of freedom.
148 /// The unregularized likelihood is used.
149 pub fn deviance(&self) -> F {
150 let terms = self.deviance_terms();
151 self.data.freq_sum(&terms)
152 }
153
154 /// Returns the contribution to the deviance from each observation. The total deviance should
155 /// be the sum of all of these. Variance weights are already included, but not frequency
156 /// weights.
157 fn deviance_terms(&self) -> Array1<F> {
158 let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result);
159 let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
160 let terms = (ll_sat - ll_terms) * F::two();
161 self.data.apply_var_weights(terms)
162 }
163
164 /// Returns the self-excluded deviance terms, i.e. the deviance of an observation as if the
165 /// model was fit without it. This is a one-step approximation.
166 fn deviance_terms_loo(&self) -> RegressionResult<Array1<F>> {
167 let dev_terms = self.deviance_terms();
168 let pear_sq = self.resid_pear().mapv(|r| r * r);
169 let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
170 let result = dev_terms + &hat_rat * (&hat_rat + F::two()) * pear_sq;
171 Ok(result)
172 }
173
174 /// The dispersion parameter $`\hat\phi`$ relating the variance to the variance function:
175 /// $`\text{Var}[y] = \phi \, V(\mu)`$.
176 ///
177 /// Identically one for logistic, binomial, and Poisson regression.
178 /// For families with a free dispersion (linear, gamma), estimated as:
179 ///
180 /// ```math
181 /// \hat\phi = \frac{D}{\left(1 - \frac{K}{n_\text{eff}}\right) \sum_i w_i}
182 /// ```
183 ///
184 /// which reduces to $`D / (N - K)`$ without variance weights.
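 ///
 /// For an unweighted linear model this is the usual residual variance estimate, so the
 /// deviance, dispersion, and residual degrees of freedom are related as sketched below
 /// (cf. the `deviance_dispersion_eq_linear` test):
 ///
 /// ```ignore
 /// let sigma_sq = fit.dispersion();
 /// // D = phi_hat * (N - K) for an unweighted ordinary least-squares fit
 /// assert!((fit.deviance() - sigma_sq * fit.ndf()).abs() < 1e-10);
 /// ```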
185 pub fn dispersion(&self) -> F {
186 use DispersionType::*;
187 match M::DISPERSED {
188 FreeDispersion => {
189 let dev = self.deviance();
190 let p = self.rank();
191 let n_eff = self.data.n_eff();
192 let scaling = if p >= n_eff {
193 // This is the overparameterized regime, which is checked directly instead of
194 // allowing negative values. It's not clear what conditions result in this when
195 // p < N.
196 F::zero()
197 } else {
198 (F::one() - p / n_eff) * self.data.sum_weights()
199 };
200 dev / scaling
201 }
202 NoDispersion => F::one(),
203 }
204 }
205
206 /// Return the dispersion terms with the observation(s) at each point excluded from the fit.
207 fn dispersion_loo(&self) -> RegressionResult<Array1<F>> {
208 use DispersionType::*;
209 match M::DISPERSED {
210 FreeDispersion => {
211 let pear_sq = self.resid_pear().mapv(|r| r * r);
212 let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
213 let terms = self.deviance_terms() + hat_rat * pear_sq;
214 // Don't apply total weights since the variance weights are already
215 // included in the residual terms. However, we do need the frequency weights.
216 let terms = self.data.apply_freq_weights(terms);
217 let total: Array1<F> = -terms + self.deviance();
218 let scaled_total: Array1<F> = match &self.data.weights {
219 None => match &self.data.freqs {
220 Some(f) => total / -(f - self.ndf()),
221 None => total / (self.ndf() - F::one()),
222 },
223 Some(w) => {
224 let v1 = self.data.freq_sum(w);
225 let w2 = w * w;
226 let v2 = self.data.freq_sum(&w2);
227 // The subtracted out terms need the frequency terms as well
228 let f_w = self.data.apply_freq_weights(w.clone());
229 let f_w2 = self.data.apply_freq_weights(w2);
 // the modified sums from leaving out the ith observation
231 let v1p = -f_w + v1;
232 let v2p = -f_w2 + v2;
233 let p = self.rank();
234 let scale = &v1p - v2p / &v1p * p;
235 total / scale
236 }
237 };
238 Ok(scaled_total)
239 }
240 NoDispersion => Ok(Array1::<F>::ones(self.data.y.len())),
241 }
242 }
243
244 /// Returns the errors in the response variables for the data passed as an
245 /// argument given the current model fit.
246 fn errors(&self, data: &Dataset<F>) -> Array1<F> {
247 &data.y - &self.predict(&data.x, data.linear_offset.as_ref())
248 }
249
250 #[deprecated(since = "0.0.10", note = "use predict() instead")]
251 pub fn expectation<S>(
252 &self,
253 data_x: &ArrayBase<S, Ix2>,
254 lin_off: Option<&Array1<F>>,
255 ) -> Array1<F>
256 where
257 S: Data<Elem = F>,
258 {
259 self.predict(data_x, lin_off)
260 }
261
262 /// Returns the Fisher information (the negative Hessian of the log-likelihood) at the
263 /// parameter values given:
264 ///
265 /// ```math
266 /// \mathcal{I}(\boldsymbol\beta) = \mathbf{X}^\mathsf{T}\mathbf{W}\eta'^2\mathbf{S}\mathbf{X}
267 /// ```
268 ///
269 /// where $`\mathbf{S} = \text{diag}(V(\mu_i))`$ and $`\eta'`$ is the derivative of the natural
270 /// parameter in terms of the linear predictor ($`\eta(\omega) = g_0(g^{-1}(\omega))`$ where
 /// $`g_0`$ is the canonical link function).
273 /// The regularization is included.
274 pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
275 let lin_pred: Array1<F> = self.data.linear_predictor(params);
276 let adj_var: Array1<F> = M::adjusted_variance_diag(&lin_pred);
277 // calculate the fisher matrix
278 let fisher: Array2<F> = (self.data.x_conj() * &adj_var).dot(&self.data.x);
279 // Regularize the fisher matrix
280 self.reg.as_ref().irls_mat(fisher, params)
281 }
282
283 /// The inverse of the (regularized) fisher information matrix. This is used in some other
284 /// calculations (like the covariance and hat matrices) so it is cached.
285 fn fisher_inv(&self) -> RegressionResult<Ref<'_, Array2<F>>> {
286 if self.cov_unscaled.borrow().is_none() {
287 let fisher_reg = self.fisher(&self.result);
288 // NOTE: invh/invh_into() are bugged and incorrect!
289 let unscaled_cov: Array2<F> = fisher_reg.inv_into()?;
290 *self.cov_unscaled.borrow_mut() = Some(unscaled_cov);
291 }
292 Ok(Ref::map(self.cov_unscaled.borrow(), |x| {
293 x.as_ref().unwrap()
294 }))
295 }
296
297 /// Returns the hat matrix, also known as the "projection" or "influence" matrix:
298 ///
299 /// ```math
300 /// P_{ij} = \frac{\partial \hat{y}_i}{\partial y_j}
301 /// ```
302 ///
303 /// Orthogonal to the response residuals at the fit result: $`\mathbf{P}(\mathbf{y} -
304 /// \hat{\mathbf{y}}) = 0`$.
305 /// This version is not symmetric, but the diagonal is invariant to this choice of convention.
306 pub fn hat(&self) -> RegressionResult<Ref<'_, Array2<F>>> {
307 if self.hat.borrow().is_none() {
308 let lin_pred = self.data.linear_predictor(&self.result);
309 // Apply the eta' terms manually instead of calling adjusted_variance_diag, because the
310 // adjusted variance method applies 2 powers to the variance, while we want one power
311 // to the variance and one to the weights.
312 // let adj_var = M::adjusted_variance_diag(&lin_pred);
313
314 let mu = M::mean(&lin_pred);
315 let var = mu.mapv_into(M::variance);
316 let eta_d = M::Link::d_nat_param(&lin_pred);
317
318 let fisher_inv = self.fisher_inv()?;
319
320 // the GLM variance and the data weights are put on different sides in this convention
321 let left = (var * &eta_d).insert_axis(Axis(1)) * &self.data.x;
322 let right = self.data.x_conj() * &eta_d;
323 let result = left.dot(&fisher_inv.dot(&right));
324
325 *self.hat.borrow_mut() = Some(result);
326 }
327 let borrowed: Ref<Option<Array2<F>>> = self.hat.borrow();
328 Ok(Ref::map(borrowed, |x| x.as_ref().unwrap()))
329 }
330
331 /// The one-step approximation to the change in coefficients from excluding each observation:
332 ///
333 /// ```math
334 /// \Delta\boldsymbol\beta^{(-i)} \approx \frac{1}{1-h_i}\,\mathcal{I}^{-1}\mathbf{x}^{(i)} w_i \eta'_i e_i
335 /// ```
336 ///
337 /// Each row $`i`$ should be subtracted from $`\hat{\boldsymbol\beta}`$ to approximate
338 /// the coefficients that would result from excluding observation $`i`$.
339 /// Exact for linear models; a one-step approximation for nonlinear models.
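 ///
 /// A sketch of approximating leave-one-out coefficients from a single fit (`fit` is an
 /// existing `Fit`; cf. the `loo_linear` test below, where this approximation is exact):
 ///
 /// ```ignore
 /// let delta = fit.infl_coef()?;
 /// // Row i approximates the coefficients of a fit with observation i removed.
 /// let loo_coefs = &fit.result - delta;
 /// ```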
340 pub fn infl_coef(&self) -> RegressionResult<Array2<F>> {
341 let lin_pred = self.data.linear_predictor(&self.result);
342 let resid_resp = self.resid_resp();
343 let omh = -self.leverage()? + F::one();
344 let resid_adj = M::Link::adjust_errors(resid_resp, &lin_pred) / omh;
345 let xte = self.data.x_conj() * resid_adj;
346 let fisher_inv = self.fisher_inv()?;
347 let delta_b = xte.t().dot(&*fisher_inv);
348 Ok(delta_b)
349 }
350
351 /// Returns the leverage $`h_i = P_{ii}`$ for each observation: the diagonal of the hat matrix.
352 /// Indicates the sensitivity of each prediction to its corresponding observation.
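 ///
 /// A brief sketch (`fit` is an existing `Fit`; the `2K/N` threshold is a common
 /// heuristic, not something defined by this crate):
 ///
 /// ```ignore
 /// let h = fit.leverage()?;
 /// let k = fit.result.len() as f64;
 /// let n = h.len() as f64;
 /// // Points with leverage well above 2K/N exert an outsized pull on their own predictions.
 /// let n_high = h.iter().filter(|&&hi| hi > 2. * k / n).count();
 /// ```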
353 pub fn leverage(&self) -> RegressionResult<Array1<F>> {
354 let hat = self.hat()?;
355 Ok(hat.diag().to_owned())
356 }
357
358 /// Returns exact coefficients from leaving each observation out, one-at-a-time.
359 /// This is a much more expensive operation than the original regression because a new one is
360 /// performed for each observation.
361 pub fn loo_exact(&self) -> RegressionResult<Array2<F>> {
362 let loo_coef: Array2<F> = self.infl_coef()?;
363 // NOTE: could also use the result itself as the initial guess
364 let loo_initial = &self.result - loo_coef;
365 let mut loo_result = loo_initial.clone();
366 let n_obs = self.data.y.len();
367 for i in 0..n_obs {
368 let data_i: Dataset<F> = {
369 let mut data_i: Dataset<F> = self.data.clone();
370 let mut freqs: Array1<F> = data_i.freqs.unwrap_or(Array1::<F>::ones(n_obs));
371 freqs[i] = F::zero();
372 data_i.freqs = Some(freqs);
373 data_i
374 };
375 let model_i = Model {
376 model: PhantomData::<M>,
377 data: data_i,
378 use_intercept: self.use_intercept,
379 };
380 let options = {
381 let mut options = self.options.clone();
382 options.init_guess = Some(self.result.clone());
383 options
384 };
385 let fit_i = model_i.with_options(options).fit()?;
386 loo_result.row_mut(i).assign(&fit_i.result);
387 }
388 Ok(loo_result)
389 }
390
391 /// Perform a likelihood-ratio test, returning the statistic:
392 ///
393 /// ```math
394 /// \Lambda = -2 \ln \frac{L_0}{L} = -2(l_0 - l)
395 /// ```
396 ///
397 /// where $`L_0`$ is the null model likelihood (intercept only) and $`L`$ is the fit likelihood.
398 /// By Wilks' theorem, asymptotically $`\chi^2`$-distributed with
399 /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
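 ///
 /// A minimal sketch of using the statistic (the chi-squared quantile itself must come
 /// from an external source, e.g. a statistics crate or a table):
 ///
 /// ```ignore
 /// let lr = fit.lr_test();
 /// let ndf = fit.test_ndf();
 /// // Reject the null model if `lr` exceeds the desired upper quantile of the
 /// // chi-squared distribution with `ndf` degrees of freedom.
 /// ```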
400 // TODO: Should the effective number of degrees of freedom due to
401 // regularization be taken into account?
402 pub fn lr_test(&self) -> F {
403 // The model likelihood should include regularization terms and there
404 // shouldn't be any in the null model with all non-intercept parameters
405 // set to zero.
406 let null_like = self.null_like();
407 F::from(-2.).unwrap() * (null_like - self.model_like)
408 }
409
410 /// Perform a likelihood-ratio test against a general alternative model, not
411 /// necessarily a null model. The alternative model is regularized the same
412 /// way that the regression resulting in this fit was. The degrees of
413 /// freedom cannot be generally inferred.
414 pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
415 let alt_like = M::log_like(self.data, alternative);
416 let alt_like_reg = alt_like + self.reg.likelihood(alternative);
417 F::two() * (self.model_like - alt_like_reg)
418 }
419
420 /// Returns the residual degrees of freedom in the model, i.e. the number
421 /// of data points minus the number of parameters. Not to be confused with
422 /// `test_ndf()`, the degrees of freedom in the statistical tests of the
423 /// fit parameters.
424 pub fn ndf(&self) -> F {
425 self.data.n_obs() - self.rank()
426 }
427
428 pub(crate) fn new(
429 data: &'a Dataset<F>,
430 use_intercept: bool,
431 irls: Irls<M, F>,
432 history: Vec<IrlsStep<F>>,
433 ) -> Self {
434 let Irls {
435 guess: result,
436 options,
437 reg,
438 n_iter,
439 last_like_data: data_like,
440 ..
441 } = irls;
442 assert_eq!(
443 data_like,
444 M::log_like(data, &result),
445 "Unregularized likelihoods should match exactly."
446 );
447 assert_eq!(
448 n_iter,
449 history.len(),
450 "Number of iterations should match history length"
451 );
452 assert_eq!(
453 result,
454 history.last().unwrap().guess,
455 "Last guess should be the same"
456 );
457 // Cache some of these variables that will be used often.
458 let n_par = result.len();
459 let model_like = data_like + reg.likelihood(&result);
460 assert_eq!(
461 model_like,
462 history.last().unwrap().like,
 "The last likelihood in the history should match the model likelihood"
464 );
465 Self {
466 model: PhantomData,
467 data,
468 use_intercept,
469 result,
470 options,
471 model_like,
472 reg,
473 n_iter,
474 history,
475 n_par,
476 cov_unscaled: RefCell::new(None),
477 hat: RefCell::new(None),
478 null_model: RefCell::new(None),
479 }
480 }
481
482 /// Returns the likelihood given the null model, which fixes all parameters
483 /// to zero except the intercept (if it is used). A total of `test_ndf()`
484 /// parameters are constrained.
485 pub fn null_like(&self) -> F {
486 let (null_like, _) = self.null_model_fit();
487 null_like
488 }
489
490 /// Return the likelihood and intercept for the null model. Since this can
491 /// require an additional regression, the values are cached.
492 fn null_model_fit(&self) -> (F, Array1<F>) {
493 // TODO: make a result instead of allowing a potential panic in the borrow.
494 if self.null_model.borrow().is_none() {
495 let (null_like, null_intercept): (F, Array1<F>) = match &self.data.linear_offset {
496 None => {
497 // If there is no linear offset, the natural parameter is
498 // identical for all observations so it is sufficient to
499 // calculate the null likelihood for a single point with y equal
500 // to the average.
501 // The average y
502 let y_bar: F = self.data.weighted_sum(&self.data.y) / self.data.sum_weights();
503 // This approach assumes that the likelihood is in the natural
504 // exponential form as calculated by Glm::log_like_natural(). If that
505 // function is overridden and the values differ significantly, this
506 // approach will give incorrect results. If the likelihood has terms
507 // non-linear in y, then the likelihood must be calculated for every
508 // point rather than averaged.
509 // If the intercept is allowed to maximize the likelihood, the natural
510 // parameter is equal to the link of the expectation. Otherwise it is
511 // the transformation function of zero.
512 let intercept: F = if self.use_intercept {
513 M::Link::func(y_bar)
514 } else {
515 F::zero()
516 };
517 // this is a length-one array. This works because the
518 // likelihood contribution is the same for all observations.
519 let nat_par = M::Link::nat_param(array![intercept]);
520 // The null likelihood per observation
521 let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
 // Multiply the per-observation likelihood by the total weight, since every term is the same.
523 let null_like_total = self.data.sum_weights() * null_like_one;
524 let null_params: Array1<F> = {
525 let mut par = Array1::<F>::zeros(self.n_par);
526 par[0] = intercept;
527 par
528 };
529 (null_like_total, null_params)
530 }
531 Some(off) => {
532 if self.use_intercept {
533 // If there are linear offsets and the intercept is allowed
534 // to be free, there is not a major simplification and the
535 // model needs to be re-fit.
536 // the X data is a single column of ones. Since this model
537 // isn't being created by the ModelBuilder, the X data
538 // has to be automatically padded with ones.
539 let data_x_null = Array2::<F>::ones((self.data.y.len(), 1));
540 let null_model = Model {
541 model: std::marker::PhantomData::<M>,
542 data: Dataset::<F> {
543 y: self.data.y.clone(),
544 x: data_x_null,
545 linear_offset: Some(off.clone()),
546 weights: self.data.weights.clone(),
547 freqs: self.data.freqs.clone(),
548 },
549 // If we are in this branch it is because an intercept is needed.
550 use_intercept: true,
551 };
552 // TODO: Make this function return an error, although it's
553 // difficult to imagine this case happening.
554 // TODO: Should the tolerance of this fit be stricter?
555 // The intercept should not be regularized
556 let null_fit = null_model
557 .fit_options()
558 // There shouldn't be too much trouble fitting this
559 // single-parameter fit, but there shouldn't be harm in
560 // using the same maximum as in the original model.
561 .max_iter(self.options.max_iter)
562 .fit()
563 .expect("Could not fit null model!");
564 let null_params: Array1<F> = {
565 let mut par = Array1::<F>::zeros(self.n_par);
566 // there is only one parameter in this fit.
567 par[0] = null_fit.result[0];
568 par
569 };
570 (null_fit.model_like, null_params)
571 } else {
572 // If the intercept is fixed to zero, then no minimization is
573 // required. The natural parameters are directly known in terms
574 // of the linear offset. The likelihood must still be summed
575 // over all observations, since they have different offsets.
576 let nat_par = M::Link::nat_param(off.clone());
577 let null_like_terms = ndarray::Zip::from(&self.data.y)
578 .and(&nat_par)
579 .map_collect(|&y, &eta| M::log_like_natural(y, eta));
580 let null_like = self.data.weighted_sum(&null_like_terms);
581 let null_params = Array1::<F>::zeros(self.n_par);
582 (null_like, null_params)
583 }
584 }
585 };
586 *self.null_model.borrow_mut() = Some((null_like, null_intercept));
587 }
588 self.null_model
589 .borrow()
590 .as_ref()
591 .expect("the null model should be cached now")
592 .clone()
593 }
594
595 /// Returns $`\hat{\mathbf{y}} = g^{-1}(\mathbf{X}\hat{\boldsymbol\beta} + \boldsymbol\omega_0)`$
596 /// given input data $`\mathbf{X}`$ and an optional linear offset
597 /// $`\boldsymbol\omega_0`$. The data need not be the training data.
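 ///
 /// A short sketch of predicting on new covariates (`x_new` is an assumed
 /// two-dimensional array). When the model includes an intercept (as in the tests
 /// below), the matrix passed here must already contain the leading column of ones;
 /// `utility::one_pad` prepends it:
 ///
 /// ```ignore
 /// let y_hat = fit.predict(&one_pad(x_new.view()), None);
 /// ```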
598 pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
599 where
600 S: Data<Elem = F>,
601 {
602 let lin_pred: Array1<F> = data_x.dot(&self.result);
603 let lin_pred: Array1<F> = if let Some(off) = &lin_off {
604 lin_pred + *off
605 } else {
606 lin_pred
607 };
608 lin_pred.mapv_into(M::Link::func_inv)
609 }
610
611 /// Returns the rank $`K`$ of the model (i.e. the number of parameters)
612 fn rank(&self) -> F {
613 F::from(self.n_par).unwrap()
614 }
615
616 /// Return the deviance residuals for each point in the training data:
617 ///
618 /// ```math
619 /// d_i = \text{sign}(y_i - \hat\mu_i)\sqrt{D_i}
620 /// ```
621 ///
622 /// where $`D_i`$ is the per-observation deviance contribution.
623 /// This is usually a better choice than Pearson residuals for non-linear models.
624 pub fn resid_dev(&self) -> Array1<F> {
625 let signs = self.resid_resp().mapv_into(F::signum);
626 let ll_diff = self.deviance_terms();
627 let dev: Array1<F> = ll_diff.mapv_into(num_traits::Float::sqrt);
628 signs * dev
629 }
630
631 /// Return the standardized deviance residuals (internally studentized):
632 ///
633 /// ```math
634 /// d_i^* = \frac{d_i}{\sqrt{\hat\phi(1 - h_i)}}
635 /// ```
636 ///
637 /// where $`d_i`$ is the deviance residual, $`\hat\phi`$ is the dispersion, and $`h_i`$ is
638 /// the leverage. Generally applicable for outlier detection.
639 pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>> {
640 let dev = self.resid_dev();
641 let phi = self.dispersion();
642 let hat: Array1<F> = self.leverage()?;
643 let omh: Array1<F> = -hat + F::one();
644 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
645 Ok(dev / denom)
646 }
647
648 /// Return the partial residuals.
649 pub fn resid_part(&self) -> Array1<F> {
650 let x_mean = self.data.x.mean_axis(Axis(0)).expect("empty dataset");
651 let x_centered = &self.data.x - x_mean.insert_axis(Axis(0));
652 self.resid_work() + x_centered.dot(&self.result)
653 }
654
655 /// Return the Pearson residuals for each point in the training data:
656 ///
657 /// ```math
658 /// r_i = \sqrt{w_i} \, \frac{y_i - \hat\mu_i}{\sqrt{V(\hat\mu_i)}}
659 /// ```
660 ///
661 /// where $`V`$ is the variance function and $`w_i`$ are the variance weights.
662 /// Not scaled by the dispersion for families with a free dispersion parameter.
663 pub fn resid_pear(&self) -> Array1<F> {
664 let mu: Array1<F> = self.predict(&self.data.x, self.data.linear_offset.as_ref());
 let residuals = &self.data.y - &mu;
666 let inv_var_diag: Array1<F> = mu.mapv_into(M::variance).mapv_into(F::recip);
667 // the variance weights are the reciprocal of the corresponding variance
668 let scales = self
669 .data
670 .apply_var_weights(inv_var_diag)
671 .mapv_into(num_traits::Float::sqrt);
672 scales * residuals
673 }
674
675 /// Return the standardized Pearson residuals (internally studentized):
676 ///
677 /// ```math
678 /// r_i^* = \frac{r_i}{\sqrt{\hat\phi(1 - h_i)}}
679 /// ```
680 ///
681 /// where $`r_i`$ is the Pearson residual and $`h_i`$ is the leverage. These are expected
682 /// to have unit variance.
683 pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>> {
684 let pearson = self.resid_pear();
685 let phi = self.dispersion();
686 let hat = self.leverage()?;
687 let omh = -hat + F::one();
688 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
689 Ok(pearson / denom)
690 }
691
692 /// Return the response residuals: $`e_i^\text{resp} = y_i - \hat\mu_i`$.
693 pub fn resid_resp(&self) -> Array1<F> {
694 self.errors(self.data)
695 }
696
697 /// Return the externally studentized residuals:
698 ///
699 /// ```math
700 /// \tilde{t}_i = \text{sign}(e_i) \sqrt{\frac{D_i^{(-i)}}{\hat\phi^{(-i)}}}
701 /// ```
702 ///
703 /// where $`D_i^{(-i)}`$ and $`\hat\phi^{(-i)}`$ are the LOO deviance and dispersion
704 /// approximated via one-step deletion. Under normality, $`t`$-distributed with
705 /// $`N - K - 1`$ degrees of freedom. This is a robust and general method for outlier
706 /// detection.
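 ///
 /// A short sketch of outlier screening (`fit` is an existing `Fit`; the cutoff of 3 is
 /// a conventional choice, not something defined by this crate):
 ///
 /// ```ignore
 /// let t = fit.resid_student()?;
 /// let n_outliers = t.iter().filter(|&&ti| ti.abs() > 3.).count();
 /// ```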
707 pub fn resid_student(&self) -> RegressionResult<Array1<F>> {
708 let signs = self.resid_resp().mapv(F::signum);
709 let dev_terms_loo: Array1<F> = self.deviance_terms_loo()?;
710 // NOTE: This match could also be handled internally in dispersion_loo()
711 let dev_terms_scaled = match M::DISPERSED {
712 // The dispersion is corrected for the contribution from each current point.
713 // This is an approximation; the exact solution would perform a fit at each point.
714 DispersionType::FreeDispersion => dev_terms_loo / self.dispersion_loo()?,
715 DispersionType::NoDispersion => dev_terms_loo,
716 };
717 Ok(signs * dev_terms_scaled.mapv_into(num_traits::Float::sqrt))
718 }
719
720 /// Returns the working residuals:
721 ///
722 /// ```math
723 /// e_i^\text{work} = \frac{\eta'(\omega_i)(y_i - \hat\mu_i)}{V(\hat\mu_i)}
724 /// ```
725 ///
726 /// Equal to the response residuals divided by the variance function (as
727 /// opposed to the square root of the variance as in the Pearson residuals).
728 pub fn resid_work(&self) -> Array1<F> {
729 let lin_pred: Array1<F> = self.data.linear_predictor(&self.result);
730 let mu: Array1<F> = lin_pred.mapv(M::Link::func_inv);
 let resid_response: Array1<F> = &self.data.y - &mu;
732 let var: Array1<F> = mu.mapv(M::variance);
733 // adjust for non-canonical link functions; we want a total factor of 1/eta'
734 let (adj_response, adj_var) =
735 M::Link::adjust_errors_variance(resid_response, var, &lin_pred);
736 adj_response / adj_var
737 }
738
739 /// Returns the score function $`\nabla_{\boldsymbol\beta} l`$ (the gradient of the
740 /// log-likelihood) at the parameter values given. Should be zero at the MLE.
741 pub fn score(&self, params: &Array1<F>) -> Array1<F> {
742 // This represents the predictions given the input parameters, not the
743 // fit parameters.
744 let lin_pred: Array1<F> = self.data.linear_predictor(params);
745 let mu: Array1<F> = M::mean(&lin_pred);
746 let resid_response = &self.data.y - mu;
747 let resid_working = M::Link::adjust_errors(resid_response, &lin_pred);
748 let score_unreg = self.data.x_conj().dot(&resid_working);
749 self.reg.as_ref().gradient(score_unreg, params)
750 }
751
752 /// Returns the score test statistic:
753 ///
754 /// ```math
755 /// S = \mathbf{J}(\boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0)^{-1} \, \mathbf{J}(\boldsymbol\beta_0)
756 /// ```
757 ///
758 /// where $`\mathbf{J}`$ is the score and $`\mathcal{I}`$ is the Fisher information, both
759 /// evaluated at the null parameters. Asymptotically $`\chi^2`$-distributed with
760 /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
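 ///
 /// For ordinary linear regression this statistic coincides with the likelihood-ratio
 /// and Wald statistics, as sketched below (cf. the `linear_stat_tests` test):
 ///
 /// ```ignore
 /// let (lr, wald, score) = (fit.lr_test(), fit.wald_test(), fit.score_test()?);
 /// // For a linear model these three values agree to numerical precision.
 /// ```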
761 pub fn score_test(&self) -> RegressionResult<F> {
762 let (_, null_params) = self.null_model_fit();
763 self.score_test_against(null_params)
764 }
765
766 /// Returns the score test statistic compared to another set of model
767 /// parameters, not necessarily a null model. The degrees of freedom cannot
768 /// be generally inferred.
769 pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F> {
770 let score_alt = self.score(&alternative);
771 let fisher_alt = self.fisher(&alternative);
 // This is not the same as the cached covariance matrix since it is
773 // evaluated at the null parameters.
774 // NOTE: invh/invh_into() are bugged and incorrect!
775 let inv_fisher_alt = fisher_alt.inv_into()?;
776 Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
777 }
778
779 /// The degrees of freedom for the likelihood ratio test, the score test,
780 /// and the Wald test. Not to be confused with `ndf()`, the degrees of
781 /// freedom in the model fit.
782 pub fn test_ndf(&self) -> usize {
783 if self.use_intercept {
784 self.n_par - 1
785 } else {
786 self.n_par
787 }
788 }
789
790 /// Returns the Wald test statistic:
791 ///
792 /// ```math
793 /// W = (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0) \, (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)
794 /// ```
795 ///
796 /// Compared to a null model with only an intercept (if one is used). Asymptotically
797 /// $`\chi^2`$-distributed with [`test_ndf()`](Self::test_ndf) degrees of freedom.
798 pub fn wald_test(&self) -> F {
799 // The null parameters are all zero except for a possible intercept term
800 // which optimizes the null model.
801 let (_, null_params) = self.null_model_fit();
802 self.wald_test_against(&null_params)
803 }
804
805 /// Returns the Wald test statistic compared to another specified model fit
806 /// instead of the null model. The degrees of freedom cannot be generally
807 /// inferred.
808 pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
809 let d_params: Array1<F> = &self.result - alternative;
810 let fisher_alt: Array2<F> = self.fisher(alternative);
811 d_params.t().dot(&fisher_alt.dot(&d_params))
812 }
813
814 /// Returns the per-parameter Wald $`z`$-statistic:
815 ///
816 /// ```math
817 /// z_k = \frac{\hat\beta_k}{\sqrt{\text{Cov}[\hat{\boldsymbol\beta}]_{kk}}}
818 /// ```
819 ///
820 /// Since it does not account for covariance between parameters it may not be accurate.
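 ///
 /// A minimal sketch (a normal CDF from an external source is needed for p-values;
 /// |z| > 2 corresponds roughly to the two-sided 5% level):
 ///
 /// ```ignore
 /// let z = fit.wald_z()?;
 /// let n_significant = z.iter().filter(|&&zi| zi.abs() > 2.).count();
 /// ```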
821 pub fn wald_z(&self) -> RegressionResult<Array1<F>> {
822 let par_cov = self.covariance()?;
823 let par_variances: ArrayView1<F> = par_cov.diag();
824 Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
825 }
826}
827
828/// Specialized functions for OLS.
829impl<'a, F> Fit<'a, Linear, F>
830where
831 F: 'static + Float,
832{
833 /// Returns the coefficient of determination $`R^2`$:
834 ///
835 /// ```math
836 /// R^2 = 1 - \frac{\text{RSS}}{\text{TSS}}
837 /// ```
838 ///
839 /// where RSS is the residual sum of squares and TSS is the total sum of squares.
840 pub fn r_sq(&self) -> F {
841 let y_avg: F = self.data.y.mean().expect("Data should be non-empty");
842 let total_sum_sq: F = self.data.y.mapv(|y| y - y_avg).mapv(|dy| dy * dy).sum();
843 (total_sum_sq - self.resid_sum_sq()) / total_sum_sq
844 }
845
846 /// Returns the residual sum of squares:
847 /// $`\text{RSS} = \sum_i (y_i - \hat\mu_i)^2`$.
848 pub fn resid_sum_sq(&self) -> F {
849 self.resid_resp().mapv_into(|r| r * r).sum()
850 }
851}
852
853#[cfg(test)]
854mod tests {
855 use super::*;
856 use crate::{
857 Linear, Logistic,
858 model::ModelBuilder,
859 utility::{one_pad, standardize},
860 };
861 use anyhow::Result;
862 use approx::assert_abs_diff_eq;
863 use ndarray::Axis;
864 use ndarray::{concatenate, s};
865
 /// Checks that the test statistics are invariant to whether the data is standardized.
867 #[test]
868 fn standardization_invariance() -> Result<()> {
869 let data_y = array![true, false, false, true, true, true, true, false, true];
870 let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
871 let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
872 let data_x_std = standardize(data_x.clone());
873 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
874 .linear_offset(lin_off.clone())
875 .build()?;
876 let fit = model.fit()?;
877 let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x_std)
878 .linear_offset(lin_off)
879 .build()?;
880 let fit_std = model_std.fit()?;
881 let lr = fit.lr_test();
882 let lr_std = fit_std.lr_test();
883 assert_abs_diff_eq!(lr, lr_std);
884 eprintln!("about to try score test");
885 assert_abs_diff_eq!(
886 fit.score_test()?,
887 fit_std.score_test()?,
888 epsilon = f32::EPSILON as f64
889 );
890 eprintln!("about to try wald test");
891 assert_abs_diff_eq!(
892 fit.wald_test(),
893 fit_std.wald_test(),
894 epsilon = 4.0 * f64::EPSILON
895 );
896 assert_abs_diff_eq!(fit.aic(), fit_std.aic());
897 assert_abs_diff_eq!(fit.bic(), fit_std.bic());
898 eprintln!("about to try deviance");
899 assert_abs_diff_eq!(fit.deviance(), fit_std.deviance());
900 // The Wald Z-score of the intercept term is not invariant under a
901 // linear transformation of the data, but the parameter part seems to
902 // be, at least for single-component data.
903 assert_abs_diff_eq!(
904 fit.wald_z()?[1],
905 fit_std.wald_z()?[1],
906 epsilon = 0.01 * f32::EPSILON as f64
907 );
908
909 Ok(())
910 }
911
912 #[test]
913 fn null_model() -> Result<()> {
914 let data_y = array![true, false, false, true, true];
915 let data_x: Array2<f64> = array![[], [], [], [], []];
916 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
917 let fit = model.fit()?;
918 dbg!(fit.n_iter);
919 dbg!(&fit.result);
920 // with no offsets, the result should be the link function of the mean.
921 assert_abs_diff_eq!(
922 fit.result[0],
923 <Logistic as Glm>::Link::func(0.6),
924 epsilon = 4.0 * f64::EPSILON
925 );
926 let empty_null_like = fit.null_like();
927 assert_eq!(fit.test_ndf(), 0);
928 dbg!(&fit.model_like);
929 let lr = fit.lr_test();
930 // Since there is no data, the null likelihood should be identical to
931 // the fit likelihood, so the likelihood ratio test should yield zero.
932 assert_abs_diff_eq!(lr, 0., epsilon = 4. * f64::EPSILON);
933
934 // Check that the assertions still hold if linear offsets are included.
935 let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
936 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
937 .linear_offset(lin_off)
938 .build()?;
939 let fit_off = model.fit()?;
940 let empty_model_like_off = fit_off.model_like;
941 let empty_null_like_off = fit_off.null_like();
942 // these two assertions should be equivalent
943 assert_abs_diff_eq!(fit_off.lr_test(), 0.);
944 assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);
945
946 // check consistency with data provided
947 let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
948 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
949 let fit_with = model.fit()?;
950 dbg!(&fit_with.result);
951 // The null likelihood of the model with parameters should be the same
952 // as the likelihood of the model with only the intercept.
953 assert_abs_diff_eq!(empty_null_like, fit_with.null_like());
954
955 Ok(())
956 }
957
958 #[test]
959 fn null_like_logistic() -> Result<()> {
960 // 6 true and 4 false for y_bar = 0.6.
961 let data_y = array![
962 true, true, true, true, true, true, false, false, false, false
963 ];
964 let ybar: f64 = 0.6;
965 let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
966 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
967 let fit = model.fit()?;
968 let target_null_like = fit
969 .data
970 .y
971 .mapv(|y| {
972 let eta = (ybar / (1. - ybar)).ln();
973 y * eta - eta.exp().ln_1p()
974 })
975 .sum();
976 assert_abs_diff_eq!(fit.null_like(), target_null_like);
977 Ok(())
978 }
979
980 // Check that the deviance is equal to the sum of square deviations for a linear model
981 #[test]
982 fn deviance_linear() -> Result<()> {
983 let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
984 let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
985 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
986 let fit = model.fit()?;
987 // The predicted values of Y given the model.
988 let pred_y = fit.predict(&one_pad(data_x.view()), None);
989 let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
990 assert_abs_diff_eq!(fit.deviance(), target_dev, epsilon = 4. * f64::EPSILON);
991 Ok(())
992 }
993
994 // Check that the deviance and dispersion parameter are equal up to the number of degrees of
 // freedom for a linear model.
996 #[test]
997 fn deviance_dispersion_eq_linear() -> Result<()> {
998 let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
999 let data_x = array![
1000 [0.4, 0.2],
1001 [0.1, 0.4],
1002 [-0.1, 0.3],
1003 [0.5, 0.7],
1004 [0.4, 0.1],
1005 [-0.2, -0.3],
1006 [0.4, -0.1]
1007 ];
1008 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1009 let fit = model.fit()?;
1010 let dev = fit.deviance();
1011 let disp = fit.dispersion();
1012 let ndf = fit.ndf();
1013 assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
1014 Ok(())
1015 }
1016
1017 // Check that the residuals for a linear model are all consistent.
1018 #[test]
1019 fn residuals_linear() -> Result<()> {
1020 let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
1021 let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
1022 let weights = array![0.8, 1.2, 0.9, 0.8, 1.1, 0.9];
1023 // the implied variances from the weights
1024 let wgt_sigmas = weights.map(|w: &f64| 1. / w.sqrt());
1025 let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
1026 .var_weights(weights.clone())
1027 .build()?;
1028 let fit = model.fit()?;
1029 let response = fit.resid_resp();
1030 let resp_scaled = &response / wgt_sigmas;
1031 let pearson = fit.resid_pear();
1032 let deviance = fit.resid_dev();
1033 assert_abs_diff_eq!(resp_scaled, pearson);
1034 assert_abs_diff_eq!(resp_scaled, deviance);
1035 let pearson_std = fit.resid_pear_std()?;
1036 let deviance_std = fit.resid_dev_std()?;
1037 assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);
1038 // The externally-studentized residuals aren't expected to match the internally-studentized
1039 // ones.
1040 let dev_terms_loo = fit.deviance_terms_loo()?;
1041 let disp_terms_loo = fit.dispersion_loo()?;
1042 let student = fit.resid_student()?;
1043
1044 // NOTE: Studentization can't be checked directly in general because the method used is a
1045 // one-step approximation, however it should be exact in the linear OLS case.
1046 let n_data = data_y.len();
1047 // Check that the leave-one-out stats hold literally
1048 let mut loo_diff: Vec<f64> = Vec::new();
1049 let mut loo_dev_res: Vec<f64> = Vec::new();
1050 let mut loo_disp: Vec<f64> = Vec::new();
1051 for i in 0..n_data {
1052 let ya = data_y.slice(s![0..i]);
1053 let yb = data_y.slice(s![i + 1..]);
1054 let xa = data_x.slice(s![0..i, ..]);
1055 let xb = data_x.slice(s![i + 1.., ..]);
1056 let wa = weights.slice(s![0..i]);
1057 let wb = weights.slice(s![i + 1..]);
1058 let y_loo = concatenate![Axis(0), ya, yb];
1059 let x_loo = concatenate![Axis(0), xa, xb];
1060 let w_loo = concatenate![Axis(0), wa, wb];
1061 let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
1062 .var_weights(w_loo)
1063 .build()?;
1064 let fit_i = model_i.fit()?;
1065 let yi = data_y[i];
1066 let xi = data_x.slice(s![i..i + 1, ..]);
1067 let xi = crate::utility::one_pad(xi);
1068 let wi = weights[i];
1069 let yi_pred: f64 = fit_i.predict(&xi, None)[0];
1070 let disp_i = fit_i.dispersion();
1071 let var_i = disp_i / wi;
1072 let diff_i = yi - yi_pred;
1073 let res_dev_i = diff_i / var_i.sqrt();
1074 loo_diff.push(wi * diff_i * diff_i);
1075 loo_disp.push(disp_i);
1076 loo_dev_res.push(res_dev_i);
1077 }
1078 let loo_diff: Array1<f64> = loo_diff.into();
1079 let loo_disp: Array1<f64> = loo_disp.into();
1080 let loo_dev_res: Array1<f64> = loo_dev_res.into();
1081 assert_abs_diff_eq!(dev_terms_loo, loo_diff, epsilon = 8. * f64::EPSILON);
1082 assert_abs_diff_eq!(disp_terms_loo, loo_disp, epsilon = 8. * f64::EPSILON);
1083 assert_abs_diff_eq!(student, loo_dev_res, epsilon = 8. * f64::EPSILON);
1084 Ok(())
1085 }
1086
 // Check the null likelihood for the case where it can be computed exactly.
1088 #[test]
1089 fn null_like_linear() -> Result<()> {
1090 let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
1091 let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
1092 let ybar: f64 = data_y.mean().unwrap();
1093 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1094 let fit = model.fit()?;
1095 // let target_null_like = data_y.mapv(|y| -0.5 * (y - ybar) * (y - ybar)).sum();
1096 let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
1097 // With the saturated likelihood subtracted the null likelihood should
1098 // just be the sum of squared differences from the mean.
1099 // let target_null_like = 0.;
1100 // dbg!(target_null_like);
1101 let fit_null_like = fit.null_like();
1102 assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
1103 assert_eq!(fit.test_ndf(), 1);
1104 assert_abs_diff_eq!(
1105 fit_null_like,
1106 target_null_like,
1107 epsilon = 4.0 * f64::EPSILON
1108 );
1109 Ok(())
1110 }
1111
1112 // check the leave-one-out one-step for the linear model
1113 #[test]
1114 fn loo_linear() -> Result<()> {
1115 let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
1116 let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
1117 let weights = array![1.0, 1.2, 0.8, 1.1, 1.0, 0.7];
1118 let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
1119 .var_weights(weights.clone())
1120 .build()?;
1121 let fit = model.fit()?;
1122
1123 let loo_coef: Array2<f64> = fit.infl_coef()?;
1124 let loo_results = &fit.result - loo_coef;
1125 let n_data = data_y.len();
1126 for i in 0..n_data {
1127 let ya = data_y.slice(s![0..i]);
1128 let yb = data_y.slice(s![i + 1..]);
1129 let xa = data_x.slice(s![0..i, ..]);
1130 let xb = data_x.slice(s![i + 1.., ..]);
1131 let wa = weights.slice(s![0..i]);
1132 let wb = weights.slice(s![i + 1..]);
1133 let y_loo = concatenate![Axis(0), ya, yb];
1134 let x_loo = concatenate![Axis(0), xa, xb];
1135 let w_loo = concatenate![Axis(0), wa, wb];
1136 let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
1137 .var_weights(w_loo)
1138 .build()?;
1139 let fit_i = model_i.fit()?;
1140 assert_abs_diff_eq!(
1141 loo_results.row(i),
1142 &fit_i.result,
1143 epsilon = f32::EPSILON as f64
1144 );
1145 }
1146 Ok(())
1147 }
1148
1149 // check the null likelihood where there is no dependence on the X data.
1150 #[test]
1151 fn null_like_logistic_nodep() -> Result<()> {
1152 let data_y = array![true, true, false, false, true, false, false, true];
1153 let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
1154 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1155 let fit = model.fit()?;
1156 let lr = fit.lr_test();
1157 assert_abs_diff_eq!(lr, 0.);
1158 Ok(())
1159 }
1160 // TODO: Test that the statistics behave sensibly under regularization. The
1161 // likelihood ratio test should yield a smaller value.
1162
 // Test the basic caching functions.
1164 #[test]
1165 fn cached_computations() -> Result<()> {
1166 let data_y = array![true, true, false, true, true, false, false, false, true];
1167 let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
1168 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1169 let fit = model.fit()?;
1170 let _null_like = fit.null_like();
1171 let _null_like = fit.null_like();
1172 let _cov = fit.covariance()?;
1173 let _wald = fit.wald_z();
1174 Ok(())
1175 }
1176
1177 // Check the consistency of the various statistical tests for linear
1178 // regression, where they should be the most comparable.
1179 #[test]
1180 fn linear_stat_tests() -> Result<()> {
1181 let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
1182 let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
1183 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1184 let fit = model.fit()?;
1185 let lr = fit.lr_test();
1186 let wald = fit.wald_test();
1187 let score = fit.score_test()?;
1188 assert_abs_diff_eq!(lr, wald, epsilon = 32.0 * f64::EPSILON);
1189 assert_abs_diff_eq!(lr, score, epsilon = 32.0 * f64::EPSILON);
1190 Ok(())
1191 }
1192}