ndarray_glm/fit.rs

//! Stores the fit results of the IRLS regression and provides functions that
//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.

pub mod options;
use crate::{
    error::RegressionResult,
    glm::{DispersionType, Glm},
    irls::Irls,
    link::{Link, Transform},
    model::{Dataset, Model},
    num::Float,
    regularization::IrlsReg,
    Linear,
};
use ndarray::{array, Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2};
use ndarray_linalg::InverseInto;
use options::FitOptions;
use std::{
    cell::{Ref, RefCell},
    marker::PhantomData,
};

/// The result of a successful GLM fit.
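///
/// A `Fit` is not constructed directly; it is returned by `Model::fit()`. A minimal usage
/// sketch (the `Logistic` family, `f64` data, and the `y`/`x` arrays are illustrative
/// assumptions; marked `ignore` so it is not compiled as a doc-test):
///
/// ```ignore
/// let model = ModelBuilder::<Logistic>::data(&y, &x).build()?;
/// let fit = model.fit()?;
/// println!("coefficients: {}", fit.result);
/// println!("AIC: {}", fit.aic());
/// ```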
pub struct Fit<'a, M, F>
where
    M: Glm,
    F: Float,
{
    model: PhantomData<M>,
    /// The data and model specification used in the fit.
    data: &'a Dataset<F>,
    /// Whether the intercept covariate is used
    use_intercept: bool,
    /// The parameter values that maximize the likelihood as given by the IRLS regression.
    pub result: Array1<F>,
    /// The options used for this fit.
    pub options: FitOptions<F>,
    /// The value of the likelihood function for the fit result.
    pub model_like: F,
    /// The regularizer of the fit
    reg: Box<dyn IrlsReg<F>>,
    /// The number of overall iterations taken in the IRLS.
    pub n_iter: usize,
    /// The number of data points
    n_data: usize,
    /// The number of parameters
    n_par: usize,
    /// The estimated covariance matrix of the parameters. Since the calculation
    /// requires a matrix inversion, it is computed only when needed and the
    /// value is cached. Access through the `covariance()` function.
    cov: RefCell<Option<Array2<F>>>,
    /// The likelihood and parameters for the null model.
    null_model: RefCell<Option<(F, Array1<F>)>>,
}

impl<'a, M, F> Fit<'a, M, F>
where
    M: Glm,
    F: 'static + Float,
{
    /// Returns the Akaike information criterion for the model fit.
    // TODO: Should an effective number of parameters that takes regularization
    // into account be considered?
    pub fn aic(&self) -> F {
        F::from(2 * self.n_par).unwrap() - F::from(2.).unwrap() * self.model_like
    }

    /// Returns the Bayesian information criterion for the model fit.
    // TODO: Also consider the effect of regularization on this statistic.
    // TODO: Wikipedia suggests that the variance should be included in the number
    // of parameters for multiple linear regression. Should an additional
    // parameter be included for the dispersion parameter? This question does
    // not affect the difference between two models fit with the methodology in
    // this package.
    pub fn bic(&self) -> F {
        let logn = num_traits::Float::ln(F::from(self.data.y.len()).unwrap());
        logn * F::from(self.n_par).unwrap() - F::from(2.).unwrap() * self.model_like
    }

    /// The covariance matrix estimated by the Fisher information and the dispersion parameter
    /// (for families with a free scale). The matrix is cached to avoid repeating the potentially
    /// expensive matrix inversion.
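    ///
    /// As an illustrative sketch (not compiled as a doc-test), approximate standard errors of
    /// the parameters can be read off the diagonal of this matrix:
    ///
    /// ```ignore
    /// // `fit` is assumed to be a Fit over f64 data from a successful regression
    /// let cov = fit.covariance()?;
    /// let standard_errors = cov.diag().mapv(f64::sqrt);
    /// ```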
    pub fn covariance(&self) -> RegressionResult<Ref<Array2<F>>> {
        if self.cov.borrow().is_none() {
            if self.data.weights.is_some() {
                // NOTE: Perhaps it is just the fisher matrix that must be updated.
                unimplemented!(
                    "The covariance calculation must take into account weights/correlations."
                );
            }
            let fisher_reg = self.fisher(&self.result);
            // The covariance must be multiplied by the dispersion parameter.
            // For logistic/poisson regression, this is identically 1.
            // For linear/gamma regression it is estimated from the data.
            let phi: F = self.dispersion();
            // NOTE: invh/invh_into() are bugged and incorrect!
            let unscaled_cov: Array2<F> = fisher_reg.inv_into()?;
            let cov = unscaled_cov * phi;
            *self.cov.borrow_mut() = Some(cov);
        }
        Ok(Ref::map(self.cov.borrow(), |x| x.as_ref().unwrap()))
    }

    /// Returns the deviance of the fit: twice the difference between the
    /// saturated likelihood and the model likelihood. Asymptotically this fits
    /// a chi-squared distribution with `self.ndf()` degrees of freedom.
    /// Note that the regularized likelihood is used here.
    // TODO: This is likely sensitive to regularization because the saturated
    // model is not regularized but the model likelihood is. Perhaps this can be
    // accounted for with an effective number of degrees of freedom.
    pub fn deviance(&self) -> F {
        // Note that this must change if the GLM likelihood subtracts the
        // saturated one already.
        F::from(2.).unwrap() * (self.data.y.mapv(M::log_like_sat).sum() - self.model_like)
    }

    /// The dispersion parameter (typically denoted `phi`), which relates the variance of the `y`
    /// values to the variance of the response distribution: `Var[y] = phi * Var[mu]`.
    /// Identically one for logistic, binomial, and Poisson regression.
    /// For others (linear, gamma) the dispersion parameter is estimated from the data.
    /// This is equal to the total deviance divided by the degrees of freedom. For OLS linear
    /// regression this is equal to the sum of `(y_i - mu_i)^2 / (n-p)`, an estimate of `sigma^2`;
    /// with no covariates it is equal to the sample variance.
    pub fn dispersion(&self) -> F {
        use DispersionType::*;
        match M::DISPERSED {
            FreeDispersion => {
                let ndf: F = F::from(self.ndf()).unwrap();
                let dev = self.deviance();
                dev / ndf
            }
            NoDispersion => F::one(),
        }
    }

    /// Returns the errors in the response variables for the data passed as an
    /// argument given the current model fit.
    fn errors(&self, data: &Dataset<F>) -> Array1<F> {
        &data.y - &self.predict(&data.x, data.linear_offset.as_ref())
    }

    #[deprecated(since = "0.0.10", note = "use predict() instead")]
    pub fn expectation<S>(
        &self,
        data_x: &ArrayBase<S, Ix2>,
        lin_off: Option<&Array1<F>>,
    ) -> Array1<F>
    where
        S: Data<Elem = F>,
    {
        self.predict(data_x, lin_off)
    }

    /// Returns the Fisher information (the negative Hessian of the log-likelihood)
    /// at the given parameter values. The regularization is included.
    pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
        let lin_pred: Array1<F> = self.data.linear_predictor(params);
        let mu: Array1<F> = M::mean(&lin_pred);
        let var_diag: Array1<F> = mu.mapv_into(M::variance);
        // adjust the variance for non-canonical link functions
        let eta_d = M::Link::d_nat_param(&lin_pred);
        let adj_var: Array1<F> = &eta_d * &var_diag * eta_d;
        // calculate the fisher matrix
        let fisher: Array2<F> = (&self.data.x.t() * &adj_var).dot(&self.data.x);
        // Regularize the fisher matrix
        self.reg.as_ref().irls_mat(fisher, params)
    }

    /// Perform a likelihood-ratio test, returning the statistic `-2*ln(L_0/L)`
    /// where `L_0` is the likelihood of the best-fit null model (with no
    /// parameters but the intercept) and `L` is the likelihood of the fit result.
    /// The number of degrees of freedom of this statistic, equal to the number
    /// of parameters fixed to zero to form the null model, is `test_ndf()`. By
    /// Wilks' theorem this statistic is asymptotically chi-squared distributed
    /// with this number of degrees of freedom.
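    ///
    /// As a hedged sketch, the statistic can be combined with `test_ndf()` and a chi-squared
    /// survival function from a statistics crate to obtain a p-value (the `chi2_sf` helper
    /// below is hypothetical and not part of this crate):
    ///
    /// ```ignore
    /// let stat = fit.lr_test();
    /// let p_value = chi2_sf(stat, fit.test_ndf() as f64);
    /// ```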
    // TODO: Should the effective number of degrees of freedom due to
    // regularization be taken into account? Should the degrees of freedom be a
    // float?
    pub fn lr_test(&self) -> F {
        // The model likelihood should include regularization terms and there
        // shouldn't be any in the null model with all non-intercept parameters
        // set to zero.
        let null_like = self.null_like();
        F::from(-2.).unwrap() * (null_like - self.model_like)
    }

    /// Perform a likelihood-ratio test against a general alternative model, not
    /// necessarily a null model. The alternative model is regularized the same
    /// way that the regression resulting in this fit was. The degrees of
    /// freedom cannot be generally inferred.
    pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
        let alt_like = M::log_like(self.data, alternative);
        let alt_like_reg = alt_like + self.reg.likelihood(alternative);
        F::from(2.).unwrap() * (self.model_like - alt_like_reg)
    }

    /// Returns the residual degrees of freedom in the model, i.e. the number
    /// of data points minus the number of parameters. Not to be confused with
    /// `test_ndf()`, the degrees of freedom in the statistical tests of the
    /// fit.
    pub fn ndf(&self) -> usize {
        self.n_data - self.n_par
    }

    pub(crate) fn new(data: &'a Dataset<F>, use_intercept: bool, irls: Irls<M, F>) -> Self {
        let Irls {
            guess: result,
            options,
            reg,
            n_iter,
            last_like_data: data_like,
            ..
        } = irls;
        assert_eq!(
            data_like,
            M::log_like(data, &result),
            "Unregularized likelihoods should match exactly."
        );
        // Cache some of these variables that will be used often.
        let n_par = result.len();
        let n_data = data.y.len();
        let model_like = data_like + reg.likelihood(&result);
        Self {
            model: PhantomData,
            data,
            use_intercept,
            result,
            options,
            model_like,
            reg,
            n_iter,
            n_data,
            n_par,
            cov: RefCell::new(None),
            null_model: RefCell::new(None),
        }
    }

    /// Returns the likelihood given the null model, which fixes all parameters
    /// to zero except the intercept (if it is used). A total of `test_ndf()`
    /// parameters are constrained.
    pub fn null_like(&self) -> F {
        let (null_like, _) = self.null_model_fit();
        null_like
    }

    /// Return the likelihood and intercept for the null model. Since this can
    /// require an additional regression, the values are cached.
    fn null_model_fit(&self) -> (F, Array1<F>) {
        // TODO: make a result instead of allowing a potential panic in the borrow.
        if self.null_model.borrow().is_none() {
            let (null_like, null_intercept): (F, Array1<F>) = match &self.data.linear_offset {
                None => {
                    // If there is no linear offset, the natural parameter is
                    // identical for all observations so it is sufficient to
                    // calculate the null likelihood for a single point with y equal
                    // to the average.
                    // The average y
                    let y_bar: F = self
                        .data
                        .y
                        .mean()
                        .expect("Should be able to take average of y values");
                    // This approach assumes that the likelihood is in the natural
                    // exponential form as calculated by Glm::log_like_natural(). If that
                    // function is overridden and the values differ significantly, this
                    // approach will give incorrect results. If the likelihood has terms
                    // non-linear in y, then the likelihood must be calculated for every
                    // point rather than averaged.
                    // If the intercept is allowed to maximize the likelihood, the natural
                    // parameter is equal to the link of the expectation. Otherwise it is
                    // the transformation function of zero.
                    let intercept: F = if self.use_intercept {
                        M::Link::func(y_bar)
                    } else {
                        F::zero()
                    };
                    // This is a length-one array. It works because the
                    // likelihood contribution is the same for all observations.
                    let nat_par = M::Link::nat_param(array![intercept]);
                    // The null likelihood per observation
                    let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
                    // Multiply the per-observation likelihood by the number of data points,
                    // since every term is the same.
                    let null_like_total = F::from(self.n_data).unwrap() * null_like_one;
                    let null_params: Array1<F> = {
                        let mut par = Array1::<F>::zeros(self.n_par);
                        par[0] = intercept;
                        par
                    };
                    (null_like_total, null_params)
                }
                Some(off) => {
                    if self.use_intercept {
                        // If there are linear offsets and the intercept is allowed
                        // to be free, there is no major simplification and the
                        // model needs to be re-fit.
                        // The X data is a single column of ones. Since this model
                        // isn't being created by the ModelBuilder, the X data
                        // has to be padded with ones manually.
                        let data_x_null = Array2::<F>::ones((self.n_data, 1));
                        let null_model = Model {
                            model: std::marker::PhantomData::<M>,
                            data: Dataset::<F> {
                                y: self.data.y.clone(),
                                x: data_x_null,
                                linear_offset: Some(off.clone()),
                                weights: self.data.weights.clone(),
                                hat: RefCell::new(None),
                            },
                            // If we are in this branch it is because an intercept is needed.
                            use_intercept: true,
                        };
                        // TODO: Make this function return an error, although it's
                        // difficult to imagine this case happening.
                        // TODO: Should the tolerance of this fit be stricter?
                        // The intercept should not be regularized
                        let null_fit = null_model
                            .fit_options()
                            // There shouldn't be too much trouble fitting this
                            // single-parameter fit, but there shouldn't be harm in
                            // using the same maximum as in the original model.
                            .max_iter(self.options.max_iter)
                            .fit()
                            .expect("Could not fit null model!");
                        let null_params: Array1<F> = {
                            let mut par = Array1::<F>::zeros(self.n_par);
                            // There is only one parameter in this fit.
                            par[0] = null_fit.result[0];
                            par
                        };
                        (null_fit.model_like, null_params)
                    } else {
                        // If the intercept is fixed to zero, then no minimization is
                        // required. The natural parameters are directly known in terms
                        // of the linear offset. The likelihood must still be summed
                        // over all observations, since they have different offsets.
                        let nat_par = M::Link::nat_param(off.clone());
                        let null_like = ndarray::Zip::from(&self.data.y)
                            .and(&nat_par)
                            .map_collect(|&y, &eta| M::log_like_natural(y, eta))
                            .sum();
                        let null_params = Array1::<F>::zeros(self.n_par);
                        (null_like, null_params)
                    }
                }
            };
            *self.null_model.borrow_mut() = Some((null_like, null_intercept));
        }
        self.null_model
            .borrow()
            .as_ref()
            .expect("the null model should be cached now")
            .clone()
    }

    /// Returns the expected value of Y given the input data X. This data need
    /// not be the training data, so an option for linear offsets is provided.
    /// Panics if the number of covariates in the data matrix is not consistent
    /// with the training set. The data matrix may need to be padded by ones if
    /// it is not part of a Model. The `utility::one_pad()` function facilitates
    /// this.
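    ///
    /// A usage sketch (not compiled as a doc-test) mirroring how the test suite calls this
    /// method on a matrix that is not part of a `Model`; the import path and the `new_x`
    /// array are illustrative assumptions:
    ///
    /// ```ignore
    /// use ndarray_glm::utility::one_pad;
    /// // `fit` is a Fit obtained from model.fit(); `new_x` is an Array2 of covariates.
    /// let y_hat = fit.predict(&one_pad(new_x.view()), None);
    /// ```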
    pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
    where
        S: Data<Elem = F>,
    {
        let lin_pred: Array1<F> = data_x.dot(&self.result);
        let lin_pred: Array1<F> = if let Some(off) = &lin_off {
            lin_pred + *off
        } else {
            lin_pred
        };
        lin_pred.mapv_into(M::Link::func_inv)
    }

    /// Return the deviance residuals for each point in the training data.
    /// Equal to `sign(y-E[y|x])*sqrt(-2*(L[y|x] - L_sat[y]))`.
    /// This is usually a better choice for non-linear models.
    /// NaNs might be possible if `L[y|x] > L_sat[y]` due to floating-point operations. These are
    /// not checked or clipped right now.
    pub fn resid_dev(&self) -> Array1<F> {
        let signs = self.resid_resp().mapv_into(F::signum);
        let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result);
        let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
        let neg_two = F::from(-2.).unwrap();
        let ll_diff = (ll_terms - ll_sat) * neg_two;
        let dev: Array1<F> = ll_diff.mapv_into(num_traits::Float::sqrt);
        signs * dev
    }

    /// Return the standardized deviance residuals, also known as the "internally studentized
    /// deviance residuals". This is generally applicable for outlier detection, although the
    /// influence of each point on the fit is only approximately accounted for.
    /// `d / sqrt(phi * (1 - h))` where `d` is the deviance residual, `phi` is the dispersion
    /// (e.g. `sigma^2` for linear regression, 1 for logistic regression), and `h` is the leverage.
    pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>> {
        let dev = self.resid_dev();
        let phi = self.dispersion();
        let hat: Array1<F> = self.data.leverage()?;
        let omh: Array1<F> = -hat + F::one();
        let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
        Ok(dev / denom)
    }

    /// Return the partial residuals.
    pub fn resid_part(&self) -> Array1<F> {
        let x_mean = self.data.x.mean_axis(Axis(0)).expect("empty dataset");
        let x_centered = &self.data.x - x_mean.insert_axis(Axis(0));
        self.resid_work() + x_centered.dot(&self.result)
    }

    /// Return the Pearson residuals for each point in the training data.
    /// This is equal to `(y - E[y])/sqrt(V(E[y]))`, where `V` is the variance function.
    /// These are not scaled by the sample standard deviation for families with a free dispersion
    /// parameter like linear regression.
    pub fn resid_pear(&self) -> Array1<F> {
        let mu: Array1<F> = self.predict(&self.data.x, self.data.linear_offset.as_ref());
        let residuals = &self.data.y - &mu;
        let var_diag: Array1<F> = mu.mapv_into(M::variance);
        let std: Array1<F> = var_diag.mapv_into(num_traits::Float::sqrt);
        residuals / std
    }

    /// Return the standardized Pearson residuals for every observation.
    /// Also known as the "internally studentized Pearson residuals".
    /// `(y - E[y]) / sqrt(Var[y] * (1 - h))` where `h` is a vector representing the leverage for
    /// each observation.
    pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>> {
        let pearson = self.resid_pear();
        let phi = self.dispersion();
        let hat = self.data.leverage()?;
        let omh = -hat + F::one();
        let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
        Ok(pearson / denom)
    }

    /// Return the response residuals, or fitting deviation, for each data point in the fit; that
    /// is, the difference `y - E[y|x]` where the expectation value is the y value predicted by
    /// the model given x.
    pub fn resid_resp(&self) -> Array1<F> {
        self.errors(self.data)
    }

    /// Return the studentized residuals, which are the changes in the fit likelihood resulting
    /// from leaving each observation out. This is a robust and general method for outlier
    /// detection, although a one-step approximation is used to avoid re-fitting the model
    /// completely for each observation.
    /// If the linear errors are standard normally distributed then this statistic should follow
    /// a t-distribution with `self.ndf() - 1` degrees of freedom.
    pub fn resid_student(&self) -> RegressionResult<Array1<F>> {
        let r_dev = self.resid_dev();
        let r_pear = self.resid_pear();
        let signs = r_pear.mapv(F::signum);
        let r_dev_sq = r_dev.mapv_into(|x| x * x);
        let r_pear_sq = r_pear.mapv_into(|x| x * x);
        let hat = self.data.leverage()?;
        let omh = -hat.clone() + F::one();
        let sum_quad = &r_dev_sq + hat * r_pear_sq / &omh;
        let sum_quad_scaled = match M::DISPERSED {
            // The dispersion is corrected for the contribution from each current point.
            // This is an approximation; the exact solution would perform a fit at each point.
            DispersionType::FreeDispersion => {
                let dev = self.deviance();
                let dof = F::from(self.ndf() - 1).unwrap();
                let phi_i: Array1<F> = (-r_dev_sq / &omh + dev) / dof;
                sum_quad / phi_i
            }
            DispersionType::NoDispersion => sum_quad,
        };
        Ok(signs * sum_quad_scaled.mapv_into(num_traits::Float::sqrt))
    }

    /// Returns the working residuals `d\eta/d\mu * (y - E[y|x])`.
    /// This should be equal to the response residuals divided by the variance function (as
    /// opposed to the square root of the variance as in the Pearson residuals).
    pub fn resid_work(&self) -> Array1<F> {
        let lin_pred: Array1<F> = self.data.linear_predictor(&self.result);
        let mu: Array1<F> = lin_pred.mapv(M::Link::func_inv);
        let resid_response: Array1<F> = &self.data.y - &mu;
        let d_eta: Array1<F> = M::Link::d_nat_param(&lin_pred);
        d_eta * resid_response
    }

    /// Returns the score function (the gradient of the log-likelihood) at the
    /// parameter values given. It should be zero within floating-point error at the
    /// minimized result.
    pub fn score(&self, params: &Array1<F>) -> Array1<F> {
        // This represents the predictions given the input parameters, not the
        // fit parameters.
        let lin_pred: Array1<F> = self.data.linear_predictor(params);
        let mu: Array1<F> = M::mean(&lin_pred);
        let resid_response = &self.data.y - mu;
        // adjust for non-canonical link functions.
        let eta_d = M::Link::d_nat_param(&lin_pred);
        let resid_working = eta_d * resid_response;
        let score_unreg = self.data.x.t().dot(&resid_working);
        self.reg.as_ref().gradient(score_unreg, params)
    }

    /// Returns the score test statistic. This statistic is asymptotically
    /// chi-squared distributed with `test_ndf()` degrees of freedom.
    pub fn score_test(&self) -> RegressionResult<F> {
        let (_, null_params) = self.null_model_fit();
        self.score_test_against(null_params)
    }

    /// Returns the score test statistic compared to another set of model
    /// parameters, not necessarily a null model. The degrees of freedom cannot
    /// be generally inferred.
    pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F> {
        let score_alt = self.score(&alternative);
        let fisher_alt = self.fisher(&alternative);
        // This is not the same as the cached covariance matrix since it is
        // evaluated at the null parameters.
        // NOTE: invh/invh_into() are bugged and incorrect!
        let inv_fisher_alt = fisher_alt.inv_into()?;
        Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
    }

    /// The degrees of freedom for the likelihood ratio test, the score test,
    /// and the Wald test. Not to be confused with `ndf()`, the degrees of
    /// freedom in the model fit.
    pub fn test_ndf(&self) -> usize {
        if self.use_intercept {
            self.n_par - 1
        } else {
            self.n_par
        }
    }

    /// Returns the Wald test statistic compared to a null model with only an
    /// intercept (if one is used). This statistic is asymptotically chi-squared
    /// distributed with `test_ndf()` degrees of freedom.
    pub fn wald_test(&self) -> F {
        // The null parameters are all zero except for a possible intercept term
        // which optimizes the null model.
        let (_, null_params) = self.null_model_fit();
        self.wald_test_against(&null_params)
    }

    /// Returns the Wald test statistic compared to another specified model fit
    /// instead of the null model. The degrees of freedom cannot be generally
    /// inferred.
    pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
        let d_params: Array1<F> = &self.result - alternative;
        let fisher_alt: Array2<F> = self.fisher(alternative);
        d_params.t().dot(&fisher_alt.dot(&d_params))
    }

    /// Returns the signed square root of the Wald test statistic for each
    /// parameter. Since it does not account for covariance between the
    /// parameters it may not be accurate.
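    ///
    /// A rough usage sketch (not compiled as a doc-test): under the asymptotic normal
    /// approximation, parameters with `|z|` well above ~2 are unlikely to be consistent with
    /// zero.
    ///
    /// ```ignore
    /// let z = fit.wald_z()?;
    /// let roughly_significant = z.mapv(|zi: f64| zi.abs() > 2.0);
    /// ```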
    pub fn wald_z(&self) -> RegressionResult<Array1<F>> {
        let par_cov = self.covariance()?;
        let par_variances: ArrayView1<F> = par_cov.diag();
        Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
    }
}

/// Specialized functions for OLS.
impl<'a, F> Fit<'a, Linear, F>
where
    F: 'static + Float,
{
    /// Returns the coefficient of determination, R^2.
    pub fn r_sq(&self) -> F {
        let y_avg: F = self.data.y.mean().expect("Data should be non-empty");
        let total_sum_sq: F = self.data.y.mapv(|y| y - y_avg).mapv(|dy| dy * dy).sum();
        (total_sum_sq - self.resid_sum_sq()) / total_sum_sq
    }

    /// Returns the residual sum of squares, i.e. the sum of the squared residuals.
    pub fn resid_sum_sq(&self) -> F {
        self.resid_resp().mapv_into(|r| r * r).sum()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        model::ModelBuilder,
        utility::{one_pad, standardize},
        Linear, Logistic,
    };
    use anyhow::Result;
    use approx::assert_abs_diff_eq;
    use ndarray::Axis;

    /// Checks that the test statistics are invariant under standardization of the data.
    #[test]
    fn standardization_invariance() -> Result<()> {
        let data_y = array![true, false, false, true, true, true, true, false, true];
        let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
        let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
        let data_x_std = standardize(data_x.clone());
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off.clone())
            .build()?;
        let fit = model.fit()?;
        let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x_std)
            .linear_offset(lin_off)
            .build()?;
        let fit_std = model_std.fit()?;
        let lr = fit.lr_test();
        let lr_std = fit_std.lr_test();
        assert_abs_diff_eq!(lr, lr_std);
        eprintln!("about to try score test");
        assert_abs_diff_eq!(
            fit.score_test()?,
            fit_std.score_test()?,
            epsilon = f32::EPSILON as f64
        );
        eprintln!("about to try wald test");
        assert_abs_diff_eq!(
            fit.wald_test(),
            fit_std.wald_test(),
            epsilon = 4.0 * f64::EPSILON
        );
        assert_abs_diff_eq!(fit.aic(), fit_std.aic());
        assert_abs_diff_eq!(fit.bic(), fit_std.bic());
        eprintln!("about to try deviance");
        assert_abs_diff_eq!(fit.deviance(), fit_std.deviance());
        // The Wald Z-score of the intercept term is not invariant under a
        // linear transformation of the data, but the parameter part seems to
        // be, at least for single-component data.
        assert_abs_diff_eq!(
            fit.wald_z()?[1],
            fit_std.wald_z()?[1],
            epsilon = 0.01 * f32::EPSILON as f64
        );

        Ok(())
    }

    #[test]
    fn null_model() -> Result<()> {
        let data_y = array![true, false, false, true, true];
        let data_x: Array2<f64> = array![[], [], [], [], []];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        dbg!(fit.n_iter);
        dbg!(&fit.result);
        // with no offsets, the result should be the link function of the mean.
        assert_abs_diff_eq!(
            fit.result[0],
            <Logistic as Glm>::Link::func(0.6),
            epsilon = 4.0 * f64::EPSILON
        );
        let empty_null_like = fit.null_like();
        assert_eq!(fit.test_ndf(), 0);
        dbg!(&fit.model_like);
        let lr = fit.lr_test();
        // Since there are no covariates, the null likelihood should be identical to
        // the fit likelihood, so the likelihood ratio test should yield zero.
        assert_abs_diff_eq!(lr, 0., epsilon = 4. * f64::EPSILON);

        // Check that the assertions still hold if linear offsets are included.
        let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off)
            .build()?;
        let fit_off = model.fit()?;
        let empty_model_like_off = fit_off.model_like;
        let empty_null_like_off = fit_off.null_like();
        // these two assertions should be equivalent
        assert_abs_diff_eq!(fit_off.lr_test(), 0.);
        assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);

        // check consistency with data provided
        let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
        let fit_with = model.fit()?;
        dbg!(&fit_with.result);
        // The null likelihood of the model with parameters should be the same
        // as the likelihood of the model with only the intercept.
        assert_abs_diff_eq!(empty_null_like, fit_with.null_like());

        Ok(())
    }

    #[test]
    fn null_like_logistic() -> Result<()> {
        // 6 true and 4 false for y_bar = 0.6.
        let data_y = array![true, true, true, true, true, true, false, false, false, false];
        let ybar: f64 = 0.6;
        let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let target_null_like = fit
            .data
            .y
            .mapv(|y| {
                let eta = (ybar / (1. - ybar)).ln();
                y * eta - eta.exp().ln_1p()
            })
            .sum();
        assert_abs_diff_eq!(fit.null_like(), target_null_like);
        Ok(())
    }

    // Check that the deviance is equal to the sum of squared deviations for a linear model.
    #[test]
    fn deviance_linear() -> Result<()> {
        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        // The predicted values of Y given the model.
        let pred_y = fit.predict(&one_pad(data_x.view()), None);
        let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
        assert_abs_diff_eq!(fit.deviance(), target_dev);
        Ok(())
    }

    // Check that the deviance and the dispersion parameter differ only by the number of
    // degrees of freedom for a linear model.
    #[test]
    fn deviance_dispersion_eq_linear() -> Result<()> {
        let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
        let data_x = array![
            [0.4, 0.2],
            [0.1, 0.4],
            [-0.1, 0.3],
            [0.5, 0.7],
            [0.4, 0.1],
            [-0.2, -0.3],
            [0.4, -0.1]
        ];
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let dev = fit.deviance();
        let disp = fit.dispersion();
        let ndf = fit.ndf() as f64;
        assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
        Ok(())
    }
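
    // A sketch of a sanity check on r_sq(): for data generated from an exactly linear
    // relationship the fit should be essentially perfect, so R^2 should be 1 up to
    // floating-point error. The data and tolerance here are illustrative assumptions.
    #[test]
    fn r_sq_exact_linear() -> Result<()> {
        let data_x = array![-0.4, 0.0, 0.3, 0.7, 1.1, 1.6].insert_axis(Axis(1));
        let data_y = data_x.column(0).mapv(|x| 1.5 * x - 0.2);
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        assert_abs_diff_eq!(fit.r_sq(), 1.0, epsilon = 16. * f64::EPSILON);
        Ok(())
    }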

    // Check that the residuals for a linear model are all consistent.
    #[test]
    fn residuals_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let response = fit.resid_resp();
        let pearson = fit.resid_pear();
        let deviance = fit.resid_dev();
        assert_abs_diff_eq!(response, pearson);
        assert_abs_diff_eq!(response, deviance);
        let pearson_std = fit.resid_pear_std()?;
        let deviance_std = fit.resid_dev_std()?;
        let _student = fit.resid_student()?;
        assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);

        // NOTE: Studentization can't be checked directly because the method used is an
        // approximation. Another approach will be needed to give exact values.
        // let orig_dev = fit.deviance();
        // let n_data = data_y.len();
        // // Check that the leave-one-out stats hold literally
        // let mut loo_dev: Vec<f64> = Vec::new();
        // for i in 0..n_data {
        //     let ya = data_y.slice(s![0..i]);
        //     let yb = data_y.slice(s![i + 1..]);
        //     let xa = data_x.slice(s![0..i, ..]);
        //     let xb = data_x.slice(s![i + 1.., ..]);
        //     let y_loo = concatenate![Axis(0), ya, yb];
        //     let x_loo = concatenate![Axis(0), xa, xb];
        //     let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo).build()?;
        //     let fit_i = model_i.fit()?;
        //     let yi = data_y[i];
        //     let xi = data_x.slice(s![i..i + 1, ..]);
        //     let xi = crate::utility::one_pad(xi);
        //     let yi_pred: f64 = fit_i.predict(&xi, None)[0];
        //     let disp_i = fit_i.dispersion();
        //     let pear_loo = (yi - yi_pred) / disp_i.sqrt();
        //     let dev_i = fit_i.deviance();
        //     let d_dev = 2. * (orig_dev - dev_i);
        //     loo_dev.push(d_dev.sqrt() * (yi - yi_pred).signum());
        // }
        // let loo_dev: Array1<f64> = loo_dev.into();
        // This is off from 1 by a constant factor that depends on the data
        // This is only approximately true
        // assert_abs_diff_eq!(student, loo_dev);
        Ok(())
    }

    // Check the null likelihood for the case where it can be computed exactly.
    #[test]
    fn null_like_linear() -> Result<()> {
        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
        let ybar: f64 = data_y.mean().unwrap();
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        // let target_null_like = data_y.mapv(|y| -0.5 * (y - ybar) * (y - ybar)).sum();
        let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
        // With the saturated likelihood subtracted the null likelihood should
        // just be the sum of squared differences from the mean.
        // let target_null_like = 0.;
        // dbg!(target_null_like);
        let fit_null_like = fit.null_like();
        assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
        assert_eq!(fit.test_ndf(), 1);
        assert_abs_diff_eq!(
            fit_null_like,
            target_null_like,
            epsilon = 4.0 * f64::EPSILON
        );
        Ok(())
    }

    // Check the null likelihood where there is no dependence on the X data.
    #[test]
    fn null_like_logistic_nodep() -> Result<()> {
        let data_y = array![true, true, false, false, true, false, false, true];
        let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let lr = fit.lr_test();
        assert_abs_diff_eq!(lr, 0.);
        Ok(())
    }
    // TODO: Test that the statistics behave sensibly under regularization. The
    // likelihood ratio test should yield a smaller value.

    // Test the basic caching functions.
    #[test]
    fn cached_computations() -> Result<()> {
        let data_y = array![true, true, false, true, true, false, false, false, true];
        let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let _null_like = fit.null_like();
        let _null_like = fit.null_like();
        let _cov = fit.covariance()?;
        let _wald = fit.wald_z();
        Ok(())
    }
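
    // A sketch of a consistency check based on the documented behavior of score(): the
    // gradient of the log-likelihood should vanish at the fit result. The tolerance is a
    // loose guess since convergence is only up to the IRLS tolerance.
    #[test]
    fn score_zero_at_fit() -> Result<()> {
        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let score = fit.score(&fit.result);
        let zeros = Array1::<f64>::zeros(score.len());
        assert_abs_diff_eq!(score, zeros, epsilon = 1e-8);
        Ok(())
    }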

    // Check the consistency of the various statistical tests for linear
    // regression, where they should be the most comparable.
    #[test]
    fn linear_stat_tests() -> Result<()> {
        let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
        let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let lr = fit.lr_test();
        let wald = fit.wald_test();
        let score = fit.score_test()?;
        assert_abs_diff_eq!(lr, wald, epsilon = 32.0 * f64::EPSILON);
        assert_abs_diff_eq!(lr, score, epsilon = 32.0 * f64::EPSILON);
        Ok(())
    }
}