ndarray_glm/fit.rs

1//! Stores the fit results of the IRLS regression and provides functions that
2//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.
3
4pub mod options;
5use crate::{
6    Linear,
7    error::RegressionResult,
8    glm::{DispersionType, Glm},
9    irls::{Irls, IrlsStep},
10    link::{Link, Transform},
11    model::{Dataset, Model},
12    num::Float,
13    regularization::IrlsReg,
14};
15use ndarray::{Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2, array};
16use ndarray_linalg::InverseInto;
17use options::FitOptions;
18use std::{
19    cell::{Ref, RefCell},
20    marker::PhantomData,
21};
22
23/// the result of a successful GLM fit
24pub struct Fit<'a, M, F>
25where
26    M: Glm,
27    F: Float,
28{
29    model: PhantomData<M>,
30    /// The data and model specification used in the fit.
31    data: &'a Dataset<F>,
32    /// Whether the intercept covariate is used
33    use_intercept: bool,
34    /// The parameter values that maximize the likelihood as given by the IRLS regression.
35    pub result: Array1<F>,
36    /// The options used for this fit.
37    pub options: FitOptions<F>,
38    /// The value of the likelihood function for the fit result.
39    pub model_like: F,
40    /// The regularizer of the fit
41    reg: Box<dyn IrlsReg<F>>,
42    /// The number of overall iterations taken in the IRLS.
43    pub n_iter: usize,
44    /// The history of guesses and likelihoods over the IRLS iterations.
45    pub history: Vec<IrlsStep<F>>,
46    /// The number of parameters
47    n_par: usize,
48    /// The unscaled covariance matrix of the parameters, i.e. the inverse of the Fisher
49    /// information. Since the calculation requires a matrix inversion, it is computed only when
50    /// needed and the value is cached.
51    cov_unscaled: RefCell<Option<Array2<F>>>,
52    /// The hat matrix of the data and fit. Since the calculation requires a matrix inversion of
53    /// the fisher information, it is computed only when needed and the value is cached. Access
54    /// through the `hat()` function.
55    hat: RefCell<Option<Array2<F>>>,
56    /// The likelihood and parameters for the null model.
57    null_model: RefCell<Option<(F, Array1<F>)>>,
58}
59
60impl<'a, M, F> Fit<'a, M, F>
61where
62    M: Glm,
63    F: 'static + Float,
64{
65    /// Returns the Akaike information criterion for the model fit.
66    ///
67    /// ```math
68    /// \text{AIC} = D + 2K - 2\sum_{i} \ln w_{i}
69    /// ```
70    ///
71    /// where $`D`$ is the deviance, $`K`$ is the number of parameters, and $`w_i`$ are the
72    /// variance weights.
73    /// This is unique only to an additive constant, so only differences in AIC are meaningful.
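    ///
    /// A minimal usage sketch, marked `ignore` since it is illustrative only (hypothetical data;
    /// the crate-root re-exports of `Linear` and `ModelBuilder` and a `Result`-returning context
    /// for `?` are assumed):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![0.3, -0.2, 0.5, 0.7, 0.2];
    /// let x = array![0.6, 2.1, 0.4, -3.2, 0.7].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// // Only AIC differences between fits to the same data are meaningful;
    /// // the fit with the smaller AIC is preferred.
    /// let aic = fit.aic();
    /// ```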
74    pub fn aic(&self) -> F {
75        let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
76        let sum_log_weights = self.data.freq_sum(&log_weights);
77        // NOTE: This is now the unregularized deviance.
78        self.deviance() + F::two() * self.rank() - F::two() * sum_log_weights
79    }
80
81    /// Returns the Bayesian information criterion for the model fit.
82    ///
83    /// ```math
84    /// \text{BIC} = K \ln(n) - 2l - 2\sum_{i} \ln w_{i}
85    /// ```
86    ///
87    /// where $`K`$ is the number of parameters, $`n`$ is the number of observations, and $`l`$ is
88    /// the log-likelihood (including the variance weight normalization terms).
89    // TODO: Also consider the effect of regularization on this statistic.
90    // TODO: Wikipedia suggests that the variance should be included in the number
91    // of parameters for multiple linear regression. Should an additional
92    // parameter be included for the dispersion parameter? This question does
93    // not affect the difference between two models fit with the methodology in
94    // this package.
95    pub fn bic(&self) -> F {
96        let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
97        let sum_log_weights = self.data.freq_sum(&log_weights);
98        let logn = num_traits::Float::ln(self.data.n_obs());
99        logn * self.rank() - F::two() * self.model_like - F::two() * sum_log_weights
100    }
101
102    /// Cook's distance for each observation, which measures how much the predicted values
103    /// change when that observation is left out of the fit.
104    ///
105    /// ```math
106    /// C_i = \frac{r_i^2 \, h_i}{K \, \hat\phi \, (1 - h_i)^2}
107    /// ```
108    ///
109    /// where $`r_i`$ is the Pearson residual, $`h_i`$ is the leverage, $`K`$ is the rank
110    /// (number of parameters), and $`\hat\phi`$ is the estimated dispersion.
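    ///
    /// A sketch of flagging influential points, marked `ignore` since it is illustrative only
    /// (hypothetical data; crate-root re-exports are assumed as in the [`aic`](Self::aic) example,
    /// and the cutoff of 1 is a common rule of thumb, not part of this crate):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Logistic, ModelBuilder};
    ///
    /// let y = array![true, false, false, true, true, false];
    /// let x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Logistic>::data(&y, &x).build()?.fit()?;
    /// let influential: Vec<usize> = fit.cooks()?
    ///     .iter()
    ///     .enumerate()
    ///     .filter(|(_, &c)| c > 1.0)
    ///     .map(|(i, _)| i)
    ///     .collect();
    /// ```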
111    pub fn cooks(&self) -> RegressionResult<Array1<F>> {
112        let hat = self.leverage()?;
113        let pear_sq = self.resid_pear().mapv(|r| r * r);
114        let h_terms: Array1<F> = hat.mapv_into(|h| {
115            let omh = F::one() - h;
116            h / (omh * omh)
117        });
118        let denom: F = self.rank() * self.dispersion();
119        Ok(pear_sq * h_terms / denom)
120    }
121
122    /// The covariance matrix of the parameter estimates:
123    ///
124    /// ```math
125    /// \text{Cov}[\hat{\boldsymbol\beta}] = \hat\phi \, (\mathbf{X}^\mathsf{T}\mathbf{WSX})^{-1}
126    /// ```
127    ///
128    /// where $`\hat\phi`$ is the estimated dispersion, $`\mathbf{W}`$ is the weight matrix, and
129    /// $`\mathbf{S}`$ is the diagonal variance matrix. The Fisher matrix inverse is cached.
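    ///
    /// A sketch of extracting standard errors from the diagonal, marked `ignore` since it is
    /// illustrative only (hypothetical data; crate-root re-exports are assumed):
    ///
    /// ```ignore
    /// use ndarray::{array, Array1, Axis};
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![0.1, -0.3, 0.7, 0.2, 1.2];
    /// let x = array![0.4, 0.1, 0.3, -0.1, 0.5].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// // The standard error of each coefficient is the square root of the corresponding
    /// // diagonal element of the covariance matrix.
    /// let std_err: Array1<f64> = fit.covariance()?.diag().mapv(f64::sqrt);
    /// ```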
130    pub fn covariance(&self) -> RegressionResult<Array2<F>> {
131        // The covariance must be multiplied by the dispersion parameter.
132        // For logistic/poisson regression, this is identically 1.
133        // For linear/gamma regression it is estimated from the data.
134        let phi: F = self.dispersion();
135        // NOTE: invh/invh_into() are bugged and incorrect!
136        let unscaled_cov: Array2<F> = self.fisher_inv()?.to_owned();
137        let cov = unscaled_cov * phi;
138        Ok(cov)
139    }
140
141    /// Returns the deviance of the fit:
142    ///
143    /// ```math
144    /// D = -2 \left[ l(\hat{\boldsymbol\beta}) - l_\text{sat} \right]
145    /// ```
146    ///
147    /// Asymptotically $`\chi^2`$-distributed with [`ndf()`](Self::ndf) degrees of freedom.
148    /// The unregularized likelihood is used.
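    ///
    /// For an ordinary linear model the deviance reduces to the residual sum of squares, as
    /// exercised in this module's tests. A sketch (hypothetical data, marked `ignore`):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![0.3, -0.2, 0.5, 0.7];
    /// let x = array![0.6, 2.1, 0.4, -3.2].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// // For the Linear family this equals the sum of squared response residuals.
    /// let dev = fit.deviance();
    /// ```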
149    pub fn deviance(&self) -> F {
150        let terms = self.deviance_terms();
151        self.data.freq_sum(&terms)
152    }
153
154    /// Returns the contribution to the deviance from each observation. The total deviance should
155    /// be the sum of all of these. Variance weights are already included, but not frequency
156    /// weights.
157    fn deviance_terms(&self) -> Array1<F> {
158        let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result);
159        let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
160        let terms = (ll_sat - ll_terms) * F::two();
161        self.data.apply_var_weights(terms)
162    }
163
164    /// Returns the self-excluded deviance terms, i.e. the deviance of an observation as if the
165    /// model was fit without it. This is a one-step approximation.
166    fn deviance_terms_loo(&self) -> RegressionResult<Array1<F>> {
167        let dev_terms = self.deviance_terms();
168        let pear_sq = self.resid_pear().mapv(|r| r * r);
169        let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
170        let result = dev_terms + &hat_rat * (&hat_rat + F::two()) * pear_sq;
171        Ok(result)
172    }
173
174    /// The dispersion parameter $`\hat\phi`$ relating the variance to the variance function:
175    /// $`\text{Var}[y] = \phi \, V(\mu)`$.
176    ///
177    /// Identically one for logistic, binomial, and Poisson regression.
178    /// For families with a free dispersion (linear, gamma), estimated as:
179    ///
180    /// ```math
181    /// \hat\phi = \frac{D}{\left(1 - \frac{K}{n_\text{eff}}\right) \sum_i w_i}
182    /// ```
183    ///
184    /// which reduces to $`D / (N - K)`$ without variance weights.
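    ///
    /// For a linear model without variance weights the deviance, dispersion, and residual degrees
    /// of freedom satisfy $`D = \hat\phi (N - K)`$, as exercised in this module's tests. A sketch
    /// (hypothetical data, marked `ignore`):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![0.2, -0.1, 0.4, 1.3, 0.2];
    /// let x = array![0.4, 0.1, -0.1, 0.5, 0.4].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// assert!((fit.deviance() - fit.dispersion() * fit.ndf()).abs() < 1e-10);
    /// ```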
185    pub fn dispersion(&self) -> F {
186        use DispersionType::*;
187        match M::DISPERSED {
188            FreeDispersion => {
189                let dev = self.deviance();
190                let p = self.rank();
191                let n_eff = self.data.n_eff();
192                let scaling = if p >= n_eff {
193                    // This is the overparameterized regime, which is checked directly instead of
194                    // allowing negative values. It's not clear what conditions result in this when
195                    // p < N.
196                    F::zero()
197                } else {
198                    (F::one() - p / n_eff) * self.data.sum_weights()
199                };
200                dev / scaling
201            }
202            NoDispersion => F::one(),
203        }
204    }
205
206    /// Return the dispersion estimated with each observation excluded from the fit (a one-step approximation).
207    fn dispersion_loo(&self) -> RegressionResult<Array1<F>> {
208        use DispersionType::*;
209        match M::DISPERSED {
210            FreeDispersion => {
211                let pear_sq = self.resid_pear().mapv(|r| r * r);
212                let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
213                let terms = self.deviance_terms() + hat_rat * pear_sq;
214                // Don't apply total weights since the variance weights are already
215                // included in the residual terms. However, we do need the frequency weights.
216                let terms = self.data.apply_freq_weights(terms);
217                let total: Array1<F> = -terms + self.deviance();
218                let scaled_total: Array1<F> = match &self.data.weights {
219                    None => match &self.data.freqs {
220                        Some(f) => total / -(f - self.ndf()),
221                        None => total / (self.ndf() - F::one()),
222                    },
223                    Some(w) => {
224                        let v1 = self.data.freq_sum(w);
225                        let w2 = w * w;
226                        let v2 = self.data.freq_sum(&w2);
227                        // The subtracted out terms need the frequency terms as well
228                        let f_w = self.data.apply_freq_weights(w.clone());
229                        let f_w2 = self.data.apply_freq_weights(w2);
230                        // the modified sums from leaving out the ith observation
231                        let v1p = -f_w + v1;
232                        let v2p = -f_w2 + v2;
233                        let p = self.rank();
234                        let scale = &v1p - v2p / &v1p * p;
235                        total / scale
236                    }
237                };
238                Ok(scaled_total)
239            }
240            NoDispersion => Ok(Array1::<F>::ones(self.data.y.len())),
241        }
242    }
243
244    /// Returns the response residuals for the given dataset under the current model fit.
245    /// The data need not be the training data.
246    fn errors(&self, data: &Dataset<F>) -> Array1<F> {
247        &data.y - &self.predict(&data.x, data.linear_offset.as_ref())
248    }
249
250    #[deprecated(since = "0.0.10", note = "use predict() instead")]
251    pub fn expectation<S>(
252        &self,
253        data_x: &ArrayBase<S, Ix2>,
254        lin_off: Option<&Array1<F>>,
255    ) -> Array1<F>
256    where
257        S: Data<Elem = F>,
258    {
259        self.predict(data_x, lin_off)
260    }
261
262    /// Returns the Fisher information (the negative Hessian of the log-likelihood) at the
263    /// parameter values given:
264    ///
265    /// ```math
266    /// \mathcal{I}(\boldsymbol\beta) = \mathbf{X}^\mathsf{T}\mathbf{W}\eta'^2\mathbf{S}\mathbf{X}
267    /// ```
268    ///
269    /// where $`\mathbf{S} = \text{diag}(V(\mu_i))`$ and $`\eta'`$ is the derivative of the natural
270    /// parameter with respect to the linear predictor ($`\eta(\omega) = g_0(g^{-1}(\omega))`$, where
271    /// $`g_0`$ is the canonical link function).
273    /// The regularization is included.
274    pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
275        let lin_pred: Array1<F> = self.data.linear_predictor(params);
276        let adj_var: Array1<F> = M::adjusted_variance_diag(&lin_pred);
277        // calculate the fisher matrix
278        let fisher: Array2<F> = (self.data.x_conj() * &adj_var).dot(&self.data.x);
279        // Regularize the fisher matrix
280        self.reg.as_ref().irls_mat(fisher, params)
281    }
282
283    /// The inverse of the (regularized) fisher information matrix. This is used in some other
284    /// calculations (like the covariance and hat matrices) so it is cached.
285    fn fisher_inv(&self) -> RegressionResult<Ref<'_, Array2<F>>> {
286        if self.cov_unscaled.borrow().is_none() {
287            let fisher_reg = self.fisher(&self.result);
288            // NOTE: invh/invh_into() are bugged and incorrect!
289            let unscaled_cov: Array2<F> = fisher_reg.inv_into()?;
290            *self.cov_unscaled.borrow_mut() = Some(unscaled_cov);
291        }
292        Ok(Ref::map(self.cov_unscaled.borrow(), |x| {
293            x.as_ref().unwrap()
294        }))
295    }
296
297    /// Returns the hat matrix, also known as the "projection" or "influence" matrix:
298    ///
299    /// ```math
300    /// P_{ij} = \frac{\partial \hat{y}_i}{\partial y_j}
301    /// ```
302    ///
303    /// Orthogonal to the response residuals at the fit result: $`\mathbf{P}(\mathbf{y} -
304    /// \hat{\mathbf{y}}) = 0`$.
305    /// This version is not symmetric, but the diagonal is invariant to this choice of convention.
306    pub fn hat(&self) -> RegressionResult<Ref<'_, Array2<F>>> {
307        if self.hat.borrow().is_none() {
308            let lin_pred = self.data.linear_predictor(&self.result);
309            // Apply the eta' terms manually instead of calling adjusted_variance_diag, because the
310            // adjusted variance method applies 2 powers to the variance, while we want one power
311            // to the variance and one to the weights.
312            // let adj_var = M::adjusted_variance_diag(&lin_pred);
313
314            let mu = M::mean(&lin_pred);
315            let var = mu.mapv_into(M::variance);
316            let eta_d = M::Link::d_nat_param(&lin_pred);
317
318            let fisher_inv = self.fisher_inv()?;
319
320            // the GLM variance and the data weights are put on different sides in this convention
321            let left = (var * &eta_d).insert_axis(Axis(1)) * &self.data.x;
322            let right = self.data.x_conj() * &eta_d;
323            let result = left.dot(&fisher_inv.dot(&right));
324
325            *self.hat.borrow_mut() = Some(result);
326        }
327        let borrowed: Ref<Option<Array2<F>>> = self.hat.borrow();
328        Ok(Ref::map(borrowed, |x| x.as_ref().unwrap()))
329    }
330
331    /// The one-step approximation to the change in coefficients from excluding each observation:
332    ///
333    /// ```math
334    /// \Delta\boldsymbol\beta^{(-i)} \approx \frac{1}{1-h_i}\,\mathcal{I}^{-1}\mathbf{x}^{(i)} w_i \eta'_i e_i
335    /// ```
336    ///
337    /// Each row $`i`$ should be subtracted from $`\hat{\boldsymbol\beta}`$ to approximate
338    /// the coefficients that would result from excluding observation $`i`$.
339    /// Exact for linear models; a one-step approximation for nonlinear models.
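    ///
    /// A sketch of approximating the leave-one-out coefficients, following this module's tests
    /// (hypothetical data, marked `ignore`):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
    /// let x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// // Row i approximates the coefficients of a fit with observation i removed.
    /// let loo_coefs = &fit.result - fit.infl_coef()?;
    /// ```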
340    pub fn infl_coef(&self) -> RegressionResult<Array2<F>> {
341        let lin_pred = self.data.linear_predictor(&self.result);
342        let resid_resp = self.resid_resp();
343        let omh = -self.leverage()? + F::one();
344        let resid_adj = M::Link::adjust_errors(resid_resp, &lin_pred) / omh;
345        let xte = self.data.x_conj() * resid_adj;
346        let fisher_inv = self.fisher_inv()?;
347        let delta_b = xte.t().dot(&*fisher_inv);
348        Ok(delta_b)
349    }
350
351    /// Returns the leverage $`h_i = P_{ii}`$ for each observation: the diagonal of the hat matrix.
352    /// Indicates the sensitivity of each prediction to its corresponding observation.
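    ///
    /// A sketch of flagging high-leverage points (hypothetical data, marked `ignore`; the
    /// 2K/N cutoff is a common heuristic, not part of this crate):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Logistic, ModelBuilder};
    ///
    /// let y = array![true, false, false, true, true, false];
    /// let x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Logistic>::data(&y, &x).build()?.fit()?;
    /// let h = fit.leverage()?;
    /// // Flag observations with leverage above 2K/N (here K = 2: intercept plus one covariate).
    /// let cut = 2.0 * 2.0 / y.len() as f64;
    /// let flagged: Vec<usize> = h.iter().enumerate().filter(|(_, &hi)| hi > cut).map(|(i, _)| i).collect();
    /// ```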
353    pub fn leverage(&self) -> RegressionResult<Array1<F>> {
354        let hat = self.hat()?;
355        Ok(hat.diag().to_owned())
356    }
357
358    /// Returns exact coefficients from leaving each observation out, one-at-a-time.
359    /// This is a much more expensive operation than the original regression because a new one is
360    /// performed for each observation.
361    pub fn loo_exact(&self) -> RegressionResult<Array2<F>> {
362        let loo_coef: Array2<F> = self.infl_coef()?;
363        // NOTE: could also use the result itself as the initial guess
364        let loo_initial = &self.result - loo_coef;
365        let mut loo_result = loo_initial.clone();
366        let n_obs = self.data.y.len();
367        for i in 0..n_obs {
368            let data_i: Dataset<F> = {
369                let mut data_i: Dataset<F> = self.data.clone();
370                let mut freqs: Array1<F> = data_i.freqs.unwrap_or(Array1::<F>::ones(n_obs));
371                freqs[i] = F::zero();
372                data_i.freqs = Some(freqs);
373                data_i
374            };
375            let model_i = Model {
376                model: PhantomData::<M>,
377                data: data_i,
378                use_intercept: self.use_intercept,
379            };
380            let options = {
381                let mut options = self.options.clone();
382                options.init_guess = Some(self.result.clone());
383                options
384            };
385            let fit_i = model_i.with_options(options).fit()?;
386            loo_result.row_mut(i).assign(&fit_i.result);
387        }
388        Ok(loo_result)
389    }
390
391    /// Perform a likelihood-ratio test, returning the statistic:
392    ///
393    /// ```math
394    /// \Lambda = -2 \ln \frac{L_0}{L} = -2(l_0 - l)
395    /// ```
396    ///
397    /// where $`L_0`$ is the null model likelihood (intercept only) and $`L`$ is the fit likelihood.
398    /// By Wilks' theorem, asymptotically $`\chi^2`$-distributed with
399    /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
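    ///
    /// A sketch of testing the covariates against the intercept-only model (hypothetical data,
    /// marked `ignore`; converting the statistic to a p-value requires a chi-squared CDF from a
    /// separate statistics crate):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Logistic, ModelBuilder};
    ///
    /// let y = array![true, false, false, true, true, true, false];
    /// let x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Logistic>::data(&y, &x).build()?.fit()?;
    /// let stat = fit.lr_test();
    /// // Compare `stat` to a chi-squared distribution with `ndf` degrees of freedom.
    /// let ndf = fit.test_ndf();
    /// ```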
400    // TODO: Should the effective number of degrees of freedom due to
401    // regularization be taken into account?
402    pub fn lr_test(&self) -> F {
403        // The model likelihood should include regularization terms and there
404        // shouldn't be any in the null model with all non-intercept parameters
405        // set to zero.
406        let null_like = self.null_like();
407        F::from(-2.).unwrap() * (null_like - self.model_like)
408    }
409
410    /// Perform a likelihood-ratio test against a general alternative model, not
411    /// necessarily a null model. The alternative model is regularized the same
412    /// way that the regression resulting in this fit was. The degrees of
413    /// freedom cannot be generally inferred.
414    pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
415        let alt_like = M::log_like(self.data, alternative);
416        let alt_like_reg = alt_like + self.reg.likelihood(alternative);
417        F::two() * (self.model_like - alt_like_reg)
418    }
419
420    /// Returns the residual degrees of freedom in the model, i.e. the number
421    /// of data points minus the number of parameters. Not to be confused with
422    /// `test_ndf()`, the degrees of freedom in the statistical tests of the
423    /// fit parameters.
424    pub fn ndf(&self) -> F {
425        self.data.n_obs() - self.rank()
426    }
427
428    pub(crate) fn new(
429        data: &'a Dataset<F>,
430        use_intercept: bool,
431        irls: Irls<M, F>,
432        history: Vec<IrlsStep<F>>,
433    ) -> Self {
434        let Irls {
435            guess: result,
436            options,
437            reg,
438            n_iter,
439            last_like_data: data_like,
440            ..
441        } = irls;
442        assert_eq!(
443            data_like,
444            M::log_like(data, &result),
445            "Unregularized likelihoods should match exactly."
446        );
447        assert_eq!(
448            n_iter,
449            history.len(),
450            "Number of iterations should match history length"
451        );
452        assert_eq!(
453            result,
454            history.last().unwrap().guess,
455            "Last guess should be the same"
456        );
457        // Cache some of these variables that will be used often.
458        let n_par = result.len();
459        let model_like = data_like + reg.likelihood(&result);
460        assert_eq!(
461            model_like,
462            history.last().unwrap().like,
463            "Last data likelihood should be the same"
464        );
465        Self {
466            model: PhantomData,
467            data,
468            use_intercept,
469            result,
470            options,
471            model_like,
472            reg,
473            n_iter,
474            history,
475            n_par,
476            cov_unscaled: RefCell::new(None),
477            hat: RefCell::new(None),
478            null_model: RefCell::new(None),
479        }
480    }
481
482    /// Returns the likelihood given the null model, which fixes all parameters
483    /// to zero except the intercept (if it is used). A total of `test_ndf()`
484    /// parameters are constrained.
485    pub fn null_like(&self) -> F {
486        let (null_like, _) = self.null_model_fit();
487        null_like
488    }
489
490    /// Return the likelihood and parameter values for the null model. Since this can
491    /// require an additional regression, the values are cached.
492    fn null_model_fit(&self) -> (F, Array1<F>) {
493        // TODO: make a result instead of allowing a potential panic in the borrow.
494        if self.null_model.borrow().is_none() {
495            let (null_like, null_intercept): (F, Array1<F>) = match &self.data.linear_offset {
496                None => {
497                    // If there is no linear offset, the natural parameter is
498                    // identical for all observations so it is sufficient to
499                    // calculate the null likelihood for a single point with y equal
500                    // to the average.
501                    // The average y
502                    let y_bar: F = self.data.weighted_sum(&self.data.y) / self.data.sum_weights();
503                    // This approach assumes that the likelihood is in the natural
504                    // exponential form as calculated by Glm::log_like_natural(). If that
505                    // function is overridden and the values differ significantly, this
506                    // approach will give incorrect results. If the likelihood has terms
507                    // non-linear in y, then the likelihood must be calculated for every
508                    // point rather than averaged.
509                    // If the intercept is allowed to maximize the likelihood, the natural
510                    // parameter is equal to the link of the expectation. Otherwise it is
511                    // the transformation function of zero.
512                    let intercept: F = if self.use_intercept {
513                        M::Link::func(y_bar)
514                    } else {
515                        F::zero()
516                    };
517                    // this is a length-one array. This works because the
518                    // likelihood contribution is the same for all observations.
519                    let nat_par = M::Link::nat_param(array![intercept]);
520                    // The null likelihood per observation
521                    let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
522                    // Multiply the per-observation likelihood by the total weight, since every term is the same.
523                    let null_like_total = self.data.sum_weights() * null_like_one;
524                    let null_params: Array1<F> = {
525                        let mut par = Array1::<F>::zeros(self.n_par);
526                        par[0] = intercept;
527                        par
528                    };
529                    (null_like_total, null_params)
530                }
531                Some(off) => {
532                    if self.use_intercept {
533                        // If there are linear offsets and the intercept is allowed
534                        // to be free, there is not a major simplification and the
535                        // model needs to be re-fit.
536                        // the X data is a single column of ones. Since this model
537                        // isn't being created by the ModelBuilder, the X data
538                        // has to be automatically padded with ones.
539                        let data_x_null = Array2::<F>::ones((self.data.y.len(), 1));
540                        let null_model = Model {
541                            model: std::marker::PhantomData::<M>,
542                            data: Dataset::<F> {
543                                y: self.data.y.clone(),
544                                x: data_x_null,
545                                linear_offset: Some(off.clone()),
546                                weights: self.data.weights.clone(),
547                                freqs: self.data.freqs.clone(),
548                            },
549                            // If we are in this branch it is because an intercept is needed.
550                            use_intercept: true,
551                        };
552                        // TODO: Make this function return an error, although it's
553                        // difficult to imagine this case happening.
554                        // TODO: Should the tolerance of this fit be stricter?
555                        // The intercept should not be regularized
556                        let null_fit = null_model
557                            .fit_options()
558                            // There shouldn't be too much trouble fitting this
559                            // single-parameter fit, but there shouldn't be harm in
560                            // using the same maximum as in the original model.
561                            .max_iter(self.options.max_iter)
562                            .fit()
563                            .expect("Could not fit null model!");
564                        let null_params: Array1<F> = {
565                            let mut par = Array1::<F>::zeros(self.n_par);
566                            // there is only one parameter in this fit.
567                            par[0] = null_fit.result[0];
568                            par
569                        };
570                        (null_fit.model_like, null_params)
571                    } else {
572                        // If the intercept is fixed to zero, then no minimization is
573                        // required. The natural parameters are directly known in terms
574                        // of the linear offset. The likelihood must still be summed
575                        // over all observations, since they have different offsets.
576                        let nat_par = M::Link::nat_param(off.clone());
577                        let null_like_terms = ndarray::Zip::from(&self.data.y)
578                            .and(&nat_par)
579                            .map_collect(|&y, &eta| M::log_like_natural(y, eta));
580                        let null_like = self.data.weighted_sum(&null_like_terms);
581                        let null_params = Array1::<F>::zeros(self.n_par);
582                        (null_like, null_params)
583                    }
584                }
585            };
586            *self.null_model.borrow_mut() = Some((null_like, null_intercept));
587        }
588        self.null_model
589            .borrow()
590            .as_ref()
591            .expect("the null model should be cached now")
592            .clone()
593    }
594
595    /// Returns $`\hat{\mathbf{y}} = g^{-1}(\mathbf{X}\hat{\boldsymbol\beta} + \boldsymbol\omega_0)`$
596    /// given input data $`\mathbf{X}`$ and an optional linear offset
597    /// $`\boldsymbol\omega_0`$. The data need not be the training data.
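    ///
    /// The input matrix must have the same form as the internal design matrix, i.e. it must be
    /// padded with a leading column of ones when an intercept is used. A sketch using the
    /// crate's `one_pad` utility as in this module's tests (hypothetical data, marked `ignore`;
    /// the public path of `one_pad` is assumed):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{utility::one_pad, Linear, ModelBuilder};
    ///
    /// let y = array![0.3, -0.2, 0.5, 0.7];
    /// let x = array![0.6, 2.1, 0.4, -3.2].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// // Predict on new covariate values; one_pad() prepends the intercept column.
    /// let x_new = array![0.0, 1.0].insert_axis(Axis(1));
    /// let y_hat = fit.predict(&one_pad(x_new.view()), None);
    /// ```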
598    pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
599    where
600        S: Data<Elem = F>,
601    {
602        let lin_pred: Array1<F> = data_x.dot(&self.result);
603        let lin_pred: Array1<F> = if let Some(off) = &lin_off {
604            lin_pred + *off
605        } else {
606            lin_pred
607        };
608        lin_pred.mapv_into(M::Link::func_inv)
609    }
610
611    /// Returns the rank $`K`$ of the model (i.e. the number of parameters)
612    fn rank(&self) -> F {
613        F::from(self.n_par).unwrap()
614    }
615
616    /// Return the deviance residuals for each point in the training data:
617    ///
618    /// ```math
619    /// d_i = \text{sign}(y_i - \hat\mu_i)\sqrt{D_i}
620    /// ```
621    ///
622    /// where $`D_i`$ is the per-observation deviance contribution.
623    /// This is usually a better choice than Pearson residuals for non-linear models.
624    pub fn resid_dev(&self) -> Array1<F> {
625        let signs = self.resid_resp().mapv_into(F::signum);
626        let ll_diff = self.deviance_terms();
627        let dev: Array1<F> = ll_diff.mapv_into(num_traits::Float::sqrt);
628        signs * dev
629    }
630
631    /// Return the standardized deviance residuals (internally studentized):
632    ///
633    /// ```math
634    /// d_i^* = \frac{d_i}{\sqrt{\hat\phi(1 - h_i)}}
635    /// ```
636    ///
637    /// where $`d_i`$ is the deviance residual, $`\hat\phi`$ is the dispersion, and $`h_i`$ is
638    /// the leverage. Generally applicable for outlier detection.
639    pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>> {
640        let dev = self.resid_dev();
641        let phi = self.dispersion();
642        let hat: Array1<F> = self.leverage()?;
643        let omh: Array1<F> = -hat + F::one();
644        let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
645        Ok(dev / denom)
646    }
647
648    /// Return the partial residuals.
649    pub fn resid_part(&self) -> Array1<F> {
650        let x_mean = self.data.x.mean_axis(Axis(0)).expect("empty dataset");
651        let x_centered = &self.data.x - x_mean.insert_axis(Axis(0));
652        self.resid_work() + x_centered.dot(&self.result)
653    }
654
655    /// Return the Pearson residuals for each point in the training data:
656    ///
657    /// ```math
658    /// r_i = \sqrt{w_i} \, \frac{y_i - \hat\mu_i}{\sqrt{V(\hat\mu_i)}}
659    /// ```
660    ///
661    /// where $`V`$ is the variance function and $`w_i`$ are the variance weights.
662    /// Not scaled by the dispersion for families with a free dispersion parameter.
663    pub fn resid_pear(&self) -> Array1<F> {
664        let mu: Array1<F> = self.predict(&self.data.x, self.data.linear_offset.as_ref());
665        let residuals = &self.data.y - &mu;
666        let inv_var_diag: Array1<F> = mu.mapv_into(M::variance).mapv_into(F::recip);
667        // the variance weights are the reciprocal of the corresponding variance
668        let scales = self
669            .data
670            .apply_var_weights(inv_var_diag)
671            .mapv_into(num_traits::Float::sqrt);
672        scales * residuals
673    }
674
675    /// Return the standardized Pearson residuals (internally studentized):
676    ///
677    /// ```math
678    /// r_i^* = \frac{r_i}{\sqrt{\hat\phi(1 - h_i)}}
679    /// ```
680    ///
681    /// where $`r_i`$ is the Pearson residual and $`h_i`$ is the leverage. These are expected
682    /// to have unit variance.
683    pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>> {
684        let pearson = self.resid_pear();
685        let phi = self.dispersion();
686        let hat = self.leverage()?;
687        let omh = -hat + F::one();
688        let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
689        Ok(pearson / denom)
690    }
691
692    /// Return the response residuals: $`e_i^\text{resp} = y_i - \hat\mu_i`$.
693    pub fn resid_resp(&self) -> Array1<F> {
694        self.errors(self.data)
695    }
696
697    /// Return the externally studentized residuals:
698    ///
699    /// ```math
700    /// \tilde{t}_i = \text{sign}(e_i) \sqrt{\frac{D_i^{(-i)}}{\hat\phi^{(-i)}}}
701    /// ```
702    ///
703    /// where $`D_i^{(-i)}`$ and $`\hat\phi^{(-i)}`$ are the LOO deviance and dispersion
704    /// approximated via one-step deletion. Under normality, $`t`$-distributed with
705    /// $`N - K - 1`$ degrees of freedom. This is a robust and general method for outlier
706    /// detection.
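    ///
    /// A sketch of screening for outliers (hypothetical data, marked `ignore`; the cutoff of 3
    /// is a common heuristic, not part of this crate):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
    /// let x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// // Large absolute values suggest potential outliers.
    /// let any_outlier = fit.resid_student()?.iter().any(|t| t.abs() > 3.0);
    /// ```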
707    pub fn resid_student(&self) -> RegressionResult<Array1<F>> {
708        let signs = self.resid_resp().mapv(F::signum);
709        let dev_terms_loo: Array1<F> = self.deviance_terms_loo()?;
710        // NOTE: This match could also be handled internally in dispersion_loo()
711        let dev_terms_scaled = match M::DISPERSED {
712            // The dispersion is corrected for the contribution from each current point.
713            // This is an approximation; the exact solution would perform a fit at each point.
714            DispersionType::FreeDispersion => dev_terms_loo / self.dispersion_loo()?,
715            DispersionType::NoDispersion => dev_terms_loo,
716        };
717        Ok(signs * dev_terms_scaled.mapv_into(num_traits::Float::sqrt))
718    }
719
720    /// Returns the working residuals:
721    ///
722    /// ```math
723    /// e_i^\text{work} = \frac{\eta'(\omega_i)(y_i - \hat\mu_i)}{V(\hat\mu_i)}
724    /// ```
725    ///
726    /// Equal to the response residuals divided by the variance function (rather than by its
727    /// square root, as in the Pearson residuals), with an adjustment for non-canonical links.
728    pub fn resid_work(&self) -> Array1<F> {
729        let lin_pred: Array1<F> = self.data.linear_predictor(&self.result);
730        let mu: Array1<F> = lin_pred.mapv(M::Link::func_inv);
731        let resid_response: Array1<F> = &self.data.y - &mu;
732        let var: Array1<F> = mu.mapv(M::variance);
733        // adjust for non-canonical link functions; we want a total factor of 1/eta'
734        let (adj_response, adj_var) =
735            M::Link::adjust_errors_variance(resid_response, var, &lin_pred);
736        adj_response / adj_var
737    }
738
739    /// Returns the score function $`\nabla_{\boldsymbol\beta} l`$ (the gradient of the
740    /// log-likelihood) at the parameter values given. Should be zero at the MLE.
741    pub fn score(&self, params: &Array1<F>) -> Array1<F> {
742        // This represents the predictions given the input parameters, not the
743        // fit parameters.
744        let lin_pred: Array1<F> = self.data.linear_predictor(params);
745        let mu: Array1<F> = M::mean(&lin_pred);
746        let resid_response = &self.data.y - mu;
747        let resid_working = M::Link::adjust_errors(resid_response, &lin_pred);
748        let score_unreg = self.data.x_conj().dot(&resid_working);
749        self.reg.as_ref().gradient(score_unreg, params)
750    }
751
752    /// Returns the score test statistic:
753    ///
754    /// ```math
755    /// S = \mathbf{J}(\boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0)^{-1} \, \mathbf{J}(\boldsymbol\beta_0)
756    /// ```
757    ///
758    /// where $`\mathbf{J}`$ is the score and $`\mathcal{I}`$ is the Fisher information, both
759    /// evaluated at the null parameters. Asymptotically $`\chi^2`$-distributed with
760    /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
761    pub fn score_test(&self) -> RegressionResult<F> {
762        let (_, null_params) = self.null_model_fit();
763        self.score_test_against(null_params)
764    }
765
766    /// Returns the score test statistic compared to another set of model
767    /// parameters, not necessarily a null model. The degrees of freedom cannot
768    /// be generally inferred.
769    pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F> {
770        let score_alt = self.score(&alternative);
771        let fisher_alt = self.fisher(&alternative);
772        // This is not the same as the cached covariance matrix since it is
773        // evaluated at the given parameters rather than at the fit result.
774        // NOTE: invh/invh_into() are bugged and incorrect!
775        let inv_fisher_alt = fisher_alt.inv_into()?;
776        Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
777    }
778
779    /// The degrees of freedom for the likelihood ratio test, the score test,
780    /// and the Wald test. Not to be confused with `ndf()`, the degrees of
781    /// freedom in the model fit.
782    pub fn test_ndf(&self) -> usize {
783        if self.use_intercept {
784            self.n_par - 1
785        } else {
786            self.n_par
787        }
788    }
789
790    /// Returns the Wald test statistic:
791    ///
792    /// ```math
793    /// W = (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0) \, (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)
794    /// ```
795    ///
796    /// Compared to a null model with only an intercept (if one is used). Asymptotically
797    /// $`\chi^2`$-distributed with [`test_ndf()`](Self::test_ndf) degrees of freedom.
798    pub fn wald_test(&self) -> F {
799        // The null parameters are all zero except for a possible intercept term
800        // which optimizes the null model.
801        let (_, null_params) = self.null_model_fit();
802        self.wald_test_against(&null_params)
803    }
804
805    /// Returns the Wald test statistic compared to another specified model fit
806    /// instead of the null model. The degrees of freedom cannot be generally
807    /// inferred.
808    pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
809        let d_params: Array1<F> = &self.result - alternative;
810        let fisher_alt: Array2<F> = self.fisher(alternative);
811        d_params.t().dot(&fisher_alt.dot(&d_params))
812    }
813
814    /// Returns the per-parameter Wald $`z`$-statistic:
815    ///
816    /// ```math
817    /// z_k = \frac{\hat\beta_k}{\sqrt{\text{Cov}[\hat{\boldsymbol\beta}]_{kk}}}
818    /// ```
819    ///
820    /// Since it does not account for covariance between parameters it may not be accurate.
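    ///
    /// A sketch of a per-coefficient significance check (hypothetical data, marked `ignore`;
    /// the 1.96 threshold is the usual two-sided 5% normal quantile, not part of this crate):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Logistic, ModelBuilder};
    ///
    /// let y = array![true, false, false, true, true, false, true];
    /// let x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Logistic>::data(&y, &x).build()?.fit()?;
    /// let z = fit.wald_z()?;
    /// // z[0] is the intercept; z[1] is the first covariate.
    /// let covariate_significant = z[1].abs() > 1.96;
    /// ```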
821    pub fn wald_z(&self) -> RegressionResult<Array1<F>> {
822        let par_cov = self.covariance()?;
823        let par_variances: ArrayView1<F> = par_cov.diag();
824        Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
825    }
826}
827
828/// Specialized functions for OLS.
829impl<'a, F> Fit<'a, Linear, F>
830where
831    F: 'static + Float,
832{
833    /// Returns the coefficient of determination $`R^2`$:
834    ///
835    /// ```math
836    /// R^2 = 1 - \frac{\text{RSS}}{\text{TSS}}
837    /// ```
838    ///
839    /// where RSS is the residual sum of squares and TSS is the total sum of squares.
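    ///
    /// A sketch (hypothetical data, marked `ignore`; crate-root re-exports are assumed):
    ///
    /// ```ignore
    /// use ndarray::{array, Axis};
    /// use ndarray_glm::{Linear, ModelBuilder};
    ///
    /// let y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5];
    /// let x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6].insert_axis(Axis(1));
    /// let fit = ModelBuilder::<Linear>::data(&y, &x).build()?.fit()?;
    /// // Fraction of the variance in y explained by the fit.
    /// let r2 = fit.r_sq();
    /// ```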
840    pub fn r_sq(&self) -> F {
841        let y_avg: F = self.data.y.mean().expect("Data should be non-empty");
842        let total_sum_sq: F = self.data.y.mapv(|y| y - y_avg).mapv(|dy| dy * dy).sum();
843        (total_sum_sq - self.resid_sum_sq()) / total_sum_sq
844    }
845
846    /// Returns the residual sum of squares:
847    /// $`\text{RSS} = \sum_i (y_i - \hat\mu_i)^2`$.
848    pub fn resid_sum_sq(&self) -> F {
849        self.resid_resp().mapv_into(|r| r * r).sum()
850    }
851}
852
853#[cfg(test)]
854mod tests {
855    use super::*;
856    use crate::{
857        Linear, Logistic,
858        model::ModelBuilder,
859        utility::{one_pad, standardize},
860    };
861    use anyhow::Result;
862    use approx::assert_abs_diff_eq;
863    use ndarray::Axis;
864    use ndarray::{concatenate, s};
865
866    /// Checks that the test statistics are invariant under standardization of the data.
867    #[test]
868    fn standardization_invariance() -> Result<()> {
869        let data_y = array![true, false, false, true, true, true, true, false, true];
870        let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
871        let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
872        let data_x_std = standardize(data_x.clone());
873        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
874            .linear_offset(lin_off.clone())
875            .build()?;
876        let fit = model.fit()?;
877        let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x_std)
878            .linear_offset(lin_off)
879            .build()?;
880        let fit_std = model_std.fit()?;
881        let lr = fit.lr_test();
882        let lr_std = fit_std.lr_test();
883        assert_abs_diff_eq!(lr, lr_std);
884        eprintln!("about to try score test");
885        assert_abs_diff_eq!(
886            fit.score_test()?,
887            fit_std.score_test()?,
888            epsilon = f32::EPSILON as f64
889        );
890        eprintln!("about to try wald test");
891        assert_abs_diff_eq!(
892            fit.wald_test(),
893            fit_std.wald_test(),
894            epsilon = 4.0 * f64::EPSILON
895        );
896        assert_abs_diff_eq!(fit.aic(), fit_std.aic());
897        assert_abs_diff_eq!(fit.bic(), fit_std.bic());
898        eprintln!("about to try deviance");
899        assert_abs_diff_eq!(fit.deviance(), fit_std.deviance());
900        // The Wald Z-score of the intercept term is not invariant under a
901        // linear transformation of the data, but the parameter part seems to
902        // be, at least for single-component data.
903        assert_abs_diff_eq!(
904            fit.wald_z()?[1],
905            fit_std.wald_z()?[1],
906            epsilon = 0.01 * f32::EPSILON as f64
907        );
908
909        Ok(())
910    }
911
912    #[test]
913    fn null_model() -> Result<()> {
914        let data_y = array![true, false, false, true, true];
915        let data_x: Array2<f64> = array![[], [], [], [], []];
916        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
917        let fit = model.fit()?;
918        dbg!(fit.n_iter);
919        dbg!(&fit.result);
920        // with no offsets, the result should be the link function of the mean.
921        assert_abs_diff_eq!(
922            fit.result[0],
923            <Logistic as Glm>::Link::func(0.6),
924            epsilon = 4.0 * f64::EPSILON
925        );
926        let empty_null_like = fit.null_like();
927        assert_eq!(fit.test_ndf(), 0);
928        dbg!(&fit.model_like);
929        let lr = fit.lr_test();
930        // Since there is no data, the null likelihood should be identical to
931        // the fit likelihood, so the likelihood ratio test should yield zero.
932        assert_abs_diff_eq!(lr, 0., epsilon = 4. * f64::EPSILON);
933
934        // Check that the assertions still hold if linear offsets are included.
935        let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
936        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
937            .linear_offset(lin_off)
938            .build()?;
939        let fit_off = model.fit()?;
940        let empty_model_like_off = fit_off.model_like;
941        let empty_null_like_off = fit_off.null_like();
942        // these two assertions should be equivalent
943        assert_abs_diff_eq!(fit_off.lr_test(), 0.);
944        assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);
945
946        // check consistency with data provided
947        let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
948        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
949        let fit_with = model.fit()?;
950        dbg!(&fit_with.result);
951        // The null likelihood of the model with parameters should be the same
952        // as the likelihood of the model with only the intercept.
953        assert_abs_diff_eq!(empty_null_like, fit_with.null_like());
954
955        Ok(())
956    }
957
958    #[test]
959    fn null_like_logistic() -> Result<()> {
960        // 6 true and 4 false for y_bar = 0.6.
961        let data_y = array![
962            true, true, true, true, true, true, false, false, false, false
963        ];
964        let ybar: f64 = 0.6;
965        let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
966        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
967        let fit = model.fit()?;
968        let target_null_like = fit
969            .data
970            .y
971            .mapv(|y| {
972                let eta = (ybar / (1. - ybar)).ln();
973                y * eta - eta.exp().ln_1p()
974            })
975            .sum();
976        assert_abs_diff_eq!(fit.null_like(), target_null_like);
977        Ok(())
978    }
979
980    // Check that the deviance is equal to the sum of square deviations for a linear model
981    #[test]
982    fn deviance_linear() -> Result<()> {
983        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
984        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
985        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
986        let fit = model.fit()?;
987        // The predicted values of Y given the model.
988        let pred_y = fit.predict(&one_pad(data_x.view()), None);
989        let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
990        assert_abs_diff_eq!(fit.deviance(), target_dev, epsilon = 4. * f64::EPSILON);
991        Ok(())
992    }
993
994    // Check that the deviance equals the dispersion parameter times the residual degrees of
995    // freedom for a linear model.
996    #[test]
997    fn deviance_dispersion_eq_linear() -> Result<()> {
998        let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
999        let data_x = array![
1000            [0.4, 0.2],
1001            [0.1, 0.4],
1002            [-0.1, 0.3],
1003            [0.5, 0.7],
1004            [0.4, 0.1],
1005            [-0.2, -0.3],
1006            [0.4, -0.1]
1007        ];
1008        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1009        let fit = model.fit()?;
1010        let dev = fit.deviance();
1011        let disp = fit.dispersion();
1012        let ndf = fit.ndf();
1013        assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
1014        Ok(())
1015    }
1016
1017    // Check that the residuals for a linear model are all consistent.
1018    #[test]
1019    fn residuals_linear() -> Result<()> {
1020        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
1021        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
1022        let weights = array![0.8, 1.2, 0.9, 0.8, 1.1, 0.9];
1023        // the implied standard deviations from the weights
1024        let wgt_sigmas = weights.map(|w: &f64| 1. / w.sqrt());
1025        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
1026            .var_weights(weights.clone())
1027            .build()?;
1028        let fit = model.fit()?;
1029        let response = fit.resid_resp();
1030        let resp_scaled = &response / wgt_sigmas;
1031        let pearson = fit.resid_pear();
1032        let deviance = fit.resid_dev();
1033        assert_abs_diff_eq!(resp_scaled, pearson);
1034        assert_abs_diff_eq!(resp_scaled, deviance);
1035        let pearson_std = fit.resid_pear_std()?;
1036        let deviance_std = fit.resid_dev_std()?;
1037        assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);
1038        // The externally-studentized residuals aren't expected to match the internally-studentized
1039        // ones.
1040        let dev_terms_loo = fit.deviance_terms_loo()?;
1041        let disp_terms_loo = fit.dispersion_loo()?;
1042        let student = fit.resid_student()?;
1043
1044        // NOTE: Studentization can't be checked directly in general because the method used is a
1045        // one-step approximation; however, it should be exact in the linear OLS case.
1046        let n_data = data_y.len();
1047        // Check that the leave-one-out stats hold literally
1048        let mut loo_diff: Vec<f64> = Vec::new();
1049        let mut loo_dev_res: Vec<f64> = Vec::new();
1050        let mut loo_disp: Vec<f64> = Vec::new();
1051        for i in 0..n_data {
1052            let ya = data_y.slice(s![0..i]);
1053            let yb = data_y.slice(s![i + 1..]);
1054            let xa = data_x.slice(s![0..i, ..]);
1055            let xb = data_x.slice(s![i + 1.., ..]);
1056            let wa = weights.slice(s![0..i]);
1057            let wb = weights.slice(s![i + 1..]);
1058            let y_loo = concatenate![Axis(0), ya, yb];
1059            let x_loo = concatenate![Axis(0), xa, xb];
1060            let w_loo = concatenate![Axis(0), wa, wb];
1061            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
1062                .var_weights(w_loo)
1063                .build()?;
1064            let fit_i = model_i.fit()?;
1065            let yi = data_y[i];
1066            let xi = data_x.slice(s![i..i + 1, ..]);
1067            let xi = crate::utility::one_pad(xi);
1068            let wi = weights[i];
1069            let yi_pred: f64 = fit_i.predict(&xi, None)[0];
1070            let disp_i = fit_i.dispersion();
1071            let var_i = disp_i / wi;
1072            let diff_i = yi - yi_pred;
1073            let res_dev_i = diff_i / var_i.sqrt();
1074            loo_diff.push(wi * diff_i * diff_i);
1075            loo_disp.push(disp_i);
1076            loo_dev_res.push(res_dev_i);
1077        }
1078        let loo_diff: Array1<f64> = loo_diff.into();
1079        let loo_disp: Array1<f64> = loo_disp.into();
1080        let loo_dev_res: Array1<f64> = loo_dev_res.into();
1081        assert_abs_diff_eq!(dev_terms_loo, loo_diff, epsilon = 8. * f64::EPSILON);
1082        assert_abs_diff_eq!(disp_terms_loo, loo_disp, epsilon = 8. * f64::EPSILON);
1083        assert_abs_diff_eq!(student, loo_dev_res, epsilon = 8. * f64::EPSILON);
1084        Ok(())
1085    }
1086
1087    // Check the null likelihood for the case where it can be computed exactly.
1088    #[test]
1089    fn null_like_linear() -> Result<()> {
1090        let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
1091        let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
1092        let ybar: f64 = data_y.mean().unwrap();
1093        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1094        let fit = model.fit()?;
1096        let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
1101        let fit_null_like = fit.null_like();
1102        assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
1103        assert_eq!(fit.test_ndf(), 1);
1104        assert_abs_diff_eq!(
1105            fit_null_like,
1106            target_null_like,
1107            epsilon = 4.0 * f64::EPSILON
1108        );
1109        Ok(())
1110    }
1111
1112    // check the leave-one-out one-step for the linear model
1113    #[test]
1114    fn loo_linear() -> Result<()> {
1115        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
1116        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
1117        let weights = array![1.0, 1.2, 0.8, 1.1, 1.0, 0.7];
1118        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
1119            .var_weights(weights.clone())
1120            .build()?;
1121        let fit = model.fit()?;
1122
1123        let loo_coef: Array2<f64> = fit.infl_coef()?;
1124        let loo_results = &fit.result - loo_coef;
1125        let n_data = data_y.len();
1126        for i in 0..n_data {
1127            let ya = data_y.slice(s![0..i]);
1128            let yb = data_y.slice(s![i + 1..]);
1129            let xa = data_x.slice(s![0..i, ..]);
1130            let xb = data_x.slice(s![i + 1.., ..]);
1131            let wa = weights.slice(s![0..i]);
1132            let wb = weights.slice(s![i + 1..]);
1133            let y_loo = concatenate![Axis(0), ya, yb];
1134            let x_loo = concatenate![Axis(0), xa, xb];
1135            let w_loo = concatenate![Axis(0), wa, wb];
1136            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
1137                .var_weights(w_loo)
1138                .build()?;
1139            let fit_i = model_i.fit()?;
1140            assert_abs_diff_eq!(
1141                loo_results.row(i),
1142                &fit_i.result,
1143                epsilon = f32::EPSILON as f64
1144            );
1145        }
1146        Ok(())
1147    }
1148
1149    // check the null likelihood where there is no dependence on the X data.
1150    #[test]
1151    fn null_like_logistic_nodep() -> Result<()> {
1152        let data_y = array![true, true, false, false, true, false, false, true];
1153        let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
1154        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1155        let fit = model.fit()?;
1156        let lr = fit.lr_test();
1157        assert_abs_diff_eq!(lr, 0.);
1158        Ok(())
1159    }
1160    // TODO: Test that the statistics behave sensibly under regularization. The
1161    // likelihood ratio test should yield a smaller value.
1162
1163    // Test the basic caching functions.
1164    #[test]
1165    fn cached_computations() -> Result<()> {
1166        let data_y = array![true, true, false, true, true, false, false, false, true];
1167        let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
1168        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1169        let fit = model.fit()?;
1170        let _null_like = fit.null_like();
1171        let _null_like = fit.null_like();
1172        let _cov = fit.covariance()?;
1173        let _wald = fit.wald_z();
1174        Ok(())
1175    }
1176
1177    // Check the consistency of the various statistical tests for linear
1178    // regression, where they should be the most comparable.
1179    #[test]
1180    fn linear_stat_tests() -> Result<()> {
1181        let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
1182        let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
1183        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1184        let fit = model.fit()?;
1185        let lr = fit.lr_test();
1186        let wald = fit.wald_test();
1187        let score = fit.score_test()?;
1188        assert_abs_diff_eq!(lr, wald, epsilon = 32.0 * f64::EPSILON);
1189        assert_abs_diff_eq!(lr, score, epsilon = 32.0 * f64::EPSILON);
1190        Ok(())
1191    }
1192}