ndarray_glm/fit.rs
1//! Stores the fit results of the IRLS regression and provides functions that
2//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.
3
4pub mod options;
5#[cfg(feature = "stats")]
6use crate::response::Response;
7use crate::{
8 Linear,
9 data::{Dataset, one_pad},
10 error::RegressionResult,
11 glm::{DispersionType, Glm},
12 irls::{Irls, IrlsStep},
13 link::{Link, Transform},
14 model::Model,
15 num::Float,
16 regularization::IrlsReg,
17};
18use ndarray::{Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2, array, s};
19use ndarray_linalg::InverseInto;
20use once_cell::unsync::OnceCell; // can be replaced by std::cell::OnceCell upon stabilization
21use options::FitOptions;
22#[cfg(feature = "stats")]
23use statrs::distribution::ContinuousCDF;
24use std::marker::PhantomData;
25
/// The result of a successful GLM fit.
pub struct Fit<'a, M, F>
where
    M: Glm,
    F: Float,
{
    /// Ties this fit to the GLM family `M` without storing any of its data.
    model: PhantomData<M>,
    /// The data and model specification used in the fit.
    data: &'a Dataset<F>,
    /// The parameter values that maximize the likelihood as given by the IRLS regression. If the
    /// dataset was internally standardized, this is transformed back
    pub result: Array1<F>,
    /// The parameter values used internally for the standardized data.
    result_std: Array1<F>,
    /// The linear predictor of the X training data given the fit result. Note that it is invariant
    /// under any standardization scaling, since it is an inner product between X and beta.
    x_lin_pred: Array1<F>,
    /// The predicted y-values for the training data.
    y_hat: Array1<F>,
    /// The options used for this fit.
    pub options: FitOptions<F>,
    /// The value of the likelihood function for the fit result.
    pub model_like: F,
    /// The regularizer of the fit
    reg: Box<dyn IrlsReg<F>>,
    /// The number of overall iterations taken in the IRLS.
    pub n_iter: usize,
    /// The history of guesses and likelihoods over the IRLS iterations.
    pub history: Vec<IrlsStep<F>>,
    /// The number of parameters
    n_par: usize,
    // The remaining variables hold lazily-computed cached results and matrices.
    /// The estimated dispersion parameter, which is called in many places. For some families this
    /// is just one.
    phi: OnceCell<F>,
    /// The deviance terms for each observation are called frequently and used in several places.
    dev_terms: OnceCell<Array1<F>>,
    /// The Pearson residuals, a common statistic that is re-used in several other quantities.
    resid_pear: OnceCell<Array1<F>>,
    /// The covariance matrix, using the sandwich approach.
    covariance: OnceCell<Array2<F>>,
    /// The inverse of the regularized Fisher information matrix in the external non-standardized
    /// parameter basis. Used as a component of the sandwich covariance and for influence
    /// calculations. Cached on first access.
    fisher_inv: OnceCell<Array2<F>>,
    /// The inverse of the fisher matrix in standardized internal parameters.
    fisher_std_inv: OnceCell<Array2<F>>,
    /// The hat matrix of the data and fit. Since the calculation requires a matrix inversion of
    /// the fisher information, it is computed only when needed and the value is cached. Access
    /// through the `hat()` function.
    hat: OnceCell<Array2<F>>,
    /// The likelihood and parameters for the null model.
    null_model: OnceCell<(F, Array1<F>)>,
}
80
81impl<'a, M, F> Fit<'a, M, F>
82where
83 M: Glm,
84 F: 'static + Float,
85{
86 /// Returns the Akaike information criterion for the model fit.
87 ///
88 /// ```math
89 /// \text{AIC} = D + 2K - 2\sum_{i} \ln w_{i}
90 /// ```
91 ///
92 /// where $`D`$ is the deviance, $`K`$ is the number of parameters, and $`w_i`$ are the
93 /// variance weights.
94 /// This is unique only to an additive constant, so only differences in AIC are meaningful.
95 pub fn aic(&self) -> F {
96 let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
97 let sum_log_weights = self.data.freq_sum(&log_weights);
98 // NOTE: This is now the unregularized deviance.
99 self.deviance() + F::two() * self.rank() - F::two() * sum_log_weights
100 }
101
102 /// Returns the Bayesian information criterion for the model fit.
103 ///
104 /// ```math
105 /// \text{BIC} = K \ln(n) - 2l - 2\sum_{i} \ln w_{i}
106 /// ```
107 ///
108 /// where $`K`$ is the number of parameters, $`n`$ is the number of observations, and $`l`$ is
109 /// the log-likelihood (including the variance weight normalization terms).
110 // TODO: Also consider the effect of regularization on this statistic.
111 // TODO: Wikipedia suggests that the variance should included in the number
112 // of parameters for multiple linear regression. Should an additional
113 // parameter be included for the dispersion parameter? This question does
114 // not affect the difference between two models fit with the methodology in
115 // this package.
116 pub fn bic(&self) -> F {
117 let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
118 let sum_log_weights = self.data.freq_sum(&log_weights);
119 let logn = num_traits::Float::ln(self.data.n_obs());
120 logn * self.rank() - F::two() * self.model_like - F::two() * sum_log_weights
121 }
122
123 /// The Cook's distance for each observation, which measures how much the predicted values
124 /// change when leaving out each observation.
125 ///
126 /// ```math
127 /// C_i = \frac{r_i^2 \, h_i}{K \, \hat\phi \, (1 - h_i)^2}
128 /// ```
129 ///
130 /// where $`r_i`$ is the Pearson residual, $`h_i`$ is the leverage, $`K`$ is the rank
131 /// (number of parameters), and $`\hat\phi`$ is the estimated dispersion.
132 pub fn cooks(&self) -> RegressionResult<Array1<F>, F> {
133 let hat = self.leverage()?;
134 let pear_sq = self.resid_pear().mapv(|r| r * r);
135 let h_terms: Array1<F> = hat.mapv_into(|h| {
136 let omh = F::one() - h;
137 h / (omh * omh)
138 });
139 let denom: F = self.rank() * self.dispersion();
140 Ok(pear_sq * h_terms / denom)
141 }
142
143 /// The covariance matrix of the parameter estimates. When no regularization is used, this is:
144 ///
145 /// ```math
146 /// \text{Cov}[\hat{\boldsymbol\beta}] = \hat\phi \, (\mathbf{X}^\mathsf{T}\mathbf{WSX})^{-1}
147 /// ```
148 ///
149 /// When regularization is active, the sandwich form is used to correctly account for the bias
150 /// introduced by the penalty:
151 ///
152 /// ```math
153 /// \text{Cov}[\hat{\boldsymbol\beta}] = \hat\phi \, \mathcal{I}_\text{reg}^{-1} \, \mathcal{I}_\text{data} \, \mathcal{I}_\text{reg}^{-1}
154 /// ```
155 ///
156 /// where $`\mathcal{I}_\text{reg}`$ is the regularized Fisher information and
157 /// $`\mathcal{I}_\text{data}`$ is the unregularized (data-only) Fisher information.
158 /// When unregularized, $`\mathcal{I}_\text{reg} = \mathcal{I}_\text{data}`$ and this reduces
159 /// to the standard form. The result is cached on first access.
160 pub fn covariance(&self) -> RegressionResult<&Array2<F>, F> {
161 self.covariance.get_or_try_init(|| {
162 // The covariance must be multiplied by the dispersion parameter.
163 // For logistic/poisson regression, this is identically 1.
164 // For linear/gamma regression it is estimated from the data.
165 let phi: F = self.dispersion();
166 let f_reg_inv: Array2<F> = self.fisher_inv()?.to_owned();
167 // Use the sandwich form so that regularization doesn't artificially deflate uncertainty.
168 // When unregularized, F_reg = F_data and this reduces to F_data^{-1}.
169 // The unregularized fisher matrix is most easily acquired in terms of the standardized
170 // variables, so apply the external transformation to it.
171 let f_data = self
172 .data
173 .inverse_transform_fisher(self.fisher_data_std(&self.result_std));
174 Ok(f_reg_inv.dot(&f_data).dot(&f_reg_inv) * phi)
175 })
176 }
177
178 /// Returns the deviance of the fit:
179 ///
180 /// ```math
181 /// D = -2 \left[ l(\hat{\boldsymbol\beta}) - l_\text{sat} \right]
182 /// ```
183 ///
184 /// Asymptotically $`\chi^2`$-distributed with [`ndf()`](Self::ndf) degrees of freedom.
185 /// The unregularized likelihood is used.
186 pub fn deviance(&self) -> F {
187 let terms = self.deviance_terms();
188 self.data.freq_sum(terms)
189 }
190
191 /// Returns the contribution to the deviance from each observation. The total deviance should
192 /// be the sum of all of these. Variance weights are already included, but not frequency
193 /// weights.
194 fn deviance_terms(&self) -> &Array1<F> {
195 self.dev_terms.get_or_init(|| {
196 let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result_std);
197 let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
198 let terms = (ll_sat - ll_terms) * F::two();
199 self.data.apply_var_weights(terms)
200 })
201 }
202
203 /// Returns the self-excluded deviance terms, i.e. the deviance of an observation as if the
204 /// model was fit without it. This is a one-step approximation.
205 fn deviance_terms_loo(&self) -> RegressionResult<Array1<F>, F> {
206 let dev_terms = self.deviance_terms();
207 let pear_sq = self.resid_pear().mapv(|r| r * r);
208 let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
209 let result = dev_terms + &hat_rat * (&hat_rat + F::two()) * pear_sq;
210 Ok(result)
211 }
212
213 /// The dispersion parameter $`\hat\phi`$ relating the variance to the variance function:
214 /// $`\text{Var}[y] = \phi \, V(\mu)`$.
215 ///
216 /// Identically one for logistic, binomial, and Poisson regression.
217 /// For families with a free dispersion (linear, gamma), estimated as:
218 ///
219 /// ```math
220 /// \hat\phi = \frac{D}{\left(1 - \frac{K}{n_\text{eff}}\right) \sum_i w_i}
221 /// ```
222 ///
223 /// which reduces to $`D / (N - K)`$ without variance weights.
224 pub fn dispersion(&self) -> F {
225 *self.phi.get_or_init(|| {
226 use DispersionType::*;
227 match M::DISPERSED {
228 FreeDispersion => {
229 let dev = self.deviance();
230 let p = self.rank();
231 let n_eff = self.data.n_eff();
232 let scaling = if p >= n_eff {
233 // This is the overparameterized regime, which is checked directly instead of
234 // allowing negative values. It's not clear what conditions result in this when
235 // p < N.
236 F::zero()
237 } else {
238 (F::one() - p / n_eff) * self.data.sum_weights()
239 };
240 dev / scaling
241 }
242 NoDispersion => F::one(),
243 }
244 })
245 }
246
    /// Return the dispersion terms with the observation(s) at each point excluded from the fit,
    /// as a one-step approximation (no exact re-fit is performed). Identically one for families
    /// without a free dispersion parameter.
    fn dispersion_loo(&self) -> RegressionResult<Array1<F>, F> {
        use DispersionType::*;
        match M::DISPERSED {
            FreeDispersion => {
                // Squared Pearson residuals and the leverage ratio h / (1 - h).
                let pear_sq = self.resid_pear().mapv(|r| r * r);
                let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
                let terms = self.deviance_terms() + hat_rat * pear_sq;
                // Don't apply total weights since the variance weights are already
                // included in the residual terms. However, we do need the frequency weights.
                let terms = self.data.apply_freq_weights(terms);
                // Remove each observation's contribution from the total deviance.
                let total: Array1<F> = -terms + self.deviance();
                let scaled_total: Array1<F> = match &self.data.weights {
                    None => match &self.data.freqs {
                        // NOTE(review): total / (ndf - f_i), i.e. each exclusion removes
                        // f_i effective observations — verify against the weighted case.
                        Some(f) => total / -(f - self.ndf()),
                        None => total / (self.ndf() - F::one()),
                    },
                    Some(w) => {
                        // Effective-sample-size sums v1 = sum(w), v2 = sum(w^2)
                        // (frequency-weighted), then adjusted for each exclusion.
                        let v1 = self.data.freq_sum(w);
                        let w2 = w * w;
                        let v2 = self.data.freq_sum(&w2);
                        // The subtracted out terms need the frequency terms as well
                        let f_w = self.data.apply_freq_weights(w.clone());
                        let f_w2 = self.data.apply_freq_weights(w2);
                        // the modified sums from leaving out the ith observation
                        let v1p = -f_w + v1;
                        let v2p = -f_w2 + v2;
                        let p = self.rank();
                        // Per-observation scaling: v1' - p * v2' / v1'
                        let scale = &v1p - v2p / &v1p * p;
                        total / scale
                    }
                };
                Ok(scaled_total)
            }
            NoDispersion => Ok(Array1::<F>::ones(self.data.y.len())),
        }
    }
284
285 /// Returns the Fisher information (the negative Hessian of the log-likelihood) at the
286 /// parameter values given:
287 ///
288 /// ```math
289 /// \mathcal{I}(\boldsymbol\beta) = \mathbf{X}^\mathsf{T}\mathbf{W}\eta'^2\mathbf{S}\mathbf{X}
290 /// ```
291 ///
292 /// where $`\mathbf{S} = \text{diag}(V(\mu_i))`$ and $`\eta'`$ is the derivative of the natural
293 /// parameter in terms of the linear predictor ($`\eta(\omega) = g_0(g^{-1}(\omega))`$ where
294 /// $`g_0`$ is the canonical link function).
295 /// The regularization is included.
296 pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
297 // Note that fisher() is a public function so it should take the external parameters.
298 let params = self.data.transform_beta(params.clone());
299 // We actually need to futher apply the transformation beyond this, so that *arrays
300 // multiplying this resulting matrix* are hit the right way.
301 let fish_std = self.fisher_std(¶ms);
302 self.data.inverse_transform_fisher(fish_std)
303 }
304
305 /// The inverse of the (regularized) fisher information matrix, in the external parameter
306 /// basis. Used for the influence calculations, and as a component of the sandwich covariance.
307 /// Cached on first access.
308 fn fisher_inv(&self) -> RegressionResult<&Array2<F>, F> {
309 self.fisher_inv.get_or_try_init(|| {
310 let fisher_reg = self.fisher(&self.result);
311 // NOTE: invh/invh_into() are bugged and incorrect, as they are sensitive to the layout.
312 // Even with X in column-major order, the fisher matrix is in the space of the
313 // covariates, and seems to naturally emerge as row-major. The bug could be avoided
314 // with:
315 // let fish_inv = fisher_reg.t().to_owned().invh_into()?;
316 // but this also allocates again so let's not worry about this yet.
317 let fish_inv = fisher_reg.inv_into()?;
318 Ok(fish_inv)
319 })
320 }
321
322 /// Compute the data-only (unregularized) Fisher information in the internal standardized basis.
323 fn fisher_data_std(&self, params: &Array1<F>) -> Array2<F> {
324 let lin_pred: Array1<F> = self.data.linear_predictor_std(params);
325 let adj_var: Array1<F> = M::adjusted_variance_diag(&lin_pred);
326 (self.data.x_conj() * &adj_var).dot(&self.data.x)
327 }
328
329 /// Compute the fisher information in terms of the internal (likely standardized)
330 /// coefficients. The regularization is included here.
331 fn fisher_std(&self, params: &Array1<F>) -> Array2<F> {
332 let fisher = self.fisher_data_std(params);
333 self.reg.as_ref().irls_mat(fisher, params)
334 }
335
336 /// The inverse of the (regularized) fisher information matrix, in the internal standardized
337 /// parameter basis.
338 fn fisher_std_inv(&self) -> RegressionResult<&Array2<F>, F> {
339 self.fisher_std_inv.get_or_try_init(|| {
340 let fisher_reg = self.fisher_std(&self.result_std);
341 // NOTE: invh/invh_into() are bugged and incorrect!
342 let fish_inv = fisher_reg.inv_into()?;
343 Ok(fish_inv)
344 })
345 }
346
347 /// Returns the hat matrix, also known as the "projection" or "influence" matrix:
348 ///
349 /// ```math
350 /// P_{ij} = \frac{\partial \hat{y}_i}{\partial y_j}
351 /// ```
352 ///
353 /// Orthogonal to the response residuals at the fit result: $`\mathbf{P}(\mathbf{y} -
354 /// \hat{\mathbf{y}}) = 0`$.
355 /// This version is not symmetric, but the diagonal is invariant to this choice of convention.
356 pub fn hat(&self) -> RegressionResult<&Array2<F>, F> {
357 self.hat.get_or_try_init(|| {
358 // Do the full computation in terms of the internal parameters, since this observable
359 // is not sensitive to the choice of basis.
360
361 // Apply the eta' terms manually instead of calling adjusted_variance_diag, because the
362 // adjusted variance method applies 2 powers to the variance, while we want one power
363 // to the variance and one to the weights.
364 // let adj_var = M::adjusted_variance_diag(&self.x_lin_pred);
365
366 let var = self.y_hat.clone().mapv_into(M::variance);
367 let eta_d = M::Link::d_nat_param(&self.x_lin_pred);
368
369 let fisher_inv = self.fisher_std_inv()?;
370
371 // the GLM variance and the data weights are put on different sides in this convention
372 let left = (var * &eta_d).insert_axis(Axis(1)) * &self.data.x;
373 let right = self.data.x_conj() * &eta_d;
374 Ok(left.dot(&fisher_inv.dot(&right)))
375 })
376 }
377
378 /// The one-step approximation to the change in coefficients from excluding each observation:
379 ///
380 /// ```math
381 /// \Delta\boldsymbol\beta^{(-i)} \approx \frac{1}{1-h_i}\,\mathcal{I}^{-1}\mathbf{x}^{(i)} w_i \eta'_i e_i
382 /// ```
383 ///
384 /// Each row $`i`$ should be subtracted from $`\hat{\boldsymbol\beta}`$ to approximate
385 /// the coefficients that would result from excluding observation $`i`$.
386 /// Exact for linear models; a one-step approximation for nonlinear models.
387 pub fn infl_coef(&self) -> RegressionResult<Array2<F>, F> {
388 // The linear predictor can be acquired in terms of the standardized parameters, but the
389 // rest of the computation should use external.
390 let resid_resp = self.resid_resp();
391 let omh = -self.leverage()? + F::one();
392 let resid_adj = M::Link::adjust_errors(resid_resp, &self.x_lin_pred) / omh;
393 let xte = self.data.x_conj_ext() * resid_adj;
394 let fisher_inv = self.fisher_inv()?;
395 let delta_b = xte.t().dot(fisher_inv);
396 Ok(delta_b)
397 }
398
399 /// Returns the leverage $`h_i = P_{ii}`$ for each observation: the diagonal of the hat matrix.
400 /// Indicates the sensitivity of each prediction to its corresponding observation.
401 pub fn leverage(&self) -> RegressionResult<Array1<F>, F> {
402 let hat = self.hat()?;
403 Ok(hat.diag().to_owned())
404 }
405
    /// Returns exact coefficients from leaving each observation out, one-at-a-time.
    /// This is a much more expensive operation than the original regression because a new
    /// regression is performed for each observation.
    pub fn loo_exact(&self) -> RegressionResult<Array2<F>, F> {
        // Use the one-step approximation to get good starting points for each exclusion.
        let loo_coef: Array2<F> = self.infl_coef()?;
        // NOTE: These coefficients need to be in terms of external parameters
        let loo_initial = &self.result - loo_coef;
        let mut loo_result = loo_initial.clone();
        let n_obs = self.data.y.len();
        for i in 0..n_obs {
            // Build a copy of the dataset with observation i effectively removed.
            let data_i: Dataset<F> = {
                // Get the proper X data matrix as it existed before standardization and
                // intercept-padding.
                let x_i = self.data.x_orig();
                // Leave the observation out by setting the frequency weight to zero.
                let mut freqs_i: Array1<F> =
                    self.data.freqs.clone().unwrap_or(Array1::<F>::ones(n_obs));
                freqs_i[i] = F::zero();
                let mut data_i = Dataset {
                    y: self.data.y.clone(),
                    x: x_i,
                    linear_offset: self.data.linear_offset.clone(),
                    weights: self.data.weights.clone(),
                    freqs: Some(freqs_i),
                    // These fields must be set this way, as they are in the ModelBuilder,
                    // before finalize_design_matrix() is called.
                    has_intercept: false,
                    standardizer: None,
                };
                // Re-apply the same standardization/intercept choices as the original fit.
                data_i.finalize_design_matrix(
                    self.data.standardizer.is_some(),
                    self.data.has_intercept,
                );
                data_i
            };
            let model_i = Model {
                model: PhantomData::<M>,
                data: data_i,
            };
            let options = {
                let mut options = self.options.clone();
                // The one-step approximation should be a good starting point.
                // Use the external result as it should be re-standardized to the new dataset
                // internally before being passed to IRLS.
                options.init_guess = Some(loo_initial.row(i).to_owned());
                options
            };
            let fit_i = model_i.with_options(options).fit()?;
            // The internal re-fit transforms back to the external scale.
            loo_result.row_mut(i).assign(&fit_i.result);
        }
        Ok(loo_result)
    }
460
461 /// Perform a likelihood-ratio test, returning the statistic:
462 ///
463 /// ```math
464 /// \Lambda = -2 \ln \frac{L_0}{L} = -2(l_0 - l)
465 /// ```
466 ///
467 /// where $`L_0`$ is the null model likelihood (intercept only) and $`L`$ is the fit likelihood.
468 /// By Wilks' theorem, asymptotically $`\chi^2`$-distributed with
469 /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
470 // TODO: Should the effective number of degrees of freedom due to
471 // regularization be taken into account?
472 pub fn lr_test(&self) -> F {
473 // The model likelihood should include regularization terms and there
474 // shouldn't be any in the null model with all non-intercept parameters
475 // set to zero.
476 let null_like = self.null_like();
477 F::from(-2.).unwrap() * (null_like - self.model_like)
478 }
479
480 /// Perform a likelihood-ratio test against a general alternative model, not
481 /// necessarily a null model. The alternative model is regularized the same
482 /// way that the regression resulting in this fit was. The degrees of
483 /// freedom cannot be generally inferred.
484 pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
485 let alt_std = self.data.transform_beta(alternative.clone());
486 let alt_like = M::log_like(self.data, &alt_std);
487 let alt_like_reg = alt_like + self.reg.likelihood(&alt_std);
488 F::two() * (self.model_like - alt_like_reg)
489 }
490
491 /// Returns the residual degrees of freedom in the model, i.e. the number
492 /// of data points minus the number of parameters. Not to be confused with
493 /// `test_ndf()`, the degrees of freedom in the statistical tests of the
494 /// fit parameters.
495 pub fn ndf(&self) -> F {
496 self.data.n_obs() - self.rank()
497 }
498
    /// Assemble a `Fit` from the dataset and the converged IRLS state, transforming the
    /// results back into the external (non-standardized) parameter basis.
    pub(crate) fn new(data: &'a Dataset<F>, optimum: IrlsStep<F>, irls: Irls<M, F>) -> Self {
        let IrlsStep {
            guess: result_std,
            like: model_like,
        } = optimum;
        let Irls {
            options,
            reg,
            n_iter,
            history,
            ..
        } = irls;
        // Cache some of these variables that will be used often.
        let n_par = result_std.len();
        // NOTE: This necessarily uses the coefficients directly from the standardized data.
        // Store the linear predictor and the corresponding y values as they are commonly used.
        let x_lin_pred = data.linear_predictor_std(&result_std);
        let y_hat = M::mean(&x_lin_pred);
        // The public result must be transformed back to the external scale for compatibility
        // with the input data.
        let result_ext = data.inverse_transform_beta(result_std.clone());
        // The history also needs to be transformed back to the external scale, since it is
        // public. It shouldn't be used directly by the Fit's methods.
        let history = history
            .into_iter()
            .map(|IrlsStep { guess, like }| IrlsStep {
                guess: data.inverse_transform_beta(guess),
                like,
            })
            .collect();
        Self {
            model: PhantomData,
            data,
            result: result_ext,
            result_std,
            x_lin_pred,
            y_hat,
            options,
            model_like,
            reg,
            n_iter,
            history,
            n_par,
            // The remaining fields are lazily-computed caches, empty until first use.
            phi: OnceCell::new(),
            dev_terms: OnceCell::new(),
            resid_pear: OnceCell::new(),
            covariance: OnceCell::new(),
            fisher_inv: OnceCell::new(),
            fisher_std_inv: OnceCell::new(),
            hat: OnceCell::new(),
            null_model: OnceCell::new(),
        }
    }
552
553 /// Returns the likelihood given the null model, which fixes all parameters
554 /// to zero except the intercept (if it is used). A total of `test_ndf()`
555 /// parameters are constrained.
556 pub fn null_like(&self) -> F {
557 let (null_like, _) = self.null_model_fit();
558 *null_like
559 }
560
    /// Return the likelihood and null model parameters, which will be zero with the possible
    /// exception of the intercept term. Since this can require an additional regression, the
    /// values are cached on first call.
    fn null_model_fit(&self) -> &(F, Array1<F>) {
        self.null_model
            .get_or_init(|| match &self.data.linear_offset {
                None => {
                    // If there is no linear offset, the natural parameter is
                    // identical for all observations so it is sufficient to
                    // calculate the null likelihood for a single point with y equal
                    // to the average.
                    // The average y
                    let y_bar: F = self.data.weighted_sum(&self.data.y) / self.data.sum_weights();
                    // This approach assumes that the likelihood is in the natural
                    // exponential form as calculated by Glm::log_like_natural(). If that
                    // function is overridden and the values differ significantly, this
                    // approach will give incorrect results. If the likelihood has terms
                    // non-linear in y, then the likelihood must be calculated for every
                    // point rather than averaged.
                    // If the intercept is allowed to maximize the likelihood, the natural
                    // parameter is equal to the link of the expectation. Otherwise it is
                    // the transformation function of zero.
                    let intercept: F = if self.data.has_intercept {
                        M::Link::func(y_bar)
                    } else {
                        F::zero()
                    };
                    // This is a length-one array. This works because the
                    // likelihood contribution is the same for all observations.
                    let nat_par = M::Link::nat_param(array![intercept]);
                    // The null likelihood per observation
                    let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
                    // Multiply the per-observation likelihood by the total weight, since
                    // every term is the same.
                    let null_like_total = self.data.sum_weights() * null_like_one;
                    let null_params: Array1<F> = {
                        let mut par = Array1::<F>::zeros(self.n_par);
                        par[0] = intercept;
                        par
                    };
                    (null_like_total, null_params)
                }
                Some(off) => {
                    if self.data.has_intercept {
                        // If there are linear offsets and the intercept is allowed
                        // to be free, there is not a major simplification and the
                        // model needs to be re-fit.
                        // The X data is a single column of ones. Since this model
                        // isn't being created by the ModelBuilder, the X data
                        // has to be manually padded with ones.
                        let data_x_null = Array2::<F>::ones((self.data.y.len(), 1));
                        let null_model = Model {
                            model: std::marker::PhantomData::<M>,
                            data: Dataset::<F> {
                                y: self.data.y.clone(),
                                x: data_x_null,
                                linear_offset: Some(off.clone()),
                                weights: self.data.weights.clone(),
                                freqs: self.data.freqs.clone(),
                                // If we are in this branch it is because an intercept is needed.
                                has_intercept: true,
                                // We don't use standardization for the null model.
                                standardizer: None,
                            },
                        };
                        // TODO: Make this function return an error, although it's
                        // difficult to imagine this case happening.
                        // TODO: Should the tolerance of this fit be stricter?
                        // The intercept should not be regularized
                        let null_fit = null_model
                            .fit_options()
                            // There shouldn't be too much trouble fitting this
                            // single-parameter fit, but there shouldn't be harm in
                            // using the same maximum as in the original model.
                            .max_iter(self.options.max_iter)
                            .fit()
                            .expect("Could not fit null model!");
                        let null_params: Array1<F> = {
                            let mut par = Array1::<F>::zeros(self.n_par);
                            // There is only one parameter in this fit. It should be the same
                            // as the external result since this model doesn't have
                            // standardization.
                            par[0] = null_fit.result_std[0];
                            par
                        };
                        (null_fit.model_like, null_params)
                    } else {
                        // If the intercept is fixed to zero, then no minimization is
                        // required. The natural parameters are directly known in terms
                        // of the linear offset. The likelihood must still be summed
                        // over all observations, since they have different offsets.
                        let nat_par = M::Link::nat_param(off.clone());
                        let null_like_terms = ndarray::Zip::from(&self.data.y)
                            .and(&nat_par)
                            .map_collect(|&y, &eta| M::log_like_natural(y, eta));
                        let null_like = self.data.weighted_sum(&null_like_terms);
                        let null_params = Array1::<F>::zeros(self.n_par);
                        (null_like, null_params)
                    }
                }
            })
    }
661
662 /// Returns $`\hat{\mathbf{y}} = g^{-1}(\mathbf{X}\hat{\boldsymbol\beta} + \boldsymbol\omega_0)`$
663 /// given input data $`\mathbf{X}`$ and an optional linear offset
664 /// $`\boldsymbol\omega_0`$. The data need not be the training data.
665 pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
666 where
667 S: Data<Elem = F>,
668 {
669 let lin_pred = if self.data.has_intercept {
670 one_pad(data_x.view())
671 } else {
672 data_x.to_owned()
673 }
674 .dot(&self.result);
675 let lin_pred: Array1<F> = if let Some(off) = &lin_off {
676 lin_pred + *off
677 } else {
678 lin_pred
679 };
680 M::mean(&lin_pred)
681 }
682
    /// Returns the rank $`K`$ of the model (i.e. the number of parameters)
    fn rank(&self) -> F {
        // n_par is a small parameter count, so the conversion to F cannot fail in practice.
        F::from(self.n_par).unwrap()
    }
687
688 /// Return the deviance residuals for each point in the training data:
689 ///
690 /// ```math
691 /// d_i = \text{sign}(y_i - \hat\mu_i)\sqrt{D_i}
692 /// ```
693 ///
694 /// where $`D_i`$ is the per-observation deviance contribution.
695 /// This is usually a better choice than Pearson residuals for non-linear models.
696 pub fn resid_dev(&self) -> Array1<F> {
697 let signs = self.resid_resp().mapv_into(F::signum);
698 let ll_diff = self.deviance_terms();
699 let dev: Array1<F> = ll_diff.clone().mapv_into(num_traits::Float::sqrt);
700 signs * dev
701 }
702
703 /// Return the standardized deviance residuals (internally studentized):
704 ///
705 /// ```math
706 /// d_i^* = \frac{d_i}{\sqrt{\hat\phi(1 - h_i)}}
707 /// ```
708 ///
709 /// where $`d_i`$ is the deviance residual, $`\hat\phi`$ is the dispersion, and $`h_i`$ is
710 /// the leverage. Generally applicable for outlier detection.
711 pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>, F> {
712 let dev = self.resid_dev();
713 let phi = self.dispersion();
714 let hat: Array1<F> = self.leverage()?;
715 let omh: Array1<F> = -hat + F::one();
716 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
717 Ok(dev / denom)
718 }
719
    /// Return the partial residuals as an n_obs × n_predictors matrix. Each column $`j`$
    /// contains working residuals plus the centered contribution of predictor $`j`$:
    ///
    /// ```math
    /// r^{(j)}_i = r^w_i + (x_{ij} - \bar x_j) \beta_j
    /// ```
    ///
    /// The bar denotes the **fully weighted** column mean (combining both variance and frequency
    /// weights), which is the WLS-consistent choice: it ensures that $`\sum_i w_i r^{(j)}_i = 0`$
    /// for each predictor. R's `residuals(model, type = "partial")` uses only frequency weights in
    /// its centering (excluding variance weights), so results will differ when variance weights are
    /// present. For models without an intercept, no centering is applied. The intercept term is
    /// always excluded from the output.
    pub fn resid_part(&self) -> Array2<F> {
        // Working residuals as a column vector for broadcasting against x_centered.
        let resid_work_i = self.resid_work().insert_axis(Axis(1));
        let n = resid_work_i.len();

        // Use external (unstandardized) feature columns and coefficients.
        let x_orig = self.data.x_orig();
        // The intercept term is not part of the partial residuals.
        let beta_j = if self.data.has_intercept {
            self.result.slice(s![1..]).to_owned()
        } else {
            self.result.clone()
        }
        .insert_axis(Axis(0));

        // Center by fully-weighted (variance × frequency) column means. This is the WLS-consistent
        // choice: sum_i w_i r^(j)_i = 0 for each predictor. R excludes variance weights from
        // centering; results will differ from R when variance weights are present with an intercept.
        // No centering for no-intercept models.
        let x_centered: Array2<F> = if self.data.has_intercept {
            // Total weight of each observation, as a column for broadcasting.
            let wt_factors = self
                .data
                .apply_total_weights(Array1::ones(n))
                .insert_axis(Axis(1));
            let x_mean: Array1<F> = (&x_orig * &wt_factors).sum_axis(Axis(0)) / wt_factors.sum();
            x_orig - x_mean.insert_axis(Axis(0))
        } else {
            x_orig
        };
        // Broadcast: residual column + (centered X) scaled per-column by beta.
        resid_work_i + x_centered * beta_j
    }
763
764 /// Return the Pearson residuals for each point in the training data:
765 ///
766 /// ```math
767 /// r_i = \sqrt{w_i} \, \frac{y_i - \hat\mu_i}{\sqrt{V(\hat\mu_i)}}
768 /// ```
769 ///
770 /// where $`V`$ is the variance function and $`w_i`$ are the variance weights.
771 /// Not scaled by the dispersion for families with a free dispersion parameter.
772 pub fn resid_pear(&self) -> &Array1<F> {
773 self.resid_pear.get_or_init(|| {
774 let residuals = self.resid_resp();
775 let inv_var_diag: Array1<F> = self
776 .y_hat
777 .clone()
778 .mapv_into(M::variance)
779 .mapv_into(F::recip);
780 // the variance weights are the reciprocal of the corresponding variance
781 let scales = self
782 .data
783 .apply_var_weights(inv_var_diag)
784 .mapv_into(num_traits::Float::sqrt);
785 scales * residuals
786 })
787 }
788
789 /// Return the standardized Pearson residuals (internally studentized):
790 ///
791 /// ```math
792 /// r_i^* = \frac{r_i}{\sqrt{\hat\phi(1 - h_i)}}
793 /// ```
794 ///
795 /// where $`r_i`$ is the Pearson residual and $`h_i`$ is the leverage. These are expected
796 /// to have unit variance.
797 pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>, F> {
798 let pearson = self.resid_pear();
799 let phi = self.dispersion();
800 let hat = self.leverage()?;
801 let omh = -hat + F::one();
802 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
803 Ok(pearson / denom)
804 }
805
806 /// Return the response residuals: $`e_i^\text{resp} = y_i - \hat\mu_i`$.
807 pub fn resid_resp(&self) -> Array1<F> {
808 &self.data.y - &self.y_hat
809 }
810
811 /// Return the externally studentized residuals:
812 ///
813 /// ```math
814 /// \tilde{t}_i = \text{sign}(e_i) \sqrt{\frac{D_i^{(-i)}}{\hat\phi^{(-i)}}}
815 /// ```
816 ///
817 /// where $`D_i^{(-i)}`$ and $`\hat\phi^{(-i)}`$ are the LOO deviance and dispersion
818 /// approximated via one-step deletion. Under normality, $`t`$-distributed with
819 /// $`N - K - 1`$ degrees of freedom. This is a robust and general method for outlier
820 /// detection.
821 pub fn resid_student(&self) -> RegressionResult<Array1<F>, F> {
822 let signs = self.resid_resp().mapv(F::signum);
823 let dev_terms_loo: Array1<F> = self.deviance_terms_loo()?;
824 // NOTE: This match could also be handled internally in dispersion_loo()
825 let dev_terms_scaled = match M::DISPERSED {
826 // The dispersion is corrected for the contribution from each current point.
827 // This is an approximation; the exact solution would perform a fit at each point.
828 DispersionType::FreeDispersion => dev_terms_loo / self.dispersion_loo()?,
829 DispersionType::NoDispersion => dev_terms_loo,
830 };
831 Ok(signs * dev_terms_scaled.mapv_into(num_traits::Float::sqrt))
832 }
833
834 /// Returns the working residuals:
835 ///
836 /// ```math
837 /// e_i^\text{work} = g'(\hat\mu_i)\,(y_i - \hat\mu_i) = \frac{y_i - \hat\mu_i}{\eta'(\omega_i)\,V(\hat\mu_i)}
838 /// ```
839 ///
840 /// where $`g'(\mu)`$ is the derivative of the link function and $`\eta'(\omega)`$ is the
841 /// derivative of the natural parameter with respect to the linear predictor. For canonical
842 /// links $`\eta'(\omega) = 1`$, reducing this to $`(y_i - \hat\mu_i)/V(\hat\mu_i)`$.
843 ///
844 /// These can be interpreted as the residual differences mapped into the linear predictor space
845 /// of $`\omega = \mathbf{x}\cdot\boldsymbol{\beta}`$.
846 pub fn resid_work(&self) -> Array1<F> {
847 let resid_response: Array1<F> = self.resid_resp();
848 let var: Array1<F> = self.y_hat.clone().mapv(M::variance);
849 // adjust for non-canonical link functions; we want a total factor of 1/eta'
850 let (adj_response, adj_var) =
851 M::Link::adjust_errors_variance(resid_response, var, &self.x_lin_pred);
852 adj_response / adj_var
853 }
854
855 /// Returns the score function $`\nabla_{\boldsymbol\beta} l`$ (the gradient of the
856 /// regularized log-likelihood) at the parameter values given. Should be zero at the MLE.
857 /// The input and output are in the external (unstandardized) parameter space.
858 pub fn score(&self, params: Array1<F>) -> Array1<F> {
859 // Compute in the internal (standardized) basis so the regularization gradient is applied
860 // to the correct parameters, then transform the result back to external coordinates via
861 // score_ext = J^T score_int where J = d(beta_std)/d(beta_ext).
862 let params_std = self.data.transform_beta(params);
863 let lin_pred: Array1<F> = self.data.linear_predictor_std(¶ms_std);
864 let mu: Array1<F> = M::mean(&lin_pred);
865 let resid_response = &self.data.y - mu;
866 let resid_working = M::Link::adjust_errors(resid_response, &lin_pred);
867 let score_int = self.data.x_conj().dot(&resid_working);
868 let score_int_reg = self.reg.as_ref().gradient(score_int, ¶ms_std);
869 self.data.inverse_transform_score(score_int_reg)
870 }
871
872 /// Returns the score test statistic:
873 ///
874 /// ```math
875 /// S = \mathbf{J}(\boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0)^{-1} \, \mathbf{J}(\boldsymbol\beta_0)
876 /// ```
877 ///
878 /// where $`\mathbf{J}`$ is the score and $`\mathcal{I}`$ is the Fisher information, both
879 /// evaluated at the null parameters. Asymptotically $`\chi^2`$-distributed with
880 /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
881 pub fn score_test(&self) -> RegressionResult<F, F> {
882 let (_, null_params) = self.null_model_fit();
883 // NOTE: The null model is not sensitive to standardization.
884 self.score_test_against(null_params.clone())
885 }
886
    /// Returns the score test statistic compared to another set of model
    /// parameters, not necessarily a null model. The degrees of freedom cannot
    /// be generally inferred.
    pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F, F> {
        let fisher_alt = self.fisher(&alternative);
        let score_alt = self.score(alternative);
        // This is not the same as the cached covariance matrix since it is
        // evaluated at the alternative parameters rather than at the MLE.
        // NOTE: invh/invh_into() are bugged and incorrect!
        let inv_fisher_alt = fisher_alt.inv_into()?;
        // Quadratic form J^T I^{-1} J.
        Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
    }
899
900 /// The degrees of freedom for the likelihood ratio test, the score test,
901 /// and the Wald test. Not to be confused with `ndf()`, the degrees of
902 /// freedom in the model fit.
903 pub fn test_ndf(&self) -> usize {
904 if self.data.has_intercept {
905 self.n_par - 1
906 } else {
907 self.n_par
908 }
909 }
910
911 /// Returns the Wald test statistic:
912 ///
913 /// ```math
914 /// W = (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0) \, (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)
915 /// ```
916 ///
917 /// Compared to a null model with only an intercept (if one is used). Asymptotically
918 /// $`\chi^2`$-distributed with [`test_ndf()`](Self::test_ndf) degrees of freedom.
919 pub fn wald_test(&self) -> F {
920 // The null parameters are all zero except for a possible intercept term
921 // which optimizes the null model.
922 let (_, null_params) = self.null_model_fit();
923 // NOTE: The null model is agnostic to standardization, since it discards all features
924 // except perhaps the intercept.
925 self.wald_test_against(null_params)
926 }
927
928 /// Returns the Wald test statistic compared to another specified model fit
929 /// instead of the null model. The degrees of freedom cannot be generally
930 /// inferred.
931 pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
932 // This could be computed in either the internal or external basis. This implementation
933 // will use internal, so first we just need to transform the input.
934 let alt_std = self.data.transform_beta(alternative.clone());
935 let d_params_std = &self.result_std - &alt_std;
936 // Get the fisher matrix at the *other* result, since this is a test of this fit's
937 // parameters under a model given by the other.
938 let fisher_std: Array2<F> = self.fisher_std(&alt_std);
939 d_params_std.t().dot(&fisher_std.dot(&d_params_std))
940 }
941
942 /// Returns the per-parameter Wald $`z`$-statistic:
943 ///
944 /// ```math
945 /// z_k = \frac{\hat\beta_k}{\sqrt{\text{Cov}[\hat{\boldsymbol\beta}]_{kk}}}
946 /// ```
947 ///
948 /// Since it does not account for covariance between parameters it may not be accurate.
949 pub fn wald_z(&self) -> RegressionResult<Array1<F>, F> {
950 let par_cov = self.covariance()?;
951 let par_variances: ArrayView1<F> = par_cov.diag();
952 Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
953 }
954}
955
956/// Specialized functions for OLS.
957impl<'a, F> Fit<'a, Linear, F>
958where
959 F: 'static + Float,
960{
961 /// Returns the coefficient of determination $`R^2`$:
962 ///
963 /// ```math
964 /// R^2 = 1 - \frac{D}{D_0}
965 /// ```
966 ///
967 /// where $`D`$ is the deviance of the fitted model and $`D_0`$ is the null deviance
968 /// (deviance of the intercept-only model). For Gaussian with no weights this reduces to
969 /// $`1 - \text{RSS}/\text{TSS}`$. Using the deviance ratio correctly handles variance and
970 /// frequency weights.
971 // NOTE: that this implementation would be valid as a pseudo-$`R^2`$ for all GLMs, but it's
972 // mostly expected in the OLS context and probably wouldn't add much.
973 pub fn r_sq(&self) -> F {
974 let lr = self.lr_test();
975 lr / (lr + self.deviance())
976 }
977
978 /// Returns the weighted residual sum of squares (RSS):
979 /// $`\text{RSS} = \sum_i w_i (y_i - \hat\mu_i)^2`$
980 /// where $`w_i`$ combines variance and frequency weights.
981 // NOTE: this implementation would be well-defined for any GLM, but there are typically better
982 // tools for the job so it's not exposed for non-linear families.
983 pub fn rss(&self) -> F {
984 let resid_sq = self.resid_resp().mapv(|r| r * r);
985 self.data.apply_total_weights(resid_sq).sum()
986 }
987}
988
/// Methods that require the `stats` feature for distribution CDF evaluation.
#[cfg(feature = "stats")]
impl<'a, M, F> Fit<'a, M, F>
where
    M: Glm,
    F: 'static + Float,
{
    /// Perform a full re-fit of the model excluding the ith data column and return the deviance
    /// of the reduced fit.
    /// NOTE: Ideally we could return a full Fit object, but as it stands the source data must
    /// outlive the Fit result. It would be nice if we could get around this with some COW
    /// shenanigans but this will probably require a big change. To get around this for now, just
    /// return the deviance, which is all we're using this function for at the moment.
    fn dev_without_covariate(&self, i: usize) -> RegressionResult<F, F> {
        use ndarray::{concatenate, s};

        let is_intercept = self.data.has_intercept && (i == 0);
        let x_reduced: Array2<F> = if is_intercept {
            // If this is the intercept term, we need to use the original unscaled data to avoid
            // mixing the intercept with the other terms under standardization.
            self.data.x_ext().slice(s![.., 1..]).to_owned()
        } else {
            // Drop column i from the (standardized) design matrix.
            concatenate![
                Axis(1),
                self.data.x.slice(s![.., ..i]),
                self.data.x.slice(s![.., i + 1..])
            ]
        };
        // NOTE: This data has been standardized already. Since we are fully dropping a covariate,
        // each data column should still be standardized if the full external set is.
        let data_reduced = Dataset::<F> {
            y: self.data.y.clone(),
            x: x_reduced,
            linear_offset: self.data.linear_offset.clone(),
            weights: self.data.weights.clone(),
            freqs: self.data.freqs.clone(),
            has_intercept: !is_intercept,
            // It shouldn't be necessary to standardize again. Each individual column should
            // already be standardized, if the full dataset was.
            standardizer: None,
        };
        let model_reduced = Model {
            model: PhantomData::<M>,
            data: data_reduced,
        };
        // Start with the non-excluded parameters at the values from the main fit.
        let init_guess = if is_intercept {
            // For the intercept term, we need to use the original unscaled data, or else the
            // transformation mixes the intercept with the other terms. This number is supposed to
            // represent the model deviance from excluding the *external* intercept term.
            self.result.slice(s![1..]).to_owned()
        } else {
            // The data is already standardized in the non-intercept case, so start from the
            // standardized parameters, removing the covariate of interest.
            concatenate![
                Axis(0),
                self.result_std.slice(s![..i]),
                self.result_std.slice(s![i + 1..])
            ]
        };
        let fit_options = {
            let mut fit_options = self.options.clone();
            fit_options.init_guess = Some(init_guess);
            fit_options
        };
        let fit_reduced = model_reduced.with_options(fit_options).fit()?;
        Ok(fit_reduced.deviance())
    }

    /// Returns the p-value for the omnibus likelihood-ratio test (full model vs. null/intercept-only
    /// model).
    ///
    /// The LR statistic is asymptotically $`\chi^2`$-distributed with
    /// [`test_ndf()`](Self::test_ndf) degrees of freedom, so the p-value is the upper-tail
    /// probability:
    ///
    /// ```math
    /// p = 1 - F_{\chi^2}(\Lambda;\, \text{test\_ndf})
    /// ```
    pub fn pvalue_lr_test(&self) -> F {
        use statrs::distribution::{ChiSquared, ContinuousCDF};
        let stat = self.lr_test().to_f64().unwrap();
        let ndf = self.test_ndf() as f64;
        // A chi-squared distribution is undefined for zero degrees of freedom; with no tested
        // parameters the test is vacuous, so report p = 1.
        if ndf == 0.0 {
            return F::one();
        }
        let chi2 = ChiSquared::new(ndf).unwrap();
        F::from(chi2.sf(stat)).unwrap()
    }

    /// Returns per-parameter p-values from the Wald $`z`$-statistics.
    ///
    /// The reference distribution depends on whether the family has a free dispersion parameter:
    ///
    /// - **No dispersion** (logistic, Poisson): standard normal — two-tailed
    ///   $`p_k = 2\bigl[1 - \Phi(|z_k|)\bigr]`$
    /// - **Free dispersion** (linear): Student-$`t`$ with [`ndf()`](Self::ndf) degrees of
    ///   freedom — two-tailed $`p_k = 2\bigl[1 - F_t(|z_k|;\, \text{ndf})\bigr]`$
    ///
    /// IMPORTANT: Note that this test is an approximation for non-linear models and is known to
    /// sometimes yield misleading values compared to an exact test. It is not hard to find it
    /// give p-values that may imply significantly different conclusions for your analysis (e.g.
    /// p<0.07 vs. p<0.02 in one of our tests).
    pub fn pvalue_wald(&self) -> RegressionResult<Array1<F>, F> {
        let z = self.wald_z()?;
        let pvals = match M::DISPERSED {
            DispersionType::NoDispersion => {
                use statrs::distribution::Normal;
                let norm = Normal::standard();
                z.mapv(|zi| {
                    let abs_z = num_traits::Float::abs(zi).to_f64().unwrap();
                    // Two-tailed p-value via the survival function.
                    F::from(2.0 * norm.sf(abs_z)).unwrap()
                })
            }
            DispersionType::FreeDispersion => {
                use statrs::distribution::StudentsT;
                let df = self.ndf().to_f64().unwrap();
                let t_dist = StudentsT::new(0.0, 1.0, df).unwrap();
                z.mapv(|zi| {
                    let abs_z = num_traits::Float::abs(zi).to_f64().unwrap();
                    F::from(2.0 * t_dist.sf(abs_z)).unwrap()
                })
            }
        };
        Ok(pvals)
    }

    /// Returns per-parameter p-values from drop-one analysis of deviance (exact, expensive).
    ///
    /// For each parameter $`k`$, a reduced model is fit with that parameter removed and the
    /// deviance difference $`\Delta D = D_{\text{reduced}} - D_{\text{full}}`$ is used:
    ///
    /// - **No dispersion**: $`p = 1 - F_{\chi^2}(\Delta D;\, 1)`$
    /// - **Free dispersion**: $`F = \Delta D / \hat\phi`$, $`p = 1 - F_F(F;\, 1,\,
    ///   \text{ndf})`$
    ///
    /// For the intercept (if present), the reduced model is fit without an intercept. For all
    /// other parameters, the reduced model is fit with that column removed from the design matrix.
    pub fn pvalue_exact(&self) -> RegressionResult<Array1<F>, F> {
        let n_par = self.n_par;
        let dev_full = self.deviance();
        let phi_full = self.dispersion();
        let mut pvals = Array1::<F>::zeros(n_par);
        // The reference distribution is identical for every parameter, so construct it once
        // outside the loop instead of once per iteration.
        match M::DISPERSED {
            DispersionType::NoDispersion => {
                use statrs::distribution::ChiSquared;
                let chi2 = ChiSquared::new(1.0).unwrap();
                for k in 0..n_par {
                    let delta_d = self.dev_without_covariate(k)? - dev_full;
                    // Use the survival function directly (rather than 1 - CDF) for better
                    // numerical precision in the upper tail, consistent with the other tests.
                    let p = chi2.sf(delta_d.to_f64().unwrap());
                    pvals[k] = F::from(p).unwrap();
                }
            }
            DispersionType::FreeDispersion => {
                use statrs::distribution::FisherSnedecor;
                let ndf_f64 = self.ndf().to_f64().unwrap();
                let f_dist = FisherSnedecor::new(1.0, ndf_f64).unwrap();
                for k in 0..n_par {
                    let delta_d = self.dev_without_covariate(k)? - dev_full;
                    // Scale the deviance difference by the full-model dispersion estimate.
                    let f_stat = delta_d / phi_full;
                    pvals[k] = F::from(f_dist.sf(f_stat.to_f64().unwrap())).unwrap();
                }
            }
        }
        Ok(pvals)
    }
}
1155
#[cfg(feature = "stats")]
impl<'a, M, F> Fit<'a, M, F>
where
    M: Glm + Response,
    F: 'static + Float,
{
    /// Returns the quantile residuals, which are standard-normal distributed by construction if
    /// the residuals are distributed according to the GLM family's response function.
    ///
    /// ```math
    /// q_i = \Phi^{-1}(F(y_i | \mathbf{x}_i \cdot \boldsymbol{\beta}))
    /// ```
    /// where $`\Phi`$ is the standard normal quantile function and $`F`$ is the CDF of the
    /// response distribution, conditioned on the predicted mean.
    ///
    /// Only implemented for families with continuous response distributions. The variance weights
    /// are used to scale the spread for families that use free dispersion. For the Linear model,
    /// this is an expensive way of getting $`\frac{(y_i - \hat \mu_i)}{\sqrt{\hat\phi / w_i}} =
    /// \frac{(y_i - \hat \mu_i)}{\hat\sigma_i}`$.
    ///
    /// These residuals are not standardized/studentized in any way, meaning the impact of each
    /// observation is present in the corresponding response distribution for that point.
    pub fn resid_quantile(&self) -> Array1<F>
    where
        <M as Response>::DistributionType: ContinuousCDF<f64, f64>,
    {
        use ndarray::Zip;
        use statrs::distribution::Normal;

        let normal = Normal::standard();
        let means = self.y_hat.mapv(|m| m.to_f64().unwrap());
        let dispersion: f64 = self.dispersion().to_f64().unwrap();
        // The effective dispersion for each point is scaled down by its variance weight.
        // NOTE: The variance weights will not impact the families with no dispersion, as those do
        // not use the phi parameter in get_distribution().
        let dispersions = self
            .data
            .get_variance_weights()
            .mapv(|w| dispersion / w.to_f64().unwrap());
        Zip::from(&self.data.y)
            .and(&means)
            .and(&dispersions)
            .map_collect(|y, &mean, &phi| {
                // The CDF value is standard-uniform distributed under data following the model
                // assumption; map it through the normal quantile function.
                let u = M::get_distribution(mean, phi).cdf(y.to_f64().unwrap());
                F::from_f64(normal.inverse_cdf(u)).unwrap()
            })
    }
}
1207
1208#[cfg(test)]
1209mod tests {
1210 use super::*;
1211 use crate::{Linear, Logistic, model::ModelBuilder};
1212 use anyhow::Result;
1213 use approx::assert_abs_diff_eq;
1214 use ndarray::Axis;
1215 use ndarray::{concatenate, s};
1216
1217 /// Checks if the test statistics are invariant based upon whether the data is standardized.
1218 #[test]
1219 fn standardization_invariance() -> Result<()> {
1220 let data_y = array![true, false, false, true, true, true, true, false, true];
1221 let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
1222 let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
1223 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
1224 .no_standardize()
1225 .linear_offset(lin_off.clone())
1226 .build()?;
1227 let fit = model.fit()?;
1228 let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x)
1229 .linear_offset(lin_off)
1230 .build()?;
1231 let fit_std = model_std.fit()?;
1232 assert_abs_diff_eq!(&fit.result, &fit_std.result, epsilon = 4.0 * f64::EPSILON);
1233 assert_abs_diff_eq!(
1234 fit.covariance()?,
1235 fit_std.covariance()?,
1236 epsilon = 0.01 * f32::EPSILON as f64
1237 );
1238 let lr = fit.lr_test();
1239 let lr_std = fit_std.lr_test();
1240 assert_abs_diff_eq!(lr, lr_std, epsilon = 4.0 * f64::EPSILON);
1241 assert_abs_diff_eq!(
1242 fit.score_test()?,
1243 fit_std.score_test()?,
1244 epsilon = f32::EPSILON as f64
1245 );
1246 assert_abs_diff_eq!(
1247 fit.wald_test(),
1248 fit_std.wald_test(),
1249 epsilon = 4.0 * f64::EPSILON
1250 );
1251 assert_abs_diff_eq!(fit.aic(), fit_std.aic(), epsilon = 4.0 * f64::EPSILON);
1252 assert_abs_diff_eq!(fit.bic(), fit_std.bic(), epsilon = 4.0 * f64::EPSILON);
1253 assert_abs_diff_eq!(
1254 fit.deviance(),
1255 fit_std.deviance(),
1256 epsilon = 4.0 * f64::EPSILON
1257 );
1258 // The Wald Z-score of the intercept term is also invariant, with the new scaling
1259 // approach, so we can compare the full vectors.
1260 assert_abs_diff_eq!(
1261 fit.wald_z()?,
1262 fit_std.wald_z()?,
1263 epsilon = 0.01 * f32::EPSILON as f64
1264 );
1265 // try p-values
1266 #[cfg(feature = "stats")]
1267 assert_abs_diff_eq!(
1268 fit.pvalue_lr_test(),
1269 fit_std.pvalue_lr_test(),
1270 epsilon = 0.01 * f32::EPSILON as f64
1271 );
1272 #[cfg(feature = "stats")]
1273 assert_abs_diff_eq!(
1274 fit.pvalue_wald()?,
1275 fit_std.pvalue_wald()?,
1276 epsilon = 0.01 * f32::EPSILON as f64
1277 );
1278 #[cfg(feature = "stats")]
1279 assert_abs_diff_eq!(
1280 fit.pvalue_exact()?,
1281 fit_std.pvalue_exact()?,
1282 epsilon = 0.01 * f32::EPSILON as f64
1283 );
1284
1285 // Ensure that the score and fisher functions are identical even when evaluated at another
1286 // point. The fit results are near [0.5, 0.5], so pick somewhere not too close.
1287 let other = array![-0.5, 2.0];
1288 assert_abs_diff_eq!(fit.score(other.clone()), fit_std.score(other.clone()));
1289 assert_abs_diff_eq!(fit.fisher(&other), fit_std.fisher(&other.clone()));
1290
1291 Ok(())
1292 }
1293
    #[test]
    fn null_model() -> Result<()> {
        let data_y = array![true, false, false, true, true];
        // A design matrix with zero feature columns yields an intercept-only (null) model.
        let data_x: Array2<f64> = array![[], [], [], [], []];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        dbg!(fit.n_iter);
        dbg!(&fit.result);
        // with no offsets, the result should be the link function of the mean (3/5 = 0.6 here).
        assert_abs_diff_eq!(
            fit.result[0],
            <Logistic as Glm>::Link::func(0.6),
            epsilon = 4.0 * f64::EPSILON
        );
        let empty_null_like = fit.null_like();
        // No non-intercept parameters means zero test degrees of freedom.
        assert_eq!(fit.test_ndf(), 0);
        dbg!(&fit.model_like);
        let lr = fit.lr_test();
        // Since there is no data, the null likelihood should be identical to
        // the fit likelihood, so the likelihood ratio test should yield zero.
        assert_abs_diff_eq!(lr, 0., epsilon = 4. * f64::EPSILON);

        // Check that the assertions still hold if linear offsets are included.
        let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off)
            .build()?;
        let fit_off = model.fit()?;
        let empty_model_like_off = fit_off.model_like;
        let empty_null_like_off = fit_off.null_like();
        // these two assertions should be equivalent
        assert_abs_diff_eq!(fit_off.lr_test(), 0.);
        assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);

        // check consistency with data provided
        let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
        let fit_with = model.fit()?;
        dbg!(&fit_with.result);
        // The null likelihood of the model with parameters should be the same
        // as the likelihood of the model with only the intercept.
        assert_abs_diff_eq!(empty_null_like, fit_with.null_like());

        Ok(())
    }
1339
1340 #[test]
1341 fn null_like_logistic() -> Result<()> {
1342 // 6 true and 4 false for y_bar = 0.6.
1343 let data_y = array![
1344 true, true, true, true, true, true, false, false, false, false
1345 ];
1346 let ybar: f64 = 0.6;
1347 let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
1348 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1349 let fit = model.fit()?;
1350 let target_null_like = fit
1351 .data
1352 .y
1353 .mapv(|y| {
1354 let eta = (ybar / (1. - ybar)).ln();
1355 y * eta - eta.exp().ln_1p()
1356 })
1357 .sum();
1358 assert_abs_diff_eq!(fit.null_like(), target_null_like);
1359 Ok(())
1360 }
1361
1362 // Check that the deviance is equal to the sum of square deviations for a linear model
1363 #[test]
1364 fn deviance_linear() -> Result<()> {
1365 let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
1366 let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
1367 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1368 let fit = model.fit()?;
1369 // The predicted values of Y given the model.
1370 let pred_y = fit.predict(&data_x, None);
1371 let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
1372 assert_abs_diff_eq!(fit.deviance(), target_dev, epsilon = 4. * f64::EPSILON);
1373 Ok(())
1374 }
1375
    // Check that the deviance and dispersion parameter are equal up to the number of degrees of
    // freedom for a linear model.
    #[test]
    fn deviance_dispersion_eq_linear() -> Result<()> {
        let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
        let data_x = array![
            [0.4, 0.2],
            [0.1, 0.4],
            [-0.1, 0.3],
            [0.5, 0.7],
            [0.4, 0.1],
            [-0.2, -0.3],
            [0.4, -0.1]
        ];
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        let dev = fit.deviance();
        let disp = fit.dispersion();
        let ndf = fit.ndf();
        // For OLS the dispersion estimate is deviance / ndf, so this identity should be exact
        // up to floating-point error.
        assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
        Ok(())
    }
1398
    // Check that the residuals for a linear model are all consistent.
    #[test]
    fn residuals_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![0.8, 1.2, 0.9, 0.8, 1.1, 0.9];
        // the implied variances from the weights (sigma_i = 1 / sqrt(w_i))
        let wgt_sigmas = weights.map(|w: &f64| 1. / w.sqrt());
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;
        let response = fit.resid_resp();
        let resp_scaled = &response / wgt_sigmas;
        let pearson = fit.resid_pear();
        let deviance = fit.resid_dev();
        // For weighted OLS the scaled response, Pearson, and deviance residuals coincide.
        assert_abs_diff_eq!(resp_scaled, pearson);
        assert_abs_diff_eq!(resp_scaled, deviance);
        let pearson_std = fit.resid_pear_std()?;
        let deviance_std = fit.resid_dev_std()?;
        assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);
        // The externally-studentized residuals aren't expected to match the internally-studentized
        // ones.
        let dev_terms_loo = fit.deviance_terms_loo()?;
        let disp_terms_loo = fit.dispersion_loo()?;
        let student = fit.resid_student()?;

        // NOTE: Studentization can't be checked directly in general because the method used is a
        // one-step approximation, however it should be exact in the linear OLS case.
        let n_data = data_y.len();
        // Check that the leave-one-out stats hold literally
        let mut loo_diff: Vec<f64> = Vec::new();
        let mut loo_dev_res: Vec<f64> = Vec::new();
        let mut loo_disp: Vec<f64> = Vec::new();
        for i in 0..n_data {
            // Split out point i and re-fit on the remaining data.
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            let yi = data_y[i];
            let xi = data_x.slice(s![i..i + 1, ..]);
            let wi = weights[i];
            // Predict the held-out point from the LOO fit.
            let yi_pred: f64 = fit_i.predict(&xi, None)[0];
            let disp_i = fit_i.dispersion();
            // The effective variance for point i given its weight.
            let var_i = disp_i / wi;
            let diff_i = yi - yi_pred;
            let res_dev_i = diff_i / var_i.sqrt();
            loo_diff.push(wi * diff_i * diff_i);
            loo_disp.push(disp_i);
            loo_dev_res.push(res_dev_i);
        }
        let loo_diff: Array1<f64> = loo_diff.into();
        let loo_disp: Array1<f64> = loo_disp.into();
        let loo_dev_res: Array1<f64> = loo_dev_res.into();
        assert_abs_diff_eq!(dev_terms_loo, loo_diff, epsilon = 8. * f64::EPSILON);
        assert_abs_diff_eq!(disp_terms_loo, loo_disp, epsilon = 8. * f64::EPSILON);
        assert_abs_diff_eq!(student, loo_dev_res, epsilon = 8. * f64::EPSILON);
        Ok(())
    }
1467
1468 // check the null likelihood for the case where it can be counted exactly.
1469 #[test]
1470 fn null_like_linear() -> Result<()> {
1471 let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
1472 let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
1473 let ybar: f64 = data_y.mean().unwrap();
1474 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1475 let fit = model.fit()?;
1476 // let target_null_like = data_y.mapv(|y| -0.5 * (y - ybar) * (y - ybar)).sum();
1477 let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
1478 // With the saturated likelihood subtracted the null likelihood should
1479 // just be the sum of squared differences from the mean.
1480 // let target_null_like = 0.;
1481 // dbg!(target_null_like);
1482 let fit_null_like = fit.null_like();
1483 assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
1484 assert_eq!(fit.test_ndf(), 1);
1485 assert_abs_diff_eq!(
1486 fit_null_like,
1487 target_null_like,
1488 epsilon = 4.0 * f64::EPSILON
1489 );
1490 Ok(())
1491 }
1492
    // check the leave-one-out one-step for the linear model
    #[test]
    fn loo_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![1.0, 1.2, 0.8, 1.1, 1.0, 0.7];
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;

        let loo_exact = fit.loo_exact()?;

        // One-step coefficient changes; per the notes elsewhere in these tests, the one-step
        // approximation should be exact for the linear (OLS) model.
        let loo_coef: Array2<f64> = fit.infl_coef()?;
        let loo_results = &fit.result - loo_coef;
        let n_data = data_y.len();
        for i in 0..n_data {
            // Re-fit with point i removed and compare against both LOO estimates.
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            assert_abs_diff_eq!(
                loo_exact.row(i),
                &fit_i.result,
                epsilon = f32::EPSILON as f64
            );
            assert_abs_diff_eq!(
                loo_results.row(i),
                &fit_i.result,
                epsilon = f32::EPSILON as f64
            );
        }
        Ok(())
    }
1536
    // check the leave-one-out one-step for the logistic model
    #[test]
    fn loo_logistic() -> Result<()> {
        let data_y = array![false, false, true, true, true, false];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![1.0, 1.2, 0.8, 1.1, 1.0, 0.7];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;
        // Also exercise the exact LOO path with L2 regularization enabled.
        let fit_reg = model.fit_options().l2_reg(0.5).fit()?;

        // NOTE: The one-step approximation fails for non-linear response functions, so we
        // should only test the exact case.
        let loo_exact = fit.loo_exact()?;
        let loo_exact_reg = fit_reg.loo_exact()?;
        let n_data = data_y.len();
        for i in 0..n_data {
            // Re-fit (both unregularized and regularized) with point i removed.
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Logistic>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            let fit_i_reg = model_i.fit_options().l2_reg(0.5).fit()?;
            assert_abs_diff_eq!(
                loo_exact.row(i),
                &fit_i.result,
                epsilon = f32::EPSILON as f64
            );
            assert_abs_diff_eq!(
                loo_exact_reg.row(i),
                &fit_i_reg.result,
                epsilon = f32::EPSILON as f64
            );
        }
        Ok(())
    }
1582
1583 // check the null likelihood where there is no dependence on the X data.
1584 #[test]
1585 fn null_like_logistic_nodep() -> Result<()> {
1586 let data_y = array![true, true, false, false, true, false, false, true];
1587 let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
1588 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1589 let fit = model.fit()?;
1590 let lr = fit.lr_test();
1591 assert_abs_diff_eq!(lr, 0.);
1592 Ok(())
1593 }
1594 // TODO: Test that the statistics behave sensibly under regularization. The
1595 // likelihood ratio test should yield a smaller value.
1596
    // Test the basic caching functions.
1598 #[test]
1599 fn cached_computations() -> Result<()> {
1600 let data_y = array![true, true, false, true, true, false, false, false, true];
1601 let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
1602 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1603 let fit = model.fit()?;
1604 let _null_like = fit.null_like();
1605 let _null_like = fit.null_like();
1606 let _cov = fit.covariance()?;
1607 let _wald = fit.wald_z();
1608 Ok(())
1609 }
1610
1611 // Check the consistency of the various statistical tests for linear
1612 // regression, where they should be the most comparable.
1613 #[test]
1614 fn linear_stat_tests() -> Result<()> {
1615 let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
1616 let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
1617 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1618 let fit = model.fit()?;
1619 let lr = fit.lr_test();
1620 let wald = fit.wald_test();
1621 let score = fit.score_test()?;
1622 let eps = 32.0 * f64::EPSILON;
1623 assert_abs_diff_eq!(lr, wald, epsilon = eps);
1624 assert_abs_diff_eq!(lr, score, epsilon = eps);
1625 // The score vector should be zero at the minimum
1626 assert_abs_diff_eq!(fit.score(fit.result.clone()), array![0., 0.], epsilon = eps,);
1627 Ok(())
1628 }
1629
1630 // The score should be zero at the MLE even with L2 regularization and internal standardization.
1631 #[test]
1632 fn score_zero_at_mle_regularized() -> Result<()> {
1633 let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
1634 let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
1635 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1636 let fit = model.fit_options().l2_reg(0.1).fit()?;
1637 let eps = 1e-8;
1638 assert_abs_diff_eq!(fit.score(fit.result.clone()), array![0., 0.], epsilon = eps);
1639 // Also check with standardization disabled
1640 let model_nostd = ModelBuilder::<Linear>::data(&data_y, &data_x)
1641 .no_standardize()
1642 .build()?;
1643 let fit_nostd = model_nostd.fit_options().l2_reg(0.1).fit()?;
1644 assert_abs_diff_eq!(
1645 fit_nostd.score(fit_nostd.result.clone()),
1646 array![0., 0.],
1647 epsilon = eps
1648 );
1649 // NOTE: Without regularization, the results themselves will not be exactly identical
1650 // through standardization.
1651 Ok(())
1652 }
1653
1654 #[cfg(feature = "stats")]
1655 #[test]
1656 fn linear_quantile_deviance_equivalence() -> Result<()> {
1657 let data_y = array![-0.5, 0.2, -0.2, 1.2, 0.4, 0.6];
1658 let data_x = array![
1659 [-0.3, 1.0],
1660 [0.2, 0.4],
1661 [-0.5, -0.1],
1662 [-0.2, 0.8],
1663 [0.6, 0.1],
1664 [0.1, -0.4]
1665 ];
1666 let var_weights = array![1.0, 0.2, 2.0, 1.5, 0.7, 1.2];
1667 let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
1668 .var_weights(var_weights.clone())
1669 .build()?;
1670 let fit = model.fit_options().l2_reg(0.25).fit()?;
1671 let phi = fit.dispersion();
1672 let std_devs = var_weights.mapv_into(|w| num_traits::Float::sqrt(phi / w));
1673 let scaled_resid = fit.resid_resp() / std_devs;
1674 let res_qua = fit.resid_quantile();
1675 assert_abs_diff_eq!(scaled_resid, res_qua, epsilon = 0.01 * f32::EPSILON as f64);
1676
1677 Ok(())
1678 }
1679
1680 /// Test that `pvalue_exact` computes a valid intercept p-value by comparing against R's
1681 /// `anova(glm(y ~ x - 1), glm(y ~ x), test=...)`.
1682 ///
1683 /// R reference (linear):
1684 /// ```r
1685 /// y <- c(0.3, 1.5, 0.8, 2.1, 1.7, 3.2, 2.5, 0.9)
1686 /// x1 <- c(0.1, 0.5, 0.2, 0.8, 0.6, 1.1, 0.9, 0.3)
1687 /// x2 <- c(0.4, 0.1, 0.3, 0.7, 0.2, 0.5, 0.8, 0.6)
1688 /// anova(glm(y ~ x1 + x2 - 1), glm(y ~ x1 + x2), test = "F")
1689 /// # Pr(>F) = 0.1203156
1690 /// ```
1691 ///
1692 /// R reference (logistic):
1693 /// ```r
1694 /// y <- c(1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1)
1695 /// x <- c(0.5, -0.3, 0.2, 0.8, 0.1, 0.6, -0.1, -0.4, 0.3, 0.4, 0.7, -0.2)
1696 /// anova(glm(y ~ x - 1, family=binomial()), glm(y ~ x, family=binomial()), test = "Chisq")
    /// # Pr(>Chi) = 0.6042003
    /// ```
    ///
    /// NOTE: This test was generated by claude code, hence the ugly hard-coded values.
1701 #[cfg(feature = "stats")]
1702 #[test]
1703 fn pvalue_exact_intercept() -> Result<()> {
1704 use crate::Logistic;
1705
1706 // Linear: intercept F-test should match R's anova() result.
1707 // For Gaussian the exact (F-test) intercept p-value equals the Wald t-test p-value.
1708 let y = array![0.3_f64, 1.5, 0.8, 2.1, 1.7, 3.2, 2.5, 0.9];
1709 let x = array![
1710 [0.1_f64, 0.4],
1711 [0.5, 0.1],
1712 [0.2, 0.3],
1713 [0.8, 0.7],
1714 [0.6, 0.2],
1715 [1.1, 0.5],
1716 [0.9, 0.8],
1717 [0.3, 0.6]
1718 ];
1719 let model = ModelBuilder::<Linear>::data(&y, &x).build()?;
1720 let fit = model.fit()?;
1721 let exact_p = fit.pvalue_exact()?;
1722 assert!(
1723 exact_p[0].is_finite() && exact_p[0] >= 0.0 && exact_p[0] <= 1.0,
1724 "intercept p-value must be in [0, 1]"
1725 );
1726 // R: anova(glm(y ~ x1+x2-1), glm(y ~ x1+x2), test="F") Pr(>F) = 0.1203156
1727 assert_abs_diff_eq!(exact_p[0], 0.1203156, epsilon = 1e-5);
1728
1729 // Logistic: intercept chi-squared test.
1730 let y_bin = array![
1731 true, false, true, true, false, true, false, false, true, false, true, true
1732 ];
1733 let x_bin = array![
1734 0.5_f64, -0.3, 0.2, 0.8, 0.1, 0.6, -0.1, -0.4, 0.3, 0.4, 0.7, -0.2
1735 ]
1736 .insert_axis(Axis(1));
1737 let model_bin = ModelBuilder::<Logistic>::data(&y_bin, &x_bin).build()?;
1738 let fit_bin = model_bin.fit()?;
1739 let exact_p_bin = fit_bin.pvalue_exact()?;
1740 // R: anova(..., test="Chisq") Pr(>Chi) = 0.6042003
1741 assert_abs_diff_eq!(exact_p_bin[0], 0.6042003, epsilon = 1e-4);
1742
1743 Ok(())
1744 }
1745
1746 /// Quantile residuals for Exponential regression are Φ⁻¹(F_Exp(rate=1/μ̂)(y)).
1747 ///
1748 /// Uses the same two-group dataset as the `exp_ex` unit test in `response/exponential.rs`,
1749 /// where the exact MLE is known: group means ȳ₀=2, ȳ₁=4, so μ̂ = [2,2,4,4,4].
1750 /// The expected quantile residuals are computed independently here using the Exp CDF.
1751 #[cfg(feature = "stats")]
1752 #[test]
1753 fn exponential_quantile_residuals() -> Result<()> {
1754 use crate::Exponential;
1755 use statrs::distribution::{ContinuousCDF, Exp, Normal};
1756
1757 let data_y = array![1.0_f64, 3.0, 2.0, 4.0, 6.0];
1758 let data_x = array![[0.0_f64], [0.], [1.0], [1.0], [1.0]];
1759 let model = ModelBuilder::<Exponential>::data(&data_y, &data_x).build()?;
1760 let fit = model.fit()?;
1761
1762 // Exact MLE: μ̂ = [2, 2, 4, 4, 4]. Compute expected q_i = Φ⁻¹(F_Exp(rate=1/μ)(y)).
1763 let mu_hat = array![2.0_f64, 2.0, 4.0, 4.0, 4.0];
1764 let std_norm = Normal::standard();
1765 let expected = Array1::from_iter(data_y.iter().zip(mu_hat.iter()).map(|(&y, &mu)| {
1766 let dist = Exp::new(1.0 / mu).unwrap();
1767 std_norm.inverse_cdf(dist.cdf(y))
1768 }));
1769
1770 assert_abs_diff_eq!(fit.resid_quantile(), expected, epsilon = 1e-10);
1771 Ok(())
1772 }
1773
1774 /// Quantile residuals for Gamma regression are Φ⁻¹(F_Gamma(α=1/φ̂, β=α/μ̂)(y)).
1775 ///
1776 /// Uses the same two-group dataset as the `gamma_ex` unit test in `response/gamma.rs`,
1777 /// where the exact MLE is known: group means ȳ₀=2, ȳ₁=4, so μ̂ = [2,2,4,4,4].
1778 /// Expected quantile residuals are independently computed here using the Gamma CDF with
1779 /// the dispersion estimate from the fit.
1780 #[cfg(feature = "stats")]
1781 #[test]
1782 fn gamma_quantile_residuals() -> Result<()> {
1783 use crate::Gamma;
1784 use statrs::distribution::{ContinuousCDF, Gamma as GammaDist, Normal};
1785
1786 let data_y = array![1.0_f64, 3.0, 2.0, 4.0, 6.0];
1787 let data_x = array![[0.0_f64], [0.], [1.0], [1.0], [1.0]];
1788 let model = ModelBuilder::<Gamma>::data(&data_y, &data_x).build()?;
1789 let fit = model.fit()?;
1790
1791 // Exact MLE: μ̂ = [2, 2, 4, 4, 4]. Compute expected q_i = Φ⁻¹(F_Gamma(α, β)(y))
1792 // where α = 1/φ̂ and β = α/μ̂ (rate parameterization: mean = α/β = μ).
1793 let phi: f64 = fit.dispersion();
1794 let alpha = 1.0 / phi;
1795 let mu_hat = array![2.0_f64, 2.0, 4.0, 4.0, 4.0];
1796 let std_norm = Normal::standard();
1797 let expected = Array1::from_iter(data_y.iter().zip(mu_hat.iter()).map(|(&y, &mu)| {
1798 let dist = GammaDist::new(alpha, alpha / mu).unwrap();
1799 std_norm.inverse_cdf(dist.cdf(y))
1800 }));
1801
1802 assert_abs_diff_eq!(fit.resid_quantile(), expected, epsilon = 1e-10);
1803 Ok(())
1804 }
1805}