ndarray_glm/fit.rs
1//! Stores the fit results of the IRLS regression and provides functions that
2//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.
3
4pub mod options;
5#[cfg(feature = "stats")]
6use crate::response::Response;
7use crate::{
8 Linear,
9 data::{Dataset, one_pad},
10 error::RegressionResult,
11 glm::{DispersionType, Glm},
12 irls::{Irls, IrlsStep},
13 link::{Link, Transform},
14 model::Model,
15 num::Float,
16 regularization::IrlsReg,
17};
18use ndarray::{Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2, array, s};
19use ndarray_linalg::InverseInto;
20use once_cell::unsync::OnceCell; // can be replaced by std::cell::OnceCell upon stabilization
21use options::FitOptions;
22#[cfg(feature = "stats")]
23use statrs::distribution::ContinuousCDF;
24use std::marker::PhantomData;
25
/// The result of a successful GLM fit.
///
/// Holds the MLE coefficients (in both external and internal standardized bases) along with
/// lazily-computed, cached diagnostic quantities (dispersion, residuals, covariance, hat
/// matrix, null model). Cached fields use `OnceCell` and are populated on first access.
pub struct Fit<'a, M, F>
where
    M: Glm,
    F: Float,
{
    model: PhantomData<M>,
    /// The data and model specification used in the fit.
    data: &'a Dataset<F>,
    /// The parameter values that maximize the likelihood as given by the IRLS regression. If the
    /// dataset was internally standardized, this is transformed back
    pub result: Array1<F>,
    /// The parameter values used internally for the standardized data.
    result_std: Array1<F>,
    /// The predicted y-values for the training data.
    y_hat: Array1<F>,
    /// The options used for this fit.
    pub options: FitOptions<F>,
    /// The value of the likelihood function for the fit result.
    pub model_like: F,
    /// The regularizer of the fit
    reg: Box<dyn IrlsReg<F>>,
    /// The number of overall iterations taken in the IRLS.
    pub n_iter: usize,
    /// The history of guesses and likelihoods over the IRLS iterations.
    pub history: Vec<IrlsStep<F>>,
    /// The number of parameters
    n_par: usize,
    // The remaining variables hold cached results and matrices
    /// The estimated dispersion parameter, which is called in many places. For some families this
    /// is just one.
    phi: OnceCell<F>,
    /// The Pearson residuals, a common statistic that is re-used in several other quantities.
    resid_pear: OnceCell<Array1<F>>,
    /// The covariance matrix, using the sandwich approach.
    covariance: OnceCell<Array2<F>>,
    /// The inverse of the regularized Fisher information matrix in the external non-standardized
    /// parameter basis. Used as a component of the sandwich covariance and for influence
    /// calculations. Cached on first access.
    fisher_inv: OnceCell<Array2<F>>,
    /// The inverse of the fisher matrix in standardized internal parameters.
    fisher_std_inv: OnceCell<Array2<F>>,
    /// The hat matrix of the data and fit. Since the calculation requires a matrix inversion of
    /// the fisher information, it is computed only when needed and the value is cached. Access
    /// through the `hat()` function.
    hat: OnceCell<Array2<F>>,
    /// The likelihood and parameters for the null model.
    null_model: OnceCell<(F, Array1<F>)>,
}
75
76impl<'a, M, F> Fit<'a, M, F>
77where
78 M: Glm,
79 F: 'static + Float,
80{
81 /// Returns the Akaike information criterion for the model fit.
82 ///
83 /// ```math
84 /// \text{AIC} = D + 2K - 2\sum_{i} \ln w_{i}
85 /// ```
86 ///
87 /// where $`D`$ is the deviance, $`K`$ is the number of parameters, and $`w_i`$ are the
88 /// variance weights.
89 /// This is unique only to an additive constant, so only differences in AIC are meaningful.
90 pub fn aic(&self) -> F {
91 let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
92 let sum_log_weights = self.data.freq_sum(&log_weights);
93 // NOTE: This is now the unregularized deviance.
94 self.deviance() + F::two() * self.rank() - F::two() * sum_log_weights
95 }
96
97 /// Returns the Bayesian information criterion for the model fit.
98 ///
99 /// ```math
100 /// \text{BIC} = K \ln(n) - 2l - 2\sum_{i} \ln w_{i}
101 /// ```
102 ///
103 /// where $`K`$ is the number of parameters, $`n`$ is the number of observations, and $`l`$ is
104 /// the log-likelihood (including the variance weight normalization terms).
105 // TODO: Also consider the effect of regularization on this statistic.
106 // TODO: Wikipedia suggests that the variance should included in the number
107 // of parameters for multiple linear regression. Should an additional
108 // parameter be included for the dispersion parameter? This question does
109 // not affect the difference between two models fit with the methodology in
110 // this package.
111 pub fn bic(&self) -> F {
112 let log_weights = self.data.get_variance_weights().mapv(num_traits::Float::ln);
113 let sum_log_weights = self.data.freq_sum(&log_weights);
114 let logn = num_traits::Float::ln(self.data.n_obs());
115 logn * self.rank() - F::two() * self.model_like - F::two() * sum_log_weights
116 }
117
118 /// The Cook's distance for each observation, which measures how much the predicted values
119 /// change when leaving out each observation.
120 ///
121 /// ```math
122 /// C_i = \frac{r_i^2 \, h_i}{K \, \hat\phi \, (1 - h_i)^2}
123 /// ```
124 ///
125 /// where $`r_i`$ is the Pearson residual, $`h_i`$ is the leverage, $`K`$ is the rank
126 /// (number of parameters), and $`\hat\phi`$ is the estimated dispersion.
127 pub fn cooks(&self) -> RegressionResult<Array1<F>, F> {
128 let hat = self.leverage()?;
129 let pear_sq = self.resid_pear().mapv(|r| r * r);
130 let h_terms: Array1<F> = hat.mapv_into(|h| {
131 let omh = F::one() - h;
132 h / (omh * omh)
133 });
134 let denom: F = self.rank() * self.dispersion();
135 Ok(pear_sq * h_terms / denom)
136 }
137
138 /// The covariance matrix of the parameter estimates. When no regularization is used, this is:
139 ///
140 /// ```math
141 /// \text{Cov}[\hat{\boldsymbol\beta}] = \hat\phi \, (\mathbf{X}^\mathsf{T}\mathbf{WSX})^{-1}
142 /// ```
143 ///
144 /// When regularization is active, the sandwich form is used to correctly account for the bias
145 /// introduced by the penalty:
146 ///
147 /// ```math
148 /// \text{Cov}[\hat{\boldsymbol\beta}] = \hat\phi \, \mathcal{I}_\text{reg}^{-1} \, \mathcal{I}_\text{data} \, \mathcal{I}_\text{reg}^{-1}
149 /// ```
150 ///
151 /// where $`\mathcal{I}_\text{reg}`$ is the regularized Fisher information and
152 /// $`\mathcal{I}_\text{data}`$ is the unregularized (data-only) Fisher information.
153 /// When unregularized, $`\mathcal{I}_\text{reg} = \mathcal{I}_\text{data}`$ and this reduces
154 /// to the standard form. The result is cached on first access.
155 pub fn covariance(&self) -> RegressionResult<&Array2<F>, F> {
156 self.covariance.get_or_try_init(|| {
157 // The covariance must be multiplied by the dispersion parameter.
158 // For logistic/poisson regression, this is identically 1.
159 // For linear/gamma regression it is estimated from the data.
160 let phi: F = self.dispersion();
161 // NOTE: invh()/invh_into() are bugged and give incorrect values!
162 let f_reg_inv: Array2<F> = self.fisher_inv()?.to_owned();
163 // Use the sandwich form so that regularization doesn't artificially deflate uncertainty.
164 // When unregularized, F_reg = F_data and this reduces to F_data^{-1}.
165 // The unregularized fisher matrix is most easily acquired in terms of the standardized
166 // variables, so apply the external transformation to it.
167 let f_data = self
168 .data
169 .inverse_transform_fisher(self.fisher_data_std(&self.result_std));
170 Ok(f_reg_inv.dot(&f_data).dot(&f_reg_inv) * phi)
171 })
172 }
173
174 /// Returns the deviance of the fit:
175 ///
176 /// ```math
177 /// D = -2 \left[ l(\hat{\boldsymbol\beta}) - l_\text{sat} \right]
178 /// ```
179 ///
180 /// Asymptotically $`\chi^2`$-distributed with [`ndf()`](Self::ndf) degrees of freedom.
181 /// The unregularized likelihood is used.
182 pub fn deviance(&self) -> F {
183 let terms = self.deviance_terms();
184 self.data.freq_sum(&terms)
185 }
186
187 /// Returns the contribution to the deviance from each observation. The total deviance should
188 /// be the sum of all of these. Variance weights are already included, but not frequency
189 /// weights.
190 fn deviance_terms(&self) -> Array1<F> {
191 let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result_std);
192 let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
193 let terms = (ll_sat - ll_terms) * F::two();
194 self.data.apply_var_weights(terms)
195 }
196
197 /// Returns the self-excluded deviance terms, i.e. the deviance of an observation as if the
198 /// model was fit without it. This is a one-step approximation.
199 fn deviance_terms_loo(&self) -> RegressionResult<Array1<F>, F> {
200 let dev_terms = self.deviance_terms();
201 let pear_sq = self.resid_pear().mapv(|r| r * r);
202 let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
203 let result = dev_terms + &hat_rat * (&hat_rat + F::two()) * pear_sq;
204 Ok(result)
205 }
206
    /// The dispersion parameter $`\hat\phi`$ relating the variance to the variance function:
    /// $`\text{Var}[y] = \phi \, V(\mu)`$.
    ///
    /// Identically one for logistic, binomial, and Poisson regression.
    /// For families with a free dispersion (linear, gamma), estimated as:
    ///
    /// ```math
    /// \hat\phi = \frac{D}{\left(1 - \frac{K}{n_\text{eff}}\right) \sum_i w_i}
    /// ```
    ///
    /// which reduces to $`D / (N - K)`$ without variance weights.
    /// The value is computed once and cached.
    pub fn dispersion(&self) -> F {
        *self.phi.get_or_init(|| {
            use DispersionType::*;
            match M::DISPERSED {
                FreeDispersion => {
                    let dev = self.deviance();
                    let p = self.rank();
                    let n_eff = self.data.n_eff();
                    let scaling = if p >= n_eff {
                        // This is the overparameterized regime, which is checked directly instead of
                        // allowing negative values. It's not clear what conditions result in this when
                        // p < N.
                        // NOTE(review): a zero scaling makes `dev / scaling` evaluate to
                        // +infinity (or NaN if dev is also zero) — confirm that this is the
                        // intended sentinel for the overparameterized case.
                        F::zero()
                    } else {
                        (F::one() - p / n_eff) * self.data.sum_weights()
                    };
                    dev / scaling
                }
                // Families with a fixed dispersion (e.g. logistic/Poisson) use phi = 1.
                NoDispersion => F::one(),
            }
        })
    }
240
    /// Return the dispersion terms with the observation(s) at each point excluded from the fit.
    /// This is a one-step approximation; no refit is performed.
    fn dispersion_loo(&self) -> RegressionResult<Array1<F>, F> {
        use DispersionType::*;
        match M::DISPERSED {
            FreeDispersion => {
                // One-step correction: each observation's contribution to the total deviance
                // is removed, inflated by its leverage ratio h/(1-h).
                let pear_sq = self.resid_pear().mapv(|r| r * r);
                let hat_rat = self.leverage()?.mapv(|h| h / (F::one() - h));
                let terms = self.deviance_terms() + hat_rat * pear_sq;
                // Don't apply total weights since the variance weights are already
                // included in the residual terms. However, we do need the frequency weights.
                let terms = self.data.apply_freq_weights(terms);
                // total[i] = D - terms[i]: the deviance with observation i's contribution removed.
                let total: Array1<F> = -terms + self.deviance();
                // The denominator (effective residual dof after deletion) depends on which
                // weights are present.
                let scaled_total: Array1<F> = match &self.data.weights {
                    None => match &self.data.freqs {
                        // Leaving out observation i removes f_i effective observations:
                        // divide by ndf - f_i (written as -(f - ndf)).
                        Some(f) => total / -(f - self.ndf()),
                        // Unit frequencies: each deletion removes exactly one observation.
                        None => total / (self.ndf() - F::one()),
                    },
                    Some(w) => {
                        // With variance weights, use the effective-dof formula based on the
                        // first and second weighted moments v1 = sum(w), v2 = sum(w^2).
                        let v1 = self.data.freq_sum(w);
                        let w2 = w * w;
                        let v2 = self.data.freq_sum(&w2);
                        // The subtracted out terms need the frequency terms as well
                        let f_w = self.data.apply_freq_weights(w.clone());
                        let f_w2 = self.data.apply_freq_weights(w2);
                        // the modifed sums from leaving out the ith observation
                        let v1p = -f_w + v1;
                        let v2p = -f_w2 + v2;
                        let p = self.rank();
                        // Effective dof after deletion: v1' - p * v2'/v1'.
                        let scale = &v1p - v2p / &v1p * p;
                        total / scale
                    }
                };
                Ok(scaled_total)
            }
            // Fixed-dispersion families: phi = 1 for every observation.
            NoDispersion => Ok(Array1::<F>::ones(self.data.y.len())),
        }
    }
278
279 /// Returns the Fisher information (the negative Hessian of the log-likelihood) at the
280 /// parameter values given:
281 ///
282 /// ```math
283 /// \mathcal{I}(\boldsymbol\beta) = \mathbf{X}^\mathsf{T}\mathbf{W}\eta'^2\mathbf{S}\mathbf{X}
284 /// ```
285 ///
286 /// where $`\mathbf{S} = \text{diag}(V(\mu_i))`$ and $`\eta'`$ is the derivative of the natural
287 /// parameter in terms of the linear predictor ($`\eta(\omega) = g_0(g^{-1}(\omega))`$ where
288 /// $`g_0`$ is the canonical link function).
289 /// The regularization is included.
290 pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
291 // Note that fisher() is a public function so it should take the external parameters.
292 let params = self.data.transform_beta(params.clone());
293 // We actually need to futher apply the transformation beyond this, so that *arrays
294 // multiplying this resulting matrix* are hit the right way.
295 let fish_std = self.fisher_std(¶ms);
296 self.data.inverse_transform_fisher(fish_std)
297 }
298
299 /// The inverse of the (regularized) fisher information matrix, in the external parameter
300 /// basis. Used for the influence calculations, and as a component of the sandwich covariance.
301 /// Cached on first access.
302 fn fisher_inv(&self) -> RegressionResult<&Array2<F>, F> {
303 self.fisher_inv.get_or_try_init(|| {
304 let fisher_reg = self.fisher(&self.result);
305 // NOTE: invh/invh_into() are bugged and incorrect!
306 let fish_inv = fisher_reg.inv_into()?;
307 Ok(fish_inv)
308 })
309 }
310
311 /// Compute the data-only (unregularized) Fisher information in the internal standardized basis.
312 fn fisher_data_std(&self, params: &Array1<F>) -> Array2<F> {
313 let lin_pred: Array1<F> = self.data.linear_predictor_std(params);
314 let adj_var: Array1<F> = M::adjusted_variance_diag(&lin_pred);
315 (self.data.x_conj() * &adj_var).dot(&self.data.x)
316 }
317
318 /// Compute the fisher information in terms of the internal (likely standardized)
319 /// coefficients. The regularization is included here.
320 fn fisher_std(&self, params: &Array1<F>) -> Array2<F> {
321 let fisher = self.fisher_data_std(params);
322 self.reg.as_ref().irls_mat(fisher, params)
323 }
324
325 /// The inverse of the (regularized) fisher information matrix, in the internal standardized
326 /// parameter basis.
327 fn fisher_std_inv(&self) -> RegressionResult<&Array2<F>, F> {
328 self.fisher_std_inv.get_or_try_init(|| {
329 let fisher_reg = self.fisher_std(&self.result_std);
330 // NOTE: invh/invh_into() are bugged and incorrect!
331 let fish_inv = fisher_reg.inv_into()?;
332 Ok(fish_inv)
333 })
334 }
335
    /// Returns the hat matrix, also known as the "projection" or "influence" matrix:
    ///
    /// ```math
    /// P_{ij} = \frac{\partial \hat{y}_i}{\partial y_j}
    /// ```
    ///
    /// Orthogonal to the response residuals at the fit result: $`\mathbf{P}(\mathbf{y} -
    /// \hat{\mathbf{y}}) = 0`$.
    /// This version is not symmetric, but the diagonal is invariant to this choice of convention.
    /// Computed lazily (it requires inverting the Fisher information) and cached.
    pub fn hat(&self) -> RegressionResult<&Array2<F>, F> {
        self.hat.get_or_try_init(|| {
            // Do the full computation in terms of the internal parameters, since this observable
            // is not sensitive to the choice of basis.
            let lin_pred = self.data.linear_predictor_std(&self.result_std);
            // Apply the eta' terms manually instead of calling adjusted_variance_diag, because the
            // adjusted variance method applies 2 powers to the variance, while we want one power
            // to the variance and one to the weights.
            // let adj_var = M::adjusted_variance_diag(&lin_pred);

            let mu = M::mean(&lin_pred);
            let var = mu.mapv_into(M::variance);
            let eta_d = M::Link::d_nat_param(&lin_pred);

            let fisher_inv = self.fisher_std_inv()?;

            // the GLM variance and the data weights are put on different sides in this convention:
            // left carries V(mu)*eta' with the raw design, right carries the weighted design
            // times eta'. The resulting product is the (asymmetric) hat matrix.
            let left = (var * &eta_d).insert_axis(Axis(1)) * &self.data.x;
            let right = self.data.x_conj() * &eta_d;
            Ok(left.dot(&fisher_inv.dot(&right)))
        })
    }
367
368 /// The one-step approximation to the change in coefficients from excluding each observation:
369 ///
370 /// ```math
371 /// \Delta\boldsymbol\beta^{(-i)} \approx \frac{1}{1-h_i}\,\mathcal{I}^{-1}\mathbf{x}^{(i)} w_i \eta'_i e_i
372 /// ```
373 ///
374 /// Each row $`i`$ should be subtracted from $`\hat{\boldsymbol\beta}`$ to approximate
375 /// the coefficients that would result from excluding observation $`i`$.
376 /// Exact for linear models; a one-step approximation for nonlinear models.
377 pub fn infl_coef(&self) -> RegressionResult<Array2<F>, F> {
378 // The linear predictor can be acquired in terms of the standardized parameters, but the
379 // rest of the computation should use external.
380 let lin_pred = self.data.linear_predictor_std(&self.result_std);
381 let resid_resp = self.resid_resp();
382 let omh = -self.leverage()? + F::one();
383 let resid_adj = M::Link::adjust_errors(resid_resp, &lin_pred) / omh;
384 let xte = self.data.x_conj_ext() * resid_adj;
385 let fisher_inv = self.fisher_inv()?;
386 let delta_b = xte.t().dot(fisher_inv);
387 Ok(delta_b)
388 }
389
390 /// Returns the leverage $`h_i = P_{ii}`$ for each observation: the diagonal of the hat matrix.
391 /// Indicates the sensitivity of each prediction to its corresponding observation.
392 pub fn leverage(&self) -> RegressionResult<Array1<F>, F> {
393 let hat = self.hat()?;
394 Ok(hat.diag().to_owned())
395 }
396
    /// Returns exact coefficients from leaving each observation out, one-at-a-time.
    /// This is a much more expensive operation than the original regression because a new one is
    /// performed for each observation. Each row of the output is the external-basis coefficient
    /// vector of the refit with observation `i` excluded.
    pub fn loo_exact(&self) -> RegressionResult<Array2<F>, F> {
        // Use the one-step approximation to get good starting points for each exclusion.
        let loo_coef: Array2<F> = self.infl_coef()?;
        // NOTE: These coefficients need to be in terms of external parameters
        let loo_initial = &self.result - loo_coef;
        let mut loo_result = loo_initial.clone();
        let n_obs = self.data.y.len();
        for i in 0..n_obs {
            // Build a dataset identical to the original but with observation i's frequency
            // weight zeroed out, effectively removing it from the fit.
            let data_i: Dataset<F> = {
                // Get the proper X data matrix as it existed before standardization and
                // intercept-padding.
                let x_i = self.data.x_orig();
                // Leave the observation out by setting the frequency weight to zero.
                let mut freqs_i: Array1<F> =
                    self.data.freqs.clone().unwrap_or(Array1::<F>::ones(n_obs));
                freqs_i[i] = F::zero();
                let mut data_i = Dataset {
                    y: self.data.y.clone(),
                    x: x_i,
                    linear_offset: self.data.linear_offset.clone(),
                    weights: self.data.weights.clone(),
                    freqs: Some(freqs_i),
                    // These fields must be set this way, as they are in the ModelBuilder, before
                    // finalize_design_matrix() is called.
                    has_intercept: false,
                    standardizer: None,
                };
                // Re-apply standardization/intercept-padding to match the original model's setup.
                data_i.finalize_design_matrix(
                    self.data.standardizer.is_some(),
                    self.data.has_intercept,
                );
                data_i
            };
            let model_i = Model {
                model: PhantomData::<M>,
                data: data_i,
            };
            let options = {
                let mut options = self.options.clone();
                // The one-step approximation should be a good starting point.
                // Use the external result as it should be re-standardized to the new dataset
                // internally before being passed to IRLS.
                options.init_guess = Some(loo_initial.row(i).to_owned());
                options
            };
            let fit_i = model_i.with_options(options).fit()?;
            // The internal re-fit transforms back to the external scale.
            loo_result.row_mut(i).assign(&fit_i.result);
        }
        Ok(loo_result)
    }
451
452 /// Perform a likelihood-ratio test, returning the statistic:
453 ///
454 /// ```math
455 /// \Lambda = -2 \ln \frac{L_0}{L} = -2(l_0 - l)
456 /// ```
457 ///
458 /// where $`L_0`$ is the null model likelihood (intercept only) and $`L`$ is the fit likelihood.
459 /// By Wilks' theorem, asymptotically $`\chi^2`$-distributed with
460 /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
461 // TODO: Should the effective number of degrees of freedom due to
462 // regularization be taken into account?
463 pub fn lr_test(&self) -> F {
464 // The model likelihood should include regularization terms and there
465 // shouldn't be any in the null model with all non-intercept parameters
466 // set to zero.
467 let null_like = self.null_like();
468 F::from(-2.).unwrap() * (null_like - self.model_like)
469 }
470
471 /// Perform a likelihood-ratio test against a general alternative model, not
472 /// necessarily a null model. The alternative model is regularized the same
473 /// way that the regression resulting in this fit was. The degrees of
474 /// freedom cannot be generally inferred.
475 pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
476 let alt_std = self.data.transform_beta(alternative.clone());
477 let alt_like = M::log_like(self.data, &alt_std);
478 let alt_like_reg = alt_like + self.reg.likelihood(&alt_std);
479 F::two() * (self.model_like - alt_like_reg)
480 }
481
482 /// Returns the residual degrees of freedom in the model, i.e. the number
483 /// of data points minus the number of parameters. Not to be confused with
484 /// `test_ndf()`, the degrees of freedom in the statistical tests of the
485 /// fit parameters.
486 pub fn ndf(&self) -> F {
487 self.data.n_obs() - self.rank()
488 }
489
    /// Construct a `Fit` from the IRLS optimum and iteration state.
    ///
    /// Transforms the standardized optimum (and the iteration history) back to the external
    /// parameter basis for public consumption, caches the training predictions, and
    /// initializes the lazily-computed diagnostic caches as empty.
    pub(crate) fn new(data: &'a Dataset<F>, optimum: IrlsStep<F>, irls: Irls<M, F>) -> Self {
        let IrlsStep {
            guess: result_std,
            like: model_like,
        } = optimum;
        let Irls {
            options,
            reg,
            n_iter,
            history,
            ..
        } = irls;
        // Cache some of these variables that will be used often.
        let n_par = result_std.len();
        // NOTE: This necessarily uses the coefficients directly from the standardized data.
        // Store these predictions as they are commonly used.
        let y_hat = M::mean(&data.linear_predictor_std(&result_std));
        // The public result must be transformed back to the external scale for compatability with
        // the input data.
        let result_ext = data.inverse_transform_beta(result_std.clone());
        // The history also needs to be transformed back to the external scale, since it is public.
        // It shouldn't be used directly by the Fit's methods.
        let history = history
            .into_iter()
            .map(|IrlsStep { guess, like }| IrlsStep {
                guess: data.inverse_transform_beta(guess),
                like,
            })
            .collect();
        Self {
            model: PhantomData,
            data,
            result: result_ext,
            result_std,
            y_hat,
            options,
            model_like,
            reg,
            n_iter,
            history,
            n_par,
            // All cached diagnostics start empty and are filled on first access.
            phi: OnceCell::new(),
            resid_pear: OnceCell::new(),
            covariance: OnceCell::new(),
            fisher_inv: OnceCell::new(),
            fisher_std_inv: OnceCell::new(),
            hat: OnceCell::new(),
            null_model: OnceCell::new(),
        }
    }
540
541 /// Returns the likelihood given the null model, which fixes all parameters
542 /// to zero except the intercept (if it is used). A total of `test_ndf()`
543 /// parameters are constrained.
544 pub fn null_like(&self) -> F {
545 let (null_like, _) = self.null_model_fit();
546 *null_like
547 }
548
    /// Return the likelihood and null model parameters, which will be zero with the possible
    /// exception of the intercept term. Since this can require an additional regression, the
    /// values are cached.
    ///
    /// Three regimes are handled: (1) no linear offset — closed-form via the weighted mean;
    /// (2) offset with a free intercept — a one-parameter refit; (3) offset with no
    /// intercept — closed-form via the offsets themselves.
    fn null_model_fit(&self) -> &(F, Array1<F>) {
        self.null_model
            .get_or_init(|| match &self.data.linear_offset {
                None => {
                    // If there is no linear offset, the natural parameter is
                    // identical for all observations so it is sufficient to
                    // calculate the null likelihood for a single point with y equal
                    // to the average.
                    // The average y
                    let y_bar: F = self.data.weighted_sum(&self.data.y) / self.data.sum_weights();
                    // This approach assumes that the likelihood is in the natural
                    // exponential form as calculated by Glm::log_like_natural(). If that
                    // function is overridden and the values differ significantly, this
                    // approach will give incorrect results. If the likelihood has terms
                    // non-linear in y, then the likelihood must be calculated for every
                    // point rather than averaged.
                    // If the intercept is allowed to maximize the likelihood, the natural
                    // parameter is equal to the link of the expectation. Otherwise it is
                    // the transformation function of zero.
                    let intercept: F = if self.data.has_intercept {
                        M::Link::func(y_bar)
                    } else {
                        F::zero()
                    };
                    // this is a length-one array. This works because the
                    // likelihood contribution is the same for all observations.
                    let nat_par = M::Link::nat_param(array![intercept]);
                    // The null likelihood per observation
                    let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
                    // just multiply the average likelihood by the number of data points, since every term is the same.
                    let null_like_total = self.data.sum_weights() * null_like_one;
                    // Null parameter vector: all zeros except possibly the intercept.
                    let null_params: Array1<F> = {
                        let mut par = Array1::<F>::zeros(self.n_par);
                        par[0] = intercept;
                        par
                    };
                    (null_like_total, null_params)
                }
                Some(off) => {
                    if self.data.has_intercept {
                        // If there are linear offsets and the intercept is allowed
                        // to be free, there is not a major simplification and the
                        // model needs to be re-fit.
                        // the X data is a single column of ones. Since this model
                        // isn't being created by the ModelBuilder, the X data
                        // has to be automatically padded with ones.
                        let data_x_null = Array2::<F>::ones((self.data.y.len(), 1));
                        let null_model = Model {
                            model: std::marker::PhantomData::<M>,
                            data: Dataset::<F> {
                                y: self.data.y.clone(),
                                x: data_x_null,
                                linear_offset: Some(off.clone()),
                                weights: self.data.weights.clone(),
                                freqs: self.data.freqs.clone(),
                                // If we are in this branch it is because an intercept is needed.
                                has_intercept: true,
                                // We don't use standardization for the null model.
                                standardizer: None,
                            },
                        };
                        // TODO: Make this function return an error, although it's
                        // difficult to imagine this case happening.
                        // TODO: Should the tolerance of this fit be stricter?
                        // The intercept should not be regularized
                        let null_fit = null_model
                            .fit_options()
                            // There shouldn't be too much trouble fitting this
                            // single-parameter fit, but there shouldn't be harm in
                            // using the same maximum as in the original model.
                            .max_iter(self.options.max_iter)
                            .fit()
                            .expect("Could not fit null model!");
                        let null_params: Array1<F> = {
                            let mut par = Array1::<F>::zeros(self.n_par);
                            // there is only one parameter in this fit. It should be the same as
                            // the external result since this model doesn't have standardization.
                            par[0] = null_fit.result_std[0];
                            par
                        };
                        (null_fit.model_like, null_params)
                    } else {
                        // If the intercept is fixed to zero, then no minimization is
                        // required. The natural parameters are directly known in terms
                        // of the linear offset. The likelihood must still be summed
                        // over all observations, since they have different offsets.
                        let nat_par = M::Link::nat_param(off.clone());
                        let null_like_terms = ndarray::Zip::from(&self.data.y)
                            .and(&nat_par)
                            .map_collect(|&y, &eta| M::log_like_natural(y, eta));
                        let null_like = self.data.weighted_sum(&null_like_terms);
                        let null_params = Array1::<F>::zeros(self.n_par);
                        (null_like, null_params)
                    }
                }
            })
    }
649
650 /// Returns $`\hat{\mathbf{y}} = g^{-1}(\mathbf{X}\hat{\boldsymbol\beta} + \boldsymbol\omega_0)`$
651 /// given input data $`\mathbf{X}`$ and an optional linear offset
652 /// $`\boldsymbol\omega_0`$. The data need not be the training data.
653 pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
654 where
655 S: Data<Elem = F>,
656 {
657 let lin_pred = if self.data.has_intercept {
658 one_pad(data_x.view())
659 } else {
660 data_x.to_owned()
661 }
662 .dot(&self.result);
663 let lin_pred: Array1<F> = if let Some(off) = &lin_off {
664 lin_pred + *off
665 } else {
666 lin_pred
667 };
668 M::mean(&lin_pred)
669 }
670
671 /// Returns the rank $`K`$ of the model (i.e. the number of parameters)
672 fn rank(&self) -> F {
673 F::from(self.n_par).unwrap()
674 }
675
676 /// Return the deviance residuals for each point in the training data:
677 ///
678 /// ```math
679 /// d_i = \text{sign}(y_i - \hat\mu_i)\sqrt{D_i}
680 /// ```
681 ///
682 /// where $`D_i`$ is the per-observation deviance contribution.
683 /// This is usually a better choice than Pearson residuals for non-linear models.
684 pub fn resid_dev(&self) -> Array1<F> {
685 let signs = self.resid_resp().mapv_into(F::signum);
686 let ll_diff = self.deviance_terms();
687 let dev: Array1<F> = ll_diff.mapv_into(num_traits::Float::sqrt);
688 signs * dev
689 }
690
691 /// Return the standardized deviance residuals (internally studentized):
692 ///
693 /// ```math
694 /// d_i^* = \frac{d_i}{\sqrt{\hat\phi(1 - h_i)}}
695 /// ```
696 ///
697 /// where $`d_i`$ is the deviance residual, $`\hat\phi`$ is the dispersion, and $`h_i`$ is
698 /// the leverage. Generally applicable for outlier detection.
699 pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>, F> {
700 let dev = self.resid_dev();
701 let phi = self.dispersion();
702 let hat: Array1<F> = self.leverage()?;
703 let omh: Array1<F> = -hat + F::one();
704 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
705 Ok(dev / denom)
706 }
707
    /// Return the partial residuals as an n_obs × n_predictors matrix. Each column $`j`$
    /// contains working residuals plus the centered contribution of predictor $`j`$:
    ///
    /// ```math
    /// r^{(j)}_i = r^w_i + (x_{ij} - \bar x_j) \beta_j
    /// ```
    ///
    /// The bar denotes the **fully weighted** column mean (combining both variance and frequency
    /// weights), which is the WLS-consistent choice: it ensures that $`\sum_i w_i r^{(j)}_i = 0`$
    /// for each predictor. R's `residuals(model, type = "partial")` uses only frequency weights in
    /// its centering (excluding variance weights), so results will differ when variance weights are
    /// present. For models without an intercept, no centering is applied. The intercept term is
    /// always excluded from the output.
    pub fn resid_part(&self) -> Array2<F> {
        // Working residuals as a column vector, broadcast against each predictor column below.
        let resid_work_i = self.resid_work().insert_axis(Axis(1));
        let n = resid_work_i.len();

        // Use external (unstandardized) feature columns and coefficients.
        let x_orig = self.data.x_orig();
        // The intercept term is not part of the partial residuals.
        // beta_j becomes a row vector so `x * beta_j` scales each column by its coefficient.
        let beta_j = if self.data.has_intercept {
            self.result.slice(s![1..]).to_owned()
        } else {
            self.result.clone()
        }
        .insert_axis(Axis(0));

        // Center by fully-weighted (variance × frequency) column means. This is the WLS-consistent
        // choice: sum_i w_i r^(j)_i = 0 for each predictor. R excludes variance weights from
        // centering; results will differ from R when variance weights are present with an intercept.
        // No centering for no-intercept models.
        let x_centered: Array2<F> = if self.data.has_intercept {
            // Total per-observation weights, obtained by weighting a vector of ones.
            let wt_factors = self
                .data
                .apply_total_weights(Array1::ones(n))
                .insert_axis(Axis(1));
            let x_mean: Array1<F> = (&x_orig * &wt_factors).sum_axis(Axis(0)) / wt_factors.sum();
            x_orig - x_mean.insert_axis(Axis(0))
        } else {
            x_orig
        };
        resid_work_i + x_centered * beta_j
    }
751
752 /// Return the Pearson residuals for each point in the training data:
753 ///
754 /// ```math
755 /// r_i = \sqrt{w_i} \, \frac{y_i - \hat\mu_i}{\sqrt{V(\hat\mu_i)}}
756 /// ```
757 ///
758 /// where $`V`$ is the variance function and $`w_i`$ are the variance weights.
759 /// Not scaled by the dispersion for families with a free dispersion parameter.
760 pub fn resid_pear(&self) -> &Array1<F> {
761 self.resid_pear.get_or_init(|| {
762 let residuals = self.resid_resp();
763 let inv_var_diag: Array1<F> = self
764 .y_hat
765 .clone()
766 .mapv_into(M::variance)
767 .mapv_into(F::recip);
768 // the variance weights are the reciprocal of the corresponding variance
769 let scales = self
770 .data
771 .apply_var_weights(inv_var_diag)
772 .mapv_into(num_traits::Float::sqrt);
773 scales * residuals
774 })
775 }
776
777 /// Return the standardized Pearson residuals (internally studentized):
778 ///
779 /// ```math
780 /// r_i^* = \frac{r_i}{\sqrt{\hat\phi(1 - h_i)}}
781 /// ```
782 ///
783 /// where $`r_i`$ is the Pearson residual and $`h_i`$ is the leverage. These are expected
784 /// to have unit variance.
785 pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>, F> {
786 let pearson = self.resid_pear();
787 let phi = self.dispersion();
788 let hat = self.leverage()?;
789 let omh = -hat + F::one();
790 let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
791 Ok(pearson / denom)
792 }
793
794 /// Return the response residuals: $`e_i^\text{resp} = y_i - \hat\mu_i`$.
795 pub fn resid_resp(&self) -> Array1<F> {
796 &self.data.y - &self.y_hat
797 }
798
799 /// Return the externally studentized residuals:
800 ///
801 /// ```math
802 /// \tilde{t}_i = \text{sign}(e_i) \sqrt{\frac{D_i^{(-i)}}{\hat\phi^{(-i)}}}
803 /// ```
804 ///
805 /// where $`D_i^{(-i)}`$ and $`\hat\phi^{(-i)}`$ are the LOO deviance and dispersion
806 /// approximated via one-step deletion. Under normality, $`t`$-distributed with
807 /// $`N - K - 1`$ degrees of freedom. This is a robust and general method for outlier
808 /// detection.
809 pub fn resid_student(&self) -> RegressionResult<Array1<F>, F> {
810 let signs = self.resid_resp().mapv(F::signum);
811 let dev_terms_loo: Array1<F> = self.deviance_terms_loo()?;
812 // NOTE: This match could also be handled internally in dispersion_loo()
813 let dev_terms_scaled = match M::DISPERSED {
814 // The dispersion is corrected for the contribution from each current point.
815 // This is an approximation; the exact solution would perform a fit at each point.
816 DispersionType::FreeDispersion => dev_terms_loo / self.dispersion_loo()?,
817 DispersionType::NoDispersion => dev_terms_loo,
818 };
819 Ok(signs * dev_terms_scaled.mapv_into(num_traits::Float::sqrt))
820 }
821
822 /// Returns the working residuals:
823 ///
824 /// ```math
825 /// e_i^\text{work} = g'(\hat\mu_i)\,(y_i - \hat\mu_i) = \frac{y_i - \hat\mu_i}{\eta'(\omega_i)\,V(\hat\mu_i)}
826 /// ```
827 ///
828 /// where $`g'(\mu)`$ is the derivative of the link function and $`\eta'(\omega)`$ is the
829 /// derivative of the natural parameter with respect to the linear predictor. For canonical
830 /// links $`\eta'(\omega) = 1`$, reducing this to $`(y_i - \hat\mu_i)/V(\hat\mu_i)`$.
831 ///
832 /// These can be interpreted as the residual differences mapped into the linear predictor space
833 /// of $`\omega = \mathbf{x}\cdot\boldsymbol{\beta}`$.
834 pub fn resid_work(&self) -> Array1<F> {
835 let lin_pred: Array1<F> = self.data.linear_predictor_std(&self.result_std);
836 let mu: Array1<F> = self.y_hat.clone();
837 let resid_response: Array1<F> = &self.data.y - μ
838 let var: Array1<F> = mu.mapv(M::variance);
839 // adjust for non-canonical link functions; we want a total factor of 1/eta'
840 let (adj_response, adj_var) =
841 M::Link::adjust_errors_variance(resid_response, var, &lin_pred);
842 adj_response / adj_var
843 }
844
845 /// Returns the score function $`\nabla_{\boldsymbol\beta} l`$ (the gradient of the
846 /// regularized log-likelihood) at the parameter values given. Should be zero at the MLE.
847 /// The input and output are in the external (unstandardized) parameter space.
848 pub fn score(&self, params: Array1<F>) -> Array1<F> {
849 // Compute in the internal (standardized) basis so the regularization gradient is applied
850 // to the correct parameters, then transform the result back to external coordinates via
851 // score_ext = J^T score_int where J = d(beta_std)/d(beta_ext).
852 let params_std = self.data.transform_beta(params);
853 let lin_pred: Array1<F> = self.data.linear_predictor_std(¶ms_std);
854 let mu: Array1<F> = M::mean(&lin_pred);
855 let resid_response = &self.data.y - mu;
856 let resid_working = M::Link::adjust_errors(resid_response, &lin_pred);
857 let score_int = self.data.x_conj().dot(&resid_working);
858 let score_int_reg = self.reg.as_ref().gradient(score_int, ¶ms_std);
859 self.data.inverse_transform_score(score_int_reg)
860 }
861
862 /// Returns the score test statistic:
863 ///
864 /// ```math
865 /// S = \mathbf{J}(\boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0)^{-1} \, \mathbf{J}(\boldsymbol\beta_0)
866 /// ```
867 ///
868 /// where $`\mathbf{J}`$ is the score and $`\mathcal{I}`$ is the Fisher information, both
869 /// evaluated at the null parameters. Asymptotically $`\chi^2`$-distributed with
870 /// [`test_ndf()`](Self::test_ndf) degrees of freedom.
871 pub fn score_test(&self) -> RegressionResult<F, F> {
872 let (_, null_params) = self.null_model_fit();
873 self.score_test_against(null_params.clone())
874 }
875
876 /// Returns the score test statistic compared to another set of model
877 /// parameters, not necessarily a null model. The degrees of freedom cannot
878 /// be generally inferred.
879 pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F, F> {
880 let fisher_alt = self.fisher(&alternative);
881 let score_alt = self.score(alternative);
882 // The is not the same as the cached covariance matrix since it is
883 // evaluated at the null parameters.
884 // NOTE: invh/invh_into() are bugged and incorrect!
885 let inv_fisher_alt = fisher_alt.inv_into()?;
886 Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
887 }
888
889 /// The degrees of freedom for the likelihood ratio test, the score test,
890 /// and the Wald test. Not to be confused with `ndf()`, the degrees of
891 /// freedom in the model fit.
892 pub fn test_ndf(&self) -> usize {
893 if self.data.has_intercept {
894 self.n_par - 1
895 } else {
896 self.n_par
897 }
898 }
899
900 /// Returns the Wald test statistic:
901 ///
902 /// ```math
903 /// W = (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)^\mathsf{T} \, \mathcal{I}(\boldsymbol\beta_0) \, (\hat{\boldsymbol\beta} - \boldsymbol\beta_0)
904 /// ```
905 ///
906 /// Compared to a null model with only an intercept (if one is used). Asymptotically
907 /// $`\chi^2`$-distributed with [`test_ndf()`](Self::test_ndf) degrees of freedom.
908 pub fn wald_test(&self) -> F {
909 // The null parameters are all zero except for a possible intercept term
910 // which optimizes the null model.
911 let (_, null_params) = self.null_model_fit();
912 // NOTE: The null model is agnostic to standardization, since it discards all features
913 // except perhaps the intercept.
914 self.wald_test_against(null_params)
915 }
916
917 /// Returns the Wald test statistic compared to another specified model fit
918 /// instead of the null model. The degrees of freedom cannot be generally
919 /// inferred.
920 pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
921 // This could be computed in either the internal or external basis. This implementation
922 // will use internal, so first we just need to transform the input.
923 let alt_std = self.data.transform_beta(alternative.clone());
924 let d_params_std = &self.result_std - &alt_std;
925 // Get the fisher matrix at the *other* result, since this is a test of this fit's
926 // parameters under a model given by the other.
927 let fisher_std: Array2<F> = self.fisher_std(&alt_std);
928 d_params_std.t().dot(&fisher_std.dot(&d_params_std))
929 }
930
931 /// Returns the per-parameter Wald $`z`$-statistic:
932 ///
933 /// ```math
934 /// z_k = \frac{\hat\beta_k}{\sqrt{\text{Cov}[\hat{\boldsymbol\beta}]_{kk}}}
935 /// ```
936 ///
937 /// Since it does not account for covariance between parameters it may not be accurate.
938 pub fn wald_z(&self) -> RegressionResult<Array1<F>, F> {
939 let par_cov = self.covariance()?;
940 let par_variances: ArrayView1<F> = par_cov.diag();
941 Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
942 }
943}
944
945/// Specialized functions for OLS.
946impl<'a, F> Fit<'a, Linear, F>
947where
948 F: 'static + Float,
949{
950 /// Returns the coefficient of determination $`R^2`$:
951 ///
952 /// ```math
953 /// R^2 = 1 - \frac{D}{D_0}
954 /// ```
955 ///
956 /// where $`D`$ is the deviance of the fitted model and $`D_0`$ is the null deviance
957 /// (deviance of the intercept-only model). For Gaussian with no weights this reduces to
958 /// $`1 - \text{RSS}/\text{TSS}`$. Using the deviance ratio correctly handles variance and
959 /// frequency weights.
960 // NOTE: that this implementation would be valid as a pseudo-$`R^2`$ for all GLMs, but it's
961 // mostly expected in the OLS context and probably wouldn't add much.
962 pub fn r_sq(&self) -> F {
963 let lr = self.lr_test();
964 lr / (lr + self.deviance())
965 }
966
967 /// Returns the weighted residual sum of squares (RSS):
968 /// $`\text{RSS} = \sum_i w_i (y_i - \hat\mu_i)^2`$
969 /// where $`w_i`$ combines variance and frequency weights.
970 // NOTE: this implementation would be well-defined for any GLM, but there are typically better
971 // tools for the job so it's not exposed for non-linear families.
972 pub fn rss(&self) -> F {
973 let resid_sq = self.resid_resp().mapv(|r| r * r);
974 self.data.apply_total_weights(resid_sq).sum()
975 }
976}
977
/// Methods that require the `stats` feature for distribution CDF evaluation.
#[cfg(feature = "stats")]
impl<'a, M, F> Fit<'a, M, F>
where
    M: Glm,
    F: 'static + Float,
{
    /// Perform a full re-fit of the model excluding the ith data column, returning the
    /// deviance of the reduced fit.
    /// NOTE: Ideally we could return a full Fit object, but as it stands the source data must
    /// outlive the Fit result. It would be nice if we could get around this with some COW
    /// shenanigans but this will probably require a big change. To get around this for now, just
    /// return the deviance, which is all we're using this function for at the moment.
    fn dev_without_covariate(&self, i: usize) -> RegressionResult<F, F> {
        use ndarray::{concatenate, s};

        let is_intercept = self.data.has_intercept && (i == 0);
        let x_reduced: Array2<F> = if is_intercept {
            // If this is the intercept term, we need to use the original unscaled data to avoid
            // mixing
            self.data.x_ext().slice(s![.., 1..]).to_owned()
        } else {
            concatenate![
                Axis(1),
                self.data.x.slice(s![.., ..i]),
                self.data.x.slice(s![.., i + 1..])
            ]
        };
        // NOTE: This data has been standardized already. Since we are fully dropping a covariate,
        // each data column should still be standardized if the full external set is.
        let data_reduced = Dataset::<F> {
            y: self.data.y.clone(),
            x: x_reduced,
            linear_offset: self.data.linear_offset.clone(),
            weights: self.data.weights.clone(),
            freqs: self.data.freqs.clone(),
            has_intercept: !is_intercept,
            // It shouldn't be necessary to standardize again. Each individual column should
            // already be standardized, if the full dataset was.
            standardizer: None,
        };
        let model_reduced = Model {
            model: PhantomData::<M>,
            data: data_reduced,
        };
        // Start with the non-excluded parameters at the values from the main fit.
        let init_guess = if is_intercept {
            // For the intercept term, we need to use the original unscaled data, or else the
            // transformation mixes the intercept with the other terms. This number is supposed to
            // represent the model deviance from excluding the *external* intercept term.
            self.result.slice(s![1..]).to_owned()
        } else {
            // The data is already standardized in the non-intercept case, so we should start from
            // the standardized parameters, removing the covariate of interest.
            concatenate![
                Axis(0),
                self.result_std.slice(s![..i]),
                self.result_std.slice(s![i + 1..])
            ]
        };
        let fit_options = {
            let mut fit_options = self.options.clone();
            fit_options.init_guess = Some(init_guess);
            fit_options
        };
        let fit_reduced = model_reduced.with_options(fit_options).fit()?;
        Ok(fit_reduced.deviance())
    }

    /// Returns the p-value for the omnibus likelihood-ratio test (full model vs. null/intercept-only
    /// model).
    ///
    /// The LR statistic is asymptotically $`\chi^2`$-distributed with
    /// [`test_ndf()`](Self::test_ndf) degrees of freedom, so the p-value is the upper-tail
    /// probability:
    ///
    /// ```math
    /// p = 1 - F_{\chi^2}(\Lambda;\, \text{test\_ndf})
    /// ```
    pub fn pvalue_lr_test(&self) -> F {
        use statrs::distribution::{ChiSquared, ContinuousCDF};
        let stat = self.lr_test().to_f64().unwrap();
        let ndf = self.test_ndf() as f64;
        // A null model with no tested parameters carries no evidence either way.
        if ndf == 0.0 {
            return F::one();
        }
        let chi2 = ChiSquared::new(ndf).unwrap();
        F::from(chi2.sf(stat)).unwrap()
    }

    /// Returns per-parameter p-values from the Wald $`z`$-statistics.
    ///
    /// The reference distribution depends on whether the family has a free dispersion parameter:
    ///
    /// - **No dispersion** (logistic, Poisson): standard normal — two-tailed
    ///   $`p_k = 2\bigl[1 - \Phi(|z_k|)\bigr]`$
    /// - **Free dispersion** (linear): Student-$`t`$ with [`ndf()`](Self::ndf) degrees of
    ///   freedom — two-tailed $`p_k = 2\bigl[1 - F_t(|z_k|;\, \text{ndf})\bigr]`$
    ///
    /// IMPORTANT: Note that this test is an approximation for non-linear models and is known to
    /// sometimes yield misleading values compared to an exact test. It is not hard to find it
    /// give p-values that may imply significantly different conclusions for your analysis (e.g.
    /// p<0.07 vs. p<0.02 in one of our tests).
    pub fn pvalue_wald(&self) -> RegressionResult<Array1<F>, F> {
        let z = self.wald_z()?;
        let pvals = match M::DISPERSED {
            DispersionType::NoDispersion => {
                use statrs::distribution::Normal;
                let norm = Normal::standard();
                z.mapv(|zi| {
                    let abs_z = num_traits::Float::abs(zi).to_f64().unwrap();
                    F::from(2.0 * norm.sf(abs_z)).unwrap()
                })
            }
            DispersionType::FreeDispersion => {
                use statrs::distribution::StudentsT;
                let df = self.ndf().to_f64().unwrap();
                let t_dist = StudentsT::new(0.0, 1.0, df).unwrap();
                z.mapv(|zi| {
                    let abs_z = num_traits::Float::abs(zi).to_f64().unwrap();
                    F::from(2.0 * t_dist.sf(abs_z)).unwrap()
                })
            }
        };
        Ok(pvals)
    }

    /// Returns per-parameter p-values from drop-one analysis of deviance (exact, expensive).
    ///
    /// For each parameter $`k`$, a reduced model is fit with that parameter removed and the
    /// deviance difference $`\Delta D = D_{\text{reduced}} - D_{\text{full}}`$ is used:
    ///
    /// - **No dispersion**: $`p = 1 - F_{\chi^2}(\Delta D;\, 1)`$
    /// - **Free dispersion**: $`F = \Delta D / \hat\phi`$, $`p = 1 - F_F(F;\, 1,\,
    ///   \text{ndf})`$
    ///
    /// For the intercept (if present), the reduced model is fit without an intercept. For all
    /// other parameters, the reduced model is fit with that column removed from the design matrix.
    pub fn pvalue_exact(&self) -> RegressionResult<Array1<F>, F> {
        let n_par = self.n_par;
        let dev_full = self.deviance();
        let phi_full = self.dispersion();
        let ndf_f64 = self.ndf().to_f64().unwrap();
        let mut pvals = Array1::<F>::zeros(n_par);
        for k in 0..n_par {
            let dev_reduced = self.dev_without_covariate(k)?;
            let delta_d = dev_reduced - dev_full;

            let p = match M::DISPERSED {
                DispersionType::NoDispersion => {
                    use statrs::distribution::ChiSquared;
                    let chi2 = ChiSquared::new(1.0).unwrap();
                    // Use the survival function for the upper tail directly; this is more
                    // precise than 1 - CDF and consistent with the other branch and with
                    // pvalue_lr_test().
                    chi2.sf(delta_d.to_f64().unwrap())
                }
                DispersionType::FreeDispersion => {
                    use statrs::distribution::FisherSnedecor;
                    let f_stat = delta_d / phi_full;
                    let f_dist = FisherSnedecor::new(1.0, ndf_f64).unwrap();
                    f_dist.sf(f_stat.to_f64().unwrap())
                }
            };
            pvals[k] = F::from(p).unwrap();
        }

        Ok(pvals)
    }
}
1144
#[cfg(feature = "stats")]
impl<'a, M, F> Fit<'a, M, F>
where
    M: Glm + Response,
    F: 'static + Float,
{
    /// Returns the quantile residuals, which are standard-normal distributed by construction if
    /// the residuals are distributed according to the GLM family's response function.
    ///
    /// ```math
    /// q_i = \Phi^{-1}(F(y_i | \mathbf{x}_i \cdot \boldsymbol{\beta}))
    /// ```
    /// where $`\Phi`$ is the standard normal quantile function and $`F`$ is the CDF of the
    /// response distribution, conditioned on the predicted mean.
    ///
    /// Only implemented for families with continuous response distributions. The variance weights
    /// are used to scale the spread for families that use free dispersion. For the Linear model,
    /// this is an expensive way of getting $`\frac{(y_i - \hat \mu_i)}{\sqrt{\hat\phi / w_i}} =
    /// \frac{(y_i - \hat \mu_i)}{\hat\sigma_i}`$.
    ///
    /// These residuals are not standardized/studentized in any way, meaning the impact of each
    /// observation is present in the corresponding response distribution for that point.
    pub fn resid_quantile(&self) -> Array1<F>
    where
        <M as Response>::DistributionType: ContinuousCDF<f64, f64>,
    {
        use ndarray::Zip;
        use statrs::distribution::Normal;

        let std_normal = Normal::standard();
        let means = self.y_hat.mapv(|m| m.to_f64().unwrap());
        let dispersion: f64 = self.dispersion().to_f64().unwrap();
        // Per-point dispersion scaled by the variance weight.
        // NOTE: The variance weights will not impact the families with no dispersion, as those
        // do not use the phi parameter in get_distribution().
        let dispersions = self
            .data
            .get_variance_weights()
            .mapv(|w| dispersion / w.to_f64().unwrap());
        Zip::from(&self.data.y)
            .and(&means)
            .and(&dispersions)
            .map_collect(|y, &mu_i, &phi_i| {
                let dist = M::get_distribution(mu_i, phi_i);
                // Standard-uniform distributed under data following the model assumption.
                let u = dist.cdf(y.to_f64().unwrap());
                F::from_f64(std_normal.inverse_cdf(u)).unwrap()
            })
    }
}
1196
1197#[cfg(test)]
1198mod tests {
1199 use super::*;
1200 use crate::{Linear, Logistic, model::ModelBuilder};
1201 use anyhow::Result;
1202 use approx::assert_abs_diff_eq;
1203 use ndarray::Axis;
1204 use ndarray::{concatenate, s};
1205
1206 /// Checks if the test statistics are invariant based upon whether the data is standardized.
1207 #[test]
1208 fn standardization_invariance() -> Result<()> {
1209 let data_y = array![true, false, false, true, true, true, true, false, true];
1210 let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
1211 let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
1212 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
1213 .no_standardize()
1214 .linear_offset(lin_off.clone())
1215 .build()?;
1216 let fit = model.fit()?;
1217 let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x)
1218 .linear_offset(lin_off)
1219 .build()?;
1220 let fit_std = model_std.fit()?;
1221 assert_abs_diff_eq!(&fit.result, &fit_std.result, epsilon = 4.0 * f64::EPSILON);
1222 assert_abs_diff_eq!(
1223 fit.covariance()?,
1224 fit_std.covariance()?,
1225 epsilon = 0.01 * f32::EPSILON as f64
1226 );
1227 let lr = fit.lr_test();
1228 let lr_std = fit_std.lr_test();
1229 assert_abs_diff_eq!(lr, lr_std, epsilon = 4.0 * f64::EPSILON);
1230 assert_abs_diff_eq!(
1231 fit.score_test()?,
1232 fit_std.score_test()?,
1233 epsilon = f32::EPSILON as f64
1234 );
1235 assert_abs_diff_eq!(
1236 fit.wald_test(),
1237 fit_std.wald_test(),
1238 epsilon = 4.0 * f64::EPSILON
1239 );
1240 assert_abs_diff_eq!(fit.aic(), fit_std.aic(), epsilon = 4.0 * f64::EPSILON);
1241 assert_abs_diff_eq!(fit.bic(), fit_std.bic(), epsilon = 4.0 * f64::EPSILON);
1242 assert_abs_diff_eq!(
1243 fit.deviance(),
1244 fit_std.deviance(),
1245 epsilon = 4.0 * f64::EPSILON
1246 );
1247 // The Wald Z-score of the intercept term is also invariant, with the new scaling
1248 // approach, so we can compare the full vectors.
1249 assert_abs_diff_eq!(
1250 fit.wald_z()?,
1251 fit_std.wald_z()?,
1252 epsilon = 0.01 * f32::EPSILON as f64
1253 );
1254 // try p-values
1255 assert_abs_diff_eq!(
1256 fit.pvalue_lr_test(),
1257 fit_std.pvalue_lr_test(),
1258 epsilon = 0.01 * f32::EPSILON as f64
1259 );
1260 assert_abs_diff_eq!(
1261 fit.pvalue_wald()?,
1262 fit_std.pvalue_wald()?,
1263 epsilon = 0.01 * f32::EPSILON as f64
1264 );
1265 assert_abs_diff_eq!(
1266 fit.pvalue_exact()?,
1267 fit_std.pvalue_exact()?,
1268 epsilon = 0.01 * f32::EPSILON as f64
1269 );
1270
1271 // Ensure that the score and fisher functions are identical even when evaluated at another
1272 // point. The fit results are near [0.5, 0.5], so pick somewhere not too close.
1273 let other = array![-0.5, 2.0];
1274 assert_abs_diff_eq!(fit.score(other.clone()), fit_std.score(other.clone()));
1275 assert_abs_diff_eq!(fit.fisher(&other), fit_std.fisher(&other.clone()));
1276
1277 Ok(())
1278 }
1279
    #[test]
    fn null_model() -> Result<()> {
        let data_y = array![true, false, false, true, true];
        // A design matrix with zero columns: the model has no covariates at all,
        // only the implicit intercept.
        let data_x: Array2<f64> = array![[], [], [], [], []];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
        let fit = model.fit()?;
        dbg!(fit.n_iter);
        dbg!(&fit.result);
        // with no offsets, the result should be the link function of the mean.
        assert_abs_diff_eq!(
            fit.result[0],
            <Logistic as Glm>::Link::func(0.6),
            epsilon = 4.0 * f64::EPSILON
        );
        let empty_null_like = fit.null_like();
        // No tested parameters remain once the intercept is excluded.
        assert_eq!(fit.test_ndf(), 0);
        dbg!(&fit.model_like);
        let lr = fit.lr_test();
        // Since there is no data, the null likelihood should be identical to
        // the fit likelihood, so the likelihood ratio test should yield zero.
        assert_abs_diff_eq!(lr, 0., epsilon = 4. * f64::EPSILON);

        // Check that the assertions still hold if linear offsets are included.
        let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .linear_offset(lin_off)
            .build()?;
        let fit_off = model.fit()?;
        let empty_model_like_off = fit_off.model_like;
        let empty_null_like_off = fit_off.null_like();
        // these two assertions should be equivalent
        assert_abs_diff_eq!(fit_off.lr_test(), 0.);
        assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);

        // check consistency with data provided
        let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
        let fit_with = model.fit()?;
        dbg!(&fit_with.result);
        // The null likelihood of the model with parameters should be the same
        // as the likelihood of the model with only the intercept.
        assert_abs_diff_eq!(empty_null_like, fit_with.null_like());

        Ok(())
    }
1325
1326 #[test]
1327 fn null_like_logistic() -> Result<()> {
1328 // 6 true and 4 false for y_bar = 0.6.
1329 let data_y = array![
1330 true, true, true, true, true, true, false, false, false, false
1331 ];
1332 let ybar: f64 = 0.6;
1333 let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
1334 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1335 let fit = model.fit()?;
1336 let target_null_like = fit
1337 .data
1338 .y
1339 .mapv(|y| {
1340 let eta = (ybar / (1. - ybar)).ln();
1341 y * eta - eta.exp().ln_1p()
1342 })
1343 .sum();
1344 assert_abs_diff_eq!(fit.null_like(), target_null_like);
1345 Ok(())
1346 }
1347
1348 // Check that the deviance is equal to the sum of square deviations for a linear model
1349 #[test]
1350 fn deviance_linear() -> Result<()> {
1351 let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
1352 let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
1353 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1354 let fit = model.fit()?;
1355 // The predicted values of Y given the model.
1356 let pred_y = fit.predict(&data_x, None);
1357 let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
1358 assert_abs_diff_eq!(fit.deviance(), target_dev, epsilon = 4. * f64::EPSILON);
1359 Ok(())
1360 }
1361
1362 // Check that the deviance and dispersion parameter are equal up to the number of degrees of
1363 // freedom for a linea model.
1364 #[test]
1365 fn deviance_dispersion_eq_linear() -> Result<()> {
1366 let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
1367 let data_x = array![
1368 [0.4, 0.2],
1369 [0.1, 0.4],
1370 [-0.1, 0.3],
1371 [0.5, 0.7],
1372 [0.4, 0.1],
1373 [-0.2, -0.3],
1374 [0.4, -0.1]
1375 ];
1376 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1377 let fit = model.fit()?;
1378 let dev = fit.deviance();
1379 let disp = fit.dispersion();
1380 let ndf = fit.ndf();
1381 assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
1382 Ok(())
1383 }
1384
    // Check that the residuals for a linear model are all consistent.
    #[test]
    fn residuals_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![0.8, 1.2, 0.9, 0.8, 1.1, 0.9];
        // the implied variances from the weights
        let wgt_sigmas = weights.map(|w: &f64| 1. / w.sqrt());
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;
        // For OLS the response residuals scaled by the per-point sigma should equal
        // both the Pearson and the deviance residuals exactly.
        let response = fit.resid_resp();
        let resp_scaled = &response / wgt_sigmas;
        let pearson = fit.resid_pear();
        let deviance = fit.resid_dev();
        assert_abs_diff_eq!(resp_scaled, pearson);
        assert_abs_diff_eq!(resp_scaled, deviance);
        let pearson_std = fit.resid_pear_std()?;
        let deviance_std = fit.resid_dev_std()?;
        assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);
        // The externally-studentized residuals aren't expected to match the internally-studentized
        // ones.
        let dev_terms_loo = fit.deviance_terms_loo()?;
        let disp_terms_loo = fit.dispersion_loo()?;
        let student = fit.resid_student()?;

        // NOTE: Studentization can't be checked directly in general because the method used is a
        // one-step approximation, however it should be exact in the linear OLS case.
        let n_data = data_y.len();
        // Check that the leave-one-out stats hold literally
        let mut loo_diff: Vec<f64> = Vec::new();
        let mut loo_dev_res: Vec<f64> = Vec::new();
        let mut loo_disp: Vec<f64> = Vec::new();
        for i in 0..n_data {
            // Re-fit with observation i removed by concatenating the pieces on
            // either side of it.
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            // Evaluate the held-out point against the reduced fit.
            let yi = data_y[i];
            let xi = data_x.slice(s![i..i + 1, ..]);
            let wi = weights[i];
            let yi_pred: f64 = fit_i.predict(&xi, None)[0];
            let disp_i = fit_i.dispersion();
            let var_i = disp_i / wi;
            let diff_i = yi - yi_pred;
            let res_dev_i = diff_i / var_i.sqrt();
            loo_diff.push(wi * diff_i * diff_i);
            loo_disp.push(disp_i);
            loo_dev_res.push(res_dev_i);
        }
        let loo_diff: Array1<f64> = loo_diff.into();
        let loo_disp: Array1<f64> = loo_disp.into();
        let loo_dev_res: Array1<f64> = loo_dev_res.into();
        assert_abs_diff_eq!(dev_terms_loo, loo_diff, epsilon = 8. * f64::EPSILON);
        assert_abs_diff_eq!(disp_terms_loo, loo_disp, epsilon = 8. * f64::EPSILON);
        assert_abs_diff_eq!(student, loo_dev_res, epsilon = 8. * f64::EPSILON);
        Ok(())
    }
1453
1454 // check the null likelihood for the case where it can be counted exactly.
1455 #[test]
1456 fn null_like_linear() -> Result<()> {
1457 let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
1458 let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
1459 let ybar: f64 = data_y.mean().unwrap();
1460 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1461 let fit = model.fit()?;
1462 // let target_null_like = data_y.mapv(|y| -0.5 * (y - ybar) * (y - ybar)).sum();
1463 let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
1464 // With the saturated likelihood subtracted the null likelihood should
1465 // just be the sum of squared differences from the mean.
1466 // let target_null_like = 0.;
1467 // dbg!(target_null_like);
1468 let fit_null_like = fit.null_like();
1469 assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
1470 assert_eq!(fit.test_ndf(), 1);
1471 assert_abs_diff_eq!(
1472 fit_null_like,
1473 target_null_like,
1474 epsilon = 4.0 * f64::EPSILON
1475 );
1476 Ok(())
1477 }
1478
    // check the leave-one-out one-step for the linear model
    #[test]
    fn loo_linear() -> Result<()> {
        let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![1.0, 1.2, 0.8, 1.1, 1.0, 0.7];
        let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;

        let loo_exact = fit.loo_exact()?;

        // One-step approximation of the LOO coefficient shifts; compared below against
        // full re-fits (this should be exact for OLS).
        let loo_coef: Array2<f64> = fit.infl_coef()?;
        let loo_results = &fit.result - loo_coef;
        let n_data = data_y.len();
        for i in 0..n_data {
            // Re-fit from scratch with observation i removed.
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            assert_abs_diff_eq!(
                loo_exact.row(i),
                &fit_i.result,
                epsilon = f32::EPSILON as f64
            );
            assert_abs_diff_eq!(
                loo_results.row(i),
                &fit_i.result,
                epsilon = f32::EPSILON as f64
            );
        }
        Ok(())
    }
1522
    // check the leave-one-out one-step for the logistic model
    #[test]
    fn loo_logistic() -> Result<()> {
        let data_y = array![false, false, true, true, true, false];
        let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
        let weights = array![1.0, 1.2, 0.8, 1.1, 1.0, 0.7];
        let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
            .var_weights(weights.clone())
            .build()?;
        let fit = model.fit()?;
        // Also fit with an L2 penalty to exercise the regularized path.
        let fit_reg = model.fit_options().l2_reg(0.5).fit()?;

        // NOTE: The one-step approximation fails for non-linear response functions, so we
        // should only test the exact case.
        let loo_exact = fit.loo_exact()?;
        let loo_exact_reg = fit_reg.loo_exact()?;
        let n_data = data_y.len();
        for i in 0..n_data {
            // Re-fit from scratch with observation i removed.
            let ya = data_y.slice(s![0..i]);
            let yb = data_y.slice(s![i + 1..]);
            let xa = data_x.slice(s![0..i, ..]);
            let xb = data_x.slice(s![i + 1.., ..]);
            let wa = weights.slice(s![0..i]);
            let wb = weights.slice(s![i + 1..]);
            let y_loo = concatenate![Axis(0), ya, yb];
            let x_loo = concatenate![Axis(0), xa, xb];
            let w_loo = concatenate![Axis(0), wa, wb];
            let model_i = ModelBuilder::<Logistic>::data(&y_loo, &x_loo)
                .var_weights(w_loo)
                .build()?;
            let fit_i = model_i.fit()?;
            let fit_i_reg = model_i.fit_options().l2_reg(0.5).fit()?;
            assert_abs_diff_eq!(
                loo_exact.row(i),
                &fit_i.result,
                epsilon = f32::EPSILON as f64
            );
            assert_abs_diff_eq!(
                loo_exact_reg.row(i),
                &fit_i_reg.result,
                epsilon = f32::EPSILON as f64
            );
        }
        Ok(())
    }
1568
1569 // check the null likelihood where there is no dependence on the X data.
1570 #[test]
1571 fn null_like_logistic_nodep() -> Result<()> {
1572 let data_y = array![true, true, false, false, true, false, false, true];
1573 let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
1574 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1575 let fit = model.fit()?;
1576 let lr = fit.lr_test();
1577 assert_abs_diff_eq!(lr, 0.);
1578 Ok(())
1579 }
    // TODO: Test that the statistics behave sensibly under regularization. The
    // likelihood-ratio test should yield a smaller value than in the unregularized fit.
1582
1583 // Test the basic caching funcions.
1584 #[test]
1585 fn cached_computations() -> Result<()> {
1586 let data_y = array![true, true, false, true, true, false, false, false, true];
1587 let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
1588 let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
1589 let fit = model.fit()?;
1590 let _null_like = fit.null_like();
1591 let _null_like = fit.null_like();
1592 let _cov = fit.covariance()?;
1593 let _wald = fit.wald_z();
1594 Ok(())
1595 }
1596
1597 // Check the consistency of the various statistical tests for linear
1598 // regression, where they should be the most comparable.
1599 #[test]
1600 fn linear_stat_tests() -> Result<()> {
1601 let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
1602 let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
1603 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1604 let fit = model.fit()?;
1605 let lr = fit.lr_test();
1606 let wald = fit.wald_test();
1607 let score = fit.score_test()?;
1608 let eps = 32.0 * f64::EPSILON;
1609 assert_abs_diff_eq!(lr, wald, epsilon = eps);
1610 assert_abs_diff_eq!(lr, score, epsilon = eps);
1611 // The score vector should be zero at the minimum
1612 assert_abs_diff_eq!(fit.score(fit.result.clone()), array![0., 0.], epsilon = eps,);
1613 Ok(())
1614 }
1615
1616 // The score should be zero at the MLE even with L2 regularization and internal standardization.
1617 #[test]
1618 fn score_zero_at_mle_regularized() -> Result<()> {
1619 let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
1620 let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
1621 let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
1622 let fit = model.fit_options().l2_reg(0.1).fit()?;
1623 let eps = 1e-8;
1624 assert_abs_diff_eq!(fit.score(fit.result.clone()), array![0., 0.], epsilon = eps);
1625 // Also check with standardization disabled
1626 let model_nostd = ModelBuilder::<Linear>::data(&data_y, &data_x)
1627 .no_standardize()
1628 .build()?;
1629 let fit_nostd = model_nostd.fit_options().l2_reg(0.1).fit()?;
1630 assert_abs_diff_eq!(
1631 fit_nostd.score(fit_nostd.result.clone()),
1632 array![0., 0.],
1633 epsilon = eps
1634 );
1635 // NOTE: Without regularization, the results themselves will not be exactly identical
1636 // through standardization.
1637 Ok(())
1638 }
1639
1640 #[cfg(feature = "stats")]
1641 #[test]
1642 fn linear_quantile_deviance_equivalence() -> Result<()> {
1643 let data_y = array![-0.5, 0.2, -0.2, 1.2, 0.4, 0.6];
1644 let data_x = array![
1645 [-0.3, 1.0],
1646 [0.2, 0.4],
1647 [-0.5, -0.1],
1648 [-0.2, 0.8],
1649 [0.6, 0.1],
1650 [0.1, -0.4]
1651 ];
1652 let var_weights = array![1.0, 0.2, 2.0, 1.5, 0.7, 1.2];
1653 let model = ModelBuilder::<Linear>::data(&data_y, &data_x)
1654 .var_weights(var_weights.clone())
1655 .build()?;
1656 let fit = model.fit_options().l2_reg(0.25).fit()?;
1657 let phi = fit.dispersion();
1658 let std_devs = var_weights.mapv_into(|w| num_traits::Float::sqrt(phi / w));
1659 let scaled_resid = fit.resid_resp() / std_devs;
1660 let res_qua = fit.resid_quantile();
1661 assert_abs_diff_eq!(scaled_resid, res_qua, epsilon = 0.01 * f32::EPSILON as f64);
1662
1663 Ok(())
1664 }
1665
1666 /// Test that `pvalue_exact` computes a valid intercept p-value by comparing against R's
1667 /// `anova(glm(y ~ x - 1), glm(y ~ x), test=...)`.
1668 ///
1669 /// R reference (linear):
1670 /// ```r
1671 /// y <- c(0.3, 1.5, 0.8, 2.1, 1.7, 3.2, 2.5, 0.9)
1672 /// x1 <- c(0.1, 0.5, 0.2, 0.8, 0.6, 1.1, 0.9, 0.3)
1673 /// x2 <- c(0.4, 0.1, 0.3, 0.7, 0.2, 0.5, 0.8, 0.6)
1674 /// anova(glm(y ~ x1 + x2 - 1), glm(y ~ x1 + x2), test = "F")
1675 /// # Pr(>F) = 0.1203156
1676 /// ```
1677 ///
1678 /// R reference (logistic):
1679 /// ```r
1680 /// y <- c(1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1)
1681 /// x <- c(0.5, -0.3, 0.2, 0.8, 0.1, 0.6, -0.1, -0.4, 0.3, 0.4, 0.7, -0.2)
1682 /// anova(glm(y ~ x - 1, family=binomial()), glm(y ~ x, family=binomial()), test = "Chisq")
1683 /// # Pr(>Chi) = 0.6042003
1684 ///
1685 /// NOTE: This test was generated by claude code, hence the ugly hard-coded values.
1686 /// ```
1687 #[cfg(feature = "stats")]
1688 #[test]
1689 fn pvalue_exact_intercept() -> Result<()> {
1690 use crate::Logistic;
1691
1692 // Linear: intercept F-test should match R's anova() result.
1693 // For Gaussian the exact (F-test) intercept p-value equals the Wald t-test p-value.
1694 let y = array![0.3_f64, 1.5, 0.8, 2.1, 1.7, 3.2, 2.5, 0.9];
1695 let x = array![
1696 [0.1_f64, 0.4],
1697 [0.5, 0.1],
1698 [0.2, 0.3],
1699 [0.8, 0.7],
1700 [0.6, 0.2],
1701 [1.1, 0.5],
1702 [0.9, 0.8],
1703 [0.3, 0.6]
1704 ];
1705 let model = ModelBuilder::<Linear>::data(&y, &x).build()?;
1706 let fit = model.fit()?;
1707 let exact_p = fit.pvalue_exact()?;
1708 assert!(
1709 exact_p[0].is_finite() && exact_p[0] >= 0.0 && exact_p[0] <= 1.0,
1710 "intercept p-value must be in [0, 1]"
1711 );
1712 // R: anova(glm(y ~ x1+x2-1), glm(y ~ x1+x2), test="F") Pr(>F) = 0.1203156
1713 assert_abs_diff_eq!(exact_p[0], 0.1203156, epsilon = 1e-5);
1714
1715 // Logistic: intercept chi-squared test.
1716 let y_bin = array![
1717 true, false, true, true, false, true, false, false, true, false, true, true
1718 ];
1719 let x_bin = array![
1720 0.5_f64, -0.3, 0.2, 0.8, 0.1, 0.6, -0.1, -0.4, 0.3, 0.4, 0.7, -0.2
1721 ]
1722 .insert_axis(Axis(1));
1723 let model_bin = ModelBuilder::<Logistic>::data(&y_bin, &x_bin).build()?;
1724 let fit_bin = model_bin.fit()?;
1725 let exact_p_bin = fit_bin.pvalue_exact()?;
1726 // R: anova(..., test="Chisq") Pr(>Chi) = 0.6042003
1727 assert_abs_diff_eq!(exact_p_bin[0], 0.6042003, epsilon = 1e-4);
1728
1729 Ok(())
1730 }
1731}