//! Stores the fit results of the IRLS regression and provides functions that
//! depend on the MLE estimate. These include statistical tests for goodness-of-fit.
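//!
//! A minimal end-to-end sketch (illustrative; the import path assumes this
//! crate's public re-exports and may need adjustment):
//! ```ignore
//! use ndarray::array;
//! use ndarray_glm::{Logistic, ModelBuilder};
//!
//! let y = array![true, false, true, true, false];
//! let x = array![[0.4], [0.2], [0.5], [0.1], [0.6]];
//! let model = ModelBuilder::<Logistic>::data(&y, &x).build().unwrap();
//! // The result of the regression is the `Fit` struct defined in this module.
//! let fit = model.fit().unwrap();
//! println!("coefficients: {}", fit.result);
//! println!("LR test statistic: {}", fit.lr_test());
//! ```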
pub mod options;
use crate::{
error::RegressionResult,
glm::{DispersionType, Glm},
link::{Link, Transform},
model::{Dataset, Model},
num::Float,
Linear,
};
use ndarray::{array, Array1, Array2, ArrayBase, ArrayView1, Axis, Data, Ix2};
use ndarray_linalg::InverseInto;
use options::FitOptions;
use std::{
cell::{Ref, RefCell},
marker::PhantomData,
};
/// the result of a successful GLM fit
pub struct Fit<'a, M, F>
where
M: Glm,
F: Float,
{
model: PhantomData<M>,
/// The data and model specification used in the fit.
data: &'a Dataset<F>,
/// Whether the intercept covariate is used
use_intercept: bool,
/// The parameter values that maximize the likelihood as given by the IRLS regression.
pub result: Array1<F>,
/// The options used for this fit.
pub options: FitOptions<F>,
/// The value of the likelihood function for the fit result.
pub model_like: F,
/// The number of overall iterations taken in the IRLS.
pub n_iter: usize,
/// The number of steps taken in the algorithm, which includes step halving.
pub n_steps: usize,
/// The number of data points
n_data: usize,
/// The number of parameters
n_par: usize,
/// The estimated covariance matrix of the parameters. Since the calculation
/// requires a matrix inversion, it is computed only when needed and the
/// value is cached. Access through the `covariance()` function.
cov: RefCell<Option<Array2<F>>>,
/// The likelihood and parameters for the null model.
null_model: RefCell<Option<(F, Array1<F>)>>,
}
impl<'a, M, F> Fit<'a, M, F>
where
M: Glm,
F: 'static + Float,
{
/// Returns the Akaike information criterion for the model fit.
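/// Defined as `2 * k - 2 * l`, where `k` is the number of parameters and `l`
/// is the (regularized) log-likelihood at the fit result.
///
/// Model-comparison sketch (illustrative; `full_fit` and `reduced_fit` are
/// hypothetical fits of the same data):
/// ```ignore
/// // The model with the smaller AIC is preferred.
/// let prefer_full = full_fit.aic() < reduced_fit.aic();
/// ```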
// TODO: Should an effective number of parameters that takes regularization
// into account be considered?
pub fn aic(&self) -> F {
F::from(2 * self.n_par).unwrap() - F::from(2.).unwrap() * self.model_like
}
/// Returns the Bayesian information criterion for the model fit.
// TODO: Also consider the effect of regularization on this statistic.
// TODO: Wikipedia suggests that the variance should be included in the number
// of parameters for multiple linear regression. Should an additional
// parameter be included for the dispersion parameter? This question does
// not affect the difference between two models fit with the methodology in
// this package.
pub fn bic(&self) -> F {
let logn = num_traits::Float::ln(F::from(self.data.y.len()).unwrap());
logn * F::from(self.n_par).unwrap() - F::from(2.).unwrap() * self.model_like
}
/// The covariance matrix estimated by the Fisher information and the dispersion parameter (for
/// families with a free scale). The matrix is cached to avoid repeating the potentially
/// expensive matrix inversion.
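///
/// Sketch of extracting standard errors (illustrative; assumes a fit `fit`
/// with `F = f64`):
/// ```ignore
/// let cov = fit.covariance().unwrap();
/// // Standard errors are the square roots of the diagonal entries.
/// let std_errs = cov.diag().mapv(f64::sqrt);
/// ```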
pub fn covariance(&self) -> RegressionResult<Ref<Array2<F>>> {
if self.cov.borrow().is_none() {
if self.data.weights.is_some() {
// NOTE: Perhaps it is just the fisher matrix that must be updated.
unimplemented!(
"The covariance calculation must take into account weights/correlations."
);
}
let fisher_reg = self.fisher(&self.result);
// The covariance must be multiplied by the dispersion parameter.
// For logistic/poisson regression, this is identically 1.
// For linear/gamma regression it is estimated from the data.
let phi: F = self.dispersion();
// NOTE: invh/invh_into() are bugged and incorrect!
let unscaled_cov: Array2<F> = fisher_reg.inv_into()?;
let cov = unscaled_cov * phi;
*self.cov.borrow_mut() = Some(cov);
}
Ok(Ref::map(self.cov.borrow(), |x| x.as_ref().unwrap()))
}
/// Returns the deviance of the fit: twice the difference between the
/// saturated likelihood and the model likelihood. Asymptotically this follows
/// a chi-squared distribution with `self.ndf()` degrees of freedom.
/// Note that the regularized likelihood is used here.
// TODO: This is likely sensitive to regularization because the saturated
// model is not regularized but the model likelihood is. Perhaps this can be
// accounted for with an effective number of degrees of freedom.
pub fn deviance(&self) -> F {
// Note that this must change if the GLM likelihood subtracts the
// saturated one already.
F::from(2.).unwrap() * (self.data.y.mapv(M::log_like_sat).sum() - self.model_like)
}
/// The dispersion parameter (typically denoted `phi`), which relates the variance of the `y`
/// values with the variance of the response distribution: `Var[y] = phi * Var[mu]`.
/// Identically one for logistic, binomial, and Poisson regression.
/// For others (linear, gamma) the dispersion parameter is estimated from the data.
/// This is equal to the total deviance divided by the degrees of freedom. For OLS linear
/// regression this is equal to the sum of `(y_i - mu_i)^2 / (n-p)`, an estimate of `sigma^2`;
/// with no covariates it is equal to the sample variance.
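///
/// Sketch of the relation to the deviance (illustrative; assumes a linear fit
/// `fit` with `F = f64`, i.e. a family with a free dispersion):
/// ```ignore
/// let phi = fit.dispersion();
/// // The dispersion is the deviance per residual degree of freedom.
/// assert!((phi * fit.ndf() as f64 - fit.deviance()).abs() < 1e-10);
/// ```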
pub fn dispersion(&self) -> F {
use DispersionType::*;
match M::DISPERSED {
FreeDispersion => {
let ndf: F = F::from(self.ndf()).unwrap();
let dev = self.deviance();
dev / ndf
}
NoDispersion => F::one(),
}
}
/// Returns the errors in the response variables for the data passed as an
/// argument given the current model fit.
fn errors(&self, data: &Dataset<F>) -> Array1<F> {
&data.y - &self.predict(&data.x, data.linear_offset.as_ref())
}
#[deprecated(since = "0.0.10", note = "use predict() instead")]
pub fn expectation<S>(
&self,
data_x: &ArrayBase<S, Ix2>,
lin_off: Option<&Array1<F>>,
) -> Array1<F>
where
S: Data<Elem = F>,
{
self.predict(data_x, lin_off)
}
/// Returns the fisher information (the negative hessian of the likelihood)
/// at the parameter values given. The regularization is included.
pub fn fisher(&self, params: &Array1<F>) -> Array2<F> {
let lin_pred: Array1<F> = self.data.linear_predictor(params);
let mu: Array1<F> = M::mean(&lin_pred);
let var_diag: Array1<F> = mu.mapv_into(M::variance);
// adjust the variance for non-canonical link functions
let eta_d = M::Link::d_nat_param(&lin_pred);
let adj_var: Array1<F> = &eta_d * &var_diag * eta_d;
// calculate the fisher matrix
let fisher: Array2<F> = (&self.data.x.t() * &adj_var).dot(&self.data.x);
// Regularize the fisher matrix
self.options.reg.as_ref().irls_mat(fisher, params)
}
/// Perform a likelihood-ratio test, returning the statistic -2*ln(L_0/L)
/// where L_0 is the likelihood of the best-fit null model (with no
/// parameters but the intercept) and L is the likelihood of the fit result.
/// The number of degrees of freedom of this statistic, equal to the number
/// of parameters fixed to zero to form the null model, is `test_ndf()`. By
/// Wilks' theorem this statistic is asymptotically chi-squared distributed
/// with this number of degrees of freedom.
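///
/// Usage sketch (illustrative): the statistic would typically be compared to a
/// chi-squared distribution with `test_ndf()` degrees of freedom, using a CDF
/// from an external statistics crate.
/// ```ignore
/// let stat = fit.lr_test();
/// let dof = fit.test_ndf();
/// // p_value = 1 - chi_squared_cdf(stat, dof), using an external implementation.
/// ```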
// TODO: Should the effective number of degrees of freedom due to
// regularization be taken into account? Should the degrees of freedom be a
// float?
pub fn lr_test(&self) -> F {
// The model likelihood should include regularization terms and there
// shouldn't be any in the null model with all non-intercept parameters
// set to zero.
let null_like = self.null_like();
F::from(-2.).unwrap() * (null_like - self.model_like)
}
/// Perform a likelihood-ratio test against a general alternative model, not
/// necessarily a null model. The alternative model is regularized the same
/// way that the regression resulting in this fit was. The degrees of
/// freedom cannot be generally inferred.
pub fn lr_test_against(&self, alternative: &Array1<F>) -> F {
let alt_like = M::log_like_reg(self.data, alternative, self.options.reg.as_ref());
F::from(2.).unwrap() * (self.model_like - alt_like)
}
/// Returns the residual degrees of freedom in the model, i.e. the number
/// of data points minus the number of parameters. Not to be confused with
/// `test_ndf()`, the degrees of freedom in the statistical tests of the
/// fit.
pub fn ndf(&self) -> usize {
self.n_data - self.n_par
}
pub(crate) fn new(
data: &'a Dataset<F>,
use_intercept: bool,
result: Array1<F>,
options: FitOptions<F>,
model_like: F,
n_iter: usize,
n_steps: usize,
) -> Self {
if !model_like.is_nan()
&& model_like != M::log_like_reg(data, &result, options.reg.as_ref())
{
eprintln!("Model likelihood does not match result! There is an error in the GLM fitting code.");
dbg!(&result);
dbg!(model_like);
dbg!(n_iter);
dbg!(n_steps);
}
// Cache some of these variables that will be used often.
let n_par = result.len();
let n_data = data.y.len();
Self {
model: PhantomData,
data,
use_intercept,
result,
options,
model_like,
n_iter,
n_steps,
n_data,
n_par,
cov: RefCell::new(None),
null_model: RefCell::new(None),
}
}
/// Returns the likelihood given the null model, which fixes all parameters
/// to zero except the intercept (if it is used). A total of `test_ndf()`
/// parameters are constrained.
fn null_like(&self) -> F {
let (null_like, _) = self.null_model_fit();
null_like
}
/// Return the likelihood and intercept for the null model. Since this can
/// require an additional regression, the values are cached.
fn null_model_fit(&self) -> (F, Array1<F>) {
// TODO: make a result instead of allowing a potential panic in the borrow.
if self.null_model.borrow().is_none() {
let (null_like, null_intercept): (F, Array1<F>) = match &self.data.linear_offset {
None => {
// If there is no linear offset, the natural parameter is
// identical for all observations so it is sufficient to
// calculate the null likelihood for a single point with y equal
// to the average.
// The average y
let y_bar: F = self
.data
.y
.mean()
.expect("Should be able to take average of y values");
// This approach assumes that the likelihood is in the natural
// exponential form as calculated by Glm::log_like_natural(). If that
// function is overridden and the values differ significantly, this
// approach will give incorrect results. If the likelihood has terms
// non-linear in y, then the likelihood must be calculated for every
// point rather than averaged.
// If the intercept is allowed to maximize the likelihood, the natural
// parameter is equal to the link of the expectation. Otherwise it is
// the transformation function of zero.
let intercept: F = if self.use_intercept {
M::Link::func(y_bar)
} else {
F::zero()
};
// this is a length-one array. This works because the
// likelihood contribution is the same for all observations.
let nat_par = M::Link::nat_param(array![intercept]);
// The null likelihood per observation
let null_like_one: F = M::log_like_natural(y_bar, nat_par[0]);
// just multiply the average likelihood by the number of data points, since every term is the same.
let null_like_total = F::from(self.n_data).unwrap() * null_like_one;
let null_params: Array1<F> = {
let mut par = Array1::<F>::zeros(self.n_par);
par[0] = intercept;
par
};
(null_like_total, null_params)
}
Some(off) => {
if self.use_intercept {
// If there are linear offsets and the intercept is allowed
// to be free, there is not a major simplification and the
// model needs to be re-fit.
// the X data is a single column of ones. Since this model
// isn't being created by the ModelBuilder, the X data
// has to be automatically padded with ones.
let data_x_null = Array2::<F>::ones((self.n_data, 1));
let null_model = Model {
model: std::marker::PhantomData::<M>,
data: Dataset::<F> {
y: self.data.y.clone(),
x: data_x_null,
linear_offset: Some(off.clone()),
weights: self.data.weights.clone(),
// If we are here it is because an intercept is needed.
hat: RefCell::new(None),
},
use_intercept: true,
};
// TODO: Make this function return an error, although it's
// difficult to imagine this case happening.
// TODO: Should the tolerance of this fit be stricter?
// The intercept should not be regularized
let null_fit = null_model
.fit_options()
// There shouldn't be too much trouble fitting this
// single-parameter fit, but there shouldn't be harm in
// using the same maximum as in the original model.
.max_iter(self.options.max_iter)
.fit()
.expect("Could not fit null model!");
let null_params: Array1<F> = {
let mut par = Array1::<F>::zeros(self.n_par);
// there is only one parameter in this fit.
par[0] = null_fit.result[0];
par
};
(null_fit.model_like, null_params)
} else {
// If the intercept is fixed to zero, then no minimization is
// required. The natural parameters are directly known in terms
// of the linear offset. The likelihood must still be summed
// over all observations, since they have different offsets.
let nat_par = M::Link::nat_param(off.clone());
let null_like = ndarray::Zip::from(&self.data.y)
.and(&nat_par)
.map_collect(|&y, &eta| M::log_like_natural(y, eta))
.sum();
let null_params = Array1::<F>::zeros(self.n_par);
(null_like, null_params)
}
}
};
*self.null_model.borrow_mut() = Some((null_like, null_intercept));
}
self.null_model
.borrow()
.as_ref()
.expect("the null model should be cached now")
.clone()
}
/// Returns the expected value of Y given the input data X. This data need
/// not be the training data, so an option for linear offsets is provided.
/// Panics if the number of covariates in the data matrix is not consistent
/// with the training set. The data matrix may need to be padded by ones if
/// it is not part of a Model. The `utility::one_pad()` function facilitates
/// this.
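///
/// Usage sketch (illustrative; `new_x` is a hypothetical covariate matrix
/// without a ones column, and the `one_pad` import path is assumed):
/// ```ignore
/// use ndarray_glm::utility::one_pad;
/// // Pad with a column of ones so the intercept term is included.
/// let y_pred = fit.predict(&one_pad(new_x.view()), None);
/// ```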
pub fn predict<S>(&self, data_x: &ArrayBase<S, Ix2>, lin_off: Option<&Array1<F>>) -> Array1<F>
where
S: Data<Elem = F>,
{
let lin_pred: Array1<F> = data_x.dot(&self.result);
let lin_pred: Array1<F> = if let Some(off) = &lin_off {
lin_pred + *off
} else {
lin_pred
};
lin_pred.mapv_into(M::Link::func_inv)
}
/// Return the deviance residuals for each point in the training data.
/// Equal to `sign(y-E[y|x])*sqrt(-2*(L[y|x] - L_sat[y]))`.
/// These are usually a better choice than response residuals for non-linear models.
/// NaNs might be possible if L[y|x] > L_sat[y] due to floating-point operations. These are
/// not checked or clipped right now.
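///
/// Sketch (illustrative): for an ordinary linear model the deviance, Pearson,
/// and response residuals should all coincide.
/// ```ignore
/// let dev = linear_fit.resid_dev();
/// let pear = linear_fit.resid_pear();
/// let resp = linear_fit.resid_resp();
/// // All three agree element-wise up to floating-point error.
/// ```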
pub fn resid_dev(&self) -> Array1<F> {
let signs = self.resid_resp().mapv_into(F::signum);
let ll_terms: Array1<F> = M::log_like_terms(self.data, &self.result);
let ll_sat: Array1<F> = self.data.y.mapv(M::log_like_sat);
let neg_two = F::from(-2.).unwrap();
let ll_diff = (ll_terms - ll_sat) * neg_two;
let dev: Array1<F> = ll_diff.mapv_into(num_traits::Float::sqrt);
signs * dev
}
/// Return the standardized deviance residuals, also known as the "internally studentized
/// deviance residuals". This is generally applicable for outlier detection, although the
/// influence of each point on the fit is only approximately accounted for.
/// `d / sqrt(phi * (1 - h))` where `d` is the deviance residual, phi is the dispersion (e.g.
/// sigma^2 for linear regression, 1 for logistic regression), and h is the leverage.
pub fn resid_dev_std(&self) -> RegressionResult<Array1<F>> {
let dev = self.resid_dev();
let phi = self.dispersion();
let hat: Array1<F> = self.data.leverage()?;
let omh: Array1<F> = -hat + F::one();
let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
Ok(dev / denom)
}
/// Return the partial residuals.
pub fn resid_part(&self) -> Array1<F> {
let x_mean = self.data.x.mean_axis(Axis(0)).expect("empty dataset");
let x_centered = &self.data.x - x_mean.insert_axis(Axis(0));
self.resid_work() + x_centered.dot(&self.result)
}
/// Return the Pearson residuals for each point in the training data.
/// This is equal to `(y - E[y])/sqrt(V(E[y]))`, where V is the variance function.
/// These are not scaled by the sample standard deviation for families with a free dispersion
/// parameter like linear regression.
pub fn resid_pear(&self) -> Array1<F> {
let mu: Array1<F> = self.predict(&self.data.x, self.data.linear_offset.as_ref());
let residuals = &self.data.y - &mu;
let var_diag: Array1<F> = mu.mapv_into(M::variance);
let std: Array1<F> = var_diag.mapv_into(num_traits::Float::sqrt);
residuals / std
}
/// Return the standardized Pearson residuals for every observation.
/// Also known as the "internally studentized Pearson residuals".
/// (y - E[y]) / (sqrt(Var[y] * (1 - h))) where h is a vector representing the leverage for
/// each observation.
pub fn resid_pear_std(&self) -> RegressionResult<Array1<F>> {
let pearson = self.resid_pear();
let phi = self.dispersion();
let hat = self.data.leverage()?;
let omh = -hat + F::one();
let denom: Array1<F> = (omh * phi).mapv_into(num_traits::Float::sqrt);
Ok(pearson / denom)
}
/// Return the response residuals, or fitting deviation, for each data point in the fit; that
/// is, the difference y - E[y|x] where the expectation value is the y value predicted by the
/// model given x.
pub fn resid_resp(&self) -> Array1<F> {
self.errors(self.data)
}
/// Return the studentized residuals, which are the changes in the fit likelihood resulting
/// from leaving each observation out. This is a robust and general method for outlier
/// detection, although a one-step approximation is used to avoid re-fitting the model
/// completely for each observation.
/// If the linear errors are standard normally distributed then this statistic should follow a
/// t-distribution with `self.ndf() - 1` degrees of freedom.
pub fn resid_student(&self) -> RegressionResult<Array1<F>> {
let r_dev = self.resid_dev();
let r_pear = self.resid_pear();
let signs = r_pear.mapv(F::signum);
let r_dev_sq = r_dev.mapv_into(|x| x * x);
let r_pear_sq = r_pear.mapv_into(|x| x * x);
let hat = self.data.leverage()?;
let omh = -hat.clone() + F::one();
let sum_quad = &r_dev_sq + hat * r_pear_sq / &omh;
let sum_quad_scaled = match M::DISPERSED {
// The dispersion is corrected for the contribution from each current point.
// This is an approximation; the exact solution would perform a fit at each point.
DispersionType::FreeDispersion => {
let dev = self.deviance();
let dof = F::from(self.ndf() - 1).unwrap();
let phi_i: Array1<F> = (-r_dev_sq / &omh + dev) / dof;
sum_quad / phi_i
}
DispersionType::NoDispersion => sum_quad,
};
Ok(signs * sum_quad_scaled.mapv_into(num_traits::Float::sqrt))
}
/// Returns the working residuals `d\eta/d\mu * (y - E[y|x])`.
/// This should be equal to the response residuals divided by the variance function (as
/// opposed to the square root of the variance as in the Pearson residuals).
pub fn resid_work(&self) -> Array1<F> {
let lin_pred: Array1<F> = self.data.linear_predictor(&self.result);
let mu: Array1<F> = lin_pred.mapv(M::Link::func_inv);
let resid_response: Array1<F> = &self.data.y - mu;
let d_eta: Array1<F> = M::Link::d_nat_param(&lin_pred);
d_eta * resid_response
}
/// Returns the score function (the gradient of the likelihood) at the
/// parameter values given. It should be zero, within floating-point error,
/// at the fit result.
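///
/// Sketch (illustrative; assumes `F = f64`): the score evaluated at the fit
/// result should vanish up to the convergence tolerance.
/// ```ignore
/// let grad = fit.score(&fit.result);
/// assert!(grad.iter().all(|g| g.abs() < 1e-6));
/// ```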
pub fn score(&self, params: &Array1<F>) -> Array1<F> {
// This represents the predictions given the input parameters, not the
// fit parameters.
let lin_pred: Array1<F> = self.data.linear_predictor(params);
let mu: Array1<F> = M::mean(&lin_pred);
let resid_response = &self.data.y - mu;
// adjust for non-canonical link functions.
let eta_d = M::Link::d_nat_param(&lin_pred);
let resid_working = eta_d * resid_response;
let score_unreg = self.data.x.t().dot(&resid_working);
self.options.reg.as_ref().gradient(score_unreg, params)
}
/// Returns the score test statistic. This statistic is asymptotically
/// chi-squared distributed with `test_ndf()` degrees of freedom.
pub fn score_test(&self) -> RegressionResult<F> {
let (_, null_params) = self.null_model_fit();
self.score_test_against(null_params)
}
/// Returns the score test statistic compared to another set of model
/// parameters, not necessarily a null model. The degrees of freedom cannot
/// be generally inferred.
pub fn score_test_against(&self, alternative: Array1<F>) -> RegressionResult<F> {
let score_alt = self.score(&alternative);
let fisher_alt = self.fisher(&alternative);
// This is not the same as the cached covariance matrix since it is
// evaluated at the alternative parameters.
// NOTE: invh/invh_into() are bugged and incorrect!
let inv_fisher_alt = fisher_alt.inv_into()?;
Ok(score_alt.t().dot(&inv_fisher_alt.dot(&score_alt)))
}
/// The degrees of freedom for the likelihood ratio test, the score test,
/// and the Wald test. Not to be confused with `ndf()`, the degrees of
/// freedom in the model fit.
pub fn test_ndf(&self) -> usize {
if self.use_intercept {
self.n_par - 1
} else {
self.n_par
}
}
/// Returns the Wald test statistic compared to a null model with only an
/// intercept (if one is used). This statistic is asymptotically chi-squared
/// distributed with `test_ndf()` degrees of freedom.
pub fn wald_test(&self) -> F {
// The null parameters are all zero except for a possible intercept term
// which optimizes the null model.
let (_, null_params) = self.null_model_fit();
self.wald_test_against(&null_params)
}
/// Returns the Wald test statistic compared to another specified model fit
/// instead of the null model. The degrees of freedom cannot be generally
/// inferred.
pub fn wald_test_against(&self, alternative: &Array1<F>) -> F {
let d_params: Array1<F> = &self.result - alternative;
let fisher_alt: Array2<F> = self.fisher(alternative);
d_params.t().dot(&fisher_alt.dot(&d_params))
}
/// Returns the signed square root of the Wald test statistic for each
/// parameter. Since it does not account for covariance between the
/// parameters it may not be accurate.
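///
/// Sketch (illustrative; assumes `F = f64`): flag parameters whose |z| exceeds
/// roughly 2 as significant at about the 5% level, keeping in mind that the
/// covariance between parameters is ignored here.
/// ```ignore
/// let z = fit.wald_z().unwrap();
/// let significant: Vec<bool> = z.iter().map(|&zi| zi.abs() > 2.0).collect();
/// ```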
pub fn wald_z(&self) -> RegressionResult<Array1<F>> {
let par_cov = self.covariance()?;
let par_variances: ArrayView1<F> = par_cov.diag();
Ok(&self.result / &par_variances.mapv(num_traits::Float::sqrt))
}
}
/// Specialized functions for OLS.
impl<'a, F> Fit<'a, Linear, F>
where
F: 'static + Float,
{
/// Returns the coefficient of multiple correlation, R^2.
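///
/// Sketch (illustrative): for an ordinary unregularized least-squares fit with
/// an intercept, R^2 lies in [0, 1] and measures the fraction of variance explained.
/// ```ignore
/// let r2 = linear_fit.r_sq();
/// assert!((0.0..=1.0).contains(&r2));
/// ```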
pub fn r_sq(&self) -> F {
let y_avg: F = self.data.y.mean().expect("Data should be non-empty");
let total_sum_sq: F = self.data.y.mapv(|y| y - y_avg).mapv(|dy| dy * dy).sum();
(total_sum_sq - self.resid_sum_sq()) / total_sum_sq
}
/// Returns the residual sum of squares, i.e. the sum of the squared residuals.
pub fn resid_sum_sq(&self) -> F {
self.resid_resp().mapv_into(|r| r * r).sum()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
model::ModelBuilder,
utility::{one_pad, standardize},
Linear, Logistic,
};
use anyhow::Result;
use approx::assert_abs_diff_eq;
use ndarray::Axis;
/// Checks that the test statistics are invariant under standardization of the data.
#[test]
fn standardization_invariance() -> Result<()> {
let data_y = array![true, false, false, true, true, true, true, false, true];
let data_x = array![-0.5, 0.3, -0.6, 0.2, 0.3, 1.2, 0.8, 0.6, -0.2].insert_axis(Axis(1));
let lin_off = array![0.1, 0.0, -0.1, 0.2, 0.1, 0.3, 0.4, -0.1, 0.1];
let data_x_std = standardize(data_x.clone());
let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
.linear_offset(lin_off.clone())
.build()?;
let fit = model.fit()?;
let model_std = ModelBuilder::<Logistic>::data(&data_y, &data_x_std)
.linear_offset(lin_off)
.build()?;
let fit_std = model_std.fit()?;
let lr = fit.lr_test();
let lr_std = fit_std.lr_test();
assert_abs_diff_eq!(lr, lr_std);
eprintln!("about to try score test");
assert_abs_diff_eq!(
fit.score_test()?,
fit_std.score_test()?,
epsilon = f32::EPSILON as f64
);
eprintln!("about to try wald test");
assert_abs_diff_eq!(
fit.wald_test(),
fit_std.wald_test(),
epsilon = 4.0 * f64::EPSILON
);
assert_abs_diff_eq!(fit.aic(), fit_std.aic());
assert_abs_diff_eq!(fit.bic(), fit_std.bic());
eprintln!("about to try deviance");
assert_abs_diff_eq!(fit.deviance(), fit_std.deviance());
// The Wald Z-score of the intercept term is not invariant under a
// linear transformation of the data, but the parameter part seems to
// be, at least for single-component data.
assert_abs_diff_eq!(
fit.wald_z()?[1],
fit_std.wald_z()?[1],
epsilon = 0.01 * f32::EPSILON as f64
);
Ok(())
}
#[test]
fn null_model() -> Result<()> {
let data_y = array![true, false, false, true, true];
let data_x: Array2<f64> = array![[], [], [], [], []];
let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
dbg!(fit.n_iter);
dbg!(&fit.result);
// with no offsets, the result should be the link function of the mean.
assert_abs_diff_eq!(
fit.result[0],
<Logistic as Glm>::Link::func(0.6),
epsilon = 4.0 * f64::EPSILON
);
let empty_null_like = fit.null_like();
assert_eq!(fit.test_ndf(), 0);
dbg!(&fit.model_like);
let lr = fit.lr_test();
// Since there are no covariates, the null likelihood should be identical to
// the fit likelihood, so the likelihood ratio test should yield zero.
assert_abs_diff_eq!(lr, 0.);
// Check that the assertions still hold if linear offsets are included.
let lin_off: Array1<f64> = array![0.2, -0.1, 0.1, 0.0, 0.1];
let model = ModelBuilder::<Logistic>::data(&data_y, &data_x)
.linear_offset(lin_off)
.build()?;
let fit_off = model.fit()?;
let empty_model_like_off = fit_off.model_like;
let empty_null_like_off = fit_off.null_like();
// these two assertions should be equivalent
assert_abs_diff_eq!(fit_off.lr_test(), 0.);
assert_abs_diff_eq!(empty_model_like_off, empty_null_like_off);
// check consistency with data provided
let data_x_with = array![[0.5], [-0.2], [0.3], [0.4], [-0.1]];
let model = ModelBuilder::<Logistic>::data(&data_y, &data_x_with).build()?;
let fit_with = model.fit()?;
dbg!(&fit_with.result);
// The null likelihood of the model with parameters should be the same
// as the likelihood of the model with only the intercept.
assert_abs_diff_eq!(empty_null_like, fit_with.null_like());
Ok(())
}
#[test]
fn null_like_logistic() -> Result<()> {
// 6 true and 4 false for y_bar = 0.6.
let data_y = array![true, true, true, true, true, true, false, false, false, false];
let ybar: f64 = 0.6;
let data_x = array![0.4, 0.2, 0.5, 0.1, 0.6, 0.7, 0.3, 0.8, -0.1, 0.1].insert_axis(Axis(1));
let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
let target_null_like = fit
.data
.y
.mapv(|y| {
let eta = (ybar / (1. - ybar)).ln();
y * eta - eta.exp().ln_1p()
})
.sum();
assert_abs_diff_eq!(fit.null_like(), target_null_like);
Ok(())
}
// Check that the deviance is equal to the sum of square deviations for a linear model
#[test]
fn deviance_linear() -> Result<()> {
let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
// The predicted values of Y given the model.
let pred_y = fit.predict(&one_pad(data_x.view()), None);
let target_dev = (data_y - pred_y).mapv(|dy| dy * dy).sum();
assert_abs_diff_eq!(fit.deviance(), target_dev);
Ok(())
}
// Check that the deviance equals the dispersion parameter times the number of
// degrees of freedom for a linear model.
#[test]
fn deviance_dispersion_eq_linear() -> Result<()> {
let data_y = array![0.2, -0.1, 0.4, 1.3, 0.2, -0.6, 0.9];
let data_x = array![
[0.4, 0.2],
[0.1, 0.4],
[-0.1, 0.3],
[0.5, 0.7],
[0.4, 0.1],
[-0.2, -0.3],
[0.4, -0.1]
];
let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
let dev = fit.deviance();
let disp = fit.dispersion();
let ndf = fit.ndf() as f64;
assert_abs_diff_eq!(dev, disp * ndf, epsilon = 4. * f64::EPSILON);
Ok(())
}
// Check that the residuals for a linear model are all consistent.
#[test]
fn residuals_linear() -> Result<()> {
let data_y = array![0.1, -0.3, 0.7, 0.2, 1.2, -0.4];
let data_x = array![0.4, 0.1, 0.3, -0.1, 0.5, 0.6].insert_axis(Axis(1));
let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
let response = fit.resid_resp();
let pearson = fit.resid_pear();
let deviance = fit.resid_dev();
assert_abs_diff_eq!(response, pearson);
assert_abs_diff_eq!(response, deviance);
let pearson_std = fit.resid_pear_std()?;
let deviance_std = fit.resid_dev_std()?;
let _student = fit.resid_student()?;
assert_abs_diff_eq!(pearson_std, deviance_std, epsilon = 8. * f64::EPSILON);
// NOTE: Studentization can't be checked directly because the method used is an
// approximation. Another approach will be needed to give exact values.
// let orig_dev = fit.deviance();
// let n_data = data_y.len();
// // Check that the leave-one-out stats hold literally
// let mut loo_dev: Vec<f64> = Vec::new();
// for i in 0..n_data {
// let ya = data_y.slice(s![0..i]);
// let yb = data_y.slice(s![i + 1..]);
// let xa = data_x.slice(s![0..i, ..]);
// let xb = data_x.slice(s![i + 1.., ..]);
// let y_loo = concatenate![Axis(0), ya, yb];
// let x_loo = concatenate![Axis(0), xa, xb];
// let model_i = ModelBuilder::<Linear>::data(&y_loo, &x_loo).build()?;
// let fit_i = model_i.fit()?;
// let yi = data_y[i];
// let xi = data_x.slice(s![i..i + 1, ..]);
// let xi = crate::utility::one_pad(xi);
// let yi_pred: f64 = fit_i.predict(&xi, None)[0];
// let disp_i = fit_i.dispersion();
// let pear_loo = (yi - yi_pred) / disp_i.sqrt();
// let dev_i = fit_i.deviance();
// let d_dev = 2. * (orig_dev - dev_i);
// loo_dev.push(d_dev.sqrt() * (yi - yi_pred).signum());
// }
// let loo_dev: Array1<f64> = loo_dev.into();
// This is off from 1 by a constant factor that depends on the data
// This is only approximately true
// assert_abs_diff_eq!(student, loo_dev);
Ok(())
}
// Check the null likelihood for the case where it can be computed exactly.
#[test]
fn null_like_linear() -> Result<()> {
let data_y = array![0.3, -0.2, 0.5, 0.7, 0.2, 1.4, 1.1, 0.2];
let data_x = array![0.6, 2.1, 0.4, -3.2, 0.7, 0.1, -0.3, 0.5].insert_axis(Axis(1));
let ybar: f64 = data_y.mean().unwrap();
let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
// let target_null_like = data_y.mapv(|y| -0.5 * (y - ybar) * (y - ybar)).sum();
let target_null_like = data_y.mapv(|y| y * ybar - 0.5 * ybar * ybar).sum();
// With the saturated likelihood subtracted the null likelihood should
// just be the sum of squared differences from the mean.
// let target_null_like = 0.;
// dbg!(target_null_like);
let fit_null_like = fit.null_like();
assert_abs_diff_eq!(2. * (fit.model_like - fit_null_like), fit.lr_test());
assert_eq!(fit.test_ndf(), 1);
assert_abs_diff_eq!(
fit_null_like,
target_null_like,
epsilon = 4.0 * f64::EPSILON
);
Ok(())
}
// check the null likelihood where there is no dependence on the X data.
#[test]
fn null_like_logistic_nodep() -> Result<()> {
let data_y = array![true, true, false, false, true, false, false, true];
let data_x = array![0.4, 0.2, 0.4, 0.2, 0.7, 0.7, -0.1, -0.1].insert_axis(Axis(1));
let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
let lr = fit.lr_test();
assert_abs_diff_eq!(lr, 0.);
Ok(())
}
// TODO: Test that the statistics behave sensibly under regularization. The
// likelihood ratio test should yield a smaller value.
// Test the basic caching functions.
#[test]
fn cached_computations() -> Result<()> {
let data_y = array![true, true, false, true, true, false, false, false, true];
let data_x = array![0.4, 0.1, -0.3, 0.7, -0.5, -0.1, 0.8, 1.0, 0.4].insert_axis(Axis(1));
let model = ModelBuilder::<Logistic>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
let _null_like = fit.null_like();
let _null_like = fit.null_like();
let _cov = fit.covariance()?;
let _wald = fit.wald_z();
Ok(())
}
// Check the consistency of the various statistical tests for linear
// regression, where they should be the most comparable.
#[test]
fn linear_stat_tests() -> Result<()> {
let data_y = array![-0.3, -0.1, 0.0, 0.2, 0.4, 0.5, 0.8, 0.8, 1.1];
let data_x = array![-0.5, -0.2, 0.1, 0.2, 0.5, 0.6, 0.7, 0.9, 1.3].insert_axis(Axis(1));
let model = ModelBuilder::<Linear>::data(&data_y, &data_x).build()?;
let fit = model.fit()?;
let lr = fit.lr_test();
let wald = fit.wald_test();
let score = fit.score_test()?;
assert_abs_diff_eq!(lr, wald, epsilon = 32.0 * f64::EPSILON);
assert_abs_diff_eq!(lr, score, epsilon = 32.0 * f64::EPSILON);
Ok(())
}
}