use anyhow::Result;
use approx::assert_abs_diff_eq;
use ndarray::{Array, Array1, Array2, array, s};
use ndarray_glm::{Logistic, ModelBuilder, error::RegressionError};
mod common;
use common::{array_from_csv, load_logistic_data, y_x_off_from_csv};
#[test]
fn log_termination_0() -> Result<()> {
let (y, x, off) = y_x_off_from_csv::<bool, f32, 1>("tests/data/log_termination_0.csv")?;
let model = ModelBuilder::<Logistic>::data(&y, &x)
.linear_offset(off)
.build()?;
let fit = model.fit()?;
dbg!(&fit.result);
dbg!(&fit.n_iter);
let n_par = fit.result.len();
let r_result = array_from_csv::<f32>("tests/R/log_termination_0/coefficients.csv")?;
assert_abs_diff_eq!(&fit.result, &r_result, epsilon = 1e-5);
assert!(
fit.lr_test_against(&r_result) >= 0.,
"make sure this is better than R's"
);
let r_dev_resid = array_from_csv::<f32>("tests/R/log_termination_0/dev_resid.csv")?;
assert_abs_diff_eq!(fit.resid_dev(), r_dev_resid, epsilon = 1e-5);
let r_flat_cov = array_from_csv::<f32>("tests/R/log_termination_0/covariance.csv")?;
let r_cov = Array::from_shape_vec((n_par, n_par), r_flat_cov.into_raw_vec_and_offset().0)?;
assert_abs_diff_eq!(fit.covariance()?, &r_cov, epsilon = 2e-5);
let eps = 5e-5;
let r_dev = array_from_csv::<f32>("tests/R/log_termination_0/deviance.csv")?[0];
assert_abs_diff_eq!(fit.deviance(), r_dev, epsilon = eps);
let r_aic = array_from_csv::<f32>("tests/R/log_termination_0/aic.csv")?[0];
assert_abs_diff_eq!(fit.aic(), r_aic, epsilon = eps);
let r_bic = array_from_csv::<f32>("tests/R/log_termination_0/bic.csv")?[0];
assert_abs_diff_eq!(fit.bic(), r_bic, epsilon = eps);
let r_stand_resid_pear =
array_from_csv::<f32>("tests/R/log_termination_0/standard_pearson_resid.csv")?;
let r_stand_resid_dev =
array_from_csv::<f32>("tests/R/log_termination_0/standard_deviance_resid.csv")?;
assert_abs_diff_eq!(fit.resid_pear_std()?, r_stand_resid_pear, epsilon = eps);
assert_abs_diff_eq!(fit.resid_dev_std()?, r_stand_resid_dev, epsilon = eps);
let r_stud_resid = array_from_csv::<f32>("tests/R/log_termination_0/student_resid.csv")?;
assert_abs_diff_eq!(fit.resid_student()?, r_stud_resid, epsilon = 0.05);
let r_null_dev = array_from_csv::<f32>("tests/R/log_termination_0/null_dev.csv")?[0];
assert_abs_diff_eq!(fit.lr_test(), r_null_dev - r_dev, epsilon = eps);
Ok(())
}
#[test]
fn log_termination_1() -> Result<()> {
let (y, x, off) = y_x_off_from_csv::<bool, f32, 1>("tests/data/log_termination_1.csv")?;
let model = ModelBuilder::<Logistic>::data(&y, &x)
.linear_offset(off)
.build()?;
let _fit = model.fit()?;
Ok(())
}
#[test]
fn log_regularization() -> Result<()> {
let (y, x, off) = y_x_off_from_csv::<bool, f32, 1>("tests/data/log_regularization.csv")?;
let model = ModelBuilder::<Logistic>::data(&y, &x)
.linear_offset(off)
.build()?;
let _ = match model.fit_options().l2_reg(1e-5).fit() {
Ok(fit) => fit,
Err(err) => {
if let RegressionError::MaxIter { n_iter: _, history } = &err {
dbg!(&history);
}
return Err(err.into());
}
};
Ok(())
}
#[test]
fn logistic_weights() -> Result<()> {
let (y, x, wts) = y_x_off_from_csv::<bool, f32, 2>("tests/data/log_weights.csv")?;
let model = ModelBuilder::<Logistic>::data(&y, &x)
.var_weights(wts.clone())
.build()?;
let fit = model.fit()?;
let eps: f32 = 1e-4;
let n_par = fit.result.len();
let n_obs = y.len();
let r_result = array_from_csv::<f32>("tests/R/log_weights/coefficients.csv")?;
assert_abs_diff_eq!(&fit.result, &r_result, epsilon = eps);
assert!(
fit.lr_test_against(&r_result) >= 0.,
"make sure our fit is at least as good as R's"
);
let r_flat_cov = array_from_csv::<f32>("tests/R/log_weights/covariance.csv")?;
let r_cov = Array::from_shape_vec((n_par, n_par), r_flat_cov.into_raw_vec_and_offset().0)?;
assert_abs_diff_eq!(fit.covariance()?, &r_cov, epsilon = eps);
let r_dev = array_from_csv::<f32>("tests/R/log_weights/deviance.csv")?[0];
assert_abs_diff_eq!(fit.deviance(), r_dev, epsilon = 0.1 * eps);
let r_null_dev = array_from_csv::<f32>("tests/R/log_weights/null_dev.csv")?[0];
assert_abs_diff_eq!(fit.lr_test(), r_null_dev - r_dev, epsilon = 0.1 * eps);
let hat_mat = fit.hat()?;
assert_abs_diff_eq!(
hat_mat.dot(&fit.resid_resp()),
Array::zeros(y.len()),
epsilon = 0.1 * eps,
);
let hat = fit.leverage()?;
let r_hat = array_from_csv::<f32>("tests/R/log_weights/hat.csv")?;
assert_abs_diff_eq!(hat, r_hat, epsilon = eps);
let r_resid_pear = array_from_csv::<f32>("tests/R/log_weights/pearson_resid.csv")?;
assert_abs_diff_eq!(*fit.resid_pear(), r_resid_pear, epsilon = eps);
let r_resid_dev = array_from_csv::<f32>("tests/R/log_weights/dev_resid.csv")?;
assert_abs_diff_eq!(fit.resid_dev(), r_resid_dev, epsilon = eps);
let r_stand_resid_pear =
array_from_csv::<f32>("tests/R/log_weights/standard_pearson_resid.csv")?;
let r_stand_resid_dev =
array_from_csv::<f32>("tests/R/log_weights/standard_deviance_resid.csv")?;
assert_abs_diff_eq!(fit.resid_pear_std()?, r_stand_resid_pear, epsilon = eps);
assert_abs_diff_eq!(fit.resid_dev_std()?, r_stand_resid_dev, epsilon = eps);
let r_aic = array_from_csv::<f32>("tests/R/log_weights/aic.csv")?[0];
assert_abs_diff_eq!(fit.aic(), r_aic, epsilon = 0.2);
let r_loo = array_from_csv::<f32>("tests/R/log_weights/loo_coef.csv")?;
let r_loo = Array::from_shape_vec((n_obs, n_par), r_loo.into_raw_vec_and_offset().0)?;
let loo_exact = fit.loo_exact()?;
let infl_exact = &fit.result - &loo_exact;
let infl_loo = fit.infl_coef()?;
let r_diff = infl_exact.clone() - r_loo.clone();
let loo_diff = infl_exact.clone() - infl_loo.clone();
let all_better = loo_diff.mapv(|x| x.abs()) - r_diff.mapv(|x| x.abs());
let num_positive = all_better.into_iter().filter(|&x| x > 0.).count();
dbg!(num_positive);
assert!(num_positive < 5, "Allowing only four worse elements");
Ok(())
}
fn pred_test_full_x() -> Array2<f64> {
array![[0.5, 0.3, 0.1], [-1.0, -0.5, 0.8], [2.0, 1.2, -0.3]]
}
fn pred_test_sub_x() -> Array2<f64> {
array![[0.3, 0.1], [-0.5, 0.8], [1.2, -0.3]]
}
fn pred_test_off() -> Array1<f64> {
array![0.25, -0.50, 1.00]
}
fn offset_from_x(x: &Array2<f64>) -> Array1<f64> {
x.column(0).mapv(|v| 0.5 * v)
}
#[allow(clippy::too_many_arguments)]
fn check_logistic_scenario(
fit: &ndarray_glm::Fit<Logistic, f64>,
dir: &str,
eps: f64,
has_var_weights: bool,
has_freq_weights: bool,
is_bool: bool,
pred_x: &Array2<f64>,
pred_off: Option<&Array1<f64>>,
) -> Result<()> {
let r =
|name: &str| array_from_csv::<f64>(&format!("tests/R/logistic_results/{dir}/{name}.csv"));
let cov_eps = 100. * eps;
let n_par = fit.result.len();
let n_obs = 30usize;
let r_coef = r("coefficients")?;
assert_abs_diff_eq!(&fit.result, &r_coef, epsilon = eps);
let r_dev = r("deviance")?[0];
assert_abs_diff_eq!(fit.deviance(), r_dev, epsilon = eps);
let r_null_dev = r("null_deviance")?[0];
let our_null_dev = fit.lr_test() + r_dev;
assert_abs_diff_eq!(our_null_dev, r_null_dev, epsilon = cov_eps);
let r_pred = r("predict_resp")?;
assert_abs_diff_eq!(fit.predict(pred_x, pred_off), r_pred, epsilon = eps);
let r_cov_flat = r("covariance")?;
let r_cov = Array::from_shape_vec((n_par, n_par), r_cov_flat.into_raw_vec_and_offset().0)?;
assert_abs_diff_eq!(fit.covariance()?, &r_cov, epsilon = cov_eps);
let r_wald_z = r("wald_z")?;
assert_abs_diff_eq!(fit.wald_z()?, r_wald_z, epsilon = cov_eps);
if is_bool && !has_var_weights {
let r_aic = r("aic")?[0];
assert_abs_diff_eq!(fit.aic(), r_aic, epsilon = eps);
let r_bic = r("bic")?[0];
assert_abs_diff_eq!(fit.bic(), r_bic, epsilon = cov_eps);
}
let r_resid_resp = r("resid_resp")?;
assert_abs_diff_eq!(fit.resid_resp(), r_resid_resp, epsilon = eps);
let r_resid_work = r("resid_work")?;
assert_abs_diff_eq!(fit.resid_work(), r_resid_work, epsilon = cov_eps);
let partial = fit.resid_part();
let n_feat = partial.ncols();
let has_intercept = n_feat + 1 == n_par;
if !has_var_weights || !has_intercept {
let r_partial_flat = r("resid_partial")?;
let r_partial =
Array::from_shape_vec((n_obs, n_feat), r_partial_flat.into_raw_vec_and_offset().0)?;
assert_abs_diff_eq!(partial, r_partial, epsilon = cov_eps);
}
if !has_freq_weights {
let r_resid_pear = r("resid_pear")?;
assert_abs_diff_eq!(*fit.resid_pear(), r_resid_pear, epsilon = eps);
if is_bool {
let r_resid_dev = r("resid_dev")?;
assert_abs_diff_eq!(fit.resid_dev(), r_resid_dev, epsilon = eps);
}
let r_leverage = r("leverage")?;
assert_abs_diff_eq!(fit.leverage()?, r_leverage, epsilon = cov_eps);
let r_resid_pear_std = r("resid_pear_std")?;
assert_abs_diff_eq!(fit.resid_pear_std()?, r_resid_pear_std, epsilon = cov_eps);
if is_bool {
let r_resid_dev_std = r("resid_dev_std")?;
assert_abs_diff_eq!(fit.resid_dev_std()?, r_resid_dev_std, epsilon = cov_eps);
}
let r_cooks = r("cooks")?;
assert_abs_diff_eq!(fit.cooks()?, r_cooks, epsilon = cov_eps);
let _loo_exact = fit.loo_exact()?;
let _infl_approx = fit.infl_coef()?;
}
#[cfg(feature = "stats")]
{
let r_lr_p = r("pvalue_lr_test")?[0];
assert_abs_diff_eq!(fit.pvalue_lr_test(), r_lr_p, epsilon = eps);
let r_wald_p = r("pvalue_wald")?;
assert_abs_diff_eq!(fit.pvalue_wald()?, r_wald_p, epsilon = cov_eps);
let r_exact_p = r("pvalue_exact")?;
assert_abs_diff_eq!(fit.pvalue_exact()?, r_exact_p, epsilon = cov_eps);
}
Ok(())
}
#[test]
fn logistic_bool_none() -> Result<()> {
let (y, _, x, _var_wt, _freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y, &x).build()?;
let fit = model.fit()?;
check_logistic_scenario(
&fit,
"bool_none",
2e-9,
false,
false,
true,
&pred_test_full_x(),
None,
)
}
#[test]
fn logistic_bool_var_weights() -> Result<()> {
let (y, _, x, var_wt, _freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y, &x)
.var_weights(var_wt)
.build()?;
let fit = model.fit()?;
check_logistic_scenario(
&fit,
"bool_var",
1e-9,
true,
false,
true,
&pred_test_full_x(),
None,
)
}
#[test]
fn logistic_bool_freq_weights() -> Result<()> {
let (y, _, x, _var_wt, freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y, &x)
.freq_weights(freq_wt)
.build()?;
let fit = model.fit()?;
check_logistic_scenario(
&fit,
"bool_freq",
1e-8,
false,
true,
true,
&pred_test_full_x(),
None,
)
}
#[test]
fn logistic_bool_both_weights() -> Result<()> {
let (y, _, x, var_wt, freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y, &x)
.var_weights(var_wt)
.freq_weights(freq_wt)
.build()?;
let fit = model.fit()?;
check_logistic_scenario(
&fit,
"bool_both",
1e-9,
true,
true,
true,
&pred_test_full_x(),
None,
)
}
#[test]
fn logistic_bool_offset_noint() -> Result<()> {
let (y, _, x, _var_wt, _freq_wt) = load_logistic_data()?;
let off = offset_from_x(&x);
let x_sub = x.slice(s![.., 1..]).to_owned();
let model = ModelBuilder::<Logistic>::data(&y, &x_sub)
.linear_offset(off)
.no_constant()
.build()?;
let fit = model.fit()?;
let off_pred = pred_test_off();
check_logistic_scenario(
&fit,
"bool_off",
1e-9,
false,
false,
true,
&pred_test_sub_x(),
Some(&off_pred),
)
}
#[test]
fn logistic_float_none() -> Result<()> {
let (_, y_float, x, _var_wt, _freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y_float, &x).build()?;
let fit = model.fit()?;
check_logistic_scenario(
&fit,
"float_none",
1e-7,
false,
false,
false,
&pred_test_full_x(),
None,
)
}
#[test]
fn logistic_float_var_weights() -> Result<()> {
let (_, y_float, x, var_wt, _freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y_float, &x)
.var_weights(var_wt)
.build()?;
let fit = model.fit()?;
check_logistic_scenario(
&fit,
"float_var",
1e-10,
true,
false,
false,
&pred_test_full_x(),
None,
)
}
#[test]
fn logistic_test_against_self_is_zero() -> Result<()> {
let (y, _, x, _, _) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y, &x).build()?;
let fit = model.fit()?;
let eps = 64. * f64::EPSILON;
assert_abs_diff_eq!(fit.wald_test_against(&fit.result), 0., epsilon = eps);
assert_abs_diff_eq!(
fit.score_test_against(fit.result.clone())?,
0.,
epsilon = eps
);
Ok(())
}
#[test]
fn logistic_std_vs_nostd() -> Result<()> {
let (y, _, x, var_wt, _) = load_logistic_data()?;
let model_std = ModelBuilder::<Logistic>::data(&y, &x)
.var_weights(var_wt.clone())
.build()?;
let fit_std = model_std.fit()?;
let model_ns = ModelBuilder::<Logistic>::data(&y, &x)
.var_weights(var_wt)
.no_standardize()
.build()?;
let fit_ns = model_ns.fit()?;
let eps = 0.01 * f32::EPSILON as f64;
assert_abs_diff_eq!(fit_std.result, fit_ns.result, epsilon = eps);
assert_abs_diff_eq!(fit_std.deviance(), fit_ns.deviance(), epsilon = eps);
assert_abs_diff_eq!(fit_std.lr_test(), fit_ns.lr_test(), epsilon = eps);
Ok(())
}
#[test]
fn logistic_l2_regularization() -> Result<()> {
let (y, _, x, _var_wt, _freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y, &x).build()?;
let fit_unreg = model.fit()?;
let fit_l2 = model.fit_options().l2_reg(0.5).fit()?;
let norm_unreg: f64 = fit_unreg.result.slice(s![1..]).mapv(|v| v * v).sum();
let norm_l2: f64 = fit_l2.result.slice(s![1..]).mapv(|v| v * v).sum();
assert!(
norm_l2 <= norm_unreg,
"L2 should shrink coefficients: unreg={norm_unreg}, l2={norm_l2}"
);
let _ = fit_l2.covariance()?;
let _ = fit_l2.wald_z()?;
let _ = fit_l2.lr_test();
Ok(())
}
#[test]
fn logistic_l1_regularization() -> Result<()> {
let (y, _, x, _var_wt, _freq_wt) = load_logistic_data()?;
let model = ModelBuilder::<Logistic>::data(&y, &x).build()?;
let fit_unreg = model.fit()?;
let fit_l1 = model.fit_options().l1_reg(0.1).max_iter(500).fit()?;
let norm_unreg: f64 = fit_unreg.result.slice(s![1..]).mapv(|v| v.abs()).sum();
let norm_l1: f64 = fit_l1.result.slice(s![1..]).mapv(|v| v.abs()).sum();
assert!(
norm_l1 <= norm_unreg,
"L1 should shrink L1-norm: unreg={norm_unreg}, l1={norm_l1}"
);
Ok(())
}