use anofox_regression::diagnostics::{compute_leverage, cooks_distance, variance_inflation_factor};
use anofox_regression::solvers::{
BlsRegressor, ElasticNetRegressor, FittedRegressor, NegativeBinomialRegressor, OlsRegressor,
PoissonRegressor, Regressor, RidgeRegressor, TweedieRegressor, WlsRegressor,
};
use anofox_regression::{NaAction, NaHandler};
use approx::assert_relative_eq;
use faer::{Col, Mat};
fn dataset1() -> (Mat<f64>, Col<f64>) {
let x_data: Vec<f64> = (1..=20).map(|i| i as f64).collect();
let y_data = vec![
6.1854792236,
8.2176509143,
11.6815642057,
14.8164313025,
17.7021341616,
20.4469377420,
24.2557609987,
26.4526704808,
30.5092118569,
32.4686429505,
36.1524348271,
39.6433226964,
40.8055696494,
44.3606056166,
47.4333393318,
50.8179751990,
53.3578735393,
55.1717722895,
58.2797665357,
63.1600566729,
];
let x = Mat::from_fn(20, 1, |i, _| x_data[i]);
let y = Col::from_fn(20, |i| y_data[i]);
(x, y)
}
#[test]
fn test_ols_simple_vs_r() {
let (x, y) = dataset1();
let model = OlsRegressor::builder()
.with_intercept(true)
.compute_inference(true)
.build();
let fitted = model.fit(&x, &y).expect("fit should succeed");
let result = fitted.result();
assert_relative_eq!(
fitted.intercept().unwrap(),
3.008845227701627,
epsilon = 1e-8
);
assert_relative_eq!(fitted.coefficients()[0], 2.960677598286641, epsilon = 1e-8);
assert_relative_eq!(fitted.r_squared(), 0.998773884764589, epsilon = 1e-8);
assert_relative_eq!(
result.intercept_std_error.unwrap(),
0.292895615787820,
epsilon = 1e-6
);
let se = result.std_errors.as_ref().expect("should have SE");
assert_relative_eq!(se[0], 0.024450453598427, epsilon = 1e-6);
assert_relative_eq!(
result.intercept_t_statistic.unwrap(),
10.272756113499822,
epsilon = 1e-4
);
let t_stats = result.t_statistics.as_ref().expect("should have t-stats");
assert_relative_eq!(t_stats[0], 121.088_861_863_778_6, epsilon = 1e-4);
let p_int = result.intercept_p_value.unwrap();
assert!(p_int < 1e-8, "p_intercept={} should be < 1e-8", p_int);
let p_vals = result.p_values.as_ref().expect("should have p-values");
assert!(
p_vals[0] < 1e-20,
"p_coefficient={} should be very small",
p_vals[0]
);
assert_relative_eq!(result.f_statistic, 14_662.512_467_465_243, epsilon = 1.0);
let expected_residuals = [
0.215956397585084,
-0.712549509972986,
-0.209313816892890,
-0.035124318367676,
-0.110099057564337,
];
for (i, &expected) in expected_residuals.iter().enumerate() {
assert_relative_eq!(result.residuals[i], expected, epsilon = 1e-8);
}
}
#[test]
fn test_ols_aic_bic_loglik_vs_r() {
let (x, y) = dataset1();
let model = OlsRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("fit should succeed");
let result = fitted.result();
assert!(
result.log_likelihood.is_finite(),
"log_likelihood should be finite"
);
assert!(result.aic.is_finite(), "AIC should be finite");
assert!(result.bic.is_finite(), "BIC should be finite");
assert!(result.bic > result.aic, "BIC should be > AIC");
}
fn dataset2() -> (Mat<f64>, Col<f64>) {
let n = 50;
let x1: Vec<f64> = (0..n).map(|i| i as f64 * 10.0 / 49.0).collect();
let x2: Vec<f64> = x1.iter().map(|&xi| xi.sin() * 5.0).collect();
let y_data = vec![
0.6933614059,
2.6668738795,
7.5982714087,
12.0597551133,
15.4573682745,
15.3951707407,
17.3012019858,
16.9425259175,
19.6967157093,
18.5061691466,
18.9104712325,
17.9136536085,
16.5113175090,
12.7371402652,
11.4286818303,
6.6096654232,
4.8951811761,
2.2585231899,
-1.6745445167,
-1.2782422556,
-2.7444614582,
-4.4448328918,
-3.8921692186,
-5.3363797508,
-5.3148307360,
-2.2387209175,
-1.6320143238,
2.9904234915,
3.9165459002,
8.1406939638,
11.1661469337,
13.5191982841,
19.3106030748,
21.6571384579,
24.1117196144,
26.9266912639,
29.4859308494,
30.5087316990,
28.4438449201,
32.1203208094,
31.2475736825,
30.9863749947,
30.0269817818,
29.0198069830,
24.6912796697,
24.2515193055,
20.6665743461,
18.7279376604,
16.0723888706,
13.5605614995,
];
let mut x = Mat::zeros(n, 2);
for i in 0..n {
x[(i, 0)] = x1[i];
x[(i, 1)] = x2[i];
}
let y = Col::from_fn(n, |i| y_data[i]);
(x, y)
}
#[test]
fn test_ols_multiple_vs_r() {
let (x, y) = dataset2();
let model = OlsRegressor::builder()
.with_intercept(true)
.compute_inference(true)
.build();
let fitted = model.fit(&x, &y).expect("fit should succeed");
assert_relative_eq!(
fitted.intercept().unwrap(),
0.586633340017646,
epsilon = 1e-6
);
assert_relative_eq!(fitted.coefficients()[0], 2.078575906332045, epsilon = 1e-6);
assert_relative_eq!(fitted.coefficients()[1], 3.022440472606421, epsilon = 1e-6);
assert_relative_eq!(fitted.r_squared(), 0.992375515498879, epsilon = 1e-6);
}
#[test]
fn test_ridge_vs_r() {
let (x, y) = dataset2();
let ridge0 = RidgeRegressor::builder()
.with_intercept(true)
.lambda(0.0)
.build();
let fitted0 = ridge0.fit(&x, &y).expect("fit should succeed");
assert_relative_eq!(
fitted0.intercept().unwrap(),
0.586633340017646,
epsilon = 1e-6
);
assert_relative_eq!(fitted0.coefficients()[0], 2.078575906332045, epsilon = 1e-6);
assert_relative_eq!(fitted0.coefficients()[1], 3.022440472606421, epsilon = 1e-6);
let ridge10 = RidgeRegressor::builder()
.with_intercept(true)
.lambda(10.0)
.build();
let fitted10 = ridge10.fit(&x, &y).expect("fit should succeed");
assert!(
fitted10.coefficients()[0].abs() <= fitted0.coefficients()[0].abs() + 0.01,
"Ridge coef1 should shrink with lambda"
);
assert!(
fitted10.coefficients()[1].abs() <= fitted0.coefficients()[1].abs() + 0.01,
"Ridge coef2 should shrink with lambda"
);
}
fn dataset4() -> (Mat<f64>, Col<f64>) {
let x_flat = vec![
1.3709584471,
1.2009653756,
-2.0009292377,
-0.0046207678,
1.3349125854,
-0.5646981714,
1.0447510872,
0.3337771974,
0.7602421677,
-0.8692717639,
0.3631284113,
-1.0032086468,
1.1713251274,
0.0389909129,
0.0554869547,
0.6328626050,
1.8484819017,
2.0595392423,
0.7350721416,
0.0490669132,
0.4042683231,
-0.6667734088,
-1.3768615982,
-0.1464726270,
-0.5783557284,
-0.1061245161,
0.1055138125,
-1.1508555656,
-0.0578873354,
-0.9987386560,
1.5115219974,
-0.4222558819,
-0.7058213948,
0.4823694661,
-0.0024327800,
-0.0946590384,
-0.1223501720,
-1.0540557821,
0.9929436368,
0.6555118828,
2.0184237139,
0.1881930345,
-0.6457437231,
-1.2463954980,
1.4768422790,
-0.0627140991,
0.1191609580,
-0.1853779677,
-0.0334875248,
-1.9091527883,
];
let y_data = vec![
2.2255149499,
-3.3262092547,
3.0945743944,
-1.7303712123,
2.1862750145,
-0.6284633382,
4.8644733530,
-0.5227547193,
5.0684785282,
-0.0083603612,
];
let n = 10;
let p = 5;
let mut x = Mat::zeros(n, p);
for i in 0..n {
for j in 0..p {
x[(i, j)] = x_flat[i * p + j];
}
}
let y = Col::from_fn(n, |i| y_data[i]);
(x, y)
}
#[test]
fn test_elastic_net_basic() {
let (x, y) = dataset4();
let enet = ElasticNetRegressor::builder()
.with_intercept(true)
.lambda(0.1)
.alpha(0.5)
.max_iterations(10000)
.tolerance(1e-10)
.build();
let fitted = enet.fit(&x, &y).expect("fit should succeed");
assert!(
fitted.r_squared() > 0.8,
"R² = {} should be > 0.8",
fitted.r_squared()
);
assert!(
fitted.coefficients()[0].abs() > fitted.coefficients()[2].abs(),
"coef1 should be larger than coef3"
);
}
fn dataset5() -> (Mat<f64>, Col<f64>, Col<f64>) {
let x_data: Vec<f64> = (1..=20).map(|i| i as f64).collect();
let y_data: Vec<f64> = x_data.iter().map(|&xi| 2.0 + 1.5 * xi).collect();
let weights: Vec<f64> = (1..=20)
.map(|i| {
if i <= 10 {
1.0
} else {
0.5
} })
.collect();
let x = Mat::from_fn(20, 1, |i, _| x_data[i]);
let y = Col::from_fn(20, |i| y_data[i]);
let w = Col::from_fn(20, |i| weights[i]);
(x, y, w)
}
#[test]
fn test_wls_vs_r() {
let (x, y, w) = dataset5();
let model = WlsRegressor::builder()
.with_intercept(true)
.weights(w.clone())
.build();
let fitted = model.fit(&x, &y).expect("fit should succeed");
assert!(
fitted.intercept().unwrap().is_finite(),
"WLS intercept should be finite"
);
assert!(
fitted.coefficients()[0].is_finite(),
"WLS coefficient should be finite"
);
assert!(
fitted.r_squared() > 0.99,
"WLS R² = {} should be > 0.99",
fitted.r_squared()
);
assert!(
(fitted.coefficients()[0] - 1.5).abs() < 0.1,
"WLS slope {} should be close to 1.5",
fitted.coefficients()[0]
);
assert!(
(fitted.intercept().unwrap() - 2.0).abs() < 0.5,
"WLS intercept {} should be close to 2.0",
fitted.intercept().unwrap()
);
let ols = OlsRegressor::builder().with_intercept(true).build();
let ols_fitted = ols.fit(&x, &y).expect("fit should succeed");
assert!(ols_fitted.r_squared() > 0.99);
assert!(
(fitted.coefficients()[0] - ols_fitted.coefficients()[0]).abs() < 0.1,
"WLS and OLS coefficients should be similar for this data"
);
}
#[test]
fn test_wls_extreme_weights_vs_r() {
let x_data: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let y_data = vec![
3.6370958447,
4.8870603657,
6.6089385234,
8.2531450420,
9.7021341616,
10.9363252903,
13.5580653982,
13.9242727693,
17.3165813425,
16.9372859009,
19.9353566196,
22.7439744712,
19.6944810886,
22.6096957265,
24.3000179954,
27.0175206369,
27.0167700336,
24.2183802424,
25.8631128357,
34.6402266915,
32.8560589524,
31.0811214452,
36.1045900818,
40.9152192780,
44.2379836532,
39.8807802578,
41.8053726665,
39.0631433615,
46.8342823290,
45.0800153721,
];
let weights: Vec<f64> = (1..=30).map(|i| 1.0 / (i * i) as f64).collect();
let x = Mat::from_fn(30, 1, |i, _| x_data[i]);
let y = Col::from_fn(30, |i| y_data[i]);
let w = Col::from_fn(30, |i| weights[i]);
let model = WlsRegressor::builder()
.with_intercept(true)
.weights(w)
.build();
let fitted = model
.fit(&x, &y)
.expect("fit should succeed with extreme weights");
assert_relative_eq!(fitted.coefficients()[0], 1.488433478007494, epsilon = 1e-4);
assert_relative_eq!(
fitted.intercept().unwrap(),
2.138362439251541,
epsilon = 1e-4
);
assert!(
fitted.r_squared() > 0.98,
"R² = {} should be > 0.98",
fitted.r_squared()
);
}
fn dataset6() -> (Mat<f64>, Col<f64>) {
let x1 = vec![
1.3709584471,
-0.5646981714,
0.3631284113,
0.6328626050,
0.4042683231,
-0.1061245161,
1.5115219974,
-0.0946590384,
2.0184237139,
-0.0627140991,
1.3048696542,
2.2866453927,
-1.3888607011,
-0.2787887668,
-0.1333213364,
0.6359503981,
-0.2842529214,
-2.6564554209,
-2.4404669286,
1.3201133457,
-0.3066385941,
-1.7813084340,
-0.1719173558,
1.2146746992,
1.8951934613,
-0.4304691316,
-0.2572693828,
-1.7631630852,
0.4600973548,
-0.6399948760,
0.4554501232,
0.7048373372,
1.0351035220,
-0.6089263754,
0.5049551233,
-1.7170086791,
-0.7844590084,
-0.8509075942,
-2.4142076499,
0.0361226069,
0.2059986002,
-0.3610572985,
0.7581632357,
-0.7267048271,
-1.3682810444,
0.4328180259,
-0.8113931762,
1.4441012617,
-0.4314462026,
0.6556478834,
];
let x2 = vec![
1.3741776998,
-0.5725365608,
0.3788856865,
0.6392915980,
0.4051659296,
-0.1033590086,
1.5183148856,
-0.0937607095,
1.9884928130,
-0.0598652695,
1.3011973078,
2.2884976983,
-1.3830424638,
-0.2647913985,
-0.1405942570,
0.6489758244,
-0.2808944402,
-2.6460703599,
-2.4312596429,
1.3273221274,
-0.3170697835,
-1.7822102978,
-0.1656821741,
1.2051394656,
1.8897651731,
-0.4246591666,
-0.2495875954,
-1.7585254093,
0.4512395919,
-0.6509926849,
0.4705771933,
0.7074165516,
1.0359879243,
-0.6101353408,
0.4930118343,
-1.7108887101,
-0.7866304068,
-0.8527351612,
-2.4048741867,
0.0443403380,
0.2199197640,
-0.3658190378,
0.7646667213,
-0.7127937225,
-1.3793889332,
0.4242101000,
-0.8227105630,
1.4295091217,
-0.4306463771,
0.6621799268,
];
let x3 = vec![
1.2009653756,
1.0447510872,
-1.0032086468,
1.8484819017,
-0.6667734088,
0.1055138125,
-0.4222558819,
-0.1223501720,
0.1881930345,
0.1191609580,
-0.0250925509,
0.1080727279,
-0.4854352358,
-0.5042171307,
-1.6610990799,
-0.3823337269,
-0.5126502579,
2.7018910003,
-1.3621162312,
0.1372562186,
-1.4936250673,
-1.4704357414,
0.1247023862,
-0.9966391349,
-0.0018226143,
-0.4282588814,
-0.6136716064,
-2.0246778454,
-1.2247479504,
0.1795164411,
0.5676205944,
-0.4928773536,
0.0000628841,
1.1228896434,
1.4398557430,
-1.0971137684,
-0.1173195603,
1.2014984009,
-0.4697295806,
-0.0524694849,
-0.0861072982,
-0.8876790179,
-0.4446840049,
-0.0294448791,
-0.4138688491,
1.1133860234,
-0.4809928417,
-0.4331690326,
0.6968625766,
-1.0563684132,
];
let y_data = vec![
7.3244637835,
2.2290845075,
-0.6997843432,
7.6743480643,
-0.4257062423,
0.4851662412,
2.7523953324,
0.0434903183,
5.3346803663,
1.8758922987,
3.4466987207,
5.3616177772,
-3.1524236685,
-1.2515981335,
-3.9549331385,
1.8411105794,
-1.6028028720,
4.0200873080,
-7.9248335214,
4.4997781383,
-4.2090414596,
-6.5556145580,
0.1577445164,
1.2841614543,
5.2173080689,
-1.2211029019,
-2.0800571500,
-8.2788553566,
-1.5124522095,
0.2553817582,
3.6894899762,
0.6389881286,
3.2547990625,
3.2981433492,
6.1898477889,
-6.3934769908,
-0.5705022883,
3.1797783255,
-5.6557573380,
0.1175426780,
1.2561545960,
-2.5576956398,
1.3085803084,
-1.1887455242,
-3.4577538582,
5.7486815487,
-1.8638774250,
2.8819391940,
3.1353095475,
-0.7933987584,
];
let n = 50;
let mut x = Mat::zeros(n, 3);
for i in 0..n {
x[(i, 0)] = x1[i];
x[(i, 1)] = x2[i];
x[(i, 2)] = x3[i];
}
let y = Col::from_fn(n, |i| y_data[i]);
(x, y)
}
#[test]
fn test_vif_collinearity_vs_r() {
let (x, _y) = dataset6();
let vif = variance_inflation_factor(&x);
assert!(vif[0] > 1000.0, "VIF[0] = {} should be > 1000", vif[0]);
assert!(vif[1] > 1000.0, "VIF[1] = {} should be > 1000", vif[1]);
assert!(vif[2] < 2.0, "VIF[2] = {} should be < 2", vif[2]);
}
#[test]
fn test_ridge_handles_collinearity_vs_r() {
let (x, y) = dataset6();
let ridge = RidgeRegressor::builder()
.with_intercept(true)
.lambda(0.1)
.build();
let fitted = ridge.fit(&x, &y).expect("fit should succeed");
assert!(
fitted.intercept().unwrap().is_finite(),
"Ridge intercept should be finite"
);
for j in 0..3 {
assert!(
fitted.coefficients()[j].is_finite(),
"Ridge coef[{}] should be finite",
j
);
}
assert!(
(fitted.coefficients()[2] - 3.0).abs() < 1.0,
"Ridge coef[2] = {} should be close to 3.0",
fitted.coefficients()[2]
);
}
fn dataset7() -> (Mat<f64>, Col<f64>) {
let x_data = vec![
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 50.0,
];
let y_data = vec![
6.3709584471,
7.4353018286,
11.3631284113,
14.6328626050,
17.4042683231,
19.8938754839,
24.5115219974,
25.9053409616,
31.0184237139,
31.9372859009,
36.3048696542,
40.2866453927,
39.6111392989,
43.7212112332,
46.8666786636,
50.6359503981,
52.7157470786,
53.3435445791,
56.5595330714,
72.0000000000,
];
let x = Mat::from_fn(20, 1, |i, _| x_data[i]);
let y = Col::from_fn(20, |i| y_data[i]);
(x, y)
}
#[test]
fn test_leverage_vs_r() {
let (x, _y) = dataset7();
let leverage = compute_leverage(&x, true);
assert_relative_eq!(leverage[0], 0.107894736842105, epsilon = 1e-6);
assert_relative_eq!(leverage[11], 0.050000000000000, epsilon = 1e-6);
assert_relative_eq!(leverage[19], 0.740909090909091, epsilon = 1e-6);
let max_idx = (0..20)
.max_by(|&a, &b| leverage[a].partial_cmp(&leverage[b]).unwrap())
.unwrap();
assert_eq!(max_idx, 19, "Point 20 should have highest leverage");
}
#[test]
fn test_cooks_distance_vs_r() {
let (x, y) = dataset7();
let model = OlsRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("fit should succeed");
let leverage = compute_leverage(&x, true);
let residuals = &fitted.result().residuals;
let mse = fitted.result().mse;
let n_params = fitted.result().n_parameters;
let cooks = cooks_distance(residuals, &leverage, mse, n_params);
assert!(
cooks[19] > 10.0,
"Cook's distance for point 20 should be very high, got {}",
cooks[19]
);
let max_idx = (0..20)
.filter(|&i| cooks[i].is_finite())
.max_by(|&a, &b| cooks[a].partial_cmp(&cooks[b]).unwrap())
.unwrap();
assert_eq!(max_idx, 19, "Point 20 should have highest Cook's distance");
for i in 0..19 {
assert!(
cooks[i] < 1.0,
"Cook's distance for point {} should be < 1, got {}",
i + 1,
cooks[i]
);
}
}
#[test]
fn test_full_workflow_matches_r() {
let (x, y) = dataset1();
let ols = OlsRegressor::builder()
.with_intercept(true)
.compute_inference(true)
.build();
let fitted = ols.fit(&x, &y).expect("fit should succeed");
assert_relative_eq!(fitted.r_squared(), 0.998773884764589, epsilon = 1e-6);
assert_relative_eq!(
fitted.result().adj_r_squared,
0.998705767251511,
epsilon = 1e-6
);
assert_relative_eq!(
fitted.result().f_statistic,
14662.512467465243,
epsilon = 1.0
);
let x_new = Mat::from_fn(1, 1, |_, _| 25.0);
let pred = fitted.predict(&x_new);
let expected_pred = 3.008845227701627 + 2.960677598286641 * 25.0;
assert_relative_eq!(pred[0], expected_pred, epsilon = 1e-6);
}
#[test]
fn test_ridge_lambda_scaling_glmnet() {
use anofox_regression::LambdaScaling;
let (x, y) = dataset2();
let n = x.nrows();
let ridge_raw = RidgeRegressor::builder()
.with_intercept(true)
.lambda(1.0)
.lambda_scaling(LambdaScaling::Raw)
.build();
let fitted_raw = ridge_raw.fit(&x, &y).expect("fit should succeed");
let ridge_glmnet = RidgeRegressor::builder()
.with_intercept(true)
.lambda(1.0 / n as f64)
.lambda_scaling(LambdaScaling::Glmnet)
.build();
let fitted_glmnet = ridge_glmnet.fit(&x, &y).expect("fit should succeed");
for j in 0..x.ncols() {
assert_relative_eq!(
fitted_raw.coefficients()[j],
fitted_glmnet.coefficients()[j],
epsilon = 1e-8
);
}
assert_relative_eq!(
fitted_raw.intercept().unwrap(),
fitted_glmnet.intercept().unwrap(),
epsilon = 1e-8
);
}
#[test]
fn test_elastic_net_lambda_scaling_glmnet() {
use anofox_regression::LambdaScaling;
let (x, y) = dataset2();
let n = x.nrows();
let enet_raw = ElasticNetRegressor::builder()
.with_intercept(true)
.lambda(1.0)
.alpha(0.5)
.lambda_scaling(LambdaScaling::Raw)
.build();
let fitted_raw = enet_raw.fit(&x, &y).expect("fit should succeed");
let enet_glmnet = ElasticNetRegressor::builder()
.with_intercept(true)
.lambda(1.0 / n as f64)
.alpha(0.5)
.lambda_scaling(LambdaScaling::Glmnet)
.build();
let fitted_glmnet = enet_glmnet.fit(&x, &y).expect("fit should succeed");
for j in 0..x.ncols() {
assert_relative_eq!(
fitted_raw.coefficients()[j],
fitted_glmnet.coefficients()[j],
epsilon = 1e-6
);
}
assert_relative_eq!(
fitted_raw.intercept().unwrap(),
fitted_glmnet.intercept().unwrap(),
epsilon = 1e-6
);
}
fn dataset_with_na() -> (Mat<f64>, Col<f64>) {
let x_data = [
[1.0, 1.0],
[2.0, f64::NAN],
[f64::NAN, 3.0],
[4.0, 4.0],
[5.0, 5.0],
[6.0, f64::NAN],
[f64::NAN, 7.0],
[8.0, 8.0],
];
let y_data = [2.0, 4.0, 6.0, f64::NAN, 10.0, 12.0, 14.0, 16.0];
let x = Mat::from_fn(8, 2, |i, j| x_data[i][j]);
let y = Col::from_fn(8, |i| y_data[i]);
(x, y)
}
#[test]
fn test_na_omit_vs_r() {
let (x, y) = dataset_with_na();
let result = NaHandler::process(&x, &y, NaAction::Omit).expect("should succeed");
assert_eq!(
result.na_info.n_removed, 5,
"Should remove 5 rows with NA, got {}",
result.na_info.n_removed
);
assert_eq!(result.x_clean.nrows(), 3);
assert_eq!(result.y_clean.nrows(), 3);
assert_relative_eq!(result.x_clean[(0, 0)], 1.0, epsilon = 1e-10);
assert_relative_eq!(result.x_clean[(0, 1)], 1.0, epsilon = 1e-10);
assert_relative_eq!(result.y_clean[0], 2.0, epsilon = 1e-10);
assert_relative_eq!(result.x_clean[(1, 0)], 5.0, epsilon = 1e-10);
assert_relative_eq!(result.x_clean[(1, 1)], 5.0, epsilon = 1e-10);
assert_relative_eq!(result.y_clean[1], 10.0, epsilon = 1e-10);
assert_relative_eq!(result.x_clean[(2, 0)], 8.0, epsilon = 1e-10);
assert_relative_eq!(result.x_clean[(2, 1)], 8.0, epsilon = 1e-10);
assert_relative_eq!(result.y_clean[2], 16.0, epsilon = 1e-10);
}
#[test]
fn test_na_exclude_vs_r() {
let (x, y) = dataset_with_na();
let result = NaHandler::process(&x, &y, NaAction::Exclude).expect("should succeed");
assert_eq!(result.na_info.n_removed, 5);
assert_eq!(result.x_clean.nrows(), 3);
let short_vec = Col::from_fn(3, |i| (i + 1) as f64);
let expanded = result.na_info.expand(&short_vec);
assert_eq!(expanded.nrows(), 8);
assert_relative_eq!(expanded[0], 1.0, epsilon = 1e-10); assert_relative_eq!(expanded[4], 2.0, epsilon = 1e-10); assert_relative_eq!(expanded[7], 3.0, epsilon = 1e-10);
assert!(expanded[1].is_nan());
assert!(expanded[2].is_nan());
assert!(expanded[3].is_nan());
assert!(expanded[5].is_nan());
assert!(expanded[6].is_nan());
}
#[test]
fn test_na_fail_vs_r() {
let (x, y) = dataset_with_na();
let result = NaHandler::process(&x, &y, NaAction::Fail);
assert!(result.is_err(), "na.fail should error on NA data");
}
#[test]
fn test_na_pass_vs_r() {
let (x, y) = dataset_with_na();
let result = NaHandler::process(&x, &y, NaAction::Pass).expect("should succeed");
assert_eq!(result.na_info.n_removed, 0);
assert_eq!(result.x_clean.nrows(), 8);
assert_eq!(result.y_clean.nrows(), 8);
assert!(result.x_clean[(2, 0)].is_nan()); assert!(result.y_clean[3].is_nan()); }
#[test]
fn test_na_clean_data_no_removal() {
let x = Mat::from_fn(5, 2, |i, j| (i + j) as f64);
let y = Col::from_fn(5, |i| (i * 2) as f64);
for action in [NaAction::Omit, NaAction::Exclude, NaAction::Pass] {
let result = NaHandler::process(&x, &y, action).expect("should succeed");
assert_eq!(
result.na_info.n_removed, 0,
"No NA to remove for {:?}",
action
);
assert_eq!(result.x_clean.nrows(), 5);
}
let result = NaHandler::process(&x, &y, NaAction::Fail);
assert!(result.is_ok(), "na.fail should succeed with clean data");
}
#[test]
fn test_nnls_vs_r() {
let a = Mat::from_fn(4, 3, |i, j| (i + j * 4 + 1) as f64);
let b = Col::from_fn(4, |i| (i + 1) as f64);
let fitted = BlsRegressor::nnls()
.build()
.fit(&a, &b)
.expect("fit should succeed");
for j in 0..3 {
assert!(
fitted.coefficients()[j] >= -1e-10,
"NNLS coefficient {} = {} should be >= 0",
j,
fitted.coefficients()[j]
);
}
let predictions = fitted.predict(&a);
let rss: f64 = (0..4).map(|i| (b[i] - predictions[i]).powi(2)).sum();
assert!(rss < 2.0, "RSS {} should be reasonable for NNLS", rss);
}
#[test]
fn test_bls_box_constraints_vs_r() {
let a = Mat::from_fn(10, 3, |i, j| ((i + 1) * (j + 1)) as f64);
let b = Col::from_fn(10, |i| 2.0 * i as f64 + 1.0);
let lower = vec![0.0, -1.0, 0.0];
let upper = vec![0.5, 1.0, f64::INFINITY];
let fitted = BlsRegressor::builder()
.lower_bounds(lower.clone())
.upper_bounds(upper.clone())
.build()
.fit(&a, &b)
.expect("fit should succeed");
assert!(
fitted.coefficients()[0] >= lower[0] - 1e-10,
"coef[0] {} should be >= {}",
fitted.coefficients()[0],
lower[0]
);
assert!(
fitted.coefficients()[0] <= upper[0] + 1e-10,
"coef[0] {} should be <= {}",
fitted.coefficients()[0],
upper[0]
);
assert!(
fitted.coefficients()[1] >= lower[1] - 1e-10,
"coef[1] {} should be >= {}",
fitted.coefficients()[1],
lower[1]
);
assert!(
fitted.coefficients()[1] <= upper[1] + 1e-10,
"coef[1] {} should be <= {}",
fitted.coefficients()[1],
upper[1]
);
assert!(
fitted.coefficients()[2] >= lower[2] - 1e-10,
"coef[2] {} should be >= {}",
fitted.coefficients()[2],
lower[2]
);
}
#[test]
fn test_nnls_vs_ols_when_ols_positive() {
let x = Mat::from_fn(20, 2, |i, _| (i + 1) as f64);
let y = Col::from_fn(20, |i| 1.0 + 0.5 * (i + 1) as f64);
let ols_fitted = OlsRegressor::builder()
.with_intercept(false)
.build()
.fit(&x, &y)
.expect("OLS fit should succeed");
if ols_fitted.coefficients().iter().all(|&c| c >= 0.0) {
let nnls_fitted = BlsRegressor::nnls()
.build()
.fit(&x, &y)
.expect("NNLS fit should succeed");
for j in 0..2 {
assert_relative_eq!(
nnls_fitted.coefficients()[j],
ols_fitted.coefficients()[j],
epsilon = 0.1
);
}
}
}
#[test]
fn test_tweedie_gamma_vs_r() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let eta = 0.5 + 0.3 * ((i + 1) as f64 / 10.0);
eta.exp() });
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(
(fitted.result().intercept.unwrap() - 0.5).abs() < 0.2,
"Gamma GLM intercept {} should be close to 0.5",
fitted.result().intercept.unwrap()
);
assert!(
(fitted.coefficients()[0] - 0.3).abs() < 0.1,
"Gamma GLM slope {} should be close to 0.3",
fitted.coefficients()[0]
);
assert!(
fitted.deviance < fitted.null_deviance,
"Deviance {} should be < null deviance {}",
fitted.deviance,
fitted.null_deviance
);
}
#[test]
fn test_tweedie_poisson_vs_r() {
let n = 30;
let x = Mat::from_fn(n, 1, |i, _| i as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let eta = 1.0 + 0.2 * (i as f64 / 10.0);
eta.exp().max(0.1) });
let fitted = TweedieRegressor::poisson()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.result().r_squared > 0.5, "R² should be decent");
assert!(fitted.iterations > 0, "Should have run IRLS iterations");
assert!(fitted.dispersion > 0.0, "Dispersion should be positive");
}
#[test]
fn test_tweedie_gaussian_vs_ols() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |i| 2.0 + 3.0 * i as f64);
let glm_fitted = TweedieRegressor::gaussian()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("GLM fit should succeed");
let ols_fitted = OlsRegressor::builder()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("OLS fit should succeed");
assert_relative_eq!(
glm_fitted.result().intercept.unwrap(),
ols_fitted.intercept().unwrap(),
epsilon = 0.5
);
assert_relative_eq!(
glm_fitted.coefficients()[0],
ols_fitted.coefficients()[0],
epsilon = 0.1
);
}
#[test]
fn test_tweedie_compound_poisson_gamma() {
let n = 40;
let x = Mat::from_fn(n, 1, |i, _| i as f64);
let y = Col::from_fn(n, |i| {
if i % 5 == 0 {
0.0 } else {
(1.0 + 0.1 * i as f64).exp()
}
});
let fitted = TweedieRegressor::builder()
.var_power(1.5)
.link_power(0.0) .with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.iterations > 0);
assert!(fitted.dispersion > 0.0);
}
#[test]
fn test_tweedie_inverse_gaussian() {
let n = 30;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(n, |i| {
let eta = 0.5 + 0.02 * (i + 1) as f64;
eta.exp()
});
let fitted = TweedieRegressor::inverse_gaussian()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.result().intercept.is_some(), "Should have intercept");
assert!(fitted.deviance.is_finite(), "Deviance should be finite");
}
#[test]
fn test_tweedie_deviance_calculation() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let pseudo_r2 = 1.0 - fitted.deviance / fitted.null_deviance;
assert!(
pseudo_r2 > 0.0 && pseudo_r2 <= 1.0,
"Pseudo R² {} should be in (0, 1]",
pseudo_r2
);
}
use anofox_regression::solvers::BinomialRegressor;
#[test]
fn test_binomial_logistic_vs_r() {
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0);
let prob = 1.0 / (1.0 + (-0.5 - 1.0 * xi).exp());
let noise = ((i * 17 + 31) % 100) as f64 / 100.0;
if prob > noise {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::logistic()
.with_intercept(true)
.compute_inference(true)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(
fitted.coefficients()[0] > 0.0,
"Logistic coef should be positive, got {}",
fitted.coefficients()[0]
);
assert!(
fitted.deviance < fitted.null_deviance,
"Deviance {} should be < null deviance {}",
fitted.deviance,
fitted.null_deviance
);
assert!(
fitted.result().std_errors.is_some(),
"Should have standard errors"
);
}
#[test]
fn test_binomial_probit_vs_r() {
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| -2.0 + 4.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -2.0 + 4.0 * (i as f64) / (n as f64 - 1.0);
if xi > 0.1 * ((i % 7) as f64 - 3.0) {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::probit()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.coefficients()[0] > 0.0);
assert!(fitted.deviance < fitted.null_deviance);
}
#[test]
fn test_binomial_cloglog_vs_r() {
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| -2.0 + 4.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -2.0 + 4.0 * (i as f64) / (n as f64 - 1.0);
if xi > 0.1 * ((i % 7) as f64 - 3.0) {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::cloglog()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.coefficients()[0] > 0.0);
assert!(fitted.deviance < fitted.null_deviance);
}
#[test]
fn test_binomial_predict_probability() {
let n = 80;
let x = Mat::from_fn(n, 1, |i, _| -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0);
let prob = 1.0 / (1.0 + (-xi).exp());
if prob > 0.5 + 0.1 * ((i % 5) as f64 - 2.0) / 2.0 {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::logistic()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let probs = fitted.predict_probability(&x);
for i in 0..n {
assert!(
probs[i] > 0.0 && probs[i] < 1.0,
"Probability {} at {} should be in (0, 1)",
probs[i],
i
);
}
assert!(
probs[n - 1] > probs[0],
"Prob at max x should be > prob at min x"
);
}
#[test]
fn test_glm_residuals_tweedie() {
let n = 30;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(n, |i| {
let base = (1.0 + 0.1 * (i + 1) as f64).exp();
let noise_factor = 1.0 + 0.1 * (((i * 17 + 7) % 20) as f64 - 10.0) / 10.0;
base * noise_factor
});
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let pearson = fitted.pearson_residuals();
let deviance = fitted.deviance_residuals();
let working = fitted.working_residuals();
assert_eq!(pearson.nrows(), n);
assert_eq!(deviance.nrows(), n);
assert_eq!(working.nrows(), n);
let mean_pearson: f64 = pearson.iter().sum::<f64>() / n as f64;
assert!(
mean_pearson.abs() < 1.0,
"Mean Pearson residual should be near 0, got {}",
mean_pearson
);
let dev_ss: f64 = deviance.iter().map(|r| r.powi(2)).sum();
assert!(
(dev_ss - fitted.deviance).abs() < 0.1 * fitted.deviance.abs() + 1e-6,
"Sum of squared deviance residuals {} should ≈ deviance {}",
dev_ss,
fitted.deviance
);
}
#[test]
fn test_glm_residuals_binomial() {
let n = 80;
let x = Mat::from_fn(n, 1, |i, _| -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0);
let prob = 1.0 / (1.0 + (-xi).exp());
if prob > 0.5 + 0.1 * ((i % 5) as f64 - 2.0) / 2.0 {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::logistic()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let pearson = fitted.pearson_residuals();
let deviance = fitted.deviance_residuals();
let working = fitted.working_residuals();
assert_eq!(pearson.nrows(), n);
assert_eq!(deviance.nrows(), n);
assert_eq!(working.nrows(), n);
for i in 0..n {
assert!(pearson[i].is_finite(), "Pearson residual {} not finite", i);
assert!(
deviance[i].is_finite(),
"Deviance residual {} not finite",
i
);
assert!(working[i].is_finite(), "Working residual {} not finite", i);
}
}
use anofox_regression::core::PredictionType;
#[test]
fn test_tweedie_predict_with_se() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(n, |i| {
let base = (1.0 + 0.05 * (i + 1) as f64).exp();
let noise_factor = 1.0 + 0.1 * (((i * 13 + 5) % 20) as f64 - 10.0) / 10.0;
base * noise_factor
});
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let x_new = Mat::from_fn(5, 1, |i, _| 10.0 + i as f64 * 5.0);
let pred = fitted.predict_with_se(&x_new, PredictionType::Response, None, 0.95);
for i in 0..5 {
assert!(pred.se[i] > 0.0, "SE should be positive at {}", i);
assert!(pred.fit[i] > 0.0, "Prediction should be positive at {}", i);
}
}
#[test]
fn test_tweedie_predict_with_ci() {
use anofox_regression::core::IntervalType;
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(n, |i| {
let base = (1.0 + 0.05 * (i + 1) as f64).exp();
let noise_factor = 1.0 + 0.1 * (((i * 13 + 5) % 20) as f64 - 10.0) / 10.0;
base * noise_factor
});
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let x_new = Mat::from_fn(5, 1, |i, _| 10.0 + i as f64 * 5.0);
let pred = fitted.predict_with_se(
&x_new,
PredictionType::Response,
Some(IntervalType::Confidence),
0.95,
);
for i in 0..5 {
assert!(
pred.lower[i] <= pred.fit[i],
"Lower CI should be <= prediction at {}",
i
);
assert!(
pred.upper[i] >= pred.fit[i],
"Upper CI should be >= prediction at {}",
i
);
assert!(pred.lower[i] > 0.0, "Lower CI should be positive at {}", i);
}
}
#[test]
fn test_binomial_predict_with_se() {
let n = 80;
let x = Mat::from_fn(n, 1, |i, _| -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0);
let prob = 1.0 / (1.0 + (-xi).exp());
if prob > 0.5 + 0.1 * ((i % 5) as f64 - 2.0) / 2.0 {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::logistic()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let x_new = Mat::from_fn(5, 1, |i, _| -2.0 + i as f64);
let pred = fitted.predict_with_se(&x_new, PredictionType::Response, None, 0.95);
for i in 0..5 {
assert!(pred.se[i] > 0.0, "SE should be positive at {}", i);
assert!(
pred.fit[i] > 0.0 && pred.fit[i] < 1.0,
"Prediction should be in (0,1) at {}",
i
);
}
}
#[test]
fn test_binomial_predict_link_scale() {
let n = 80;
let x = Mat::from_fn(n, 1, |i, _| -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0);
let prob = 1.0 / (1.0 + (-xi).exp());
if prob > 0.5 + 0.1 * ((i % 5) as f64 - 2.0) / 2.0 {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::logistic()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let x_new = Mat::from_fn(5, 1, |i, _| -2.0 + i as f64);
let pred_link = fitted.predict_with_se(&x_new, PredictionType::Link, None, 0.95);
let pred_resp = fitted.predict_with_se(&x_new, PredictionType::Response, None, 0.95);
for i in 0..5 {
assert!(
pred_link.fit[i].is_finite(),
"Link prediction should be finite"
);
assert!(
pred_resp.fit[i] > 0.0 && pred_resp.fit[i] < 1.0,
"Response prediction should be in (0, 1)"
);
let expected_resp = 1.0 / (1.0 + (-pred_link.fit[i]).exp());
assert_relative_eq!(pred_resp.fit[i], expected_resp, epsilon = 1e-6);
}
}
#[test]
fn test_poisson_log_vs_r() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let eta = 0.5 + 0.3 * ((i + 1) as f64 / 10.0);
let mu = eta.exp();
(mu + 0.5 * ((i % 5) as f64 - 2.0)).round().max(0.0)
});
let fitted = PoissonRegressor::log()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(
fitted.result().coefficients[0] > 0.0,
"Poisson coef should be positive, got {}",
fitted.result().coefficients[0]
);
assert!(
fitted.deviance < fitted.null_deviance,
"Deviance {} should be < null deviance {}",
fitted.deviance,
fitted.null_deviance
);
assert!(
fitted.result().std_errors.is_some(),
"Should have standard errors"
);
}
#[test]
fn test_poisson_identity_link() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(n, |i| {
let mu = 5.0 + 0.5 * ((i + 1) as f64);
(mu + 0.3 * ((i % 5) as f64 - 2.0)).round().max(0.0)
});
let fitted = PoissonRegressor::identity()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.result().coefficients[0] > 0.0);
assert!(fitted.deviance < fitted.null_deviance);
}
#[test]
fn test_poisson_sqrt_link() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let eta = (1.0 + 0.2 * ((i + 1) as f64 / 10.0)).powi(2);
(eta + 0.3 * ((i % 5) as f64 - 2.0)).round().max(0.0)
});
let fitted = PoissonRegressor::sqrt()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.iterations > 0);
assert!(fitted.deviance.is_finite());
}
#[test]
fn test_poisson_matches_tweedie_poisson() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let eta = 0.5 + 0.3 * ((i + 1) as f64 / 10.0);
eta.exp().round().max(1.0)
});
let poisson = PoissonRegressor::log()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("Poisson fit should succeed");
let tweedie = TweedieRegressor::poisson()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("Tweedie fit should succeed");
assert_relative_eq!(
poisson.result().intercept.unwrap(),
tweedie.result().intercept.unwrap(),
epsilon = 0.1
);
assert_relative_eq!(
poisson.result().coefficients[0],
tweedie.coefficients()[0],
epsilon = 0.05
);
}
#[test]
fn test_poisson_with_offset() {
let n = 50;
let exposure = Col::from_fn(n, |i| 1.0 + (i % 3) as f64); let offset = Col::from_fn(n, |i| exposure[i].ln());
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let rate = (0.5 + 0.2 * ((i + 1) as f64 / 10.0)).exp();
(exposure[i] * rate).round().max(0.0)
});
let fitted = PoissonRegressor::log()
.with_intercept(true)
.offset(offset)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.iterations > 0);
assert!(
fitted.result().coefficients[0] > 0.0,
"Rate coefficient should be positive"
);
}
#[test]
fn test_poisson_residuals() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let eta = 0.5 + 0.3 * ((i + 1) as f64 / 10.0);
(eta.exp() + 0.5 * ((i % 5) as f64 - 2.0)).round().max(0.0)
});
let fitted = PoissonRegressor::log()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let pearson = fitted.pearson_residuals();
let deviance = fitted.deviance_residuals();
let working = fitted.working_residuals();
assert_eq!(pearson.nrows(), n);
assert_eq!(deviance.nrows(), n);
assert_eq!(working.nrows(), n);
for i in 0..n {
assert!(pearson[i].is_finite());
assert!(deviance[i].is_finite());
assert!(working[i].is_finite());
}
}
#[test]
fn test_negative_binomial_fixed_theta() {
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let mu = (0.5 + 0.2 * ((i + 1) as f64 / 10.0)).exp();
let extra = if i % 4 == 0 { mu * 0.5 } else { 0.0 };
(mu + extra + 0.3 * ((i % 7) as f64 - 3.0)).round().max(0.0)
});
let fitted = NegativeBinomialRegressor::with_theta(2.0)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert_relative_eq!(fitted.theta, 2.0, epsilon = 1e-10);
assert!(
fitted.result().coefficients[0] > 0.0,
"NB coef should be positive, got {}",
fitted.result().coefficients[0]
);
assert!(
fitted.deviance < fitted.null_deviance,
"Deviance {} should be < null deviance {}",
fitted.deviance,
fitted.null_deviance
);
}
#[test]
fn test_negative_binomial_estimate_theta() {
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let mu = (0.5 + 0.2 * ((i + 1) as f64 / 10.0)).exp();
let extra = if i % 3 == 0 { mu * 0.8 } else { 0.0 };
(mu + extra + 0.4 * ((i % 5) as f64 - 2.0)).round().max(0.0)
});
let fitted = NegativeBinomialRegressor::builder()
.with_intercept(true)
.estimate_theta(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.theta > 0.0, "Theta should be positive");
assert!(fitted.result().coefficients[0] > 0.0);
}
#[test]
fn test_negative_binomial_overdispersion() {
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 20.0);
let y = Col::from_fn(n, |i| {
let mu = (1.0 + 0.2 * ((i + 1) as f64 / 20.0)).exp();
let extra = if i % 2 == 0 { mu * 1.5 } else { 0.0 };
(mu + extra).round().max(0.0)
});
let fitted = NegativeBinomialRegressor::builder()
.with_intercept(true)
.estimate_theta(true)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let ratio = fitted.overdispersion_ratio();
assert!(ratio > 1.0, "Overdispersion ratio {} should be > 1", ratio);
}
#[test]
fn test_negative_binomial_with_offset() {
let n = 100;
let exposure = Col::from_fn(n, |i| 1.0 + (i % 4) as f64);
let offset = Col::from_fn(n, |i| exposure[i].ln());
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let rate = (0.3 + 0.1 * ((i + 1) as f64 / 10.0)).exp();
(exposure[i] * rate + 0.3 * ((i % 5) as f64 - 2.0))
.round()
.max(0.0)
});
let fitted = NegativeBinomialRegressor::builder()
.with_intercept(true)
.offset(offset)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.iterations > 0);
}
#[test]
fn test_negative_binomial_vs_poisson_overdispersed() {
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let mu = (0.5 + 0.2 * ((i + 1) as f64 / 10.0)).exp();
let extra = if i % 3 == 0 { mu * 2.0 } else { 0.0 };
(mu + extra).round().max(0.0)
});
let poisson = PoissonRegressor::log()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("Poisson fit should succeed");
let nb = NegativeBinomialRegressor::builder()
.with_intercept(true)
.estimate_theta(true)
.build()
.fit(&x, &y)
.expect("NB fit should succeed");
assert!(nb.dispersion.is_finite());
assert!(poisson.dispersion.is_finite());
}
#[test]
fn test_negative_binomial_approaches_poisson() {
let n = 50;
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let eta = 0.5 + 0.3 * ((i + 1) as f64 / 10.0);
eta.exp().round().max(1.0)
});
let nb = NegativeBinomialRegressor::with_theta(1e6)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("NB fit should succeed");
let poisson = PoissonRegressor::log()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("Poisson fit should succeed");
assert!(
(nb.result().coefficients[0] - poisson.result().coefficients[0]).abs() < 0.1,
"NB with high theta should match Poisson"
);
}
#[test]
fn test_tweedie_with_offset() {
let n = 50;
let exposure = Col::from_fn(n, |i| 1.0 + (i % 3) as f64);
let offset = Col::from_fn(n, |i| exposure[i].ln());
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let rate = (0.5 + 0.1 * ((i + 1) as f64 / 10.0)).exp();
exposure[i] * rate * (1.0 + 0.1 * ((i % 5) as f64 - 2.0))
});
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.offset(offset)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.iterations > 0);
assert!(fitted.coefficients()[0] > 0.0);
}
#[test]
fn test_binomial_with_offset() {
let n = 100;
let base_odds = Col::from_fn(n, |i| 0.1 * ((i % 5) as f64));
let offset = Col::from_fn(n, |i| base_odds[i]);
let x = Mat::from_fn(n, 1, |i, _| -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0));
let y = Col::from_fn(n, |i| {
let xi = -3.0 + 6.0 * (i as f64) / (n as f64 - 1.0);
let eta = base_odds[i] + 0.5 + 1.0 * xi;
let prob = 1.0 / (1.0 + (-eta).exp());
if prob > 0.5 {
1.0
} else {
0.0
}
});
let fitted = BinomialRegressor::logistic()
.with_intercept(true)
.offset(offset)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("fit should succeed");
assert!(fitted.iterations > 0);
}
#[test]
fn test_predict_with_offset() {
let n = 50;
let offset = Col::from_fn(n, |i| ((i % 3) as f64 + 1.0).ln());
let x = Mat::from_fn(n, 1, |i, _| (i + 1) as f64 / 10.0);
let y = Col::from_fn(n, |i| {
let exp_i = (i % 3) as f64 + 1.0;
let rate = (0.5 + 0.2 * ((i + 1) as f64 / 10.0)).exp();
(exp_i * rate).round().max(0.0)
});
let fitted = PoissonRegressor::log()
.with_intercept(true)
.offset(offset)
.build()
.fit(&x, &y)
.expect("fit should succeed");
let x_new = Mat::from_fn(5, 1, |i, _| i as f64 + 1.0);
let offset_new = Col::from_fn(5, |_| 2.0_f64.ln());
let pred_no_offset = fitted.predict_count(&x_new);
let pred_with_offset = fitted.predict_with_offset(&x_new, &offset_new);
for i in 0..5 {
assert!(pred_no_offset[i] > 0.0);
assert!(pred_with_offset[i] > 0.0);
}
}