use anofox_regression::solvers::{FittedRegressor, NegativeBinomialRegressor, Regressor};
use faer::{Col, Mat};
fn main() {
println!("=== Negative Binomial Regression ===\n");
basic_negative_binomial();
estimate_vs_fixed_theta();
overdispersion_detection();
compare_poisson_negbin();
}
fn basic_negative_binomial() {
println!("--- Basic Negative Binomial Regression ---\n");
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| {
if j == 0 {
(i as f64) * 0.05
} else {
((i as f64) * 0.1).sin() * 0.5 + 0.5
}
});
let y = Col::from_fn(n, |i| {
let eta = 0.5 + 0.4 * x[(i, 0)] + 0.3 * x[(i, 1)];
let mu = eta.exp();
let extra_var = ((i as f64 * 1.3).sin()) * (mu * 0.5);
(mu + extra_var).round().max(0.0)
});
let model = NegativeBinomialRegressor::builder()
.estimate_theta(true)
.build();
let fitted = model.fit(&x, &y).expect("fit should succeed");
let result = fitted.result();
println!("True model: log(mu) = 0.5 + 0.4*x1 + 0.3*x2 (with overdispersion)\n");
println!(
"Intercept: {:.4} (true: 0.5)",
fitted.intercept().unwrap_or(0.0)
);
println!(
"Coefficient x1: {:.4} (true: 0.4)",
fitted.coefficients()[0]
);
println!(
"Coefficient x2: {:.4} (true: 0.3)",
fitted.coefficients()[1]
);
println!("\nTheta (dispersion): {:.4}", fitted.theta);
println!("AIC: {:.4}", result.aic);
println!();
}
fn estimate_vs_fixed_theta() {
println!("--- Estimated vs Fixed Theta ---\n");
let n = 80;
let x = Mat::from_fn(n, 1, |i, _| (i as f64) * 0.1);
let y = Col::from_fn(n, |i| {
let mu = (1.0 + 0.3 * x[(i, 0)]).exp();
let noise = ((i as f64 * 0.9).sin()) * (mu * 0.8);
(mu + noise).round().max(0.0)
});
let model_est = NegativeBinomialRegressor::builder()
.estimate_theta(true)
.build();
let fit_est = model_est.fit(&x, &y).expect("fit with estimated theta");
let thetas = [0.5, 1.0, 2.0, 5.0, 10.0, 100.0];
println!(
"{:<15} {:>12} {:>12} {:>12}",
"Theta", "Intercept", "Slope", "AIC"
);
println!("{}", "-".repeat(55));
println!(
"{:<15} {:>12.4} {:>12.4} {:>12.4}",
format!("Est ({:.2})", fit_est.theta),
fit_est.intercept().unwrap_or(0.0),
fit_est.coefficients()[0],
fit_est.result().aic
);
for &theta in &thetas {
let model = NegativeBinomialRegressor::with_theta(theta).build();
let fitted = model.fit(&x, &y).expect("fit with fixed theta");
println!(
"{:<15} {:>12.4} {:>12.4} {:>12.4}",
format!("Fixed ({})", theta),
fitted.intercept().unwrap_or(0.0),
fitted.coefficients()[0],
fitted.result().aic
);
}
println!("\nNote: As theta increases, model approaches Poisson.");
println!(" Lower AIC indicates better fit.");
println!();
}
fn overdispersion_detection() {
println!("--- Overdispersion Detection ---\n");
use anofox_regression::solvers::PoissonRegressor;
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| (i as f64) * 0.08);
let y = Col::from_fn(n, |i| {
let mu = (0.8 + 0.25 * x[(i, 0)]).exp();
let noise = ((i as f64 * 0.7).sin()) * (mu * 1.5);
(mu + noise).round().max(0.0)
});
let poisson_model = PoissonRegressor::log().build();
let poisson_fit = poisson_model.fit(&x, &y).expect("poisson fit");
let negbin_model = NegativeBinomialRegressor::builder()
.estimate_theta(true)
.build();
let negbin_fit = negbin_model.fit(&x, &y).expect("negbin fit");
let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
let y_var: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum::<f64>() / (n - 1) as f64;
println!("Data summary:");
println!(" Mean(y): {:.4}", y_mean);
println!(" Var(y): {:.4}", y_var);
println!(
" Var/Mean: {:.4} (>1 indicates overdispersion)",
y_var / y_mean
);
let poisson_residuals = &poisson_fit.result().residuals;
let poisson_fitted = &poisson_fit.result().fitted_values;
let pearson_chi_sq: f64 = poisson_residuals
.iter()
.zip(poisson_fitted.iter())
.map(|(&r, &mu)| r.powi(2) / mu.max(0.001))
.sum();
let dispersion_ratio = pearson_chi_sq / (n - 2) as f64;
println!(
" Dispersion: {:.4} (Pearson chi-sq / df)",
dispersion_ratio
);
println!("\nModel comparison:");
println!("{:<20} {:>12} {:>12}", "Model", "AIC", "Deviance/df");
println!("{}", "-".repeat(48));
let poisson_dev = poisson_fit.result().log_likelihood * -2.0;
let negbin_dev = negbin_fit.result().log_likelihood * -2.0;
println!(
"{:<20} {:>12.4} {:>12.4}",
"Poisson",
poisson_fit.result().aic,
poisson_dev / (n - 2) as f64
);
println!(
"{:<20} {:>12.4} {:>12.4}",
format!("NegBin (theta={:.2})", negbin_fit.theta),
negbin_fit.result().aic,
negbin_dev / (n - 3) as f64
);
println!("\nNote: Large Var/Mean ratio suggests overdispersion.");
println!(" Negative Binomial often has lower AIC with overdispersed data.");
println!();
}
fn compare_poisson_negbin() {
println!("--- Poisson vs Negative Binomial ---\n");
use anofox_regression::core::IntervalType;
use anofox_regression::solvers::PoissonRegressor;
let n = 100;
let x = Mat::from_fn(n, 1, |i, _| (i as f64) * 0.1);
let y = Col::from_fn(n, |i| {
let mu = (1.5 + 0.2 * x[(i, 0)]).exp();
let noise = ((i as f64 * 1.1).sin()) * (mu * 0.6);
(mu + noise).round().max(0.0)
});
let poisson = PoissonRegressor::log().build();
let poisson_fit = poisson.fit(&x, &y).expect("poisson");
let negbin = NegativeBinomialRegressor::builder()
.estimate_theta(true)
.build();
let negbin_fit = negbin.fit(&x, &y).expect("negbin");
println!("Coefficient comparison:\n");
println!("{:<15} {:>12} {:>12}", "Model", "Intercept", "Slope");
println!("{}", "-".repeat(42));
println!(
"{:<15} {:>12.4} {:>12.4}",
"Poisson",
poisson_fit.intercept().unwrap_or(0.0),
poisson_fit.coefficients()[0]
);
println!(
"{:<15} {:>12.4} {:>12.4}",
"Neg Binomial",
negbin_fit.intercept().unwrap_or(0.0),
negbin_fit.coefficients()[0]
);
let x_new = Mat::from_fn(5, 1, |i, _| (5 + i * 2) as f64);
let pois_pred = poisson_fit.predict_with_interval(&x_new, Some(IntervalType::Confidence), 0.95);
let nb_pred = negbin_fit.predict_with_interval(&x_new, Some(IntervalType::Confidence), 0.95);
println!("\nPredictions with 95% CI:\n");
println!(
"{:>6} {:>10} {:>10} {:>10} {:>10}",
"x", "Pois.Fit", "Pois.Width", "NB.Fit", "NB.Width"
);
println!("{}", "-".repeat(52));
for i in 0..5 {
let pois_width = pois_pred.upper[i] - pois_pred.lower[i];
let nb_width = nb_pred.upper[i] - nb_pred.lower[i];
println!(
"{:>6.1} {:>10.2} {:>10.2} {:>10.2} {:>10.2}",
x_new[(i, 0)],
pois_pred.fit[i],
pois_width,
nb_pred.fit[i],
nb_width
);
}
println!("\nNote: Negative Binomial often has wider confidence intervals,");
println!(" better reflecting uncertainty with overdispersed data.");
}