use crate::core::{
GlmFamily, IntervalType, PredictionResult, PredictionType, RegressionOptions,
RegressionOptionsBuilder, RegressionResult, TweedieFamily,
};
use crate::diagnostics::{deviance_residuals, pearson_residuals, working_residuals};
use crate::solvers::traits::{FittedRegressor, RegressionError, Regressor};
use crate::utils::detect_constant_columns;
use faer::{Col, Mat};
use statrs::distribution::{ContinuousCDF, FisherSnedecor, Normal};
#[derive(Debug, Clone)]
pub struct TweedieRegressor {
options: RegressionOptions,
family: TweedieFamily,
offset: Option<Col<f64>>,
}
impl TweedieRegressor {
pub fn new(options: RegressionOptions, family: TweedieFamily) -> Self {
Self {
options,
family,
offset: None,
}
}
pub fn builder() -> TweedieRegressorBuilder {
TweedieRegressorBuilder::default()
}
pub fn gaussian() -> TweedieRegressorBuilder {
TweedieRegressorBuilder::default()
.var_power(0.0)
.link_power(1.0)
}
pub fn poisson() -> TweedieRegressorBuilder {
TweedieRegressorBuilder::default()
.var_power(1.0)
.link_power(0.0)
}
pub fn gamma() -> TweedieRegressorBuilder {
TweedieRegressorBuilder::default()
.var_power(2.0)
.link_power(0.0)
}
pub fn inverse_gaussian() -> TweedieRegressorBuilder {
TweedieRegressorBuilder::default()
.var_power(3.0)
.link_power(0.0)
}
fn fit_irls(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<FittedTweedie, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
let n_params = if self.options.with_intercept {
n_features + 1
} else {
n_features
};
let x_design = if self.options.with_intercept {
let mut x_aug = Mat::zeros(n_samples, n_features + 1);
for i in 0..n_samples {
x_aug[(i, 0)] = 1.0;
for j in 0..n_features {
x_aug[(i, j + 1)] = x[(i, j)];
}
}
x_aug
} else {
x.clone()
};
let aliased = detect_constant_columns(x, self.options.rank_tolerance);
let y_vec: Vec<f64> = (0..n_samples).map(|i| y[i]).collect();
let mut mu: Vec<f64> = self.family.initialize_mu(&y_vec);
let mut eta: Vec<f64> = mu
.iter()
.enumerate()
.map(|(i, &m)| {
let base_eta = self.family.link(m);
if let Some(ref offset) = self.offset {
base_eta - offset[i]
} else {
base_eta
}
})
.collect();
let mut beta = Col::zeros(n_params);
let max_iter = self.options.max_iterations;
let tol = self.options.tolerance;
let mut converged = false;
let mut iterations = 0;
let mut dev = self.family.deviance(&y_vec, &mu);
let mut dev_old: f64;
const MAX_HALVINGS: usize = 10;
for iter in 0..max_iter {
iterations = iter + 1;
dev_old = dev;
let (weights, z) = self.compute_irls_quantities(&y_vec, &mu, &eta);
let beta_new = self.solve_weighted_ls(&x_design, &z, &weights)?;
let beta_old = beta.clone();
let max_change: f64 = beta_new
.iter()
.zip(beta_old.iter())
.map(|(&b_new, &b_old)| {
let diff: f64 = b_new - b_old;
diff.abs()
})
.fold(0.0_f64, f64::max);
beta = beta_new;
self.update_eta_mu(&x_design, &beta, &mut eta, &mut mu, n_samples, n_params)?;
dev = self.family.deviance(&y_vec, &mu);
if dev.is_finite() && dev_old.is_finite() {
let mut n_halvings = 0;
while dev > dev_old + 1e-7 * dev_old.abs() && n_halvings < MAX_HALVINGS {
n_halvings += 1;
for j in 0..n_params {
beta[j] = (beta[j] + beta_old[j]) / 2.0;
}
self.update_eta_mu(&x_design, &beta, &mut eta, &mut mu, n_samples, n_params)?;
dev = self.family.deviance(&y_vec, &mu);
}
}
let dev_converged = (dev - dev_old).abs() / (0.1 + dev.abs()) < tol;
let coef_converged = max_change < tol;
if dev_converged || coef_converged {
converged = true;
break;
}
}
if !converged {
return Err(RegressionError::ConvergenceFailed {
iterations: max_iter,
});
}
self.build_result(
x,
y,
&x_design,
&beta,
&mu,
&eta,
n_params,
iterations,
self.offset.clone(),
aliased,
)
}
fn compute_irls_quantities(&self, y: &[f64], mu: &[f64], eta: &[f64]) -> (Vec<f64>, Vec<f64>) {
let n = y.len();
let mut weights = vec![0.0; n];
let mut z = vec![0.0; n];
for i in 0..n {
weights[i] = self.family.irls_weight(mu[i]);
z[i] = self.family.working_response(y[i], mu[i], eta[i]);
}
(weights, z)
}
fn update_eta_mu(
&self,
x_design: &Mat<f64>,
beta: &Col<f64>,
eta: &mut [f64],
mu: &mut [f64],
n_samples: usize,
n_params: usize,
) -> Result<(), RegressionError> {
for i in 0..n_samples {
let mut eta_i = 0.0;
for j in 0..n_params {
eta_i += x_design[(i, j)] * beta[j];
}
let eta_with_offset = if let Some(ref offset) = self.offset {
eta_i + offset[i]
} else {
eta_i
};
eta[i] = eta_i;
if !self.family.valid_eta(eta_with_offset) {
return Err(RegressionError::NumericalError(
"Invalid linear predictor (eta) during IRLS: non-finite value".to_string(),
));
}
let mu_new = self.family.link_inverse(eta_with_offset);
if !self.family.valid_mu(mu_new) {
mu[i] = self.family.clamp_mu(mu_new);
} else {
mu[i] = mu_new;
}
}
Ok(())
}
fn solve_weighted_ls(
&self,
x: &Mat<f64>,
z: &[f64],
weights: &[f64],
) -> Result<Col<f64>, RegressionError> {
let n_samples = x.nrows();
let n_params = x.ncols();
let lambda = self.options.lambda;
if lambda > 0.0 {
let mut xtwx: Mat<f64> = Mat::zeros(n_params, n_params);
let mut xtwz: Col<f64> = Col::zeros(n_params);
for i in 0..n_samples {
let w = weights[i];
for j in 0..n_params {
xtwz[j] += w * x[(i, j)] * z[i];
for k in 0..n_params {
xtwx[(j, k)] += w * x[(i, j)] * x[(i, k)];
}
}
}
let start_idx = if self.options.with_intercept { 1 } else { 0 };
for j in start_idx..n_params {
xtwx[(j, j)] += lambda;
}
let qr = xtwx.qr();
let q = qr.compute_Q();
let r: Mat<f64> = qr.R().to_owned();
let qty = q.transpose() * &xtwz;
let mut beta: Col<f64> = Col::zeros(n_params);
for i in (0..n_params).rev() {
let mut sum: f64 = qty[i];
for j in (i + 1)..n_params {
sum -= r[(i, j)] * beta[j];
}
if r[(i, i)].abs() > self.options.rank_tolerance {
beta[i] = sum / r[(i, i)];
} else {
beta[i] = 0.0;
}
}
Ok(beta)
} else {
let mut x_weighted = Mat::zeros(n_samples, n_params);
let mut z_weighted = Col::zeros(n_samples);
for i in 0..n_samples {
let sqrt_w = weights[i].sqrt();
for j in 0..n_params {
x_weighted[(i, j)] = sqrt_w * x[(i, j)];
}
z_weighted[i] = sqrt_w * z[i];
}
let qr = x_weighted.col_piv_qr();
let q = qr.compute_Q();
let r = qr.R();
let perm = qr.P();
let qtz = q.transpose() * z_weighted;
let mut beta_perm = Col::zeros(n_params);
for i in (0..n_params).rev() {
let mut sum = qtz[i];
for j in (i + 1)..n_params {
sum -= r[(i, j)] * beta_perm[j];
}
if r[(i, i)].abs() > self.options.rank_tolerance {
beta_perm[i] = sum / r[(i, i)];
} else {
beta_perm[i] = 0.0;
}
}
let mut beta = Col::zeros(n_params);
for i in 0..n_params {
beta[perm.inverse().arrays().0[i]] = beta_perm[i];
}
Ok(beta)
}
}
#[allow(clippy::too_many_arguments)]
fn build_result(
&self,
x: &Mat<f64>,
y: &Col<f64>,
x_design: &Mat<f64>,
beta: &Col<f64>,
mu: &[f64],
_eta: &[f64],
n_params: usize,
iterations: usize,
offset: Option<Col<f64>>,
aliased: Vec<bool>,
) -> Result<FittedTweedie, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
let (intercept, coefficients) = if self.options.with_intercept {
let int = Some(beta[0]);
let coefs = Col::from_fn(n_features, |j| beta[j + 1]);
(int, coefs)
} else {
(None, beta.clone())
};
let fitted_values = Col::from_fn(n_samples, |i| mu[i]);
let residuals = Col::from_fn(n_samples, |i| y[i] - mu[i]);
let y_vec: Vec<f64> = (0..n_samples).map(|i| y[i]).collect();
let deviance = self.family.deviance(&y_vec, mu);
let null_deviance = self.family.null_deviance(&y_vec);
let df_resid = (n_samples.saturating_sub(n_params)) as f64;
let dispersion = if df_resid > 0.0 {
deviance / df_resid
} else {
1.0
};
let r_squared = if null_deviance > 0.0 {
1.0 - deviance / null_deviance
} else {
f64::NAN
};
let df_total = (n_samples - 1) as f64;
let adj_r_squared = if df_resid > 0.0 && df_total > 0.0 {
1.0 - (1.0 - r_squared) * df_total / df_resid
} else {
f64::NAN
};
let rss: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
let mse = if df_resid > 0.0 {
rss / df_resid
} else {
f64::NAN
};
let rmse = mse.sqrt();
let df_model = (n_params - if intercept.is_some() { 1 } else { 0 }) as f64;
let f_statistic = if df_model > 0.0 && df_resid > 0.0 && dispersion > 0.0 {
((null_deviance - deviance) / df_model) / dispersion
} else {
f64::NAN
};
let f_pvalue = if f_statistic.is_finite() && df_model > 0.0 && df_resid > 0.0 {
FisherSnedecor::new(df_model, df_resid)
.map(|d| 1.0 - d.cdf(f_statistic))
.unwrap_or(f64::NAN)
} else {
f64::NAN
};
let n = n_samples as f64;
let k = n_params as f64;
let log_likelihood = -deviance / (2.0 * dispersion)
- n * (2.0 * std::f64::consts::PI * dispersion).ln() / 2.0;
let aic = 2.0 * k - 2.0 * log_likelihood;
let aicc = if (n - k - 1.0) > 0.0 {
aic + 2.0 * k * (k + 1.0) / (n - k - 1.0)
} else {
f64::NAN
};
let bic = k * n.ln() - 2.0 * log_likelihood;
let rank = n_params;
let mut result = RegressionResult::empty(n_features, n_samples);
result.coefficients = coefficients;
result.intercept = intercept;
result.residuals = residuals;
result.fitted_values = fitted_values;
result.rank = rank;
result.n_parameters = n_params;
result.n_observations = n_samples;
result.aliased = aliased.clone();
result.r_squared = r_squared;
result.adj_r_squared = adj_r_squared;
result.mse = mse;
result.rmse = rmse;
result.f_statistic = f_statistic;
result.f_pvalue = f_pvalue;
result.aic = aic;
result.aicc = aicc;
result.bic = bic;
result.log_likelihood = log_likelihood;
result.confidence_level = self.options.confidence_level;
let mut xtwx_inverse = None;
if self.options.compute_inference {
if let Ok((se, xtwx_inv)) =
self.compute_standard_errors_and_covariance(x_design, mu, dispersion)
{
result.std_errors = Some(if self.options.with_intercept {
Col::from_fn(n_features, |j| se[j + 1])
} else {
se.clone()
});
if self.options.with_intercept {
result.intercept_std_error = Some(se[0]);
}
xtwx_inverse = Some(xtwx_inv);
}
}
Ok(FittedTweedie {
result,
options: self.options.clone(),
family: self.family,
deviance,
null_deviance,
dispersion,
iterations,
y_values: y.clone(),
xtwx_inverse,
offset,
aliased,
})
}
fn compute_standard_errors_and_covariance(
&self,
x: &Mat<f64>,
mu: &[f64],
dispersion: f64,
) -> Result<(Col<f64>, Mat<f64>), RegressionError> {
let n_samples = x.nrows();
let n_params = x.ncols();
let mut xtwx: Mat<f64> = Mat::zeros(n_params, n_params);
for i in 0..n_samples {
let w = self.family.irls_weight(mu[i]);
for j in 0..n_params {
for k in 0..n_params {
xtwx[(j, k)] += w * x[(i, j)] * x[(i, k)];
}
}
}
let qr = xtwx.qr();
let q = qr.compute_Q();
let r = qr.R().to_owned();
let mut xtwx_inv: Mat<f64> = Mat::zeros(n_params, n_params);
for col in 0..n_params {
let mut e = Col::zeros(n_params);
e[col] = 1.0;
let qte = q.transpose() * e;
let mut sol = Col::zeros(n_params);
for i in (0..n_params).rev() {
let mut sum = qte[i];
for j in (i + 1)..n_params {
sum -= r[(i, j)] * sol[j];
}
if r[(i, i)].abs() > 1e-14 {
sol[i] = sum / r[(i, i)];
}
}
for i in 0..n_params {
xtwx_inv[(i, col)] = sol[i];
}
}
let se = Col::from_fn(n_params, |j| (dispersion * xtwx_inv[(j, j)]).sqrt());
Ok((se, xtwx_inv))
}
}
impl Regressor for TweedieRegressor {
type Fitted = FittedTweedie;
fn fit(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Self::Fitted, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
if x.nrows() != y.nrows() {
return Err(RegressionError::DimensionMismatch {
x_rows: x.nrows(),
y_len: y.nrows(),
});
}
if n_samples < 2 {
return Err(RegressionError::InsufficientObservations {
needed: 2,
got: n_samples,
});
}
let n_params = if self.options.with_intercept {
n_features + 1
} else {
n_features
};
if n_samples < n_params {
return Err(RegressionError::InsufficientObservations {
needed: n_params,
got: n_samples,
});
}
if !self.family.is_valid() {
return Err(RegressionError::NumericalError(
"var_power in (0, 1) is not allowed".to_string(),
));
}
if let Some(ref offset) = self.offset {
if offset.nrows() != n_samples {
return Err(RegressionError::DimensionMismatch {
x_rows: n_samples,
y_len: offset.nrows(),
});
}
}
for i in 0..n_samples {
if self.family.var_power > 0.0 && y[i] < 0.0 {
return Err(RegressionError::NumericalError(format!(
"y values must be non-negative for var_power > 0, got y[{}] = {}",
i, y[i]
)));
}
}
self.fit_irls(x, y)
}
}
#[derive(Debug, Clone)]
pub struct FittedTweedie {
result: RegressionResult,
options: RegressionOptions,
family: TweedieFamily,
pub deviance: f64,
pub null_deviance: f64,
pub dispersion: f64,
pub iterations: usize,
y_values: Col<f64>,
xtwx_inverse: Option<Mat<f64>>,
#[allow(dead_code)]
offset: Option<Col<f64>>,
#[allow(dead_code)]
aliased: Vec<bool>,
}
impl FittedTweedie {
pub fn family(&self) -> &TweedieFamily {
&self.family
}
pub fn predict_mu(&self, x: &Mat<f64>) -> Col<f64> {
self.predict(x)
}
pub fn predict_eta(&self, x: &Mat<f64>) -> Col<f64> {
let mu = self.predict(x);
Col::from_fn(mu.nrows(), |i| self.family.link(mu[i]))
}
pub fn pearson_residuals(&self) -> Col<f64> {
let mu = &self.result.fitted_values;
pearson_residuals(&self.y_values, mu, &self.family)
}
pub fn deviance_residuals(&self) -> Col<f64> {
let mu = &self.result.fitted_values;
deviance_residuals(&self.y_values, mu, &self.family)
}
pub fn working_residuals(&self) -> Col<f64> {
let mu = &self.result.fitted_values;
working_residuals(&self.y_values, mu, &self.family)
}
pub fn predict_with_offset(&self, x: &Mat<f64>, offset: &Col<f64>) -> Col<f64> {
let n_samples = x.nrows();
let n_features = x.ncols();
let intercept = self.result.intercept.unwrap_or(0.0);
Col::from_fn(n_samples, |i| {
let mut eta = intercept;
for j in 0..n_features {
eta += x[(i, j)] * self.result.coefficients[j];
}
eta += offset[i];
self.family.link_inverse(eta)
})
}
pub fn predict_with_se(
&self,
x: &Mat<f64>,
pred_type: PredictionType,
interval: Option<IntervalType>,
level: f64,
) -> PredictionResult {
let n_new = x.nrows();
let n_features = x.ncols();
let xtwx_inv = match &self.xtwx_inverse {
Some(inv) => inv,
None => {
let predictions = match pred_type {
PredictionType::Response => self.predict(x),
PredictionType::Link => self.predict_eta(x),
};
return PredictionResult::point_only(predictions);
}
};
let x_design = if self.options.with_intercept {
let mut x_aug = Mat::zeros(n_new, n_features + 1);
for i in 0..n_new {
x_aug[(i, 0)] = 1.0;
for j in 0..n_features {
x_aug[(i, j + 1)] = x[(i, j)];
}
}
x_aug
} else {
x.clone()
};
let n_params = xtwx_inv.nrows();
let mut eta = Col::zeros(n_new);
let mut se_eta = Col::zeros(n_new);
for i in 0..n_new {
let mut eta_i = 0.0;
for j in 0..n_params {
eta_i += x_design[(i, j)]
* if self.options.with_intercept && j == 0 {
self.result.intercept.unwrap_or(0.0)
} else {
let coef_idx = if self.options.with_intercept {
j - 1
} else {
j
};
if coef_idx < self.result.coefficients.nrows() {
self.result.coefficients[coef_idx]
} else {
0.0
}
};
}
eta[i] = eta_i;
let mut var_eta = 0.0;
for j in 0..n_params {
for k in 0..n_params {
var_eta += x_design[(i, j)] * xtwx_inv[(j, k)] * x_design[(i, k)];
}
}
se_eta[i] = (var_eta * self.dispersion).sqrt();
}
let (fit, se) = match pred_type {
PredictionType::Link => (eta.clone(), se_eta.clone()),
PredictionType::Response => {
let mu = Col::from_fn(n_new, |i| self.family.link_inverse(eta[i]));
let se_mu = Col::from_fn(n_new, |i| {
let dmu_deta = self.family.link_inverse_derivative(eta[i]);
se_eta[i] * dmu_deta.abs()
});
(mu, se_mu)
}
};
match interval {
None => PredictionResult::with_intervals(
fit.clone(),
Col::zeros(n_new),
Col::zeros(n_new),
se,
),
Some(_interval_type) => {
let alpha = 1.0 - level;
let z = Normal::new(0.0, 1.0)
.map(|d| d.inverse_cdf(1.0 - alpha / 2.0))
.unwrap_or(1.96);
let (lower, upper) = match pred_type {
PredictionType::Link => {
let lower = Col::from_fn(n_new, |i| eta[i] - z * se_eta[i]);
let upper = Col::from_fn(n_new, |i| eta[i] + z * se_eta[i]);
(lower, upper)
}
PredictionType::Response => {
let lower = Col::from_fn(n_new, |i| {
self.family.link_inverse(eta[i] - z * se_eta[i])
});
let upper = Col::from_fn(n_new, |i| {
self.family.link_inverse(eta[i] + z * se_eta[i])
});
(lower, upper)
}
};
PredictionResult::with_intervals(fit, lower, upper, se)
}
}
}
}
impl FittedRegressor for FittedTweedie {
fn predict(&self, x: &Mat<f64>) -> Col<f64> {
let n_samples = x.nrows();
let n_features = x.ncols();
let intercept = self.result.intercept.unwrap_or(0.0);
Col::from_fn(n_samples, |i| {
let mut eta = intercept;
for j in 0..n_features {
eta += x[(i, j)] * self.result.coefficients[j];
}
self.family.link_inverse(eta)
})
}
fn result(&self) -> &RegressionResult {
&self.result
}
fn predict_with_interval(
&self,
x: &Mat<f64>,
interval: Option<IntervalType>,
_level: f64,
) -> PredictionResult {
let predictions = self.predict(x);
match interval {
None => PredictionResult::point_only(predictions),
Some(_) => {
let n = x.nrows();
let nan_vec = Col::from_fn(n, |_| f64::NAN);
PredictionResult::with_intervals(
predictions,
nan_vec.clone(),
nan_vec.clone(),
nan_vec,
)
}
}
}
}
#[derive(Debug, Clone, Default)]
pub struct TweedieRegressorBuilder {
options_builder: RegressionOptionsBuilder,
var_power: f64,
link_power: Option<f64>,
offset: Option<Col<f64>>,
}
impl TweedieRegressorBuilder {
pub fn new() -> Self {
Self {
options_builder: RegressionOptionsBuilder::default(),
var_power: 1.5, link_power: None,
offset: None,
}
}
pub fn with_intercept(mut self, include: bool) -> Self {
self.options_builder = self.options_builder.with_intercept(include);
self
}
pub fn compute_inference(mut self, compute: bool) -> Self {
self.options_builder = self.options_builder.compute_inference(compute);
self
}
pub fn confidence_level(mut self, level: f64) -> Self {
self.options_builder = self.options_builder.confidence_level(level);
self
}
pub fn max_iterations(mut self, max_iter: usize) -> Self {
self.options_builder = self.options_builder.max_iterations(max_iter);
self
}
pub fn tolerance(mut self, tol: f64) -> Self {
self.options_builder = self.options_builder.tolerance(tol);
self
}
pub fn lambda(mut self, lambda: f64) -> Self {
self.options_builder = self.options_builder.lambda(lambda);
self
}
pub fn var_power(mut self, p: f64) -> Self {
self.var_power = p;
self
}
pub fn link_power(mut self, q: f64) -> Self {
self.link_power = Some(q);
self
}
pub fn offset(mut self, offset: Col<f64>) -> Self {
self.offset = Some(offset);
self
}
pub fn build(self) -> TweedieRegressor {
let link_power = self.link_power.unwrap_or(1.0 - self.var_power);
let family = TweedieFamily::new(self.var_power, link_power);
TweedieRegressor {
options: self.options_builder.build_unchecked(),
family,
offset: self.offset,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_gaussian_regression() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |i| 2.0 + 3.0 * i as f64);
let fitted = TweedieRegressor::gaussian()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!((fitted.result.intercept.expect("intercept exists") - 2.0).abs() < 0.5);
assert!((fitted.result.coefficients[0] - 3.0).abs() < 0.1);
}
#[test]
fn test_poisson_regression() {
let x = Mat::from_fn(50, 1, |i, _| i as f64 / 10.0);
let y = Col::from_fn(50, |i| {
let eta = 0.1 + 0.2 * (i as f64 / 10.0);
eta.exp().max(0.1)
});
let fitted = TweedieRegressor::poisson()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!(fitted.result.r_squared > 0.5);
assert!(fitted.deviance < fitted.null_deviance);
}
#[test]
fn test_gamma_regression() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| {
let eta = 1.0 + 0.05 * (i + 1) as f64;
eta.exp()
});
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!(fitted.result.r_squared > 0.5);
}
#[test]
fn test_compound_poisson_gamma() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| {
if i % 5 == 0 {
0.0
} else {
(1.0 + 0.1 * i as f64).exp()
}
});
let fitted = TweedieRegressor::builder()
.var_power(1.5)
.link_power(0.0)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!(fitted.iterations > 0);
assert!(fitted.dispersion > 0.0);
}
#[test]
fn test_deviance_decrease() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!(
fitted.deviance <= fitted.null_deviance * 1.01,
"Deviance {} should be <= null deviance {}",
fitted.deviance,
fitted.null_deviance
);
}
#[test]
fn test_predict() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 20) as f64);
let pred = fitted.predict(&x_new);
for i in 0..5 {
assert!(pred[i] > 0.0);
}
}
#[test]
fn test_negative_y_error() {
let x = Mat::from_fn(10, 1, |i, _| i as f64);
let y = Col::from_fn(10, |i| if i < 5 { i as f64 } else { -1.0 });
let result = TweedieRegressor::gamma().build().fit(&x, &y);
assert!(matches!(result, Err(RegressionError::NumericalError(_))));
}
#[test]
fn test_standard_errors() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!(fitted.result.std_errors.is_some());
let se = fitted.result.std_errors.as_ref().expect("std errors exist");
assert!(se[0] > 0.0);
}
#[test]
fn test_tweedie_new_constructor() {
let options = RegressionOptionsBuilder::default()
.build()
.expect("valid options");
let family = TweedieFamily::new(2.0, 0.0); let regressor = TweedieRegressor::new(options, family);
let x = Mat::from_fn(20, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(20, |i| (i + 1) as f64);
let fitted = regressor.fit(&x, &y).expect("should fit");
assert!(fitted.result.coefficients.nrows() > 0);
}
#[test]
fn test_inverse_gaussian_regression() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| ((i + 1) as f64).powf(1.5));
let fitted = TweedieRegressor::inverse_gaussian()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!(fitted.result.r_squared > 0.0);
}
#[test]
fn test_fitted_family_getter() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let family = fitted.family();
assert!((family.var_power - 2.0).abs() < 1e-10);
assert!((family.link_power - 0.0).abs() < 1e-10);
}
#[test]
fn test_predict_eta() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 20) as f64);
let eta = fitted.predict_eta(&x_new);
let mu = fitted.predict(&x_new);
for i in 0..5 {
assert!((eta[i] - mu[i].ln()).abs() < 1e-10);
}
}
#[test]
fn test_residual_methods() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp() + (i % 3) as f64 * 0.1);
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let pearson = fitted.pearson_residuals();
let deviance = fitted.deviance_residuals();
let working = fitted.working_residuals();
for i in 0..30 {
assert!(pearson[i].is_finite());
assert!(deviance[i].is_finite());
assert!(working[i].is_finite());
}
}
#[test]
fn test_predict_with_offset() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 20) as f64);
let offset = Col::from_fn(5, |i| (i as f64) * 0.1);
let pred_no_offset = fitted.predict(&x_new);
let pred_with_offset = fitted.predict_with_offset(&x_new, &offset);
for i in 0..5 {
let expected = pred_no_offset[i] * offset[i].exp();
assert!(
(pred_with_offset[i] - expected).abs() / expected < 0.01,
"Expected {}, got {}",
expected,
pred_with_offset[i]
);
}
}
#[test]
fn test_predict_with_se_link_scale() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 20) as f64);
let result = fitted.predict_with_se(&x_new, PredictionType::Link, None, 0.95);
assert_eq!(result.fit.nrows(), 5);
assert_eq!(result.se.nrows(), 5);
for i in 0..5 {
assert!(result.se[i] > 0.0);
}
}
#[test]
fn test_predict_with_se_response_scale() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 20) as f64);
let result = fitted.predict_with_se(&x_new, PredictionType::Response, None, 0.95);
assert_eq!(result.fit.nrows(), 5);
for i in 0..5 {
assert!(result.fit[i] > 0.0); }
}
#[test]
fn test_predict_with_se_confidence_interval() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(true)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 20) as f64);
let result = fitted.predict_with_se(
&x_new,
PredictionType::Response,
Some(IntervalType::Confidence),
0.95,
);
assert_eq!(result.fit.nrows(), 5);
assert_eq!(result.lower.nrows(), 5);
assert_eq!(result.upper.nrows(), 5);
for i in 0..5 {
assert!(result.lower[i] < result.fit[i]);
assert!(result.upper[i] > result.fit[i]);
}
}
#[test]
fn test_predict_with_se_no_inference() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.compute_inference(false)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 20) as f64);
let result = fitted.predict_with_se(&x_new, PredictionType::Response, None, 0.95);
assert_eq!(result.fit.nrows(), 5);
}
#[test]
fn test_dimension_mismatch_error() {
let x = Mat::from_fn(10, 2, |i, j| (i + j) as f64);
let y = Col::from_fn(5, |i| i as f64);
let result = TweedieRegressor::gamma().build().fit(&x, &y);
assert!(matches!(
result,
Err(RegressionError::DimensionMismatch { .. })
));
}
#[test]
fn test_insufficient_observations_error() {
let x = Mat::from_fn(1, 2, |_, _| 1.0);
let y = Col::from_fn(1, |_| 1.0);
let result = TweedieRegressor::gamma().build().fit(&x, &y);
assert!(matches!(
result,
Err(RegressionError::InsufficientObservations { .. })
));
}
#[test]
fn test_insufficient_observations_for_params() {
let x = Mat::from_fn(3, 5, |i, j| (i + j) as f64); let y = Col::from_fn(3, |i| (i + 1) as f64);
let result = TweedieRegressor::gamma()
.with_intercept(true)
.build()
.fit(&x, &y);
assert!(matches!(
result,
Err(RegressionError::InsufficientObservations { .. })
));
}
#[test]
fn test_without_intercept() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| (0.1 * (i + 1) as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(false)
.build()
.fit(&x, &y)
.expect("model should fit");
assert!(fitted.result.intercept.is_none());
}
#[test]
fn test_predict_without_intercept() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| (0.1 * (i + 1) as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(false)
.build()
.fit(&x, &y)
.expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 30) as f64);
let pred = fitted.predict(&x_new);
for i in 0..5 {
assert!(pred[i] > 0.0);
}
}
#[test]
fn test_penalized_irls() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted_penalized = TweedieRegressor::gamma()
.with_intercept(true)
.lambda(0.1)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("penalized model should fit");
let fitted_unpenalized = TweedieRegressor::gamma()
.with_intercept(true)
.lambda(0.0)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("unpenalized model should fit");
assert!(fitted_penalized.iterations < 100);
assert!(fitted_unpenalized.iterations < 100);
let coef_penalized = fitted_penalized.result.coefficients[0].abs();
let coef_unpenalized = fitted_unpenalized.result.coefficients[0].abs();
assert!(
coef_penalized <= coef_unpenalized + 0.1,
"Penalized coefficient {} should not be much larger than unpenalized {}",
coef_penalized,
coef_unpenalized
);
}
#[test]
fn test_penalized_irls_no_intercept() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| (0.1 * (i + 1) as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(false)
.lambda(0.5)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("penalized model without intercept should fit");
assert!(fitted.result.intercept.is_none());
assert!(fitted.iterations < 100);
}
#[test]
fn test_penalized_irls_high_lambda() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.lambda(100.0)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("heavily penalized model should fit");
assert!(
fitted.result.coefficients[0].abs() < 0.5,
"Heavily penalized coefficient should be small: {}",
fitted.result.coefficients[0]
);
}
#[test]
fn test_penalized_irls_multivariate() {
let n = 50;
let x = Mat::from_fn(n, 3, |i, j| (i as f64 + j as f64 * 0.5) / 10.0);
let y = Col::from_fn(n, |i| {
let xi = (i as f64) / 10.0;
(1.0 + 0.1 * xi).exp()
});
let fitted = TweedieRegressor::gamma()
.with_intercept(true)
.lambda(0.1)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("multivariate penalized model should fit");
assert_eq!(fitted.result.coefficients.nrows(), 3);
assert!(fitted.iterations < 100);
}
#[test]
fn test_penalized_irls_poisson() {
let x = Mat::from_fn(50, 1, |i, _| i as f64 / 10.0);
let y = Col::from_fn(50, |i| {
let eta = 0.1 + 0.2 * (i as f64 / 10.0);
eta.exp().max(0.1)
});
let fitted = TweedieRegressor::poisson()
.with_intercept(true)
.lambda(0.5)
.max_iterations(100)
.build()
.fit(&x, &y)
.expect("penalized Poisson model should fit");
assert!(fitted.iterations < 100);
assert!(fitted.result.r_squared > 0.0);
}
}