use crate::core::{
IntervalType, PredictionResult, RegressionOptions, RegressionOptionsBuilder, RegressionResult,
};
use crate::inference::CoefficientInference;
use crate::solvers::traits::{FittedRegressor, RegressionError, Regressor};
use argmin::core::{CostFunction, Executor, Gradient, State};
use argmin::solver::linesearch::MoreThuenteLineSearch;
use argmin::solver::quasinewton::LBFGS;
use faer::{Col, Mat};
use statrs::distribution::{ContinuousCDF, FisherSnedecor};
use statrs::function::gamma::ln_gamma;
use std::f64::consts::PI;
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum AlmLoss {
#[default]
Likelihood,
MSE,
MAE,
HAM,
ROLE {
trim: f64,
},
}
impl AlmLoss {
pub fn role() -> Self {
AlmLoss::ROLE { trim: 0.05 }
}
pub fn role_with_trim(trim: f64) -> Self {
AlmLoss::ROLE {
trim: trim.clamp(0.0, 0.5),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub enum AlmDistribution {
#[default]
Normal,
Laplace,
StudentT,
Logistic,
AsymmetricLaplace,
GeneralisedNormal,
S,
LogNormal,
LogLaplace,
LogS,
LogGeneralisedNormal,
FoldedNormal,
RectifiedNormal,
BoxCoxNormal,
Gamma,
InverseGaussian,
Exponential,
Beta,
LogitNormal,
Poisson,
NegativeBinomial,
Binomial,
Geometric,
CumulativeLogistic,
CumulativeNormal,
}
impl AlmDistribution {
pub fn canonical_link(&self) -> LinkFunction {
match self {
AlmDistribution::Normal => LinkFunction::Identity,
AlmDistribution::Laplace => LinkFunction::Identity,
AlmDistribution::StudentT => LinkFunction::Identity,
AlmDistribution::Logistic => LinkFunction::Identity,
AlmDistribution::AsymmetricLaplace => LinkFunction::Identity,
AlmDistribution::GeneralisedNormal => LinkFunction::Identity,
AlmDistribution::S => LinkFunction::Identity,
AlmDistribution::LogNormal => LinkFunction::Log,
AlmDistribution::LogLaplace => LinkFunction::Log,
AlmDistribution::LogS => LinkFunction::Log,
AlmDistribution::LogGeneralisedNormal => LinkFunction::Log,
AlmDistribution::FoldedNormal => LinkFunction::Identity,
AlmDistribution::RectifiedNormal => LinkFunction::Identity,
AlmDistribution::BoxCoxNormal => LinkFunction::Identity,
AlmDistribution::Gamma => LinkFunction::Log,
AlmDistribution::InverseGaussian => LinkFunction::Log,
AlmDistribution::Exponential => LinkFunction::Log,
AlmDistribution::Beta => LinkFunction::Logit,
AlmDistribution::LogitNormal => LinkFunction::Identity, AlmDistribution::Poisson => LinkFunction::Log,
AlmDistribution::NegativeBinomial => LinkFunction::Log,
AlmDistribution::Binomial => LinkFunction::Logit,
AlmDistribution::Geometric => LinkFunction::Log, AlmDistribution::CumulativeLogistic => LinkFunction::Logit,
AlmDistribution::CumulativeNormal => LinkFunction::Probit,
}
}
pub fn requires_positive(&self) -> bool {
matches!(
self,
AlmDistribution::LogNormal
| AlmDistribution::LogLaplace
| AlmDistribution::LogS
| AlmDistribution::LogGeneralisedNormal
| AlmDistribution::FoldedNormal
| AlmDistribution::Gamma
| AlmDistribution::InverseGaussian
| AlmDistribution::Exponential
)
}
pub fn is_count(&self) -> bool {
matches!(
self,
AlmDistribution::Poisson
| AlmDistribution::NegativeBinomial
| AlmDistribution::Binomial
| AlmDistribution::Geometric
)
}
pub fn requires_unit_interval(&self) -> bool {
matches!(self, AlmDistribution::Beta | AlmDistribution::LogitNormal)
}
pub fn requires_numerical_optimization(&self) -> bool {
matches!(
self,
AlmDistribution::FoldedNormal
| AlmDistribution::RectifiedNormal
| AlmDistribution::S
| AlmDistribution::LogS
| AlmDistribution::BoxCoxNormal
| AlmDistribution::Beta
)
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum LinkFunction {
Identity,
Log,
Logit,
Probit,
Inverse,
Sqrt,
Cloglog,
}
impl LinkFunction {
pub fn link(&self, mu: f64) -> f64 {
match self {
LinkFunction::Identity => mu,
LinkFunction::Log => mu.ln(),
LinkFunction::Logit => (mu / (1.0 - mu)).ln(),
LinkFunction::Probit => probit(mu),
LinkFunction::Inverse => 1.0 / mu,
LinkFunction::Sqrt => mu.sqrt(),
LinkFunction::Cloglog => (-(1.0 - mu).ln()).ln(),
}
}
pub fn inverse(&self, eta: f64) -> f64 {
match self {
LinkFunction::Identity => eta,
LinkFunction::Log => eta.exp(),
LinkFunction::Logit => 1.0 / (1.0 + (-eta).exp()),
LinkFunction::Probit => normal_cdf(eta),
LinkFunction::Inverse => 1.0 / eta,
LinkFunction::Sqrt => eta.powi(2),
LinkFunction::Cloglog => 1.0 - (-eta.exp()).exp(),
}
}
pub fn inverse_derivative(&self, eta: f64) -> f64 {
match self {
LinkFunction::Identity => 1.0,
LinkFunction::Log => eta.exp(),
LinkFunction::Logit => {
let p = 1.0 / (1.0 + (-eta).exp());
p * (1.0 - p)
}
LinkFunction::Probit => normal_pdf(eta),
LinkFunction::Inverse => -1.0 / (eta * eta),
LinkFunction::Sqrt => 2.0 * eta,
LinkFunction::Cloglog => {
let exp_eta = eta.exp();
exp_eta * (-exp_eta).exp()
}
}
}
}
fn ll_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let sigma2 = scale * scale;
let rss: f64 = (0..n).map(|i| (y[i] - mu[i]).powi(2)).sum();
-0.5 * n as f64 * (2.0 * PI * sigma2).ln() - rss / (2.0 * sigma2)
}
fn ll_laplace(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let sad: f64 = (0..n).map(|i| (y[i] - mu[i]).abs()).sum();
-(n as f64) * (2.0 * scale).ln() - sad / scale
}
fn ll_student_t(y: &Col<f64>, mu: &Col<f64>, scale: f64, df: f64) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let z = (y[i] - mu[i]) / scale;
ln_gamma((df + 1.0) / 2.0)
- ln_gamma(df / 2.0)
- 0.5 * (PI * df).ln()
- scale.ln()
- ((df + 1.0) / 2.0) * (1.0 + z * z / df).ln()
})
.sum()
}
fn ll_logistic(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let z = (y[i] - mu[i]) / scale;
-z - 2.0 * (1.0 + (-z).exp()).ln() - scale.ln()
})
.sum()
}
fn ll_asymmetric_laplace(y: &Col<f64>, mu: &Col<f64>, scale: f64, alpha: f64) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let e = y[i] - mu[i];
let rho = if e >= 0.0 {
alpha * e
} else {
(alpha - 1.0) * e
};
(alpha * (1.0 - alpha)).ln() - scale.ln() - rho / scale
})
.sum()
}
fn ll_generalised_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64, shape: f64) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let z = ((y[i] - mu[i]) / scale).abs();
(shape / (2.0 * scale)).ln() - ln_gamma(1.0 / shape) - z.powf(shape)
})
.sum()
}
fn ll_s_distribution(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let scale_sq = scale * scale;
(0..n)
.map(|i| {
let abs_resid = (y[i] - mu[i]).abs();
-(4.0 * scale_sq).ln() - abs_resid.sqrt() / scale
})
.sum()
}
fn ll_log_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let sigma2 = scale * scale;
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || mu[i] <= 0.0 {
return f64::NEG_INFINITY;
}
let log_y = y[i].ln();
let log_mu = mu[i].ln();
ll +=
-log_y - scale.ln() - 0.5 * (2.0 * PI).ln() - (log_y - log_mu).powi(2) / (2.0 * sigma2);
}
ll
}
fn ll_log_laplace(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || mu[i] <= 0.0 {
return f64::NEG_INFINITY;
}
ll += -(2.0 * scale).ln() - y[i].ln() - (y[i].ln() - mu[i].ln()).abs() / scale;
}
ll
}
fn ll_log_s(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || mu[i] <= 0.0 {
return f64::NEG_INFINITY;
}
let z = (y[i].ln() - mu[i].ln()).abs() / scale;
ll += (0.25 / scale).ln() - y[i].ln() - z.sqrt();
}
ll
}
fn ll_log_generalised_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64, shape: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || mu[i] <= 0.0 {
return f64::NEG_INFINITY;
}
let z = (y[i].ln() - mu[i].ln()).abs() / scale;
ll += (shape / (2.0 * scale)).ln() - ln_gamma(1.0 / shape) - y[i].ln() - z.powf(shape);
}
ll
}
fn ll_folded_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] < 0.0 {
return f64::NEG_INFINITY;
}
let z1 = (y[i] - mu[i]) / scale;
let z2 = (y[i] + mu[i]) / scale;
let pdf = ((-0.5 * z1 * z1).exp() + (-0.5 * z2 * z2).exp()) / (scale * (2.0 * PI).sqrt());
if pdf <= 0.0 {
return f64::NEG_INFINITY;
}
ll += pdf.ln();
}
ll
}
fn ll_rectified_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] > 0.0 {
let z = (y[i] - mu[i]) / scale;
ll += -0.5 * (2.0 * PI).ln() - scale.ln() - 0.5 * z * z;
} else if y[i] == 0.0 {
let z = -mu[i] / scale;
ll += normal_cdf(z).ln();
} else {
return f64::NEG_INFINITY;
}
}
ll
}
fn ll_box_cox_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64, lambda: f64) -> f64 {
let n = y.nrows();
let sigma2 = scale * scale;
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 {
return f64::NEG_INFINITY;
}
let y_trans = if lambda.abs() < 1e-10 {
y[i].ln()
} else {
(y[i].powf(lambda) - 1.0) / lambda
};
let mu_trans = if lambda.abs() < 1e-10 {
mu[i].ln()
} else {
(mu[i].powf(lambda) - 1.0) / lambda
};
ll += -0.5 * (2.0 * PI * sigma2).ln() - (y_trans - mu_trans).powi(2) / (2.0 * sigma2)
+ (lambda - 1.0) * y[i].ln();
}
ll
}
fn ll_gamma(y: &Col<f64>, mu: &Col<f64>, shape: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || mu[i] <= 0.0 {
return f64::NEG_INFINITY;
}
let rate = shape / mu[i];
ll += shape * rate.ln() + (shape - 1.0) * y[i].ln() - rate * y[i] - ln_gamma(shape);
}
ll
}
fn ll_inverse_gaussian(y: &Col<f64>, mu: &Col<f64>, lambda: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || mu[i] <= 0.0 {
return f64::NEG_INFINITY;
}
ll += 0.5 * (lambda / (2.0 * PI * y[i].powi(3))).ln()
- lambda * (y[i] - mu[i]).powi(2) / (2.0 * mu[i].powi(2) * y[i]);
}
ll
}
fn ll_exponential(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] < 0.0 || mu[i] <= 0.0 {
return f64::NEG_INFINITY;
}
ll += -mu[i].ln() - y[i] / mu[i];
}
ll
}
fn ll_beta(y: &Col<f64>, mu: &Col<f64>, phi: f64) -> f64 {
let n = y.nrows();
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || y[i] >= 1.0 || mu[i] <= 0.0 || mu[i] >= 1.0 {
return f64::NEG_INFINITY;
}
let alpha = mu[i] * phi;
let beta_param = (1.0 - mu[i]) * phi;
ll += ln_gamma(phi) - ln_gamma(alpha) - ln_gamma(beta_param)
+ (alpha - 1.0) * y[i].ln()
+ (beta_param - 1.0) * (1.0 - y[i]).ln();
}
ll
}
fn ll_logit_normal(y: &Col<f64>, mu: &Col<f64>, scale: f64) -> f64 {
let n = y.nrows();
let sigma2 = scale * scale;
let mut ll = 0.0;
for i in 0..n {
if y[i] <= 0.0 || y[i] >= 1.0 {
return f64::NEG_INFINITY;
}
let logit_y = (y[i] / (1.0 - y[i])).ln();
ll += -0.5 * (2.0 * PI * sigma2).ln()
- (logit_y - mu[i]).powi(2) / (2.0 * sigma2)
- y[i].ln()
- (1.0 - y[i]).ln();
}
ll
}
fn ll_poisson(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let yi = y[i].round().max(0.0);
let mui = mu[i].max(1e-10);
yi * mui.ln() - mui - ln_gamma(yi + 1.0)
})
.sum()
}
fn ll_negative_binomial(y: &Col<f64>, mu: &Col<f64>, size: f64) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let yi = y[i].round().max(0.0);
let mui = mu[i].max(1e-10);
let p = size / (size + mui);
ln_gamma(yi + size) - ln_gamma(size) - ln_gamma(yi + 1.0)
+ size * p.ln()
+ yi * (1.0 - p).ln()
})
.sum()
}
fn ll_binomial(y: &Col<f64>, mu: &Col<f64>, n_trials: f64) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let p = mu[i].clamp(1e-10, 1.0 - 1e-10);
let k = (y[i] * n_trials).round().max(0.0).min(n_trials);
ln_gamma(n_trials + 1.0) - ln_gamma(k + 1.0) - ln_gamma(n_trials - k + 1.0)
+ k * p.ln()
+ (n_trials - k) * (1.0 - p).ln()
})
.sum()
}
fn ll_geometric(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let lambda = mu[i].max(1e-10); let p = 1.0 / (1.0 + lambda); let k = y[i].round().max(0.0); p.ln() + k * (1.0 - p).ln()
})
.sum()
}
fn ll_cumulative_logistic(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let p = mu[i].clamp(1e-10, 1.0 - 1e-10);
let yi = y[i];
yi * p.ln() + (1.0 - yi) * (1.0 - p).ln()
})
.sum()
}
fn ll_cumulative_normal(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
(0..n)
.map(|i| {
let p = mu[i].clamp(1e-10, 1.0 - 1e-10);
let yi = y[i];
yi * p.ln() + (1.0 - yi) * (1.0 - p).ln()
})
.sum()
}
pub fn log_likelihood(
y: &Col<f64>,
mu: &Col<f64>,
distribution: AlmDistribution,
scale: f64,
extra: Option<f64>,
) -> f64 {
match distribution {
AlmDistribution::Normal => ll_normal(y, mu, scale),
AlmDistribution::Laplace => ll_laplace(y, mu, scale),
AlmDistribution::StudentT => ll_student_t(y, mu, scale, extra.unwrap_or(5.0)),
AlmDistribution::Logistic => ll_logistic(y, mu, scale),
AlmDistribution::AsymmetricLaplace => {
ll_asymmetric_laplace(y, mu, scale, extra.unwrap_or(0.5))
}
AlmDistribution::GeneralisedNormal => {
ll_generalised_normal(y, mu, scale, extra.unwrap_or(2.0))
}
AlmDistribution::S => ll_s_distribution(y, mu, scale),
AlmDistribution::LogNormal => ll_log_normal(y, mu, scale),
AlmDistribution::LogLaplace => ll_log_laplace(y, mu, scale),
AlmDistribution::LogS => ll_log_s(y, mu, scale),
AlmDistribution::LogGeneralisedNormal => {
ll_log_generalised_normal(y, mu, scale, extra.unwrap_or(2.0))
}
AlmDistribution::FoldedNormal => ll_folded_normal(y, mu, scale),
AlmDistribution::RectifiedNormal => ll_rectified_normal(y, mu, scale),
AlmDistribution::BoxCoxNormal => ll_box_cox_normal(y, mu, scale, extra.unwrap_or(1.0)),
AlmDistribution::Gamma => ll_gamma(y, mu, extra.unwrap_or(1.0)),
AlmDistribution::InverseGaussian => ll_inverse_gaussian(y, mu, extra.unwrap_or(1.0)),
AlmDistribution::Exponential => ll_exponential(y, mu),
AlmDistribution::Beta => ll_beta(y, mu, scale), AlmDistribution::LogitNormal => ll_logit_normal(y, mu, scale),
AlmDistribution::Poisson => ll_poisson(y, mu),
AlmDistribution::NegativeBinomial => ll_negative_binomial(y, mu, extra.unwrap_or(1.0)),
AlmDistribution::Binomial => ll_binomial(y, mu, extra.unwrap_or(1.0)),
AlmDistribution::Geometric => ll_geometric(y, mu),
AlmDistribution::CumulativeLogistic => ll_cumulative_logistic(y, mu),
AlmDistribution::CumulativeNormal => ll_cumulative_normal(y, mu),
}
}
#[derive(Clone)]
struct NegLogLikelihoodCost<'a> {
x: &'a Mat<f64>,
y: &'a Col<f64>,
distribution: AlmDistribution,
link: LinkFunction,
extra_parameter: Option<f64>,
with_intercept: bool,
}
impl<'a> NegLogLikelihoodCost<'a> {
fn compute_mu_from_params(&self, params: &[f64]) -> Col<f64> {
let n = self.x.nrows();
let p = self.x.ncols();
let mut eta = Col::zeros(n);
if self.with_intercept {
let intercept = params[0];
for i in 0..n {
eta[i] = intercept;
for j in 0..p {
eta[i] += self.x[(i, j)] * params[j + 1];
}
}
} else {
for i in 0..n {
for (j, ¶m_j) in params.iter().enumerate().take(p) {
eta[i] += self.x[(i, j)] * param_j;
}
}
}
let mut mu = Col::zeros(n);
for i in 0..n {
mu[i] = self.link.inverse(eta[i]);
match self.distribution {
AlmDistribution::Poisson
| AlmDistribution::NegativeBinomial
| AlmDistribution::Gamma
| AlmDistribution::InverseGaussian
| AlmDistribution::Exponential
| AlmDistribution::FoldedNormal
| AlmDistribution::LogNormal
| AlmDistribution::LogLaplace
| AlmDistribution::LogS
| AlmDistribution::LogGeneralisedNormal => {
mu[i] = mu[i].max(1e-10);
}
AlmDistribution::Beta | AlmDistribution::Binomial => {
mu[i] = mu[i].clamp(1e-10, 1.0 - 1e-10);
}
AlmDistribution::Geometric => {
mu[i] = mu[i].max(1e-10);
}
AlmDistribution::RectifiedNormal => {
}
_ => {}
}
}
mu
}
fn estimate_scale_from_mu(&self, mu: &Col<f64>) -> f64 {
let n = self.y.nrows();
let p = if self.with_intercept {
self.x.ncols() + 1
} else {
self.x.ncols()
};
let df = (n - p) as f64;
estimate_scale(
self.y,
mu,
self.distribution,
df.max(1.0),
self.extra_parameter,
)
}
}
impl CostFunction for NegLogLikelihoodCost<'_> {
type Param = Vec<f64>;
type Output = f64;
fn cost(&self, params: &Self::Param) -> Result<Self::Output, argmin::core::Error> {
let mu = self.compute_mu_from_params(params);
let scale = self.estimate_scale_from_mu(&mu);
let ll = log_likelihood(self.y, &mu, self.distribution, scale, self.extra_parameter);
if ll.is_finite() {
Ok(-ll)
} else {
Ok(1e20) }
}
}
impl Gradient for NegLogLikelihoodCost<'_> {
type Param = Vec<f64>;
type Gradient = Vec<f64>;
fn gradient(&self, params: &Self::Param) -> Result<Self::Gradient, argmin::core::Error> {
let eps = 1e-7;
let mut grad = vec![0.0; params.len()];
for i in 0..params.len() {
let mut params_plus = params.clone();
let mut params_minus = params.clone();
params_plus[i] += eps;
params_minus[i] -= eps;
let cost_plus = self.cost(¶ms_plus)?;
let cost_minus = self.cost(¶ms_minus)?;
grad[i] = (cost_plus - cost_minus) / (2.0 * eps);
}
Ok(grad)
}
}
pub fn estimate_scale(
y: &Col<f64>,
mu: &Col<f64>,
distribution: AlmDistribution,
df: f64,
extra: Option<f64>,
) -> f64 {
let n = y.nrows();
match distribution {
AlmDistribution::Normal | AlmDistribution::RectifiedNormal => {
let rss: f64 = (0..n).map(|i| (y[i] - mu[i]).powi(2)).sum();
(rss / df).sqrt()
}
AlmDistribution::FoldedNormal => {
let mean_y_sq: f64 = (0..n).map(|i| y[i] * y[i]).sum::<f64>() / n as f64;
let mean_mu_sq: f64 = (0..n).map(|i| mu[i] * mu[i]).sum::<f64>() / n as f64;
let sigma_sq = (mean_y_sq - mean_mu_sq).max(0.01);
sigma_sq.sqrt()
}
AlmDistribution::Laplace => {
let sad: f64 = (0..n).map(|i| (y[i] - mu[i]).abs()).sum();
sad / n as f64
}
AlmDistribution::LogLaplace => {
let sad: f64 = (0..n)
.filter(|&i| y[i] > 0.0 && mu[i] > 0.0)
.map(|i| (y[i].ln() - mu[i].ln()).abs())
.sum();
sad / n as f64
}
AlmDistribution::StudentT | AlmDistribution::Logistic => {
let mut abs_residuals: Vec<f64> = (0..n).map(|i| (y[i] - mu[i]).abs()).collect();
abs_residuals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let median_idx = n / 2;
let mad = abs_residuals[median_idx];
mad / 0.6745 }
AlmDistribution::LogNormal => {
let rss: f64 = (0..n)
.filter(|&i| y[i] > 0.0 && mu[i] > 0.0)
.map(|i| (y[i].ln() - mu[i].ln()).powi(2))
.sum();
(rss / df).sqrt()
}
AlmDistribution::LogitNormal => {
let rss: f64 = (0..n)
.filter(|&i| y[i] > 0.0 && y[i] < 1.0)
.map(|i| {
let yi = y[i].clamp(0.001, 0.999);
let logit_y = (yi / (1.0 - yi)).ln();
(logit_y - mu[i]).powi(2)
})
.sum();
(rss / df).sqrt()
}
AlmDistribution::S => {
let ham: f64 = (0..n).map(|i| (y[i] - mu[i]).abs().sqrt()).sum();
(ham / (2.0 * n as f64)).max(1e-10)
}
AlmDistribution::LogS => {
let ham: f64 = (0..n)
.filter(|&i| y[i] > 0.0 && mu[i] > 0.0)
.map(|i| (y[i].ln() - mu[i].ln()).abs().sqrt())
.sum();
(ham / (2.0 * n as f64)).max(1e-10)
}
AlmDistribution::AsymmetricLaplace => {
let alpha = extra.unwrap_or(0.5);
let weighted_sum: f64 = (0..n)
.map(|i| {
let e = y[i] - mu[i];
if e >= 0.0 {
alpha * e.abs()
} else {
(1.0 - alpha) * e.abs()
}
})
.sum();
(weighted_sum / n as f64).max(1e-10)
}
AlmDistribution::GeneralisedNormal | AlmDistribution::BoxCoxNormal => {
let mut abs_residuals: Vec<f64> = (0..n).map(|i| (y[i] - mu[i]).abs()).collect();
abs_residuals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
abs_residuals[n / 2] / 0.6745
}
AlmDistribution::LogGeneralisedNormal => {
let mut abs_residuals: Vec<f64> = (0..n)
.filter(|&i| y[i] > 0.0 && mu[i] > 0.0)
.map(|i| (y[i].ln() - mu[i].ln()).abs())
.collect();
abs_residuals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
if abs_residuals.is_empty() {
1.0
} else {
abs_residuals[abs_residuals.len() / 2] / 0.6745
}
}
AlmDistribution::Beta => {
let var_y: f64 = (0..n)
.map(|i| {
let resid = y[i] - mu[i];
resid * resid
})
.sum::<f64>()
/ df.max(1.0);
let mean_mu_var: f64 = (0..n)
.map(|i| {
let mi = mu[i].clamp(0.01, 0.99);
mi * (1.0 - mi)
})
.sum::<f64>()
/ n as f64;
(mean_mu_var / var_y.max(1e-10) - 1.0).max(1.0)
}
_ => 1.0,
}
}
fn compute_mse(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
if n == 0 {
return 0.0;
}
let sum: f64 = (0..n).map(|i| (y[i] - mu[i]).powi(2)).sum();
sum / n as f64
}
fn compute_mae(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
if n == 0 {
return 0.0;
}
let sum: f64 = (0..n).map(|i| (y[i] - mu[i]).abs()).sum();
sum / n as f64
}
fn compute_ham(y: &Col<f64>, mu: &Col<f64>) -> f64 {
let n = y.nrows();
if n == 0 {
return 0.0;
}
let sum: f64 = (0..n).map(|i| (y[i] - mu[i]).abs().sqrt()).sum();
sum / n as f64
}
fn compute_role(
y: &Col<f64>,
mu: &Col<f64>,
distribution: AlmDistribution,
scale: f64,
extra: Option<f64>,
trim: f64,
) -> f64 {
let n = y.nrows();
if n == 0 {
return 0.0;
}
let mut point_lls: Vec<f64> = (0..n)
.map(|i| {
let yi = Col::from_fn(1, |_| y[i]);
let mui = Col::from_fn(1, |_| mu[i]);
log_likelihood(&yi, &mui, distribution, scale, extra)
})
.collect();
point_lls.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n_trim = ((n as f64 * trim).ceil() as usize).min(n - 1);
point_lls.iter().skip(n_trim).sum()
}
pub fn compute_loss(
y: &Col<f64>,
mu: &Col<f64>,
loss: AlmLoss,
distribution: AlmDistribution,
scale: f64,
extra: Option<f64>,
) -> f64 {
match loss {
AlmLoss::Likelihood => {
-log_likelihood(y, mu, distribution, scale, extra)
}
AlmLoss::MSE => compute_mse(y, mu),
AlmLoss::MAE => compute_mae(y, mu),
AlmLoss::HAM => compute_ham(y, mu),
AlmLoss::ROLE { trim } => {
-compute_role(y, mu, distribution, scale, extra, trim)
}
}
}
fn normal_cdf(x: f64) -> f64 {
0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
}
fn normal_pdf(x: f64) -> f64 {
(-0.5 * x * x).exp() / (2.0 * PI).sqrt()
}
#[allow(clippy::excessive_precision)]
fn probit(p: f64) -> f64 {
if p <= 0.0 {
return f64::NEG_INFINITY;
}
if p >= 1.0 {
return f64::INFINITY;
}
let a = [
-3.969683028665376e1,
2.209460984245205e2,
-2.759285104469687e2,
1.383577518672690e2,
-3.066479806614716e1,
2.506628277459239e0,
];
let b = [
-5.447609879822406e1,
1.615858368580409e2,
-1.556989798598866e2,
6.680131188771972e1,
-1.328068155288572e+01,
];
let c = [
-7.784894002430293e-03,
-3.223964580411365e-01,
-2.400758277161838e+00,
-2.549732539343734e+00,
4.374664141464968e+00,
2.938163982698783e+00,
];
let d = [
7.784695709041462e-03,
3.224671290700398e-01,
2.445134137142996e+00,
3.754408661907416e+00,
];
let p_low = 0.02425;
let p_high = 1.0 - p_low;
if p < p_low {
let q = (-2.0 * p.ln()).sqrt();
(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
} else if p <= p_high {
let q = p - 0.5;
let r = q * q;
(((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q
/ (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1.0)
} else {
let q = (-2.0 * (1.0 - p).ln()).sqrt();
-(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
/ ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
}
}
fn erf(x: f64) -> f64 {
let a1 = 0.254829592;
let a2 = -0.284496736;
let a3 = 1.421413741;
let a4 = -1.453152027;
let a5 = 1.061405429;
let p = 0.3275911;
let sign = if x < 0.0 { -1.0 } else { 1.0 };
let x = x.abs();
let t = 1.0 / (1.0 + p * x);
let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
sign * y
}
#[derive(Debug, Clone)]
pub struct AlmRegressor {
options: RegressionOptions,
distribution: AlmDistribution,
link: LinkFunction,
extra_parameter: Option<f64>,
loss: AlmLoss,
max_iterations: usize,
tolerance: f64,
}
impl AlmRegressor {
pub fn new(
options: RegressionOptions,
distribution: AlmDistribution,
link: Option<LinkFunction>,
extra_parameter: Option<f64>,
) -> Self {
let link = link.unwrap_or_else(|| distribution.canonical_link());
Self {
options,
distribution,
link,
extra_parameter,
loss: AlmLoss::default(),
max_iterations: 100,
tolerance: 1e-8,
}
}
pub fn loss(&self) -> AlmLoss {
self.loss
}
pub fn builder() -> AlmRegressorBuilder {
AlmRegressorBuilder::default()
}
pub fn distribution(&self) -> AlmDistribution {
self.distribution
}
pub fn link(&self) -> LinkFunction {
self.link
}
fn fit_irls(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<FittedAlm, RegressionError> {
let n = x.nrows();
let p = x.ncols();
let n_params = if self.options.with_intercept {
p + 1
} else {
p
};
let mut beta = self.initialize_coefficients(x, y)?;
let mut eta = self.compute_eta(x, &beta);
let mut mu = self.compute_mu(&eta);
let df = (n - n_params) as f64;
let mut scale =
estimate_scale(y, &mu, self.distribution, df.max(1.0), self.extra_parameter);
let mut prev_loss = compute_loss(
y,
&mu,
self.loss,
self.distribution,
scale,
self.extra_parameter,
);
for iter in 0..self.max_iterations {
let (weights, z) = self.compute_irls_components(y, &mu, &eta);
beta = self.weighted_ls_step(x, &z, &weights, &beta)?;
eta = self.compute_eta(x, &beta);
mu = self.compute_mu(&eta);
scale = estimate_scale(y, &mu, self.distribution, df.max(1.0), self.extra_parameter);
let loss = compute_loss(
y,
&mu,
self.loss,
self.distribution,
scale,
self.extra_parameter,
);
if (loss - prev_loss).abs() < self.tolerance * (1.0 + prev_loss.abs()) {
break;
}
if iter == self.max_iterations - 1 {
}
prev_loss = loss;
}
self.build_result(x, y, &beta, &mu, scale)
}
fn fit_numerical(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<FittedAlm, RegressionError> {
let n = x.nrows();
let p = x.ncols();
let n_params = if self.options.with_intercept {
p + 1
} else {
p
};
let cost = NegLogLikelihoodCost {
x,
y,
distribution: self.distribution,
link: self.link,
extra_parameter: self.extra_parameter,
with_intercept: self.options.with_intercept,
};
let mut best_params: Option<Vec<f64>> = None;
let mut best_cost = f64::INFINITY;
let starting_points = self.generate_starting_points(x, y, n_params)?;
for init_params in starting_points {
let linesearch = MoreThuenteLineSearch::new();
let solver = LBFGS::new(linesearch, 7);
let result = Executor::new(cost.clone(), solver)
.configure(|state| {
state
.param(init_params.clone())
.max_iters(self.max_iterations as u64)
.target_cost(f64::NEG_INFINITY)
})
.run();
if let Ok(res) = result {
if let Some(params) = res.state().get_best_param() {
if let Ok(cost_val) = cost.cost(params) {
if cost_val < best_cost {
best_cost = cost_val;
best_params = Some(params.clone());
}
}
}
}
}
let final_params = best_params.unwrap_or_else(|| {
let init_beta = self
.initialize_coefficients(x, y)
.unwrap_or_else(|_| Col::zeros(n_params));
(0..n_params).map(|i| init_beta[i]).collect()
});
let beta = Col::from_fn(n_params, |i| final_params[i]);
let eta = self.compute_eta(x, &beta);
let mu = self.compute_mu(&eta);
let df = (n - n_params) as f64;
let scale = estimate_scale(y, &mu, self.distribution, df.max(1.0), self.extra_parameter);
self.build_result(x, y, &beta, &mu, scale)
}
fn generate_starting_points(
&self,
x: &Mat<f64>,
y: &Col<f64>,
n_params: usize,
) -> Result<Vec<Vec<f64>>, RegressionError> {
let mut starting_points = Vec::new();
let ols_beta = self.initialize_coefficients(x, y)?;
let ols_params: Vec<f64> = (0..n_params).map(|i| ols_beta[i]).collect();
starting_points.push(ols_params.clone());
let scaled_params: Vec<f64> = ols_params.iter().map(|&x| x * 0.1).collect();
starting_points.push(scaled_params);
let near_zero: Vec<f64> = vec![0.01; n_params];
starting_points.push(near_zero);
match self.distribution {
AlmDistribution::FoldedNormal => {
let n = x.nrows();
let p = x.ncols();
let y_mean: f64 = (0..n).map(|i| y[i]).sum::<f64>() / n as f64;
let mut fn_start1 = vec![0.0; n_params];
if self.options.with_intercept && !fn_start1.is_empty() {
fn_start1[0] = 0.1 * y_mean;
if p > 0 && fn_start1.len() > 1 {
fn_start1[1] = 0.05;
}
}
starting_points.push(fn_start1);
let mut fn_start2 = vec![0.0; n_params];
if self.options.with_intercept && !fn_start2.is_empty() {
fn_start2[0] = 0.5;
if p > 0 && fn_start2.len() > 1 {
fn_start2[1] = 0.05;
}
}
starting_points.push(fn_start2);
}
AlmDistribution::RectifiedNormal => {
let mut small_start = ols_params.clone();
if self.options.with_intercept && !small_start.is_empty() {
small_start[0] = 0.0;
}
starting_points.push(small_start);
}
AlmDistribution::Beta => {
let n = x.nrows();
let p = x.ncols();
let y_mean: f64 = (0..n).map(|i| y[i]).sum::<f64>() / n as f64;
let logit_mean = (y_mean.clamp(0.01, 0.99) / (1.0 - y_mean.clamp(0.01, 0.99))).ln();
let mut beta_start = vec![0.0; n_params];
if self.options.with_intercept && !beta_start.is_empty() {
beta_start[0] = logit_mean;
}
starting_points.push(beta_start);
let mut beta_start2 = vec![0.0; n_params];
if self.options.with_intercept && !beta_start2.is_empty() {
beta_start2[0] = 1.0; if p > 0 && beta_start2.len() > 1 {
beta_start2[1] = 0.5; }
}
starting_points.push(beta_start2);
let y_logit: Col<f64> = Col::from_fn(n, |i| {
let yi = y[i].clamp(0.01, 0.99);
(yi / (1.0 - yi)).ln()
});
if let Ok(logit_beta) = self.initialize_coefficients_for_y(x, &y_logit) {
let logit_params: Vec<f64> = (0..n_params).map(|i| logit_beta[i]).collect();
starting_points.push(logit_params);
}
}
AlmDistribution::S | AlmDistribution::LogS => {
let n = x.nrows();
let p = x.ncols();
let mut neg_start = ols_params.clone();
if self.options.with_intercept && !neg_start.is_empty() {
if p > 0 && neg_start.len() > 1 {
let x_mean: f64 = (0..n).map(|i| x[(i, 0)]).sum::<f64>() / n as f64;
neg_start[0] = -neg_start[1].abs() * x_mean * 0.1;
}
}
starting_points.push(neg_start);
let mut neg_start2 = ols_params.clone();
if self.options.with_intercept && !neg_start2.is_empty() {
neg_start2[0] = -2.0;
}
starting_points.push(neg_start2);
}
AlmDistribution::BoxCoxNormal => {
let lambda = self.extra_parameter.unwrap_or(1.0);
let n = x.nrows();
let y_trans: Col<f64> = Col::from_fn(n, |i| {
if lambda.abs() < 1e-10 {
y[i].ln()
} else {
(y[i].powf(lambda) - 1.0) / lambda
}
});
if let Ok(trans_beta) = self.initialize_coefficients_for_y(x, &y_trans) {
let trans_params: Vec<f64> = (0..n_params).map(|i| trans_beta[i]).collect();
starting_points.push(trans_params);
}
}
_ => {}
}
Ok(starting_points)
}
fn initialize_coefficients_for_y(
&self,
x: &Mat<f64>,
y: &Col<f64>,
) -> Result<Col<f64>, RegressionError> {
let n = x.nrows();
let p = x.ncols();
if self.options.with_intercept {
let mut x_aug = Mat::zeros(n, p + 1);
for i in 0..n {
x_aug[(i, 0)] = 1.0;
for j in 0..p {
x_aug[(i, j + 1)] = x[(i, j)];
}
}
let qr = x_aug.col_piv_qr();
let perm = qr.P();
let perm_arr = perm.arrays().0;
let q = qr.compute_Q();
let r = qr.R();
let ncols = p + 1;
let qty = q.transpose() * y;
let mut beta_perm = Col::zeros(ncols);
for i in (0..ncols.min(n)).rev() {
let mut sum = qty[i];
for j in (i + 1)..ncols.min(n) {
sum -= r[(i, j)] * beta_perm[j];
}
if r[(i, i)].abs() > 1e-10 {
beta_perm[i] = sum / r[(i, i)];
}
}
let mut beta = Col::zeros(ncols);
for i in 0..ncols {
let orig_col = perm_arr[i];
beta[orig_col] = beta_perm[i];
}
Ok(beta)
} else {
let qr = x.col_piv_qr();
let perm = qr.P();
let perm_arr = perm.arrays().0;
let q = qr.compute_Q();
let r = qr.R();
let qty = q.transpose() * y;
let mut beta_perm = Col::zeros(p);
for i in (0..p.min(n)).rev() {
let mut sum = qty[i];
for j in (i + 1)..p.min(n) {
sum -= r[(i, j)] * beta_perm[j];
}
if r[(i, i)].abs() > 1e-10 {
beta_perm[i] = sum / r[(i, i)];
}
}
let mut beta = Col::zeros(p);
for i in 0..p {
let orig_col = perm_arr[i];
beta[orig_col] = beta_perm[i];
}
Ok(beta)
}
}
fn initialize_coefficients(
&self,
x: &Mat<f64>,
y: &Col<f64>,
) -> Result<Col<f64>, RegressionError> {
let n = x.nrows();
let p = x.ncols();
let y_init: Col<f64> = if matches!(self.distribution, AlmDistribution::LogitNormal) {
Col::from_fn(n, |i| {
let yi = y[i].clamp(0.001, 0.999);
(yi / (1.0 - yi)).ln()
})
} else {
match self.link {
LinkFunction::Identity => y.clone(),
LinkFunction::Log => Col::from_fn(n, |i| if y[i] > 0.0 { y[i].ln() } else { 0.0 }),
LinkFunction::Logit => Col::from_fn(n, |i| {
let yi = y[i].clamp(0.01, 0.99);
(yi / (1.0 - yi)).ln()
}),
LinkFunction::Probit => Col::from_fn(n, |i| probit(y[i].clamp(0.01, 0.99))),
LinkFunction::Inverse => {
Col::from_fn(n, |i| if y[i] != 0.0 { 1.0 / y[i] } else { 1.0 })
}
LinkFunction::Sqrt => Col::from_fn(n, |i| y[i].max(0.0).sqrt()),
LinkFunction::Cloglog => Col::from_fn(n, |i| {
let yi = y[i].clamp(0.01, 0.99);
(-(1.0 - yi).ln()).ln()
}),
}
};
if self.options.with_intercept {
let mut x_aug = Mat::zeros(n, p + 1);
for i in 0..n {
x_aug[(i, 0)] = 1.0;
for j in 0..p {
x_aug[(i, j + 1)] = x[(i, j)];
}
}
let qr = x_aug.col_piv_qr();
let perm = qr.P();
let perm_arr = perm.arrays().0;
let q = qr.compute_Q();
let r = qr.R();
let ncols = p + 1;
let qty = q.transpose() * &y_init;
let mut beta_perm = Col::zeros(ncols);
for i in (0..ncols.min(n)).rev() {
let mut sum = qty[i];
for j in (i + 1)..ncols.min(n) {
sum -= r[(i, j)] * beta_perm[j];
}
if r[(i, i)].abs() > 1e-10 {
beta_perm[i] = sum / r[(i, i)];
}
}
let mut beta = Col::zeros(ncols);
for i in 0..ncols {
let orig_col = perm_arr[i];
beta[orig_col] = beta_perm[i];
}
Ok(beta)
} else {
let qr = x.col_piv_qr();
let perm = qr.P();
let perm_arr = perm.arrays().0;
let q = qr.compute_Q();
let r = qr.R();
let qty = q.transpose() * &y_init;
let mut beta_perm = Col::zeros(p);
for i in (0..p.min(n)).rev() {
let mut sum = qty[i];
for j in (i + 1)..p.min(n) {
sum -= r[(i, j)] * beta_perm[j];
}
if r[(i, i)].abs() > 1e-10 {
beta_perm[i] = sum / r[(i, i)];
}
}
let mut beta = Col::zeros(p);
for i in 0..p {
let orig_col = perm_arr[i];
beta[orig_col] = beta_perm[i];
}
Ok(beta)
}
}
fn compute_eta(&self, x: &Mat<f64>, beta: &Col<f64>) -> Col<f64> {
let n = x.nrows();
let p = x.ncols();
if self.options.with_intercept {
let intercept = beta[0];
let mut eta = Col::zeros(n);
for i in 0..n {
eta[i] = intercept;
for j in 0..p {
eta[i] += x[(i, j)] * beta[j + 1];
}
}
eta
} else {
let mut eta = Col::zeros(n);
for i in 0..n {
for j in 0..p {
eta[i] += x[(i, j)] * beta[j];
}
}
eta
}
}
fn compute_mu(&self, eta: &Col<f64>) -> Col<f64> {
let n = eta.nrows();
let mut mu = Col::zeros(n);
for i in 0..n {
mu[i] = self.link.inverse(eta[i]);
if self.distribution.requires_positive() {
mu[i] = mu[i].max(1e-10);
}
if self.distribution.requires_unit_interval()
&& !matches!(self.distribution, AlmDistribution::LogitNormal)
{
mu[i] = mu[i].clamp(1e-10, 1.0 - 1e-10);
}
}
mu
}
fn compute_irls_components(
&self,
y: &Col<f64>,
mu: &Col<f64>,
eta: &Col<f64>,
) -> (Col<f64>, Col<f64>) {
let n = y.nrows();
let mut weights = Col::zeros(n);
let mut z = Col::zeros(n);
for i in 0..n {
let d_mu = self.link.inverse_derivative(eta[i]).max(1e-10);
let v = self.variance_function(mu[i]);
let weight = match self.distribution {
AlmDistribution::Laplace => {
let abs_resid = (y[i] - mu[i]).abs().max(1e-6);
1.0 / abs_resid
}
AlmDistribution::LogLaplace => {
let abs_resid = (y[i].ln() - mu[i].ln()).abs().max(1e-6);
1.0 / abs_resid
}
AlmDistribution::AsymmetricLaplace => {
let alpha = self.extra_parameter.unwrap_or(0.5);
let resid = y[i] - mu[i];
let abs_resid = resid.abs().max(1e-6);
if resid >= 0.0 {
alpha / abs_resid
} else {
(1.0 - alpha) / abs_resid
}
}
AlmDistribution::StudentT => {
let df = self.extra_parameter.unwrap_or(5.0);
let resid = y[i] - mu[i];
let scale_est = 1.0; let z_sq = (resid / scale_est).powi(2);
(df + 1.0) / (df + z_sq)
}
AlmDistribution::S | AlmDistribution::LogS => {
let abs_resid = (y[i] - mu[i]).abs().max(1e-6);
1.0 / abs_resid
}
AlmDistribution::GeneralisedNormal | AlmDistribution::LogGeneralisedNormal => {
let shape = self.extra_parameter.unwrap_or(2.0);
let abs_resid = (y[i] - mu[i]).abs().max(1e-6);
abs_resid.powf(shape - 2.0).max(1e-10)
}
_ => {
(d_mu * d_mu / v).max(1e-10)
}
};
weights[i] = weight;
let response = match self.distribution {
AlmDistribution::LogitNormal => {
let yi = y[i].clamp(0.001, 0.999);
(yi / (1.0 - yi)).ln() }
AlmDistribution::LogLaplace
| AlmDistribution::LogS
| AlmDistribution::LogGeneralisedNormal => {
y[i].max(1e-10).ln()
}
_ => y[i],
};
z[i] = if matches!(
self.distribution,
AlmDistribution::LogLaplace
| AlmDistribution::LogS
| AlmDistribution::LogGeneralisedNormal
| AlmDistribution::LogitNormal
) {
response } else {
eta[i] + (response - mu[i]) / d_mu
};
}
(weights, z)
}
fn variance_function(&self, mu: f64) -> f64 {
match self.distribution {
AlmDistribution::Normal
| AlmDistribution::Laplace
| AlmDistribution::StudentT
| AlmDistribution::Logistic
| AlmDistribution::AsymmetricLaplace
| AlmDistribution::GeneralisedNormal
| AlmDistribution::S => 1.0,
AlmDistribution::Poisson => mu.max(1e-10),
AlmDistribution::Binomial | AlmDistribution::Geometric => {
let p = mu.clamp(1e-10, 1.0 - 1e-10);
p * (1.0 - p)
}
AlmDistribution::NegativeBinomial => {
let size = self.extra_parameter.unwrap_or(1.0);
mu + mu * mu / size
}
AlmDistribution::Gamma | AlmDistribution::InverseGaussian => mu * mu,
AlmDistribution::Exponential => mu * mu,
AlmDistribution::Beta => {
let phi = self.extra_parameter.unwrap_or(1.0);
let p = mu.clamp(1e-10, 1.0 - 1e-10);
p * (1.0 - p) / (1.0 + phi)
}
AlmDistribution::LogNormal
| AlmDistribution::LogLaplace
| AlmDistribution::LogS
| AlmDistribution::LogGeneralisedNormal
| AlmDistribution::FoldedNormal
| AlmDistribution::RectifiedNormal
| AlmDistribution::BoxCoxNormal
| AlmDistribution::LogitNormal => 1.0,
AlmDistribution::CumulativeLogistic | AlmDistribution::CumulativeNormal => 1.0,
}
}
fn weighted_ls_step(
&self,
x: &Mat<f64>,
z: &Col<f64>,
weights: &Col<f64>,
_prev_beta: &Col<f64>,
) -> Result<Col<f64>, RegressionError> {
let n = x.nrows();
let p = x.ncols();
if self.options.with_intercept {
let ncols = p + 1;
let mut xw = Mat::zeros(n, ncols);
let mut zw = Col::zeros(n);
for i in 0..n {
let w_sqrt = weights[i].sqrt();
xw[(i, 0)] = w_sqrt;
for j in 0..p {
xw[(i, j + 1)] = x[(i, j)] * w_sqrt;
}
zw[i] = z[i] * w_sqrt;
}
let qr = xw.col_piv_qr();
let perm = qr.P();
let perm_arr = perm.arrays().0;
let q = qr.compute_Q();
let r = qr.R();
let qty = q.transpose() * &zw;
let mut beta_perm = Col::zeros(ncols);
for i in (0..ncols.min(n)).rev() {
let mut sum = qty[i];
for j in (i + 1)..ncols.min(n) {
sum -= r[(i, j)] * beta_perm[j];
}
if r[(i, i)].abs() > 1e-10 {
beta_perm[i] = sum / r[(i, i)];
}
}
let mut beta = Col::zeros(ncols);
for i in 0..ncols {
let orig_col = perm_arr[i];
beta[orig_col] = beta_perm[i];
}
Ok(beta)
} else {
let mut xw = Mat::zeros(n, p);
let mut zw = Col::zeros(n);
for i in 0..n {
let w_sqrt = weights[i].sqrt();
for j in 0..p {
xw[(i, j)] = x[(i, j)] * w_sqrt;
}
zw[i] = z[i] * w_sqrt;
}
let qr = xw.col_piv_qr();
let perm = qr.P();
let perm_arr = perm.arrays().0;
let q = qr.compute_Q();
let r = qr.R();
let qty = q.transpose() * &zw;
let mut beta_perm = Col::zeros(p);
for i in (0..p.min(n)).rev() {
let mut sum = qty[i];
for j in (i + 1)..p.min(n) {
sum -= r[(i, j)] * beta_perm[j];
}
if r[(i, i)].abs() > 1e-10 {
beta_perm[i] = sum / r[(i, i)];
}
}
let mut beta = Col::zeros(p);
for i in 0..p {
let orig_col = perm_arr[i];
beta[orig_col] = beta_perm[i];
}
Ok(beta)
}
}
fn build_result(
&self,
x: &Mat<f64>,
y: &Col<f64>,
beta: &Col<f64>,
mu: &Col<f64>,
scale: f64,
) -> Result<FittedAlm, RegressionError> {
let n = x.nrows();
let p = x.ncols();
let (coefficients, intercept) = if self.options.with_intercept {
let intercept = beta[0];
let mut coefs = Col::zeros(p);
for j in 0..p {
coefs[j] = beta[j + 1];
}
(coefs, Some(intercept))
} else {
(beta.clone(), None)
};
let n_params = if self.options.with_intercept {
p + 2 } else {
p + 1 };
let mut residuals = Col::zeros(n);
for i in 0..n {
residuals[i] = y[i] - mu[i];
}
let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
let rss: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
let r_squared = if tss > 0.0 {
(1.0 - rss / tss).clamp(0.0, 1.0)
} else if rss < 1e-10 {
1.0
} else {
0.0
};
let df_total = (n - 1) as f64;
let df_resid = (n - n_params) as f64;
let adj_r_squared = if df_resid > 0.0 && df_total > 0.0 {
1.0 - (1.0 - r_squared) * df_total / df_resid
} else {
f64::NAN
};
let mse = if df_resid > 0.0 {
rss / df_resid
} else {
f64::NAN
};
let rmse = mse.sqrt();
let ll = log_likelihood(y, mu, self.distribution, scale, self.extra_parameter);
let k = n_params as f64;
let aic = if ll.is_finite() {
2.0 * k - 2.0 * ll
} else {
f64::NAN
};
let aicc = if ll.is_finite() && (n as f64 - k - 1.0) > 0.0 {
aic + 2.0 * k * (k + 1.0) / (n as f64 - k - 1.0)
} else {
f64::NAN
};
let bic = if ll.is_finite() {
k * (n as f64).ln() - 2.0 * ll
} else {
f64::NAN
};
let ess = tss - rss;
let df_model = (n_params - if intercept.is_some() { 1 } else { 0 } - 1) as f64; let f_statistic = if df_model > 0.0 && df_resid > 0.0 && mse > 0.0 {
(ess / df_model) / mse
} else {
f64::NAN
};
let f_pvalue = if f_statistic.is_finite() && df_model > 0.0 && df_resid > 0.0 {
let f_dist = FisherSnedecor::new(df_model, df_resid).ok();
f_dist.map_or(f64::NAN, |d| 1.0 - d.cdf(f_statistic))
} else {
f64::NAN
};
let aliased = vec![false; p];
let mut result = RegressionResult::empty(p, n);
result.coefficients = coefficients.clone();
result.intercept = intercept;
result.residuals = residuals;
result.fitted_values = mu.clone();
result.rank = p;
result.n_parameters = n_params;
result.n_observations = n;
result.aliased = aliased;
result.r_squared = r_squared;
result.adj_r_squared = adj_r_squared;
result.mse = mse;
result.rmse = rmse;
result.f_statistic = f_statistic;
result.f_pvalue = f_pvalue;
result.aic = aic;
result.aicc = aicc;
result.bic = bic;
result.log_likelihood = ll;
result.confidence_level = self.options.confidence_level;
if self.options.compute_inference {
self.compute_inference(x, &mut result, scale)?;
}
Ok(FittedAlm {
options: self.options.clone(),
distribution: self.distribution,
link: self.link,
extra_parameter: self.extra_parameter,
loss: self.loss,
scale,
result,
})
}
fn compute_inference(
&self,
x: &Mat<f64>,
result: &mut RegressionResult,
_scale: f64,
) -> Result<(), RegressionError> {
let df = result.residual_df() as f64;
if df <= 0.0 || !result.mse.is_finite() {
return Ok(());
}
if let Some(intercept) = result.intercept {
if let Ok((se, se_int)) =
CoefficientInference::standard_errors_with_intercept(x, result.mse, &result.aliased)
{
let t_stats = CoefficientInference::t_statistics(&result.coefficients, &se);
let p_vals = CoefficientInference::p_values(&t_stats, df);
let (ci_lower, ci_upper) = CoefficientInference::confidence_intervals(
&result.coefficients,
&se,
df,
self.options.confidence_level,
);
result.std_errors = Some(se);
result.t_statistics = Some(t_stats);
result.p_values = Some(p_vals);
result.conf_interval_lower = Some(ci_lower);
result.conf_interval_upper = Some(ci_upper);
let t_int = if se_int > 0.0 {
intercept / se_int
} else {
f64::NAN
};
use statrs::distribution::StudentsT;
let t_dist = StudentsT::new(0.0, 1.0, df).ok();
let p_int = if t_int.is_finite() {
t_dist.map_or(f64::NAN, |d| 2.0 * (1.0 - d.cdf(t_int.abs())))
} else {
f64::NAN
};
let t_crit = t_dist.map_or(f64::NAN, |d| {
d.inverse_cdf(1.0 - (1.0 - self.options.confidence_level) / 2.0)
});
let ci_int = (intercept - t_crit * se_int, intercept + t_crit * se_int);
result.intercept_std_error = Some(se_int);
result.intercept_t_statistic = Some(t_int);
result.intercept_p_value = Some(p_int);
result.intercept_conf_interval = Some(ci_int);
}
} else if let Ok(se) = CoefficientInference::standard_errors(x, result.mse, &result.aliased)
{
let t_stats = CoefficientInference::t_statistics(&result.coefficients, &se);
let p_vals = CoefficientInference::p_values(&t_stats, df);
let (ci_lower, ci_upper) = CoefficientInference::confidence_intervals(
&result.coefficients,
&se,
df,
self.options.confidence_level,
);
result.std_errors = Some(se);
result.t_statistics = Some(t_stats);
result.p_values = Some(p_vals);
result.conf_interval_lower = Some(ci_lower);
result.conf_interval_upper = Some(ci_upper);
}
Ok(())
}
}
impl Regressor for AlmRegressor {
type Fitted = FittedAlm;
fn fit(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Self::Fitted, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
if x.nrows() != y.nrows() {
return Err(RegressionError::DimensionMismatch {
x_rows: x.nrows(),
y_len: y.nrows(),
});
}
if n_samples < 2 {
return Err(RegressionError::InsufficientObservations {
needed: 2,
got: n_samples,
});
}
let n_params = if self.options.with_intercept {
n_features + 2 } else {
n_features + 1 };
if n_samples < n_params {
return Err(RegressionError::InsufficientObservations {
needed: n_params,
got: n_samples,
});
}
if self.distribution.requires_positive() {
for i in 0..n_samples {
if y[i] <= 0.0 {
return Err(RegressionError::NumericalError(format!(
"Distribution {:?} requires positive response values",
self.distribution
)));
}
}
}
if self.distribution.requires_unit_interval() {
for i in 0..n_samples {
if y[i] <= 0.0 || y[i] >= 1.0 {
return Err(RegressionError::NumericalError(format!(
"Distribution {:?} requires response values in (0, 1)",
self.distribution
)));
}
}
}
if self.distribution.requires_numerical_optimization() {
self.fit_numerical(x, y)
} else {
self.fit_irls(x, y)
}
}
}
#[derive(Debug, Clone)]
pub struct FittedAlm {
#[allow(dead_code)]
options: RegressionOptions,
distribution: AlmDistribution,
link: LinkFunction,
extra_parameter: Option<f64>,
loss: AlmLoss,
scale: f64,
result: RegressionResult,
}
impl FittedAlm {
pub fn distribution(&self) -> AlmDistribution {
self.distribution
}
pub fn link(&self) -> LinkFunction {
self.link
}
pub fn loss(&self) -> AlmLoss {
self.loss
}
pub fn scale(&self) -> f64 {
self.scale
}
pub fn extra_parameter(&self) -> Option<f64> {
self.extra_parameter
}
}
impl FittedRegressor for FittedAlm {
fn predict(&self, x: &Mat<f64>) -> Col<f64> {
let n = x.nrows();
let p = x.ncols();
let mut predictions = Col::zeros(n);
let intercept = self.result.intercept.unwrap_or(0.0);
for i in 0..n {
let mut eta = intercept;
for j in 0..p {
if !self.result.aliased[j] && !self.result.coefficients[j].is_nan() {
eta += x[(i, j)] * self.result.coefficients[j];
}
}
predictions[i] = self.link.inverse(eta);
}
predictions
}
fn result(&self) -> &RegressionResult {
&self.result
}
fn predict_with_interval(
&self,
x: &Mat<f64>,
interval: Option<IntervalType>,
_level: f64,
) -> PredictionResult {
let predictions = self.predict(x);
match interval {
None => PredictionResult::point_only(predictions),
Some(_) => {
let n = x.nrows();
let mut lower = Col::zeros(n);
let mut upper = Col::zeros(n);
let mut se = Col::zeros(n);
for i in 0..n {
lower[i] = f64::NAN;
upper[i] = f64::NAN;
se[i] = f64::NAN;
}
PredictionResult::with_intervals(predictions, lower, upper, se)
}
}
}
}
#[derive(Debug, Clone)]
pub struct AlmRegressorBuilder {
options_builder: RegressionOptionsBuilder,
distribution: AlmDistribution,
link: Option<LinkFunction>,
extra_parameter: Option<f64>,
loss: AlmLoss,
max_iterations: usize,
tolerance: f64,
}
impl Default for AlmRegressorBuilder {
fn default() -> Self {
Self {
options_builder: RegressionOptionsBuilder::default(),
distribution: AlmDistribution::Normal,
link: None,
extra_parameter: None,
loss: AlmLoss::default(),
max_iterations: 100,
tolerance: 1e-8,
}
}
}
impl AlmRegressorBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn distribution(mut self, dist: AlmDistribution) -> Self {
self.distribution = dist;
self
}
pub fn link(mut self, link: LinkFunction) -> Self {
self.link = Some(link);
self
}
pub fn extra_parameter(mut self, param: f64) -> Self {
self.extra_parameter = Some(param);
self
}
pub fn with_intercept(mut self, include: bool) -> Self {
self.options_builder = self.options_builder.with_intercept(include);
self
}
pub fn compute_inference(mut self, compute: bool) -> Self {
self.options_builder = self.options_builder.compute_inference(compute);
self
}
pub fn confidence_level(mut self, level: f64) -> Self {
self.options_builder = self.options_builder.confidence_level(level);
self
}
pub fn max_iterations(mut self, iters: usize) -> Self {
self.max_iterations = iters;
self
}
pub fn tolerance(mut self, tol: f64) -> Self {
self.tolerance = tol;
self
}
pub fn loss(mut self, loss: AlmLoss) -> Self {
self.loss = loss;
self
}
pub fn role_trim(mut self, trim: f64) -> Self {
self.loss = AlmLoss::role_with_trim(trim);
self
}
pub fn build(self) -> AlmRegressor {
let options = self.options_builder.build_unchecked();
let mut alm =
AlmRegressor::new(options, self.distribution, self.link, self.extra_parameter);
alm.loss = self.loss;
alm.max_iterations = self.max_iterations;
alm.tolerance = self.tolerance;
alm
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_normal_likelihood() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |i| i as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::Normal, 1.0, None);
let expected = -2.5 * (2.0 * PI).ln();
assert!((ll - expected).abs() < 1e-10);
}
#[test]
fn test_laplace_likelihood() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |i| i as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::Laplace, 1.0, None);
let expected = -5.0 * (2.0_f64).ln();
assert!((ll - expected).abs() < 1e-10);
}
#[test]
fn test_link_functions() {
assert!((LinkFunction::Identity.inverse(2.0) - 2.0).abs() < 1e-10);
assert!((LinkFunction::Log.inverse(0.0) - 1.0).abs() < 1e-10);
assert!((LinkFunction::Log.inverse(1.0) - std::f64::consts::E).abs() < 1e-10);
assert!((LinkFunction::Logit.inverse(0.0) - 0.5).abs() < 1e-10);
}
#[test]
fn test_ll_student_t() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |i| i as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::StudentT, 1.0, Some(5.0));
assert!(ll.is_finite());
assert!(ll > -100.0); }
#[test]
fn test_ll_logistic() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |i| i as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::Logistic, 1.0, None);
assert!(ll.is_finite());
assert!(ll > -100.0);
}
#[test]
fn test_ll_gamma() {
let y = Col::from_fn(5, |i| (i + 1) as f64); let mu = Col::from_fn(5, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::Gamma, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_exponential() {
let y = Col::from_fn(5, |i| (i + 1) as f64); let mu = Col::from_fn(5, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::Exponential, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_beta() {
let y = Col::from_fn(5, |i| 0.1 + 0.15 * i as f64); let mu = Col::from_fn(5, |i| 0.1 + 0.15 * i as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::Beta, 0.1, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_poisson() {
let y = Col::from_fn(5, |i| i as f64); let mu = Col::from_fn(5, |i| (i as f64).max(0.1));
let ll = log_likelihood(&y, &mu, AlmDistribution::Poisson, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_negative_binomial() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |i| (i as f64).max(0.1));
let ll = log_likelihood(&y, &mu, AlmDistribution::NegativeBinomial, 1.0, Some(2.0));
assert!(ll.is_finite());
}
#[test]
fn test_ll_binomial() {
let y = Col::from_fn(5, |i| if i % 2 == 0 { 0.0 } else { 1.0 });
let mu = Col::from_fn(5, |_| 0.5);
let ll = log_likelihood(&y, &mu, AlmDistribution::Binomial, 1.0, Some(1.0));
assert!(ll.is_finite());
}
#[test]
fn test_ll_inverse_gaussian() {
let y = Col::from_fn(5, |i| (i + 1) as f64);
let mu = Col::from_fn(5, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::InverseGaussian, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_log_normal() {
let y = Col::from_fn(5, |i| (i + 1) as f64);
let mu = Col::from_fn(5, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::LogNormal, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_asymmetric_laplace() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |i| i as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::AsymmetricLaplace, 1.0, Some(0.5));
assert!(ll.is_finite());
}
#[test]
fn test_ll_generalised_normal() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |i| i as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::GeneralisedNormal, 1.0, Some(2.0));
assert!(ll.is_finite());
}
#[test]
fn test_ll_geometric() {
let y = Col::from_fn(5, |i| i as f64);
let mu = Col::from_fn(5, |_| 0.5);
let ll = log_likelihood(&y, &mu, AlmDistribution::Geometric, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_alm_fit_normal() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| 1.0 + 2.0 * i as f64 + 0.1 * (i as f64).sin());
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Normal)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
assert!(fitted.coefficients()[0] > 1.5); assert!(fitted.scale() > 0.0);
}
#[test]
fn test_alm_fit_laplace() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| 1.0 + 2.0 * i as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Laplace)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
assert!(fitted.coefficients()[0] > 1.5);
}
#[test]
fn test_alm_fit_student_t() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| 1.0 + 2.0 * i as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::StudentT)
.extra_parameter(5.0) .with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
assert!(fitted.coefficients()[0] > 1.5);
assert_eq!(fitted.extra_parameter(), Some(5.0));
}
#[test]
fn test_alm_fit_gamma() {
let x = Mat::from_fn(50, 1, |i, _| i as f64 / 10.0);
let y = Col::from_fn(50, |i| (1.0 + 0.5 * i as f64 / 10.0).exp());
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Gamma)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
assert!(fitted.scale() > 0.0);
}
#[test]
fn test_alm_fit_poisson() {
let x = Mat::from_fn(50, 1, |i, _| i as f64 / 10.0);
let y = Col::from_fn(50, |i| ((0.5 + 0.1 * i as f64 / 10.0).exp()).round());
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Poisson)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
assert!(fitted.result().coefficients.nrows() == 1);
}
#[test]
fn test_alm_builder_fluent_api() {
let model = AlmRegressor::builder()
.distribution(AlmDistribution::StudentT)
.link(LinkFunction::Identity)
.extra_parameter(10.0)
.with_intercept(true)
.compute_inference(true)
.max_iterations(50)
.tolerance(1e-6)
.build();
assert_eq!(model.distribution, AlmDistribution::StudentT);
assert_eq!(model.link, LinkFunction::Identity);
assert_eq!(model.extra_parameter, Some(10.0));
assert!(model.options.with_intercept);
assert_eq!(model.max_iterations, 50);
}
#[test]
fn test_alm_with_intercept() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| 5.0 + 2.0 * i as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Normal)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
let intercept = fitted.result().intercept.unwrap_or(0.0);
assert!((intercept - 5.0).abs() < 1.0);
}
#[test]
fn test_alm_without_intercept() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| 2.0 * i as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Normal)
.with_intercept(false)
.build()
.fit(&x, &y)
.expect("should fit");
assert!(fitted.result().intercept.is_none());
}
#[test]
fn test_probit_link() {
assert!((LinkFunction::Probit.inverse(0.0) - 0.5).abs() < 1e-6);
assert!(LinkFunction::Probit.inverse(-2.0) < 0.1);
assert!(LinkFunction::Probit.inverse(2.0) > 0.9);
}
#[test]
fn test_sqrt_link() {
assert!((LinkFunction::Sqrt.inverse(2.0) - 4.0).abs() < 1e-10);
assert!((LinkFunction::Sqrt.inverse(3.0) - 9.0).abs() < 1e-10);
}
#[test]
fn test_inverse_link() {
assert!((LinkFunction::Inverse.inverse(0.5) - 2.0).abs() < 1e-10);
assert!((LinkFunction::Inverse.inverse(0.25) - 4.0).abs() < 1e-10);
}
#[test]
fn test_cloglog_link() {
let eta = LinkFunction::Cloglog.inverse(-0.367);
assert!((eta - 0.5).abs() < 0.01);
}
#[test]
fn test_estimate_scale_normal() {
let y = Col::from_fn(100, |i| i as f64);
let mu = Col::from_fn(100, |i| i as f64 + 0.5); let scale = estimate_scale(&y, &mu, AlmDistribution::Normal, 98.0, None);
assert!(scale > 0.0);
assert!(scale < 10.0); }
#[test]
fn test_estimate_scale_laplace() {
let y = Col::from_fn(100, |i| i as f64);
let mu = Col::from_fn(100, |i| i as f64 + 0.5);
let scale = estimate_scale(&y, &mu, AlmDistribution::Laplace, 98.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_gamma() {
let y = Col::from_fn(100, |i| (i + 1) as f64);
let mu = Col::from_fn(100, |i| (i + 1) as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::Gamma, 98.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_canonical_links() {
assert_eq!(
AlmDistribution::Normal.canonical_link(),
LinkFunction::Identity
);
assert_eq!(AlmDistribution::Gamma.canonical_link(), LinkFunction::Log);
assert_eq!(AlmDistribution::Poisson.canonical_link(), LinkFunction::Log);
assert_eq!(
AlmDistribution::Binomial.canonical_link(),
LinkFunction::Logit
);
assert_eq!(AlmDistribution::Beta.canonical_link(), LinkFunction::Logit);
}
#[test]
fn test_requires_positive() {
assert!(!AlmDistribution::Normal.requires_positive());
assert!(AlmDistribution::Gamma.requires_positive());
assert!(AlmDistribution::LogNormal.requires_positive());
assert!(AlmDistribution::Exponential.requires_positive());
}
#[test]
fn test_requires_unit_interval() {
assert!(!AlmDistribution::Normal.requires_unit_interval());
assert!(AlmDistribution::Beta.requires_unit_interval());
assert!(AlmDistribution::LogitNormal.requires_unit_interval());
}
#[test]
fn test_alm_predict() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| 1.0 + 2.0 * i as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Normal)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 30) as f64);
let preds = fitted.predict(&x_new);
assert_eq!(preds.nrows(), 5);
for i in 1..5 {
assert!(preds[i] > preds[i - 1]);
}
}
#[test]
fn test_ll_s_distribution() {
let y = Col::from_fn(10, |i| (i + 1) as f64);
let mu = Col::from_fn(10, |i| (i + 1) as f64 + 0.1);
let ll = log_likelihood(&y, &mu, AlmDistribution::S, 1.0, None);
assert!(ll.is_finite());
assert!(ll < 0.0); }
#[test]
fn test_ll_log_normal_positive() {
let y = Col::from_fn(10, |i| (i + 1) as f64);
let mu = Col::from_fn(10, |i| (i + 1) as f64 + 0.1);
let ll = log_likelihood(&y, &mu, AlmDistribution::LogNormal, 0.5, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_log_normal_negative_y() {
let y = Col::from_fn(10, |i| (i as f64) - 2.0); let mu = Col::from_fn(10, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::LogNormal, 0.5, None);
assert!(ll == f64::NEG_INFINITY);
}
#[test]
fn test_ll_log_laplace() {
let y = Col::from_fn(10, |i| (i + 1) as f64);
let mu = Col::from_fn(10, |i| (i + 1) as f64 + 0.5);
let ll = log_likelihood(&y, &mu, AlmDistribution::LogLaplace, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_log_s() {
let y = Col::from_fn(10, |i| (i + 1) as f64);
let mu = Col::from_fn(10, |i| (i + 1) as f64 + 0.2);
let ll = log_likelihood(&y, &mu, AlmDistribution::LogS, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_log_generalised_normal() {
let y = Col::from_fn(10, |i| (i + 1) as f64);
let mu = Col::from_fn(10, |i| (i + 1) as f64 + 0.1);
let ll = log_likelihood(
&y,
&mu,
AlmDistribution::LogGeneralisedNormal,
0.5,
Some(2.0),
);
assert!(ll.is_finite());
}
#[test]
fn test_ll_folded_normal() {
let y = Col::from_fn(10, |i| (i + 1) as f64); let mu = Col::from_fn(10, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::FoldedNormal, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_folded_normal_negative() {
let y = Col::from_fn(10, |i| (i as f64) - 5.0); let mu = Col::from_fn(10, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::FoldedNormal, 1.0, None);
assert!(ll == f64::NEG_INFINITY);
}
#[test]
fn test_ll_rectified_normal() {
let y = Col::from_fn(10, |i| if i < 3 { 0.0 } else { (i - 2) as f64 });
let mu = Col::from_fn(10, |i| (i as f64) * 0.5);
let ll = log_likelihood(&y, &mu, AlmDistribution::RectifiedNormal, 1.0, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_rectified_normal_negative() {
let y = Col::from_fn(10, |i| (i as f64) - 5.0); let mu = Col::from_fn(10, |i| (i + 1) as f64);
let ll = log_likelihood(&y, &mu, AlmDistribution::RectifiedNormal, 1.0, None);
assert!(ll == f64::NEG_INFINITY);
}
#[test]
fn test_ll_box_cox_normal() {
let y = Col::from_fn(10, |i| (i + 1) as f64);
let mu = Col::from_fn(10, |i| (i + 1) as f64 + 0.1);
let ll = log_likelihood(&y, &mu, AlmDistribution::BoxCoxNormal, 1.0, Some(0.5));
assert!(ll.is_finite());
}
#[test]
fn test_ll_box_cox_normal_lambda_zero() {
let y = Col::from_fn(10, |i| (i + 1) as f64);
let mu = Col::from_fn(10, |i| (i + 1) as f64 + 0.1);
let ll = log_likelihood(&y, &mu, AlmDistribution::BoxCoxNormal, 1.0, Some(0.0));
assert!(ll.is_finite());
}
#[test]
fn test_ll_logit_normal() {
let y = Col::from_fn(10, |i| (i as f64 + 1.0) / 12.0); let mu = Col::from_fn(10, |i| (i as f64 + 1.5) / 12.0);
let ll = log_likelihood(&y, &mu, AlmDistribution::LogitNormal, 0.5, None);
assert!(ll.is_finite());
}
#[test]
fn test_ll_cumulative_distributions() {
let y = Col::from_fn(10, |i| if i < 5 { 0.0 } else { 1.0 });
let mu = Col::from_fn(10, |i| (i as f64 / 10.0).clamp(0.1, 0.9)); let ll1 = log_likelihood(&y, &mu, AlmDistribution::CumulativeLogistic, 1.0, None);
let ll2 = log_likelihood(&y, &mu, AlmDistribution::CumulativeNormal, 1.0, None);
assert!(ll1.is_finite());
assert!(ll2.is_finite());
}
#[test]
fn test_estimate_scale_laplace_mad() {
let y = Col::from_fn(100, |i| (i as f64) + 0.5);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::Laplace, 99.0, None);
assert!(scale > 0.0);
assert!((scale - 0.5).abs() < 0.1); }
#[test]
fn test_estimate_scale_student_t() {
let y = Col::from_fn(100, |i| (i as f64) + (i % 2) as f64 * 2.0 - 1.0);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::StudentT, 99.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_logistic() {
let y = Col::from_fn(100, |i| (i as f64) + 0.5);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::Logistic, 99.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_log_normal() {
let y = Col::from_fn(100, |i| ((i + 1) as f64).exp());
let mu = Col::from_fn(100, |i| ((i + 1) as f64 + 0.1).exp());
let scale = estimate_scale(&y, &mu, AlmDistribution::LogNormal, 99.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_asymmetric_laplace() {
let y = Col::from_fn(100, |i| (i as f64) + 0.5);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::AsymmetricLaplace, 99.0, Some(0.5));
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_generalised_normal() {
let y = Col::from_fn(100, |i| (i as f64) + 0.5);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::GeneralisedNormal, 99.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_s_distribution() {
let y = Col::from_fn(100, |i| (i as f64) + 0.5);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::S, 99.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_log_s() {
let y = Col::from_fn(100, |i| ((i + 1) as f64).exp());
let mu = Col::from_fn(100, |i| ((i + 1) as f64 + 0.1).exp());
let scale = estimate_scale(&y, &mu, AlmDistribution::LogS, 99.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_box_cox() {
let y = Col::from_fn(100, |i| (i as f64) + 0.5);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::BoxCoxNormal, 99.0, None);
assert!(scale > 0.0);
}
#[test]
fn test_estimate_scale_discrete_is_one() {
let y = Col::from_fn(100, |i| i as f64);
let mu = Col::from_fn(100, |i| i as f64);
let scale = estimate_scale(&y, &mu, AlmDistribution::Poisson, 99.0, None);
assert!((scale - 1.0).abs() < 1e-10);
}
#[test]
fn test_fit_dimension_mismatch() {
let x = Mat::from_fn(10, 2, |i, j| (i + j) as f64);
let y = Col::from_fn(5, |i| i as f64);
let result = AlmRegressor::builder()
.distribution(AlmDistribution::Normal)
.build()
.fit(&x, &y);
assert!(result.is_err());
}
#[test]
fn test_fit_insufficient_observations() {
let x = Mat::from_fn(1, 2, |_, _| 1.0);
let y = Col::from_fn(1, |_| 1.0);
let result = AlmRegressor::builder()
.distribution(AlmDistribution::Normal)
.build()
.fit(&x, &y);
assert!(result.is_err());
}
#[test]
fn test_fit_positive_required_violation() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |i| (i as f64) - 5.0);
let result = AlmRegressor::builder()
.distribution(AlmDistribution::LogNormal) .build()
.fit(&x, &y);
assert!(result.is_err());
}
#[test]
fn test_fit_unit_interval_required_violation() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |_| 1.5);
let result = AlmRegressor::builder()
.distribution(AlmDistribution::Beta) .build()
.fit(&x, &y);
assert!(result.is_err());
}
#[test]
fn test_variance_function_poisson() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::Poisson)
.build();
let var = regressor.variance_function(5.0);
assert!((var - 5.0).abs() < 1e-10);
}
#[test]
fn test_variance_function_binomial() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::Binomial)
.build();
let var = regressor.variance_function(0.5);
assert!((var - 0.25).abs() < 1e-10); }
#[test]
fn test_variance_function_geometric() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::Geometric)
.build();
let var = regressor.variance_function(0.3);
let expected = 0.3 * (1.0 - 0.3); assert!((var - expected).abs() < 1e-10);
}
#[test]
fn test_variance_function_negative_binomial() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::NegativeBinomial)
.extra_parameter(2.0)
.build();
let var = regressor.variance_function(4.0);
let expected = 4.0 + 4.0 * 4.0 / 2.0; assert!((var - expected).abs() < 1e-10);
}
#[test]
fn test_variance_function_gamma() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::Gamma)
.build();
let var = regressor.variance_function(3.0);
assert!((var - 9.0).abs() < 1e-10); }
#[test]
fn test_variance_function_inverse_gaussian() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::InverseGaussian)
.build();
let var = regressor.variance_function(2.0);
assert!((var - 4.0).abs() < 1e-10); }
#[test]
fn test_variance_function_exponential() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::Exponential)
.build();
let var = regressor.variance_function(5.0);
assert!((var - 25.0).abs() < 1e-10); }
#[test]
fn test_variance_function_beta() {
let regressor = AlmRegressor::builder()
.distribution(AlmDistribution::Beta)
.extra_parameter(3.0) .build();
let var = regressor.variance_function(0.5);
let expected = 0.5 * 0.5 / (1.0 + 3.0); assert!((var - expected).abs() < 1e-10);
}
#[test]
fn test_variance_function_log_distributions() {
for dist in [
AlmDistribution::LogNormal,
AlmDistribution::LogLaplace,
AlmDistribution::LogS,
AlmDistribution::LogGeneralisedNormal,
AlmDistribution::FoldedNormal,
AlmDistribution::RectifiedNormal,
AlmDistribution::BoxCoxNormal,
AlmDistribution::LogitNormal,
] {
let regressor = AlmRegressor::builder().distribution(dist).build();
let var = regressor.variance_function(2.0);
assert!((var - 1.0).abs() < 1e-10, "Failed for {:?}", dist);
}
}
#[test]
fn test_variance_function_cumulative() {
for dist in [
AlmDistribution::CumulativeLogistic,
AlmDistribution::CumulativeNormal,
] {
let regressor = AlmRegressor::builder().distribution(dist).build();
let var = regressor.variance_function(0.5);
assert!((var - 1.0).abs() < 1e-10, "Failed for {:?}", dist);
}
}
#[test]
fn test_canonical_link_all_distributions() {
assert_eq!(
AlmDistribution::StudentT.canonical_link(),
LinkFunction::Identity
);
assert_eq!(
AlmDistribution::Logistic.canonical_link(),
LinkFunction::Identity
);
assert_eq!(
AlmDistribution::AsymmetricLaplace.canonical_link(),
LinkFunction::Identity
);
assert_eq!(
AlmDistribution::GeneralisedNormal.canonical_link(),
LinkFunction::Identity
);
assert_eq!(AlmDistribution::S.canonical_link(), LinkFunction::Identity);
assert_eq!(
AlmDistribution::LogNormal.canonical_link(),
LinkFunction::Log
);
assert_eq!(
AlmDistribution::LogLaplace.canonical_link(),
LinkFunction::Log
);
assert_eq!(AlmDistribution::LogS.canonical_link(), LinkFunction::Log);
assert_eq!(
AlmDistribution::LogGeneralisedNormal.canonical_link(),
LinkFunction::Log
);
assert_eq!(
AlmDistribution::FoldedNormal.canonical_link(),
LinkFunction::Identity
);
assert_eq!(
AlmDistribution::RectifiedNormal.canonical_link(),
LinkFunction::Identity
);
assert_eq!(
AlmDistribution::BoxCoxNormal.canonical_link(),
LinkFunction::Identity
);
assert_eq!(AlmDistribution::Gamma.canonical_link(), LinkFunction::Log);
assert_eq!(
AlmDistribution::InverseGaussian.canonical_link(),
LinkFunction::Log
);
assert_eq!(
AlmDistribution::Exponential.canonical_link(),
LinkFunction::Log
);
assert_eq!(AlmDistribution::Beta.canonical_link(), LinkFunction::Logit);
assert_eq!(
AlmDistribution::LogitNormal.canonical_link(),
LinkFunction::Identity );
assert_eq!(AlmDistribution::Poisson.canonical_link(), LinkFunction::Log);
assert_eq!(
AlmDistribution::NegativeBinomial.canonical_link(),
LinkFunction::Log
);
assert_eq!(
AlmDistribution::Binomial.canonical_link(),
LinkFunction::Logit
);
assert_eq!(
AlmDistribution::Geometric.canonical_link(),
LinkFunction::Log );
assert_eq!(
AlmDistribution::CumulativeLogistic.canonical_link(),
LinkFunction::Logit
);
assert_eq!(
AlmDistribution::CumulativeNormal.canonical_link(),
LinkFunction::Probit
);
}
#[test]
fn test_is_count() {
assert!(AlmDistribution::Poisson.is_count());
assert!(AlmDistribution::NegativeBinomial.is_count());
assert!(AlmDistribution::Binomial.is_count());
assert!(AlmDistribution::Geometric.is_count());
assert!(!AlmDistribution::Normal.is_count());
assert!(!AlmDistribution::Gamma.is_count());
}
#[test]
fn test_link_function_link() {
assert!((LinkFunction::Identity.link(5.0) - 5.0).abs() < 1e-10);
assert!((LinkFunction::Log.link(std::f64::consts::E) - 1.0).abs() < 1e-10);
assert!((LinkFunction::Logit.link(0.5) - 0.0).abs() < 1e-10);
assert!((LinkFunction::Inverse.link(2.0) - 0.5).abs() < 1e-10);
assert!((LinkFunction::Sqrt.link(4.0) - 2.0).abs() < 1e-10);
let cloglog_result = LinkFunction::Cloglog.link(0.5);
assert!(cloglog_result.is_finite());
}
#[test]
fn test_link_function_inverse() {
assert!((LinkFunction::Identity.inverse(5.0) - 5.0).abs() < 1e-10);
assert!((LinkFunction::Log.inverse(0.0) - 1.0).abs() < 1e-10);
assert!((LinkFunction::Logit.inverse(0.0) - 0.5).abs() < 1e-10);
assert!((LinkFunction::Inverse.inverse(2.0) - 0.5).abs() < 1e-10);
assert!((LinkFunction::Sqrt.inverse(2.0) - 4.0).abs() < 1e-10);
let cloglog_result = LinkFunction::Cloglog.inverse(0.0);
assert!(cloglog_result > 0.0 && cloglog_result < 1.0);
}
#[test]
fn test_link_function_inverse_derivative() {
assert!((LinkFunction::Identity.inverse_derivative(5.0) - 1.0).abs() < 1e-10);
assert!((LinkFunction::Log.inverse_derivative(2.0) - 2.0_f64.exp()).abs() < 0.01);
let logit_deriv = LinkFunction::Logit.inverse_derivative(0.0);
assert!((logit_deriv - 0.25).abs() < 1e-10);
assert!((LinkFunction::Sqrt.inverse_derivative(2.0) - 4.0).abs() < 1e-10);
}
#[test]
fn test_probit_link_roundtrip() {
let mu = LinkFunction::Probit.inverse(0.0);
assert!((mu - 0.5).abs() < 1e-6);
let eta = LinkFunction::Probit.link(0.5);
assert!(eta.abs() < 1e-6); }
#[test]
fn test_probit_extreme_values() {
let eta_low = LinkFunction::Probit.link(0.001);
assert!(eta_low < -2.0);
let eta_high = LinkFunction::Probit.link(0.999);
assert!(eta_high > 2.0);
let eta_zero = LinkFunction::Probit.link(0.0);
assert!(eta_zero == f64::NEG_INFINITY);
let eta_one = LinkFunction::Probit.link(1.0);
assert!(eta_one == f64::INFINITY);
}
#[test]
fn test_predict_with_interval() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| 1.0 + 2.0 * i as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Normal)
.with_intercept(true)
.build()
.fit(&x, &y)
.expect("should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 30) as f64);
let result_none = fitted.predict_with_interval(&x_new, None, 0.95);
assert_eq!(result_none.fit.nrows(), 5);
let result_ci =
fitted.predict_with_interval(&x_new, Some(crate::core::IntervalType::Confidence), 0.95);
assert_eq!(result_ci.fit.nrows(), 5);
assert_eq!(result_ci.lower.nrows(), 5);
assert_eq!(result_ci.upper.nrows(), 5);
}
#[test]
fn test_alm_fit_log_normal() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (1.0 + 0.1 * i as f64).exp());
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::LogNormal)
.with_intercept(true)
.build()
.fit(&x, &y);
assert!(fitted.is_ok());
}
#[test]
fn test_alm_fit_geometric() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| ((i % 5) + 1) as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Geometric)
.with_intercept(true)
.build()
.fit(&x, &y);
let _ = fitted;
}
#[test]
fn test_alm_fit_beta() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| 0.1 + 0.8 * (i as f64 / 30.0));
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Beta)
.extra_parameter(5.0)
.with_intercept(true)
.build()
.fit(&x, &y);
assert!(fitted.is_ok());
}
#[test]
fn test_alm_fit_inverse_gaussian() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (i + 1) as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::InverseGaussian)
.with_intercept(true)
.build()
.fit(&x, &y);
assert!(fitted.is_ok());
}
#[test]
fn test_alm_fit_exponential() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| (i + 1) as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Exponential)
.with_intercept(true)
.build()
.fit(&x, &y);
assert!(fitted.is_ok());
}
#[test]
fn test_alm_fit_binomial() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| if i > 25 { 1.0 } else { 0.0 });
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::Binomial)
.extra_parameter(1.0) .with_intercept(true)
.build()
.fit(&x, &y);
let _ = fitted;
}
#[test]
fn test_alm_fit_negative_binomial() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| (i % 10) as f64);
let fitted = AlmRegressor::builder()
.distribution(AlmDistribution::NegativeBinomial)
.extra_parameter(2.0)
.with_intercept(true)
.build()
.fit(&x, &y);
let _ = fitted;
}
}