use crate::core::{
IntervalType, PredictionResult, RegressionOptions, RegressionOptionsBuilder, RegressionResult,
};
use crate::solvers::alm::{log_likelihood, AlmDistribution, AlmRegressor, FittedAlm};
use crate::solvers::lowess::lowess_smooth_weights;
use crate::solvers::traits::{FittedRegressor, RegressionError, Regressor};
use faer::{Col, Mat};
use statrs::distribution::{ContinuousCDF, StudentsT};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum InformationCriterion {
AIC,
#[default]
AICc,
BIC,
}
impl InformationCriterion {
pub fn compute(&self, log_lik: f64, k: usize, n: usize) -> f64 {
let k_f = k as f64;
let n_f = n as f64;
match self {
InformationCriterion::AIC => 2.0 * k_f - 2.0 * log_lik,
InformationCriterion::AICc => {
let aic = 2.0 * k_f - 2.0 * log_lik;
if n_f - k_f - 1.0 > 0.0 {
aic + 2.0 * k_f * (k_f + 1.0) / (n_f - k_f - 1.0)
} else {
aic
}
}
InformationCriterion::BIC => k_f * n_f.ln() - 2.0 * log_lik,
}
}
}
#[derive(Debug, Clone)]
pub struct ModelSpec {
pub included_vars: Vec<usize>,
pub n_params: usize,
}
#[derive(Debug, Clone)]
pub struct LmDynamicRegressor {
options: RegressionOptions,
ic_type: InformationCriterion,
distribution: AlmDistribution,
lowess_span: Option<f64>,
max_models: Option<usize>,
}
impl Default for LmDynamicRegressor {
fn default() -> Self {
Self {
options: RegressionOptionsBuilder::default().build_unchecked(),
ic_type: InformationCriterion::default(),
distribution: AlmDistribution::Normal,
lowess_span: Some(0.3), max_models: Some(64), }
}
}
impl LmDynamicRegressor {
pub fn new() -> Self {
Self::default()
}
pub fn builder() -> LmDynamicRegressorBuilder {
LmDynamicRegressorBuilder::default()
}
pub fn ic_type(&self) -> InformationCriterion {
self.ic_type
}
pub fn distribution(&self) -> AlmDistribution {
self.distribution
}
pub fn lowess_span(&self) -> Option<f64> {
self.lowess_span
}
fn generate_model_specs(&self, p: usize) -> Vec<ModelSpec> {
let max = self.max_models.unwrap_or(usize::MAX);
let mut specs = Vec::new();
let total_models = (1usize << p) - 1;
for mask in 1..=total_models {
if specs.len() >= max {
break;
}
let included: Vec<usize> = (0..p).filter(|&j| (mask >> j) & 1 == 1).collect();
let n_params = included.len() + if self.options.with_intercept { 1 } else { 0 };
specs.push(ModelSpec {
included_vars: included,
n_params,
});
}
specs
}
fn extract_columns(&self, x: &Mat<f64>, included: &[usize]) -> Mat<f64> {
let n = x.nrows();
let p_new = included.len();
let mut x_new = Mat::zeros(n, p_new);
for (new_j, &orig_j) in included.iter().enumerate() {
for i in 0..n {
x_new[(i, new_j)] = x[(i, orig_j)];
}
}
x_new
}
fn fit_internal(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<FittedLmDynamic, RegressionError> {
let n = x.nrows();
let p = x.ncols();
if n < 3 {
return Err(RegressionError::InsufficientObservations { needed: 3, got: n });
}
let model_specs = self.generate_model_specs(p);
let n_models = model_specs.len();
if n_models == 0 {
return Err(RegressionError::InsufficientObservations { needed: 1, got: 0 });
}
let mut fitted_models: Vec<Option<FittedAlm>> = Vec::with_capacity(n_models);
let mut pointwise_ic = Mat::from_fn(n, n_models, |_, _| f64::INFINITY);
for (m, spec) in model_specs.iter().enumerate() {
let x_subset = self.extract_columns(x, &spec.included_vars);
let alm = AlmRegressor::builder()
.distribution(self.distribution)
.with_intercept(self.options.with_intercept)
.compute_inference(false)
.build();
match alm.fit(&x_subset, y) {
Ok(fitted) => {
let mu = &fitted.result().fitted_values;
let scale = fitted.scale();
for i in 0..n {
let yi = Col::from_fn(1, |_| y[i]);
let mui = Col::from_fn(1, |_| mu[i]);
let ll_i = log_likelihood(
&yi,
&mui,
self.distribution,
scale,
fitted.extra_parameter(),
);
pointwise_ic[(i, m)] = self.ic_type.compute(ll_i, spec.n_params, n);
}
fitted_models.push(Some(fitted));
}
Err(_) => {
fitted_models.push(None);
}
}
}
let mut model_weights = Mat::zeros(n, n_models);
for i in 0..n {
let min_ic = (0..n_models)
.map(|m| pointwise_ic[(i, m)])
.fold(f64::INFINITY, f64::min);
let mut row_sum = 0.0;
for m in 0..n_models {
let delta = pointwise_ic[(i, m)] - min_ic;
let w = (-0.5 * delta).exp();
model_weights[(i, m)] = w;
row_sum += w;
}
if row_sum > 1e-10 {
for m in 0..n_models {
model_weights[(i, m)] /= row_sum;
}
} else {
for m in 0..n_models {
model_weights[(i, m)] = 1.0 / n_models as f64;
}
}
}
let smoothed_weights = self
.lowess_span
.map(|span| lowess_smooth_weights(&model_weights, span));
let weights_to_use = smoothed_weights.as_ref().unwrap_or(&model_weights);
let dynamic_coefficients =
self.compute_dynamic_coefficients(&model_specs, &fitted_models, weights_to_use, p);
let avg_coefs = self.compute_average_coefficients(&dynamic_coefficients);
let result = self.build_base_result(x, y, &avg_coefs, &dynamic_coefficients)?;
Ok(FittedLmDynamic {
options: self.options.clone(),
distribution: self.distribution,
result,
dynamic_coefficients,
model_weights,
smoothed_weights,
model_specs,
pointwise_ic,
n_features: p,
original_x: x.to_owned(),
has_intercept: self.options.with_intercept,
})
}
fn compute_dynamic_coefficients(
&self,
specs: &[ModelSpec],
fitted_models: &[Option<FittedAlm>],
weights: &Mat<f64>,
p: usize,
) -> Mat<f64> {
let n = weights.nrows();
let n_coef_cols = p + if self.options.with_intercept { 1 } else { 0 };
let mut dynamic_coefs = Mat::zeros(n, n_coef_cols);
for i in 0..n {
for (m, (spec, fitted_opt)) in specs.iter().zip(fitted_models.iter()).enumerate() {
let weight = weights[(i, m)];
if let Some(fitted) = fitted_opt {
let coefs = &fitted.result().coefficients;
if self.options.with_intercept {
if let Some(intercept) = fitted.result().intercept {
dynamic_coefs[(i, 0)] += weight * intercept;
}
}
for (subset_j, &orig_j) in spec.included_vars.iter().enumerate() {
let col_idx = if self.options.with_intercept {
orig_j + 1
} else {
orig_j
};
if subset_j < coefs.nrows() {
dynamic_coefs[(i, col_idx)] += weight * coefs[subset_j];
}
}
}
}
}
dynamic_coefs
}
fn compute_average_coefficients(&self, dynamic_coefs: &Mat<f64>) -> Col<f64> {
let n = dynamic_coefs.nrows();
let p = dynamic_coefs.ncols();
Col::from_fn(p, |j| {
let sum: f64 = (0..n).map(|i| dynamic_coefs[(i, j)]).sum();
sum / n as f64
})
}
fn build_base_result(
&self,
x: &Mat<f64>,
y: &Col<f64>,
avg_coefs: &Col<f64>,
dynamic_coefs: &Mat<f64>,
) -> Result<RegressionResult, RegressionError> {
let n = x.nrows();
let p = x.ncols();
let mut fitted_values = Col::zeros(n);
for i in 0..n {
if self.options.with_intercept {
fitted_values[i] = dynamic_coefs[(i, 0)]; for j in 0..p {
fitted_values[i] += x[(i, j)] * dynamic_coefs[(i, j + 1)];
}
} else {
for j in 0..p {
fitted_values[i] += x[(i, j)] * dynamic_coefs[(i, j)];
}
}
}
let mut residuals = Col::zeros(n);
for i in 0..n {
residuals[i] = y[i] - fitted_values[i];
}
let (coefficients, intercept) = if self.options.with_intercept {
let intercept = avg_coefs[0];
let coefs = Col::from_fn(p, |j| avg_coefs[j + 1]);
(coefs, Some(intercept))
} else {
(avg_coefs.clone(), None)
};
let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
let tss: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
let rss: f64 = residuals.iter().map(|&r| r.powi(2)).sum();
let r_squared = if tss > 0.0 {
(1.0 - rss / tss).clamp(0.0, 1.0)
} else {
1.0
};
let n_params = avg_coefs.nrows();
let df_resid = (n - n_params) as f64;
let adj_r_squared = if df_resid > 0.0 {
1.0 - (1.0 - r_squared) * (n - 1) as f64 / df_resid
} else {
f64::NAN
};
let mse = if df_resid > 0.0 {
rss / df_resid
} else {
f64::NAN
};
let rmse = mse.sqrt();
let aliased = vec![false; p];
let mut result = RegressionResult::empty(p, n);
result.coefficients = coefficients;
result.intercept = intercept;
result.residuals = residuals;
result.fitted_values = fitted_values;
result.rank = p;
result.n_parameters = n_params;
result.n_observations = n;
result.aliased = aliased;
result.r_squared = r_squared;
result.adj_r_squared = adj_r_squared;
result.mse = mse;
result.rmse = rmse;
result.confidence_level = self.options.confidence_level;
Ok(result)
}
}
impl Regressor for LmDynamicRegressor {
type Fitted = FittedLmDynamic;
fn fit(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Self::Fitted, RegressionError> {
if x.nrows() != y.nrows() {
return Err(RegressionError::DimensionMismatch {
x_rows: x.nrows(),
y_len: y.nrows(),
});
}
self.fit_internal(x, y)
}
}
#[derive(Debug, Clone)]
pub struct FittedLmDynamic {
#[allow(dead_code)]
options: RegressionOptions,
distribution: AlmDistribution,
result: RegressionResult,
dynamic_coefficients: Mat<f64>,
model_weights: Mat<f64>,
smoothed_weights: Option<Mat<f64>>,
model_specs: Vec<ModelSpec>,
pointwise_ic: Mat<f64>,
#[allow(dead_code)]
n_features: usize,
original_x: Mat<f64>,
has_intercept: bool,
}
impl FittedLmDynamic {
pub fn distribution(&self) -> AlmDistribution {
self.distribution
}
pub fn dynamic_coefficients(&self) -> &Mat<f64> {
&self.dynamic_coefficients
}
pub fn model_weights(&self) -> &Mat<f64> {
&self.model_weights
}
pub fn smoothed_weights(&self) -> Option<&Mat<f64>> {
self.smoothed_weights.as_ref()
}
pub fn model_specs(&self) -> &[ModelSpec] {
&self.model_specs
}
pub fn pointwise_ic(&self) -> &Mat<f64> {
&self.pointwise_ic
}
pub fn coefficient_at(&self, obs_index: usize, coef_index: usize) -> Option<f64> {
if obs_index < self.dynamic_coefficients.nrows()
&& coef_index < self.dynamic_coefficients.ncols()
{
Some(self.dynamic_coefficients[(obs_index, coef_index)])
} else {
None
}
}
pub fn coefficients_at(&self, obs_index: usize) -> Option<Col<f64>> {
if obs_index < self.dynamic_coefficients.nrows() {
let ncols = self.dynamic_coefficients.ncols();
Some(Col::from_fn(ncols, |j| {
self.dynamic_coefficients[(obs_index, j)]
}))
} else {
None
}
}
fn compute_dynamic_intervals(
&self,
x_new: &Mat<f64>,
predictions: &Col<f64>,
interval_type: IntervalType,
level: f64,
) -> PredictionResult {
let n_new = x_new.nrows();
let df = self.result.residual_df() as f64;
if df <= 0.0 || !self.result.mse.is_finite() || self.result.mse < 0.0 {
return self.create_nan_intervals(predictions, n_new);
}
let xtx_inv = match self.compute_xtx_inverse() {
Some(inv) => inv,
None => return self.create_nan_intervals(predictions, n_new),
};
let t_crit = self.compute_t_critical(df, level);
let mse = self.result.mse;
let mut se = Col::zeros(n_new);
let mut lower = Col::zeros(n_new);
let mut upper = Col::zeros(n_new);
for i in 0..n_new {
let x0 = self.build_obs_vector(x_new, i);
let h = self.compute_leverage(&x0, &xtx_inv);
let var = match interval_type {
IntervalType::Confidence => mse * h,
IntervalType::Prediction => mse * (1.0 + h),
};
se[i] = if var >= 0.0 { var.sqrt() } else { f64::NAN };
let margin = t_crit * se[i];
lower[i] = predictions[i] - margin;
upper[i] = predictions[i] + margin;
}
PredictionResult::with_intervals(predictions.clone(), lower, upper, se)
}
fn compute_xtx_inverse(&self) -> Option<Mat<f64>> {
let n = self.original_x.nrows();
let p = self.original_x.ncols();
let design = if self.has_intercept {
let aug_size = p + 1;
Mat::from_fn(n, aug_size, |i, j| {
if j == 0 {
1.0
} else {
self.original_x[(i, j - 1)]
}
})
} else {
self.original_x.clone()
};
let xtx = design.transpose() * &design;
let dim = xtx.nrows();
let qr = xtx.qr();
let q = qr.compute_Q();
let r = qr.R().to_owned();
for i in 0..dim {
if r[(i, i)].abs() < 1e-10 {
return None;
}
}
let qt = q.transpose().to_owned();
let mut inv = Mat::zeros(dim, dim);
for col in 0..dim {
for i in (0..dim).rev() {
let mut sum = qt[(i, col)];
for j in (i + 1)..dim {
sum -= r[(i, j)] * inv[(j, col)];
}
inv[(i, col)] = sum / r[(i, i)];
}
}
Some(inv)
}
fn build_obs_vector(&self, x_new: &Mat<f64>, row: usize) -> Col<f64> {
let n_features = x_new.ncols();
if self.has_intercept {
let mut x0 = Col::zeros(n_features + 1);
x0[0] = 1.0;
for j in 0..n_features {
x0[j + 1] = x_new[(row, j)];
}
x0
} else {
Col::from_fn(n_features, |j| x_new[(row, j)])
}
}
fn compute_leverage(&self, x0: &Col<f64>, xtx_inv: &Mat<f64>) -> f64 {
let p = x0.nrows();
let mut xtx_inv_x0 = Col::zeros(p);
for i in 0..p {
let mut sum = 0.0;
for j in 0..p {
sum += xtx_inv[(i, j)] * x0[j];
}
xtx_inv_x0[i] = sum;
}
let mut h = 0.0;
for i in 0..p {
h += x0[i] * xtx_inv_x0[i];
}
h.max(0.0) }
fn compute_t_critical(&self, df: f64, level: f64) -> f64 {
let t_dist = StudentsT::new(0.0, 1.0, df).expect("valid t-distribution parameters");
let alpha = 1.0 - level;
t_dist.inverse_cdf(1.0 - alpha / 2.0)
}
fn create_nan_intervals(&self, predictions: &Col<f64>, n: usize) -> PredictionResult {
let se = Col::from_fn(n, |_| f64::NAN);
let lower = Col::from_fn(n, |_| f64::NAN);
let upper = Col::from_fn(n, |_| f64::NAN);
PredictionResult::with_intervals(predictions.clone(), lower, upper, se)
}
}
impl FittedRegressor for FittedLmDynamic {
fn predict(&self, x: &Mat<f64>) -> Col<f64> {
let n = x.nrows();
let p = x.ncols();
let mut predictions = Col::zeros(n);
let intercept = self.result.intercept.unwrap_or(0.0);
for i in 0..n {
predictions[i] = intercept;
for j in 0..p.min(self.result.coefficients.nrows()) {
predictions[i] += x[(i, j)] * self.result.coefficients[j];
}
}
predictions
}
fn result(&self) -> &RegressionResult {
&self.result
}
fn predict_with_interval(
&self,
x: &Mat<f64>,
interval: Option<IntervalType>,
level: f64,
) -> PredictionResult {
let predictions = self.predict(x);
match interval {
None => PredictionResult::point_only(predictions),
Some(interval_type) => {
self.compute_dynamic_intervals(x, &predictions, interval_type, level)
}
}
}
}
#[derive(Debug, Clone)]
pub struct LmDynamicRegressorBuilder {
options_builder: RegressionOptionsBuilder,
ic_type: InformationCriterion,
distribution: AlmDistribution,
lowess_span: Option<f64>,
max_models: Option<usize>,
}
impl Default for LmDynamicRegressorBuilder {
fn default() -> Self {
Self {
options_builder: RegressionOptionsBuilder::default(),
ic_type: InformationCriterion::default(),
distribution: AlmDistribution::default(),
lowess_span: Some(0.3),
max_models: Some(64),
}
}
}
impl LmDynamicRegressorBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn ic(mut self, ic: InformationCriterion) -> Self {
self.ic_type = ic;
self
}
pub fn distribution(mut self, dist: AlmDistribution) -> Self {
self.distribution = dist;
self
}
pub fn lowess_span(mut self, span: f64) -> Self {
self.lowess_span = Some(span.clamp(0.05, 1.0));
self
}
pub fn no_smoothing(mut self) -> Self {
self.lowess_span = None;
self
}
pub fn max_models(mut self, max: usize) -> Self {
self.max_models = Some(max);
self
}
pub fn with_intercept(mut self, include: bool) -> Self {
self.options_builder = self.options_builder.with_intercept(include);
self
}
pub fn confidence_level(mut self, level: f64) -> Self {
self.options_builder = self.options_builder.confidence_level(level);
self
}
pub fn build(self) -> LmDynamicRegressor {
LmDynamicRegressor {
options: self.options_builder.build_unchecked(),
ic_type: self.ic_type,
distribution: self.distribution,
lowess_span: self.lowess_span,
max_models: self.max_models,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_information_criterion_compute() {
let aic = InformationCriterion::AIC.compute(-10.0, 3, 100);
assert!((aic - 26.0).abs() < 1e-10);
let bic = InformationCriterion::BIC.compute(-10.0, 3, 100);
let expected_bic = 3.0 * (100.0_f64).ln() + 20.0;
assert!((bic - expected_bic).abs() < 1e-10);
}
#[test]
fn test_builder_defaults() {
let model = LmDynamicRegressor::builder().build();
assert_eq!(model.ic_type(), InformationCriterion::AICc);
assert_eq!(model.distribution(), AlmDistribution::Normal);
assert!(model.lowess_span().is_some());
}
#[test]
fn test_builder_custom() {
let model = LmDynamicRegressor::builder()
.ic(InformationCriterion::BIC)
.distribution(AlmDistribution::Laplace)
.lowess_span(0.5)
.max_models(32)
.with_intercept(false)
.build();
assert_eq!(model.ic_type(), InformationCriterion::BIC);
assert_eq!(model.distribution(), AlmDistribution::Laplace);
assert_eq!(model.lowess_span(), Some(0.5));
}
#[test]
fn test_fit_simple() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| 2.0 + 1.5 * (i + 1) as f64 + 0.1 * (i as f64).sin());
let model = LmDynamicRegressor::builder()
.ic(InformationCriterion::AICc)
.with_intercept(true)
.build();
let result = model.fit(&x, &y);
assert!(result.is_ok(), "Should fit successfully");
let fitted = result.unwrap();
assert_eq!(fitted.dynamic_coefficients().nrows(), 30);
assert_eq!(fitted.dynamic_coefficients().ncols(), 2);
for i in 0..30 {
let row_sum: f64 = (0..fitted.model_weights().ncols())
.map(|j| fitted.model_weights()[(i, j)])
.sum();
assert!(
(row_sum - 1.0).abs() < 1e-6,
"Row {} weights sum to {}",
i,
row_sum
);
}
}
#[test]
fn test_fit_multiple_features() {
let x = Mat::from_fn(40, 3, |i, j| (i + j + 1) as f64 * 0.1);
let y = Col::from_fn(40, |i| {
let i_f = i as f64;
1.0 + 0.5 * (i_f + 1.0) * 0.1 + 0.3 * (i_f + 2.0) * 0.1
});
let model = LmDynamicRegressor::builder()
.max_models(7) .with_intercept(true)
.build();
let result = model.fit(&x, &y);
assert!(result.is_ok(), "Should fit with multiple features");
let fitted = result.unwrap();
assert!(fitted.model_specs().len() <= 7);
assert!(!fitted.model_specs().is_empty());
}
#[test]
fn test_dynamic_coefficients_vary() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(50, |i| {
let t = i as f64;
let slope = 1.0 + t / 50.0;
slope * (i + 1) as f64
});
let model = LmDynamicRegressor::builder()
.no_smoothing() .with_intercept(true)
.build();
let fitted = model.fit(&x, &y).expect("Should fit");
let coef_start = fitted.coefficient_at(0, 1).unwrap(); let coef_end = fitted.coefficient_at(49, 1).unwrap();
assert!(coef_start > 0.0, "Start coefficient should be positive");
assert!(coef_end > 0.0, "End coefficient should be positive");
}
#[test]
fn test_smoothed_weights_exist() {
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| i as f64 + 0.1);
let model = LmDynamicRegressor::builder().lowess_span(0.3).build();
let fitted = model.fit(&x, &y).expect("Should fit");
assert!(fitted.smoothed_weights().is_some());
let sw = fitted.smoothed_weights().unwrap();
for i in 0..sw.nrows() {
let row_sum: f64 = (0..sw.ncols()).map(|j| sw[(i, j)]).sum();
assert!((row_sum - 1.0).abs() < 1e-6);
}
}
#[test]
fn test_predict() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| 2.0 + 1.5 * (i + 1) as f64);
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 31) as f64);
let predictions = fitted.predict(&x_new);
assert_eq!(predictions.nrows(), 5);
for i in 0..5 {
assert!(predictions[i] > 0.0);
assert!(predictions[i] < 200.0);
}
}
#[test]
fn test_fit_insufficient_observations() {
let x = Mat::from_fn(2, 1, |i, _| i as f64);
let y = Col::from_fn(2, |i| i as f64);
let model = LmDynamicRegressor::builder().build();
let result = model.fit(&x, &y);
assert!(result.is_err());
match result {
Err(RegressionError::InsufficientObservations { needed, got }) => {
assert_eq!(needed, 3);
assert_eq!(got, 2);
}
_ => panic!("Expected InsufficientObservations error"),
}
}
#[test]
fn test_fit_dimension_mismatch() {
let x = Mat::from_fn(10, 2, |i, j| (i + j) as f64);
let y = Col::from_fn(5, |i| i as f64);
let model = LmDynamicRegressor::builder().build();
let result = model.fit(&x, &y);
assert!(result.is_err());
match result {
Err(RegressionError::DimensionMismatch { x_rows, y_len }) => {
assert_eq!(x_rows, 10);
assert_eq!(y_len, 5);
}
_ => panic!("Expected DimensionMismatch error"),
}
}
#[test]
fn test_fit_zero_features() {
let x = Mat::<f64>::zeros(10, 0); let y = Col::from_fn(10, |i| i as f64);
let model = LmDynamicRegressor::builder().build();
let result = model.fit(&x, &y);
assert!(result.is_err());
}
#[test]
fn test_builder_lowess_span_clamping() {
let model1 = LmDynamicRegressor::builder().lowess_span(0.01).build();
assert_eq!(model1.lowess_span(), Some(0.05));
let model2 = LmDynamicRegressor::builder().lowess_span(1.5).build();
assert_eq!(model2.lowess_span(), Some(1.0));
let model3 = LmDynamicRegressor::builder().lowess_span(0.4).build();
assert_eq!(model3.lowess_span(), Some(0.4));
}
#[test]
fn test_builder_no_smoothing() {
let model = LmDynamicRegressor::builder().no_smoothing().build();
assert_eq!(model.lowess_span(), None);
}
#[test]
fn test_fit_without_smoothing() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| 2.0 + 1.5 * (i + 1) as f64);
let model = LmDynamicRegressor::builder()
.no_smoothing()
.with_intercept(true)
.build();
let fitted = model.fit(&x, &y).expect("Should fit");
assert!(fitted.smoothed_weights().is_none());
for i in 0..30 {
let sum: f64 = (0..fitted.model_weights().ncols())
.map(|j| fitted.model_weights()[(i, j)])
.sum();
assert!((sum - 1.0).abs() < 1e-6);
}
}
#[test]
fn test_fit_with_laplace_distribution() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(50, |i| {
let t = (i + 1) as f64;
2.0 + 1.5 * t + if i % 3 == 0 { 5.0 } else { 0.0 } });
let model = LmDynamicRegressor::builder()
.distribution(AlmDistribution::Laplace)
.with_intercept(true)
.build();
assert_eq!(model.distribution(), AlmDistribution::Laplace);
let result = model.fit(&x, &y);
assert!(result.is_ok());
let fitted = result.unwrap();
assert_eq!(fitted.distribution(), AlmDistribution::Laplace);
}
#[test]
fn test_fit_no_intercept() {
let x = Mat::from_fn(30, 2, |i, j| ((i + 1) * (j + 1)) as f64);
let y = Col::from_fn(30, |i| {
let i_f = (i + 1) as f64;
2.0 * i_f + 3.0 * i_f * 2.0
});
let model = LmDynamicRegressor::builder().with_intercept(false).build();
let fitted = model.fit(&x, &y).expect("Should fit");
assert!(fitted.result().intercept.is_none());
assert_eq!(fitted.dynamic_coefficients().ncols(), 2);
}
#[test]
fn test_dynamic_coefficient_mapping() {
let x = Mat::from_fn(30, 2, |i, j| {
if j == 0 {
i as f64
} else {
(i as f64).sin() * 10.0
}
});
let y = Col::from_fn(30, |i| 1.0 + 2.0 * i as f64 + 3.0 * (i as f64).sin() * 10.0);
let model = LmDynamicRegressor::builder()
.max_models(7) .with_intercept(true)
.build();
let fitted = model.fit(&x, &y).expect("Should fit");
assert_eq!(fitted.dynamic_coefficients().ncols(), 3);
for i in 0..30 {
for j in 0..3 {
assert!(
fitted.dynamic_coefficients()[(i, j)].is_finite(),
"Dynamic coef[{},{}] should be finite",
i,
j
);
}
}
}
#[test]
fn test_information_criterion_aicc_small_sample() {
let log_lik = -10.0;
let k = 5;
let n = 5;
let aicc = InformationCriterion::AICc.compute(log_lik, k, n);
let aic = InformationCriterion::AIC.compute(log_lik, k, n);
assert!((aicc - aic).abs() < 1e-10);
}
#[test]
fn test_information_criterion_bic() {
let log_lik = -15.0;
let k = 4;
let n = 50;
let bic = InformationCriterion::BIC.compute(log_lik, k, n);
let expected = 4.0 * (50.0_f64).ln() + 30.0;
assert!((bic - expected).abs() < 1e-10);
}
#[test]
fn test_coefficient_at() {
let x = Mat::from_fn(30, 2, |i, j| (i + j) as f64 * 0.1);
let y = Col::from_fn(30, |i| i as f64);
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
assert!(fitted.coefficient_at(0, 0).is_some());
assert!(fitted.coefficient_at(29, 2).is_some());
assert!(fitted.coefficient_at(30, 0).is_none()); assert!(fitted.coefficient_at(0, 10).is_none()); }
#[test]
fn test_coefficients_at() {
let x = Mat::from_fn(30, 2, |i, j| (i + j) as f64 * 0.1);
let y = Col::from_fn(30, |i| i as f64);
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let coefs = fitted.coefficients_at(15);
assert!(coefs.is_some());
let coefs = coefs.unwrap();
assert_eq!(coefs.nrows(), 3);
assert!(fitted.coefficients_at(100).is_none());
}
#[test]
fn test_predict_with_interval_none() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| 2.0 + 1.5 * (i + 1) as f64);
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 31) as f64);
let result = fitted.predict_with_interval(&x_new, None, 0.95);
assert_eq!(result.fit.nrows(), 5);
for i in 0..5 {
assert!(result.fit[i].is_finite());
}
}
#[test]
fn test_model_weights_normalization() {
let x = Mat::from_fn(40, 2, |i, j| (i + j) as f64 * 0.1);
let y = Col::from_fn(40, |i| i as f64 + (i as f64).sin());
let model = LmDynamicRegressor::builder()
.max_models(3)
.with_intercept(true)
.build();
let fitted = model.fit(&x, &y).expect("Should fit");
for i in 0..40 {
let row_sum: f64 = (0..fitted.model_weights().ncols())
.map(|j| fitted.model_weights()[(i, j)])
.sum();
assert!(
(row_sum - 1.0).abs() < 1e-6,
"Row {} weights sum to {} instead of 1.0",
i,
row_sum
);
for j in 0..fitted.model_weights().ncols() {
assert!(
fitted.model_weights()[(i, j)] >= 0.0,
"Weight[{},{}] = {} is negative",
i,
j,
fitted.model_weights()[(i, j)]
);
}
}
}
#[test]
fn test_pointwise_ic_dimensions() {
let x = Mat::from_fn(30, 2, |i, j| (i + j) as f64 * 0.1);
let y = Col::from_fn(30, |i| i as f64);
let model = LmDynamicRegressor::builder()
.max_models(3)
.with_intercept(true)
.build();
let fitted = model.fit(&x, &y).expect("Should fit");
let ic = fitted.pointwise_ic();
assert_eq!(ic.nrows(), 30);
assert_eq!(ic.ncols(), fitted.model_specs().len());
}
#[test]
fn test_distribution_accessor() {
let model = LmDynamicRegressor::builder()
.distribution(AlmDistribution::Laplace)
.build();
let x = Mat::from_fn(30, 1, |i, _| i as f64);
let y = Col::from_fn(30, |i| i as f64);
let fitted = model.fit(&x, &y).expect("Should fit");
assert_eq!(fitted.distribution(), AlmDistribution::Laplace);
}
#[test]
fn test_max_models_limit() {
let x = Mat::from_fn(30, 5, |i, j| (i + j) as f64 * 0.1);
let y = Col::from_fn(30, |i| i as f64);
let model = LmDynamicRegressor::builder()
.max_models(5) .build();
let fitted = model.fit(&x, &y).expect("Should fit");
assert!(fitted.model_specs().len() <= 5);
}
#[test]
fn test_predict_with_prediction_interval() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(50, |i| {
let t = (i + 1) as f64;
2.0 + 1.5 * t + (t * 0.1).sin() * 0.5
});
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 51) as f64);
let result = fitted.predict_with_interval(&x_new, Some(IntervalType::Prediction), 0.95);
assert_eq!(result.fit.nrows(), 5);
assert_eq!(result.lower.nrows(), 5);
assert_eq!(result.upper.nrows(), 5);
assert_eq!(result.se.nrows(), 5);
for i in 0..5 {
assert!(result.fit[i].is_finite(), "Prediction should be finite");
assert!(result.lower[i].is_finite(), "Lower bound should be finite");
assert!(result.upper[i].is_finite(), "Upper bound should be finite");
assert!(result.se[i].is_finite(), "SE should be finite");
assert!(
result.lower[i] < result.fit[i],
"Lower bound should be below prediction"
);
assert!(
result.upper[i] > result.fit[i],
"Upper bound should be above prediction"
);
assert!(result.se[i] > 0.0, "SE should be positive");
}
}
#[test]
fn test_predict_with_confidence_interval() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(50, |i| {
let t = (i + 1) as f64;
2.0 + 1.5 * t + (t * 0.1).sin() * 0.5
});
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 51) as f64);
let result = fitted.predict_with_interval(&x_new, Some(IntervalType::Confidence), 0.95);
for i in 0..5 {
assert!(result.lower[i].is_finite());
assert!(result.upper[i].is_finite());
assert!(result.lower[i] < result.fit[i]);
assert!(result.upper[i] > result.fit[i]);
}
}
#[test]
fn test_prediction_interval_wider_than_confidence() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(50, |i| {
let t = (i + 1) as f64;
2.0 + 1.5 * t + (t * 0.1).sin() * 0.5
});
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 51) as f64);
let pred = fitted.predict_with_interval(&x_new, Some(IntervalType::Prediction), 0.95);
let conf = fitted.predict_with_interval(&x_new, Some(IntervalType::Confidence), 0.95);
for i in 0..5 {
assert!(
pred.se[i] > conf.se[i],
"Prediction SE {} should be > Confidence SE {}",
pred.se[i],
conf.se[i]
);
}
}
#[test]
fn test_different_confidence_levels() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(50, |i| {
let t = (i + 1) as f64;
2.0 + 1.5 * t + (t * 0.1).sin() * 0.5
});
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(3, 1, |i, _| (i + 51) as f64);
let result_90 = fitted.predict_with_interval(&x_new, Some(IntervalType::Prediction), 0.90);
let result_95 = fitted.predict_with_interval(&x_new, Some(IntervalType::Prediction), 0.95);
let result_99 = fitted.predict_with_interval(&x_new, Some(IntervalType::Prediction), 0.99);
for i in 0..3 {
let width_90 = result_90.upper[i] - result_90.lower[i];
let width_95 = result_95.upper[i] - result_95.lower[i];
let width_99 = result_99.upper[i] - result_99.lower[i];
assert!(
width_95 > width_90,
"95% interval should be wider than 90%: {} vs {}",
width_95,
width_90
);
assert!(
width_99 > width_95,
"99% interval should be wider than 95%: {} vs {}",
width_99,
width_95
);
}
}
#[test]
fn test_interval_no_intercept() {
let x = Mat::from_fn(50, 2, |i, j| {
let i_f = (i + 1) as f64;
if j == 0 {
i_f
} else {
(i_f * 0.1).sin() * 10.0
}
});
let y = Col::from_fn(50, |i| {
let i_f = (i + 1) as f64;
2.0 * i_f + 3.0 * (i_f * 0.1).sin() * 10.0
});
let model = LmDynamicRegressor::builder().with_intercept(false).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(5, 2, |i, j| {
let i_f = (i + 51) as f64;
if j == 0 {
i_f
} else {
(i_f * 0.1).sin() * 10.0
}
});
let result = fitted.predict_with_interval(&x_new, Some(IntervalType::Prediction), 0.95);
for i in 0..5 {
assert!(result.lower[i].is_finite(), "Lower should be finite");
assert!(result.upper[i].is_finite(), "Upper should be finite");
}
}
#[test]
fn test_interval_multiple_features() {
let x = Mat::from_fn(60, 3, |i, j| {
let i_f = (i + 1) as f64;
match j {
0 => i_f,
1 => i_f.powi(2) * 0.01,
_ => (i_f * 0.1).sin(),
}
});
let y = Col::from_fn(60, |i| {
let t = (i + 1) as f64;
1.0 + 0.5 * t + 0.1 * t.powi(2) * 0.01 + 0.3 * (t * 0.1).sin()
});
let model = LmDynamicRegressor::builder()
.with_intercept(true)
.max_models(7)
.build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(5, 3, |i, j| {
let i_f = (i + 61) as f64;
match j {
0 => i_f,
1 => i_f.powi(2) * 0.01,
_ => (i_f * 0.1).sin(),
}
});
let result = fitted.predict_with_interval(&x_new, Some(IntervalType::Prediction), 0.95);
for i in 0..5 {
assert!(result.lower[i].is_finite());
assert!(result.upper[i].is_finite());
assert!(result.se[i] > 0.0);
}
}
#[test]
fn test_interval_compute_xtx_inverse() {
let x = Mat::from_fn(30, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(30, |i| 2.0 + 1.5 * (i + 1) as f64);
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_new = Mat::from_fn(3, 1, |i, _| (i + 31) as f64);
let result = fitted.predict_with_interval(&x_new, Some(IntervalType::Confidence), 0.95);
for i in 0..3 {
assert!(result.se[i].is_finite() && result.se[i] > 0.0);
}
}
#[test]
fn test_interval_extrapolation_leverage() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64); let y = Col::from_fn(50, |i| 2.0 + 1.5 * (i + 1) as f64);
let model = LmDynamicRegressor::builder().with_intercept(true).build();
let fitted = model.fit(&x, &y).expect("Should fit");
let x_interp = Mat::from_fn(1, 1, |_, _| 25.0);
let result_interp =
fitted.predict_with_interval(&x_interp, Some(IntervalType::Confidence), 0.95);
let x_extrap = Mat::from_fn(1, 1, |_, _| 100.0);
let result_extrap =
fitted.predict_with_interval(&x_extrap, Some(IntervalType::Confidence), 0.95);
let se_interp = result_interp.se[0];
let se_extrap = result_extrap.se[0];
assert!(
se_extrap > se_interp,
"Extrapolation SE {} should be > interpolation SE {}",
se_extrap,
se_interp
);
}
}