use crate::core::{IntervalType, PredictionResult, RegressionResult};
use crate::solvers::traits::{FittedRegressor, RegressionError, Regressor};
use faer::{Col, Mat};
#[derive(Debug, Clone)]
pub struct QuantileRegressor {
tau: f64,
with_intercept: bool,
max_iterations: usize,
tolerance: f64,
epsilon: f64,
weights: Option<Col<f64>>,
}
impl QuantileRegressor {
pub fn new(tau: f64) -> Self {
Self {
tau,
with_intercept: true,
max_iterations: 100,
tolerance: 1e-6,
epsilon: 1e-6,
weights: None,
}
}
pub fn builder() -> QuantileRegressorBuilder {
QuantileRegressorBuilder::default()
}
fn fit_irls(
&self,
x: &Mat<f64>,
y: &Col<f64>,
) -> Result<(Col<f64>, Option<f64>), RegressionError> {
let n = x.nrows();
let p = x.ncols();
let x_aug = if self.with_intercept {
let mut x_new = Mat::zeros(n, p + 1);
for i in 0..n {
x_new[(i, 0)] = 1.0;
for j in 0..p {
x_new[(i, j + 1)] = x[(i, j)];
}
}
x_new
} else {
x.clone()
};
let p_aug = x_aug.ncols();
let mut beta = self.ols_solve(&x_aug, y)?;
for _iter in 0..self.max_iterations {
let mut residuals = Col::zeros(n);
for i in 0..n {
let mut pred = 0.0;
for j in 0..p_aug {
pred += x_aug[(i, j)] * beta[j];
}
residuals[i] = y[i] - pred;
}
let mut w = Col::zeros(n);
for i in 0..n {
let abs_r = residuals[i].abs().max(self.epsilon);
if residuals[i] >= 0.0 {
w[i] = self.tau / abs_r;
} else {
w[i] = (1.0 - self.tau) / abs_r;
}
if let Some(ref obs_weights) = self.weights {
w[i] *= obs_weights[i];
}
}
let beta_new = self.wls_solve(&x_aug, y, &w)?;
let mut max_change = 0.0f64;
for j in 0..p_aug {
let change = (beta_new[j] - beta[j]).abs();
max_change = max_change.max(change);
}
beta = beta_new;
if max_change < self.tolerance {
break;
}
}
if self.with_intercept {
let intercept = beta[0];
let mut coefficients = Col::zeros(p);
for j in 0..p {
coefficients[j] = beta[j + 1];
}
Ok((coefficients, Some(intercept)))
} else {
Ok((beta, None))
}
}
fn ols_solve(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Col<f64>, RegressionError> {
let n = x.nrows();
let p = x.ncols();
let mut xtx = Mat::zeros(p, p);
for i in 0..p {
for j in 0..p {
let mut sum = 0.0;
for k in 0..n {
sum += x[(k, i)] * x[(k, j)];
}
xtx[(i, j)] = sum;
}
}
let mut xty = Col::zeros(p);
for j in 0..p {
let mut sum = 0.0;
for i in 0..n {
sum += x[(i, j)] * y[i];
}
xty[j] = sum;
}
self.solve_symmetric(&xtx, &xty)
}
fn wls_solve(
&self,
x: &Mat<f64>,
y: &Col<f64>,
w: &Col<f64>,
) -> Result<Col<f64>, RegressionError> {
let n = x.nrows();
let p = x.ncols();
let mut xtwx = Mat::zeros(p, p);
for i in 0..p {
for j in 0..p {
let mut sum = 0.0;
for k in 0..n {
sum += x[(k, i)] * w[k] * x[(k, j)];
}
xtwx[(i, j)] = sum;
}
}
let mut xtwy = Col::zeros(p);
for j in 0..p {
let mut sum = 0.0;
for i in 0..n {
sum += x[(i, j)] * w[i] * y[i];
}
xtwy[j] = sum;
}
self.solve_symmetric(&xtwx, &xtwy)
}
fn solve_symmetric(&self, a: &Mat<f64>, b: &Col<f64>) -> Result<Col<f64>, RegressionError> {
let n = a.nrows();
let mut l: Mat<f64> = Mat::zeros(n, n);
for j in 0..n {
let mut sum = 0.0;
for k in 0..j {
sum += l[(j, k)].powi(2);
}
let diag = a[(j, j)] - sum;
if diag <= 0.0 {
l[(j, j)] = (a[(j, j)] + 1e-10).sqrt();
} else {
l[(j, j)] = diag.sqrt();
}
for i in (j + 1)..n {
let mut sum = 0.0;
for k in 0..j {
sum += l[(i, k)] * l[(j, k)];
}
l[(i, j)] = (a[(i, j)] - sum) / l[(j, j)];
}
}
let mut y_sol = Col::zeros(n);
for i in 0..n {
let mut sum = b[i];
for j in 0..i {
sum -= l[(i, j)] * y_sol[j];
}
y_sol[i] = sum / l[(i, i)];
}
let mut x = Col::zeros(n);
for i in (0..n).rev() {
let mut sum = y_sol[i];
for j in (i + 1)..n {
sum -= l[(j, i)] * x[j];
}
x[i] = sum / l[(i, i)];
}
Ok(x)
}
fn check_loss(&self, residuals: &Col<f64>) -> f64 {
let mut loss = 0.0;
for i in 0..residuals.nrows() {
let r = residuals[i];
if r >= 0.0 {
loss += self.tau * r;
} else {
loss += (self.tau - 1.0) * r;
}
}
loss
}
}
impl Regressor for QuantileRegressor {
type Fitted = FittedQuantile;
fn fit(&self, x: &Mat<f64>, y: &Col<f64>) -> Result<Self::Fitted, RegressionError> {
let n_samples = x.nrows();
let n_features = x.ncols();
if x.nrows() != y.nrows() {
return Err(RegressionError::DimensionMismatch {
x_rows: x.nrows(),
y_len: y.nrows(),
});
}
let min_obs = if self.with_intercept {
n_features + 2
} else {
n_features + 1
};
if n_samples < min_obs {
return Err(RegressionError::InsufficientObservations {
needed: min_obs,
got: n_samples,
});
}
if self.tau <= 0.0 || self.tau >= 1.0 {
return Err(RegressionError::NumericalError(format!(
"tau must be between 0 and 1, got {}",
self.tau
)));
}
if let Some(ref w) = self.weights {
if w.nrows() != n_samples {
return Err(RegressionError::DimensionMismatch {
x_rows: n_samples,
y_len: w.nrows(),
});
}
if w.iter().any(|&wi| wi < 0.0) {
return Err(RegressionError::InvalidWeights);
}
}
let (coefficients, intercept) = self.fit_irls(x, y)?;
let mut fitted_values = Col::zeros(n_samples);
let mut residuals = Col::zeros(n_samples);
for i in 0..n_samples {
let mut pred = intercept.unwrap_or(0.0);
for j in 0..n_features {
pred += x[(i, j)] * coefficients[j];
}
fitted_values[i] = pred;
residuals[i] = y[i] - pred;
}
let full_loss = self.check_loss(&residuals);
let y_quantile = self.compute_quantile(y);
let null_residuals = Col::from_fn(n_samples, |i| y[i] - y_quantile);
let null_loss = self.check_loss(&null_residuals);
let pseudo_r_squared = if null_loss > 0.0 {
1.0 - full_loss / null_loss
} else {
0.0
};
let n_params = if intercept.is_some() {
n_features + 1
} else {
n_features
};
let mut result = RegressionResult::empty(n_features, n_samples);
result.coefficients = coefficients.clone();
result.intercept = intercept;
result.residuals = residuals;
result.fitted_values = fitted_values;
result.n_parameters = n_params;
result.n_observations = n_samples;
result.r_squared = pseudo_r_squared;
result.adj_r_squared = f64::NAN; result.aliased = vec![false; n_features];
Ok(FittedQuantile {
tau: self.tau,
with_intercept: self.with_intercept,
result,
check_loss: full_loss,
})
}
}
impl QuantileRegressor {
fn compute_quantile(&self, y: &Col<f64>) -> f64 {
let mut sorted: Vec<f64> = y.iter().copied().collect();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = sorted.len();
let index = (self.tau * (n - 1) as f64).floor() as usize;
let frac = self.tau * (n - 1) as f64 - index as f64;
if index + 1 < n {
sorted[index] * (1.0 - frac) + sorted[index + 1] * frac
} else {
sorted[index]
}
}
}
#[derive(Debug, Clone)]
pub struct FittedQuantile {
tau: f64,
#[allow(dead_code)]
with_intercept: bool,
result: RegressionResult,
check_loss: f64,
}
impl FittedQuantile {
pub fn tau(&self) -> f64 {
self.tau
}
pub fn check_loss(&self) -> f64 {
self.check_loss
}
pub fn pseudo_r_squared(&self) -> f64 {
self.result.r_squared
}
}
impl FittedRegressor for FittedQuantile {
fn predict(&self, x: &Mat<f64>) -> Col<f64> {
let n_samples = x.nrows();
let n_features = x.ncols();
let mut predictions = Col::zeros(n_samples);
let intercept = self.result.intercept.unwrap_or(0.0);
for i in 0..n_samples {
let mut pred = intercept;
for j in 0..n_features {
pred += x[(i, j)] * self.result.coefficients[j];
}
predictions[i] = pred;
}
predictions
}
fn result(&self) -> &RegressionResult {
&self.result
}
fn predict_with_interval(
&self,
x: &Mat<f64>,
interval: Option<IntervalType>,
level: f64,
) -> PredictionResult {
let predictions = self.predict(x);
match interval {
None => PredictionResult::point_only(predictions),
Some(_interval_type) => {
let n = x.nrows();
let mut lower = Col::zeros(n);
let mut upper = Col::zeros(n);
let mut se = Col::zeros(n);
for i in 0..n {
lower[i] = f64::NAN;
upper[i] = f64::NAN;
se[i] = f64::NAN;
}
let _ = level;
PredictionResult::with_intervals(predictions, lower, upper, se)
}
}
}
}
#[derive(Debug, Clone)]
pub struct QuantileRegressorBuilder {
tau: f64,
with_intercept: bool,
max_iterations: usize,
tolerance: f64,
epsilon: f64,
weights: Option<Col<f64>>,
}
impl Default for QuantileRegressorBuilder {
fn default() -> Self {
Self {
tau: 0.5,
with_intercept: true,
max_iterations: 100,
tolerance: 1e-6,
epsilon: 1e-6,
weights: None,
}
}
}
impl QuantileRegressorBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn tau(mut self, tau: f64) -> Self {
self.tau = tau;
self
}
pub fn with_intercept(mut self, include: bool) -> Self {
self.with_intercept = include;
self
}
pub fn max_iterations(mut self, max_iter: usize) -> Self {
self.max_iterations = max_iter;
self
}
pub fn tolerance(mut self, tol: f64) -> Self {
self.tolerance = tol;
self
}
pub fn epsilon(mut self, eps: f64) -> Self {
self.epsilon = eps;
self
}
pub fn weights(mut self, w: Col<f64>) -> Self {
self.weights = Some(w);
self
}
pub fn build(self) -> QuantileRegressor {
QuantileRegressor {
tau: self.tau,
with_intercept: self.with_intercept,
max_iterations: self.max_iterations,
tolerance: self.tolerance,
epsilon: self.epsilon,
weights: self.weights,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_median_regression() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| 2.0 + 1.5 * i as f64);
let model = QuantileRegressor::builder().tau(0.5).build();
let fitted = model.fit(&x, &y).expect("model should fit");
assert!((fitted.intercept().unwrap() - 2.0).abs() < 0.5);
assert!((fitted.coefficients()[0] - 1.5).abs() < 0.1);
}
#[test]
fn test_different_quantiles() {
let x = Mat::from_fn(100, 1, |i, _| i as f64);
let y = Col::from_fn(100, |i| 1.0 + 2.0 * i as f64);
let model_25 = QuantileRegressor::builder().tau(0.25).build();
let fitted_25 = model_25.fit(&x, &y).expect("model should fit");
let model_75 = QuantileRegressor::builder().tau(0.75).build();
let fitted_75 = model_75.fit(&x, &y).expect("model should fit");
assert!((fitted_25.coefficients()[0] - 2.0).abs() < 0.2);
assert!((fitted_75.coefficients()[0] - 2.0).abs() < 0.2);
}
#[test]
fn test_no_intercept() {
let x = Mat::from_fn(50, 1, |i, _| (i + 1) as f64);
let y = Col::from_fn(50, |i| 2.5 * (i + 1) as f64);
let model = QuantileRegressor::builder()
.tau(0.5)
.with_intercept(false)
.build();
let fitted = model.fit(&x, &y).expect("model should fit");
assert!(fitted.intercept().is_none());
assert!((fitted.coefficients()[0] - 2.5).abs() < 0.1);
}
#[test]
fn test_predict() {
let x = Mat::from_fn(50, 1, |i, _| i as f64);
let y = Col::from_fn(50, |i| 1.0 + 2.0 * i as f64);
let model = QuantileRegressor::builder().tau(0.5).build();
let fitted = model.fit(&x, &y).expect("model should fit");
let x_new = Mat::from_fn(5, 1, |i, _| (i + 100) as f64);
let preds = fitted.predict(&x_new);
for i in 0..5 {
let expected = 1.0 + 2.0 * (i + 100) as f64;
assert!((preds[i] - expected).abs() < 1.0);
}
}
#[test]
fn test_invalid_tau() {
let x = Mat::from_fn(20, 1, |i, _| i as f64);
let y = Col::from_fn(20, |i| i as f64);
let model = QuantileRegressor::builder().tau(1.5).build();
let result = model.fit(&x, &y);
assert!(result.is_err());
}
#[test]
fn test_dimension_mismatch() {
let x = Mat::from_fn(10, 2, |i, j| (i + j) as f64);
let y = Col::from_fn(5, |i| i as f64);
let model = QuantileRegressor::builder().build();
let result = model.fit(&x, &y);
assert!(result.is_err());
}
}