use ndarray::{Array1, Array2, Axis};
use ndarray_rand::rand_distr::Uniform;
use rand::distributions::Distribution;
use crate::{GreenersError, OLS, CovarianceType, DataFrame, Formula};
use std::fmt;
/// Output of a quantile regression fit at a single quantile level `tau`.
#[derive(Debug)]
pub struct QuantileResult {
// Target quantile in (0, 1), e.g. 0.5 for median regression.
pub tau: f64,
// Estimated coefficients, one per design-matrix column.
pub params: Array1<f64>,
// Bootstrap standard errors (same length as `params`).
pub std_errors: Array1<f64>,
// t-statistics: params / std_errors, element-wise.
pub t_values: Array1<f64>,
// Two-sided p-values from the standard normal CDF.
pub p_values: Array1<f64>,
// `r_squared` is the pseudo R^2 (1 - loss_model/loss_null);
// `iterations` is the IRLS iteration count at convergence.
pub r_squared: f64, pub iterations: usize,
}
impl fmt::Display for QuantileResult {
    /// Renders a fixed-width (78-column) summary table: header with the
    /// quantile level, pseudo R-squared and iteration count, then one row
    /// per coefficient with its standard error, t-statistic and p-value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let title = format!(" Quantile Regression (tau={:.2}) ", self.tau);
        writeln!(f, "\n{:=^78}", title)?;
        writeln!(f, "{:<20} {:>15.4} (Pseudo)", "R-squared:", self.r_squared)?;
        writeln!(f, "{:<20} {:>15}", "Iterations:", self.iterations)?;
        writeln!(f, "\n{:-^78}", "")?;
        writeln!(
            f,
            "{:<10} | {:>10} | {:>10} | {:>8} | {:>8}",
            "Variable", "Coef", "Std Err", "t", "P>|t|"
        )?;
        writeln!(f, "{:-^78}", "")?;
        // Walk the four statistic vectors in lockstep, one table row each.
        let rows = self
            .params
            .iter()
            .zip(self.std_errors.iter())
            .zip(self.t_values.iter())
            .zip(self.p_values.iter());
        for (i, (((coef, se), t), p)) in rows.enumerate() {
            writeln!(
                f,
                "x{:<9} | {:>10.4} | {:>10.4} | {:>8.3} | {:>8.3}",
                i, coef, se, t, p
            )?;
        }
        writeln!(f, "{:=^78}", "")
    }
}
/// Zero-sized namespace type bundling the quantile-regression fitting routines.
pub struct QuantileReg;
impl QuantileReg {
    /// Builds the design matrix from `formula` and `data`, then fits a
    /// quantile regression at `tau` with `n_boot` bootstrap replications.
    ///
    /// # Errors
    /// Propagates design-matrix construction failures and any error from
    /// [`Self::fit`].
    pub fn from_formula(
        formula: &Formula,
        data: &DataFrame,
        tau: f64,
        n_boot: usize,
    ) -> Result<QuantileResult, GreenersError> {
        let (y, x) = data.to_design_matrix(formula)?;
        Self::fit(&y, &x, tau, n_boot)
    }

    /// Fits a quantile regression of `y` on `x` at quantile `tau`.
    ///
    /// Point estimates come from an IRLS approximation to the check-loss
    /// minimizer; standard errors come from a pairs bootstrap with `n_boot`
    /// replications. The reported pseudo R-squared is
    /// `1 - loss(model) / loss(intercept-only)` where the loss is the
    /// check (pinball) loss evaluated at the same `tau`.
    ///
    /// # Errors
    /// Returns `GreenersError::OptimizationFailed` when `tau` lies outside
    /// the open interval (0, 1), or when a solver/bootstrap step fails.
    pub fn fit(
        y: &Array1<f64>,
        x: &Array2<f64>,
        tau: f64,
        n_boot: usize,
    ) -> Result<QuantileResult, GreenersError> {
        if tau <= 0.0 || tau >= 1.0 {
            return Err(GreenersError::OptimizationFailed);
        }
        let (params, iter) = Self::irls_solver(y, x, tau)?;
        let std_errors = Self::bootstrap_se(y, x, tau, n_boot)?;
        // Fix: the original source contained mojibake (`¶ms`) where
        // `&params` belonged, which is not valid Rust.
        let t_values = &params / &std_errors;
        let normal = statrs::distribution::Normal::new(0.0, 1.0).unwrap();
        use statrs::distribution::ContinuousCDF;
        // Two-sided p-values under an asymptotic standard-normal reference.
        let p_values = t_values.mapv(|t| 2.0 * (1.0 - normal.cdf(t.abs())));
        // Pseudo R^2: check loss of the full model vs. an intercept-only
        // model fitted at the same quantile.
        let loss_model = Self::check_loss(y, &x.dot(&params), tau);
        let n = y.len();
        let ones = Array2::<f64>::ones((n, 1));
        let (params_null, _) = Self::irls_solver(y, &ones, tau)?;
        let loss_null = Self::check_loss(y, &ones.dot(&params_null), tau);
        let pseudo_r2 = 1.0 - (loss_model / loss_null);
        Ok(QuantileResult {
            tau,
            params,
            std_errors,
            t_values,
            p_values,
            r_squared: pseudo_r2,
            iterations: iter,
        })
    }

    /// Iteratively reweighted least squares approximation to the quantile
    /// loss minimizer. Starts from the OLS solution and, on each pass,
    /// refits a weighted least squares with weights `tau / |u|` for
    /// non-negative residuals and `(1 - tau) / |u|` for negative ones
    /// (|u| floored at `epsilon` to keep weights finite). Stops when the
    /// L1 change in the coefficient vector drops below `tol` or after
    /// `max_iter` passes. Returns the coefficients and iteration count.
    fn irls_solver(
        y: &Array1<f64>,
        x: &Array2<f64>,
        tau: f64,
    ) -> Result<(Array1<f64>, usize), GreenersError> {
        let max_iter = 1000;
        let tol = 1e-6; // convergence threshold on sum |beta_new - beta_old|
        let epsilon = 1e-6; // residual-magnitude floor
        // OLS provides the starting values.
        let ols = OLS::fit(y, x, CovarianceType::NonRobust)?;
        let mut beta = ols.params;
        let mut diff = 1.0;
        let mut iter = 0;
        while diff > tol && iter < max_iter {
            let old_beta = beta.clone();
            let residuals = y - &x.dot(&beta);
            // Asymmetric inverse-magnitude weights reproduce the check
            // loss inside the weighted least-squares objective.
            let weights = residuals.mapv(|u| {
                let abs_u = u.abs().max(epsilon);
                if u >= 0.0 {
                    tau / abs_u
                } else {
                    (1.0 - tau) / abs_u
                }
            });
            // Scale both sides by sqrt(w) and solve via ordinary OLS (= WLS).
            let sqrt_w = weights.mapv(f64::sqrt);
            let y_w = y * &sqrt_w;
            let mut x_w = x.clone();
            for (i, mut row) in x_w.axis_iter_mut(Axis(0)).enumerate() {
                row *= sqrt_w[i];
            }
            beta = OLS::fit(&y_w, &x_w, CovarianceType::NonRobust)
                .map_err(|_| GreenersError::OptimizationFailed)?
                .params;
            diff = (&beta - &old_beta).mapv(f64::abs).sum();
            iter += 1;
        }
        Ok((beta, iter))
    }

    /// Pairs-bootstrap standard errors: resample `(y_i, x_i)` rows with
    /// replacement, refit at the same `tau`, and take the column-wise
    /// sample standard deviation over the *successful* replicate
    /// coefficient vectors. (The original zero-filled rows for failed
    /// refits, silently biasing the standard errors toward the inclusion
    /// of all-zero coefficient vectors.)
    ///
    /// # Errors
    /// Returns `GreenersError::OptimizationFailed` when the data are empty
    /// or fewer than two bootstrap refits succeed (no variance can be
    /// formed); this also guards the `n_boot == 0` underflow the original
    /// had in `(n_boot - 1)`.
    fn bootstrap_se(
        y: &Array1<f64>,
        x: &Array2<f64>,
        tau: f64,
        n_boot: usize,
    ) -> Result<Array1<f64>, GreenersError> {
        let n = y.len();
        let k = x.ncols();
        if n == 0 {
            // Uniform::new(0, 0) would panic below.
            return Err(GreenersError::OptimizationFailed);
        }
        let mut rng = rand::thread_rng();
        // Hoisted out of the loops: the original rebuilt the distribution
        // for every single index draw.
        let index_dist = Uniform::new(0, n);
        let mut boot_betas: Vec<Array1<f64>> = Vec::with_capacity(n_boot);
        for _ in 0..n_boot {
            let mut y_boot_vec = Vec::with_capacity(n);
            let mut x_boot_vec = Vec::with_capacity(n * k);
            for _ in 0..n {
                let idx = index_dist.sample(&mut rng);
                y_boot_vec.push(y[idx]);
                x_boot_vec.extend(x.row(idx).iter().copied());
            }
            let y_boot = Array1::from(y_boot_vec);
            let x_boot = Array2::from_shape_vec((n, k), x_boot_vec)
                .map_err(|_| GreenersError::ShapeMismatch("Bootstrap shape error".into()))?;
            // Keep only replicates whose refit converged; failures are
            // dropped rather than recorded as zeros.
            if let Ok((beta_b, _)) = Self::irls_solver(&y_boot, &x_boot, tau) {
                boot_betas.push(beta_b);
            }
        }
        let m = boot_betas.len();
        if m < 2 {
            return Err(GreenersError::OptimizationFailed);
        }
        let mut std_errs = Array1::<f64>::zeros(k);
        for j in 0..k {
            let mean = boot_betas.iter().map(|b| b[j]).sum::<f64>() / m as f64;
            let var = boot_betas
                .iter()
                .map(|b| (b[j] - mean).powi(2))
                .sum::<f64>()
                / (m - 1) as f64;
            std_errs[j] = var.sqrt();
        }
        Ok(std_errs)
    }

    /// Check (pinball) loss: sum over observations of `rho_tau(u)` where
    /// `u = y - y_pred`, `rho_tau(u) = tau * u` for `u >= 0` and
    /// `(tau - 1) * u` for `u < 0` (always non-negative).
    fn check_loss(y: &Array1<f64>, y_pred: &Array1<f64>, tau: f64) -> f64 {
        let res = y - y_pred;
        res.mapv(|u| if u >= 0.0 { tau * u } else { (tau - 1.0) * u })
            .sum()
    }
}