use crate::error::{StatsError, StatsResult};
use crate::panel::fixed_effects::{FEResult, FixedEffectsModel};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use scirs2_core::numeric::{Float, FromPrimitive};
use scirs2_linalg::{lstsq, solve};
/// Naive dense matrix product `C = A * B` with an explicit shape check.
///
/// Returns `DimensionMismatch` when the inner dimensions disagree.
fn matmul<F: Float + std::iter::Sum>(a: &Array2<F>, b: &Array2<F>) -> StatsResult<Array2<F>> {
    let (m, k) = a.dim();
    let (kb, n) = b.dim();
    if k != kb {
        return Err(StatsError::DimensionMismatch(format!(
            "matmul dim: {} vs {}",
            k, kb
        )));
    }
    // Each output entry is the dot product of row i of A with column j of B.
    Ok(Array2::from_shape_fn((m, n), |(i, j)| {
        (0..k).map(|l| a[[i, l]] * b[[l, j]]).sum()
    }))
}
/// Ordinary least squares via `lstsq`; returns `(coefficients, residuals)`.
///
/// Residuals are `y - X * coefficients`, evaluated row by row.
fn ols<F>(x: &Array2<F>, y: &Array1<F>) -> StatsResult<(Array1<F>, Array1<F>)>
where
    F: Float
        + std::iter::Sum
        + std::fmt::Debug
        + std::fmt::Display
        + scirs2_core::numeric::NumAssign
        + scirs2_core::numeric::One
        + scirs2_core::ndarray::ScalarOperand
        + FromPrimitive
        + Send
        + Sync
        + 'static,
{
    let fit = lstsq(&x.view(), &y.view(), None)
        .map_err(|e| StatsError::ComputationError(format!("lstsq: {e}")))?;
    let coeffs = fit.solution;
    // Fitted value for row i is the dot product of row i of X with the coefficients.
    let fitted: Array1<F> = (0..y.len())
        .map(|i| {
            let mut acc = F::zero();
            for j in 0..coeffs.len() {
                acc = acc + x[[i, j]] * coeffs[j];
            }
            acc
        })
        .collect();
    let residuals: Array1<F> = y
        .iter()
        .zip(fitted.iter())
        .map(|(&obs, &hat)| obs - hat)
        .collect();
    Ok((coeffs, residuals))
}
/// Output of a random-effects panel model fit.
#[derive(Debug, Clone)]
pub struct REResult<F> {
    // Slope estimates (the quasi-demeaned intercept is excluded).
    pub coefficients: Array1<F>,
    // Standard errors matching `coefficients`.
    pub std_errors: Array1<F>,
    // t statistics (coefficient / SE; 0 when the SE is not positive).
    pub t_stats: Array1<F>,
    // Idiosyncratic (within) error variance.
    pub sigma2_epsilon: F,
    // Entity random-effect variance (clamped at zero).
    pub sigma2_u: F,
    // Quasi-demeaning weight evaluated at the average group size.
    pub theta: F,
    // R^2 computed against the untransformed y.
    pub r2_overall: F,
    // Fitted values from intercept + slopes only (BLUPs not added).
    pub fitted: Array1<F>,
    // Residuals y - fitted on the original scale.
    pub residuals: Array1<F>,
    // Per-entity shrunken random-intercept predictions.
    pub blups: Array1<F>,
    // Number of observations.
    pub n_obs: usize,
    // Number of entities (max entity id + 1).
    pub n_entities: usize,
}
/// Random-effects (GLS) estimator for panel data.
pub struct RandomEffectsModel;

impl RandomEffectsModel {
    /// Fits a random-effects panel model by quasi-demeaning (feasible GLS).
    ///
    /// Variance components are estimated Swamy–Arora style: the within
    /// (fixed-effects) residuals give `sigma2_epsilon`, and a between
    /// regression on entity means gives `sigma2_u`. Observations are then
    /// quasi-demeaned with an entity-specific theta and fitted by OLS.
    ///
    /// # Arguments
    /// * `x` - n-by-k regressor matrix (no intercept column; one is added).
    /// * `y` - response vector of length n.
    /// * `entity` - entity id per observation; ids are assumed dense in
    ///   `0..n_entities` (max id + 1 determines the entity count).
    /// * `time` - time id per observation (forwarded to the FE fit).
    ///
    /// # Errors
    /// `DimensionMismatch` when input lengths disagree, `InsufficientData`
    /// with fewer than two entities, or `ComputationError` when numeric
    /// conversion or the underlying solvers fail.
    pub fn fit<F>(
        x: &ArrayView2<F>,
        y: &ArrayView1<F>,
        entity: &[usize],
        time: &[usize],
    ) -> StatsResult<REResult<F>>
    where
        F: Float
            + std::iter::Sum
            + std::fmt::Debug
            + std::fmt::Display
            + scirs2_core::numeric::NumAssign
            + scirs2_core::numeric::One
            + scirs2_core::ndarray::ScalarOperand
            + FromPrimitive
            + Send
            + Sync
            + 'static,
    {
        let n = y.len();
        let (nx, k) = x.dim();
        if nx != n || entity.len() != n || time.len() != n {
            return Err(StatsError::DimensionMismatch(
                "x, y, entity, time lengths must match".to_string(),
            ));
        }
        // Entity ids are assumed dense; the count is max id + 1.
        let n_entities = entity.iter().copied().max().map(|m| m + 1).unwrap_or(0);
        if n_entities < 2 {
            return Err(StatsError::InsufficientData(
                "Need at least 2 entities for RE estimation".to_string(),
            ));
        }
        // Step 1: the within (fixed-effects) regression supplies the
        // idiosyncratic variance from its residual sum of squares.
        let fe = FixedEffectsModel::fit(x, y, entity, time, false)?;
        let resid_within = &fe.residuals;
        let df_within = if n > n_entities + k { n - n_entities - k } else { 1 };
        let ss_within: F = resid_within.iter().map(|&r| r * r).sum();
        let sigma2_eps = ss_within
            / F::from_usize(df_within).ok_or_else(|| {
                StatsError::ComputationError("FromPrimitive failed".to_string())
            })?;
        // Observations per entity and the average group size t_bar.
        let mut e_counts = vec![0usize; n_entities];
        for &eid in entity.iter() {
            e_counts[eid] += 1;
        }
        let t_bar = F::from_usize(n).unwrap_or(F::one())
            / F::from_usize(n_entities).unwrap_or(F::one());
        // Entity means of y and of each regressor column.
        let mut y_mean_e = vec![F::zero(); n_entities];
        let mut x_mean_e = vec![vec![F::zero(); k]; n_entities];
        for (i, &eid) in entity.iter().enumerate() {
            y_mean_e[eid] = y_mean_e[eid] + y[i];
            for j in 0..k {
                x_mean_e[eid][j] = x_mean_e[eid][j] + x[[i, j]];
            }
        }
        for eid in 0..n_entities {
            let cnt = F::from_usize(e_counts[eid]).unwrap_or(F::one());
            y_mean_e[eid] = y_mean_e[eid] / cnt;
            for j in 0..k {
                x_mean_e[eid][j] = x_mean_e[eid][j] / cnt;
            }
        }
        // Step 2: between regression on entity means (with intercept); its
        // residual variance estimates sigma2_u + sigma2_eps / t_bar.
        let xb_flat: Vec<F> = x_mean_e
            .iter()
            .flat_map(|row| std::iter::once(F::one()).chain(row.iter().copied()))
            .collect();
        let xb = Array2::from_shape_vec((n_entities, k + 1), xb_flat)
            .map_err(|e| StatsError::ComputationError(format!("reshape: {e}")))?;
        let yb = Array1::from(y_mean_e.clone());
        let (_coeffs_b, resid_b) = ols(&xb, &yb)?;
        let ss_between: F = resid_b.iter().map(|&r| r * r).sum();
        let df_between = if n_entities > k + 1 { n_entities - k - 1 } else { 1 };
        let sigma2_b = ss_between
            / F::from_usize(df_between).ok_or_else(|| {
                StatsError::ComputationError("FromPrimitive failed".to_string())
            })?;
        // Clamp the entity-effect variance at zero; the raw estimate can be
        // negative in small samples.
        let sigma2_u_raw = sigma2_b - sigma2_eps / t_bar;
        let sigma2_u = if sigma2_u_raw > F::zero() {
            sigma2_u_raw
        } else {
            F::zero()
        };
        // Step 3: entity-specific quasi-demeaning weights
        // theta_i = 1 - sqrt(sigma2_eps / (t_i * sigma2_u + sigma2_eps)),
        // which handles unbalanced panels.
        let theta_vec: Vec<F> = e_counts
            .iter()
            .map(|&ti| {
                let ti_f = F::from_usize(ti).unwrap_or(F::one());
                let denom_sq = ti_f * sigma2_u + sigma2_eps;
                if denom_sq > F::zero() {
                    F::one() - sigma2_eps.sqrt() / denom_sq.sqrt()
                } else {
                    F::zero()
                }
            })
            .collect();
        // Single reported theta, evaluated at the average group size.
        let theta_glob = {
            let denom_sq = t_bar * sigma2_u + sigma2_eps;
            if denom_sq > F::zero() {
                F::one() - sigma2_eps.sqrt() / denom_sq.sqrt()
            } else {
                F::zero()
            }
        };
        // Quasi-demean y and X; the intercept column becomes (1 - theta_i).
        let mut yq: Vec<F> = Vec::with_capacity(n);
        let mut xq_rows: Vec<Vec<F>> = Vec::with_capacity(n);
        for (i, &eid) in entity.iter().enumerate() {
            let th = theta_vec[eid];
            yq.push(y[i] - th * y_mean_e[eid]);
            let mut row = vec![F::one() - th];
            for j in 0..k {
                row.push(x[[i, j]] - th * x_mean_e[eid][j]);
            }
            xq_rows.push(row);
        }
        let yq_arr = Array1::from(yq);
        let xq_flat: Vec<F> = xq_rows.iter().flat_map(|r| r.iter().copied()).collect();
        let xq = Array2::from_shape_vec((n, k + 1), xq_flat)
            .map_err(|e| StatsError::ComputationError(format!("reshape: {e}")))?;
        // Step 4: OLS on the transformed data yields the feasible GLS fit.
        let (coeffs_full, resid) = ols(&xq, &yq_arr)?;
        let intercept = coeffs_full[0];
        let slopes: Array1<F> = coeffs_full.slice(scirs2_core::ndarray::s![1..]).to_owned();
        // Fitted values/residuals are reported on the ORIGINAL scale; the
        // BLUPs below are NOT added to `fitted`.
        let mut fitted = Array1::zeros(n);
        for i in 0..n {
            let mut fi = intercept;
            for j in 0..k {
                fi = fi + x[[i, j]] * slopes[j];
            }
            fitted[i] = fi;
        }
        let orig_resid: Array1<F> = (0..n).map(|i| y[i] - fitted[i]).collect();
        let y_bar = y.iter().copied().sum::<F>() / F::from_usize(n).unwrap_or(F::one());
        let ss_tot: F = y.iter().map(|&v| (v - y_bar) * (v - y_bar)).sum();
        let ss_res: F = orig_resid.iter().map(|&r| r * r).sum();
        let r2 = if ss_tot > F::zero() {
            F::one() - ss_res / ss_tot
        } else {
            F::zero()
        };
        // Standard errors from the transformed regression:
        // sigma2 * diag((Xq'Xq)^{-1}).
        let df_res_f = F::from_usize(if n > k + 1 { n - k - 1 } else { 1 }).unwrap_or(F::one());
        let sigma2_resid = resid.iter().map(|&r| r * r).sum::<F>() / df_res_f;
        let xtx = matmul(&xq.t().to_owned(), &xq)?;
        let std_errors = xtx_inv_diag_se(&xtx, sigma2_resid)?;
        let se_slopes: Array1<F> = std_errors.slice(scirs2_core::ndarray::s![1..]).to_owned();
        let t_stats: Array1<F> = slopes
            .iter()
            .zip(se_slopes.iter())
            .map(|(&c, &se)| if se > F::zero() { c / se } else { F::zero() })
            .collect();
        // BLUPs: shrink each entity's mean residual toward zero by
        // sigma2_u / (sigma2_u + sigma2_eps / t_i).
        let mut blup_sum = vec![F::zero(); n_entities];
        for (i, &eid) in entity.iter().enumerate() {
            blup_sum[eid] = blup_sum[eid] + orig_resid[i];
        }
        let blups: Array1<F> = (0..n_entities)
            .map(|eid| {
                if e_counts[eid] == 0 {
                    return F::zero();
                }
                let ti = F::from_usize(e_counts[eid]).unwrap_or(F::one());
                let e_mean = blup_sum[eid] / ti;
                let denom = sigma2_u + sigma2_eps / ti;
                if denom > F::zero() {
                    sigma2_u / denom * e_mean
                } else {
                    F::zero()
                }
            })
            .collect();
        Ok(REResult {
            coefficients: slopes,
            std_errors: se_slopes,
            t_stats,
            sigma2_epsilon: sigma2_eps,
            sigma2_u,
            theta: theta_glob,
            r2_overall: r2,
            fitted,
            residuals: orig_resid,
            blups,
            n_obs: n,
            n_entities,
        })
    }
}
/// Output of the Hausman specification test comparing FE and RE estimates.
#[derive(Debug, Clone)]
pub struct HausmanTestResult<F> {
    // Test statistic (approximately chi-squared with `df` degrees of freedom).
    pub h_stat: F,
    // Degrees of freedom = number of compared coefficients.
    pub df: usize,
    // Upper-tail p-value from a chi-squared approximation.
    pub p_value: F,
    // Coefficient differences b_FE - b_RE.
    pub coeff_diff: Array1<F>,
}
/// Hausman specification test (fixed effects vs random effects).
pub struct HausmanTest;

impl HausmanTest {
    /// Computes the Hausman statistic from FE and RE fits.
    ///
    /// Uses only the DIAGONAL of Var(b_FE) - Var(b_RE), built from the
    /// reported standard errors and floored at 1e-10 when non-positive,
    /// rather than the full covariance difference.
    ///
    /// # Errors
    /// `DimensionMismatch` when the coefficient vectors differ in length.
    pub fn test<F>(fe: &FEResult<F>, re: &REResult<F>) -> StatsResult<HausmanTestResult<F>>
    where
        F: Float
            + std::iter::Sum
            + std::fmt::Debug
            + std::fmt::Display
            + scirs2_core::numeric::NumAssign
            + scirs2_core::numeric::One
            + scirs2_core::ndarray::ScalarOperand
            + FromPrimitive
            + Send
            + Sync
            + 'static,
    {
        let k = fe.coefficients.len();
        if k != re.coefficients.len() {
            return Err(StatsError::DimensionMismatch(format!(
                "FE has {} coefficients but RE has {}",
                k,
                re.coefficients.len()
            )));
        }
        // Coefficient differences q = b_FE - b_RE.
        let q: Array1<F> = fe
            .coefficients
            .iter()
            .zip(re.coefficients.iter())
            .map(|(&bfe, &bre)| bfe - bre)
            .collect();
        let floor = F::from_f64(1e-10).unwrap_or(F::zero());
        // H = sum_j q_j^2 / v_j with v_j the floored variance difference.
        let h_stat = (0..k).fold(F::zero(), |acc, j| {
            let diff = fe.std_errors[j] * fe.std_errors[j] - re.std_errors[j] * re.std_errors[j];
            let vj = if diff > F::zero() { diff } else { floor };
            if vj > F::zero() {
                acc + q[j] * q[j] / vj
            } else {
                acc
            }
        });
        let p_value = chi2_upper_tail_pvalue(h_stat, k);
        Ok(HausmanTestResult {
            h_stat,
            df: k,
            p_value,
            coeff_diff: q,
        })
    }
}
/// Configuration for [`LinearMixedModel`].
#[derive(Debug, Clone)]
pub struct LmmConfig {
    // NOTE(review): this flag is not read by `LinearMixedModel::fit` in this
    // file — only random intercepts are estimated regardless of its value.
    pub random_slopes: bool,
    // Maximum number of BLUP/OLS iterations.
    pub max_iter: usize,
    // Convergence tolerance applied to both the coefficient step and the
    // variance-component step.
    pub tol: f64,
}
impl Default for LmmConfig {
    // Defaults: no random slopes, 200 iterations, tolerance 1e-8.
    fn default() -> Self {
        LmmConfig {
            random_slopes: false,
            max_iter: 200,
            tol: 1e-8,
        }
    }
}
/// Output of a linear mixed model fit.
#[derive(Debug, Clone)]
pub struct LmmResult<F> {
    // Fixed-effect slope estimates (intercept excluded).
    pub fixed_effects: Array1<F>,
    // Standard errors matching `fixed_effects`.
    pub fixed_se: Array1<F>,
    // Per-entity BLUP intercepts.
    pub random_intercepts: Array1<F>,
    // Always `None` in the current implementation (random slopes unsupported).
    pub random_slopes: Option<Array2<F>>,
    // Residual (within) variance from the final iteration.
    pub sigma2_resid: F,
    // Random-intercept variance from the final iteration.
    pub sigma2_u: F,
    // Gaussian log-likelihood up to constants (see `fit`); not a full REML
    // criterion despite the name.
    pub reml_loglik: F,
    // Number of observations.
    pub n_obs: usize,
    // Number of entities (max entity id + 1).
    pub n_entities: usize,
}
/// Random-intercept linear mixed model estimated by iterated BLUP/OLS.
pub struct LinearMixedModel {
    // Iteration count and convergence tolerance settings.
    pub config: LmmConfig,
}
impl LinearMixedModel {
    /// Creates a model with the default configuration.
    pub fn new() -> Self {
        LinearMixedModel {
            config: LmmConfig::default(),
        }
    }

    /// Creates a model with an explicit configuration.
    pub fn with_config(config: LmmConfig) -> Self {
        LinearMixedModel { config }
    }

    /// Fits a random-intercept mixed model by alternating BLUP and OLS steps.
    ///
    /// Each iteration (up to `config.max_iter`):
    /// 1. computes residuals of `y` against the current fixed part,
    /// 2. shrinks per-entity mean residuals into BLUP intercepts,
    /// 3. re-fits the fixed effects by OLS on `y - blup` against `[1, X]`,
    /// 4. updates the variance components, stopping early when both the
    ///    coefficient step and the variance step fall below `config.tol`.
    ///
    /// NOTE(review): `config.random_slopes` is ignored here — only random
    /// intercepts are estimated and `random_slopes` is `None` in the result.
    ///
    /// # Errors
    /// `DimensionMismatch` when input lengths disagree; `ComputationError`
    /// when the inner least-squares solve fails.
    pub fn fit<F>(
        &self,
        x: &ArrayView2<F>,
        y: &ArrayView1<F>,
        entity: &[usize],
    ) -> StatsResult<LmmResult<F>>
    where
        F: Float
            + std::iter::Sum
            + std::fmt::Debug
            + std::fmt::Display
            + scirs2_core::numeric::NumAssign
            + scirs2_core::numeric::One
            + scirs2_core::ndarray::ScalarOperand
            + FromPrimitive
            + Send
            + Sync
            + 'static,
    {
        let n = y.len();
        let (nx, k) = x.dim();
        if nx != n || entity.len() != n {
            return Err(StatsError::DimensionMismatch(
                "x, y, entity lengths must match".to_string(),
            ));
        }
        // Entity ids are assumed dense; the count is max id + 1.
        let n_entities = entity.iter().copied().max().map(|m| m + 1).unwrap_or(0);
        let mut e_counts = vec![0usize; n_entities];
        for &eid in entity.iter() {
            e_counts[eid] += 1;
        }
        // Variance-component starting values for the iteration.
        let mut sigma2_eps = F::one();
        let mut sigma2_u = F::from_f64(0.5).unwrap_or(F::one());
        let tol = F::from_f64(self.config.tol).unwrap_or(F::from_f64(1e-8).unwrap_or(F::zero()));
        // coeffs[0] is the intercept, coeffs[1..] the k slopes.
        let mut coeffs = Array1::zeros(k + 1);
        let mut blups = Array1::zeros(n_entities);
        for _iter in 0..self.config.max_iter {
            // Residuals of y against the current fixed part only.
            let mut resid_cur = y.to_owned();
            for i in 0..n {
                let mut fi = coeffs[0];
                for j in 0..k {
                    fi = fi + x[[i, j]] * coeffs[j + 1];
                }
                resid_cur[i] = resid_cur[i] - fi;
            }
            // BLUP update: shrink each entity's mean residual by
            // sigma2_u / (sigma2_u + sigma2_eps / t_i).
            let mut e_res_sum = vec![F::zero(); n_entities];
            for (i, &eid) in entity.iter().enumerate() {
                e_res_sum[eid] = e_res_sum[eid] + resid_cur[i];
            }
            let mut new_blups = Array1::zeros(n_entities);
            for eid in 0..n_entities {
                if e_counts[eid] == 0 {
                    continue;
                }
                let ti = F::from_usize(e_counts[eid]).unwrap_or(F::one());
                let e_mean = e_res_sum[eid] / ti;
                let denom = sigma2_u + sigma2_eps / ti;
                new_blups[eid] = if denom > F::zero() {
                    sigma2_u / denom * e_mean
                } else {
                    F::zero()
                };
            }
            // Fixed-effect update: OLS of (y - blup) on [1, X].
            let mut yq_vec: Vec<F> = Vec::with_capacity(n);
            let mut xq_rows: Vec<Vec<F>> = Vec::with_capacity(n);
            for (i, &eid) in entity.iter().enumerate() {
                yq_vec.push(y[i] - new_blups[eid]);
                let mut row = vec![F::one()];
                for j in 0..k {
                    row.push(x[[i, j]]);
                }
                xq_rows.push(row);
            }
            let yq = Array1::from(yq_vec);
            let xq_flat: Vec<F> = xq_rows.iter().flat_map(|r| r.iter().copied()).collect();
            let xq = Array2::from_shape_vec((n, k + 1), xq_flat)
                .map_err(|e| StatsError::ComputationError(format!("reshape: {e}")))?;
            let (new_coeffs, resid_m) = ols(&xq, &yq)?;
            // Variance-component updates (method-of-moments style; the BLUPs
            // are shrunken, so sigma2_u is an approximation, not exact REML).
            let ss_eps: F = resid_m.iter().map(|&r| r * r).sum();
            let df_eps = if n > k + 1 { n - k - 1 } else { 1 };
            let new_sigma2_eps =
                ss_eps / F::from_usize(df_eps).unwrap_or(F::one());
            let ss_u: F = new_blups.iter().map(|&u| u * u).sum();
            let df_u = if n_entities > 0 { n_entities } else { 1 };
            let new_sigma2_u = ss_u / F::from_usize(df_u).unwrap_or(F::one());
            // Convergence check on both the coefficients and the variances.
            let delta_coeffs: F = new_coeffs
                .iter()
                .zip(coeffs.iter())
                .map(|(&a, &b)| (a - b) * (a - b))
                .sum::<F>()
                .sqrt();
            let delta_sig =
                (new_sigma2_eps - sigma2_eps).abs() + (new_sigma2_u - sigma2_u).abs();
            coeffs = new_coeffs;
            blups = new_blups;
            sigma2_eps = if new_sigma2_eps > F::zero() { new_sigma2_eps } else { F::zero() };
            sigma2_u = if new_sigma2_u > F::zero() { new_sigma2_u } else { F::zero() };
            if delta_coeffs < tol && delta_sig < tol {
                break;
            }
        }
        // Final fitted values include the fixed part plus BLUP intercepts.
        let mut fitted = Array1::zeros(n);
        for i in 0..n {
            let mut fi = coeffs[0];
            for j in 0..k {
                fi = fi + x[[i, j]] * coeffs[j + 1];
            }
            fi = fi + blups[entity[i]];
            fitted[i] = fi;
        }
        let residuals: Array1<F> = (0..n).map(|i| y[i] - fitted[i]).collect();
        let nf = F::from_usize(n).unwrap_or(F::one());
        let df_f = F::from_usize(if n > k + 1 { n - k - 1 } else { 1 }).unwrap_or(F::one());
        let sigma2_final = residuals.iter().map(|&r| r * r).sum::<F>() / df_f;
        // Standard errors of the fixed effects: sigma2 * diag((X'X)^{-1})
        // on the [1, X] design.
        let xq_for_se_flat: Vec<F> = (0..n)
            .flat_map(|i| {
                std::iter::once(F::one())
                    .chain((0..k).map(move |j| x[[i, j]]))
            })
            .collect();
        let xq_for_se = Array2::from_shape_vec((n, k + 1), xq_for_se_flat)
            .map_err(|e| StatsError::ComputationError(format!("reshape: {e}")))?;
        let xtx = matmul(&xq_for_se.t().to_owned(), &xq_for_se)?;
        let se_full = xtx_inv_diag_se(&xtx, sigma2_final)?;
        let fixed_se: Array1<F> = se_full.slice(scirs2_core::ndarray::s![1..]).to_owned();
        let fixed_coef: Array1<F> = coeffs.slice(scirs2_core::ndarray::s![1..]).to_owned();
        // Gaussian log-likelihood up to constants, evaluated at sigma2_eps;
        // labelled REML for API compatibility but not a full REML criterion.
        let reml_loglik = if sigma2_eps > F::zero() {
            let two = F::from_f64(2.0).unwrap_or(F::one());
            -nf / two * sigma2_eps.ln() - nf / two
        } else {
            F::zero()
        };
        Ok(LmmResult {
            fixed_effects: fixed_coef,
            fixed_se,
            random_intercepts: blups,
            random_slopes: None,
            sigma2_resid: sigma2_eps,
            sigma2_u,
            reml_loglik,
            n_obs: n,
            n_entities,
        })
    }
}
impl Default for LinearMixedModel {
fn default() -> Self {
Self::new()
}
}
/// Convenience wrapper exposing the mixed-model variance components.
pub struct REML;

impl REML {
    /// Fits a default-configured [`LinearMixedModel`] and returns the tuple
    /// `(sigma2_u, sigma2_resid, reml_loglik)` from its result.
    pub fn estimate<F>(
        x: &ArrayView2<F>,
        y: &ArrayView1<F>,
        entity: &[usize],
    ) -> StatsResult<(F, F, F)>
    where
        F: Float
            + std::iter::Sum
            + std::fmt::Debug
            + std::fmt::Display
            + scirs2_core::numeric::NumAssign
            + scirs2_core::numeric::One
            + scirs2_core::ndarray::ScalarOperand
            + FromPrimitive
            + Send
            + Sync
            + 'static,
    {
        let fit = LinearMixedModel::new().fit(x, y, entity)?;
        Ok((fit.sigma2_u, fit.sigma2_resid, fit.reml_loglik))
    }
}
/// Standard errors `sqrt(sigma2 * diag((X'X)^{-1}))`.
///
/// Solves one linear system per unit basis vector to obtain the diagonal of
/// the inverse; negative variances (numerical noise) map to a zero SE.
fn xtx_inv_diag_se<F>(xtx: &Array2<F>, sigma2: F) -> StatsResult<Array1<F>>
where
    F: Float
        + std::iter::Sum
        + std::fmt::Debug
        + std::fmt::Display
        + scirs2_core::numeric::NumAssign
        + scirs2_core::numeric::One
        + scirs2_core::ndarray::ScalarOperand
        + FromPrimitive
        + Send
        + Sync
        + 'static,
{
    let k = xtx.nrows();
    let mut se_vals = Vec::with_capacity(k);
    for j in 0..k {
        // Unit vector e_j; solving X'X v = e_j gives column j of the inverse.
        let mut unit = Array1::zeros(k);
        unit[j] = F::one();
        let col = solve(&xtx.view(), &unit.view())
            .map_err(|e| StatsError::ComputationError(format!("solve: {e}")))?;
        let variance = col[j] * sigma2;
        se_vals.push(if variance >= F::zero() {
            variance.sqrt()
        } else {
            F::zero()
        });
    }
    Ok(Array1::from(se_vals))
}
/// Upper-tail chi-squared p-value via the Wilson–Hilferty approximation:
/// if X ~ chi2(k), then (X/k)^(1/3) is approximately normal with mean
/// 1 - 2/(9k) and variance 2/(9k).
fn chi2_upper_tail_pvalue<F: Float + FromPrimitive>(chi2: F, df: usize) -> F {
    if chi2 <= F::zero() {
        return F::one();
    }
    let k = F::from_usize(df).unwrap_or(F::one());
    let two = F::from_f64(2.0).unwrap_or(F::one());
    let nine = F::from_f64(9.0).unwrap_or(F::one());
    let variance = two / (nine * k);
    // Standardize the cube-root transform and look up the normal tail.
    let z = ((chi2 / k).cbrt() - (F::one() - variance)) / variance.sqrt();
    p_value_normal_upper(z)
}
/// Standard-normal upper-tail probability P(Z > z) using the Abramowitz &
/// Stegun 26.2.17 polynomial approximation; negative z handled by symmetry.
fn p_value_normal_upper<F: Float + FromPrimitive>(z: F) -> F {
    let t_scale = F::from_f64(0.2316419).unwrap_or(F::zero());
    // b1..b5 of the A&S approximation, stored lowest-order first.
    let coeffs = [
        F::from_f64(0.319381530).unwrap_or(F::zero()),
        F::from_f64(-0.356563782).unwrap_or(F::zero()),
        F::from_f64(1.781477937).unwrap_or(F::zero()),
        F::from_f64(-1.821255978).unwrap_or(F::zero()),
        F::from_f64(1.330274429).unwrap_or(F::zero()),
    ];
    let inv_sqrt_2pi = F::from_f64(0.39894228).unwrap_or(F::zero());
    let two = F::from_f64(2.0).unwrap_or(F::one());
    let abs_z = if z < F::zero() { -z } else { z };
    let t = F::one() / (F::one() + t_scale * abs_z);
    // Horner evaluation of b1*t + b2*t^2 + ... + b5*t^5.
    let poly = coeffs.iter().rev().fold(F::zero(), |acc, &b| (acc + b) * t);
    let density = inv_sqrt_2pi * (-(abs_z * abs_z) / two).exp();
    let tail = (density * poly).max(F::zero()).min(F::one());
    if z >= F::zero() {
        tail
    } else {
        F::one() - tail
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::{Array1, Array2};

    /// Builds a small balanced panel: 10 entities x 5 periods with
    /// y = 2x + entity effect + a small deterministic trend.
    fn make_re_panel() -> (Array2<f64>, Array1<f64>, Vec<usize>, Vec<usize>) {
        let n_ent = 10;
        let t_per = 5;
        let n = n_ent * t_per;
        let mut x_vals = Vec::with_capacity(n);
        let mut y_vals = Vec::with_capacity(n);
        let entity: Vec<usize> = (0..n_ent)
            .flat_map(|e| std::iter::repeat(e).take(t_per))
            .collect();
        let time: Vec<usize> = (0..t_per).cycle().take(n).collect();
        let effects = [-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5_f64];
        for (i, &eid) in entity.iter().enumerate() {
            let x_v = (i as f64) * 0.3 + 1.0;
            let y_v = 2.0 * x_v + effects[eid] + (i as f64) * 0.01;
            x_vals.push(x_v);
            y_vals.push(y_v);
        }
        let x = Array2::from_shape_vec((n, 1), x_vals).unwrap();
        let y = Array1::from(y_vals);
        (x, y, entity, time)
    }

    // RE estimator should recover the true slope of 2.0 and produce
    // non-negative variance components.
    #[test]
    fn test_re_model_slope() {
        let (x, y, entity, time) = make_re_panel();
        let result = RandomEffectsModel::fit(&x.view(), &y.view(), &entity, &time)
            .expect("RE fit failed");
        let slope = result.coefficients[0];
        assert!(
            (slope - 2.0).abs() < 0.1,
            "RE slope: expected ~2.0, got {}",
            slope
        );
        assert!(result.sigma2_u >= 0.0, "sigma2_u should be non-negative");
        assert!(result.sigma2_epsilon >= 0.0, "sigma2_eps should be non-negative");
    }

    // Hausman statistic must be non-negative with a valid p-value.
    #[test]
    fn test_hausman_test() {
        let (x, y, entity, time) = make_re_panel();
        let fe = FixedEffectsModel::fit(&x.view(), &y.view(), &entity, &time, false)
            .expect("FE fit");
        let re = RandomEffectsModel::fit(&x.view(), &y.view(), &entity, &time)
            .expect("RE fit");
        let ht = HausmanTest::test(&fe, &re).expect("Hausman test failed");
        assert!(ht.h_stat >= 0.0, "H-stat should be non-negative");
        assert!(ht.p_value >= 0.0 && ht.p_value <= 1.0, "p-value in [0,1]");
    }

    // Mixed model should also recover the slope and produce one random
    // intercept per entity. (The time ids are unused by the LMM.)
    #[test]
    fn test_lmm_fit() {
        let (x, y, entity, _time) = make_re_panel();
        let lmm = LinearMixedModel::new();
        let result = lmm.fit(&x.view(), &y.view(), &entity).expect("LMM fit failed");
        let slope = result.fixed_effects[0];
        assert!(
            (slope - 2.0).abs() < 0.3,
            "LMM slope: expected ~2.0, got {}",
            slope
        );
        assert_eq!(result.random_intercepts.len(), 10);
    }

    // REML wrapper must return non-negative variance components.
    #[test]
    fn test_reml_estimate() {
        let (x, y, entity, _time) = make_re_panel();
        let (sigma2_u, sigma2_eps, loglik) =
            REML::estimate(&x.view(), &y.view(), &entity).expect("REML failed");
        assert!(sigma2_u >= 0.0, "REML sigma2_u must be non-negative");
        assert!(sigma2_eps >= 0.0, "REML sigma2_eps must be non-negative");
    }
}