use crate::error::{NumRs2Error, Result};
fn cholesky_lower(a: &[f64], n: usize) -> Result<Vec<f64>> {
let mut l = vec![0.0_f64; n * n];
for i in 0..n {
for j in 0..=i {
let mut s = a[i * n + j];
for p in 0..j {
s -= l[i * n + p] * l[j * n + p];
}
if i == j {
if s <= 0.0 {
return Err(NumRs2Error::ComputationError(format!(
"cholesky: matrix is not positive definite at diagonal ({i},{i})"
)));
}
l[i * n + j] = s.sqrt();
} else {
l[i * n + j] = s / l[j * n + j];
}
}
}
Ok(l)
}
fn forward_sub(l: &[f64], b: &[f64], n: usize) -> Vec<f64> {
let mut x = vec![0.0_f64; n];
for i in 0..n {
let mut s = b[i];
for j in 0..i {
s -= l[i * n + j] * x[j];
}
x[i] = s / l[i * n + i];
}
x
}
fn back_sub(l: &[f64], b: &[f64], n: usize) -> Vec<f64> {
let mut x = vec![0.0_f64; n];
for i in (0..n).rev() {
let mut s = b[i];
for j in (i + 1)..n {
s -= l[j * n + i] * x[j]; }
x[i] = s / l[i * n + i];
}
x
}
fn invert_spd(a: &[f64], n: usize) -> Result<Vec<f64>> {
let l = cholesky_lower(a, n)?;
let mut inv = vec![0.0_f64; n * n];
for j in 0..n {
let mut ej = vec![0.0_f64; n];
ej[j] = 1.0;
let y = forward_sub(&l, &ej, n);
let x = back_sub(&l, &y, n);
for i in 0..n {
inv[i * n + j] = x[i];
}
}
Ok(inv)
}
fn mat_mul(a: &[f64], m: usize, k: usize, b: &[f64], n: usize) -> Vec<f64> {
let mut c = vec![0.0_f64; m * n];
for i in 0..m {
for j in 0..n {
let mut s = 0.0_f64;
for p in 0..k {
s += a[i * k + p] * b[p * n + j];
}
c[i * n + j] = s;
}
}
c
}
fn mat_t_vec(a: &[f64], k: usize, m: usize, b: &[f64]) -> Vec<f64> {
let mut r = vec![0.0_f64; m];
for i in 0..k {
for j in 0..m {
r[j] += a[i * m + j] * b[i];
}
}
r
}
fn ols_fit(x: &[f64], y: &[f64], n: usize, k: usize) -> Result<(Vec<f64>, Vec<f64>, Vec<f64>)> {
if n < k {
return Err(NumRs2Error::ComputationError(format!(
"ols_fit: n={n} < k={k}, not enough observations"
)));
}
let xtx = mat_mul(
&{
let mut xt = vec![0.0_f64; k * n];
for i in 0..n {
for j in 0..k {
xt[j * n + i] = x[i * k + j];
}
}
xt
},
k,
n,
x,
k,
);
let xty = {
let mut v = vec![0.0_f64; k];
for j in 0..k {
for i in 0..n {
v[j] += x[i * k + j] * y[i];
}
}
v
};
let xtx_inv = invert_spd(&xtx, k)?;
let beta = mat_mul(&xtx_inv, k, k, &xty, 1);
let resid: Vec<f64> = (0..n)
.map(|i| {
let fitted: f64 = (0..k).map(|j| x[i * k + j] * beta[j]).sum();
y[i] - fitted
})
.collect();
Ok((beta, resid, xtx_inv))
}
fn erf_approx(x: f64) -> f64 {
let t = 1.0 / (1.0 + 0.3275911 * x.abs());
let y = 1.0
- (0.254_829_592
+ (-0.284_496_736 + (1.421_413_741 + (-1.453_152_027 + 1.061_405_429 * t) * t) * t)
* t)
* t
* (-x * x).exp();
if x >= 0.0 {
y
} else {
-y
}
}
fn normal_cdf(x: f64) -> f64 {
0.5 * (1.0 + erf_approx(x / std::f64::consts::SQRT_2))
}
fn normal_p_value(z: f64) -> f64 {
2.0 * (1.0 - normal_cdf(z.abs()))
}
fn t_p_value(t: f64, df: f64) -> f64 {
if df <= 0.0 {
return 1.0;
}
if df > 200.0 {
return normal_p_value(t);
}
let x = df / (df + t * t);
regularised_incomplete_beta(x, df / 2.0, 0.5).clamp(0.0, 1.0)
}
fn regularised_incomplete_beta(x: f64, a: f64, b: f64) -> f64 {
if x <= 0.0 {
return 0.0;
}
if x >= 1.0 {
return 1.0;
}
if x > (a + 1.0) / (a + b + 2.0) {
return 1.0 - regularised_incomplete_beta(1.0 - x, b, a);
}
let lbeta = lgamma(a) + lgamma(b) - lgamma(a + b);
let front = (x.ln() * a + (1.0 - x).ln() * b - lbeta).exp() / a;
front * betacf(x, a, b)
}
fn betacf(x: f64, a: f64, b: f64) -> f64 {
let max_iter = 200_usize;
let eps = 3.0e-7_f64;
let qab = a + b;
let qap = a + 1.0;
let qam = a - 1.0;
let mut c = 1.0_f64;
let mut d = 1.0 - qab * x / qap;
d = if d.abs() < 1e-30 { 1e-30 } else { 1.0 / d };
let mut h = d;
for m in 1..=max_iter {
let m = m as f64;
let m2 = 2.0 * m;
let mut aa = m * (b - m) * x / ((qam + m2) * (a + m2));
d = 1.0 + aa * d;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = 1.0 + aa / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
h *= d * c;
aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
d = 1.0 + aa * d;
if d.abs() < 1e-30 {
d = 1e-30;
}
c = 1.0 + aa / c;
if c.abs() < 1e-30 {
c = 1e-30;
}
d = 1.0 / d;
let delta = d * c;
h *= delta;
if (delta - 1.0).abs() < eps {
break;
}
}
h
}
fn lgamma(z: f64) -> f64 {
let g = 7.0_f64;
let c = [
0.999_999_999_999_809_93,
676.520_368_121_885_10,
-1_259.139_216_722_402_9,
771.323_428_777_653_13,
-176.615_029_162_140_6,
12.507_343_278_686_905,
-0.138_571_095_265_720_12,
9.984_369_578_019_571_6e-6,
1.505_632_735_149_311_6e-7,
];
let z = z - 1.0;
let mut x = c[0];
for (i, &ci) in c[1..].iter().enumerate() {
x += ci / (z + i as f64 + 1.0);
}
let t = z + g + 0.5;
0.5 * std::f64::consts::TAU.ln() + (z + 0.5) * t.ln() - t + x.ln()
}
fn t_quantile_two_sided(p: f64, df: f64) -> f64 {
let mut lo = 0.0_f64;
let mut hi = 20.0_f64;
while t_p_value(hi, df) > p {
hi *= 2.0;
}
for _ in 0..60 {
let mid = (lo + hi) / 2.0;
if t_p_value(mid, df) > p {
lo = mid;
} else {
hi = mid;
}
}
(lo + hi) / 2.0
}
fn sigmoid(x: f64) -> f64 {
if x >= 0.0 {
1.0 / (1.0 + (-x).exp())
} else {
let ex = x.exp();
ex / (1.0 + ex)
}
}
#[derive(Debug, Clone)]
pub struct IVEstimate {
pub coef: Vec<f64>,
pub std_errors: Vec<f64>,
pub t_stats: Vec<f64>,
pub p_values: Vec<f64>,
pub first_stage_f: Option<f64>,
pub partial_r_squared: Option<f64>,
pub n_obs: usize,
pub estimator: String,
}
#[derive(Debug, Clone)]
pub struct WeakInstrumentSummary {
pub f_statistics: Vec<f64>,
pub partial_r_squared: Vec<f64>,
pub critical_value_10pct: f64,
pub instruments_weak: Vec<bool>,
}
fn first_stage_diagnostics(
x: &[f64],
x_exog: &[f64],
z_excl: &[f64],
n: usize,
k_exog: usize,
l_excl: usize,
) -> Result<(f64, f64)> {
let l_total = l_excl + k_exog;
let mut z_full = vec![0.0_f64; n * l_total];
for i in 0..n {
for j in 0..l_excl {
z_full[i * l_total + j] = z_excl[i * l_excl + j];
}
for j in 0..k_exog {
z_full[i * l_total + l_excl + j] = x_exog[i * k_exog + j];
}
}
let (_, resid_r, _) = ols_fit(x_exog, x, n, k_exog)?;
let rss_r: f64 = resid_r.iter().map(|&r| r * r).sum();
let (_, resid_u, _) = ols_fit(&z_full, x, n, l_total)?;
let rss_u: f64 = resid_u.iter().map(|&r| r * r).sum();
let df_u = (n - l_total) as f64;
if df_u <= 0.0 {
return Err(NumRs2Error::ComputationError(
"first_stage_diagnostics: insufficient degrees of freedom".into(),
));
}
let f_stat = if rss_u > 1e-15 {
((rss_r - rss_u) / l_excl as f64) / (rss_u / df_u)
} else {
f64::INFINITY
};
let x_mean: f64 = x.iter().sum::<f64>() / n as f64;
let tss: f64 = x.iter().map(|&xi| (xi - x_mean).powi(2)).sum();
let partial_r2 = if tss > 1e-15 {
((rss_r - rss_u) / tss).clamp(0.0, 1.0)
} else {
0.0
};
Ok((f_stat.max(0.0), partial_r2))
}
pub fn two_stage_least_squares(y: &[f64], x: &[f64], z: &[f64]) -> Result<IVEstimate> {
let n = y.len();
if x.len() != n || z.len() != n {
return Err(NumRs2Error::DimensionMismatch(
"two_stage_least_squares: y, x, and z must all have the same length".into(),
));
}
if n < 4 {
return Err(NumRs2Error::ComputationError(
"two_stage_least_squares: need at least 4 observations".into(),
));
}
let l_total = 2_usize; let mut z_full = vec![0.0_f64; n * l_total];
for i in 0..n {
z_full[i * l_total + 0] = z[i];
z_full[i * l_total + 1] = 1.0;
}
let k_total = 2_usize;
let mut x_mat = vec![0.0_f64; n * k_total];
for i in 0..n {
x_mat[i * k_total + 0] = x[i];
x_mat[i * k_total + 1] = 1.0;
}
let (ztx0, _, _) = ols_fit(
&z_full,
&(0..n).map(|i| x_mat[i * k_total]).collect::<Vec<_>>(),
n,
l_total,
)?;
let x_hat: Vec<f64> = (0..n)
.map(|i| z_full[i * l_total] * ztx0[0] + z_full[i * l_total + 1] * ztx0[1])
.collect();
let mut x_hat_mat = vec![0.0_f64; n * k_total];
for i in 0..n {
x_hat_mat[i * k_total + 0] = x_hat[i];
x_hat_mat[i * k_total + 1] = 1.0;
}
let mut xht_x = vec![0.0_f64; k_total * k_total];
for r in 0..k_total {
for c in 0..k_total {
for i in 0..n {
xht_x[r * k_total + c] += x_hat_mat[i * k_total + r] * x_mat[i * k_total + c];
}
}
}
let xhtx_inv = invert_spd(&xht_x, k_total)?;
let xht_y: Vec<f64> = (0..k_total)
.map(|j| (0..n).map(|i| x_hat_mat[i * k_total + j] * y[i]).sum())
.collect();
let beta = mat_mul(&xhtx_inv, k_total, k_total, &xht_y, 1);
let residuals: Vec<f64> = (0..n)
.map(|i| {
let fitted: f64 = (0..k_total).map(|j| x_mat[i * k_total + j] * beta[j]).sum();
y[i] - fitted
})
.collect();
let df = n - k_total;
if df == 0 {
return Err(NumRs2Error::ComputationError(
"two_stage_least_squares: no degrees of freedom remaining".into(),
));
}
let scale = n as f64 / df as f64;
let mut meat = vec![0.0_f64; k_total * k_total];
for i in 0..n {
let ui2 = residuals[i] * residuals[i];
for r in 0..k_total {
for c in 0..k_total {
meat[r * k_total + c] += ui2 * x_hat_mat[i * k_total + r] * x_mat[i * k_total + c];
}
}
}
let vcov_inner = mat_mul(&xhtx_inv, k_total, k_total, &meat, k_total);
let vcov = mat_mul(&vcov_inner, k_total, k_total, &xhtx_inv, k_total);
let std_errors: Vec<f64> = (0..k_total)
.map(|j| (scale * vcov[j * k_total + j]).max(0.0).sqrt())
.collect();
let t_stats: Vec<f64> = beta
.iter()
.zip(std_errors.iter())
.map(|(&b, &se)| if se > 0.0 { b / se } else { 0.0 })
.collect();
let p_values: Vec<f64> = t_stats.iter().map(|&t| t_p_value(t, df as f64)).collect();
let x_exog = vec![1.0_f64; n]; let z_excl: Vec<f64> = z.to_vec();
let z_excl_mat = z_excl.clone(); let (fs_f, pr2) = first_stage_diagnostics(x, &x_exog, &z_excl_mat, n, 1, 1)?;
Ok(IVEstimate {
coef: beta,
std_errors,
t_stats,
p_values,
first_stage_f: Some(fs_f),
partial_r_squared: Some(pr2),
n_obs: n,
estimator: "2SLS".into(),
})
}
pub fn weak_instrument_test(y: &[f64], x: &[f64], z: &[f64]) -> Result<WeakInstrumentSummary> {
let n = y.len();
if x.len() != n || z.len() != n {
return Err(NumRs2Error::DimensionMismatch(
"weak_instrument_test: y, x, and z must all have the same length".into(),
));
}
if n < 4 {
return Err(NumRs2Error::ComputationError(
"weak_instrument_test: need at least 4 observations".into(),
));
}
let x_exog = vec![1.0_f64; n]; let (f_stat, pr2) = first_stage_diagnostics(x, &x_exog, z, n, 1, 1)?;
let critical_value_10pct = 10.0_f64;
Ok(WeakInstrumentSummary {
f_statistics: vec![f_stat],
partial_r_squared: vec![pr2],
critical_value_10pct,
instruments_weak: vec![f_stat < critical_value_10pct],
})
}
#[derive(Debug, Clone)]
pub struct DiDEstimate {
pub att: f64,
pub std_error: f64,
pub t_stat: f64,
pub p_value: f64,
pub conf_interval: [f64; 2],
pub parallel_trends_p: Option<f64>,
pub n_treated: usize,
pub n_control: usize,
}
pub fn difference_in_differences(
outcome_pre_treat: &[f64],
outcome_post_treat: &[f64],
outcome_pre_control: &[f64],
outcome_post_control: &[f64],
) -> Result<DiDEstimate> {
let n_treat = outcome_pre_treat.len();
let n_ctrl = outcome_pre_control.len();
if n_treat == 0 || outcome_post_treat.is_empty() {
return Err(NumRs2Error::ComputationError(
"difference_in_differences: treated group must have at least one observation".into(),
));
}
if n_ctrl == 0 || outcome_post_control.is_empty() {
return Err(NumRs2Error::ComputationError(
"difference_in_differences: control group must have at least one observation".into(),
));
}
if outcome_pre_treat.len() != outcome_post_treat.len() {
return Err(NumRs2Error::DimensionMismatch(
"difference_in_differences: pre and post treated slices must have equal length".into(),
));
}
if outcome_pre_control.len() != outcome_post_control.len() {
return Err(NumRs2Error::DimensionMismatch(
"difference_in_differences: pre and post control slices must have equal length".into(),
));
}
let n_units = n_treat + n_ctrl;
let n_periods = 2_usize;
let n_obs = n_units * n_periods;
let k = n_units + 2;
let mut x_mat = vec![0.0_f64; n_obs * k];
let mut y_vec = vec![0.0_f64; n_obs];
for (unit_idx, is_treated) in (0..n_units).map(|i| (i, i < n_treat)) {
for period in 0..n_periods {
let row = unit_idx * n_periods + period;
y_vec[row] = if is_treated {
if period == 0 {
outcome_pre_treat[unit_idx]
} else {
outcome_post_treat[unit_idx]
}
} else {
let ctrl_idx = unit_idx - n_treat;
if period == 0 {
outcome_pre_control[ctrl_idx]
} else {
outcome_post_control[ctrl_idx]
}
};
x_mat[row * k + 0] = 1.0;
if unit_idx > 0 {
x_mat[row * k + unit_idx] = 1.0;
}
if period == 1 {
x_mat[row * k + n_units] = 1.0;
}
if is_treated && period == 1 {
x_mat[row * k + k - 1] = 1.0;
}
}
}
let (beta, resid, xtx_inv) = ols_fit(&x_mat, &y_vec, n_obs, k)?;
let att = beta[k - 1];
let df = (n_obs - k) as f64;
let s2 = resid.iter().map(|&r| r * r).sum::<f64>() / df.max(1.0);
let var_att = xtx_inv[(k - 1) * k + (k - 1)] * s2;
let se = var_att.max(0.0).sqrt();
let t_stat = if se > 1e-15 { att / se } else { 0.0 };
let p_value = t_p_value(t_stat, df);
let t_crit = t_quantile_two_sided(0.05, df);
let conf_interval = [att - t_crit * se, att + t_crit * se];
Ok(DiDEstimate {
att,
std_error: se,
t_stat,
p_value,
conf_interval,
parallel_trends_p: None, n_treated: n_treat,
n_control: n_ctrl,
})
}
#[derive(Debug, Clone)]
pub struct PropensityScores {
pub scores: Vec<f64>,
pub coefficients: Vec<f64>,
}
pub fn estimate_propensity_scores(
covariates: &[Vec<f64>],
treatment: &[bool],
) -> Result<PropensityScores> {
let n = covariates.len();
if treatment.len() != n {
return Err(NumRs2Error::DimensionMismatch(
"estimate_propensity_scores: covariates and treatment must have the same length".into(),
));
}
if n == 0 {
return Err(NumRs2Error::ComputationError(
"estimate_propensity_scores: need at least one observation".into(),
));
}
let k_cov = covariates[0].len();
for (idx, row) in covariates.iter().enumerate() {
if row.len() != k_cov {
return Err(NumRs2Error::DimensionMismatch(format!(
"estimate_propensity_scores: covariate row {idx} has length {} but expected {k_cov}",
row.len()
)));
}
}
let k1 = k_cov + 1;
let mut x_mat = vec![0.0_f64; n * k1];
let mut w_vec = vec![0.0_f64; n];
for i in 0..n {
x_mat[i * k1 + 0] = 1.0; for j in 0..k_cov {
x_mat[i * k1 + j + 1] = covariates[i][j];
}
w_vec[i] = if treatment[i] { 1.0 } else { 0.0 };
}
let lambda = 1e-4_f64;
let max_iter = 200_usize;
let tol = 1e-8_f64;
let mut beta = vec![0.0_f64; k1];
for _iter in 0..max_iter {
let mu: Vec<f64> = (0..n)
.map(|i| {
let eta: f64 = (0..k1).map(|j| x_mat[i * k1 + j] * beta[j]).sum();
sigmoid(eta)
})
.collect();
let v: Vec<f64> = mu.iter().map(|&m| (m * (1.0 - m)).max(1e-8)).collect();
let mut grad = vec![0.0_f64; k1];
for j in 0..k1 {
for i in 0..n {
grad[j] += x_mat[i * k1 + j] * (w_vec[i] - mu[i]);
}
if j > 0 {
grad[j] -= lambda * beta[j];
}
}
let mut hess = vec![0.0_f64; k1 * k1];
for i in 0..n {
for r in 0..k1 {
for c in 0..k1 {
hess[r * k1 + c] += v[i] * x_mat[i * k1 + r] * x_mat[i * k1 + c];
}
}
}
for j in 1..k1 {
hess[j * k1 + j] += lambda;
}
let h_inv = invert_spd(&hess, k1)?;
let delta = mat_mul(&h_inv, k1, k1, &grad, 1);
let step_norm: f64 = delta.iter().map(|&d| d * d).sum::<f64>().sqrt();
for j in 0..k1 {
beta[j] += delta[j];
}
if step_norm < tol {
break;
}
}
let scores: Vec<f64> = (0..n)
.map(|i| {
let eta: f64 = (0..k1).map(|j| x_mat[i * k1 + j] * beta[j]).sum();
sigmoid(eta)
})
.collect();
Ok(PropensityScores {
scores,
coefficients: beta,
})
}
pub fn ipw_ate(outcome: &[f64], treatment: &[bool], propensity_scores: &[f64]) -> Result<f64> {
let n = outcome.len();
if treatment.len() != n || propensity_scores.len() != n {
return Err(NumRs2Error::DimensionMismatch(
"ipw_ate: outcome, treatment, and propensity_scores must all have the same length"
.into(),
));
}
if n == 0 {
return Err(NumRs2Error::ComputationError(
"ipw_ate: need at least one observation".into(),
));
}
let eps = 0.01_f64;
let ate: f64 = (0..n)
.map(|i| {
let wi = if treatment[i] { 1.0_f64 } else { 0.0 };
let yi = outcome[i];
let pi = propensity_scores[i].clamp(eps, 1.0 - eps);
wi * yi / pi - (1.0 - wi) * yi / (1.0 - pi)
})
.sum::<f64>()
/ n as f64;
Ok(ate)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_did_known_treatment_effect() {
let n = 5_usize;
let trend = 2.0_f64;
let true_att = 5.0_f64;
let pre_t: Vec<f64> = (0..n).map(|i| 10.0 + i as f64 * 0.5).collect();
let post_t: Vec<f64> = pre_t.iter().map(|&v| v + trend + true_att).collect();
let pre_c: Vec<f64> = (0..n).map(|i| 10.0 + i as f64 * 0.5).collect();
let post_c: Vec<f64> = pre_c.iter().map(|&v| v + trend).collect();
let result = difference_in_differences(&pre_t, &post_t, &pre_c, &post_c).unwrap();
assert!(
(result.att - true_att).abs() < 0.5,
"Expected ATT ≈ {true_att}, got {:.4}",
result.att
);
assert_eq!(result.n_treated, n);
assert_eq!(result.n_control, n);
}
#[test]
fn test_did_zero_treatment_effect() {
let pre_t = vec![10.0, 11.0, 12.0, 13.0];
let post_t = vec![14.0, 15.0, 16.0, 17.0]; let pre_c = vec![10.0, 11.0, 12.0, 13.0];
let post_c = vec![14.0, 15.0, 16.0, 17.0];
let result = difference_in_differences(&pre_t, &post_t, &pre_c, &post_c).unwrap();
assert!(
result.att.abs() < 1e-6,
"Expected ATT ≈ 0, got {:.8}",
result.att
);
assert!(
result.p_value > 0.05,
"Expected non-significant result, got p = {:.4}",
result.p_value
);
}
#[test]
fn test_did_dimension_mismatch() {
let err = difference_in_differences(
&[1.0, 2.0],
&[3.0], &[1.0, 2.0],
&[3.0, 4.0],
);
assert!(err.is_err(), "Expected an error for mismatched lengths");
}
#[test]
fn test_did_negative_treatment_effect() {
let n = 6_usize;
let true_att = -3.0_f64;
let pre_t: Vec<f64> = (0..n).map(|i| 20.0 + i as f64).collect();
let post_t: Vec<f64> = pre_t.iter().map(|&v| v + 1.0 + true_att).collect();
let pre_c: Vec<f64> = (0..n).map(|i| 20.0 + i as f64).collect();
let post_c: Vec<f64> = pre_c.iter().map(|&v| v + 1.0).collect();
let result = difference_in_differences(&pre_t, &post_t, &pre_c, &post_c).unwrap();
assert!(
(result.att - true_att).abs() < 0.5,
"Expected ATT ≈ {true_att}, got {:.4}",
result.att
);
}
#[test]
fn test_propensity_score_monotonicity() {
let covariates: Vec<Vec<f64>> = vec![
vec![-2.0],
vec![-1.5],
vec![-1.0],
vec![-0.5],
vec![0.5],
vec![1.0],
vec![1.5],
vec![2.0],
];
let treatment = vec![false, false, false, false, true, true, true, true];
let ps = estimate_propensity_scores(&covariates, &treatment).unwrap();
assert_eq!(ps.scores.len(), 8);
for &s in &ps.scores {
assert!(s > 0.0 && s < 1.0, "score {s} outside (0, 1)");
}
let high_mean: f64 = ps.scores[4..].iter().sum::<f64>() / 4.0;
let low_mean: f64 = ps.scores[..4].iter().sum::<f64>() / 4.0;
assert!(
high_mean > low_mean,
"Expected higher PS for higher-X units: high={high_mean:.3}, low={low_mean:.3}"
);
}
#[test]
fn test_propensity_score_dimension_mismatch() {
let covariates = vec![vec![1.0], vec![2.0]];
let treatment = vec![false]; assert!(estimate_propensity_scores(&covariates, &treatment).is_err());
}
#[test]
fn test_ipw_ate_known_effect() {
let n = 20_usize;
let true_ate = 2.0_f64;
let covariates: Vec<Vec<f64>> = (0..n).map(|i| vec![i as f64 / n as f64]).collect();
let treatment: Vec<bool> = (0..n).map(|i| i >= n / 2).collect();
let outcome: Vec<f64> = (0..n)
.map(|i| if treatment[i] { 1.0 + true_ate } else { 1.0 })
.collect();
let ps = estimate_propensity_scores(&covariates, &treatment).unwrap();
let ate = ipw_ate(&outcome, &treatment, &ps.scores).unwrap();
assert!(
(ate - true_ate).abs() < 1.0,
"Expected IPW ATE ≈ {true_ate}, got {ate:.4}"
);
}
#[test]
fn test_ipw_dimension_mismatch() {
let outcome = vec![1.0, 2.0, 3.0];
let treatment = vec![false, true]; let ps = vec![0.3, 0.6, 0.5];
assert!(ipw_ate(&outcome, &treatment, &ps).is_err());
}
#[test]
fn test_two_stage_least_squares_recovery() {
let n = 100_usize;
let z: Vec<f64> = (0..n)
.map(|i| (i as f64) / (n as f64) * 4.0 - 2.0)
.collect();
let x: Vec<f64> = z.iter().map(|&zi| 0.8 * zi).collect();
let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 0.5).collect();
let result = two_stage_least_squares(&y, &x, &z).unwrap();
let beta_x = result.coef[0];
assert!(
(beta_x - 2.0).abs() < 0.1,
"Expected β_x ≈ 2.0, got {beta_x:.6}"
);
assert_eq!(result.n_obs, n);
assert_eq!(result.estimator, "2SLS");
assert!(
result.first_stage_f.unwrap() > 10.0,
"Expected strong first-stage F"
);
}
#[test]
fn test_weak_instrument_strong_z() {
let n = 60_usize;
let z: Vec<f64> = (0..n).map(|i| i as f64).collect();
let x: Vec<f64> = z.iter().map(|&zi| 2.0 * zi + 1.0).collect();
let y: Vec<f64> = x.iter().map(|&xi| xi + 0.1).collect();
let summary = weak_instrument_test(&y, &x, &z).unwrap();
assert_eq!(summary.f_statistics.len(), 1);
assert!(
summary.f_statistics[0] > 100.0,
"Expected very large F for strong instrument, got {:.2}",
summary.f_statistics[0]
);
assert!(
!summary.instruments_weak[0],
"Should not be flagged as weak"
);
}
}