use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
use crate::error::{StatsError, StatsResult};
#[derive(Debug, Clone)]
pub struct EstimationResult {
pub estimate: f64,
pub std_error: f64,
pub conf_interval: [f64; 2],
pub p_value: f64,
pub estimand: String,
pub estimator: String,
pub n_obs: usize,
pub diagnostics: std::collections::HashMap<String, f64>,
}
impl EstimationResult {
fn new(
estimate: f64,
std_error: f64,
estimand: impl Into<String>,
estimator: impl Into<String>,
n_obs: usize,
) -> Self {
let z = 1.959_964; let margin = z * std_error;
let p = two_sided_p(estimate / std_error.max(f64::EPSILON));
Self {
estimate,
std_error,
conf_interval: [estimate - margin, estimate + margin],
p_value: p,
estimand: estimand.into(),
estimator: estimator.into(),
n_obs,
diagnostics: std::collections::HashMap::new(),
}
}
fn with_diagnostic(mut self, key: impl Into<String>, val: f64) -> Self {
self.diagnostics.insert(key.into(), val);
self
}
}
fn two_sided_p(z: f64) -> f64 {
2.0 * (1.0 - normal_cdf(z.abs()))
}
fn normal_cdf(x: f64) -> f64 {
0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
}
fn erf(x: f64) -> f64 {
let t = 1.0 / (1.0 + 0.3275911 * x.abs());
let poly = t
* (0.254_829_592
+ t * (-0.284_496_736
+ t * (1.421_413_741 + t * (-1.453_152_027 + t * 1.061_405_429))));
let sign = if x >= 0.0 { 1.0 } else { -1.0 };
sign * (1.0 - poly * (-x * x).exp())
}
fn logistic_regression(
x: ArrayView2<f64>,
y: ArrayView1<f64>,
max_iter: usize,
lr: f64,
tol: f64,
) -> StatsResult<Array1<f64>> {
let (n, p) = x.dim();
let mut coef = Array1::<f64>::zeros(p + 1);
for _iter in 0..max_iter {
let mut grad = Array1::<f64>::zeros(p + 1);
let mut loss = 0.0_f64;
for i in 0..n {
let xi = x.row(i);
let linear: f64 = coef[0]
+ xi.iter()
.zip(coef.iter().skip(1))
.map(|(a, b)| a * b)
.sum::<f64>();
let prob = 1.0 / (1.0 + (-linear).exp());
let err = prob - y[i];
loss += -(y[i] * prob.ln() + (1.0 - y[i]) * (1.0 - prob).ln());
grad[0] += err;
for j in 0..p {
grad[j + 1] += err * xi[j];
}
}
loss /= n as f64;
for j in 0..=(p) {
coef[j] -= lr * grad[j] / n as f64;
}
if grad.iter().map(|g| g * g).sum::<f64>().sqrt() / (n as f64) < tol {
break;
}
let _ = loss;
}
Ok(coef)
}
fn predict_proba(x: ArrayView2<f64>, coef: &Array1<f64>) -> Array1<f64> {
let (n, p) = x.dim();
let mut probs = Array1::<f64>::zeros(n);
for i in 0..n {
let xi = x.row(i);
let linear: f64 = coef[0]
+ xi.iter()
.zip(coef.iter().skip(1))
.map(|(a, b)| a * b)
.sum::<f64>();
probs[i] = 1.0 / (1.0 + (-linear).exp());
}
probs
}
pub struct IPWEstimator {
pub stabilised: bool,
pub max_iter: usize,
}
impl Default for IPWEstimator {
fn default() -> Self {
Self {
stabilised: true,
max_iter: 500,
}
}
}
impl IPWEstimator {
pub fn estimate(
&self,
covariates: ArrayView2<f64>,
treatment: ArrayView1<f64>,
outcome: ArrayView1<f64>,
) -> StatsResult<EstimationResult> {
let n = outcome.len();
if covariates.nrows() != n || treatment.len() != n {
return Err(StatsError::DimensionMismatch(
"Covariate, treatment, and outcome dimensions must match".to_owned(),
));
}
let coef = logistic_regression(covariates, treatment, self.max_iter, 0.1, 1e-6)?;
let ps = predict_proba(covariates, &coef);
let eps = 1e-6_f64;
let ps_clip: Array1<f64> = ps.mapv(|p| p.clamp(eps, 1.0 - eps));
let mut ate_sum = 0.0_f64;
let mut w1_sum = 0.0_f64;
let mut w0_sum = 0.0_f64;
for i in 0..n {
let ti = treatment[i];
let yi = outcome[i];
let ei = ps_clip[i];
if self.stabilised {
w1_sum += ti / ei;
w0_sum += (1.0 - ti) / (1.0 - ei);
ate_sum += ti * yi / ei - (1.0 - ti) * yi / (1.0 - ei);
} else {
ate_sum += ti * yi / ei - (1.0 - ti) * yi / (1.0 - ei);
}
}
let ate = if self.stabilised {
let mu1 = ate_sum / n as f64 + (w0_sum / n as f64) * 0.0; let mu1_h: f64 = (0..n)
.map(|i| treatment[i] * outcome[i] / ps_clip[i])
.sum::<f64>()
/ (0..n)
.map(|i| treatment[i] / ps_clip[i])
.sum::<f64>()
.max(f64::EPSILON);
let mu0_h: f64 = (0..n)
.map(|i| (1.0 - treatment[i]) * outcome[i] / (1.0 - ps_clip[i]))
.sum::<f64>()
/ (0..n)
.map(|i| (1.0 - treatment[i]) / (1.0 - ps_clip[i]))
.sum::<f64>()
.max(f64::EPSILON);
let _ = mu1;
mu1_h - mu0_h
} else {
ate_sum / n as f64
};
let se = bootstrap_se_ipw(&ps_clip, &treatment, &outcome, self.stabilised, 50);
Ok(
EstimationResult::new(ate, se, "ATE", "IPW", n).with_diagnostic(
"mean_ps_treated",
ps_clip
.iter()
.zip(treatment.iter())
.filter(|(_, &t)| t > 0.5)
.map(|(p, _)| p)
.sum::<f64>()
/ treatment.iter().filter(|&&t| t > 0.5).count().max(1) as f64,
),
)
}
}
fn bootstrap_se_ipw(
ps: &Array1<f64>,
treatment: &ArrayView1<f64>,
outcome: &ArrayView1<f64>,
stabilised: bool,
n_boot: usize,
) -> f64 {
let n = ps.len();
let mut estimates = Vec::with_capacity(n_boot);
let mut rng_state: u64 = 12345;
for _ in 0..n_boot {
let mut sample_ate = 0.0_f64;
let mut w1 = 0.0_f64;
let mut w0 = 0.0_f64;
for _ in 0..n {
rng_state = rng_state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
let idx = (rng_state >> 33) as usize % n;
let ti = treatment[idx];
let yi = outcome[idx];
let ei = ps[idx];
w1 += ti / ei;
w0 += (1.0 - ti) / (1.0 - ei);
sample_ate += ti * yi / ei - (1.0 - ti) * yi / (1.0 - ei);
}
let ate = if stabilised {
let mu1 = (0..n)
.map(|i| treatment[i] * outcome[i] / ps[i])
.sum::<f64>()
/ w1.max(f64::EPSILON);
let mu0 = (0..n)
.map(|i| (1.0 - treatment[i]) * outcome[i] / (1.0 - ps[i]))
.sum::<f64>()
/ w0.max(f64::EPSILON);
mu1 - mu0
} else {
sample_ate / n as f64
};
estimates.push(ate);
}
let mean = estimates.iter().sum::<f64>() / n_boot as f64;
let var = estimates.iter().map(|&e| (e - mean).powi(2)).sum::<f64>() / (n_boot - 1) as f64;
var.sqrt()
}
pub struct DoublyRobustEstimator {
pub ps_max_iter: usize,
pub outcome_poly_degree: usize,
}
impl Default for DoublyRobustEstimator {
fn default() -> Self {
Self {
ps_max_iter: 500,
outcome_poly_degree: 1,
}
}
}
impl DoublyRobustEstimator {
pub fn estimate(
&self,
covariates: ArrayView2<f64>,
treatment: ArrayView1<f64>,
outcome: ArrayView1<f64>,
) -> StatsResult<EstimationResult> {
let n = outcome.len();
if covariates.nrows() != n || treatment.len() != n {
return Err(StatsError::DimensionMismatch(
"Dimensions must match".to_owned(),
));
}
let coef_ps = logistic_regression(covariates, treatment, self.ps_max_iter, 0.1, 1e-6)?;
let ps = predict_proba(covariates, &coef_ps).mapv(|p| p.clamp(1e-6, 1.0 - 1e-6));
let (mu1, mu0) = outcome_regression_linear(covariates, treatment, outcome)?;
let mut aipw_scores = Array1::<f64>::zeros(n);
for i in 0..n {
let ti = treatment[i];
let yi = outcome[i];
let ei = ps[i];
aipw_scores[i] =
mu1[i] - mu0[i] + ti * (yi - mu1[i]) / ei - (1.0 - ti) * (yi - mu0[i]) / (1.0 - ei);
}
let ate = aipw_scores.mean().unwrap_or(0.0);
let variance =
aipw_scores.iter().map(|&s| (s - ate).powi(2)).sum::<f64>() / ((n - 1) as f64);
let se = (variance / n as f64).sqrt();
Ok(EstimationResult::new(
ate,
se,
"ATE",
"AIPW (Doubly-Robust)",
n,
))
}
}
fn outcome_regression_linear(
covariates: ArrayView2<f64>,
treatment: ArrayView1<f64>,
outcome: ArrayView1<f64>,
) -> StatsResult<(Array1<f64>, Array1<f64>)> {
let n = covariates.nrows();
let p = covariates.ncols();
let mut design = Array2::<f64>::zeros((n, p + 2));
for i in 0..n {
design[[i, 0]] = 1.0;
design[[i, 1]] = treatment[i];
for j in 0..p {
design[[i, j + 2]] = covariates[[i, j]];
}
}
let coef = ols_estimate(design.view(), outcome)?;
let mut mu1 = Array1::<f64>::zeros(n);
let mut mu0 = Array1::<f64>::zeros(n);
for i in 0..n {
let mut pred1 = coef[0] + coef[1]; let mut pred0 = coef[0]; for j in 0..p {
pred1 += coef[j + 2] * covariates[[i, j]];
pred0 += coef[j + 2] * covariates[[i, j]];
}
mu1[i] = pred1;
mu0[i] = pred0;
}
Ok((mu1, mu0))
}
fn ols_estimate(x: ArrayView2<f64>, y: ArrayView1<f64>) -> StatsResult<Array1<f64>> {
let (n, p) = x.dim();
let mut xtx = Array2::<f64>::zeros((p, p));
let mut xty = Array1::<f64>::zeros(p);
for i in 0..n {
let xi = x.row(i);
for j in 0..p {
xty[j] += xi[j] * y[i];
for k in 0..p {
xtx[[j, k]] += xi[j] * xi[k];
}
}
}
gauss_jordan(xtx, xty)
}
fn gauss_jordan(mut a: Array2<f64>, mut b: Array1<f64>) -> StatsResult<Array1<f64>> {
let n = b.len();
for col in 0..n {
let pivot_row = (col..n).max_by(|&i, &j| {
a[[i, col]]
.abs()
.partial_cmp(&a[[j, col]].abs())
.unwrap_or(std::cmp::Ordering::Equal)
});
let pivot_row =
pivot_row.ok_or_else(|| StatsError::ComputationError("Singular matrix".to_owned()))?;
for k in 0..n {
let tmp = a[[col, k]];
a[[col, k]] = a[[pivot_row, k]];
a[[pivot_row, k]] = tmp;
}
let tmp = b[pivot_row];
b[pivot_row] = b[col];
b[col] = tmp;
let pivot = a[[col, col]];
if pivot.abs() < 1e-12 {
return Err(StatsError::ComputationError(
"Singular matrix in OLS".to_owned(),
));
}
for k in col..n {
a[[col, k]] /= pivot;
}
b[col] /= pivot;
for row in 0..n {
if row != col {
let factor = a[[row, col]];
for k in col..n {
let av = a[[col, k]];
a[[row, k]] -= factor * av;
}
b[row] -= factor * b[col];
}
}
}
Ok(b)
}
pub struct NearestNeighborMatching {
pub k: usize,
pub use_propensity_score: bool,
pub ps_max_iter: usize,
pub with_replacement: bool,
}
impl Default for NearestNeighborMatching {
fn default() -> Self {
Self {
k: 1,
use_propensity_score: false,
ps_max_iter: 500,
with_replacement: true,
}
}
}
impl NearestNeighborMatching {
pub fn estimate(
&self,
covariates: ArrayView2<f64>,
treatment: ArrayView1<f64>,
outcome: ArrayView1<f64>,
) -> StatsResult<EstimationResult> {
let n = treatment.len();
if covariates.nrows() != n || outcome.len() != n {
return Err(StatsError::DimensionMismatch(
"Dimensions must match".to_owned(),
));
}
let match_features: Array2<f64> = if self.use_propensity_score {
let coef = logistic_regression(covariates, treatment, self.ps_max_iter, 0.1, 1e-6)?;
let ps = predict_proba(covariates, &coef);
ps.insert_axis(scirs2_core::ndarray::Axis(1))
} else {
covariates.to_owned()
};
let treated_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] > 0.5).collect();
let control_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] <= 0.5).collect();
if treated_idx.is_empty() || control_idx.is_empty() {
return Err(StatsError::InsufficientData(
"Need both treated and control units".to_owned(),
));
}
let variances = column_variances(match_features.view());
let mut att_contributions = Vec::with_capacity(treated_idx.len());
let mut used_controls: std::collections::HashSet<usize> = std::collections::HashSet::new();
for &ti in &treated_idx {
let mut distances: Vec<(usize, f64)> = control_idx
.iter()
.filter(|&&ci| self.with_replacement || !used_controls.contains(&ci))
.map(|&ci| {
let d = mahalanobis_dist(
match_features.row(ti),
match_features.row(ci),
&variances,
);
(ci, d)
})
.collect();
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
let k_matches = self.k.min(distances.len());
if k_matches == 0 {
continue;
}
let matched_y: f64 = distances[..k_matches]
.iter()
.map(|&(ci, _)| outcome[ci])
.sum::<f64>()
/ k_matches as f64;
att_contributions.push(outcome[ti] - matched_y);
if !self.with_replacement {
for &(ci, _) in &distances[..k_matches] {
used_controls.insert(ci);
}
}
}
if att_contributions.is_empty() {
return Err(StatsError::InsufficientData(
"No matched pairs found".to_owned(),
));
}
let att = att_contributions.iter().sum::<f64>() / att_contributions.len() as f64;
let var = att_contributions
.iter()
.map(|&d| (d - att).powi(2))
.sum::<f64>()
/ (att_contributions.len().saturating_sub(1).max(1) as f64);
let se = (var / att_contributions.len() as f64).sqrt();
Ok(EstimationResult::new(
att,
se,
"ATT",
if self.use_propensity_score {
"PS Matching (NN)"
} else {
"Covariate Matching (NN)"
},
n,
))
}
}
fn column_variances(x: ArrayView2<f64>) -> Array1<f64> {
let (n, p) = x.dim();
let mut vars = Array1::<f64>::zeros(p);
for j in 0..p {
let col = x.column(j);
let mean = col.mean().unwrap_or(0.0);
let v = col.iter().map(|&xi| (xi - mean).powi(2)).sum::<f64>() / (n as f64);
vars[j] = v.max(1e-10);
}
vars
}
fn mahalanobis_dist(
a: scirs2_core::ndarray::ArrayView1<f64>,
b: scirs2_core::ndarray::ArrayView1<f64>,
variances: &Array1<f64>,
) -> f64 {
a.iter()
.zip(b.iter())
.zip(variances.iter())
.map(|((&ai, &bi), &vi)| (ai - bi).powi(2) / vi)
.sum::<f64>()
.sqrt()
}
pub struct RegressionDiscontinuity {
pub cutoff: f64,
pub bandwidth: f64,
pub local_linear: bool,
pub fuzzy: bool,
}
impl Default for RegressionDiscontinuity {
fn default() -> Self {
Self {
cutoff: 0.0,
bandwidth: 1.0,
local_linear: true,
fuzzy: false,
}
}
}
impl RegressionDiscontinuity {
pub fn estimate(
&self,
running: ArrayView1<f64>,
outcome: ArrayView1<f64>,
treatment: Option<ArrayView1<f64>>,
) -> StatsResult<EstimationResult> {
let n = running.len();
if outcome.len() != n {
return Err(StatsError::DimensionMismatch(
"running and outcome must have equal length".to_owned(),
));
}
let in_window: Vec<usize> = (0..n)
.filter(|&i| (running[i] - self.cutoff).abs() <= self.bandwidth)
.collect();
if in_window.len() < 4 {
return Err(StatsError::InsufficientData(
"Too few observations within the bandwidth window".to_owned(),
));
}
let above: Vec<usize> = in_window
.iter()
.copied()
.filter(|&i| running[i] >= self.cutoff)
.collect();
let below: Vec<usize> = in_window
.iter()
.copied()
.filter(|&i| running[i] < self.cutoff)
.collect();
let (tau_above, _se_above) = local_linear_fit(&running, &outcome, &above, self.cutoff)?;
let (tau_below, _se_below) = local_linear_fit(&running, &outcome, &below, self.cutoff)?;
let reduced_form = tau_above - tau_below;
let (estimate, estimand) = if self.fuzzy {
let treat = treatment.ok_or_else(|| {
StatsError::InvalidArgument("Treatment vector required for fuzzy RD".to_owned())
})?;
let (t_above, _) = local_linear_fit(&running, &treat, &above, self.cutoff)?;
let (t_below, _) = local_linear_fit(&running, &treat, &below, self.cutoff)?;
let first_stage = t_above - t_below;
if first_stage.abs() < 1e-8 {
return Err(StatsError::ComputationError(
"Weak first stage in fuzzy RD".to_owned(),
));
}
(reduced_form / first_stage, "LATE (Fuzzy RD)")
} else {
(reduced_form, "ATE (Sharp RD)")
};
let se = rdd_se(&running, &outcome, &in_window, self.cutoff, estimate);
let mut res = EstimationResult::new(estimate, se, estimand, "RDD", n);
res.diagnostics
.insert("n_above".to_string(), above.len() as f64);
res.diagnostics
.insert("n_below".to_string(), below.len() as f64);
res.diagnostics
.insert("bandwidth".to_string(), self.bandwidth);
Ok(res)
}
pub fn ik_bandwidth(running: ArrayView1<f64>, outcome: ArrayView1<f64>, cutoff: f64) -> f64 {
let n = running.len() as f64;
let mean = running.mean().unwrap_or(cutoff);
let var = running.iter().map(|&r| (r - mean).powi(2)).sum::<f64>() / n;
let sigma_x = var.sqrt();
let _ = outcome; sigma_x * n.powf(-0.2)
}
}
fn local_linear_fit(
running: &ArrayView1<f64>,
outcome: &ArrayView1<f64>,
indices: &[usize],
cutoff: f64,
) -> StatsResult<(f64, f64)> {
let n = indices.len();
if n < 2 {
return Err(StatsError::InsufficientData(
"Need at least 2 observations for local linear fit".to_owned(),
));
}
let x_c: Vec<f64> = indices.iter().map(|&i| running[*&i] - cutoff).collect();
let y: Vec<f64> = indices.iter().map(|&i| outcome[*&i]).collect();
let mut s0 = 0.0_f64;
let mut s1 = 0.0_f64;
let mut s2 = 0.0_f64;
let mut t0 = 0.0_f64;
let mut t1 = 0.0_f64;
for k in 0..n {
s0 += 1.0;
s1 += x_c[k];
s2 += x_c[k].powi(2);
t0 += y[k];
t1 += x_c[k] * y[k];
}
let det = s0 * s2 - s1 * s1;
if det.abs() < 1e-12 {
return Err(StatsError::ComputationError(
"Degenerate local linear design matrix".to_owned(),
));
}
let intercept = (s2 * t0 - s1 * t1) / det;
let slope = (s0 * t1 - s1 * t0) / det;
let residuals: Vec<f64> = (0..n).map(|k| y[k] - intercept - slope * x_c[k]).collect();
let sigma2 = residuals.iter().map(|r| r * r).sum::<f64>() / (n.saturating_sub(2).max(1) as f64);
let se = (sigma2 * s2 / det.max(f64::EPSILON)).sqrt();
Ok((intercept, se))
}
fn rdd_se(
running: &ArrayView1<f64>,
_outcome: &ArrayView1<f64>,
in_window: &[usize],
_cutoff: f64,
_estimate: f64,
) -> f64 {
let nw = in_window.len() as f64;
let spread = running
.iter()
.filter(|&&r| in_window.iter().any(|&i| (running[i] - r).abs() < 1e-12))
.map(|&r| r)
.collect::<Vec<_>>();
let var_r = spread.iter().map(|&r| r * r).sum::<f64>() / nw.max(1.0);
(var_r / nw).sqrt()
}
pub struct SyntheticControlEstimator {
pub max_iter: usize,
pub tol: f64,
}
impl Default for SyntheticControlEstimator {
fn default() -> Self {
Self {
max_iter: 1000,
tol: 1e-7,
}
}
}
#[derive(Debug, Clone)]
pub struct SyntheticControlResult {
pub weights: Array1<f64>,
pub att_series: Array1<f64>,
pub att_mean: f64,
pub pre_rmspe: f64,
pub post_rmspe: f64,
}
impl SyntheticControlEstimator {
pub fn estimate(
&self,
treated_pre: ArrayView1<f64>,
donors_pre: ArrayView2<f64>,
treated_post: ArrayView1<f64>,
donors_post: ArrayView2<f64>,
) -> StatsResult<SyntheticControlResult> {
let t0 = treated_pre.len();
let j = donors_pre.ncols();
if donors_pre.nrows() != t0 {
return Err(StatsError::DimensionMismatch(
"donors_pre rows must match treated_pre length".to_owned(),
));
}
let t1 = treated_post.len();
if donors_post.nrows() != t1 || donors_post.ncols() != j {
return Err(StatsError::DimensionMismatch(
"donors_post dimensions inconsistent".to_owned(),
));
}
if j == 0 {
return Err(StatsError::InsufficientData(
"Need at least one donor unit".to_owned(),
));
}
let weights = self.fit_weights(treated_pre, donors_pre)?;
let pre_synth: Array1<f64> = (0..t0)
.map(|t| (0..j).map(|k| donors_pre[[t, k]] * weights[k]).sum::<f64>())
.collect();
let post_synth: Array1<f64> = (0..t1)
.map(|t| {
(0..j)
.map(|k| donors_post[[t, k]] * weights[k])
.sum::<f64>()
})
.collect();
let pre_rmspe = (pre_synth
.iter()
.zip(treated_pre.iter())
.map(|(&s, &y)| (s - y).powi(2))
.sum::<f64>()
/ t0 as f64)
.sqrt();
let att_series: Array1<f64> = treated_post
.iter()
.zip(post_synth.iter())
.map(|(&y, &s)| y - s)
.collect();
let att_mean = att_series.mean().unwrap_or(0.0);
let post_rmspe = (att_series.iter().map(|&d| d.powi(2)).sum::<f64>() / t1 as f64).sqrt();
Ok(SyntheticControlResult {
weights,
att_series,
att_mean,
pre_rmspe,
post_rmspe,
})
}
fn fit_weights(
&self,
target: ArrayView1<f64>,
donors: ArrayView2<f64>,
) -> StatsResult<Array1<f64>> {
let (t0, j) = donors.dim();
let mut w = Array1::<f64>::from_elem(j, 1.0 / j as f64);
for _iter in 0..self.max_iter {
let synth: Array1<f64> = (0..t0)
.map(|t| (0..j).map(|k| donors[[t, k]] * w[k]).sum::<f64>())
.collect();
let residual: Array1<f64> = (0..t0).map(|t| target[t] - synth[t]).collect();
let mut grad = Array1::<f64>::zeros(j);
for k in 0..j {
for t in 0..t0 {
grad[k] -= 2.0 * donors[[t, k]] * residual[t];
}
}
let lr = 0.01;
let mut w_new = Array1::<f64>::zeros(j);
for k in 0..j {
w_new[k] = (w[k] - lr * grad[k]).max(0.0);
}
let s = w_new.sum();
if s > 1e-10 {
w_new.mapv_inplace(|x| x / s);
} else {
w_new.fill(1.0 / j as f64);
}
let diff: f64 = w_new
.iter()
.zip(w.iter())
.map(|(&a, &b)| (a - b).powi(2))
.sum::<f64>()
.sqrt();
w = w_new;
if diff < self.tol {
break;
}
}
Ok(w)
}
}
pub struct DifferenceInDifferences {
pub n_pre_periods: usize,
}
impl Default for DifferenceInDifferences {
fn default() -> Self {
Self { n_pre_periods: 2 }
}
}
#[derive(Debug, Clone)]
pub struct DiDResult {
pub att: f64,
pub std_error: f64,
pub conf_interval: [f64; 2],
pub p_value: f64,
pub parallel_trends_p: f64,
pub parallel_trends_ok: bool,
pub n_treated: usize,
pub n_control: usize,
}
impl DifferenceInDifferences {
pub fn estimate(
&self,
outcomes_pre: ArrayView2<f64>,
outcomes_post: ArrayView2<f64>,
treatment: ArrayView1<f64>,
) -> StatsResult<DiDResult> {
let n = treatment.len();
if outcomes_pre.nrows() != n || outcomes_post.nrows() != n {
return Err(StatsError::DimensionMismatch(
"outcome arrays must have n_units rows".to_owned(),
));
}
let n_post = outcomes_post.ncols();
let n_pre = outcomes_pre.ncols();
let treated_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] > 0.5).collect();
let control_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] <= 0.5).collect();
if treated_idx.is_empty() || control_idx.is_empty() {
return Err(StatsError::InsufficientData(
"Need both treated and control units".to_owned(),
));
}
let mean_pre_treated = group_mean(&outcomes_pre, &treated_idx);
let mean_pre_control = group_mean(&outcomes_pre, &control_idx);
let mean_post_treated = group_mean(&outcomes_post, &treated_idx);
let mean_post_control = group_mean(&outcomes_post, &control_idx);
let diff_treated = mean_post_treated - mean_pre_treated;
let diff_control = mean_post_control - mean_pre_control;
let att = diff_treated - diff_control;
let se = did_bootstrap_se(
&outcomes_pre,
&outcomes_post,
&treatment,
&treated_idx,
&control_idx,
100,
);
let z = att / se.max(f64::EPSILON);
let p_value = two_sided_p(z);
let margin = 1.959_964 * se;
let parallel_trends_p =
parallel_trends_test(&outcomes_pre, &treatment, &treated_idx, &control_idx);
Ok(DiDResult {
att,
std_error: se,
conf_interval: [att - margin, att + margin],
p_value,
parallel_trends_p,
parallel_trends_ok: parallel_trends_p > 0.05,
n_treated: treated_idx.len(),
n_control: control_idx.len(),
})
}
}
fn group_mean(outcomes: &ArrayView2<f64>, indices: &[usize]) -> f64 {
if indices.is_empty() {
return 0.0;
}
let total: f64 = indices
.iter()
.flat_map(|&i| outcomes.row(i).iter().copied().collect::<Vec<_>>())
.sum();
total / (indices.len() * outcomes.ncols()) as f64
}
fn did_bootstrap_se(
pre: &ArrayView2<f64>,
post: &ArrayView2<f64>,
treatment: &ArrayView1<f64>,
_treated: &[usize],
_control: &[usize],
n_boot: usize,
) -> f64 {
let n = treatment.len();
let mut ests = Vec::with_capacity(n_boot);
let mut rng: u64 = 99991;
for _ in 0..n_boot {
let mut t_pre = 0.0_f64;
let mut t_post = 0.0_f64;
let mut c_pre = 0.0_f64;
let mut c_post = 0.0_f64;
let mut nt = 0.0_f64;
let mut nc = 0.0_f64;
for _ in 0..n {
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
let idx = (rng >> 33) as usize % n;
let ti = treatment[idx];
let pre_mean = pre.row(idx).mean().unwrap_or(0.0);
let post_mean = post.row(idx).mean().unwrap_or(0.0);
if ti > 0.5 {
t_pre += pre_mean;
t_post += post_mean;
nt += 1.0;
} else {
c_pre += pre_mean;
c_post += post_mean;
nc += 1.0;
}
}
if nt > 0.0 && nc > 0.0 {
let att_b = (t_post / nt - t_pre / nt) - (c_post / nc - c_pre / nc);
ests.push(att_b);
}
}
if ests.is_empty() {
return 0.0;
}
let mean = ests.iter().sum::<f64>() / ests.len() as f64;
let var =
ests.iter().map(|&e| (e - mean).powi(2)).sum::<f64>() / (ests.len() - 1).max(1) as f64;
var.sqrt()
}
fn parallel_trends_test(
outcomes_pre: &ArrayView2<f64>,
treatment: &ArrayView1<f64>,
treated_idx: &[usize],
control_idx: &[usize],
) -> f64 {
let n_pre = outcomes_pre.ncols();
if n_pre < 2 {
return 1.0; }
let mut treated_changes = Vec::new();
let mut control_changes = Vec::new();
for t in 1..n_pre {
let tc: f64 = treated_idx
.iter()
.map(|&i| outcomes_pre[[i, t]] - outcomes_pre[[i, t - 1]])
.sum::<f64>()
/ treated_idx.len().max(1) as f64;
let cc: f64 = control_idx
.iter()
.map(|&i| outcomes_pre[[i, t]] - outcomes_pre[[i, t - 1]])
.sum::<f64>()
/ control_idx.len().max(1) as f64;
treated_changes.push(tc);
control_changes.push(cc);
}
let n_t = treated_changes.len();
let n_c = control_changes.len();
let mu_t = treated_changes.iter().sum::<f64>() / n_t as f64;
let mu_c = control_changes.iter().sum::<f64>() / n_c as f64;
let var_t = treated_changes
.iter()
.map(|&x| (x - mu_t).powi(2))
.sum::<f64>()
/ n_t.saturating_sub(1).max(1) as f64;
let var_c = control_changes
.iter()
.map(|&x| (x - mu_c).powi(2))
.sum::<f64>()
/ n_c.saturating_sub(1).max(1) as f64;
let se = (var_t / n_t as f64 + var_c / n_c as f64).sqrt();
if se < f64::EPSILON {
return 1.0;
}
let t_stat = (mu_t - mu_c) / se;
two_sided_p(t_stat)
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::array;
#[test]
fn test_ipw_estimator_simple() {
let cov = Array2::<f64>::from_shape_fn((20, 2), |(i, j)| {
if j == 0 {
if i < 10 {
1.0
} else {
0.0
}
} else {
(i as f64).sin()
}
});
let treat = Array1::from_iter((0..20).map(|i| if i < 10 { 1.0 } else { 0.0 }));
let outcome = Array1::from_iter((0..20).map(|i| if i < 10 { 2.0 } else { 0.0 }));
let est = IPWEstimator::default();
let res = est
.estimate(cov.view(), treat.view(), outcome.view())
.unwrap();
assert!((res.estimate - 2.0).abs() < 1.0, "ATE={}", res.estimate);
}
#[test]
fn test_doubly_robust() {
let n = 30;
let cov = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| i as f64 / n as f64);
let treat = Array1::from_iter((0..n).map(|i| if i < n / 2 { 1.0 } else { 0.0 }));
let outcome = Array1::from_iter((0..n).map(|i| {
if i < n / 2 {
1.0 + i as f64 * 0.01
} else {
i as f64 * 0.01
}
}));
let est = DoublyRobustEstimator::default();
let res = est
.estimate(cov.view(), treat.view(), outcome.view())
.unwrap();
assert!(res.estimate.is_finite());
}
#[test]
fn test_nn_matching() {
let n = 20;
let cov = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| i as f64);
let treat = Array1::from_iter((0..n).map(|i| if i % 2 == 0 { 1.0 } else { 0.0 }));
let outcome = Array1::from_iter((0..n).map(|i| if i % 2 == 0 { 2.0 } else { 0.0 }));
let est = NearestNeighborMatching::default();
let res = est
.estimate(cov.view(), treat.view(), outcome.view())
.unwrap();
assert!((res.estimate - 2.0).abs() < 0.5, "ATT={}", res.estimate);
}
#[test]
fn test_rdd_sharp() {
let n = 40;
let running = Array1::from_iter((0..n).map(|i| -2.0 + i as f64 * 4.0 / n as f64));
let outcome = Array1::from_iter((0..n).map(|i| {
if running[i] >= 0.0 {
3.0 + running[i] * 0.5
} else {
running[i] * 0.5
}
}));
let rdd = RegressionDiscontinuity {
cutoff: 0.0,
bandwidth: 1.5,
local_linear: true,
fuzzy: false,
};
let res = rdd.estimate(running.view(), outcome.view(), None).unwrap();
assert!(
(res.estimate - 3.0).abs() < 1.0,
"RDD estimate={}",
res.estimate
);
}
#[test]
fn test_synthetic_control() {
let treated_pre = array![1.0, 2.0, 3.0, 4.0, 5.0];
let donors_pre =
Array2::from_shape_fn((5, 3), |(t, j)| (t + 1) as f64 * [1.1, 0.9, 1.0][j]);
let treated_post = array![8.0, 9.0];
let donors_post =
Array2::from_shape_fn((2, 3), |(t, j)| (t + 6) as f64 * [1.1, 0.9, 1.0][j]);
let est = SyntheticControlEstimator::default();
let res = est
.estimate(
treated_pre.view(),
donors_pre.view(),
treated_post.view(),
donors_post.view(),
)
.unwrap();
assert!((res.weights.sum() - 1.0).abs() < 1e-5);
assert!(res.att_series.len() == 2);
}
#[test]
fn test_did() {
let n = 20;
let pre = Array2::from_shape_fn((n, 3), |(i, t)| (i as f64 * 0.1) + t as f64 * 0.5);
let post = Array2::from_shape_fn((n, 2), |(i, t)| {
let base = (i as f64 * 0.1) + 1.5 + t as f64 * 0.5;
if i < 10 {
base + 2.0
} else {
base
}
});
let treat = Array1::from_iter((0..n).map(|i| if i < 10 { 1.0 } else { 0.0 }));
let did = DifferenceInDifferences { n_pre_periods: 3 };
let res = did.estimate(pre.view(), post.view(), treat.view()).unwrap();
assert!((res.att - 2.0).abs() < 0.3, "DiD ATT={}", res.att);
}
}