use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
#[derive(Debug, Clone)]
pub struct SyntheticControlResult {
pub weights: Array1<f64>,
pub gap_series: Array1<f64>,
pub pre_rmspe: f64,
pub post_rmspe: f64,
pub rmspe_ratio: f64,
pub synthetic_series: Array1<f64>,
pub placebo_p_value: Option<f64>,
pub placebo_results: Option<Vec<PlaceboResult>>,
pub v_weights: Option<Array1<f64>>,
}
#[derive(Debug, Clone)]
pub struct PlaceboResult {
pub donor_index: usize,
pub pre_rmspe: f64,
pub post_rmspe: f64,
pub rmspe_ratio: f64,
pub gap_series: Array1<f64>,
}
pub struct SyntheticControlEstimator {
pub max_iter: usize,
pub tol: f64,
pub run_placebo: bool,
pub v_weights: Option<Vec<f64>>,
}
impl SyntheticControlEstimator {
pub fn new() -> Self {
Self {
max_iter: 5000,
tol: 1e-8,
run_placebo: false,
v_weights: None,
}
}
pub fn with_placebo() -> Self {
Self {
max_iter: 5000,
tol: 1e-8,
run_placebo: true,
v_weights: None,
}
}
pub fn set_v_weights(mut self, v: Vec<f64>) -> Self {
self.v_weights = Some(v);
self
}
pub fn set_optimization(mut self, max_iter: usize, tol: f64) -> Self {
self.max_iter = max_iter;
self.tol = tol;
self
}
pub fn fit(
&self,
y_treated_pre: &ArrayView1<f64>,
y_treated_post: &ArrayView1<f64>,
y_donors_pre: &ArrayView2<f64>,
y_donors_post: &ArrayView2<f64>,
) -> StatsResult<SyntheticControlResult> {
let t_pre = y_treated_pre.len();
let t_post = y_treated_post.len();
let n_donors = y_donors_pre.ncols();
if t_pre == 0 {
return Err(StatsError::InsufficientData(
"Need at least one pre-treatment period".into(),
));
}
if t_post == 0 {
return Err(StatsError::InsufficientData(
"Need at least one post-treatment period".into(),
));
}
if n_donors == 0 {
return Err(StatsError::InvalidArgument(
"Need at least one donor unit".into(),
));
}
if y_donors_pre.nrows() != t_pre {
return Err(StatsError::DimensionMismatch(
"y_donors_pre rows must equal y_treated_pre length".into(),
));
}
if y_donors_post.nrows() != t_post {
return Err(StatsError::DimensionMismatch(
"y_donors_post rows must equal y_treated_post length".into(),
));
}
if y_donors_post.ncols() != n_donors {
return Err(StatsError::DimensionMismatch(
"y_donors_post must have same number of donors as y_donors_pre".into(),
));
}
let v_diag = self.get_v_diag(t_pre)?;
let weights = self.optimize_weights(y_treated_pre, y_donors_pre, &v_diag)?;
let synthetic_pre = y_donors_pre.dot(&weights);
let synthetic_post = y_donors_post.dot(&weights);
let gap_series = y_treated_post.to_owned() - &synthetic_post;
let pre_gap = y_treated_pre.to_owned() - &synthetic_pre;
let pre_rmspe = rmspe(&pre_gap);
let post_rmspe = rmspe(&gap_series);
let rmspe_ratio = if pre_rmspe > 1e-15 {
post_rmspe / pre_rmspe
} else {
f64::INFINITY
};
let mut synthetic_series = Array1::<f64>::zeros(t_pre + t_post);
for t in 0..t_pre {
synthetic_series[t] = synthetic_pre[t];
}
for t in 0..t_post {
synthetic_series[t_pre + t] = synthetic_post[t];
}
let (placebo_p, placebo_results) = if self.run_placebo {
let (p, results) = self.run_placebo_tests(
y_treated_pre,
y_treated_post,
y_donors_pre,
y_donors_post,
rmspe_ratio,
&v_diag,
)?;
(Some(p), Some(results))
} else {
(None, None)
};
Ok(SyntheticControlResult {
weights,
gap_series,
pre_rmspe,
post_rmspe,
rmspe_ratio,
synthetic_series,
placebo_p_value: placebo_p,
placebo_results,
v_weights: Some(v_diag),
})
}
pub fn fit_with_predictors(
&self,
predictors_treated: &ArrayView1<f64>,
predictors_donors: &ArrayView2<f64>,
y_treated_pre: &ArrayView1<f64>,
y_donors_pre: &ArrayView2<f64>,
y_treated_post: &ArrayView1<f64>,
y_donors_post: &ArrayView2<f64>,
) -> StatsResult<SyntheticControlResult> {
let n_pred = predictors_treated.len();
let t_pre = y_treated_pre.len();
let n_donors = y_donors_pre.ncols();
if predictors_donors.nrows() != n_pred {
return Err(StatsError::DimensionMismatch(
"predictors_donors rows must match predictors_treated length".into(),
));
}
if predictors_donors.ncols() != n_donors {
return Err(StatsError::DimensionMismatch(
"predictors_donors columns must match number of donors".into(),
));
}
let total_rows = n_pred + t_pre;
let mut x_treated = Array1::<f64>::zeros(total_rows);
let mut x_donors = Array2::<f64>::zeros((total_rows, n_donors));
for p in 0..n_pred {
x_treated[p] = predictors_treated[p];
for j in 0..n_donors {
x_donors[[p, j]] = predictors_donors[[p, j]];
}
}
for t in 0..t_pre {
x_treated[n_pred + t] = y_treated_pre[t];
for j in 0..n_donors {
x_donors[[n_pred + t, j]] = y_donors_pre[[t, j]];
}
}
let v_diag = self.get_v_diag(total_rows)?;
let weights = self.optimize_weights(&x_treated.view(), &x_donors.view(), &v_diag)?;
let synthetic_pre = y_donors_pre.dot(&weights);
let synthetic_post = y_donors_post.dot(&weights);
let gap_series = y_treated_post.to_owned() - &synthetic_post;
let pre_gap = y_treated_pre.to_owned() - &synthetic_pre;
let pre_rmspe = rmspe(&pre_gap);
let post_rmspe = rmspe(&gap_series);
let rmspe_ratio = if pre_rmspe > 1e-15 {
post_rmspe / pre_rmspe
} else {
f64::INFINITY
};
let t_post = y_treated_post.len();
let mut synthetic_series = Array1::<f64>::zeros(t_pre + t_post);
for t in 0..t_pre {
synthetic_series[t] = synthetic_pre[t];
}
for t in 0..t_post {
synthetic_series[t_pre + t] = synthetic_post[t];
}
Ok(SyntheticControlResult {
weights,
gap_series,
pre_rmspe,
post_rmspe,
rmspe_ratio,
synthetic_series,
placebo_p_value: None,
placebo_results: None,
v_weights: Some(v_diag),
})
}
fn get_v_diag(&self, n_rows: usize) -> StatsResult<Array1<f64>> {
match &self.v_weights {
Some(v) => {
if v.len() != n_rows {
return Err(StatsError::DimensionMismatch(format!(
"V weights length {} != number of predictors/periods {n_rows}",
v.len()
)));
}
Ok(Array1::from_vec(v.clone()))
}
None => Ok(Array1::ones(n_rows)),
}
}
fn optimize_weights(
&self,
x_treated: &ArrayView1<f64>,
x_donors: &ArrayView2<f64>,
v_diag: &Array1<f64>,
) -> StatsResult<Array1<f64>> {
let n_donors = x_donors.ncols();
let n_rows = x_donors.nrows();
if n_donors == 0 {
return Err(StatsError::InvalidArgument(
"Need at least one donor".into(),
));
}
let v_sqrt: Array1<f64> = v_diag.mapv(|v| v.max(0.0).sqrt());
let mut x0_v = Array2::<f64>::zeros((n_rows, n_donors));
let mut x1_v = Array1::<f64>::zeros(n_rows);
for t in 0..n_rows {
x1_v[t] = x_treated[t] * v_sqrt[t];
for j in 0..n_donors {
x0_v[[t, j]] = x_donors[[t, j]] * v_sqrt[t];
}
}
let x0t_x0 = x0_v.t().dot(&x0_v);
let x0t_x1 = x0_v.t().dot(&x1_v);
let max_row_sum: f64 = x0t_x0
.rows()
.into_iter()
.map(|row| row.iter().map(|&v| v.abs()).sum::<f64>())
.fold(0.0_f64, f64::max);
let lr = if max_row_sum > 0.0 {
0.5 / max_row_sum
} else {
1e-3
};
let mut w = Array1::from_elem(n_donors, 1.0 / n_donors as f64);
for _ in 0..self.max_iter {
let grad = x0t_x0.dot(&w) - &x0t_x1;
let w_new_raw = &w - &grad.mapv(|g| g * lr);
let w_new = project_simplex(&w_new_raw.view());
let diff: f64 = (&w_new - &w).iter().map(|&d| d * d).sum::<f64>().sqrt();
w = w_new;
if diff < self.tol {
break;
}
}
Ok(w)
}
fn run_placebo_tests(
&self,
y_treated_pre: &ArrayView1<f64>,
y_treated_post: &ArrayView1<f64>,
y_donors_pre: &ArrayView2<f64>,
y_donors_post: &ArrayView2<f64>,
treated_rmspe_ratio: f64,
v_diag: &Array1<f64>,
) -> StatsResult<(f64, Vec<PlaceboResult>)> {
let t_pre = y_treated_pre.len();
let t_post = y_treated_post.len();
let n_donors = y_donors_pre.ncols();
let mut placebo_results = Vec::with_capacity(n_donors);
let mut n_extreme = 0_usize;
for d in 0..n_donors {
let placebo_treated_pre = y_donors_pre.column(d).to_owned();
let placebo_treated_post = y_donors_post.column(d).to_owned();
let n_placebo_donors = n_donors; let mut placebo_donors_pre = Array2::<f64>::zeros((t_pre, n_placebo_donors));
let mut placebo_donors_post = Array2::<f64>::zeros((t_post, n_placebo_donors));
let mut col = 0;
for j in 0..n_donors {
if j == d {
for t in 0..t_pre {
placebo_donors_pre[[t, col]] = y_treated_pre[t];
}
for t in 0..t_post {
placebo_donors_post[[t, col]] = y_treated_post[t];
}
} else {
for t in 0..t_pre {
placebo_donors_pre[[t, col]] = y_donors_pre[[t, j]];
}
for t in 0..t_post {
placebo_donors_post[[t, col]] = y_donors_post[[t, j]];
}
}
col += 1;
}
let placebo_weights = match self.optimize_weights(
&placebo_treated_pre.view(),
&placebo_donors_pre.view(),
v_diag,
) {
Ok(w) => w,
Err(_) => continue, };
let synth_pre = placebo_donors_pre.dot(&placebo_weights);
let synth_post = placebo_donors_post.dot(&placebo_weights);
let pre_gap = placebo_treated_pre - &synth_pre;
let post_gap = placebo_treated_post - &synth_post;
let pre_rmspe_p = rmspe(&pre_gap);
let post_rmspe_p = rmspe(&post_gap);
let ratio = if pre_rmspe_p > 1e-15 {
post_rmspe_p / pre_rmspe_p
} else {
f64::INFINITY
};
if ratio >= treated_rmspe_ratio {
n_extreme += 1;
}
placebo_results.push(PlaceboResult {
donor_index: d,
pre_rmspe: pre_rmspe_p,
post_rmspe: post_rmspe_p,
rmspe_ratio: ratio,
gap_series: post_gap,
});
}
let p_value = (n_extreme as f64 + 1.0) / (n_donors as f64 + 1.0);
Ok((p_value, placebo_results))
}
}
impl Default for SyntheticControlEstimator {
fn default() -> Self {
Self::new()
}
}
fn rmspe(gaps: &Array1<f64>) -> f64 {
if gaps.is_empty() {
return 0.0;
}
let mse: f64 = gaps.iter().map(|&g| g * g).sum::<f64>() / gaps.len() as f64;
mse.sqrt()
}
fn project_simplex(v: &ArrayView1<f64>) -> Array1<f64> {
let n = v.len();
let mut u: Vec<f64> = v.to_vec();
u.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
let mut rho = 0_usize;
let mut cum = 0.0_f64;
for (j, &uj) in u.iter().enumerate() {
cum += uj;
if uj - (cum - 1.0) / (j as f64 + 1.0) > 0.0 {
rho = j;
}
}
let cum_rho: f64 = u[..=rho].iter().sum();
let theta = (cum_rho - 1.0) / (rho as f64 + 1.0);
v.mapv(|vi| (vi - theta).max(0.0))
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::{Array1, Array2};
#[test]
fn test_synthetic_control_perfect_fit() {
let t_pre = 20;
let t_post = 10;
let n_donors = 3;
let y_treated_pre: Array1<f64> = (0..t_pre).map(|t| t as f64).collect();
let y_treated_post: Array1<f64> = (t_pre..t_pre + t_post).map(|t| t as f64 + 5.0).collect();
let mut y_donors_pre = Array2::<f64>::zeros((t_pre, n_donors));
let mut y_donors_post = Array2::<f64>::zeros((t_post, n_donors));
for t in 0..t_pre {
y_donors_pre[[t, 0]] = t as f64; y_donors_pre[[t, 1]] = 100.0 + t as f64 * 3.0; y_donors_pre[[t, 2]] = -50.0 + (t as f64).powi(2); }
for t in 0..t_post {
let tt = (t_pre + t) as f64;
y_donors_post[[t, 0]] = tt;
y_donors_post[[t, 1]] = 100.0 + tt * 3.0;
y_donors_post[[t, 2]] = -50.0 + tt.powi(2);
}
let sc = SyntheticControlEstimator::new().set_optimization(10000, 1e-10);
let res = sc
.fit(
&y_treated_pre.view(),
&y_treated_post.view(),
&y_donors_pre.view(),
&y_donors_post.view(),
)
.expect("SC should succeed");
let wsum: f64 = res.weights.iter().sum();
assert!(
(wsum - 1.0).abs() < 1e-6,
"Weights should sum to 1, got {}",
wsum
);
assert!(res.weights.iter().all(|&w| w >= -1e-10));
assert!(
res.pre_rmspe < 2.0,
"Pre-RMSPE should be small, got {}",
res.pre_rmspe
);
let max_w = res
.weights
.iter()
.cloned()
.fold(f64::NEG_INFINITY, f64::max);
assert!(
(res.weights[0] - max_w).abs() < 1e-6,
"Donor 0 should get largest weight (w0={}, max={})",
res.weights[0],
max_w
);
}
#[test]
fn test_synthetic_control_treatment_effect() {
let t_pre = 15;
let t_post = 10;
let te = 10.0_f64;
let y_treated_pre: Array1<f64> = (0..t_pre).map(|t| t as f64).collect();
let y_treated_post: Array1<f64> = (t_pre..t_pre + t_post).map(|t| t as f64 + te).collect();
let mut y_donors_pre = Array2::<f64>::zeros((t_pre, 2));
let mut y_donors_post = Array2::<f64>::zeros((t_post, 2));
for t in 0..t_pre {
y_donors_pre[[t, 0]] = t as f64;
y_donors_pre[[t, 1]] = t as f64 + 0.1;
}
for t in 0..t_post {
let tt = (t_pre + t) as f64;
y_donors_post[[t, 0]] = tt;
y_donors_post[[t, 1]] = tt + 0.1;
}
let sc = SyntheticControlEstimator::new();
let res = sc
.fit(
&y_treated_pre.view(),
&y_treated_post.view(),
&y_donors_pre.view(),
&y_donors_post.view(),
)
.expect("SC should succeed");
let avg_gap: f64 = res.gap_series.iter().sum::<f64>() / res.gap_series.len() as f64;
assert!(
(avg_gap - te).abs() < 2.0,
"Average gap should be ~{te}, got {}",
avg_gap
);
}
#[test]
fn test_synthetic_control_placebo() {
let t_pre = 10;
let t_post = 5;
let n_donors = 3;
let y_treated_pre: Array1<f64> = (0..t_pre).map(|t| t as f64).collect();
let y_treated_post: Array1<f64> = (t_pre..t_pre + t_post).map(|t| t as f64).collect();
let mut y_donors_pre = Array2::<f64>::zeros((t_pre, n_donors));
let mut y_donors_post = Array2::<f64>::zeros((t_post, n_donors));
for t in 0..t_pre {
for j in 0..n_donors {
y_donors_pre[[t, j]] = t as f64 + (j as f64) * 0.01;
}
}
for t in 0..t_post {
let tt = (t_pre + t) as f64;
for j in 0..n_donors {
y_donors_post[[t, j]] = tt + (j as f64) * 0.01;
}
}
let sc = SyntheticControlEstimator::with_placebo();
let res = sc
.fit(
&y_treated_pre.view(),
&y_treated_post.view(),
&y_donors_pre.view(),
&y_donors_post.view(),
)
.expect("SC with placebo should succeed");
assert!(
res.placebo_p_value.is_some(),
"Placebo p-value should be computed"
);
let p = res.placebo_p_value.expect("checked above");
assert!(p > 0.0 && p <= 1.0, "p-value should be in (0, 1], got {p}");
}
#[test]
fn test_synthetic_control_with_predictors() {
let t_pre = 10;
let t_post = 5;
let n_donors = 3;
let n_pred = 2;
let pred_treated = Array1::from_vec(vec![10.0, 20.0]);
let mut pred_donors = Array2::<f64>::zeros((n_pred, n_donors));
pred_donors[[0, 0]] = 10.0;
pred_donors[[1, 0]] = 20.0; pred_donors[[0, 1]] = 5.0;
pred_donors[[1, 1]] = 25.0;
pred_donors[[0, 2]] = 15.0;
pred_donors[[1, 2]] = 15.0;
let y_treated_pre: Array1<f64> = (0..t_pre).map(|t| t as f64).collect();
let y_treated_post: Array1<f64> = (t_pre..t_pre + t_post).map(|t| t as f64 + 3.0).collect();
let mut y_donors_pre = Array2::<f64>::zeros((t_pre, n_donors));
let mut y_donors_post = Array2::<f64>::zeros((t_post, n_donors));
for t in 0..t_pre {
y_donors_pre[[t, 0]] = t as f64;
y_donors_pre[[t, 1]] = t as f64 * 0.5;
y_donors_pre[[t, 2]] = t as f64 * 1.5;
}
for t in 0..t_post {
let tt = (t_pre + t) as f64;
y_donors_post[[t, 0]] = tt;
y_donors_post[[t, 1]] = tt * 0.5;
y_donors_post[[t, 2]] = tt * 1.5;
}
let sc = SyntheticControlEstimator::new();
let res = sc
.fit_with_predictors(
&pred_treated.view(),
&pred_donors.view(),
&y_treated_pre.view(),
&y_donors_pre.view(),
&y_treated_post.view(),
&y_donors_post.view(),
)
.expect("SC with predictors should succeed");
let wsum: f64 = res.weights.iter().sum();
assert!((wsum - 1.0).abs() < 1e-6);
assert!(res.weights.iter().all(|&w| w >= -1e-10));
}
#[test]
fn test_synthetic_control_v_weights() {
let t_pre = 10;
let t_post = 5;
let y_treated_pre: Array1<f64> = (0..t_pre).map(|t| t as f64).collect();
let y_treated_post: Array1<f64> = (t_pre..t_pre + t_post).map(|t| t as f64).collect();
let mut y_donors_pre = Array2::<f64>::zeros((t_pre, 2));
let mut y_donors_post = Array2::<f64>::zeros((t_post, 2));
for t in 0..t_pre {
y_donors_pre[[t, 0]] = t as f64 + 0.1;
y_donors_pre[[t, 1]] = t as f64 * 2.0;
}
for t in 0..t_post {
let tt = (t_pre + t) as f64;
y_donors_post[[t, 0]] = tt + 0.1;
y_donors_post[[t, 1]] = tt * 2.0;
}
let v = (0..t_pre)
.map(|t| (t as f64 + 1.0) / t_pre as f64)
.collect();
let sc = SyntheticControlEstimator::new().set_v_weights(v);
let res = sc
.fit(
&y_treated_pre.view(),
&y_treated_post.view(),
&y_donors_pre.view(),
&y_donors_post.view(),
)
.expect("SC with V weights should succeed");
let wsum: f64 = res.weights.iter().sum();
assert!((wsum - 1.0).abs() < 1e-6);
assert!(res.v_weights.is_some());
}
}