1use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
29
30use crate::error::{StatsError, StatsResult};
31
32#[derive(Debug, Clone)]
38pub struct EstimationResult {
39 pub estimate: f64,
41 pub std_error: f64,
43 pub conf_interval: [f64; 2],
45 pub p_value: f64,
47 pub estimand: String,
49 pub estimator: String,
51 pub n_obs: usize,
53 pub diagnostics: std::collections::HashMap<String, f64>,
55}
56
57impl EstimationResult {
58 fn new(
59 estimate: f64,
60 std_error: f64,
61 estimand: impl Into<String>,
62 estimator: impl Into<String>,
63 n_obs: usize,
64 ) -> Self {
65 let z = 1.959_964; let margin = z * std_error;
67 let p = two_sided_p(estimate / std_error.max(f64::EPSILON));
68 Self {
69 estimate,
70 std_error,
71 conf_interval: [estimate - margin, estimate + margin],
72 p_value: p,
73 estimand: estimand.into(),
74 estimator: estimator.into(),
75 n_obs,
76 diagnostics: std::collections::HashMap::new(),
77 }
78 }
79
80 fn with_diagnostic(mut self, key: impl Into<String>, val: f64) -> Self {
81 self.diagnostics.insert(key.into(), val);
82 self
83 }
84}
85
86fn two_sided_p(z: f64) -> f64 {
91 2.0 * (1.0 - normal_cdf(z.abs()))
92}
93
94fn normal_cdf(x: f64) -> f64 {
95 0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
96}
97
98fn erf(x: f64) -> f64 {
99 let t = 1.0 / (1.0 + 0.3275911 * x.abs());
101 let poly = t
102 * (0.254_829_592
103 + t * (-0.284_496_736
104 + t * (1.421_413_741 + t * (-1.453_152_027 + t * 1.061_405_429))));
105 let sign = if x >= 0.0 { 1.0 } else { -1.0 };
106 sign * (1.0 - poly * (-x * x).exp())
107}
108
109fn logistic_regression(
116 x: ArrayView2<f64>,
117 y: ArrayView1<f64>,
118 max_iter: usize,
119 lr: f64,
120 tol: f64,
121) -> StatsResult<Array1<f64>> {
122 let (n, p) = x.dim();
123 let mut coef = Array1::<f64>::zeros(p + 1);
124
125 for _iter in 0..max_iter {
126 let mut grad = Array1::<f64>::zeros(p + 1);
128 let mut loss = 0.0_f64;
129 for i in 0..n {
130 let xi = x.row(i);
131 let linear: f64 = coef[0]
132 + xi.iter()
133 .zip(coef.iter().skip(1))
134 .map(|(a, b)| a * b)
135 .sum::<f64>();
136 let prob = 1.0 / (1.0 + (-linear).exp());
137 let err = prob - y[i];
138 loss += -(y[i] * prob.ln() + (1.0 - y[i]) * (1.0 - prob).ln());
139 grad[0] += err;
140 for j in 0..p {
141 grad[j + 1] += err * xi[j];
142 }
143 }
144 loss /= n as f64;
145 for j in 0..=(p) {
146 coef[j] -= lr * grad[j] / n as f64;
147 }
148 if grad.iter().map(|g| g * g).sum::<f64>().sqrt() / (n as f64) < tol {
149 break;
150 }
151 let _ = loss;
152 }
153 Ok(coef)
154}
155
156fn predict_proba(x: ArrayView2<f64>, coef: &Array1<f64>) -> Array1<f64> {
157 let (n, p) = x.dim();
158 let mut probs = Array1::<f64>::zeros(n);
159 for i in 0..n {
160 let xi = x.row(i);
161 let linear: f64 = coef[0]
162 + xi.iter()
163 .zip(coef.iter().skip(1))
164 .map(|(a, b)| a * b)
165 .sum::<f64>();
166 probs[i] = 1.0 / (1.0 + (-linear).exp());
167 }
168 probs
169}
170
171pub struct IPWEstimator {
181 pub stabilised: bool,
183 pub max_iter: usize,
185}
186
187impl Default for IPWEstimator {
188 fn default() -> Self {
189 Self {
190 stabilised: true,
191 max_iter: 500,
192 }
193 }
194}
195
196impl IPWEstimator {
197 pub fn estimate(
204 &self,
205 covariates: ArrayView2<f64>,
206 treatment: ArrayView1<f64>,
207 outcome: ArrayView1<f64>,
208 ) -> StatsResult<EstimationResult> {
209 let n = outcome.len();
210 if covariates.nrows() != n || treatment.len() != n {
211 return Err(StatsError::DimensionMismatch(
212 "Covariate, treatment, and outcome dimensions must match".to_owned(),
213 ));
214 }
215
216 let coef = logistic_regression(covariates, treatment, self.max_iter, 0.1, 1e-6)?;
218 let ps = predict_proba(covariates, &coef);
219
220 let eps = 1e-6_f64;
222 let ps_clip: Array1<f64> = ps.mapv(|p| p.clamp(eps, 1.0 - eps));
223
224 let mut ate_sum = 0.0_f64;
226 let mut w1_sum = 0.0_f64;
227 let mut w0_sum = 0.0_f64;
228
229 for i in 0..n {
230 let ti = treatment[i];
231 let yi = outcome[i];
232 let ei = ps_clip[i];
233 if self.stabilised {
234 w1_sum += ti / ei;
235 w0_sum += (1.0 - ti) / (1.0 - ei);
236 ate_sum += ti * yi / ei - (1.0 - ti) * yi / (1.0 - ei);
237 } else {
238 ate_sum += ti * yi / ei - (1.0 - ti) * yi / (1.0 - ei);
239 }
240 }
241
242 let ate = if self.stabilised {
243 let mu1 = ate_sum / n as f64 + (w0_sum / n as f64) * 0.0; let mu1_h: f64 = (0..n)
246 .map(|i| treatment[i] * outcome[i] / ps_clip[i])
247 .sum::<f64>()
248 / (0..n)
249 .map(|i| treatment[i] / ps_clip[i])
250 .sum::<f64>()
251 .max(f64::EPSILON);
252 let mu0_h: f64 = (0..n)
253 .map(|i| (1.0 - treatment[i]) * outcome[i] / (1.0 - ps_clip[i]))
254 .sum::<f64>()
255 / (0..n)
256 .map(|i| (1.0 - treatment[i]) / (1.0 - ps_clip[i]))
257 .sum::<f64>()
258 .max(f64::EPSILON);
259 let _ = mu1;
260 mu1_h - mu0_h
261 } else {
262 ate_sum / n as f64
263 };
264
265 let se = bootstrap_se_ipw(&ps_clip, &treatment, &outcome, self.stabilised, 50);
267
268 Ok(
269 EstimationResult::new(ate, se, "ATE", "IPW", n).with_diagnostic(
270 "mean_ps_treated",
271 ps_clip
272 .iter()
273 .zip(treatment.iter())
274 .filter(|(_, &t)| t > 0.5)
275 .map(|(p, _)| p)
276 .sum::<f64>()
277 / treatment.iter().filter(|&&t| t > 0.5).count().max(1) as f64,
278 ),
279 )
280 }
281}
282
283fn bootstrap_se_ipw(
284 ps: &Array1<f64>,
285 treatment: &ArrayView1<f64>,
286 outcome: &ArrayView1<f64>,
287 stabilised: bool,
288 n_boot: usize,
289) -> f64 {
290 let n = ps.len();
291 let mut estimates = Vec::with_capacity(n_boot);
292 let mut rng_state: u64 = 12345;
294 for _ in 0..n_boot {
295 let mut sample_ate = 0.0_f64;
296 let mut w1 = 0.0_f64;
297 let mut w0 = 0.0_f64;
298 for _ in 0..n {
299 rng_state = rng_state
300 .wrapping_mul(6364136223846793005)
301 .wrapping_add(1442695040888963407);
302 let idx = (rng_state >> 33) as usize % n;
303 let ti = treatment[idx];
304 let yi = outcome[idx];
305 let ei = ps[idx];
306 w1 += ti / ei;
307 w0 += (1.0 - ti) / (1.0 - ei);
308 sample_ate += ti * yi / ei - (1.0 - ti) * yi / (1.0 - ei);
309 }
310 let ate = if stabilised {
311 let mu1 = (0..n)
312 .map(|i| treatment[i] * outcome[i] / ps[i])
313 .sum::<f64>()
314 / w1.max(f64::EPSILON);
315 let mu0 = (0..n)
316 .map(|i| (1.0 - treatment[i]) * outcome[i] / (1.0 - ps[i]))
317 .sum::<f64>()
318 / w0.max(f64::EPSILON);
319 mu1 - mu0
320 } else {
321 sample_ate / n as f64
322 };
323 estimates.push(ate);
324 }
325 let mean = estimates.iter().sum::<f64>() / n_boot as f64;
326 let var = estimates.iter().map(|&e| (e - mean).powi(2)).sum::<f64>() / (n_boot - 1) as f64;
327 var.sqrt()
328}
329
330pub struct DoublyRobustEstimator {
339 pub ps_max_iter: usize,
341 pub outcome_poly_degree: usize,
343}
344
345impl Default for DoublyRobustEstimator {
346 fn default() -> Self {
347 Self {
348 ps_max_iter: 500,
349 outcome_poly_degree: 1,
350 }
351 }
352}
353
354impl DoublyRobustEstimator {
355 pub fn estimate(
357 &self,
358 covariates: ArrayView2<f64>,
359 treatment: ArrayView1<f64>,
360 outcome: ArrayView1<f64>,
361 ) -> StatsResult<EstimationResult> {
362 let n = outcome.len();
363 if covariates.nrows() != n || treatment.len() != n {
364 return Err(StatsError::DimensionMismatch(
365 "Dimensions must match".to_owned(),
366 ));
367 }
368
369 let coef_ps = logistic_regression(covariates, treatment, self.ps_max_iter, 0.1, 1e-6)?;
371 let ps = predict_proba(covariates, &coef_ps).mapv(|p| p.clamp(1e-6, 1.0 - 1e-6));
372
373 let (mu1, mu0) = outcome_regression_linear(covariates, treatment, outcome)?;
375
376 let mut aipw_scores = Array1::<f64>::zeros(n);
378 for i in 0..n {
379 let ti = treatment[i];
380 let yi = outcome[i];
381 let ei = ps[i];
382 aipw_scores[i] =
384 mu1[i] - mu0[i] + ti * (yi - mu1[i]) / ei - (1.0 - ti) * (yi - mu0[i]) / (1.0 - ei);
385 }
386
387 let ate = aipw_scores.mean().unwrap_or(0.0);
388 let variance =
389 aipw_scores.iter().map(|&s| (s - ate).powi(2)).sum::<f64>() / ((n - 1) as f64);
390 let se = (variance / n as f64).sqrt();
391
392 Ok(EstimationResult::new(
393 ate,
394 se,
395 "ATE",
396 "AIPW (Doubly-Robust)",
397 n,
398 ))
399 }
400}
401
402fn outcome_regression_linear(
404 covariates: ArrayView2<f64>,
405 treatment: ArrayView1<f64>,
406 outcome: ArrayView1<f64>,
407) -> StatsResult<(Array1<f64>, Array1<f64>)> {
408 let n = covariates.nrows();
409 let p = covariates.ncols();
410 let mut design = Array2::<f64>::zeros((n, p + 2));
412 for i in 0..n {
413 design[[i, 0]] = 1.0;
414 design[[i, 1]] = treatment[i];
415 for j in 0..p {
416 design[[i, j + 2]] = covariates[[i, j]];
417 }
418 }
419 let coef = ols_estimate(design.view(), outcome)?;
421
422 let mut mu1 = Array1::<f64>::zeros(n);
424 let mut mu0 = Array1::<f64>::zeros(n);
425 for i in 0..n {
426 let mut pred1 = coef[0] + coef[1]; let mut pred0 = coef[0]; for j in 0..p {
429 pred1 += coef[j + 2] * covariates[[i, j]];
430 pred0 += coef[j + 2] * covariates[[i, j]];
431 }
432 mu1[i] = pred1;
433 mu0[i] = pred0;
434 }
435 Ok((mu1, mu0))
436}
437
438fn ols_estimate(x: ArrayView2<f64>, y: ArrayView1<f64>) -> StatsResult<Array1<f64>> {
440 let (n, p) = x.dim();
441 let mut xtx = Array2::<f64>::zeros((p, p));
443 let mut xty = Array1::<f64>::zeros(p);
444 for i in 0..n {
445 let xi = x.row(i);
446 for j in 0..p {
447 xty[j] += xi[j] * y[i];
448 for k in 0..p {
449 xtx[[j, k]] += xi[j] * xi[k];
450 }
451 }
452 }
453 gauss_jordan(xtx, xty)
455}
456
457fn gauss_jordan(mut a: Array2<f64>, mut b: Array1<f64>) -> StatsResult<Array1<f64>> {
458 let n = b.len();
459 for col in 0..n {
460 let pivot_row = (col..n).max_by(|&i, &j| {
462 a[[i, col]]
463 .abs()
464 .partial_cmp(&a[[j, col]].abs())
465 .unwrap_or(std::cmp::Ordering::Equal)
466 });
467 let pivot_row =
468 pivot_row.ok_or_else(|| StatsError::ComputationError("Singular matrix".to_owned()))?;
469 for k in 0..n {
471 let tmp = a[[col, k]];
472 a[[col, k]] = a[[pivot_row, k]];
473 a[[pivot_row, k]] = tmp;
474 }
475 let tmp = b[pivot_row];
476 b[pivot_row] = b[col];
477 b[col] = tmp;
478
479 let pivot = a[[col, col]];
480 if pivot.abs() < 1e-12 {
481 return Err(StatsError::ComputationError(
482 "Singular matrix in OLS".to_owned(),
483 ));
484 }
485 for k in col..n {
486 a[[col, k]] /= pivot;
487 }
488 b[col] /= pivot;
489
490 for row in 0..n {
491 if row != col {
492 let factor = a[[row, col]];
493 for k in col..n {
494 let av = a[[col, k]];
495 a[[row, k]] -= factor * av;
496 }
497 b[row] -= factor * b[col];
498 }
499 }
500 }
501 Ok(b)
502}
503
504pub struct NearestNeighborMatching {
511 pub k: usize,
513 pub use_propensity_score: bool,
515 pub ps_max_iter: usize,
517 pub with_replacement: bool,
519}
520
521impl Default for NearestNeighborMatching {
522 fn default() -> Self {
523 Self {
524 k: 1,
525 use_propensity_score: false,
526 ps_max_iter: 500,
527 with_replacement: true,
528 }
529 }
530}
531
532impl NearestNeighborMatching {
533 pub fn estimate(
535 &self,
536 covariates: ArrayView2<f64>,
537 treatment: ArrayView1<f64>,
538 outcome: ArrayView1<f64>,
539 ) -> StatsResult<EstimationResult> {
540 let n = treatment.len();
541 if covariates.nrows() != n || outcome.len() != n {
542 return Err(StatsError::DimensionMismatch(
543 "Dimensions must match".to_owned(),
544 ));
545 }
546
547 let match_features: Array2<f64> = if self.use_propensity_score {
549 let coef = logistic_regression(covariates, treatment, self.ps_max_iter, 0.1, 1e-6)?;
550 let ps = predict_proba(covariates, &coef);
551 ps.insert_axis(scirs2_core::ndarray::Axis(1))
552 } else {
553 covariates.to_owned()
554 };
555
556 let treated_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] > 0.5).collect();
557 let control_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] <= 0.5).collect();
558
559 if treated_idx.is_empty() || control_idx.is_empty() {
560 return Err(StatsError::InsufficientData(
561 "Need both treated and control units".to_owned(),
562 ));
563 }
564
565 let variances = column_variances(match_features.view());
567
568 let mut att_contributions = Vec::with_capacity(treated_idx.len());
569 let mut used_controls: std::collections::HashSet<usize> = std::collections::HashSet::new();
570
571 for &ti in &treated_idx {
572 let mut distances: Vec<(usize, f64)> = control_idx
574 .iter()
575 .filter(|&&ci| self.with_replacement || !used_controls.contains(&ci))
576 .map(|&ci| {
577 let d = mahalanobis_dist(
578 match_features.row(ti),
579 match_features.row(ci),
580 &variances,
581 );
582 (ci, d)
583 })
584 .collect();
585 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
586 let k_matches = self.k.min(distances.len());
587 if k_matches == 0 {
588 continue;
589 }
590 let matched_y: f64 = distances[..k_matches]
591 .iter()
592 .map(|&(ci, _)| outcome[ci])
593 .sum::<f64>()
594 / k_matches as f64;
595 att_contributions.push(outcome[ti] - matched_y);
596 if !self.with_replacement {
597 for &(ci, _) in &distances[..k_matches] {
598 used_controls.insert(ci);
599 }
600 }
601 }
602
603 if att_contributions.is_empty() {
604 return Err(StatsError::InsufficientData(
605 "No matched pairs found".to_owned(),
606 ));
607 }
608
609 let att = att_contributions.iter().sum::<f64>() / att_contributions.len() as f64;
610 let var = att_contributions
611 .iter()
612 .map(|&d| (d - att).powi(2))
613 .sum::<f64>()
614 / (att_contributions.len().saturating_sub(1).max(1) as f64);
615 let se = (var / att_contributions.len() as f64).sqrt();
616
617 Ok(EstimationResult::new(
618 att,
619 se,
620 "ATT",
621 if self.use_propensity_score {
622 "PS Matching (NN)"
623 } else {
624 "Covariate Matching (NN)"
625 },
626 n,
627 ))
628 }
629}
630
631fn column_variances(x: ArrayView2<f64>) -> Array1<f64> {
632 let (n, p) = x.dim();
633 let mut vars = Array1::<f64>::zeros(p);
634 for j in 0..p {
635 let col = x.column(j);
636 let mean = col.mean().unwrap_or(0.0);
637 let v = col.iter().map(|&xi| (xi - mean).powi(2)).sum::<f64>() / (n as f64);
638 vars[j] = v.max(1e-10);
639 }
640 vars
641}
642
643fn mahalanobis_dist(
644 a: scirs2_core::ndarray::ArrayView1<f64>,
645 b: scirs2_core::ndarray::ArrayView1<f64>,
646 variances: &Array1<f64>,
647) -> f64 {
648 a.iter()
649 .zip(b.iter())
650 .zip(variances.iter())
651 .map(|((&ai, &bi), &vi)| (ai - bi).powi(2) / vi)
652 .sum::<f64>()
653 .sqrt()
654}
655
656pub struct RegressionDiscontinuity {
662 pub cutoff: f64,
664 pub bandwidth: f64,
666 pub local_linear: bool,
668 pub fuzzy: bool,
670}
671
672impl Default for RegressionDiscontinuity {
673 fn default() -> Self {
674 Self {
675 cutoff: 0.0,
676 bandwidth: 1.0,
677 local_linear: true,
678 fuzzy: false,
679 }
680 }
681}
682
683impl RegressionDiscontinuity {
684 pub fn estimate(
691 &self,
692 running: ArrayView1<f64>,
693 outcome: ArrayView1<f64>,
694 treatment: Option<ArrayView1<f64>>,
695 ) -> StatsResult<EstimationResult> {
696 let n = running.len();
697 if outcome.len() != n {
698 return Err(StatsError::DimensionMismatch(
699 "running and outcome must have equal length".to_owned(),
700 ));
701 }
702
703 let in_window: Vec<usize> = (0..n)
705 .filter(|&i| (running[i] - self.cutoff).abs() <= self.bandwidth)
706 .collect();
707
708 if in_window.len() < 4 {
709 return Err(StatsError::InsufficientData(
710 "Too few observations within the bandwidth window".to_owned(),
711 ));
712 }
713
714 let above: Vec<usize> = in_window
715 .iter()
716 .copied()
717 .filter(|&i| running[i] >= self.cutoff)
718 .collect();
719 let below: Vec<usize> = in_window
720 .iter()
721 .copied()
722 .filter(|&i| running[i] < self.cutoff)
723 .collect();
724
725 let (tau_above, _se_above) = local_linear_fit(&running, &outcome, &above, self.cutoff)?;
727 let (tau_below, _se_below) = local_linear_fit(&running, &outcome, &below, self.cutoff)?;
728
729 let reduced_form = tau_above - tau_below;
730
731 let (estimate, estimand) = if self.fuzzy {
732 let treat = treatment.ok_or_else(|| {
734 StatsError::InvalidArgument("Treatment vector required for fuzzy RD".to_owned())
735 })?;
736 let (t_above, _) = local_linear_fit(&running, &treat, &above, self.cutoff)?;
737 let (t_below, _) = local_linear_fit(&running, &treat, &below, self.cutoff)?;
738 let first_stage = t_above - t_below;
739 if first_stage.abs() < 1e-8 {
740 return Err(StatsError::ComputationError(
741 "Weak first stage in fuzzy RD".to_owned(),
742 ));
743 }
744 (reduced_form / first_stage, "LATE (Fuzzy RD)")
745 } else {
746 (reduced_form, "ATE (Sharp RD)")
747 };
748
749 let se = rdd_se(&running, &outcome, &in_window, self.cutoff, estimate);
751
752 let mut res = EstimationResult::new(estimate, se, estimand, "RDD", n);
753 res.diagnostics
754 .insert("n_above".to_string(), above.len() as f64);
755 res.diagnostics
756 .insert("n_below".to_string(), below.len() as f64);
757 res.diagnostics
758 .insert("bandwidth".to_string(), self.bandwidth);
759 Ok(res)
760 }
761
762 pub fn ik_bandwidth(running: ArrayView1<f64>, outcome: ArrayView1<f64>, cutoff: f64) -> f64 {
764 let n = running.len() as f64;
765 let mean = running.mean().unwrap_or(cutoff);
767 let var = running.iter().map(|&r| (r - mean).powi(2)).sum::<f64>() / n;
768 let sigma_x = var.sqrt();
769 let _ = outcome; sigma_x * n.powf(-0.2)
771 }
772}
773
774fn local_linear_fit(
775 running: &ArrayView1<f64>,
776 outcome: &ArrayView1<f64>,
777 indices: &[usize],
778 cutoff: f64,
779) -> StatsResult<(f64, f64)> {
780 let n = indices.len();
781 if n < 2 {
782 return Err(StatsError::InsufficientData(
783 "Need at least 2 observations for local linear fit".to_owned(),
784 ));
785 }
786 let x_c: Vec<f64> = indices.iter().map(|&i| running[*&i] - cutoff).collect();
788 let y: Vec<f64> = indices.iter().map(|&i| outcome[*&i]).collect();
789
790 let mut s0 = 0.0_f64;
792 let mut s1 = 0.0_f64;
793 let mut s2 = 0.0_f64;
794 let mut t0 = 0.0_f64;
795 let mut t1 = 0.0_f64;
796 for k in 0..n {
797 s0 += 1.0;
798 s1 += x_c[k];
799 s2 += x_c[k].powi(2);
800 t0 += y[k];
801 t1 += x_c[k] * y[k];
802 }
803 let det = s0 * s2 - s1 * s1;
804 if det.abs() < 1e-12 {
805 return Err(StatsError::ComputationError(
806 "Degenerate local linear design matrix".to_owned(),
807 ));
808 }
809 let intercept = (s2 * t0 - s1 * t1) / det;
810 let slope = (s0 * t1 - s1 * t0) / det;
811 let residuals: Vec<f64> = (0..n).map(|k| y[k] - intercept - slope * x_c[k]).collect();
813 let sigma2 = residuals.iter().map(|r| r * r).sum::<f64>() / (n.saturating_sub(2).max(1) as f64);
814 let se = (sigma2 * s2 / det.max(f64::EPSILON)).sqrt();
815 Ok((intercept, se))
816}
817
818fn rdd_se(
819 running: &ArrayView1<f64>,
820 _outcome: &ArrayView1<f64>,
821 in_window: &[usize],
822 _cutoff: f64,
823 _estimate: f64,
824) -> f64 {
825 let nw = in_window.len() as f64;
827 let spread = running
828 .iter()
829 .filter(|&&r| in_window.iter().any(|&i| (running[i] - r).abs() < 1e-12))
830 .map(|&r| r)
831 .collect::<Vec<_>>();
832 let var_r = spread.iter().map(|&r| r * r).sum::<f64>() / nw.max(1.0);
833 (var_r / nw).sqrt()
834}
835
836pub struct SyntheticControlEstimator {
845 pub max_iter: usize,
847 pub tol: f64,
849}
850
851impl Default for SyntheticControlEstimator {
852 fn default() -> Self {
853 Self {
854 max_iter: 1000,
855 tol: 1e-7,
856 }
857 }
858}
859
860#[derive(Debug, Clone)]
862pub struct SyntheticControlResult {
863 pub weights: Array1<f64>,
865 pub att_series: Array1<f64>,
867 pub att_mean: f64,
869 pub pre_rmspe: f64,
871 pub post_rmspe: f64,
873}
874
875impl SyntheticControlEstimator {
876 pub fn estimate(
884 &self,
885 treated_pre: ArrayView1<f64>,
886 donors_pre: ArrayView2<f64>,
887 treated_post: ArrayView1<f64>,
888 donors_post: ArrayView2<f64>,
889 ) -> StatsResult<SyntheticControlResult> {
890 let t0 = treated_pre.len();
891 let j = donors_pre.ncols();
892 if donors_pre.nrows() != t0 {
893 return Err(StatsError::DimensionMismatch(
894 "donors_pre rows must match treated_pre length".to_owned(),
895 ));
896 }
897 let t1 = treated_post.len();
898 if donors_post.nrows() != t1 || donors_post.ncols() != j {
899 return Err(StatsError::DimensionMismatch(
900 "donors_post dimensions inconsistent".to_owned(),
901 ));
902 }
903 if j == 0 {
904 return Err(StatsError::InsufficientData(
905 "Need at least one donor unit".to_owned(),
906 ));
907 }
908
909 let weights = self.fit_weights(treated_pre, donors_pre)?;
912
913 let pre_synth: Array1<f64> = (0..t0)
915 .map(|t| (0..j).map(|k| donors_pre[[t, k]] * weights[k]).sum::<f64>())
916 .collect();
917 let post_synth: Array1<f64> = (0..t1)
918 .map(|t| {
919 (0..j)
920 .map(|k| donors_post[[t, k]] * weights[k])
921 .sum::<f64>()
922 })
923 .collect();
924
925 let pre_rmspe = (pre_synth
926 .iter()
927 .zip(treated_pre.iter())
928 .map(|(&s, &y)| (s - y).powi(2))
929 .sum::<f64>()
930 / t0 as f64)
931 .sqrt();
932
933 let att_series: Array1<f64> = treated_post
934 .iter()
935 .zip(post_synth.iter())
936 .map(|(&y, &s)| y - s)
937 .collect();
938 let att_mean = att_series.mean().unwrap_or(0.0);
939 let post_rmspe = (att_series.iter().map(|&d| d.powi(2)).sum::<f64>() / t1 as f64).sqrt();
940
941 Ok(SyntheticControlResult {
942 weights,
943 att_series,
944 att_mean,
945 pre_rmspe,
946 post_rmspe,
947 })
948 }
949
950 fn fit_weights(
951 &self,
952 target: ArrayView1<f64>,
953 donors: ArrayView2<f64>,
954 ) -> StatsResult<Array1<f64>> {
955 let (t0, j) = donors.dim();
956 let mut w = Array1::<f64>::from_elem(j, 1.0 / j as f64);
957
958 for _iter in 0..self.max_iter {
959 let synth: Array1<f64> = (0..t0)
961 .map(|t| (0..j).map(|k| donors[[t, k]] * w[k]).sum::<f64>())
962 .collect();
963 let residual: Array1<f64> = (0..t0).map(|t| target[t] - synth[t]).collect();
964 let mut grad = Array1::<f64>::zeros(j);
965 for k in 0..j {
966 for t in 0..t0 {
967 grad[k] -= 2.0 * donors[[t, k]] * residual[t];
968 }
969 }
970
971 let lr = 0.01;
973 let mut w_new = Array1::<f64>::zeros(j);
974 for k in 0..j {
975 w_new[k] = (w[k] - lr * grad[k]).max(0.0);
976 }
977 let s = w_new.sum();
979 if s > 1e-10 {
980 w_new.mapv_inplace(|x| x / s);
981 } else {
982 w_new.fill(1.0 / j as f64);
983 }
984 let diff: f64 = w_new
986 .iter()
987 .zip(w.iter())
988 .map(|(&a, &b)| (a - b).powi(2))
989 .sum::<f64>()
990 .sqrt();
991 w = w_new;
992 if diff < self.tol {
993 break;
994 }
995 }
996 Ok(w)
997 }
998}
999
1000pub struct DifferenceInDifferences {
1006 pub n_pre_periods: usize,
1008}
1009
1010impl Default for DifferenceInDifferences {
1011 fn default() -> Self {
1012 Self { n_pre_periods: 2 }
1013 }
1014}
1015
1016#[derive(Debug, Clone)]
1018pub struct DiDResult {
1019 pub att: f64,
1021 pub std_error: f64,
1023 pub conf_interval: [f64; 2],
1025 pub p_value: f64,
1027 pub parallel_trends_p: f64,
1029 pub parallel_trends_ok: bool,
1031 pub n_treated: usize,
1033 pub n_control: usize,
1035}
1036
1037impl DifferenceInDifferences {
1038 pub fn estimate(
1045 &self,
1046 outcomes_pre: ArrayView2<f64>,
1047 outcomes_post: ArrayView2<f64>,
1048 treatment: ArrayView1<f64>,
1049 ) -> StatsResult<DiDResult> {
1050 let n = treatment.len();
1051 if outcomes_pre.nrows() != n || outcomes_post.nrows() != n {
1052 return Err(StatsError::DimensionMismatch(
1053 "outcome arrays must have n_units rows".to_owned(),
1054 ));
1055 }
1056
1057 let n_post = outcomes_post.ncols();
1058 let n_pre = outcomes_pre.ncols();
1059
1060 let treated_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] > 0.5).collect();
1061 let control_idx: Vec<usize> = (0..n).filter(|&i| treatment[i] <= 0.5).collect();
1062
1063 if treated_idx.is_empty() || control_idx.is_empty() {
1064 return Err(StatsError::InsufficientData(
1065 "Need both treated and control units".to_owned(),
1066 ));
1067 }
1068
1069 let mean_pre_treated = group_mean(&outcomes_pre, &treated_idx);
1071 let mean_pre_control = group_mean(&outcomes_pre, &control_idx);
1072 let mean_post_treated = group_mean(&outcomes_post, &treated_idx);
1073 let mean_post_control = group_mean(&outcomes_post, &control_idx);
1074
1075 let diff_treated = mean_post_treated - mean_pre_treated;
1077 let diff_control = mean_post_control - mean_pre_control;
1078 let att = diff_treated - diff_control;
1079
1080 let se = did_bootstrap_se(
1082 &outcomes_pre,
1083 &outcomes_post,
1084 &treatment,
1085 &treated_idx,
1086 &control_idx,
1087 100,
1088 );
1089
1090 let z = att / se.max(f64::EPSILON);
1091 let p_value = two_sided_p(z);
1092 let margin = 1.959_964 * se;
1093
1094 let parallel_trends_p =
1096 parallel_trends_test(&outcomes_pre, &treatment, &treated_idx, &control_idx);
1097
1098 Ok(DiDResult {
1099 att,
1100 std_error: se,
1101 conf_interval: [att - margin, att + margin],
1102 p_value,
1103 parallel_trends_p,
1104 parallel_trends_ok: parallel_trends_p > 0.05,
1105 n_treated: treated_idx.len(),
1106 n_control: control_idx.len(),
1107 })
1108 }
1109}
1110
1111fn group_mean(outcomes: &ArrayView2<f64>, indices: &[usize]) -> f64 {
1112 if indices.is_empty() {
1113 return 0.0;
1114 }
1115 let total: f64 = indices
1116 .iter()
1117 .flat_map(|&i| outcomes.row(i).iter().copied().collect::<Vec<_>>())
1118 .sum();
1119 total / (indices.len() * outcomes.ncols()) as f64
1120}
1121
1122fn did_bootstrap_se(
1123 pre: &ArrayView2<f64>,
1124 post: &ArrayView2<f64>,
1125 treatment: &ArrayView1<f64>,
1126 _treated: &[usize],
1127 _control: &[usize],
1128 n_boot: usize,
1129) -> f64 {
1130 let n = treatment.len();
1131 let mut ests = Vec::with_capacity(n_boot);
1132 let mut rng: u64 = 99991;
1133 for _ in 0..n_boot {
1134 let mut t_pre = 0.0_f64;
1135 let mut t_post = 0.0_f64;
1136 let mut c_pre = 0.0_f64;
1137 let mut c_post = 0.0_f64;
1138 let mut nt = 0.0_f64;
1139 let mut nc = 0.0_f64;
1140 for _ in 0..n {
1141 rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
1142 let idx = (rng >> 33) as usize % n;
1143 let ti = treatment[idx];
1144 let pre_mean = pre.row(idx).mean().unwrap_or(0.0);
1145 let post_mean = post.row(idx).mean().unwrap_or(0.0);
1146 if ti > 0.5 {
1147 t_pre += pre_mean;
1148 t_post += post_mean;
1149 nt += 1.0;
1150 } else {
1151 c_pre += pre_mean;
1152 c_post += post_mean;
1153 nc += 1.0;
1154 }
1155 }
1156 if nt > 0.0 && nc > 0.0 {
1157 let att_b = (t_post / nt - t_pre / nt) - (c_post / nc - c_pre / nc);
1158 ests.push(att_b);
1159 }
1160 }
1161 if ests.is_empty() {
1162 return 0.0;
1163 }
1164 let mean = ests.iter().sum::<f64>() / ests.len() as f64;
1165 let var =
1166 ests.iter().map(|&e| (e - mean).powi(2)).sum::<f64>() / (ests.len() - 1).max(1) as f64;
1167 var.sqrt()
1168}
1169
1170fn parallel_trends_test(
1171 outcomes_pre: &ArrayView2<f64>,
1172 treatment: &ArrayView1<f64>,
1173 treated_idx: &[usize],
1174 control_idx: &[usize],
1175) -> f64 {
1176 let n_pre = outcomes_pre.ncols();
1179 if n_pre < 2 {
1180 return 1.0; }
1182
1183 let mut treated_changes = Vec::new();
1185 let mut control_changes = Vec::new();
1186 for t in 1..n_pre {
1187 let tc: f64 = treated_idx
1188 .iter()
1189 .map(|&i| outcomes_pre[[i, t]] - outcomes_pre[[i, t - 1]])
1190 .sum::<f64>()
1191 / treated_idx.len().max(1) as f64;
1192 let cc: f64 = control_idx
1193 .iter()
1194 .map(|&i| outcomes_pre[[i, t]] - outcomes_pre[[i, t - 1]])
1195 .sum::<f64>()
1196 / control_idx.len().max(1) as f64;
1197 treated_changes.push(tc);
1198 control_changes.push(cc);
1199 }
1200
1201 let n_t = treated_changes.len();
1203 let n_c = control_changes.len();
1204 let mu_t = treated_changes.iter().sum::<f64>() / n_t as f64;
1205 let mu_c = control_changes.iter().sum::<f64>() / n_c as f64;
1206 let var_t = treated_changes
1207 .iter()
1208 .map(|&x| (x - mu_t).powi(2))
1209 .sum::<f64>()
1210 / n_t.saturating_sub(1).max(1) as f64;
1211 let var_c = control_changes
1212 .iter()
1213 .map(|&x| (x - mu_c).powi(2))
1214 .sum::<f64>()
1215 / n_c.saturating_sub(1).max(1) as f64;
1216 let se = (var_t / n_t as f64 + var_c / n_c as f64).sqrt();
1217 if se < f64::EPSILON {
1218 return 1.0;
1219 }
1220 let t_stat = (mu_t - mu_c) / se;
1221 two_sided_p(t_stat)
1222}
1223
1224#[cfg(test)]
1229mod tests {
1230 use super::*;
1231 use scirs2_core::ndarray::array;
1232
1233 #[test]
1234 fn test_ipw_estimator_simple() {
1235 let cov = Array2::<f64>::from_shape_fn((20, 2), |(i, j)| {
1237 if j == 0 {
1238 if i < 10 {
1239 1.0
1240 } else {
1241 0.0
1242 }
1243 } else {
1244 (i as f64).sin()
1245 }
1246 });
1247 let treat = Array1::from_iter((0..20).map(|i| if i < 10 { 1.0 } else { 0.0 }));
1248 let outcome = Array1::from_iter((0..20).map(|i| if i < 10 { 2.0 } else { 0.0 }));
1249 let est = IPWEstimator::default();
1250 let res = est
1251 .estimate(cov.view(), treat.view(), outcome.view())
1252 .unwrap();
1253 assert!((res.estimate - 2.0).abs() < 1.0, "ATE={}", res.estimate);
1255 }
1256
1257 #[test]
1258 fn test_doubly_robust() {
1259 let n = 30;
1260 let cov = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| i as f64 / n as f64);
1261 let treat = Array1::from_iter((0..n).map(|i| if i < n / 2 { 1.0 } else { 0.0 }));
1262 let outcome = Array1::from_iter((0..n).map(|i| {
1263 if i < n / 2 {
1264 1.0 + i as f64 * 0.01
1265 } else {
1266 i as f64 * 0.01
1267 }
1268 }));
1269 let est = DoublyRobustEstimator::default();
1270 let res = est
1271 .estimate(cov.view(), treat.view(), outcome.view())
1272 .unwrap();
1273 assert!(res.estimate.is_finite());
1274 }
1275
1276 #[test]
1277 fn test_nn_matching() {
1278 let n = 20;
1279 let cov = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| i as f64);
1280 let treat = Array1::from_iter((0..n).map(|i| if i % 2 == 0 { 1.0 } else { 0.0 }));
1281 let outcome = Array1::from_iter((0..n).map(|i| if i % 2 == 0 { 2.0 } else { 0.0 }));
1282 let est = NearestNeighborMatching::default();
1283 let res = est
1284 .estimate(cov.view(), treat.view(), outcome.view())
1285 .unwrap();
1286 assert!((res.estimate - 2.0).abs() < 0.5, "ATT={}", res.estimate);
1288 }
1289
1290 #[test]
1291 fn test_rdd_sharp() {
1292 let n = 40;
1294 let running = Array1::from_iter((0..n).map(|i| -2.0 + i as f64 * 4.0 / n as f64));
1295 let outcome = Array1::from_iter((0..n).map(|i| {
1296 if running[i] >= 0.0 {
1297 3.0 + running[i] * 0.5
1298 } else {
1299 running[i] * 0.5
1300 }
1301 }));
1302 let rdd = RegressionDiscontinuity {
1303 cutoff: 0.0,
1304 bandwidth: 1.5,
1305 local_linear: true,
1306 fuzzy: false,
1307 };
1308 let res = rdd.estimate(running.view(), outcome.view(), None).unwrap();
1309 assert!(
1311 (res.estimate - 3.0).abs() < 1.0,
1312 "RDD estimate={}",
1313 res.estimate
1314 );
1315 }
1316
1317 #[test]
1318 fn test_synthetic_control() {
1319 let treated_pre = array![1.0, 2.0, 3.0, 4.0, 5.0];
1320 let donors_pre =
1321 Array2::from_shape_fn((5, 3), |(t, j)| (t + 1) as f64 * [1.1, 0.9, 1.0][j]);
1322 let treated_post = array![8.0, 9.0];
1323 let donors_post =
1324 Array2::from_shape_fn((2, 3), |(t, j)| (t + 6) as f64 * [1.1, 0.9, 1.0][j]);
1325 let est = SyntheticControlEstimator::default();
1326 let res = est
1327 .estimate(
1328 treated_pre.view(),
1329 donors_pre.view(),
1330 treated_post.view(),
1331 donors_post.view(),
1332 )
1333 .unwrap();
1334 assert!((res.weights.sum() - 1.0).abs() < 1e-5);
1335 assert!(res.att_series.len() == 2);
1336 }
1337
1338 #[test]
1339 fn test_did() {
1340 let n = 20;
1342 let pre = Array2::from_shape_fn((n, 3), |(i, t)| (i as f64 * 0.1) + t as f64 * 0.5);
1343 let post = Array2::from_shape_fn((n, 2), |(i, t)| {
1344 let base = (i as f64 * 0.1) + 1.5 + t as f64 * 0.5;
1345 if i < 10 {
1346 base + 2.0
1347 } else {
1348 base
1349 }
1350 });
1351 let treat = Array1::from_iter((0..n).map(|i| if i < 10 { 1.0 } else { 0.0 }));
1352 let did = DifferenceInDifferences { n_pre_periods: 3 };
1353 let res = did.estimate(pre.view(), post.view(), treat.view()).unwrap();
1354 assert!((res.att - 2.0).abs() < 0.3, "DiD ATT={}", res.att);
1355 }
1356}