// greeners/bootstrap.rs
1use crate::error::GreenersError;
2use ndarray::{Array1, Array2};
3use ndarray_linalg::Inverse;
4use rand::seq::SliceRandom;
5use rand::thread_rng;
6
/// Bootstrap methods for statistical inference.
///
/// Stateless marker type: all functionality lives in associated functions.
pub struct Bootstrap;
9
10impl Bootstrap {
11 /// Pairs bootstrap for OLS regression
12 ///
13 /// Resamples (y, X) pairs with replacement to estimate sampling distribution
14 ///
15 /// # Arguments
16 /// * `y` - Dependent variable (n × 1)
17 /// * `x` - Design matrix (n × k)
18 /// * `n_bootstrap` - Number of bootstrap replications (recommended: 1000-10000)
19 ///
20 /// # Returns
21 /// Array of bootstrap coefficient estimates (n_bootstrap × k)
22 ///
23 /// # Examples
24 ///
25 /// ```rust
26 /// use greeners::Bootstrap;
27 /// use ndarray::{Array1, Array2};
28 ///
29 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
30 /// let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
31 /// let x = Array2::from_shape_vec((5, 2), vec![
32 /// 1.0, 1.0,
33 /// 1.0, 2.0,
34 /// 1.0, 3.0,
35 /// 1.0, 4.0,
36 /// 1.0, 5.0
37 /// ])?;
38 ///
39 /// let boot_coefs = Bootstrap::pairs_bootstrap(&y, &x, 100)?;
40 /// // ...
41 /// # Ok(())
42 /// # }
43 /// ```
44 pub fn pairs_bootstrap(
45 y: &Array1<f64>,
46 x: &Array2<f64>,
47 n_bootstrap: usize,
48 ) -> Result<Array2<f64>, GreenersError> {
49 let n = y.len();
50 let k = x.ncols();
51
52 if x.nrows() != n {
53 return Err(GreenersError::ShapeMismatch(
54 "X and y must have same number of rows".to_string(),
55 ));
56 }
57
58 let mut rng = thread_rng();
59 let indices: Vec<usize> = (0..n).collect();
60
61 // Store bootstrap coefficients
62 let mut boot_coefs = Array2::<f64>::zeros((n_bootstrap, k));
63
64 for b in 0..n_bootstrap {
65 // Resample indices with replacement
66 let mut boot_indices = vec![0; n];
67 // for i in 0..n {
68 for indice in boot_indices.iter_mut().take(n) {
69 *indice = *indices.choose(&mut rng).unwrap();
70 }
71
72 // Create bootstrap sample
73 let mut y_boot = Array1::<f64>::zeros(n);
74 let mut x_boot = Array2::<f64>::zeros((n, k));
75
76 for (i, &idx) in boot_indices.iter().enumerate() {
77 y_boot[i] = y[idx];
78 x_boot.row_mut(i).assign(&x.row(idx));
79 }
80
81 // Fit OLS on bootstrap sample
82 let xt_x = x_boot.t().dot(&x_boot);
83 let xt_y = x_boot.t().dot(&y_boot);
84
85 match xt_x.inv() {
86 Ok(xt_x_inv) => {
87 let beta_boot = xt_x_inv.dot(&xt_y);
88 boot_coefs.row_mut(b).assign(&beta_boot);
89 }
90 Err(_) => {
91 // Singular matrix in this bootstrap sample - use original estimate
92 // This is rare but can happen with small samples
93 let xt_x_orig = x.t().dot(x);
94 let xt_y_orig = x.t().dot(y);
95 if let Ok(inv) = xt_x_orig.inv() {
96 let beta_orig = inv.dot(&xt_y_orig);
97 boot_coefs.row_mut(b).assign(&beta_orig);
98 }
99 }
100 }
101 }
102
103 Ok(boot_coefs)
104 }
105
106 /// Calculate bootstrap standard errors from bootstrap coefficient matrix
107 ///
108 /// # Arguments
109 /// * `boot_coefs` - Bootstrap coefficient matrix (n_bootstrap × k)
110 ///
111 /// # Returns
112 /// Standard errors for each coefficient
113 pub fn bootstrap_se(boot_coefs: &Array2<f64>) -> Array1<f64> {
114 boot_coefs.std_axis(ndarray::Axis(0), 0.0)
115 }
116
117 /// Calculate bootstrap percentile confidence intervals
118 ///
119 /// # Arguments
120 /// * `boot_coefs` - Bootstrap coefficient matrix (n_bootstrap × k)
121 /// * `alpha` - Significance level (e.g., 0.05 for 95% CI)
122 ///
123 /// # Returns
124 /// Tuple of (lower_bounds, upper_bounds)
125 pub fn percentile_ci(boot_coefs: &Array2<f64>, alpha: f64) -> (Array1<f64>, Array1<f64>) {
126 let k = boot_coefs.ncols();
127 let n_boot = boot_coefs.nrows();
128
129 let lower_idx = ((alpha / 2.0) * n_boot as f64).floor() as usize;
130 let upper_idx = ((1.0 - alpha / 2.0) * n_boot as f64).ceil() as usize;
131
132 let mut lower = Array1::<f64>::zeros(k);
133 let mut upper = Array1::<f64>::zeros(k);
134
135 for j in 0..k {
136 let mut col: Vec<f64> = boot_coefs.column(j).to_vec();
137 col.sort_by(|a, b| a.partial_cmp(b).unwrap());
138
139 lower[j] = col[lower_idx.min(n_boot - 1)];
140 upper[j] = col[upper_idx.min(n_boot - 1)];
141 }
142
143 (lower, upper)
144 }
145}
146
/// Hypothesis testing methods for linear models.
///
/// Stateless marker type: all functionality lives in associated functions.
pub struct HypothesisTest;
149
150impl HypothesisTest {
151 /// Wald test for linear restrictions on coefficients
152 ///
153 /// Tests H₀: R·β = q against H₁: R·β ≠ q
154 ///
155 /// # Arguments
156 /// * `beta` - Coefficient estimates (k × 1)
157 /// * `cov_matrix` - Variance-covariance matrix (k × k)
158 /// * `r` - Restriction matrix (m × k) where m = number of restrictions
159 /// * `q` - Restriction values (m × 1), usually zeros
160 ///
161 /// # Returns
162 /// Tuple of (wald_statistic, p_value, degrees_of_freedom)
163 ///
164 /// # Examples
165 ///
166 /// ```rust
167 /// use greeners::HypothesisTest;
168 /// use ndarray::{Array1, Array2};
169 ///
170 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
171 /// // Setup de dados falsos para o exemplo
172 /// let beta = Array1::from(vec![0.5, 2.0, 0.0]);
173 /// let cov_matrix = Array2::eye(3); // Matriz identidade como exemplo
174 ///
175 /// // Testando se beta[1] = 0 e beta[2] = 0
176 /// let r = Array2::from_shape_vec((2, 3), vec![
177 /// 0.0, 1.0, 0.0,
178 /// 0.0, 0.0, 1.0,
179 /// ])?;
180 /// let q = Array1::from(vec![0.0, 0.0]);
181 ///
182 /// let (wald_stat, p_value, df) = HypothesisTest::wald_test(&beta, &cov_matrix, &r, &q)?;
183 /// # Ok(())
184 /// # }
185 /// ```
186 pub fn wald_test(
187 beta: &Array1<f64>,
188 cov_matrix: &Array2<f64>,
189 r: &Array2<f64>,
190 q: &Array1<f64>,
191 ) -> Result<(f64, f64, usize), GreenersError> {
192 use statrs::distribution::{ChiSquared, ContinuousCDF};
193
194 let m = r.nrows(); // Number of restrictions
195
196 // Compute R·β - q
197 let r_beta = r.dot(beta);
198 let diff = &r_beta - q;
199
200 // Compute R·Cov(β)·R'
201 let r_cov = r.dot(cov_matrix);
202 let r_cov_rt = r_cov.dot(&r.t());
203
204 // Invert R·Cov(β)·R'
205 let r_cov_rt_inv = r_cov_rt.inv()?;
206
207 // Wald statistic: (R·β - q)' · [R·Cov(β)·R']^(-1) · (R·β - q)
208 let wald_stat = diff.dot(&r_cov_rt_inv.dot(&diff));
209
210 // Under H₀, Wald ~ χ²(m)
211 let chi2_dist = ChiSquared::new(m as f64).map_err(|_| GreenersError::OptimizationFailed)?;
212 let p_value = 1.0 - chi2_dist.cdf(wald_stat);
213
214 Ok((wald_stat, p_value, m))
215 }
216
217 /// F-test for nested models (OLS specific)
218 ///
219 /// Tests whether restricted model is adequate vs full model
220 ///
221 /// # Arguments
222 /// * `ssr_restricted` - Sum of squared residuals from restricted model
223 /// * `ssr_full` - Sum of squared residuals from full model
224 /// * `n` - Number of observations
225 /// * `k_full` - Number of parameters in full model
226 /// * `k_restricted` - Number of parameters in restricted model
227 ///
228 /// # Returns
229 /// Tuple of (f_statistic, p_value, df_numerator, df_denominator)
230 ///
231 /// # Formula
232 /// F = [(SSR_r - SSR_f) / (k_f - k_r)] / [SSR_f / (n - k_f)]
233 pub fn f_test_nested(
234 ssr_restricted: f64,
235 ssr_full: f64,
236 n: usize,
237 k_full: usize,
238 k_restricted: usize,
239 ) -> Result<(f64, f64, usize, usize), GreenersError> {
240 use statrs::distribution::{ContinuousCDF, FisherSnedecor};
241
242 let df_num = k_full - k_restricted;
243 let df_denom = n - k_full;
244
245 if df_num == 0 {
246 return Err(GreenersError::ShapeMismatch(
247 "Models have same number of parameters".to_string(),
248 ));
249 }
250
251 // F-statistic
252 let f_stat = ((ssr_restricted - ssr_full) / df_num as f64) / (ssr_full / df_denom as f64);
253
254 // p-value from F distribution
255 let f_dist = FisherSnedecor::new(df_num as f64, df_denom as f64)
256 .map_err(|_| GreenersError::OptimizationFailed)?;
257 let p_value = 1.0 - f_dist.cdf(f_stat);
258
259 Ok((f_stat, p_value, df_num, df_denom))
260 }
261
262 /// Joint significance test (all coefficients except intercept = 0)
263 ///
264 /// Convenience wrapper for Wald test of all slope coefficients
265 ///
266 /// # Arguments
267 /// * `beta` - Coefficient estimates (including intercept)
268 /// * `cov_matrix` - Variance-covariance matrix
269 /// * `has_intercept` - Whether first coefficient is intercept
270 ///
271 /// # Returns
272 /// Tuple of (test_statistic, p_value, degrees_of_freedom)
273 pub fn joint_significance(
274 beta: &Array1<f64>,
275 cov_matrix: &Array2<f64>,
276 has_intercept: bool,
277 ) -> Result<(f64, f64, usize), GreenersError> {
278 let k = beta.len();
279 let start_idx = if has_intercept { 1 } else { 0 };
280 let m = k - start_idx;
281
282 if m == 0 {
283 return Err(GreenersError::ShapeMismatch(
284 "No slope coefficients to test".to_string(),
285 ));
286 }
287
288 // Build restriction matrix: test all slope coefficients = 0
289 let mut r = Array2::<f64>::zeros((m, k));
290 for i in 0..m {
291 r[[i, start_idx + i]] = 1.0;
292 }
293
294 let q = Array1::<f64>::zeros(m);
295
296 Self::wald_test(beta, cov_matrix, &r, &q)
297 }
298}