//! Bootstrap resampling and hypothesis-testing utilities (`greeners/bootstrap.rs`).
use ndarray::{Array1, Array2};
use ndarray_linalg::Inverse;
use rand::seq::SliceRandom;
use rand::thread_rng;

use crate::error::GreenersError;
6
7/// Bootstrap methods for statistical inference
8pub struct Bootstrap;
9
10impl Bootstrap {
11    /// Pairs bootstrap for OLS regression
12    ///
13    /// Resamples (y, X) pairs with replacement to estimate sampling distribution
14    ///
15    /// # Arguments
16    /// * `y` - Dependent variable (n × 1)
17    /// * `x` - Design matrix (n × k)
18    /// * `n_bootstrap` - Number of bootstrap replications (recommended: 1000-10000)
19    ///
20    /// # Returns
21    /// Array of bootstrap coefficient estimates (n_bootstrap × k)
22    ///
23    /// # Examples
24    ///
25    /// ```rust
26    /// use greeners::Bootstrap;
27    /// use ndarray::{Array1, Array2};
28    ///
29    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
30    /// let y = Array1::from(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
31    /// let x = Array2::from_shape_vec((5, 2), vec![
32    ///     1.0, 1.0,
33    ///     1.0, 2.0,
34    ///     1.0, 3.0,
35    ///     1.0, 4.0,
36    ///     1.0, 5.0
37    /// ])?;
38    ///
39    /// let boot_coefs = Bootstrap::pairs_bootstrap(&y, &x, 100)?;
40    /// // ...
41    /// # Ok(())
42    /// # }
43    /// ```
44    pub fn pairs_bootstrap(
45        y: &Array1<f64>,
46        x: &Array2<f64>,
47        n_bootstrap: usize,
48    ) -> Result<Array2<f64>, GreenersError> {
49        let n = y.len();
50        let k = x.ncols();
51
52        if x.nrows() != n {
53            return Err(GreenersError::ShapeMismatch(
54                "X and y must have same number of rows".to_string(),
55            ));
56        }
57
58        let mut rng = thread_rng();
59        let indices: Vec<usize> = (0..n).collect();
60
61        // Store bootstrap coefficients
62        let mut boot_coefs = Array2::<f64>::zeros((n_bootstrap, k));
63
64        for b in 0..n_bootstrap {
65            // Resample indices with replacement
66            let mut boot_indices = vec![0; n];
67            // for i in 0..n {
68            for indice in boot_indices.iter_mut().take(n) {
69                *indice = *indices.choose(&mut rng).unwrap();
70            }
71
72            // Create bootstrap sample
73            let mut y_boot = Array1::<f64>::zeros(n);
74            let mut x_boot = Array2::<f64>::zeros((n, k));
75
76            for (i, &idx) in boot_indices.iter().enumerate() {
77                y_boot[i] = y[idx];
78                x_boot.row_mut(i).assign(&x.row(idx));
79            }
80
81            // Fit OLS on bootstrap sample
82            let xt_x = x_boot.t().dot(&x_boot);
83            let xt_y = x_boot.t().dot(&y_boot);
84
85            match xt_x.inv() {
86                Ok(xt_x_inv) => {
87                    let beta_boot = xt_x_inv.dot(&xt_y);
88                    boot_coefs.row_mut(b).assign(&beta_boot);
89                }
90                Err(_) => {
91                    // Singular matrix in this bootstrap sample - use original estimate
92                    // This is rare but can happen with small samples
93                    let xt_x_orig = x.t().dot(x);
94                    let xt_y_orig = x.t().dot(y);
95                    if let Ok(inv) = xt_x_orig.inv() {
96                        let beta_orig = inv.dot(&xt_y_orig);
97                        boot_coefs.row_mut(b).assign(&beta_orig);
98                    }
99                }
100            }
101        }
102
103        Ok(boot_coefs)
104    }
105
106    /// Calculate bootstrap standard errors from bootstrap coefficient matrix
107    ///
108    /// # Arguments
109    /// * `boot_coefs` - Bootstrap coefficient matrix (n_bootstrap × k)
110    ///
111    /// # Returns
112    /// Standard errors for each coefficient
113    pub fn bootstrap_se(boot_coefs: &Array2<f64>) -> Array1<f64> {
114        boot_coefs.std_axis(ndarray::Axis(0), 0.0)
115    }
116
117    /// Calculate bootstrap percentile confidence intervals
118    ///
119    /// # Arguments
120    /// * `boot_coefs` - Bootstrap coefficient matrix (n_bootstrap × k)
121    /// * `alpha` - Significance level (e.g., 0.05 for 95% CI)
122    ///
123    /// # Returns
124    /// Tuple of (lower_bounds, upper_bounds)
125    pub fn percentile_ci(boot_coefs: &Array2<f64>, alpha: f64) -> (Array1<f64>, Array1<f64>) {
126        let k = boot_coefs.ncols();
127        let n_boot = boot_coefs.nrows();
128
129        let lower_idx = ((alpha / 2.0) * n_boot as f64).floor() as usize;
130        let upper_idx = ((1.0 - alpha / 2.0) * n_boot as f64).ceil() as usize;
131
132        let mut lower = Array1::<f64>::zeros(k);
133        let mut upper = Array1::<f64>::zeros(k);
134
135        for j in 0..k {
136            let mut col: Vec<f64> = boot_coefs.column(j).to_vec();
137            col.sort_by(|a, b| a.partial_cmp(b).unwrap());
138
139            lower[j] = col[lower_idx.min(n_boot - 1)];
140            upper[j] = col[upper_idx.min(n_boot - 1)];
141        }
142
143        (lower, upper)
144    }
145}
146
147/// Hypothesis testing methods
148pub struct HypothesisTest;
149
150impl HypothesisTest {
151    /// Wald test for linear restrictions on coefficients
152    ///
153    /// Tests H₀: R·β = q against H₁: R·β ≠ q
154    ///
155    /// # Arguments
156    /// * `beta` - Coefficient estimates (k × 1)
157    /// * `cov_matrix` - Variance-covariance matrix (k × k)
158    /// * `r` - Restriction matrix (m × k) where m = number of restrictions
159    /// * `q` - Restriction values (m × 1), usually zeros
160    ///
161    /// # Returns
162    /// Tuple of (wald_statistic, p_value, degrees_of_freedom)
163    ///
164    /// # Examples
165    ///
166    /// ```rust
167    /// use greeners::HypothesisTest;
168    /// use ndarray::{Array1, Array2};
169    ///
170    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
171    /// // Setup de dados falsos para o exemplo
172    /// let beta = Array1::from(vec![0.5, 2.0, 0.0]);
173    /// let cov_matrix = Array2::eye(3); // Matriz identidade como exemplo
174    ///
175    /// // Testando se beta[1] = 0 e beta[2] = 0
176    /// let r = Array2::from_shape_vec((2, 3), vec![
177    ///     0.0, 1.0, 0.0,
178    ///     0.0, 0.0, 1.0,
179    /// ])?;
180    /// let q = Array1::from(vec![0.0, 0.0]);
181    ///
182    /// let (wald_stat, p_value, df) = HypothesisTest::wald_test(&beta, &cov_matrix, &r, &q)?;
183    /// # Ok(())
184    /// # }
185    /// ```
186    pub fn wald_test(
187        beta: &Array1<f64>,
188        cov_matrix: &Array2<f64>,
189        r: &Array2<f64>,
190        q: &Array1<f64>,
191    ) -> Result<(f64, f64, usize), GreenersError> {
192        use statrs::distribution::{ChiSquared, ContinuousCDF};
193
194        let m = r.nrows(); // Number of restrictions
195
196        // Compute R·β - q
197        let r_beta = r.dot(beta);
198        let diff = &r_beta - q;
199
200        // Compute R·Cov(β)·R'
201        let r_cov = r.dot(cov_matrix);
202        let r_cov_rt = r_cov.dot(&r.t());
203
204        // Invert R·Cov(β)·R'
205        let r_cov_rt_inv = r_cov_rt.inv()?;
206
207        // Wald statistic: (R·β - q)' · [R·Cov(β)·R']^(-1) · (R·β - q)
208        let wald_stat = diff.dot(&r_cov_rt_inv.dot(&diff));
209
210        // Under H₀, Wald ~ χ²(m)
211        let chi2_dist = ChiSquared::new(m as f64).map_err(|_| GreenersError::OptimizationFailed)?;
212        let p_value = 1.0 - chi2_dist.cdf(wald_stat);
213
214        Ok((wald_stat, p_value, m))
215    }
216
217    /// F-test for nested models (OLS specific)
218    ///
219    /// Tests whether restricted model is adequate vs full model
220    ///
221    /// # Arguments
222    /// * `ssr_restricted` - Sum of squared residuals from restricted model
223    /// * `ssr_full` - Sum of squared residuals from full model
224    /// * `n` - Number of observations
225    /// * `k_full` - Number of parameters in full model
226    /// * `k_restricted` - Number of parameters in restricted model
227    ///
228    /// # Returns
229    /// Tuple of (f_statistic, p_value, df_numerator, df_denominator)
230    ///
231    /// # Formula
232    /// F = [(SSR_r - SSR_f) / (k_f - k_r)] / [SSR_f / (n - k_f)]
233    pub fn f_test_nested(
234        ssr_restricted: f64,
235        ssr_full: f64,
236        n: usize,
237        k_full: usize,
238        k_restricted: usize,
239    ) -> Result<(f64, f64, usize, usize), GreenersError> {
240        use statrs::distribution::{ContinuousCDF, FisherSnedecor};
241
242        let df_num = k_full - k_restricted;
243        let df_denom = n - k_full;
244
245        if df_num == 0 {
246            return Err(GreenersError::ShapeMismatch(
247                "Models have same number of parameters".to_string(),
248            ));
249        }
250
251        // F-statistic
252        let f_stat = ((ssr_restricted - ssr_full) / df_num as f64) / (ssr_full / df_denom as f64);
253
254        // p-value from F distribution
255        let f_dist = FisherSnedecor::new(df_num as f64, df_denom as f64)
256            .map_err(|_| GreenersError::OptimizationFailed)?;
257        let p_value = 1.0 - f_dist.cdf(f_stat);
258
259        Ok((f_stat, p_value, df_num, df_denom))
260    }
261
262    /// Joint significance test (all coefficients except intercept = 0)
263    ///
264    /// Convenience wrapper for Wald test of all slope coefficients
265    ///
266    /// # Arguments
267    /// * `beta` - Coefficient estimates (including intercept)
268    /// * `cov_matrix` - Variance-covariance matrix
269    /// * `has_intercept` - Whether first coefficient is intercept
270    ///
271    /// # Returns
272    /// Tuple of (test_statistic, p_value, degrees_of_freedom)
273    pub fn joint_significance(
274        beta: &Array1<f64>,
275        cov_matrix: &Array2<f64>,
276        has_intercept: bool,
277    ) -> Result<(f64, f64, usize), GreenersError> {
278        let k = beta.len();
279        let start_idx = if has_intercept { 1 } else { 0 };
280        let m = k - start_idx;
281
282        if m == 0 {
283            return Err(GreenersError::ShapeMismatch(
284                "No slope coefficients to test".to_string(),
285            ));
286        }
287
288        // Build restriction matrix: test all slope coefficients = 0
289        let mut r = Array2::<f64>::zeros((m, k));
290        for i in 0..m {
291            r[[i, start_idx + i]] = 1.0;
292        }
293
294        let q = Array1::<f64>::zeros(m);
295
296        Self::wald_test(beta, cov_matrix, &r, &q)
297    }
298}