Skip to main content

tacet_core/statistics/
bootstrap.rs

1//! Block bootstrap resampling for time series data.
2//!
3//! This module implements block bootstrap resampling which preserves
4//! the autocorrelation structure of timing measurements. This is critical
5//! for accurate covariance estimation when measurements have temporal
6//! dependencies.
7
8extern crate alloc;
9
10use alloc::vec::Vec;
11
12use rand::Rng;
13
14use crate::math;
15use crate::types::TimingSample;
16
17/// Counter-based RNG seed generation using SplitMix64.
18///
19/// This is a stateless PRF that generates deterministic, well-distributed
20/// seeds from a base seed and counter. Using this instead of simple addition
21/// provides better statistical properties and avoids sequential correlation.
22///
23/// # Arguments
24///
25/// * `base_seed` - Base random seed
26/// * `counter` - Iteration counter (0, 1, 2, ...)
27///
28/// # Returns
29///
30/// A 64-bit seed suitable for initializing an RNG.
31///
32/// # Performance
33///
34/// ~2-3ns per call (negligible compared to ~100ns for RNG initialization).
35#[inline]
36pub fn counter_rng_seed(base_seed: u64, counter: u64) -> u64 {
37    // SplitMix64: high-quality 64-bit hash function
38    // See: https://xoshiro.di.unimi.it/splitmix64.c
39    let mut z = base_seed.wrapping_add(counter.wrapping_mul(0x9e3779b97f4a7c15));
40    z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
41    z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
42    z ^ (z >> 31)
43}
44
45/// Compute the optimal block size for block bootstrap using n^(1/3) scaling.
46///
47/// The block length follows Politis & Romano (1994) and Kunsch (1989):
48/// l(n) = O(n^(1/3)) balances bias-variance tradeoff for stationary data.
49///
50/// The constant 1.3 is an engineering default that works well empirically.
51/// At n=30k, this gives ~40 blocks (vs ~173 with sqrt(n)), preserving
52/// autocorrelation structure better.
53///
54/// # Arguments
55///
56/// * `n` - Number of samples in the original data
57///
58/// # Returns
59///
60/// The recommended block size, minimum 1.
61pub fn compute_block_size(n: usize) -> usize {
62    let block_size = math::ceil(1.3 * math::cbrt(n as f64)) as usize;
63    block_size.max(1)
64}
65
66/// Perform block bootstrap resampling into an existing buffer.
67///
68/// This is an optimized version of `block_bootstrap_resample` that writes
69/// into a preallocated buffer instead of allocating a new Vec. This eliminates
70/// allocation overhead in hot loops.
71///
72/// # Arguments
73///
74/// * `data` - Slice of timing measurements for one class
75/// * `block_size` - Size of contiguous blocks to resample
76/// * `rng` - Random number generator
77/// * `out` - Output buffer (must have same length as `data`)
78///
79/// # Panics
80///
81/// Panics if `out.len() != data.len()`.
82///
83/// # Performance
84///
85/// ~2-3× faster than allocating version when called repeatedly with
86/// the same buffer, due to eliminating allocator overhead and using
87/// fast `copy_from_slice` memcpy.
88pub fn block_bootstrap_resample_into<R: Rng>(
89    data: &[f64],
90    block_size: usize,
91    rng: &mut R,
92    out: &mut [f64],
93) {
94    assert_eq!(
95        out.len(),
96        data.len(),
97        "Output buffer must have same length as input data"
98    );
99
100    if data.is_empty() {
101        return;
102    }
103
104    let n = data.len();
105    let block_size = block_size.max(1).min(n);
106
107    // Number of possible starting positions for blocks
108    let num_block_starts = n.saturating_sub(block_size) + 1;
109    if num_block_starts == 0 {
110        out.copy_from_slice(data);
111        return;
112    }
113
114    let mut pos = 0;
115
116    // Sample blocks until we fill the output buffer
117    while pos < n {
118        // Random starting position for this block
119        let start = rng.random_range(0..num_block_starts);
120        let block_end = (start + block_size).min(n);
121        let block_len = block_end - start;
122
123        // How much can we copy without overflowing output?
124        let copy_len = block_len.min(n - pos);
125
126        // Fast memcpy of the block
127        out[pos..pos + copy_len].copy_from_slice(&data[start..start + copy_len]);
128
129        pos += copy_len;
130    }
131}
132
133/// Perform block bootstrap resampling on timing measurements.
134///
135/// Resamples measurements from a single class (fixed or random) while
136/// preserving autocorrelation structure. Blocks of consecutive measurements
137/// are sampled with replacement.
138///
139/// # Arguments
140///
141/// * `data` - Slice of timing measurements for one class
142/// * `block_size` - Size of contiguous blocks to resample
143/// * `rng` - Random number generator
144///
145/// # Returns
146///
147/// A new vector of resampled measurements with the same length as input.
148///
149/// # Algorithm
150///
151/// 1. Divide data into overlapping blocks of size `block_size`
152/// 2. Sample blocks with replacement until we have n samples
153/// 3. Truncate to exactly n samples
154pub fn block_bootstrap_resample<R: Rng>(data: &[f64], block_size: usize, rng: &mut R) -> Vec<f64> {
155    if data.is_empty() {
156        return Vec::new();
157    }
158
159    let n = data.len();
160    let block_size = block_size.max(1).min(n);
161
162    // Number of possible starting positions for blocks
163    let num_block_starts = n.saturating_sub(block_size) + 1;
164    if num_block_starts == 0 {
165        return data.to_vec();
166    }
167
168    let mut result = Vec::with_capacity(n);
169
170    // Sample blocks until we have enough data
171    while result.len() < n {
172        // Random starting position for this block
173        let start = rng.random_range(0..num_block_starts);
174        let end = (start + block_size).min(n);
175
176        // Add the block to our resampled data
177        for &value in data.iter().take(end).skip(start) {
178            if result.len() >= n {
179                break;
180            }
181            result.push(value);
182        }
183    }
184
185    result
186}
187
188/// Perform stratified block bootstrap on interleaved measurements.
189///
190/// When measurements alternate between fixed and random classes,
191/// this function resamples within each class separately to preserve
192/// both the autocorrelation structure and class separation.
193///
194/// # Arguments
195///
196/// * `fixed_data` - Measurements for the fixed input class
197/// * `random_data` - Measurements for the random input class
198/// * `block_size` - Size of contiguous blocks to resample
199/// * `rng` - Random number generator
200///
201/// # Returns
202///
203/// A tuple of (resampled_fixed, resampled_random) vectors.
204#[cfg(test)]
205pub fn stratified_block_bootstrap<R: Rng>(
206    fixed_data: &[f64],
207    random_data: &[f64],
208    block_size: usize,
209    rng: &mut R,
210) -> (Vec<f64>, Vec<f64>) {
211    let resampled_fixed = block_bootstrap_resample(fixed_data, block_size, rng);
212    let resampled_random = block_bootstrap_resample(random_data, block_size, rng);
213
214    (resampled_fixed, resampled_random)
215}
216
217/// Perform joint block bootstrap resampling on interleaved measurements.
218///
219/// This preserves temporal pairing between fixed and random samples, which
220/// captures the cross-covariance Cov(q_F, q_R) > 0 from common-mode noise.
221/// Joint resampling gives the correct (smaller) Var(Δ), improving statistical
222/// power compared to independent resampling.
223///
224/// # Arguments
225///
226/// * `interleaved` - Samples in measurement order, each tagged with its class
227/// * `block_size` - Size of contiguous blocks to resample
228/// * `rng` - Random number generator
229///
230/// # Returns
231///
232/// A new vector of resampled `TimingSample`s with the same length as input.
233///
234/// # Algorithm
235///
236/// 1. Sample blocks of consecutive measurements (preserving F/R pairing)
237/// 2. The class labels travel with their timing values
238/// 3. After resampling, caller splits by class and computes quantile difference
239#[allow(dead_code)]
240pub fn block_bootstrap_resample_joint<R: Rng>(
241    interleaved: &[TimingSample],
242    block_size: usize,
243    rng: &mut R,
244) -> Vec<TimingSample> {
245    if interleaved.is_empty() {
246        return Vec::new();
247    }
248
249    let n = interleaved.len();
250    let block_size = block_size.max(1).min(n);
251
252    // Number of possible starting positions for blocks
253    let num_block_starts = n.saturating_sub(block_size) + 1;
254    if num_block_starts == 0 {
255        return interleaved.to_vec();
256    }
257
258    let mut result = Vec::with_capacity(n);
259
260    // Sample blocks until we have enough data
261    while result.len() < n {
262        // Random starting position for this block
263        let start = rng.random_range(0..num_block_starts);
264        let end = (start + block_size).min(n);
265
266        // Add the entire block (preserving class labels)
267        for sample in interleaved.iter().take(end).skip(start) {
268            if result.len() >= n {
269                break;
270            }
271            result.push(*sample);
272        }
273    }
274
275    result
276}
277
278/// Perform joint block bootstrap resampling into an existing buffer.
279///
280/// This is an optimized version that writes into a preallocated buffer.
281/// Used in parallel bootstrap for better performance.
282///
283/// # Arguments
284///
285/// * `interleaved` - Samples in measurement order, each tagged with its class
286/// * `block_size` - Size of contiguous blocks to resample
287/// * `rng` - Random number generator
288/// * `out` - Output buffer (must have same length as `interleaved`)
289///
290/// # Panics
291///
292/// Panics if `out.len() != interleaved.len()`.
293pub fn block_bootstrap_resample_joint_into<R: Rng>(
294    interleaved: &[TimingSample],
295    block_size: usize,
296    rng: &mut R,
297    out: &mut [TimingSample],
298) {
299    assert_eq!(
300        out.len(),
301        interleaved.len(),
302        "Output buffer must have same length as input data"
303    );
304
305    if interleaved.is_empty() {
306        return;
307    }
308
309    let n = interleaved.len();
310    let block_size = block_size.max(1).min(n);
311
312    // Number of possible starting positions for blocks
313    let num_block_starts = n.saturating_sub(block_size) + 1;
314    if num_block_starts == 0 {
315        out.copy_from_slice(interleaved);
316        return;
317    }
318
319    let mut pos = 0;
320
321    // Sample blocks until we fill the output buffer
322    while pos < n {
323        // Random starting position for this block
324        let start = rng.random_range(0..num_block_starts);
325        let block_end = (start + block_size).min(n);
326        let block_len = block_end - start;
327
328        // How much can we copy without overflowing output?
329        let copy_len = block_len.min(n - pos);
330
331        // Copy the block (preserving class labels)
332        out[pos..pos + copy_len].copy_from_slice(&interleaved[start..start + copy_len]);
333
334        pos += copy_len;
335    }
336}
337
338#[cfg(test)]
339mod tests {
340    use super::*;
341    use rand::SeedableRng;
342    use rand_xoshiro::Xoshiro256PlusPlus;
343
344    #[test]
345    fn test_block_size_computation() {
346        // Formula: ceil(1.3 * n^(1/3))
347        // n=100:   1.3 * 100^(1/3) = 1.3 * 4.6416 = 6.03 -> ceil = 7
348        // n=10000: 1.3 * 10000^(1/3) = 1.3 * 21.544 = 28.007 -> ceil = 29
349        // n=30000: 1.3 * 30000^(1/3) = 1.3 * 31.072 = 40.39 -> ceil = 41
350        assert_eq!(compute_block_size(100), 7);
351        assert_eq!(compute_block_size(10000), 29);
352        assert_eq!(compute_block_size(30000), 41);
353        assert_eq!(compute_block_size(1), 2); // 1.3 * 1^(1/3) = 1.3 -> ceil = 2
354        assert_eq!(compute_block_size(0), 1); // Minimum is 1
355    }
356
357    #[test]
358    fn test_bootstrap_preserves_length() {
359        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
360        let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
361
362        let resampled = block_bootstrap_resample(&data, 10, &mut rng);
363        assert_eq!(resampled.len(), data.len());
364    }
365
366    #[test]
367    fn test_bootstrap_samples_from_data() {
368        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
369        let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
370
371        let resampled = block_bootstrap_resample(&data, 10, &mut rng);
372
373        // All resampled values should be from original data
374        for val in &resampled {
375            assert!(data.contains(val));
376        }
377    }
378
379    #[test]
380    fn test_empty_data() {
381        let data: Vec<f64> = vec![];
382        let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
383
384        let resampled = block_bootstrap_resample(&data, 10, &mut rng);
385        assert!(resampled.is_empty());
386    }
387
388    #[test]
389    fn test_stratified_bootstrap() {
390        let fixed: Vec<f64> = (0..50).map(|x| x as f64).collect();
391        let random: Vec<f64> = (50..100).map(|x| x as f64).collect();
392        let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
393
394        let (resampled_fixed, resampled_random) =
395            stratified_block_bootstrap(&fixed, &random, 7, &mut rng);
396
397        assert_eq!(resampled_fixed.len(), fixed.len());
398        assert_eq!(resampled_random.len(), random.len());
399
400        // Fixed resamples should come from fixed data
401        for val in &resampled_fixed {
402            assert!(fixed.contains(val));
403        }
404
405        // Random resamples should come from random data
406        for val in &resampled_random {
407            assert!(random.contains(val));
408        }
409    }
410}