tacet_core/statistics/bootstrap.rs
1//! Block bootstrap resampling for time series data.
2//!
3//! This module implements block bootstrap resampling which preserves
4//! the autocorrelation structure of timing measurements. This is critical
5//! for accurate covariance estimation when measurements have temporal
6//! dependencies.
7
8extern crate alloc;
9
10use alloc::vec::Vec;
11
12use rand::Rng;
13
14use crate::math;
15use crate::types::TimingSample;
16
17/// Counter-based RNG seed generation using SplitMix64.
18///
19/// This is a stateless PRF that generates deterministic, well-distributed
20/// seeds from a base seed and counter. Using this instead of simple addition
21/// provides better statistical properties and avoids sequential correlation.
22///
23/// # Arguments
24///
25/// * `base_seed` - Base random seed
26/// * `counter` - Iteration counter (0, 1, 2, ...)
27///
28/// # Returns
29///
30/// A 64-bit seed suitable for initializing an RNG.
31///
32/// # Performance
33///
34/// ~2-3ns per call (negligible compared to ~100ns for RNG initialization).
35#[inline]
36pub fn counter_rng_seed(base_seed: u64, counter: u64) -> u64 {
37 // SplitMix64: high-quality 64-bit hash function
38 // See: https://xoshiro.di.unimi.it/splitmix64.c
39 let mut z = base_seed.wrapping_add(counter.wrapping_mul(0x9e3779b97f4a7c15));
40 z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
41 z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
42 z ^ (z >> 31)
43}
44
45/// Compute the optimal block size for block bootstrap using n^(1/3) scaling.
46///
47/// The block length follows Politis & Romano (1994) and Kunsch (1989):
48/// l(n) = O(n^(1/3)) balances bias-variance tradeoff for stationary data.
49///
50/// The constant 1.3 is an engineering default that works well empirically.
51/// At n=30k, this gives ~40 blocks (vs ~173 with sqrt(n)), preserving
52/// autocorrelation structure better.
53///
54/// # Arguments
55///
56/// * `n` - Number of samples in the original data
57///
58/// # Returns
59///
60/// The recommended block size, minimum 1.
61pub fn compute_block_size(n: usize) -> usize {
62 let block_size = math::ceil(1.3 * math::cbrt(n as f64)) as usize;
63 block_size.max(1)
64}
65
66/// Perform block bootstrap resampling into an existing buffer.
67///
68/// This is an optimized version of `block_bootstrap_resample` that writes
69/// into a preallocated buffer instead of allocating a new Vec. This eliminates
70/// allocation overhead in hot loops.
71///
72/// # Arguments
73///
74/// * `data` - Slice of timing measurements for one class
75/// * `block_size` - Size of contiguous blocks to resample
76/// * `rng` - Random number generator
77/// * `out` - Output buffer (must have same length as `data`)
78///
79/// # Panics
80///
81/// Panics if `out.len() != data.len()`.
82///
83/// # Performance
84///
85/// ~2-3× faster than allocating version when called repeatedly with
86/// the same buffer, due to eliminating allocator overhead and using
87/// fast `copy_from_slice` memcpy.
88pub fn block_bootstrap_resample_into<R: Rng>(
89 data: &[f64],
90 block_size: usize,
91 rng: &mut R,
92 out: &mut [f64],
93) {
94 assert_eq!(
95 out.len(),
96 data.len(),
97 "Output buffer must have same length as input data"
98 );
99
100 if data.is_empty() {
101 return;
102 }
103
104 let n = data.len();
105 let block_size = block_size.max(1).min(n);
106
107 // Number of possible starting positions for blocks
108 let num_block_starts = n.saturating_sub(block_size) + 1;
109 if num_block_starts == 0 {
110 out.copy_from_slice(data);
111 return;
112 }
113
114 let mut pos = 0;
115
116 // Sample blocks until we fill the output buffer
117 while pos < n {
118 // Random starting position for this block
119 let start = rng.random_range(0..num_block_starts);
120 let block_end = (start + block_size).min(n);
121 let block_len = block_end - start;
122
123 // How much can we copy without overflowing output?
124 let copy_len = block_len.min(n - pos);
125
126 // Fast memcpy of the block
127 out[pos..pos + copy_len].copy_from_slice(&data[start..start + copy_len]);
128
129 pos += copy_len;
130 }
131}
132
133/// Perform block bootstrap resampling on timing measurements.
134///
135/// Resamples measurements from a single class (fixed or random) while
136/// preserving autocorrelation structure. Blocks of consecutive measurements
137/// are sampled with replacement.
138///
139/// # Arguments
140///
141/// * `data` - Slice of timing measurements for one class
142/// * `block_size` - Size of contiguous blocks to resample
143/// * `rng` - Random number generator
144///
145/// # Returns
146///
147/// A new vector of resampled measurements with the same length as input.
148///
149/// # Algorithm
150///
151/// 1. Divide data into overlapping blocks of size `block_size`
152/// 2. Sample blocks with replacement until we have n samples
153/// 3. Truncate to exactly n samples
154pub fn block_bootstrap_resample<R: Rng>(data: &[f64], block_size: usize, rng: &mut R) -> Vec<f64> {
155 if data.is_empty() {
156 return Vec::new();
157 }
158
159 let n = data.len();
160 let block_size = block_size.max(1).min(n);
161
162 // Number of possible starting positions for blocks
163 let num_block_starts = n.saturating_sub(block_size) + 1;
164 if num_block_starts == 0 {
165 return data.to_vec();
166 }
167
168 let mut result = Vec::with_capacity(n);
169
170 // Sample blocks until we have enough data
171 while result.len() < n {
172 // Random starting position for this block
173 let start = rng.random_range(0..num_block_starts);
174 let end = (start + block_size).min(n);
175
176 // Add the block to our resampled data
177 for &value in data.iter().take(end).skip(start) {
178 if result.len() >= n {
179 break;
180 }
181 result.push(value);
182 }
183 }
184
185 result
186}
187
188/// Perform stratified block bootstrap on interleaved measurements.
189///
190/// When measurements alternate between fixed and random classes,
191/// this function resamples within each class separately to preserve
192/// both the autocorrelation structure and class separation.
193///
194/// # Arguments
195///
196/// * `fixed_data` - Measurements for the fixed input class
197/// * `random_data` - Measurements for the random input class
198/// * `block_size` - Size of contiguous blocks to resample
199/// * `rng` - Random number generator
200///
201/// # Returns
202///
203/// A tuple of (resampled_fixed, resampled_random) vectors.
204#[cfg(test)]
205pub fn stratified_block_bootstrap<R: Rng>(
206 fixed_data: &[f64],
207 random_data: &[f64],
208 block_size: usize,
209 rng: &mut R,
210) -> (Vec<f64>, Vec<f64>) {
211 let resampled_fixed = block_bootstrap_resample(fixed_data, block_size, rng);
212 let resampled_random = block_bootstrap_resample(random_data, block_size, rng);
213
214 (resampled_fixed, resampled_random)
215}
216
217/// Perform joint block bootstrap resampling on interleaved measurements.
218///
219/// This preserves temporal pairing between fixed and random samples, which
220/// captures the cross-covariance Cov(q_F, q_R) > 0 from common-mode noise.
221/// Joint resampling gives the correct (smaller) Var(Δ), improving statistical
222/// power compared to independent resampling.
223///
224/// # Arguments
225///
226/// * `interleaved` - Samples in measurement order, each tagged with its class
227/// * `block_size` - Size of contiguous blocks to resample
228/// * `rng` - Random number generator
229///
230/// # Returns
231///
232/// A new vector of resampled `TimingSample`s with the same length as input.
233///
234/// # Algorithm
235///
236/// 1. Sample blocks of consecutive measurements (preserving F/R pairing)
237/// 2. The class labels travel with their timing values
238/// 3. After resampling, caller splits by class and computes quantile difference
239#[allow(dead_code)]
240pub fn block_bootstrap_resample_joint<R: Rng>(
241 interleaved: &[TimingSample],
242 block_size: usize,
243 rng: &mut R,
244) -> Vec<TimingSample> {
245 if interleaved.is_empty() {
246 return Vec::new();
247 }
248
249 let n = interleaved.len();
250 let block_size = block_size.max(1).min(n);
251
252 // Number of possible starting positions for blocks
253 let num_block_starts = n.saturating_sub(block_size) + 1;
254 if num_block_starts == 0 {
255 return interleaved.to_vec();
256 }
257
258 let mut result = Vec::with_capacity(n);
259
260 // Sample blocks until we have enough data
261 while result.len() < n {
262 // Random starting position for this block
263 let start = rng.random_range(0..num_block_starts);
264 let end = (start + block_size).min(n);
265
266 // Add the entire block (preserving class labels)
267 for sample in interleaved.iter().take(end).skip(start) {
268 if result.len() >= n {
269 break;
270 }
271 result.push(*sample);
272 }
273 }
274
275 result
276}
277
278/// Perform joint block bootstrap resampling into an existing buffer.
279///
280/// This is an optimized version that writes into a preallocated buffer.
281/// Used in parallel bootstrap for better performance.
282///
283/// # Arguments
284///
285/// * `interleaved` - Samples in measurement order, each tagged with its class
286/// * `block_size` - Size of contiguous blocks to resample
287/// * `rng` - Random number generator
288/// * `out` - Output buffer (must have same length as `interleaved`)
289///
290/// # Panics
291///
292/// Panics if `out.len() != interleaved.len()`.
293pub fn block_bootstrap_resample_joint_into<R: Rng>(
294 interleaved: &[TimingSample],
295 block_size: usize,
296 rng: &mut R,
297 out: &mut [TimingSample],
298) {
299 assert_eq!(
300 out.len(),
301 interleaved.len(),
302 "Output buffer must have same length as input data"
303 );
304
305 if interleaved.is_empty() {
306 return;
307 }
308
309 let n = interleaved.len();
310 let block_size = block_size.max(1).min(n);
311
312 // Number of possible starting positions for blocks
313 let num_block_starts = n.saturating_sub(block_size) + 1;
314 if num_block_starts == 0 {
315 out.copy_from_slice(interleaved);
316 return;
317 }
318
319 let mut pos = 0;
320
321 // Sample blocks until we fill the output buffer
322 while pos < n {
323 // Random starting position for this block
324 let start = rng.random_range(0..num_block_starts);
325 let block_end = (start + block_size).min(n);
326 let block_len = block_end - start;
327
328 // How much can we copy without overflowing output?
329 let copy_len = block_len.min(n - pos);
330
331 // Copy the block (preserving class labels)
332 out[pos..pos + copy_len].copy_from_slice(&interleaved[start..start + copy_len]);
333
334 pos += copy_len;
335 }
336}
337
338#[cfg(test)]
339mod tests {
340 use super::*;
341 use rand::SeedableRng;
342 use rand_xoshiro::Xoshiro256PlusPlus;
343
344 #[test]
345 fn test_block_size_computation() {
346 // Formula: ceil(1.3 * n^(1/3))
347 // n=100: 1.3 * 100^(1/3) = 1.3 * 4.6416 = 6.03 -> ceil = 7
348 // n=10000: 1.3 * 10000^(1/3) = 1.3 * 21.544 = 28.007 -> ceil = 29
349 // n=30000: 1.3 * 30000^(1/3) = 1.3 * 31.072 = 40.39 -> ceil = 41
350 assert_eq!(compute_block_size(100), 7);
351 assert_eq!(compute_block_size(10000), 29);
352 assert_eq!(compute_block_size(30000), 41);
353 assert_eq!(compute_block_size(1), 2); // 1.3 * 1^(1/3) = 1.3 -> ceil = 2
354 assert_eq!(compute_block_size(0), 1); // Minimum is 1
355 }
356
357 #[test]
358 fn test_bootstrap_preserves_length() {
359 let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
360 let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
361
362 let resampled = block_bootstrap_resample(&data, 10, &mut rng);
363 assert_eq!(resampled.len(), data.len());
364 }
365
366 #[test]
367 fn test_bootstrap_samples_from_data() {
368 let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
369 let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
370
371 let resampled = block_bootstrap_resample(&data, 10, &mut rng);
372
373 // All resampled values should be from original data
374 for val in &resampled {
375 assert!(data.contains(val));
376 }
377 }
378
379 #[test]
380 fn test_empty_data() {
381 let data: Vec<f64> = vec![];
382 let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
383
384 let resampled = block_bootstrap_resample(&data, 10, &mut rng);
385 assert!(resampled.is_empty());
386 }
387
388 #[test]
389 fn test_stratified_bootstrap() {
390 let fixed: Vec<f64> = (0..50).map(|x| x as f64).collect();
391 let random: Vec<f64> = (50..100).map(|x| x as f64).collect();
392 let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
393
394 let (resampled_fixed, resampled_random) =
395 stratified_block_bootstrap(&fixed, &random, 7, &mut rng);
396
397 assert_eq!(resampled_fixed.len(), fixed.len());
398 assert_eq!(resampled_random.len(), random.len());
399
400 // Fixed resamples should come from fixed data
401 for val in &resampled_fixed {
402 assert!(fixed.contains(val));
403 }
404
405 // Random resamples should come from random data
406 for val in &resampled_random {
407 assert!(random.contains(val));
408 }
409 }
410}