1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
//! Univariate analysis

mod bootstrap;
mod percentiles;
mod resamples;
mod sample;

pub mod kde;
pub mod mixed;
pub mod outliers;

use float::Float;
use num_cpus;
use std::cmp;
use thread_scoped as thread;

use tuple::{Tuple, TupledDistributionsBuilder};

use self::resamples::Resamples;

pub use self::percentiles::Percentiles;
pub use self::sample::Sample;

/// Performs a two-sample bootstrap
///
/// - Multithreaded
/// - Time: `O(nresamples)`
/// - Memory: `O(nresamples)`
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_lossless))]
pub fn bootstrap<A, B, T, S>(
    a: &Sample<A>,
    b: &Sample<B>,
    nresamples: usize,
    statistic: S,
) -> T::Distributions
where
    A: Float,
    B: Float,
    S: Fn(&Sample<A>, &Sample<B>) -> T,
    S: Sync,
    T: Tuple,
    T::Distributions: Send,
    T::Builder: Send,
{
    let ncpus = num_cpus::get();

    unsafe {
        // TODO need some sensible threshold to trigger the multi-threaded path
        if true {
            //ncpus > 1 && nresamples > a.len() + b.len() {
            let granularity = nresamples / ncpus + 1;
            let granularity_sqrt = (granularity as f64).sqrt().ceil() as usize;
            let statistic = &statistic;
            let mut cutoff = 0;

            let chunks = (0..ncpus)
                .map(|_| {
                    let mut sub_distributions: T::Builder =
                        TupledDistributionsBuilder::new(granularity);
                    let start = cutoff;
                    let end = cmp::min(start + granularity, nresamples);
                    cutoff = end;

                    thread::scoped(move || {
                        let mut a_resamples = Resamples::new(a);
                        let mut b_resamples = Resamples::new(b);
                        let mut i = start;

                        for _ in 0..granularity_sqrt {
                            let a_resample = a_resamples.next();

                            for _ in 0..granularity_sqrt {
                                if i == end {
                                    return sub_distributions;
                                }

                                let b_resample = b_resamples.next();

                                sub_distributions.push(statistic(a_resample, b_resample));

                                i += 1;
                            }
                        }
                        sub_distributions
                    })
                })
                .collect::<Vec<_>>();

            let mut builder: T::Builder = TupledDistributionsBuilder::new(nresamples);
            for chunk in chunks {
                builder.extend(&mut (chunk.join()));
            }
            builder.complete()
        } else {
            let nresamples_sqrt = (nresamples as f64).sqrt().ceil() as usize;
            let mut a_resamples = Resamples::new(a);
            let mut b_resamples = Resamples::new(b);
            let mut distributions: T::Builder = TupledDistributionsBuilder::new(nresamples);

            let mut i = 0;
            'outer: for _ in 0..nresamples_sqrt {
                let a_resample = a_resamples.next();

                for _ in 0..nresamples_sqrt {
                    if i == nresamples {
                        break 'outer;
                    }

                    let b_resample = b_resamples.next();

                    distributions.push(statistic(a_resample, b_resample));

                    i += 1;
                }
            }

            distributions.complete()
        }
    }
}