criterion_stats/univariate/
sample.rs

1use std::{cmp, mem, ops};
2
3use cast;
4use float::Float;
5use num_cpus;
6use thread_scoped as thread;
7
8use tuple::{Tuple, TupledDistributionsBuilder};
9use univariate::resamples::Resamples;
10use univariate::Percentiles;
11
12/// A collection of data points drawn from a population
13///
14/// Invariants:
15///
16/// - The sample contains at least 2 data points
17/// - The sample contains no `NaN`s
18pub struct Sample<A>([A]);
19
20// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module
21impl<A> Sample<A>
22where
23    A: Float,
24{
25    /// Creates a new sample from an existing slice
26    ///
27    /// # Panics
28    ///
29    /// Panics if `slice` contains any `NaN` or if `slice` has less than two elements
30    #[cfg_attr(feature = "cargo-clippy", allow(clippy::new_ret_no_self))]
31    pub fn new(slice: &[A]) -> &Sample<A> {
32        assert!(slice.len() > 1 && slice.iter().all(|x| !x.is_nan()));
33
34        unsafe { mem::transmute(slice) }
35    }
36
37    /// Returns the biggest element in the sample
38    ///
39    /// - Time: `O(length)`
40    pub fn max(&self) -> A {
41        let mut elems = self.iter();
42
43        match elems.next() {
44            Some(&head) => elems.fold(head, |a, &b| a.max(b)),
45            // NB `unreachable!` because `Sample` is guaranteed to have at least one data point
46            None => unreachable!(),
47        }
48    }
49
50    /// Returns the arithmetic average of the sample
51    ///
52    /// - Time: `O(length)`
53    pub fn mean(&self) -> A {
54        let n = self.len();
55
56        self.sum() / A::cast(n)
57    }
58
59    /// Returns the median absolute deviation
60    ///
61    /// The `median` can be optionally passed along to speed up (2X) the computation
62    ///
63    /// - Time: `O(length)`
64    /// - Memory: `O(length)`
65    pub fn median_abs_dev(&self, median: Option<A>) -> A
66    where
67        usize: cast::From<A, Output = Result<usize, cast::Error>>,
68    {
69        let median = median.unwrap_or_else(|| self.percentiles().median());
70
71        // NB Although this operation can be SIMD accelerated, the gain is negligible because the
72        // bottle neck is the sorting operation which is part of the computation of the median
73        let abs_devs = self.iter().map(|&x| (x - median).abs()).collect::<Vec<_>>();
74
75        let abs_devs: &Self = Self::new(&abs_devs);
76
77        abs_devs.percentiles().median() * A::cast(1.4826)
78    }
79
80    /// Returns the median absolute deviation as a percentage of the median
81    ///
82    /// - Time: `O(length)`
83    /// - Memory: `O(length)`
84    pub fn median_abs_dev_pct(&self) -> A
85    where
86        usize: cast::From<A, Output = Result<usize, cast::Error>>,
87    {
88        let _100 = A::cast(100);
89        let median = self.percentiles().median();
90        let mad = self.median_abs_dev(Some(median));
91
92        (mad / median) * _100
93    }
94
95    /// Returns the smallest element in the sample
96    ///
97    /// - Time: `O(length)`
98    pub fn min(&self) -> A {
99        let mut elems = self.iter();
100
101        match elems.next() {
102            Some(&elem) => elems.fold(elem, |a, &b| a.min(b)),
103            // NB `unreachable!` because `Sample` is guaranteed to have at least one data point
104            None => unreachable!(),
105        }
106    }
107
108    /// Returns a "view" into the percentiles of the sample
109    ///
110    /// This "view" makes consecutive computations of percentiles much faster (`O(1)`)
111    ///
112    /// - Time: `O(N log N) where N = length`
113    /// - Memory: `O(length)`
114    pub fn percentiles(&self) -> Percentiles<A>
115    where
116        usize: cast::From<A, Output = Result<usize, cast::Error>>,
117    {
118        use std::cmp::Ordering;
119
120        // NB This function assumes that there are no `NaN`s in the sample
121        fn cmp<T>(a: &T, b: &T) -> Ordering
122        where
123            T: PartialOrd,
124        {
125            if a < b {
126                Ordering::Less
127            } else if a == b {
128                Ordering::Equal
129            } else {
130                Ordering::Greater
131            }
132        }
133
134        let mut v = self.to_vec().into_boxed_slice();
135        v.sort_by(cmp);
136
137        // NB :-1: to intra-crate privacy rules
138        unsafe { mem::transmute(v) }
139    }
140
141    /// Returns the standard deviation of the sample
142    ///
143    /// The `mean` can be optionally passed along to speed up (2X) the computation
144    ///
145    /// - Time: `O(length)`
146    pub fn std_dev(&self, mean: Option<A>) -> A {
147        self.var(mean).sqrt()
148    }
149
150    /// Returns the standard deviation as a percentage of the mean
151    ///
152    /// - Time: `O(length)`
153    pub fn std_dev_pct(&self) -> A {
154        let _100 = A::cast(100);
155        let mean = self.mean();
156        let std_dev = self.std_dev(Some(mean));
157
158        (std_dev / mean) * _100
159    }
160
161    /// Returns the sum of all the elements of the sample
162    ///
163    /// - Time: `O(length)`
164    pub fn sum(&self) -> A {
165        ::sum(self)
166    }
167
168    /// Returns the t score between these two samples
169    ///
170    /// - Time: `O(length)`
171    pub fn t(&self, other: &Sample<A>) -> A {
172        let (x_bar, y_bar) = (self.mean(), other.mean());
173        let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar)));
174        let n_x = A::cast(self.len());
175        let n_y = A::cast(other.len());
176        let num = x_bar - y_bar;
177        let den = (s2_x / n_x + s2_y / n_y).sqrt();
178
179        num / den
180    }
181
182    /// Returns the variance of the sample
183    ///
184    /// The `mean` can be optionally passed along to speed up (2X) the computation
185    ///
186    /// - Time: `O(length)`
187    pub fn var(&self, mean: Option<A>) -> A {
188        use std::ops::Add;
189
190        let mean = mean.unwrap_or_else(|| self.mean());
191        let slice = self;
192
193        let sum = slice
194            .iter()
195            .map(|&x| (x - mean).powi(2))
196            .fold(A::cast(0), Add::add);
197
198        sum / A::cast(slice.len() - 1)
199    }
200
201    // TODO Remove the `T` parameter in favor of `S::Output`
202    /// Returns the bootstrap distributions of the parameters estimated by the 1-sample statistic
203    ///
204    /// - Multi-threaded
205    /// - Time: `O(nresamples)`
206    /// - Memory: `O(nresamples)`
207    pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions
208    where
209        S: Fn(&Sample<A>) -> T,
210        S: Sync,
211        T: Tuple,
212        T: Send,
213        T::Distributions: Send,
214        T::Builder: Send,
215    {
216        let ncpus = num_cpus::get();
217
218        unsafe {
219            // TODO need some sensible threshold to trigger the multi-threaded path
220            if ncpus > 1 && nresamples > self.len() {
221                let granularity = nresamples / ncpus + 1;
222                let statistic = &statistic;
223
224                let chunks = (0..ncpus)
225                    .map(|i| {
226                        // for now I'll make do with aliasing and careful non-overlapping indexing
227                        let mut sub_distributions: T::Builder =
228                            TupledDistributionsBuilder::new(granularity);
229                        let mut resamples = Resamples::new(self);
230                        let offset = i * granularity;
231
232                        thread::scoped(move || {
233                            for _ in offset..cmp::min(offset + granularity, nresamples) {
234                                sub_distributions.push(statistic(resamples.next()))
235                            }
236                            sub_distributions
237                        })
238                    })
239                    .collect::<Vec<_>>();
240
241                let mut builder: T::Builder = TupledDistributionsBuilder::new(nresamples);
242                for chunk in chunks {
243                    builder.extend(&mut (chunk.join()));
244                }
245                builder.complete()
246            } else {
247                let mut builder: T::Builder = TupledDistributionsBuilder::new(nresamples);
248                let mut resamples = Resamples::new(self);
249
250                for _ in 0..nresamples {
251                    builder.push(statistic(resamples.next()));
252                }
253
254                builder.complete()
255            }
256        }
257    }
258
259    #[cfg(test)]
260    pub fn iqr(&self) -> A
261    where
262        usize: cast::From<A, Output = Result<usize, cast::Error>>,
263    {
264        self.percentiles().iqr()
265    }
266
267    #[cfg(test)]
268    pub fn median(&self) -> A
269    where
270        usize: cast::From<A, Output = Result<usize, cast::Error>>,
271    {
272        self.percentiles().median()
273    }
274}
275
276impl<A> ops::Deref for Sample<A> {
277    type Target = [A];
278
279    fn deref(&self) -> &[A] {
280        &self.0
281    }
282}