bitbelay_tests/correlation/
bitwise.rs1use std::collections::HashMap;
4use std::hash::BuildHasher;
5use std::hash::Hasher as _;
6use std::num::NonZeroUsize;
7
8use bitbelay_providers::Provider;
9use bitbelay_report::section;
10use bitbelay_report::section::test;
11use bitbelay_report::section::test::module;
12use bitbelay_statistics::correlation::pearson;
13use colored::Colorize as _;
14use ordered_float::OrderedFloat;
15use tracing::debug;
16use tracing::info;
17
18pub type Results = HashMap<(usize, usize), Option<f64>>;
20
21#[derive(Debug)]
23pub struct Test<'a, H: BuildHasher, const N: usize> {
24 build_hasher: &'a H,
26
27 bit_values: [Vec<f64>; N],
29
30 threshold: f64,
33}
34
35impl<'a, H: BuildHasher, const N: usize> Test<'a, H, N> {
36 pub fn new(build_hasher: &'a H, threshold: f64) -> Self {
49 Self {
50 build_hasher,
51 bit_values: [(); N].map(|_| Vec::new()),
52 threshold,
53 }
54 }
55
56 pub fn bit_values(&self) -> &[Vec<f64>; N] {
77 &self.bit_values
78 }
79
80 pub fn threshold(&self) -> f64 {
95 self.threshold
96 }
97
98 pub fn run(&mut self, provider: &mut Box<dyn Provider>, iterations: NonZeroUsize) {
123 let hashes = compute_hashes(self.build_hasher, provider, iterations);
124 let newly_computed_bit_values = extract_bit_values_from_hashes::<u64, N>(&hashes);
125
126 for (i, values) in newly_computed_bit_values.iter().enumerate() {
127 self.bit_values[i].extend(values);
131 }
132 }
133
134 pub fn results(&self) -> Option<Results> {
157 info!("Computing Pearson correlations for each bit-bit mapping.");
158
159 if self.bit_values.first().unwrap().is_empty() {
162 return None;
163 }
164
165 let mut results = HashMap::new();
166
167 for i in 0..N {
168 for j in 0..N {
169 let correlation = pearson::correlation(&self.bit_values[i], &self.bit_values[j]);
172 results.insert((i, j), correlation);
173 }
174
175 debug!("Computed all bit-bit correlations for bit index {}.", i);
176 }
177
178 Some(results)
179 }
180}
181
182impl<'a, H: BuildHasher, const N: usize> crate::r#trait::Test for Test<'a, H, N> {
183 fn title(&self) -> &'static str {
184 "Bitwise Correlation"
185 }
186
187 fn report_section(&self) -> bitbelay_report::section::Test {
188 if self.bit_values.first().unwrap().is_empty() {
191 panic!("a report can only be generated when at least one test has been run!");
192 }
193
194 let mut correlations = self
195 .results()
196 .unwrap()
199 .into_iter()
200 .filter(|((i, j), _)| i != j)
201 .map(|(pos, corr)| {
202 (
203 pos,
204 OrderedFloat(match corr {
205 Some(corr) => corr.abs(),
206 None => panic!(
207 "one of the correlations was [`None`], which means that there was \
208 some issue computing the correlation metrics. This is especially \
209 possible when there were not many iterations run: for example, if \
210 every bit in one of the arrays was a 0. We recommend trying the test \
211 again with more samples. If this continue to happen, please file an \
212 issue."
213 ),
214 }),
215 )
216 })
217 .collect::<Vec<_>>();
218 correlations.sort_by(|(_, a), (_, b)| b.cmp(a));
219
220 let (result, mut details) = if correlations
221 .iter()
222 .any(|(_, correlation)| correlation.into_inner() >= self.threshold)
223 {
224 (
225 module::Result::Fail,
226 String::from(
227 "One or more non-diagonals had a correlation greater than or equal to the \
228 threshold.\n\n",
229 ),
230 )
231 } else {
232 (
233 module::Result::Pass,
234 String::from("All non-diagonals had a correlation lower than the threshold.\n\n"),
235 )
236 };
237
238 details.push_str("Maxmium correlation values:\n");
239
240 correlations
241 .iter()
242 .take(10)
243 .map(|((i, j), correlation)| format!("\n * ({}, {}) => {:.4}", i, j, correlation))
244 .for_each(|s| details.push_str(&s));
245
246 let module =
247 module::Module::new(result, "Pearson correlation threshold", None, Some(details));
248 get_report_base().push_module(module).try_build().unwrap()
249 }
250}
251
252fn compute_hashes<H: BuildHasher>(
255 build_hasher: &H,
256 provider: &mut Box<dyn Provider>,
257 iterations: NonZeroUsize,
258) -> Vec<u64> {
259 info!("Computing {} hashes.", iterations);
260
261 provider
262 .provide(iterations.get())
263 .into_iter()
264 .enumerate()
265 .map(|(i, input)| {
266 if i % 1_000 == 0 && i > 0 {
267 debug!("Computed {} hashes.", i);
268 }
269
270 let mut hasher = build_hasher.build_hasher();
271 hasher.write(input);
272 hasher.finish()
273 })
274 .collect()
275}
276
277fn extract_bit_values_from_hashes<T, const N: usize>(hashes: &[T]) -> Vec<Vec<f64>>
283where
284 T: Copy
285 + std::ops::Shr<usize, Output = T>
286 + std::ops::BitAnd<Output = T>
287 + From<u64>
288 + Into<u64>,
289{
290 info!(
291 "Extracing bit values across {} {}-bit hashes.",
292 hashes.len(),
293 N
294 );
295
296 let mut bit_values: Vec<Vec<f64>> = vec![Vec::new(); N];
297
298 for hash in hashes {
299 for (i, bit_value) in bit_values.iter_mut().enumerate() {
300 let as_byte = ((*hash >> i) & T::from(1_u64)).into();
301 bit_value.push(as_byte as f64);
302 }
303 }
304
305 bit_values
306}
307
308pub fn get_report_base() -> section::test::Builder {
311 let overview = "The bitwise correlation test assesses the correlation between each pair of \
312 bits, known as 'bit-bit comparisons', for a set of output hashes using the \
313 Pearson correlation coefficient.";
314
315 let algorithm =
316 "For a specified hash function, a provider and number of iterations is specified:\n\n(1) \
317 A random input is generated from the provider and the output hash is computed. This \
318 happens for the number of iterations specified, and the results are accumulated in an \
319 array. You can think of this as, roughly, a matrix of bits where each row is an output \
320 hash and each column is the bit at position _i_ of the output hash.\n\n(2) This matrix \
321 of bits is effectively transposed, meaning that an array is created for each bit \
322 position, with each array containing the values of that bit across all hashes. These \
323 arrays are referred to as 'bit values' for their respective bit positions.\n\n(3) For \
324 each pair of bit positions, the Pearson correlation is calculated between their \
325 corresponding arrays of bit values. This measures the level of correlation between every \
326 pair of output bits. Note that, though Pearson correlation is symmetric (meaning the \
327 correlation of (i, j) is the same as the correlation of (j, i)), all pairwise \
328 comparisons are computed.\n\n(4) The resulting correlations are stored in a HashMap, \
329 with each key being a tuple `(i, j)` representing the Pearson correlation coefficient \
330 between the bit values at position _i_ and the bit values at position _j_.";
331
332 let interpretation = "* A 'good' result is one where bits within the output hashes are not \
333 highly correlated with one another. This indicates that the bits are \
334 largely independent under the test.\n\n * There is one exception, \
335 called the 'diagonal' of the correlation matrix. The rationale behind \
336 this is straightforward: when an array of bit values at position _i_ is \
337 compared against itself, the arrays are identical, and the correlation \
338 should be 1.0. This presence of this phenomenon is often used as a \
339 check to ensure that results are being calculated as expected.";
340
341 let sources = "* https://en.wikipedia.org/wiki/Pearson_correlation_coefficient";
342
343 test::Builder::default()
344 .title("Bitwise Pearson Correlation".to_string())
345 .unwrap()
346 .description(format!(
347 "{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}",
348 "Overview".italic(),
349 overview,
350 "Algorithm".italic(),
351 algorithm,
352 "Interpretation".italic(),
353 interpretation,
354 "Sources".italic(),
355 sources
356 ))
357 .unwrap()
358}
359
360#[cfg(test)]
361mod tests {
362 use crate::correlation::bitwise::extract_bit_values_from_hashes;
363
364 #[test]
365 fn it_extracts_bit_values_from_hashes_correctly() {
366 let hashes: [u64; 4] = [0x00, 0x01, 0x02, 0x03];
367 let bit_values = extract_bit_values_from_hashes::<u64, 64>(&hashes[..]);
368
369 #[allow(clippy::needless_range_loop)]
370 for i in 2..64 {
371 assert_eq!(bit_values[i], &[0., 0., 0., 0.]);
372 }
373
374 assert_eq!(bit_values[1], &[0., 0., 1., 1.]);
375 assert_eq!(bit_values[0], &[0., 1., 0., 1.]);
376 }
377}