bitbelay_tests/avalanche/
sac.rs

1//! Strict avalanche criterion test.
2//!
3//! # Sources
4//!
5//! * [Wikipedia] has a fairly good explanation of the SAC test.
6//!
7//! [Wikipedia]: https://en.wikipedia.org/wiki/Avalanche_effect#Strict_avalanche_criterion
8
9pub mod experiment;
10
11use std::hash::BuildHasher;
12use std::num::NonZeroUsize;
13
14use bitbelay_providers::Provider;
15use bitbelay_report::section;
16use bitbelay_report::section::test::module;
17use bitbelay_report::section::test::Builder;
18use bitbelay_report::section::test::Module;
19use colored::Colorize;
20pub use experiment::Experiment;
21use lazy_static::lazy_static;
22use ordered_float::OrderedFloat;
23
24lazy_static! {
25    static ref ONE_PCT_CHAR: String = ".".green().to_string();
26    static ref FIVE_PCT_CHAR: String = "?".yellow().to_string();
27    static ref OTHER_PCT_CHAR: String = "!".red().to_string();
28}
29
30/// An error related to a [`Test`].
31#[derive(Debug)]
32pub enum Error {
33    /// An experiment error.
34    Experiment(experiment::Error),
35
36    /// An invalid value was passed for max deviance.
37    InvalidMaxDeviance(f64),
38}
39
40impl std::fmt::Display for Error {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        match self {
43            Error::Experiment(err) => write!(f, "experiment error: {err}"),
44            Error::InvalidMaxDeviance(value) => {
45                write!(
46                    f,
47                    "max deviance must be between 0.0 and 1.0, received {value}"
48                )
49            }
50        }
51    }
52}
53
54impl std::error::Error for Error {}
55
56/// A [`Result`](std::result::Result) with an [`Error`].
57type Result<T> = std::result::Result<T, Error>;
58
59/// The results of a [`Test`](section::Test).
60#[derive(Debug)]
61pub struct Results {
62    /// Whether the test succeeded or not.
63    pub succeeded: bool,
64
65    /// The maximum bias we encountered.
66    ///
67    /// * The first item in the tuple is the index where the max bias occurred.
68    /// * The second item in the tuple is the bias itself.
69    pub max_bias: (usize, OrderedFloat<f64>),
70
71    /// The offset of each bit in the output from the expected bit flip
72    /// probability.
73    pub bit_bias_offsets: Vec<(usize, OrderedFloat<f64>)>,
74}
75
76/// A strict avalanche criterion test.
77#[derive(Debug)]
78pub struct Test<'a, H: BuildHasher, const N: usize> {
79    /// The build hasher.
80    build_hasher: &'a H,
81
82    /// The data provider.
83    provider: Box<dyn Provider>,
84
85    /// The total number of bit flips for each bit in the output hash.
86    bit_flips: [usize; N],
87
88    /// The number of iterations within each experiment.
89    iterations_per_experiment: NonZeroUsize,
90
91    /// The total number of experiments that have been carried out.
92    total_experiments: usize,
93
94    /// The maximum deviance that any single bit can have from `0.5` for the
95    /// test to be considered successful.
96    ///
97    /// Note that this is a fraction (`0.01`), not a percentage (`1`).
98    max_deviance: f64,
99}
100
101impl<'a, H: BuildHasher, const N: usize> Test<'a, H, N> {
102    /// Creates a new [`Test`].
103    ///
104    /// # Examples
105    ///
106    /// ```
107    /// use std::hash::RandomState;
108    /// use std::num::NonZeroUsize;
109    ///
110    /// use bitbelay_providers::ascii::AlphanumericProvider;
111    /// use bitbelay_tests::avalanche::sac::Test;
112    ///
113    /// let hasher = RandomState::new();
114    /// let test = Test::<RandomState, 64>::try_new(
115    ///     &hasher,
116    ///     Box::new(AlphanumericProvider::new(10)),
117    ///     NonZeroUsize::try_from(1000).unwrap(),
118    ///     0.01,
119    /// )
120    /// .unwrap();
121    ///
122    /// assert_eq!(test.bit_flips().iter().sum::<usize>(), 0);
123    /// assert_eq!(test.total_experiments(), 0);
124    /// ```
125    pub fn try_new(
126        build_hasher: &'a H,
127        provider: Box<dyn Provider>,
128        iterations_per_experiment: NonZeroUsize,
129        max_deviance: f64,
130    ) -> Result<Self> {
131        if !(0.0..=1.0).contains(&max_deviance) {
132            return Err(Error::InvalidMaxDeviance(max_deviance));
133        }
134
135        Ok(Self {
136            build_hasher,
137            provider,
138            bit_flips: [0usize; N],
139            iterations_per_experiment,
140            total_experiments: 0,
141            max_deviance,
142        })
143    }
144
145    /// Gets the build hasher for this [`Test`].
146    ///
147    /// # Examples
148    ///
149    /// ```
150    /// use std::hash::BuildHasher as _;
151    /// use std::hash::RandomState;
152    /// use std::num::NonZeroUsize;
153    ///
154    /// use bitbelay_providers::ascii::AlphanumericProvider;
155    /// use bitbelay_tests::avalanche::sac::Test;
156    ///
157    /// let hasher = RandomState::new();
158    /// let test = Test::<RandomState, 64>::try_new(
159    ///     &hasher,
160    ///     Box::new(AlphanumericProvider::new(10)),
161    ///     NonZeroUsize::try_from(1000).unwrap(),
162    ///     0.01,
163    /// )
164    /// .unwrap();
165    ///
166    /// // Used as a surrogate to test that the [`BuildHasher`]s are the same.
167    /// assert_eq!(test.build_hasher().hash_one("42"), hasher.hash_one("42"));
168    /// ```
169    pub fn build_hasher(&self) -> &H {
170        self.build_hasher
171    }
172
173    /// Gets the data provider for this [`Test`].
174    ///
175    /// # Examples
176    ///
177    /// ```
178    /// use std::hash::RandomState;
179    /// use std::num::NonZeroUsize;
180    ///
181    /// use bitbelay_providers::ascii::AlphanumericProvider;
182    /// use bitbelay_providers::Provider as _;
183    /// use bitbelay_tests::avalanche::sac::Test;
184    ///
185    /// let provider = Box::new(AlphanumericProvider::new(10));
186    /// let hasher = RandomState::new();
187    /// let test = Test::<RandomState, 64>::try_new(
188    ///     &hasher,
189    ///     provider.clone(),
190    ///     NonZeroUsize::try_from(1000).unwrap(),
191    ///     0.01,
192    /// )
193    /// .unwrap();
194    ///
195    /// assert_eq!(test.provider().name(), provider.name());
196    /// ```
197    pub fn provider(&self) -> &dyn Provider {
198        self.provider.as_ref()
199    }
200
201    /// Gets the current number of flips for each output bit in the [`Test`].
202    ///
203    /// # Examples
204    ///
205    /// ```
206    /// use std::hash::RandomState;
207    /// use std::num::NonZeroUsize;
208    ///
209    /// use bitbelay_providers::ascii::AlphanumericProvider;
210    /// use bitbelay_tests::avalanche::sac::Test;
211    ///
212    /// let hasher = RandomState::new();
213    /// let test = Test::<RandomState, 64>::try_new(
214    ///     &hasher,
215    ///     Box::new(AlphanumericProvider::new(10)),
216    ///     NonZeroUsize::try_from(1000).unwrap(),
217    ///     0.01,
218    /// )
219    /// .unwrap();
220    ///
221    /// assert_eq!(test.bit_flips().iter().sum::<usize>(), 0);
222    /// ```
223    pub fn bit_flips(&self) -> [usize; N] {
224        self.bit_flips
225    }
226
227    /// Gets the number of iterations for each experiment in the [`Test`].
228    ///
229    /// # Examples
230    ///
231    /// ```
232    /// use std::hash::RandomState;
233    /// use std::num::NonZeroUsize;
234    ///
235    /// use bitbelay_providers::ascii::AlphanumericProvider;
236    /// use bitbelay_tests::avalanche::sac::Test;
237    ///
238    /// let hasher = RandomState::new();
239    /// let test = Test::<RandomState, 64>::try_new(
240    ///     &hasher,
241    ///     Box::new(AlphanumericProvider::new(10)),
242    ///     NonZeroUsize::try_from(1000).unwrap(),
243    ///     0.01,
244    /// )
245    /// .unwrap();
246    ///
247    /// assert_eq!(test.iterations_per_experiment().get(), 1000);
248    /// ```
249    pub fn iterations_per_experiment(&self) -> NonZeroUsize {
250        self.iterations_per_experiment
251    }
252
253    /// Gets the number of experiments that have been run within the [`Test`].
254    ///
255    /// # Examples
256    ///
257    /// ```
258    /// use std::hash::RandomState;
259    /// use std::num::NonZeroUsize;
260    ///
261    /// use bitbelay_providers::ascii::AlphanumericProvider;
262    /// use bitbelay_tests::avalanche::sac::Test;
263    ///
264    /// let hasher = RandomState::new();
265    /// let test = Test::<RandomState, 64>::try_new(
266    ///     &hasher,
267    ///     Box::new(AlphanumericProvider::new(10)),
268    ///     NonZeroUsize::try_from(1000).unwrap(),
269    ///     0.01,
270    /// )
271    /// .unwrap();
272    ///
273    /// assert_eq!(test.total_experiments(), 0);
274    /// ```
275    pub fn total_experiments(&self) -> usize {
276        self.total_experiments
277    }
278
279    /// Gets the max deviance allowed for any bit within the [`Test`] for the
280    /// [`Test`] to be considered passing.
281    ///
282    /// # Examples
283    ///
284    /// ```
285    /// use std::hash::RandomState;
286    /// use std::num::NonZeroUsize;
287    ///
288    /// use bitbelay_providers::ascii::AlphanumericProvider;
289    /// use bitbelay_tests::avalanche::sac::Test;
290    ///
291    /// let hasher = RandomState::new();
292    /// let test = Test::<RandomState, 64>::try_new(
293    ///     &hasher,
294    ///     Box::new(AlphanumericProvider::new(10)),
295    ///     NonZeroUsize::try_from(1000).unwrap(),
296    ///     0.01,
297    /// )
298    /// .unwrap();
299    ///
300    /// assert_eq!(test.max_deviance(), 0.01);
301    /// ```
302    pub fn max_deviance(&self) -> f64 {
303        self.max_deviance
304    }
305
306    /// Runs a single experiment.
307    ///
308    /// # Examples
309    ///
310    /// ```
311    /// use std::hash::RandomState;
312    /// use std::num::NonZeroUsize;
313    ///
314    /// use bitbelay_providers::ascii::AlphanumericProvider;
315    /// use bitbelay_tests::avalanche::sac::Test;
316    ///
317    /// let hasher = RandomState::new();
318    /// let mut test = Test::<RandomState, 64>::try_new(
319    ///     &hasher,
320    ///     Box::new(AlphanumericProvider::new(10)),
321    ///     NonZeroUsize::try_from(1000).unwrap(),
322    ///     0.01,
323    /// )
324    /// .unwrap();
325    ///
326    /// test.run_single_experiment();
327    /// assert_eq!(test.total_experiments(), 1);
328    /// ```
329    pub fn run_single_experiment(&mut self) -> Result<()> {
330        // SAFETY: we hardcode generating one value, so we know this pop must unwrap.
331        let data = self.provider.provide(1).pop().unwrap();
332
333        let results = Experiment::<H, N>::try_new(self.build_hasher, data)
334            .map_err(Error::Experiment)?
335            .run(self.iterations_per_experiment);
336
337        debug_assert_eq!(self.bit_flips.len(), results.len());
338
339        for (i, value) in results.iter().enumerate() {
340            self.bit_flips[i] += value;
341        }
342
343        self.total_experiments += 1;
344        Ok(())
345    }
346
347    /// Generates a set of [`Results`] based on the current state of the
348    /// [`Test`].
349    ///
350    /// # Examples
351    ///
352    /// ```
353    /// use std::hash::RandomState;
354    /// use std::num::NonZeroUsize;
355    ///
356    /// use bitbelay_providers::ascii::AlphanumericProvider;
357    /// use bitbelay_tests::avalanche::sac::Test;
358    ///
359    /// let hasher = RandomState::new();
360    /// let mut test = Test::<RandomState, 64>::try_new(
361    ///     &hasher,
362    ///     Box::new(AlphanumericProvider::new(10)),
363    ///     NonZeroUsize::try_from(100000).unwrap(),
364    ///     0.01,
365    /// )
366    /// .unwrap();
367    ///
368    /// test.run_single_experiment();
369    ///
370    /// let results = test.results();
371    /// // Do something with the results.
372    /// ```
373    pub fn results(&self) -> Results {
374        let iterations = (self.total_experiments * self.iterations_per_experiment.get()) as f64;
375
376        let bits = self
377            .bit_flips
378            .iter()
379            .map(|flips| *flips as f64 / iterations)
380            .enumerate()
381            .map(|(i, value)| (i, OrderedFloat((value - 0.5).abs())))
382            .collect::<Vec<_>>();
383
384        let (index, max_bias) = bits
385            .iter()
386            .max_by_key(|&(_, value)| value)
387            // SAFETY: there will be are least one result, as the number of iterations
388            // per experiment is a `NonZeroUsize`. As such, there will always be a
389            // maximum element, and this will always unwrap.
390            .unwrap();
391
392        tracing::info!("Max bias is bit {} with {:.2}%", index, max_bias * 100.0);
393
394        Results {
395            succeeded: *max_bias <= OrderedFloat(self.max_deviance),
396            max_bias: (*index, *max_bias),
397            bit_bias_offsets: bits,
398        }
399    }
400}
401
402impl<'a, H: BuildHasher, const N: usize> crate::r#trait::Test for Test<'a, H, N> {
403    fn title(&self) -> &'static str {
404        "Strict Avalanche Criterion"
405    }
406
407    fn report_section(&self) -> section::Test {
408        let mut results = self.results();
409        let visual = generate_visual_from_bits(&results.bit_bias_offsets);
410
411        let (result, summary) = if results.succeeded {
412            (
413                module::Result::Pass,
414                format!(
415                    "The bias for every bit fell within a range of 0.5 ± {}.",
416                    self.max_deviance
417                ),
418            )
419        } else {
420            (
421                module::Result::Fail,
422                format!(
423                    "At least one bit had a bias that fell outside the range of 0.5 ± {}. See the \
424                     bit bias profile and the most biased bits below for more information on \
425                     which bits failed.",
426                    self.max_deviance
427                ),
428            )
429        };
430
431        let mut details = format!(
432            "{}\n\n{}\n\n{} => b <= 1% bias\n{} => b <= 5% bias\n{} => b  > 5% bias\n\nBit 0{}Bit \
433             64\n{}\n\n{}\n",
434            summary,
435            "Bit Bias Profile".italic(),
436            *ONE_PCT_CHAR,
437            *FIVE_PCT_CHAR,
438            *OTHER_PCT_CHAR,
439            " ".repeat(55),
440            visual,
441            "Most Biased Bits".italic(),
442        );
443
444        results.bit_bias_offsets.sort_by_key(|(_, bias)| -*bias);
445        for (index, bias_offset) in results.bit_bias_offsets.into_iter().take(10) {
446            details.push_str(&format!(
447                "\n* Index {:>2} had a bias offset of {:.2}%.",
448                index,
449                bias_offset * 100.0
450            ));
451        }
452
453        get_report_base()
454            .push_module(Module::new(
455                result,
456                "Strict Avalanche Criterion",
457                None,
458                Some(details),
459            ))
460            .try_build()
461            .unwrap()
462    }
463}
464
465/// Generates a visualization of which bits are biased (if any) from the bit
466/// bias offset contained within a [`Results`].
467fn generate_visual_from_bits(bit_bias_offsets: &[(usize, OrderedFloat<f64>)]) -> String {
468    let mut visual = String::from("[");
469
470    for (_, probability) in bit_bias_offsets.iter() {
471        if probability <= &OrderedFloat(0.01) {
472            visual.push_str(&format!("{}", &".".green()));
473        } else if probability <= &OrderedFloat(0.05) {
474            visual.push_str(&format!("{}", &"?".yellow()));
475        } else {
476            visual.push_str(&format!("{}", &"!".red()));
477        }
478    }
479
480    visual.push(']');
481    visual
482}
483
484/// Populates the boilerplate report information within a
485/// [`Test`](section::Test).
486pub fn get_report_base() -> section::test::Builder {
487    let overview = "The Strict Avalanche Criterion (SAC) is a test to determine whether a hash \
488                    function exhibits strong avalanching effects.\n\nBriefly, the avalanche \
489                    effect is a desirable trait for a hash function whereby small changes in the \
490                    input to the hash function cause significant changes in the output of the \
491                    hash function. When a hash function does _not_ exhibit strong avalanching \
492                    effects, it likely suffers from poor randomization, opening the door to \
493                    various types of attacks.\n\nThe SAC introduces a formal test to ensure that \
494                    a hash function exhibits sufficient avalanching. The core idea is this: when \
495                    a single bit within the input data is randomly chosen and flipped, half of \
496                    the hash's output bits for that input data should also change. Ideally, there \
497                    won't be any bias as to which bits flip.";
498
499    let algorithm =
500        "For the hash function and data provider chosen, the algorithm runs multiple experiments. \
501         For each experiment,\n\n(1) An array with a length matching the number of bits in the \
502         output hash is initialized, and every element is set to 0. Each index in the array \
503         represents a counter for the number of times that specific output bit flips during the \
504         experiment.\n\n(2) A starting value for the input data is randomly generated.\n\n(3) For \
505         a number of iterations, the following repeats:\n\n  * The hash current of the the input \
506         data is computed (the 'prior hash').\n  * A single, random bit of the input data is \
507         flipped.\n  * The hash of the new input data is computed (the 'new hash').\n  * Each bit \
508         in the prior hash and the new hash are compared.\n    * Each bit that changes is \
509         incremented by 1 in the tally array.\n\nThis process continues for a specified number of \
510         experiment iterations.\n\nAfter all iterations have completed, the fraction of \
511         iterations where each output bit flipped is calculated. In hash functions with strong \
512         avlanching effects, each bit in the output should change roughly 50% of the time.";
513
514    let interpretation = "* Each test has a set bias tolerance. For the test to pass, the bias \
515                          for every output bit must fall within the range of the expected value \
516                          (50%) ± the bias tolerance provided.\n\n* A bit bias profile is graphed \
517                          below. This should give you a sense of which bits were biased and by \
518                          what magnitude.\n\n* The most biased bits are also sorted in the \
519                          respective section below. Use this list to determine the exact bias of \
520                          the most biased bits.";
521
522    Builder::default()
523        .title("Strict Avalanche Criterion")
524        .unwrap()
525        .description(format!(
526            "{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}",
527            "Overview".italic(),
528            overview,
529            "Algorithm".italic(),
530            algorithm,
531            "Interpretation".italic(),
532            interpretation
533        ))
534        .unwrap()
535}