rust_selfsimilar/
lib.rs

1//! A fast generator of discrete random number generator
2//! The self similar distribution (80-20 rule) was described in 
3//!     Jim Gray, Prakash Sundaresan, Susanne Englert, Ken Baclawski, and Peter J. Weinberger.1994.
4//!     Quickly generating billion-record synthetic databases. 
5//!     SIGMOD Rec. 23, 2 (June 1994), 243–252.
6//!     DOI:https://doi.org/10.1145/191843.191886
7//! 
8//! Integers between 1...N.
9//!     The first h•N integers get 1-h of the distribution.
10//! 
11//! For example: 
12//!     if N = 25 and h= .10, then
13//!     80% of the weight goes to the first 5 integers.
14//!     and 64% of the weight goes to the first integer.
15
16#[derive(Clone)]
17pub struct SelfSimilarDistribution {
18    low: usize,
19    high: usize,
20    skew: f64,
21}
22
23impl SelfSimilarDistribution {
24    pub fn new(low: usize, high: usize, skew: f64) -> Self {
25        Self { low, high, skew }
26    }
27
28    fn next<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> usize {
29        let next: f64 = rng.gen_range(0.0..1.0);
30        let range = (self.high - self.low) as f64;
31        let low = self.low as f64;
32
33        let rv = low + range * next.powf(self.skew.log2() / (1.0 - self.skew).log2());
34
35        rv as usize
36    }
37}
38
39impl rand::distributions::Distribution<usize> for SelfSimilarDistribution {
40    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> usize {
41        self.next(rng)
42    }
43}
44
45use std::fmt;
46impl fmt::Debug for SelfSimilarDistribution {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
48        f.debug_struct("SelfSimilarDistribution")
49            .field("low", &self.low)
50            .field("high", &self.high)
51            .field("skew", &self.skew)
52            .finish()
53    }
54}
55
56#[cfg(test)]
57mod tests {
58    use super::*;
59    use rand::distributions::Distribution;
60
61    #[test]
62    fn self_similar() {
63        let max = 1000;
64        let dist = SelfSimilarDistribution::new(0, max, 0.3);
65        let mut rng = rand::thread_rng();
66
67        // a very naive test to ensure numbers smaller than 200 should be more than half generated.
68        let division = 200;
69        let mut division_cnt = 0;
70        for _i in 0..max {
71            let v = dist.sample(&mut rng);
72            if v < division {
73                division_cnt += 1;
74            }
75        }
76        assert!(division_cnt > max / 2);
77    }
78}