#![warn(rust_2018_idioms)]
use rand::Rng;
#[derive(Clone, Copy)]
#[deprecated = "prefer rand_distr::Zipf"]
pub struct ZipfDistribution {
num_elements: f64,
exponent: f64,
h_integral_x1: f64,
h_integral_num_elements: f64,
s: f64,
}
impl ZipfDistribution {
pub fn new(num_elements: usize, exponent: f64) -> Result<Self, ()> {
if num_elements == 0 {
return Err(());
}
if exponent <= 0f64 {
return Err(());
}
let z = ZipfDistribution {
num_elements: num_elements as f64,
exponent,
h_integral_x1: ZipfDistribution::h_integral(1.5, exponent) - 1f64,
h_integral_num_elements: ZipfDistribution::h_integral(
num_elements as f64 + 0.5,
exponent,
),
s: 2f64
- ZipfDistribution::h_integral_inv(
ZipfDistribution::h_integral(2.5, exponent)
- ZipfDistribution::h(2f64, exponent),
exponent,
),
};
Ok(z)
}
}
impl ZipfDistribution {
fn next<R: Rng + ?Sized>(&self, rng: &mut R) -> usize {
let hnum = self.h_integral_num_elements;
loop {
use std::cmp;
let u: f64 = hnum + rng.gen::<f64>() * (self.h_integral_x1 - hnum);
let x: f64 = ZipfDistribution::h_integral_inv(u, self.exponent);
let k64 = x.max(1.0).min(self.num_elements);
let k = cmp::max(1, (k64 + 0.5) as usize);
if k64 - x <= self.s
|| u >= ZipfDistribution::h_integral(k64 + 0.5, self.exponent)
- ZipfDistribution::h(k64, self.exponent)
{
return k;
}
}
}
}
impl rand::distributions::Distribution<usize> for ZipfDistribution {
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> usize {
self.next(rng)
}
}
use std::fmt;
impl fmt::Debug for ZipfDistribution {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
f.debug_struct("ZipfDistribution")
.field("e", &self.exponent)
.field("n", &self.num_elements)
.finish()
}
}
impl ZipfDistribution {
fn h_integral(x: f64, exponent: f64) -> f64 {
let log_x = x.ln();
helper2((1f64 - exponent) * log_x) * log_x
}
fn h(x: f64, exponent: f64) -> f64 {
(-exponent * x.ln()).exp()
}
fn h_integral_inv(x: f64, exponent: f64) -> f64 {
let mut t: f64 = x * (1f64 - exponent);
if t < -1f64 {
t = -1f64;
}
(helper1(t) * x).exp()
}
}
fn helper1(x: f64) -> f64 {
if x.abs() > 1e-8 {
x.ln_1p() / x
} else {
1f64 - x * (0.5 - x * (1.0 / 3.0 - 0.25 * x))
}
}
fn helper2(x: f64) -> f64 {
if x.abs() > 1e-8 {
x.exp_m1() / x
} else {
1f64 + x * 0.5 * (1f64 + x * 1.0 / 3.0 * (1f64 + 0.25 * x))
}
}
#[cfg(test)]
mod test {
use super::ZipfDistribution;
use rand::distributions::Distribution;
#[inline]
fn test(alpha: f64) {
const N: usize = 100;
let samples = (2.0f64.powf(alpha) * 5000000.0) as usize;
let mut rng = rand::thread_rng();
let zipf = ZipfDistribution::new(N, alpha).unwrap();
let harmonic: f64 = (1..=N).map(|n| 1.0 / (n as f64).powf(alpha)).sum();
let mut buckets = vec![0; N];
for _ in 0..samples {
let sample = zipf.sample(&mut rng);
buckets[sample - 1] += 1;
}
for i in 0..N {
let freq = buckets[i] as f64 / samples as f64;
let expected = (1.0 / ((i + 1) as f64).powf(alpha)) / harmonic;
let off_by = (expected - freq).abs();
assert!(off_by < 0.1);
let good = off_by < expected;
if !good {
panic!("got {}, expected {} for k = {}", freq, expected, i + 1);
}
}
}
#[test]
fn one() {
test(1.00);
}
#[test]
fn two() {
test(2.00);
}
#[test]
fn three() {
test(3.00);
}
#[test]
fn float() {
test(1.08);
}
#[test]
fn debug() {
eprintln!("{:?}", ZipfDistribution::new(100, 1.0).unwrap());
}
#[test]
fn errs() {
ZipfDistribution::new(0, 1.0).unwrap_err();
ZipfDistribution::new(100, 0.0).unwrap_err();
ZipfDistribution::new(100, -1.0).unwrap_err();
}
}