use exaloglog::ExaLogLog;
use rand::{Rng, SeedableRng, rngs::StdRng};
const TRIALS: usize = 200;
const N: u64 = 1_000_000;
fn main() {
println!(
"Head-to-head: ExaLogLog vs HyperLogLog\n\
{TRIALS} trials at n = {N} per config.\n"
);
println!("=== Equal-m comparison: same register count m=2^p ===\n");
println!(
"{:<6} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10}",
"p", "ELL_B", "HLL6_B", "HLL8_B", "ELL_RMSE%", "HLL6_RMSE%", "HLL8_RMSE%"
);
println!("{}", "-".repeat(72));
for p in [8u32, 10, 12, 14] {
let m = 1u64 << p;
let ell_bytes = ((m as usize) * 7) / 2; let hll6_bytes = (m as usize * 6).div_ceil(8); let hll8_bytes = m as usize;
let ell_rmse = run_ell(p);
let hll6_rmse = run_hll(p, HllReg::Bits6);
let hll8_rmse = run_hll(p, HllReg::Bits8);
println!(
"{:<6} {:>10} {:>10} {:>10} {:>10.3} {:>10.3} {:>10.3}",
p,
ell_bytes,
hll6_bytes,
hll8_bytes,
ell_rmse * 100.0,
hll6_rmse * 100.0,
hll8_rmse * 100.0,
);
}
println!();
println!("=== Equal-RMSE comparison: smallest config per algorithm ===\n");
println!("How small can each algorithm get while meeting a target RMSE?");
println!();
println!(
"{:<10} {:>8} {:>10} {:>10} {:>10} {:>16}",
"target", "ELL_p", "ELL_B", "HLL6_B", "HLL8_B", "ELL_savings_vs_HLL6"
);
println!("{}", "-".repeat(70));
for target_pct in [2.0_f64, 1.5, 1.0, 0.7, 0.5] {
let target = target_pct / 100.0;
let (ell_p, ell_b) =
smallest_meeting(target, run_ell, |p| (((1u64 << p) as usize) * 7) / 2);
let (_, hll6_b) = smallest_meeting(
target,
|p| run_hll(p, HllReg::Bits6),
|p| ((1usize << p) * 6).div_ceil(8),
);
let (_, hll8_b) = smallest_meeting(target, |p| run_hll(p, HllReg::Bits8), |p| 1usize << p);
let saving = 100.0 * (1.0 - ell_b as f64 / hll6_b as f64);
println!(
"{:<9.2}% {:>8} {:>10} {:>10} {:>10} {:>15.1}%",
target_pct, ell_p, ell_b, hll6_b, hll8_b, saving
);
}
println!();
println!("ELL_B = ExaLogLog packed (28 bits/register, 7 bytes per pair)");
println!("HLL6_B = HyperLogLog with 6-bit packed registers (Apache DataSketches style)");
println!("HLL8_B = HyperLogLog with 8-bit registers (typical Rust crate layout)");
}
#[derive(Copy, Clone)]
enum HllReg {
Bits6,
Bits8,
}
fn run_ell(p: u32) -> f64 {
let mut sq = 0.0;
for trial in 0..TRIALS {
let mut rng = StdRng::seed_from_u64(0xCAFE_BABE_0000_0000 ^ trial as u64);
let mut s = ExaLogLog::new(p);
for _ in 0..N {
let h: u64 = rng.r#gen();
s.add_hash(h);
}
let est = s.estimate_ml();
let err = (est - N as f64) / N as f64;
sq += err * err;
}
(sq / TRIALS as f64).sqrt()
}
fn run_hll(p: u32, reg: HllReg) -> f64 {
let mut sq = 0.0;
for trial in 0..TRIALS {
let mut rng = StdRng::seed_from_u64(0xCAFE_BABE_0000_0000 ^ trial as u64);
let mut h = Hll::new(p, reg);
for _ in 0..N {
let v: u64 = rng.r#gen();
h.add_hash(v);
}
let est = h.estimate();
let err = (est - N as f64) / N as f64;
sq += err * err;
}
(sq / TRIALS as f64).sqrt()
}
fn smallest_meeting<F, B>(target: f64, mut measure: F, mut byte_fn: B) -> (u32, usize)
where
F: FnMut(u32) -> f64,
B: FnMut(u32) -> usize,
{
for p in 4u32..=18 {
let r = measure(p);
if r <= target * 1.05 {
return (p, byte_fn(p));
}
}
(18, byte_fn(18))
}
struct Hll {
p: u32,
m: usize,
registers: Vec<u8>,
_reg: HllReg,
}
impl Hll {
fn new(p: u32, reg: HllReg) -> Self {
let m = 1usize << p;
Self {
p,
m,
registers: vec![0u8; m],
_reg: reg,
}
}
fn add_hash(&mut self, hash: u64) {
let i = (hash & ((1u64 << self.p) - 1)) as usize;
let w = hash >> self.p;
let rho = if w == 0 {
(65 - self.p) as u8
} else {
(w.leading_zeros() - self.p) as u8 + 1
};
if rho > self.registers[i] {
self.registers[i] = rho;
}
}
fn estimate(&self) -> f64 {
let m = self.m as f64;
let alpha = match self.p {
4 => 0.673,
5 => 0.697,
6 => 0.709,
_ => 0.7213 / (1.0 + 1.079 / m),
};
let mut sum = 0.0;
let mut zeros = 0;
for &r in &self.registers {
sum += 2f64.powi(-(r as i32));
if r == 0 {
zeros += 1;
}
}
let raw = alpha * m * m / sum;
if raw <= 2.5 * m && zeros > 0 {
return m * (m / zeros as f64).ln();
}
let two_pow_32 = (1u64 << 32) as f64;
if raw > two_pow_32 / 30.0 {
return -two_pow_32 * (1.0 - raw / two_pow_32).ln();
}
raw
}
}