use crate::HyperLogLog;
pub fn estimate_union(a: &HyperLogLog, b: &HyperLogLog) -> Result<f64, &'static str> {
if a.precision() != b.precision() {
return Err("precision mismatch");
}
let mut merged = HyperLogLog::new(a.precision());
let ra = a.registers();
let rb = b.registers();
merged.apply_paired_max(ra, rb);
Ok(merged.estimate())
}
pub fn estimate_intersect(a: &HyperLogLog, b: &HyperLogLog) -> Result<f64, &'static str> {
let ea = a.estimate();
let eb = b.estimate();
let union = estimate_union(a, b)?;
let inter = ea + eb - union;
Ok(inter.max(0.0))
}
impl HyperLogLog {
pub(crate) fn apply_paired_max(&mut self, a: &[u8], b: &[u8]) {
debug_assert_eq!(a.len(), self.registers().len());
debug_assert_eq!(b.len(), self.registers().len());
for (i, (x, y)) in a.iter().zip(b.iter()).enumerate() {
let m = (*x).max(*y);
if m > self.registers[i] {
self.registers[i] = m;
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn disjoint_sets_intersect_near_zero() {
let mut a = HyperLogLog::new(12);
let mut b = HyperLogLog::new(12);
for i in 0..5_000 {
a.add(&format!("a-{i}"));
b.add(&format!("b-{i}"));
}
let inter = estimate_intersect(&a, &b).unwrap();
assert!(inter < 500.0, "disjoint intersection ~0, got {inter}");
}
#[test]
fn identical_sets_intersect_near_cardinality() {
let mut a = HyperLogLog::new(12);
let mut b = HyperLogLog::new(12);
for i in 0..5_000 {
let k = format!("k-{i}");
a.add(&k);
b.add(&k);
}
let inter = estimate_intersect(&a, &b).unwrap();
let rel = (inter - 5_000.0).abs() / 5_000.0;
assert!(
rel < 0.10,
"identical intersection ≈ |A|, got {inter} (rel {rel:.3})"
);
}
#[test]
fn union_matches_merge() {
let mut a = HyperLogLog::new(12);
let mut b = HyperLogLog::new(12);
for i in 0..3_000 {
a.add(&format!("a-{i}"));
}
for i in 0..3_000 {
b.add(&format!("b-{i}"));
}
let union = estimate_union(&a, &b).unwrap();
let mut merged = HyperLogLog::new(12);
merged.merge(&a).unwrap();
merged.merge(&b).unwrap();
let est = merged.estimate();
let rel = (union - est).abs() / est.max(1.0);
assert!(
rel < 0.01,
"union should equal merge: union={union} merged={est}"
);
}
#[test]
fn partial_overlap_makes_sense() {
let mut a = HyperLogLog::new(13);
let mut b = HyperLogLog::new(13);
for i in 0..10_000 {
a.add(&format!("a-{i}"));
}
for i in 0..10_000 {
b.add(&format!("b-{i}"));
}
for i in 0..3_000 {
let k = format!("both-{i}");
a.add(&k);
b.add(&k);
}
let inter = estimate_intersect(&a, &b).unwrap();
let rel = (inter - 3_000.0).abs() / 3_000.0;
assert!(rel < 0.5, "3k overlap, got {inter} (rel {rel:.3})");
}
#[test]
fn precision_mismatch_errors() {
let a = HyperLogLog::new(12);
let b = HyperLogLog::new(13);
assert!(estimate_union(&a, &b).is_err());
assert!(estimate_intersect(&a, &b).is_err());
}
}