mod common;
use std::fs;
use std::path::PathBuf;
use common::serialization_test_data;
use datasketches::hll::HllSketch;
/// Loads a serialized HLL sketch from `path` and validates it.
///
/// The checks performed, in order:
/// 1. the file can be read and deserialized into an [`HllSketch`];
/// 2. the sketch reports `expected_lg_k` as its `lg_config_k`;
/// 3. the estimate is within 2% of `expected_cardinality` (or below `1.0`
///    when the expected cardinality is zero);
/// 4. serializing and deserializing again yields a sketch that compares equal
///    to the first one and reports the same `lg_config_k` and estimate.
///
/// # Panics
///
/// Panics (failing the calling test) if the file cannot be read, if either
/// deserialization fails, or if any of the checks above does not hold. Every
/// failure message includes the fixture path.
fn test_sketch_file(path: PathBuf, expected_cardinality: usize, expected_lg_k: u8) {
    let expected = expected_cardinality as f64;

    // Name the fixture on failure, matching the round-trip deserialization
    // below, so a missing or corrupt file is identifiable from the output.
    let bytes = fs::read(&path)
        .unwrap_or_else(|err| panic!("Failed to read {}: {}", path.display(), err));
    let sketch1 = HllSketch::deserialize(&bytes).unwrap_or_else(|err| {
        panic!("Deserialization failed for {}: {}", path.display(), err)
    });
    let estimate1 = sketch1.estimate();

    assert_eq!(
        sketch1.lg_config_k(),
        expected_lg_k,
        "Wrong lg_config_k in {}",
        path.display()
    );

    if expected > 0.0 {
        // Relative tolerance applied symmetrically around the expected value.
        let error_margin = 0.02;
        let lower_bound = expected * (1.0 - error_margin);
        let upper_bound = expected * (1.0 + error_margin);
        assert!(
            estimate1 >= lower_bound && estimate1 <= upper_bound,
            "Estimate {} outside bounds [{}, {}] for expected {} in {}",
            estimate1,
            lower_bound,
            upper_bound,
            expected,
            path.display()
        );
    } else {
        // A relative bound is meaningless for an expected value of zero, so
        // only require the estimate to stay below one.
        assert!(
            estimate1 < 1.0,
            "Expected near-zero estimate for empty sketch, got {} in {}",
            estimate1,
            path.display()
        );
    }

    // Round trip: serialize the loaded sketch and read it back.
    let serialized_bytes = sketch1.serialize();
    let sketch2 = HllSketch::deserialize(&serialized_bytes).unwrap_or_else(|err| {
        panic!(
            "Deserialization failed after round-trip for {}: {}",
            path.display(),
            err
        )
    });

    assert_eq!(
        sketch1.lg_config_k(),
        sketch2.lg_config_k(),
        "lg_config_k mismatch after round-trip for {}",
        path.display()
    );
    assert_eq!(
        sketch1,
        sketch2,
        "Sketches are not equal after round-trip for {}",
        path.display()
    );

    // Exact float equality is intentional: the round-tripped sketch is
    // expected to produce the very same estimate, not merely a close one.
    let estimate2 = sketch2.estimate();
    assert_eq!(
        estimate1,
        estimate2,
        "Estimates differ after round-trip for {}",
        path.display()
    );
}
/// Runs the shared `test_sketch_file` checks over every `hll4_n*_java.sk`
/// fixture in `java_generated_files`, expecting an `lg_config_k` of 12.
#[test]
fn test_java_hll4_compatibility() {
    // Each cardinality doubles as the `n` component of the fixture file name.
    for cardinality in [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000] {
        let fixture = serialization_test_data(
            "java_generated_files",
            &format!("hll4_n{}_java.sk", cardinality),
        );
        test_sketch_file(fixture, cardinality, 12);
    }
}
/// Runs the shared `test_sketch_file` checks over every `hll6_n*_java.sk`
/// fixture in `java_generated_files`, expecting an `lg_config_k` of 12.
#[test]
fn test_java_hll6_compatibility() {
    // Each cardinality doubles as the `n` component of the fixture file name.
    for cardinality in [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000] {
        let fixture = serialization_test_data(
            "java_generated_files",
            &format!("hll6_n{}_java.sk", cardinality),
        );
        test_sketch_file(fixture, cardinality, 12);
    }
}
/// Runs the shared `test_sketch_file` checks over every `hll8_n*_java.sk`
/// fixture in `java_generated_files`, expecting an `lg_config_k` of 12.
#[test]
fn test_java_hll8_compatibility() {
    // Each cardinality doubles as the `n` component of the fixture file name.
    for cardinality in [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000] {
        let fixture = serialization_test_data(
            "java_generated_files",
            &format!("hll8_n{}_java.sk", cardinality),
        );
        test_sketch_file(fixture, cardinality, 12);
    }
}
/// Runs the shared `test_sketch_file` checks over every `hll4_n*_cpp.sk`
/// fixture in `cpp_generated_files`, expecting an `lg_config_k` of 12.
#[test]
fn test_cpp_hll4_compatibility() {
    // Each cardinality doubles as the `n` component of the fixture file name.
    for cardinality in [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000] {
        let fixture = serialization_test_data(
            "cpp_generated_files",
            &format!("hll4_n{}_cpp.sk", cardinality),
        );
        test_sketch_file(fixture, cardinality, 12);
    }
}
/// Runs the shared `test_sketch_file` checks over every `hll6_n*_cpp.sk`
/// fixture in `cpp_generated_files`, expecting an `lg_config_k` of 12.
#[test]
fn test_cpp_hll6_compatibility() {
    // Each cardinality doubles as the `n` component of the fixture file name.
    for cardinality in [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000] {
        let fixture = serialization_test_data(
            "cpp_generated_files",
            &format!("hll6_n{}_cpp.sk", cardinality),
        );
        test_sketch_file(fixture, cardinality, 12);
    }
}
/// Runs the shared `test_sketch_file` checks over every `hll8_n*_cpp.sk`
/// fixture in `cpp_generated_files`, expecting an `lg_config_k` of 12.
#[test]
fn test_cpp_hll8_compatibility() {
    // Each cardinality doubles as the `n` component of the fixture file name.
    for cardinality in [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000] {
        let fixture = serialization_test_data(
            "cpp_generated_files",
            &format!("hll8_n{}_cpp.sk", cardinality),
        );
        test_sketch_file(fixture, cardinality, 12);
    }
}
/// Prints a table of expected versus estimated cardinality for a handful of
/// `hll8_n*_java.sk` fixtures and asserts that each relative error is below 2%.
///
/// The table is written with `println!`, so it only shows up when the test
/// harness is run with output capturing disabled.
#[test]
fn test_estimate_accuracy() {
    // (fixture directory, fixture file name, expected cardinality)
    let fixtures = [
        ("java_generated_files", "hll8_n1000_java.sk", 1000),
        ("java_generated_files", "hll8_n10000_java.sk", 10000),
        ("java_generated_files", "hll8_n100000_java.sk", 100000),
        ("java_generated_files", "hll8_n1000000_java.sk", 1000000),
    ];

    // Table header.
    println!("\nCardinality Estimation Accuracy:");
    println!("{:<12} {:<12} {:<10}", "Expected", "Estimate", "Error %");
    println!("{:-<40}", "");

    for (dir, file, expected) in fixtures {
        let fixture_path = serialization_test_data(dir, file);
        let raw = fs::read(&fixture_path).unwrap();
        let estimate = HllSketch::deserialize(&raw).unwrap().estimate();

        // Relative error as a percentage of the expected cardinality.
        let truth = expected as f64;
        let error_pct = ((estimate - truth).abs() / truth) * 100.;

        println!("{:<12} {:<12.0} {:<10.3}", expected, estimate, error_pct);
        assert!(error_pct < 2., "Error too high: {:.3}%", error_pct);
    }
}