mod common;
use std::fs;
use std::path::PathBuf;
use common::serialization_test_data;
use datasketches::bloom::BloomFilter;
/// Validates one serialized Bloom filter fixture and its serialization round-trip.
///
/// The file at `path` is read and deserialized, then checked in this order:
///
/// 1. `num_hashes()` equals `expected_num_hashes`.
/// 2. `is_empty()` and `bits_used()` agree with whether `expected_num_items` is zero.
/// 3. Up to 100 of the keys `0..expected_num_items / 10` (as `u64`) are reported as
///    present, i.e. there are no false negatives among them.
/// 4. Serializing and deserializing again preserves `num_hashes()`, `capacity()`,
///    `bits_used()`, and the membership answers for the same sampled keys.
///
/// NOTE(review): step 3 presumes the fixture generators inserted the integers
/// `0..n/10` as 64-bit values — confirm against the generators.
///
/// # Panics
///
/// Panics (failing the calling test) if the file cannot be read, if either
/// deserialization fails, or if any of the checks above does not hold. Read and
/// deserialization failures name the offending fixture path.
fn test_bloom_filter_file(path: PathBuf, expected_num_items: u64, expected_num_hashes: u16) {
    // An `io::Error` does not mention the file it came from, so attach the path here.
    let bytes = fs::read(&path)
        .unwrap_or_else(|err| panic!("Failed to read {}: {}", path.display(), err));
    let filter1 = BloomFilter::deserialize(&bytes).unwrap_or_else(|err| {
        panic!(
            "Deserialization failed for {}: {}",
            path.display(),
            err
        )
    });

    assert_eq!(
        filter1.num_hashes(),
        expected_num_hashes,
        "Wrong num_hashes in {}",
        path.display()
    );

    if expected_num_items == 0 {
        assert!(filter1.is_empty(), "Filter should be empty for n=0");
        assert_eq!(
            filter1.bits_used(),
            0,
            "Empty filter should have 0 bits set"
        );
    } else {
        assert!(
            !filter1.is_empty(),
            "Filter should not be empty for n={}",
            expected_num_items
        );
        assert!(
            filter1.bits_used() > 0,
            "Non-empty filter should have bits set"
        );
    }

    // Probe at most 100 keys. When n / 10 == 0 the range is empty, so both the
    // false-negative check and the per-key round-trip check pass trivially.
    let sample_size = (expected_num_items / 10).min(100);
    let false_negatives = (0..sample_size)
        .filter(|&key| !filter1.contains(&key))
        .count();
    assert_eq!(
        false_negatives,
        0,
        "Found {} false negatives out of {} items in {}",
        false_negatives,
        sample_size,
        path.display()
    );

    // Round-trip: what we serialize must deserialize to an equivalent filter.
    let serialized_bytes = filter1.serialize();
    let filter2 = BloomFilter::deserialize(&serialized_bytes).unwrap_or_else(|err| {
        panic!(
            "Deserialization failed after round-trip for {}: {}",
            path.display(),
            err
        )
    });

    assert_eq!(
        filter1.num_hashes(),
        filter2.num_hashes(),
        "num_hashes mismatch after round-trip for {}",
        path.display()
    );
    assert_eq!(
        filter1.capacity(),
        filter2.capacity(),
        "capacity mismatch after round-trip for {}",
        path.display()
    );
    assert_eq!(
        filter1.bits_used(),
        filter2.bits_used(),
        "bits_used mismatch after round-trip for {}",
        path.display()
    );

    for key in 0..sample_size {
        assert_eq!(
            filter1.contains(&key),
            filter2.contains(&key),
            "Item {} presence differs after round-trip",
            key
        );
    }
}
/// Checks fixture `bf_n0_h3_java.sk` from `java_generated_files` with
/// `expected_num_items = 0` and `expected_num_hashes = 3`.
#[test]
fn test_java_bloom_n0_h3() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n0_h3_java.sk"),
        0,
        3,
    );
}
/// Checks fixture `bf_n0_h5_java.sk` from `java_generated_files` with
/// `expected_num_items = 0` and `expected_num_hashes = 5`.
#[test]
fn test_java_bloom_n0_h5() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n0_h5_java.sk"),
        0,
        5,
    );
}
/// Checks fixture `bf_n10000_h3_java.sk` from `java_generated_files` with
/// `expected_num_items = 10_000` and `expected_num_hashes = 3`.
#[test]
fn test_java_bloom_n10000_h3() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n10000_h3_java.sk"),
        10_000,
        3,
    );
}
/// Checks fixture `bf_n10000_h5_java.sk` from `java_generated_files` with
/// `expected_num_items = 10_000` and `expected_num_hashes = 5`.
#[test]
fn test_java_bloom_n10000_h5() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n10000_h5_java.sk"),
        10_000,
        5,
    );
}
/// Checks fixture `bf_n2000000_h3_java.sk` from `java_generated_files` with
/// `expected_num_items = 2_000_000` and `expected_num_hashes = 3`.
#[test]
fn test_java_bloom_n2000000_h3() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n2000000_h3_java.sk"),
        2_000_000,
        3,
    );
}
/// Checks fixture `bf_n2000000_h5_java.sk` from `java_generated_files` with
/// `expected_num_items = 2_000_000` and `expected_num_hashes = 5`.
#[test]
fn test_java_bloom_n2000000_h5() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n2000000_h5_java.sk"),
        2_000_000,
        5,
    );
}
/// Checks fixture `bf_n30000000_h3_java.sk` from `java_generated_files` with
/// `expected_num_items = 30_000_000` and `expected_num_hashes = 3`.
#[test]
fn test_java_bloom_n30000000_h3() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n30000000_h3_java.sk"),
        30_000_000,
        3,
    );
}
/// Checks fixture `bf_n30000000_h5_java.sk` from `java_generated_files` with
/// `expected_num_items = 30_000_000` and `expected_num_hashes = 5`.
#[test]
fn test_java_bloom_n30000000_h5() {
    test_bloom_filter_file(
        serialization_test_data("java_generated_files", "bf_n30000000_h5_java.sk"),
        30_000_000,
        5,
    );
}
/// Checks fixture `bf_n0_h3_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 0` and `expected_num_hashes = 3`.
#[test]
fn test_cpp_bloom_n0_h3() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n0_h3_cpp.sk"),
        0,
        3,
    );
}
/// Checks fixture `bf_n0_h5_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 0` and `expected_num_hashes = 5`.
#[test]
fn test_cpp_bloom_n0_h5() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n0_h5_cpp.sk"),
        0,
        5,
    );
}
/// Checks fixture `bf_n10000_h3_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 10_000` and `expected_num_hashes = 3`.
#[test]
fn test_cpp_bloom_n10000_h3() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n10000_h3_cpp.sk"),
        10_000,
        3,
    );
}
/// Checks fixture `bf_n10000_h5_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 10_000` and `expected_num_hashes = 5`.
#[test]
fn test_cpp_bloom_n10000_h5() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n10000_h5_cpp.sk"),
        10_000,
        5,
    );
}
/// Checks fixture `bf_n2000000_h3_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 2_000_000` and `expected_num_hashes = 3`.
#[test]
fn test_cpp_bloom_n2000000_h3() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n2000000_h3_cpp.sk"),
        2_000_000,
        3,
    );
}
/// Checks fixture `bf_n2000000_h5_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 2_000_000` and `expected_num_hashes = 5`.
#[test]
fn test_cpp_bloom_n2000000_h5() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n2000000_h5_cpp.sk"),
        2_000_000,
        5,
    );
}
/// Checks fixture `bf_n30000000_h3_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 30_000_000` and `expected_num_hashes = 3`.
#[test]
fn test_cpp_bloom_n30000000_h3() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n30000000_h3_cpp.sk"),
        30_000_000,
        3,
    );
}
/// Checks fixture `bf_n30000000_h5_cpp.sk` from `cpp_generated_files` with
/// `expected_num_items = 30_000_000` and `expected_num_hashes = 5`.
#[test]
fn test_cpp_bloom_n30000000_h5() {
    test_bloom_filter_file(
        serialization_test_data("cpp_generated_files", "bf_n30000000_h5_cpp.sk"),
        30_000_000,
        5,
    );
}