use std::collections::HashSet;
use std::time::Instant;
use anyhow::{Context, Result};
use rand::prelude::*;
use pthash::{
BuildConfiguration, DictionaryDictionary, Hashable, Minimal, MurmurHash2_64, Phf, SinglePhf,
};
/// Process entry point: runs `main_` and, on failure, reports the error on
/// stderr and exits with status 1.
fn main() {
    if let Err(e) = main_() {
        // `{:#}` prints the full anyhow cause chain ("context: cause: ...").
        // Plain `{}` would only show the outermost context message and hide
        // the underlying reason for the failure.
        eprintln!("Error: {:#}", e);
        std::process::exit(1);
    }
}
fn main_() -> Result<()> {
stderrlog::new()
.verbosity(2)
.timestamp(stderrlog::Timestamp::Second)
.init()
.expect("Could not initialize stderrlog");
let num_keys: usize = 10000000;
let seed: u64 = 1234567890;
log::info!("generating input data...");
let mut keys = HashSet::<u64>::with_capacity(num_keys);
let mut rng = StdRng::seed_from_u64(seed);
while keys.len() < num_keys {
keys.insert(rng.gen());
}
let mut keys: Vec<_> = keys.into_iter().collect();
keys.sort();
keys.shuffle(&mut rng);
let temp_dir = tempfile::tempdir()?;
let mut config = BuildConfiguration::new(temp_dir.path().to_owned());
config.c = 6.0;
config.alpha = 0.94;
config.verbose_output = true;
let mut f = SinglePhf::<Minimal, MurmurHash2_64, DictionaryDictionary>::new();
log::info!("building the function...");
let start = Instant::now();
let timings = f
.build_in_internal_memory_from_bytes(&keys, &config)
.context("Could not build PHF")?;
log::info!("function built in {} seconds", start.elapsed().as_secs());
let total_seconds = timings.partitioning_seconds
+ timings.mapping_ordering_seconds
+ timings.searching_seconds
+ timings.encoding_seconds;
log::info!("computed: {} seconds", total_seconds.as_secs_f64());
let bits_per_key = (f.num_bits() as f64) / (f.num_keys() as f64);
log::info!("function uses {} [bits/key]", bits_per_key);
check(&keys, &f)?;
log::info!("EVERYTHING OK!");
for i in 0..10 {
log::info!("f({}) = {}", keys[i], f.hash(keys[i]));
}
log::info!("serializing the function to disk...");
let output_path = temp_dir.path().join("pthash.bin");
f.save(&output_path).context("Could not write PHF")?;
log::info!("reading the function from disk...");
{
let other = SinglePhf::<Minimal, MurmurHash2_64, encoders::DictionaryDictionary>::load(
&output_path,
)
.context("Could not read PHF")?;
for i in 0..10 {
log::info!("f({}) = {}", keys[i], other.hash(keys[i]));
assert_eq!(f.hash(keys[i]), other.hash(keys[i]));
}
}
Ok(())
}