use std::{hash::Hash, hint::black_box, time::Instant};
use bitm::{BitAccess, BitVec};
use cpu_time::{ProcessTime, ThreadTime};
use crate::{BenchmarkResult, BuildStats, Conf, SearchStats, Threads};
#[inline(never)]
fn warn_size_diff(size_st: usize, size_mt: usize) {
if size_st != size_mt {
eprintln!("WARNING: ST/MT differ in sizes, {} != {}", size_st, size_mt);
}
}
#[inline(never)]
fn check_collision(seen: &mut [u64], input_len: usize, index: usize) {
assert!(index < input_len, "MPHF assigns too large value {}>{}.", index, input_len);
assert!(!seen.get_bit(index), "MPHF assigns the same value {index} to two keys of input.");
seen.set_bit(index);
}
#[inline(never)]
fn ensure_no_absences(absences_found: f64) {
assert_eq!(absences_found, 0.0, "MPHF does not assign the value for {}% keys of the input", absences_found*100.0);
}
pub trait TypeToQuery {
type ToHash<'s>: ?Sized + Hash + 's where Self: 's;
fn to_query_type<'s>(&'s self) -> &'s Self::ToHash::<'s>;
}
impl TypeToQuery for u32 {
type ToHash<'s> = u32;
#[inline(always)] fn to_query_type<'s>(&'s self) -> &'s Self::ToHash::<'s> { self }
}
impl TypeToQuery for u64 {
type ToHash<'s> = u64;
#[inline(always)] fn to_query_type<'s>(&'s self) -> &'s Self::ToHash::<'s> { self }
}
impl TypeToQuery for Box<[u8]> {
type ToHash<'s> = [u8];
#[inline(always)] fn to_query_type<'s>(&'s self) -> &'s Self::ToHash::<'s> { self.as_ref() }
}
pub trait MPHFBuilder<K: Hash + TypeToQuery> {
const CAN_DETECT_ABSENCE: bool = true;
const BUILD_THREADS: Threads = Threads::Both;
const BUILD_THREADS_DOES_NOT_CHANGE_SIZE: bool = true;
type MPHF;
type Value;
fn new(&self, keys: &[K], use_multiple_threads: bool) -> Self::MPHF;
fn value_ex(mphf: &Self::MPHF, key: &K, levels: &mut usize) -> Option<u64>;
fn value(mphf: &Self::MPHF, key: &K) -> Self::Value;
fn mphf_size(mphf: &Self::MPHF) -> usize;
}
fn benchmark_build_st<K: Hash + TypeToQuery, B: MPHFBuilder<K>>(b: &B, keys: &[K], conf: &Conf) -> (B::MPHF, BuildStats) {
std::thread::sleep(std::time::Duration::from_millis(conf.cooling as u64));
let start_moment = ThreadTime::now(); for _ in 1..conf.build_runs { b.new(keys, false); }
let h = b.new(keys, false);
let build_time_seconds = start_moment.elapsed().as_secs_f64();
(h, BuildStats { time_st: build_time_seconds / conf.build_runs as f64, time_mt: f64::NAN } )
}
fn benchmark_build_mt<K: Hash + TypeToQuery, B: MPHFBuilder<K>>(b: &B, keys: &[K], conf: &Conf) -> (B::MPHF, BuildStats) {
std::thread::sleep(std::time::Duration::from_millis(conf.cooling as u64));
let start_moment = Instant::now();
for _ in 1..conf.build_runs { b.new(keys, true); }
let h = b.new(keys, true);
let build_time_seconds = start_moment.elapsed().as_secs_f64();
(h, BuildStats { time_st: f64::NAN, time_mt: build_time_seconds / conf.build_runs as f64 } )
}
fn benchmark_build<K: Hash + TypeToQuery, B: MPHFBuilder<K>>(b: &B, keys: &[K], conf: &Conf) -> (B::MPHF, BuildStats) {
match (B::BUILD_THREADS, conf.threads) {
(Threads::Single, _) | (Threads::Both, Threads::Single) => benchmark_build_st(b, keys, conf),
(Threads::Multi, _) | (Threads::Both, Threads::Multi) => benchmark_build_mt(b, keys, conf),
_ => { let (mphf, result_st) = benchmark_build_st(b, keys, conf);
let size_st = B::BUILD_THREADS_DOES_NOT_CHANGE_SIZE.then(|| B::mphf_size(&mphf));
drop(mphf);
let (mphf, mut result_mt) = benchmark_build_mt(b, keys, conf);
if let Some(size_st) = size_st { warn_size_diff(size_st, B::mphf_size(&mphf)); }
result_mt.time_st = result_st.time_st;
(mphf, result_mt)
}
}
}
fn benchmark_lookup<K: Hash + TypeToQuery, B: MPHFBuilder<K>>(mphf: &B::MPHF, input: &[K], verify: bool, conf: &Conf) -> SearchStats {
if input.is_empty() || conf.lookup_runs == 0 { return SearchStats::nan(); }
let mut extra_levels_searched = 0;
let mut not_found = 0usize;
if verify {
let mut seen = Box::<[u64]>::with_zeroed_bits(input.len());
for v in input {
if let Some(index) = B::value_ex(mphf, v, &mut extra_levels_searched) {
check_collision(&mut seen, input.len(), index as usize);
} else {
not_found += 1;
}
}
} else {
for v in input {
if B::value_ex(mphf, v, &mut extra_levels_searched).is_none() {
not_found += 1;
}
}
}
std::thread::sleep(std::time::Duration::from_millis(conf.cooling as u64));
let start_process_moment = ProcessTime::now();
for _ in 0..conf.lookup_runs {
for v in input { black_box(B::value(mphf, v)); }
}
let seconds = start_process_moment.elapsed().as_secs_f64();
let divider = input.len() as f64;
SearchStats {
avg_deep: extra_levels_searched as f64 / divider,
avg_lookup_time: seconds / (divider * conf.lookup_runs as f64),
absences_found: not_found as f64 / divider
}
}
pub fn benchmark<K: Hash + TypeToQuery, B: MPHFBuilder<K>>(b: B, i: &(Vec<K>, Vec<K>), conf: &Conf) -> BenchmarkResult {
let (h, build) = benchmark_build(&b, &i.0, conf);
let size_bytes = B::mphf_size(&h);
let bits_per_value = 8.0 * size_bytes as f64 / i.0.len() as f64;
if conf.lookup_runs == 0 {
return BenchmarkResult { included: SearchStats::nan(), absent: SearchStats::nan(), size_bytes, bits_per_value, build }
}
let included = benchmark_lookup::<K, B>(&h, &i.0, conf.verify, conf);
ensure_no_absences(included.absences_found);
let absent = if conf.save_details && B::CAN_DETECT_ABSENCE {
benchmark_lookup::<K, B>(&h, &i.1, false, conf)
} else {
SearchStats::nan()
};
BenchmarkResult { included, absent, size_bytes, bits_per_value, build }
}