use dyn_size_of::GetSize;
use voracious_radix_sort::RadixSort;
use crate::{phast::{builder::{build_mt, build_st, BuildConf}, conf::Conf, evaluator::BucketToActivateEvaluator, function::SeedEx, Params, SeedChooser, SeedOnly, WINDOW_SIZE}, seeds::SeedSize};
use std::hash::{BuildHasher, Hash, RandomState};
pub struct Partial<SS, SC = SeedOnly, S = RandomState> where SS: SeedSize {
seeds: SeedEx<SS::VecElement>,
hasher: S,
seed_chooser: SC,
seed_size: SS,
}
impl<SC, SS: SeedSize, S> GetSize for Partial<SS, SC, S> {
fn size_bytes_dyn(&self) -> usize { self.seeds.size_bytes_dyn() }
fn size_bytes_content_dyn(&self) -> usize { self.seeds.size_bytes_content_dyn() }
const USES_DYN_MEM: bool = true;
}
impl<SS: SeedSize, SC: SeedChooser> Partial<SS, SC, ()> {
pub fn with_hashes_bps_conf_sc_be_u<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
where BE: BucketToActivateEvaluator
{
let (f, build_conf) = Self::build_st(hashes, seed_size, conf, (), seed_chooser, bucket_evaluator);
let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
(f, unassigned)
}
pub fn with_hashes_bps_conf_bs_threads_sc_be_u<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
{
let (f, build_conf) = Self::build_mt(hashes, seed_size, conf, threads_num, (), seed_chooser, bucket_evaluator);
let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
(f, unassigned)
}
pub fn with_hashes_bps_conf_sc_be<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, seed_chooser: SC, bucket_evaluator: BE) -> Self
where BE: BucketToActivateEvaluator
{
Self::build_st(hashes, seed_size, conf, (), seed_chooser, bucket_evaluator).0
}
pub fn with_hashes_bps_conf_bs_threads_sc_be<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> Self
where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
{
Self::build_mt(hashes, seed_size, conf, threads_num, (), seed_chooser, bucket_evaluator).0
}
pub fn with_hashes_p_sc_be_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
where BE: BucketToActivateEvaluator
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
Self::with_hashes_bps_conf_sc_be_u(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
}
pub fn with_hashes_p_threads_sc_be_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
Self::with_hashes_bps_conf_bs_threads_sc_be_u(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
}
pub fn with_hashes_p_sc_be<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC, bucket_evaluator: BE) -> Self
where BE: BucketToActivateEvaluator
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
Self::with_hashes_bps_conf_sc_be(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
}
pub fn with_hashes_p_threads_sc_be<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> Self
where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
Self::with_hashes_bps_conf_bs_threads_sc_be(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
}
pub fn with_hashes_p_sc_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC) -> (Self, usize)
where BE: BucketToActivateEvaluator
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
Self::with_hashes_bps_conf_sc_be_u(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
}
pub fn with_hashes_p_threads_sc_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC) -> (Self, usize)
where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
Self::with_hashes_bps_conf_bs_threads_sc_be_u(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
}
pub fn with_hashes_p_sc<'k>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC) -> Self
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
Self::with_hashes_bps_conf_sc_be(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
}
pub fn with_hashes_p_threads_sc<'k>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC) -> Self
where SC: Sync
{
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
Self::with_hashes_bps_conf_bs_threads_sc_be(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
}
}
impl<SS: SeedSize, SC: SeedChooser, S> Partial<SS, SC, S> {
#[inline(always)]
pub fn get_for_hash(&self, key_hash: u64) -> Option<usize> {
let seed = unsafe { self.seeds.seed_for(self.seed_size, key_hash) };
(seed != 0).then(|| self.seed_chooser.f(key_hash, seed, &self.seeds.conf))
}
fn build_st<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
-> (Self, BuildConf<'k, BE, SS, SC>)
where BE: BucketToActivateEvaluator
{
hashes.voracious_sort();
let (seeds, build_conf) = build_st(hashes, conf, seed_size, bucket_evaluator, seed_chooser);
(Self {
seeds: SeedEx{ seeds, conf },
hasher,
seed_chooser,
seed_size
}, build_conf)
}
fn build_mt<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
-> (Self, BuildConf<'k, BE, SS, SC>)
where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
{
if threads_num == 1 { return Self::build_st(hashes, seed_size, conf, hasher, seed_chooser, bucket_evaluator); }
hashes.voracious_mt_sort(threads_num);
let (seeds, build_conf) = build_mt(hashes, conf, seed_size, WINDOW_SIZE, bucket_evaluator, seed_chooser, threads_num);
(Self {
seeds: SeedEx{ seeds, conf },
hasher,
seed_chooser,
seed_size,
}, build_conf)
}
}
impl<SS: SeedSize, SC: SeedChooser, S> Partial<SS, SC, S> {
#[inline(always)] pub fn minimal_output_range(&self, num_of_keys: u32) -> usize {
self.seed_chooser.minimal_output_range(num_of_keys as usize)
}
pub fn output_range(&self) -> usize {
self.seeds.conf.output_range(self.seed_chooser, self.seed_size.into())
}
}
impl<SS: SeedSize, SC: SeedChooser, S: BuildHasher> Partial<SS, SC, S> {
#[inline(always)]
pub fn get<K>(&self, key: &K) -> Option<usize> where K: Hash + ?Sized {
self.get_for_hash(self.hasher.hash_one(key))
}
pub fn with_keys_p_hash_sc_be_u<'k, K, BE>(keys: impl Iterator<Item = K>, params: &Params<SS>, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
-> (Self, usize)
where K: Hash, BE: BucketToActivateEvaluator
{
let mut hashes: Box<[_]> = keys.map(|k| hasher.hash_one(k)).collect();
let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
let (f, build_conf) = Self::build_st(&mut hashes, params.seed_size, conf, hasher, seed_chooser, bucket_evaluator);
let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
(f, unassigned)
}
}