use crate::bits::BitFieldVec;
use crate::func::{VFunc, shard_edge::ShardEdge};
use crate::traits::Unaligned;
use crate::traits::{Backend, Word};
use crate::utils::{BinSafe, Sig, ToSig};
use mem_dbg::*;
use num_primitive::{PrimitiveInteger, PrimitiveNumber, PrimitiveNumberAs};
use std::borrow::Borrow;
use std::ops::Index;
use value_traits::slices::SliceByValue;
#[derive(Debug, Clone, MemSize, MemDbg)]
#[cfg_attr(
feature = "epserde",
derive(epserde::Epserde),
epserde(bound(
deser = "for<'a> <F as epserde::deser::DeserInner>::DeserType<'a>: Backend<Word = F::Word>"
))
)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VFilter<F: Backend> {
pub(crate) func: F,
pub(crate) filter_mask: F::Word,
}
impl<F: Backend> VFilter<F> {
pub fn from_parts(func: F, filter_mask: F::Word) -> Self {
Self { func, filter_mask }
}
}
impl<K: ?Sized + ToSig<S>, D: SliceByValue<Value: Word + BinSafe>, S: Sig, E: ShardEdge<S, 3>>
VFilter<VFunc<K, D, S, E>>
where
u64: PrimitiveNumberAs<D::Value>,
{
#[inline(always)]
pub fn contains_by_sig(&self, sig: S) -> bool {
let expected =
self.func.shard_edge.remixed_hash(sig).as_to::<D::Value>() & self.filter_mask;
self.func.get_by_sig(sig) == expected
}
#[inline]
pub fn contains(&self, key: impl Borrow<K>) -> bool {
self.contains_by_sig(K::to_sig(key.borrow(), self.func.seed))
}
pub const fn len(&self) -> usize {
self.func.num_keys
}
pub const fn is_empty(&self) -> bool {
self.func.num_keys == 0
}
pub fn hash_bits(&self) -> u32 {
D::Value::BITS - self.filter_mask.leading_zeros()
}
}
impl<
K: ?Sized + ToSig<S>,
D: SliceByValue<Value: Word + BinSafe>,
S: Sig,
E: ShardEdge<S, 3>,
B: Borrow<K>,
> Index<B> for VFilter<VFunc<K, D, S, E>>
where
u64: PrimitiveNumberAs<D::Value>,
{
type Output = bool;
#[inline(always)]
fn index(&self, key: B) -> &Self::Output {
if self.contains(key) { &true } else { &false }
}
}
impl<K: ?Sized, W: Word + BinSafe, S: Sig, E: ShardEdge<S, 3>> crate::traits::TryIntoUnaligned
for VFilter<VFunc<K, BitFieldVec<Box<[W]>>, S, E>>
{
type Unaligned = VFilter<VFunc<K, Unaligned<BitFieldVec<Box<[W]>>>, S, E>>;
fn try_into_unaligned(
self,
) -> Result<Self::Unaligned, crate::traits::UnalignedConversionError> {
Ok(VFilter {
func: self.func.try_into_unaligned()?,
filter_mask: self.filter_mask,
})
}
}
impl<K: ?Sized, W: Word + BinSafe, S: Sig, E: ShardEdge<S, 3>>
From<Unaligned<VFilter<VFunc<K, BitFieldVec<Box<[W]>>, S, E>>>>
for VFilter<VFunc<K, BitFieldVec<Box<[W]>>, S, E>>
{
fn from(f: Unaligned<VFilter<VFunc<K, BitFieldVec<Box<[W]>>, S, E>>>) -> Self {
VFilter {
func: f.func.into(),
filter_mask: f.filter_mask,
}
}
}
#[cfg(feature = "rayon")]
mod build {
use super::*;
use crate::func::VBuilder;
use crate::utils::{EmptyVal, FallibleRewindableLender, SigVal};
use anyhow::Result;
use core::error::Error;
use dsi_progress_logger::ProgressLog;
use lender::*;
use rdst::RadixKey;
use std::ops::{BitXor, BitXorAssign};
use value_traits::slices::SliceByValueMut;
impl<K, W, S, E> VFilter<VFunc<K, Box<[W]>, S, E>>
where
K: ?Sized + ToSig<S> + std::fmt::Debug,
W: Word + BinSafe,
S: Sig + Send + Sync,
E: ShardEdge<S, 3>,
SigVal<S, EmptyVal>: RadixKey,
SigVal<E::LocalSig, EmptyVal>: BitXor + BitXorAssign,
{
pub fn try_new<B: ?Sized + Borrow<K>>(
keys: impl FallibleRewindableLender<
RewindError: Error + Send + Sync + 'static,
Error: Error + Send + Sync + 'static,
> + for<'lend> FallibleLending<'lend, Lend = &'lend B>,
n: usize,
pl: &mut (impl ProgressLog + Clone + Send + Sync),
) -> Result<Self>
where
for<'a> <<Box<[W]> as SliceByValueMut>::ChunksMut<'a> as Iterator>::Item:
crate::traits::bit_field_slice::BitFieldSliceMut,
for<'a> <Box<[W]> as SliceByValueMut>::ChunksMut<'a>: Send,
for<'a> <<Box<[W]> as SliceByValueMut>::ChunksMut<'a> as Iterator>::Item: Send,
{
Self::try_new_with_builder(keys, n, VBuilder::default(), pl)
}
pub fn try_new_with_builder<B: ?Sized + Borrow<K>, P: ProgressLog + Clone + Send + Sync>(
keys: impl FallibleRewindableLender<
RewindError: Error + Send + Sync + 'static,
Error: Error + Send + Sync + 'static,
> + for<'lend> FallibleLending<'lend, Lend = &'lend B>,
n: usize,
builder: VBuilder<Box<[W]>, S, E>,
pl: &mut P,
) -> Result<Self>
where
for<'a> <<Box<[W]> as SliceByValueMut>::ChunksMut<'a> as Iterator>::Item:
crate::traits::BitFieldSliceMut,
for<'a> <Box<[W]> as SliceByValueMut>::ChunksMut<'a>: Send,
for<'a> <<Box<[W]> as SliceByValueMut>::ChunksMut<'a> as Iterator>::Item: Send,
{
let filter_mask = W::MAX;
let func = builder.expected_num_keys(n).try_build_filter(
keys,
W::BITS as usize,
|_, len| vec![W::ZERO; len].into(),
&|shard_edge, sig_val| {
W::as_from(crate::func::mix64(shard_edge.edge_hash(sig_val.sig)))
},
pl,
)?;
Ok(VFilter { func, filter_mask })
}
pub fn try_par_new<P: ProgressLog + Clone + Send + Sync>(
keys: &[impl Borrow<K> + Sync],
pl: &mut P,
) -> Result<Self>
where
K: Sync,
S: Send,
{
Self::try_par_new_with_builder(keys, VBuilder::default(), pl)
}
pub fn try_par_new_with_builder<P: ProgressLog + Clone + Send + Sync>(
keys: &[impl Borrow<K> + Sync],
builder: VBuilder<Box<[W]>, S, E>,
pl: &mut P,
) -> Result<Self>
where
K: Sync,
S: Send,
{
let n = keys.len();
let filter_mask = W::MAX;
let func = builder.expected_num_keys(n).try_par_populate_and_build(
keys,
&|_| EmptyVal::default(),
&mut |builder,
seed,
mut store,
_max_value,
_num_keys,
pl: &mut P,
_state: &mut ()| {
builder.bit_width = W::BITS as usize;
let data: Box<[W]> = vec![
W::ZERO;
builder.shard_edge.num_vertices()
* builder.shard_edge.num_shards()
]
.into();
pl.info(format_args!(
"Number of keys: {} Bit width: {}",
builder.num_keys, builder.bit_width,
));
let func = builder.try_build_from_shard_iter(
seed,
data,
store.drain(),
&|shard_edge: &E, sig_val| {
W::as_from(crate::func::mix64(shard_edge.edge_hash(sig_val.sig)))
},
&|_| {},
pl,
)?;
Ok(func)
},
pl,
(),
)?;
Ok(VFilter { func, filter_mask })
}
}
impl<K, W, S, E> VFilter<VFunc<K, BitFieldVec<Box<[W]>>, S, E>>
where
K: ?Sized + ToSig<S> + std::fmt::Debug,
W: Word + BinSafe,
S: Sig + Send + Sync,
E: ShardEdge<S, 3>,
SigVal<S, EmptyVal>: RadixKey,
SigVal<E::LocalSig, EmptyVal>: BitXor + BitXorAssign,
{
pub fn try_new<B: ?Sized + Borrow<K>>(
keys: impl FallibleRewindableLender<
RewindError: Error + Send + Sync + 'static,
Error: Error + Send + Sync + 'static,
> + for<'lend> FallibleLending<'lend, Lend = &'lend B>,
n: usize,
filter_bits: usize,
pl: &mut (impl ProgressLog + Clone + Send + Sync),
) -> Result<Self>
where
for<'a> <BitFieldVec<Box<[W]>> as SliceByValueMut>::ChunksMut<'a>: Send,
for<'a> <<BitFieldVec<Box<[W]>> as SliceByValueMut>::ChunksMut<'a> as Iterator>::Item:
Send,
{
Self::try_new_with_builder(keys, n, filter_bits, VBuilder::default(), pl)
}
pub fn try_new_with_builder<B: ?Sized + Borrow<K>, P: ProgressLog + Clone + Send + Sync>(
keys: impl FallibleRewindableLender<
RewindError: Error + Send + Sync + 'static,
Error: Error + Send + Sync + 'static,
> + for<'lend> FallibleLending<'lend, Lend = &'lend B>,
n: usize,
filter_bits: usize,
builder: VBuilder<BitFieldVec<Box<[W]>>, S, E>,
pl: &mut P,
) -> Result<Self>
where
for<'a> <BitFieldVec<Box<[W]>> as SliceByValueMut>::ChunksMut<'a>: Send,
for<'a> <<BitFieldVec<Box<[W]>> as SliceByValueMut>::ChunksMut<'a> as Iterator>::Item:
Send,
{
assert!(filter_bits > 0);
assert!(filter_bits <= W::BITS as usize);
let filter_mask = W::MAX >> (W::BITS - filter_bits as u32);
let func = builder.expected_num_keys(n).try_build_filter(
keys,
filter_bits,
BitFieldVec::new_padded,
&|shard_edge, sig_val| {
W::as_from(crate::func::mix64(shard_edge.edge_hash(sig_val.sig))) & filter_mask
},
pl,
)?;
Ok(VFilter { func, filter_mask })
}
pub fn try_par_new<P: ProgressLog + Clone + Send + Sync>(
keys: &[impl Borrow<K> + Sync],
filter_bits: usize,
pl: &mut P,
) -> Result<Self>
where
K: Sync,
S: Send,
{
Self::try_par_new_with_builder(keys, filter_bits, VBuilder::default(), pl)
}
pub fn try_par_new_with_builder<P: ProgressLog + Clone + Send + Sync>(
keys: &[impl Borrow<K> + Sync],
filter_bits: usize,
builder: VBuilder<BitFieldVec<Box<[W]>>, S, E>,
pl: &mut P,
) -> Result<Self>
where
K: Sync,
S: Send,
{
assert!(filter_bits > 0);
assert!(filter_bits <= W::BITS as usize);
let n = keys.len();
let filter_mask = W::MAX >> (W::BITS - filter_bits as u32);
let func = builder.expected_num_keys(n).try_par_populate_and_build(
keys,
&|_| EmptyVal::default(),
&mut |builder,
seed,
mut store,
_max_value,
_num_keys,
pl: &mut P,
_state: &mut ()| {
builder.bit_width = filter_bits;
let data = BitFieldVec::<Box<[W]>>::new_padded(
builder.bit_width,
builder.shard_edge.num_vertices() * builder.shard_edge.num_shards(),
);
pl.info(format_args!(
"Number of keys: {} Bit width: {}",
builder.num_keys, builder.bit_width,
));
let func = builder.try_build_from_shard_iter(
seed,
data,
store.drain(),
&|shard_edge: &E, sig_val| {
W::as_from(crate::func::mix64(shard_edge.edge_hash(sig_val.sig)))
& filter_mask
},
&|_| {},
pl,
)?;
Ok(func)
},
pl,
(),
)?;
Ok(VFilter { func, filter_mask })
}
}
}
#[cfg(all(test, feature = "rayon"))]
mod tests {
use std::ops::{BitXor, BitXorAssign};
use dsi_progress_logger::no_logging;
use num_primitive::PrimitiveNumberAs;
use rdst::RadixKey;
use crate::{
func::{
VBuilder, VFunc,
shard_edge::{FuseLge3NoShards, FuseLge3Shards},
},
utils::{EmptyVal, FromCloneableIntoIterator, Sig, SigVal, ToSig},
};
use super::{ShardEdge, VFilter};
#[test]
fn test_filter_func() -> anyhow::Result<()> {
_test_filter_func::<[u64; 1], FuseLge3NoShards>()?;
_test_filter_func::<[u64; 2], FuseLge3Shards>()?;
Ok(())
}
fn _test_filter_func<S: Sig + Send + Sync, E: ShardEdge<S, 3, LocalSig = [u64; 1]>>()
-> anyhow::Result<()>
where
usize: ToSig<S>,
u128: PrimitiveNumberAs<usize>,
SigVal<S, EmptyVal>: RadixKey + BitXor + BitXorAssign,
SigVal<E::LocalSig, EmptyVal>: RadixKey + BitXor + BitXorAssign,
{
for n in [0_usize, 10, 1000, 100_000, 1_000_000] {
let filter = <VFilter<VFunc<usize, Box<[u8]>, S, E>>>::try_new_with_builder(
FromCloneableIntoIterator::from(0..n),
n,
VBuilder::default().log2_buckets(4).offline(false),
no_logging![],
)?;
for i in 0..n {
let sig = ToSig::<S>::to_sig(i, filter.func.seed);
assert_eq!(
filter.func.shard_edge.remixed_hash(sig) & 0xFF,
filter.func.get_by_sig(sig) as u64,
"Hash mismatch for key {i}"
);
}
}
Ok(())
}
}