fusefilter 0.1.0

No alloc membership approximation
// reduce/sizing
#![feature(const_generics)]
#![feature(const_fn_floating_point_arithmetic)]
#![feature(const_evaluatable_checked)]
use core::any::{Any, TypeId};
use core::cmp::PartialEq;
use core::convert::TryFrom;
use core::hash::{Hash, Hasher, SipHasher};
use core::ops::{BitXor, BitXorAssign};

use core::fmt::Display;

pub mod num;
use crate::num::AsPrimitive;
use crate::num::FromPrimitive;

const H3: u64 = 0xBF58_476D_1CE4_E5B9;
const ARITY: u32 = 3;
const SEGMENT_COUNT: u32 = 100;
pub const FUSE_OVERHEAD: f64 = 1.0 / 0.879;
pub const FUSE_CONSTANT: f64 = 1024.0;
const SLOTS: u32 = SEGMENT_COUNT + ARITY - 1;

#[derive(Debug)]
pub struct Fuse<T: AsPrimitive<T> + Default + Copy, const N: usize>
where
    [T; capacity::<N>()]: Sized,
    T: BitXor,
    T: BitXorAssign,
    T: BitXor<Output = T>,
    T: PartialEq,
    u64: AsPrimitive<T>,
    T: AsPrimitive<T>,
    T: FromPrimitive,
{
    pub seed: u64,
    pub segment_length: usize,
    pub fingerprints: [T; capacity::<{ N }>()],
}

impl<T, const N: usize> Fuse<T, N>
where
    [T; capacity::<N>()]: Sized,
    T: BitXor,
    T: Default,
    T: BitXorAssign,
    T: BitXor<Output = T>,
    T: PartialEq,
    u64: AsPrimitive<T>,
    T: AsPrimitive<T>,
    T: FromPrimitive,
{
    pub fn populate(keys: [u64; N]) -> Self {
        const MAX_ITER: usize = 1000;
        let segment_length = capacity::<{ N }>() / (SLOTS as usize);

        let mut H: [HSet; capacity::<{ N }>()] = [HSet::default(); capacity::<{ N }>()];
        let mut Q: [KeyIndex; capacity::<{ N }>()] = [KeyIndex::default(); capacity::<{ N }>()];
        let mut stack: [KeyIndex; N] = [KeyIndex::default(); N];

        let mut rng = 1;
        let mut seed = splitmix64(&mut rng);
        for _ in 0..MAX_ITER {
            // Populate H by adding each key to its respective set.
            for key in keys.iter() {
                let HashSet { hash, hset } = HashSet::fuse_from(*key, segment_length, seed);

                for b in 0..3 {
                    H[hset[b]].mask ^= hash;
                    H[hset[b]].count += 1;
                }
            }

            let mut q_size = 0;
            for idx in 0..(capacity::<{ N }>()) {
                if H[idx].count == 1 {
                    Q[q_size].index = idx;
                    Q[q_size].hash = H[idx].mask;
                    q_size += 1;
                }
            }

            let mut stack_size = 0;
            while q_size > 0 {
                q_size -= 1;
                let ki = Q[q_size];
                if H[ki.index].count == 0 {
                    continue;
                }

                let H012 { hset } = H012::from(ki.hash, segment_length as u32);

                stack[stack_size] = ki;
                stack_size += 1;

                for b in 0..3 {
                    let setidx = hset[b];
                    H[setidx].mask ^= ki.hash;
                    H[setidx].count -= 1;
                    if H[setidx].count == 1 {
                        Q[q_size].index = setidx;
                        Q[q_size].hash = H[setidx].mask;
                        q_size += 1;
                    }
                }
            }

            if stack_size == N {
                break;
            }

            for set in H.iter_mut() {
                *set = HSet::default();
            }
            seed = splitmix64(&mut rng)
        }

        let mut B: [T; capacity::<{ N }>()] = [T::default(); capacity::<N>()];
        for i in 0..N {
            B[i] = T::default();
        }
        for ki in stack.iter().rev() {
            let H012 { hset: [h0, h1, h2] } = H012::from(ki.hash, segment_length as u32);
            let fp = (fingerprint(ki.hash).as_())
                ^ match ki.index {
                    h if h == h0 => B[h1] ^ B[h2],
                    h if h == h1 => B[h0] ^ B[h2],
                    h if h == h2 => B[h0] ^ B[h1],
                    _ => B[h0] ^ B[h1], 
                };
            B[ki.index] = fp;
        }

        Self {
            seed,
            segment_length,
            fingerprints: B,
        }
    }

    pub fn try_from<U: Any + Hash + AsAny + Clone>(keys: &[U; N]) -> Self {
        let mut k: [u64; N] = [0_u64; N];
        if TypeId::of::<u64>() != TypeId::of::<U>() {
            for i in 0..N {
                k[i] = hash::<U, SipHasher>(&keys[i]);
            }
            k = *k.as_any().downcast_ref::<[u64; N]>().unwrap();
        } else {
            k = *keys.as_any().downcast_ref::<[u64; N]>().unwrap();
        }
        Self::populate(k.clone())
    }

    pub fn contains<U: Any + Hash + AsAny + Display + core::fmt::Debug>(
        &self,
        key: &U,
    ) -> bool {
        let mut k: u64;
        if TypeId::of::<u64>() != TypeId::of::<U>() {
            k = hash::<U, SipHasher>(&key);
        } else {
            k = *key.as_any().downcast_ref::<u64>().unwrap();
        }

        let HashSet {
            hash,
            hset: [h0, h1, h2],
        } = HashSet::fuse_from(k, self.segment_length, self.seed);

        let fp = fingerprint(hash).as_();
        fp == self.fingerprints[h0] ^ self.fingerprints[h1] ^ self.fingerprints[h2]

    }
}

impl HashSet {
    pub const fn fuse_from(key: u64, segment_length: usize, seed: u64) -> Self {
        let hash = mix(key, seed);
        let H012 { hset } = H012::from(hash, segment_length as u32);

        Self { hash, hset }
    }
}

pub struct H012 {
    pub hset: [usize; 3],
}

impl H012 {
    pub const fn from(hash: u64, segment_length: u32) -> Self {

        let r0 = hash as u32;
        let r1 = rotl64(hash, 21) as u32;
        let r2 = rotl64(hash, 42) as u32;
        let r3 = ((H3.overflowing_mul(hash).0) >> 32) as u32;

        let seg = reduce(r0, SEGMENT_COUNT);

        Self {
            hset: [
                (seg * segment_length + reduce(r1, segment_length)) as usize,
                ((seg + 1) * segment_length + reduce(r2, segment_length)) as usize,
                ((seg + 2) * segment_length + reduce(r3, segment_length)) as usize,
            ],
        }
    }
}

pub struct HashSet {
    pub hash: u64,
    pub hset: [usize; 3],
}

/// The hash of a key and the index of that key in the construction array H.
#[derive(Default, Copy, Clone)]
pub struct KeyIndex {
    pub hash: u64,
    pub index: usize,
}

/// A set in the construction array H. Elements are encoded via xor with the mask.
#[derive(Copy, Default, Clone)]
pub struct HSet {
    pub count: u32,
    pub mask: u64,
}

const fn mix(key: u64, seed: u64) -> u64 {
    mix64(key.overflowing_add(seed).0)
}

pub const fn mix64(mut k: u64) -> u64 {
    k ^= k >> 33;
    k = k.overflowing_mul(0xff51_afd7_ed55_8ccd).0;
    k ^= k >> 33;
    k = k.overflowing_mul(0xc4ce_b9fe_1a85_ec53).0;
    k ^= k >> 33;
    k
}

fn fingerprint(hash: u64) -> u64 {
    return hash ^ (hash >> 32);
}

const fn rotl64(n: u64, c: isize) -> u64 {
    (n << ((c & 63) as usize)) | (n >> (((-c) & 63) as usize))
}

// http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
const fn reduce(hash: u32, n: u32) -> u32 {
    (((hash as u64) * (n as u64)) >> 32) as u32
}

pub fn splitmix64(seed: &mut u64) -> u64 {
    *seed = (*seed).overflowing_add(0x9e37_79b9_7f4a_7c15).0;
    let mut z = *seed;
    z = (z ^ (z >> 30)).overflowing_mul(0xbf58_476d_1ce4_e5b9).0;
    z = (z ^ (z >> 27)).overflowing_mul(0x94d0_49bb_1331_11eb).0;
    z ^ (z >> 31)
}

pub const fn capacity<const N: usize>() -> usize {
    (((FUSE_OVERHEAD * (N as f64) + FUSE_CONSTANT) as u32) / SLOTS * SLOTS) as usize
}

pub const fn sl<const N: usize>() -> u32 {
    (capacity::<N>() as u32) / SLOTS
}

fn hash<T: Hash, H: Hasher + Default>(key: &T) -> u64 {
    let mut hasher = H::default();
    key.hash(&mut hasher);
    hasher.finish()
}

// https://users.rust-lang.org/t/calling-any-downcast-ref-requires-static/52071
pub trait AsAny {
    fn as_any(&self) -> &dyn Any;
}

impl<T: Any> AsAny for T {
    fn as_any(&self) -> &dyn Any {
        self
    }
}