murmur3_32 0.1.0

implementation of MurmurHash3 32bit algorithm
Documentation
#![no_std]

#[cfg(any(test, feature = "std"))]
mod io;
#[cfg(any(test, feature = "std"))]
pub use io::Murmur3Io;

const C1: u32 = 0xcc9e_2d51;
const C2: u32 = 0x1b87_3593;
const D: u32 = 0xe654_6b64;
const FMIX1: u32 = 0x85eb_ca6b;
const FMIX2: u32 = 0xc2b2_ae35;

const DEFAULT_SEED: u32 = 0;

pub struct Murmur3 {
    h: u32,
    len: u32,
    remainder: [u8; 4],
    remainder_len: u8,
}
impl Murmur3 {
    #[inline]
    pub fn new(seed: u32) -> Self {
        Self {
            h: seed,
            remainder: [0, 0, 0, 0],
            remainder_len: 0,
            len: 0,
        }
    }
    #[inline]
    pub fn hash<B: AsRef<[u8]>>(seed: u32, data: B) -> u32 {
        hash_bytes(seed, data.as_ref())
    }
    #[inline(always)]
    fn spare(&mut self) -> &mut [u8] {
        &mut self.remainder[self.remainder_len as usize..]
    }
    #[inline]
    pub fn write(&mut self, data: &[u8]) {
        self.len = self.len.wrapping_add(data.len() as u32);
        if self.remainder_len as usize + data.len() < 4 {
            self.spare()[..data.len()].copy_from_slice(data);
            self.remainder_len += data.len() as u8;
            return;
        }
        let spare = self.spare();
        let (first, data) = &data.split_at(spare.len());
        spare.copy_from_slice(first);
        self.h = one_round(self.h, self.remainder);

        let mut it = data.chunks_exact(4);
        for chunk in it.by_ref() {
            self.h = one_round(self.h, chunk.try_into().unwrap());
        }
        self.remainder_len = data.len() as u8 & 3;
        self.remainder[..self.remainder_len as usize].copy_from_slice(it.remainder());
    }
    #[inline]
    pub fn finish(mut self) -> u32 {
        let rem_len = self.remainder_len as usize;
        if rem_len > 0 {
            self.remainder[rem_len..].fill(0);
            self.h ^= calc_k(self.remainder);
        }
        fmix32(self.h ^ self.len)
    }
}

#[inline(always)]
fn calc_k(chunk: [u8; 4]) -> u32 {
    u32::from_le_bytes(chunk)
        .wrapping_mul(C1)
        .rotate_left(15)
        .wrapping_mul(C2)
}
#[inline(always)]
fn one_round(h: u32, chunk: [u8; 4]) -> u32 {
    (h ^ calc_k(chunk))
        .rotate_left(13)
        .wrapping_mul(5)
        .wrapping_add(D)
}
#[inline(always)]
fn fmix32(mut h: u32) -> u32 {
    h = (h ^ h >> 16).wrapping_mul(FMIX1);
    h = (h ^ h >> 13).wrapping_mul(FMIX2);
    h ^ h >> 16
}

#[inline]
fn hash_bytes(mut seed: u32, data: &[u8]) -> u32 {
    let mut it = data.chunks_exact(4);
    for chunk in it.by_ref() {
        seed = one_round(seed, chunk.try_into().unwrap());
    }
    let rem = it.remainder();
    if rem.len() > 0 {
        let mut last_chunk = [0, 0, 0, 0];
        last_chunk[..rem.len()].copy_from_slice(rem);
        seed ^= calc_k(last_chunk);
    }
    fmix32(seed ^ data.len() as u32)
}

impl core::hash::Hasher for Murmur3 {
    fn write(&mut self, bytes: &[u8]) {
        self.write(bytes);
    }
    fn finish(&self) -> u64 {
        let mut h = self.h;
        let rem_len = self.remainder_len as usize;
        if rem_len > 0 {
            let mut rem = self.remainder;
            rem[rem_len..].fill(0);
            h ^= calc_k(rem);
        }
        fmix32(h ^ self.len) as _
    }
}

impl core::default::Default for Murmur3 {
    fn default() -> Self {
        Self::new(DEFAULT_SEED)
    }
}

#[cfg(test)]
mod test {
    extern crate std;

    use super::*;
    use std::{
        io::{self, Write},
        vec::Vec,
    };

    fn hash(bytes: &[u8]) -> u32 {
        const SEED: u32 = 3_242_157_231;

        let mut hasher = Murmur3::new(SEED);
        let mut hasher2 = Murmur3::new(SEED);
        let mut buf = Vec::<u8>::new();
        let mut writer = Murmur3Io::new(SEED, &mut buf);

        hasher.write(bytes);
        std::hash::Hasher::write(&mut hasher2, bytes);
        writer.write(bytes).unwrap();
        assert_eq!(hasher.len as usize, bytes.len());
        assert_eq!(hasher2.len as usize, bytes.len());
        assert_eq!(writer.hasher.len as usize, bytes.len());
        let (ret, _) = writer.finish();
        assert_eq!(Murmur3::hash(SEED, bytes), ret);
        assert_eq!(hasher.finish(), ret);
        assert_eq!(std::hash::Hasher::finish(&hasher2), ret as u64);

        let mut reader = Murmur3Io::new(SEED, buf.as_slice());
        io::copy(&mut reader, &mut io::empty()).unwrap();
        let (hash, _) = reader.finish();
        assert_eq!(hash, ret);
        ret
    }

    #[test]
    fn murmur3() {
        assert_eq!(hash(b""), 36_859_204);
        assert_eq!(hash(b"a"), 3_144_985_375);
        assert_eq!(hash(b"ab"), 3_262_304_301);
        assert_eq!(hash(b"abc"), 476_091_040);
        assert_eq!(hash(b"abcd"), 412_992_581);
        assert_eq!(hash(b"abcde"), 2_747_833_956);
        assert_eq!(hash(b"abcdefghijklmnop"), 2_078_305_053);
    }
}