xor_hasher 0.1.0

Performant hashers for input data that is already highly randomized
Documentation
//! A library that provides [Hasher] and [BuildHasher] objects
//! that directly XORs bytes into an internal state without using a hash function.
//! They are intended to improve the performance of hash-based data structures when
//! the input keys to be hashed are not user-controlled and are already highly
//! randomized (e.g. SHA256 hashes).
#[cfg(feature="getrandom")]
use getrandom::u64 as rand_u64;
#[cfg(feature="rand_core")]
use rand_core::{Rng, TryRng};

use core::hash::{BuildHasher, Hasher};

#[cfg(feature="std")]
use std::collections::{HashMap, HashSet};

#[cfg(feature="heapless")]
use heapless::{IndexMap, IndexSet};

/// A [Hasher] object that XORs in bytes directly.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct XorHasher {
    state: u64,
}
impl XorHasher {
    /// Creates a [XorHasher] from a given initial state.
    pub const fn new(seed: u64) -> Self {
        Self {
            state: seed,
        }
    }
}

impl Hasher for XorHasher {
    fn finish(&self) -> u64 {
        self.state
    }

    fn write(&mut self, bytes: &[u8]) {
        let (u64_chunks, remainder) = bytes.as_chunks::<8>();
        for u64_chunk in u64_chunks {
            let u64_val = u64::from_ne_bytes(*u64_chunk);
            self.state ^= u64_val;
        }
        let mut final_chunk = [0x00; 8];
        final_chunk[..remainder.len()].copy_from_slice(remainder);
        self.state ^= u64::from_ne_bytes(final_chunk)
    }
}

/// A [BuildHasher] object that builds an [XorHasher].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct XorHashBuilder {
    seed: u64
}
impl XorHashBuilder {
    /// Creates a [XorHashBuilder] that uses a given seed.
    pub const fn new(seed: u64) -> Self {
        Self {
            seed
        }
    }
}
#[cfg(feature="getrandom")]
impl XorHashBuilder {
    /// Creates a [XorHashBuilder] seeded from OS-provided randomness.
    pub fn new_from_os_rand() -> Result<Self, getrandom::Error> {
        Ok(Self::new(rand_u64()?))
    }
}
#[cfg(feature="getrandom")]
impl Default for XorHashBuilder {
    /// Creates a [XorHashBuilder] seeded from OS-provided randomness.
    fn default() -> Self {
        Self::new_from_os_rand().expect("Could not get random u64 seed")
    }
}

#[cfg(feature="rand_core")]
impl XorHashBuilder {
    /// Creates a [XorHashBuilder] seeded from the given [Rng].
    pub fn new_from_rng<R: Rng>(rng: &mut R) -> Self {
        Self::new(rng.next_u64())
    }
    /// Creates a [XorHashBuilder] seeded from the given [TryRng].
    pub fn try_new_from_rng<R: TryRng>(rng: &mut R) -> Result<Self, R::Error> {
        Ok(Self::new(rng.try_next_u64()?))
    }
}

impl BuildHasher for XorHashBuilder {
    type Hasher = XorHasher;

    fn build_hasher(&self) -> Self::Hasher {
        XorHasher::new(self.seed)
    }
}

/// A [HashMap] that uses [XorHashBuilder] for hashing.
#[cfg(feature="std")]
pub type XorHashMap<K, V> = HashMap<K, V, XorHashBuilder>;

/// A [HashSet] that uses [XorHashBuilder] for hashing.
#[cfg(feature="std")]
pub type XorHashSet<T> = HashSet<T, XorHashBuilder>;

/// A [IndexMap] that uses [XorHashBuilder] for hashing.
#[cfg(feature="heapless")]
pub type XorIndexMap<K, V, const N: usize> = IndexMap<K, V, XorHashBuilder, N>;
/// A [IndexSet] that uses [XorHashBuilder] for hashing.
#[cfg(feature="heapless")]
pub type XorIndexSet<T, const N: usize> = IndexSet<T, XorHashBuilder, N>;

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_xor_hash_basic() {
        let mut hasher = XorHasher::new(0);
        hasher.write_u64(0x0102030405060708);

        assert_eq!(hasher.finish(), 0x0102030405060708);

        hasher.write_u64(0x0102030405060708);

        assert_eq!(hasher.finish(), 0);
    }

    #[test]
    fn test_xor_hash_seeding() {
        let mut hasher = XorHasher::new(1);
        hasher.write_u64(1);

        assert_eq!(hasher.finish(), 0);
    }

    #[test]
    fn test_xor_hash_partial_len() {
        let mut hasher = XorHasher::new(0);
        hasher.write(&[1,2,3,4,5]);
        let expected = u64::from_ne_bytes([1,2,3,4,5,0,0,0]);

        assert_eq!(hasher.finish(), expected);
    }

    #[test]
    fn test_xor_hash_and_builder() {
        let hasher = XorHasher::new(6769);
        let hash_builder = XorHashBuilder::new(6769);

        // Consecutive calls must be equivalent
        assert_eq!(hash_builder.build_hasher(), hash_builder.build_hasher());

        assert_eq!(hasher, hash_builder.build_hasher());
    }

    #[cfg(feature="std")]
    #[test]
    fn test_hashset() {
        let hash_builder = XorHashBuilder::new(413);
        let mut hash_set = XorHashSet::with_hasher(hash_builder);

        hash_set.insert("cirno");
        hash_set.insert("fumo");
        hash_set.insert("(9)");
    }

    #[cfg(feature="std")]
    #[test]
    fn test_hashmap() {
        let hash_builder = XorHashBuilder::new(413);
        let mut hash_set = XorHashMap::with_hasher(hash_builder);

        hash_set.insert("cirno", 0);
        hash_set.insert("fumo", 1);
        hash_set.insert("(9)", 9);
    }
}