1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//! `MumHash`, Hashing functions and PRNGs based on them
//!
//! by Vladimir Makarov <vmakarov@gcc.gnu.org>
//!
//! https://github.com/vnmakarov/mum-hash
//!
//! * MUM hash is a **fast non-cryptographic hash function**
//!   suitable for different hash table implementations
//! * MUM means **MU**ltiply and **M**ix
//!   * It is a name of the base transformation on which hashing is implemented
//!   * Modern processors have a fast logic to do long number multiplications
//!   * It is very attractable to use it for fast hashing
//!     * For example, 64x64-bit multiplication can do the same work as 32
//!       shifts and additions
//!   * I'd like to call it Multiply and Reduce.  Unfortunately, MUR
//!     (Multiply and Rotate) is already taken for famous hashing
//!     technique designed by Austin Appleby
//!   * I've chosen the name also as I am releasing it on Mother's day
//! * MUM hash passes **all** [`SMHasher`](https://github.com/aappleby/smhasher) tests
//!   * For comparison, only 4 out of 15 non-cryptographic hash functions
//!     in `SMHasher` passes the tests, e.g. well known FNV, Murmur2,
//!     Lookup, and Superfast hashes fail the tests
//! * MUM algorithm is **simpler** than City64 and Spooky ones
//! * MUM is specifically **designed for 64-bit CPUs** (Sorry, I did not want to
//!   spend my time on dying architectures)
//!   * Still MUM will work for 32-bit CPUs and it will be sometimes
//!     faster Spooky and City
//! * On x86-64 MUM hash is **faster** than City64 and Spooky on all tests except for one
//!   test for the bulky speed
//!   * Starting with 240-byte strings, City uses Intel SSE4.2 crc32 instruction
//!   * I could use the same instruction but I don't want to complicate the algorithm
//!   * In typical scenario, such long strings are rare.  Usually another
//!     interface (see `mum_hash_step`) is used for hashing big data structures
//! * MUM has a **fast startup**.  It is particular good to hash small keys
//!   which are a majority of hash table applications
//!
//! # Example
//!
//! ```
//! use std::hash::{Hash, Hasher};
//!
//! use fasthash::{mum, MumHasher};
//!
//! fn hash<T: Hash>(t: &T) -> u64 {
//!     let mut s: MumHasher = Default::default();
//!     t.hash(&mut s);
//!     s.finish()
//! }
//!
//! let h = mum::hash64(b"hello world\xff");
//!
//! assert_eq!(h, hash(&"hello world"));
//! ```
//!
#![allow(non_camel_case_types)]
use std::os::raw::c_void;

use ffi;

use hasher::{FastHash, FastHasher};

/// `MumHash` 64-bit hash functions
pub struct MumHash {}

impl FastHash for MumHash {
    type Value = u64;
    type Seed = u64;

    #[inline]
    fn hash_with_seed<T: AsRef<[u8]>>(bytes: &T, seed: u64) -> u64 {
        unsafe {
            ffi::mum_hash_(bytes.as_ref().as_ptr() as *const c_void,
                           bytes.as_ref().len(),
                           seed)
        }
    }
}

impl_hasher!(MumHasher, MumHash);

/// `MumHash` 64-bit hash functions for a byte array.
#[inline]
pub fn hash64<T: AsRef<[u8]>>(v: &T) -> u64 {
    MumHash::hash(v)
}

/// `MumHash` 64-bit hash function for a byte array.
/// For convenience, a 64-bit seed is also hashed into the result.
#[inline]
pub fn hash64_with_seed<T: AsRef<[u8]>>(v: &T, seed: u64) -> u64 {
    MumHash::hash_with_seed(v, seed)
}

#[cfg(test)]
mod tests {
    use std::hash::Hasher;

    use hasher::{FastHash, FastHasher};
    use super::*;

    #[test]
    fn test_mum64() {
        assert_eq!(MumHash::hash(b"hello"), 9723359729180093834);
        assert_eq!(MumHash::hash_with_seed(b"hello", 123), 12693953100868515521);
        assert_eq!(MumHash::hash(b"helloworld"), 9122204010978352975);

        let mut h = MumHasher::new();

        h.write(b"hello");
        assert_eq!(h.finish(), 9723359729180093834);

        h.write(b"world");
        assert_eq!(h.finish(), 9122204010978352975);
    }
}