fasthash_fork/
mum.rs

1//! `MumHash`, Hashing functions and PRNGs based on them
2//!
3//! by Vladimir Makarov <vmakarov@gcc.gnu.org>
4//!
5//! https://github.com/vnmakarov/mum-hash
6//!
7//! * MUM hash is a **fast non-cryptographic hash function**
8//!   suitable for different hash table implementations
9//! * MUM means **MU**ltiply and **M**ix
10//!   * It is a name of the base transformation on which hashing is implemented
11//!   * Modern processors have a fast logic to do long number multiplications
12//!   * It is very attractable to use it for fast hashing
13//!     * For example, 64x64-bit multiplication can do the same work as 32
14//!       shifts and additions
15//!   * I'd like to call it Multiply and Reduce.  Unfortunately, MUR
16//!     (Multiply and Rotate) is already taken for famous hashing
17//!     technique designed by Austin Appleby
18//!   * I've chosen the name also as I am releasing it on Mother's day
19//! * MUM hash passes **all** [`SMHasher`](https://github.com/aappleby/smhasher) tests
20//!   * For comparison, only 4 out of 15 non-cryptographic hash functions
21//!     in `SMHasher` passes the tests, e.g. well known FNV, Murmur2,
22//!     Lookup, and Superfast hashes fail the tests
23//! * MUM algorithm is **simpler** than City64 and Spooky ones
24//! * MUM is specifically **designed for 64-bit CPUs** (Sorry, I did not want to
25//!   spend my time on dying architectures)
26//!   * Still MUM will work for 32-bit CPUs and it will be sometimes
27//!     faster Spooky and City
28//! * On x86-64 MUM hash is **faster** than City64 and Spooky on all tests except for one
29//!   test for the bulky speed
30//!   * Starting with 240-byte strings, City uses Intel SSE4.2 crc32 instruction
31//!   * I could use the same instruction but I don't want to complicate the algorithm
32//!   * In typical scenario, such long strings are rare.  Usually another
33//!     interface (see `mum_hash_step`) is used for hashing big data structures
34//! * MUM has a **fast startup**.  It is particular good to hash small keys
35//!   which are a majority of hash table applications
36//!
37//! # Example
38//!
39//! ```
40//! use std::hash::{Hash, Hasher};
41//!
42//! use fasthash::{mum, MumHasher};
43//!
44//! fn hash<T: Hash>(t: &T) -> u64 {
45//!     let mut s: MumHasher = Default::default();
46//!     t.hash(&mut s);
47//!     s.finish()
48//! }
49//!
50//! let h = mum::hash64(b"hello world\xff");
51//!
52//! assert_eq!(h, hash(&"hello world"));
53//! ```
54//!
55#![allow(non_camel_case_types)]
56use std::os::raw::c_void;
57
58use crate::ffi;
59
60use crate::hasher::FastHash;
61
62/// `MumHash` 64-bit hash functions
63///
64/// # Example
65///
66/// ```
67/// use fasthash::{mum::Hash64, FastHash};
68///
69/// assert_eq!(Hash64::hash(b"hello"), 9723359729180093834);
70/// assert_eq!(Hash64::hash_with_seed(b"hello", 123), 12693953100868515521);
71/// assert_eq!(Hash64::hash(b"helloworld"), 9122204010978352975);
72/// ```
73#[derive(Clone, Default)]
74pub struct Hash64;
75
76impl FastHash for Hash64 {
77    type Hash = u64;
78    type Seed = u64;
79
80    #[inline(always)]
81    fn hash_with_seed<T: AsRef<[u8]>>(bytes: T, seed: u64) -> u64 {
82        unsafe {
83            ffi::mum_hash_(
84                bytes.as_ref().as_ptr() as *const c_void,
85                bytes.as_ref().len(),
86                seed,
87            )
88        }
89    }
90}
91
92trivial_hasher! {
93    /// # Example
94    ///
95    /// ```
96    /// use std::hash::Hasher;
97    ///
98    /// use fasthash::{mum::Hasher64, FastHasher};
99    ///
100    /// let mut h = Hasher64::new();
101    ///
102    /// h.write(b"hello");
103    /// assert_eq!(h.finish(), 9723359729180093834);
104    ///
105    /// h.write(b"world");
106    /// assert_eq!(h.finish(), 9122204010978352975);
107    /// ```
108    Hasher64(Hash64) -> u64
109}
110
111/// `MumHash` 64-bit hash functions for a byte array.
112#[inline(always)]
113pub fn hash64<T: AsRef<[u8]>>(v: T) -> u64 {
114    Hash64::hash(v)
115}
116
117/// `MumHash` 64-bit hash function for a byte array.
118/// For convenience, a 64-bit seed is also hashed into the result.
119#[inline(always)]
120pub fn hash64_with_seed<T: AsRef<[u8]>>(v: T, seed: u64) -> u64 {
121    Hash64::hash_with_seed(v, seed)
122}