fasthash_fork/mum.rs
1//! `MumHash`, Hashing functions and PRNGs based on them
2//!
3//! by Vladimir Makarov <vmakarov@gcc.gnu.org>
4//!
5//! https://github.com/vnmakarov/mum-hash
6//!
7//! * MUM hash is a **fast non-cryptographic hash function**
8//! suitable for different hash table implementations
9//! * MUM means **MU**ltiply and **M**ix
10//! * It is a name of the base transformation on which hashing is implemented
11//! * Modern processors have a fast logic to do long number multiplications
12//! * It is very attractable to use it for fast hashing
13//! * For example, 64x64-bit multiplication can do the same work as 32
14//! shifts and additions
15//! * I'd like to call it Multiply and Reduce. Unfortunately, MUR
16//! (Multiply and Rotate) is already taken for famous hashing
17//! technique designed by Austin Appleby
18//! * I've chosen the name also as I am releasing it on Mother's day
19//! * MUM hash passes **all** [`SMHasher`](https://github.com/aappleby/smhasher) tests
20//! * For comparison, only 4 out of 15 non-cryptographic hash functions
21//! in `SMHasher` passes the tests, e.g. well known FNV, Murmur2,
22//! Lookup, and Superfast hashes fail the tests
23//! * MUM algorithm is **simpler** than City64 and Spooky ones
24//! * MUM is specifically **designed for 64-bit CPUs** (Sorry, I did not want to
25//! spend my time on dying architectures)
26//! * Still MUM will work for 32-bit CPUs and it will be sometimes
27//! faster Spooky and City
28//! * On x86-64 MUM hash is **faster** than City64 and Spooky on all tests except for one
29//! test for the bulky speed
30//! * Starting with 240-byte strings, City uses Intel SSE4.2 crc32 instruction
31//! * I could use the same instruction but I don't want to complicate the algorithm
32//! * In typical scenario, such long strings are rare. Usually another
33//! interface (see `mum_hash_step`) is used for hashing big data structures
34//! * MUM has a **fast startup**. It is particular good to hash small keys
35//! which are a majority of hash table applications
36//!
37//! # Example
38//!
39//! ```
40//! use std::hash::{Hash, Hasher};
41//!
42//! use fasthash::{mum, MumHasher};
43//!
44//! fn hash<T: Hash>(t: &T) -> u64 {
45//! let mut s: MumHasher = Default::default();
46//! t.hash(&mut s);
47//! s.finish()
48//! }
49//!
50//! let h = mum::hash64(b"hello world\xff");
51//!
52//! assert_eq!(h, hash(&"hello world"));
53//! ```
54//!
55#![allow(non_camel_case_types)]
56use std::os::raw::c_void;
57
58use crate::ffi;
59
60use crate::hasher::FastHash;
61
62/// `MumHash` 64-bit hash functions
63///
64/// # Example
65///
66/// ```
67/// use fasthash::{mum::Hash64, FastHash};
68///
69/// assert_eq!(Hash64::hash(b"hello"), 9723359729180093834);
70/// assert_eq!(Hash64::hash_with_seed(b"hello", 123), 12693953100868515521);
71/// assert_eq!(Hash64::hash(b"helloworld"), 9122204010978352975);
72/// ```
73#[derive(Clone, Default)]
74pub struct Hash64;
75
76impl FastHash for Hash64 {
77 type Hash = u64;
78 type Seed = u64;
79
80 #[inline(always)]
81 fn hash_with_seed<T: AsRef<[u8]>>(bytes: T, seed: u64) -> u64 {
82 unsafe {
83 ffi::mum_hash_(
84 bytes.as_ref().as_ptr() as *const c_void,
85 bytes.as_ref().len(),
86 seed,
87 )
88 }
89 }
90}
91
92trivial_hasher! {
93 /// # Example
94 ///
95 /// ```
96 /// use std::hash::Hasher;
97 ///
98 /// use fasthash::{mum::Hasher64, FastHasher};
99 ///
100 /// let mut h = Hasher64::new();
101 ///
102 /// h.write(b"hello");
103 /// assert_eq!(h.finish(), 9723359729180093834);
104 ///
105 /// h.write(b"world");
106 /// assert_eq!(h.finish(), 9122204010978352975);
107 /// ```
108 Hasher64(Hash64) -> u64
109}
110
111/// `MumHash` 64-bit hash functions for a byte array.
112#[inline(always)]
113pub fn hash64<T: AsRef<[u8]>>(v: T) -> u64 {
114 Hash64::hash(v)
115}
116
117/// `MumHash` 64-bit hash function for a byte array.
118/// For convenience, a 64-bit seed is also hashed into the result.
119#[inline(always)]
120pub fn hash64_with_seed<T: AsRef<[u8]>>(v: T, seed: u64) -> u64 {
121 Hash64::hash_with_seed(v, seed)
122}