1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
//! `MumHash`, Hashing functions and PRNGs based on them //! //! by Vladimir Makarov <vmakarov@gcc.gnu.org> //! //! https://github.com/vnmakarov/mum-hash //! //! * MUM hash is a **fast non-cryptographic hash function** //! suitable for different hash table implementations //! * MUM means **MU**ltiply and **M**ix //! * It is a name of the base transformation on which hashing is implemented //! * Modern processors have a fast logic to do long number multiplications //! * It is very attractable to use it for fast hashing //! * For example, 64x64-bit multiplication can do the same work as 32 //! shifts and additions //! * I'd like to call it Multiply and Reduce. Unfortunately, MUR //! (Multiply and Rotate) is already taken for famous hashing //! technique designed by Austin Appleby //! * I've chosen the name also as I am releasing it on Mother's day //! * MUM hash passes **all** [`SMHasher`](https://github.com/aappleby/smhasher) tests //! * For comparison, only 4 out of 15 non-cryptographic hash functions //! in `SMHasher` passes the tests, e.g. well known FNV, Murmur2, //! Lookup, and Superfast hashes fail the tests //! * MUM algorithm is **simpler** than City64 and Spooky ones //! * MUM is specifically **designed for 64-bit CPUs** (Sorry, I did not want to //! spend my time on dying architectures) //! * Still MUM will work for 32-bit CPUs and it will be sometimes //! faster Spooky and City //! * On x86-64 MUM hash is **faster** than City64 and Spooky on all tests except for one //! test for the bulky speed //! * Starting with 240-byte strings, City uses Intel SSE4.2 crc32 instruction //! * I could use the same instruction but I don't want to complicate the algorithm //! * In typical scenario, such long strings are rare. Usually another //! interface (see `mum_hash_step`) is used for hashing big data structures //! * MUM has a **fast startup**. It is particular good to hash small keys //! which are a majority of hash table applications //! //! # Example //! //! ``` //! use std::hash::{Hash, Hasher}; //! //! use fasthash::{mum, MumHasher}; //! //! fn hash<T: Hash>(t: &T) -> u64 { //! let mut s: MumHasher = Default::default(); //! t.hash(&mut s); //! s.finish() //! } //! //! let h = mum::hash64(b"hello world\xff"); //! //! assert_eq!(h, hash(&"hello world")); //! ``` //! #![allow(non_camel_case_types)] use std::os::raw::c_void; use ffi; use hasher::FastHash; /// `MumHash` 64-bit hash functions /// /// # Example /// /// ``` /// use fasthash::{mum::Hash64, FastHash}; /// /// assert_eq!(Hash64::hash(b"hello"), 9723359729180093834); /// assert_eq!(Hash64::hash_with_seed(b"hello", 123), 12693953100868515521); /// assert_eq!(Hash64::hash(b"helloworld"), 9122204010978352975); /// ``` pub struct Hash64; impl FastHash for Hash64 { type Hash = u64; type Seed = u64; #[inline(always)] fn hash_with_seed<T: AsRef<[u8]>>(bytes: T, seed: u64) -> u64 { unsafe { ffi::mum_hash_( bytes.as_ref().as_ptr() as *const c_void, bytes.as_ref().len(), seed, ) } } } impl_hasher!( #[doc = r#" # Example ``` use std::hash::Hasher; use fasthash::{mum::Hasher64, FastHasher}; let mut h = Hasher64::new(); h.write(b"hello"); assert_eq!(h.finish(), 9723359729180093834); h.write(b"world"); assert_eq!(h.finish(), 9122204010978352975); ``` "#] Hasher64, Hash64 ); /// `MumHash` 64-bit hash functions for a byte array. #[inline(always)] pub fn hash64<T: AsRef<[u8]>>(v: T) -> u64 { Hash64::hash(v) } /// `MumHash` 64-bit hash function for a byte array. /// For convenience, a 64-bit seed is also hashed into the result. #[inline(always)] pub fn hash64_with_seed<T: AsRef<[u8]>>(v: T, seed: u64) -> u64 { Hash64::hash_with_seed(v, seed) }