omango_wyhash/lib.rs
1// Copyright (c) 2024 Trung Tran <tqtrungse@gmail.com>
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in all
11// copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19// SOFTWARE.
20
21//! The implementation is based on the Wang Yi hash final version 4.2.
22//!
23//! Source: '<https://github.com/wangyi-fudan/wyhash/blob/master/wyhash.h>'
24
25use lazy_static::lazy_static;
26use omango_util::hint::{likely, unlikely};
27
28use crate::{
29 platform::{
30 mum,
31 rot::{r4, r8},
32 },
33 util::{is_prime, mix, r3},
34};
35
36mod platform;
37mod util;
38
39lazy_static! {
40 pub static ref SECRET: [u64; 4] = [
41 0x2d358dccaa6c78a5,
42 0x8bb84b93962eacc9,
43 0x4b33a62ed433d4a3,
44 0x4d5a2da51de1aa47,
45 ];
46}
47
48#[inline(always)]
49pub fn hash(key: &[u8], seed: u64, secret: &[u64]) -> u64 {
50 let len = key.len();
51 let mut a: u64 = 0;
52 let mut b: u64 = 0;
53 let mut see = seed ^ mix(seed ^ secret[0], secret[1]);
54
55 if likely(len <= 16) {
56 if likely(len >= 4) {
57 a = (r4(key) << 32) | r4(&key[((len >> 3) << 2)..]);
58 b = (r4(&key[len - 4..]) << 32) | r4(&key[len - 4 - ((len >> 3) << 2)..]);
59 } else if likely(len > 0) {
60 a = r3(key);
61 }
62 } else {
63 let mut i = len;
64 let mut p = key;
65
66 if unlikely(i >= 48) {
67 let mut see1 = see;
68 let mut see2 = see;
69
70 loop {
71 see = mix(r8(p) ^ secret[1], r8(&p[8..]) ^ see);
72 see1 = mix(r8(&p[16..]) ^ secret[2], r8(&p[24..]) ^ see1);
73 see2 = mix(r8(&p[32..]) ^ secret[3], r8(&p[40..]) ^ see2);
74
75 p = &p[48..];
76 i -= 48;
77
78 if unlikely(i < 48) {
79 break;
80 }
81 }
82 see ^= see1 ^ see2;
83 }
84
85 while unlikely(i > 16) {
86 see = mix(r8(p) ^ secret[1], r8(&p[8..]) ^ see);
87 p = &p[16..];
88 i -= 16;
89 }
90 a = r8(&key[len - i - 16..]);
91 b = r8(&key[len - i - 8..]);
92 }
93
94 a ^= secret[1];
95 b ^= see;
96 (a, b) = mum(a, b);
97
98 mix(a ^ secret[0] ^ (len as u64), b ^ secret[1])
99}
100
101/// Make your own secret.
102///
103/// Own secret must have the same length of default secret is 4.
104#[inline(always)]
105pub fn make_secret(seed: u64, out: &mut [u64]) {
106 let c: [u8; 70] = [
107 15, 23, 27, 29, 30, 39, 43,
108 45, 46, 51, 53, 54, 57, 58,
109 60, 71, 75, 77, 78, 83, 85,
110 86, 89, 90, 92, 99, 101, 102,
111 105, 106, 108, 113, 114, 116, 120,
112 135, 139, 141, 142, 147, 149, 150,
113 153, 154, 156, 163, 165, 166, 169,
114 170, 172, 177, 178, 180, 184, 195,
115 197, 198, 201, 202, 204, 209, 210,
116 212, 216, 225, 226, 228, 232, 240
117 ];
118 let mut see = seed;
119
120 for i in (0..4).step_by(1) {
121 let mut ok;
122 loop {
123 ok = true;
124 out[i] = 0;
125
126 for j in (0..64).step_by(8) {
127 out[i] |= (c[(unsafe { rand(&mut see) } % (std::mem::size_of_val(&c) as u64)) as usize] as u64) << j;
128 }
129
130 if out[i] % 2 == 0 {
131 continue;
132 }
133
134 for j in (0..i).step_by(1) {
135 if (out[j] ^ out[i]).count_ones() != 32 {
136 ok = false;
137 break;
138 }
139 }
140
141 if ok && !is_prime(out[i]) {
142 ok = false;
143 }
144
145 if ok {
146 break;
147 }
148 }
149 }
150}
151
152/// A useful 64bit-64bit mix function to produce deterministic pseudo random numbers
153/// that can pass BigCrush and PractRand.
154#[inline(always)]
155pub fn hash64(a: u64, b: u64) -> u64 {
156 let (lo, hi) = mum(a ^ SECRET[0], b ^ SECRET[1]);
157 mix(lo ^ SECRET[0], hi ^ SECRET[1])
158}
159
160/// The rand PRNG that pass BigCrush and PractRand.
161/// # Safety
162///
163/// This function is unsafe because it operates on raw pointers and
164/// assumes that the caller guarantees the following:
165///
166/// - `ptr` must be a valid mutable pointer to a valid memory location.
167/// - The memory referenced by `ptr` must not be concurrently accessed
168/// or modified by any other code.
169/// - The caller must ensure that the pointer is not null.
170/// - The caller must ensure that the lifetime of the referenced data
171/// exceeds the lifetime of the pointer.
172///
173/// Violating these requirements can lead to undefined behavior, including
174/// memory unsafely, data races, and program crashes.
175///
176/// # Examples
177///
178/// ```
179/// use omango_wyhash::rand;
180///
181/// let mut data = 42u64;
182/// let ptr = &mut data as *mut u64;
183/// let result: u64;
184/// unsafe {
185/// result = rand(ptr);
186/// }
187/// ```
188#[inline(always)]
189pub unsafe fn rand(seed: *mut u64) -> u64 {
190 *seed += SECRET[0];
191 mix(*seed, (*seed) ^ SECRET[1])
192}
193
194/// Convert any 64-bit pseudo random numbers to uniform distribution [0,1).
195/// It can be combined with [rand], [hash64] or [hash].
196#[inline(always)]
197pub fn to_u01(r: u64) -> f64 {
198 let norm = 1.0 / (((1u64) << 52) as f64);
199 ((r >> 12) as f64) * norm
200}
201
202/// Convert any 64-bit pseudo random numbers to APPROXIMATE Gaussian distribution.
203/// It can be combined with [rand], [hash64] or [hash].
204#[inline(always)]
205pub fn to_gau(r: u64) -> f64 {
206 let norm = 1.0 / ((1 << 20) as f64);
207 (((r & 0x1fffff) + ((r >> 21) & 0x1fffff) + ((r >> 42) & 0x1fffff)) as f64) * norm - 3.0
208}
209
210/// Fast range integer random number generation on [0,k) credit to Daniel Lemire.
211/// It can be combined with [rand], [hash64] or [hash].
212#[inline(always)]
213pub fn to_u0k(r: u64, k: u64) -> u64 {
214 let (_, hi) = mum(r, k);
215 hi
216}
217
218mod test {
219 #[test]
220 fn test_vector() {
221 let messages: [&str; 7] = [
222 "",
223 "a",
224 "abc",
225 "message digest",
226 "abcdefghijklmnopqrstuvwxyz",
227 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
228 "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
229 ];
230
231 let mut h: u64;
232 for i in (0..7).step_by(1) {
233 h = crate::hash(messages[i].as_bytes(), i as u64, crate::SECRET.as_slice());
234 println!("{}-{}", h, messages[i]);
235 }
236 }
237}