omango_wyhash/
lib.rs

1// Copyright (c) 2024 Trung Tran <tqtrungse@gmail.com>
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in all
11// copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19// SOFTWARE.
20
21//! The implementation is based on the Wang Yi hash final version 4.2.
22//!
23//! Source: '<https://github.com/wangyi-fudan/wyhash/blob/master/wyhash.h>'
24
25use lazy_static::lazy_static;
26use omango_util::hint::{likely, unlikely};
27
28use crate::{
29    platform::{
30        mum,
31        rot::{r4, r8},
32    },
33    util::{is_prime, mix, r3},
34};
35
36mod platform;
37mod util;
38
39lazy_static! {
40    pub static ref SECRET: [u64; 4] = [
41        0x2d358dccaa6c78a5, 
42        0x8bb84b93962eacc9, 
43        0x4b33a62ed433d4a3, 
44        0x4d5a2da51de1aa47,
45    ];
46}
47
48#[inline(always)]
49pub fn hash(key: &[u8], seed: u64, secret: &[u64]) -> u64 {
50    let len = key.len();
51    let mut a: u64 = 0;
52    let mut b: u64 = 0;
53    let mut see = seed ^ mix(seed ^ secret[0], secret[1]);
54
55    if likely(len <= 16) {
56        if likely(len >= 4) {
57            a = (r4(key) << 32) | r4(&key[((len >> 3) << 2)..]);
58            b = (r4(&key[len - 4..]) << 32) | r4(&key[len - 4 - ((len >> 3) << 2)..]);
59        } else if likely(len > 0) {
60            a = r3(key);
61        }
62    } else {
63        let mut i = len;
64        let mut p = key;
65
66        if unlikely(i >= 48) {
67            let mut see1 = see;
68            let mut see2 = see;
69
70            loop {
71                see = mix(r8(p) ^ secret[1], r8(&p[8..]) ^ see);
72                see1 = mix(r8(&p[16..]) ^ secret[2], r8(&p[24..]) ^ see1);
73                see2 = mix(r8(&p[32..]) ^ secret[3], r8(&p[40..]) ^ see2);
74
75                p = &p[48..];
76                i -= 48;
77
78                if unlikely(i < 48) {
79                    break;
80                }
81            }
82            see ^= see1 ^ see2;
83        }
84
85        while unlikely(i > 16) {
86            see = mix(r8(p) ^ secret[1], r8(&p[8..]) ^ see);
87            p = &p[16..];
88            i -= 16;
89        }
90        a = r8(&key[len - i - 16..]);
91        b = r8(&key[len - i - 8..]);
92    }
93
94    a ^= secret[1];
95    b ^= see;
96    (a, b) = mum(a, b);
97
98    mix(a ^ secret[0] ^ (len as u64), b ^ secret[1])
99}
100
101/// Make your own secret.
102///
103/// Own secret must have the same length of default secret is 4.
104#[inline(always)]
105pub fn make_secret(seed: u64, out: &mut [u64]) {
106    let c: [u8; 70] = [
107        15, 23, 27, 29, 30, 39, 43,
108        45, 46, 51, 53, 54, 57, 58,
109        60, 71, 75, 77, 78, 83, 85,
110        86, 89, 90, 92, 99, 101, 102,
111        105, 106, 108, 113, 114, 116, 120,
112        135, 139, 141, 142, 147, 149, 150,
113        153, 154, 156, 163, 165, 166, 169,
114        170, 172, 177, 178, 180, 184, 195,
115        197, 198, 201, 202, 204, 209, 210,
116        212, 216, 225, 226, 228, 232, 240
117    ];
118    let mut see = seed;
119
120    for i in (0..4).step_by(1) {
121        let mut ok;
122        loop {
123            ok = true;
124            out[i] = 0;
125
126            for j in (0..64).step_by(8) {
127                out[i] |= (c[(unsafe { rand(&mut see) } % (std::mem::size_of_val(&c) as u64)) as usize] as u64) << j;
128            }
129
130            if out[i] % 2 == 0 {
131                continue;
132            }
133
134            for j in (0..i).step_by(1) {
135                if (out[j] ^ out[i]).count_ones() != 32 {
136                    ok = false;
137                    break;
138                }
139            }
140
141            if ok && !is_prime(out[i]) {
142                ok = false;
143            }
144
145            if ok {
146                break;
147            }
148        }
149    }
150}
151
152/// A useful 64bit-64bit mix function to produce deterministic pseudo random numbers 
153/// that can pass BigCrush and PractRand.
154#[inline(always)]
155pub fn hash64(a: u64, b: u64) -> u64 {
156    let (lo, hi) = mum(a ^ SECRET[0], b ^ SECRET[1]);
157    mix(lo ^ SECRET[0], hi ^ SECRET[1])
158}
159
160/// The rand PRNG that pass BigCrush and PractRand.
161/// # Safety
162///
163/// This function is unsafe because it operates on raw pointers and
164/// assumes that the caller guarantees the following:
165///
166/// - `ptr` must be a valid mutable pointer to a valid memory location.
167/// - The memory referenced by `ptr` must not be concurrently accessed
168///   or modified by any other code.
169/// - The caller must ensure that the pointer is not null.
170/// - The caller must ensure that the lifetime of the referenced data
171///   exceeds the lifetime of the pointer.
172///
173/// Violating these requirements can lead to undefined behavior, including
174/// memory unsafely, data races, and program crashes.
175///
176/// # Examples
177///
178/// ```
179/// use omango_wyhash::rand;
180///
181/// let mut data = 42u64;
182/// let ptr = &mut data as *mut u64;
183/// let result: u64;
184/// unsafe {
185///     result = rand(ptr);
186/// }
187/// ```
188#[inline(always)]
189pub unsafe fn rand(seed: *mut u64) -> u64 {
190    *seed += SECRET[0];
191    mix(*seed, (*seed) ^ SECRET[1])
192}
193
194/// Convert any 64-bit pseudo random numbers to uniform distribution [0,1). 
195/// It can be combined with [rand], [hash64] or [hash].
196#[inline(always)]
197pub fn to_u01(r: u64) -> f64 {
198    let norm = 1.0 / (((1u64) << 52) as f64);
199    ((r >> 12) as f64) * norm
200}
201
202/// Convert any 64-bit pseudo random numbers to APPROXIMATE Gaussian distribution. 
203/// It can be combined with [rand], [hash64] or [hash].
204#[inline(always)]
205pub fn to_gau(r: u64) -> f64 {
206    let norm = 1.0 / ((1 << 20) as f64);
207    (((r & 0x1fffff) + ((r >> 21) & 0x1fffff) + ((r >> 42) & 0x1fffff)) as f64) * norm - 3.0
208}
209
210/// Fast range integer random number generation on [0,k) credit to Daniel Lemire. 
211/// It can be combined with [rand], [hash64] or [hash].
212#[inline(always)]
213pub fn to_u0k(r: u64, k: u64) -> u64 {
214    let (_, hi) = mum(r, k);
215    hi
216}
217
218mod test {
219    #[test]
220    fn test_vector() {
221        let messages: [&str; 7] = [
222            "",
223            "a",
224            "abc",
225            "message digest",
226            "abcdefghijklmnopqrstuvwxyz",
227            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
228            "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
229        ];
230
231        let mut h: u64;
232        for i in (0..7).step_by(1) {
233            h = crate::hash(messages[i].as_bytes(), i as u64, crate::SECRET.as_slice());
234            println!("{}-{}", h, messages[i]);
235        }
236    }
237}