edit/
hash.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Provides fast, non-cryptographic hash functions.
5
6/// The venerable wyhash hash function.
7///
8/// It's fast, has good statistical properties, and is in the public domain.
9/// See: <https://github.com/wangyi-fudan/wyhash>
10/// If you visit the link, you'll find that it was superseded by "rapidhash",
11/// but that's not particularly interesting for this project. rapidhash results
12/// in way larger assembly and isn't faster when hashing small amounts of data.
13pub fn hash(mut seed: u64, data: &[u8]) -> u64 {
14    unsafe {
15        const S0: u64 = 0xa0761d6478bd642f;
16        const S1: u64 = 0xe7037ed1a0b428db;
17        const S2: u64 = 0x8ebc6af09c88c6e3;
18        const S3: u64 = 0x589965cc75374cc3;
19
20        let len = data.len();
21        let mut p = data.as_ptr();
22        let a;
23        let b;
24
25        seed ^= S0;
26
27        if len <= 16 {
28            if len >= 4 {
29                a = (wyr4(p) << 32) | wyr4(p.add((len >> 3) << 2));
30                b = (wyr4(p.add(len - 4)) << 32) | wyr4(p.add(len - 4 - ((len >> 3) << 2)));
31            } else if len > 0 {
32                a = wyr3(p, len);
33                b = 0;
34            } else {
35                a = 0;
36                b = 0;
37            }
38        } else {
39            let mut i = len;
40            if i > 48 {
41                let mut seed1 = seed;
42                let mut seed2 = seed;
43                while {
44                    seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
45                    seed1 = wymix(wyr8(p.add(16)) ^ S2, wyr8(p.add(24)) ^ seed1);
46                    seed2 = wymix(wyr8(p.add(32)) ^ S3, wyr8(p.add(40)) ^ seed2);
47                    p = p.add(48);
48                    i -= 48;
49                    i > 48
50                } {}
51                seed ^= seed1 ^ seed2;
52            }
53            while i > 16 {
54                seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
55                i -= 16;
56                p = p.add(16);
57            }
58            a = wyr8(p.offset(i as isize - 16));
59            b = wyr8(p.offset(i as isize - 8));
60        }
61
62        wymix(S1 ^ (len as u64), wymix(a ^ S1, b ^ seed))
63    }
64}
65
66unsafe fn wyr3(p: *const u8, k: usize) -> u64 {
67    let p0 = unsafe { p.read() as u64 };
68    let p1 = unsafe { p.add(k >> 1).read() as u64 };
69    let p2 = unsafe { p.add(k - 1).read() as u64 };
70    (p0 << 16) | (p1 << 8) | p2
71}
72
73unsafe fn wyr4(p: *const u8) -> u64 {
74    unsafe { (p as *const u32).read_unaligned() as u64 }
75}
76
77unsafe fn wyr8(p: *const u8) -> u64 {
78    unsafe { (p as *const u64).read_unaligned() }
79}
80
81// This is a weak mix function on its own. It may be worth considering
82// replacing external uses of this function with a stronger one.
83// On the other hand, it's very fast.
84pub fn wymix(lhs: u64, rhs: u64) -> u64 {
85    let lhs = lhs as u128;
86    let rhs = rhs as u128;
87    let r = lhs * rhs;
88    (r >> 64) as u64 ^ (r as u64)
89}
90
91pub fn hash_str(seed: u64, s: &str) -> u64 {
92    hash(seed, s.as_bytes())
93}