// naive_cityhash — lib.rs

1#![no_std]
2#![allow(clippy::many_single_char_names)]
3
4use core::mem::size_of;
5use core::ptr::read_unaligned;
6
/// A 128-bit value represented as two 64-bit halves.
///
/// `lo` holds the least-significant 64 bits and `hi` the
/// most-significant 64 bits (see the `From<u128>` conversion below).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct U128 {
    // Least-significant 64 bits.
    pub lo: u64,
    // Most-significant 64 bits.
    pub hi: u64,
}
12
impl U128 {
    /// Builds a value from its two 64-bit halves.
    #[inline]
    pub const fn new(lo: u64, hi: u64) -> Self {
        Self { lo, hi }
    }

    /// Returns the least-significant 64 bits.
    #[inline]
    pub const fn lo(&self) -> u64 {
        self.lo
    }

    /// Returns the most-significant 64 bits.
    #[inline]
    pub const fn hi(&self) -> u64 {
        self.hi
    }
}
29
30impl From<u128> for U128 {
31    fn from(source: u128) -> Self {
32        Self {
33            lo: source as u64,
34            hi: (source >> 64) as u64,
35        }
36    }
37}
38
39impl From<U128> for u128 {
40    fn from(val: U128) -> Self {
41        (val.lo as u128) | ((val.hi as u128) << 64)
42    }
43}
44
// Mixing constants ("magic primes") used by the CityHash family of hash
// functions; values match Google's reference implementation.
const K0: u64 = 0xc3a5c85c97cb3127u64;
const K1: u64 = 0xb492b66fbe98f273u64;
const K2: u64 = 0x9ae16a3b2f90404fu64;
const K3: u64 = 0xc949d7c7509e6557u64;
49
/// Reads the first 8 bytes of `data` as a little-endian `u64`.
///
/// Replaces the previous unsafe unaligned pointer read with the safe
/// `u64::from_le_bytes`, which compiles to the same unaligned load and
/// needs no `unsafe` block.
///
/// # Panics
/// Panics if `data` is shorter than 8 bytes (slice indexing).
#[cfg(target_endian = "little")]
#[inline]
pub fn fetch64(data: &[u8]) -> u64 {
    debug_assert!(data.len() >= size_of::<u64>());
    let mut buf = [0u8; 8];
    buf.copy_from_slice(&data[..8]);
    u64::from_le_bytes(buf)
}
57
/// Reads the first 4 bytes of `data` as a little-endian `u32`.
///
/// Replaces the previous unsafe unaligned pointer read with the safe
/// `u32::from_le_bytes`, which compiles to the same unaligned load and
/// needs no `unsafe` block.
///
/// # Panics
/// Panics if `data` is shorter than 4 bytes (slice indexing).
#[cfg(target_endian = "little")]
#[inline]
fn fetch32(data: &[u8]) -> u32 {
    debug_assert!(data.len() >= size_of::<u32>());
    let mut buf = [0u8; 4];
    buf.copy_from_slice(&data[..4]);
    u32::from_le_bytes(buf)
}
65
/// Reads the first 8 bytes of `data` as a little-endian `u64`
/// (big-endian targets).
///
/// Fixes a compile error on big-endian targets: the debug assertion
/// referenced `mem::size_of`, but only `size_of` itself is imported at
/// the top of the file, so `mem` was an unresolved name. Also replaces
/// the unsafe native read + `swap_bytes` with the equivalent safe
/// `u64::from_le_bytes`.
///
/// # Panics
/// Panics if `data` is shorter than 8 bytes (slice indexing).
#[cfg(not(target_endian = "little"))]
#[inline]
fn fetch64(data: &[u8]) -> u64 {
    debug_assert!(data.len() >= size_of::<u64>());
    let mut buf = [0u8; 8];
    buf.copy_from_slice(&data[..8]);
    u64::from_le_bytes(buf)
}
74
/// Reads the first 4 bytes of `data` as a little-endian `u32`
/// (big-endian targets).
///
/// `u32::from_le_bytes` on a big-endian machine is exactly the native
/// read followed by a byte swap, matching the little-endian variant.
#[cfg(not(target_endian = "little"))]
#[inline]
fn fetch32(data: &[u8]) -> u32 {
    debug_assert!(data.len() >= size_of::<u32>());
    let mut buf = [0u8; 4];
    buf.copy_from_slice(&data[..4]);
    u32::from_le_bytes(buf)
}
83
/// Rotates `val` right by `shift` bit positions.
///
/// The previous hand-rolled `(val >> shift) | (val << (64 - shift))`
/// was undefined for `shift == 0` (a 64-bit shift, which panics in
/// debug builds). Delegating to `u64::rotate_right` is well-defined for
/// every shift — including 0 and values >= 64, which wrap modulo 64 —
/// and compiles to the same single rotate instruction. The `as u32`
/// truncation keeps the low bits, so the modulo-64 behavior is
/// unaffected.
#[inline(always)]
fn rotate_least(val: u64, shift: u64) -> u64 {
    val.rotate_right(shift as u32)
}
89
/// XOR-folds the top 17 bits of `val` into its lower bits
/// (`val ^ (val >> 47)`), a standard avalanche step in CityHash.
#[inline(always)]
fn shift_mix(val: u64) -> u64 {
    let folded = val >> 47;
    val ^ folded
}
94
/// Hashes two 64-bit values into one; thin alias for `hash128_to_64`.
fn hash_len16(u: u64, v: u64) -> u64 {
    hash128_to_64(u, v)
}
98
/// Compresses a 128-bit input (`l`, `h`) down to 64 bits using the
/// Murmur-inspired multiply/shift-fold rounds from CityHash.
#[inline(always)]
fn hash128_to_64(l: u64, h: u64) -> u64 {
    const K_MUL: u64 = 0x9ddfea08eb382d69u64;
    let mut mixed = (l ^ h).wrapping_mul(K_MUL);
    mixed ^= mixed >> 47;
    mixed = (h ^ mixed).wrapping_mul(K_MUL);
    mixed ^= mixed >> 47;
    mixed.wrapping_mul(K_MUL)
}
108
/// CityHash64 routine for inputs of 0..=16 bytes.
///
/// Each branch mixes the input length into the result so that inputs of
/// different lengths diverge even when their bytes overlap.
fn hash_len0to16(data: &[u8]) -> u64 {
    if data.len() > 8 {
        // 9..=16 bytes: hash the (possibly overlapping) first and last
        // 8-byte words. `data.len()` is 9..=16 here, so the rotate
        // amount is never zero.
        let a = fetch64(data);
        let b = fetch64(&data[data.len() - 8..]);
        b ^ hash_len16(
            a,
            rotate_least(b.wrapping_add(data.len() as u64), data.len() as u64),
        )
    } else if data.len() >= 4 {
        // 4..=8 bytes: combine the (possibly overlapping) first and
        // last 4-byte words.
        let a = fetch32(data) as u64;

        hash_len16(
            (a << 3).wrapping_add(data.len() as u64),
            fetch32(&data[data.len() - 4..]) as u64,
        )
    } else if !data.is_empty() {
        // 1..=3 bytes: mix the first, middle, and last bytes (these may
        // alias each other for very short inputs).
        let a: u8 = data[0];
        let b: u8 = data[data.len() >> 1];
        let c: u8 = data[data.len() - 1];
        let y = (a as u64).wrapping_add((b as u64) << 8);
        let z = (data.len() as u64).wrapping_add((c as u64) << 2);

        shift_mix(y.wrapping_mul(K2) ^ z.wrapping_mul(K3)).wrapping_mul(K2)
    } else {
        // Empty input hashes to a fixed prime.
        K2
    }
}
136
/// CityHash64 routine for inputs of 17..=32 bytes.
///
/// Reads the first two and last two 8-byte words (the pairs overlap for
/// lengths below 32), scales them by distinct primes, and folds them
/// together with the length.
fn hash_len17to32(data: &[u8]) -> u64 {
    debug_assert!(data.len() > 16);

    let a = fetch64(data).wrapping_mul(K1);
    let b = fetch64(&data[8..]);
    let c = fetch64(&data[data.len() - 8..]).wrapping_mul(K2);
    let d = fetch64(&data[data.len() - 16..]).wrapping_mul(K0);
    hash_len16(
        rotate_least(a.wrapping_sub(b), 43)
            .wrapping_add(rotate_least(c, 30))
            .wrapping_add(d),
        a.wrapping_add(rotate_least(b ^ K3, 20))
            .wrapping_sub(c)
            .wrapping_add(data.len() as u64),
    )
}
153
154fn hash_len33to64(data: &[u8]) -> u64 {
155    debug_assert!(data.len() > 32);
156
157    let mut z = fetch64(&data[24..]);
158    let mut a = fetch64(data).wrapping_add(
159        K0.wrapping_mul((data.len() as u64).wrapping_add(fetch64(&data[data.len() - 16..]))),
160    );
161    let mut b = rotate_least(a.wrapping_add(z), 52);
162    let mut c = rotate_least(a, 37);
163    a = fetch64(&data[8..]).wrapping_add(a);
164    c = rotate_least(a, 7).wrapping_add(c);
165    a = fetch64(&data[16..]).wrapping_add(a);
166
167    let vf = a.wrapping_add(z);
168    let vs = b.wrapping_add(rotate_least(a, 31)).wrapping_add(c);
169    a = fetch64(&data[16..]) + fetch64(&data[data.len() - 32..]);
170    z = fetch64(&data[data.len() - 8..]);
171    b = rotate_least(a.wrapping_add(z), 52);
172    c = rotate_least(a, 37);
173    a = fetch64(&data[data.len() - 24..]).wrapping_add(a);
174    c = rotate_least(a, 7).wrapping_add(c);
175    a = fetch64(&data[data.len() - 16..]).wrapping_add(a);
176    let wf = a.wrapping_add(z);
177    let ws = b.wrapping_add(rotate_least(a, 31)).wrapping_add(c);
178    let r = shift_mix(
179        K2.wrapping_mul(vf.wrapping_add(ws))
180            .wrapping_add(K0.wrapping_mul(wf.wrapping_add(vs))),
181    );
182    shift_mix(vs.wrapping_add(r.wrapping_mul(K0))).wrapping_mul(K2)
183}
184
/// Hashes the first 32 bytes of `data` into a pair of 64-bit values,
/// seeded by `a` and `b`.
///
/// # Panics
/// Panics if `data` is shorter than 32 bytes (slice indexing in
/// `fetch64`).
fn weak_hash_len32_with_seeds(data: &[u8], a: u64, b: u64) -> (u64, u64) {
    _weak_hash_len32_with_seeds(
        fetch64(data),
        fetch64(&data[8..]),
        fetch64(&data[16..]),
        fetch64(&data[24..]),
        a,
        b,
    )
}
195
196#[inline(always)]
197fn _weak_hash_len32_with_seeds(
198    w: u64,
199    x: u64,
200    y: u64,
201    z: u64,
202    mut a: u64,
203    mut b: u64,
204) -> (u64, u64) {
205    a = a.wrapping_add(w);
206    b = rotate_least(b.wrapping_add(a).wrapping_add(z), 21);
207    let c = a;
208    a = a.wrapping_add(x).wrapping_add(y);
209    b = b.wrapping_add(rotate_least(a, 44));
210    (a.wrapping_add(z), b.wrapping_add(c))
211}
212
/// CityMurmur: the 128-bit fallback used by `cityhash128_with_seed`
/// for inputs shorter than 128 bytes.
///
/// Maintains four state words (`a`, `b`, `c`, `d`) derived from the
/// seed, then folds them pairwise into the final 128-bit result.
fn city_murmur(mut data: &[u8], seed: U128) -> U128 {
    let mut a = seed.lo;
    let mut b = seed.hi;
    let mut c: u64;
    let mut d: u64;

    if data.len() <= 16 {
        // Short path: derive all four state words from the seed plus
        // the 0..=16-byte hash of the whole input.
        a = shift_mix(a.wrapping_mul(K1)).wrapping_mul(K1);
        c = b.wrapping_mul(K1).wrapping_add(hash_len0to16(data));
        d = if data.len() >= 8 { fetch64(data) } else { c };
        d = shift_mix(a.wrapping_add(d));
    } else {
        // Long path: seed c/d from the tail of the buffer, then consume
        // 16 bytes per iteration until at most 16 bytes remain.
        c = hash_len16(fetch64(&data[data.len() - 8..]).wrapping_add(K1), a);
        d = hash_len16(
            b.wrapping_add(data.len() as u64),
            c.wrapping_add(fetch64(&data[data.len() - 16..])),
        );
        a = a.wrapping_add(d);
        loop {
            a ^= shift_mix(fetch64(data).wrapping_mul(K1)).wrapping_mul(K1);
            a = a.wrapping_mul(K1);
            b ^= a;
            c ^= shift_mix(fetch64(&data[8..]).wrapping_mul(K1)).wrapping_mul(K1);
            c = c.wrapping_mul(K1);
            d ^= c;
            data = &data[16..];
            if data.len() <= 16 {
                break;
            }
        }
    }

    // Final cross-mixing of the four state words into 128 bits.
    a = hash_len16(a, c);
    b = hash_len16(d, b);
    U128::new(a ^ b, hash_len16(b, a))
}
249
/// Computes the 128-bit CityHash of `data` with an explicit 128-bit
/// seed.
///
/// Inputs shorter than 128 bytes are delegated to `city_murmur`. Longer
/// inputs run the main loop below, which processes 128 bytes per
/// iteration (two identical 64-byte rounds) and then folds in the
/// remaining tail 32 bytes at a time from the end.
pub fn cityhash128_with_seed(mut data: &[u8], seed: U128) -> U128 {
    if data.len() < 128 {
        return city_murmur(data, seed);
    }

    // State: three scalars (x, y, z) plus two 128-bit pairs (v, w),
    // all derived from the seed and the head of the buffer. The loop
    // below requires at least 128 bytes of input.
    let mut x = seed.lo;
    let mut y = seed.hi;
    let mut z = K1.wrapping_mul(data.len() as u64);
    let t: u64 = K1
        .wrapping_mul(rotate_least(y ^ K1, 49))
        .wrapping_add(fetch64(data));
    let mut v = (
        t,
        K1.wrapping_mul(rotate_least(t, 42))
            .wrapping_add(fetch64(&data[8..])),
    );
    let mut w = (
        K1.wrapping_mul(rotate_least(y.wrapping_add(z), 35))
            .wrapping_add(x),
        K1.wrapping_mul(rotate_least(x.wrapping_add(fetch64(&data[88..])), 53)),
    );

    loop {
        // First 64-byte round.
        x = K1.wrapping_mul(rotate_least(
            x.wrapping_add(y)
                .wrapping_add(v.0)
                .wrapping_add(fetch64(&data[16..])),
            37,
        ));
        y = K1.wrapping_mul(rotate_least(
            y.wrapping_add(v.1).wrapping_add(fetch64(&data[48..])),
            42,
        ));
        x ^= w.1;
        y ^= v.0;
        z = rotate_least(z ^ w.0, 33);
        v = weak_hash_len32_with_seeds(data, K1.wrapping_mul(v.1), x.wrapping_add(w.0));
        w = weak_hash_len32_with_seeds(&data[32..], z.wrapping_add(w.1), y);
        core::mem::swap(&mut z, &mut x);

        // Second 64-byte round: identical mixing over the next chunk.
        data = &data[64..];
        x = K1.wrapping_mul(rotate_least(
            x.wrapping_add(y)
                .wrapping_add(v.0)
                .wrapping_add(fetch64(&data[16..])),
            37,
        ));
        y = K1.wrapping_mul(rotate_least(
            y.wrapping_add(v.1).wrapping_add(fetch64(&data[48..])),
            42,
        ));
        x ^= w.1;
        y ^= v.0;
        z = rotate_least(z ^ w.0, 33);
        v = weak_hash_len32_with_seeds(data, K1.wrapping_mul(v.1), x.wrapping_add(w.0));
        w = weak_hash_len32_with_seeds(&data[32..], z.wrapping_add(w.1), y);
        core::mem::swap(&mut z, &mut x);
        // Stop before advancing if fewer than 192 bytes remain, so the
        // tail loop below always has enough data to read from.
        if data.len() < (128 + 64) {
            break;
        }
        data = &data[64..];
    }

    y = y.wrapping_add(K0.wrapping_mul(rotate_least(w.0, 37)).wrapping_add(z));
    x = x.wrapping_add(K0.wrapping_mul(rotate_least(v.0.wrapping_add(z), 49)));

    // Consume the remaining tail 32 bytes at a time, working backwards
    // from the end of the remaining slice.
    while data.len() > 64 {
        y = K0
            .wrapping_mul(rotate_least(y.wrapping_sub(x), 42))
            .wrapping_add(v.1);
        w.0 = w.0.wrapping_add(fetch64(&data[data.len() - 16..]));
        x = K0.wrapping_mul(rotate_least(x, 49)).wrapping_add(w.0);
        w.0 = w.0.wrapping_add(v.0);
        v = weak_hash_len32_with_seeds(&data[data.len() - 32..], v.0, v.1);
        data = &data[0..data.len() - 32];
    }

    // Final compression of the state into 128 bits.
    x = hash_len16(x, v.0);
    y = hash_len16(y, w.0);

    U128 {
        lo: hash_len16(x.wrapping_add(v.1), w.1).wrapping_add(y),
        hi: hash_len16(x.wrapping_add(w.1), y.wrapping_add(v.1)),
    }
}
335
336#[inline]
337pub fn cityhash128(data: &[u8]) -> U128 {
338    if data.len() >= 16 {
339        cityhash128_with_seed(
340            &data[16..],
341            U128 {
342                lo: fetch64(data) ^ K3,
343                hi: fetch64(&data[8..]),
344            },
345        )
346    } else if data.len() >= 8 {
347        cityhash128_with_seed(
348            b"",
349            U128 {
350                lo: fetch64(data) ^ (K0.wrapping_mul(data.len() as u64)),
351                hi: fetch64(&data[data.len() - 8..]) ^ K1,
352            },
353        )
354    } else {
355        cityhash128_with_seed(data, U128 { lo: K0, hi: K1 })
356    }
357}
358
/// Computes the 64-bit CityHash of `data`.
///
/// Inputs of at most 64 bytes are dispatched to the specialized
/// short-input routines; longer inputs seed the state from the last
/// 64 bytes and then walk the buffer in 64-byte chunks.
#[inline]
pub fn cityhash64(data: &[u8]) -> u64 {
    if data.len() <= 32 {
        if data.len() <= 16 {
            return hash_len0to16(data);
        } else {
            return hash_len17to32(data);
        }
    } else if data.len() <= 64 {
        return hash_len33to64(data);
    }

    // len > 64: seed the 56-byte rolling state from the end of the
    // buffer, so the final partial chunk is already accounted for.
    let mut x = fetch64(data);
    let mut y = fetch64(&data[data.len() - 16..]) ^ K1;
    let mut z = fetch64(&data[data.len() - 56..]) ^ K0;

    let mut v: (u64, u64) =
        weak_hash_len32_with_seeds(&data[data.len() - 64..], data.len() as u64, y);
    let mut w: (u64, u64) = weak_hash_len32_with_seeds(
        &data[data.len() - 32..],
        K1.wrapping_mul(data.len() as u64),
        K0,
    );

    z = shift_mix(v.1).wrapping_mul(K1).wrapping_add(z);
    x = rotate_least(z.wrapping_add(x), 39).wrapping_mul(K1);
    y = rotate_least(y, 33).wrapping_mul(K1);

    // Number of bytes covered by whole 64-byte iterations; at least 64
    // here because data.len() > 64.
    let mut len = (data.len() - 1) & !63;

    let mut data = data;

    // Consume one 64-byte chunk per iteration from the front.
    loop {
        x = rotate_least(
            x.wrapping_add(y)
                .wrapping_add(v.0)
                .wrapping_add(fetch64(&data[16..])),
            37,
        )
        .wrapping_mul(K1);
        y = rotate_least(y.wrapping_add(v.1).wrapping_add(fetch64(&data[48..])), 42)
            .wrapping_mul(K1);
        x ^= w.1;
        y ^= v.0;
        z = rotate_least(z ^ w.0, 33);
        v = weak_hash_len32_with_seeds(data, v.1.wrapping_mul(K1), x.wrapping_add(w.0));
        w = weak_hash_len32_with_seeds(&data[32..], z.wrapping_add(w.1), y);
        core::mem::swap(&mut z, &mut x);

        len -= 64;

        if len == 0 {
            break;
        }

        data = &data[64..];
    }

    // Final compression of the state into a single 64-bit value.
    hash_len16(
        hash_len16(v.0, w.0)
            .wrapping_add(shift_mix(y).wrapping_mul(K1))
            .wrapping_add(z),
        hash_len16(v.1, w.1).wrapping_add(x),
    )
}
424
/// Computes the 64-bit CityHash of `data` with a single seed
/// (the second seed defaults to the prime `K2`).
pub fn cityhash64_with_seed(data: &[u8], seed: u64) -> u64 {
    cityhash64_with_seeds(data, K2, seed)
}
428
/// Computes the 64-bit CityHash of `data` with two seeds, by re-mixing
/// the unseeded hash with the seed pair.
pub fn cityhash64_with_seeds(data: &[u8], seed0: u64, seed1: u64) -> u64 {
    hash_len16(cityhash64(data).wrapping_sub(seed0), seed1)
}