cityhash_102_rs/
lib.rs

1#[cfg(test)]
2mod test;
3#[cfg(test)]
4mod test_128;
5
6use std::ptr::read_unaligned;
7
8pub const K0: u64 = 0xc3a5c85c97cb3127;
9pub const K1: u64 = 0xb492b66fbe98f273;
10pub const K2: u64 = 0x9ae16a3b2f90404f;
11pub const K3: u64 = 0xc949d7c7509e6557;
12pub const K_MUL: u64 = 0x9ddfea08eb382d69;
13
14// low bits, high bits
15#[derive(Debug, Clone, Copy)]
16pub struct U128(u64, u64);
17
18impl From<u128> for U128 {
19    fn from(value: u128) -> Self {
20        let high = value >> 64;
21        U128(value as u64, high as u64)
22    }
23}
24
25impl From<U128> for u128 {
26    fn from(value: U128) -> Self {
27        u128::from(value.1) << 64 | u128::from(value.0)
28    }
29}
30
31#[inline]
32pub fn fetch32(data: &[u8]) -> u32 {
33    let p = data.as_ptr() as *const u32;
34    let p = unsafe { read_unaligned(p) };
35    if cfg!(not(target_endian = "little")) {
36        return p.swap_bytes();
37    }
38
39    p
40}
41
42#[inline]
43pub fn fetch64(data: &[u8]) -> u64 {
44    let p = data.as_ptr() as *const u64;
45    let p = unsafe { read_unaligned(p) };
46    if cfg!(not(target_endian = "little")) {
47        return p.swap_bytes();
48    }
49
50    p
51}
52
53#[inline]
54pub fn rotate(val: u64, shift: i32) -> u64 {
55    if shift == 0 {
56        val
57    } else {
58        (val >> shift) | (val << (64 - shift))
59    }
60}
61
62#[inline]
63pub fn rotate_by_at_least1(val: u64, shift: i32) -> u64 {
64    (val >> shift) | (val << (64 - shift))
65}
66
67#[inline]
68pub fn shift_mix(val: u64) -> u64 {
69    val ^ (val >> 47)
70}
71
72#[inline]
73pub fn hash128_to_64(x: U128) -> u64 {
74    let mut a = (x.0 ^ x.1).wrapping_mul(K_MUL);
75    a ^= a >> 47;
76    let mut b = (x.1 ^ a).wrapping_mul(K_MUL);
77    b ^= b >> 47;
78    b.wrapping_mul(K_MUL)
79}
80
81#[inline]
82pub fn hash_len_16(u: u64, v: u64) -> u64 {
83    hash128_to_64(U128(u, v))
84}
85
86pub fn hash_len_0_16(data: &[u8]) -> u64 {
87    if data.len() > 8 {
88        let a = fetch64(data);
89        let b = fetch64(&data[data.len() - 8..]);
90        hash_len_16(
91            a,
92            rotate_by_at_least1(b.wrapping_add(data.len() as u64), data.len() as i32),
93        ) ^ b
94    } else if data.len() >= 4 {
95        let a = fetch32(data) as u64;
96        hash_len_16(
97            (a << 3).wrapping_add(data.len() as u64),
98            fetch32(&data[data.len() - 4..]) as u64,
99        )
100    } else if !data.is_empty() {
101        let a = data[0] as u64;
102        let b = data[data.len() >> 1] as u64;
103        let c = data[data.len() - 1] as u64;
104        let y = a.wrapping_add(b << 8);
105        let z = (c << 2).wrapping_add(data.len() as u64);
106        shift_mix(y.wrapping_mul(K2) ^ z.wrapping_mul(K3)).wrapping_mul(K2)
107    } else {
108        K2
109    }
110}
111
112pub fn hash_len_17_32(data: &[u8]) -> u64 {
113    let a = fetch64(data).wrapping_mul(K1);
114    let b = fetch64(&data[8..]);
115    let c = fetch64(&data[data.len() - 8..]).wrapping_mul(K2);
116    let d = fetch64(&data[data.len() - 16..]).wrapping_mul(K0);
117    hash_len_16(
118        rotate(a.wrapping_sub(b), 43)
119            .wrapping_add(rotate(c, 30))
120            .wrapping_add(d),
121        a.wrapping_add(rotate(b ^ K3, 20))
122            .wrapping_sub(c)
123            .wrapping_add(data.len() as u64),
124    )
125}
126
127pub fn weak_hash_len32_with_seeds_p(
128    w: u64,
129    x: u64,
130    y: u64,
131    z: u64,
132    mut a: u64,
133    mut b: u64,
134) -> (u64, u64) {
135    a = a.wrapping_add(w);
136    b = rotate(b.wrapping_add(a).wrapping_add(z), 21);
137    let c = a;
138    a = a.wrapping_add(x);
139    a = a.wrapping_add(y);
140    b = b.wrapping_add(rotate(a, 44));
141    (a.wrapping_add(z), b.wrapping_add(c))
142}
143
144#[inline]
145pub fn weak_hash_len32_with_seeds(data: &[u8], a: u64, b: u64) -> (u64, u64) {
146    weak_hash_len32_with_seeds_p(
147        fetch64(data),
148        fetch64(&data[8..]),
149        fetch64(&data[16..]),
150        fetch64(&data[24..]),
151        a,
152        b,
153    )
154}
155
156pub fn hash_len_33_64(data: &[u8]) -> u64 {
157    let mut z = fetch64(&data[24..]);
158    let mut a = fetch64(data).wrapping_add(
159        fetch64(&data[data.len() - 16..])
160            .wrapping_add(data.len() as u64)
161            .wrapping_mul(K0),
162    );
163    let mut b = rotate(a + z, 52);
164    let mut c = rotate(a, 37);
165    a = a.wrapping_add(fetch64(&data[8..]));
166    c = c.wrapping_add(rotate(a, 7));
167    a = a.wrapping_add(fetch64(&data[16..]));
168    let vf = a.wrapping_add(z);
169    let vs = b.wrapping_add(rotate(a, 31)).wrapping_add(c);
170    a = fetch64(&data[16..]).wrapping_add(fetch64(&data[data.len() - 32..]));
171    z = fetch64(&data[data.len() - 8..]);
172    b = rotate(a.wrapping_add(z), 52);
173    c = rotate(a, 37);
174    a = a.wrapping_add(fetch64(&data[data.len() - 24..]));
175    c = c.wrapping_add(rotate(a, 7));
176    a = a.wrapping_add(fetch64(&data[data.len() - 16..]));
177    let wf = a.wrapping_add(z);
178    let ws = b.wrapping_add(rotate(a, 31)).wrapping_add(c);
179    let r = shift_mix(
180        vf.wrapping_add(ws)
181            .wrapping_mul(K2)
182            .wrapping_add((wf.wrapping_add(vs)).wrapping_mul(K0)),
183    );
184    shift_mix(r.wrapping_mul(K0).wrapping_add(vs)).wrapping_mul(K2)
185}
186
187pub fn city_hash_64(data: &[u8]) -> u64 {
188    if data.len() <= 32 {
189        if data.len() <= 16 {
190            return hash_len_0_16(data);
191        } else {
192            return hash_len_17_32(data);
193        }
194    } else if data.len() <= 64 {
195        return hash_len_33_64(data);
196    }
197
198    let mut x = fetch64(data);
199    let mut y = fetch64(&data[data.len() - 16..]) ^ K1;
200    let mut z = fetch64(&data[data.len() - 56..]) ^ K0;
201    let mut v = weak_hash_len32_with_seeds(&data[data.len() - 64..], data.len() as u64, y);
202    let mut w = weak_hash_len32_with_seeds(
203        &data[data.len() - 32..],
204        (data.len() as u64).wrapping_mul(K1),
205        K0,
206    );
207
208    z = z.wrapping_add(shift_mix(v.1).wrapping_mul(K1));
209    x = rotate(z.wrapping_add(x), 39).wrapping_mul(K1);
210    y = rotate(y, 33).wrapping_mul(K1);
211
212    let mut len = (data.len() - 1) & !63;
213    let mut offset = 0;
214    loop {
215        x = rotate(
216            x.wrapping_add(y)
217                .wrapping_add(v.0)
218                .wrapping_add(fetch64(&data[offset + 16..])),
219            37,
220        )
221        .wrapping_mul(K1);
222        y = rotate(
223            y.wrapping_add(v.1)
224                .wrapping_add(fetch64(&data[offset + 48..])),
225            42,
226        )
227        .wrapping_mul(K1);
228        x ^= w.1;
229        y ^= v.0;
230        z = rotate(z ^ w.0, 33);
231        v = weak_hash_len32_with_seeds(&data[offset..], v.1.wrapping_mul(K1), x.wrapping_add(w.0));
232        w = weak_hash_len32_with_seeds(&data[offset + 32..], z.wrapping_add(w.1), y);
233        std::mem::swap(&mut z, &mut x);
234        offset += 64;
235        len -= 64;
236        if len == 0 {
237            break;
238        }
239    }
240
241    hash_len_16(
242        hash_len_16(v.0, w.0).wrapping_add(shift_mix(y).wrapping_mul(K1).wrapping_add(z)),
243        hash_len_16(v.1, w.1).wrapping_add(x),
244    )
245}
246
247pub fn city_murmur(data: &[u8], seed: u128) -> u128 {
248    let seed = U128::from(seed);
249    let mut a = seed.0;
250    let mut b = seed.1;
251    let mut c;
252    let mut d;
253
254    let mut len = data.len();
255    if len <= 16 {
256        a = shift_mix(a.wrapping_mul(K1)).wrapping_mul(K1);
257        c = b.wrapping_mul(K1).wrapping_add(hash_len_0_16(data));
258        let additive = if len >= 8 { fetch64(data) } else { c };
259        d = shift_mix(a.wrapping_add(additive));
260    } else {
261        c = hash_len_16(fetch64(&data[len - 8..]).wrapping_add(K1), a);
262        d = hash_len_16(
263            b.wrapping_add(len as u64),
264            c.wrapping_add(fetch64(&data[len - 16..])),
265        );
266        a = a.wrapping_add(d);
267
268        let mut offset = 0;
269        loop {
270            a ^= shift_mix(fetch64(&data[offset..]).wrapping_mul(K1)).wrapping_mul(K1);
271            a = a.wrapping_mul(K1);
272            b ^= a;
273            c ^= shift_mix(fetch64(&data[offset + 8..]).wrapping_mul(K1)).wrapping_mul(K1);
274            c = c.wrapping_mul(K1);
275            d ^= c;
276            offset += 16;
277            len -= 16;
278            if len <= 16 {
279                break;
280            }
281        }
282    }
283
284    a = hash_len_16(a, c);
285    b = hash_len_16(d, b);
286    U128(a ^ b, hash_len_16(b, a)).into()
287}
288
289pub fn city_hash_128_seed(data: &[u8], seed: u128) -> u128 {
290    if data.len() < 128 {
291        return city_murmur(data, seed);
292    }
293
294    let seed = U128::from(seed);
295    let mut v = (0, 0);
296    let mut w = (0, 0);
297    let mut x = seed.0;
298    let mut y = seed.1;
299    let mut z = (data.len() as u64).wrapping_mul(K1);
300    v.0 = rotate(y ^ K1, 49)
301        .wrapping_mul(K1)
302        .wrapping_add(fetch64(data));
303    v.1 = rotate(v.0, 42)
304        .wrapping_mul(K1)
305        .wrapping_add(fetch64(&data[8..]));
306    w.0 = rotate(y.wrapping_add(z), 35)
307        .wrapping_mul(K1)
308        .wrapping_add(x);
309    w.1 = rotate(x.wrapping_add(fetch64(&data[88..])), 53).wrapping_mul(K1);
310
311    let mut offset = 0;
312    let mut len = data.len();
313    loop {
314        x = rotate(
315            x.wrapping_add(y)
316                .wrapping_add(v.0)
317                .wrapping_add(fetch64(&data[offset + 16..])),
318            37,
319        )
320        .wrapping_mul(K1);
321        y = rotate(
322            y.wrapping_add(v.1)
323                .wrapping_add(fetch64(&data[offset + 48..])),
324            42,
325        )
326        .wrapping_mul(K1);
327        x ^= w.1;
328        y ^= v.0;
329        z = rotate(z ^ w.0, 33);
330        v = weak_hash_len32_with_seeds(&data[offset..], v.1.wrapping_mul(K1), x.wrapping_add(w.0));
331        w = weak_hash_len32_with_seeds(&data[offset + 32..], z.wrapping_add(w.1), y);
332        std::mem::swap(&mut z, &mut x);
333        offset += 64;
334
335        x = rotate(
336            x.wrapping_add(y)
337                .wrapping_add(v.0)
338                .wrapping_add(fetch64(&data[offset + 16..])),
339            37,
340        )
341        .wrapping_mul(K1);
342        y = rotate(
343            y.wrapping_add(v.1)
344                .wrapping_add(fetch64(&data[offset + 48..])),
345            42,
346        )
347        .wrapping_mul(K1);
348        x ^= w.1;
349        y ^= v.0;
350        z = rotate(z ^ w.0, 33);
351        v = weak_hash_len32_with_seeds(&data[offset..], v.1.wrapping_mul(K1), x.wrapping_add(w.0));
352        w = weak_hash_len32_with_seeds(&data[offset + 32..], z.wrapping_add(w.1), y);
353        std::mem::swap(&mut z, &mut x);
354        offset += 64;
355        len -= 128;
356        if len < 128 {
357            break;
358        }
359    }
360    y = y.wrapping_add(rotate(w.0, 37).wrapping_mul(K0).wrapping_add(z));
361    x = x.wrapping_add(rotate(v.0.wrapping_add(z), 49).wrapping_mul(K0));
362
363    let mut tail_done = 0;
364    while tail_done < len {
365        tail_done += 32;
366        y = rotate(y.wrapping_sub(x), 42)
367            .wrapping_mul(K0)
368            .wrapping_add(v.1);
369        w.0 = w.0.wrapping_add(fetch64(&data[offset + len - tail_done + 16..]));
370        x = rotate(x, 49).wrapping_mul(K0).wrapping_add(w.0);
371        w.0 = w.0.wrapping_add(v.0);
372        v = weak_hash_len32_with_seeds(&data[offset + len - tail_done..], v.0, v.1);
373    }
374
375    x = hash_len_16(x, v.0);
376    y = hash_len_16(y, w.0);
377
378    U128(
379        hash_len_16(x.wrapping_add(v.1), w.1).wrapping_add(y),
380        hash_len_16(x.wrapping_add(w.1), y.wrapping_add(v.1)),
381    )
382    .into()
383}
384
385pub fn city_hash_128(data: &[u8]) -> u128 {
386    if data.len() >= 16 {
387        city_hash_128_seed(
388            &data[16..],
389            U128(fetch64(data) ^ K3, fetch64(&data[8..])).into(),
390        )
391    } else if data.len() >= 8 {
392        city_hash_128_seed(
393            &[],
394            U128(
395                fetch64(data) ^ ((data.len() as u64).wrapping_mul(K0)),
396                fetch64(&data[data.len() - 8..]) ^ K1,
397            )
398            .into(),
399        )
400    } else {
401        city_hash_128_seed(data, U128(K0, K1).into())
402    }
403}