swh_graph/java_compat/mph/
spooky.rs

1/*
2 *
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 * SPDX-FileCopyrightText: 2023 Inria
5 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
6 *
7 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
8 */
9
10//! A pure Rust implementation of [SpookyHash](https://burtleburtle.net/bob/hash/spooky.html).
11//!
12//! Ported from <https://github.com/vigna/Sux4J/blob/master/c/mph.c>.
13
14// The original is SC_CONST = 0xdeadbeefdeadbeef but we must
15// be compatible with the original Java implementation in Sux4J.
16pub const SC_CONST: u64 = 0x9e3779b97f4a7c13;
17
18#[inline(always)]
19#[must_use]
20pub const fn spooky_short_mix(mut h: [u64; 4]) -> [u64; 4] {
21    h[2] = h[2].rotate_left(50);
22    h[2] = h[2].wrapping_add(h[3]);
23    h[0] ^= h[2];
24    h[3] = h[3].rotate_left(52);
25    h[3] = h[3].wrapping_add(h[0]);
26    h[1] ^= h[3];
27    h[0] = h[0].rotate_left(30);
28    h[0] = h[0].wrapping_add(h[1]);
29    h[2] ^= h[0];
30    h[1] = h[1].rotate_left(41);
31    h[1] = h[1].wrapping_add(h[2]);
32    h[3] ^= h[1];
33    h[2] = h[2].rotate_left(54);
34    h[2] = h[2].wrapping_add(h[3]);
35    h[0] ^= h[2];
36    h[3] = h[3].rotate_left(48);
37    h[3] = h[3].wrapping_add(h[0]);
38    h[1] ^= h[3];
39    h[0] = h[0].rotate_left(38);
40    h[0] = h[0].wrapping_add(h[1]);
41    h[2] ^= h[0];
42    h[1] = h[1].rotate_left(37);
43    h[1] = h[1].wrapping_add(h[2]);
44    h[3] ^= h[1];
45    h[2] = h[2].rotate_left(62);
46    h[2] = h[2].wrapping_add(h[3]);
47    h[0] ^= h[2];
48    h[3] = h[3].rotate_left(34);
49    h[3] = h[3].wrapping_add(h[0]);
50    h[1] ^= h[3];
51    h[0] = h[0].rotate_left(5);
52    h[0] = h[0].wrapping_add(h[1]);
53    h[2] ^= h[0];
54    h[1] = h[1].rotate_left(36);
55    h[1] = h[1].wrapping_add(h[2]);
56    h[3] ^= h[1];
57    h
58}
59
60#[inline(always)]
61#[must_use]
62const fn spooky_short_end(mut h: [u64; 4]) -> [u64; 4] {
63    h[3] ^= h[2];
64    h[2] = h[2].rotate_left(15);
65    h[3] = h[3].wrapping_add(h[2]);
66    h[0] ^= h[3];
67    h[3] = h[3].rotate_left(52);
68    h[0] = h[0].wrapping_add(h[3]);
69    h[1] ^= h[0];
70    h[0] = h[0].rotate_left(26);
71    h[1] = h[1].wrapping_add(h[0]);
72    h[2] ^= h[1];
73    h[1] = h[1].rotate_left(51);
74    h[2] = h[2].wrapping_add(h[1]);
75    h[3] ^= h[2];
76    h[2] = h[2].rotate_left(28);
77    h[3] = h[3].wrapping_add(h[2]);
78    h[0] ^= h[3];
79    h[3] = h[3].rotate_left(9);
80    h[0] = h[0].wrapping_add(h[3]);
81    h[1] ^= h[0];
82    h[0] = h[0].rotate_left(47);
83    h[1] = h[1].wrapping_add(h[0]);
84    h[2] ^= h[1];
85    h[1] = h[1].rotate_left(54);
86    h[2] = h[2].wrapping_add(h[1]);
87    h[3] ^= h[2];
88    h[2] = h[2].rotate_left(32);
89    h[3] = h[3].wrapping_add(h[2]);
90    h[0] ^= h[3];
91    h[3] = h[3].rotate_left(25);
92    h[0] = h[0].wrapping_add(h[3]);
93    h[1] ^= h[0];
94    h[0] = h[0].rotate_left(63);
95    h[1] = h[1].wrapping_add(h[0]);
96    h
97}
98
99#[inline(always)]
100#[must_use]
101pub const fn spooky_short_rehash(signature: &[u64; 4], seed: u64) -> [u64; 4] {
102    spooky_short_mix([
103        seed,
104        SC_CONST.wrapping_add(signature[0]),
105        SC_CONST.wrapping_add(signature[1]),
106        SC_CONST,
107    ])
108}
109
110#[must_use]
111#[inline]
112pub fn spooky_short(data: &[u8], seed: u64) -> [u64; 4] {
113    let mut h = [seed, seed, SC_CONST, SC_CONST];
114
115    let iter = data.chunks_exact(32);
116    let mut reminder = iter.remainder();
117
118    for chunk in iter {
119        // handle all complete sets of 32 bytes
120        h[2] = h[2].wrapping_add(u64::from_le_bytes(chunk[0..8].try_into().unwrap()));
121        h[3] = h[3].wrapping_add(u64::from_le_bytes(chunk[8..16].try_into().unwrap()));
122        h = spooky_short_mix(h);
123        h[0] = h[0].wrapping_add(u64::from_le_bytes(chunk[16..24].try_into().unwrap()));
124        h[1] = h[1].wrapping_add(u64::from_le_bytes(chunk[24..32].try_into().unwrap()));
125    }
126
127    //Handle the case of 16+ remaining bytes.
128    if reminder.len() >= 16 {
129        h[2] = h[2].wrapping_add(u64::from_le_bytes(reminder[0..8].try_into().unwrap()));
130        h[3] = h[3].wrapping_add(u64::from_le_bytes(reminder[8..16].try_into().unwrap()));
131        h = spooky_short_mix(h);
132        reminder = &reminder[16..];
133    }
134
135    // Handle the last 0..15 bytes, and its length
136    // We copy it into a buffer filled with zeros so we can simplify the
137    // code
138    if reminder.is_empty() {
139        h[2] = h[2].wrapping_add(SC_CONST);
140        h[3] = h[3].wrapping_add(SC_CONST);
141    } else {
142        let mut buffer: [u8; 16] = [0; 16];
143        buffer[..reminder.len()].copy_from_slice(reminder);
144        h[2] = h[2].wrapping_add(u64::from_le_bytes(buffer[0..8].try_into().unwrap()));
145        h[3] = h[3].wrapping_add(u64::from_le_bytes(buffer[8..16].try_into().unwrap()));
146    }
147
148    h[0] = h[0].wrapping_add((data.len() as u64) * 8);
149    spooky_short_end(h)
150}