//! gxhash/gxhash/mod.rs — core gxhash hashing routines.

1pub(crate) mod platform;
2
3use platform::*;
4
5/// Hashes an arbitrary stream of bytes to an u32.
6///
7/// # Example
8///
9/// ```
10/// let bytes = [42u8; 1000];
11/// let seed = 1234;
12/// println!("Hash is {:x}!", gxhash::gxhash32(&bytes, seed));
13/// ```
14#[inline(always)]
15pub fn gxhash32(input: &[u8], seed: i64) -> u32 {
16    unsafe {
17        let p = &gxhash(input, create_seed(seed)) as *const State as *const u32;
18        *p
19    }
20}
21
22/// Hashes an arbitrary stream of bytes to an u64.
23///
24/// # Example
25///
26/// ```
27/// let bytes = [42u8; 1000];
28/// let seed = 1234;
29/// println!("Hash is {:x}!", gxhash::gxhash64(&bytes, seed));
30/// ```
31#[inline(always)]
32pub fn gxhash64(input: &[u8], seed: i64) -> u64 {
33    unsafe {
34        let p = &gxhash(input, create_seed(seed)) as *const State as *const u64;
35        *p
36    }
37}
38
39/// Hashes an arbitrary stream of bytes to an u128.
40///
41/// # Example
42///
43/// ```
44/// let bytes = [42u8; 1000];
45/// let seed = 1234;
46/// println!("Hash is {:x}!", gxhash::gxhash128(&bytes, seed));
47/// ```
48#[inline(always)]
49pub fn gxhash128(input: &[u8], seed: i64) -> u128 {
50    unsafe {
51        let p = &gxhash(input, create_seed(seed)) as *const State as *const u128;
52        *p
53    }
54}
55
/// Loads one or more `State` vectors from `$ptr` with unaligned reads.
///
/// Each listed identifier is bound (as `mut`) to one loaded vector, and
/// `$ptr` is advanced by one `State`-sized element per load, so consecutive
/// identifiers receive consecutive vectors from memory.
macro_rules! load_unaligned {
    ($ptr:ident, $($var:ident),+) => {
        $(
            // Some call sites never reassign the loaded vector; silence the
            // lint instead of forcing every caller to need mutability.
            #[allow(unused_mut)]
            let mut $var = load_unaligned($ptr);
            // Element-wise pointer arithmetic: advances by size_of::<State>() bytes.
            $ptr = ($ptr).offset(1);
        )+
    };
}

pub(crate) use load_unaligned;
67
68#[inline(always)]
69pub(crate) unsafe fn gxhash(input: &[u8], seed: State) -> State {
70    finalize(aes_encrypt(compress_all(input), seed))
71}
72
/// Compresses the entire input into a single `State` vector.
///
/// Reading order matters: if the length is not a multiple of `VECTOR_SIZE`,
/// the partial tail-sized chunk is consumed FIRST (from the front of the
/// buffer), so every subsequent whole-vector read is guaranteed to stay
/// within the input bounds.
#[inline(always)]
pub(crate) unsafe fn compress_all(input: &[u8]) -> State {

    let len = input.len();
    let mut ptr = input.as_ptr() as *const State;

    if len == 0 {
        // Empty input maps to a fixed "empty" state rather than zero.
        return create_empty();
    }

    if len <= VECTOR_SIZE {
        // Input fits on a single SIMD vector, however we might read beyond the input message
        // Thus we need this safe method that checks if it can safely read beyond or must copy
        return get_partial(ptr, len);
    }

    let mut hash_vector: State;
    // One-past-the-end address of the input, used by compress_many.
    let end = ptr as usize + len;

    let extra_bytes_count = len % VECTOR_SIZE;
    if extra_bytes_count == 0 {
        // Whole number of vectors: start the accumulator with the first one.
        load_unaligned!(ptr, v0);
        hash_vector = v0;
    } else {
        // If the input length does not match the length of a whole number of SIMD vectors,
        // it means we'll need to read a partial vector. We can start with the partial vector first,
        // so that we can safely read beyond since we expect the following bytes to still be part of
        // the input
        hash_vector = get_partial_unsafe(ptr, extra_bytes_count);
        // Advance by the partial byte count (byte-wise, hence the u8 cast).
        ptr = ptr.cast::<u8>().add(extra_bytes_count).cast();
    }

    load_unaligned!(ptr, v0);

    if len > VECTOR_SIZE * 2 {
        // Fast path when input length > 32 and <= 48
        load_unaligned!(ptr, v);
        v0 = aes_encrypt(v0, v);

        if len > VECTOR_SIZE * 3 {
            // Fast path when input length > 48 and <= 64
            load_unaligned!(ptr, v);
            v0 = aes_encrypt(v0, v);

            if len > VECTOR_SIZE * 4 {
                // Input message is large and we can use the high ILP loop
                hash_vector = compress_many(ptr, end, hash_vector, len);
            }
        }
    }
    
    // Fold the short-path accumulator `v0` (keyed with two AES rounds) into
    // the main hash vector with a final AES round.
    return aes_encrypt_last(hash_vector, 
        aes_encrypt(aes_encrypt(v0, ld(KEYS.as_ptr())), ld(KEYS.as_ptr().offset(4))));
}
127
/// Compresses the remaining input, starting at `ptr`, into `hash_vector`.
///
/// First consumes single vectors until the remaining block count is a
/// multiple of `UNROLL_FACTOR`, then hands the rest to the unrolled
/// `compress_8` loop for higher instruction-level parallelism.
/// `end` is the one-past-the-end address of the whole input; `len` is the
/// total input length (forwarded to `compress_8`).
#[inline(always)]
unsafe fn compress_many(mut ptr: *const State, end: usize, hash_vector: State, len: usize) -> State {

    const UNROLL_FACTOR: usize = 8;

    let remaining_bytes = end -  ptr as usize;

    // Number of vectors that belong to whole groups of UNROLL_FACTOR.
    let unrollable_blocks_count: usize = remaining_bytes / (VECTOR_SIZE * UNROLL_FACTOR) * UNROLL_FACTOR; 

    // Bytes left over before the unrollable region begins.
    let remaining_bytes = remaining_bytes - unrollable_blocks_count * VECTOR_SIZE;
    let end_address = ptr.add(remaining_bytes / VECTOR_SIZE) as usize;

    // Process first individual blocks until we have a whole number of 8 blocks
    let mut hash_vector = hash_vector;
    while (ptr as usize) < end_address {
        load_unaligned!(ptr, v0);
        hash_vector = aes_encrypt(hash_vector, v0);
    }

    // Process the remaining n * 8 blocks
    // This part may use 128-bit or 256-bit
    compress_8(ptr, end, hash_vector, len)
}
151
#[cfg(test)]
mod tests {

    use super::*;
    use rand::Rng;

    /// Flipping any single input byte must change the hash: proves every
    /// byte of the input is actually consumed, for all sizes up to 1200.
    #[test]
    fn all_blocks_are_consumed() {
        for s in 1..1200 {
            let mut bytes = vec![42u8; s];
            let ref_hash = gxhash32(&bytes, 0);

            for i in 0..bytes.len() {
                let swap = bytes[i];
                bytes[i] = 82;
                let new_hash = gxhash32(&bytes, 0);
                bytes[i] = swap;

                assert_ne!(ref_hash, new_hash, "byte {i} not processed for input of size {s}");
            }
        }
    }

    /// Appending trailing zeroes must still change the hash (length matters).
    #[test]
    fn add_zeroes_mutates_hash() {
        let mut bytes = [0u8; 1200];

        let mut rng = rand::thread_rng();
        rng.fill(&mut bytes[..32]);

        let mut ref_hash = 0;

        for i in 32..100 {
            // Shared borrow is sufficient here; the previous `&mut` was unnecessary.
            let new_hash = gxhash32(&bytes[..i], 0);
            assert_ne!(ref_hash, new_hash, "Same hash at size {i} ({new_hash})");
            ref_hash = new_hash;
        }
    }

    /// Mutating bytes immediately before/after the hashed slice must not
    /// affect the hash: proves no out-of-bounds reads leak into the result.
    #[test]
    fn does_not_hash_outside_of_bounds() {
        let mut bytes = [0u8; 1200];
        const OFFSET: usize = 100;

        let mut rng = rand::thread_rng();
        rng.fill(bytes.as_mut_slice());

        for i in 1..1000 {
            let hash = gxhash32(&bytes[OFFSET..i+OFFSET], 42);
            // We change the bytes right before and after the input slice. It shouldn't alter the hash.
            rng.fill(&mut bytes[..OFFSET]);
            rng.fill(&mut bytes[i+OFFSET..]);
            let new_hash = gxhash32(&bytes[OFFSET..i+OFFSET], 42);
            // Message fixed: was the ungrammatical "Hashed changed".
            assert_eq!(new_hash, hash, "Hash changed for input size {i} ({new_hash} != {hash})");
        }
    }

    /// All-zero inputs (including empty) must not hash to zero.
    #[test]
    fn hash_of_zero_is_not_zero() {
        assert_ne!(0, gxhash32(&[0u8; 0], 0));
        assert_ne!(0, gxhash32(&[0u8; 1], 0));
        assert_ne!(0, gxhash32(&[0u8; 1200], 0));
    }

    /// Pins known hash values so the algorithm stays stable across changes.
    #[test]
    fn is_stable() {
        assert_eq!(2533353535, gxhash32(&[0u8; 0], 0));
        assert_eq!(4243413987, gxhash32(&[0u8; 1], 0));
        assert_eq!(2401749549, gxhash32(&[0u8; 1000], 0));
        assert_eq!(4156851105, gxhash32(&[42u8; 4242], 42));
        assert_eq!(1981427771, gxhash32(&[42u8; 4242], -42));
        assert_eq!(1156095992, gxhash32(b"Hello World", i64::MAX));
        assert_eq!(540827083, gxhash32(b"Hello World", i64::MIN));
    }
}
226}