// musli_zerocopy/sip.rs

1// Copied and trimmed from the siphasher project under the MIT license.
2//
3// See https://github.com/jedisct1/rust-siphash/tree/e085f9387d14b40d92e2478f11dcb772149147be
4
5use core::cmp;
6use core::hash;
7use core::marker::PhantomData;
8use core::mem;
9use core::ptr;
10
/// A 128-bit (2x64) hash output.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct Hash128 {
    /// First 64-bit half of the digest.
    pub(crate) h1: u64,
    /// Second 64-bit half of the digest; this is also the value returned by
    /// the 64-bit `hash::Hasher::finish`.
    pub(crate) h2: u64,
}
17
/// An implementation of SipHash128 1-3: one compression round per message
/// block and three finalization rounds, producing a 128-bit digest.
#[derive(Debug, Clone, Copy)]
pub(crate) struct SipHasher13 {
    /// The generic SipHash driver, instantiated with the 1-3 round counts.
    hasher: Hasher<Sip13Rounds>,
}
23
/// Generic SipHash driver; the round counts are supplied by `S`.
///
/// `Clone` is implemented manually elsewhere in this file rather than
/// derived, since a derive would add an unnecessary `S: Clone` bound.
#[derive(Debug, Copy)]
struct Hasher<S>
where
    S: Sip,
{
    /// First half of the 128-bit key.
    k0: u64,
    /// Second half of the 128-bit key.
    k1: u64,
    // how many bytes we've processed
    length: usize,
    // hash State
    state: State,
    // unprocessed bytes le
    tail: u64,
    // how many bytes in tail are valid
    ntail: usize,
    /// Ties `S` to the type without storing a value of it.
    _marker: PhantomData<S>,
}
41
/// The four 64-bit words of internal SipHash state.
#[derive(Debug, Clone, Copy)]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm, and simd
    // implementations of SipHash will use vectors of v02 and v13. By placing
    // them in this order in the struct, the compiler can pick up on just a few
    // simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
53
/// One SipHash round ("SIPROUND"): a fixed ARX sequence of wrapping
/// additions, bit rotations and XORs over the four state words. The
/// single-argument form expands to the four-word form over a `State`.
macro_rules! compress {
    ($state:expr) => {{
        compress!($state.v0, $state.v1, $state.v2, $state.v3)
    }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}
75
/// Loads an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)` — the caller
/// must guarantee that range is in bounds for `$buf` (checked only via
/// `debug_assert!` here).
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
        let mut data = 0 as $int_ty;
        // Byte-wise copy into a local, so alignment of the source is
        // irrelevant; `to_le()` then fixes the byte order on BE targets.
        ptr::copy_nonoverlapping(
            $buf.as_ptr().add($i),
            &mut data as *mut _ as *mut u8,
            mem::size_of::<$int_ty>(),
        );
        data.to_le()
    }};
}
93
/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
/// sizes and avoid calling `memcpy`, which is good for speed.
///
/// # Safety
///
/// Unchecked indexing at `start..start + len`: the caller must guarantee the
/// range is in bounds for `buf`, and that `len < 8`.
#[inline]
unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
    debug_assert!(len < 8);
    let mut i = 0; // current byte index (from LSB) in the output u64
    let mut out = 0;
    // Assemble the value from the widest fixed-size loads possible: at most
    // one 4-byte load, one 2-byte load, and one single byte, as `len` allows.
    if i + 3 < len {
        out = load_int_le!(buf, start + i, u32) as u64;
        i += 4;
    }
    if i + 1 < len {
        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
        i += 2
    }
    if i < len {
        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
        i += 1;
    }
    // Every requested byte must have been consumed exactly once.
    debug_assert_eq!(i, len);
    out
}
119
/// Extension trait for hashers that can produce a full 128-bit digest in
/// addition to `hash::Hasher`'s 64-bit `finish`.
pub(crate) trait Hasher128 {
    /// Return a 128-bit hash.
    fn finish128(&self) -> Hash128;
}
124
125impl SipHasher13 {
126    /// Creates a `SipHasher13` that is keyed off the provided keys.
127    #[inline]
128    pub(crate) fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
129        SipHasher13 {
130            hasher: Hasher::new_with_keys(key0, key1),
131        }
132    }
133}
134
135impl Hasher128 for SipHasher13 {
136    /// Return a 128-bit hash
137    #[inline]
138    fn finish128(&self) -> Hash128 {
139        self.hasher.finish128()
140    }
141}
142
impl<S> Hasher<S>
where
    S: Sip,
{
    /// Creates a hasher keyed with `key0`/`key1`. The real initial state is
    /// derived from the keys by `reset()`.
    #[inline]
    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
        let mut state = Hasher {
            k0: key0,
            k1: key1,
            length: 0,
            // Placeholder state only; immediately overwritten by `reset()`.
            state: State {
                v0: 0,
                v1: 0xee,
                v2: 0,
                v3: 0,
            },
            tail: 0,
            ntail: 0,
            _marker: PhantomData,
        };
        state.reset();
        state
    }

    /// Restores the initial SipHash state for the current keys, discarding
    /// any buffered input.
    #[inline]
    fn reset(&mut self) {
        self.length = 0;
        // Standard SipHash initialization constants ("somepseudorandomly
        // generatedbytes") XORed with the keys. The second constant ends in
        // 0x83 rather than 0x6d ('m'): the 128-bit variant additionally XORs
        // v1 with 0xee, and that XOR is pre-folded into the constant here
        // (0x6d ^ 0xee == 0x83) — consistent with the 0xee used in
        // finalization below.
        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
        self.state.v1 = self.k1 ^ 0x646f72616e646f83;
        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
        self.state.v3 = self.k1 ^ 0x7465646279746573;
        self.ntail = 0;
    }

    // A specialized write function for values with size <= 8.
    //
    // The hashing of multi-byte integers depends on endianness. E.g.:
    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
    //
    // This function does the right thing for little-endian hardware. On
    // big-endian hardware `x` must be byte-swapped first to give the right
    // behaviour. After any byte-swapping, the input must be zero-extended to
    // 64-bits. The caller is responsible for the byte-swapping and
    // zero-extension.
    #[inline]
    fn short_write<T>(&mut self, _x: T, x: u64) {
        // `_x` exists only to supply the width of the original integer type.
        let size = mem::size_of::<T>();
        self.length += size;

        // The original number must be zero-extended, not sign-extended.
        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });

        // The number of bytes needed to fill `self.tail`.
        let needed = 8 - self.ntail;

        // Append the new bytes above the ones already buffered (LE order).
        self.tail |= x << (8 * self.ntail);
        if size < needed {
            // Still less than a full 8-byte word buffered; nothing to mix yet.
            self.ntail += size;
            return;
        }

        // `self.tail` is full, process it.
        self.state.v3 ^= self.tail;
        S::c_rounds(&mut self.state);
        self.state.v0 ^= self.tail;

        // Re-buffer the bytes that did not fit. The `needed < 8` guard
        // avoids a shift by 64, which is not permitted on u64.
        self.ntail = size - needed;
        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
    }
}
214
impl<S> Hasher<S>
where
    S: Sip,
{
    /// Finalizes and returns the 128-bit digest. Works on a copy of the
    /// state, so the hasher itself is left untouched and can keep absorbing
    /// input.
    #[inline]
    pub(crate) fn finish128(&self) -> Hash128 {
        let mut state = self.state;

        // Final message word: total length (mod 256) in the top byte, with
        // the remaining unprocessed tail bytes below it.
        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;

        state.v3 ^= b;
        S::c_rounds(&mut state);
        state.v0 ^= b;

        // 128-bit finalization: XOR 0xee into v2 for the first half, then
        // 0xdd into v1 for the second half, each followed by d-rounds.
        state.v2 ^= 0xee;
        S::d_rounds(&mut state);
        let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;

        state.v1 ^= 0xdd;
        S::d_rounds(&mut state);
        let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;

        Hash128 { h1, h2 }
    }
}
240
241impl hash::Hasher for SipHasher13 {
242    #[inline]
243    fn write(&mut self, msg: &[u8]) {
244        self.hasher.write(msg)
245    }
246
247    #[inline]
248    fn finish(&self) -> u64 {
249        self.hasher.finish()
250    }
251
252    #[inline]
253    fn write_usize(&mut self, i: usize) {
254        self.hasher.write_usize(i);
255    }
256
257    #[inline]
258    fn write_u8(&mut self, i: u8) {
259        self.hasher.write_u8(i);
260    }
261
262    #[inline]
263    fn write_u16(&mut self, i: u16) {
264        self.hasher.write_u16(i);
265    }
266
267    #[inline]
268    fn write_u32(&mut self, i: u32) {
269        self.hasher.write_u32(i);
270    }
271
272    #[inline]
273    fn write_u64(&mut self, i: u64) {
274        self.hasher.write_u64(i);
275    }
276}
277
278impl<S> hash::Hasher for Hasher<S>
279where
280    S: Sip,
281{
282    #[inline]
283    fn write_usize(&mut self, i: usize) {
284        self.short_write(i, i.to_le() as u64);
285    }
286
287    #[inline]
288    fn write_u8(&mut self, i: u8) {
289        self.short_write(i, i as u64);
290    }
291
292    #[inline]
293    fn write_u32(&mut self, i: u32) {
294        self.short_write(i, i.to_le() as u64);
295    }
296
297    #[inline]
298    fn write_u64(&mut self, i: u64) {
299        self.short_write(i, i.to_le());
300    }
301
302    #[inline]
303    fn write(&mut self, msg: &[u8]) {
304        let length = msg.len();
305        self.length += length;
306
307        let mut needed = 0;
308
309        if self.ntail != 0 {
310            needed = 8 - self.ntail;
311            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
312            if length < needed {
313                self.ntail += length;
314                return;
315            } else {
316                self.state.v3 ^= self.tail;
317                S::c_rounds(&mut self.state);
318                self.state.v0 ^= self.tail;
319                self.ntail = 0;
320            }
321        }
322
323        // Buffered tail is now flushed, process new input.
324        let len = length - needed;
325        let left = len & 0x7;
326
327        let mut i = needed;
328        while i < len - left {
329            let mi = unsafe { load_int_le!(msg, i, u64) };
330
331            self.state.v3 ^= mi;
332            S::c_rounds(&mut self.state);
333            self.state.v0 ^= mi;
334
335            i += 8;
336        }
337
338        self.tail = unsafe { u8to64_le(msg, i, left) };
339        self.ntail = left;
340    }
341
342    #[inline]
343    fn finish(&self) -> u64 {
344        self.finish128().h2
345    }
346}
347
348impl<S> Clone for Hasher<S>
349where
350    S: Sip,
351{
352    #[inline]
353    fn clone(&self) -> Hasher<S> {
354        Hasher {
355            k0: self.k0,
356            k1: self.k1,
357            length: self.length,
358            state: self.state,
359            tail: self.tail,
360            ntail: self.ntail,
361            _marker: self._marker,
362        }
363    }
364}
365
/// Abstraction over the round counts, letting the same driver code implement
/// different SipHash variants.
trait Sip {
    /// Compression rounds, applied once per absorbed 8-byte message word.
    fn c_rounds(_: &mut State);

    /// Finalization rounds, applied during `finish128`.
    fn d_rounds(_: &mut State);
}
371
/// Round counts for SipHash-1-3: one compression round and three
/// finalization rounds.
#[derive(Debug, Clone, Copy, Default)]
struct Sip13Rounds;

impl Sip for Sip13Rounds {
    /// One compression round per message word (the "1" in 1-3).
    #[inline]
    fn c_rounds(state: &mut State) {
        compress!(state);
    }

    /// Three finalization rounds (the "3" in 1-3).
    #[inline]
    fn d_rounds(state: &mut State) {
        compress!(state);
        compress!(state);
        compress!(state);
    }
}
387}