// rand_seeder/sip.rs

// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! An implementation of SipHash.
//!
//! Shamelessly stolen from the Rust std lib (libcore/hash/sip.rs) and adapted.
//!
//! Only the official variant, SipHash 2-4, is included. Other variants such
//! as the reduced 1-3 (used by libstd's `DefaultHasher`) could be added easily.
17
use core::marker::PhantomData;
use core::{cmp, hash, mem, ptr, slice};
use rand_core::{impls, le, RngCore, SeedableRng};
21
22/// A portable implementation of SipHash 2-4.
23///
24/// This implementation will produce 8-byte (`u64`) output compliant with the
25/// reference implementation. Additionally, it can be extended into an RNG able
26/// to produce unlimited output via [`SipHasher::into_rng`].
27///
28/// [SipHash](https://en.wikipedia.org/wiki/SipHash)
29/// is a general-purpose hashing function: it runs at a good
30/// speed (competitive with Spooky and City) and permits strong keyed hashing.
31///
32/// Although the SipHash algorithm is considered strong, it is not intended for
33/// cryptographic uses (e.g. authentication).
34#[derive(Debug, Clone, Default)]
35pub struct SipHasher {
36    hasher: Hasher<Sip24Rounds>,
37}
38
39/// A generator built using SipHash's primitives.
40///
41/// [`SipRng`] is statistically high-quality, passing practrand tests to at
42/// least 4 TiB. It is also reasonably fast, though not quite competitive with
43/// the best non-cryptographic RNGs or optimised block RNGs such as ChaCha.
44///
45/// This implementation is fixed to use two "compression" rounds between output
46/// values (similar to SipHash 2-4). Construction via [`SipHasher::into_rng`]
47/// adds two extra rounds to maintain four rounds between final input
48/// consumption and the first output, however this first result is not identical
49/// to SipHash's result.
50///
51/// Although this generator is heavily based on the design of SipHash, it has
52/// not been reviewed for cryptographic strength, and thus cannot be recommended
53/// for applications requiring this property.
54#[derive(Debug, Clone)]
55pub struct SipRng {
56    state: State,
57    adj: u64,
58}
59
60#[derive(Debug)]
61struct Hasher<S: Sip> {
62    k0: u64,
63    k1: u64,
64    length: usize, // how many bytes we've processed
65    state: State,  // hash State
66    tail: u64,     // unprocessed bytes le
67    ntail: usize,  // how many bytes in tail are valid
68    _marker: PhantomData<S>,
69}
70
/// The four 64-bit words of internal SipHash state.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
83
// One round of the SipHash ARX (add-rotate-xor) permutation, operating either
// directly on a `State` or on four named `u64` lvalues. The rotation constants
// (13, 16, 21, 17 and the two word-swapping 32-bit rotates) are those of the
// SipHash reference design.
macro_rules! compress {
    ($state:expr) => {{
        compress!($state.v0, $state.v1, $state.v2, $state.v3)
    }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}
105
/// Loads an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at i..i+size_of(int_ty)
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
        let mut data = 0 as $int_ty;
        // Byte-wise copy avoids any alignment requirement on the source.
        ptr::copy_nonoverlapping(
            $buf.as_ptr().add($i),
            &mut data as *mut _ as *mut u8,
            mem::size_of::<$int_ty>(),
        );
        // `to_le` makes the result independent of host endianness: the bytes
        // are always interpreted as little-endian.
        data.to_le()
    }};
}
123
124/// Loads an u64 using up to 7 bytes of a byte slice.
125///
126/// Unsafe because: unchecked indexing at start..start+len
127#[inline]
128unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
129    unsafe {
130        debug_assert!(len < 8);
131        let mut i = 0; // current byte index (from LSB) in the output u64
132        let mut out = 0;
133        if i + 3 < len {
134            out = load_int_le!(buf, start + i, u32) as u64;
135            i += 4;
136        }
137        if i + 1 < len {
138            out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
139            i += 2
140        }
141        if i < len {
142            out |= (*buf.as_ptr().add(start + i) as u64) << (i * 8);
143            i += 1;
144        }
145        debug_assert_eq!(i, len);
146        out
147    }
148}
149
150impl SipHasher {
151    /// Create a new `SipHasher` (equivalent to `from_keys(0, 0)`).
152    #[inline]
153    pub fn new() -> Self {
154        SipHasher::from_keys(0, 0)
155    }
156
157    /// Create a `SipHasher` using the given keys.
158    #[inline]
159    pub fn from_keys(key0: u64, key1: u64) -> Self {
160        SipHasher {
161            hasher: Hasher::from_keys(key0, key1),
162        }
163    }
164
165    /// Finish writes and convert the hasher's core into a generator.
166    ///
167    /// This offers a fast, elegant transition from hash function to generator
168    /// which maintains (up to) 256 bits of entropy.
169    ///
170    /// Note that this transition has not been reviewed for cryptographic
171    /// strength, and might break SipHash's security.
172    pub fn into_rng(self) -> SipRng {
173        self.hasher.into_rng()
174    }
175}
176
177impl SipRng {
178    // private constructor
179    fn from_state(state: State) -> SipRng {
180        SipRng { state, adj: 0x13 }
181    }
182}
183
184impl RngCore for SipRng {
185    fn next_u32(&mut self) -> u32 {
186        // Lazy way to implement. Good enough for seeding RNGs.
187        self.next_u64() as u32
188    }
189
190    fn next_u64(&mut self) -> u64 {
191        self.state.v2 ^= self.adj;
192        self.adj = self.adj.wrapping_sub(0x11);
193
194        Sip24Rounds::c_rounds(&mut self.state);
195
196        self.state.v0 ^ self.state.v1 ^ self.state.v2 ^ self.state.v3
197    }
198
199    fn fill_bytes(&mut self, dest: &mut [u8]) {
200        impls::fill_bytes_via_next(self, dest)
201    }
202}
203
204impl SeedableRng for SipRng {
205    type Seed = [u8; 32];
206
207    fn from_seed(seed: Self::Seed) -> Self {
208        let mut keys = [0u64; 4];
209        le::read_u64_into(&seed, &mut keys);
210        SipRng::from_state(State {
211            v0: keys[0],
212            v1: keys[1],
213            v2: keys[2],
214            v3: keys[3],
215        })
216    }
217}
218
219impl<S: Sip> Hasher<S> {
220    #[inline]
221    fn from_keys(key0: u64, key1: u64) -> Hasher<S> {
222        let mut state = Hasher {
223            k0: key0,
224            k1: key1,
225            length: 0,
226            state: State {
227                v0: 0,
228                v1: 0,
229                v2: 0,
230                v3: 0,
231            },
232            tail: 0,
233            ntail: 0,
234            _marker: PhantomData,
235        };
236        state.reset();
237        state
238    }
239
240    #[inline]
241    fn reset(&mut self) {
242        self.length = 0;
243        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
244        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
245        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
246        self.state.v3 = self.k1 ^ 0x7465646279746573;
247        self.ntail = 0;
248    }
249
250    // Specialized write function that is only valid for buffers with len <= 8.
251    // It's used to force inlining of write_u8 and write_usize, those would normally be inlined
252    // except for composite types (that includes slices and str hashing because of delimiter).
253    // Without this extra push the compiler is very reluctant to inline delimiter writes,
254    // degrading performance substantially for the most common use cases.
255    #[inline]
256    fn short_write(&mut self, msg: &[u8]) {
257        debug_assert!(msg.len() <= 8);
258        let length = msg.len();
259        self.length += length;
260
261        let needed = 8 - self.ntail;
262        let fill = cmp::min(length, needed);
263        if fill == 8 {
264            self.tail = unsafe { load_int_le!(msg, 0, u64) };
265        } else {
266            self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail);
267            if length < needed {
268                self.ntail += length;
269                return;
270            }
271        }
272        self.state.v3 ^= self.tail;
273        S::c_rounds(&mut self.state);
274        self.state.v0 ^= self.tail;
275
276        // Buffered tail is now flushed, process new input.
277        self.ntail = length - needed;
278        self.tail = unsafe { u8to64_le(msg, needed, self.ntail) };
279    }
280
281    fn into_rng(self) -> SipRng {
282        let mut state = self.state;
283
284        // Finish
285        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
286
287        state.v3 ^= b;
288        S::c_rounds(&mut state);
289        state.v0 ^= b;
290
291        // This is supposed to be d - c rounds (here 4 - 2 = 2)
292        S::c_rounds(&mut state);
293
294        SipRng::from_state(state)
295    }
296}
297
298/// Implements the standard library's `Hasher` trait.
299///
300/// Note that all methods are implemented directly to fix Endianness, unlike
301/// the default implementations in the standard library.
302impl hash::Hasher for SipHasher {
303    #[inline]
304    fn finish(&self) -> u64 {
305        self.hasher.finish()
306    }
307
308    #[inline]
309    fn write(&mut self, msg: &[u8]) {
310        self.hasher.write(msg)
311    }
312
313    #[inline]
314    fn write_u8(&mut self, i: u8) {
315        self.write(&[i])
316    }
317    #[inline]
318    fn write_u16(&mut self, i: u16) {
319        self.write(&i.to_le_bytes())
320    }
321    #[inline]
322    fn write_u32(&mut self, i: u32) {
323        self.write(&i.to_le_bytes())
324    }
325    #[inline]
326    fn write_u64(&mut self, i: u64) {
327        self.write(&i.to_le_bytes())
328    }
329    #[inline]
330    fn write_u128(&mut self, i: u128) {
331        self.write(&i.to_le_bytes())
332    }
333
334    /// For portability reasons, the `usize` input is interpreted as a `u128`.
335    #[inline]
336    fn write_usize(&mut self, i: usize) {
337        self.write_u128(i as u128);
338    }
339
340    #[inline]
341    fn write_i8(&mut self, i: i8) {
342        self.write_u8(i as u8)
343    }
344    #[inline]
345    fn write_i16(&mut self, i: i16) {
346        self.write(&i.to_le_bytes())
347    }
348    #[inline]
349    fn write_i32(&mut self, i: i32) {
350        self.write(&i.to_le_bytes())
351    }
352    #[inline]
353    fn write_i64(&mut self, i: i64) {
354        self.write(&i.to_le_bytes())
355    }
356    #[inline]
357    fn write_i128(&mut self, i: i128) {
358        self.write(&i.to_le_bytes())
359    }
360
361    /// For portability reasons, the `isize` input is interpreted as a `i128`.
362    #[inline]
363    fn write_isize(&mut self, i: isize) {
364        self.write_i128(i as i128);
365    }
366}
367
368impl<S: Sip> hash::Hasher for Hasher<S> {
369    // see short_write comment for explanation
370    #[inline]
371    fn write_usize(&mut self, i: usize) {
372        let bytes = unsafe {
373            slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>())
374        };
375        self.short_write(bytes);
376    }
377
378    // see short_write comment for explanation
379    #[inline]
380    fn write_u8(&mut self, i: u8) {
381        self.short_write(&[i]);
382    }
383
384    #[inline]
385    fn write(&mut self, msg: &[u8]) {
386        let length = msg.len();
387        self.length += length;
388
389        let mut needed = 0;
390
391        if self.ntail != 0 {
392            needed = 8 - self.ntail;
393            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
394            if length < needed {
395                self.ntail += length;
396                return;
397            } else {
398                self.state.v3 ^= self.tail;
399                S::c_rounds(&mut self.state);
400                self.state.v0 ^= self.tail;
401                self.ntail = 0;
402            }
403        }
404
405        // Buffered tail is now flushed, process new input.
406        let len = length - needed;
407        let left = len & 0x7;
408
409        let mut i = needed;
410        while i < len - left {
411            let mi = unsafe { load_int_le!(msg, i, u64) };
412
413            self.state.v3 ^= mi;
414            S::c_rounds(&mut self.state);
415            self.state.v0 ^= mi;
416
417            i += 8;
418        }
419
420        self.tail = unsafe { u8to64_le(msg, i, left) };
421        self.ntail = left;
422    }
423
424    /// Reduces the state to a `u64` value, as in the reference implementation.
425    #[inline]
426    fn finish(&self) -> u64 {
427        let mut state = self.state;
428
429        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
430
431        state.v3 ^= b;
432        S::c_rounds(&mut state);
433        state.v0 ^= b;
434
435        state.v2 ^= 0xff;
436        S::d_rounds(&mut state);
437
438        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
439    }
440}
441
442impl<H: hash::Hash> From<H> for SipHasher {
443    #[inline]
444    fn from(h: H) -> SipHasher {
445        let mut hasher = SipHasher::new();
446        h.hash(&mut hasher);
447        hasher
448    }
449}
450
451impl<S: Sip> Clone for Hasher<S> {
452    #[inline]
453    fn clone(&self) -> Hasher<S> {
454        Hasher {
455            k0: self.k0,
456            k1: self.k1,
457            length: self.length,
458            state: self.state,
459            tail: self.tail,
460            ntail: self.ntail,
461            _marker: self._marker,
462        }
463    }
464}
465
466impl<S: Sip> Default for Hasher<S> {
467    /// Creates a `Hasher<S>` with the two initial keys set to 0.
468    #[inline]
469    fn default() -> Hasher<S> {
470        Hasher::from_keys(0, 0)
471    }
472}
473
474#[doc(hidden)]
475trait Sip {
476    fn c_rounds(_: &mut State);
477    fn d_rounds(_: &mut State);
478}
479
480#[derive(Debug, Clone, Default)]
481struct Sip24Rounds;
482
483impl Sip for Sip24Rounds {
484    #[inline]
485    fn c_rounds(state: &mut State) {
486        compress!(state);
487        compress!(state);
488    }
489
490    #[inline]
491    fn d_rounds(state: &mut State) {
492        compress!(state);
493        compress!(state);
494        compress!(state);
495        compress!(state);
496    }
497}
498
#[cfg(test)]
mod test {
    use super::*;
    use core::hash::Hasher;

    // Checks SipHash 2-4 output against the reference test vectors
    // (key = 00..0f, message = 00..3f prefixes).
    #[test]
    fn test_hash_vectors() {
        let k_bytes = [0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
        let (k0, k1) = unsafe {
            (
                load_int_le!(&k_bytes, 0, u64),
                load_int_le!(&k_bytes, 8, u64),
            )
        };

        let mut msg = [0u8; 64];
        for (pos, i) in msg.iter_mut().zip(0u8..64) {
            *pos = i;
        }

        // From reference vectors, converted to u64:
        let tests = [
            (0, 0x726fdb47dd0e0e31),
            (1, 0x74f839c593dc67fd),
            (63, 0x958a324ceb064572),
        ];

        for (len, val) in &tests {
            let mut hasher = SipHasher::from_keys(k0, k1);
            hasher.write(&msg[0..*len]);
            assert_eq!(
                hasher.finish(),
                *val,
                "mismatch with reference vector for i={}",
                *len
            );
        }
    }

    #[test]
    fn test_rng_vectors() {
        // SipRng has no external reference. These vectors are defined here.

        let mut seed = [0u8; 32];
        let mut rng = SipRng::from_seed(seed);

        let vector = [
            0x4c022e4ec04e602a,
            0xc2c0399c269058d6,
            0xf5c7399cde9c362c,
            0x37e5b9491363680a,
            0x9582782644903316,
            0x2a9d2e160aad88d,
            0x983958db9376e6f6,
            0xdead8960b8524928,
            0xcfa886c6642c1b2f,
            0x8f8f91fcf7045f2a,
            0x1bbda585fc387fb3,
            0x242485d9cc54c688,
            0x9be110f767d8cee,
            0xd61076dfc3569ab3,
            0x8f6092dd2692af57,
            0xbdf362ab8e29260b,
        ];
        // Generator used to produce the vectors above:
        // for _ in 0..8 {
        //     println!("0x{:x}, 0x{:x},", rng.next_u64(), rng.next_u64());
        // }
        for x in &vector {
            assert_eq!(rng.next_u64(), *x);
        }

        // set seed to 0, 1, 2, ..., 31
        for (pos, i) in seed.iter_mut().zip(0u8..32) {
            *pos = i;
        }
        let mut rng = SipRng::from_seed(seed);

        let vector = [
            0x479bf2823a7a923e,
            0xf04e2cbc75d554d,
            0xd589aceb3b65f36b,
            0x91f8758ab30951a,
            0x10d2bebadd90c381,
            0xb3a6345b6273b101,
            0xd05dbd603684e153,
            0xabaaa983f818f5db,
            0x2a063ed10d464bf2,
            0x1d395c4c511e9073,
            0x43011ca87ead4d7c,
            0x22acb2bfbca6a069,
            0xdd6b8dd2abb4d8f,
            0xb3bc3889e7142461,
            0x62cbac703609d15,
            0x74aec28d9fdd44bf,
        ];
        for x in &vector {
            assert_eq!(rng.next_u64(), *x);
        }
    }
}