const_siphasher/
sip.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18use core::ptr;
19use core::u64;
20
/// An implementation of SipHash 1-3.
///
/// This variant runs one compression round per absorbed 8-byte block and
/// three finalization rounds.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher13 {
    // Keyed streaming state, specialized to the 1-3 round schedule.
    hasher: Hasher<Sip13Rounds>,
}
29
/// An implementation of SipHash 2-4.
///
/// This variant runs two compression rounds per absorbed 8-byte block and
/// four finalization rounds.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher24 {
    // Keyed streaming state, specialized to the 2-4 round schedule.
    hasher: Hasher<Sip24Rounds>,
}
38
/// An implementation of SipHash 2-4.
///
/// This type is a newtype around [`SipHasher24`] and forwards every
/// operation to it.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
///
/// SipHash is a general-purpose hashing function: it runs at a good
/// speed (competitive with Spooky and City) and permits strong _keyed_
/// hashing. This lets you key your hashtables from a strong RNG, such as
/// [`rand::rngs::OsRng`](https://docs.rs/rand/latest/rand/rngs/struct.OsRng.html).
///
/// Although the SipHash algorithm is considered to be generally strong,
/// it is not intended for cryptographic purposes. As such, all
/// cryptographic uses of this implementation are _strongly discouraged_.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher(SipHasher24);
54
/// Streaming SipHash state shared by the public hasher types.
///
/// The type parameter `S` selects how many compression and finalization
/// rounds are applied; it is only used at the type level.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct Hasher<S: Sip> {
    // First and second 64-bit key words; kept so the keys can be read back.
    k0: u64,
    k1: u64,
    length: usize, // how many bytes we've processed (only the low 8 bits reach the digest)
    state: State,  // the four SipHash working words
    tail: u64,     // input bytes not yet forming a full 8-byte block, packed little-endian
    ntail: usize,  // how many low bytes of `tail` are valid
    // Ties the round schedule `S` to the hasher without storing a value.
    _marker: PhantomData<S>,
}
66
/// The four 64-bit working words of SipHash.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    //
    // NOTE: the declaration order below is therefore deliberate
    // (v0, v2, v1, v3) and is not a typo.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
79
// One SipRound: the add / rotate / xor mixing step applied to the four
// state words. Accepts either a value exposing `v0..v3` fields or the four
// words as separate place expressions, and updates them in place.
macro_rules! compress {
    ($state:expr) => {{
        compress!($state.v0, $state.v1, $state.v2, $state.v3)
    }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        // Left half: mix v1 into v0.
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13) ^ $v0;
        $v0 = $v0.rotate_left(32);
        // Right half: mix v3 into v2.
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16) ^ $v2;
        // Cross the halves: v3 into v0, then v1 into v2.
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21) ^ $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17) ^ $v2;
        $v2 = $v2.rotate_left(32);
    }};
}
101
/// Loads an integer of the desired type from a byte stream, in LE order.
/// Reads through `ptr::read_unaligned`, a fixed-size load that is valid for
/// a possibly unaligned address.
///
/// Must be expanded inside an `unsafe` context.
///
/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)` (the bound
/// is only verified by a `debug_assert!`).
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
        // Fetch the bytes in memory order without assuming alignment.
        let raw: $int_ty = ptr::read_unaligned($buf.as_ptr().add($i) as *const $int_ty);
        // Interpret those bytes as little-endian (a no-op on LE targets).
        $int_ty::from_le(raw)
    }};
}
119
120/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
121/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
122/// sizes and avoid calling `memcpy`, which is good for speed.
123///
124/// Unsafe because: unchecked indexing at start..start+len
125#[inline]
126unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
127    debug_assert!(len < 8);
128    let mut i = 0; // current byte index (from LSB) in the output u64
129    let mut out = 0;
130    if i + 3 < len {
131        out = load_int_le!(buf, start + i, u32) as u64;
132        i += 4;
133    }
134    if i + 1 < len {
135        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
136        i += 2
137    }
138    if i < len {
139        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
140        i += 1;
141    }
142    debug_assert_eq!(i, len);
143    out
144}
145
146impl SipHasher {
147    /// Creates a new `SipHasher` with the two initial keys set to 0.
148    #[inline]
149    pub const fn new() -> SipHasher {
150        SipHasher::new_with_keys(0, 0)
151    }
152
153    /// Creates a `SipHasher` that is keyed off the provided keys.
154    #[inline]
155    pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
156        SipHasher(SipHasher24::new_with_keys(key0, key1))
157    }
158
159    /// Creates a `SipHasher` from a 16 byte key.
160    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
161        let mut b0 = [0u8; 8];
162        let mut b1 = [0u8; 8];
163        b0.copy_from_slice(&key[0..8]);
164        b1.copy_from_slice(&key[8..16]);
165        let key0 = u64::from_le_bytes(b0);
166        let key1 = u64::from_le_bytes(b1);
167        Self::new_with_keys(key0, key1)
168    }
169
170    /// Get the keys used by this hasher
171    pub const fn keys(&self) -> (u64, u64) {
172        (self.0.hasher.k0, self.0.hasher.k1)
173    }
174
175    /// Get the key used by this hasher as a 16 byte vector
176    pub fn key(&self) -> [u8; 16] {
177        let mut bytes = [0u8; 16];
178        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
179        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
180        bytes
181    }
182
183    /// Hash a byte array - This is the easiest and safest way to use SipHash.
184    #[inline]
185    pub fn hash(&self, bytes: &[u8]) -> u64 {
186        let mut hasher = self.0.hasher;
187        hasher.write(bytes);
188        hasher.finish()
189    }
190}
191
192impl SipHasher13 {
193    /// Creates a new `SipHasher13` with the two initial keys set to 0.
194    #[inline]
195    pub const fn new() -> SipHasher13 {
196        SipHasher13::new_with_keys(0, 0)
197    }
198
199    /// Creates a `SipHasher13` that is keyed off the provided keys.
200    #[inline]
201    pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
202        SipHasher13 {
203            hasher: Hasher::new_with_keys(key0, key1),
204        }
205    }
206
207    /// Creates a `SipHasher13` from a 16 byte key.
208    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
209        let mut b0 = [0u8; 8];
210        let mut b1 = [0u8; 8];
211        b0.copy_from_slice(&key[0..8]);
212        b1.copy_from_slice(&key[8..16]);
213        let key0 = u64::from_le_bytes(b0);
214        let key1 = u64::from_le_bytes(b1);
215        Self::new_with_keys(key0, key1)
216    }
217
218    /// Get the keys used by this hasher
219    pub const fn keys(&self) -> (u64, u64) {
220        (self.hasher.k0, self.hasher.k1)
221    }
222
223    /// Get the key used by this hasher as a 16 byte vector
224    pub fn key(&self) -> [u8; 16] {
225        let mut bytes = [0u8; 16];
226        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
227        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
228        bytes
229    }
230
231    /// Hash a byte array - This is the easiest and safest way to use SipHash.
232    #[inline]
233    pub fn hash(&self, bytes: &[u8]) -> u64 {
234        let mut hasher = self.hasher;
235        hasher.write(bytes);
236        hasher.finish()
237    }
238}
239
240impl SipHasher24 {
241    /// Creates a new `SipHasher24` with the two initial keys set to 0.
242    #[inline]
243    pub const fn new() -> SipHasher24 {
244        SipHasher24::new_with_keys(0, 0)
245    }
246
247    /// Creates a `SipHasher24` that is keyed off the provided keys.
248    #[inline]
249    pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
250        SipHasher24 {
251            hasher: Hasher::new_with_keys(key0, key1),
252        }
253    }
254
255    /// Creates a `SipHasher24` from a 16 byte key.
256    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
257        let mut b0 = [0u8; 8];
258        let mut b1 = [0u8; 8];
259        b0.copy_from_slice(&key[0..8]);
260        b1.copy_from_slice(&key[8..16]);
261        let key0 = u64::from_le_bytes(b0);
262        let key1 = u64::from_le_bytes(b1);
263        Self::new_with_keys(key0, key1)
264    }
265
266    /// Get the keys used by this hasher
267    pub const fn keys(&self) -> (u64, u64) {
268        (self.hasher.k0, self.hasher.k1)
269    }
270
271    /// Get the key used by this hasher as a 16 byte vector
272    pub fn key(&self) -> [u8; 16] {
273        let mut bytes = [0u8; 16];
274        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
275        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
276        bytes
277    }
278
279    /// Hash a byte array - This is the easiest and safest way to use SipHash.
280    #[inline]
281    pub fn hash(&self, bytes: &[u8]) -> u64 {
282        let mut hasher = self.hasher;
283        hasher.write(bytes);
284        hasher.finish()
285    }
286}
287
288impl<S: Sip> Hasher<S> {
289    #[inline]
290    const fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
291        let mut state = Hasher {
292            k0: key0,
293            k1: key1,
294            length: 0,
295            state: State {
296                v0: 0,
297                v1: 0,
298                v2: 0,
299                v3: 0,
300            },
301            tail: 0,
302            ntail: 0,
303            _marker: PhantomData,
304        };
305        state = state.reset();
306        state
307    }
308
309    #[inline]
310    #[must_use]
311    const fn reset(mut self) -> Self {
312        self.length = 0;
313        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
314        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
315        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
316        self.state.v3 = self.k1 ^ 0x7465646279746573;
317        self.ntail = 0;
318        self
319    }
320
321    // A specialized write function for values with size <= 8.
322    //
323    // The hashing of multi-byte integers depends on endianness. E.g.:
324    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
325    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
326    //
327    // This function does the right thing for little-endian hardware. On
328    // big-endian hardware `x` must be byte-swapped first to give the right
329    // behaviour. After any byte-swapping, the input must be zero-extended to
330    // 64-bits. The caller is responsible for the byte-swapping and
331    // zero-extension.
332    #[inline]
333    fn short_write<T>(&mut self, _x: T, x: u64) {
334        let size = mem::size_of::<T>();
335        self.length += size;
336
337        // The original number must be zero-extended, not sign-extended.
338        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
339
340        // The number of bytes needed to fill `self.tail`.
341        let needed = 8 - self.ntail;
342
343        self.tail |= x << (8 * self.ntail);
344        if size < needed {
345            self.ntail += size;
346            return;
347        }
348
349        // `self.tail` is full, process it.
350        self.state.v3 ^= self.tail;
351        S::c_rounds(&mut self.state);
352        self.state.v0 ^= self.tail;
353
354        self.ntail = size - needed;
355        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
356    }
357}
358
359impl hash::Hasher for SipHasher {
360    #[inline]
361    fn write(&mut self, msg: &[u8]) {
362        self.0.write(msg)
363    }
364
365    #[inline]
366    fn finish(&self) -> u64 {
367        self.0.finish()
368    }
369
370    #[inline]
371    fn write_usize(&mut self, i: usize) {
372        self.0.write_usize(i);
373    }
374
375    #[inline]
376    fn write_u8(&mut self, i: u8) {
377        self.0.write_u8(i);
378    }
379
380    #[inline]
381    fn write_u16(&mut self, i: u16) {
382        self.0.write_u16(i);
383    }
384
385    #[inline]
386    fn write_u32(&mut self, i: u32) {
387        self.0.write_u32(i);
388    }
389
390    #[inline]
391    fn write_u64(&mut self, i: u64) {
392        self.0.write_u64(i);
393    }
394}
395
396impl hash::Hasher for SipHasher13 {
397    #[inline]
398    fn write(&mut self, msg: &[u8]) {
399        self.hasher.write(msg)
400    }
401
402    #[inline]
403    fn finish(&self) -> u64 {
404        self.hasher.finish()
405    }
406
407    #[inline]
408    fn write_usize(&mut self, i: usize) {
409        self.hasher.write_usize(i);
410    }
411
412    #[inline]
413    fn write_u8(&mut self, i: u8) {
414        self.hasher.write_u8(i);
415    }
416
417    #[inline]
418    fn write_u16(&mut self, i: u16) {
419        self.hasher.write_u16(i);
420    }
421
422    #[inline]
423    fn write_u32(&mut self, i: u32) {
424        self.hasher.write_u32(i);
425    }
426
427    #[inline]
428    fn write_u64(&mut self, i: u64) {
429        self.hasher.write_u64(i);
430    }
431}
432
433impl hash::Hasher for SipHasher24 {
434    #[inline]
435    fn write(&mut self, msg: &[u8]) {
436        self.hasher.write(msg)
437    }
438
439    #[inline]
440    fn finish(&self) -> u64 {
441        self.hasher.finish()
442    }
443
444    #[inline]
445    fn write_usize(&mut self, i: usize) {
446        self.hasher.write_usize(i);
447    }
448
449    #[inline]
450    fn write_u8(&mut self, i: u8) {
451        self.hasher.write_u8(i);
452    }
453
454    #[inline]
455    fn write_u16(&mut self, i: u16) {
456        self.hasher.write_u16(i);
457    }
458
459    #[inline]
460    fn write_u32(&mut self, i: u32) {
461        self.hasher.write_u32(i);
462    }
463
464    #[inline]
465    fn write_u64(&mut self, i: u64) {
466        self.hasher.write_u64(i);
467    }
468}
469
470impl<S: Sip> hash::Hasher for Hasher<S> {
471    #[inline]
472    fn write_usize(&mut self, i: usize) {
473        self.short_write(i, i.to_le() as u64);
474    }
475
476    #[inline]
477    fn write_u8(&mut self, i: u8) {
478        self.short_write(i, i as u64);
479    }
480
481    #[inline]
482    fn write_u32(&mut self, i: u32) {
483        self.short_write(i, i.to_le() as u64);
484    }
485
486    #[inline]
487    fn write_u64(&mut self, i: u64) {
488        self.short_write(i, i.to_le());
489    }
490
491    #[inline]
492    fn write(&mut self, msg: &[u8]) {
493        let length = msg.len();
494        self.length += length;
495
496        let mut needed = 0;
497
498        if self.ntail != 0 {
499            needed = 8 - self.ntail;
500            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
501            if length < needed {
502                self.ntail += length;
503                return;
504            } else {
505                self.state.v3 ^= self.tail;
506                S::c_rounds(&mut self.state);
507                self.state.v0 ^= self.tail;
508                self.ntail = 0;
509            }
510        }
511
512        // Buffered tail is now flushed, process new input.
513        let len = length - needed;
514        let left = len & 0x7;
515
516        let mut i = needed;
517        while i < len - left {
518            let mi = unsafe { load_int_le!(msg, i, u64) };
519
520            self.state.v3 ^= mi;
521            S::c_rounds(&mut self.state);
522            self.state.v0 ^= mi;
523
524            i += 8;
525        }
526
527        self.tail = unsafe { u8to64_le(msg, i, left) };
528        self.ntail = left;
529    }
530
531    #[inline]
532    fn finish(&self) -> u64 {
533        let mut state = self.state;
534
535        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
536
537        state.v3 ^= b;
538        S::c_rounds(&mut state);
539        state.v0 ^= b;
540
541        state.v2 ^= 0xff;
542        S::d_rounds(&mut state);
543
544        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
545    }
546}
547
548impl<S: Sip> Default for Hasher<S> {
549    /// Creates a `Hasher<S>` with the two initial keys set to 0.
550    #[inline]
551    fn default() -> Hasher<S> {
552        Hasher::new_with_keys(0, 0)
553    }
554}
555
/// Round schedule of a SipHash variant: how many mixing rounds are applied
/// at each stage. Implementors are stateless marker types.
#[doc(hidden)]
trait Sip {
    /// Compression rounds, applied for every 8-byte block that is absorbed
    /// (including the final length/tail block built in `finish`).
    fn c_rounds(_: &mut State);
    /// Finalization rounds, applied once when the digest is produced.
    fn d_rounds(_: &mut State);
}
561
562#[derive(Debug, Clone, Copy, Default)]
563struct Sip13Rounds;
564
565impl Sip for Sip13Rounds {
566    #[inline]
567    fn c_rounds(state: &mut State) {
568        compress!(state);
569    }
570
571    #[inline]
572    fn d_rounds(state: &mut State) {
573        compress!(state);
574        compress!(state);
575        compress!(state);
576    }
577}
578
579#[derive(Debug, Clone, Copy, Default)]
580struct Sip24Rounds;
581
582impl Sip for Sip24Rounds {
583    #[inline]
584    fn c_rounds(state: &mut State) {
585        compress!(state);
586        compress!(state);
587    }
588
589    #[inline]
590    fn d_rounds(state: &mut State) {
591        compress!(state);
592        compress!(state);
593        compress!(state);
594        compress!(state);
595    }
596}