const_siphasher/
sip.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash.
12
13use core::hash;
14use core::marker::PhantomData;
15use core::mem;
16use core::ptr;
17
18use crate::{Sip, Sip13Rounds, Sip24Rounds};
19
20/// An implementation of SipHash 1-3.
21///
22/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
23#[derive(Debug, Clone, Copy, Default)]
24#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
25pub struct SipHasher13 {
26    hasher: Hasher<Sip13Rounds>,
27}
28
29/// An implementation of SipHash 2-4.
30///
31/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
32#[derive(Debug, Clone, Copy, Default)]
33#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
34pub struct SipHasher24 {
35    hasher: Hasher<Sip24Rounds>,
36}
37
38/// An implementation of SipHash 2-4.
39///
40/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
41///
42/// SipHash is a general-purpose hashing function: it runs at a good
43/// speed (competitive with Spooky and City) and permits strong _keyed_
44/// hashing. This lets you key your hashtables from a strong RNG, such as
45/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
46///
47/// Although the SipHash algorithm is considered to be generally strong,
48/// it is not intended for cryptographic purposes. As such, all
49/// cryptographic uses of this implementation are _strongly discouraged_.
50#[derive(Debug, Clone, Copy, Default)]
51#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
52pub struct SipHasher(SipHasher24);
53
54#[derive(Debug, Clone, Copy)]
55#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
56struct Hasher<S: Sip> {
57    k0: u64,
58    k1: u64,
59    length: usize, // how many bytes we've processed
60    state: State,  // hash State
61    tail: u64,     // unprocessed bytes le
62    ntail: usize,  // how many bytes in tail are valid
63    _marker: PhantomData<S>,
64}
65
66#[derive(Debug, Clone, Copy)]
67#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
68struct State {
69    // v0, v2 and v1, v3 show up in pairs in the algorithm,
70    // and simd implementations of SipHash will use vectors
71    // of v02 and v13. By placing them in this order in the struct,
72    // the compiler can pick up on just a few simd optimizations by itself.
73    v0: u64,
74    v2: u64,
75    v1: u64,
76    v3: u64,
77}
78
79macro_rules! compress {
80    ($state:expr) => {{
81        compress!($state.v0, $state.v1, $state.v2, $state.v3)
82    }};
83    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
84        $v0 = $v0.wrapping_add($v1);
85        $v1 = $v1.rotate_left(13);
86        $v1 ^= $v0;
87        $v0 = $v0.rotate_left(32);
88        $v2 = $v2.wrapping_add($v3);
89        $v3 = $v3.rotate_left(16);
90        $v3 ^= $v2;
91        $v0 = $v0.wrapping_add($v3);
92        $v3 = $v3.rotate_left(21);
93        $v3 ^= $v0;
94        $v2 = $v2.wrapping_add($v1);
95        $v1 = $v1.rotate_left(17);
96        $v1 ^= $v2;
97        $v2 = $v2.rotate_left(32);
98    }};
99}
100
101/// Loads an integer of the desired type from a byte stream, in LE order. Uses
102/// `copy_nonoverlapping` to let the compiler generate the most efficient way
103/// to load it from a possibly unaligned address.
104///
105/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
106macro_rules! load_int_le {
107    ($buf:expr, $i:expr, $int_ty:ident) => {{
108        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
109        let mut data = 0 as $int_ty;
110        ptr::copy_nonoverlapping(
111            $buf.as_ptr().add($i),
112            &mut data as *mut _ as *mut u8,
113            mem::size_of::<$int_ty>(),
114        );
115        data.to_le()
116    }};
117}
118
119/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
120/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
121/// sizes and avoid calling `memcpy`, which is good for speed.
122///
123/// Unsafe because: unchecked indexing at start..start+len
124#[inline]
125const unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
126    debug_assert!(len < 8);
127    let mut i = 0; // current byte index (from LSB) in the output u64
128    let mut out = 0;
129    if i + 3 < len {
130        out = load_int_le!(buf, start + i, u32) as u64;
131        i += 4;
132    }
133    if i + 1 < len {
134        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
135        i += 2
136    }
137    if i < len {
138        out |= (ptr::read(buf.as_ptr().add(start + i)) as u64) << (i * 8);
139        i += 1;
140    }
141    debug_assert!(i == len);
142    out
143}
144
145impl SipHasher {
146    /// Creates a new `SipHasher` with the two initial keys set to 0.
147    #[inline]
148    pub const fn new() -> SipHasher {
149        SipHasher::new_with_keys(0, 0)
150    }
151
152    /// Creates a `SipHasher` that is keyed off the provided keys.
153    #[inline]
154    pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
155        SipHasher(SipHasher24::new_with_keys(key0, key1))
156    }
157
158    /// Creates a `SipHasher` from a 16 byte key.
159    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
160        let mut b0 = [0u8; 8];
161        let mut b1 = [0u8; 8];
162        b0.copy_from_slice(&key[0..8]);
163        b1.copy_from_slice(&key[8..16]);
164        let key0 = u64::from_le_bytes(b0);
165        let key1 = u64::from_le_bytes(b1);
166        Self::new_with_keys(key0, key1)
167    }
168
169    /// Get the keys used by this hasher
170    pub const fn keys(&self) -> (u64, u64) {
171        (self.0.hasher.k0, self.0.hasher.k1)
172    }
173
174    /// Get the key used by this hasher as a 16 byte vector
175    pub fn key(&self) -> [u8; 16] {
176        let mut bytes = [0u8; 16];
177        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
178        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
179        bytes
180    }
181
182    /// Hash a byte array - This is the easiest and safest way to use SipHash.
183    #[inline]
184    pub const fn hash(&self, bytes: &[u8]) -> u64 {
185        let mut hasher = self.0.hasher;
186        hasher.write(bytes);
187        hasher.finish()
188    }
189}
190
191impl SipHasher13 {
192    /// Creates a new `SipHasher13` with the two initial keys set to 0.
193    #[inline]
194    pub const fn new() -> SipHasher13 {
195        SipHasher13::new_with_keys(0, 0)
196    }
197
198    /// Creates a `SipHasher13` that is keyed off the provided keys.
199    #[inline]
200    pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
201        SipHasher13 {
202            hasher: Hasher::new_with_keys(key0, key1),
203        }
204    }
205
206    /// Creates a `SipHasher13` from a 16 byte key.
207    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
208        let mut b0 = [0u8; 8];
209        let mut b1 = [0u8; 8];
210        b0.copy_from_slice(&key[0..8]);
211        b1.copy_from_slice(&key[8..16]);
212        let key0 = u64::from_le_bytes(b0);
213        let key1 = u64::from_le_bytes(b1);
214        Self::new_with_keys(key0, key1)
215    }
216
217    /// Get the keys used by this hasher
218    pub const fn keys(&self) -> (u64, u64) {
219        (self.hasher.k0, self.hasher.k1)
220    }
221
222    /// Get the key used by this hasher as a 16 byte vector
223    pub fn key(&self) -> [u8; 16] {
224        let mut bytes = [0u8; 16];
225        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
226        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
227        bytes
228    }
229
230    /// Hash a byte array - This is the easiest and safest way to use SipHash.
231    #[inline]
232    pub const fn hash(&self, bytes: &[u8]) -> u64 {
233        let mut hasher = self.hasher;
234        hasher.write(bytes);
235        hasher.finish()
236    }
237}
238
239impl SipHasher24 {
240    /// Creates a new `SipHasher24` with the two initial keys set to 0.
241    #[inline]
242    pub const fn new() -> SipHasher24 {
243        SipHasher24::new_with_keys(0, 0)
244    }
245
246    /// Creates a `SipHasher24` that is keyed off the provided keys.
247    #[inline]
248    pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
249        SipHasher24 {
250            hasher: Hasher::new_with_keys(key0, key1),
251        }
252    }
253
254    /// Creates a `SipHasher24` from a 16 byte key.
255    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
256        let mut b0 = [0u8; 8];
257        let mut b1 = [0u8; 8];
258        b0.copy_from_slice(&key[0..8]);
259        b1.copy_from_slice(&key[8..16]);
260        let key0 = u64::from_le_bytes(b0);
261        let key1 = u64::from_le_bytes(b1);
262        Self::new_with_keys(key0, key1)
263    }
264
265    /// Get the keys used by this hasher
266    pub const fn keys(&self) -> (u64, u64) {
267        (self.hasher.k0, self.hasher.k1)
268    }
269
270    /// Get the key used by this hasher as a 16 byte vector
271    pub fn key(&self) -> [u8; 16] {
272        let mut bytes = [0u8; 16];
273        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
274        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
275        bytes
276    }
277
278    /// Hash a byte array - This is the easiest and safest way to use SipHash.
279    #[inline]
280    pub const fn hash(&self, bytes: &[u8]) -> u64 {
281        let mut hasher = self.hasher;
282        hasher.write(bytes);
283        hasher.finish()
284    }
285}
286
287impl<S: Sip> Hasher<S> {
288    #[inline]
289    const fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
290        let mut state = Hasher {
291            k0: key0,
292            k1: key1,
293            length: 0,
294            state: State {
295                v0: 0,
296                v1: 0,
297                v2: 0,
298                v3: 0,
299            },
300            tail: 0,
301            ntail: 0,
302            _marker: PhantomData,
303        };
304        state = state.reset();
305        state
306    }
307
308    #[inline]
309    #[must_use]
310    const fn reset(mut self) -> Self {
311        self.length = 0;
312        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
313        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
314        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
315        self.state.v3 = self.k1 ^ 0x7465646279746573;
316        self.ntail = 0;
317        self
318    }
319
320    // A specialized write function for values with size <= 8.
321    //
322    // The hashing of multi-byte integers depends on endianness. E.g.:
323    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
324    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
325    //
326    // This function does the right thing for little-endian hardware. On
327    // big-endian hardware `x` must be byte-swapped first to give the right
328    // behaviour. After any byte-swapping, the input must be zero-extended to
329    // 64-bits. The caller is responsible for the byte-swapping and
330    // zero-extension.
331    #[inline]
332    const fn short_write<T>(&mut self, _x: &T, x: u64) {
333        let size = mem::size_of::<T>();
334        self.length += size;
335
336        // The original number must be zero-extended, not sign-extended.
337        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
338
339        // The number of bytes needed to fill `self.tail`.
340        let needed = 8 - self.ntail;
341
342        self.tail |= x << (8 * self.ntail);
343        if size < needed {
344            self.ntail += size;
345            return;
346        }
347
348        // `self.tail` is full, process it.
349        self.state.v3 ^= self.tail;
350        self.state = c_rounds::<S>(self.state);
351        self.state.v0 ^= self.tail;
352
353        self.ntail = size - needed;
354        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
355    }
356}
357
358impl hash::Hasher for SipHasher {
359    #[inline]
360    fn write(&mut self, msg: &[u8]) {
361        self.write(msg)
362    }
363
364    #[inline]
365    fn finish(&self) -> u64 {
366        self.finish()
367    }
368
369    #[inline]
370    fn write_usize(&mut self, i: usize) {
371        self.write_usize(i);
372    }
373
374    #[inline]
375    fn write_u8(&mut self, i: u8) {
376        self.write_u8(i);
377    }
378
379    #[inline]
380    fn write_u16(&mut self, i: u16) {
381        self.write_u16(i);
382    }
383
384    #[inline]
385    fn write_u32(&mut self, i: u32) {
386        self.write_u32(i);
387    }
388
389    #[inline]
390    fn write_u64(&mut self, i: u64) {
391        self.write_u64(i);
392    }
393}
394
395impl SipHasher {
396    #[inline]
397    pub const fn write(&mut self, msg: &[u8]) {
398        self.0.write(msg)
399    }
400
401    #[inline]
402    pub const fn finish(&self) -> u64 {
403        self.0.finish()
404    }
405
406    #[inline]
407    pub const fn write_usize(&mut self, i: usize) {
408        self.0.write_usize(i);
409    }
410
411    #[inline]
412    pub const fn write_u8(&mut self, i: u8) {
413        self.0.write_u8(i);
414    }
415
416    #[inline]
417    pub const fn write_u16(&mut self, i: u16) {
418        self.0.write_u16(i);
419    }
420
421    #[inline]
422    pub const fn write_u32(&mut self, i: u32) {
423        self.0.write_u32(i);
424    }
425
426    #[inline]
427    pub const fn write_u64(&mut self, i: u64) {
428        self.0.write_u64(i);
429    }
430}
431
432impl hash::Hasher for SipHasher13 {
433    #[inline]
434    fn write(&mut self, msg: &[u8]) {
435        self.write(msg)
436    }
437
438    #[inline]
439    fn finish(&self) -> u64 {
440        self.finish()
441    }
442
443    #[inline]
444    fn write_usize(&mut self, i: usize) {
445        self.write_usize(i);
446    }
447
448    #[inline]
449    fn write_u8(&mut self, i: u8) {
450        self.write_u8(i);
451    }
452
453    #[inline]
454    fn write_u16(&mut self, i: u16) {
455        self.write_u16(i);
456    }
457
458    #[inline]
459    fn write_u32(&mut self, i: u32) {
460        self.write_u32(i);
461    }
462
463    #[inline]
464    fn write_u64(&mut self, i: u64) {
465        self.write_u64(i);
466    }
467}
468
469impl SipHasher13 {
470    #[inline]
471    pub const fn write(&mut self, msg: &[u8]) {
472        self.hasher.write(msg)
473    }
474
475    #[inline]
476    pub const fn finish(&self) -> u64 {
477        self.hasher.finish()
478    }
479
480    #[inline]
481    pub const fn write_usize(&mut self, i: usize) {
482        self.hasher.write_usize(i);
483    }
484
485    #[inline]
486    pub const fn write_u8(&mut self, i: u8) {
487        self.hasher.write_u8(i);
488    }
489
490    #[inline]
491    pub const fn write_u16(&mut self, i: u16) {
492        self.hasher.write_u16(i);
493    }
494
495    #[inline]
496    pub const fn write_u32(&mut self, i: u32) {
497        self.hasher.write_u32(i);
498    }
499
500    #[inline]
501    pub const fn write_u64(&mut self, i: u64) {
502        self.hasher.write_u64(i);
503    }
504}
505
506impl hash::Hasher for SipHasher24 {
507    #[inline]
508    fn write(&mut self, msg: &[u8]) {
509        self.write(msg)
510    }
511
512    #[inline]
513    fn finish(&self) -> u64 {
514        self.finish()
515    }
516
517    #[inline]
518    fn write_usize(&mut self, i: usize) {
519        self.write_usize(i);
520    }
521
522    #[inline]
523    fn write_u8(&mut self, i: u8) {
524        self.write_u8(i);
525    }
526
527    #[inline]
528    fn write_u16(&mut self, i: u16) {
529        self.write_u16(i);
530    }
531
532    #[inline]
533    fn write_u32(&mut self, i: u32) {
534        self.write_u32(i);
535    }
536
537    #[inline]
538    fn write_u64(&mut self, i: u64) {
539        self.write_u64(i);
540    }
541}
542
543impl SipHasher24 {
544    #[inline]
545    pub const fn write(&mut self, msg: &[u8]) {
546        self.hasher.write(msg)
547    }
548
549    #[inline]
550    pub const fn finish(&self) -> u64 {
551        self.hasher.finish()
552    }
553
554    #[inline]
555    pub const fn write_usize(&mut self, i: usize) {
556        self.hasher.write_usize(i);
557    }
558
559    #[inline]
560    pub const fn write_u8(&mut self, i: u8) {
561        self.hasher.write_u8(i);
562    }
563
564    #[inline]
565    pub const fn write_u16(&mut self, i: u16) {
566        self.hasher.write_u16(i);
567    }
568
569    #[inline]
570    pub const fn write_u32(&mut self, i: u32) {
571        self.hasher.write_u32(i);
572    }
573
574    #[inline]
575    pub const fn write_u64(&mut self, i: u64) {
576        self.hasher.write_u64(i);
577    }
578}
579
580impl<S: Sip> Hasher<S> {
581    #[inline]
582    const fn write_usize(&mut self, i: usize) {
583        self.short_write(&i, i.to_le() as u64);
584    }
585
586    #[inline]
587    const fn write_u8(&mut self, i: u8) {
588        self.short_write(&i, i as u64);
589    }
590
591    #[inline]
592    const fn write_u16(&mut self, i: u16) {
593        self.short_write(&i, i.to_le() as u64);
594    }
595
596    #[inline]
597    const fn write_u32(&mut self, i: u32) {
598        self.short_write(&i, i.to_le() as u64);
599    }
600
601    #[inline]
602    const fn write_u64(&mut self, i: u64) {
603        self.short_write(&i, i.to_le());
604    }
605
606    #[inline]
607    const fn write(&mut self, msg: &[u8]) {
608        let length = msg.len();
609        self.length += length;
610
611        let mut needed = 0;
612
613        if self.ntail != 0 {
614            needed = 8 - self.ntail;
615            if length < needed {
616                self.tail |= unsafe { u8to64_le(msg, 0, length) } << (8 * self.ntail);
617                self.ntail += length;
618                return;
619            } else {
620                self.tail |= unsafe { u8to64_le(msg, 0, needed) } << (8 * self.ntail);
621                self.state.v3 ^= self.tail;
622                self.state = c_rounds::<S>(self.state);
623                self.state.v0 ^= self.tail;
624                self.ntail = 0;
625            }
626        }
627
628        // Buffered tail is now flushed, process new input.
629        let len = length - needed;
630        let left = len & 0x7;
631
632        let mut i = needed;
633        while i < len - left {
634            let mi = unsafe { load_int_le!(msg, i, u64) };
635
636            self.state.v3 ^= mi;
637            self.state = c_rounds::<S>(self.state);
638            self.state.v0 ^= mi;
639
640            i += 8;
641        }
642
643        self.tail = unsafe { u8to64_le(msg, i, left) };
644        self.ntail = left;
645    }
646
647    #[inline]
648    const fn finish(&self) -> u64 {
649        let mut state = self.state;
650
651        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
652
653        state.v3 ^= b;
654        state = c_rounds::<S>(state);
655        state.v0 ^= b;
656
657        state.v2 ^= 0xff;
658        state = d_rounds::<S>(state);
659
660        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
661    }
662}
663
664impl<S: Sip> Default for Hasher<S> {
665    /// Creates a `Hasher<S>` with the two initial keys set to 0.
666    #[inline]
667    fn default() -> Hasher<S> {
668        Hasher::new_with_keys(0, 0)
669    }
670}
671
672const fn c_rounds<S: Sip>(mut state: State) -> State {
673    let mut i = 0;
674    while i < S::C_ROUNDS {
675        compress!(state);
676        i += 1;
677    }
678    state
679}
680
681const fn d_rounds<S: Sip>(mut state: State) -> State {
682    let mut i = 0;
683    while i < S::D_ROUNDS {
684        compress!(state);
685        i += 1;
686    }
687    state
688}