autobahn_hash/
lib.rs

1#![feature(portable_simd)]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![cfg_attr(not(feature = "std"), no_std)]
4#![deny(unsafe_code)]
5//! A pure Rust implementation of [HighwayHash](https://github.com/google/highwayhash).
6//!
7//! A few highlights:
8//! * No `unsafe`
9//! * Fuzzed against the reference implementation
10//! * Minimal crate with few required dependencies
11//! * Portable to any SIMD instruction set (and reasonably fast without SIMD)
12//!
13//! This crate requires the `portable_simd` nightly feature.
14//!
15//! There are two optional features:
16//! * `std`: enables [`RandomState`]
17//! * `multiversion`: enables [`hash_64`] and friends, which select the optimal instruction set at
18//! runtime
19//!   * Adds the `multiversion` crate as a dependency
20//!   * Also enables the `std` feature
21
22use core::simd::{simd_swizzle, u32x8, u64x4, u8x32};
23
24/// A hash instance.
25///
26/// For maximum performance, use this hasher in a larger code block compiled with SIMD target
27/// features enabled.
28#[derive(Clone, Debug)]
29pub struct AutobahnHasher {
30    v0: u64x4,
31    v1: u64x4,
32    mul0: u64x4,
33    mul1: u64x4,
34}
35
36impl Default for AutobahnHasher {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42fn zipper_merge(x: u64x4) -> u64x4 {
43    const INDEX: [usize; 32] = {
44        let half_index = [3, 12, 2, 5, 14, 1, 15, 0, 11, 4, 10, 13, 9, 6, 8, 7];
45        let mut index = [0; 32];
46        let mut i = 0;
47        while i < 32 {
48            index[i] = i / 16 * 16 + half_index[i % 16];
49            i += 1;
50        }
51        index
52    };
53
54    let x: u8x32 = bytemuck::cast(x);
55    bytemuck::cast(simd_swizzle!(x, INDEX))
56}
57
58fn permute(x: u64x4) -> u64x4 {
59    let x: u32x8 = bytemuck::cast(x);
60    let x = simd_swizzle!(x, [5, 4, 7, 6, 1, 0, 3, 2]);
61    bytemuck::cast(x)
62}
63
64fn remainder(bytes: &[u8]) -> [u8; 32] {
65    let mut packet: [u8; 32] = [0u8; 32];
66    let size_mod4 = bytes.len() & 3;
67    let remaining = bytes.len() & !3;
68    let size = bytes.len() as u64;
69
70    packet[..remaining].copy_from_slice(&bytes[..remaining]);
71    if size & 16 != 0 {
72        packet[28..32].copy_from_slice(&bytes[remaining + size_mod4 - 4..remaining + size_mod4]);
73    } else if size_mod4 != 0 {
74        let remainder = &bytes[remaining..];
75        packet[16] = remainder[0];
76        packet[16 + 1] = remainder[size_mod4 >> 1];
77        packet[16 + 2] = remainder[size_mod4 - 1];
78    }
79
80    packet
81}
82
83fn mul_lo_hi(lo: u64x4, hi: u64x4) -> u64x4 {
84    if cfg!(all(target_arch = "aarch64", not(target_feature = "sve"))) {
85        let lo = simd_swizzle!(bytemuck::cast::<_, u32x8>(lo), [0, 2, 4, 6]);
86        let hi = simd_swizzle!(bytemuck::cast::<_, u32x8>(hi), [1, 3, 5, 7]);
87        lo.cast::<u64>() * hi.cast::<u64>()
88    } else {
89        (lo & u64x4::splat(0xffffffff)) * (hi >> u64x4::splat(32))
90    }
91}
92
93fn modular_reduction(a3_unmasked: u64, a2: u64, a1: u64, a0: u64) -> (u64, u64) {
94    let a3 = a3_unmasked & 0x3fffffffffffffff;
95    (
96        a1 ^ ((a3 << 1) | (a2 >> 63)) ^ ((a3 << 2) | (a2 >> 62)),
97        a0 ^ (a2 << 1) ^ (a2 << 2),
98    )
99}
100
101impl AutobahnHasher {
102    /// Create a new `AutobahnHasher`.
103    pub fn new() -> Self {
104        Self::new_with_key([0; 4])
105    }
106
107    /// Create a new `AutobahnHasher` with the given key.
108    pub fn new_with_key(key: [u64; 4]) -> Self {
109        let key = u64x4::from_array(key);
110        let mul0 = u64x4::from_array([
111            0xdbe6d5d5fe4cce2f,
112            0xa4093822299f31d0,
113            0x13198a2e03707344,
114            0x243f6a8885a308d3,
115        ]);
116        let mul1 = u64x4::from_array([
117            0x3bd39e10cb0ef593,
118            0xc0acf169b5f18a8c,
119            0xbe5466cf34e90c6c,
120            0x452821e638d01377,
121        ]);
122        let v0 = mul0 ^ key;
123        let v1 = mul1 ^ (key >> u64x4::splat(32) | key << u64x4::splat(32));
124        Self { v0, v1, mul0, mul1 }
125    }
126
127    fn write_simd(&mut self, packet: u64x4) {
128        self.v1 += self.mul0 + packet;
129        self.mul0 ^= mul_lo_hi(self.v1, self.v0);
130        self.v0 += self.mul1;
131        self.mul1 ^= mul_lo_hi(self.v0, self.v1);
132        self.v0 += zipper_merge(self.v1);
133        self.v1 += zipper_merge(self.v0);
134    }
135
136    /// Write a packet of data to the hasher.
137    pub fn write_packet(&mut self, packet: [u64; 4]) {
138        let packet = u64x4::from_array(packet);
139        self.write_simd(packet);
140    }
141
142    /// Write a packet of data to the hasher, in the form of bytes.
143    pub fn write_bytes(&mut self, bytes: [u8; 32]) {
144        let mut packet = [0; 4];
145        for (i, chunk) in bytes.chunks(8).enumerate() {
146            packet[i] = u64::from_le_bytes(chunk.try_into().unwrap());
147        }
148        self.write_packet(packet);
149    }
150
151    fn finish(&mut self, remainder: &[u8]) {
152        fn rotate_32_by(count: u64, lanes: &mut u64x4) {
153            for lane in lanes.as_mut_array() {
154                let half0: u32 = *lane as u32;
155                let half1: u32 = (*lane >> 32) as u32;
156                *lane = u64::from((half0 << count) | (half0 >> (32 - count)));
157                *lane |= u64::from((half1 << count) | (half1 >> (32 - count))) << 32;
158            }
159        }
160
161        assert!(remainder.len() < 32, "remainder must be less than 32 bytes");
162        if !remainder.is_empty() {
163            let size = remainder.len() as u64;
164            self.v0 += u64x4::splat((size << 32) + size);
165            rotate_32_by(size, &mut self.v1);
166            self.write_bytes(self::remainder(remainder));
167        }
168    }
169
170    /// Produce a 64-bit hash.
171    ///
172    /// The `remainder` bytes must be less than a packet (less than 32 bytes).
173    ///
174    /// Writing the remainder is notably different than `Hasher::write`.  The remainder is padded
175    /// and permuted into a 32-byte packet.
176    ///
177    /// # Panics
178    /// Panics if remainder is not less than 32 bytes.
179    pub fn finish_64(mut self, remainder: &[u8]) -> u64 {
180        self.finish(remainder);
181        for _ in 0..4 {
182            self.write_packet(permute(self.v0).to_array());
183        }
184        self.v0[0]
185            .wrapping_add(self.v1[0])
186            .wrapping_add(self.mul0[0])
187            .wrapping_add(self.mul1[0])
188    }
189
190    /// Produce a 128-bit hash.
191    ///
192    /// The `remainder` bytes must be less than a packet (less than 32 bytes).
193    ///
194    /// Writing the remainder is notably different than `Hasher::write`.  The remainder is padded
195    /// and permuted into a 32-byte packet.
196    ///
197    /// # Panics
198    /// Panics if remainder is not less than 32 bytes.
199    pub fn finish_128(mut self, remainder: &[u8]) -> [u64; 2] {
200        self.finish(remainder);
201        for _ in 0..6 {
202            self.write_packet(permute(self.v0).to_array());
203        }
204
205        [
206            self.v0[0]
207                .wrapping_add(self.mul0[0])
208                .wrapping_add(self.v1[2])
209                .wrapping_add(self.mul1[2]),
210            self.v0[1]
211                .wrapping_add(self.mul0[1])
212                .wrapping_add(self.v1[3])
213                .wrapping_add(self.mul1[3]),
214        ]
215    }
216
217    /// Produce a 256-bit hash.
218    ///
219    /// The `remainder` bytes must be less than a packet (less than 32 bytes).
220    ///
221    /// Writing the remainder is notably different than `Hasher::write`.  The remainder is padded
222    /// and permuted into a 32-byte packet.
223    ///
224    /// # Panics
225    /// Panics if remainder is not less than 32 bytes.
226    pub fn finish_256(mut self, remainder: &[u8]) -> [u64; 4] {
227        self.finish(remainder);
228        for _ in 0..10 {
229            self.write_packet(permute(self.v0).to_array());
230        }
231
232        let (m1, m0) = modular_reduction(
233            self.v1[1].wrapping_add(self.mul1[1]),
234            self.v1[0].wrapping_add(self.mul1[0]),
235            self.v0[1].wrapping_add(self.mul0[1]),
236            self.v0[0].wrapping_add(self.mul0[0]),
237        );
238        let (m3, m2) = modular_reduction(
239            self.v1[3].wrapping_add(self.mul1[3]),
240            self.v1[2].wrapping_add(self.mul1[2]),
241            self.v0[3].wrapping_add(self.mul0[3]),
242            self.v0[2].wrapping_add(self.mul0[2]),
243        );
244        [m0, m1, m2, m3]
245    }
246}
247
248impl core::hash::Hasher for AutobahnHasher {
249    #[inline]
250    fn finish(&self) -> u64 {
251        self.clone().finish_64(&[])
252    }
253
254    #[inline]
255    fn write(&mut self, bytes: &[u8]) {
256        // `Hasher` requires calls to be exactly sequenced (e.g. two calls to `write` does not need
257        // to be the same as a single call to `write` with the same data concatenated).
258        // Therefore, we don't need to buffer and can simply pad bytes.
259        let (bytes, remainder) = bytes.split_at(bytes.len() / 32 * 32);
260        for packet in bytes.chunks(32) {
261            self.write_bytes(packet.try_into().unwrap())
262        }
263        let mut packet = [0; 32];
264        packet[..remainder.len()].copy_from_slice(remainder);
265        self.write_bytes(packet);
266    }
267
268    #[inline]
269    fn write_u8(&mut self, i: u8) {
270        self.write_u64(i as u64);
271    }
272
273    #[inline]
274    fn write_u16(&mut self, i: u16) {
275        self.write_u64(i as u64);
276    }
277
278    #[inline]
279    fn write_u32(&mut self, i: u32) {
280        self.write_u64(i as u64);
281    }
282
283    #[inline]
284    fn write_u64(&mut self, i: u64) {
285        self.write_simd(u64x4::splat(i));
286    }
287
288    #[inline]
289    fn write_usize(&mut self, i: usize) {
290        if core::mem::size_of::<usize>() > 8 {
291            self.write(&i.to_ne_bytes());
292        } else {
293            self.write_u64(i as u64);
294        }
295    }
296}
297
298/// Hash a slice with the given key.
299///
300/// This function dynamically selects the best instruction set at runtime.
301#[cfg(feature = "multiversion")]
302#[cfg_attr(docsrs, doc(cfg(feature = "multiversion")))]
303#[multiversion::multiversion(targets = "simd")]
304pub fn hash_64(bytes: &[u8], key: [u64; 4]) -> u64 {
305    let mut hasher = AutobahnHasher::new_with_key(key);
306    let (bytes, remainder) = bytes.split_at(bytes.len() / 32 * 32);
307    for packet in bytes.chunks(32) {
308        hasher.write_bytes(packet.try_into().unwrap());
309    }
310    hasher.finish_64(remainder)
311}
312
313/// Hash a slice with the given key.
314///
315/// This function dynamically selects the best instruction set at runtime.
316#[cfg(feature = "multiversion")]
317#[cfg_attr(docsrs, doc(cfg(feature = "multiversion")))]
318#[multiversion::multiversion(targets = "simd")]
319pub fn hash_128(bytes: &[u8], key: [u64; 4]) -> [u64; 2] {
320    let mut hasher = AutobahnHasher::new_with_key(key);
321    let (bytes, remainder) = bytes.split_at(bytes.len() / 32 * 32);
322    for packet in bytes.chunks(32) {
323        hasher.write_bytes(packet.try_into().unwrap());
324    }
325    hasher.finish_128(remainder)
326}
327
328/// Hash a slice with the given key.
329///
330/// This function dynamically selects the best instruction set at runtime.
331#[cfg(feature = "multiversion")]
332#[cfg_attr(docsrs, doc(cfg(feature = "multiversion")))]
333#[multiversion::multiversion(targets = "simd")]
334pub fn hash_256(bytes: &[u8], key: [u64; 4]) -> [u64; 4] {
335    let mut hasher = AutobahnHasher::new_with_key(key);
336    let (bytes, remainder) = bytes.split_at(bytes.len() / 32 * 32);
337    for packet in bytes.chunks(32) {
338        hasher.write_bytes(packet.try_into().unwrap());
339    }
340    hasher.finish_256(remainder)
341}
342
343/// Build `AutobahnHasher`s with random keys.
344///
345/// Like [`std::collections::hash_map::RandomState`], but for [`AutobahnHasher`].
346#[cfg(feature = "std")]
347#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
348#[derive(Clone, Debug)]
349pub struct RandomState {
350    key: [u64; 4],
351}
352
353#[cfg(feature = "std")]
354impl Default for RandomState {
355    fn default() -> Self {
356        Self::new()
357    }
358}
359
360#[cfg(feature = "std")]
361impl RandomState {
362    /// Constructs a new RandomState that is initialized with a random key.
363    pub fn new() -> Self {
364        use std::{
365            collections::hash_map,
366            hash::{BuildHasher, Hasher},
367        };
368        Self {
369            key: [
370                hash_map::RandomState::new().build_hasher().finish(),
371                hash_map::RandomState::new().build_hasher().finish(),
372                hash_map::RandomState::new().build_hasher().finish(),
373                hash_map::RandomState::new().build_hasher().finish(),
374            ],
375        }
376    }
377}
378
379#[cfg(feature = "std")]
380impl core::hash::BuildHasher for RandomState {
381    type Hasher = AutobahnHasher;
382
383    fn build_hasher(&self) -> Self::Hasher {
384        AutobahnHasher::new_with_key(self.key)
385    }
386}