tenthash/
lib.rs

1//! TentHash is a high-quality, non-cryptographic, 160-bit hash function.  It is
2//! also portable, easy to implement, and reasonably fast.
3//!
4//! TentHash's target applications are data fingerprinting, content-addressable
5//! systems, and other use cases that don't tolerate hash collisions.
6//!
7//! Importantly, TentHash is explicitly *not* intended to stand up to attacks,
8//! and should never be used where the choice of hash function has security
9//! considerations.  Its robustness against collisions is only meaningful under
10//! non-adversarial conditions.  In other words, like a good tent, it will
11//! protect you from the elements, but will do very little to protect you from
12//! attackers.
13//!
14//!
15//! # Example
16//!
17//! ```rust
18//! let hash = tenthash::hash("Hello world!");
19//!
20//! assert_eq!(&hash[..4], &[0x15, 0x5f, 0xa, 0x35]);
21//! ```
22
23#![no_std]
24#![forbid(unsafe_code)]
25
26const DIGEST_SIZE: usize = 160 / 8; // Digest size, in bytes.
27const BLOCK_SIZE: usize = 256 / 8; // Internal block size of the hash, in bytes.
28
29/// Computes TentHash in one go for a contiguous slice of data.
30///
31/// # Example
32///
33/// ```rust
34/// let hash = tenthash::hash("Hello world!");
35///
36/// assert_eq!(&hash[..4], &[0x15, 0x5f, 0xa, 0x35]);
37/// ```
38pub fn hash(data: impl AsRef<[u8]>) -> [u8; DIGEST_SIZE] {
39    let mut state = [
40        0x5d6daffc4411a967,
41        0xe22d4dea68577f34,
42        0xca50864d814cbc2e,
43        0x894e29b9611eb173,
44    ];
45
46    let mut data = data.as_ref();
47    let message_bit_length = data.len() as u64 * 8;
48
49    // Process full-size chunks.
50    while data.len() >= BLOCK_SIZE {
51        xor_data_into_state(&mut state, data);
52        mix_state(&mut state);
53        data = &data[BLOCK_SIZE..];
54    }
55
56    // Process any remaining data if needed.
57    if !data.is_empty() {
58        let mut buffer = [0u8; BLOCK_SIZE];
59        (&mut buffer[..data.len()]).copy_from_slice(data);
60        xor_data_into_state(&mut state, &buffer);
61        mix_state(&mut state);
62    }
63
64    // Incorporate the message length (in bits) and do the
65    // final mixing.
66    state[0] ^= message_bit_length;
67    mix_state(&mut state);
68    mix_state(&mut state);
69
70    // Get the digest as a byte array and return it.
71    let mut digest = [0u8; DIGEST_SIZE];
72    digest[0..8].copy_from_slice(&state[0].to_le_bytes());
73    digest[8..16].copy_from_slice(&state[1].to_le_bytes());
74    digest[16..20].copy_from_slice(&state[2].to_le_bytes()[0..4]);
75    return digest;
76}
77
78/// Computes TentHash incrementally, taking input data in chunks.
79///
80/// The hash output is unaffected by how the input data is split into chunks.
81/// For example, the data can be passed a byte at a time or all at once, and the
82/// hash output will be the same.
83///
84/// However, chunk size does affect performance, with larger chunks being
85/// faster.
86///
87/// # Example
88///
89/// ```rust
90/// # use tenthash::TentHash;
91/// // As one chunk.
92/// let mut hasher1 = TentHash::new();
93/// hasher1.update("Hello world!");
94/// let hash1 = hasher1.finalize();
95///
96/// // As multiple chunks.
97/// let mut hasher2 = TentHash::new();
98/// hasher2.update("Hello");
99/// hasher2.update(" world!");
100/// let hash2 = hasher2.finalize();
101///
102/// assert_eq!(&hash1[..4], &[0x15, 0x5f, 0xa, 0x35]);
103/// assert_eq!(hash1, hash2);
104/// ```
105#[derive(Debug, Copy, Clone)]
106pub struct TentHash {
107    state: [u64; 4],       // Hash state.
108    buf: [u8; BLOCK_SIZE], // Accumulates message data for processing.
109    buf_length: usize,     // The number of message bytes currently stored in buf[].
110    message_length: u64,   // Accumulates the total message length, in bytes.
111}
112
113impl TentHash {
114    pub fn new() -> TentHash {
115        TentHash {
116            state: [
117                0x5d6daffc4411a967,
118                0xe22d4dea68577f34,
119                0xca50864d814cbc2e,
120                0x894e29b9611eb173,
121            ],
122            buf: [0; BLOCK_SIZE],
123            buf_length: 0,
124            message_length: 0,
125        }
126    }
127
128    /// Appends data to the data stream being hashed.
129    ///
130    /// Call this repeatedly to incrementally append more and more data.
131    pub fn update(&mut self, data: impl AsRef<[u8]>) {
132        let mut data = data.as_ref();
133        self.message_length += data.len() as u64;
134
135        while !data.is_empty() {
136            if self.buf_length == 0 && data.len() >= BLOCK_SIZE {
137                // Process data directly, skipping the buffer.
138                xor_data_into_state(&mut self.state, data);
139                mix_state(&mut self.state);
140                data = &data[BLOCK_SIZE..];
141            } else if self.buf_length == BLOCK_SIZE {
142                // Process the filled buffer.
143                xor_data_into_state(&mut self.state, &self.buf);
144                mix_state(&mut self.state);
145                self.buf_length = 0;
146            } else {
147                // Fill the buffer.
148                let n = (BLOCK_SIZE - self.buf_length).min(data.len());
149                (&mut self.buf[self.buf_length..(self.buf_length + n)]).copy_from_slice(&data[..n]);
150                data = &data[n..];
151                self.buf_length += n;
152            }
153        }
154    }
155
156    /// Finalizes the hash and returns the digest.
157    pub fn finalize(mut self) -> [u8; DIGEST_SIZE] {
158        // Hash the remaining bytes if there are any.
159        if self.buf_length > 0 {
160            (&mut self.buf[self.buf_length..]).fill(0); // Pad with zeros as needed.
161            xor_data_into_state(&mut self.state, &self.buf);
162            mix_state(&mut self.state);
163        }
164
165        // Incorporate the message length (in bits) and do the
166        // final mixing.
167        self.state[0] ^= self.message_length * 8;
168        mix_state(&mut self.state);
169        mix_state(&mut self.state);
170
171        // Get the digest as a byte array and return it.
172        let mut digest = [0u8; DIGEST_SIZE];
173        digest[0..8].copy_from_slice(&self.state[0].to_le_bytes());
174        digest[8..16].copy_from_slice(&self.state[1].to_le_bytes());
175        digest[16..20].copy_from_slice(&self.state[2].to_le_bytes()[0..4]);
176        return digest;
177    }
178}
179
180/// Xor message data into the hash state.
181///
182/// The data must be at least 32 bytes long, and only the first 32 bytes are
183/// used.
184#[inline(always)]
185fn xor_data_into_state(state: &mut [u64; 4], data: &[u8]) {
186    // Convert the data to native endian u64's and xor into the hash state.
187    assert!(data.len() >= BLOCK_SIZE);
188    state[0] ^= u64::from_le_bytes((&data[0..8]).try_into().unwrap());
189    state[1] ^= u64::from_le_bytes((&data[8..16]).try_into().unwrap());
190    state[2] ^= u64::from_le_bytes((&data[16..24]).try_into().unwrap());
191    state[3] ^= u64::from_le_bytes((&data[24..32]).try_into().unwrap());
192}
193
194/// Mixes the passed hash state.
195///
196/// Running this on the hash state once results in 179 bits of diffusion.
197/// Running it twice achieves full 256-bit diffusion.
198#[inline(always)]
199fn mix_state(state: &mut [u64; 4]) {
200    const ROTATIONS: &[[u32; 2]] = &[
201        [16, 28],
202        [14, 57],
203        [11, 22],
204        [35, 34],
205        [57, 16],
206        [59, 40],
207        [44, 13],
208    ];
209
210    for rot_pair in ROTATIONS.iter() {
211        state[0] = state[0].wrapping_add(state[2]);
212        state[2] = state[2].rotate_left(rot_pair[0]) ^ state[0];
213        state[1] = state[1].wrapping_add(state[3]);
214        state[3] = state[3].rotate_left(rot_pair[1]) ^ state[1];
215
216        state.swap(0, 1);
217    }
218}
219
220/// A helper trait with convenience methods for TentHash's output digest.
221pub trait DigestExt {
222    /// Truncates the digest to 128 bits, returned as an array of 16 bytes.
223    fn to_16_bytes(self) -> [u8; 16];
224
225    /// Truncates the digest to 128 bits, returned as a [`u128`].
226    ///
227    /// The digest bytes are interpreted as little endian.
228    fn to_u128(self) -> u128;
229}
230
231impl DigestExt for [u8; 20] {
232    #[inline(always)]
233    fn to_16_bytes(self) -> [u8; 16] {
234        (&self[0..16]).try_into().unwrap()
235    }
236
237    #[inline(always)]
238    fn to_u128(self) -> u128 {
239        u128::from_le_bytes(self.to_16_bytes())
240    }
241}