tenthash/lib.rs
1//! TentHash is a high-quality, non-cryptographic, 160-bit hash function. It is
2//! also portable, easy to implement, and reasonably fast.
3//!
4//! TentHash's target applications are data fingerprinting, content-addressable
5//! systems, and other use cases that don't tolerate hash collisions.
6//!
7//! Importantly, TentHash is explicitly *not* intended to stand up to attacks,
8//! and should never be used where the choice of hash function has security
9//! considerations. Its robustness against collisions is only meaningful under
10//! non-adversarial conditions. In other words, like a good tent, it will
11//! protect you from the elements, but will do very little to protect you from
12//! attackers.
13//!
14//!
15//! # Example
16//!
17//! ```rust
18//! let hash = tenthash::hash("Hello world!");
19//!
20//! assert_eq!(&hash[..4], &[0x15, 0x5f, 0xa, 0x35]);
21//! ```
22
23#![no_std]
24#![forbid(unsafe_code)]
25
26const DIGEST_SIZE: usize = 160 / 8; // Digest size, in bytes.
27const BLOCK_SIZE: usize = 256 / 8; // Internal block size of the hash, in bytes.
28
29/// Computes TentHash in one go for a contiguous slice of data.
30///
31/// # Example
32///
33/// ```rust
34/// let hash = tenthash::hash("Hello world!");
35///
36/// assert_eq!(&hash[..4], &[0x15, 0x5f, 0xa, 0x35]);
37/// ```
38pub fn hash(data: impl AsRef<[u8]>) -> [u8; DIGEST_SIZE] {
39 let mut state = [
40 0x5d6daffc4411a967,
41 0xe22d4dea68577f34,
42 0xca50864d814cbc2e,
43 0x894e29b9611eb173,
44 ];
45
46 let mut data = data.as_ref();
47 let message_bit_length = data.len() as u64 * 8;
48
49 // Process full-size chunks.
50 while data.len() >= BLOCK_SIZE {
51 xor_data_into_state(&mut state, data);
52 mix_state(&mut state);
53 data = &data[BLOCK_SIZE..];
54 }
55
56 // Process any remaining data if needed.
57 if !data.is_empty() {
58 let mut buffer = [0u8; BLOCK_SIZE];
59 (&mut buffer[..data.len()]).copy_from_slice(data);
60 xor_data_into_state(&mut state, &buffer);
61 mix_state(&mut state);
62 }
63
64 // Incorporate the message length (in bits) and do the
65 // final mixing.
66 state[0] ^= message_bit_length;
67 mix_state(&mut state);
68 mix_state(&mut state);
69
70 // Get the digest as a byte array and return it.
71 let mut digest = [0u8; DIGEST_SIZE];
72 digest[0..8].copy_from_slice(&state[0].to_le_bytes());
73 digest[8..16].copy_from_slice(&state[1].to_le_bytes());
74 digest[16..20].copy_from_slice(&state[2].to_le_bytes()[0..4]);
75 return digest;
76}
77
78/// Computes TentHash incrementally, taking input data in chunks.
79///
80/// The hash output is unaffected by how the input data is split into chunks.
81/// For example, the data can be passed a byte at a time or all at once, and the
82/// hash output will be the same.
83///
84/// However, chunk size does affect performance, with larger chunks being
85/// faster.
86///
87/// # Example
88///
89/// ```rust
90/// # use tenthash::TentHash;
91/// // As one chunk.
92/// let mut hasher1 = TentHash::new();
93/// hasher1.update("Hello world!");
94/// let hash1 = hasher1.finalize();
95///
96/// // As multiple chunks.
97/// let mut hasher2 = TentHash::new();
98/// hasher2.update("Hello");
99/// hasher2.update(" world!");
100/// let hash2 = hasher2.finalize();
101///
102/// assert_eq!(&hash1[..4], &[0x15, 0x5f, 0xa, 0x35]);
103/// assert_eq!(hash1, hash2);
104/// ```
105#[derive(Debug, Copy, Clone)]
106pub struct TentHash {
107 state: [u64; 4], // Hash state.
108 buf: [u8; BLOCK_SIZE], // Accumulates message data for processing.
109 buf_length: usize, // The number of message bytes currently stored in buf[].
110 message_length: u64, // Accumulates the total message length, in bytes.
111}
112
113impl TentHash {
114 pub fn new() -> TentHash {
115 TentHash {
116 state: [
117 0x5d6daffc4411a967,
118 0xe22d4dea68577f34,
119 0xca50864d814cbc2e,
120 0x894e29b9611eb173,
121 ],
122 buf: [0; BLOCK_SIZE],
123 buf_length: 0,
124 message_length: 0,
125 }
126 }
127
128 /// Appends data to the data stream being hashed.
129 ///
130 /// Call this repeatedly to incrementally append more and more data.
131 pub fn update(&mut self, data: impl AsRef<[u8]>) {
132 let mut data = data.as_ref();
133 self.message_length += data.len() as u64;
134
135 while !data.is_empty() {
136 if self.buf_length == 0 && data.len() >= BLOCK_SIZE {
137 // Process data directly, skipping the buffer.
138 xor_data_into_state(&mut self.state, data);
139 mix_state(&mut self.state);
140 data = &data[BLOCK_SIZE..];
141 } else if self.buf_length == BLOCK_SIZE {
142 // Process the filled buffer.
143 xor_data_into_state(&mut self.state, &self.buf);
144 mix_state(&mut self.state);
145 self.buf_length = 0;
146 } else {
147 // Fill the buffer.
148 let n = (BLOCK_SIZE - self.buf_length).min(data.len());
149 (&mut self.buf[self.buf_length..(self.buf_length + n)]).copy_from_slice(&data[..n]);
150 data = &data[n..];
151 self.buf_length += n;
152 }
153 }
154 }
155
156 /// Finalizes the hash and returns the digest.
157 pub fn finalize(mut self) -> [u8; DIGEST_SIZE] {
158 // Hash the remaining bytes if there are any.
159 if self.buf_length > 0 {
160 (&mut self.buf[self.buf_length..]).fill(0); // Pad with zeros as needed.
161 xor_data_into_state(&mut self.state, &self.buf);
162 mix_state(&mut self.state);
163 }
164
165 // Incorporate the message length (in bits) and do the
166 // final mixing.
167 self.state[0] ^= self.message_length * 8;
168 mix_state(&mut self.state);
169 mix_state(&mut self.state);
170
171 // Get the digest as a byte array and return it.
172 let mut digest = [0u8; DIGEST_SIZE];
173 digest[0..8].copy_from_slice(&self.state[0].to_le_bytes());
174 digest[8..16].copy_from_slice(&self.state[1].to_le_bytes());
175 digest[16..20].copy_from_slice(&self.state[2].to_le_bytes()[0..4]);
176 return digest;
177 }
178}
179
180/// Xor message data into the hash state.
181///
182/// The data must be at least 32 bytes long, and only the first 32 bytes are
183/// used.
184#[inline(always)]
185fn xor_data_into_state(state: &mut [u64; 4], data: &[u8]) {
186 // Convert the data to native endian u64's and xor into the hash state.
187 assert!(data.len() >= BLOCK_SIZE);
188 state[0] ^= u64::from_le_bytes((&data[0..8]).try_into().unwrap());
189 state[1] ^= u64::from_le_bytes((&data[8..16]).try_into().unwrap());
190 state[2] ^= u64::from_le_bytes((&data[16..24]).try_into().unwrap());
191 state[3] ^= u64::from_le_bytes((&data[24..32]).try_into().unwrap());
192}
193
194/// Mixes the passed hash state.
195///
196/// Running this on the hash state once results in 179 bits of diffusion.
197/// Running it twice achieves full 256-bit diffusion.
198#[inline(always)]
199fn mix_state(state: &mut [u64; 4]) {
200 const ROTATIONS: &[[u32; 2]] = &[
201 [16, 28],
202 [14, 57],
203 [11, 22],
204 [35, 34],
205 [57, 16],
206 [59, 40],
207 [44, 13],
208 ];
209
210 for rot_pair in ROTATIONS.iter() {
211 state[0] = state[0].wrapping_add(state[2]);
212 state[2] = state[2].rotate_left(rot_pair[0]) ^ state[0];
213 state[1] = state[1].wrapping_add(state[3]);
214 state[3] = state[3].rotate_left(rot_pair[1]) ^ state[1];
215
216 state.swap(0, 1);
217 }
218}
219
220/// A helper trait with convenience methods for TentHash's output digest.
221pub trait DigestExt {
222 /// Truncates the digest to 128 bits, returned as an array of 16 bytes.
223 fn to_16_bytes(self) -> [u8; 16];
224
225 /// Truncates the digest to 128 bits, returned as a [`u128`].
226 ///
227 /// The digest bytes are interpreted as little endian.
228 fn to_u128(self) -> u128;
229}
230
231impl DigestExt for [u8; 20] {
232 #[inline(always)]
233 fn to_16_bytes(self) -> [u8; 16] {
234 (&self[0..16]).try_into().unwrap()
235 }
236
237 #[inline(always)]
238 fn to_u128(self) -> u128 {
239 u128::from_le_bytes(self.to_16_bytes())
240 }
241}