1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
//! TentHash is a robust 160-bit non-cryptographic hash function.
//!
//! **WARNING:** TentHash's design is not yet finalized, and digest
//! results may change before 1.0 is declared.
//!
//! TentHash is intended to be used as a reasonably fast but (more
//! importantly) high-quality checksum for data identification.
//! Moreover, it has a simple design that is easy to understand and
//! straightforward to write conforming implementations of.
//!
//! TentHash is explicitly *not* intended to stand up to attacks.  Its
//! otherwise strong collision resistance is only meaningful under
//! non-adversarial conditions.  In other words, like a good tent, it
//! will protect you from the elements, but will do very little to
//! protect you from attackers.
//!
//! This implementation should work on platforms of any endianness,
//! but has only been tested on little endian platforms so far.
//! Running the test suite on a big-endian platform can verify.
//!
//! # Example
//!
//! ```rust
//! # use tenthash::TentHasher;
//! let mut hasher = TentHasher::new();
//! hasher.update("Hello world!");
//! let hash = hasher.finalize();
//!
//! assert_eq!(&hash[..4], &[0x30, 0xd0, 0x8a, 0x79]);
//! ```

#![no_std]

const DIGEST_SIZE: usize = 160 / 8; // Digest size, in bytes.
const BLOCK_SIZE: usize = 256 / 8; // Internal block size of the hash, in bytes.

/// The TentHash hasher.  Processes input bytes and outputs a TentHash digest.
#[derive(Debug, Copy, Clone)]
#[repr(C)]
#[repr(align(32))]
pub struct TentHasher {
    state: [u64; 4],       // Hash state.
    buf: [u8; BLOCK_SIZE], // Accumulates message data for processing when needed.
    buf_length: usize,     // The number of message bytes currently stored in buf[].
    message_length: u64,   // Accumulates the total message length, in bytes.
}

impl TentHasher {
    pub fn new() -> TentHasher {
        TentHasher {
            state: [
                0x5d6daffc4411a967,
                0xe22d4dea68577f34,
                0xca50864d814cbc2e,
                0x894e29b9611eb173,
            ],
            buf: [0; BLOCK_SIZE],
            buf_length: 0,
            message_length: 0,
        }
    }

    /// Appends data to the data stream being hashed.
    ///
    /// This can be called repeatedly to incrementally append more and more data.
    ///
    /// ```rust
    /// # use tenthash::TentHasher;
    /// // As one chunk.
    /// let mut hasher1 = TentHasher::new();
    /// hasher1.update("Hello world!");
    ///
    /// // As multiple chunks.
    /// let mut hasher2 = TentHasher::new();
    /// hasher2.update("Hello");
    /// hasher2.update(" world!");
    ///
    /// assert_eq!(hasher1.finalize(), hasher2.finalize());
    /// ```
    pub fn update(&mut self, data: impl AsRef<[u8]>) {
        let mut data = data.as_ref();
        self.message_length += data.len() as u64;

        while !data.is_empty() {
            if self.buf_length == 0 && data.len() >= BLOCK_SIZE {
                // Process data directly, skipping the buffer.
                xor_data_into_state(&mut self.state, data);
                mix_state(&mut self.state);
                data = &data[BLOCK_SIZE..];
            } else if self.buf_length == BLOCK_SIZE {
                // Process the filled buffer.
                xor_data_into_state(&mut self.state, &self.buf);
                mix_state(&mut self.state);
                self.buf_length = 0;
            } else {
                // Fill the buffer.
                let n = (BLOCK_SIZE - self.buf_length).min(data.len());
                (&mut self.buf[self.buf_length..(self.buf_length + n)]).copy_from_slice(&data[..n]);
                data = &data[n..];
                self.buf_length += n;
            }
        }
    }

    /// Finalizes the hash and returns the digest.
    pub fn finalize(mut self) -> [u8; DIGEST_SIZE] {
        // Hash the remaining bytes if there are any.
        if self.buf_length > 0 {
            (&mut self.buf[self.buf_length..]).fill(0); // Pad with zeros as needed.
            xor_data_into_state(&mut self.state, &self.buf);
            mix_state(&mut self.state);
        }

        // Incorporate the message length (in bits) and do the
        // final mixing.
        self.state[0] ^= self.message_length * 8;
        mix_state(&mut self.state);
        mix_state(&mut self.state);

        // Get the digest as a byte array and return it.
        let mut digest = [0u8; DIGEST_SIZE];
        digest[0..8].copy_from_slice(&self.state[0].to_le_bytes());
        digest[8..16].copy_from_slice(&self.state[1].to_le_bytes());
        digest[16..20].copy_from_slice(&self.state[2].to_le_bytes()[0..4]);
        return digest;
    }
}

/// Adds message data to the hash state.
///
/// The data must be at least 32 bytes long.  Only the first 32 bytes
/// are added.
#[inline(always)]
fn xor_data_into_state(state: &mut [u64; 4], data: &[u8]) {
    // Convert the data to native endian u64's and add to the
    // hash state.
    assert!(data.len() >= BLOCK_SIZE);
    state[0] ^= u64::from_le_bytes((&data[0..8]).try_into().unwrap());
    state[1] ^= u64::from_le_bytes((&data[8..16]).try_into().unwrap());
    state[2] ^= u64::from_le_bytes((&data[16..24]).try_into().unwrap());
    state[3] ^= u64::from_le_bytes((&data[24..32]).try_into().unwrap());
}

/// Mixes the passed hash state.
///
/// Running this on the hash state once is enough to diffuse the bits
/// equivalently to a full 170-bit diffusion.  Running it twice achieves
/// full 256-bit diffusion.
#[inline(always)]
fn mix_state(state: &mut [u64; 4]) {
    const ROTATIONS: &[[u32; 2]] = &[
        [51, 59],
        [25, 19],
        [8, 10],
        [35, 3],
        [45, 38],
        [61, 32],
        [23, 53],
    ];

    for rot_pair in ROTATIONS.iter() {
        state[0] = state[0].wrapping_add(state[2]);
        state[1] = state[1].wrapping_add(state[3]);
        state[2] = state[2].rotate_left(rot_pair[0]) ^ state[0];
        state[3] = state[3].rotate_left(rot_pair[1]) ^ state[1];

        state.swap(0, 1);
    }
}