Skip to main content

blake512_hash/
lib.rs

1// SPDX-License-Identifier: MIT
2//
3// Pure Rust implementation of the BLAKE-512 hash function.
4//
5// BLAKE is a cryptographic hash function that was one of the five finalists
6// in the NIST SHA-3 competition. This is the *original* BLAKE algorithm,
7// NOT BLAKE2 or BLAKE3. It is ported from the sphlib C implementation
8// by Thomas Pornin (MIT license, Projet RNRT SAPHIR).
9//
10// References:
11// - BLAKE specification: https://131002.net/blake/
12// - sphlib: https://www.saphir2.com/sphlib/
13
14#![no_std]
15#![deny(unsafe_code)]
16
17use digest::consts::U64;
18use digest::generic_array::GenericArray;
19use digest::{FixedOutput, HashMarker, OutputSizeUser, Reset, Update};
20
21pub use digest::Digest;
22
23// ---------------------------------------------------------------------------
24// Constants
25// ---------------------------------------------------------------------------
26
27/// Initial hash values for BLAKE-512 (same as SHA-512 IV).
28const IV512: [u64; 8] = [
29    0x6A09E667F3BCC908,
30    0xBB67AE8584CAA73B,
31    0x3C6EF372FE94F82B,
32    0xA54FF53A5F1D36F1,
33    0x510E527FADE682D1,
34    0x9B05688C2B3E6C1F,
35    0x1F83D9ABFB41BD6B,
36    0x5BE0CD19137E2179,
37];
38
39/// BLAKE constants: the first 16 fractional digits of pi, as 64-bit words.
40const CB: [u64; 16] = [
41    0x243F6A8885A308D3,
42    0x13198A2E03707344,
43    0xA4093822299F31D0,
44    0x082EFA98EC4E6C89,
45    0x452821E638D01377,
46    0xBE5466CF34E90C6C,
47    0xC0AC29B7C97C50DD,
48    0x3F84D5B5B5470917,
49    0x9216D5D98979FB1B,
50    0xD1310BA698DFB5AC,
51    0x2FFD72DBD01ADFB7,
52    0xB8E1AFED6A267E96,
53    0xBA7C9045F12C7F99,
54    0x24A19947B3916CF7,
55    0x0801F2E2858EFC16,
56    0x636920D871574E69,
57];
58
59/// Sigma permutation table (10 base permutations; BLAKE-512 uses 16 rounds,
60/// cycling through them as round % 10).
61const SIGMA: [[usize; 16]; 10] = [
62    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
63    [14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3],
64    [11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4],
65    [7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8],
66    [9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13],
67    [2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9],
68    [12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11],
69    [13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10],
70    [6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5],
71    [10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0],
72];
73
74// ---------------------------------------------------------------------------
75// G function
76// ---------------------------------------------------------------------------
77
78/// The BLAKE-512 G mixing function.
79///
80/// Operates on four words of the working vector `v` at indices `a, b, c, d`,
81/// mixing in two message words `m0, m1` and two constants `c0, c1`.
82///
83/// Rotation amounts for the 64-bit variant: 32, 25, 16, 11.
84#[inline(always)]
85#[allow(clippy::too_many_arguments)]
86fn g(
87    v: &mut [u64; 16],
88    a: usize,
89    b: usize,
90    c: usize,
91    d: usize,
92    m0: u64,
93    m1: u64,
94    c0: u64,
95    c1: u64,
96) {
97    v[a] = v[a].wrapping_add(v[b]).wrapping_add(m0 ^ c1);
98    v[d] = (v[d] ^ v[a]).rotate_right(32);
99    v[c] = v[c].wrapping_add(v[d]);
100    v[b] = (v[b] ^ v[c]).rotate_right(25);
101    v[a] = v[a].wrapping_add(v[b]).wrapping_add(m1 ^ c0);
102    v[d] = (v[d] ^ v[a]).rotate_right(16);
103    v[c] = v[c].wrapping_add(v[d]);
104    v[b] = (v[b] ^ v[c]).rotate_right(11);
105}
106
107// ---------------------------------------------------------------------------
108// Compression function
109// ---------------------------------------------------------------------------
110
111/// Compress a single 128-byte block into the state.
112///
113/// `h` is the current chaining value (8 words), `s` is the salt (4 words),
114/// `t0` and `t1` form the 128-bit counter (in bits), and `block` is exactly
115/// 128 bytes of message data.
116fn compress(h: &mut [u64; 8], s: &[u64; 4], t0: u64, t1: u64, block: &[u8; 128]) {
117    // Parse the 128-byte block as 16 big-endian u64 message words.
118    let mut m = [0u64; 16];
119    for (word, chunk) in m.iter_mut().zip(block.chunks_exact(8)) {
120        *word = u64::from_be_bytes([
121            chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6], chunk[7],
122        ]);
123    }
124
125    // Initialize the 16-word working vector.
126    let mut v = [0u64; 16];
127    v[0] = h[0];
128    v[1] = h[1];
129    v[2] = h[2];
130    v[3] = h[3];
131    v[4] = h[4];
132    v[5] = h[5];
133    v[6] = h[6];
134    v[7] = h[7];
135    v[8] = s[0] ^ CB[0];
136    v[9] = s[1] ^ CB[1];
137    v[10] = s[2] ^ CB[2];
138    v[11] = s[3] ^ CB[3];
139    v[12] = t0 ^ CB[4];
140    v[13] = t0 ^ CB[5];
141    v[14] = t1 ^ CB[6];
142    v[15] = t1 ^ CB[7];
143
144    // 16 rounds of G (cycling through the 10 sigma permutations).
145    for r in 0..16 {
146        let s_row = &SIGMA[r % 10];
147
148        // Column step
149        g(
150            &mut v,
151            0,
152            4,
153            8,
154            12,
155            m[s_row[0]],
156            m[s_row[1]],
157            CB[s_row[0]],
158            CB[s_row[1]],
159        );
160        g(
161            &mut v,
162            1,
163            5,
164            9,
165            13,
166            m[s_row[2]],
167            m[s_row[3]],
168            CB[s_row[2]],
169            CB[s_row[3]],
170        );
171        g(
172            &mut v,
173            2,
174            6,
175            10,
176            14,
177            m[s_row[4]],
178            m[s_row[5]],
179            CB[s_row[4]],
180            CB[s_row[5]],
181        );
182        g(
183            &mut v,
184            3,
185            7,
186            11,
187            15,
188            m[s_row[6]],
189            m[s_row[7]],
190            CB[s_row[6]],
191            CB[s_row[7]],
192        );
193
194        // Diagonal step
195        g(
196            &mut v,
197            0,
198            5,
199            10,
200            15,
201            m[s_row[8]],
202            m[s_row[9]],
203            CB[s_row[8]],
204            CB[s_row[9]],
205        );
206        g(
207            &mut v,
208            1,
209            6,
210            11,
211            12,
212            m[s_row[10]],
213            m[s_row[11]],
214            CB[s_row[10]],
215            CB[s_row[11]],
216        );
217        g(
218            &mut v,
219            2,
220            7,
221            8,
222            13,
223            m[s_row[12]],
224            m[s_row[13]],
225            CB[s_row[12]],
226            CB[s_row[13]],
227        );
228        g(
229            &mut v,
230            3,
231            4,
232            9,
233            14,
234            m[s_row[14]],
235            m[s_row[15]],
236            CB[s_row[14]],
237            CB[s_row[15]],
238        );
239    }
240
241    // Finalize: H[i] ^= S[i%4] ^ v[i] ^ v[i+8]
242    h[0] ^= s[0] ^ v[0] ^ v[8];
243    h[1] ^= s[1] ^ v[1] ^ v[9];
244    h[2] ^= s[2] ^ v[2] ^ v[10];
245    h[3] ^= s[3] ^ v[3] ^ v[11];
246    h[4] ^= s[0] ^ v[4] ^ v[12];
247    h[5] ^= s[1] ^ v[5] ^ v[13];
248    h[6] ^= s[2] ^ v[6] ^ v[14];
249    h[7] ^= s[3] ^ v[7] ^ v[15];
250}
251
252// ---------------------------------------------------------------------------
253// Blake512 hasher
254// ---------------------------------------------------------------------------
255
256/// BLAKE-512 hash state.
257///
258/// Implements the [`digest::Digest`] trait, producing a 64-byte (512-bit) hash.
259///
260/// # Example
261///
262/// ```
263/// use blake512_hash::{Blake512, Digest};
264///
265/// let hash = Blake512::digest(b"hello");
266/// assert_eq!(hash.len(), 64);
267/// ```
268#[derive(Clone)]
269pub struct Blake512 {
270    /// Current chaining value (8 x u64).
271    h: [u64; 8],
272    /// Salt (always zero for standard hashing).
273    s: [u64; 4],
274    /// Low 64 bits of the bit counter.
275    t0: u64,
276    /// High 64 bits of the bit counter.
277    t1: u64,
278    /// Internal message buffer (128 bytes = one block).
279    buf: [u8; 128],
280    /// Number of valid bytes currently in `buf`.
281    buf_len: usize,
282}
283
284impl Blake512 {
285    /// Create a new BLAKE-512 hasher with the standard IV and zero salt.
286    pub fn new() -> Self {
287        Blake512 {
288            h: IV512,
289            s: [0u64; 4],
290            t0: 0,
291            t1: 0,
292            buf: [0u8; 128],
293            buf_len: 0,
294        }
295    }
296
297    /// Increment the bit counter by 1024 and compress the internal buffer.
298    fn compress_buffer(&mut self) {
299        let (new_t0, carry) = self.t0.overflowing_add(1024);
300        self.t0 = new_t0;
301        if carry {
302            self.t1 = self.t1.wrapping_add(1);
303        }
304
305        let block: [u8; 128] = self.buf;
306        compress(&mut self.h, &self.s, self.t0, self.t1, &block);
307    }
308
309    /// Feed data into the hasher.
310    fn update_inner(&mut self, mut data: &[u8]) {
311        if data.len() < 128 - self.buf_len {
312            self.buf[self.buf_len..self.buf_len + data.len()].copy_from_slice(data);
313            self.buf_len += data.len();
314            return;
315        }
316
317        while !data.is_empty() {
318            let space = 128 - self.buf_len;
319            let copy_len = space.min(data.len());
320            self.buf[self.buf_len..self.buf_len + copy_len].copy_from_slice(&data[..copy_len]);
321            self.buf_len += copy_len;
322            data = &data[copy_len..];
323
324            if self.buf_len == 128 {
325                self.compress_buffer();
326                self.buf_len = 0;
327            }
328        }
329    }
330
331    /// Finalize the hash and return the 64-byte digest.
332    fn finalize_inner(mut self) -> [u8; 64] {
333        let ptr = self.buf_len;
334        let bit_len = (ptr as u64) << 3;
335
336        let tl = self.t0.wrapping_add(bit_len);
337        let th = if tl < bit_len {
338            self.t1.wrapping_add(1)
339        } else {
340            self.t1
341        };
342
343        if ptr == 0 {
344            self.t0 = 0xFFFFFFFFFFFFFC00u64;
345            self.t1 = 0xFFFFFFFFFFFFFFFFu64;
346        } else if self.t0 == 0 {
347            self.t0 = 0xFFFFFFFFFFFFFC00u64.wrapping_add(bit_len);
348            self.t1 = self.t1.wrapping_sub(1);
349        } else {
350            self.t0 = self.t0.wrapping_sub(1024u64.wrapping_sub(bit_len));
351        }
352
353        if bit_len <= 894 {
354            // Single padding block.
355            self.buf[ptr] = 0x80;
356            if ptr < 111 {
357                for i in (ptr + 1)..111 {
358                    self.buf[i] = 0;
359                }
360                self.buf[111] = 0x01;
361            } else {
362                // ptr == 111: the 0x80 start-bit is at position 111,
363                // same as the 0x01 end-marker — combine them as 0x81.
364                self.buf[111] = 0x81;
365            }
366            self.buf[112..120].copy_from_slice(&th.to_be_bytes());
367            self.buf[120..128].copy_from_slice(&tl.to_be_bytes());
368
369            self.buf_len = 128;
370            let block: [u8; 128] = self.buf;
371            let (new_t0, carry) = self.t0.overflowing_add(1024);
372            self.t0 = new_t0;
373            if carry {
374                self.t1 = self.t1.wrapping_add(1);
375            }
376            compress(&mut self.h, &self.s, self.t0, self.t1, &block);
377        } else {
378            // Two padding blocks needed.
379            self.buf[ptr] = 0x80;
380            for i in (ptr + 1)..128 {
381                self.buf[i] = 0;
382            }
383            self.buf_len = 128;
384            let block1: [u8; 128] = self.buf;
385            let (new_t0, carry) = self.t0.overflowing_add(1024);
386            self.t0 = new_t0;
387            if carry {
388                self.t1 = self.t1.wrapping_add(1);
389            }
390            compress(&mut self.h, &self.s, self.t0, self.t1, &block1);
391
392            self.t0 = 0xFFFFFFFFFFFFFC00u64;
393            self.t1 = 0xFFFFFFFFFFFFFFFFu64;
394
395            self.buf = [0u8; 128];
396            self.buf[111] = 0x01;
397            self.buf[112..120].copy_from_slice(&th.to_be_bytes());
398            self.buf[120..128].copy_from_slice(&tl.to_be_bytes());
399
400            let block2: [u8; 128] = self.buf;
401            let (new_t0, carry) = self.t0.overflowing_add(1024);
402            self.t0 = new_t0;
403            if carry {
404                self.t1 = self.t1.wrapping_add(1);
405            }
406            compress(&mut self.h, &self.s, self.t0, self.t1, &block2);
407        }
408
409        let mut out = [0u8; 64];
410        for (i, word) in self.h.iter().enumerate() {
411            out[i * 8..(i + 1) * 8].copy_from_slice(&word.to_be_bytes());
412        }
413        out
414    }
415}
416
417// ---------------------------------------------------------------------------
418// digest trait implementations
419// ---------------------------------------------------------------------------
420
421impl Default for Blake512 {
422    fn default() -> Self {
423        Self::new()
424    }
425}
426
427impl Update for Blake512 {
428    fn update(&mut self, data: &[u8]) {
429        self.update_inner(data);
430    }
431}
432
433impl OutputSizeUser for Blake512 {
434    type OutputSize = U64;
435}
436
437impl FixedOutput for Blake512 {
438    fn finalize_into(self, out: &mut GenericArray<u8, Self::OutputSize>) {
439        let hash = self.finalize_inner();
440        out.copy_from_slice(&hash);
441    }
442}
443
444impl Reset for Blake512 {
445    fn reset(&mut self) {
446        *self = Self::new();
447    }
448}
449
450impl HashMarker for Blake512 {}
451
452// ---------------------------------------------------------------------------
453// Internal unit tests (access private fields for counter edge cases)
454// ---------------------------------------------------------------------------
455
456#[cfg(test)]
457mod tests {
458    use super::*;
459
460    #[test]
461    fn compress_buffer_counter_carry() {
462        // When t0 is near u64::MAX, adding 1024 should overflow and increment t1.
463        let mut h = Blake512::new();
464        h.t0 = 0xFFFFFFFFFFFFFC00; // Adding 1024 wraps to 0
465        h.t1 = 5;
466        h.buf = [0xAA; 128];
467        h.buf_len = 128;
468        h.compress_buffer();
469
470        assert_eq!(h.t0, 0, "t0 should wrap to 0");
471        assert_eq!(h.t1, 6, "t1 should increment on carry");
472    }
473
474    #[test]
475    fn compress_buffer_counter_no_carry() {
476        let mut h = Blake512::new();
477        h.t0 = 0;
478        h.t1 = 0;
479        h.buf = [0xBB; 128];
480        h.buf_len = 128;
481        h.compress_buffer();
482
483        assert_eq!(h.t0, 1024, "t0 should be 1024 after one block");
484        assert_eq!(h.t1, 0, "t1 should remain 0");
485    }
486
487    #[test]
488    fn finalize_branch_a2_ptr_gt0_t0_eq_zero() {
489        // Branch A2: ptr > 0, t0 == 0 (counter wrapped after prior blocks).
490        // This requires t0 to be exactly 0 at finalize with data in the buffer.
491        // Simulate: set t0=0, t1=1 (as if 2^54 blocks were processed), buf has 10 bytes.
492        let mut h = Blake512::new();
493        h.t0 = 0;
494        h.t1 = 1;
495        h.buf[..10].copy_from_slice(&[0xCC; 10]);
496        h.buf_len = 10;
497
498        let result = h.finalize_inner();
499
500        // We can't easily verify the exact hash without a reference, but we can
501        // verify the counter adjustment was applied correctly by checking the
502        // output is 64 bytes and deterministic.
503        assert_eq!(result.len(), 64);
504
505        // Re-create the same state and verify determinism.
506        let mut h2 = Blake512::new();
507        h2.t0 = 0;
508        h2.t1 = 1;
509        h2.buf[..10].copy_from_slice(&[0xCC; 10]);
510        h2.buf_len = 10;
511
512        assert_eq!(h2.finalize_inner(), result);
513    }
514
515    #[test]
516    fn finalize_tl_carry_propagation() {
517        // When t0 + bit_len overflows, th should be incremented.
518        let mut h = Blake512::new();
519        h.t0 = u64::MAX - 50; // t0 + bit_len(10 bytes = 80 bits) will overflow
520        h.t1 = 3;
521        h.buf[..10].copy_from_slice(&[0xDD; 10]);
522        h.buf_len = 10;
523
524        let result = h.finalize_inner();
525        assert_eq!(result.len(), 64);
526
527        // Verify determinism with same state.
528        let mut h2 = Blake512::new();
529        h2.t0 = u64::MAX - 50;
530        h2.t1 = 3;
531        h2.buf[..10].copy_from_slice(&[0xDD; 10]);
532        h2.buf_len = 10;
533
534        assert_eq!(h2.finalize_inner(), result);
535    }
536
537    #[test]
538    fn finalize_ptr0_sentinel_counter() {
539        // When ptr == 0 (empty buffer), sentinel counter values should be set.
540        // After compress_buffer in finalize, t0 wraps: 0xFFFFFFFFFFFFFC00 + 1024 = 0,
541        // and t1 wraps: 0xFFFFFFFFFFFFFFFF + 1 = 0.
542        let mut h = Blake512::new();
543        h.t0 = 1024; // As if one block was compressed
544        h.t1 = 0;
545        h.buf_len = 0;
546
547        let result = h.finalize_inner();
548        assert_eq!(result.len(), 64);
549
550        // This should match hashing 128 bytes... but with synthetic h state
551        // from just IV (no prior compress). The point is it doesn't panic.
552        // Verify determinism.
553        let mut h2 = Blake512::new();
554        h2.t0 = 1024;
555        h2.t1 = 0;
556        h2.buf_len = 0;
557
558        assert_eq!(h2.finalize_inner(), result);
559    }
560}