dcrypt_algorithms/xof/blake3/
mod.rs

1//! BLAKE3 extendable output function (XOF) implementation
2//!
3//! This module provides a pure Rust implementation of the BLAKE3 cryptographic
4//! hash function in XOF (eXtendable Output Function) mode, allowing for
5//! arbitrary-length output generation.
6//!
7//! # Overview
8//!
9//! BLAKE3 is a cryptographic hash function that is:
10//! - **Fast**: Optimized for modern CPUs with SIMD instructions
11//! - **Secure**: Based on the well-analyzed ChaCha permutation
12//! - **Versatile**: Supports hashing, keyed hashing, and key derivation
13//! - **Parallelizable**: Can process multiple chunks simultaneously
14//! - **Incremental**: Supports streaming/incremental hashing
15//!
16//! # Security Properties
17//!
18//! - **Security Level**: 256 bits (128-bit collision resistance)
19//! - **Output Size**: Variable (unlimited in XOF mode)
20//! - **Key Size**: 256 bits (32 bytes) for keyed variants
21//!
22//! # Features
23//!
24//! This implementation provides three modes of operation:
25//!
26//! 1. **Standard XOF**: Variable-length output from input data
27//! 2. **Keyed XOF**: HMAC-like keyed hashing with variable output
28//! 3. **Key Derivation**: Derive keys from a context string and input data
29//!
30//! # Implementation Notes
31//!
32//! This implementation prioritizes correctness and security over performance:
33//! - Uses secure memory handling with `SecretBuffer` for sensitive data
34//! - Implements proper zeroization of sensitive values
35//! - Based directly on the BLAKE3 reference implementation
36//! - Does not include SIMD optimizations
37//!
38//! # Example Usage
39//!
40//! ```rust,ignore
41//! use dcrypt_algorithms::xof::{Blake3Xof, ExtendableOutputFunction};
42//!
43//! // Standard hashing with variable output
44//! let data = b"Hello, BLAKE3!";
45//! let output = Blake3Xof::generate(data, 64)?; // 64 bytes output
46//!
47//! // Incremental hashing
48//! let mut xof = Blake3Xof::new();
49//! xof.update(b"Hello, ")?;
50//! xof.update(b"BLAKE3!")?;
51//! let mut output = vec![0u8; 64];
52//! xof.squeeze(&mut output)?;
53//!
54//! // Keyed hashing
55//! let key = b"this is a 32-byte key for BLAKE3";
56//! let output = Blake3Xof::keyed_generate(key, data, 32)?;
57//!
58//! // Key derivation
59//! let context = b"MyApp v1.0.0 session key";
60//! let output = Blake3Xof::derive_key(context, data, 32)?;
61//! ```
62//!
63//! # References
64//!
65//! - [BLAKE3 Specification](https://github.com/BLAKE3-team/BLAKE3-specs)
66//! - [BLAKE3 Paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf)
67//! - [Reference Implementation](https://github.com/BLAKE3-team/BLAKE3)
68
69use super::{Blake3Algorithm, DeriveKeyXof, ExtendableOutputFunction, KeyedXof};
70use crate::error::{validate, Error, Result};
71use crate::xof::XofAlgorithm;
72use dcrypt_common::security::{EphemeralSecret, SecretBuffer};
73use zeroize::Zeroize;
74
75#[cfg(not(feature = "std"))]
76use alloc::vec::Vec;
77
78// BLAKE3 constants
79const OUT_LEN: usize = 32; // Standard output length (256 bits)
80const KEY_LEN: usize = 32; // Key length for keyed hashing (256 bits)
81const BLOCK_LEN: usize = 64; // Input block size (512 bits)
82const CHUNK_LEN: usize = 1024; // Chunk size (16 blocks)
83
84// Flags for domain separation and tree structure
85const CHUNK_START: u32 = 1 << 0; // First block of a chunk
86const CHUNK_END: u32 = 1 << 1; // Last block of a chunk
87const PARENT: u32 = 1 << 2; // Parent node in the tree
88const ROOT: u32 = 1 << 3; // Root node (final output)
89const KEYED_HASH: u32 = 1 << 4; // Keyed hashing mode
90const DERIVE_KEY_CONTEXT: u32 = 1 << 5; // Key derivation context
91const DERIVE_KEY_MATERIAL: u32 = 1 << 6; // Key derivation material
92
93// IV is the initialization vector for BLAKE3
94// These are the first 32 bits of the fractional parts of the square roots
95// of the first 8 primes: 2, 3, 5, 7, 11, 13, 17, 19
96const IV: [u32; 8] = [
97    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
98];
99
100// Message word permutation for each round
101// This permutation is applied to the message words between rounds
102const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8];
103
104// Convert bytes to words
105fn words_from_little_endian_bytes(bytes: &[u8], words: &mut [u32]) {
106    debug_assert_eq!(bytes.len(), 4 * words.len());
107    for i in 0..words.len() {
108        words[i] = u32::from_le_bytes([
109            bytes[i * 4],
110            bytes[i * 4 + 1],
111            bytes[i * 4 + 2],
112            bytes[i * 4 + 3],
113        ]);
114    }
115}
116
117// Convert words to bytes securely
118fn words_to_little_endian_bytes(words: &[u32], bytes: &mut [u8]) {
119    debug_assert_eq!(bytes.len(), 4 * words.len());
120    for i in 0..words.len() {
121        let word_bytes = words[i].to_le_bytes();
122        bytes[i * 4..i * 4 + 4].copy_from_slice(&word_bytes);
123    }
124}
125
126// G function for mixing
127/// The G function is the core mixing operation in BLAKE3, derived from ChaCha.
128/// It performs a series of additions, XORs, and rotations to mix the state.
129#[inline(always)]
130fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32) {
131    state[a] = state[a].wrapping_add(state[b]).wrapping_add(mx);
132    state[d] = (state[d] ^ state[a]).rotate_right(16);
133    state[c] = state[c].wrapping_add(state[d]);
134    state[b] = (state[b] ^ state[c]).rotate_right(12);
135
136    state[a] = state[a].wrapping_add(state[b]).wrapping_add(my);
137    state[d] = (state[d] ^ state[a]).rotate_right(8);
138    state[c] = state[c].wrapping_add(state[d]);
139    state[b] = (state[b] ^ state[c]).rotate_right(7);
140}
141
142// Apply a single round of the compression function
143fn round(state: &mut [u32; 16], m: &[u32; 16]) {
144    // Column rounds - Mix the four columns
145    g(state, 0, 4, 8, 12, m[0], m[1]);
146    g(state, 1, 5, 9, 13, m[2], m[3]);
147    g(state, 2, 6, 10, 14, m[4], m[5]);
148    g(state, 3, 7, 11, 15, m[6], m[7]);
149
150    // Diagonal rounds - Mix the four diagonals
151    g(state, 0, 5, 10, 15, m[8], m[9]);
152    g(state, 1, 6, 11, 12, m[10], m[11]);
153    g(state, 2, 7, 8, 13, m[12], m[13]);
154    g(state, 3, 4, 9, 14, m[14], m[15]);
155}
156
157// Permute message words for the next round
158fn permute(m: &mut [u32; 16]) {
159    let mut permuted = [0u32; 16];
160    for i in 0..16 {
161        permuted[i] = m[MSG_PERMUTATION[i]];
162    }
163    *m = permuted;
164}
165
166// Compression function for BLAKE3
167/// The compression function is the heart of BLAKE3. It takes:
168/// - A 256-bit chaining value from the previous block
169/// - A 512-bit block of message data
170/// - A 64-bit counter for the block position
171/// - The block length (normally 64, may be less for the final block)
172/// - Flags for domain separation and tree structure
173///
174/// It produces a 512-bit output that can be used as:
175/// - The chaining value for the next block (first 256 bits)
176/// - Extended output in XOF mode (all 512 bits)
177fn compress(
178    chaining_value: &[u32; 8],
179    block_words: &[u32; 16],
180    counter: u64,
181    block_len: u32,
182    flags: u32,
183) -> [u32; 16] {
184    let counter_low = counter as u32;
185    let counter_high = (counter >> 32) as u32;
186
187    // Initialize state with chaining value and IV
188    let mut state = [
189        chaining_value[0],
190        chaining_value[1],
191        chaining_value[2],
192        chaining_value[3],
193        chaining_value[4],
194        chaining_value[5],
195        chaining_value[6],
196        chaining_value[7],
197        IV[0],
198        IV[1],
199        IV[2],
200        IV[3],
201        counter_low,
202        counter_high,
203        block_len,
204        flags,
205    ];
206
207    let mut block = *block_words;
208
209    // BLAKE3 uses exactly 7 rounds
210    for r in 0..7 {
211        // Apply the round function
212        round(&mut state, &block);
213
214        // Permute the message words for the next round
215        if r < 6 {
216            permute(&mut block);
217        }
218    }
219
220    // Create output array for the compression function
221    let mut output = [0u32; 16];
222
223    // First 8 words: XOR the first half of the state with the second half
224    for i in 0..8 {
225        output[i] = state[i] ^ state[i + 8];
226    }
227
228    // Second 8 words: XOR the second half of the state with the input chaining value
229    for i in 0..8 {
230        output[i + 8] = state[i + 8] ^ chaining_value[i];
231    }
232
233    output
234}
235
236// Get the first 8 words as a chaining value
237fn first_8_words(compression_output: &[u32; 16]) -> [u32; 8] {
238    let mut result = [0u32; 8];
239    result.copy_from_slice(&compression_output[0..8]);
240    result
241}
242
243// Output structure
244#[derive(Clone, Zeroize)]
245struct Output {
246    input_chaining_value: [u32; 8],
247    block_words: [u32; 16],
248    counter: u64,
249    block_len: u32,
250    flags: u32,
251}
252
253impl Output {
254    fn chaining_value(&self) -> [u32; 8] {
255        first_8_words(&compress(
256            &self.input_chaining_value,
257            &self.block_words,
258            self.counter,
259            self.block_len,
260            self.flags,
261        ))
262    }
263
264    fn root_output_bytes(&self, out_slice: &mut [u8]) {
265        for (output_block_counter, out_block) in out_slice.chunks_mut(2 * OUT_LEN).enumerate() {
266            let words = compress(
267                &self.input_chaining_value,
268                &self.block_words,
269                output_block_counter as u64,
270                self.block_len,
271                self.flags | ROOT,
272            );
273
274            // Copy output bytes - ensure little-endian encoding
275            for (i, word) in words.iter().enumerate() {
276                let word_bytes = word.to_le_bytes();
277                let start = i * 4;
278                if start >= out_block.len() {
279                    break;
280                }
281                let end = core::cmp::min((i + 1) * 4, out_block.len());
282                out_block[start..end].copy_from_slice(&word_bytes[..(end - start)]);
283            }
284        }
285    }
286}
287
288// Chunk state
289#[derive(Clone, Zeroize)]
290struct ChunkState {
291    chaining_value: [u32; 8],
292    chunk_counter: u64,
293    block: [u8; BLOCK_LEN],
294    block_len: u8,
295    blocks_compressed: u8,
296    flags: u32,
297}
298
299impl ChunkState {
300    fn new(key_words: [u32; 8], chunk_counter: u64, flags: u32) -> Self {
301        Self {
302            chaining_value: key_words,
303            chunk_counter,
304            block: [0; BLOCK_LEN],
305            block_len: 0,
306            blocks_compressed: 0,
307            flags,
308        }
309    }
310
311    fn len(&self) -> usize {
312        (self.blocks_compressed as usize) * BLOCK_LEN + (self.block_len as usize)
313    }
314
315    fn start_flag(&self) -> u32 {
316        if self.blocks_compressed == 0 {
317            CHUNK_START
318        } else {
319            0
320        }
321    }
322
323    // Internal update implementation
324    fn update_internal(&mut self, mut input: &[u8]) -> Result<()> {
325        // Check if adding this input would exceed chunk size limit
326        if self.len() + input.len() > CHUNK_LEN {
327            let want = CHUNK_LEN - self.len();
328            self.update_internal(&input[..want])?;
329            return Ok(());
330        }
331
332        while !input.is_empty() {
333            // If the block is full, compress it
334            if self.block_len as usize == BLOCK_LEN {
335                let mut block_words = [0u32; 16];
336                words_from_little_endian_bytes(&self.block, &mut block_words);
337
338                self.chaining_value = first_8_words(&compress(
339                    &self.chaining_value,
340                    &block_words,
341                    self.chunk_counter,
342                    BLOCK_LEN as u32,
343                    self.flags | self.start_flag(),
344                ));
345
346                self.blocks_compressed += 1;
347                self.block = [0; BLOCK_LEN];
348                self.block_len = 0;
349            }
350
351            // Copy input data into the block
352            let want = BLOCK_LEN - self.block_len as usize;
353            let take = core::cmp::min(want, input.len());
354
355            self.block[self.block_len as usize..self.block_len as usize + take]
356                .copy_from_slice(&input[..take]);
357
358            self.block_len += take as u8;
359            input = &input[take..];
360        }
361
362        Ok(())
363    }
364
365    // Public update method to maintain compatibility with tests
366    #[cfg(test)]
367    pub fn update(&mut self, input: &[u8]) -> Result<()> {
368        self.update_internal(input)
369    }
370
371    fn output(&self) -> Output {
372        // Zero-pad the block to create a full set of block words
373        let mut block_words = [0u32; 16];
374        let mut padded_block = [0u8; BLOCK_LEN];
375        padded_block[..self.block_len as usize]
376            .copy_from_slice(&self.block[..self.block_len as usize]);
377        words_from_little_endian_bytes(&padded_block, &mut block_words);
378
379        Output {
380            input_chaining_value: self.chaining_value,
381            block_words,
382            counter: self.chunk_counter,
383            block_len: self.block_len as u32,
384            flags: self.flags | self.start_flag() | CHUNK_END,
385        }
386    }
387}
388
389// Parent node creation
390fn parent_output(
391    left_child_cv: [u32; 8],
392    right_child_cv: [u32; 8],
393    key_words: [u32; 8],
394    flags: u32,
395) -> Output {
396    let mut block_words = [0u32; 16];
397    block_words[..8].copy_from_slice(&left_child_cv);
398    block_words[8..].copy_from_slice(&right_child_cv);
399
400    Output {
401        input_chaining_value: key_words,
402        block_words,
403        counter: 0,
404        block_len: BLOCK_LEN as u32,
405        flags: PARENT | flags,
406    }
407}
408
409// Parent chaining value
410fn parent_cv(
411    left_child_cv: [u32; 8],
412    right_child_cv: [u32; 8],
413    key_words: [u32; 8],
414    flags: u32,
415) -> [u32; 8] {
416    parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value()
417}
418
419/// BLAKE3 extendable output function
420///
421/// This struct implements the BLAKE3 algorithm as an XOF, capable of producing
422/// outputs of arbitrary length. It maintains the internal state required for
423/// incremental hashing and supports all three BLAKE3 modes of operation.
424///
425/// # Internal Structure
426///
427/// The implementation uses:
428/// - A chunk state for processing input data in 1024-byte chunks
429/// - A stack of chaining values for the tree structure
430/// - Secure key storage using `SecretBuffer`
431/// - Flags to indicate the current mode of operation
432///
433/// # Security
434///
435/// All sensitive data (keys and intermediate values) are:
436/// - Stored in secure memory containers
437/// - Properly zeroized on drop
438/// - Protected against timing attacks
439///
440/// # Thread Safety
441///
442/// `Blake3Xof` is not thread-safe for concurrent access. Each thread should
443/// use its own instance.
444#[derive(Clone)]
445pub struct Blake3Xof {
446    chunk_state: ChunkState,
447    key_words: SecretBuffer<32>, // Secure storage for key words (8 u32s = 32 bytes)
448    cv_stack: Vec<[u32; 8]>,
449    flags: u32,
450}
451
452// Implement Drop and ZeroizeOnDrop for Blake3Xof
453impl Drop for Blake3Xof {
454    fn drop(&mut self) {
455        self.zeroize();
456    }
457}
458
459impl zeroize::ZeroizeOnDrop for Blake3Xof {}
460
461// Manually implement Zeroize for Blake3Xof
462impl Zeroize for Blake3Xof {
463    fn zeroize(&mut self) {
464        self.chunk_state.zeroize();
465        self.key_words.zeroize();
466        for cv in self.cv_stack.iter_mut() {
467            cv.zeroize();
468        }
469        self.cv_stack.clear();
470        self.flags = 0;
471    }
472}
473
474impl Blake3Xof {
475    // Convert key words from SecretBuffer to [u32; 8]
476    fn get_key_words(&self) -> [u32; 8] {
477        let mut words = [0u32; 8];
478        let key_bytes = self.key_words.as_ref();
479        words_from_little_endian_bytes(key_bytes, &mut words);
480        words
481    }
482
483    fn push_stack(&mut self, cv: [u32; 8]) {
484        self.cv_stack.push(cv);
485    }
486
487    fn pop_stack(&mut self) -> Result<[u32; 8]> {
488        self.cv_stack.pop().ok_or(Error::Processing {
489            operation: "BLAKE3",
490            details: "Stack underflow",
491        })
492    }
493
494    fn add_chunk_chaining_value(
495        &mut self,
496        mut new_cv: [u32; 8],
497        mut total_chunks: u64,
498    ) -> Result<()> {
499        while total_chunks & 1 == 0 {
500            new_cv = parent_cv(self.pop_stack()?, new_cv, self.get_key_words(), self.flags);
501            total_chunks >>= 1;
502        }
503        self.push_stack(new_cv);
504        Ok(())
505    }
506
507    fn finalize(&mut self, out_slice: &mut [u8]) -> Result<()> {
508        let mut output = self.chunk_state.output();
509        let mut parent_nodes_remaining = self.cv_stack.len();
510
511        while parent_nodes_remaining > 0 {
512            parent_nodes_remaining -= 1;
513            output = parent_output(
514                self.cv_stack[parent_nodes_remaining],
515                output.chaining_value(),
516                self.get_key_words(),
517                self.flags,
518            );
519        }
520
521        output.root_output_bytes(out_slice);
522        Ok(())
523    }
524
525    /// Utility function for digest generation
526    ///
527    /// This is a convenience function that creates a BLAKE3 XOF instance,
528    /// processes the input data, and returns the requested number of output bytes.
529    ///
530    /// # Arguments
531    ///
532    /// * `data` - The input data to hash
533    /// * `len` - The desired output length in bytes
534    ///
535    /// # Returns
536    ///
537    /// A vector containing `len` bytes of output, or an error if the length is invalid.
538    ///
539    /// # Example
540    ///
541    /// ```rust,ignore
542    /// let hash = Blake3Xof::generate(b"hello world", 32)?;
543    /// assert_eq!(hash.len(), 32);
544    /// ```
545    pub fn generate(data: &[u8], len: usize) -> Result<Vec<u8>> {
546        Blake3Algorithm::validate_output_length(len)?;
547
548        let mut xof = Self::new();
549        xof.update(data)?;
550        let mut result = vec![0u8; len];
551        xof.squeeze(&mut result)?;
552        Ok(result)
553    }
554}
555
556impl ExtendableOutputFunction for Blake3Xof {
557    /// Creates a new BLAKE3 XOF instance in standard hashing mode.
558    ///
559    /// The instance is initialized with the standard BLAKE3 IV and is ready
560    /// to accept input data via the `update` method.
561    fn new() -> Self {
562        // Convert IV to bytes for SecretBuffer storage
563        let mut key_bytes = [0u8; 32];
564        words_to_little_endian_bytes(&IV, &mut key_bytes);
565
566        Self {
567            chunk_state: ChunkState::new(IV, 0, 0),
568            key_words: SecretBuffer::new(key_bytes),
569            cv_stack: Vec::new(),
570            flags: 0,
571        }
572    }
573
574    fn update(&mut self, mut input: &[u8]) -> Result<()> {
575        while !input.is_empty() {
576            if self.chunk_state.len() == CHUNK_LEN {
577                let chunk_cv = self.chunk_state.output().chaining_value();
578                let total_chunks = self.chunk_state.chunk_counter + 1;
579                self.add_chunk_chaining_value(chunk_cv, total_chunks)?;
580                self.chunk_state = ChunkState::new(self.get_key_words(), total_chunks, self.flags);
581            }
582
583            let want = CHUNK_LEN - self.chunk_state.len();
584            let take = core::cmp::min(want, input.len());
585            self.chunk_state.update_internal(&input[..take])?;
586            input = &input[take..];
587        }
588
589        Ok(())
590    }
591
592    fn finalize(&mut self) -> Result<()> {
593        Ok(())
594    }
595
596    fn squeeze(&mut self, output: &mut [u8]) -> Result<()> {
597        Blake3Algorithm::validate_output_length(output.len())?;
598        self.finalize(output)
599    }
600
601    fn squeeze_into_vec(&mut self, len: usize) -> Result<Vec<u8>> {
602        Blake3Algorithm::validate_output_length(len)?;
603        let mut result = vec![0u8; len];
604        self.squeeze(&mut result)?;
605        Ok(result)
606    }
607
608    fn reset(&mut self) -> Result<()> {
609        *self = Self::new();
610        Ok(())
611    }
612
613    fn security_level() -> usize {
614        Blake3Algorithm::SECURITY_LEVEL
615    }
616}
617
618impl KeyedXof for Blake3Xof {
619    /// Creates a new BLAKE3 XOF instance in keyed hashing mode.
620    ///
621    /// This mode uses a 256-bit key to create a MAC (Message Authentication Code).
622    /// The key is mixed into the initial state, providing authentication.
623    ///
624    /// # Arguments
625    ///
626    /// * `key` - A 32-byte (256-bit) key
627    ///
628    /// # Errors
629    ///
630    /// Returns an error if the key length is not exactly 32 bytes.
631    fn with_key(key: &[u8]) -> Result<Self> {
632        validate::length("BLAKE3 key", key.len(), KEY_LEN)?;
633
634        // Create SecretBuffer for the key
635        let key_buf = SecretBuffer::new({
636            let mut arr = [0u8; 32];
637            arr.copy_from_slice(key);
638            arr
639        });
640
641        // Convert key to key words for chunk state initialization
642        let mut key_words = [0u32; 8];
643        words_from_little_endian_bytes(key, &mut key_words);
644
645        let instance = Self {
646            chunk_state: ChunkState::new(key_words, 0, KEYED_HASH),
647            key_words: key_buf,
648            cv_stack: Vec::new(),
649            flags: KEYED_HASH,
650        };
651
652        // Zeroize the temporary key_words
653        key_words.zeroize();
654
655        Ok(instance)
656    }
657}
658
659impl DeriveKeyXof for Blake3Xof {
660    /// Creates a new BLAKE3 XOF instance in key derivation mode.
661    ///
662    /// This mode is designed for deriving keys from input key material.
663    /// It first hashes the context string to create a context-specific key,
664    /// then uses that key to process the actual key material.
665    ///
666    /// # Arguments
667    ///
668    /// * `context` - A context string that domain-separates different uses
669    ///
670    /// # Security Note
671    ///
672    /// The context string should be unique for each application to ensure
673    /// that keys derived for one purpose cannot be used for another.
674    fn for_derive_key(context: &[u8]) -> Result<Self> {
675        let mut context_hasher = Self::new();
676        context_hasher.update(context)?;
677
678        // Create key from context using DERIVE_KEY_CONTEXT flag
679        let context_key = EphemeralSecret::new({
680            let mut tmp = [0u8; KEY_LEN];
681            let mut output = context_hasher.chunk_state.output();
682            output.flags |= DERIVE_KEY_CONTEXT;
683            output.root_output_bytes(&mut tmp);
684            tmp
685        });
686
687        // Create SecretBuffer for the context key
688        let key_buf = SecretBuffer::new(*context_key);
689
690        // Convert context key to key words for chunk state initialization
691        let mut key_words = [0u32; 8];
692        words_from_little_endian_bytes(context_key.as_ref(), &mut key_words);
693
694        let instance = Self {
695            chunk_state: ChunkState::new(key_words, 0, DERIVE_KEY_MATERIAL),
696            key_words: key_buf,
697            cv_stack: Vec::new(),
698            flags: DERIVE_KEY_MATERIAL,
699        };
700
701        // Zeroize the temporary key_words
702        key_words.zeroize();
703
704        Ok(instance)
705    }
706}
707
708#[cfg(test)]
709mod tests;