// uhash_core/uhash.rs

//! Core UniversalHash v4 Implementation (Spec-Compliant)
//!
//! The algorithm uses 4 parallel chains with 512KB scratchpads each.
//! Each chain performs rounds using raw compression functions (AES, SHA-256, BLAKE3)
//! in a sequential pattern that prevents GPU parallelism.
//!
//! This implementation follows the spec exactly:
//! - Seed generation: BLAKE3(header || (nonce ⊕ (c × golden_ratio)))
//! - Primitive rotation: (nonce + c) mod 3, then +1 before each round
//! - Write-back: Same address as read (not computed from new state)
//! - No cross-chain mixing (spec doesn't specify it)
13#[cfg(not(feature = "std"))]
14use alloc::vec;
15#[cfg(not(feature = "std"))]
16use alloc::vec::Vec;
17
18use blake3::Hasher as Blake3;
19use sha2::{Digest, Sha256};
20
21#[cfg(feature = "parallel")]
22use rayon::prelude::*;
23
24use crate::params::*;
25use crate::primitives::{aes_compress, blake3_compress, sha256_compress};
26
/// Mask for address calculation (BLOCKS_PER_SCRATCHPAD - 1)
/// Since BLOCKS_PER_SCRATCHPAD = 8192 = 2^13, this is 0x1FFF
// NOTE(review): `compute_address` uses this mask in place of a modulo, so
// correctness relies on BLOCKS_PER_SCRATCHPAD staying a power of two.
const ADDRESS_MASK: usize = BLOCKS_PER_SCRATCHPAD - 1;

/// Golden ratio constant for seed generation (Fibonacci hashing constant)
// 2^64 / φ rounded to odd; multiplied by the chain index to derive distinct
// per-chain nonce offsets during seed generation.
const GOLDEN_RATIO: u64 = 0x9E3779B97F4A7C15;
33
/// UniversalHash v4 hasher
///
/// This struct maintains the scratchpads and chain states needed for hashing.
/// It can be reused for multiple hashes to avoid repeated allocations:
/// every call to [`UniversalHash::hash`] fully re-seeds the state below.
pub struct UniversalHash {
    /// 4 scratchpads, one per chain (512KB each)
    // Heap-allocated once in `new` and overwritten (never reallocated) on
    // each subsequent hash.
    scratchpads: Vec<Vec<u8>>,
    /// Current state for each chain
    // Re-seeded at the start of every hash by `init_scratchpads`.
    chain_states: [[u8; 32]; CHAINS],
    /// Effective nonce extracted from input (last 8 bytes)
    // Cached by `hash` so `init_scratchpads` / `execute_rounds` can read it.
    effective_nonce: u64,
}
46
impl UniversalHash {
    /// Create a new UniversalHash instance
    ///
    /// Allocates 2MB of memory for the scratchpads
    /// (CHAINS buffers of SCRATCHPAD_SIZE bytes each, zero-initialized).
    pub fn new() -> Self {
        Self {
            scratchpads: vec![vec![0u8; SCRATCHPAD_SIZE]; CHAINS],
            chain_states: [[0u8; 32]; CHAINS],
            effective_nonce: 0,
        }
    }

    /// Compute the UniversalHash of input data
    ///
    /// The input should be formatted as:
    /// `epoch_seed || miner_address || timestamp || nonce`
    /// where nonce is the last 8 bytes.
    ///
    /// Returns a 32-byte hash.
    pub fn hash(&mut self, input: &[u8]) -> [u8; 32] {
        // Extract effective nonce from last 8 bytes of input (or hash if shorter)
        self.effective_nonce = extract_nonce(input);

        // Phase 1: Initialize scratchpads using input (spec-compliant seed
        // generation). This overwrites all state from any previous hash.
        self.init_scratchpads(input);

        // Phase 2: Execute main mixing rounds (spec-compliant, no cross-chain mixing)
        self.execute_rounds();

        // Phase 3: Finalize and produce output
        self.finalize()
    }

    /// Initialize all scratchpads from input using expansion
    /// Spec: seed[c] = BLAKE3_256(header || (nonce ⊕ (c × golden_ratio)))
    #[cfg(feature = "parallel")]
    fn init_scratchpads(&mut self, input: &[u8]) {
        let nonce = self.effective_nonce;

        // Pre-compute all chain seeds sequentially (cheap), then fill the
        // large scratchpads in parallel below. The seed is also the chain's
        // initial round state.
        let mut chain_seeds: [[u8; 32]; CHAINS] = [[0u8; 32]; CHAINS];
        for (chain, (seed, state)) in chain_seeds
            .iter_mut()
            .zip(self.chain_states.iter_mut())
            .enumerate()
        {
            // Spec: nonce ⊕ (c × golden_ratio)
            let offset = (chain as u64).wrapping_mul(GOLDEN_RATIO);
            let modified_nonce = nonce ^ offset;

            // Spec: BLAKE3(header || modified_nonce)
            // Header is input without last 8 bytes (nonce); saturating_sub
            // keeps this safe for inputs shorter than 8 bytes.
            let header_len = input.len().saturating_sub(8);
            let mut hasher = Blake3::new();
            hasher.update(&input[..header_len]);
            hasher.update(&modified_nonce.to_le_bytes());
            let hash = hasher.finalize();
            seed.copy_from_slice(hash.as_bytes());
            state.copy_from_slice(hash.as_bytes());
        }

        // Fill scratchpads in parallel — one rayon task per chain.
        self.scratchpads
            .par_iter_mut()
            .zip(chain_seeds.par_iter())
            .for_each(|(scratchpad, seed)| {
                fill_scratchpad_aes(scratchpad, seed);
            });
    }

    /// Initialize all scratchpads from input using expansion (sequential fallback)
    /// Spec: seed[c] = BLAKE3_256(header || (nonce ⊕ (c × golden_ratio)))
    ///
    /// Must stay byte-for-byte equivalent to the parallel variant above.
    #[cfg(not(feature = "parallel"))]
    fn init_scratchpads(&mut self, input: &[u8]) {
        let nonce = self.effective_nonce;
        // Header is input without last 8 bytes (nonce); saturating_sub keeps
        // this safe for inputs shorter than 8 bytes.
        let header_len = input.len().saturating_sub(8);

        for (chain, state) in self.chain_states.iter_mut().enumerate() {
            // Spec: nonce ⊕ (c × golden_ratio)
            let offset = (chain as u64).wrapping_mul(GOLDEN_RATIO);
            let modified_nonce = nonce ^ offset;

            // Spec: BLAKE3(header || modified_nonce)
            let mut hasher = Blake3::new();
            hasher.update(&input[..header_len]);
            hasher.update(&modified_nonce.to_le_bytes());
            let hash = hasher.finalize();

            // The seed doubles as the chain's initial round state.
            state.copy_from_slice(hash.as_bytes());

            // Fill scratchpad using AES-based expansion
            let seed_array: [u8; 32] = hash.as_bytes().try_into().unwrap();
            fill_scratchpad_aes(&mut self.scratchpads[chain], &seed_array);
        }
    }

    /// Execute the main mixing rounds (spec-compliant: no cross-chain mixing)
    #[cfg(feature = "parallel")]
    fn execute_rounds(&mut self) {
        let nonce = self.effective_nonce;

        // Process all chains in parallel - each chain runs all rounds independently.
        // Spec does NOT specify cross-chain mixing, so we don't do it.
        // Rounds within a chain are strictly sequential (each depends on the
        // previous state), which is what defeats GPU-style parallelism.
        self.scratchpads
            .par_iter_mut()
            .zip(self.chain_states.par_iter_mut())
            .enumerate()
            .for_each(|(chain, (scratchpad, state))| {
                // Spec: primitive = (nonce + c) mod 3
                let initial_primitive = ((nonce as usize) + chain) % 3;

                // Execute all rounds for this chain
                for round in 0..ROUNDS {
                    round_step_spec_compliant(scratchpad, state, initial_primitive, round);
                }
            });
    }

    /// Execute the main mixing rounds (sequential fallback, spec-compliant)
    ///
    /// Must stay equivalent to the parallel variant above.
    #[cfg(not(feature = "parallel"))]
    fn execute_rounds(&mut self) {
        let nonce = self.effective_nonce;

        // Process each chain independently (spec-compliant: no cross-chain mixing)
        for chain in 0..CHAINS {
            // Spec: primitive = (nonce + c) mod 3
            let initial_primitive = ((nonce as usize) + chain) % 3;

            // Execute all rounds for this chain
            for round in 0..ROUNDS {
                round_step_spec_compliant(
                    &mut self.scratchpads[chain],
                    &mut self.chain_states[chain],
                    initial_primitive,
                    round,
                );
            }
        }
    }

    /// Finalize and produce the 32-byte output hash per spec
    /// Spec: result = BLAKE3_256(SHA256_256(combined))
    fn finalize(&self) -> [u8; 32] {
        // XOR all chain states together into a single 32-byte value.
        let mut combined = [0u8; 32];
        for state in &self.chain_states {
            for i in 0..32 {
                combined[i] ^= state[i];
            }
        }

        // Double hash: SHA256 then BLAKE3 (per spec)
        let sha_hash = Sha256::digest(combined);
        let mut hasher = Blake3::new();
        hasher.update(&sha_hash);
        hasher.finalize().into()
    }
}
205
206/// Extract nonce from input (last 8 bytes, or hash if shorter)
207#[inline(always)]
208fn extract_nonce(input: &[u8]) -> u64 {
209    if input.len() >= 8 {
210        // Use last 8 bytes as nonce
211        let nonce_bytes: [u8; 8] = input[input.len() - 8..].try_into().unwrap();
212        u64::from_le_bytes(nonce_bytes)
213    } else {
214        // For short inputs, hash to get a nonce
215        let hash = blake3::hash(input);
216        let bytes: [u8; 8] = hash.as_bytes()[..8].try_into().unwrap();
217        u64::from_le_bytes(bytes)
218    }
219}
220
221/// Fill a scratchpad using AES-based expansion per spec
222/// Spec:
223///   key = seed[0:16]
224///   state = seed[16:32]
225///   For i = 0 to NUM_BLOCKS - 1:
226///     state = AES_4Rounds(state, key)
227///     scratchpad[i × 64 : (i+1) × 64] = state || AES_4Rounds(state, key)
228#[inline(always)]
229fn fill_scratchpad_aes(scratchpad: &mut [u8], seed: &[u8; 32]) {
230    use crate::primitives::aes_expand_block;
231
232    let key: [u8; 16] = seed[0..16].try_into().unwrap();
233    let mut state: [u8; 16] = seed[16..32].try_into().unwrap();
234
235    for i in 0..BLOCKS_PER_SCRATCHPAD {
236        // Apply 4 AESENC rounds (per spec)
237        state = aes_expand_block(&state, &key);
238        let offset = i * BLOCK_SIZE;
239
240        // First 16 bytes: state after first AES
241        scratchpad[offset..offset + 16].copy_from_slice(&state);
242
243        // Next 16 bytes: state after second AES (per spec)
244        let state2 = aes_expand_block(&state, &key);
245        scratchpad[offset + 16..offset + 32].copy_from_slice(&state2);
246
247        // Remaining 32 bytes: duplicate first 32 bytes
248        // (spec says 32 bytes per block but BLOCK_SIZE is 64)
249        scratchpad[offset + 32..offset + 48].copy_from_slice(&state);
250        scratchpad[offset + 48..offset + 64].copy_from_slice(&state2);
251    }
252}
253
254/// Single round step for one chain (spec-compliant version)
255///
256/// Spec:
257/// - Address: computed from current state
258/// - Primitive: (initial_primitive + round + 1) mod 3  (increment BEFORE use)
259/// - Write-back: SAME address as read (not new address)
260#[inline(always)]
261fn round_step_spec_compliant(
262    scratchpad: &mut [u8],
263    state: &mut [u8; 32],
264    initial_primitive: usize,
265    round: usize,
266) {
267    // Compute memory address from state per spec formula
268    let addr = compute_address(state, round);
269
270    // Read block from scratchpad
271    // SAFETY: addr is always within bounds due to ADDRESS_MASK
272    let block: [u8; BLOCK_SIZE] = unsafe {
273        core::ptr::read(scratchpad.as_ptr().add(addr) as *const [u8; BLOCK_SIZE])
274    };
275
276    // Spec: primitive = (primitive + 1) mod 3 BEFORE applying
277    // Where primitive starts at (nonce + chain) mod 3
278    // So at round r: primitive = (initial_primitive + r + 1) mod 3
279    let primitive = (initial_primitive + round + 1) % 3;
280
281    // Apply raw compression function based on primitive
282    let new_state = match primitive {
283        0 => aes_compress(state, &block),
284        1 => sha256_compress(state, &block),
285        _ => blake3_compress(state, &block),
286    };
287
288    // Spec: Write back to SAME address as read (not computed from new_state!)
289    // SAFETY: addr is always within bounds due to ADDRESS_MASK
290    unsafe {
291        core::ptr::copy_nonoverlapping(new_state.as_ptr(), scratchpad.as_mut_ptr().add(addr), 32);
292    }
293
294    // Update chain state
295    *state = new_state;
296}
297
298/// Compute scratchpad address from state per spec
299/// Spec: mixed = state[0:8] ⊕ state[8:16] ⊕ rotl64(round, 13) ⊕ (round × 0x517cc1b727220a95)
300///       addr = (mixed mod NUM_BLOCKS) × BLOCK_SIZE
301#[inline(always)]
302fn compute_address(state: &[u8; 32], round: usize) -> usize {
303    const MIXING_CONSTANT: u64 = 0x517cc1b727220a95;
304
305    // Read u64s directly using pointer reads (faster than try_into)
306    // SAFETY: state is 32 bytes, reading at offsets 0 and 8 is safe
307    let state_lo = unsafe { core::ptr::read_unaligned(state.as_ptr() as *const u64) };
308    let state_hi = unsafe { core::ptr::read_unaligned(state.as_ptr().add(8) as *const u64) };
309    let round_u64 = round as u64;
310
311    // Spec formula for unpredictable address
312    let mixed = state_lo
313        ^ state_hi
314        ^ round_u64.rotate_left(13)
315        ^ round_u64.wrapping_mul(MIXING_CONSTANT);
316
317    // Use bitwise AND instead of modulo (NUM_BLOCKS is power of 2)
318    ((mixed as usize) & ADDRESS_MASK) * BLOCK_SIZE
319}
320
321impl Default for UniversalHash {
322    fn default() -> Self {
323        Self::new()
324    }
325}
326
327/// Convenience function for single-shot hashing
328///
329/// Creates a new hasher, computes the hash, and returns it.
330/// For multiple hashes, prefer creating a `UniversalHash` instance
331/// and reusing it to avoid repeated memory allocation.
332pub fn hash(input: &[u8]) -> [u8; 32] {
333    let mut hasher = UniversalHash::new();
334    hasher.hash(input)
335}
336
/// Check if a hash meets the required difficulty
///
/// Difficulty is measured as the number of leading zero bits required.
/// For example, difficulty 16 requires the first 2 bytes to be zero.
///
/// # Example
///
/// ```rust
/// use uhash_core::meets_difficulty;
///
/// // Hash with 20 leading zero bits (0x00, 0x00, 0x0F = 16 + 4 zeros)
/// let hash: [u8; 32] = [
///     0x00, 0x00, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
///     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
///     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
///     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
/// ];
/// assert!(meets_difficulty(&hash, 16));  // 16 leading zeros - pass
/// assert!(meets_difficulty(&hash, 20));  // 20 leading zeros - pass
/// assert!(!meets_difficulty(&hash, 21)); // Only 20 zeros - fail
/// ```
#[inline(always)]
pub fn meets_difficulty(hash: &[u8; 32], difficulty: u32) -> bool {
    let mut leading = 0u32;

    for &byte in hash {
        // `u8::leading_zeros` returns 8 for a zero byte, so zero bytes
        // contribute a full byte of zeros and the scan continues; the first
        // non-zero byte contributes its partial count and ends the scan.
        leading += byte.leading_zeros();
        if byte != 0 {
            break;
        }
    }

    leading >= difficulty
}