// uhash_core/hash.rs

//! Core UniversalHash v4 Implementation (Spec-Compliant)
//!
//! The algorithm uses 4 parallel chains with 512KB scratchpads each.
//! Each chain performs rounds using raw compression functions (AES, SHA-256, BLAKE3)
//! in a sequential pattern that prevents GPU parallelism.
//!
//! This implementation follows the spec exactly:
//! - Seed generation: `BLAKE3(header || (nonce ⊕ (c × golden_ratio)))`
//! - Primitive rotation: `(nonce + c) mod 3`, then `+1` before each round
//! - Write-back: same address as read (not computed from the new state)
//! - No cross-chain mixing (the spec doesn't specify it)

#[cfg(not(feature = "std"))]
use alloc::vec;
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use blake3::Hasher as Blake3;
use sha2::{Digest, Sha256};

use crate::params::*;
use crate::primitives::{aes_compress, blake3_compress, sha256_compress};
23
24/// Mask for address calculation (BLOCKS_PER_SCRATCHPAD - 1)
25/// Since BLOCKS_PER_SCRATCHPAD = 8192 = 2^13, this is 0x1FFF
26const ADDRESS_MASK: usize = BLOCKS_PER_SCRATCHPAD - 1;
27
28/// Golden ratio constant for seed generation (Fibonacci hashing constant)
29const GOLDEN_RATIO: u64 = 0x9E3779B97F4A7C15;
30
/// Select the starting primitive index for a chain.
///
/// Keeps primitive selection architecture-stable across 32-bit/64-bit
/// targets: `(nonce + chain) mod 3` must be computed on the full u64
/// nonce, never on a truncated `usize`.
#[inline(always)]
fn initial_primitive_index(nonce: u64, chain: usize) -> usize {
    let combined = nonce.wrapping_add(chain as u64);
    (combined % 3) as usize
}
37
38/// UniversalHash v4 hasher
39///
40/// This struct maintains the scratchpads and chain states needed for hashing.
41/// It can be reused for multiple hashes to avoid repeated allocations.
42pub struct UniversalHash {
43 /// 4 scratchpads, one per chain (512KB each)
44 scratchpads: Vec<Vec<u8>>,
45 /// Current state for each chain
46 chain_states: [[u8; 32]; CHAINS],
47 /// Effective nonce extracted from input (last 8 bytes)
48 effective_nonce: u64,
49}
50
51impl UniversalHash {
52 /// Create a new UniversalHash instance
53 ///
54 /// Allocates 2MB of memory for the scratchpads.
55 pub fn new() -> Self {
56 Self {
57 scratchpads: vec![vec![0u8; SCRATCHPAD_SIZE]; CHAINS],
58 chain_states: [[0u8; 32]; CHAINS],
59 effective_nonce: 0,
60 }
61 }
62
63 /// Compute the UniversalHash of input data
64 ///
65 /// The input should be formatted as:
66 /// `epoch_seed || miner_address || timestamp || nonce`
67 /// where nonce is the last 8 bytes.
68 ///
69 /// Returns a 32-byte hash.
70 pub fn hash(&mut self, input: &[u8]) -> [u8; 32] {
71 // Extract effective nonce from last 8 bytes of input (or hash if shorter)
72 self.effective_nonce = extract_nonce(input);
73
74 // Phase 1: Initialize scratchpads using input (spec-compliant seed generation)
75 self.init_scratchpads(input);
76
77 // Phase 2: Execute main mixing rounds (spec-compliant, no cross-chain mixing)
78 self.execute_rounds();
79
80 // Phase 3: Finalize and produce output
81 self.finalize()
82 }
83
84 /// Initialize all scratchpads from input using expansion
85 /// Spec: seed[c] = BLAKE3_256(header || (nonce ⊕ (c × golden_ratio)))
86 fn init_scratchpads(&mut self, input: &[u8]) {
87 let nonce = self.effective_nonce;
88 let header_len = input.len().saturating_sub(8);
89
90 for (chain, state) in self.chain_states.iter_mut().enumerate() {
91 // Spec: nonce ⊕ (c × golden_ratio)
92 let offset = (chain as u64).wrapping_mul(GOLDEN_RATIO);
93 let modified_nonce = nonce ^ offset;
94
95 // Spec: BLAKE3(header || modified_nonce)
96 let mut hasher = Blake3::new();
97 hasher.update(&input[..header_len]);
98 hasher.update(&modified_nonce.to_le_bytes());
99 let hash = hasher.finalize();
100
101 let hash_bytes = hash.as_bytes();
102 state.copy_from_slice(hash_bytes);
103
104 // Fill scratchpad using AES-based expansion
105 let mut seed_array = [0u8; 32];
106 seed_array.copy_from_slice(hash_bytes);
107 fill_scratchpad_aes(&mut self.scratchpads[chain], &seed_array);
108 }
109 }
110
111 /// Execute the main mixing rounds (spec-compliant: no cross-chain mixing)
112 fn execute_rounds(&mut self) {
113 let nonce = self.effective_nonce;
114
115 // Process each chain independently (spec-compliant: no cross-chain mixing)
116 for chain in 0..CHAINS {
117 // Spec: primitive = (nonce + c) mod 3
118 let initial_primitive = initial_primitive_index(nonce, chain);
119
120 // Execute all rounds for this chain
121 for round in 0..ROUNDS {
122 round_step_spec_compliant(
123 &mut self.scratchpads[chain],
124 &mut self.chain_states[chain],
125 initial_primitive,
126 round,
127 );
128 }
129 }
130 }
131
132 /// Finalize and produce the 32-byte output hash per spec
133 /// Spec: result = BLAKE3_256(SHA256_256(combined))
134 fn finalize(&self) -> [u8; 32] {
135 // XOR all chain states together
136 let mut combined = [0u8; 32];
137 for state in &self.chain_states {
138 for i in 0..32 {
139 combined[i] ^= state[i];
140 }
141 }
142
143 // Double hash: SHA256 then BLAKE3 (per spec)
144 let sha_hash = Sha256::digest(combined);
145 let mut hasher = Blake3::new();
146 hasher.update(&sha_hash);
147 hasher.finalize().into()
148 }
149}
150
151/// Extract nonce from input (last 8 bytes, or hash if shorter)
152#[inline(always)]
153fn extract_nonce(input: &[u8]) -> u64 {
154 if input.len() >= 8 {
155 // Use last 8 bytes as nonce
156 let nonce_bytes: [u8; 8] = input[input.len() - 8..].try_into().unwrap();
157 u64::from_le_bytes(nonce_bytes)
158 } else {
159 // For short inputs, hash to get a nonce
160 let hash = blake3::hash(input);
161 let bytes: [u8; 8] = hash.as_bytes()[..8].try_into().unwrap();
162 u64::from_le_bytes(bytes)
163 }
164}
165
166/// Fill a scratchpad using AES-based expansion per spec
167/// Spec:
168/// key = seed[0:16]
169/// state = seed[16:32]
170/// For i = 0 to NUM_BLOCKS - 1:
171/// state = AES_4Rounds(state, key)
172/// scratchpad[i × 64 : (i+1) × 64] = state || AES_4Rounds(state, key)
173#[inline(always)]
174fn fill_scratchpad_aes(scratchpad: &mut [u8], seed: &[u8; 32]) {
175 use crate::primitives::aes_expand_block;
176
177 let key: [u8; 16] = seed[0..16].try_into().unwrap();
178 let mut state: [u8; 16] = seed[16..32].try_into().unwrap();
179
180 for i in 0..BLOCKS_PER_SCRATCHPAD {
181 // Apply 4 AESENC rounds (per spec)
182 state = aes_expand_block(&state, &key);
183 let offset = i * BLOCK_SIZE;
184
185 // First 16 bytes: state after first AES
186 scratchpad[offset..offset + 16].copy_from_slice(&state);
187
188 // Next 16 bytes: state after second AES (per spec)
189 let state2 = aes_expand_block(&state, &key);
190 scratchpad[offset + 16..offset + 32].copy_from_slice(&state2);
191
192 // Remaining 32 bytes: duplicate first 32 bytes
193 // (spec says 32 bytes per block but BLOCK_SIZE is 64)
194 scratchpad[offset + 32..offset + 48].copy_from_slice(&state);
195 scratchpad[offset + 48..offset + 64].copy_from_slice(&state2);
196 }
197}
198
199/// Single round step for one chain (spec-compliant version)
200///
201/// Spec:
202/// - Address: computed from current state
203/// - Primitive: (initial_primitive + round + 1) mod 3 (increment BEFORE use)
204/// - Write-back: SAME address as read (not new address)
205#[inline(always)]
206fn round_step_spec_compliant(
207 scratchpad: &mut [u8],
208 state: &mut [u8; 32],
209 initial_primitive: usize,
210 round: usize,
211) {
212 // Compute memory address from state per spec formula
213 let addr = compute_address(state, round);
214
215 // Read block from scratchpad
216 // SAFETY: addr is always within bounds due to ADDRESS_MASK
217 let block: [u8; BLOCK_SIZE] =
218 unsafe { core::ptr::read(scratchpad.as_ptr().add(addr) as *const [u8; BLOCK_SIZE]) };
219
220 // Spec: primitive = (primitive + 1) mod 3 BEFORE applying
221 // Where primitive starts at (nonce + chain) mod 3
222 // So at round r: primitive = (initial_primitive + r + 1) mod 3
223 let primitive = (initial_primitive + round + 1) % 3;
224
225 // Apply raw compression function based on primitive
226 let new_state = match primitive {
227 0 => aes_compress(state, &block),
228 1 => sha256_compress(state, &block),
229 _ => blake3_compress(state, &block),
230 };
231
232 // Spec: Write back to SAME address as read (not computed from new_state!)
233 // SAFETY: addr is always within bounds due to ADDRESS_MASK
234 unsafe {
235 core::ptr::copy_nonoverlapping(new_state.as_ptr(), scratchpad.as_mut_ptr().add(addr), 32);
236 }
237
238 // Update chain state
239 *state = new_state;
240}
241
242/// Compute scratchpad address from state per spec
243/// Spec: mixed = state[0:8] ⊕ state[8:16] ⊕ rotl64(round, 13) ⊕ (round × 0x517cc1b727220a95)
244/// addr = (mixed mod NUM_BLOCKS) × BLOCK_SIZE
245#[inline(always)]
246fn compute_address(state: &[u8; 32], round: usize) -> usize {
247 const MIXING_CONSTANT: u64 = 0x517cc1b727220a95;
248
249 // Read u64s directly using pointer reads (faster than try_into)
250 // SAFETY: state is 32 bytes, reading at offsets 0 and 8 is safe
251 let state_lo = unsafe { core::ptr::read_unaligned(state.as_ptr() as *const u64) };
252 let state_hi = unsafe { core::ptr::read_unaligned(state.as_ptr().add(8) as *const u64) };
253 let round_u64 = round as u64;
254
255 // Spec formula for unpredictable address
256 let mixed =
257 state_lo ^ state_hi ^ round_u64.rotate_left(13) ^ round_u64.wrapping_mul(MIXING_CONSTANT);
258
259 // Use bitwise AND instead of modulo (NUM_BLOCKS is power of 2)
260 ((mixed as usize) & ADDRESS_MASK) * BLOCK_SIZE
261}
262
263impl Default for UniversalHash {
264 fn default() -> Self {
265 Self::new()
266 }
267}
268
269/// Convenience function for single-shot hashing
270///
271/// Creates a new hasher, computes the hash, and returns it.
272/// For multiple hashes, prefer creating a `UniversalHash` instance
273/// and reusing it to avoid repeated memory allocation.
274pub fn hash(input: &[u8]) -> [u8; 32] {
275 let mut hasher = UniversalHash::new();
276 hasher.hash(input)
277}