runar_compiler_rust/codegen/blake3.rs
1//! BLAKE3 compression codegen for Bitcoin Script.
2//!
3//! Port of packages/runar-compiler/src/passes/blake3-codegen.ts.
4//!
5//! emit_blake3_compress: [chainingValue(32 BE), block(64 BE)] -> [hash(32 BE)]
6//! emit_blake3_hash: [message(<=64 BE)] -> [hash(32 BE)]
7//!
8//! Architecture (same as sha256.rs):
9//! - All 32-bit words stored as 4-byte little-endian during computation.
10//! - LE additions via BIN2NUM/NUM2BIN (13 ops per add32).
11//! - Byte-aligned rotations (16, 8) via SPLIT/SWAP/CAT on LE (4 ops).
12//! - Non-byte-aligned rotations (12, 7) via LE->BE->rotrBE->BE->LE (31 ops).
13//! - BE<->LE conversion only at input unpack and output pack.
14//!
15//! Stack layout during rounds:
16//! [m0..m15, v0..v15] (all LE 4-byte values)
17//! v15 at TOS (depth 0), v0 at depth 15, m15 at depth 16, m0 at depth 31.
18
19use super::stack::{PushValue, StackOp};
20
21use std::sync::OnceLock;
22
23// =========================================================================
24// BLAKE3 constants
25// =========================================================================
26
/// BLAKE3 initialization vector (same constants as SHA-256's H0..H7):
/// first 32 bits of the fractional parts of the square roots of the
/// first eight primes.
const BLAKE3_IV: [u32; 8] = [
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
];

/// Fixed message-word permutation applied between rounds (BLAKE3 spec).
const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8];

// Domain-separation flags for the compression function (BLAKE3 spec).
const CHUNK_START: u32 = 1; // first block of a chunk
const CHUNK_END: u32 = 2; // last block of a chunk
const ROOT: u32 = 8; // final (root) compression
38
39// =========================================================================
40// Helper: encode u32 as 4-byte little-endian
41// =========================================================================
42
/// Encode a u32 as its 4-byte little-endian representation — the
/// in-computation word format used throughout this module.
///
/// Uses the standard-library `to_le_bytes` instead of manual shift/mask
/// extraction; behavior is identical.
fn u32_to_le(n: u32) -> Vec<u8> {
    n.to_le_bytes().to_vec()
}
51
/// Encode a u32 as its 4-byte big-endian representation — used only at the
/// input/output boundaries (e.g. pushing the IV as a BE chaining value).
///
/// Uses the standard-library `to_be_bytes` instead of manual shift/mask
/// extraction; behavior is identical.
fn u32_to_be(n: u32) -> Vec<u8> {
    n.to_be_bytes().to_vec()
}
60
61// =========================================================================
62// Precompute message schedule for all 7 rounds
63// =========================================================================
64
65/// For each round, compute which original message word index is used at each
66/// position. Returns msg_schedule[round][position] = original msg word index.
67fn compute_msg_schedule() -> [[usize; 16]; 7] {
68 let mut current: [usize; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
69 let mut schedule = [[0usize; 16]; 7];
70 for round in 0..7 {
71 schedule[round] = current;
72 let mut next = [0usize; 16];
73 for i in 0..16 {
74 next[i] = current[MSG_PERMUTATION[i]];
75 }
76 current = next;
77 }
78 schedule
79}
80
81// =========================================================================
82// Emitter with depth tracking
83// =========================================================================
84
/// Accumulates stack ops while tracking main- and alt-stack depths, so that
/// mis-balanced emission is caught at codegen time via `assert_depth`.
struct Emitter {
    /// Emitted ops, in program order.
    ops: Vec<StackOp>,
    /// Current main-stack item count, maintained by every typed helper.
    depth: i64,
    /// Current alt-stack item count (TOALTSTACK/FROMALTSTACK bookkeeping).
    alt_depth: i64,
}
90
impl Emitter {
    /// Create an emitter; `initial_depth` is how many items the caller has
    /// already placed on the main stack.
    fn new(initial_depth: i64) -> Self {
        Emitter {
            ops: Vec::new(),
            depth: initial_depth,
            alt_depth: 0,
        }
    }

    /// Append a pre-built op verbatim. Depth tracking is the CALLER's
    /// responsibility (used when splicing a cached op sequence).
    fn e_raw(&mut self, sop: StackOp) {
        self.ops.push(sop);
    }

    /// Emit a raw opcode by name with no depth adjustment; prefer the
    /// typed helpers (bin_op/uni_op/...) when the stack effect matters.
    fn oc(&mut self, code: &str) {
        self.ops.push(StackOp::Opcode(code.to_string()));
    }

    /// Push an integer literal. Net: +1.
    fn push_i(&mut self, v: i128) {
        self.ops.push(StackOp::Push(PushValue::Int(v)));
        self.depth += 1;
    }

    /// Push a byte-string literal. Net: +1.
    fn push_b(&mut self, v: Vec<u8>) {
        self.ops.push(StackOp::Push(PushValue::Bytes(v)));
        self.depth += 1;
    }

    /// OP_DUP: duplicate TOS. Net: +1.
    fn dup(&mut self) {
        self.ops.push(StackOp::Dup);
        self.depth += 1;
    }

    /// OP_DROP: discard TOS. Net: -1.
    fn drop(&mut self) {
        self.ops.push(StackOp::Drop);
        self.depth -= 1;
    }

    /// OP_SWAP: exchange the top two items. Net: 0.
    fn swap(&mut self) {
        self.ops.push(StackOp::Swap);
    }

    /// OP_OVER: copy the item at depth 1 to TOS. Net: +1.
    fn over(&mut self) {
        self.ops.push(StackOp::Over);
        self.depth += 1;
    }

    /// OP_ROT: rotate the top three items. Net: 0.
    fn rot(&mut self) {
        self.ops.push(StackOp::Rot);
    }

    /// Copy the item at depth `d` to TOS, choosing the cheapest encoding
    /// (OP_DUP for d=0, OP_OVER for d=1, else literal + OP_PICK). Net: +1.
    fn pick(&mut self, d: usize) {
        if d == 0 {
            self.dup();
            return;
        }
        if d == 1 {
            self.over();
            return;
        }
        self.push_i(d as i128);
        self.ops.push(StackOp::Pick { depth: d });
        // push_i added 1, pick removes the depth literal but adds the picked value = net 0
    }

    /// Move the item at depth `d` to TOS, choosing the cheapest encoding
    /// (no-op / OP_SWAP / OP_ROT / literal + OP_ROLL). Net: 0.
    fn roll(&mut self, d: usize) {
        if d == 0 {
            return;
        }
        if d == 1 {
            self.swap();
            return;
        }
        if d == 2 {
            self.rot();
            return;
        }
        self.push_i(d as i128);
        self.ops.push(StackOp::Roll { depth: d });
        self.depth -= 1; // push_i added 1, roll removes depth literal and item = net -1
    }

    /// OP_TOALTSTACK: move TOS to the alt stack. Main: -1, alt: +1.
    fn to_alt(&mut self) {
        self.oc("OP_TOALTSTACK");
        self.depth -= 1;
        self.alt_depth += 1;
    }

    /// OP_FROMALTSTACK: move alt TOS back to the main stack. Main: +1, alt: -1.
    fn from_alt(&mut self) {
        self.oc("OP_FROMALTSTACK");
        self.depth += 1;
        self.alt_depth -= 1;
    }

    /// Emit a binary opcode (consumes 2 items, produces 1). Net: -1.
    fn bin_op(&mut self, code: &str) {
        self.oc(code);
        self.depth -= 1;
    }

    /// Emit a unary opcode (consumes 1 item, produces 1). Net: 0.
    fn uni_op(&mut self, code: &str) {
        self.oc(code);
    }

    /// OP_SPLIT at the position given by TOS. Net: 0.
    fn split(&mut self) {
        self.oc("OP_SPLIT");
        // splits: consumes 2 (value + position), produces 2 = net 0
    }

    /// Split the byte string on TOS after its first 4 bytes. Net: +1.
    fn split4(&mut self) {
        self.push_i(4);
        self.split();
    }

    /// Panic unless the tracked main-stack depth equals `expected`.
    /// Catches emission bugs at codegen time rather than at script runtime.
    fn assert_depth(&self, expected: i64, msg: &str) {
        assert_eq!(
            self.depth, expected,
            "BLAKE3 codegen: {}. Expected depth {}, got {}",
            msg, expected, self.depth
        );
    }

    // --- Byte reversal (only for BE<->LE conversion at boundaries) ---

    /// Reverse 4 bytes on TOS: [abcd] -> [dcba]. Net: 0. 12 ops.
    /// Splits into 4 single bytes, then re-CATs them in reverse order.
    fn reverse_bytes4(&mut self) {
        self.push_i(1);
        self.split();
        self.push_i(1);
        self.split();
        self.push_i(1);
        self.split();
        self.swap();
        self.bin_op("OP_CAT");
        self.swap();
        self.bin_op("OP_CAT");
        self.swap();
        self.bin_op("OP_CAT");
    }

    // --- LE <-> Numeric conversions ---

    /// Convert 4-byte LE to unsigned script number. [le4] -> [num]. Net: 0. 3 ops.
    /// Appends a 0x00 most-significant byte so BIN2NUM reads it as unsigned.
    fn le2num(&mut self) {
        self.push_b(vec![0x00]); // unsigned padding
        self.bin_op("OP_CAT");
        self.uni_op("OP_BIN2NUM");
    }

    /// Convert script number to 4-byte LE (truncates to 32 bits). [num] -> [le4]. Net: 0. 5 ops.
    /// The 5th byte absorbs carry/sign from a 32-bit add, then is discarded.
    fn num2le(&mut self) {
        self.push_i(5);
        self.bin_op("OP_NUM2BIN"); // 5-byte LE
        self.push_i(4);
        self.split(); // [4-byte LE, overflow+sign]
        self.drop(); // discard overflow byte
    }

    // --- LE arithmetic ---

    /// [a(LE), b(LE)] -> [(a+b mod 2^32)(LE)]. Net: -1. 13 ops.
    fn add32(&mut self) {
        self.le2num();
        self.swap();
        self.le2num();
        self.bin_op("OP_ADD");
        self.num2le();
    }

    /// Add N LE values. [v0..vN-1] (vN-1=TOS) -> [sum(LE)]. Net: -(N-1).
    /// For n < 2 this is a no-op (a single value is already the "sum").
    fn add_n(&mut self, n: usize) {
        if n < 2 {
            return;
        }
        self.le2num();
        for _ in 1..n {
            self.swap();
            self.le2num();
            self.bin_op("OP_ADD");
        }
        self.num2le();
    }

    // --- ROTR using OP_LSHIFT/OP_RSHIFT (native BE byte-array shifts) ---

    /// ROTR(x, n) on BE 4-byte value. [x_BE] -> [rotated_BE]. Net: 0. 7 ops.
    /// Classic (x >> n) | (x << (32-n)); the fixed 4-byte operand width makes
    /// the shifts behave like 32-bit register shifts.
    fn rotr_be(&mut self, n: usize) {
        self.dup(); // [x, x]
        self.push_i(n as i128);
        self.bin_op("OP_RSHIFT"); // [x, x>>n]
        self.swap(); // [x>>n, x]
        self.push_i((32 - n) as i128);
        self.bin_op("OP_LSHIFT"); // [x>>n, x<<(32-n)]
        self.bin_op("OP_OR"); // [ROTR result]
    }

    // --- ROTR on LE values ---

    /// ROTR(x, 16) on LE 4-byte value. Net: 0. 4 ops.
    /// Swaps the two 16-bit halves: [b0,b1,b2,b3] -> [b2,b3,b0,b1].
    fn rotr16_le(&mut self) {
        self.push_i(2);
        self.split(); // [lo2, hi2]
        self.swap(); // [hi2, lo2]
        self.bin_op("OP_CAT"); // [hi2||lo2]
    }

    /// ROTR(x, 8) on LE 4-byte value. Net: 0. 4 ops.
    /// [b0,b1,b2,b3] -> [b1,b2,b3,b0]
    fn rotr8_le(&mut self) {
        self.push_i(1);
        self.split(); // [b0, b1b2b3]
        self.swap(); // [b1b2b3, b0]
        self.bin_op("OP_CAT"); // [b1b2b3||b0]
    }

    /// ROTR(x, n) on LE 4-byte value (general, non-byte-aligned). Net: 0. 31 ops.
    /// Converts LE->BE, applies rotr_be, converts back. Used for the 12- and
    /// 7-bit rotations, which cannot be done with byte splicing alone.
    fn rotr_le_general(&mut self, n: usize) {
        self.reverse_bytes4(); // LE -> BE (12 ops)
        self.rotr_be(n); // rotate on BE (7 ops)
        self.reverse_bytes4(); // BE -> LE (12 ops)
    }

    /// Convert N x BE words on TOS to LE, preserving stack order.
    /// Each word is reversed and parked on the alt stack (reversing the
    /// order), then popped back (restoring it). Net: 0.
    fn be_words_to_le(&mut self, n: usize) {
        for _ in 0..n {
            self.reverse_bytes4();
            self.to_alt();
        }
        for _ in 0..n {
            self.from_alt();
        }
    }
}
324
325// =========================================================================
326// State word position tracker
327// =========================================================================
328
/// Records where each of the 16 BLAKE3 state words currently lives on the
/// main stack (depth 0 = TOS). The 16 message words sit below the state
/// area at fixed depths and are not tracked here.
struct StateTracker {
    /// positions[i] = current depth of state word v[i], measured from TOS.
    /// Phase 4 of the compressor temporarily stores -1 for words that have
    /// been moved off to the alt stack.
    positions: [i32; 16],
}

impl StateTracker {
    /// Canonical starting layout: v15 on TOS (depth 0), v0 buried at depth 15.
    fn new() -> Self {
        let mut positions = [0i32; 16];
        for (word, slot) in positions.iter_mut().enumerate() {
            *slot = 15 - word as i32;
        }
        StateTracker { positions }
    }

    /// Current depth (from TOS) of state word `word_idx`.
    fn depth(&self, word_idx: usize) -> i32 {
        self.positions[word_idx]
    }

    /// Account for an OP_ROLL that moved `word_idx` to TOS: every tracked
    /// word that sat above it sinks one slot; the rolled word lands at 0.
    /// Words parked on the alt stack (position < 0) are left untouched.
    fn on_roll_to_top(&mut self, word_idx: usize) {
        let rolled_from = self.positions[word_idx];
        for (other, pos) in self.positions.iter_mut().enumerate() {
            if other != word_idx && *pos >= 0 && *pos < rolled_from {
                *pos += 1;
            }
        }
        self.positions[word_idx] = 0;
    }
}
361
362// =========================================================================
363// G function (quarter-round)
364// =========================================================================
365
/// Emit one half of the G function.
/// Stack entry: [a, b, c, d, m] (m on TOS) -- 5 items
/// Stack exit: [a', b', c', d'] (d' on TOS) -- 4 items
/// Net depth: -1
///
/// Operations:
/// a' = a + b + m
/// d' = (d ^ a') >>> rotD
/// c' = c + d'
/// b' = (original_b ^ c') >>> rotB
///
/// rotD is byte-aligned (16 or 8) and uses the cheap splice rotation;
/// rotB (12 or 7) always goes through the general BE round-trip rotation.
/// The alt stack holds one value (original b) between steps 1 and 4.
fn emit_half_g(em: &mut Emitter, rot_d: usize, rot_b: usize) {
    let d0 = em.depth;

    // Save original b for step 4 (b is at depth 3); step 1 consumes b.
    em.pick(3);
    em.to_alt();

    // Step 1: a' = a + b + m
    // Stack: [a, b, c, d, m] -- a=4, b=3, c=2, d=1, m=0
    em.roll(3); // [a, c, d, m, b]
    em.roll(4); // [c, d, m, b, a]
    em.add_n(3); // [c, d, a']
    em.assert_depth(d0 - 2, "halfG step1");

    // Step 2: d' = (d ^ a') >>> rotD
    // Stack: [c, d, a'] -- c=2, d=1, a'=0
    em.dup(); // [c, d, a', a']
    em.rot(); // [c, a', a', d]
    em.bin_op("OP_XOR"); // [c, a', (d^a')]
    if rot_d == 16 {
        em.rotr16_le();
    } else if rot_d == 8 {
        em.rotr8_le();
    } else {
        em.rotr_le_general(rot_d);
    }
    em.assert_depth(d0 - 2, "halfG step2");

    // Step 3: c' = c + d'
    // Stack: [c, a', d']
    em.dup(); // [c, a', d', d']
    em.roll(3); // [a', d', d', c]
    em.add32(); // [a', d', c']
    em.assert_depth(d0 - 2, "halfG step3");

    // Step 4: b' = (original_b ^ c') >>> rotB
    // Stack: [a', d', c']
    em.from_alt(); // [a', d', c', b]
    em.over(); // [a', d', c', b, c']
    em.bin_op("OP_XOR"); // [a', d', c', (b^c')]
    em.rotr_le_general(rot_b);
    // Stack: [a', d', c', b']
    em.assert_depth(d0 - 1, "halfG step4");

    // Rearrange: [a', d', c', b'] -> [a', b', c', d']
    em.swap(); // [a', d', b', c']
    em.rot(); // [a', b', c', d']
    em.assert_depth(d0 - 1, "halfG done");
}
425
/// Emit the full G function (quarter-round): two half-G applications, the
/// first mixing in mx with rotations (16, 12), the second mixing in my with
/// rotations (8, 7) -- the standard BLAKE3 rotation constants.
/// Stack entry: [a, b, c, d, mx, my] (my on TOS) -- 6 items
/// Stack exit: [a', b', c', d'] (d' on TOS) -- 4 items
/// Net depth: -2
fn emit_g(em: &mut Emitter) {
    let d0 = em.depth;

    // Save my to alt for phase 2 (phase 1 only consumes mx).
    em.to_alt(); // [a, b, c, d, mx]

    // Phase 1: first half with mx, ROTR(16) and ROTR(12)
    emit_half_g(em, 16, 12);
    em.assert_depth(d0 - 2, "G phase1");

    // Restore my for phase 2
    em.from_alt(); // [a', b', c', d', my]
    em.assert_depth(d0 - 1, "G before phase2");

    // Phase 2: second half with my, ROTR(8) and ROTR(7)
    emit_half_g(em, 8, 7);
    em.assert_depth(d0 - 2, "G done");
}
448
449// =========================================================================
450// G call with state management
451// =========================================================================
452
/// Emit a single G call with state word roll management.
///
/// Rolls the 4 state words (ai, bi, ci, di) to the top of the stack (d ends
/// up on TOS), COPIES the two message words with OP_PICK from the fixed
/// message area below the state, runs G, then records the result positions
/// in the tracker. Net depth: 0.
fn emit_g_call(
    em: &mut Emitter,
    tracker: &mut StateTracker,
    ai: usize,
    bi: usize,
    ci: usize,
    di: usize,
    mx_orig_idx: usize,
    my_orig_idx: usize,
) {
    let d0 = em.depth;

    // Roll 4 state words to top: a, b, c, d (d ends up as TOS)
    for &idx in &[ai, bi, ci, di] {
        let d = tracker.depth(idx) as usize;
        em.roll(d);
        tracker.on_roll_to_top(idx);
    }

    // Pick message words from below the 16 state word area.
    // m[i] is at depth: 16 (state words) + (15 - i), since m15 sits just
    // below the state and m0 is deepest.
    em.pick(16 + (15 - mx_orig_idx));
    em.pick(16 + (15 - my_orig_idx) + 1); // +1 for mx just pushed
    em.assert_depth(d0 + 2, "before G");

    // Run G: consumes 6 (a, b, c, d, mx, my), produces 4 (a', b', c', d')
    emit_g(em);
    em.assert_depth(d0, "after G");

    // Update tracker: result words at depths 0-3 (a' deepest of the four).
    // The rolls above already left the tracker in this configuration; the
    // explicit assignment restates the invariant.
    tracker.positions[ai] = 3;
    tracker.positions[bi] = 2;
    tracker.positions[ci] = 1;
    tracker.positions[di] = 0;
}
492
493// =========================================================================
494// Full compression ops generator
495// =========================================================================
496
/// Build the complete op sequence for one BLAKE3 compression:
/// [chainingValue(32 BE), block(64 BE)] -> [hash(32 BE)]. Net depth: -1.
/// The sequence is deterministic, so callers cache it (see COMPRESS_OPS).
fn generate_compress_ops() -> Vec<StackOp> {
    let mut em = Emitter::new(2);
    let msg_schedule = compute_msg_schedule();

    // ================================================================
    // Phase 1: Unpack block into 16 LE message words
    // ================================================================
    // Stack: [chainingValue(32 BE), block(64 BE)]
    // Split block into 16 x 4-byte BE words (15 splits), convert to LE.
    for _ in 0..15 {
        em.split4();
    }
    em.assert_depth(17, "after block unpack"); // 16 block words + 1 chainingValue
    em.be_words_to_le(16);
    em.assert_depth(17, "after block LE convert");
    // Stack: [CV, m0(LE), m1(LE), ..., m15(LE)] -- m0 deepest of msg words, m15 TOS

    // ================================================================
    // Phase 2: Initialize 16-word state on top of message words
    // ================================================================
    // Move CV to alt (it's below the 16 msg words, at depth 16)
    em.roll(16);
    em.to_alt();
    em.assert_depth(16, "after CV to alt");
    // Stack: [m0, m1, ..., m15] Alt: [CV]

    // Get CV back, split into 8 LE words, place on top of msg
    em.from_alt();
    em.assert_depth(17, "after CV from alt");
    for _ in 0..7 {
        em.split4();
    }
    em.assert_depth(24, "after cv unpack");
    em.be_words_to_le(8);
    em.assert_depth(24, "after cv LE convert");
    // Stack: [m0..m15, cv0(LE)..cv7(LE)]

    // v[0..7] = chaining value (already on stack)
    // v[8..11] = IV[0..3]
    for i in 0..4 {
        em.push_b(u32_to_le(BLAKE3_IV[i]));
    }
    em.assert_depth(28, "after IV push");

    // v[12] = counter_low = 0, v[13] = counter_high = 0 (single chunk, offset 0)
    em.push_b(u32_to_le(0));
    em.push_b(u32_to_le(0));
    // v[14] = block_len = 64
    // NOTE(review): block_len is fixed at 64 here. Reference BLAKE3 uses the
    // actual byte count of the final block; emit_blake3_hash zero-pads short
    // messages to 64, so outputs for messages < 64 bytes diverge from
    // upstream BLAKE3 -- confirm this matches the TS port's behavior.
    em.push_b(u32_to_le(64));
    // v[15] = flags = CHUNK_START | CHUNK_END | ROOT = 11 (single-block root chunk)
    em.push_b(u32_to_le(CHUNK_START | CHUNK_END | ROOT));
    em.assert_depth(32, "after state init");

    // Stack: [m0..m15(bottom), v0..v15(top)] -- v15=TOS, m0=deepest

    // ================================================================
    // Phase 3: 7 rounds of G function calls
    // ================================================================
    let mut tracker = StateTracker::new();

    for round in 0..7 {
        let s = &msg_schedule[round];

        // Column mixing
        emit_g_call(&mut em, &mut tracker, 0, 4, 8, 12, s[0], s[1]);
        emit_g_call(&mut em, &mut tracker, 1, 5, 9, 13, s[2], s[3]);
        emit_g_call(&mut em, &mut tracker, 2, 6, 10, 14, s[4], s[5]);
        emit_g_call(&mut em, &mut tracker, 3, 7, 11, 15, s[6], s[7]);

        // Diagonal mixing
        emit_g_call(&mut em, &mut tracker, 0, 5, 10, 15, s[8], s[9]);
        emit_g_call(&mut em, &mut tracker, 1, 6, 11, 12, s[10], s[11]);
        emit_g_call(&mut em, &mut tracker, 2, 7, 8, 13, s[12], s[13]);
        emit_g_call(&mut em, &mut tracker, 3, 4, 9, 14, s[14], s[15]);
    }

    em.assert_depth(32, "after all rounds");

    // ================================================================
    // Phase 4: Output -- hash[i] = state[i] XOR state[i+8], for i=0..7
    // ================================================================

    // Canonical reorder via alt stack: peel v15 first, v0 last, so popping
    // back restores [v0(bottom)..v15(TOS)]. Each to_alt shifts everything
    // still on the main stack up by one slot, hence the manual decrement.
    for i in (0..=15usize).rev() {
        let d = tracker.depth(i);
        em.roll(d as usize);
        tracker.on_roll_to_top(i);
        em.to_alt();
        for j in 0..16 {
            if j != i && tracker.positions[j] >= 0 {
                tracker.positions[j] -= 1;
            }
        }
        tracker.positions[i] = -1; // parked on alt
    }

    // Pop to get canonical order: [v0(bottom)..v15(TOS)]
    for _ in 0..16 {
        em.from_alt();
    }
    em.assert_depth(32, "after canonical reorder");

    // State: [m0..m15, v0(bottom)..v15(TOS)], canonical order.
    // XOR pairs: h[7-k] = v[7-k] ^ v[15-k] for k=0..7
    // Process top-down: v15^v7, v14^v6, ..., v8^v0. Send each result to alt.
    for k in 0..8usize {
        em.roll(8 - k); // bring v[7-k] to TOS (past v[15-k] and remaining)
        em.bin_op("OP_XOR"); // h[7-k] = v[7-k] ^ v[15-k]
        em.to_alt(); // result to alt; main shrinks by 2
    }
    em.assert_depth(16, "after XOR pairs");
    // Alt (bottom->top): h7, h6, h5, h4, h3, h2, h1, h0. Main: [m0..m15].

    // Pop results to main: h0 first (LIFO), then h1, ..., h7
    for _ in 0..8 {
        em.from_alt();
    }
    em.assert_depth(24, "after XOR results restored");
    // Main: [m0..m15, h0, h1, ..., h7] h7=TOS

    // Pack into 32-byte BE result: h0_BE || h1_BE || ... || h7_BE
    // (per-word big-endian, per the module header's output convention).
    em.reverse_bytes4(); // h7 -> h7_BE
    for _ in 1..8 {
        em.swap(); // bring h[7-i] (LE) to TOS
        em.reverse_bytes4(); // -> BE
        em.swap(); // [new_BE, accumulated]
        em.bin_op("OP_CAT"); // new_BE || accumulated
    }
    em.assert_depth(17, "after hash pack");

    // Drop 16 message words still buried under the packed hash.
    for _ in 0..16 {
        em.swap();
        em.drop();
    }
    em.assert_depth(1, "compress final");

    em.ops
}
636
// The compression op sequence is identical on every call, so generate it
// once and cache it for the lifetime of the process.
static COMPRESS_OPS: OnceLock<Vec<StackOp>> = OnceLock::new();

/// Lazily build (first call only) and return the cached compression ops.
fn get_compress_ops() -> &'static Vec<StackOp> {
    COMPRESS_OPS.get_or_init(generate_compress_ops)
}
643
644// =========================================================================
645// Public entry points
646// =========================================================================
647
648/// Emit BLAKE3 single-block compression in Bitcoin Script.
649/// Stack on entry: [..., chainingValue(32 BE), block(64 BE)]
650/// Stack on exit: [..., hash(32 BE)]
651/// Net depth: -1
652pub fn emit_blake3_compress(emit: &mut dyn FnMut(StackOp)) {
653 for op in get_compress_ops() {
654 emit(op.clone());
655 }
656}
657
/// Emit BLAKE3 hash for a message up to 64 bytes.
/// Stack on entry: [..., message(<=64 BE)]
/// Stack on exit: [..., hash(32 BE)]
/// Net depth: 0
///
/// Applies zero-padding and uses IV as chaining value.
///
/// NOTE(review): the spliced compressor hard-codes block_len = 64 (see
/// generate_compress_ops), whereas reference BLAKE3 uses the actual
/// final-block byte count; for messages shorter than 64 bytes the result
/// therefore differs from upstream BLAKE3 -- confirm against the TS port.
/// Messages longer than 64 bytes would make 64-len negative and fail the
/// OP_NUM2BIN at script runtime.
pub fn emit_blake3_hash(emit: &mut dyn FnMut(StackOp)) {
    let mut em = Emitter::new(1);

    // Pad message to 64 bytes (BLAKE3 zero-pads, no length suffix)
    em.oc("OP_SIZE");
    em.depth += 1; // OP_SIZE pushes the length without consuming: [message, len]
    em.push_i(64);
    em.swap();
    em.bin_op("OP_SUB"); // [message, 64-len]
    em.push_i(0);
    em.swap();
    em.bin_op("OP_NUM2BIN"); // NUM2BIN(0, 64-len) = (64-len) zero bytes: [message, zeros]
    em.bin_op("OP_CAT"); // [paddedMessage(64)]

    // Push IV as 32-byte BE chaining value (the compressor re-splits it)
    let mut iv_bytes = Vec::with_capacity(32);
    for i in 0..8 {
        iv_bytes.extend_from_slice(&u32_to_be(BLAKE3_IV[i]));
    }
    em.push_b(iv_bytes);
    em.swap(); // [IV(32 BE), paddedMessage(64 BE)]

    // Splice compression ops
    let compress_ops = get_compress_ops();
    for op in compress_ops {
        em.e_raw(op.clone());
    }
    // e_raw bypasses depth tracking; the compressor's net stack effect is -1
    // (2 inputs -> 1 hash), so resynchronize the tracked depth by hand.
    em.depth = 1;

    em.assert_depth(1, "blake3Hash final");

    for op in em.ops {
        emit(op);
    }
}
698}