use super::stack::{PushValue, StackOp};
use std::sync::OnceLock;
/// Initialization-vector words for the BLAKE3 code generator.
///
/// The first four words are pushed as state words by `generate_compress_ops`;
/// all eight are serialized as the chaining value by `emit_blake3_hash`.
const BLAKE3_IV: [u32; 8] = [
    0x6A09_E667,
    0xBB67_AE85,
    0x3C6E_F372,
    0xA54F_F53A,
    0x510E_527F,
    0x9B05_688C,
    0x1F83_D9AB,
    0x5BE0_CD19,
];

/// Index permutation applied to the message-word order between rounds:
/// slot `i` of the next round takes the word that was in slot
/// `MSG_PERMUTATION[i]` of the current round (see `compute_msg_schedule`).
const MSG_PERMUTATION: [usize; 16] = [
    2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8,
];

/// Flag bit OR-ed into the final state word by `generate_compress_ops`.
const CHUNK_START: u32 = 1 << 0;
/// Flag bit OR-ed into the final state word by `generate_compress_ops`.
const CHUNK_END: u32 = 1 << 1;
/// Flag bit OR-ed into the final state word by `generate_compress_ops`.
const ROOT: u32 = 1 << 3;
/// Serializes `n` as four bytes, least-significant byte first.
fn u32_to_le(n: u32) -> Vec<u8> {
    n.to_le_bytes().to_vec()
}
/// Serializes `n` as four bytes, most-significant byte first.
fn u32_to_be(n: u32) -> Vec<u8> {
    n.to_be_bytes().to_vec()
}
/// Builds the message-word order for each of the seven rounds.
///
/// Row 0 is the identity order `0..16`; every following row is the previous
/// row re-indexed through `MSG_PERMUTATION`, i.e.
/// `next[i] = current[MSG_PERMUTATION[i]]`.
fn compute_msg_schedule() -> [[usize; 16]; 7] {
    let mut rows = [[0usize; 16]; 7];
    let mut order: [usize; 16] = std::array::from_fn(|slot| slot);
    for row in rows.iter_mut() {
        *row = order;
        // Re-index the current order through the permutation for the next round.
        order = MSG_PERMUTATION.map(|src| order[src]);
    }
    rows
}
/// Collects emitted stack operations while mirroring the main-stack and
/// alt-stack depths those operations are expected to leave behind.
///
/// The depth counters are bookkeeping only; they are checked through
/// [`Emitter::assert_depth`] and never influence which ops are emitted.
struct Emitter {
    /// Operations emitted so far, in program order.
    ops: Vec<StackOp>,
    /// Tracked number of items on the main stack.
    depth: i64,
    /// Tracked number of items on the alt stack.
    alt_depth: i64,
}

impl Emitter {
    /// Creates an emitter with no ops, `initial_depth` tracked main-stack
    /// items and an empty tracked alt stack.
    fn new(initial_depth: i64) -> Self {
        Self {
            ops: Vec::new(),
            depth: initial_depth,
            alt_depth: 0,
        }
    }

    /// Appends `op` and shifts the tracked main-stack depth by `delta`.
    fn record(&mut self, op: StackOp, delta: i64) {
        self.ops.push(op);
        self.depth += delta;
    }

    /// Appends the named opcode and shifts the tracked depth by `delta`.
    fn record_opcode(&mut self, code: &str, delta: i64) {
        self.record(StackOp::Opcode(code.to_string()), delta);
    }

    /// Appends an arbitrary op without touching the tracked depth.
    fn e_raw(&mut self, sop: StackOp) {
        self.record(sop, 0);
    }

    /// Appends a named opcode without touching the tracked depth; callers
    /// adjust `depth` themselves when the opcode changes it.
    fn oc(&mut self, code: &str) {
        self.record_opcode(code, 0);
    }

    /// Pushes an integer literal (tracked depth +1).
    fn push_i(&mut self, v: i128) {
        self.record(StackOp::Push(PushValue::Int(v)), 1);
    }

    /// Pushes a byte-string literal (tracked depth +1).
    fn push_b(&mut self, v: Vec<u8>) {
        self.record(StackOp::Push(PushValue::Bytes(v)), 1);
    }

    /// Emits `Dup` (tracked depth +1).
    fn dup(&mut self) {
        self.record(StackOp::Dup, 1);
    }

    /// Emits `Drop` (tracked depth -1).
    fn drop(&mut self) {
        self.record(StackOp::Drop, -1);
    }

    /// Emits `Swap` (tracked depth unchanged).
    fn swap(&mut self) {
        self.record(StackOp::Swap, 0);
    }

    /// Emits `Over` (tracked depth +1).
    fn over(&mut self) {
        self.record(StackOp::Over, 1);
    }

    /// Emits `Rot` (tracked depth unchanged).
    fn rot(&mut self) {
        self.record(StackOp::Rot, 0);
    }

    /// Copies the item `d` positions below the top onto the top.
    ///
    /// Depths 0 and 1 use `Dup` / `Over`; anything deeper pushes the index
    /// and emits `Pick`. The tracked depth grows by one in every case.
    fn pick(&mut self, d: usize) {
        match d {
            0 => self.dup(),
            1 => self.over(),
            _ => {
                // The pushed index accounts for the +1; `Pick` is then tracked
                // as depth-neutral (index consumed, copy produced).
                self.push_i(d as i128);
                self.record(StackOp::Pick { depth: d }, 0);
            }
        }
    }

    /// Moves the item `d` positions below the top onto the top.
    ///
    /// Depth 0 emits nothing, depths 1 and 2 use `Swap` / `Rot`; anything
    /// deeper pushes the index and emits `Roll`. The tracked depth is
    /// unchanged in every case.
    fn roll(&mut self, d: usize) {
        match d {
            0 => {}
            1 => self.swap(),
            2 => self.rot(),
            _ => {
                self.push_i(d as i128);
                // `Roll` is tracked as consuming the pushed index.
                self.record(StackOp::Roll { depth: d }, -1);
            }
        }
    }

    /// Emits `OP_TOALTSTACK`: tracked main depth -1, tracked alt depth +1.
    fn to_alt(&mut self) {
        self.record_opcode("OP_TOALTSTACK", -1);
        self.alt_depth += 1;
    }

    /// Emits `OP_FROMALTSTACK`: tracked main depth +1, tracked alt depth -1.
    fn from_alt(&mut self) {
        self.record_opcode("OP_FROMALTSTACK", 1);
        self.alt_depth -= 1;
    }

    /// Emits an opcode tracked as two-in/one-out (depth -1).
    fn bin_op(&mut self, code: &str) {
        self.record_opcode(code, -1);
    }

    /// Emits an opcode tracked as one-in/one-out (depth unchanged).
    fn uni_op(&mut self, code: &str) {
        self.record_opcode(code, 0);
    }

    /// Emits `OP_SPLIT`, tracked as depth-neutral: the split position pushed
    /// by the caller is already counted, so the net effect of "push index,
    /// split" is one extra item.
    fn split(&mut self) {
        self.record_opcode("OP_SPLIT", 0);
    }

    /// Pushes the index 4 and emits `OP_SPLIT` (tracked depth +1).
    fn split4(&mut self) {
        self.push_i(4);
        self.split();
    }

    /// Panics unless the tracked main-stack depth equals `expected`.
    ///
    /// `msg` names the code-generation step for the panic message.
    fn assert_depth(&self, expected: i64, msg: &str) {
        assert_eq!(
            self.depth, expected,
            "BLAKE3 codegen: {}. Expected depth {}, got {}",
            msg, expected, self.depth
        );
    }

    /// Reverses the byte order of the top item, which callers use on 4-byte
    /// words: three single-byte splits followed by three swap+concatenate
    /// steps. Tracked depth is unchanged overall.
    fn reverse_bytes4(&mut self) {
        for _ in 0..3 {
            self.push_i(1);
            self.split();
        }
        for _ in 0..3 {
            self.swap();
            self.bin_op("OP_CAT");
        }
    }

    /// Converts the top byte string to a script number: appends a zero byte
    /// and emits `OP_BIN2NUM`. The extra byte presumably keeps the value
    /// non-negative when the high bit of the word is set.
    fn le2num(&mut self) {
        self.push_b(vec![0x00]);
        self.bin_op("OP_CAT");
        self.uni_op("OP_BIN2NUM");
    }

    /// Converts the top script number back to a 4-byte word: serialize to
    /// five bytes with `OP_NUM2BIN`, split after byte four, discard the rest.
    fn num2le(&mut self) {
        self.push_i(5);
        self.bin_op("OP_NUM2BIN");
        self.split4();
        self.drop();
    }

    /// Adds the two topmost words; the result is truncated to four bytes by
    /// [`Emitter::num2le`]. Tracked depth -1.
    fn add32(&mut self) {
        // Same op sequence as summing exactly two operands.
        self.add_n(2);
    }

    /// Adds the `n` topmost words into a single 4-byte word.
    ///
    /// Emits nothing when `n < 2`; otherwise tracked depth shrinks by `n - 1`.
    fn add_n(&mut self, n: usize) {
        if n < 2 {
            return;
        }
        self.le2num();
        for _ in 1..n {
            self.swap();
            self.le2num();
            self.bin_op("OP_ADD");
        }
        self.num2le();
    }

    /// Rotates the top item right by `n` bits using `OP_RSHIFT`, `OP_LSHIFT`
    /// (by `32 - n`) and `OP_OR` on a duplicated copy.
    fn rotr_be(&mut self, n: usize) {
        self.dup();
        self.push_i(n as i128);
        self.bin_op("OP_RSHIFT");
        self.swap();
        self.push_i((32 - n) as i128);
        self.bin_op("OP_LSHIFT");
        self.bin_op("OP_OR");
    }

    /// Moves the first `count` bytes of the top item to its end
    /// (split at `count`, swap the halves, concatenate).
    fn rotate_bytes_le(&mut self, count: i128) {
        self.push_i(count);
        self.split();
        self.swap();
        self.bin_op("OP_CAT");
    }

    /// 16-bit right rotation of a little-endian word done as a byte shuffle.
    fn rotr16_le(&mut self) {
        self.rotate_bytes_le(2);
    }

    /// 8-bit right rotation of a little-endian word done as a byte shuffle.
    fn rotr8_le(&mut self) {
        self.rotate_bytes_le(1);
    }

    /// Right rotation by `n` bits of a little-endian word: byte-reverse,
    /// rotate with the shift opcodes, byte-reverse back.
    fn rotr_le_general(&mut self, n: usize) {
        self.reverse_bytes4();
        self.rotr_be(n);
        self.reverse_bytes4();
    }

    /// Byte-reverses the top `n` words. Each word is reversed and parked on
    /// the alt stack, then all are pulled back, which restores their
    /// original relative order on the main stack.
    fn be_words_to_le(&mut self, n: usize) {
        for _ in 0..n {
            self.reverse_bytes4();
            self.to_alt();
        }
        for _ in 0..n {
            self.from_alt();
        }
    }
}
/// Tracks how far below the top of the stack each of the 16 state words sits.
struct StateTracker {
    /// `positions[w]` is the depth of state word `w` (0 = top of stack).
    /// Negative entries are skipped by [`StateTracker::on_roll_to_top`].
    positions: [i32; 16],
}

impl StateTracker {
    /// Starts with word 0 deepest (depth 15) and word 15 on top (depth 0).
    fn new() -> Self {
        StateTracker {
            positions: std::array::from_fn(|word| 15 - word as i32),
        }
    }

    /// Returns the recorded depth of `word_idx`.
    fn depth(&self, word_idx: usize) -> i32 {
        self.positions[word_idx]
    }

    /// Records that `word_idx` was rolled to the top: every other word that
    /// sat above it (non-negative depth smaller than its old depth) sinks by
    /// one, and the rolled word gets depth 0.
    fn on_roll_to_top(&mut self, word_idx: usize) {
        let old_depth = self.positions[word_idx];
        for (other, pos) in self.positions.iter_mut().enumerate() {
            let was_above = *pos >= 0 && *pos < old_depth;
            if other != word_idx && was_above {
                *pos += 1;
            }
        }
        self.positions[word_idx] = 0;
    }
}
/// Emits one half of the G mixing step.
///
/// Expects `[a, b, c, d, m]` on top of the stack (`m` topmost) and leaves
/// `[a', b', c', d']`, so the tracked depth drops by one:
///
/// * `a' = a + b + m`
/// * `d' = rotr(d ^ a', rot_d)`
/// * `c' = c + d'`
/// * `b' = rotr(b ^ c', rot_b)`
///
/// `rot_d` values 16 and 8 use the byte-shuffle rotations; every other
/// amount (and `rot_b` always) goes through the general bit rotation.
fn emit_half_g(em: &mut Emitter, rot_d: usize, rot_b: usize) {
    let entry_depth = em.depth;

    // Keep a copy of the original `b` on the alt stack for step 4.
    em.pick(3);
    em.to_alt();

    // Step 1: a' = a + b + m.
    em.roll(3); // [a, c, d, m, b]
    em.roll(4); // [c, d, m, b, a]
    em.add_n(3); // [c, d, a']
    em.assert_depth(entry_depth - 2, "halfG step1");

    // Step 2: d' = rotr(d ^ a', rot_d).
    em.dup(); // [c, d, a', a']
    em.rot(); // [c, a', a', d]
    em.bin_op("OP_XOR"); // [c, a', d ^ a']
    match rot_d {
        16 => em.rotr16_le(),
        8 => em.rotr8_le(),
        bits => em.rotr_le_general(bits),
    }
    em.assert_depth(entry_depth - 2, "halfG step2");

    // Step 3: c' = c + d'.
    em.dup(); // [c, a', d', d']
    em.roll(3); // [a', d', d', c]
    em.add32(); // [a', d', c']
    em.assert_depth(entry_depth - 2, "halfG step3");

    // Step 4: b' = rotr(b ^ c', rot_b), using the saved copy of `b`.
    em.from_alt(); // [a', d', c', b]
    em.over(); // [a', d', c', b, c']
    em.bin_op("OP_XOR"); // [a', d', c', b ^ c']
    em.rotr_le_general(rot_b);
    em.assert_depth(entry_depth - 1, "halfG step4");

    // Restore the canonical order [a', b', c', d'].
    em.swap(); // [a', d', b', c']
    em.rot(); // [a', b', c', d']
    em.assert_depth(entry_depth - 1, "halfG done");
}
/// Emits a full G mixing step as two half steps.
///
/// Expects `[a, b, c, d, mx, my]` on top (`my` topmost) and leaves the four
/// updated words, so the tracked depth drops by two. `my` waits on the alt
/// stack while the first half (rotations 16 and 12) consumes `mx`; the
/// second half (rotations 8 and 7) then consumes `my`.
fn emit_g(em: &mut Emitter) {
    let entry_depth = em.depth;

    // Park `my` so the first half sees `mx` on top.
    em.to_alt();
    emit_half_g(em, 16, 12);
    em.assert_depth(entry_depth - 2, "G phase1");

    em.from_alt();
    em.assert_depth(entry_depth - 1, "G before phase2");
    emit_half_g(em, 8, 7);
    em.assert_depth(entry_depth - 2, "G done");
}
/// Emits one G application on state words `ai`, `bi`, `ci`, `di`.
///
/// The four words are rolled to the top in that order (so `di` ends up
/// topmost), the two message words are copied on top with `pick`, and
/// `emit_g` mixes them. `tracker` is updated to reflect the new layout.
///
/// `mx_orig_idx` / `my_orig_idx` are indices into the original 16 message
/// words, which the pick offsets assume sit directly beneath the 16 state
/// words with word 15 nearest the state.
fn emit_g_call(
    em: &mut Emitter,
    tracker: &mut StateTracker,
    ai: usize,
    bi: usize,
    ci: usize,
    di: usize,
    mx_orig_idx: usize,
    my_orig_idx: usize,
) {
    let entry_depth = em.depth;

    for word in [ai, bi, ci, di] {
        let below_top = tracker.depth(word) as usize;
        em.roll(below_top);
        tracker.on_roll_to_top(word);
    }

    // Message word i sits 16 state words plus (15 - i) message words down.
    let mx_depth = 16 + (15 - mx_orig_idx);
    // One deeper for `my` because the copy of `mx` is now on top as well.
    let my_depth = 16 + (15 - my_orig_idx) + 1;
    em.pick(mx_depth);
    em.pick(my_depth);
    em.assert_depth(entry_depth + 2, "before G");

    emit_g(em);
    em.assert_depth(entry_depth, "after G");

    // G leaves its results as [a', b', c', d'] with d' on top.
    for (word, new_depth) in [(ai, 3), (bi, 2), (ci, 1), (di, 0)] {
        tracker.positions[word] = new_depth;
    }
}
/// Generates the op sequence for one compression of a 64-byte block.
///
/// Depth tracking starts at two items, presumably `[chaining_value, block]`
/// with the block on top (that is the layout `emit_blake3_hash` prepares),
/// and ends at a single item: the eight output words, each byte-reversed and
/// concatenated. Panics if any intermediate depth assertion fails.
fn generate_compress_ops() -> Vec<StackOp> {
    // State-word indices (a, b, c, d) for the eight G applications of a
    // round: four column mixes followed by four diagonal mixes.
    const G_WORDS: [[usize; 4]; 8] = [
        [0, 4, 8, 12],
        [1, 5, 9, 13],
        [2, 6, 10, 14],
        [3, 7, 11, 15],
        [0, 5, 10, 15],
        [1, 6, 11, 12],
        [2, 7, 8, 13],
        [3, 4, 9, 14],
    ];

    let schedule = compute_msg_schedule();
    let mut em = Emitter::new(2);

    // Split the top item into sixteen 4-byte words and byte-reverse each.
    for _ in 0..15 {
        em.split4();
    }
    em.assert_depth(17, "after block unpack");
    em.be_words_to_le(16);
    em.assert_depth(17, "after block LE convert");

    // Bring the item beneath the message words to the top; it takes a round
    // trip through the alt stack before being unpacked.
    em.roll(16);
    em.to_alt();
    em.assert_depth(16, "after CV to alt");
    em.from_alt();
    em.assert_depth(17, "after CV from alt");

    // Split it into eight 4-byte words (state words 0..8) and byte-reverse each.
    for _ in 0..7 {
        em.split4();
    }
    em.assert_depth(24, "after cv unpack");
    em.be_words_to_le(8);
    em.assert_depth(24, "after cv LE convert");

    // State words 8..12: the first four IV words.
    for iv_word in BLAKE3_IV.iter().take(4) {
        em.push_b(u32_to_le(*iv_word));
    }
    em.assert_depth(28, "after IV push");

    // State words 12..16: two zero words, the constant 64, and the flags.
    for tail_word in [0, 0, 64, CHUNK_START | CHUNK_END | ROOT] {
        em.push_b(u32_to_le(tail_word));
    }
    em.assert_depth(32, "after state init");

    // Seven rounds of eight G applications; G number `g` consumes schedule
    // entries 2g and 2g + 1 of the round.
    let mut tracker = StateTracker::new();
    for row in schedule.iter() {
        for (g, words) in G_WORDS.iter().enumerate() {
            emit_g_call(
                &mut em,
                &mut tracker,
                words[0],
                words[1],
                words[2],
                words[3],
                row[2 * g],
                row[2 * g + 1],
            );
        }
    }
    em.assert_depth(32, "after all rounds");

    // Restore canonical order: move words 15 down to 0 onto the alt stack,
    // then pull all sixteen back so word 15 ends up on top again.
    for word in (0..16usize).rev() {
        em.roll(tracker.depth(word) as usize);
        tracker.on_roll_to_top(word);
        em.to_alt();
        // The word left the main stack, so everything still tracked rises.
        for (other, pos) in tracker.positions.iter_mut().enumerate() {
            if other != word && *pos >= 0 {
                *pos -= 1;
            }
        }
        tracker.positions[word] = -1;
    }
    for _ in 0..16 {
        em.from_alt();
    }
    em.assert_depth(32, "after canonical reorder");

    // XOR word i with word i + 8, for i = 7 down to 0. The partner in the
    // lower half moves one position closer each time a pair is consumed.
    for distance in (1..=8usize).rev() {
        em.roll(distance);
        em.bin_op("OP_XOR");
        em.to_alt();
    }
    em.assert_depth(16, "after XOR pairs");
    for _ in 0..8 {
        em.from_alt();
    }
    em.assert_depth(24, "after XOR results restored");

    // Byte-reverse each result word and concatenate them, word 0 first.
    em.reverse_bytes4();
    for _ in 0..7 {
        em.swap();
        em.reverse_bytes4();
        em.swap();
        em.bin_op("OP_CAT");
    }
    em.assert_depth(17, "after hash pack");

    // Discard the sixteen message words sitting beneath the result.
    for _ in 0..16 {
        em.swap();
        em.drop();
    }
    em.assert_depth(1, "compress final");

    em.ops
}
/// Process-wide cache for the op sequence built by `generate_compress_ops`.
static COMPRESS_OPS: OnceLock<Vec<StackOp>> = OnceLock::new();
/// Returns the cached compression op sequence, generating it on first use.
///
/// `OnceLock::get_or_init` only calls `generate_compress_ops` while the cell
/// is still empty; later calls return the stored vector.
fn get_compress_ops() -> &'static Vec<StackOp> {
COMPRESS_OPS.get_or_init(generate_compress_ops)
}
/// Feeds a clone of every cached compression op to `emit`, in order.
///
/// The sequence comes from `get_compress_ops`, so it is generated at most
/// once per process no matter how often this function is called.
pub fn emit_blake3_compress(emit: &mut dyn FnMut(StackOp)) {
    get_compress_ops()
        .iter()
        .cloned()
        .for_each(|op| emit(op));
}
/// Emits ops that hash the item on top of the stack with a single
/// compression call.
///
/// The emitted prefix pads the item to 64 bytes: it computes `64 - size`,
/// builds that many bytes with `OP_NUM2BIN` from the number 0, and appends
/// them. It then pushes the eight IV words as a 32-byte big-endian string,
/// swaps it beneath the padded item, and appends the cached compression ops.
///
/// NOTE(review): inputs longer than 64 bytes would make `64 - size` negative;
/// this presumably relies on callers passing at most 64 bytes — confirm.
pub fn emit_blake3_hash(emit: &mut dyn FnMut(StackOp)) {
    let mut em = Emitter::new(1);

    // [msg] -> [msg, size]; `oc` does not track depth, so bump it by hand.
    em.oc("OP_SIZE");
    em.depth += 1;

    // [msg, size] -> [msg, 64 - size]
    em.push_i(64);
    em.swap();
    em.bin_op("OP_SUB");

    // [msg, pad_len] -> [msg, padding] -> [padded_msg]
    em.push_i(0);
    em.swap();
    em.bin_op("OP_NUM2BIN");
    em.bin_op("OP_CAT");

    // [padded_msg] -> [iv, padded_msg]
    let iv_bytes: Vec<u8> = BLAKE3_IV
        .iter()
        .flat_map(|&word| u32_to_be(word))
        .collect();
    em.push_b(iv_bytes);
    em.swap();

    // Splice in the compression ops. They bypass depth tracking, so the
    // tracked depth is set by hand to the single result item.
    for op in get_compress_ops().iter().cloned() {
        em.e_raw(op);
    }
    em.depth = 1;
    em.assert_depth(1, "blake3Hash final");

    em.ops.into_iter().for_each(|op| emit(op));
}