#![allow(clippy::cast_possible_truncation, clippy::indexing_slicing)]
#[cfg(target_arch = "wasm32")]
use core::arch::wasm32::*;
use super::kernels::{SIGMA, init_v, load_msg};
/// Rotate each 64-bit lane right by 32 bits.
///
/// A 32-bit rotation of a 64-bit lane is exactly a swap of its two
/// 32-bit halves, which compiles to a single lane shuffle.
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn ror32(x: v128) -> v128 {
    i32x4_shuffle::<1, 0, 3, 2>(x, x)
}
/// Rotate each 64-bit lane right by 24 bits.
///
/// 24 bits is three whole bytes, so the rotation can be written as a
/// byte shuffle: within each little-endian 8-byte lane, result byte `j`
/// is source byte `(j + 3) % 8`. Equivalent to `(x >> 24) | (x << 40)`
/// per lane.
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn ror24(x: v128) -> v128 {
    i8x16_shuffle::<3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10>(x, x)
}
/// Rotate each 64-bit lane right by 16 bits.
///
/// 16 bits is one whole 16-bit lane, so within each 64-bit half the
/// four u16 lanes rotate by one: result lane `j` is source lane
/// `(j + 1) % 4`.
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn ror16(x: v128) -> v128 {
    i16x8_shuffle::<1, 2, 3, 0, 5, 6, 7, 4>(x, x)
}
/// Rotate each 64-bit lane right by 63 bits (equivalently, left by 1).
///
/// No byte- or lane-aligned shuffle exists for an odd bit count, so
/// this one stays a shift/or pair.
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn ror63(x: v128) -> v128 {
    v128_or(u64x2_shl(x, 1), u64x2_shr(x, 63))
}
#[cfg(target_arch = "wasm32")]
#[inline(always)]
#[allow(clippy::too_many_arguments)]
// Runs four BLAKE2b G mixing functions at once: each v128 holds two 64-bit
// state words, so the (x0, x1) register pairs together span all four
// columns (or, between diagonalize()/undiagonalize(), all four diagonals)
// of the 4x4 working state. mx* / my* carry the message words for the
// first and second half-step respectively.
//
// NOTE: the *0 and *1 halves never interact; their operations are
// interleaved by hand to expose instruction-level parallelism, so keep
// the statement ordering as-is.
fn g2(
a0: &mut v128,
a1: &mut v128,
b0: &mut v128,
b1: &mut v128,
c0: &mut v128,
c1: &mut v128,
d0: &mut v128,
d1: &mut v128,
mx0: v128,
mx1: v128,
my0: v128,
my1: v128,
) {
// First half-step: a += b + mx; d = (d ^ a) >>> 32; c += d; b = (b ^ c) >>> 24.
*a0 = i64x2_add(i64x2_add(*a0, *b0), mx0);
*a1 = i64x2_add(i64x2_add(*a1, *b1), mx1);
*d0 = ror32(v128_xor(*d0, *a0));
*d1 = ror32(v128_xor(*d1, *a1));
*c0 = i64x2_add(*c0, *d0);
*c1 = i64x2_add(*c1, *d1);
*b0 = ror24(v128_xor(*b0, *c0));
*b1 = ror24(v128_xor(*b1, *c1));
// Second half-step: a += b + my; d = (d ^ a) >>> 16; c += d; b = (b ^ c) >>> 63.
*a0 = i64x2_add(i64x2_add(*a0, *b0), my0);
*a1 = i64x2_add(i64x2_add(*a1, *b1), my1);
*d0 = ror16(v128_xor(*d0, *a0));
*d1 = ror16(v128_xor(*d1, *a1));
*c0 = i64x2_add(*c0, *d0);
*c1 = i64x2_add(*c1, *d1);
*b0 = ror63(v128_xor(*b0, *c0));
*b1 = ror63(v128_xor(*b1, *c1));
}
/// Rotate the b/c/d register pairs so that the next `g2` call mixes the
/// diagonals of the 4x4 BLAKE2b state instead of its columns.
///
/// Viewing each (x0, x1) pair as four 64-bit lanes 0..=3, b rotates left
/// by one lane, c by two lanes, and d by three.
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn diagonalize(b0: &mut v128, b1: &mut v128, c0: &mut v128, c1: &mut v128, d0: &mut v128, d1: &mut v128) {
    // b: lanes [0,1,2,3] -> [1,2,3,0]
    let (lo, hi) = (*b0, *b1);
    *b0 = i64x2_shuffle::<1, 2>(lo, hi);
    *b1 = i64x2_shuffle::<1, 2>(hi, lo);
    // c: a two-lane rotation is just a register swap.
    core::mem::swap(c0, c1);
    // d: lanes [0,1,2,3] -> [3,0,1,2]
    let (lo, hi) = (*d0, *d1);
    *d0 = i64x2_shuffle::<1, 2>(hi, lo);
    *d1 = i64x2_shuffle::<1, 2>(lo, hi);
}
/// Inverse of `diagonalize`: rotate b right by one lane, swap the c
/// registers, and rotate d left by one lane, restoring column order for
/// the next round.
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn undiagonalize(b0: &mut v128, b1: &mut v128, c0: &mut v128, c1: &mut v128, d0: &mut v128, d1: &mut v128) {
    // b: lanes [0,1,2,3] -> [3,0,1,2]
    let (lo, hi) = (*b0, *b1);
    *b0 = i64x2_shuffle::<1, 2>(hi, lo);
    *b1 = i64x2_shuffle::<1, 2>(lo, hi);
    // c: the two-lane rotation is its own inverse — swap back.
    core::mem::swap(c0, c1);
    // d: lanes [0,1,2,3] -> [1,2,3,0]
    let (lo, hi) = (*d0, *d1);
    *d0 = i64x2_shuffle::<1, 2>(lo, hi);
    *d1 = i64x2_shuffle::<1, 2>(hi, lo);
}
/// Gather two message words, selected by SIGMA indices, into one vector
/// (word `i0` in the low lane, `i1` in the high lane).
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn load_msg_pair(m: &[u64; 16], i0: u8, i1: u8) -> v128 {
    u64x2(m[usize::from(i0)], m[usize::from(i1)])
}
#[cfg(target_arch = "wasm32")]
#[inline(always)]
/// Loads two consecutive `u64` words starting at `p` into a `v128`.
///
/// # Safety
/// `p` must be valid for reading 16 bytes. Wasm `v128.load` imposes no
/// alignment requirement, so 8-byte-aligned `u64` pointers are fine.
unsafe fn vload_u64_pair(p: *const u64) -> v128 {
// SAFETY: caller guarantees `p` is readable for 16 bytes (see above).
unsafe { v128_load(p as *const v128) }
}
/// BLAKE2b compression function using wasm SIMD128, two 64-bit state
/// words per 128-bit register.
///
/// Mixes the 128-byte `block` into the chaining state `h`, using the
/// byte counter `t` and the `last`-block finalization flag.
///
/// # Safety
/// Must only be called when the `simd128` target feature is available
/// at runtime.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
pub(super) unsafe fn compress_simd128(h: &mut [u64; 8], block: &[u8; 128], t: u128, last: bool) {
    let m = load_msg(block);
    let v = init_v(h, t, last);
    let p = v.as_ptr();
    // SAFETY: `v` holds 16 u64 words; every offset below leaves at least
    // 16 readable bytes.
    let (mut a0, mut a1) = unsafe { (vload_u64_pair(p), vload_u64_pair(p.add(2))) };
    let (mut b0, mut b1) = unsafe { (vload_u64_pair(p.add(4)), vload_u64_pair(p.add(6))) };
    let (mut c0, mut c1) = unsafe { (vload_u64_pair(p.add(8)), vload_u64_pair(p.add(10))) };
    let (mut d0, mut d1) = unsafe { (vload_u64_pair(p.add(12)), vload_u64_pair(p.add(14))) };
    for round in 0..12u8 {
        // Rounds 10 and 11 reuse the first two SIGMA permutations.
        let s = &SIGMA[usize::from(round % 10)];
        // Column step: sigma entries 0..8, evens feed the first g2
        // half-step (mx), odds the second (my).
        let mx0 = load_msg_pair(&m, s[0], s[2]);
        let mx1 = load_msg_pair(&m, s[4], s[6]);
        let my0 = load_msg_pair(&m, s[1], s[3]);
        let my1 = load_msg_pair(&m, s[5], s[7]);
        g2(&mut a0, &mut a1, &mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1, mx0, mx1, my0, my1);
        diagonalize(&mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1);
        // Diagonal step: the remaining eight message words.
        let mx0 = load_msg_pair(&m, s[8], s[10]);
        let mx1 = load_msg_pair(&m, s[12], s[14]);
        let my0 = load_msg_pair(&m, s[9], s[11]);
        let my1 = load_msg_pair(&m, s[13], s[15]);
        g2(&mut a0, &mut a1, &mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1, mx0, mx1, my0, my1);
        undiagonalize(&mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1);
    }
    // Feed-forward: h ^= low half ^ high half of the working state.
    // SAFETY: `h` holds 8 u64 words (64 bytes); all loads and stores below
    // stay within that buffer, and all reads finish before the writes.
    unsafe {
        let hp = h.as_ptr();
        let r0 = v128_xor(vload_u64_pair(hp), v128_xor(a0, c0));
        let r1 = v128_xor(vload_u64_pair(hp.add(2)), v128_xor(a1, c1));
        let r2 = v128_xor(vload_u64_pair(hp.add(4)), v128_xor(b0, d0));
        let r3 = v128_xor(vload_u64_pair(hp.add(6)), v128_xor(b1, d1));
        let out = h.as_mut_ptr();
        v128_store(out as *mut v128, r0);
        v128_store(out.add(2) as *mut v128, r1);
        v128_store(out.add(4) as *mut v128, r2);
        v128_store(out.add(6) as *mut v128, r3);
    }
}