#![allow(unsafe_code)]
#![allow(clippy::cast_possible_truncation, clippy::indexing_slicing)]
use super::kernels::{SIGMA, init_v, load_msg};
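// Two-lane (128-bit) emulation of the BLAKE2b state: each `[u64; 2]` holds two
// adjacent words of the working state, so every helper below advances two of the
// four G columns (or diagonals) at once. The lane-wise operations are kept simple
// so the compiler can map them onto vector registers under `target_feature(enable = "v")`.

// Lane-wise rotations by the four BLAKE2b G rotation constants (32, 24, 16, 63).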
#[inline(always)]
fn ror32(x: [u64; 2]) -> [u64; 2] {
[x[0].rotate_right(32), x[1].rotate_right(32)]
}
#[inline(always)]
fn ror24(x: [u64; 2]) -> [u64; 2] {
[x[0].rotate_right(24), x[1].rotate_right(24)]
}
#[inline(always)]
fn ror16(x: [u64; 2]) -> [u64; 2] {
[x[0].rotate_right(16), x[1].rotate_right(16)]
}
#[inline(always)]
fn ror63(x: [u64; 2]) -> [u64; 2] {
[x[0].rotate_right(63), x[1].rotate_right(63)]
}
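// Lane-wise wrapping addition and XOR.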
#[inline(always)]
fn vadd(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
[a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
}
#[inline(always)]
fn vxor(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
[a[0] ^ b[0], a[1] ^ b[1]]
}
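/// One application of the BLAKE2b G function to all four columns (or, between
/// `diagonalize` and `undiagonalize`, all four diagonals): `(a0, b0, c0, d0)`
/// carries the lanes for columns 0-1 and `(a1, b1, c1, d1)` the lanes for
/// columns 2-3. `mx*` and `my*` hold the first and second scheduled message
/// words for each lane.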
#[inline(always)]
#[allow(clippy::too_many_arguments)]
fn g2(
a0: &mut [u64; 2],
a1: &mut [u64; 2],
b0: &mut [u64; 2],
b1: &mut [u64; 2],
c0: &mut [u64; 2],
c1: &mut [u64; 2],
d0: &mut [u64; 2],
d1: &mut [u64; 2],
mx0: [u64; 2],
mx1: [u64; 2],
my0: [u64; 2],
my1: [u64; 2],
) {
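// First half of G: a += b + m_x; d = (d ^ a) >>> 32; c += d; b = (b ^ c) >>> 24.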
*a0 = vadd(vadd(*a0, *b0), mx0);
*a1 = vadd(vadd(*a1, *b1), mx1);
*d0 = ror32(vxor(*d0, *a0));
*d1 = ror32(vxor(*d1, *a1));
*c0 = vadd(*c0, *d0);
*c1 = vadd(*c1, *d1);
*b0 = ror24(vxor(*b0, *c0));
*b1 = ror24(vxor(*b1, *c1));
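// Second half of G: a += b + m_y; d = (d ^ a) >>> 16; c += d; b = (b ^ c) >>> 63.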
*a0 = vadd(vadd(*a0, *b0), my0);
*a1 = vadd(vadd(*a1, *b1), my1);
*d0 = ror16(vxor(*d0, *a0));
*d1 = ror16(vxor(*d1, *a1));
*c0 = vadd(*c0, *d0);
*c1 = vadd(*c1, *d1);
*b0 = ror63(vxor(*b0, *c0));
*b1 = ror63(vxor(*b1, *c1));
}
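/// Rotates rows b, c and d of the 4x4 state left by 1, 2 and 3 positions
/// respectively, so the next `g2` call operates on the diagonals. The rotations
/// are expressed as lane shuffles over the `[u64; 2]` pairs.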
#[inline(always)]
fn diagonalize(
b0: &mut [u64; 2],
b1: &mut [u64; 2],
c0: &mut [u64; 2],
c1: &mut [u64; 2],
d0: &mut [u64; 2],
d1: &mut [u64; 2],
) {
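// Row b ([b0[0], b0[1], b1[0], b1[1]]) rotates left by one word.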
let tb0 = *b0;
let tb1 = *b1;
*b0 = [tb0[1], tb1[0]];
*b1 = [tb1[1], tb0[0]];
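// Row c rotates left by two words, i.e. its two halves swap.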
core::mem::swap(c0, c1);
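// Row d rotates left by three words (equivalently, right by one).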
let td0 = *d0;
let td1 = *d1;
*d0 = [td1[1], td0[0]];
*d1 = [td0[1], td1[0]];
}
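/// Inverse of `diagonalize`: rotates rows b, c and d right by 1, 2 and 3
/// positions, restoring column order before the next round.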
#[inline(always)]
fn undiagonalize(
b0: &mut [u64; 2],
b1: &mut [u64; 2],
c0: &mut [u64; 2],
c1: &mut [u64; 2],
d0: &mut [u64; 2],
d1: &mut [u64; 2],
) {
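// Row b rotates right by one word.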
let tb0 = *b0;
let tb1 = *b1;
*b0 = [tb1[1], tb0[0]];
*b1 = [tb0[1], tb1[0]];
core::mem::swap(c0, c1);
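// Row d rotates right by three words (equivalently, left by one).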
let td0 = *d0;
let td1 = *d1;
*d0 = [td0[1], td1[0]];
*d1 = [td1[1], td0[0]];
}
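/// BLAKE2b compression of a single 128-byte block into the chaining value `h`,
/// with `t` the 128-bit message byte counter and `last` marking the final block.
///
/// # Safety
///
/// The caller must ensure the RISC-V vector extension (`v`) is available on the
/// running CPU, as required by `target_feature(enable = "v")`.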
#[target_feature(enable = "v")]
pub(super) unsafe fn compress_rvv(h: &mut [u64; 8], block: &[u8; 128], t: u128, last: bool) {
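// Load the sixteen message words and build the sixteen-word working state.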
let m = load_msg(block);
let v = init_v(h, t, last);
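// Pack the working state into two-lane rows: a = v[0..4], b = v[4..8],
// c = v[8..12], d = v[12..16].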
let mut a0 = [v[0], v[1]];
let mut a1 = [v[2], v[3]];
let mut b0 = [v[4], v[5]];
let mut b1 = [v[6], v[7]];
let mut c0 = [v[8], v[9]];
let mut c1 = [v[10], v[11]];
let mut d0 = [v[12], v[13]];
let mut d1 = [v[14], v[15]];
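// Twelve rounds; the ten-entry SIGMA schedule wraps around for rounds 10 and 11.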
for round in 0..12u8 {
let s = &SIGMA[(round % 10) as usize];
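// Column step: lane 0 of (mx0, my0) feeds column 0, lane 1 feeds column 1;
// (mx1, my1) feed columns 2 and 3.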
let mx0 = [m[s[0] as usize], m[s[2] as usize]];
let mx1 = [m[s[4] as usize], m[s[6] as usize]];
let my0 = [m[s[1] as usize], m[s[3] as usize]];
let my1 = [m[s[5] as usize], m[s[7] as usize]];
g2(
&mut a0, &mut a1, &mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1, mx0, mx1, my0, my1,
);
diagonalize(&mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1);
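// Diagonal step: same lane layout, using SIGMA entries 8..16.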
let mx0 = [m[s[8] as usize], m[s[10] as usize]];
let mx1 = [m[s[12] as usize], m[s[14] as usize]];
let my0 = [m[s[9] as usize], m[s[11] as usize]];
let my1 = [m[s[13] as usize], m[s[15] as usize]];
g2(
&mut a0, &mut a1, &mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1, mx0, mx1, my0, my1,
);
undiagonalize(&mut b0, &mut b1, &mut c0, &mut c1, &mut d0, &mut d1);
}
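// Finalize: h[i] ^= v[i] ^ v[i + 8].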
h[0] ^= a0[0] ^ c0[0];
h[1] ^= a0[1] ^ c0[1];
h[2] ^= a1[0] ^ c1[0];
h[3] ^= a1[1] ^ c1[1];
h[4] ^= b0[0] ^ d0[0];
h[5] ^= b0[1] ^ d0[1];
h[6] ^= b1[0] ^ d1[0];
h[7] ^= b1[1] ^ d1[1];
}