#![allow(unsafe_code)]
#![allow(clippy::inline_always)]
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "aarch64")]
// One full round of Keccak-f[1600] built from the ARMv8.2-A SHA3 instructions:
// EOR3 (three-way XOR), RAX1 (XOR then rotate-left-1), XAR (XOR then
// rotate-right by an immediate) and BCAX (bit-clear and XOR).
//
// Each `$aN` is a `uint64x2_t` holding state lane N. The round operates on
// both 64-bit vector lanes independently, so the same macro serves the
// single-state variant (vector lane 1 held at zero) and the x2 variant
// (two interleaved states). `$rc` is the round constant for the iota step.
macro_rules! keccakf_sha3_neon_round {
($a0:ident, $a1:ident, $a2:ident, $a3:ident, $a4:ident,
$a5:ident, $a6:ident, $a7:ident, $a8:ident, $a9:ident,
$a10:ident, $a11:ident, $a12:ident, $a13:ident, $a14:ident,
$a15:ident, $a16:ident, $a17:ident, $a18:ident, $a19:ident,
$a20:ident, $a21:ident, $a22:ident, $a23:ident, $a24:ident,
$rc:expr) => {{
// Theta, step 1: column parities c[x] = a[x,0] ^ a[x,1] ^ ... ^ a[x,4],
// two EOR3s per column instead of four XORs.
let c0 = veor3q_u64(veor3q_u64($a0, $a5, $a10), $a15, $a20);
let c1 = veor3q_u64(veor3q_u64($a1, $a6, $a11), $a16, $a21);
let c2 = veor3q_u64(veor3q_u64($a2, $a7, $a12), $a17, $a22);
let c3 = veor3q_u64(veor3q_u64($a3, $a8, $a13), $a18, $a23);
let c4 = veor3q_u64(veor3q_u64($a4, $a9, $a14), $a19, $a24);
// Theta, step 2: d[x] = c[x-1] ^ rotl(c[x+1], 1) — exactly one RAX1 each.
let d0 = vrax1q_u64(c4, c1);
let d1 = vrax1q_u64(c0, c2);
let d2 = vrax1q_u64(c1, c3);
let d3 = vrax1q_u64(c2, c4);
let d4 = vrax1q_u64(c3, c0);
// Theta step 3 fused with rho and pi: vxarq_u64::<N>(a, d) computes
// ror64(a ^ d, N), i.e. a rotate-left by 64-N, which is the lane's rho
// offset (e.g. a0: rho 0 -> N=0; a1: rho 1 -> N=63; a6: rho 44 -> N=20).
// The `b` index numbering applies the pi permutation up front so the chi
// step below can read each row contiguously.
let b0 = vxarq_u64::<0>($a0, d0);
let b16 = vxarq_u64::<28>($a5, d0);
let b7 = vxarq_u64::<61>($a10, d0);
let b23 = vxarq_u64::<23>($a15, d0);
let b14 = vxarq_u64::<46>($a20, d0);
let b10 = vxarq_u64::<63>($a1, d1);
let b1 = vxarq_u64::<20>($a6, d1);
let b17 = vxarq_u64::<54>($a11, d1);
let b8 = vxarq_u64::<19>($a16, d1);
let b24 = vxarq_u64::<62>($a21, d1);
let b20 = vxarq_u64::<2>($a2, d2);
let b11 = vxarq_u64::<58>($a7, d2);
let b2 = vxarq_u64::<21>($a12, d2);
let b18 = vxarq_u64::<49>($a17, d2);
let b9 = vxarq_u64::<3>($a22, d2);
let b5 = vxarq_u64::<36>($a3, d3);
let b21 = vxarq_u64::<9>($a8, d3);
let b12 = vxarq_u64::<39>($a13, d3);
let b3 = vxarq_u64::<43>($a18, d3);
let b19 = vxarq_u64::<8>($a23, d3);
let b15 = vxarq_u64::<37>($a4, d4);
let b6 = vxarq_u64::<44>($a9, d4);
let b22 = vxarq_u64::<25>($a14, d4);
let b13 = vxarq_u64::<56>($a19, d4);
let b4 = vxarq_u64::<50>($a24, d4);
// Chi: vbcaxq_u64(p, q, r) = p ^ (q & !r), so each row lane becomes
// b[x] ^ (b[x+2] & !b[x+1]) with indices taken modulo 5 within the row.
$a0 = vbcaxq_u64(b0, b2, b1);
$a1 = vbcaxq_u64(b1, b3, b2);
$a2 = vbcaxq_u64(b2, b4, b3);
$a3 = vbcaxq_u64(b3, b0, b4);
$a4 = vbcaxq_u64(b4, b1, b0);
$a5 = vbcaxq_u64(b5, b7, b6);
$a6 = vbcaxq_u64(b6, b8, b7);
$a7 = vbcaxq_u64(b7, b9, b8);
$a8 = vbcaxq_u64(b8, b5, b9);
$a9 = vbcaxq_u64(b9, b6, b5);
$a10 = vbcaxq_u64(b10, b12, b11);
$a11 = vbcaxq_u64(b11, b13, b12);
$a12 = vbcaxq_u64(b12, b14, b13);
$a13 = vbcaxq_u64(b13, b10, b14);
$a14 = vbcaxq_u64(b14, b11, b10);
$a15 = vbcaxq_u64(b15, b17, b16);
$a16 = vbcaxq_u64(b16, b18, b17);
$a17 = vbcaxq_u64(b17, b19, b18);
$a18 = vbcaxq_u64(b18, b15, b19);
$a19 = vbcaxq_u64(b19, b16, b15);
$a20 = vbcaxq_u64(b20, b22, b21);
$a21 = vbcaxq_u64(b21, b23, b22);
$a22 = vbcaxq_u64(b22, b24, b23);
$a23 = vbcaxq_u64(b23, b20, b24);
$a24 = vbcaxq_u64(b24, b21, b20);
// Iota: XOR the round constant into lane (0, 0). vdupq_n_u64 broadcasts
// `$rc` to both vector lanes, so each interleaved state receives it.
$a0 = veorq_u64($a0, vdupq_n_u64($rc));
}};
}
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
/// Runs all 24 rounds of Keccak-f[1600] on one state using the ARMv8.2-A
/// SHA3 instructions (EOR3/RAX1/XAR/BCAX).
///
/// Only lane 0 of each 128-bit vector carries data; lane 1 is held at zero
/// (the x2 variant uses both lanes for two interleaved states).
///
/// # Safety
/// The caller must ensure the `sha3` target feature is available on the
/// current CPU.
unsafe fn keccakf_sha3_single_impl(state: &mut [u64; 25]) {
    // SAFETY: `sha3` (which implies `neon`) is enabled on this function, so
    // every intrinsic below is available, and all state indices are in-bounds
    // constants. The explicit block mirrors `keccakf_sha3_x2_impl` and keeps
    // the edition-2024 `unsafe_op_in_unsafe_fn` lint quiet.
    unsafe {
        let z = vcreate_u64(0);
        // Load each scalar lane into vector lane 0; vector lane 1 stays zero.
        let mut a0 = vcombine_u64(vcreate_u64(state[0]), z);
        let mut a1 = vcombine_u64(vcreate_u64(state[1]), z);
        let mut a2 = vcombine_u64(vcreate_u64(state[2]), z);
        let mut a3 = vcombine_u64(vcreate_u64(state[3]), z);
        let mut a4 = vcombine_u64(vcreate_u64(state[4]), z);
        let mut a5 = vcombine_u64(vcreate_u64(state[5]), z);
        let mut a6 = vcombine_u64(vcreate_u64(state[6]), z);
        let mut a7 = vcombine_u64(vcreate_u64(state[7]), z);
        let mut a8 = vcombine_u64(vcreate_u64(state[8]), z);
        let mut a9 = vcombine_u64(vcreate_u64(state[9]), z);
        let mut a10 = vcombine_u64(vcreate_u64(state[10]), z);
        let mut a11 = vcombine_u64(vcreate_u64(state[11]), z);
        let mut a12 = vcombine_u64(vcreate_u64(state[12]), z);
        let mut a13 = vcombine_u64(vcreate_u64(state[13]), z);
        let mut a14 = vcombine_u64(vcreate_u64(state[14]), z);
        let mut a15 = vcombine_u64(vcreate_u64(state[15]), z);
        let mut a16 = vcombine_u64(vcreate_u64(state[16]), z);
        let mut a17 = vcombine_u64(vcreate_u64(state[17]), z);
        let mut a18 = vcombine_u64(vcreate_u64(state[18]), z);
        let mut a19 = vcombine_u64(vcreate_u64(state[19]), z);
        let mut a20 = vcombine_u64(vcreate_u64(state[20]), z);
        let mut a21 = vcombine_u64(vcreate_u64(state[21]), z);
        let mut a22 = vcombine_u64(vcreate_u64(state[22]), z);
        let mut a23 = vcombine_u64(vcreate_u64(state[23]), z);
        let mut a24 = vcombine_u64(vcreate_u64(state[24]), z);
        // All 24 rounds; `super::RC` holds the Keccak round constants.
        for &rc in &super::RC {
            keccakf_sha3_neon_round!(
                a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17,
                a18, a19, a20, a21, a22, a23, a24, rc
            );
        }
        // Write vector lane 0 of every register back to the scalar state.
        state[0] = vgetq_lane_u64(a0, 0);
        state[1] = vgetq_lane_u64(a1, 0);
        state[2] = vgetq_lane_u64(a2, 0);
        state[3] = vgetq_lane_u64(a3, 0);
        state[4] = vgetq_lane_u64(a4, 0);
        state[5] = vgetq_lane_u64(a5, 0);
        state[6] = vgetq_lane_u64(a6, 0);
        state[7] = vgetq_lane_u64(a7, 0);
        state[8] = vgetq_lane_u64(a8, 0);
        state[9] = vgetq_lane_u64(a9, 0);
        state[10] = vgetq_lane_u64(a10, 0);
        state[11] = vgetq_lane_u64(a11, 0);
        state[12] = vgetq_lane_u64(a12, 0);
        state[13] = vgetq_lane_u64(a13, 0);
        state[14] = vgetq_lane_u64(a14, 0);
        state[15] = vgetq_lane_u64(a15, 0);
        state[16] = vgetq_lane_u64(a16, 0);
        state[17] = vgetq_lane_u64(a17, 0);
        state[18] = vgetq_lane_u64(a18, 0);
        state[19] = vgetq_lane_u64(a19, 0);
        state[20] = vgetq_lane_u64(a20, 0);
        state[21] = vgetq_lane_u64(a21, 0);
        state[22] = vgetq_lane_u64(a22, 0);
        state[23] = vgetq_lane_u64(a23, 0);
        state[24] = vgetq_lane_u64(a24, 0);
    }
}
#[cfg(target_arch = "aarch64")]
#[inline]
/// Safe entry point for the single-state SHA3-accelerated permutation.
///
/// NOTE(review): this is a safe fn wrapping a `#[target_feature(enable =
/// "sha3")]` unsafe fn — presumably the dispatch layer only selects this
/// path after a runtime `sha3` feature check; confirm at the call site.
pub(crate) fn keccakf_aarch64_sha3_single(state: &mut [u64; 25]) {
// SAFETY: assumes `sha3` support was verified before this path was chosen
// — TODO confirm at the dispatcher.
unsafe { keccakf_sha3_single_impl(state) }
}
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
/// Fused absorb step: XORs one `RATE`-byte block into the first `RATE / 8`
/// state lanes (little-endian u64s) and then runs all 24 rounds of
/// Keccak-f[1600] using the ARMv8.2-A SHA3 instructions.
///
/// Only vector lane 0 carries data; lane 1 is held at zero.
///
/// # Safety
/// The caller must ensure the `sha3` target feature is available on the
/// current CPU.
unsafe fn keccakf_sha3_absorb_single_impl<const RATE: usize>(state: &mut [u64; 25], block: &[u8; RATE]) {
    // The rate must cover whole 64-bit lanes and fit inside the 1600-bit
    // state; a RATE above 200 would silently ignore the trailing bytes.
    debug_assert_eq!(RATE % 8, 0);
    debug_assert!(RATE <= 200);
    let lanes = RATE / 8;
    let ptr = block.as_ptr();
    // SAFETY: `sha3`/`neon` are enabled on this function. Each unaligned read
    // in `lane!` is guarded by `$i < lanes`, so it stays within `block`'s
    // `RATE` bytes. The explicit block mirrors `keccakf_sha3_x2_impl` and
    // keeps the edition-2024 `unsafe_op_in_unsafe_fn` lint quiet.
    unsafe {
        // Lane loader: XOR message lane `$i` into the state lane when the
        // rate covers it, otherwise pass the state lane through untouched.
        macro_rules! lane {
            ($i:expr) => {{
                if $i < lanes {
                    state[$i] ^ u64::from_le(core::ptr::read_unaligned(ptr.add($i * 8).cast::<u64>()))
                } else {
                    state[$i]
                }
            }};
        }
        let z = vcreate_u64(0);
        // Load each (possibly XORed) lane into vector lane 0; lane 1 stays 0.
        let mut a0 = vcombine_u64(vcreate_u64(lane!(0)), z);
        let mut a1 = vcombine_u64(vcreate_u64(lane!(1)), z);
        let mut a2 = vcombine_u64(vcreate_u64(lane!(2)), z);
        let mut a3 = vcombine_u64(vcreate_u64(lane!(3)), z);
        let mut a4 = vcombine_u64(vcreate_u64(lane!(4)), z);
        let mut a5 = vcombine_u64(vcreate_u64(lane!(5)), z);
        let mut a6 = vcombine_u64(vcreate_u64(lane!(6)), z);
        let mut a7 = vcombine_u64(vcreate_u64(lane!(7)), z);
        let mut a8 = vcombine_u64(vcreate_u64(lane!(8)), z);
        let mut a9 = vcombine_u64(vcreate_u64(lane!(9)), z);
        let mut a10 = vcombine_u64(vcreate_u64(lane!(10)), z);
        let mut a11 = vcombine_u64(vcreate_u64(lane!(11)), z);
        let mut a12 = vcombine_u64(vcreate_u64(lane!(12)), z);
        let mut a13 = vcombine_u64(vcreate_u64(lane!(13)), z);
        let mut a14 = vcombine_u64(vcreate_u64(lane!(14)), z);
        let mut a15 = vcombine_u64(vcreate_u64(lane!(15)), z);
        let mut a16 = vcombine_u64(vcreate_u64(lane!(16)), z);
        let mut a17 = vcombine_u64(vcreate_u64(lane!(17)), z);
        let mut a18 = vcombine_u64(vcreate_u64(lane!(18)), z);
        let mut a19 = vcombine_u64(vcreate_u64(lane!(19)), z);
        let mut a20 = vcombine_u64(vcreate_u64(lane!(20)), z);
        let mut a21 = vcombine_u64(vcreate_u64(lane!(21)), z);
        let mut a22 = vcombine_u64(vcreate_u64(lane!(22)), z);
        let mut a23 = vcombine_u64(vcreate_u64(lane!(23)), z);
        let mut a24 = vcombine_u64(vcreate_u64(lane!(24)), z);
        // All 24 rounds; `super::RC` holds the Keccak round constants.
        for &rc in &super::RC {
            keccakf_sha3_neon_round!(
                a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17,
                a18, a19, a20, a21, a22, a23, a24, rc
            );
        }
        // Write vector lane 0 of every register back to the scalar state.
        state[0] = vgetq_lane_u64(a0, 0);
        state[1] = vgetq_lane_u64(a1, 0);
        state[2] = vgetq_lane_u64(a2, 0);
        state[3] = vgetq_lane_u64(a3, 0);
        state[4] = vgetq_lane_u64(a4, 0);
        state[5] = vgetq_lane_u64(a5, 0);
        state[6] = vgetq_lane_u64(a6, 0);
        state[7] = vgetq_lane_u64(a7, 0);
        state[8] = vgetq_lane_u64(a8, 0);
        state[9] = vgetq_lane_u64(a9, 0);
        state[10] = vgetq_lane_u64(a10, 0);
        state[11] = vgetq_lane_u64(a11, 0);
        state[12] = vgetq_lane_u64(a12, 0);
        state[13] = vgetq_lane_u64(a13, 0);
        state[14] = vgetq_lane_u64(a14, 0);
        state[15] = vgetq_lane_u64(a15, 0);
        state[16] = vgetq_lane_u64(a16, 0);
        state[17] = vgetq_lane_u64(a17, 0);
        state[18] = vgetq_lane_u64(a18, 0);
        state[19] = vgetq_lane_u64(a19, 0);
        state[20] = vgetq_lane_u64(a20, 0);
        state[21] = vgetq_lane_u64(a21, 0);
        state[22] = vgetq_lane_u64(a22, 0);
        state[23] = vgetq_lane_u64(a23, 0);
        state[24] = vgetq_lane_u64(a24, 0);
    }
}
#[cfg(target_arch = "aarch64")]
#[inline]
/// Safe entry point for the fused absorb-and-permute step on one state.
///
/// NOTE(review): safe fn wrapping a `#[target_feature(enable = "sha3")]`
/// unsafe fn — presumably the dispatch layer only selects this path after a
/// runtime `sha3` feature check; confirm at the call site.
pub(crate) fn keccakf_aarch64_sha3_absorb_single<const RATE: usize>(state: &mut [u64; 25], block: &[u8; RATE]) {
// SAFETY: assumes `sha3` support was verified before this path was chosen
// — TODO confirm at the dispatcher.
unsafe { keccakf_sha3_absorb_single_impl::<RATE>(state, block) }
}
#[cfg(target_arch = "aarch64")]
#[inline(always)]
/// Packs two scalars into one 128-bit vector: `a` in lane 0, `b` in lane 1.
unsafe fn combine_lanes(a: u64, b: u64) -> uint64x2_t {
    // SAFETY: these NEON intrinsics only move values between general-purpose
    // and vector registers; NEON is the baseline on aarch64.
    unsafe {
        let lo = vcreate_u64(a);
        let hi = vcreate_u64(b);
        vcombine_u64(lo, hi)
    }
}
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "sha3")]
/// Runs Keccak-f[1600] on two independent states at once: `state_a` occupies
/// vector lane 0 and `state_b` vector lane 1 of every 128-bit register, so
/// each SHA3 instruction permutes both states simultaneously.
///
/// # Safety
/// The caller must ensure the `sha3` target feature is available on the
/// current CPU.
unsafe fn keccakf_sha3_x2_impl(state_a: &mut [u64; 25], state_b: &mut [u64; 25]) {
// SAFETY: `sha3` (which implies `neon`) is enabled on this function; all
// state indices below are in-bounds constants.
unsafe {
// Interleave: vector lane 0 <- state_a[i], vector lane 1 <- state_b[i].
let mut a0 = combine_lanes(state_a[0], state_b[0]);
let mut a1 = combine_lanes(state_a[1], state_b[1]);
let mut a2 = combine_lanes(state_a[2], state_b[2]);
let mut a3 = combine_lanes(state_a[3], state_b[3]);
let mut a4 = combine_lanes(state_a[4], state_b[4]);
let mut a5 = combine_lanes(state_a[5], state_b[5]);
let mut a6 = combine_lanes(state_a[6], state_b[6]);
let mut a7 = combine_lanes(state_a[7], state_b[7]);
let mut a8 = combine_lanes(state_a[8], state_b[8]);
let mut a9 = combine_lanes(state_a[9], state_b[9]);
let mut a10 = combine_lanes(state_a[10], state_b[10]);
let mut a11 = combine_lanes(state_a[11], state_b[11]);
let mut a12 = combine_lanes(state_a[12], state_b[12]);
let mut a13 = combine_lanes(state_a[13], state_b[13]);
let mut a14 = combine_lanes(state_a[14], state_b[14]);
let mut a15 = combine_lanes(state_a[15], state_b[15]);
let mut a16 = combine_lanes(state_a[16], state_b[16]);
let mut a17 = combine_lanes(state_a[17], state_b[17]);
let mut a18 = combine_lanes(state_a[18], state_b[18]);
let mut a19 = combine_lanes(state_a[19], state_b[19]);
let mut a20 = combine_lanes(state_a[20], state_b[20]);
let mut a21 = combine_lanes(state_a[21], state_b[21]);
let mut a22 = combine_lanes(state_a[22], state_b[22]);
let mut a23 = combine_lanes(state_a[23], state_b[23]);
let mut a24 = combine_lanes(state_a[24], state_b[24]);
// All 24 rounds; `super::RC` holds the Keccak round constants.
for &rc in &super::RC {
keccakf_sha3_neon_round!(
a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23,
a24, rc
);
}
// De-interleave: lane 0 back to state_a, lane 1 back to state_b.
state_a[0] = vgetq_lane_u64(a0, 0);
state_b[0] = vgetq_lane_u64(a0, 1);
state_a[1] = vgetq_lane_u64(a1, 0);
state_b[1] = vgetq_lane_u64(a1, 1);
state_a[2] = vgetq_lane_u64(a2, 0);
state_b[2] = vgetq_lane_u64(a2, 1);
state_a[3] = vgetq_lane_u64(a3, 0);
state_b[3] = vgetq_lane_u64(a3, 1);
state_a[4] = vgetq_lane_u64(a4, 0);
state_b[4] = vgetq_lane_u64(a4, 1);
state_a[5] = vgetq_lane_u64(a5, 0);
state_b[5] = vgetq_lane_u64(a5, 1);
state_a[6] = vgetq_lane_u64(a6, 0);
state_b[6] = vgetq_lane_u64(a6, 1);
state_a[7] = vgetq_lane_u64(a7, 0);
state_b[7] = vgetq_lane_u64(a7, 1);
state_a[8] = vgetq_lane_u64(a8, 0);
state_b[8] = vgetq_lane_u64(a8, 1);
state_a[9] = vgetq_lane_u64(a9, 0);
state_b[9] = vgetq_lane_u64(a9, 1);
state_a[10] = vgetq_lane_u64(a10, 0);
state_b[10] = vgetq_lane_u64(a10, 1);
state_a[11] = vgetq_lane_u64(a11, 0);
state_b[11] = vgetq_lane_u64(a11, 1);
state_a[12] = vgetq_lane_u64(a12, 0);
state_b[12] = vgetq_lane_u64(a12, 1);
state_a[13] = vgetq_lane_u64(a13, 0);
state_b[13] = vgetq_lane_u64(a13, 1);
state_a[14] = vgetq_lane_u64(a14, 0);
state_b[14] = vgetq_lane_u64(a14, 1);
state_a[15] = vgetq_lane_u64(a15, 0);
state_b[15] = vgetq_lane_u64(a15, 1);
state_a[16] = vgetq_lane_u64(a16, 0);
state_b[16] = vgetq_lane_u64(a16, 1);
state_a[17] = vgetq_lane_u64(a17, 0);
state_b[17] = vgetq_lane_u64(a17, 1);
state_a[18] = vgetq_lane_u64(a18, 0);
state_b[18] = vgetq_lane_u64(a18, 1);
state_a[19] = vgetq_lane_u64(a19, 0);
state_b[19] = vgetq_lane_u64(a19, 1);
state_a[20] = vgetq_lane_u64(a20, 0);
state_b[20] = vgetq_lane_u64(a20, 1);
state_a[21] = vgetq_lane_u64(a21, 0);
state_b[21] = vgetq_lane_u64(a21, 1);
state_a[22] = vgetq_lane_u64(a22, 0);
state_b[22] = vgetq_lane_u64(a22, 1);
state_a[23] = vgetq_lane_u64(a23, 0);
state_b[23] = vgetq_lane_u64(a23, 1);
state_a[24] = vgetq_lane_u64(a24, 0);
state_b[24] = vgetq_lane_u64(a24, 1);
} }
#[cfg(target_arch = "aarch64")]
#[inline]
/// Safe entry point for the two-state SHA3-accelerated permutation.
///
/// NOTE(review): safe fn wrapping a `#[target_feature(enable = "sha3")]`
/// unsafe fn — presumably the dispatch layer only selects this path after a
/// runtime `sha3` feature check; confirm at the call site.
pub(crate) fn keccakf_aarch64_sha3_x2(state_a: &mut [u64; 25], state_b: &mut [u64; 25]) {
// SAFETY: assumes `sha3` support was verified before this path was chosen
// — TODO confirm at the dispatcher.
unsafe { keccakf_sha3_x2_impl(state_a, state_b) }
}