use super::vst1q_u8;
use crate::{Block, ParBlocks};
use core::arch::aarch64::*;
/// Decrypts one AES block in place using the ARMv8 Cryptography Extensions.
///
/// `expanded_keys` holds the `N` round keys, consumed in array order. The
/// round count is `N - 1`.
///
/// # Panics
///
/// Panics if `N - 1` is not 10, 12 or 14.
///
/// # Safety
///
/// The function is compiled with the `neon` and `crypto` target features
/// enabled; the caller must make sure the running CPU supports both.
#[target_feature(enable = "crypto")]
#[target_feature(enable = "neon")]
pub(super) unsafe fn decrypt<const N: usize>(expanded_keys: &[uint8x16_t; N], block: &mut Block) {
    let rounds = N - 1;
    assert!(rounds == 10 || rounds == 12 || rounds == 14);

    // The two trailing keys are applied without an inverse MixColumns step,
    // so they are kept apart from the keys of the regular rounds.
    let last_round_key = expanded_keys[rounds - 1];
    let final_xor_key = expanded_keys[rounds];

    let mut state = vld1q_u8(block.as_ptr());

    // Regular rounds: AES single-round decryption followed by inverse
    // MixColumns.
    for &round_key in &expanded_keys[..rounds - 1] {
        state = vaesimcq_u8(vaesdq_u8(state, round_key));
    }

    // Last round has no inverse MixColumns; the final key is XORed in.
    state = veorq_u8(vaesdq_u8(state, last_round_key), final_xor_key);

    // Write the result back over the input block.
    vst1q_u8(block.as_mut_ptr(), state);
}
/// Decrypts eight AES blocks in place using the ARMv8 Cryptography Extensions.
///
/// Each round key is applied to all eight blocks before the next key is
/// used. `expanded_keys` holds the `N` round keys, consumed in array order.
/// The round count is `N - 1`.
///
/// # Panics
///
/// Panics if `N - 1` is not 10, 12 or 14.
///
/// # Safety
///
/// The function is compiled with the `neon` and `crypto` target features
/// enabled; the caller must make sure the running CPU supports both.
#[target_feature(enable = "crypto")]
#[target_feature(enable = "neon")]
pub(super) unsafe fn decrypt8<const N: usize>(
    expanded_keys: &[uint8x16_t; N],
    blocks: &mut ParBlocks,
) {
    let rounds = N - 1;
    assert!(rounds == 10 || rounds == 12 || rounds == 14);

    // Load all eight blocks into vector values up front.
    let mut lanes = [
        vld1q_u8(blocks[0].as_ptr()),
        vld1q_u8(blocks[1].as_ptr()),
        vld1q_u8(blocks[2].as_ptr()),
        vld1q_u8(blocks[3].as_ptr()),
        vld1q_u8(blocks[4].as_ptr()),
        vld1q_u8(blocks[5].as_ptr()),
        vld1q_u8(blocks[6].as_ptr()),
        vld1q_u8(blocks[7].as_ptr()),
    ];

    // Regular rounds: AES single-round decryption followed by inverse
    // MixColumns, applied to every lane with the same round key.
    for &round_key in &expanded_keys[..rounds - 1] {
        for lane in lanes.iter_mut() {
            *lane = vaesimcq_u8(vaesdq_u8(*lane, round_key));
        }
    }

    // Last round has no inverse MixColumns; the final key is XORed in and
    // each result is written back over its input block.
    let last_round_key = expanded_keys[rounds - 1];
    let final_xor_key = expanded_keys[rounds];
    for (i, lane) in lanes.iter().enumerate() {
        let output = veorq_u8(vaesdq_u8(*lane, last_round_key), final_xor_key);
        vst1q_u8(blocks[i].as_mut_ptr(), output);
    }
}