#![allow(unsafe_code)]
#![allow(unused_unsafe)]
use core::arch::aarch64::*;
/// Loads the `i`-th 16-byte round key from the flat key schedule `rk`.
///
/// The schedule is treated as a sequence of consecutive 16-byte entries;
/// entry `i` occupies bytes `i * 16 .. i * 16 + 16`.
///
/// # Panics
///
/// Panics if `rk` does not contain a complete 16-byte entry at index `i`.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn rk(rk: &[u8], i: usize) -> uint8x16_t {
    // Bounds-checked lookup: a short schedule becomes a panic instead of an
    // out-of-bounds read through raw pointer arithmetic.
    let key = rk
        .chunks_exact(16)
        .nth(i)
        .expect("round key index out of range");
    // SAFETY: `key` is a slice of exactly 16 initialized bytes, which is the
    // amount `vld1q_u8` reads.
    unsafe { vld1q_u8(key.as_ptr()) }
}
/// Encrypts one 128-bit state `s` with the round keys in `ks`, using `nr` rounds.
///
/// Keys `ks[0] .. ks[nr - 2]` each drive an `AESE` step followed by `AESMC`;
/// `ks[nr - 1]` drives a final `AESE` without `AESMC`, and `ks[nr]` is XORed
/// into the result.
///
/// # Panics
///
/// `nr` must be at least 1 and `ks` must hold at least `nr + 1` keys;
/// otherwise the subtraction or the slice indexing panics.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn enc_core(ks: &[uint8x16_t], nr: usize, mut s: uint8x16_t) -> uint8x16_t {
    let last_round = nr - 1;
    // SAFETY: only register-to-register intrinsics are used here; the caller
    // guarantees the `aes` feature is available.
    unsafe {
        // Full rounds: AESE (key XOR + byte substitution + row shift), then AESMC.
        for key in ks.iter().copied().take(last_round) {
            let substituted = vaeseq_u8(s, key);
            s = vaesmcq_u8(substituted);
        }
        // Last round omits the column mix and ends with a plain key XOR.
        let substituted = vaeseq_u8(s, ks[last_round]);
        veorq_u8(substituted, ks[nr])
    }
}
/// Decrypts one 128-bit state `s` with the (encryption-order) round keys in
/// `ks`, using `nr` rounds.
///
/// The state first goes through `AESD` with `ks[nr]`. Then, for `round` from
/// `nr - 1` down to `1`, both the state and `ks[round]` are passed through
/// `AESIMC` before the next `AESD`. Finally `ks[0]` is XORed in.
///
/// # Panics
///
/// Panics if `ks` has fewer than `nr + 1` entries.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn dec_core(ks: &[uint8x16_t], nr: usize, mut s: uint8x16_t) -> uint8x16_t {
    // SAFETY: only register-to-register intrinsics are used here; the caller
    // guarantees the `aes` feature is available.
    unsafe {
        s = vaesdq_u8(s, ks[nr]);
        // Walk the middle keys from `nr - 1` down to `1`.
        let mut round = nr;
        while round > 1 {
            round -= 1;
            // The key is transformed with AESIMC on the fly so that it can be
            // applied after the state's own AESIMC step.
            let mixed_key = vaesimcq_u8(ks[round]);
            s = vaesdq_u8(vaesimcq_u8(s), mixed_key);
        }
        veorq_u8(s, ks[0])
    }
}
/// Copies the first `nr + 1` round keys (capped at 15) from the flat byte
/// schedule `round_keys` into an array of NEON registers.
///
/// Entries beyond the loaded keys are left as all-zero vectors. Every key is
/// fetched through `rk`, so `round_keys` must contain a 16-byte entry for
/// each index that gets loaded.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature, and
/// `round_keys` must satisfy the requirements of `rk` for every loaded index.
#[inline]
#[target_feature(enable = "aes")]
unsafe fn load_schedule(round_keys: &[u8], nr: usize) -> [uint8x16_t; 15] {
    let wanted = nr + 1;
    // SAFETY: the caller guarantees the `aes` feature and a schedule that is
    // valid for `rk` at every index below `wanted` (capped at 15).
    unsafe {
        let mut schedule = [vdupq_n_u8(0); 15];
        // `zip` stops at whichever is shorter: the 15 slots or the key count.
        for (slot, index) in schedule.iter_mut().zip(0..wanted) {
            *slot = rk(round_keys, index);
        }
        schedule
    }
}
/// Applies a single AES encryption round to `state` and XORs `rk` into the
/// result.
///
/// The state goes through `AESE` with an all-zero key (so no key is mixed in
/// before the substitution), then `AESMC`, and only then is `rk` XORed in.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature.
#[target_feature(enable = "aes")]
pub(super) unsafe fn aes_round(state: [u8; 16], rk: [u8; 16]) -> [u8; 16] {
    let mut out = [0u8; 16];
    // SAFETY: every load and store touches a local 16-byte array; the caller
    // guarantees the `aes` feature is available.
    unsafe {
        let no_key = vdupq_n_u8(0);
        let substituted = vaeseq_u8(vld1q_u8(state.as_ptr()), no_key);
        let mixed = vaesmcq_u8(substituted);
        let keyed = veorq_u8(mixed, vld1q_u8(rk.as_ptr()));
        vst1q_u8(out.as_mut_ptr(), keyed);
    }
    out
}
/// Encrypts a single 16-byte `block` in place.
///
/// `round_keys` is the flat byte schedule consumed by `load_schedule`, and
/// `nr` is the round count handed to both `load_schedule` and `enc_core`.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature, and
/// `round_keys`/`nr` must satisfy the requirements of `load_schedule`.
#[target_feature(enable = "aes")]
pub(super) unsafe fn encrypt_block(round_keys: &[u8], nr: usize, block: &mut [u8; 16]) {
    // SAFETY: `block` is exactly 16 bytes, matching the single vector load and
    // store; the remaining obligations are forwarded to the caller.
    unsafe {
        let schedule = load_schedule(round_keys, nr);
        let data = block.as_mut_ptr();
        let plaintext = vld1q_u8(data as *const u8);
        let ciphertext = enc_core(&schedule, nr, plaintext);
        vst1q_u8(data, ciphertext);
    }
}
/// Decrypts a single 16-byte `block` in place.
///
/// `round_keys` is the flat byte schedule consumed by `load_schedule`, and
/// `nr` is the round count handed to both `load_schedule` and `dec_core`.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature, and
/// `round_keys`/`nr` must satisfy the requirements of `load_schedule`.
#[target_feature(enable = "aes")]
pub(super) unsafe fn decrypt_block(round_keys: &[u8], nr: usize, block: &mut [u8; 16]) {
    // SAFETY: `block` is exactly 16 bytes, matching the single vector load and
    // store; the remaining obligations are forwarded to the caller.
    unsafe {
        let schedule = load_schedule(round_keys, nr);
        let data = block.as_mut_ptr();
        let ciphertext = vld1q_u8(data as *const u8);
        let plaintext = dec_core(&schedule, nr, ciphertext);
        vst1q_u8(data, plaintext);
    }
}
/// Encrypts every complete 16-byte block of `blocks` in place.
///
/// The key schedule is loaded once. Blocks are first handled in groups of
/// four (64 bytes), with all four results computed before any is written
/// back; leftover complete blocks are then handled one at a time. Bytes past
/// the last complete 16-byte block are left untouched.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature, and
/// `round_keys`/`nr` must satisfy the requirements of `load_schedule`.
#[target_feature(enable = "aes")]
pub(super) unsafe fn encrypt_blocks(round_keys: &[u8], nr: usize, blocks: &mut [u8]) {
    // SAFETY: every vector load/store targets a sub-slice of exactly 16 bytes
    // produced by `chunks_exact`/`chunks_exact_mut`; the remaining obligations
    // are forwarded to the caller.
    unsafe {
        let schedule = load_schedule(round_keys, nr);

        let mut quads = blocks.chunks_exact_mut(4 * 16);
        for quad in quads.by_ref() {
            let mut lanes = [vdupq_n_u8(0); 4];
            for (lane, src) in lanes.iter_mut().zip(quad.chunks_exact(16)) {
                *lane = enc_core(&schedule, nr, vld1q_u8(src.as_ptr()));
            }
            for (dst, lane) in quad.chunks_exact_mut(16).zip(lanes.iter()) {
                vst1q_u8(dst.as_mut_ptr(), *lane);
            }
        }

        // Fewer than four complete blocks remain; process them individually.
        for single in quads.into_remainder().chunks_exact_mut(16) {
            let data = single.as_mut_ptr();
            let ciphertext = enc_core(&schedule, nr, vld1q_u8(data as *const u8));
            vst1q_u8(data, ciphertext);
        }
    }
}
/// Decrypts every complete 16-byte block of `blocks` in place.
///
/// The key schedule is loaded once. Blocks are first handled in groups of
/// four (64 bytes), with all four results computed before any is written
/// back; leftover complete blocks are then handled one at a time. Bytes past
/// the last complete 16-byte block are left untouched.
///
/// # Safety
///
/// The executing CPU must support the `aes` target feature, and
/// `round_keys`/`nr` must satisfy the requirements of `load_schedule`.
#[target_feature(enable = "aes")]
pub(super) unsafe fn decrypt_blocks(round_keys: &[u8], nr: usize, blocks: &mut [u8]) {
    // SAFETY: every vector load/store targets a sub-slice of exactly 16 bytes
    // produced by `chunks_exact`/`chunks_exact_mut`; the remaining obligations
    // are forwarded to the caller.
    unsafe {
        let schedule = load_schedule(round_keys, nr);

        let mut quads = blocks.chunks_exact_mut(4 * 16);
        for quad in quads.by_ref() {
            let mut lanes = [vdupq_n_u8(0); 4];
            for (lane, src) in lanes.iter_mut().zip(quad.chunks_exact(16)) {
                *lane = dec_core(&schedule, nr, vld1q_u8(src.as_ptr()));
            }
            for (dst, lane) in quad.chunks_exact_mut(16).zip(lanes.iter()) {
                vst1q_u8(dst.as_mut_ptr(), *lane);
            }
        }

        // Fewer than four complete blocks remain; process them individually.
        for single in quads.into_remainder().chunks_exact_mut(16) {
            let data = single.as_mut_ptr();
            let plaintext = dec_core(&schedule, nr, vld1q_u8(data as *const u8));
            vst1q_u8(data, plaintext);
        }
    }
}