#[cfg(all(
    target_arch = "x86_64",
    feature = "simd",
    feature = "nightly",
    target_feature = "avx512f"
))]
mod imp {
    use std::arch::x86_64::{__m512i, _mm512_load_si512, _mm512_xor_si512};

    /// Size in bytes of one `__m512i` vector; also the alignment the aligned load needs.
    const AVX512_ALIGNMENT: usize = 64;

    /// Stack storage for the repeated key, aligned so `_mm512_load_si512` may read it.
    #[repr(C, align(64))]
    struct AlignedMask([u8; AVX512_ALIGNMENT]);

    /// XORs `input` in place with the first four bytes of `key`, repeated, where
    /// the first input byte is paired with `key[offset & 3]`.
    ///
    /// The slice is split into an unaligned prefix, a middle part made of whole
    /// `__m512i` vectors and an unaligned suffix. Prefix and suffix go through
    /// `super::fallback_frame`; the middle is XORed one vector at a time.
    ///
    /// # Panics
    ///
    /// Panics if a key byte is needed that `key` does not contain. Any input that
    /// reaches the vector path needs all four key bytes.
    pub fn frame(key: &[u8], input: &mut [u8], offset: usize) {
        // SAFETY: every bit pattern is a valid `__m512i`, so viewing the aligned
        // middle of a byte slice as vectors is sound.
        let (prefix, aligned_data, suffix) = unsafe { input.align_to_mut::<__m512i>() };

        super::fallback_frame(key, prefix, offset);
        // Only the key position modulo 4 matters, so wrapping addition is exact.
        let offset = offset.wrapping_add(prefix.len()) & 3;

        if !aligned_data.is_empty() {
            // The key rotated so that its first byte lines up with the first
            // byte of the aligned region.
            let rotated = [
                key[offset],
                key[(offset + 1) & 3],
                key[(offset + 2) & 3],
                key[(offset + 3) & 3],
            ];
            let mut mask_bytes = AlignedMask([0; AVX512_ALIGNMENT]);
            for chunk in mask_bytes.0.chunks_exact_mut(4) {
                chunk.copy_from_slice(&rotated);
            }

            // SAFETY: `mask_bytes` is 64 bytes long and 64-byte aligned, and this
            // module is only compiled when `avx512f` is statically enabled.
            unsafe {
                let mask = _mm512_load_si512(mask_bytes.0.as_ptr().cast());
                for block in aligned_data.iter_mut() {
                    *block = _mm512_xor_si512(*block, mask);
                }
            }
        }

        // The aligned region is a multiple of 64 (hence of 4) bytes long, so the
        // key position is the same for the suffix.
        super::fallback_frame(key, suffix, offset);
    }
}
#[cfg(all(
    target_arch = "x86_64",
    feature = "simd",
    not(all(feature = "nightly", target_feature = "avx512f")),
    target_feature = "avx2"
))]
mod imp {
    use std::arch::x86_64::{__m256i, _mm256_load_si256, _mm256_xor_si256};

    /// Size in bytes of one `__m256i` vector; also the alignment the aligned load needs.
    const AVX2_ALIGNMENT: usize = 32;

    /// Stack storage for the repeated key, aligned so `_mm256_load_si256` may read it.
    #[repr(C, align(32))]
    struct AlignedMask([u8; AVX2_ALIGNMENT]);

    /// XORs `input` in place with the first four bytes of `key`, repeated, where
    /// the first input byte is paired with `key[offset & 3]`.
    ///
    /// The slice is split into an unaligned prefix, a middle part made of whole
    /// `__m256i` vectors and an unaligned suffix. Prefix and suffix go through
    /// `super::fallback_frame`; the middle is XORed one vector at a time.
    ///
    /// # Panics
    ///
    /// Panics if a key byte is needed that `key` does not contain. Any input that
    /// reaches the vector path needs all four key bytes.
    pub fn frame(key: &[u8], input: &mut [u8], offset: usize) {
        // SAFETY: every bit pattern is a valid `__m256i`, so viewing the aligned
        // middle of a byte slice as vectors is sound.
        let (prefix, aligned_data, suffix) = unsafe { input.align_to_mut::<__m256i>() };

        super::fallback_frame(key, prefix, offset);
        // Only the key position modulo 4 matters, so wrapping addition is exact.
        let offset = offset.wrapping_add(prefix.len()) & 3;

        if !aligned_data.is_empty() {
            // The key rotated so that its first byte lines up with the first
            // byte of the aligned region.
            let rotated = [
                key[offset],
                key[(offset + 1) & 3],
                key[(offset + 2) & 3],
                key[(offset + 3) & 3],
            ];
            let mut mask_bytes = AlignedMask([0; AVX2_ALIGNMENT]);
            for chunk in mask_bytes.0.chunks_exact_mut(4) {
                chunk.copy_from_slice(&rotated);
            }

            // SAFETY: `mask_bytes` is 32 bytes long and 32-byte aligned, and this
            // module is only compiled when `avx2` is statically enabled.
            unsafe {
                let mask = _mm256_load_si256(mask_bytes.0.as_ptr().cast());
                for block in aligned_data.iter_mut() {
                    *block = _mm256_xor_si256(*block, mask);
                }
            }
        }

        // The aligned region is a multiple of 32 (hence of 4) bytes long, so the
        // key position is the same for the suffix.
        super::fallback_frame(key, suffix, offset);
    }
}
#[cfg(all(
    target_arch = "x86_64",
    feature = "simd",
    not(all(feature = "nightly", target_feature = "avx512f")),
    not(target_feature = "avx2"),
    target_feature = "sse2"
))]
mod imp {
    use std::arch::x86_64::{__m128i, _mm_load_si128, _mm_xor_si128};

    /// Size in bytes of one `__m128i` vector; also the alignment the aligned load needs.
    const SSE2_ALIGNMENT: usize = 16;

    /// Stack storage for the repeated key, aligned so `_mm_load_si128` may read it.
    #[repr(C, align(16))]
    struct AlignedMask([u8; SSE2_ALIGNMENT]);

    /// XORs `input` in place with the first four bytes of `key`, repeated, where
    /// the first input byte is paired with `key[offset & 3]`.
    ///
    /// The slice is split into an unaligned prefix, a middle part made of whole
    /// `__m128i` vectors and an unaligned suffix. Prefix and suffix go through
    /// `super::fallback_frame`; the middle is XORed one vector at a time.
    ///
    /// # Panics
    ///
    /// Panics if a key byte is needed that `key` does not contain. Any input that
    /// reaches the vector path needs all four key bytes.
    pub fn frame(key: &[u8], input: &mut [u8], offset: usize) {
        // SAFETY: every bit pattern is a valid `__m128i`, so viewing the aligned
        // middle of a byte slice as vectors is sound.
        let (prefix, aligned_data, suffix) = unsafe { input.align_to_mut::<__m128i>() };

        super::fallback_frame(key, prefix, offset);
        // Only the key position modulo 4 matters, so wrapping addition is exact.
        let offset = offset.wrapping_add(prefix.len()) & 3;

        if !aligned_data.is_empty() {
            // The key rotated so that its first byte lines up with the first
            // byte of the aligned region.
            let rotated = [
                key[offset],
                key[(offset + 1) & 3],
                key[(offset + 2) & 3],
                key[(offset + 3) & 3],
            ];
            let mut mask_bytes = AlignedMask([0; SSE2_ALIGNMENT]);
            for chunk in mask_bytes.0.chunks_exact_mut(4) {
                chunk.copy_from_slice(&rotated);
            }

            // SAFETY: `mask_bytes` is 16 bytes long and 16-byte aligned, and this
            // module is only compiled when `sse2` is statically enabled.
            unsafe {
                let mask = _mm_load_si128(mask_bytes.0.as_ptr().cast());
                for block in aligned_data.iter_mut() {
                    *block = _mm_xor_si128(*block, mask);
                }
            }
        }

        // The aligned region is a multiple of 16 (hence of 4) bytes long, so the
        // key position is the same for the suffix.
        super::fallback_frame(key, suffix, offset);
    }
}
#[cfg(all(feature = "simd", target_feature = "neon"))]
mod imp {
    #[cfg(target_arch = "aarch64")]
    use std::arch::aarch64::{uint8x16_t, veorq_u8, vld1q_u8};
    #[cfg(target_arch = "arm")]
    use std::arch::arm::{uint8x16_t, veorq_u8, vld1q_u8};

    /// Size in bytes of one `uint8x16_t` vector.
    const NEON_ALIGNMENT: usize = 16;

    /// XORs `input` in place with the first four bytes of `key`, repeated, where
    /// the first input byte is paired with `key[offset & 3]`.
    ///
    /// The slice is split into an unaligned prefix, a middle part made of whole
    /// `uint8x16_t` vectors and an unaligned suffix. Prefix and suffix go through
    /// `super::fallback_frame`; the middle is XORed one vector at a time.
    ///
    /// # Panics
    ///
    /// Panics if a key byte is needed that `key` does not contain. Any input that
    /// reaches the vector path needs all four key bytes.
    pub fn frame(key: &[u8], input: &mut [u8], offset: usize) {
        // SAFETY: every bit pattern is a valid `uint8x16_t`, so viewing the
        // aligned middle of a byte slice as vectors is sound.
        let (prefix, aligned_data, suffix) = unsafe { input.align_to_mut::<uint8x16_t>() };

        super::fallback_frame(key, prefix, offset);
        // Only the key position modulo 4 matters, so wrapping addition is exact.
        let offset = offset.wrapping_add(prefix.len()) & 3;

        if !aligned_data.is_empty() {
            // The key rotated so that its first byte lines up with the first
            // byte of the aligned region.
            let rotated = [
                key[offset],
                key[(offset + 1) & 3],
                key[(offset + 2) & 3],
                key[(offset + 3) & 3],
            ];
            let mut mask_bytes = [0u8; NEON_ALIGNMENT];
            for chunk in mask_bytes.chunks_exact_mut(4) {
                chunk.copy_from_slice(&rotated);
            }

            // SAFETY: `mask_bytes` provides the 16 readable bytes `vld1q_u8`
            // loads, and this module is only compiled when `neon` is statically
            // enabled.
            unsafe {
                let mask = vld1q_u8(mask_bytes.as_ptr());
                for block in aligned_data.iter_mut() {
                    *block = veorq_u8(*block, mask);
                }
            }
        }

        // The aligned region is a multiple of 16 (hence of 4) bytes long, so the
        // key position is the same for the suffix.
        super::fallback_frame(key, suffix, offset);
    }
}
// Scalar implementation, selected whenever none of the SIMD modules applies:
// the `simd` feature is off, the architecture is not one of the supported ones,
// or the architecture is supported but the required target feature is missing.
#[cfg(any(
    not(feature = "simd"),
    all(
        feature = "simd",
        any(target_arch = "aarch64", target_arch = "arm"),
        not(target_feature = "neon")
    ),
    // x86_64 built without any of the vector extensions the SIMD modules use;
    // without this clause no `imp` module would exist for such a build.
    all(
        feature = "simd",
        target_arch = "x86_64",
        not(any(
            all(feature = "nightly", target_feature = "avx512f"),
            target_feature = "avx2",
            target_feature = "sse2"
        ))
    ),
    not(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "arm"))
))]
mod imp {
    /// XORs `input` in place with the repeating four-byte `key` by forwarding
    /// all arguments unchanged to `super::fallback_frame`.
    #[inline]
    pub fn frame(key: &[u8], input: &mut [u8], offset: usize) {
        super::fallback_frame(key, input, offset);
    }
}
/// XORs `input` in place, one byte at a time, with the first four bytes of
/// `key` repeated; the byte at position `i` is paired with
/// `key[(i + offset) & 3]`.
///
/// Only `offset` modulo 4 is significant, so any `usize` value is accepted.
///
/// # Panics
///
/// Panics if a key position that is actually read lies beyond `key.len()`.
/// An empty `input` never reads the key.
pub fn fallback_frame(key: &[u8], input: &mut [u8], offset: usize) {
    // Reduce first so the per-byte addition below cannot overflow for offsets
    // close to `usize::MAX`.
    let phase = offset & 3;
    for (index, byte) in input.iter_mut().enumerate() {
        *byte ^= key[(index + phase) & 3];
    }
}
pub use imp::frame;
/// Checks that `frame` and `fallback_frame` produce identical output for
/// sub-slices starting at different positions of one buffer and for every key
/// offset.
#[cfg(all(test, feature = "client", feature = "fastrand"))]
#[test]
fn test_mask() {
    use crate::rand::get_mask;

    let original: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..))
        .take(1024)
        .collect();
    let key = get_mask();

    // The sub-slices are masked in place: copying them into a fresh `Vec` would
    // hand `frame` a newly allocated buffer and lose the shifted start address.
    for start in 0..8 {
        for offset in 0..4 {
            let mut data = original.clone();
            let mut data_clone = original.clone();
            frame(&key, &mut data[start..998], offset);
            fallback_frame(&key, &mut data_clone[start..998], offset);
            // Comparing the whole buffers also verifies that bytes outside the
            // masked range were left untouched.
            assert_eq!(
                &data, &data_clone,
                "start = {}, offset = {}",
                start, offset
            );
        }
    }
}