use crate::macros::precondition_memory_range;
use crate::util::is_aligned_ptr_mut;
use mirai_annotations::debug_checked_precondition;
/// Zeroizes the `len` bytes starting at `ptr` with a single `rep stosb` instruction.
///
/// Only compiled for x86_64 targets that have the `ermsb` target feature enabled.
///
/// # Safety
///
/// The instruction stores `len` zero bytes starting at `ptr`, so the caller must guarantee
/// that `ptr` is valid for writes of `len` bytes. `precondition_memory_range!` is invoked on
/// `(ptr, len)` before the write.
/// NOTE(review): presumably that macro checks the range — confirm against its definition.
#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
pub unsafe fn asm_ermsb_zeroize(ptr: *mut u8, len: usize) {
    use core::arch::asm;

    precondition_memory_range!(ptr, len);

    // SAFETY: the caller guarantees that `ptr` is valid for `len` bytes of writes.
    // `rep stosb` stores `al` (zero) to `[rdi]` and repeats `rcx` times; it modifies both
    // `rdi` and `rcx`, which is why they are declared as in/out operands whose final values
    // are discarded. The stack is not touched (`nostack`).
    unsafe {
        asm!(
            "rep stosb byte ptr es:[rdi], al",
            in("al") 0u8,
            inout("rdi") ptr => _,
            inout("rcx") len => _,
            options(nostack),
        );
    }
}
/// Zeroizes every complete 16-byte block of the `len` bytes at `ptr` using aligned SSE2 stores.
///
/// `len / 16` blocks are written, one `movdqa` store per block. The trailing `len % 16`
/// bytes (if any) are left untouched.
///
/// Returns the pointer just past the last block written, i.e. `ptr + 16 * (len / 16)`.
///
/// # Safety
///
/// * `ptr` must be valid for writes of `len` bytes.
/// * `ptr` must be 16-byte aligned; `movdqa` faults on an unaligned address.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
pub unsafe fn x86_64_simd16_zeroize_align16_block16(mut ptr: *mut u8, len: usize) -> *mut u8 {
    use core::arch::x86_64 as arch;

    precondition_memory_range!(ptr, len);
    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16));

    // Integer division drops the partial trailing block.
    let nblocks = len / 16;

    for _ in 0..nblocks {
        // SAFETY: the caller guarantees `ptr` is 16-byte aligned and valid for `len` bytes of
        // writes. At most `len / 16` blocks of 16 bytes are written, so every store stays
        // inside that range, and advancing by 16 preserves the alignment.
        unsafe {
            // The legacy-SSE `movdqa` is used rather than the VEX-encoded `vmovdqa`: this
            // function is only gated on `sse2`, and a VEX instruction would raise an
            // invalid-opcode fault on CPUs without AVX.
            core::arch::asm!(
                "/* write 16 zero bytes to ptr */",
                "movdqa xmmword ptr [{0}], {1}",
                in(reg) ptr,
                in(xmm_reg) arch::_mm_setzero_si128(),
                options(nostack),
            );
            ptr = ptr.add(16);
        }
    }
    ptr
}
/// Zeroizes every complete 16-byte block of the `len` bytes at `ptr` using aligned SSE2
/// stores, two blocks per loop iteration.
///
/// `len / 16` blocks are written in total: pairs of blocks in the unrolled loop, followed by
/// one extra store when the block count is odd. The trailing `len % 16` bytes (if any) are
/// left untouched.
///
/// Returns the pointer just past the last block written, i.e. `ptr + 16 * (len / 16)`.
///
/// # Safety
///
/// * `ptr` must be valid for writes of `len` bytes.
/// * `ptr` must be 16-byte aligned; `movdqa` faults on an unaligned address.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
pub unsafe fn x86_64_simd16_unroll2_zeroize_align16_block16(
    mut ptr: *mut u8,
    len: usize,
) -> *mut u8 {
    use core::arch::x86_64 as arch;

    precondition_memory_range!(ptr, len);
    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16));

    // Integer division drops the partial trailing block.
    let nblocks = len / 16;

    // The legacy-SSE `movdqa` is used below rather than the VEX-encoded `vmovdqa`: this
    // function is only gated on `sse2`, and a VEX instruction would raise an invalid-opcode
    // fault on CPUs without AVX.
    for _ in 0..nblocks / 2 {
        // SAFETY: the caller guarantees `ptr` is 16-byte aligned and valid for `len` bytes of
        // writes. This loop writes `nblocks / 2` pairs of 16-byte blocks, which never exceeds
        // `nblocks * 16 <= len` bytes, and advancing by 32 preserves the 16-byte alignment.
        unsafe {
            core::arch::asm!(
                "/* write 32 zero bytes to ptr */",
                "movdqa xmmword ptr [{0}], {1}",
                "movdqa xmmword ptr [{0} + 16], {1}",
                in(reg) ptr,
                in(xmm_reg) arch::_mm_setzero_si128(),
                options(nostack),
            );
            ptr = ptr.add(32);
        }
    }
    if nblocks % 2 == 1 {
        // SAFETY: an odd block count means exactly one more complete 16-byte block lies inside
        // the `len`-byte range after the pairs written above; `ptr` is still 16-byte aligned.
        unsafe {
            core::arch::asm!(
                "/* write 16 zero bytes to ptr */",
                "movdqa xmmword ptr [{0}], {1}",
                in(reg) ptr,
                in(xmm_reg) arch::_mm_setzero_si128(),
                options(nostack),
            );
            ptr = ptr.add(16);
        }
    }
    ptr
}
/// Overwrites every complete 32-byte block in the `len` bytes at `ptr` with zeros, issuing one
/// aligned AVX store (`vmovdqa` from a `ymm` register) per block.
///
/// The `len % 32` bytes after the last complete block are not written.
///
/// Returns `ptr` advanced by `32 * (len / 32)` bytes, i.e. the first byte that was not written.
///
/// # Safety
///
/// * The range `ptr .. ptr + len` must be valid for writes.
/// * `ptr` must be 32-byte aligned, as required by the aligned `vmovdqa` store.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
pub unsafe fn x86_64_simd32_zeroize_align32_block32(mut ptr: *mut u8, len: usize) -> *mut u8 {
    use core::arch::asm;
    use core::arch::x86_64::_mm256_setzero_si256;

    precondition_memory_range!(ptr, len);
    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32));

    // Integer division already discards the partial trailing block.
    let mut blocks_left = len / 32;
    while blocks_left != 0 {
        // SAFETY: the caller guarantees `ptr` is 32-byte aligned and valid for `len` bytes of
        // writes. Only `len / 32` blocks are written, so each store stays inside that range,
        // and stepping by 32 keeps the alignment.
        unsafe {
            // The `\` continuations only strip source indentation; the assembler input is
            // unchanged.
            asm!(
                "\n\
                 /* write 32 zero bytes to ptr */\n\
                 vmovdqa ymmword ptr [{0}], {1}\n",
                in(reg) ptr,
                in(ymm_reg) _mm256_setzero_si256(),
                options(nostack),
            );
            ptr = ptr.add(32);
        }
        blocks_left -= 1;
    }
    ptr
}
/// Overwrites every complete 32-byte block in the `len` bytes at `ptr` with zeros using aligned
/// AVX stores, handling two blocks (64 bytes) per loop iteration.
///
/// Pairs of blocks are written first; when the number of complete blocks is odd, one final
/// single-block store follows. The `len % 32` bytes after the last complete block are not
/// written.
///
/// Returns `ptr` advanced by `32 * (len / 32)` bytes, i.e. the first byte that was not written.
///
/// # Safety
///
/// * The range `ptr .. ptr + len` must be valid for writes.
/// * `ptr` must be 32-byte aligned, as required by the aligned `vmovdqa` store.
#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
pub unsafe fn x86_64_simd32_unroll2_zeroize_align32_block32(
    mut ptr: *mut u8,
    len: usize,
) -> *mut u8 {
    use core::arch::asm;
    use core::arch::x86_64::_mm256_setzero_si256;

    precondition_memory_range!(ptr, len);
    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32));

    // Integer division already discards the partial trailing block.
    let total_blocks = len / 32;
    let has_odd_block = total_blocks % 2 != 0;

    let mut pairs_left = total_blocks / 2;
    while pairs_left != 0 {
        // SAFETY: the caller guarantees `ptr` is 32-byte aligned and valid for `len` bytes of
        // writes. `total_blocks / 2` pairs cover at most `total_blocks * 32 <= len` bytes, so
        // both stores stay inside that range, and stepping by 64 keeps the alignment.
        unsafe {
            // The `\` continuations only strip source indentation; the assembler input is
            // unchanged.
            asm!(
                "\n\
                 /* write 64 zero bytes to ptr */\n\
                 vmovdqa ymmword ptr [{0}], {1}\n\
                 vmovdqa ymmword ptr [{0} + 32], {1}\n",
                in(reg) ptr,
                in(ymm_reg) _mm256_setzero_si256(),
                options(nostack),
            );
            ptr = ptr.add(64);
        }
        pairs_left -= 1;
    }

    if has_odd_block {
        // SAFETY: an odd block count leaves exactly one more complete 32-byte block inside the
        // `len`-byte range after the pairs above; `ptr` is still 32-byte aligned.
        unsafe {
            asm!(
                "\n\
                 /* write 32 zero bytes to ptr */\n\
                 vmovdqa ymmword ptr [{0}], {1}\n",
                in(reg) ptr,
                in(ymm_reg) _mm256_setzero_si256(),
                options(nostack),
            );
            ptr = ptr.add(32);
        }
    }
    ptr
}
/// Overwrites every complete 64-byte block in the `len` bytes at `ptr` with zeros, issuing one
/// aligned AVX-512 store (`vmovdqa64` from a `zmm` register) per block.
///
/// The `len % 64` bytes after the last complete block are not written.
///
/// Returns `ptr` advanced by `64 * (len / 64)` bytes, i.e. the first byte that was not written.
///
/// Only compiled when the `avx512f` target feature and the crate's `nightly_stdsimd` feature
/// are both enabled.
///
/// # Safety
///
/// * The range `ptr .. ptr + len` must be valid for writes.
/// * `ptr` must be 64-byte aligned, as required by the aligned `vmovdqa64` store.
#[cfg(all(
    target_arch = "x86_64",
    target_feature = "avx512f",
    feature = "nightly_stdsimd"
))]
pub unsafe fn x86_64_simd64_zeroize_align64_block64(mut ptr: *mut u8, len: usize) -> *mut u8 {
    use core::arch::asm;
    use core::arch::x86_64::_mm512_setzero_si512;

    precondition_memory_range!(ptr, len);
    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 64));

    // Integer division already discards the partial trailing block.
    let mut blocks_left = len / 64;
    while blocks_left != 0 {
        // SAFETY: the caller guarantees `ptr` is 64-byte aligned and valid for `len` bytes of
        // writes. Only `len / 64` blocks are written, so each store stays inside that range,
        // and stepping by 64 keeps the alignment.
        unsafe {
            // The `\` continuations only strip source indentation; the assembler input is
            // unchanged.
            asm!(
                "\n\
                 /* write 64 zero bytes to ptr */\n\
                 vmovdqa64 zmmword ptr [{0}], {1}\n",
                in(reg) ptr,
                in(zmm_reg) _mm512_setzero_si512(),
                options(nostack),
            );
            ptr = ptr.add(64);
        }
        blocks_left -= 1;
    }
    ptr
}