use crate::mem as basic;
use crate::utils::*;
use crate::RangeError;
#[cfg(target_arch = "x86")]
use core::arch::x86 as mmx;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64 as mmx;
use mmx::__m128i;
use mmx::_mm_load_si128;
use mmx::_mm_loadu_si128;
use mmx::_mm_store_si128;
use mmx::_mm_storeu_si128;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use super::cpuid;
use core::sync::atomic::AtomicPtr;
use core::sync::atomic::Ordering;
/// Signature of the safe entry point through which the copy routine is called.
type _FuncType = fn(&mut [u8], &[u8]) -> Result<(), RangeError>;
/// Initial dispatch target: the one-time setup routine that selects the real
/// implementation.
const _FUNC: _FuncType = _fnptr_setup_func;
/// Slot holding the current dispatch target.
///
/// The stored value is a function pointer cast to `*mut _FuncType`, not a
/// pointer to a `_FuncType` value; `_memcpy_impl` transmutes it back to a
/// function pointer before calling it. It starts out as `_fnptr_setup_func`
/// and is overwritten by that function with the selected implementation.
static _FUNC_PTR_ATOM: AtomicPtr<_FuncType> = AtomicPtr::new(_FUNC as *mut _FuncType);
/// First-call dispatch target: selects a copy routine from the CPU features
/// reported by `cpuid`, installs it in `_FUNC_PTR_ATOM`, and then performs
/// the requested copy with it.
///
/// On x86_64 the choice is AVX2 or SSE2; on 32-bit x86 a third fallback,
/// `_memcpy_basic`, is used when SSE2 is not reported either.
///
/// Returns whatever the selected routine returns for `dst` and `src`.
#[inline(never)]
fn _fnptr_setup_func(dst: &mut [u8], src: &[u8]) -> Result<(), RangeError> {
    #[cfg(target_arch = "x86_64")]
    let selected = if cpuid::has_avx2() {
        _memcpy_avx2
    } else {
        _memcpy_sse2
    };
    #[cfg(target_arch = "x86")]
    let selected = if cpuid::has_avx2() {
        _memcpy_avx2
    } else if cpuid::has_sse2() {
        _memcpy_sse2
    } else {
        _memcpy_basic
    };
    // Publish the choice first so that later `_memcpy_impl` calls jump
    // straight to the selected routine instead of coming back here.
    let raw = selected as *mut _FuncType;
    _FUNC_PTR_ATOM.store(raw, Ordering::Relaxed);
    // SAFETY: a `#[target_feature]` variant is selected only on the branch
    // guarded by the matching `cpuid` check; the unchecked SSE2 choice exists
    // only on x86_64, where SSE2 is part of the baseline instruction set.
    unsafe { selected(dst, src) }
}
/// Copies `src` into `dst` through the routine currently installed in
/// `_FUNC_PTR_ATOM`.
///
/// Returns the result of the installed routine unchanged.
#[inline(always)]
pub(crate) fn _memcpy_impl(dst: &mut [u8], src: &[u8]) -> Result<(), RangeError> {
    let raw = _FUNC_PTR_ATOM.load(Ordering::Relaxed);
    // SAFETY: within this file the slot only ever holds function pointers that
    // were cast to `*mut _FuncType` (its initializer and the store in
    // `_fnptr_setup_func`), so converting the value back yields a callable
    // function pointer of the same size.
    #[allow(clippy::crosspointer_transmute)]
    let target = unsafe { core::mem::transmute::<*mut _FuncType, _FuncType>(raw) };
    target(dst, src)
}
/// Forwards the copy to `basic::_memcpy_impl` (`basic` is this file's alias
/// for `crate::mem`) and returns its result.
///
/// NOTE(review): the body performs no unsafe operation; the function is
/// presumably declared `unsafe` so that it has the same function type as the
/// `#[target_feature]` variants it is selected alongside in
/// `_fnptr_setup_func` — confirm before relaxing the signature.
unsafe fn _memcpy_basic(dst: &mut [u8], src: &[u8]) -> Result<(), RangeError> {
basic::_memcpy_impl(dst, src)
}
/// Copies all of `src` into the beginning of `dst` by running
/// `_memcpy_sse2_impl` with the `sse2` target feature enabled.
///
/// # Errors
///
/// Returns `Err(RangeError)` when `dst` is shorter than `src`.
///
/// # Safety
///
/// The executing CPU must support SSE2; calling a `#[target_feature]`
/// function on a CPU that lacks the feature is undefined behavior.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "sse2")]
#[allow(clippy::missing_safety_doc)]
pub unsafe fn _memcpy_sse2(dst: &mut [u8], src: &[u8]) -> Result<(), RangeError> {
_memcpy_sse2_impl(dst, src)
}
/// Copies all of `src` into the beginning of `dst` with the `avx2` target
/// feature enabled.
///
/// The body is the same `_memcpy_sse2_impl` routine used by `_memcpy_sse2`;
/// only the feature set the compiler may assume for this instantiation
/// differs.
///
/// # Errors
///
/// Returns `Err(RangeError)` when `dst` is shorter than `src`.
///
/// # Safety
///
/// The executing CPU must support AVX2; calling a `#[target_feature]`
/// function on a CPU that lacks the feature is undefined behavior.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")]
#[allow(clippy::missing_safety_doc)]
pub unsafe fn _memcpy_avx2(dst: &mut [u8], src: &[u8]) -> Result<(), RangeError> {
_memcpy_sse2_impl(dst, src)
}
/// Copies all of `src` into the beginning of `dst` in 16-byte steps.
///
/// An empty `src` is a no-op returning `Ok(())`. When `dst` is shorter than
/// `src`, nothing is copied and `Err(RangeError)` is returned.
///
/// For inputs of at least 16 bytes the destination cursor is first brought to
/// a 16-byte boundary, then the bulk is copied with aligned stores (and
/// aligned loads when the source cursor happens to be aligned as well).
/// Whatever is left is handed to `basic::_memcpy_remaining_15_bytes_impl`.
#[inline(always)]
fn _memcpy_sse2_impl(dst: &mut [u8], src: &[u8]) -> Result<(), RangeError> {
    let count = src.len();
    if count == 0 {
        return Ok(());
    }
    if count > dst.len() {
        return Err(RangeError);
    }

    let mut dst_cur = dst.as_mut_ptr();
    let mut src_cur = src.as_ptr();
    // SAFETY: `count <= dst.len()`, so the result is at most one past the end
    // of `dst`.
    let dst_end = unsafe { dst_cur.add(count) };
    // Helper from `crate::utils`; presumably a prefetch hint for the source.
    src_cur.prefetch_read_data();

    if count >= 16 {
        if !dst_cur.is_aligned_u128() {
            #[cfg(not(feature = "test_alignment_check"))]
            {
                // Copy one full unaligned 16-byte chunk, then step forward only
                // as far as the next 16-byte boundary of the destination. The
                // bytes between that boundary and the end of this chunk are
                // written again by the code below.
                // SAFETY: at least 16 bytes are available in both slices.
                unsafe { _cpy_b16_uu_x1(dst_cur, src_cur) };
                let skip = 0x10_usize - ((dst_cur as usize) & 0x0F_usize);
                // SAFETY: `skip <= 15 < count`, so both cursors stay in range.
                dst_cur = unsafe { dst_cur.add(skip) };
                src_cur = unsafe { src_cur.add(skip) };
            }
            #[cfg(feature = "test_alignment_check")]
            {
                let (d, s) = basic::_cpy_to_aligned_u128(dst_cur, src_cur);
                dst_cur = d;
                src_cur = s;
            }
        }

        if src_cur.is_aligned_u128() {
            // Destination and source are both 16-byte aligned.
            let (d, s) = _unroll_loop_dual_action_with_prefetch::<8, 16, _>(
                dst_cur,
                src_cur,
                dst_end,
                |d, s| unsafe { _cpy_b16_aa_x1(d, s) },
            );
            let (d, s) = _unroll_loop_dual_action::<1, 16, _>(d, s, dst_end, |d, s| unsafe {
                _cpy_b16_aa_x1(d, s)
            });
            dst_cur = d;
            src_cur = s;
        } else {
            // Only the destination is aligned; the source needs unaligned loads.
            let (d, s) = _unroll_loop_dual_action_with_prefetch::<8, 16, _>(
                dst_cur,
                src_cur,
                dst_end,
                |d, s| unsafe { _cpy_b16_au_x1(d, s) },
            );
            let (d, s) = _unroll_loop_dual_action::<1, 16, _>(d, s, dst_end, |d, s| unsafe {
                _cpy_b16_au_x1(d, s)
            });
            dst_cur = d;
            src_cur = s;
        }
    }

    basic::_memcpy_remaining_15_bytes_impl(dst_cur, src_cur, dst_end);
    Ok(())
}
/// Copies 16 bytes from `b_ptr` to `a_ptr` with an unaligned load and an
/// unaligned store.
///
/// # Safety
///
/// `b_ptr` must be valid for reading 16 bytes and `a_ptr` must be valid for
/// writing 16 bytes. Neither pointer needs any particular alignment.
#[inline(always)]
unsafe fn _cpy_b16_uu_x1(a_ptr: *mut u8, b_ptr: *const u8) {
    // SAFETY: the caller guarantees both 16-byte ranges are accessible.
    unsafe {
        let chunk = _mm_loadu_si128(b_ptr.cast::<__m128i>());
        _mm_storeu_si128(a_ptr.cast::<__m128i>(), chunk);
    }
}
/// Copies 16 bytes from `b_ptr` to `a_ptr` with an unaligned load and an
/// aligned store.
///
/// # Safety
///
/// `b_ptr` must be valid for reading 16 bytes. `a_ptr` must be valid for
/// writing 16 bytes and must be 16-byte aligned, as required by
/// `_mm_store_si128`.
#[inline(always)]
unsafe fn _cpy_b16_au_x1(a_ptr: *mut u8, b_ptr: *const u8) {
    // SAFETY: the caller guarantees both ranges are accessible and that
    // `a_ptr` is 16-byte aligned.
    unsafe {
        let chunk = _mm_loadu_si128(b_ptr.cast::<__m128i>());
        _mm_store_si128(a_ptr.cast::<__m128i>(), chunk);
    }
}
/// Copies 16 bytes from `b_ptr` to `a_ptr` with an aligned load and an
/// aligned store.
///
/// # Safety
///
/// `b_ptr` must be valid for reading 16 bytes and `a_ptr` must be valid for
/// writing 16 bytes. Both pointers must be 16-byte aligned, as required by
/// `_mm_load_si128` and `_mm_store_si128`.
#[inline(always)]
unsafe fn _cpy_b16_aa_x1(a_ptr: *mut u8, b_ptr: *const u8) {
    // SAFETY: the caller guarantees both ranges are accessible and 16-byte
    // aligned.
    unsafe {
        let chunk = _mm_load_si128(b_ptr.cast::<__m128i>());
        _mm_store_si128(a_ptr.cast::<__m128i>(), chunk);
    }
}
#[cfg(test)]
mod disasm {
    //! Non-inlined wrappers around each copy variant plus a smoke test that
    //! runs them. NOTE(review): judging by the module name and the
    //! `#[inline(never)]` wrappers, these presumably exist so each variant can
    //! be found in a disassembly — keep the wrappers as separate functions.
    use super::*;

    /// Runs `copy` on a 7-byte source and a fresh destination of the same
    /// length, then checks both the returned status and the copied bytes.
    fn check_copy(copy: fn(&mut [u8], &[u8]) -> Result<(), RangeError>) {
        let src = b"abcdefg".to_vec();
        // The destination is sized from the source: the copy routines return
        // `Err(RangeError)` when the destination is shorter than the source.
        let mut dst = b" ".repeat(src.len());
        assert_eq!(copy(dst.as_mut_slice(), src.as_slice()), Ok(()));
        assert_eq!(dst, src);
    }

    /// Exercises every copy variant that the executing CPU supports.
    #[test]
    fn do_procs() {
        check_copy(do_proc_basic);

        // SSE2 is part of the x86_64 baseline; on 32-bit x86 it must be
        // detected before a `#[target_feature(enable = "sse2")]` function may
        // be called.
        #[cfg(target_arch = "x86_64")]
        let sse2_ok = true;
        #[cfg(target_arch = "x86")]
        let sse2_ok = cpuid::has_sse2();
        if sse2_ok {
            check_copy(do_proc_sse2);
        }

        // Calling the AVX2 variant on a CPU without AVX2 is undefined
        // behavior, so it is only run when the CPU reports support.
        if cpuid::has_avx2() {
            check_copy(do_proc_avx2);
        }
    }

    /// Non-inlined wrapper around the plain fallback routine.
    #[inline(never)]
    fn do_proc_basic(a: &mut [u8], b: &[u8]) -> Result<(), RangeError> {
        // SAFETY: `_memcpy_basic` has no CPU-feature requirement.
        unsafe { _memcpy_basic(a, b) }
    }

    /// Non-inlined wrapper around the SSE2 routine; the caller must have
    /// established that the CPU supports SSE2.
    #[inline(never)]
    fn do_proc_sse2(a: &mut [u8], b: &[u8]) -> Result<(), RangeError> {
        // SAFETY: only reached from `do_procs` after SSE2 support is known.
        unsafe { _memcpy_sse2(a, b) }
    }

    /// Non-inlined wrapper around the AVX2 routine; the caller must have
    /// established that the CPU supports AVX2.
    #[inline(never)]
    fn do_proc_avx2(a: &mut [u8], b: &[u8]) -> Result<(), RangeError> {
        // SAFETY: only reached from `do_procs` after `cpuid::has_avx2()`
        // returned true.
        unsafe { _memcpy_avx2(a, b) }
    }
}