/// Copies every byte of `src` into the first `src.len()` bytes of `dst`.
///
/// Bytes of `dst` past `src.len()` are left untouched. On x86_64 builds with the
/// `production` feature enabled, inputs of at least 64 bytes are copied with AVX2
/// when the CPU reports support for it; every other case uses `copy_from_slice`.
///
/// # Panics
///
/// Panics if `dst` is shorter than `src`.
#[inline]
pub fn copy_bytes_simd(dst: &mut [u8], src: &[u8]) {
    // Validate the destination length once, before any path is chosen. Without this
    // the vector path would write through raw pointers past the end of a short `dst`
    // instead of panicking like the fallback does.
    let dst = &mut dst[..src.len()];

    #[cfg(all(target_arch = "x86_64", feature = "production"))]
    {
        if src.len() >= 64 && is_avx2_available() {
            // SAFETY: AVX2 support was confirmed at runtime on the line above, and
            // `dst` has been narrowed to exactly `src.len()` bytes, so the callee
            // never touches memory outside either slice.
            unsafe {
                copy_bytes_avx2(dst, src);
            }
            return;
        }
    }

    dst.copy_from_slice(src);
}
/// Copies `src` into the first `src.len()` bytes of `dst` using 32-byte AVX2
/// unaligned loads and stores, finishing any sub-32-byte tail with a plain slice copy.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports AVX2; this function is
/// compiled with that target feature enabled.
///
/// # Panics
///
/// Panics if `dst` is shorter than `src`.
#[cfg(all(target_arch = "x86_64", feature = "production"))]
#[target_feature(enable = "avx2")]
unsafe fn copy_bytes_avx2(dst: &mut [u8], src: &[u8]) {
    use std::arch::x86_64::*;

    // Width of one 256-bit vector in bytes.
    const LANE: usize = 32;

    let len = src.len();
    // Check the destination length before any raw store happens; a short `dst`
    // panics here instead of being overrun by the vector loop.
    let dst = &mut dst[..len];

    // Split both slices into a whole-vector prefix and a tail shorter than one vector.
    let vector_bytes = len - len % LANE;
    let (dst_body, dst_tail) = dst.split_at_mut(vector_bytes);
    let (src_body, src_tail) = src.split_at(vector_bytes);

    for (d, s) in dst_body
        .chunks_exact_mut(LANE)
        .zip(src_body.chunks_exact(LANE))
    {
        // SAFETY: `chunks_exact` guarantees `s` and `d` are each exactly 32 bytes,
        // so one unaligned 256-bit load/store stays inside its subslice. The
        // caller guarantees AVX2 is available.
        unsafe {
            let data = _mm256_loadu_si256(s.as_ptr().cast::<__m256i>());
            _mm256_storeu_si256(d.as_mut_ptr().cast::<__m256i>(), data);
        }
    }

    // The tails have equal length (`len % LANE`), possibly zero.
    dst_tail.copy_from_slice(src_tail);
}
/// Reports whether the CPU this process is running on supports AVX2,
/// using the standard library's runtime feature detection.
#[cfg(all(target_arch = "x86_64", feature = "production"))]
fn is_avx2_available() -> bool {
    use std::arch::is_x86_feature_detected;

    is_x86_feature_detected!("avx2")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A short source copied into a longer destination.
    #[test]
    fn test_copy_bytes_small() {
        let src = [1, 2, 3, 4, 5];
        let mut dst = [0u8; 10];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(&dst[..5], &src);
    }

    /// 128 bytes is a whole number of 32-byte vectors and is above the 64-byte
    /// threshold at which `copy_bytes_simd` considers the AVX2 path.
    #[test]
    fn test_copy_bytes_large() {
        let src: Vec<u8> = (0..128).collect();
        let mut dst = vec![0u8; 128];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    /// 32 bytes is below the 64-byte threshold, so this goes through the
    /// `copy_from_slice` fallback.
    #[test]
    fn test_copy_bytes_exact_32() {
        let src: Vec<u8> = (0..32).collect();
        let mut dst = vec![0u8; 32];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    /// Exactly 64 bytes: the smallest length eligible for the AVX2 path.
    #[test]
    fn test_copy_bytes_threshold_64() {
        let src: Vec<u8> = (0..64).collect();
        let mut dst = vec![0u8; 64];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    /// 100 bytes is three full 32-byte vectors plus a 4-byte tail; the destination
    /// is larger than the source, so bytes past the copied range must be untouched.
    #[test]
    fn test_copy_bytes_with_remainder_keeps_tail() {
        let src: Vec<u8> = (0..100).collect();
        let mut dst = vec![0xAAu8; 128];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(&dst[..100], &src[..]);
        assert!(dst[100..].iter().all(|&b| b == 0xAA));
    }

    /// An empty source leaves the destination unchanged.
    #[test]
    fn test_copy_bytes_empty() {
        let mut dst = [9u8; 4];
        copy_bytes_simd(&mut dst, &[]);
        assert_eq!(dst, [9u8; 4]);
    }
}