use crate::error::{Result, ZiporaError};
use crate::system::cpu_features::{CpuFeatures, get_cpu_features};
/// Maximum buffer size accepted by the small-copy path.
const SMALL_THRESHOLD: usize = 256;
/// Nominal cutoff for very large buffers (not referenced in this module).
#[allow(dead_code)]
const LARGE_THRESHOLD: usize = 1024;
/// Alignment, in bytes, required by `copy_aligned_simd`.
const CACHE_LINE_SIZE: usize = 64;
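/// SIMD instruction tier selected at runtime, listed from widest vectors to scalar fallback.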
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SimdCopyTier {
Avx512,
Avx2,
Sse2,
Neon,
Scalar,
}
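/// Byte-copy engine that dispatches to the best SIMD tier detected at startup.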
#[derive(Debug, Clone)]
pub struct SimdCopy {
tier: SimdCopyTier,
cpu_features: &'static CpuFeatures,
}
impl SimdCopy {
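    /// Creates a copier whose dispatch tier is chosen from the detected CPU features.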
pub fn new() -> Self {
let cpu_features = get_cpu_features();
let tier = Self::select_optimal_tier(cpu_features);
Self {
tier,
cpu_features,
}
}
fn select_optimal_tier(features: &CpuFeatures) -> SimdCopyTier {
#[cfg(target_arch = "x86_64")]
{
if features.has_avx512f && features.has_avx512vl && features.has_avx512bw {
return SimdCopyTier::Avx512;
}
if features.has_avx2 {
return SimdCopyTier::Avx2;
}
        // SSE2 is part of the x86_64 baseline, so it is always available here;
        // gating it behind SSE4.1 would wrongly demote older CPUs to scalar.
        return SimdCopyTier::Sse2;
    }
    #[cfg(target_arch = "aarch64")]
    {
        // NEON is mandatory on AArch64.
        let _ = features;
        return SimdCopyTier::Neon;
    }
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    {
        let _ = features;
        SimdCopyTier::Scalar
    }
}
pub fn tier(&self) -> SimdCopyTier {
self.tier
}
}
impl Default for SimdCopy {
fn default() -> Self {
Self::new()
}
}
static GLOBAL_SIMD_COPY: std::sync::OnceLock<SimdCopy> = std::sync::OnceLock::new();
fn get_global_simd_copy() -> &'static SimdCopy {
    GLOBAL_SIMD_COPY.get_or_init(SimdCopy::new)
}
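/// Copies `src` into `dst` using the widest SIMD tier available at runtime.
///
/// Returns an error if the lengths differ or the buffers overlap.
///
/// # Example
///
/// ```ignore
/// // Illustrative only; the path to this module is assumed.
/// let src = vec![7u8; 8192];
/// let mut dst = vec![0u8; 8192];
/// copy_large_simd(&mut dst, &src)?;
/// assert_eq!(src, dst);
/// ```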
pub fn copy_large_simd(dst: &mut [u8], src: &[u8]) -> Result<()> {
if src.len() != dst.len() {
return Err(ZiporaError::invalid_data(
format!("Source and destination lengths don't match: {} vs {}", src.len(), dst.len())
));
}
if src.is_empty() {
return Ok(());
}
if buffers_overlap(src, dst) {
return Err(ZiporaError::invalid_data(
"Source and destination buffers must not overlap".to_string()
));
}
let simd = get_global_simd_copy();
unsafe {
simd.copy_large_internal(dst.as_mut_ptr(), src.as_ptr(), src.len());
}
Ok(())
}
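/// Copies a buffer of at most `SMALL_THRESHOLD` (256) bytes.
///
/// Returns an error if the lengths differ, the buffers overlap, or `src`
/// exceeds the small threshold.
///
/// # Example
///
/// ```ignore
/// // Illustrative only; the path to this module is assumed.
/// let src = [1u8; 128];
/// let mut dst = [0u8; 128];
/// copy_small_simd(&mut dst, &src)?;
/// ```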
pub fn copy_small_simd(dst: &mut [u8], src: &[u8]) -> Result<()> {
if src.len() != dst.len() {
return Err(ZiporaError::invalid_data(
format!("Source and destination lengths don't match: {} vs {}", src.len(), dst.len())
));
}
if src.is_empty() {
return Ok(());
}
if src.len() > SMALL_THRESHOLD {
return Err(ZiporaError::invalid_data(
format!("Buffer size {} exceeds small threshold {}", src.len(), SMALL_THRESHOLD)
));
}
if buffers_overlap(src, dst) {
return Err(ZiporaError::invalid_data(
"Source and destination buffers must not overlap".to_string()
));
}
let simd = get_global_simd_copy();
unsafe {
simd.copy_small_internal(dst.as_mut_ptr(), src.as_ptr(), src.len());
}
Ok(())
}
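/// Copies cache-line-aligned `src` into `dst` using aligned vector loads and stores.
///
/// Both slices must have the same length, be 64-byte aligned, and not overlap.
///
/// # Example
///
/// ```ignore
/// // Illustrative only: `alloc_aligned` stands in for a hypothetical helper
/// // that returns a 64-byte-aligned Vec<u8>.
/// let src = alloc_aligned(4096);
/// let mut dst = alloc_aligned(4096);
/// copy_aligned_simd(&mut dst, &src)?;
/// ```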
pub fn copy_aligned_simd(dst: &mut [u8], src: &[u8]) -> Result<()> {
if src.len() != dst.len() {
return Err(ZiporaError::invalid_data(
format!("Source and destination lengths don't match: {} vs {}", src.len(), dst.len())
));
}
    if src.is_empty() {
        return Ok(());
    }
    let src_aligned = (src.as_ptr() as usize) % CACHE_LINE_SIZE == 0;
    let dst_aligned = (dst.as_ptr() as usize) % CACHE_LINE_SIZE == 0;
    if !src_aligned || !dst_aligned {
        return Err(ZiporaError::invalid_data(
            "Source and destination must be 64-byte aligned for aligned copy".to_string()
        ));
    }
    if buffers_overlap(src, dst) {
        return Err(ZiporaError::invalid_data(
            "Source and destination buffers must not overlap".to_string()
        ));
    }
let simd = get_global_simd_copy();
unsafe {
simd.copy_aligned_internal(dst.as_mut_ptr(), src.as_ptr(), src.len());
}
Ok(())
}
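/// Half-open interval test: true if the two slices share any bytes.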
#[inline]
fn buffers_overlap(a: &[u8], b: &[u8]) -> bool {
let a_start = a.as_ptr() as usize;
let a_end = a_start + a.len();
let b_start = b.as_ptr() as usize;
let b_end = b_start + b.len();
a_start < b_end && b_start < a_end
}
impl SimdCopy {
#[inline]
unsafe fn copy_large_internal(&self, dst: *mut u8, src: *const u8, len: usize) {
match self.tier {
SimdCopyTier::Avx512 => {
unsafe { self.avx512_copy_large(dst, src, len); }
}
SimdCopyTier::Avx2 => {
unsafe { self.avx2_copy_large(dst, src, len); }
}
SimdCopyTier::Sse2 => {
unsafe { self.sse2_copy_large(dst, src, len); }
}
SimdCopyTier::Neon => {
unsafe { self.neon_copy_large(dst, src, len); }
}
SimdCopyTier::Scalar => {
unsafe { self.scalar_copy(dst, src, len); }
}
}
}
#[inline]
unsafe fn copy_small_internal(&self, dst: *mut u8, src: *const u8, len: usize) {
match self.tier {
SimdCopyTier::Avx512 => {
unsafe { self.avx512_copy_small(dst, src, len); }
}
SimdCopyTier::Avx2 => {
unsafe { self.avx2_copy_small(dst, src, len); }
}
SimdCopyTier::Sse2 => {
unsafe { self.sse2_copy_small(dst, src, len); }
}
SimdCopyTier::Neon => {
unsafe { self.neon_copy_small(dst, src, len); }
}
SimdCopyTier::Scalar => {
unsafe { self.scalar_copy(dst, src, len); }
}
}
}
#[inline]
unsafe fn copy_aligned_internal(&self, dst: *mut u8, src: *const u8, len: usize) {
match self.tier {
SimdCopyTier::Avx512 => {
unsafe { self.avx512_copy_aligned(dst, src, len); }
}
SimdCopyTier::Avx2 => {
unsafe { self.avx2_copy_aligned(dst, src, len); }
}
SimdCopyTier::Sse2 => {
unsafe { self.sse2_copy_aligned(dst, src, len); }
}
SimdCopyTier::Neon => {
unsafe { self.neon_copy_aligned(dst, src, len); }
}
SimdCopyTier::Scalar => {
unsafe { self.scalar_copy(dst, src, len); }
}
}
}
}
#[cfg(target_arch = "x86_64")]
impl SimdCopy {
#[target_feature(enable = "avx512f,avx512vl,avx512bw")]
unsafe fn avx512_copy_large(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::x86_64::*;
while len >= 64 {
unsafe {
let data = _mm512_loadu_si512(src as *const __m512i);
_mm512_storeu_si512(dst as *mut __m512i, data);
src = src.add(64);
dst = dst.add(64);
}
len -= 64;
}
        if len > 0 {
            // len < 64 here: finish with overlapping head/tail copies at the
            // widest vector width that fits; below 16 bytes fall back to scalar.
            if len >= 32 {
                unsafe {
                    let head = _mm256_loadu_si256(src as *const __m256i);
                    _mm256_storeu_si256(dst as *mut __m256i, head);
                    let offset = len - 32;
                    let tail = _mm256_loadu_si256(src.add(offset) as *const __m256i);
                    _mm256_storeu_si256(dst.add(offset) as *mut __m256i, tail);
                }
            } else if len >= 16 {
                unsafe {
                    let head = _mm_loadu_si128(src as *const __m128i);
                    _mm_storeu_si128(dst as *mut __m128i, head);
                    let offset = len - 16;
                    let tail = _mm_loadu_si128(src.add(offset) as *const __m128i);
                    _mm_storeu_si128(dst.add(offset) as *mut __m128i, tail);
                }
            } else {
                unsafe { self.scalar_copy(dst, src, len); }
            }
        }
}
#[target_feature(enable = "avx512f,avx512vl,avx512bw")]
unsafe fn avx512_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
use std::arch::x86_64::*;
if len >= 64 {
unsafe {
let data = _mm512_loadu_si512(src as *const __m512i);
_mm512_storeu_si512(dst as *mut __m512i, data);
}
let mut offset = 64;
while offset < len.saturating_sub(64) {
unsafe {
let data = _mm512_loadu_si512(src.add(offset) as *const __m512i);
_mm512_storeu_si512(dst.add(offset) as *mut __m512i, data);
}
offset += 64;
}
if len > 64 {
let offset = len - 64;
unsafe {
let tail = _mm512_loadu_si512(src.add(offset) as *const __m512i);
_mm512_storeu_si512(dst.add(offset) as *mut __m512i, tail);
}
}
} else if len >= 32 {
unsafe {
let data = _mm256_loadu_si256(src as *const __m256i);
_mm256_storeu_si256(dst as *mut __m256i, data);
}
let mut offset = 32;
while offset < len.saturating_sub(32) {
unsafe {
let data = _mm256_loadu_si256(src.add(offset) as *const __m256i);
_mm256_storeu_si256(dst.add(offset) as *mut __m256i, data);
}
offset += 32;
}
if len > 32 {
let offset = len - 32;
unsafe {
let tail = _mm256_loadu_si256(src.add(offset) as *const __m256i);
_mm256_storeu_si256(dst.add(offset) as *mut __m256i, tail);
}
}
} else if len >= 16 {
unsafe {
let data = _mm_loadu_si128(src as *const __m128i);
_mm_storeu_si128(dst as *mut __m128i, data);
}
let mut offset = 16;
while offset < len.saturating_sub(16) {
unsafe {
let data = _mm_loadu_si128(src.add(offset) as *const __m128i);
_mm_storeu_si128(dst.add(offset) as *mut __m128i, data);
}
offset += 16;
}
if len > 16 {
let offset = len - 16;
unsafe {
let tail = _mm_loadu_si128(src.add(offset) as *const __m128i);
_mm_storeu_si128(dst.add(offset) as *mut __m128i, tail);
}
}
} else {
unsafe { self.scalar_copy(dst, src, len); }
}
}
#[target_feature(enable = "avx512f,avx512vl,avx512bw")]
unsafe fn avx512_copy_aligned(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::x86_64::*;
while len >= 64 {
unsafe {
let data = _mm512_load_si512(src as *const __m512i);
_mm512_store_si512(dst as *mut __m512i, data);
src = src.add(64);
dst = dst.add(64);
}
len -= 64;
}
if len > 0 {
unsafe { self.scalar_copy(dst, src, len); }
}
}
}
#[cfg(not(target_arch = "x86_64"))]
impl SimdCopy {
#[inline]
unsafe fn avx512_copy_large(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn avx512_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn avx512_copy_aligned(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
}
#[cfg(target_arch = "x86_64")]
impl SimdCopy {
#[target_feature(enable = "avx2")]
unsafe fn avx2_copy_large(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::x86_64::*;
while len >= 32 {
unsafe {
let data = _mm256_loadu_si256(src as *const __m256i);
_mm256_storeu_si256(dst as *mut __m256i, data);
src = src.add(32);
dst = dst.add(32);
}
len -= 32;
}
        if len > 0 {
            // len < 32 here: one overlapping 16-byte head/tail pair, or scalar
            // for remainders shorter than 16 bytes.
            if len >= 16 {
                unsafe {
                    let head = _mm_loadu_si128(src as *const __m128i);
                    _mm_storeu_si128(dst as *mut __m128i, head);
                    let offset = len - 16;
                    let tail = _mm_loadu_si128(src.add(offset) as *const __m128i);
                    _mm_storeu_si128(dst.add(offset) as *mut __m128i, tail);
                }
            } else {
                unsafe { self.scalar_copy(dst, src, len); }
            }
        }
}
#[target_feature(enable = "avx2")]
unsafe fn avx2_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
use std::arch::x86_64::*;
if len >= 32 {
unsafe {
let data = _mm256_loadu_si256(src as *const __m256i);
_mm256_storeu_si256(dst as *mut __m256i, data);
}
let mut offset = 32;
while offset < len.saturating_sub(32) {
unsafe {
let data = _mm256_loadu_si256(src.add(offset) as *const __m256i);
_mm256_storeu_si256(dst.add(offset) as *mut __m256i, data);
}
offset += 32;
}
if len > 32 {
let offset = len - 32;
unsafe {
let tail = _mm256_loadu_si256(src.add(offset) as *const __m256i);
_mm256_storeu_si256(dst.add(offset) as *mut __m256i, tail);
}
}
} else if len >= 16 {
unsafe {
let data = _mm_loadu_si128(src as *const __m128i);
_mm_storeu_si128(dst as *mut __m128i, data);
}
let mut offset = 16;
while offset < len.saturating_sub(16) {
unsafe {
let data = _mm_loadu_si128(src.add(offset) as *const __m128i);
_mm_storeu_si128(dst.add(offset) as *mut __m128i, data);
}
offset += 16;
}
if len > 16 {
let offset = len - 16;
unsafe {
let tail = _mm_loadu_si128(src.add(offset) as *const __m128i);
_mm_storeu_si128(dst.add(offset) as *mut __m128i, tail);
}
}
} else {
unsafe { self.scalar_copy(dst, src, len); }
}
}
#[target_feature(enable = "avx2")]
unsafe fn avx2_copy_aligned(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::x86_64::*;
while len >= 32 {
unsafe {
let data = _mm256_load_si256(src as *const __m256i);
_mm256_store_si256(dst as *mut __m256i, data);
src = src.add(32);
dst = dst.add(32);
}
len -= 32;
}
if len > 0 {
unsafe { self.scalar_copy(dst, src, len); }
}
}
}
#[cfg(not(target_arch = "x86_64"))]
impl SimdCopy {
#[inline]
unsafe fn avx2_copy_large(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn avx2_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn avx2_copy_aligned(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
}
#[cfg(target_arch = "x86_64")]
impl SimdCopy {
#[target_feature(enable = "sse2")]
unsafe fn sse2_copy_large(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::x86_64::*;
while len >= 16 {
unsafe {
let data = _mm_loadu_si128(src as *const __m128i);
_mm_storeu_si128(dst as *mut __m128i, data);
src = src.add(16);
dst = dst.add(16);
}
len -= 16;
}
        if len > 0 {
            if len >= 8 {
                // Overlapping 8-byte head and tail copies cover the whole
                // 8..=15-byte remainder; copying only the tail would drop
                // the leading bytes.
                unsafe {
                    let head = (src as *const u64).read_unaligned();
                    (dst as *mut u64).write_unaligned(head);
                    let offset = len - 8;
                    let tail = (src.add(offset) as *const u64).read_unaligned();
                    (dst.add(offset) as *mut u64).write_unaligned(tail);
                }
            } else {
                unsafe { self.scalar_copy(dst, src, len); }
            }
        }
}
#[target_feature(enable = "sse2")]
unsafe fn sse2_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
use std::arch::x86_64::*;
if len >= 16 {
unsafe {
let data = _mm_loadu_si128(src as *const __m128i);
_mm_storeu_si128(dst as *mut __m128i, data);
}
let mut offset = 16;
while offset < len.saturating_sub(16) {
unsafe {
let data = _mm_loadu_si128(src.add(offset) as *const __m128i);
_mm_storeu_si128(dst.add(offset) as *mut __m128i, data);
}
offset += 16;
}
if len > 16 {
let offset = len - 16;
unsafe {
let tail = _mm_loadu_si128(src.add(offset) as *const __m128i);
_mm_storeu_si128(dst.add(offset) as *mut __m128i, tail);
}
}
} else {
unsafe { self.scalar_copy(dst, src, len); }
}
}
#[target_feature(enable = "sse2")]
unsafe fn sse2_copy_aligned(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::x86_64::*;
while len >= 16 {
unsafe {
let data = _mm_load_si128(src as *const __m128i);
_mm_store_si128(dst as *mut __m128i, data);
src = src.add(16);
dst = dst.add(16);
}
len -= 16;
}
if len > 0 {
unsafe { self.scalar_copy(dst, src, len); }
}
}
}
#[cfg(not(target_arch = "x86_64"))]
impl SimdCopy {
#[inline]
unsafe fn sse2_copy_large(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn sse2_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn sse2_copy_aligned(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
}
#[cfg(target_arch = "aarch64")]
impl SimdCopy {
unsafe fn neon_copy_large(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::aarch64::*;
while len >= 16 {
unsafe {
let data = vld1q_u8(src);
vst1q_u8(dst, data);
src = src.add(16);
dst = dst.add(16);
}
len -= 16;
}
if len > 0 {
unsafe { self.scalar_copy(dst, src, len); }
}
}
unsafe fn neon_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
use std::arch::aarch64::*;
if len >= 16 {
unsafe {
let data = vld1q_u8(src);
vst1q_u8(dst, data);
}
let mut offset = 16;
while offset < len.saturating_sub(16) {
unsafe {
let data = vld1q_u8(src.add(offset));
vst1q_u8(dst.add(offset), data);
}
offset += 16;
}
if len > 16 {
let offset = len - 16;
unsafe {
let tail = vld1q_u8(src.add(offset));
vst1q_u8(dst.add(offset), tail);
}
}
} else {
unsafe { self.scalar_copy(dst, src, len); }
}
}
unsafe fn neon_copy_aligned(&self, mut dst: *mut u8, mut src: *const u8, mut len: usize) {
use std::arch::aarch64::*;
while len >= 16 {
unsafe {
let data = vld1q_u8(src);
vst1q_u8(dst, data);
src = src.add(16);
dst = dst.add(16);
}
len -= 16;
}
if len > 0 {
unsafe { self.scalar_copy(dst, src, len); }
}
}
}
#[cfg(not(target_arch = "aarch64"))]
impl SimdCopy {
#[inline]
unsafe fn neon_copy_large(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn neon_copy_small(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
#[inline]
unsafe fn neon_copy_aligned(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe { self.scalar_copy(dst, src, len); }
}
}
impl SimdCopy {
#[inline]
unsafe fn scalar_copy(&self, dst: *mut u8, src: *const u8, len: usize) {
unsafe {
std::ptr::copy_nonoverlapping(src, dst, len);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simd_copy_tier_selection() {
let copy = SimdCopy::new();
println!("Selected SIMD tier: {:?}", copy.tier());
assert!(matches!(
copy.tier(),
SimdCopyTier::Avx512 | SimdCopyTier::Avx2 | SimdCopyTier::Sse2 | SimdCopyTier::Neon | SimdCopyTier::Scalar
));
}
#[test]
fn test_copy_large_simd_basic() {
let src = vec![42u8; 8192];
let mut dst = vec![0u8; 8192];
let result = copy_large_simd(&mut dst, &src);
assert!(result.is_ok());
assert_eq!(src, dst);
}
#[test]
fn test_copy_large_simd_various_sizes() {
let sizes = vec![1024, 2048, 4096, 8192, 16384];
for size in sizes {
let src: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
let mut dst = vec![0u8; size];
let result = copy_large_simd(&mut dst, &src);
assert!(result.is_ok(), "Failed for size {}", size);
assert_eq!(src, dst, "Mismatch for size {}", size);
}
}
#[test]
fn test_copy_small_simd_basic() {
let src = vec![42u8; 128];
let mut dst = vec![0u8; 128];
let result = copy_small_simd(&mut dst, &src);
assert!(result.is_ok());
assert_eq!(src, dst);
}
#[test]
fn test_copy_small_simd_various_sizes() {
let sizes = vec![16, 32, 64, 128, 192, 256];
for size in sizes {
let src: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
let mut dst = vec![0u8; size];
let result = copy_small_simd(&mut dst, &src);
assert!(result.is_ok(), "Failed for size {}", size);
assert_eq!(src, dst, "Mismatch for size {}", size);
}
}
#[test]
fn test_copy_small_simd_size_validation() {
        let src = vec![42u8; 512];
        let mut dst = vec![0u8; 512];
let result = copy_small_simd(&mut dst, &src);
assert!(result.is_err());
}
#[test]
fn test_copy_aligned_simd() {
let layout = std::alloc::Layout::from_size_align(4096, 64).unwrap();
unsafe {
let src_ptr = std::alloc::alloc(layout);
let dst_ptr = std::alloc::alloc(layout);
if !src_ptr.is_null() && !dst_ptr.is_null() {
for i in 0..4096 {
*src_ptr.add(i) = (i % 256) as u8;
}
let src_slice = std::slice::from_raw_parts(src_ptr, 4096);
let dst_slice = std::slice::from_raw_parts_mut(dst_ptr, 4096);
let result = copy_aligned_simd(dst_slice, src_slice);
assert!(result.is_ok());
for i in 0..4096 {
assert_eq!(*src_ptr.add(i), *dst_ptr.add(i));
}
std::alloc::dealloc(src_ptr, layout);
std::alloc::dealloc(dst_ptr, layout);
}
}
}
#[test]
fn test_copy_aligned_simd_alignment_check() {
        let src_buf = vec![0u8; 1024 + 64];
        let mut dst_buf = vec![0u8; 1024 + 64];
        let src_offset = src_buf.as_ptr() as usize % 64;
        let dst_offset = dst_buf.as_ptr() as usize % 64;
        // Force at least one buffer off 64-byte alignment.
        let src_start = if src_offset == 0 { 1 } else { 0 };
        let dst_start = if dst_offset == 0 { 1 } else { 0 };
        let src = &src_buf[src_start..src_start + 1024];
        let dst = &mut dst_buf[dst_start..dst_start + 1024];
        let result = copy_aligned_simd(dst, src);
        assert!(result.is_err(), "Should fail alignment check for unaligned buffers");
    }
#[test]
fn test_size_mismatch_error() {
let src = vec![42u8; 128];
let mut dst = vec![0u8; 64];
let result = copy_large_simd(&mut dst, &src);
assert!(result.is_err());
let result2 = copy_small_simd(&mut dst, &src);
assert!(result2.is_err());
}
#[test]
fn test_empty_buffer() {
let src: Vec<u8> = vec![];
let mut dst: Vec<u8> = vec![];
let result = copy_large_simd(&mut dst, &src);
assert!(result.is_ok());
let result2 = copy_small_simd(&mut dst, &src);
assert!(result2.is_ok());
}
#[test]
    fn test_buffer_overlap_detection() {
        // Overlapping views of one allocation must be detected; disjoint
        // allocations must copy cleanly.
        let buf = vec![0u8; 512];
        assert!(buffers_overlap(&buf[0..300], &buf[200..500]));
        assert!(!buffers_overlap(&buf[0..200], &buf[300..500]));
        let src = vec![42u8; 512];
        let mut dst = vec![0u8; 512];
        let result = copy_large_simd(&mut dst, &src);
        assert!(result.is_ok());
        assert_eq!(src, dst);
    }
#[test]
fn test_cross_tier_consistency() {
let sizes = vec![64, 128, 256, 1024, 4096];
for size in sizes {
let src: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
let mut dst = vec![0u8; size];
if size <= SMALL_THRESHOLD {
let result = copy_small_simd(&mut dst, &src);
assert!(result.is_ok(), "Failed for size {}", size);
} else {
let result = copy_large_simd(&mut dst, &src);
assert!(result.is_ok(), "Failed for size {}", size);
}
assert_eq!(src, dst, "Mismatch for size {}", size);
}
}
#[test]
fn test_unaligned_boundaries() {
let unaligned_sizes = vec![17, 33, 65, 127, 129, 255];
for size in unaligned_sizes {
let src: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
let mut dst = vec![0u8; size];
let result = if size <= SMALL_THRESHOLD {
copy_small_simd(&mut dst, &src)
} else {
copy_large_simd(&mut dst, &src)
};
assert!(result.is_ok(), "Failed for unaligned size {}", size);
assert_eq!(src, dst, "Mismatch for unaligned size {}", size);
}
}
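    // A sketch of a regression test for remainders of 9..=15 bytes after the
    // 16-byte vector loop; sizes are chosen so the tail is not a multiple of 8.
    #[test]
    fn test_large_copy_odd_remainders() {
        for size in [1001usize, 1003, 1007, 1013, 4091] {
            let src: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
            let mut dst = vec![0u8; size];
            assert!(copy_large_simd(&mut dst, &src).is_ok(), "Failed for size {}", size);
            assert_eq!(src, dst, "Mismatch for size {}", size);
        }
    }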
#[test]
    fn test_simd_matches_copy_from_slice() {
        // Correctness check against the standard library copy; this is not a
        // benchmark despite the benchmark-sized buffer.
let size = 8192;
let src: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
let mut dst_simd = vec![0u8; size];
let mut dst_std = vec![0u8; size];
let result = copy_large_simd(&mut dst_simd, &src);
assert!(result.is_ok());
dst_std.copy_from_slice(&src);
assert_eq!(dst_simd, dst_std);
}
#[test]
fn test_global_instance() {
let instance1 = get_global_simd_copy();
let instance2 = get_global_simd_copy();
assert_eq!(instance1.tier(), instance2.tier());
}
}