/// Copies `len` bytes from `src` to `dst`, writing the bulk of the data with
/// 32-byte streaming (non-temporal) AVX2 stores.
///
/// The copy is performed in three phases:
/// 1. a short head copy that advances `dst` to a 64-byte boundary,
/// 2. as many 32-byte streaming stores as fit in the remaining length,
/// 3. a tail copy of the final `remaining % 32` bytes.
///
/// # Safety
///
/// * `src` must be valid for reads of `len` bytes.
/// * `dst` must be valid for writes of `len` bytes.
/// * The two regions must not overlap.
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
pub unsafe fn cold_copy(mut src: *const u8, mut dst: *mut u8, len: usize) {
    use core::arch::x86_64::{__m256i, _mm256_lddqu_si256, _mm256_stream_si256, _mm_sfence};

    /// Width in bytes of one AVX2 register, i.e. of one streaming store.
    const STEP: usize = 32;

    // `align_offset` is allowed to return `usize::MAX` when it cannot compute
    // an offset; clamping to `len` keeps the head copy inside the buffers and
    // degrades to a plain copy of the whole range in that case.
    let head = core::cmp::min(dst.align_offset(64), len);

    // SAFETY: `head <= len`, and the caller guarantees both regions are valid
    // for `len` bytes and do not overlap.
    unsafe {
        core::ptr::copy_nonoverlapping(src, dst, head);
        src = src.add(head);
        dst = dst.add(head);
    }

    let remaining = len - head;

    for _ in 0..remaining / STEP {
        // SAFETY: at least `STEP` bytes remain in both regions. If any
        // iteration runs, `dst` was advanced to a 64-byte boundary above and
        // moves in 32-byte steps, so it satisfies the 32-byte alignment the
        // streaming store requires. The load has no alignment requirement.
        unsafe {
            _mm256_stream_si256(
                dst as *mut __m256i,
                _mm256_lddqu_si256(src as *const __m256i),
            );
            src = src.add(STEP);
            dst = dst.add(STEP);
        }
    }

    // Streaming stores are weakly ordered; fence so the data is visible to
    // ordinary loads/stores before anyone touches the destination again.
    // SAFETY: `sfence` has no memory-safety preconditions.
    unsafe { _mm_sfence() };

    // SAFETY: exactly `remaining % STEP` bytes are left in both regions.
    unsafe { core::ptr::copy_nonoverlapping(src, dst, remaining % STEP) };
}
40
/// Portable fallback for targets without compile-time AVX2 support: copies
/// `len` bytes from `src` to `dst` with a plain non-overlapping memory copy.
///
/// # Safety
///
/// * `src` must be valid for reads of `len` bytes.
/// * `dst` must be valid for writes of `len` bytes.
/// * The two regions must not overlap.
#[cfg(not(all(target_arch = "x86_64", target_feature = "avx2")))]
#[inline(always)]
pub unsafe fn cold_copy(src: *const u8, dst: *mut u8, len: usize) {
    // SAFETY: the caller upholds the validity and non-overlap requirements
    // listed above, which are exactly those of `copy_to_nonoverlapping`.
    unsafe { src.copy_to_nonoverlapping(dst, len) }
}