arrs_buffer/
cold_load.rs

/// Copy `len` bytes from `src` to `dst`, writing the bulk of the data with
/// non-temporal AVX2 stores.
///
/// The copy proceeds in three phases:
///
/// 1. a short head copy that advances `dst` to a 64-byte boundary,
/// 2. 32-byte unaligned loads from `src` paired with non-temporal stores to
///    the now-aligned `dst`,
/// 3. a tail copy of the remaining `< 32` bytes.
///
/// # Safety
///
/// - `src` must be valid for reads of `len` bytes.
/// - `dst` must be valid for writes of `len` bytes.
/// - The two regions must not overlap (the same requirement the non-AVX2
///   build of this function inherits from `copy_nonoverlapping`).
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
pub unsafe fn cold_copy(mut src: *const u8, mut dst: *mut u8, len: usize) {
    use core::arch::x86_64::{__m256i, _mm256_lddqu_si256, _mm256_stream_si256, _mm_sfence};

    /// Width of one AVX2 register in bytes.
    const STEP: usize = 32;

    // `align_offset` is allowed to return `usize::MAX` when it cannot compute
    // an offset; clamping to `len` turns that case into a plain full copy.
    let head = dst.align_offset(64).min(len);

    // SAFETY: `head <= len`, and the caller guarantees both regions are valid
    // for `len` bytes and do not overlap.
    unsafe {
        std::ptr::copy_nonoverlapping(src, dst, head);
        src = src.add(head);
        dst = dst.add(head);
    }

    let remaining = len - head;
    let chunks = remaining / STEP;

    // SAFETY: `chunks * STEP <= remaining`, so every 32-byte load and store
    // stays inside the caller-provided regions. When `chunks > 0` the head
    // phase consumed the full alignment offset, so `dst` is 64-byte aligned
    // and stays 32-byte aligned after each step, as the streaming store
    // requires.
    unsafe {
        for _ in 0..chunks {
            _mm256_stream_si256(
                dst.cast::<__m256i>(),
                _mm256_lddqu_si256(src.cast::<__m256i>()),
            );

            src = src.add(STEP);
            dst = dst.add(STEP);
        }

        if chunks != 0 {
            // Non-temporal stores are weakly ordered; fence so they are
            // visible before anything else touches the destination.
            _mm_sfence();
        }
    }

    // Only the bytes that did not fill a whole 32-byte chunk are left.
    let tail = remaining % STEP;

    // SAFETY: `head + chunks * STEP + tail == len`, so the tail is in bounds.
    unsafe {
        std::ptr::copy_nonoverlapping(src, dst, tail);
    }
}
40
/// Portable fallback: copy `len` bytes from `src` to `dst` when AVX2 on
/// x86_64 is not available at compile time.
///
/// This simply forwards to the standard non-overlapping pointer copy.
///
/// # Safety
///
/// Both `src` and `dst` must refer to at least `len` bytes; `src` is read
/// from and `dst` is written to. The underlying non-overlapping copy also
/// requires that the two regions do not overlap.
#[cfg(not(all(target_arch = "x86_64", target_feature = "avx2")))]
#[inline(always)]
pub unsafe fn cold_copy(src: *const u8, dst: *mut u8, len: usize) {
    // SAFETY: the caller upholds the validity and non-overlap requirements
    // documented above.
    unsafe {
        src.copy_to_nonoverlapping(dst, len);
    }
}