Skip to main content

blvm_primitives/crypto/
simd_bytes.rs

1//! SIMD-optimized byte array operations
2//!
3//! Provides fast byte array copying and concatenation using AVX2 SIMD instructions
4//! when available, with automatic fallback to sequential operations for compatibility.
5
/// Copy `src` into the front of `dst`, using SIMD when beneficial.
///
/// On x86_64 builds with the `production` feature enabled, sources of
/// 64 bytes or more are copied with AVX2 when the CPU reports support at
/// runtime. Every other case uses a plain slice copy.
///
/// Only the first `src.len()` bytes of `dst` are written; any bytes of `dst`
/// beyond that are left untouched.
///
/// # Arguments
/// * `dst` - Destination slice (must be at least as long as `src`)
/// * `src` - Source slice
///
/// # Panics
/// Panics if `dst.len() < src.len()`. The check is performed before any byte
/// is written, on both the SIMD and the fallback path.
#[inline]
pub fn copy_bytes_simd(dst: &mut [u8], src: &[u8]) {
    // Narrow the destination up front. This is the single bounds check for
    // every path below, so a too-short `dst` panics here instead of letting
    // the raw-pointer AVX2 stores run past the end of the buffer.
    let dst = &mut dst[..src.len()];

    #[cfg(all(target_arch = "x86_64", feature = "production"))]
    {
        // Use SIMD for large arrays (threshold: 64 bytes)
        if src.len() >= 64 && is_avx2_available() {
            // SAFETY: AVX2 support was just confirmed at runtime, and `dst`
            // has been narrowed to exactly `src.len()` bytes.
            unsafe {
                copy_bytes_avx2(dst, src);
            }
            return;
        }
    }

    // Fallback: sequential copy
    dst.copy_from_slice(src);
}
33
/// AVX2-optimized byte copy
///
/// Copies `src` into the front of `dst` in 32-byte chunks using unaligned
/// AVX2 loads and stores, then copies the remaining `src.len() % 32` bytes
/// with a plain slice copy.
///
/// # Safety
/// The caller must ensure the running CPU supports AVX2, because this
/// function is compiled with `#[target_feature(enable = "avx2")]`.
///
/// # Panics
/// Panics if `dst.len() < src.len()`. The check happens before any byte is
/// written.
#[cfg(all(target_arch = "x86_64", feature = "production"))]
#[target_feature(enable = "avx2")]
unsafe fn copy_bytes_avx2(dst: &mut [u8], src: &[u8]) {
    use std::arch::x86_64::*;

    // Bounds-check once, before any raw store, so a too-short destination
    // panics rather than being overrun by the 32-byte writes below.
    let dst = &mut dst[..src.len()];

    let mut src_chunks = src.chunks_exact(32);
    let mut dst_chunks = dst.chunks_exact_mut(32);

    // Process 32-byte chunks with AVX2. Both slices have the same length, so
    // the two iterators yield the same number of chunks.
    for (dst_chunk, src_chunk) in dst_chunks.by_ref().zip(src_chunks.by_ref()) {
        // SAFETY: `chunks_exact(32)` guarantees each chunk is exactly 32 bytes
        // long, so a 256-bit unaligned load/store stays inside the chunk.
        unsafe {
            let data = _mm256_loadu_si256(src_chunk.as_ptr() as *const __m256i);
            _mm256_storeu_si256(dst_chunk.as_mut_ptr() as *mut __m256i, data);
        }
    }

    // Handle remainder sequentially (both remainders are `src.len() % 32` long)
    dst_chunks
        .into_remainder()
        .copy_from_slice(src_chunks.remainder());
}
63
/// Report whether the running CPU supports AVX2.
///
/// Thin wrapper around the standard library's runtime feature-detection
/// macro, queried for the `"avx2"` feature.
#[cfg(all(target_arch = "x86_64", feature = "production"))]
fn is_avx2_available() -> bool {
    use std::arch::is_x86_feature_detected;

    is_x86_feature_detected!("avx2")
}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Source shorter than the 64-byte SIMD threshold.
    #[test]
    fn test_copy_bytes_small() {
        let src = [1, 2, 3, 4, 5];
        let mut dst = [0u8; 10];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(&dst[..5], &src);
    }

    /// Source at or above the 64-byte threshold, a whole number of 32-byte chunks.
    #[test]
    fn test_copy_bytes_large() {
        let src: Vec<u8> = (0..128).collect();
        let mut dst = vec![0u8; 128];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    /// Source of exactly 32 bytes (below the 64-byte threshold).
    #[test]
    fn test_copy_bytes_exact_32() {
        let src: Vec<u8> = (0..32).collect();
        let mut dst = vec![0u8; 32];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    /// Large source whose length is not a multiple of 32, so the trailing
    /// remainder has to be copied after the full chunks.
    #[test]
    fn test_copy_bytes_large_with_remainder() {
        let src: Vec<u8> = (0..100).collect();
        let mut dst = vec![0u8; 100];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    /// An empty source must leave the destination unchanged.
    #[test]
    fn test_copy_bytes_empty() {
        let mut dst = [7u8; 4];
        copy_bytes_simd(&mut dst, &[]);
        assert_eq!(dst, [7u8; 4]);
    }

    /// Bytes of `dst` past `src.len()` must not be modified.
    #[test]
    fn test_copy_bytes_leaves_tail_untouched() {
        let src: Vec<u8> = (0..100).collect();
        let mut dst = vec![0xAAu8; 128];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(&dst[..100], &src[..]);
        assert!(dst[100..].iter().all(|&b| b == 0xAA));
    }
}