Skip to main content

blvm_primitives/crypto/
simd_bytes.rs

1//! SIMD-optimized byte array operations
2//!
3//! Provides fast byte array copying and concatenation using AVX2 SIMD instructions
4//! when available, with automatic fallback to sequential operations for compatibility.
5
/// Copy `src` into the first `src.len()` bytes of `dst`, using SIMD when beneficial.
///
/// On x86_64 builds with the `production` feature, inputs of 64 bytes or more
/// are copied with AVX2 when the CPU reports support at runtime. Every other
/// case (smaller inputs, other targets, no AVX2) uses a plain slice copy.
/// Bytes of `dst` beyond `src.len()` are left untouched.
///
/// # Arguments
/// * `dst` - Destination slice (must be at least as long as `src`)
/// * `src` - Source slice
///
/// # Panics
/// Panics if `dst.len() < src.len()`. The check happens before any byte is
/// written, on both the SIMD and the sequential path.
#[inline]
pub fn copy_bytes_simd(dst: &mut [u8], src: &[u8]) {
    // Narrow the destination first: this is the single bounds check for both
    // paths, so a short `dst` panics instead of letting the raw-pointer AVX2
    // loop write past the end of the buffer.
    let dst = &mut dst[..src.len()];

    #[cfg(all(target_arch = "x86_64", feature = "production"))]
    {
        // Use SIMD for large arrays (threshold: 64 bytes)
        if src.len() >= 64 && is_avx2_available() {
            // SAFETY: AVX2 support was confirmed at runtime just above, and
            // `dst` has been narrowed to exactly `src.len()` bytes, so every
            // 32-byte store stays inside the destination.
            unsafe {
                copy_bytes_avx2(dst, src);
            }
            return;
        }
    }

    // Fallback: sequential copy (lengths are equal after the narrowing above)
    dst.copy_from_slice(src);
}
33
/// AVX2-optimized byte copy.
///
/// Copies `src` into the first `src.len()` bytes of `dst`. Whole 32-byte
/// chunks are moved with unaligned AVX2 load/store instructions; the tail of
/// fewer than 32 bytes is copied with `copy_from_slice`. Bytes of `dst` beyond
/// `src.len()` are left untouched.
///
/// # Panics
/// Panics if `dst.len() < src.len()`. The check runs before any byte is written.
///
/// # Safety
/// The executing CPU must support AVX2 (e.g. verified with
/// `is_x86_feature_detected!("avx2")`); calling this on a CPU without AVX2 is
/// undefined behavior.
#[cfg(all(target_arch = "x86_64", feature = "production"))]
#[target_feature(enable = "avx2")]
unsafe fn copy_bytes_avx2(dst: &mut [u8], src: &[u8]) {
    use std::arch::x86_64::*;

    // Bounds-check once, up front, so the vector loop can never store past
    // the end of the destination.
    let dst = &mut dst[..src.len()];

    let mut dst_chunks = dst.chunks_exact_mut(32);
    let src_chunks = src.chunks_exact(32);
    // Grab the source tail before `src_chunks` is consumed by `zip`.
    let src_tail = src_chunks.remainder();

    // Process 32-byte chunks with AVX2
    for (dst_chunk, src_chunk) in dst_chunks.by_ref().zip(src_chunks) {
        // SAFETY: `chunks_exact`/`chunks_exact_mut` yield slices of exactly
        // 32 bytes, so a 256-bit unaligned load and store stay in bounds.
        // AVX2 availability is this function's own safety precondition.
        unsafe {
            let data = _mm256_loadu_si256(src_chunk.as_ptr() as *const __m256i);
            _mm256_storeu_si256(dst_chunk.as_mut_ptr() as *mut __m256i, data);
        }
    }

    // Handle remainder sequentially. Both slices have the same total length,
    // so the two tails have the same length (possibly zero).
    dst_chunks.into_remainder().copy_from_slice(src_tail);
}
61
/// Reports whether the running CPU supports AVX2.
///
/// The answer comes from `is_x86_feature_detected!`, i.e. it is determined at
/// runtime rather than from compile-time target flags.
#[cfg(all(target_arch = "x86_64", feature = "production"))]
fn is_avx2_available() -> bool {
    let has_avx2: bool = std::arch::is_x86_feature_detected!("avx2");
    has_avx2
}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a deterministic byte pattern of `len` bytes (0, 1, ..., 255, 0, ...).
    fn pattern(len: usize) -> Vec<u8> {
        (0..=u8::MAX).cycle().take(len).collect()
    }

    #[test]
    fn test_copy_bytes_small() {
        let src = [1, 2, 3, 4, 5];
        let mut dst = [0u8; 10];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(&dst[..5], &src);
    }

    #[test]
    fn test_copy_bytes_large() {
        let src: Vec<u8> = (0..128).collect();
        let mut dst = vec![0u8; 128];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    #[test]
    fn test_copy_bytes_exact_32() {
        let src: Vec<u8> = (0..32).collect();
        let mut dst = vec![0u8; 32];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    #[test]
    fn test_copy_bytes_empty() {
        // An empty source must leave the destination exactly as it was.
        let mut dst = [7u8; 8];
        copy_bytes_simd(&mut dst, &[]);
        assert_eq!(dst, [7u8; 8]);
    }

    #[test]
    fn test_copy_bytes_threshold_64() {
        // 64 bytes is the smallest size eligible for the SIMD path.
        let src = pattern(64);
        let mut dst = vec![0u8; 64];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    #[test]
    fn test_copy_bytes_with_remainder() {
        // 100 = 3 * 32 + 4: exercises whole chunks followed by a short tail.
        let src = pattern(100);
        let mut dst = vec![0u8; 100];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(dst, src);
    }

    #[test]
    fn test_copy_bytes_leaves_tail_untouched() {
        // Bytes of `dst` past `src.len()` must keep their previous value.
        let src = pattern(70);
        let mut dst = vec![0xAAu8; 96];
        copy_bytes_simd(&mut dst, &src);
        assert_eq!(&dst[..70], &src[..]);
        assert!(dst[70..].iter().all(|&b| b == 0xAA));
    }

    #[test]
    #[should_panic]
    fn test_copy_bytes_dst_too_short_panics() {
        // Below the SIMD threshold, so this takes the sequential path on
        // every build configuration.
        let src = [1u8, 2, 3, 4, 5];
        let mut dst = [0u8; 3];
        copy_bytes_simd(&mut dst, &src);
    }
}