#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
/// Runs the full AVX-512 ClockMix transform on a 16-word message, in place.
///
/// The transform is two passes over `message`: the XOR/rotate stage
/// (`clock_mix_xor_rotate_avx512`) followed by the S-box stage
/// (`clock_mix_sbox_avx512`).
///
/// # Safety
///
/// The executing CPU must support the `avx512f` and `avx512bw` target
/// features; calling this function on a CPU without them is undefined
/// behaviour.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn clock_mix_avx512_impl(message: &mut [u64; 16]) {
    // SAFETY: both stages enable exactly the target features this function
    // enables, and the caller has promised those features are available.
    unsafe {
        clock_mix_xor_rotate_avx512(message);
        clock_mix_sbox_avx512(message);
    }
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn clock_mix_xor_rotate_avx512(message: &mut [u64; 16]) {
use crate::constants::ROTATION_SCHEDULE;
let msg_ptr = message.as_ptr() as *const __m512i;
let mut msg0 = unsafe { _mm512_loadu_si512(msg_ptr) }; let mut msg1 = unsafe { _mm512_loadu_si512(msg_ptr.add(1)) };
let last_of_msg0 = _mm512_extracti64x4_epi64(msg0, 1); let _last_element = _mm256_extract_epi64(last_of_msg0, 3) as u64;
let first_of_msg1 = _mm512_castsi512_si256(msg1); let _first_element = _mm256_extract_epi64(first_of_msg1, 0) as u64;
let rot0 = _mm512_alignr_epi64(msg0, msg1, 1); let rot1_temp = _mm512_alignr_epi64(msg1, msg0, 1);
let msg1_lower = _mm512_castsi512_si256(msg1);
let element_8 = _mm256_extract_epi64(msg1_lower, 0);
let _element_8_broadcast = _mm512_set1_epi64(element_8 as i64);
let rot0 = _mm512_mask_set1_epi64(rot0, 0x80, element_8 as i64);
let msg0_lower = _mm512_castsi512_si256(msg0);
let element_0 = _mm256_extract_epi64(msg0_lower, 0);
let rot1 = _mm512_mask_set1_epi64(rot1_temp, 0x80, element_0 as i64);
let rot_sched0 = _mm512_set_epi64(
ROTATION_SCHEDULE[7] as i64,
ROTATION_SCHEDULE[6] as i64,
ROTATION_SCHEDULE[5] as i64,
ROTATION_SCHEDULE[4] as i64,
ROTATION_SCHEDULE[3] as i64,
ROTATION_SCHEDULE[2] as i64,
ROTATION_SCHEDULE[1] as i64,
ROTATION_SCHEDULE[0] as i64,
);
let rot_sched1 = _mm512_set_epi64(
ROTATION_SCHEDULE[15] as i64,
ROTATION_SCHEDULE[14] as i64,
ROTATION_SCHEDULE[13] as i64,
ROTATION_SCHEDULE[12] as i64,
ROTATION_SCHEDULE[11] as i64,
ROTATION_SCHEDULE[10] as i64,
ROTATION_SCHEDULE[9] as i64,
ROTATION_SCHEDULE[8] as i64,
);
let rot0_rotated = unsafe { avx512_rotate_left_epi64(rot0, rot_sched0) };
let rot1_rotated = unsafe { avx512_rotate_left_epi64(rot1, rot_sched1) };
msg0 = _mm512_xor_si512(msg0, rot0_rotated);
msg1 = _mm512_xor_si512(msg1, rot1_rotated);
unsafe { _mm512_storeu_si512(message.as_mut_ptr() as *mut __m512i, msg0) };
unsafe { _mm512_storeu_si512(message.as_mut_ptr().add(8) as *mut __m512i, msg1) };
}
/// Rotates each 64-bit lane of `x` left by the count in the matching lane of `n`.
///
/// Counts are taken modulo 64, so every lane behaves like
/// `u64::rotate_left(x_lane, n_lane as u32)`. This maps to the single
/// AVX-512F variable-rotate instruction instead of a shift/shift/or sequence,
/// which would yield zero for counts above 64.
///
/// # Safety
///
/// The executing CPU must support the `avx512f` and `avx512bw` target features.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx512f,avx512bw")]
#[inline]
unsafe fn avx512_rotate_left_epi64(x: __m512i, n: __m512i) -> __m512i {
    _mm512_rolv_epi64(x, n)
}
/// S-box stage of ClockMix: adds a table-derived value to every word, in place.
///
/// Each 64-bit word is masked down to its low byte, that byte is handed to
/// `avx512_gather_sbox` together with a pointer to `SBOX`, and the gathered
/// value is added to the word (64-bit lane addition, wrapping on overflow).
/// The two 8-word halves of the message are processed independently.
///
/// # Safety
///
/// The executing CPU must support the `avx512f` and `avx512bw` target features.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn clock_mix_sbox_avx512(message: &mut [u64; 16]) {
    use crate::constants::SBOX;

    let halves = message.as_mut_ptr() as *mut __m512i;
    let table = SBOX.as_ptr() as *const u64;
    let low_byte = _mm512_set1_epi64(0xFF);

    for half in 0..2 {
        // SAFETY: `message` is 128 bytes, so `half` in {0, 1} addresses one of
        // its two 512-bit halves; unaligned load/store have no alignment
        // requirement.
        let slot = unsafe { halves.add(half) };
        let lanes = unsafe { _mm512_loadu_si512(slot as *const __m512i) };

        let lookup = _mm512_and_si512(lanes, low_byte);
        // SAFETY: the helper enables the same target features as this
        // function; every lane of `lookup` is in 0..=255.
        let substituted = unsafe { avx512_gather_sbox(lookup, table) };

        unsafe { _mm512_storeu_si512(slot, _mm512_add_epi64(lanes, substituted)) };
    }
}
/// Gathers one 32-bit value per 64-bit lane from the memory behind `sbox_ptr`.
///
/// For every 64-bit lane of `indices`, the low 32 bits are used as a *byte*
/// offset (gather scale 1) from `sbox_ptr`; the 32 bits loaded from that
/// address are returned zero-extended in the corresponding 64-bit lane.
/// The upper dword of each lane is zero, so the gather also loads from offset
/// 0 for those dword lanes; those results are masked away.
///
/// NOTE(review): with scale 1 and a 32-bit element size, lane `k` receives the
/// unaligned dword starting at byte `indices[k]`, not "element `indices[k]`"
/// of a `u64` table. The table's element type is not visible from this
/// function; confirm this is the intended lookup. If the table is exactly
/// 256 bytes long, indices above 252 make the 4-byte load extend past its end.
///
/// # Safety
///
/// The executing CPU must support the `avx512f` and `avx512bw` target
/// features, and `sbox_ptr + index .. sbox_ptr + index + 4` (in bytes) must be
/// readable for every lane index as well as for offset 0.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx512f,avx512bw")]
#[inline]
unsafe fn avx512_gather_sbox(indices: __m512i, sbox_ptr: *const u64) -> __m512i {
    // Keeping only the low dword of each qword is the same as truncating to
    // 32 bits and zero-extending back to 64.
    let low_dword = _mm512_set1_epi64(0xFFFF_FFFF);
    let byte_offsets = _mm512_and_si512(indices, low_dword);

    // SAFETY: the caller guarantees the addressed bytes are readable.
    let gathered =
        unsafe { _mm512_i32gather_epi32::<1>(byte_offsets, sbox_ptr as *const i32) };

    // Drop the dwords gathered for the (always zero) upper index halves.
    _mm512_and_si512(gathered, low_dword)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::simd::dispatch::is_avx512_available;

    /// The AVX-512 kernel must modify its input and agree with the scalar
    /// implementation. Does nothing when AVX-512 is not reported as available.
    ///
    /// NOTE(review): every word of the input is identical, so this check
    /// cannot detect lane-ordering mistakes in the vector kernel; a
    /// non-uniform input would be a stronger comparison.
    #[test]
    fn test_avx512_target_feature_safety() {
        if is_avx512_available() {
            let original = [0xFEDCBA9876543210u64; 16];

            let mut vectorized = original;
            // SAFETY: only reached when `is_avx512_available()` returned true
            // (assumed to cover the features the kernel enables — TODO confirm).
            unsafe { clock_mix_avx512_impl(&mut vectorized) };
            assert_ne!(vectorized, original);

            let mut reference = original;
            crate::simd::scalar::scalar_clock_mix(&mut reference);
            assert_eq!(vectorized, reference);
        }
    }

    /// Runtime detection must never report AVX-512 without also reporting AVX2.
    #[test]
    fn test_avx512_feature_logic() {
        let has_avx512 = is_avx512_available();
        let has_avx2 = crate::simd::dispatch::is_avx2_available();
        assert!(!has_avx512 || has_avx2, "AVX-512 requires AVX2");
    }
}