#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{__m256i, _mm256_extract_epi64, _mm256_loadu_si256, _mm256_xor_si256};
#[target_feature(enable = "avx2")]
#[cfg(target_arch = "x86_64")]
#[allow(clippy::cast_ptr_alignment)] pub(crate) unsafe fn hamming_distance_avx2(a: &[u8; 96], b: &[u8; 96]) -> u32 {
let a0 = _mm256_loadu_si256(a.as_ptr().cast::<__m256i>());
let a1 = _mm256_loadu_si256(a.as_ptr().add(32).cast::<__m256i>());
let a2 = _mm256_loadu_si256(a.as_ptr().add(64).cast::<__m256i>());
let b0 = _mm256_loadu_si256(b.as_ptr().cast::<__m256i>());
let b1 = _mm256_loadu_si256(b.as_ptr().add(32).cast::<__m256i>());
let b2 = _mm256_loadu_si256(b.as_ptr().add(64).cast::<__m256i>());
let xor0 = _mm256_xor_si256(a0, b0);
let xor1 = _mm256_xor_si256(a1, b1);
let xor2 = _mm256_xor_si256(a2, b2);
let pop0 = popcount_avx2(xor0);
let pop1 = popcount_avx2(xor1);
let pop2 = popcount_avx2(xor2);
pop0 + pop1 + pop2
}
/// Counts the set bits in a 256-bit vector.
///
/// The vector is split into its four 64-bit lanes, each lane is counted with
/// the scalar `count_ones`, and the four counts are added together. The
/// result is in the range `0..=256`.
///
/// # Safety
///
/// The function is compiled with the `avx2` target feature enabled, so the
/// caller must ensure the executing CPU supports AVX2.
#[target_feature(enable = "avx2")]
#[inline]
#[cfg(target_arch = "x86_64")]
unsafe fn popcount_avx2(v: __m256i) -> u32 {
    let lanes = [
        _mm256_extract_epi64::<0>(v),
        _mm256_extract_epi64::<1>(v),
        _mm256_extract_epi64::<2>(v),
        _mm256_extract_epi64::<3>(v),
    ];

    let mut set_bits = 0u32;
    for lane in lanes.iter() {
        // `count_ones` looks only at the bit pattern, so the signed lane
        // gives the same count as its unsigned reinterpretation would.
        set_bits += lane.count_ones();
    }
    set_bits
}
#[cfg(test)]
#[cfg(target_arch = "x86_64")]
mod tests {
    use super::*;

    /// Runs the AVX2 kernel on `a` and `b` when the CPU supports AVX2.
    ///
    /// On a CPU without AVX2 a skip notice is written to stderr and `None`
    /// is returned so the calling test can pass without asserting anything.
    fn distance_or_skip(a: &[u8; 96], b: &[u8; 96]) -> Option<u32> {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 availability was confirmed at runtime just above.
            Some(unsafe { hamming_distance_avx2(a, b) })
        } else {
            eprintln!("Skipping AVX2 test: CPU does not support AVX2");
            None
        }
    }

    /// Identical buffers differ in no bits.
    #[test]
    fn test_avx2_identical() {
        if let Some(distance) = distance_or_skip(&[0xAA; 96], &[0xAA; 96]) {
            assert_eq!(distance, 0);
        }
    }

    /// All-zero versus all-one buffers differ in every one of the 768 bits.
    #[test]
    fn test_avx2_opposite() {
        if let Some(distance) = distance_or_skip(&[0x00; 96], &[0xFF; 96]) {
            assert_eq!(distance, 768);
        }
    }

    /// `0xAA` and `0x55` are bitwise complements, so every bit differs.
    #[test]
    fn test_avx2_alternating() {
        if let Some(distance) = distance_or_skip(&[0xAA; 96], &[0x55; 96]) {
            assert_eq!(distance, 768);
        }
    }

    /// A single flipped bit yields a distance of one.
    #[test]
    fn test_avx2_single_bit() {
        let mut flipped = [0x00; 96];
        flipped[0] = 0x01;
        if let Some(distance) = distance_or_skip(&flipped, &[0x00; 96]) {
            assert_eq!(distance, 1);
        }
    }

    /// Bytes on both sides of the first/second 32-byte block boundary count.
    #[test]
    fn test_avx2_boundary_32() {
        let mut straddling = [0x00; 96];
        straddling[31] = 0xFF;
        straddling[32] = 0xFF;
        if let Some(distance) = distance_or_skip(&straddling, &[0x00; 96]) {
            assert_eq!(distance, 16);
        }
    }

    /// Bytes on both sides of the second/third 32-byte block boundary count.
    #[test]
    fn test_avx2_boundary_64() {
        let mut straddling = [0x00; 96];
        straddling[63] = 0xFF;
        straddling[64] = 0xFF;
        if let Some(distance) = distance_or_skip(&straddling, &[0x00; 96]) {
            assert_eq!(distance, 16);
        }
    }
}