// Multipliers for the 4-byte hash mix implemented by the functions below.
//
// First multiplier: applied to the raw 32-bit input word.
// NOTE(review): the value matches the widely used 32-bit golden-ratio
// constant (2^32 / phi) — confirm that this is the intended origin.
const HASH_PRIME: u32 = 0x9E3779B9;
// Second multiplier: applied after the `h ^ (h >> 15)` step.
// NOTE(review): the value matches a finalizer constant from MurmurHash3 —
// confirm that this is the intended origin.
const HASH_PRIME2: u32 = 0x85EBCA6B;
/// Hashes the four bytes `data[pos..pos + 4]` into a 32-bit value.
///
/// The bytes are interpreted as a little-endian `u32`, multiplied by
/// `HASH_PRIME`, folded with `h ^ (h >> 15)`, and multiplied by `HASH_PRIME2`.
/// All multiplications wrap.
///
/// # Panics
///
/// Panics if fewer than four bytes are available at `pos` (via the debug
/// assertion in debug builds, via slice indexing otherwise).
#[inline(always)]
pub fn hash4_scalar(data: &[u8], pos: usize) -> u32 {
    debug_assert!(pos + 4 <= data.len());
    let window = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
    let mixed = u32::from_le_bytes(window).wrapping_mul(HASH_PRIME);
    // Fold the high bits down before the second multiply.
    (mixed ^ (mixed >> 15)).wrapping_mul(HASH_PRIME2)
}
/// Hashes the four overlapping 4-byte windows starting at `pos`,
/// `pos + 1`, `pos + 2` and `pos + 3`.
///
/// Element `i` of the result equals `hash4_scalar(data, pos + i)`. On x86_64
/// the SSE4.1 implementation is used when the CPU supports it (checked at
/// runtime); otherwise the scalar implementation is used.
///
/// # Panics
///
/// Panics if `data` does not contain at least 7 bytes starting at `pos`.
#[inline]
pub fn hash4x4(data: &[u8], pos: usize) -> [u32; 4] {
    // This must be a hard `assert!`, not `debug_assert!`: the SIMD path reads
    // through raw pointers without bounds checks, so a short slice would be an
    // out-of-bounds read (undefined behavior) in release builds. The check is
    // written so that it cannot overflow for any `pos`.
    assert!(
        data.len() >= 7 && pos <= data.len() - 7,
        "Need 7 bytes for 4 overlapping 4-byte windows"
    );
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("sse4.1") {
            // SAFETY: SSE4.1 support was just confirmed at runtime, and the
            // assertion above guarantees `data[pos..pos + 7]` is in bounds,
            // which covers every byte the SIMD routine reads.
            return unsafe { hash4x4_sse(data, pos) };
        }
    }
    hash4x4_scalar(data, pos)
}
/// Hashes the eight overlapping 4-byte windows starting at `pos` through
/// `pos + 7`.
///
/// Element `i` of the result equals `hash4_scalar(data, pos + i)`. On x86_64
/// the AVX2 implementation is used when the CPU supports it (checked at
/// runtime); otherwise the scalar implementation is used.
///
/// # Panics
///
/// Panics if `data` does not contain at least 11 bytes starting at `pos`.
#[inline]
pub fn hash4x8(data: &[u8], pos: usize) -> [u32; 8] {
    // This must be a hard `assert!`, not `debug_assert!`: the SIMD path reads
    // through raw pointers without bounds checks, so a short slice would be an
    // out-of-bounds read (undefined behavior) in release builds. The check is
    // written so that it cannot overflow for any `pos`.
    assert!(
        data.len() >= 11 && pos <= data.len() - 11,
        "Need 11 bytes for 8 overlapping 4-byte windows"
    );
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was just confirmed at runtime, and the
            // assertion above guarantees `data[pos..pos + 11]` is in bounds,
            // which covers every byte the SIMD routine reads.
            return unsafe { hash4x8_avx2(data, pos) };
        }
    }
    hash4x8_scalar(data, pos)
}
#[inline]
fn hash4x4_scalar(data: &[u8], pos: usize) -> [u32; 4] {
[
hash4_scalar(data, pos),
hash4_scalar(data, pos + 1),
hash4_scalar(data, pos + 2),
hash4_scalar(data, pos + 3),
]
}
#[inline]
fn hash4x8_scalar(data: &[u8], pos: usize) -> [u32; 8] {
[
hash4_scalar(data, pos),
hash4_scalar(data, pos + 1),
hash4_scalar(data, pos + 2),
hash4_scalar(data, pos + 3),
hash4_scalar(data, pos + 4),
hash4_scalar(data, pos + 5),
hash4_scalar(data, pos + 6),
hash4_scalar(data, pos + 7),
]
}
/// SSE4.1 implementation of the 4-wide batch hash.
///
/// Loads the four overlapping 32-bit words at `pos..pos + 4` (each with an
/// unaligned read), then applies the same multiply / xor-shift-15 / multiply
/// sequence as `hash4_scalar` to all four lanes at once.
///
/// # Safety
///
/// The caller must ensure that the CPU supports SSE4.1 and that
/// `data[pos..pos + 7]` is in bounds; the reads here are not bounds-checked.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.1")]
unsafe fn hash4x4_sse(data: &[u8], pos: usize) -> [u32; 4] {
    use std::arch::x86_64::*;
    unsafe {
        let base = data.as_ptr().add(pos);
        // One unaligned word per lane, starting one byte apart.
        let lanes = [
            base.cast::<u32>().read_unaligned(),
            base.add(1).cast::<u32>().read_unaligned(),
            base.add(2).cast::<u32>().read_unaligned(),
            base.add(3).cast::<u32>().read_unaligned(),
        ];
        // `setr` takes its arguments lowest lane first, so lane i holds window i.
        let input = _mm_setr_epi32(
            lanes[0] as i32,
            lanes[1] as i32,
            lanes[2] as i32,
            lanes[3] as i32,
        );
        let mul1 = _mm_set1_epi32(HASH_PRIME as i32);
        let mul2 = _mm_set1_epi32(HASH_PRIME2 as i32);

        let stage1 = _mm_mullo_epi32(input, mul1);
        let folded = _mm_xor_si128(stage1, _mm_srli_epi32(stage1, 15));
        let mixed = _mm_mullo_epi32(folded, mul2);

        let mut out = [0u32; 4];
        _mm_storeu_si128(out.as_mut_ptr().cast::<__m128i>(), mixed);
        out
    }
}
/// AVX2 implementation of the 8-wide batch hash.
///
/// Loads the eight overlapping 32-bit words at `pos..pos + 8` (each with an
/// unaligned read), then applies the same multiply / xor-shift-15 / multiply
/// sequence as `hash4_scalar` to all eight lanes at once.
///
/// # Safety
///
/// The caller must ensure that the CPU supports AVX2 and that
/// `data[pos..pos + 11]` is in bounds; the reads here are not bounds-checked.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn hash4x8_avx2(data: &[u8], pos: usize) -> [u32; 8] {
    use std::arch::x86_64::*;
    unsafe {
        let base = data.as_ptr().add(pos);
        // One unaligned word per lane, starting one byte apart.
        let mut lanes = [0i32; 8];
        for (k, lane) in lanes.iter_mut().enumerate() {
            *lane = base.add(k).cast::<u32>().read_unaligned() as i32;
        }
        // `setr` takes its arguments lowest lane first, so lane i holds window i.
        let input = _mm256_setr_epi32(
            lanes[0], lanes[1], lanes[2], lanes[3], lanes[4], lanes[5], lanes[6], lanes[7],
        );
        let mul1 = _mm256_set1_epi32(HASH_PRIME as i32);
        let mul2 = _mm256_set1_epi32(HASH_PRIME2 as i32);

        let stage1 = _mm256_mullo_epi32(input, mul1);
        let folded = _mm256_xor_si256(stage1, _mm256_srli_epi32(stage1, 15));
        let mixed = _mm256_mullo_epi32(folded, mul2);

        let mut out = [0u32; 8];
        _mm256_storeu_si256(out.as_mut_ptr().cast::<__m256i>(), mixed);
        out
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The scalar hash is deterministic and distinguishes two adjacent windows.
    #[test]
    fn test_hash4_scalar() {
        let data = b"abcdefghijklmnop";
        let h1 = hash4_scalar(data, 0);
        let h2 = hash4_scalar(data, 0);
        assert_eq!(h1, h2, "Same input should produce same hash");
        let h3 = hash4_scalar(data, 1);
        assert_ne!(
            h1, h3,
            "Different input should (usually) produce different hash"
        );
    }

    /// The 4-wide batch agrees with four individual scalar hashes.
    #[test]
    fn test_hash4x4_correctness() {
        let data = b"abcdefghijklmnop";
        let batch = hash4x4(data, 0);
        let scalar = [
            hash4_scalar(data, 0),
            hash4_scalar(data, 1),
            hash4_scalar(data, 2),
            hash4_scalar(data, 3),
        ];
        assert_eq!(batch, scalar, "Batch hash should match scalar");
    }

    /// The 8-wide batch agrees with eight individual scalar hashes.
    #[test]
    fn test_hash4x8_correctness() {
        let data = b"abcdefghijklmnopqrstuvwxyz";
        let batch = hash4x8(data, 0);
        let scalar = [
            hash4_scalar(data, 0),
            hash4_scalar(data, 1),
            hash4_scalar(data, 2),
            hash4_scalar(data, 3),
            hash4_scalar(data, 4),
            hash4_scalar(data, 5),
            hash4_scalar(data, 6),
            hash4_scalar(data, 7),
        ];
        assert_eq!(batch, scalar, "Batch hash should match scalar");
    }

    /// Every valid offset is checked, including the last one (`len - 7`),
    /// where the final window ends exactly at the end of the buffer.
    #[test]
    fn test_hash4x4_various_offsets() {
        let data: Vec<u8> = (0..100).collect();
        for offset in 0..=data.len() - 7 {
            let batch = hash4x4(&data, offset);
            let scalar = [
                hash4_scalar(&data, offset),
                hash4_scalar(&data, offset + 1),
                hash4_scalar(&data, offset + 2),
                hash4_scalar(&data, offset + 3),
            ];
            assert_eq!(batch, scalar, "Mismatch at offset {}", offset);
        }
    }

    /// Every valid offset is checked, including the last one (`len - 11`),
    /// where the final window ends exactly at the end of the buffer.
    #[test]
    fn test_hash4x8_various_offsets() {
        let data: Vec<u8> = (0..100).collect();
        for offset in 0..=data.len() - 11 {
            let batch = hash4x8(&data, offset);
            for (i, &lane) in batch.iter().enumerate() {
                assert_eq!(
                    lane,
                    hash4_scalar(&data, offset + i),
                    "Mismatch at offset {} position {}",
                    offset,
                    i
                );
            }
        }
    }

    /// A heap buffer of exactly the minimum length leaves no slack after the
    /// last window, so tools such as Miri or a sanitizer can flag any read
    /// past the end.
    #[test]
    fn test_hash4x4_exact_length_buffer() {
        let data: Vec<u8> = (1..=7).collect();
        let batch = hash4x4(&data, 0);
        for (i, &lane) in batch.iter().enumerate() {
            assert_eq!(lane, hash4_scalar(&data, i), "Mismatch at position {}", i);
        }
    }

    /// Same minimum-length check for the 8-wide variant (11 bytes).
    #[test]
    fn test_hash4x8_exact_length_buffer() {
        let data: Vec<u8> = (1..=11).collect();
        let batch = hash4x8(&data, 0);
        for (i, &lane) in batch.iter().enumerate() {
            assert_eq!(lane, hash4_scalar(&data, i), "Mismatch at position {}", i);
        }
    }

    /// Hashing 240 distinct sliding windows should give mostly distinct values.
    #[test]
    fn test_hash_distribution() {
        let data: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let mut hashes = std::collections::HashSet::new();
        for offset in 0..240 {
            let h = hash4_scalar(&data, offset);
            hashes.insert(h);
        }
        assert!(
            hashes.len() > 200,
            "Hash distribution too poor: only {} unique out of 240",
            hashes.len()
        );
    }
}