#![allow(unused_unsafe)]
#![allow(unsafe_op_in_unsafe_fn)]
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use crate::crypto::{ASCII_TO_UPPER, ENCRYPTION_TABLE};
/// Computes the CRC-32 of `data`, continuing from the running value `crc`.
///
/// The work is delegated to `crc32fast::Hasher`, seeded with `crc` through
/// `Hasher::new_with_initial`. No SSE4.2 intrinsic is issued directly here.
///
/// # Safety
///
/// The function is compiled with the `sse4.2` target feature enabled, so the
/// caller must make sure the running CPU supports SSE4.2 (for example by
/// checking `is_x86_feature_detected!("sse4.2")`) before calling it.
#[target_feature(enable = "sse4.2")]
pub(super) unsafe fn crc32_sse42(data: &[u8], crc: u32) -> u32 {
    let mut hasher = crc32fast::Hasher::new_with_initial(crc);
    hasher.update(data);
    // `finalize` yields the checksum as a `u32` directly, so there is no need
    // to go through `std::hash::Hasher::finish` (which widens to `u64`) and
    // then truncate the result back down with an `as` cast.
    hasher.finalize()
}
/// Hashes `filename` with a table-driven two-seed recurrence.
///
/// Every input byte is normalized before it is mixed in: `/` becomes `\`, and
/// the result is mapped through `ASCII_TO_UPPER`. The mapped byte plus
/// `hash_type` (wrapping add) selects the `ENCRYPTION_TABLE` entry used for
/// that step. The final value of the first seed is returned.
///
/// Inputs shorter than 32 bytes are handed to
/// `super::scalar::hash_string_scalar` unchanged. For longer inputs only the
/// separator replacement is done with AVX2 (32 bytes at a time); the table
/// lookups and the seed recurrence are inherently sequential and stay scalar.
///
/// # Panics
///
/// Panics if `hash_type + byte` indexes past the end of `ENCRYPTION_TABLE`.
/// NOTE(review): the table length is not visible from this file — confirm the
/// valid range of `hash_type` against the table definition.
///
/// # Safety
///
/// The function is compiled with the `avx2` target feature enabled; the caller
/// must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub(super) unsafe fn hash_string_avx2(filename: &[u8], hash_type: u32) -> u32 {
    /// Number of bytes held by one 256-bit vector.
    const LANES: usize = 32;

    /// Folds one already-normalized byte into the two running seeds.
    fn absorb(seed1: &mut u32, seed2: &mut u32, hash_type: u32, ch: u8) {
        let value = ch as u32;
        let slot = hash_type.wrapping_add(value) as usize;
        *seed1 = ENCRYPTION_TABLE[slot] ^ (*seed1).wrapping_add(*seed2);
        *seed2 = value
            .wrapping_add(*seed1)
            .wrapping_add(*seed2)
            .wrapping_add(*seed2 << 5)
            .wrapping_add(3);
    }

    if filename.len() < LANES {
        return super::scalar::hash_string_scalar(filename, hash_type);
    }

    let mut seed1: u32 = 0x7FED7FED;
    let mut seed2: u32 = 0xEEEEEEEE;

    // Loop-invariant vectors used for the separator replacement.
    let slash = _mm256_set1_epi8(b'/' as i8);
    let backslash = _mm256_set1_epi8(b'\\' as i8);

    let mut blocks = filename.chunks_exact(LANES);
    for block in blocks.by_ref() {
        // SAFETY: `chunks_exact` only yields slices of exactly `LANES` (32)
        // bytes, and the unaligned load has no alignment requirement.
        let raw = unsafe { _mm256_loadu_si256(block.as_ptr() as *const __m256i) };
        let slash_mask = _mm256_cmpeq_epi8(raw, slash);
        let separators_fixed = _mm256_blendv_epi8(raw, backslash, slash_mask);

        let mut lane_bytes = [0u8; LANES];
        // SAFETY: `lane_bytes` is a 32-byte buffer and the store is unaligned.
        unsafe { _mm256_storeu_si256(lane_bytes.as_mut_ptr() as *mut __m256i, separators_fixed) };

        for &lane in &lane_bytes {
            absorb(
                &mut seed1,
                &mut seed2,
                hash_type,
                ASCII_TO_UPPER[lane as usize],
            );
        }
    }

    // Fewer than 32 bytes are left; normalize them one at a time.
    for &byte in blocks.remainder() {
        let separator_fixed = if byte == b'/' { b'\\' } else { byte };
        absorb(
            &mut seed1,
            &mut seed2,
            hash_type,
            ASCII_TO_UPPER[separator_fixed as usize],
        );
    }

    seed1
}
/// Hashes every name in `filenames` and returns the hashes in input order.
///
/// Each name is hashed independently by `jenkins_hash_scalar_optimized`; the
/// result vector has exactly one entry per input name.
///
/// # Safety
///
/// The function is compiled with the `avx2` target feature enabled; the caller
/// must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
pub(super) unsafe fn jenkins_hash_batch_avx2(filenames: &[&str]) -> Vec<u64> {
    let mut hashes = Vec::with_capacity(filenames.len());
    for &name in filenames {
        // SAFETY: the callee requires AVX2, which this function's own
        // contract already obliges the caller to guarantee.
        hashes.push(unsafe { jenkins_hash_scalar_optimized(name) });
    }
    hashes
}
/// Hashes one filename with a one-at-a-time style mixing loop over its
/// normalized bytes.
///
/// Normalization replaces `/` with `\` and lowers ASCII `A`–`Z` by adding 32;
/// all other bytes are left alone. Names shorter than 32 bytes are delegated
/// to `super::scalar::jenkins_hash_scalar`. For longer names the
/// normalization is done 32 bytes at a time with AVX2, while the mixing
/// itself stays sequential.
///
/// # Safety
///
/// The function is compiled with the `avx2` target feature enabled; the caller
/// must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
unsafe fn jenkins_hash_scalar_optimized(filename: &str) -> u64 {
    /// Number of bytes held by one 256-bit vector.
    const LANES: usize = 32;

    /// Mixes one normalized byte into the running hash.
    fn mix(hash: u64, byte: u8) -> u64 {
        let added = hash.wrapping_add(byte as u64);
        let spread = added.wrapping_add(added << 10);
        spread ^ (spread >> 6)
    }

    let bytes = filename.as_bytes();
    if bytes.len() < LANES {
        return super::scalar::jenkins_hash_scalar(filename);
    }

    // Loop-invariant vectors. The range test uses signed compares, so bytes
    // >= 0x80 (negative as i8) never fall inside the 'A'..='Z' window.
    let slash = _mm256_set1_epi8(b'/' as i8);
    let backslash = _mm256_set1_epi8(b'\\' as i8);
    let below_upper_a = _mm256_set1_epi8(b'A' as i8 - 1);
    let above_upper_z = _mm256_set1_epi8(b'Z' as i8 + 1);
    let to_lower_delta = _mm256_set1_epi8(32);

    let mut hash: u64 = 0;
    let mut blocks = bytes.chunks_exact(LANES);
    for block in blocks.by_ref() {
        // SAFETY: `chunks_exact` only yields slices of exactly `LANES` (32)
        // bytes, and the unaligned load has no alignment requirement.
        let raw = unsafe { _mm256_loadu_si256(block.as_ptr() as *const __m256i) };

        let slash_mask = _mm256_cmpeq_epi8(raw, slash);
        let separators_fixed = _mm256_blendv_epi8(raw, backslash, slash_mask);

        let upper_mask = _mm256_and_si256(
            _mm256_cmpgt_epi8(separators_fixed, below_upper_a),
            _mm256_cmpgt_epi8(above_upper_z, separators_fixed),
        );
        let lowered = _mm256_blendv_epi8(
            separators_fixed,
            _mm256_add_epi8(separators_fixed, to_lower_delta),
            upper_mask,
        );

        let mut lane_bytes = [0u8; LANES];
        // SAFETY: `lane_bytes` is a 32-byte buffer and the store is unaligned.
        unsafe { _mm256_storeu_si256(lane_bytes.as_mut_ptr() as *mut __m256i, lowered) };

        for &lane in &lane_bytes {
            hash = mix(hash, lane);
        }
    }

    // Fewer than 32 bytes are left; normalize them one at a time.
    for &byte in blocks.remainder() {
        let separator_fixed = if byte == b'/' { b'\\' } else { byte };
        hash = mix(hash, separator_fixed.to_ascii_lowercase());
    }

    // Final avalanche.
    hash = hash.wrapping_add(hash << 3);
    hash ^= hash >> 11;
    hash.wrapping_add(hash << 15)
}
/// Normalizes every filename in place.
///
/// Whole 32-byte blocks are processed with AVX2: `/` becomes `\`, and ASCII
/// `a`–`z` are shifted to `A`–`Z` by adding -32. Whatever is left after the
/// last full block — which is the entire name when it is shorter than 32
/// bytes — goes through `normalize_filename_scalar`.
///
/// NOTE(review): the vector path upper-cases by range test while the scalar
/// path uses the `ASCII_TO_UPPER` table; the two agree only if that table is a
/// plain ASCII upper-casing map — confirm against the table definition.
///
/// # Safety
///
/// The function is compiled with the `avx2` target feature enabled; the caller
/// must ensure the running CPU supports AVX2.
#[target_feature(enable = "avx2")]
#[allow(dead_code)]
pub(super) unsafe fn normalize_filenames_avx2(filenames: &mut [Vec<u8>]) {
    /// Number of bytes held by one 256-bit vector.
    const LANES: usize = 32;

    // Loop-invariant vectors. The range test uses signed compares, so bytes
    // >= 0x80 (negative as i8) never fall inside the 'a'..='z' window.
    let slash = _mm256_set1_epi8(b'/' as i8);
    let backslash = _mm256_set1_epi8(b'\\' as i8);
    let below_lower_a = _mm256_set1_epi8(b'a' as i8 - 1);
    let above_lower_z = _mm256_set1_epi8(b'z' as i8 + 1);
    let to_upper_delta = _mm256_set1_epi8(-32);

    for filename in filenames.iter_mut() {
        let mut blocks = filename.chunks_exact_mut(LANES);

        for block in blocks.by_ref() {
            // SAFETY: `chunks_exact_mut` only yields slices of exactly `LANES`
            // (32) bytes, and the unaligned load has no alignment requirement.
            let raw = unsafe { _mm256_loadu_si256(block.as_ptr() as *const __m256i) };

            let slash_mask = _mm256_cmpeq_epi8(raw, slash);
            let separators_fixed = _mm256_blendv_epi8(raw, backslash, slash_mask);

            let lower_mask = _mm256_and_si256(
                _mm256_cmpgt_epi8(separators_fixed, below_lower_a),
                _mm256_cmpgt_epi8(above_lower_z, separators_fixed),
            );
            let uppered = _mm256_blendv_epi8(
                separators_fixed,
                _mm256_add_epi8(separators_fixed, to_upper_delta),
                lower_mask,
            );

            // SAFETY: `block` is exactly 32 writable bytes and the store is
            // unaligned.
            unsafe { _mm256_storeu_si256(block.as_mut_ptr() as *mut __m256i, uppered) };
        }

        // Tail shorter than one vector (or the whole name if it never filled
        // a vector): same per-byte normalization as the scalar helper.
        normalize_filename_scalar(blocks.into_remainder());
    }
}
/// Normalizes `filename` in place, one byte at a time.
///
/// Each byte has `/` replaced by `\` and is then mapped through
/// `ASCII_TO_UPPER`.
#[allow(dead_code)]
fn normalize_filename_scalar(filename: &mut [u8]) {
    for slot in filename.iter_mut() {
        let separator_fixed = match *slot {
            b'/' => b'\\',
            other => other,
        };
        *slot = ASCII_TO_UPPER[separator_fixed as usize];
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `crc32_sse42` must agree with the scalar CRC-32 on a small input.
    #[test]
    fn test_sse42_availability() {
        if is_x86_feature_detected!("sse4.2") {
            let test_data = b"Hello, World!";
            // SAFETY: SSE4.2 support was confirmed by the check above.
            let result = unsafe { crc32_sse42(test_data, 0) };
            let reference = super::super::scalar::crc32_scalar(test_data, 0);
            assert_eq!(result, reference, "SSE4.2 CRC32 should match reference");
        } else {
            println!("SSE4.2 not available on this CPU");
        }
    }

    /// `hash_string_avx2` must agree with the scalar hash, both for a short
    /// name (scalar fallback) and for a name long enough to enter the 32-byte
    /// vector loop.
    #[test]
    fn test_avx2_availability() {
        if is_x86_feature_detected!("avx2") {
            let short_name: &[u8] = b"Units\\Human\\Footman.mdx";
            let long_name: &[u8] = b"Units\\Human\\Footman\\Textures\\Footman_Armor_Variant.blp";
            // Names under 32 bytes never reach the AVX2 loop, so make sure at
            // least one input really exercises it.
            assert!(long_name.len() >= 32);

            for name in [short_name, long_name] {
                // SAFETY: AVX2 support was confirmed by the check above.
                let result = unsafe { hash_string_avx2(name, 0) };
                let reference = super::super::scalar::hash_string_scalar(name, 0);
                assert_eq!(result, reference, "AVX2 hash should match reference");
            }
        } else {
            println!("AVX2 not available on this CPU");
        }
    }

    /// Cross-checks the SIMD entry points against the scalar implementations
    /// over a range of input sizes and contents.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_simd_correctness_if_available() {
        if is_x86_feature_detected!("sse4.2") {
            let repeated = vec![0x42u8; 1000];
            // Inclusive range so that 0xFF is covered as well.
            let all_bytes: Vec<u8> = (0..=255).collect();
            let test_cases: [&[u8]; 6] = [
                b"",
                b"a",
                b"test",
                b"The quick brown fox jumps over the lazy dog",
                &repeated,
                &all_bytes,
            ];
            for &test_data in &test_cases {
                // SAFETY: SSE4.2 support was confirmed by the check above.
                let simd_result = unsafe { crc32_sse42(test_data, 0) };
                let scalar_result = super::super::scalar::crc32_scalar(test_data, 0);
                assert_eq!(
                    simd_result,
                    scalar_result,
                    "SSE4.2 CRC32 mismatch for {} bytes",
                    test_data.len()
                );

                let initial = 0x12345678;
                // SAFETY: SSE4.2 support was confirmed by the check above.
                let simd_result2 = unsafe { crc32_sse42(test_data, initial) };
                let scalar_result2 = super::super::scalar::crc32_scalar(test_data, initial);
                assert_eq!(
                    simd_result2,
                    scalar_result2,
                    "SSE4.2 CRC32 with initial value mismatch for {} bytes",
                    test_data.len()
                );
            }
        }

        if is_x86_feature_detected!("avx2") {
            let long_run = "x".repeat(100);
            let test_cases: [&str; 7] = [
                "",
                "a",
                "test.txt",
                "Units\\Human\\Footman.mdx",
                "interface/glue/mainmenu.blp",
                "very/long/path/to/some/file/with/many/directories/file.txt",
                &long_run,
            ];
            for &test_string in &test_cases {
                let test_bytes = test_string.as_bytes();
                // SAFETY: AVX2 support was confirmed by the check above.
                let simd_result = unsafe { hash_string_avx2(test_bytes, 0) };
                let scalar_result = super::super::scalar::hash_string_scalar(test_bytes, 0);
                assert_eq!(
                    simd_result, scalar_result,
                    "AVX2 hash mismatch for '{}'",
                    test_string
                );

                for hash_type in [1, 2, 3] {
                    // SAFETY: AVX2 support was confirmed by the check above.
                    let simd_result_typed = unsafe { hash_string_avx2(test_bytes, hash_type) };
                    let scalar_result_typed =
                        super::super::scalar::hash_string_scalar(test_bytes, hash_type);
                    assert_eq!(
                        simd_result_typed, scalar_result_typed,
                        "AVX2 hash type {} mismatch for '{}'",
                        hash_type, test_string
                    );
                }
            }
        }
    }

    /// The batch entry point must return one hash per name, in input order,
    /// identical to hashing each name on its own. Six names are used so that
    /// the input is not a multiple of four.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_jenkins_batch_matches_single() {
        if is_x86_feature_detected!("avx2") {
            let long_run = "x".repeat(100);
            let names: [&str; 6] = [
                "",
                "a",
                "test.txt",
                "interface/glue/mainmenu.blp",
                "very/long/path/to/some/file/with/many/directories/file.txt",
                &long_run,
            ];
            // SAFETY: AVX2 support was confirmed by the check above.
            let batch = unsafe { jenkins_hash_batch_avx2(&names) };
            assert_eq!(batch.len(), names.len());

            for (&name, &batched) in names.iter().zip(&batch) {
                // SAFETY: AVX2 support was confirmed by the check above.
                let single = unsafe { jenkins_hash_scalar_optimized(name) };
                assert_eq!(batched, single, "Jenkins batch mismatch for '{}'", name);
            }
        }
    }

    /// In-place AVX2 normalization must match the scalar helper, including on
    /// names long enough to go through the 32-byte vector loop.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_normalization() {
        if is_x86_feature_detected!("avx2") {
            let test_cases = vec![
                "path/to/file.txt".to_string(),
                "UPPERCASE/file.TXT".to_string(),
                "mixed/Case/File.Ext".to_string(),
                "a".repeat(50),
                // Over 32 bytes with separators and mixed case, so the vector
                // loop sees both kinds of rewrite plus a scalar tail.
                "Interface/Glue/MainMenu/Textures/LoginScreen_Background.BLP".to_string(),
            ];
            for test_case in test_cases {
                let mut simd_vec = vec![test_case.clone().into_bytes()];
                // SAFETY: AVX2 support was confirmed by the check above.
                unsafe { normalize_filenames_avx2(&mut simd_vec) };
                let simd_version = simd_vec.into_iter().next().unwrap();

                let mut scalar_version = test_case.clone().into_bytes();
                normalize_filename_scalar(&mut scalar_version);

                assert_eq!(
                    simd_version, scalar_version,
                    "Normalization mismatch for '{}'",
                    test_case
                );
            }
        }
    }
}