#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
#[cfg(test)]
use super::scalar;
/// Decodes exactly 16 ASCII hexadecimal digits into a `u64`.
///
/// The first input byte supplies the most significant nibble of the result.
/// Both upper- and lower-case digits (`0-9`, `A-F`, `a-f`) are accepted.
///
/// # Errors
///
/// Returns `Err(index)` holding the zero-based position of the first byte
/// that is not an ASCII hex digit.
#[inline]
pub fn hex_decode_16(bytes: &[u8; 16]) -> Result<u64, usize> {
hex_decode_16_inner(bytes)
}
/// Decodes exactly 32 ASCII hexadecimal digits into two `u64` halves.
///
/// Returns `Ok((hi, lo))`, where `hi` is decoded from `bytes[0..16]` and `lo`
/// from `bytes[16..32]`, each via [`hex_decode_16`].
///
/// # Errors
///
/// Returns `Err(index)` with the zero-based position, relative to the full
/// 32-byte input, of an invalid byte. The first half is decoded first, so an
/// invalid byte there is reported even if the second half is invalid as well.
#[inline]
pub fn hex_decode_32(bytes: &[u8; 32]) -> Result<(u64, u64), usize> {
    // Split with safe slice operations instead of raw-pointer casts. Both
    // halves are exactly 16 bytes long, so `copy_from_slice` cannot panic.
    let (hi_half, lo_half) = bytes.split_at(16);
    let mut hi_bytes = [0u8; 16];
    let mut lo_bytes = [0u8; 16];
    hi_bytes.copy_from_slice(hi_half);
    lo_bytes.copy_from_slice(lo_half);

    let hi = hex_decode_16(&hi_bytes)?;
    // Positions reported for the second half are relative to that half;
    // shift them so they index into the original 32-byte input.
    let lo = hex_decode_16(&lo_bytes).map_err(|pos| pos + 16)?;
    Ok((hi, lo))
}
/// Decodes 16 ASCII hex digits held in an SSE register into a `u64`.
///
/// Byte 0 of `input` supplies the most significant nibble of the result.
///
/// # Errors
///
/// Returns `Err(index)` with the lane index (0..16) of the first byte that is
/// not one of `0-9`, `A-F` or `a-f`.
#[inline]
pub(crate) fn hex_decode_16_reg(input: __m128i) -> Result<u64, usize> {
    // SAFETY: every intrinsic below works purely on register values; no
    // memory is read or written.
    // NOTE(review): these are SSE2 intrinsics; SSE2 is part of the x86_64
    // baseline -- confirm this module is only compiled for that target.
    unsafe {
        // Classify each byte. The compares are signed, so bytes >= 0x80 are
        // negative and fall outside all three ranges.
        let digit_mask = _mm_and_si128(
            _mm_cmpgt_epi8(input, _mm_set1_epi8(b'0' as i8 - 1)),
            _mm_cmpgt_epi8(_mm_set1_epi8(b'9' as i8 + 1), input),
        );
        let upper_mask = _mm_and_si128(
            _mm_cmpgt_epi8(input, _mm_set1_epi8(b'A' as i8 - 1)),
            _mm_cmpgt_epi8(_mm_set1_epi8(b'F' as i8 + 1), input),
        );
        let lower_mask = _mm_and_si128(
            _mm_cmpgt_epi8(input, _mm_set1_epi8(b'a' as i8 - 1)),
            _mm_cmpgt_epi8(_mm_set1_epi8(b'f' as i8 + 1), input),
        );

        // One bit per lane: set when the lane belongs to any class.
        let any_class = _mm_or_si128(digit_mask, _mm_or_si128(upper_mask, lower_mask));
        let valid_mask = _mm_movemask_epi8(any_class) as u32;
        if valid_mask != 0xFFFF {
            // The run of low set bits ends at the first invalid lane.
            return Err(valid_mask.trailing_ones() as usize);
        }

        // Every lane is in exactly one class here, so pick that class's
        // ASCII-to-value offset and subtract it once.
        let offset = _mm_or_si128(
            _mm_or_si128(
                _mm_and_si128(digit_mask, _mm_set1_epi8(b'0' as i8)),
                _mm_and_si128(upper_mask, _mm_set1_epi8(b'A' as i8 - 10)),
            ),
            _mm_and_si128(lower_mask, _mm_set1_epi8(b'a' as i8 - 10)),
        );
        let nibbles = _mm_sub_epi8(input, offset);

        // In each 16-bit lane the low byte is the earlier character (high
        // nibble of the output byte) and the high byte is the later one.
        let high_nibbles = _mm_slli_epi16(_mm_and_si128(nibbles, _mm_set1_epi16(0x00FF)), 4);
        let low_nibbles = _mm_srli_epi16(nibbles, 8);
        let byte_lanes = _mm_or_si128(high_nibbles, low_nibbles);

        // Each lane is at most 0xFF, so the saturating pack is lossless.
        let packed = _mm_packus_epi16(byte_lanes, _mm_setzero_si128());
        let little_endian = _mm_cvtsi128_si64(packed) as u64;

        // The first input pair landed in the lowest byte; reverse the bytes
        // so it becomes the most significant one.
        Ok(little_endian.swap_bytes())
    }
}
/// Loads the 16 input bytes into an SSE register and decodes them with
/// `hex_decode_16_reg`.
#[inline]
fn hex_decode_16_inner(bytes: &[u8; 16]) -> Result<u64, usize> {
    let src = bytes.as_ptr().cast::<__m128i>();
    // SAFETY: `src` comes from a live `&[u8; 16]`, so exactly 16 bytes are
    // readable, and `_mm_loadu_si128` is the unaligned load, so the array's
    // alignment does not matter.
    let lanes = unsafe { _mm_loadu_si128(src) };
    hex_decode_16_reg(lanes)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The SIMD decoder must agree with the scalar reference on valid input.
    #[test]
    fn decode_16_matches_scalar() {
        let cases: &[&[u8; 16]] = &[
            b"0000000000000000",
            b"ffffffffffffffff",
            b"deadbeefcafebabe",
            b"DEADBEEFCAFEBABE",
            b"DeAdBeEfCaFeBaBe",
            b"0123456789abcdef",
            b"FEDCBA9876543210",
        ];
        for &input in cases {
            let sse2_result = hex_decode_16(input);
            let scalar_result = scalar::hex_decode_16(input);
            assert_eq!(
                sse2_result,
                scalar_result,
                "mismatch for {:?}",
                core::str::from_utf8(input)
            );
        }
    }

    /// Pins absolute decoded values so that a bug shared with the scalar
    /// reference cannot go unnoticed.
    #[test]
    fn decode_16_known_values() {
        assert_eq!(hex_decode_16(b"0000000000000000"), Ok(0));
        assert_eq!(hex_decode_16(b"ffffffffffffffff"), Ok(u64::MAX));
        assert_eq!(hex_decode_16(b"0123456789abcdef"), Ok(0x0123_4567_89ab_cdef));
        assert_eq!(hex_decode_16(b"DEADBEEFCAFEBABE"), Ok(0xdead_beef_cafe_babe));
    }

    /// An invalid byte at any position is reported at exactly that position.
    #[test]
    fn decode_16_invalid_positions() {
        for pos in 0..16 {
            let mut input = *b"0123456789abcdef";
            input[pos] = b'g';
            let result = hex_decode_16(&input);
            assert_eq!(result, Err(pos), "expected error at pos {}", pos);
        }
    }

    /// With several invalid bytes, the lowest position wins.
    #[test]
    fn decode_16_reports_first_of_several_invalid() {
        let mut input = *b"0123456789abcdef";
        input[4] = b'x';
        input[11] = b'y';
        assert_eq!(hex_decode_16(&input), Err(4));
    }

    /// Bytes adjacent to the valid ranges, control characters, and bytes with
    /// the high bit set (negative under the signed SIMD compares) are rejected.
    #[test]
    fn decode_16_rejects_near_miss_chars() {
        let near_misses: &[u8] = &[
            b'/', b':', b'@', b'G', b'`', b'g', 0x00, 0x10, 0x20, 0x7F, 0x80, 0xFF,
        ];
        for &bad in near_misses {
            let mut input = *b"0000000000000000";
            input[5] = bad;
            assert_eq!(hex_decode_16(&input), Err(5), "should reject 0x{:02x}", bad);
        }
    }

    /// The 32-digit decoder must agree with the scalar reference.
    #[test]
    fn decode_32_matches_scalar() {
        let input = b"0123456789abcdeffedcba9876543210";
        let sse2_result = hex_decode_32(input);
        let scalar_result = scalar::hex_decode_32(input);
        assert_eq!(sse2_result, scalar_result);
    }

    /// Pins the absolute `(hi, lo)` split of a 32-digit input.
    #[test]
    fn decode_32_known_value() {
        assert_eq!(
            hex_decode_32(b"0123456789abcdeffedcba9876543210"),
            Ok((0x0123_4567_89ab_cdef, 0xfedc_ba98_7654_3210))
        );
    }

    /// Errors in the first half are reported with an unshifted position.
    #[test]
    fn decode_32_error_in_first_half() {
        let mut input = *b"0123456789abcdef0123456789abcdef";
        input[3] = b'x';
        assert_eq!(hex_decode_32(&input), Err(3));
    }

    /// Errors in the second half are reported relative to the full input.
    #[test]
    fn decode_32_error_in_second_half() {
        let mut input = *b"0123456789abcdef0123456789abcdef";
        input[20] = b'x';
        assert_eq!(hex_decode_32(&input), Err(20));
    }

    /// When both halves are invalid, the first half's error takes precedence.
    #[test]
    fn decode_32_first_half_error_wins() {
        let mut input = *b"0123456789abcdef0123456789abcdef";
        input[7] = b'x';
        input[25] = b'y';
        assert_eq!(hex_decode_32(&input), Err(7));
    }
}