#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
#[cfg(test)]
use super::scalar;
/// Returns the nibble-to-ASCII lookup vector: byte lane `i` holds the
/// lowercase hexadecimal digit for the value `i` (`'0'..='9'`, `'a'..='f'`).
#[inline(always)]
fn hex_lut() -> __m128i {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    // SAFETY: `DIGITS` points at exactly 16 readable bytes, and
    // `_mm_loadu_si128` places no alignment requirement on its source.
    unsafe { _mm_loadu_si128(DIGITS.as_ptr().cast()) }
}
/// Formats `value` as 16 lowercase hexadecimal ASCII characters, most
/// significant digit first.
///
/// Each input byte is split into its two nibbles, the nibbles are interleaved
/// into output order, and a single byte shuffle maps every nibble to its digit
/// via the table from `hex_lut`.
#[inline]
pub fn hex_encode_u64(value: u64) -> [u8; 16] {
    // Big-endian order places the most significant byte in lane 0, so the
    // digits come out in reading order.
    let be = value.to_be_bytes();
    let table = hex_lut();
    let mut out = [0u8; 16];
    // SAFETY: `_mm_loadl_epi64` reads 8 bytes and `be` is exactly 8 bytes;
    // `_mm_storeu_si128` writes 16 bytes and `out` is exactly 16 bytes; neither
    // requires alignment.
    // NOTE(review): `_mm_shuffle_epi8` is an SSSE3 instruction and this
    // function has no `#[target_feature]` attribute; presumably the enclosing
    // module is only compiled or dispatched to when SSSE3 is present. Confirm.
    unsafe {
        let low4 = _mm_set1_epi8(0x0F);
        let raw = _mm_loadl_epi64(be.as_ptr().cast());
        // The shift works on 16-bit lanes, so bits of the neighbouring byte
        // spill into each byte's top half; the mask discards them.
        let upper = _mm_and_si128(_mm_srli_epi16(raw, 4), low4);
        let lower = _mm_and_si128(raw, low4);
        // Interleave to high nibble, low nibble, high nibble, low nibble, ...
        let indices = _mm_unpacklo_epi8(upper, lower);
        let digits = _mm_shuffle_epi8(table, indices);
        _mm_storeu_si128(out.as_mut_ptr().cast(), digits);
    }
    out
}
/// Formats a 128-bit value, supplied as two 64-bit halves, as 32 lowercase
/// hexadecimal ASCII characters.
///
/// The digits of `hi` fill bytes `0..16` of the result and the digits of `lo`
/// fill bytes `16..32`; each half is written most significant digit first.
#[inline]
pub fn hex_encode_u128(hi: u64, lo: u64) -> [u8; 32] {
    let table = hex_lut();
    let mut out = [0u8; 32];
    // Pair each 16-byte half of the output with the word that fills it.
    for (dst, word) in out.chunks_exact_mut(16).zip([hi, lo]) {
        // Big-endian order puts the most significant byte in lane 0.
        let be = word.to_be_bytes();
        // SAFETY: `_mm_loadl_epi64` reads 8 bytes and `be` is exactly 8 bytes;
        // `chunks_exact_mut(16)` only yields 16-byte slices, which is what
        // `_mm_storeu_si128` writes; neither requires alignment.
        // NOTE(review): `_mm_shuffle_epi8` is an SSSE3 instruction and this
        // function has no `#[target_feature]` attribute; presumably SSSE3
        // availability is guaranteed by whatever selects this module. Confirm.
        unsafe {
            let low4 = _mm_set1_epi8(0x0F);
            let raw = _mm_loadl_epi64(be.as_ptr().cast());
            // 16-bit lane shift leaks neighbouring bits; the mask removes them.
            let upper = _mm_and_si128(_mm_srli_epi16(raw, 4), low4);
            let lower = _mm_and_si128(raw, low4);
            // Interleave so each byte's high nibble precedes its low nibble.
            let indices = _mm_unpacklo_epi8(upper, lower);
            let digits = _mm_shuffle_epi8(table, indices);
            _mm_storeu_si128(dst.as_mut_ptr().cast(), digits);
        }
    }
    out
}
/// Decodes a 36-byte dashed UUID string (`8-4-4-4-12` layout) into two 64-bit
/// halves, returned as `(hi, lo)`.
///
/// The bytes at offsets 8, 13, 18 and 23 are removed by the shuffles without
/// being inspected; only the remaining 32 bytes are passed on to
/// `super::sse2::hex_decode_16_reg`, 16 at a time.
///
/// # Errors
///
/// Forwards the `Err(position)` reported by `hex_decode_16_reg`. An error from
/// the second group of 16 characters has 16 added to it, so the position is
/// counted over the 32 characters that remain once the separators are dropped.
#[inline]
pub fn uuid_decode_dashed(bytes: &[u8; 36]) -> Result<(u64, u64), usize> {
    // A selector byte with its high bit set makes `_mm_shuffle_epi8` write
    // zero to that lane, which lets two shuffles be merged with a plain OR.
    const CLEAR: i8 = -1;

    // Offsets 32..36 packed so that byte 32 lands in lane 0 of the vector.
    let tail_word = u32::from_le_bytes([bytes[32], bytes[33], bytes[34], bytes[35]]);

    // SAFETY: the two 16-byte unaligned loads cover offsets 0..16 and 16..32,
    // both inside the 36-byte array.
    // NOTE(review): `_mm_shuffle_epi8` is an SSSE3 instruction and this
    // function has no `#[target_feature]` attribute; presumably SSSE3
    // availability is guaranteed by whatever selects this module. Confirm.
    // `hex_decode_16_reg` is defined elsewhere; any safety contract it has is
    // not visible from here.
    unsafe {
        let src = bytes.as_ptr();
        let head = _mm_loadu_si128(src.cast());
        let mid = _mm_loadu_si128(src.add(16).cast());
        let tail = _mm_cvtsi32_si128(tail_word as i32);

        // First 16 characters: offsets 0..=7, 9..=12 and 14..=15 come from
        // `head`; offsets 16..=17 are lanes 0..=1 of `mid`.
        let pick_head = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, CLEAR, CLEAR,
        );
        let pick_mid_front = _mm_setr_epi8(
            CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR,
            CLEAR, CLEAR, 0, 1,
        );
        let first_16 = _mm_or_si128(
            _mm_shuffle_epi8(head, pick_head),
            _mm_shuffle_epi8(mid, pick_mid_front),
        );

        // Last 16 characters: offsets 19..=22 and 24..=31 are lanes 3..=6 and
        // 8..=15 of `mid`; offsets 32..=35 are lanes 0..=3 of `tail`.
        let pick_mid_back = _mm_setr_epi8(
            3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, CLEAR, CLEAR, CLEAR, CLEAR,
        );
        let pick_tail = _mm_setr_epi8(
            CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR, CLEAR,
            0, 1, 2, 3,
        );
        let second_16 = _mm_or_si128(
            _mm_shuffle_epi8(mid, pick_mid_back),
            _mm_shuffle_epi8(tail, pick_tail),
        );

        let hi = super::sse2::hex_decode_16_reg(first_16)?;
        // Rebase the error position past the 16 characters already decoded.
        let lo = super::sse2::hex_decode_16_reg(second_16).map_err(|pos| pos + 16)?;
        Ok((hi, lo))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The 64-bit encoder must agree with the scalar implementation.
    #[test]
    fn encode_u64_matches_scalar() {
        let inputs = [
            0u64,
            1,
            0xFF,
            0xDEAD_BEEF,
            0xDEAD_BEEF_CAFE_BABE,
            0x0123_4567_89AB_CDEF,
            u64::MAX,
        ];
        for value in inputs {
            let simd = hex_encode_u64(value);
            let reference = scalar::hex_encode_u64(value);
            assert_eq!(simd, reference, "mismatch for 0x{:016x}", value);
        }
    }

    /// The 128-bit encoder must agree with the scalar implementation.
    #[test]
    fn encode_u128_matches_scalar() {
        let pairs: [(u64, u64); 5] = [
            (0, 0),
            (u64::MAX, u64::MAX),
            (0x0123_4567_89AB_CDEF, 0xFEDC_BA98_7654_3210),
            (1, 0),
            (0, 1),
        ];
        for (hi, lo) in pairs {
            let simd = hex_encode_u128(hi, lo);
            let reference = scalar::hex_encode_u128(hi, lo);
            assert_eq!(simd, reference, "mismatch for ({:#x}, {:#x})", hi, lo);
        }
    }

    /// Encoding and then decoding with `sse2::hex_decode_16` yields the input.
    #[test]
    fn encode_decode_roundtrip() {
        use super::super::sse2;
        let samples = [0u64, 1, 42, 0xDEAD_BEEF_CAFE_BABE, u64::MAX];
        for value in samples {
            let text = hex_encode_u64(value);
            assert_eq!(sse2::hex_decode_16(&text).unwrap(), value);
        }
    }

    /// Well-formed dashed UUIDs in lower, upper and mixed case decode to the
    /// expected halves.
    #[test]
    fn uuid_decode_dashed_valid() {
        let table: [(&[u8; 36], u64, u64); 5] = [
            (
                b"01234567-89ab-cdef-fedc-ba9876543210",
                0x0123_4567_89AB_CDEF,
                0xFEDC_BA98_7654_3210,
            ),
            (b"00000000-0000-0000-0000-000000000000", 0, 0),
            (b"ffffffff-ffff-ffff-ffff-ffffffffffff", u64::MAX, u64::MAX),
            (
                b"DEADBEEF-CAFE-BABE-0123-456789ABCDEF",
                0xDEAD_BEEF_CAFE_BABE,
                0x0123_4567_89AB_CDEF,
            ),
            (
                b"DeAdBeEf-CaFe-BaBe-0123-456789abcdef",
                0xDEAD_BEEF_CAFE_BABE,
                0x0123_4567_89AB_CDEF,
            ),
        ];
        for (text, want_hi, want_lo) in table {
            let (hi, lo) = uuid_decode_dashed(text).unwrap();
            let shown = core::str::from_utf8(text);
            assert_eq!(hi, want_hi, "hi mismatch for {:?}", shown);
            assert_eq!(lo, want_lo, "lo mismatch for {:?}", shown);
        }
    }

    /// Dashed decoding gives the same halves as decoding the dash-free text.
    #[test]
    fn uuid_decode_dashed_matches_compact() {
        use super::super::sse2;
        let from_dashed = uuid_decode_dashed(b"01234567-89ab-cdef-fedc-ba9876543210").unwrap();
        let from_compact = sse2::hex_decode_32(b"0123456789abcdeffedcba9876543210").unwrap();
        assert_eq!(from_dashed, from_compact);
    }

    /// Corrupting any single hex character reports that character's position
    /// counted in the dash-free text.
    #[test]
    fn uuid_decode_dashed_invalid_positions() {
        const BASE: [u8; 36] = *b"01234567-89ab-cdef-fedc-ba9876543210";
        // (group start in the dashed text, group start in the dash-free text, length)
        let groups: [(usize, usize, usize); 5] = [
            (0, 0, 8),
            (9, 8, 4),
            (14, 12, 4),
            (19, 16, 4),
            (24, 20, 12),
        ];
        for (dashed_start, compact_start, len) in groups {
            for i in 0..len {
                let mut corrupted = BASE;
                corrupted[dashed_start + i] = b'x';
                assert_eq!(uuid_decode_dashed(&corrupted), Err(compact_start + i));
            }
        }
    }
}