use crate::core::dictionary::Dictionary;
use crate::simd::variants::{DictionaryMetadata, LutStrategy, TranslationStrategy};
/// Two-characters-per-byte codec for small, arbitrarily ordered ASCII
/// alphabets, driven by a 16-entry encode table and a 256-entry decode table.
///
/// Instances are created with `from_dictionary`. `encode` and `decode` only
/// produce a result when the dictionary base is exactly 16.
pub struct SmallLutCodec {
/// Dictionary properties captured at construction; `base` gates encode/decode.
metadata: DictionaryMetadata,
/// Digit value -> ASCII byte. Entries at or beyond `metadata.base` remain 0.
encode_lut: [u8; 16],
/// Input byte -> digit value; 0xFF marks bytes that are not in the alphabet.
decode_lut: [u8; 256],
}
impl SmallLutCodec {
/// Builds a codec for `dict`, or returns `None` when the dictionary does not
/// suit the small-LUT approach.
///
/// The dictionary is accepted only when all of the following hold:
/// - its base is a power of two no larger than 16,
/// - its translation strategy is `TranslationStrategy::Arbitrary`,
/// - its LUT strategy is `LutStrategy::SmallDirect`,
/// - every digit below the base maps to an ASCII character (<= 0x7F).
pub fn from_dictionary(dict: &Dictionary) -> Option<Self> {
    let metadata = DictionaryMetadata::from_dictionary(dict);

    let base_fits = metadata.base <= 16 && metadata.base.is_power_of_two();
    if !base_fits {
        return None;
    }

    let is_arbitrary = matches!(metadata.strategy, TranslationStrategy::Arbitrary { .. });
    if !is_arbitrary || metadata.lut_strategy() != LutStrategy::SmallDirect {
        return None;
    }

    // Fill both directions in one pass: digit -> byte and byte -> digit.
    // Unused decode slots keep the 0xFF "invalid" sentinel.
    let mut encode_lut = [0u8; 16];
    let mut decode_lut = [0xFFu8; 256];
    for digit in 0..metadata.base {
        let ch = dict.encode_digit(digit)?;
        if (ch as u32) > 0x7F {
            // Non-ASCII symbols cannot live in a one-byte-per-symbol table.
            return None;
        }
        let symbol = ch as u8;
        encode_lut[digit] = symbol;
        decode_lut[symbol as usize] = digit as u8;
    }

    Some(Self {
        metadata,
        encode_lut,
        decode_lut,
    })
}
/// Encodes `data` as two alphabet characters per input byte, high nibble
/// first.
///
/// Returns `None` when the dictionary base is not 16. `_dict` is unused; the
/// table captured at construction drives the encoding. On x86_64 the AVX2 or
/// SSSE3 implementation is picked at runtime when available, on aarch64 the
/// NEON implementation is used, and every other target uses the scalar loop.
pub fn encode(&self, data: &[u8], _dict: &Dictionary) -> Option<String> {
    if self.metadata.base != 16 {
        return None;
    }
    if data.is_empty() {
        return Some(String::new());
    }

    // Every input byte becomes exactly two output characters.
    let mut result = String::with_capacity(data.len() * 2);

    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 availability was confirmed by the runtime check above.
            unsafe { self.encode_avx2_impl(data, &mut result) };
        } else if is_x86_feature_detected!("ssse3") {
            // SAFETY: SSSE3 availability was confirmed by the runtime check above.
            unsafe { self.encode_ssse3_impl(data, &mut result) };
        } else {
            self.encode_scalar(data, &mut result);
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        // SAFETY: relies on NEON being present on the aarch64 target.
        unsafe { self.encode_neon_impl(data, &mut result) };
    }

    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    {
        self.encode_scalar(data, &mut result);
    }

    Some(result)
}
/// AVX2 encoder: converts 32 input bytes into 64 output characters per
/// iteration and hands anything shorter than a full block to the SSSE3 path.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn encode_avx2_impl(&self, data: &[u8], result: &mut String) {
    use std::arch::x86_64::*;
    const BLOCK_SIZE: usize = 32;

    if data.len() < BLOCK_SIZE {
        unsafe { self.encode_ssse3_impl(data, result) };
        return;
    }

    // The 16-byte table is duplicated into both 128-bit lanes because the
    // byte shuffle works per lane.
    let lut_128 = unsafe { _mm_loadu_si128(self.encode_lut.as_ptr() as *const __m128i) };
    let lut_256 = _mm256_broadcastsi128_si256(lut_128);
    let nibble_mask = _mm256_set1_epi8(0x0F);

    let mut blocks = data.chunks_exact(BLOCK_SIZE);
    for block in blocks.by_ref() {
        let bytes = unsafe { _mm256_loadu_si256(block.as_ptr() as *const __m256i) };

        let hi_digits = _mm256_and_si256(_mm256_srli_epi32(bytes, 4), nibble_mask);
        let lo_digits = _mm256_and_si256(bytes, nibble_mask);
        let hi_symbols = _mm256_shuffle_epi8(lut_256, hi_digits);
        let lo_symbols = _mm256_shuffle_epi8(lut_256, lo_digits);

        // Interleave hi/lo symbols per lane, then reorder the 128-bit halves
        // so the output follows the input byte order.
        let mixed_lo = _mm256_unpacklo_epi8(hi_symbols, lo_symbols);
        let mixed_hi = _mm256_unpackhi_epi8(hi_symbols, lo_symbols);
        let first_half = _mm256_permute2x128_si256(mixed_lo, mixed_hi, 0x20);
        let second_half = _mm256_permute2x128_si256(mixed_lo, mixed_hi, 0x31);

        let mut staged = [0u8; 64];
        unsafe {
            _mm256_storeu_si256(staged.as_mut_ptr() as *mut __m256i, first_half);
            _mm256_storeu_si256(staged.as_mut_ptr().add(32) as *mut __m256i, second_half);
        }
        result.extend(staged.iter().map(|&symbol| symbol as char));
    }

    let tail = blocks.remainder();
    if !tail.is_empty() {
        unsafe { self.encode_ssse3_impl(tail, result) };
    }
}
/// SSSE3 encoder: converts 16 input bytes into 32 output characters per
/// iteration; leftovers (and inputs shorter than one block) use the scalar
/// loop.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSSE3.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3")]
unsafe fn encode_ssse3_impl(&self, data: &[u8], result: &mut String) {
    use std::arch::x86_64::*;
    const BLOCK_SIZE: usize = 16;

    if data.len() < BLOCK_SIZE {
        self.encode_scalar(data, result);
        return;
    }

    let lut = unsafe { _mm_loadu_si128(self.encode_lut.as_ptr() as *const __m128i) };
    let nibble_mask = _mm_set1_epi8(0x0F);

    let mut blocks = data.chunks_exact(BLOCK_SIZE);
    for block in blocks.by_ref() {
        let bytes = unsafe { _mm_loadu_si128(block.as_ptr() as *const __m128i) };

        let hi_digits = _mm_and_si128(_mm_srli_epi32(bytes, 4), nibble_mask);
        let lo_digits = _mm_and_si128(bytes, nibble_mask);
        let hi_symbols = _mm_shuffle_epi8(lut, hi_digits);
        let lo_symbols = _mm_shuffle_epi8(lut, lo_digits);

        // Interleave so each input byte yields its high symbol, then its low one.
        let first_half = _mm_unpacklo_epi8(hi_symbols, lo_symbols);
        let second_half = _mm_unpackhi_epi8(hi_symbols, lo_symbols);

        let mut staged = [0u8; 32];
        unsafe {
            _mm_storeu_si128(staged.as_mut_ptr() as *mut __m128i, first_half);
            _mm_storeu_si128(staged.as_mut_ptr().add(16) as *mut __m128i, second_half);
        }
        result.extend(staged.iter().map(|&symbol| symbol as char));
    }

    let tail = blocks.remainder();
    if !tail.is_empty() {
        self.encode_scalar(tail, result);
    }
}
/// NEON encoder: converts 16 input bytes into 32 output characters per
/// iteration; leftovers (and inputs shorter than one block) use the scalar
/// loop.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn encode_neon_impl(&self, data: &[u8], result: &mut String) {
    use std::arch::aarch64::*;
    const BLOCK_SIZE: usize = 16;

    if data.len() < BLOCK_SIZE {
        self.encode_scalar(data, result);
        return;
    }

    let lut = unsafe { vld1q_u8(self.encode_lut.as_ptr()) };
    let nibble_mask = vdupq_n_u8(0x0F);

    let mut blocks = data.chunks_exact(BLOCK_SIZE);
    for block in blocks.by_ref() {
        let bytes = unsafe { vld1q_u8(block.as_ptr()) };

        let hi_digits = vandq_u8(vshrq_n_u8(bytes, 4), nibble_mask);
        let lo_digits = vandq_u8(bytes, nibble_mask);
        let hi_symbols = vqtbl1q_u8(lut, hi_digits);
        let lo_symbols = vqtbl1q_u8(lut, lo_digits);

        // Zip so each input byte yields its high symbol, then its low one.
        let first_half = vzip1q_u8(hi_symbols, lo_symbols);
        let second_half = vzip2q_u8(hi_symbols, lo_symbols);

        let mut staged = [0u8; 32];
        unsafe {
            vst1q_u8(staged.as_mut_ptr(), first_half);
            vst1q_u8(staged.as_mut_ptr().add(16), second_half);
        }
        result.extend(staged.iter().map(|&symbol| symbol as char));
    }

    let tail = blocks.remainder();
    if !tail.is_empty() {
        self.encode_scalar(tail, result);
    }
}
/// Portable encoder: appends the symbol for the high nibble and then the
/// symbol for the low nibble of every byte in `data` to `result`.
fn encode_scalar(&self, data: &[u8], result: &mut String) {
    result.extend(data.iter().flat_map(|&byte| {
        let hi_symbol = self.encode_lut[usize::from(byte >> 4)];
        let lo_symbol = self.encode_lut[usize::from(byte & 0x0F)];
        [hi_symbol as char, lo_symbol as char]
    }));
}
/// Decodes `encoded` back into bytes, consuming two alphabet characters per
/// output byte.
///
/// Returns `None` when the dictionary base is not 16, when the input length
/// is odd, or when the selected implementation reports a byte that is not
/// part of the alphabet. `_dict` is unused. Implementation selection mirrors
/// `encode`: AVX2/SSSE3 at runtime on x86_64, NEON on aarch64, scalar
/// elsewhere.
pub fn decode(&self, encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
    if self.metadata.base != 16 {
        return None;
    }
    if encoded.is_empty() {
        return Some(Vec::new());
    }
    if !encoded.len().is_multiple_of(2) {
        return None;
    }

    let input = encoded.as_bytes();
    let mut result = Vec::with_capacity(input.len() / 2);

    #[cfg(target_arch = "x86_64")]
    let success = if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 availability was confirmed by the runtime check above.
        unsafe { self.decode_avx2_impl(input, &mut result) }
    } else if is_x86_feature_detected!("ssse3") {
        // SAFETY: SSSE3 availability was confirmed by the runtime check above.
        unsafe { self.decode_ssse3_impl(input, &mut result) }
    } else {
        self.decode_scalar(input, &mut result)
    };

    // SAFETY: relies on NEON being present on the aarch64 target.
    #[cfg(target_arch = "aarch64")]
    let success = unsafe { self.decode_neon_impl(input, &mut result) };

    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    let success = self.decode_scalar(input, &mut result);

    success.then_some(result)
}
/// AVX2 decoder: turns 32 input characters into 16 output bytes per
/// iteration and hands anything shorter than a full block to the SSSE3 path.
///
/// Returns `false` as soon as a byte outside the alphabet is found; bytes
/// decoded before that point may already have been appended to `result`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn decode_avx2_impl(&self, encoded: &[u8], result: &mut Vec<u8>) -> bool {
    use std::arch::x86_64::*;
    const BLOCK_SIZE: usize = 32;

    if encoded.len() < BLOCK_SIZE {
        return unsafe { self.decode_ssse3_impl(encoded, result) };
    }

    let lut_128 = unsafe { _mm_loadu_si128(self.encode_lut.as_ptr() as *const __m128i) };
    let lut_256 = _mm256_broadcastsi128_si256(lut_128);

    // Per-lane shuffles gathering the even (high-nibble) and odd (low-nibble)
    // positions into the low 8 bytes of each lane; -1 zeroes the rest.
    let pick_even = _mm256_setr_epi8(
        0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1, 0, 2, 4, 6, 8, 10, 12, 14,
        -1, -1, -1, -1, -1, -1, -1, -1,
    );
    let pick_odd = _mm256_setr_epi8(
        1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1, 1, 3, 5, 7, 9, 11, 13, 15,
        -1, -1, -1, -1, -1, -1, -1, -1,
    );

    let mut blocks = encoded.chunks_exact(BLOCK_SIZE);
    for block in blocks.by_ref() {
        let input = unsafe { _mm256_loadu_si256(block.as_ptr() as *const __m256i) };

        // Reverse lookup by comparing against every alphabet symbol in turn.
        let mut indices = _mm256_setzero_si256();
        for (digit, &symbol) in self.encode_lut.iter().enumerate() {
            let candidate = _mm256_set1_epi8(symbol as i8);
            let hit = _mm256_cmpeq_epi8(input, candidate);
            let digit_vec = _mm256_set1_epi8(digit as i8);
            indices = _mm256_blendv_epi8(indices, digit_vec, hit);
        }

        // Unmatched bytes kept index 0; re-encoding the indices exposes them
        // because the result no longer equals the input.
        let round_trip = _mm256_shuffle_epi8(lut_256, indices);
        let agrees = _mm256_cmpeq_epi8(round_trip, input);
        if _mm256_movemask_epi8(agrees) != -1 {
            return false;
        }

        let hi_digits = _mm256_shuffle_epi8(indices, pick_even);
        let lo_digits = _mm256_shuffle_epi8(indices, pick_odd);
        let packed = _mm256_or_si256(_mm256_slli_epi32(hi_digits, 4), lo_digits);

        let low_lane = _mm256_castsi256_si128(packed);
        let high_lane = _mm256_extracti128_si256(packed, 1);
        let mut low_bytes = [0u8; 16];
        let mut high_bytes = [0u8; 16];
        unsafe {
            _mm_storeu_si128(low_bytes.as_mut_ptr() as *mut __m128i, low_lane);
            _mm_storeu_si128(high_bytes.as_mut_ptr() as *mut __m128i, high_lane);
        }
        // Only the low 8 bytes of each lane carry decoded output.
        result.extend_from_slice(&low_bytes[..8]);
        result.extend_from_slice(&high_bytes[..8]);
    }

    let tail = blocks.remainder();
    tail.is_empty() || unsafe { self.decode_ssse3_impl(tail, result) }
}
/// SSSE3 decoder: turns 16 input characters into 8 output bytes per
/// iteration; leftovers use the scalar loop.
///
/// Returns `false` as soon as a byte outside the alphabet is found; bytes
/// decoded before that point may already have been appended to `result`.
///
/// Only SSE2/SSSE3 instructions are used. The per-symbol select is built from
/// and/andnot/or rather than `_mm_blendv_epi8`, because that intrinsic needs
/// SSE4.1, which this function's feature gate does not guarantee.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSSE3.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "ssse3")]
unsafe fn decode_ssse3_impl(&self, encoded: &[u8], result: &mut Vec<u8>) -> bool {
    use std::arch::x86_64::*;
    const BLOCK_SIZE: usize = 16;

    let lut = unsafe { _mm_loadu_si128(self.encode_lut.as_ptr() as *const __m128i) };

    // Shuffles gathering the even (high-nibble) and odd (low-nibble)
    // positions into the low 8 bytes; -1 zeroes the remaining bytes.
    let pick_even = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1);
    let pick_odd = _mm_setr_epi8(1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1);

    let mut blocks = encoded.chunks_exact(BLOCK_SIZE);
    for block in blocks.by_ref() {
        let input = unsafe { _mm_loadu_si128(block.as_ptr() as *const __m128i) };

        // Reverse lookup by comparing against every alphabet symbol in turn.
        let mut indices = _mm_setzero_si128();
        for (digit, &symbol) in self.encode_lut.iter().enumerate() {
            let candidate = _mm_set1_epi8(symbol as i8);
            let hit = _mm_cmpeq_epi8(input, candidate);
            let digit_vec = _mm_set1_epi8(digit as i8);
            // `hit` lanes are all-ones or all-zeros, so this bitwise select
            // is exactly a byte blend: (hit & digit) | (!hit & indices).
            indices = _mm_or_si128(
                _mm_and_si128(hit, digit_vec),
                _mm_andnot_si128(hit, indices),
            );
        }

        // Unmatched bytes kept index 0; re-encoding the indices exposes them
        // because the result no longer equals the input.
        let round_trip = _mm_shuffle_epi8(lut, indices);
        let agrees = _mm_cmpeq_epi8(round_trip, input);
        if _mm_movemask_epi8(agrees) != 0xFFFF {
            return false;
        }

        let hi_digits = _mm_shuffle_epi8(indices, pick_even);
        let lo_digits = _mm_shuffle_epi8(indices, pick_odd);
        let packed = _mm_or_si128(_mm_slli_epi32(hi_digits, 4), lo_digits);

        let mut staged = [0u8; 16];
        unsafe { _mm_storeu_si128(staged.as_mut_ptr() as *mut __m128i, packed) };
        // Only the low 8 bytes carry decoded output.
        result.extend_from_slice(&staged[..8]);
    }

    let tail = blocks.remainder();
    tail.is_empty() || self.decode_scalar(tail, result)
}
/// NEON decoder: turns 16 input characters into 8 output bytes per
/// iteration; leftovers use the scalar loop.
///
/// Returns `false` as soon as a byte outside the alphabet is found; bytes
/// decoded before that point may already have been appended to `result`.
///
/// Validity is tracked with an explicit "matched" mask. Re-encoding the
/// indices through a table lookup is not a reliable check here: an
/// out-of-range table index yields 0, so an unmatched NUL input byte would
/// compare equal to its own failed lookup and slip through.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn decode_neon_impl(&self, encoded: &[u8], result: &mut Vec<u8>) -> bool {
    use std::arch::aarch64::*;
    const BLOCK_SIZE: usize = 16;

    // Table-lookup patterns gathering the even (high-nibble) and odd
    // (low-nibble) positions into the low 8 bytes. The upper 8 lanes are
    // never read back.
    let even_lanes: [u8; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0];
    let odd_lanes: [u8; 16] = [1, 3, 5, 7, 9, 11, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0];
    let pick_even = unsafe { vld1q_u8(even_lanes.as_ptr()) };
    let pick_odd = unsafe { vld1q_u8(odd_lanes.as_ptr()) };

    let mut blocks = encoded.chunks_exact(BLOCK_SIZE);
    for block in blocks.by_ref() {
        let input_vec = unsafe { vld1q_u8(block.as_ptr()) };

        // Reverse lookup by comparing against every alphabet symbol in turn,
        // remembering which lanes matched at least one symbol.
        let mut indices = vdupq_n_u8(0);
        let mut matched = vdupq_n_u8(0);
        for (digit, &symbol) in self.encode_lut.iter().enumerate() {
            let hit = vceqq_u8(input_vec, vdupq_n_u8(symbol));
            indices = vbslq_u8(hit, vdupq_n_u8(digit as u8), indices);
            matched = vorrq_u8(matched, hit);
        }

        // Every lane must be 0xFF (matched); the minimum exposes any miss.
        if vminvq_u8(matched) != 0xFF {
            return false;
        }

        let hi_digits = vqtbl1q_u8(indices, pick_even);
        let lo_digits = vqtbl1q_u8(indices, pick_odd);
        let packed = vorrq_u8(vshlq_n_u8(hi_digits, 4), lo_digits);

        let mut staged = [0u8; 16];
        unsafe { vst1q_u8(staged.as_mut_ptr(), packed) };
        // Only the low 8 bytes carry decoded output.
        result.extend_from_slice(&staged[..8]);
    }

    let tail = blocks.remainder();
    tail.is_empty() || self.decode_scalar(tail, result)
}
/// Portable decoder: consumes `encoded` two bytes at a time and appends one
/// decoded byte per pair to `result`.
///
/// Returns `false` on a trailing unpaired byte or on any byte whose
/// `decode_lut` entry is the 0xFF "invalid" sentinel. Pairs decoded before
/// the failure remain in `result`.
fn decode_scalar(&self, encoded: &[u8], result: &mut Vec<u8>) -> bool {
    for pair in encoded.chunks(2) {
        // A short final chunk means the input had an odd number of bytes.
        let &[hi_char, lo_char] = pair else {
            return false;
        };

        let hi_nibble = self.decode_lut[usize::from(hi_char)];
        let lo_nibble = self.decode_lut[usize::from(lo_char)];
        if hi_nibble == 0xFF || lo_nibble == 0xFF {
            return false;
        }

        result.push((hi_nibble << 4) | lo_nibble);
    }
    true
}
}
#[cfg(test)]
#[allow(deprecated)]
mod tests {
    use super::*;

    /// Sixteen ASCII letters in descending order, used by most tests as a
    /// non-sequential base16 alphabet.
    const REVERSED_ALPHA: &str = "zyxwvutsrqponmlk";

    /// Builds a `Dictionary` whose digits are the characters of `alphabet`.
    fn dictionary_from(alphabet: &str) -> Dictionary {
        let chars: Vec<char> = alphabet.chars().collect();
        Dictionary::new(chars).unwrap()
    }

    /// Builds a codec for `alphabet`, panicking if the codec rejects it.
    fn codec_with_dict(alphabet: &str) -> (SmallLutCodec, Dictionary) {
        let dict = dictionary_from(alphabet);
        let codec = SmallLutCodec::from_dictionary(&dict).unwrap();
        (codec, dict)
    }

    /// Encodes then decodes `data` and checks the bytes survive unchanged.
    fn assert_round_trip(codec: &SmallLutCodec, dict: &Dictionary, data: &[u8]) {
        let encoded = codec.encode(data, dict).unwrap();
        let decoded = codec.decode(&encoded, dict).unwrap();
        assert_eq!(&decoded[..], data);
    }

    #[test]
    fn test_creation_from_arbitrary_base16() {
        let dict = dictionary_from(REVERSED_ALPHA);
        assert!(
            SmallLutCodec::from_dictionary(&dict).is_some(),
            "Should create codec for arbitrary base16"
        );
    }

    #[test]
    fn test_rejects_sequential_dictionary() {
        let chars: Vec<char> = (0x30..0x40).map(|c| char::from_u32(c).unwrap()).collect();
        let dict = Dictionary::new(chars).unwrap();
        assert!(
            SmallLutCodec::from_dictionary(&dict).is_none(),
            "Should reject sequential (use GenericSimdCodec)"
        );
    }

    #[test]
    fn test_rejects_large_dictionary() {
        let dict = dictionary_from("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567");
        assert!(
            SmallLutCodec::from_dictionary(&dict).is_none(),
            "Should reject base32 (too large)"
        );
    }

    #[test]
    fn test_rejects_non_power_of_two() {
        let dict = dictionary_from("0123456789");
        assert!(
            SmallLutCodec::from_dictionary(&dict).is_none(),
            "Should reject non-power-of-2 base"
        );
    }

    #[test]
    fn test_lut_construction() {
        let (codec, _dict) = codec_with_dict("9876543210ZYXWVU");

        // Forward table: digit -> symbol.
        assert_eq!(codec.encode_lut[0], b'9');
        assert_eq!(codec.encode_lut[1], b'8');
        assert_eq!(codec.encode_lut[15], b'U');

        // Reverse table: symbol -> digit.
        assert_eq!(codec.decode_lut[b'9' as usize], 0);
        assert_eq!(codec.decode_lut[b'8' as usize], 1);
        assert_eq!(codec.decode_lut[b'0' as usize], 9);
        assert_eq!(codec.decode_lut[b'U' as usize], 15);

        // Symbols outside the alphabet keep the invalid sentinel.
        assert_eq!(codec.decode_lut[b'A' as usize], 0xFF);
        assert_eq!(codec.decode_lut[b'a' as usize], 0xFF);
    }

    #[test]
    fn test_encode_shuffled_base16() {
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        let encoded = codec.encode(&[0xABu8], &dict).unwrap();
        assert_eq!(encoded, "po");
    }

    #[test]
    fn test_encode_standard_hex_rejected() {
        let dict = dictionary_from("0123456789ABCDEF");
        assert!(
            SmallLutCodec::from_dictionary(&dict).is_none(),
            "Sequential hex should use GenericSimdCodec, not SmallLutCodec"
        );
    }

    #[test]
    fn test_encode_various_sizes() {
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        // Exactly one block, two blocks, just under a block, just over a block.
        for len in [16u8, 32, 15, 17] {
            let data: Vec<u8> = (0..len).collect();
            let encoded = codec.encode(&data, &dict).unwrap();
            assert_eq!(encoded.len(), usize::from(len) * 2);
        }
    }

    #[test]
    fn test_encode_empty_input() {
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        let data: Vec<u8> = Vec::new();
        let encoded = codec.encode(&data, &dict).unwrap();
        assert_eq!(encoded, "");
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    fn test_integration_smalllut_selection() {
        use crate::simd::encode_with_simd;

        let dictionary = "fedcba9876543210";
        let dict = dictionary_from(dictionary);
        let data = b"\x00\x11\x22\x33\x44\x55\x66\x77\x88\x99\xAA\xBB\xCC\xDD\xEE\xFF\
\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA\x98\x76\x54\x32\x10";

        let result = encode_with_simd(data, &dict);
        assert!(
            result.is_some(),
            "SmallLutCodec should be selected for shuffled base16"
        );
        let encoded = result.unwrap();
        assert_eq!(encoded.len(), 64);

        let symbol_at = |position: usize| encoded.chars().nth(position).unwrap();
        assert_eq!(symbol_at(0), 'f');
        assert_eq!(symbol_at(1), 'f');
        assert_eq!(symbol_at(2), 'e');
        assert_eq!(symbol_at(3), 'e');
        for ch in encoded.chars() {
            assert!(
                dictionary.contains(ch),
                "Output char '{}' should be in dictionary",
                ch
            );
        }
        assert_eq!(symbol_at(30), '0');
        assert_eq!(symbol_at(31), '0');
    }

    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_simd_path_verified_x86() {
        use crate::simd::{encode_with_simd, has_ssse3};

        if !has_ssse3() {
            eprintln!("SSSE3 not available, skipping SIMD verification");
            return;
        }
        let dict = dictionary_from("9876543210zyxwvu");
        let data = b"\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA\x98\x76\x54\x32\x10";
        let result = encode_with_simd(data, &dict);
        assert!(result.is_some(), "SIMD should be available");
        assert_eq!(result.unwrap().len(), 32);
    }

    #[test]
    #[cfg(target_arch = "aarch64")]
    fn test_simd_path_verified_arm() {
        use crate::simd::encode_with_simd;

        let dict = dictionary_from("9876543210zyxwvu");
        let data = b"\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA\x98\x76\x54\x32\x10";
        let result = encode_with_simd(data, &dict);
        assert!(result.is_some(), "SIMD should be available");
        assert_eq!(result.unwrap().len(), 32);
    }

    #[test]
    fn test_decode_round_trip() {
        let (codec, dict) = codec_with_dict("9876543210ZYXWVU");
        assert_round_trip(&codec, &dict, b"\x01\x23\x45\x67\x89\xAB\xCD\xEF");
    }

    #[test]
    fn test_decode_invalid_character() {
        let (codec, dict) = codec_with_dict("0123456789ZYXWVU");
        let result = codec.decode("01A3", &dict);
        assert!(result.is_none(), "Should reject invalid character 'A'");
    }

    #[test]
    fn test_decode_odd_length() {
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        let result = codec.decode("zyx", &dict);
        assert!(result.is_none(), "Should reject odd-length input");
    }

    #[test]
    fn test_decode_empty_input() {
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        let result = codec.decode("", &dict).unwrap();
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_decode_various_sizes() {
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        assert_round_trip(&codec, &dict, b"\xAB");
        for len in [16u8, 32, 17] {
            let data: Vec<u8> = (0..len).collect();
            assert_round_trip(&codec, &dict, &data);
        }
    }

    #[test]
    fn test_decode_all_nibble_values() {
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        let data: Vec<u8> = (0..=255).collect();
        assert_round_trip(&codec, &dict, &data);
    }

    #[test]
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    fn test_integration_decode_selection() {
        use crate::simd::{decode_with_simd, encode_with_simd};

        let dict = dictionary_from("fedcba9876543210");
        let data = b"Hello, SIMD world! Testing decode path...";
        let encoded = encode_with_simd(data, &dict).expect("Encode failed");
        let decoded = decode_with_simd(&encoded, &dict).expect("Decode failed");
        assert_eq!(&decoded[..], &data[..]);
    }

    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_smalllut_avx2() {
        if !is_x86_feature_detected!("avx2") {
            eprintln!("AVX2 not available, skipping");
            return;
        }
        let (codec, dict) = codec_with_dict(REVERSED_ALPHA);
        let data: Vec<u8> = (0..=255).collect();
        let encoded = codec.encode(&data, &dict).unwrap();
        assert_eq!(encoded.len(), 512);
        let decoded = codec.decode(&encoded, &dict).unwrap();
        assert_eq!(&decoded[..], &data[..]);
    }

    #[test]
    fn test_decode_case_sensitive() {
        let (codec, dict) = codec_with_dict("zyxwvutsrqpABCDE");
        let data = b"\xF0";

        let encoded = codec.encode(data, &dict).unwrap();
        assert_eq!(encoded, "Ez");
        let decoded = codec.decode(&encoded, &dict).unwrap();
        assert_eq!(&decoded[..], &data[..]);

        let result = codec.decode("ez", &dict);
        assert!(
            result.is_none(),
            "Should reject wrong case (lowercase 'e' not in dictionary)"
        );
    }
}