use crate::simd_gather::gather_u32index_u8;
use crate::wide_utils::WideUtilsExt;
use std::fmt;
use wide::{u8x16, u16x16, u32x16};
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint8x16x4_t, vld1q_u8, vqtbl4q_u8, vst1q_u8};
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use core::arch::x86_64::{
__m128i, __m512i, _mm_loadu_si128, _mm_storeu_si128, _mm512_castsi128_si512,
_mm512_castsi512_si128, _mm512_loadu_si512, _mm512_permutexvar_epi8, _mm512_storeu_si512,
};
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use std::arch::is_x86_feature_detected as det;
/// A 64-entry byte lookup table held in an architecture-specific form.
///
/// On aarch64 the table lives in four 16-byte NEON vectors. On x86/x86_64 the
/// raw bytes are always stored, plus an optional 512-bit copy that is only
/// filled in when `Table64::new` detects the AVX-512 features it checks for.
pub struct Table64 {
    /// The table split into four consecutive 16-byte NEON vectors.
    #[cfg(target_arch = "aarch64")]
    neon_tbl: uint8x16x4_t,
    /// Plain copy of the 64 table bytes, used by the scalar fallback.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    bytes: [u8; 64],
    /// The table loaded into a 512-bit vector, or `None` when the AVX-512
    /// path is not available at runtime.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    zmm: Option<__m512i>,
}
impl Table64 {
    /// Builds a lookup table from 64 bytes.
    ///
    /// On x86/x86_64 the bytes are copied and, when `is_x86_avx512_vbmi()`
    /// reports support, also loaded into a 512-bit vector. On aarch64 the
    /// bytes are loaded into four NEON vectors.
    #[inline]
    pub fn new(table: &[u8; 64]) -> Self {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            let zmm = if is_x86_avx512_vbmi() {
                // SAFETY: runtime detection reported AVX-512 support, and the
                // unaligned load reads exactly the 64 bytes behind `table`.
                Some(unsafe { _mm512_loadu_si512(table.as_ptr() as *const _) })
            } else {
                None
            };
            Self { bytes: *table, zmm }
        }
        #[cfg(target_arch = "aarch64")]
        {
            let base = table.as_ptr();
            // SAFETY: the four 16-byte loads start at offsets 0, 16, 32 and 48
            // and therefore stay inside the 64-byte `table`.
            let neon_tbl = unsafe {
                uint8x16x4_t(
                    vld1q_u8(base),
                    vld1q_u8(base.add(16)),
                    vld1q_u8(base.add(32)),
                    vld1q_u8(base.add(48)),
                )
            };
            Self { neon_tbl }
        }
    }

    /// Looks up 16 table entries at once: lane `n` of the result is the table
    /// byte selected by lane `n` of `idx`.
    ///
    /// Every index is expected to be below 64. What happens for larger values
    /// depends on the code path taken; the scalar x86 fallback panics on them.
    #[inline]
    pub fn lookup_one(&self, idx: u8x16) -> u8x16 {
        // SAFETY: the load reads the 16 bytes of `idx`, the store writes the
        // 16 bytes of `out`; both are local, fully initialized arrays.
        #[cfg(target_arch = "aarch64")]
        unsafe {
            let i = vld1q_u8(idx.as_array().as_ptr());
            let r = vqtbl4q_u8(self.neon_tbl, i);
            let mut out = [0u8; 16];
            vst1q_u8(out.as_mut_ptr(), r);
            u8x16::from(out)
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            match self.zmm {
                // SAFETY: `zmm` is only `Some` when `new` detected AVX-512
                // support. The 128-bit load/store touch exactly the 16 bytes
                // of `idx` / `result`. Only the low 128 bits of the permute
                // result are kept, so the unspecified upper bits produced by
                // the 128 -> 512 cast never reach the output.
                Some(tzmm) => unsafe {
                    let iv_128 = _mm_loadu_si128(idx.as_array().as_ptr() as *const __m128i);
                    let iv = _mm512_castsi128_si512(iv_128);
                    let rv = _mm512_permutexvar_epi8(iv, tzmm);
                    let rv_128 = _mm512_castsi512_si128(rv);
                    let mut result = [0u8; 16];
                    _mm_storeu_si128(result.as_mut_ptr() as *mut __m128i, rv_128);
                    u8x16::from(result)
                },
                None => scalar_lookup_1x16(&self.bytes, idx),
            }
        }
        #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
        compile_error!(
            "Table64::lookup_one is implemented for aarch64 (NEON) and x86/x86_64 (AVX-512VBMI)."
        );
    }

    /// Treats the table as an 8x8 row-major grid and looks up 16
    /// `(row, col)` pairs at once.
    ///
    /// The flat index is `row * 8 + col`, computed by doubling `rows` three
    /// times. Debug builds assert that every row and column is below 8.
    #[inline]
    pub fn lookup_one_2d(&self, rows: u8x16, cols: u8x16) -> u8x16 {
        debug_assert!(
            rows.to_array().iter().all(|&r| r < 8),
            "All row indices must be < 8"
        );
        debug_assert!(
            cols.to_array().iter().all(|&c| c < 8),
            "All column indices must be < 8"
        );
        let idx = rows.double().double().double() + cols;
        self.lookup_one(idx)
    }

    /// Returns the 64 table bytes as a plain array.
    #[inline]
    fn as_bytes(&self) -> [u8; 64] {
        #[cfg(target_arch = "aarch64")]
        {
            let mut bytes = [0u8; 64];
            let base = bytes.as_mut_ptr();
            // SAFETY: the four 16-byte stores start at offsets 0, 16, 32 and
            // 48 and therefore stay inside the 64-byte `bytes` buffer.
            unsafe {
                vst1q_u8(base, self.neon_tbl.0);
                vst1q_u8(base.add(16), self.neon_tbl.1);
                vst1q_u8(base.add(32), self.neon_tbl.2);
                vst1q_u8(base.add(48), self.neon_tbl.3);
            }
            bytes
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            self.bytes
        }
    }

    /// Looks up every vector of `idx` and writes the results to the matching
    /// position of `out`.
    ///
    /// On x86 with the AVX-512 path available, four vectors are processed per
    /// 512-bit permute; any remaining vectors go through [`Self::lookup_one`]
    /// so that the tail uses the same code path as a single-vector lookup.
    ///
    /// # Panics
    ///
    /// Panics if `idx` and `out` have different lengths.
    #[inline]
    pub fn lookup(&self, idx: &[u8x16], out: &mut [u8x16]) {
        assert_eq!(idx.len(), out.len());
        // SAFETY: `idx` and `out` have the same length (asserted above), and
        // every access covers the 16 bytes of vector `b` with `b < idx.len()`.
        // This relies on `u8x16` occupying exactly 16 bytes.
        #[cfg(target_arch = "aarch64")]
        unsafe {
            let idx_bytes = idx.as_ptr() as *const u8;
            let out_bytes = out.as_mut_ptr() as *mut u8;
            for b in 0..idx.len() {
                let i = vld1q_u8(idx_bytes.add(b * 16));
                let r = vqtbl4q_u8(self.neon_tbl, i);
                vst1q_u8(out_bytes.add(b * 16), r);
            }
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Number of vectors already handled by the 512-bit loop.
            let mut done = 0usize;
            if let Some(tzmm) = self.zmm {
                let idx_bytes = idx.as_ptr() as *const u8;
                let out_bytes = out.as_mut_ptr() as *mut u8;
                while done + 4 <= idx.len() {
                    let off = done * 16;
                    // SAFETY: `zmm` is only `Some` when `new` detected AVX-512
                    // support. Vectors `done..done + 4` exist in both slices
                    // (equal lengths asserted above), so the unaligned 64-byte
                    // load and store stay in bounds. This relies on `u8x16`
                    // occupying exactly 16 bytes.
                    unsafe {
                        let iv = _mm512_loadu_si512(idx_bytes.add(off) as *const __m512i);
                        let rv = _mm512_permutexvar_epi8(iv, tzmm);
                        _mm512_storeu_si512(out_bytes.add(off) as *mut __m512i, rv);
                    }
                    done += 4;
                }
            }
            // Tail (or everything, without AVX-512): one vector at a time.
            // `lookup_one` picks the vector path when available and the scalar
            // fallback otherwise.
            for (dst, &src) in out[done..].iter_mut().zip(&idx[done..]) {
                *dst = self.lookup_one(src);
            }
        }
        #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
        compile_error!(
            "Table64::lookup is implemented for aarch64 (NEON) and x86/x86_64 (AVX-512VBMI)."
        );
    }
}
impl Clone for Table64 {
    /// Rebuilds the table from its bytes, so the clone goes through the same
    /// construction path as [`Table64::new`].
    fn clone(&self) -> Self {
        Self::new(&self.as_bytes())
    }
}
impl Default for Table64 {
    /// Creates a table whose 64 entries are all zero.
    fn default() -> Self {
        let zeros = [0u8; 64];
        Self::new(&zeros)
    }
}
impl fmt::Debug for Table64 {
    /// Prints the table as an 8x8 grid, one row of eight entries per line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let table = self.as_bytes();
        writeln!(f, "Table64 {{")?;
        writeln!(f, " col 0 col 1 col 2 col 3 col 4 col 5 col 6 col 7")?;
        // Eight consecutive bytes form one row of the grid.
        for (row, cells) in table.chunks_exact(8).enumerate() {
            write!(f, "row {}: ", row)?;
            for cell in cells {
                write!(f, "{:5} ", cell)?;
            }
            writeln!(f)?;
        }
        write!(f, "}}")
    }
}
/// A two-dimensional byte table stored row-major in a flat buffer.
///
/// The derived `Default` yields an empty table with `num_cols == 0`.
#[derive(Clone, Default)]
pub struct Table2dU8xU8 {
    /// Table entries, one row after another.
    data: Vec<u8>,
    /// Number of entries per row.
    num_cols: u16,
}
impl Table2dU8xU8 {
    /// Creates a table from row-major `data` with `num_cols` entries per row.
    ///
    /// # Panics
    ///
    /// Panics if `num_cols` is not in `1..=256`, if `data.len()` is not a
    /// multiple of `num_cols`, or if `data` holds more than 65536 entries.
    #[inline]
    pub fn from_flat(data: &[u8], num_cols: usize) -> Self {
        assert!(num_cols > 0 && num_cols <= 256, "num_cols must be 1..=256");
        assert!(data.len() % num_cols == 0, "data length must be multiple of num_cols");
        assert!(data.len() <= 65536, "data length must be <= 65536 (64K entries)");
        Self {
            data: data.to_vec(),
            // Cannot truncate: `num_cols <= 256` was asserted above.
            num_cols: num_cols as u16,
        }
    }

    /// Creates a table from a slice of equally long rows.
    ///
    /// # Panics
    ///
    /// Panics if `matrix` is empty, if the row length is not in `1..=256`,
    /// if the rows differ in length, or if the table would hold more than
    /// 65536 entries.
    #[inline]
    pub fn from_2d(matrix: &[&[u8]]) -> Self {
        assert!(!matrix.is_empty(), "matrix cannot be empty");
        let num_cols = matrix[0].len();
        assert!(num_cols > 0 && num_cols <= 256, "num_cols must be 1..=256");
        assert!(matrix.iter().all(|row| row.len() == num_cols), "all rows must have same length");
        let total = matrix.len() * num_cols;
        assert!(total <= 65536, "total size must be <= 65536");
        let mut data = Vec::with_capacity(total);
        for row in matrix {
            data.extend_from_slice(row);
        }
        Self {
            data,
            // Cannot truncate: `num_cols <= 256` was asserted above.
            num_cols: num_cols as u16,
        }
    }

    /// Number of entries per row.
    #[inline]
    pub fn num_cols(&self) -> usize {
        self.num_cols as usize
    }

    /// Number of rows in the table.
    ///
    /// Returns 0 for a `Default`-constructed table, whose column count is 0.
    #[inline]
    pub fn num_rows(&self) -> usize {
        // `checked_div` avoids the division-by-zero panic when `num_cols == 0`.
        self.data.len().checked_div(self.num_cols()).unwrap_or(0)
    }

    /// Total number of entries in the table.
    #[inline]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` when the table holds no entries.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Looks up 16 `(row, col)` pairs at once.
    ///
    /// The flat index `row * num_cols + col` is computed per lane in 16-bit
    /// arithmetic, widened to 32 bits and passed to `gather_u32index_u8`
    /// together with the table data. Debug builds assert that every index is
    /// inside the table.
    #[inline]
    pub fn lookup_one(&self, rows: u8x16, cols: u8x16) -> u8x16 {
        let rows_u16: u16x16 = u16x16::from(rows);
        let cols_u16: u16x16 = u16x16::from(cols);
        let num_cols_u16 = u16x16::splat(self.num_cols);
        // With row, col <= 255 and num_cols <= 256 the largest possible index
        // is 255 * 256 + 255 = 65535, which still fits in 16 bits.
        let indices_u16 = rows_u16 * num_cols_u16 + cols_u16;
        let indices_u32: u32x16 = u32x16::from(indices_u16);
        #[cfg(debug_assertions)]
        {
            let idx_arr = indices_u32.to_array();
            for (i, &idx) in idx_arr.iter().enumerate() {
                debug_assert!(
                    (idx as usize) < self.data.len(),
                    "Index out of bounds at lane {}: {} >= {}",
                    i, idx, self.data.len()
                );
            }
        }
        // NOTE(review): the trailing `1` is presumably a byte scale/stride for
        // the gather helper -- confirm against `simd_gather`.
        gather_u32index_u8(indices_u32, &self.data, 1)
    }

    /// Scalar lookup of the entry at `(row, col)`.
    ///
    /// # Panics
    ///
    /// Panics if `row * num_cols + col` is outside the table.
    #[inline]
    pub fn get(&self, row: u8, col: u8) -> u8 {
        let index = usize::from(row) * self.num_cols() + usize::from(col);
        self.data[index]
    }
}
impl fmt::Debug for Table2dU8xU8 {
    /// Prints the table dimensions followed by at most the first 20 rows and
    /// 20 columns; truncated rows/columns are marked with `...`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MAX_DISPLAY_ROWS: usize = 20;
        const MAX_DISPLAY_COLS: usize = 20;

        let num_cols = self.num_cols as usize;
        // A `Default`-constructed table has zero columns; computing the row
        // count with `checked_div` keeps formatting it from panicking.
        let num_rows = self.data.len().checked_div(num_cols).unwrap_or(0);
        writeln!(f, "Table2dU8xU8 {{")?;
        writeln!(f, " dimensions: {} rows × {} cols", num_rows, num_cols)?;
        if self.data.is_empty() {
            return write!(f, " (empty)}}");
        }

        let display_rows = num_rows.min(MAX_DISPLAY_ROWS);
        let display_cols = num_cols.min(MAX_DISPLAY_COLS);
        let show_row_ellipsis = num_rows > MAX_DISPLAY_ROWS;
        let show_col_ellipsis = num_cols > MAX_DISPLAY_COLS;

        // Column header line.
        write!(f, " ")?;
        for col in 0..display_cols {
            write!(f, " col{:3}", col)?;
        }
        if show_col_ellipsis {
            write!(f, " ...")?;
        }
        writeln!(f)?;

        // The data is non-empty here, so `num_cols` is at least 1 and each
        // chunk of `num_cols` bytes is one table row.
        for (row, cells) in self.data.chunks_exact(num_cols).take(display_rows).enumerate() {
            write!(f, " row{:3}:", row)?;
            for cell in &cells[..display_cols] {
                write!(f, "{:5}", cell)?;
            }
            if show_col_ellipsis {
                write!(f, " ...")?;
            }
            writeln!(f)?;
        }
        if show_row_ellipsis {
            writeln!(f, " ...")?;
        }
        write!(f, "}}")
    }
}
/// Reports whether runtime detection finds both the `avx512bw` and the
/// `avx512vbmi` CPU features.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline]
fn is_x86_avx512_vbmi() -> bool {
    if !det!("avx512bw") {
        return false;
    }
    det!("avx512vbmi")
}
/// Scalar fallback: looks up each of the 16 lanes of `idx` in `table`.
///
/// Debug builds assert that every index is below 64; an index of 64 or more
/// panics on the array access in any build.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline]
fn scalar_lookup_1x16(table: &[u8; 64], idx: u8x16) -> u8x16 {
    let i = idx.to_array();
    debug_assert!(i.iter().all(|&x| x < 64));
    // Lanes are translated in order, lane 0 first.
    u8x16::from(i.map(|lane| table[usize::from(lane)]))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// 64-byte table with entry `i` equal to `i * 3 + 7`.
    fn create_test_table() -> [u8; 64] {
        std::array::from_fn(|i| (i * 3 + 7) as u8)
    }

    /// Smoke test: construction and `Debug` formatting do not panic.
    #[test]
    fn test_table64_new() {
        let table = Table64::new(&create_test_table());
        println!("\n{:?}", table);
    }

    #[test]
    fn test_lookup_one_basic() {
        let table_data = create_test_table();
        let table = Table64::new(&table_data);
        let idx = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let result_arr = table.lookup_one(idx).to_array();
        // Indices 0..16 must return the first 16 table entries in order.
        for (i, (&got, &want)) in result_arr.iter().zip(&table_data).enumerate() {
            assert_eq!(
                got, want,
                "Mismatch at index {}: expected {}, got {}",
                i, want, got
            );
        }
    }

    #[test]
    fn test_lookup_one_scattered_indices() {
        let table_data = create_test_table();
        let table = Table64::new(&table_data);
        let idx = u8x16::from([0, 63, 32, 16, 48, 1, 62, 31, 15, 47, 8, 56, 4, 60, 20, 40]);
        let result_arr = table.lookup_one(idx).to_array();
        let idx_arr = idx.to_array();
        for (i, (&got, &ix)) in result_arr.iter().zip(idx_arr.iter()).enumerate() {
            let want = table_data[ix as usize];
            assert_eq!(
                got, want,
                "Mismatch at position {}: idx={}, expected {}, got {}",
                i, ix, want, got
            );
        }
    }

    #[test]
    fn test_lookup_one_all_same_index() {
        let table_data = create_test_table();
        let table = Table64::new(&table_data);
        let expected = table_data[42];
        let result_arr = table.lookup_one(u8x16::splat(42)).to_array();
        for &got in result_arr.iter() {
            assert_eq!(got, expected, "All lookups should return the same value");
        }
    }

    #[test]
    fn test_lookup_batch() {
        let table_data = create_test_table();
        let table = Table64::new(&table_data);
        // Four vectors that together cover every table index exactly once.
        let indices = [
            u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
            u8x16::from([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]),
            u8x16::from([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]),
            u8x16::from([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]),
        ];
        let mut output = vec![u8x16::splat(0); 4];
        table.lookup(&indices, &mut output);
        for (vec_idx, out_vec) in output.iter().enumerate() {
            for (lane, &got) in out_vec.to_array().iter().enumerate() {
                let want = table_data[vec_idx * 16 + lane];
                assert_eq!(
                    got, want,
                    "Mismatch at vec {}, lane {}: expected {}, got {}",
                    vec_idx, lane, want, got
                );
            }
        }
    }

    #[test]
    fn test_lookup_one_matches_lookup_batch() {
        let table = Table64::new(&create_test_table());
        let idx = u8x16::from([5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 0, 32, 63, 1]);
        let single_result = table.lookup_one(idx);
        let mut batch_output = [u8x16::splat(0)];
        table.lookup(&[idx], &mut batch_output);
        assert_eq!(
            single_result.to_array(),
            batch_output[0].to_array(),
            "lookup_one and lookup should produce the same result"
        );
    }

    #[test]
    fn test_identity_table() {
        let table_data: [u8; 64] = std::array::from_fn(|i| i as u8);
        let table = Table64::new(&table_data);
        let idx = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let result = table.lookup_one(idx);
        assert_eq!(idx.to_array(), result.to_array(), "Identity table should return input indices");
    }

    /// 8x8 row-major table with entry `(row, col)` equal to `row * 10 + col`.
    fn create_2d_test_table() -> [u8; 64] {
        std::array::from_fn(|i| ((i / 8) * 10 + i % 8) as u8)
    }

    #[test]
    fn test_lookup_one_2d_basic() {
        let table = Table64::new(&create_2d_test_table());
        let rows = u8x16::from([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]);
        let cols = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        let result_arr = table.lookup_one_2d(rows, cols).to_array();
        // Lanes 0..8 address row 0, lanes 8..16 address row 1.
        for (col, &got) in result_arr[..8].iter().enumerate() {
            assert_eq!(got, col as u8, "Row 0, col {}", col);
        }
        for (col, &got) in result_arr[8..].iter().enumerate() {
            assert_eq!(got, (10 + col) as u8, "Row 1, col {}", col);
        }
    }

    #[test]
    fn test_lookup_one_2d_diagonal() {
        let table = Table64::new(&create_2d_test_table());
        let rows = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0]);
        let cols = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        let result_arr = table.lookup_one_2d(rows, cols).to_array();
        for (i, &got) in result_arr[..8].iter().enumerate() {
            assert_eq!(got, (i * 10 + i) as u8, "Main diagonal position {}", i);
        }
        let expected_anti = [70, 61, 52, 43, 34, 25, 16, 7u8];
        for (i, (&got, &want)) in result_arr[8..].iter().zip(&expected_anti).enumerate() {
            assert_eq!(got, want, "Anti-diagonal position {}", i);
        }
    }

    #[test]
    fn test_lookup_one_2d_corners() {
        let table = Table64::new(&create_2d_test_table());
        let rows = u8x16::from([0, 0, 7, 7, 0, 0, 7, 7, 0, 0, 7, 7, 0, 0, 7, 7]);
        let cols = u8x16::from([0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7]);
        let expected = [0u8, 7, 70, 77, 0, 7, 70, 77, 0, 7, 70, 77, 0, 7, 70, 77];
        let result_arr = table.lookup_one_2d(rows, cols).to_array();
        assert_eq!(result_arr, expected, "Corner lookups");
    }

    #[test]
    fn test_lookup_one_2d_same_row() {
        let table = Table64::new(&create_2d_test_table());
        let rows = u8x16::splat(5);
        let cols = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0]);
        let result_arr = table.lookup_one_2d(rows, cols).to_array();
        let cols_arr = cols.to_array();
        for (&got, &col) in result_arr.iter().zip(cols_arr.iter()) {
            assert_eq!(got, 50 + col, "Row 5, col {}", col);
        }
    }

    #[test]
    fn test_lookup_one_2d_same_col() {
        let table = Table64::new(&create_2d_test_table());
        let rows = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        let cols = u8x16::splat(3);
        let result_arr = table.lookup_one_2d(rows, cols).to_array();
        // Both halves of the result address the same eight rows.
        let (first_half, second_half) = result_arr.split_at(8);
        for (i, (&a, &b)) in first_half.iter().zip(second_half).enumerate() {
            let expected = (i * 10 + 3) as u8;
            assert_eq!(a, expected, "Row {}, col 3", i);
            assert_eq!(b, expected, "Row {}, col 3 (second half)", i);
        }
    }

    #[test]
    fn test_lookup_one_2d_matches_lookup_one() {
        let table = Table64::new(&create_2d_test_table());
        let rows = u8x16::from([0, 3, 7, 2, 5, 1, 6, 4, 7, 0, 3, 5, 2, 6, 1, 4]);
        let cols = u8x16::from([5, 2, 0, 7, 3, 6, 1, 4, 7, 0, 4, 2, 6, 3, 5, 1]);
        let rows_arr = rows.to_array();
        let cols_arr = cols.to_array();
        let mut expected_idx = [0u8; 16];
        for ((slot, &r), &c) in expected_idx.iter_mut().zip(rows_arr.iter()).zip(cols_arr.iter()) {
            *slot = r * 8 + c;
        }
        let result_2d = table.lookup_one_2d(rows, cols);
        let result_1d = table.lookup_one(u8x16::from(expected_idx));
        assert_eq!(
            result_2d.to_array(),
            result_1d.to_array(),
            "lookup_one_2d should match lookup_one with computed indices"
        );
    }

    /// Row-major data with entry `(r, c)` equal to `(r * 10 + c) % 256`.
    fn create_table2d_test_data(num_rows: usize, num_cols: usize) -> Vec<u8> {
        (0..num_rows)
            .flat_map(|r| (0..num_cols).map(move |c| ((r * 10 + c) % 256) as u8))
            .collect()
    }

    #[test]
    fn test_table2d_from_flat_basic() {
        let data = create_table2d_test_data(16, 16);
        let table = Table2dU8xU8::from_flat(&data, 16);
        println!("\n{:?}", table);
        assert_eq!(table.num_rows(), 16);
        assert_eq!(table.num_cols(), 16);
        assert_eq!(table.len(), 256);
    }

    #[test]
    fn test_table2d_from_2d() {
        let matrix: [&[u8]; 3] = [&[0, 1, 2, 3], &[10, 11, 12, 13], &[20, 21, 22, 23]];
        let table = Table2dU8xU8::from_2d(&matrix);
        assert_eq!(table.num_rows(), 3);
        assert_eq!(table.num_cols(), 4);
        assert_eq!(table.len(), 12);
        assert_eq!(table.get(0, 0), 0);
        assert_eq!(table.get(0, 3), 3);
        assert_eq!(table.get(1, 0), 10);
        assert_eq!(table.get(2, 3), 23);
    }

    #[test]
    fn test_table2d_lookup_one_basic() {
        let data = create_table2d_test_data(16, 16);
        let table = Table2dU8xU8::from_flat(&data, 16);
        let rows = u8x16::splat(0);
        let cols = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let result_arr = table.lookup_one(rows, cols).to_array();
        for (i, &got) in result_arr.iter().enumerate() {
            assert_eq!(got, i as u8, "Row 0, col {}", i);
        }
    }

    #[test]
    fn test_table2d_lookup_one_different_rows() {
        let data = create_table2d_test_data(16, 16);
        let table = Table2dU8xU8::from_flat(&data, 16);
        let rows = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let cols = u8x16::splat(5);
        let result_arr = table.lookup_one(rows, cols).to_array();
        for (i, &got) in result_arr.iter().enumerate() {
            let expected = ((i * 10 + 5) % 256) as u8;
            assert_eq!(got, expected, "Row {}, col 5", i);
        }
    }

    #[test]
    fn test_table2d_lookup_one_scattered() {
        let data = create_table2d_test_data(16, 16);
        let table = Table2dU8xU8::from_flat(&data, 16);
        let rows = u8x16::from([0, 5, 10, 15, 3, 8, 12, 1, 7, 14, 2, 9, 4, 11, 6, 13]);
        let cols = u8x16::from([0, 15, 5, 10, 3, 8, 12, 1, 7, 14, 2, 9, 4, 11, 6, 13]);
        let result_arr = table.lookup_one(rows, cols).to_array();
        let rows_arr = rows.to_array();
        let cols_arr = cols.to_array();
        for (i, &got) in result_arr.iter().enumerate() {
            let (row, col) = (rows_arr[i], cols_arr[i]);
            let expected = ((row as usize * 10 + col as usize) % 256) as u8;
            assert_eq!(
                got, expected,
                "Mismatch at lane {}: row={}, col={}, expected={}, got={}",
                i, row, col, expected, got
            );
        }
    }

    #[test]
    fn test_table2d_lookup_matches_scalar() {
        let data = create_table2d_test_data(32, 20);
        let table = Table2dU8xU8::from_flat(&data, 20);
        let rows = u8x16::from([0, 5, 10, 15, 20, 25, 30, 31, 1, 6, 11, 16, 21, 26, 28, 29]);
        let cols = u8x16::from([0, 5, 10, 15, 19, 0, 5, 10, 1, 6, 11, 16, 18, 1, 6, 11]);
        let result_arr = table.lookup_one(rows, cols).to_array();
        let rows_arr = rows.to_array();
        let cols_arr = cols.to_array();
        for (i, &got) in result_arr.iter().enumerate() {
            let expected = table.get(rows_arr[i], cols_arr[i]);
            assert_eq!(
                got, expected,
                "Mismatch at lane {}: SIMD={}, scalar={}",
                i, got, expected
            );
        }
    }

    #[test]
    fn test_table2d_large_table() {
        // Largest supported table: 256 x 256 with entry `(r, c)` equal to `r ^ c`.
        let data: Vec<u8> = (0..256usize)
            .flat_map(|r| (0..256usize).map(move |c| (r ^ c) as u8))
            .collect();
        let table = Table2dU8xU8::from_flat(&data, 256);
        assert_eq!(table.num_rows(), 256);
        assert_eq!(table.num_cols(), 256);
        let rows = u8x16::from([0, 255, 128, 64, 32, 16, 8, 4, 2, 1, 100, 200, 50, 150, 75, 175]);
        let cols = u8x16::from([255, 0, 128, 64, 32, 16, 8, 4, 2, 1, 50, 100, 200, 75, 175, 150]);
        let result_arr = table.lookup_one(rows, cols).to_array();
        let rows_arr = rows.to_array();
        let cols_arr = cols.to_array();
        for (i, &got) in result_arr.iter().enumerate() {
            let expected = rows_arr[i] ^ cols_arr[i];
            assert_eq!(got, expected, "XOR mismatch at lane {}", i);
        }
    }

    #[test]
    fn test_table2d_non_power_of_two_cols() {
        let data = create_table2d_test_data(10, 17);
        let table = Table2dU8xU8::from_flat(&data, 17);
        assert_eq!(table.num_rows(), 10);
        assert_eq!(table.num_cols(), 17);
        let rows = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5]);
        let cols = u8x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 15, 14, 13, 12, 11]);
        let result_arr = table.lookup_one(rows, cols).to_array();
        let rows_arr = rows.to_array();
        let cols_arr = cols.to_array();
        for (i, &got) in result_arr.iter().enumerate() {
            let expected = table.get(rows_arr[i], cols_arr[i]);
            assert_eq!(got, expected, "Mismatch at lane {}", i);
        }
    }
}