use wide::{u8x16, u32x16};
use rustc_hash::FxHashMap;
use crate::{
bulk_vec_extender::{BulkVecExtender, SliceU8SIMDExtender}, compress_store_u8x16, compress_store_u32x16, gather_u32index_u32, gather_u32index_u8, prefetch::{L3, prefetch_eight_offsets}
};
/// Reads the byte stored at `offset` in `lookup_table`.
///
/// An offset that lies past the end of the table yields `0` instead of
/// panicking.
#[inline(always)]
fn safe_lookup(lookup_table: &[u8], offset: u32) -> u8 {
    match lookup_table.get(offset as usize) {
        Some(&byte) => byte,
        None => 0,
    }
}
#[inline]
fn lookup_from_offsets(lookup_table: &[u8], offsets: &[u32; 16]) -> [u8; 16] {
[
safe_lookup(lookup_table, offsets[0]),
safe_lookup(lookup_table, offsets[1]),
safe_lookup(lookup_table, offsets[2]),
safe_lookup(lookup_table, offsets[3]),
safe_lookup(lookup_table, offsets[4]),
safe_lookup(lookup_table, offsets[5]),
safe_lookup(lookup_table, offsets[6]),
safe_lookup(lookup_table, offsets[7]),
safe_lookup(lookup_table, offsets[8]),
safe_lookup(lookup_table, offsets[9]),
safe_lookup(lookup_table, offsets[10]),
safe_lookup(lookup_table, offsets[11]),
safe_lookup(lookup_table, offsets[12]),
safe_lookup(lookup_table, offsets[13]),
safe_lookup(lookup_table, offsets[14]),
safe_lookup(lookup_table, offsets[15]),
]
}
/// Lookup helper over a single borrowed byte table: `u32` offsets go in and
/// the table bytes at those offsets come out, sixteen at a time as `u8x16`.
#[derive(Debug, Clone)]
pub struct SimdSingleTableU32U8Lookup<'a> {
// Borrowed table that the `u32` offsets index into.
lookup_table: &'a [u8],
}
impl<'a> SimdSingleTableU32U8Lookup<'a> {
#[inline]
pub fn new(lookup_table: &'a [u8]) -> Self {
Self { lookup_table }
}
#[inline]
pub fn lookup_func<F>(&self, values: &[u32], f: &mut F)
where
F: FnMut(u8x16, usize),
{
let (chunks, rest) = values.as_chunks::<16>();
for chunk in chunks {
let values = lookup_from_offsets(&self.lookup_table, chunk);
(f)(u8x16::from(values), 16);
}
if !rest.is_empty() {
let mut values = [0u8; 16];
for i in 0..rest.len() {
values[i] = self.lookup_table[rest[i] as usize];
}
(f)(u8x16::from(values), rest.len());
}
}
#[inline]
pub fn lookup_into_vec(&self, values: &[u32], buffer: &mut Vec<u8>) {
let mut write_guard = buffer.bulk_extend_guard(values.len());
let mut write_slice = write_guard.as_mut_slice();
let mut num_written = 0;
self.lookup_func(values, &mut |lookedup_values, num_bytes| {
write_slice.write_u8x16(num_written, lookedup_values, num_bytes);
num_written += num_bytes;
});
}
#[inline]
pub fn lookup_into_u8x16_buffer(&self, values: &[u32], buffer: &mut [u8x16]) {
assert!(
(buffer.len() * 16) >= values.len(),
"Buffer must be at least as long as the input values"
);
let mut idx = 0;
self.lookup_func(values, &mut |lookedup_values, _num_bytes| {
buffer[idx] = lookedup_values;
idx += 1;
});
}
#[inline]
pub fn lookup_extend_u8x16_vec(&self, values: &[u32], vec: &mut Vec<u8x16>) {
let needed = values.len().div_ceil(16);
let mut guard = vec.bulk_extend_guard(needed);
self.lookup_into_u8x16_buffer(values, guard.as_mut_slice());
}
#[inline]
pub fn lookup_compress_into_nonzeroes(&self, values: &[u32], nonzero_results: &mut Vec<u8>, indices: &mut Vec<u32>, base_index: u32) {
let mut indices_simd = u32x16::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
indices_simd = indices_simd + u32x16::splat(base_index);
let sixteen = u32x16::splat(16);
let zeroes = u8x16::splat(0);
let min_len = (values.len() + 16).max(16);
let mut result_guard = nonzero_results.bulk_extend_guard(min_len);
let result_slice = result_guard.as_mut_slice();
let mut indices_guard = indices.bulk_extend_guard(min_len);
let indices_slice = indices_guard.as_mut_slice();
let mut num_written = 0;
self.lookup_func(values, &mut |lookedup_values, num_bytes| {
let eq_mask = lookedup_values.simd_eq(zeroes).to_bitmask();
let mut nonzero_mask = !eq_mask as u16;
if num_bytes < 16 {
nonzero_mask &= (1u16 << num_bytes) - 1;
}
let written = compress_store_u8x16(lookedup_values, nonzero_mask, &mut result_slice[num_written..]);
let _ = compress_store_u32x16(indices_simd, nonzero_mask, &mut indices_slice[num_written..]);
num_written += written;
if num_bytes < 16 {
indices_simd = indices_simd + u32x16::splat(num_bytes as u32);
} else {
indices_simd = indices_simd + sixteen;
}
});
result_guard.set_written(num_written);
indices_guard.set_written(num_written);
}
}
/// Single-table `u32` -> `u8` lookup whose `lookup_func` issues prefetches for
/// chunks a few iterations ahead of the one currently being looked up.
#[derive(Debug, Clone)]
pub struct PipelinedSingleTableU32U8Lookup<'a> {
// Borrowed table that the `u32` offsets index into.
lookup_table: &'a [u8],
}
impl<'a> PipelinedSingleTableU32U8Lookup<'a> {
#[inline]
pub fn new(lookup_table: &'a [u8]) -> Self {
Self { lookup_table }
}
#[inline]
pub fn lookup_func<F>(&self, values: &[u32], f: &mut F)
where
F: FnMut(u8x16, usize),
{
let (chunks, rest) = values.as_chunks::<16>();
if chunks.is_empty() {
if !rest.is_empty() {
self.process_remainder(rest, f);
}
return;
}
const PREFETCH_DISTANCE: usize = 4;
let prefetch_chunk = |chunk: &[u32; 16]| {
let first_half: &[u32; 8] = chunk[..8].try_into().unwrap();
let second_half: &[u32; 8] = chunk[8..].try_into().unwrap();
prefetch_eight_offsets::<_, L3>(&self.lookup_table[0], first_half);
prefetch_eight_offsets::<_, L3>(&self.lookup_table[0], second_half);
};
if chunks.len() <= PREFETCH_DISTANCE {
for chunk in chunks {
let values = lookup_from_offsets(&self.lookup_table, chunk);
(f)(u8x16::from(values), 16);
}
if !rest.is_empty() {
self.process_remainder(rest, f);
}
return;
}
for i in 0..PREFETCH_DISTANCE {
prefetch_chunk(&chunks[i]);
}
for i in 0..chunks.len() {
if i + PREFETCH_DISTANCE < chunks.len() {
prefetch_chunk(&chunks[i + PREFETCH_DISTANCE]);
}
let values = lookup_from_offsets(&self.lookup_table, &chunks[i]);
(f)(u8x16::from(values), 16);
}
if !rest.is_empty() {
self.process_remainder(rest, f);
}
}
#[inline]
fn process_remainder<F>(&self, rest: &[u32], f: &mut F)
where
F: FnMut(u8x16, usize),
{
let mut values = [0u8; 16];
for i in 0..rest.len() {
values[i] = self.lookup_table[rest[i] as usize];
}
(f)(u8x16::from(values), rest.len());
}
#[inline]
pub fn lookup_into_vec(&self, values: &[u32], buffer: &mut Vec<u8>) {
let mut write_guard = buffer.bulk_extend_guard(values.len());
let write_slice = write_guard.as_mut_slice();
let mut num_written = 0;
self.lookup_func(values, &mut |lookedup_values, num_bytes| {
let target_slice = &mut write_slice[num_written..num_written + num_bytes];
target_slice.copy_from_slice(&lookedup_values.to_array()[..num_bytes]);
num_written += num_bytes;
});
}
}
/// Lookup helper over two borrowed byte tables. The second table is only
/// consulted for lanes where the first table produced a nonzero byte.
#[derive(Debug, Clone)]
pub struct SimdDualTableU32U8Lookup<'a> {
// Table indexed by the first offset stream; its result gates the second lookup.
lookup_table1: &'a [u8],
// Table indexed by the second offset stream.
lookup_table2: &'a [u8],
}
impl<'a> SimdDualTableU32U8Lookup<'a> {
#[inline]
pub fn new(lookup_table1: &'a [u8], lookup_table2: &'a [u8]) -> Self {
Self {
lookup_table1,
lookup_table2,
}
}
#[inline]
pub fn lookup_func<F>(&self, values1: &[u32], values2: &[u32], f: &mut F)
where
F: FnMut(u8x16, u8x16, usize),
{
assert!(
values1.len() == values2.len(),
"Values1 and values2 must have the same length"
);
let (chunks1, rest1) = values1.as_chunks::<16>();
let (chunks2, rest2) = values2.as_chunks::<16>();
for (chunk1, chunk2) in chunks1.iter().zip(chunks2.iter()) {
let values1 = lookup_from_offsets(self.lookup_table1, chunk1);
let mut values2 = [0u8; 16];
for i in 0..16 {
if values1[i] != 0 {
values2[i] = self.lookup_table2[chunk2[i] as usize];
}
}
(f)(u8x16::from(values1), u8x16::from(values2), 16);
}
if !rest1.is_empty() {
let mut values1 = [0u8; 16];
let mut values2 = [0u8; 16];
for i in 0..rest1.len() {
values1[i] = self.lookup_table1[rest1[i] as usize];
if values1[i] != 0 {
values2[i] = self.lookup_table2[rest2[i] as usize];
}
}
(f)(u8x16::from(values1), u8x16::from(values2), rest1.len());
}
}
#[inline]
pub fn lookup_into_vec<F>(
&self,
values1: &[u32],
values2: &[u32],
output: &mut Vec<u8>,
f: &mut F,
) where
F: FnMut(u8x16, u8x16) -> u8x16,
{
assert!(
values1.len() == values2.len(),
"Values1 and values2 must have the same length"
);
let mut write_guard = output.bulk_extend_guard(values1.len());
let mut write_slice = write_guard.as_mut_slice();
let mut num_written = 0;
self.lookup_func(
values1,
values2,
&mut |lookedup_values1, lookedup_values2, num_bytes| {
let combined = (f)(lookedup_values1, lookedup_values2);
write_slice.write_u8x16(num_written, combined, num_bytes);
num_written += num_bytes;
},
);
}
}
/// Two-table lookup that first resolves the whole first offset stream into a
/// scratch buffer and then consults the second table for nonzero lanes.
#[derive(Debug, Clone)]
pub struct SimdDualTableU32U8LookupV2<'a> {
// Single-table lookup over the first table.
lookup1: SimdSingleTableU32U8Lookup<'a>,
// Second table, indexed directly by the second offset stream.
lookup2: &'a [u8],
// Scratch space for first-table results; cleared and refilled by each `lookup_func` call.
temp_buffer: Vec<u8x16>,
}
impl<'a> SimdDualTableU32U8LookupV2<'a> {
    /// Creates a lookup over the two borrowed tables, with scratch space for
    /// 128 first-table result vectors reserved up front.
    #[inline]
    pub fn new(lookup_table1: &'a [u8], lookup_table2: &'a [u8]) -> Self {
        let lookup1 = SimdSingleTableU32U8Lookup::new(lookup_table1);
        Self {
            lookup1,
            lookup2: lookup_table2,
            temp_buffer: Vec::with_capacity(128),
        }
    }

    /// Looks up `values1` in the first table and, for lanes where that result
    /// is nonzero, `values2` in the second table.
    ///
    /// `f` receives the first-table vector, the second-table vector (zero in
    /// every lane whose first-table byte was zero) and the number of valid
    /// leading lanes: `16` for full chunks, `len % 16` for the trailing partial
    /// chunk.
    ///
    /// # Panics
    ///
    /// Panics if `values1` and `values2` differ in length, or if a second-table
    /// offset that is actually consulted lies past the end of the second table.
    #[inline]
    pub fn lookup_func<F>(&mut self, values1: &[u32], values2: &[u32], f: &mut F)
    where
        F: FnMut(u8x16, u8x16, usize),
    {
        assert!(
            values1.len() == values2.len(),
            "Values1 and values2 must have the same length"
        );

        // Pass 1: resolve the entire first stream into the scratch buffer.
        self.temp_buffer.clear();
        self.lookup1
            .lookup_extend_u8x16_vec(values1, &mut self.temp_buffer);

        let second_table = self.lookup2;
        let (full_chunks, tail) = values2.as_chunks::<16>();

        // Pass 2: consult the second table wherever pass 1 was nonzero.
        for (chunk_idx, offsets) in full_chunks.iter().enumerate() {
            let first_pass = self.temp_buffer[chunk_idx];
            let gate = first_pass.to_array();

            // Second-table byte for one lane, or 0 when the lane is gated off.
            let gated_byte = |lane: usize| -> u64 {
                if gate[lane] != 0 {
                    second_table[offsets[lane] as usize] as u64
                } else {
                    0
                }
            };

            // Pack eight lanes into each u64, highest lane first, so that lane
            // `j` lands in little-endian byte position `j` of the final u128.
            let high_half = (8..16)
                .rev()
                .fold(0u64, |packed, lane| (packed << 8) + gated_byte(lane));
            let low_half = (0..8)
                .rev()
                .fold(0u64, |packed, lane| (packed << 8) + gated_byte(lane));

            let packed = ((high_half as u128) << 64) | (low_half as u128);
            f(first_pass, u8x16::from(packed.to_le_bytes()), 16);
        }

        if !tail.is_empty() {
            // The partial chunk's first-table result sits right after the full chunks.
            let first_pass = self.temp_buffer[full_chunks.len()];
            let gate = first_pass.to_array();
            let mut second_pass = [0u8; 16];
            for (lane, &offset) in tail.iter().enumerate() {
                if gate[lane] != 0 {
                    second_pass[lane] = second_table[offset as usize];
                }
            }
            f(first_pass, u8x16::from(second_pass), tail.len());
        }
    }
}
/// Two-stage lookup whose first stage is a borrowed byte table and whose
/// second stage is a borrowed hash map keyed by `u32`.
#[derive(Debug, Clone)]
pub struct SimdDualTableWithHashLookup<'a> {
    /// Table indexed by the first offset stream; its result gates the second lookup.
    lookup_table1: &'a [u8],
    /// Map consulted with keys from the second stream.
    lookup_table2: &'a FxHashMap<u32, u8>,
}
impl<'a> SimdDualTableWithHashLookup<'a> {
#[inline]
pub fn new(lookup_table1: &'a [u8], lookup_table2: &'a FxHashMap<u32, u8>) -> Self {
Self {
lookup_table1,
lookup_table2,
}
}
#[inline]
pub fn lookup_func<F>(&self, values1: &[u32], values2: &[u32], f: &mut F)
where
F: FnMut(u8x16, u8x16, usize),
{
assert!(
values1.len() == values2.len(),
"Values1 and values2 must have the same length"
);
let (chunks1, rest1) = values1.as_chunks::<16>();
let (chunks2, rest2) = values2.as_chunks::<16>();
for (chunk1, chunk2) in chunks1.iter().zip(chunks2.iter()) {
let values1 = lookup_from_offsets(self.lookup_table1, chunk1);
let mut values2 = [0u8; 16];
for i in 0..16 {
if values1[i] != 0 {
values2[i] = self.lookup_table2.get(&chunk2[i]).copied().unwrap_or(0);
}
}
(f)(u8x16::from(values1), u8x16::from(values2), 16);
}
if !rest1.is_empty() {
let mut values1 = [0u8; 16];
let mut values2 = [0u8; 16];
for i in 0..rest1.len() {
values1[i] = self.lookup_table1[rest1[i] as usize];
if values1[i] != 0 {
values2[i] = self.lookup_table2.get(&rest2[i]).copied().unwrap_or(0);
}
}
(f)(u8x16::from(values1), u8x16::from(values2), rest1.len());
}
}
#[inline]
pub fn lookup_into_vec<F>(
&self,
values1: &[u32],
values2: &[u32],
output: &mut Vec<u8>,
f: &mut F,
) where
F: FnMut(u8x16, u8x16) -> u8x16,
{
assert!(
values1.len() == values2.len(),
"Values1 and values2 must have the same length"
);
let mut write_guard = output.bulk_extend_guard(values1.len());
let mut write_slice = write_guard.as_mut_slice();
let mut num_written = 0;
self.lookup_func(
values1,
values2,
&mut |lookedup_values1, lookedup_values2, num_bytes| {
let combined = (f)(lookedup_values1, lookedup_values2);
write_slice.write_u8x16(num_written, combined, num_bytes);
num_written += num_bytes;
},
);
}
}
/// Second-stage lookup that refines an earlier stage's compressed
/// (nonzero result, index) pairs against another borrowed byte table.
#[derive(Debug, Clone)]
pub struct SimdCascadingTableU32U8Lookup<'a> {
// Borrowed table consulted for each surviving index.
lookup_table: &'a [u8],
}
impl<'a> SimdCascadingTableU32U8Lookup<'a> {
    /// Creates a cascading lookup that reads from the borrowed `lookup_table`.
    #[inline]
    pub fn new(lookup_table: &'a [u8]) -> Self {
        Self { lookup_table }
    }

    /// Refines the output of an earlier lookup stage.
    ///
    /// `in_nonzero_results[k]` and `in_indices[k]` form one pair from the
    /// earlier stage. For each pair, `values[in_indices[k]]` is used as an
    /// offset into this lookup's table, and the table byte is combined with
    /// `in_nonzero_results[k]` by `f`. Pairs whose combined byte is nonzero are
    /// stored, in order, through `out_results`' guard (the combined byte) and
    /// `out_indices`' guard (the original index).
    ///
    /// `f` is applied to whole 16-lane vectors. For the trailing partial chunk
    /// the unused lanes are zero in both arguments and their output is ignored.
    ///
    /// NOTE(review): full chunks go through `gather_u32index_u32` /
    /// `gather_u32index_u8`, whose handling of out-of-range indices is not
    /// visible here. The trailing partial chunk uses plain indexing and panics
    /// on out-of-range indices.
    ///
    /// # Panics
    ///
    /// Panics if `in_nonzero_results` and `in_indices` differ in length.
    #[inline]
    pub fn cascading_lookup<F>(
        &self,
        values: &[u32],
        in_nonzero_results: &[u8],
        in_indices: &[u32],
        f: F,
        out_results: &mut Vec<u8>,
        out_indices: &mut Vec<u32>,
    ) where
        F: Fn(u8x16, u8x16) -> u8x16,
    {
        // The two inputs are consumed pairwise. Without this check a length
        // mismatch would silently misalign or truncate the pairing.
        assert!(
            in_nonzero_results.len() == in_indices.len(),
            "in_nonzero_results and in_indices must have the same length"
        );

        // NOTE(review): the 16 extra elements presumably leave room for a full
        // 16-lane store at the very end — confirm against `compress_store_*`.
        let min_len = in_nonzero_results.len() + 16;
        let mut result_guard = out_results.bulk_extend_guard(min_len);
        let result_slice = result_guard.as_mut_slice();
        let mut indices_guard = out_indices.bulk_extend_guard(min_len);
        let indices_slice = indices_guard.as_mut_slice();

        let mut num_written = 0;
        let zeroes = u8x16::splat(0);
        let (in_nonzero_chunks, in_nonzero_rest) = in_nonzero_results.as_chunks::<16>();
        let (in_indices_chunks, in_indices_rest) = in_indices.as_chunks::<16>();

        for (nonzero_chunk, indices_chunk) in in_nonzero_chunks.iter().zip(in_indices_chunks.iter())
        {
            let in_results = u8x16::from(*nonzero_chunk);
            let in_indices_simd = u32x16::from(*indices_chunk);
            // index -> values[index] -> lookup_table[values[index]]
            let lookup_keys = gather_u32index_u32(in_indices_simd, values, 4);
            let lookedup_values = gather_u32index_u8(lookup_keys, self.lookup_table, 1);
            let mixed_results = f(in_results, lookedup_values);

            let eq_mask = mixed_results.simd_eq(zeroes).to_bitmask();
            let nonzero_mask = !eq_mask as u16;
            let num_nonzeroes = compress_store_u8x16(
                mixed_results,
                nonzero_mask,
                &mut result_slice[num_written..],
            );
            let _ = compress_store_u32x16(
                in_indices_simd,
                nonzero_mask,
                &mut indices_slice[num_written..],
            );
            num_written += num_nonzeroes;
        }

        if !in_nonzero_rest.is_empty() {
            let mut in_results_arr = [0u8; 16];
            let mut lookedup_arr = [0u8; 16];
            let mut indices_arr = [0u32; 16];
            for (i, (&in_result, &in_idx)) in
                in_nonzero_rest.iter().zip(in_indices_rest.iter()).enumerate()
            {
                let lookup_key = values[in_idx as usize];
                in_results_arr[i] = in_result;
                lookedup_arr[i] = self.lookup_table[lookup_key as usize];
                indices_arr[i] = in_idx;
            }

            let mixed_arr = f(u8x16::from(in_results_arr), u8x16::from(lookedup_arr)).to_array();
            // Only the lanes backed by real input are candidates for output.
            for (&mixed, &in_idx) in mixed_arr.iter().zip(&indices_arr).take(in_nonzero_rest.len())
            {
                if mixed != 0 {
                    result_slice[num_written] = mixed;
                    indices_slice[num_written] = in_idx;
                    num_written += 1;
                }
            }
        }

        result_guard.set_written(num_written);
        indices_guard.set_written(num_written);
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Exactly one full 16-element chunk through the pipelined lookup with an
// identity table: every output byte must equal its input offset.
#[test]
fn test_pipelined_single_table_lookup_basic() {
let lookup_table: Vec<u8> = (0..256).map(|i| i as u8).collect();
let pipelined_lookup = PipelinedSingleTableU32U8Lookup::new(&lookup_table);
let values = vec![
10u32, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160,
];
let mut results = Vec::new();
pipelined_lookup.lookup_func(&values, &mut |lookedup_values, num_bytes| {
let array = lookedup_values.to_array();
results.extend_from_slice(&array[..num_bytes]);
});
let expected: Vec<u8> = values.iter().map(|&v| v as u8).collect();
assert_eq!(results, expected);
}
// 35 offsets = two full chunks plus a 3-element tail; checks that the chunks
// and the partial tail are delivered in order with the right valid-lane counts.
#[test]
fn test_pipelined_single_table_lookup_multiple_chunks() {
let lookup_table: Vec<u8> = (0..256).map(|i| i as u8).collect();
let pipelined_lookup = PipelinedSingleTableU32U8Lookup::new(&lookup_table);
let values: Vec<u32> = (1..36).collect();
let mut results = Vec::new();
pipelined_lookup.lookup_func(&values, &mut |lookedup_values, num_bytes| {
let array = lookedup_values.to_array();
results.extend_from_slice(&array[..num_bytes]);
});
let expected: Vec<u8> = values.iter().map(|&v| v as u8).collect();
assert_eq!(results, expected);
}
// `lookup_into_vec` on fewer than 16 offsets (tail-only path) with a table
// that doubles its index modulo 256.
#[test]
fn test_pipelined_single_table_lookup_into_vec() {
let lookup_table: Vec<u8> = (0..256).map(|i| ((i * 2) % 256) as u8).collect();
let pipelined_lookup = PipelinedSingleTableU32U8Lookup::new(&lookup_table);
let values = vec![1u32, 2, 3, 4, 5, 100, 150, 200];
let mut buffer = Vec::new();
pipelined_lookup.lookup_into_vec(&values, &mut buffer);
let expected: Vec<u8> = values.iter().map(|&v| ((v * 2) % 256) as u8).collect();
assert_eq!(buffer, expected);
}
// The pipelined and the plain single-table lookups must produce identical
// output for sizes below, at, and above chunk boundaries, including a size
// (100) large enough to take the prefetching path.
#[test]
fn test_pipelined_vs_original_consistency() {
let lookup_table: Vec<u8> = (0..256).map(|i| (i ^ 0xAA) as u8).collect();
let original_lookup = SimdSingleTableU32U8Lookup::new(&lookup_table);
let pipelined_lookup = PipelinedSingleTableU32U8Lookup::new(&lookup_table);
for size in [5, 16, 17, 32, 33, 100] {
let values: Vec<u32> = (0..size).map(|i| (i * 7) % 256).collect();
let mut original_results = Vec::new();
original_lookup.lookup_into_vec(&values, &mut original_results);
let mut pipelined_results = Vec::new();
pipelined_lookup.lookup_into_vec(&values, &mut pipelined_results);
assert_eq!(
original_results, pipelined_results,
"Results differ for size {}",
size
);
}
}
// Tail-only input (8 offsets) against a 5-entry table: output length matches
// the input and each byte is the table entry at the corresponding offset.
#[test]
fn test_single_table_lookup_into_vec() {
let lookup_table = vec![0u8, 10, 20, 30, 40];
let lookup = SimdSingleTableU32U8Lookup::new(&lookup_table);
let values = vec![0u32, 1, 2, 3, 4, 1, 2, 3];
let mut result = Vec::new();
lookup.lookup_into_vec(&values, &mut result);
assert_eq!(result.len(), values.len());
assert_eq!(result[0], 0);
assert_eq!(result[1], 10);
assert_eq!(result[2], 20);
assert_eq!(result[3], 30);
assert_eq!(result[4], 40);
assert_eq!(result[5], 10);
assert_eq!(result[6], 20);
assert_eq!(result[7], 30);
}
// The table maps even offsets to themselves and odd offsets to 0, so of the
// inputs 0..=4 only positions 2 and 4 yield nonzero bytes (offset 0 maps to 0).
#[test]
fn test_single_table_lookup_compress_into_nonzeroes_small_input() {
let lookup_table: Vec<u8> = (0..256).map(|i| if i % 2 == 0 { i as u8 } else { 0 }).collect();
let lookup = SimdSingleTableU32U8Lookup::new(&lookup_table);
let values = vec![0u32, 1, 2, 3, 4];
let mut nonzero_results: Vec<u8> = Vec::new();
let mut indices: Vec<u32> = Vec::new();
lookup.lookup_compress_into_nonzeroes(&values, &mut nonzero_results, &mut indices, 0);
assert_eq!(nonzero_results.len(), 2);
assert_eq!(nonzero_results, vec![2, 4]);
assert_eq!(indices, vec![2, 4]);
}
// Two-stage pipeline on three elements with identity tables. The stage-two
// combine is a bitwise AND: 1 & 10 == 0, 2 & 20 == 0, 3 & 30 == 2, so only
// index 2 survives with value 2.
#[test]
fn test_cascading_lookup_small_input() {
let lookup_table1: Vec<u8> = (0..256).map(|i| i as u8).collect();
let lookup_table2: Vec<u8> = (0..256).map(|i| i as u8).collect();
let single_table = SimdSingleTableU32U8Lookup::new(&lookup_table1);
let cascading_table = SimdCascadingTableU32U8Lookup::new(&lookup_table2);
let values1: Vec<u32> = vec![1, 2, 3];
let values2: Vec<u32> = vec![10, 20, 30];
let mut nonzero_results: Vec<u8> = Vec::new();
let mut indices: Vec<u32> = Vec::new();
single_table.lookup_compress_into_nonzeroes(&values1, &mut nonzero_results, &mut indices, 0);
assert_eq!(nonzero_results.len(), 3);
assert_eq!(indices, vec![0, 1, 2]);
let mut out_results: Vec<u8> = Vec::new();
let mut out_indices: Vec<u32> = Vec::new();
cascading_table.cascading_lookup(
&values2,
&nonzero_results,
&indices,
|v1, v2| v1 & v2,
&mut out_results,
&mut out_indices,
);
assert_eq!(out_results.len(), 1);
assert_eq!(out_results[0], 2); assert_eq!(out_indices[0], 2);
}
// Dual-table lookup combined with bitwise OR. Position 0 maps to 0 in table 1,
// so its table-2 lookup is skipped and the combined byte stays 0.
#[test]
fn test_dual_table_lookup_into_vec() {
let lookup_table1 = vec![0u8, 1, 2, 3, 4];
let lookup_table2 = vec![0u8, 10, 20, 30, 40];
let lookup = SimdDualTableU32U8Lookup::new(&lookup_table1, &lookup_table2);
let values1 = vec![0u32, 1, 2, 3, 4, 1, 2, 3];
let values2 = vec![0u32, 1, 2, 3, 4, 1, 2, 3];
let mut result = Vec::new();
lookup.lookup_into_vec(&values1, &values2, &mut result, &mut |v1, v2| v1 | v2);
assert_eq!(result.len(), values1.len());
assert_eq!(result[0], 0); assert_eq!(result[1], 1 | 10); assert_eq!(result[2], 2 | 20); assert_eq!(result[3], 3 | 30); assert_eq!(result[4], 4 | 40); assert_eq!(result[5], 1 | 10); assert_eq!(result[6], 2 | 20); assert_eq!(result[7], 3 | 30); }
// 50 elements = three full chunks plus a 2-element tail; constant tables
// (1 and 2) combined with addition must give 3 everywhere.
#[test]
fn test_dual_table_lookup_into_vec_large() {
let lookup_table1 = vec![1u8; 100];
let lookup_table2 = vec![2u8; 100];
let lookup = SimdDualTableU32U8Lookup::new(&lookup_table1, &lookup_table2);
let values1 = vec![0u32; 50];
let values2 = vec![0u32; 50];
let mut result = Vec::new();
lookup.lookup_into_vec(&values1, &values2, &mut result, &mut |v1, v2| v1 + v2);
assert_eq!(result.len(), 50);
for &val in &result {
assert_eq!(val, 3);
}
}
// V2 dual lookup on a 5-element (tail-only) input: a single callback with
// 5 valid lanes. Lane 0 is zero in table 1, so its table-2 lane stays zero.
#[test]
fn test_dual_table_v2_lookup_func_basic() {
let lookup_table1 = vec![0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
let lookup_table2 = vec![0u8, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100];
let mut lookup = SimdDualTableU32U8LookupV2::new(&lookup_table1, &lookup_table2);
let values1 = vec![0u32, 1, 2, 3, 4];
let values2 = vec![1u32, 2, 3, 4, 5];
let mut table1_results = Vec::new();
let mut table2_results = Vec::new();
let mut num_bytes_list = Vec::new();
lookup.lookup_func(&values1, &values2, &mut |v1, v2, num_bytes| {
table1_results.push(v1);
table2_results.push(v2);
num_bytes_list.push(num_bytes);
});
assert_eq!(num_bytes_list.len(), 1);
assert_eq!(num_bytes_list[0], 5);
let v1_array = table1_results[0].as_array();
assert_eq!(v1_array[0], 0);
assert_eq!(v1_array[1], 1);
assert_eq!(v1_array[2], 2);
assert_eq!(v1_array[3], 3);
assert_eq!(v1_array[4], 4);
let v2_array = table2_results[0].as_array();
assert_eq!(v2_array[0], 0); assert_eq!(v2_array[1], 20); assert_eq!(v2_array[2], 30); assert_eq!(v2_array[3], 40); assert_eq!(v2_array[4], 50); }
// 25 elements = one full chunk plus a 9-element tail. Verifies both callbacks,
// the zero-gating of table-2 lanes, and that the tail's unused lanes (9..16)
// are zero in both vectors.
#[test]
fn test_dual_table_v2_lookup_func_remainder() {
let lookup_table1 = vec![0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
let lookup_table2 = vec![0u8, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100];
let mut lookup = SimdDualTableU32U8LookupV2::new(&lookup_table1, &lookup_table2);
let values1: Vec<u32> = (0..25).map(|i| (i % 5) as u32).collect();
let values2: Vec<u32> = (0..25).map(|i| ((i % 5) + 1) as u32).collect();
let mut table1_results = Vec::new();
let mut table2_results = Vec::new();
let mut num_bytes_list = Vec::new();
lookup.lookup_func(&values1, &values2, &mut |v1, v2, num_bytes| {
table1_results.push(v1);
table2_results.push(v2);
num_bytes_list.push(num_bytes);
});
assert_eq!(num_bytes_list.len(), 2);
assert_eq!(num_bytes_list[0], 16);
assert_eq!(num_bytes_list[1], 9);
let v1_chunk0 = table1_results[0].as_array();
let v2_chunk0 = table2_results[0].as_array();
for i in 0..16 {
let expected_v1 = lookup_table1[values1[i] as usize];
assert_eq!(v1_chunk0[i], expected_v1);
if expected_v1 != 0 {
assert_eq!(v2_chunk0[i], lookup_table2[values2[i] as usize]);
} else {
assert_eq!(v2_chunk0[i], 0);
}
}
let v1_remainder = table1_results[1].as_array();
let v2_remainder = table2_results[1].as_array();
for i in 0..9 {
let expected_v1 = lookup_table1[values1[16 + i] as usize];
assert_eq!(v1_remainder[i], expected_v1);
if expected_v1 != 0 {
assert_eq!(v2_remainder[i], lookup_table2[values2[16 + i] as usize]);
} else {
assert_eq!(v2_remainder[i], 0);
}
}
for i in 9..16 {
assert_eq!(v1_remainder[i], 0);
assert_eq!(v2_remainder[i], 0);
}
}
// Table 1 is nonzero only at offsets 3 and 7, so only those two lanes may
// carry a table-2 value (table2[4] == 150 and table2[8] == 80); every other
// table-2 lane must stay zero even though table 2 holds nonzero data there.
#[test]
fn test_dual_table_v2_lookup_func_zero_filtering() {
let lookup_table1 = vec![0u8, 0, 0, 5, 0, 0, 0, 10, 0, 0, 0];
let lookup_table2 = vec![0u8, 100, 200, 50, 150, 250, 60, 70, 80, 90, 100];
let mut lookup = SimdDualTableU32U8LookupV2::new(&lookup_table1, &lookup_table2);
let values1 = vec![0u32, 1, 2, 3, 4, 5, 6, 7];
let values2 = vec![1u32, 2, 3, 4, 5, 6, 7, 8];
let mut table1_results = Vec::new();
let mut table2_results = Vec::new();
lookup.lookup_func(&values1, &values2, &mut |v1, v2, _num_bytes| {
table1_results.push(v1);
table2_results.push(v2);
});
let v1_array = table1_results[0].as_array();
let v2_array = table2_results[0].as_array();
assert_eq!(v1_array[0], 0);
assert_eq!(v2_array[0], 0);
assert_eq!(v1_array[1], 0);
assert_eq!(v2_array[1], 0);
assert_eq!(v1_array[2], 0);
assert_eq!(v2_array[2], 0);
assert_eq!(v1_array[3], 5);
assert_eq!(v2_array[3], 150);
assert_eq!(v1_array[4], 0);
assert_eq!(v2_array[4], 0);
assert_eq!(v1_array[5], 0);
assert_eq!(v2_array[5], 0);
assert_eq!(v1_array[6], 0);
assert_eq!(v2_array[6], 0);
assert_eq!(v1_array[7], 10);
assert_eq!(v2_array[7], 80); }
// 50 elements = three full chunks plus a 2-element tail. Table 1 is all ones,
// so no lane is gated off and every table-2 lane must carry its looked-up value.
#[test]
fn test_dual_table_v2_lookup_func_multiple_chunks() {
let lookup_table1 = vec![1u8; 100];
let lookup_table2 = vec![2u8; 100];
let mut lookup = SimdDualTableU32U8LookupV2::new(&lookup_table1, &lookup_table2);
let values1: Vec<u32> = (0..50).map(|i| (i % 10) as u32).collect();
let values2: Vec<u32> = (0..50).map(|i| ((i % 10) + 1) as u32).collect();
let mut table1_results = Vec::new();
let mut table2_results = Vec::new();
let mut num_bytes_list = Vec::new();
lookup.lookup_func(&values1, &values2, &mut |v1, v2, num_bytes| {
table1_results.push(v1);
table2_results.push(v2);
num_bytes_list.push(num_bytes);
});
assert_eq!(num_bytes_list.len(), 4);
assert_eq!(num_bytes_list[0], 16);
assert_eq!(num_bytes_list[1], 16);
assert_eq!(num_bytes_list[2], 16);
assert_eq!(num_bytes_list[3], 2);
let mut global_idx = 0;
for chunk_idx in 0..4 {
let v1_chunk = table1_results[chunk_idx].as_array();
let v2_chunk = table2_results[chunk_idx].as_array();
let chunk_len = num_bytes_list[chunk_idx];
for i in 0..chunk_len {
let expected_v1 = lookup_table1[values1[global_idx] as usize];
assert_eq!(v1_chunk[i], expected_v1);
assert_eq!(v2_chunk[i], lookup_table2[values2[global_idx] as usize]);
global_idx += 1;
}
}
}
// 32 elements = exactly two full chunks and no tail: exactly two callbacks,
// each reporting 16 valid lanes.
#[test]
fn test_dual_table_v2_lookup_func_exact_multiple_of_16() {
let lookup_table1 = vec![0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
let lookup_table2 = vec![0u8, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100];
let mut lookup = SimdDualTableU32U8LookupV2::new(&lookup_table1, &lookup_table2);
let values1: Vec<u32> = (0..32).map(|i| (i % 5) as u32).collect();
let values2: Vec<u32> = (0..32).map(|i| ((i % 5) + 1) as u32).collect();
let mut table1_results = Vec::new();
let mut table2_results = Vec::new();
let mut num_bytes_list = Vec::new();
lookup.lookup_func(&values1, &values2, &mut |v1, v2, num_bytes| {
table1_results.push(v1);
table2_results.push(v2);
num_bytes_list.push(num_bytes);
});
assert_eq!(num_bytes_list.len(), 2);
assert_eq!(num_bytes_list[0], 16);
assert_eq!(num_bytes_list[1], 16);
let mut global_idx = 0;
for chunk_idx in 0..2 {
let v1_chunk = table1_results[chunk_idx].as_array();
let v2_chunk = table2_results[chunk_idx].as_array();
for i in 0..16 {
let expected_v1 = lookup_table1[values1[global_idx] as usize];
assert_eq!(v1_chunk[i], expected_v1);
if expected_v1 != 0 {
assert_eq!(v2_chunk[i], lookup_table2[values2[global_idx] as usize]);
} else {
assert_eq!(v2_chunk[i], 0);
}
global_idx += 1;
}
}
}
// One full chunk through the table + hash-map lookup. Table 1 zeroes every
// offset divisible by 3, so those lanes must skip the map and report 0; the
// remaining lanes must carry the map value for their key.
#[test]
fn test_dual_table_with_hash_lookup_basic() {
let lookup_table1: Vec<u8> =
(0..256).map(|i| if i % 3 == 0 { 0 } else { i as u8 }).collect();
let mut hash_table2: FxHashMap<u32, u8> = FxHashMap::default();
hash_table2.insert(0, 100);
hash_table2.insert(5, 105);
hash_table2.insert(10, 110);
hash_table2.insert(15, 115);
hash_table2.insert(20, 120);
hash_table2.insert(100, 200);
let lookup = SimdDualTableWithHashLookup::new(&lookup_table1, &hash_table2);
let values1: Vec<u32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let values2: Vec<u32> = vec![0, 5, 10, 15, 20, 100, 0, 5, 10, 15, 20, 100, 0, 5, 10, 15];
let mut table1_results = Vec::new();
let mut table2_results = Vec::new();
lookup.lookup_func(&values1, &values2, &mut |v1, v2, _num_bytes| {
table1_results.push(v1);
table2_results.push(v2);
});
assert_eq!(table1_results.len(), 1);
let v1_array = table1_results[0].as_array();
let v2_array = table2_results[0].as_array();
for i in 0..16 {
let expected_v1 = lookup_table1[values1[i] as usize];
assert_eq!(v1_array[i], expected_v1, "v1 mismatch at index {}", i);
if expected_v1 != 0 {
let expected_v2 = hash_table2.get(&values2[i]).copied().unwrap_or(0);
assert_eq!(v2_array[i], expected_v2, "v2 mismatch at index {}", i);
} else {
assert_eq!(v2_array[i], 0, "v2 should be 0 where v1 is 0 at index {}", i);
}
}
}
// `lookup_into_vec` on the table + hash-map lookup with an AND combine.
// Table 1 maps i -> i + 1, so the first two lanes are 1 and 2 and pair with
// map values 10 and 20.
#[test]
fn test_dual_table_with_hash_lookup_into_vec() {
let lookup_table1: Vec<u8> = (0..256).map(|i| (i + 1) as u8).collect();
let mut hash_table2: FxHashMap<u32, u8> = FxHashMap::default();
hash_table2.insert(0, 10);
hash_table2.insert(1, 20);
hash_table2.insert(2, 30);
hash_table2.insert(3, 40);
hash_table2.insert(4, 50);
let lookup = SimdDualTableWithHashLookup::new(&lookup_table1, &hash_table2);
let values1: Vec<u32> = vec![0, 1, 2, 3, 4, 5, 6, 7];
let values2: Vec<u32> = vec![0, 1, 2, 3, 4, 0, 1, 2];
let mut result = Vec::new();
lookup.lookup_into_vec(&values1, &values2, &mut result, &mut |v1, v2| v1 & v2);
assert_eq!(result.len(), 8);
assert_eq!(result[0], 1 & 10);
assert_eq!(result[1], 2 & 20);
}
// Two-stage pipeline over 16 inputs. Stage one (identity table) drops the five
// zero inputs, leaving 11 (value, index) pairs. Stage two looks up
// table2[values2[index]] (table2 doubles its index) and ANDs it with the
// stage-one value; only indices 9, 14 and 15 give a nonzero result.
#[test]
fn test_cascading_lookup_basic() {
let lookup_table1: Vec<u8> = (0..256).map(|i| i as u8).collect();
let lookup_table2: Vec<u8> = (0..128).map(|i| ((i * 2) % 256) as u8).collect();
let single_table = SimdSingleTableU32U8Lookup::new(&lookup_table1);
let cascading_table = SimdCascadingTableU32U8Lookup::new(&lookup_table2);
let values1: Vec<u32> = vec![0, 1, 2, 3, 0, 5, 0, 7, 8, 9, 0, 11, 12, 0, 14, 15];
let values2: Vec<u32> = vec![10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 1, 2, 3, 4];
let mut nonzero_results: Vec<u8> = Vec::new();
let mut indices: Vec<u32> = Vec::new();
single_table.lookup_compress_into_nonzeroes(&values1, &mut nonzero_results, &mut indices, 0);
assert_eq!(nonzero_results.len(), 11, "Expected 11 nonzero results from table1");
assert_eq!(indices.len(), 11, "Expected 11 indices");
let expected_indices: Vec<u32> = vec![1, 2, 3, 5, 7, 8, 9, 11, 12, 14, 15];
assert_eq!(indices, expected_indices);
let expected_nonzero: Vec<u8> = vec![1, 2, 3, 5, 7, 8, 9, 11, 12, 14, 15];
assert_eq!(nonzero_results, expected_nonzero);
let mut out_results: Vec<u8> = Vec::new();
let mut out_indices: Vec<u32> = Vec::new();
cascading_table.cascading_lookup(
&values2,
&nonzero_results,
&indices,
|v1, v2| v1 & v2,
&mut out_results,
&mut out_indices,
);
assert_eq!(out_results.len(), 3, "Expected 3 nonzero cascading results, got {:?}", out_results);
assert_eq!(out_indices.len(), 3, "Expected 3 output indices");
assert_eq!(out_results[0], 8); assert_eq!(out_results[1], 6); assert_eq!(out_results[2], 8);
assert_eq!(out_indices[0], 9);
assert_eq!(out_indices[1], 14);
assert_eq!(out_indices[2], 15);
}
// Five inputs exercise only the partial-chunk path of the cascading lookup.
// With identity tables and an AND combine, only 3 & 30 == 2 (index 2) is
// nonzero.
#[test]
fn test_cascading_lookup_remainder() {
let lookup_table1: Vec<u8> = (0..256).map(|i| i as u8).collect();
let lookup_table2: Vec<u8> = (0..256).map(|i| i as u8).collect();
let single_table = SimdSingleTableU32U8Lookup::new(&lookup_table1);
let cascading_table = SimdCascadingTableU32U8Lookup::new(&lookup_table2);
let values1: Vec<u32> = vec![1, 2, 3, 4, 5];
let values2: Vec<u32> = vec![10, 20, 30, 40, 50];
let mut nonzero_results: Vec<u8> = Vec::new();
let mut indices: Vec<u32> = Vec::new();
single_table.lookup_compress_into_nonzeroes(&values1, &mut nonzero_results, &mut indices, 0);
assert_eq!(nonzero_results.len(), 5);
assert_eq!(indices, vec![0, 1, 2, 3, 4]);
let mut out_results: Vec<u8> = Vec::new();
let mut out_indices: Vec<u32> = Vec::new();
cascading_table.cascading_lookup(
&values2,
&nonzero_results,
&indices,
|v1, v2| v1 & v2,
&mut out_results,
&mut out_indices,
);
assert_eq!(out_results.len(), 1);
assert_eq!(out_results[0], 2); assert_eq!(out_indices[0], 2);
}
// 35 inputs = two full chunks plus a 3-element tail through both stages.
// Table 1 maps i -> i + 1, so stage one keeps all 35 entries. Stage two
// returns the identity-table value unchanged, so only index 0 (value 0) is
// dropped, leaving 1..=34 in order.
#[test]
fn test_cascading_lookup_multiple_chunks() {
let lookup_table1: Vec<u8> = (0..256).map(|i| ((i + 1) % 256) as u8).collect(); let lookup_table2: Vec<u8> = (0..256).map(|i| i as u8).collect();
let single_table = SimdSingleTableU32U8Lookup::new(&lookup_table1);
let cascading_table = SimdCascadingTableU32U8Lookup::new(&lookup_table2);
let values1: Vec<u32> = (0..35).collect();
let values2: Vec<u32> = (0..35).collect();
let mut nonzero_results: Vec<u8> = Vec::new();
let mut indices: Vec<u32> = Vec::new();
single_table.lookup_compress_into_nonzeroes(&values1, &mut nonzero_results, &mut indices, 0);
assert_eq!(nonzero_results.len(), 35);
let mut out_results: Vec<u8> = Vec::new();
let mut out_indices: Vec<u32> = Vec::new();
cascading_table.cascading_lookup(
&values2,
&nonzero_results,
&indices,
|_v1, v2| v2,
&mut out_results,
&mut out_indices,
);
assert_eq!(out_results.len(), 34, "Expected 34 nonzero results (all except index 0)");
for (i, &result) in out_results.iter().enumerate() {
assert_eq!(result, (i + 1) as u8, "Result at position {} should be {}", i, i + 1);
}
}
}