/// Runs a kernel over the rows of a single lane, handing it the element
/// position of each row.
///
/// Arguments, in order:
/// - `$T`: the element type; it is forwarded to `seq_t!` to select the rows to
///   visit (`seq_t!` is defined elsewhere in the crate; presumably one row per
///   bit of `$T` — confirm against its definition).
/// - `$lane`: the lane to walk, passed to the position helper as a `usize`.
/// - `|$name| body`: the kernel. The caller writes a literal `$` followed by an
///   identifier between the bars and may then refer to `$name` inside `body`.
///
/// The expansion is a local `macro_rules! __kernel__` item followed by a block,
/// so every invocation redefines `__kernel__` at the call site.
#[macro_export]
macro_rules! iterate {
    ($T:ty, $lane: expr, | $_1:tt $idx:ident | $($body:tt)*) => {
        // Turn the caller's body into a one-argument local macro. `$_1` captures
        // the caller's literal `$`, so the generated rule binds the caller's own
        // metavariable name.
        macro_rules! __kernel__ {( $_1 $idx:ident ) => ( $($body)* )}
        {
            use $crate::{seq_t, FL_ORDER};
            use paste::paste;

            // Translates (row, lane) into an element position: rows are taken in
            // groups of eight, the group number is remapped through `FL_ORDER`,
            // and the offset inside the group advances in steps of 128.
            #[inline(always)]
            fn index(row: usize, lane: usize) -> usize {
                let (group, offset) = (row / 8, row % 8);
                (FL_ORDER[group] * 16) + (offset * 128) + lane
            }

            paste!(seq_t!(row in $T {
                let position = index(row, $lane);
                __kernel__!(position);
            }));
        }
    }
}
/// Bit-packs one lane of values, keeping the low `$W` bits of each value.
///
/// Arguments, in order:
/// - `$T`: the word type. The macro reads `<$T>::T` (used here as the number of
///   bits in one word) and `<$T>::LANES` from it.
/// - `$W`: the packed width in bits. It is compared against `<$T>::T`, so it
///   must be a `usize` expression.
/// - `$packed`: the destination, indexed and assigned as
///   `$packed[<$T>::LANES * word + $lane]`.
/// - `$lane`: the lane to pack.
/// - `|$name| body`: the kernel. The caller writes a literal `$` followed by an
///   identifier and may refer to `$name` inside `body`; `body` must evaluate to
///   the `$T` source value for the element position it is given.
///
/// Three cases are handled: `$W == 0` writes nothing, `$W == <$T>::T` stores
/// each source value unchanged in its own word, and every other width masks
/// each value to `$W` bits and concatenates the results across output words.
///
/// The expansion is a local `macro_rules! __kernel__` item followed by a block,
/// so every invocation redefines `__kernel__` at the call site.
#[macro_export]
macro_rules! pack {
($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident | $($body:tt)*) => {
// Turn the caller's body into a one-argument local macro. `$_1` captures the
// caller's literal `$`, so the generated rule binds the caller's own
// metavariable name.
macro_rules! __kernel__ {( $_1 $idx:ident ) => ( $($body)* )}
{
use $crate::{seq_t, FL_ORDER};
use paste::paste;
// Word size used for all bit arithmetic below.
const T: usize = <$T>::T;
// Translates (row, lane) into the position of the source element: rows are
// taken in groups of eight, the group number is remapped through `FL_ORDER`,
// and the offset inside the group advances in steps of 128.
#[inline(always)]
fn index(row: usize, lane: usize) -> usize {
let o = row / 8;
let s = row % 8;
(FL_ORDER[o] * 16) + (s * 128) + lane
}
if $W == 0 {
// Zero-width values take no space: nothing is written to the output.
} else if $W == T {
// Full-width values need no shifting or masking: each row's value is stored
// whole in the output word with the same row number.
paste!(seq_t!(row in $T {
let idx = index(row, $lane);
$packed[<$T>::LANES * row + $lane] = __kernel__!(idx);
}));
} else {
// Low `$W` bits set.
let mask: $T = (1 << $W) - 1;
// Output word currently being assembled.
let mut tmp: $T = 0;
paste!(seq_t!(row in $T {
let idx = index(row, $lane);
let src = __kernel__!(idx);
let src = src & mask;
// Place the masked value at its bit offset inside the current word. Row 0
// starts the first word, so it assigns instead of OR-ing. Bits that fall past
// the top of the word are picked up again from `src` below.
if row == 0 {
tmp = src;
} else {
tmp |= src << (row * $W) % T;
}
// Output word holding the first bit of this row, and the one holding the
// first bit of the following row.
let curr_word: usize = (row * $W) / T;
let next_word: usize = ((row + 1) * $W) / T;
// The reseeding of `tmp` performed after the final word has been written is
// never read, which would otherwise trigger the `unused_assignments` lint.
#[allow(unused_assignments)]
if next_word > curr_word {
// This row reached the end of the current word: write it out.
$packed[<$T>::LANES * curr_word + $lane] = tmp;
// Number of this row's bits that belong to the next word.
let remaining_bits: usize = ((row + 1) * $W) % T;
// Start the next word with those spilled high bits of `src`.
tmp = src >> $W - remaining_bits;
}
}));
}
}
};
}
/// Unpacks one lane of `$W`-bit values and hands each one to a kernel.
///
/// Arguments, in order:
/// - `$T`: the word type. The macro reads `<$T>::T` (used here as the number of
///   bits in one word) and `<$T>::LANES` from it.
/// - `$W`: the packed width in bits. It is compared against `<$T>::T`, so it
///   must be a `usize` expression.
/// - `$packed`: the packed input, read as `$packed[<$T>::LANES * word + $lane]`.
/// - `$lane`: the lane to unpack.
/// - `|$pos, $value| body`: the kernel. The caller writes two `$`-prefixed
///   identifiers and may refer to them inside `body`; the first receives the
///   element position, the second the unpacked `$T` value.
///
/// Three cases are handled: `$W == 0` passes zero for every row without
/// reading `$packed`, `$W == <$T>::T` passes each packed word through
/// unchanged, and every other width extracts `$W` bits per row, joining the
/// pieces of values that straddle two packed words.
///
/// The expansion is a local `macro_rules! __kernel__` item followed by a block,
/// so every invocation redefines `__kernel__` at the call site.
#[macro_export]
macro_rules! unpack {
($T:ty, $W:expr, $packed:expr, $lane:expr, | $_1:tt $idx:ident, $_2:tt $elem:ident | $($body:tt)*) => {
// Turn the caller's body into a two-argument local macro. `$_1` and `$_2`
// capture the caller's literal `$` tokens, so the generated rule binds the
// caller's own metavariable names.
macro_rules! __kernel__ {( $_1 $idx:ident, $_2 $elem:ident ) => ( $($body)* )}
{
use $crate::{seq_t, FL_ORDER};
use paste::paste;
// Word size used for all bit arithmetic below.
const T: usize = <$T>::T;
// Translates (row, lane) into the position of the output element: rows are
// taken in groups of eight, the group number is remapped through `FL_ORDER`,
// and the offset inside the group advances in steps of 128.
#[inline(always)]
fn index(row: usize, lane: usize) -> usize {
let o = row / 8;
let s = row % 8;
(FL_ORDER[o] * 16) + (s * 128) + lane
}
if $W == 0 {
// Zero-width input carries no bits: every element is reported as zero.
let zero: $T = 0;
paste!(seq_t!(row in $T {
let idx = index(row, $lane);
__kernel__!(idx, zero);
}));
} else if $W == T {
// Full-width input: each packed word is one whole value.
paste!(seq_t!(row in $T {
let idx = index(row, $lane);
let src = $packed[<$T>::LANES * row + $lane];
__kernel__!(idx, src);
}));
} else {
// Value with the low `width` bits set. Only called with widths below `T` in
// this branch.
#[inline(always)]
fn mask(width: usize) -> $T {
(1 << width) - 1
}
// Packed word currently being consumed, starting with word 0 of the lane.
let mut src: $T = $packed[$lane];
// Value being reassembled for the current row.
let mut tmp: $T;
paste!(seq_t!(row in $T {
// Packed word holding the first bit of this row, and the one holding the
// first bit of the following row.
let curr_word: usize = (row * $W) / T;
let next_word = ((row + 1) * $W) / T;
// Bit offset of this row's value inside the current word.
let shift = (row * $W) % T;
if next_word > curr_word {
// This row reaches the end of the current word. `remaining_bits` of the value
// live in the next word; `current_bits` are the ones still in this word.
let remaining_bits = ((row + 1) * $W) % T;
let current_bits = $W - remaining_bits;
tmp = (src >> shift) & mask(current_bits);
// `next_word` equals `$W` only once the lane's last word is used up
// (assuming `seq_t!` visits rows `0..T`); there is nothing more to load then.
if next_word < $W {
src = $packed[<$T>::LANES * next_word + $lane];
// Append the low bits of the new word above the bits already collected.
tmp |= (src & mask(remaining_bits)) << current_bits;
}
} else {
// The value lies entirely inside the current word.
tmp = (src >> shift) & mask($W);
}
let idx = index(row, $lane);
__kernel__!(idx, tmp);
}));
}
}
};
}
#[cfg(test)]
mod test {
    use crate::{BitPacking, FastLanes};

    /// Packs 1024 `u16` values at width 15 with `pack!`, unpacks them with
    /// `unpack!`, and checks that the round trip reproduces the input.
    #[test]
    fn test_pack() {
        // Fill the input with 0, 1, 2, ...; the modulus keeps every value
        // within 15 bits.
        let mut values = [0u16; 1024];
        for (i, value) in values.iter_mut().enumerate() {
            *value = (i % (1 << 15)) as u16;
        }

        // Pack lane by lane through the macro.
        let mut packed = [0u16; 960];
        for lane in 0..u16::LANES {
            pack!(u16, 15, packed, lane, |$pos| {
                values[$pos]
            });
        }

        // Run the trait-based packer on the same input as well. Its output is
        // not compared against `packed` in this test.
        let mut packed_orig = [0u16; 960];
        BitPacking::pack::<15, 960>(&values, &mut packed_orig);

        // Unpack lane by lane, scattering each value back to its position.
        let mut unpacked = [0u16; 1024];
        for lane in 0..u16::LANES {
            unpack!(u16, 15, packed, lane, |$idx, $elem| {
                unpacked[$idx] = $elem;
            });
        }

        assert_eq!(values, unpacked);
    }
}