pub mod scalar;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub mod sse2;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub mod avx2;
#[cfg(target_arch = "aarch64")]
pub mod neon;
/// The eight bit-planes produced by transposing one 64-byte block.
///
/// Bit `i` of `planes[b]` is bit `b` of input byte `i` (for `i` in
/// `0..64`), i.e. each plane gathers the same bit position from all 64
/// input bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct BitPlanes {
    /// One 64-bit lane per bit position, index 0 = least-significant bit.
    pub planes: [u64; 8],
}

/// Signature shared by every transpose backend.
///
/// The pointer is `unsafe fn` because the SIMD backends require their CPU
/// feature (SSE2/AVX2/NEON) to be present; obtain a pointer via
/// [`select_transpose`], which performs that check.
pub type TransposeFn = unsafe fn(&[u8; 64]) -> BitPlanes;
/// Picks the fastest available transpose backend for the current CPU.
///
/// Preference order: AVX2 > SSE2 on x86/x86_64 (runtime-detected when the
/// `std` feature is enabled, otherwise decided from compile-time target
/// features), NEON on aarch64, and the portable scalar implementation
/// everywhere else.
pub fn select_transpose() -> TransposeFn {
    #[cfg(target_arch = "aarch64")]
    {
        // NEON is a baseline aarch64 feature; no runtime detection needed.
        neon::transpose_64
    }
    // Gating the fallback on not-aarch64 keeps it out of the aarch64 build
    // entirely; previously it sat after an unconditional `return`, tripping
    // the `unreachable_code` lint (and `-D warnings` builds).
    #[cfg(not(target_arch = "aarch64"))]
    {
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        {
            #[cfg(feature = "std")]
            {
                if std::is_x86_feature_detected!("avx2") {
                    return avx2::transpose_64;
                }
                if std::is_x86_feature_detected!("sse2") {
                    return sse2::transpose_64;
                }
            }
            #[cfg(not(feature = "std"))]
            {
                // Without std there is no runtime CPUID; trust the features
                // the crate was compiled with.
                if cfg!(target_feature = "avx2") {
                    return avx2::transpose_64;
                }
                if cfg!(target_feature = "sse2") {
                    return sse2::transpose_64;
                }
            }
        }
        scalar::transpose_64
    }
}
/// Transposes the next block of `data` starting at `offset`.
///
/// A full 64-byte block is transposed directly; a shorter tail is
/// zero-padded to 64 bytes first (padding bytes contribute zero bits to
/// every plane). Returns the bit-planes together with the number of input
/// bytes consumed (64, or the tail length, possibly 0).
///
/// # Panics
/// Panics if `offset > data.len()`.
#[inline]
pub fn transpose_block(transpose: TransposeFn, data: &[u8], offset: usize) -> (BitPlanes, usize) {
    use core::convert::TryInto; // in the prelude since edition 2021; explicit for older editions
    // Slice first: the previous `data.len() - offset` wrapped around in
    // release builds when `offset > data.len()`, producing a huge
    // `remaining` that sent a raw pointer read far out of bounds (UB).
    // Indexing panics cleanly instead.
    let rest = &data[offset..];
    if rest.len() >= 64 {
        // Safe conversion replaces the old raw-pointer cast; length 64 was
        // just checked, so this cannot fail.
        let block: &[u8; 64] = rest[..64]
            .try_into()
            .expect("slice of length 64 converts to [u8; 64]");
        // SAFETY: `transpose` is expected to come from `select_transpose`,
        // which only hands out backends whose CPU features are available.
        let planes = unsafe { transpose(block) };
        (planes, 64)
    } else {
        // Tail shorter than 64 bytes: zero-pad so the backend always sees a
        // full block.
        let mut padded = [0u8; 64];
        padded[..rest.len()].copy_from_slice(rest);
        // SAFETY: same contract as above.
        let planes = unsafe { transpose(&padded) };
        (planes, rest.len())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // 0x42 = 0b0100_0010: bits 1 and 6 are set in every byte, so those
    // planes are all-ones and planes 0/7 are all-zeros.
    #[test]
    fn transpose_block_full() {
        let input = [0x42u8; 128];
        let (bp, consumed) = transpose_block(select_transpose(), &input, 0);
        assert_eq!(consumed, 64);
        assert_eq!(bp.planes[0], 0);
        assert_eq!(bp.planes[1], u64::MAX);
        assert_eq!(bp.planes[6], u64::MAX);
        assert_eq!(bp.planes[7], 0);
    }

    // A 10-byte all-ones tail must set exactly the low 10 bits of every plane.
    #[test]
    fn transpose_block_partial() {
        let input = [0xFFu8; 10];
        let (bp, consumed) = transpose_block(select_transpose(), &input, 0);
        assert_eq!(consumed, 10);
        let expected = (1u64 << 10) - 1;
        for &plane in bp.planes.iter() {
            assert_eq!(plane, expected);
        }
    }

    // Offsets index into the buffer: the 16-byte 0xFF tail starting at 64
    // yields the low 16 bits in every plane.
    #[test]
    fn transpose_block_with_offset() {
        let mut input = [0u8; 80];
        input[64..].fill(0xFF);
        let (bp, consumed) = transpose_block(select_transpose(), &input, 64);
        assert_eq!(consumed, 16);
        let expected = (1u64 << 16) - 1;
        for &plane in bp.planes.iter() {
            assert_eq!(plane, expected);
        }
    }

    // Every SIMD backend available at runtime must agree with the scalar
    // reference on a spread of patterns (constants, ramps, LCG noise, and
    // HTML-ish punctuation).
    #[test]
    fn cross_validate_backends() {
        let mut test_vectors: Vec<[u8; 64]> =
            vec![[0x00; 64], [0xFF; 64], [0x55; 64], [0xAA; 64]];
        test_vectors.push(core::array::from_fn(|i| i as u8));
        test_vectors.push(core::array::from_fn(|i| {
            (i as u8).wrapping_mul(37).wrapping_add(13)
        }));
        test_vectors.push(core::array::from_fn(|i| {
            (i as u8).wrapping_mul(127).wrapping_add(97)
        }));
        const CYCLE: [u8; 8] = [b'<', b'>', b'&', b'"', b' ', b'/', b'=', b'a'];
        test_vectors.push(core::array::from_fn(|i| CYCLE[i % 8]));

        for (vec_idx, data) in test_vectors.iter().enumerate() {
            let reference = scalar::transpose_64(data);
            #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
            {
                if std::is_x86_feature_detected!("sse2") {
                    let simd = unsafe { sse2::transpose_64(data) };
                    for bit in 0..8 {
                        assert_eq!(
                            simd.planes[bit],
                            reference.planes[bit],
                            "SSE2 != scalar at plane {bit}, vector {vec_idx}",
                        );
                    }
                }
                if std::is_x86_feature_detected!("avx2") {
                    let simd = unsafe { avx2::transpose_64(data) };
                    for bit in 0..8 {
                        assert_eq!(
                            simd.planes[bit],
                            reference.planes[bit],
                            "AVX2 != scalar at plane {bit}, vector {vec_idx}",
                        );
                    }
                }
            }
            #[cfg(target_arch = "aarch64")]
            {
                let simd = unsafe { neon::transpose_64(data) };
                for bit in 0..8 {
                    assert_eq!(
                        simd.planes[bit],
                        reference.planes[bit],
                        "NEON != scalar at plane {bit}, vector {vec_idx}",
                    );
                }
            }
        }
    }

    // Whatever backend the selector hands out must match the scalar result.
    #[test]
    fn select_transpose_picks_simd() {
        let input: [u8; 64] = core::array::from_fn(|i| i as u8);
        let chosen = select_transpose();
        let got = unsafe { chosen(&input) };
        let want = scalar::transpose_64(&input);
        for bit in 0..8 {
            assert_eq!(got.planes[bit], want.planes[bit],
                "select_transpose() result differs from scalar at plane {bit}");
        }
    }
}