#[cfg(feature = "simd")]
use crate::util::bit_util;
#[cfg(feature = "simd")]
use packed_simd::u8x64;
#[cfg(feature = "avx512")]
use crate::arch::avx512::*;
use crate::util::bit_util::ceil;
#[cfg(any(feature = "simd", feature = "avx512"))]
use std::borrow::BorrowMut;
use super::{Buffer, MutableBuffer};
/// Applies a binary bitwise operation to two byte-aligned buffer regions, handling full
/// `u8x64` vectors with `simd_op` and the trailing bytes with `scalar_op`.
///
/// * `left` / `right` - input buffers.
/// * `left_offset` / `right_offset` - starting position in each input, in bytes.
/// * `len` - number of bytes to process; the result buffer is sized with this value.
/// * `simd_op` - operation applied to one full vector of lanes at a time.
/// * `scalar_op` - operation applied byte-by-byte to the tail that does not fill a vector.
///
/// Only the `len` bytes following each offset are read, even when an input buffer is
/// longer than `offset + len` (for example when it backs a sliced array).
///
/// # Panics
///
/// Panics if `left_offset` or `right_offset` lies beyond the end of its buffer.
#[cfg(feature = "simd")]
pub fn bitwise_bin_op_simd_helper<SI, SC>(
    left: &Buffer,
    left_offset: usize,
    right: &Buffer,
    right_offset: usize,
    len: usize,
    simd_op: SI,
    scalar_op: SC,
) -> Buffer
where
    SI: Fn(u8x64, u8x64) -> u8x64,
    SC: Fn(u8, u8) -> u8,
{
    let mut result = MutableBuffer::new(len).with_bitset(len, false);
    let lanes = u8x64::lanes();

    // Clamp each input to at most `len` bytes. `ChunksExact::remainder()` is the tail of
    // the *whole* slice it was built from, so without the clamp an input longer than
    // `offset + len` would feed the wrong bytes (or none) into the scalar tail below.
    let left_bytes = &left.as_slice()[left_offset..];
    let left_bytes = &left_bytes[..len.min(left_bytes.len())];
    let right_bytes = &right.as_slice()[right_offset..];
    let right_bytes = &right_bytes[..len.min(right_bytes.len())];

    let mut left_chunks = left_bytes.chunks_exact(lanes);
    let mut right_chunks = right_bytes.chunks_exact(lanes);
    let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes);

    // Vectorised part: one full `lanes`-byte chunk per iteration.
    result_chunks
        .borrow_mut()
        .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
        .for_each(|(res, (left, right))| {
            // SAFETY: all three slices come from `chunks_exact(lanes)` and therefore hold
            // exactly `lanes` bytes. NOTE(review): confirm this is the full contract of
            // `bitwise_bin_op_simd`.
            unsafe { bit_util::bitwise_bin_op_simd(left, right, res, &simd_op) };
        });

    // Scalar part: the bytes that did not fill a whole vector.
    result_chunks
        .into_remainder()
        .iter_mut()
        .zip(
            left_chunks
                .remainder()
                .iter()
                .zip(right_chunks.remainder().iter()),
        )
        .for_each(|(res, (left, right))| {
            *res = scalar_op(*left, *right);
        });

    result.into()
}
/// Applies a unary bitwise operation to a byte-aligned buffer region, handling full
/// `u8x64` vectors with `simd_op` and the trailing bytes with `scalar_op`.
///
/// * `left` - input buffer.
/// * `left_offset` - starting position in the input, in bytes.
/// * `len` - number of bytes to process; the result buffer is sized with this value.
/// * `simd_op` - operation applied to one full vector of lanes at a time.
/// * `scalar_op` - operation applied byte-by-byte to the tail that does not fill a vector.
///
/// Only the `len` bytes following `left_offset` are read, even when the input buffer is
/// longer than `left_offset + len` (for example when it backs a sliced array).
///
/// # Panics
///
/// Panics if `left_offset` lies beyond the end of the buffer.
#[cfg(feature = "simd")]
pub fn bitwise_unary_op_simd_helper<SI, SC>(
    left: &Buffer,
    left_offset: usize,
    len: usize,
    simd_op: SI,
    scalar_op: SC,
) -> Buffer
where
    SI: Fn(u8x64) -> u8x64,
    SC: Fn(u8) -> u8,
{
    let mut result = MutableBuffer::new(len).with_bitset(len, false);
    let lanes = u8x64::lanes();

    // Clamp the input to at most `len` bytes. `ChunksExact::remainder()` is the tail of
    // the *whole* slice it was built from, so without the clamp an input longer than
    // `left_offset + len` would feed the wrong bytes (or none) into the scalar tail below.
    let left_bytes = &left.as_slice()[left_offset..];
    let left_bytes = &left_bytes[..len.min(left_bytes.len())];

    let mut left_chunks = left_bytes.chunks_exact(lanes);
    let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes);

    // Vectorised part: one full `lanes`-byte chunk per iteration.
    result_chunks
        .borrow_mut()
        .zip(left_chunks.borrow_mut())
        .for_each(|(res, left)| {
            // SAFETY: both slices come from `chunks_exact(lanes)` and therefore hold
            // exactly `lanes` bytes, which is what the unchecked load/store operate on.
            unsafe {
                let data_simd = u8x64::from_slice_unaligned_unchecked(left);
                let simd_result = simd_op(data_simd);
                simd_result.write_to_slice_unaligned_unchecked(res);
            }
        });

    // Scalar part: the bytes that did not fill a whole vector.
    result_chunks
        .into_remainder()
        .iter_mut()
        .zip(left_chunks.remainder().iter())
        .for_each(|(res, left)| {
            *res = scalar_op(*left);
        });

    result.into()
}
/// Combines two bit ranges with `op`, working on 64-bit words.
///
/// Both inputs are viewed through `bit_chunks` starting at their respective bit offsets
/// and spanning `len_in_bits` bits. `op` is applied to every pair of whole `u64` chunks,
/// and once more to the pair of remainder words; the remainder contributes only as many
/// bytes as are needed to hold the leftover bits.
pub fn bitwise_bin_op_helper<F>(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
    op: F,
) -> Buffer
where
    F: Fn(u64, u64) -> u64,
{
    let lhs = left.bit_chunks(left_offset_in_bits, len_in_bits);
    let rhs = right.bit_chunks(right_offset_in_bits, len_in_bits);

    // Whole 64-bit words first.
    let combined_words = lhs.iter().zip(rhs.iter()).map(|(l, r)| op(l, r));
    // SAFETY: NOTE(review) - relies on the zipped chunk iterator reporting an exact
    // length, as `from_trusted_len_iter` presumably requires; confirm against its docs.
    let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(combined_words) };

    // Then the partial trailing word, truncated to the bytes that carry leftover bits.
    let tail_byte_count = ceil(lhs.remainder_len(), 8);
    let tail_word = op(lhs.remainder_bits(), rhs.remainder_bits());
    // `to_le_bytes` puts the least-significant byte first, so a prefix is what we keep.
    let tail_bytes = tail_word.to_le_bytes();
    buffer.extend_from_slice(&tail_bytes[..tail_byte_count]);

    buffer.into()
}
/// Transforms a bit range with `op`, working on 64-bit words.
///
/// The input is viewed through `bit_chunks` starting at `offset_in_bits` and spanning
/// `len_in_bits` bits. `op` is applied to every whole `u64` chunk and once more to the
/// remainder word; the remainder contributes only as many bytes as are needed to hold
/// the leftover bits.
pub fn bitwise_unary_op_helper<F>(
    left: &Buffer,
    offset_in_bits: usize,
    len_in_bits: usize,
    op: F,
) -> Buffer
where
    F: Fn(u64) -> u64,
{
    // Capacity covers every output byte; the initial length covers only the whole
    // 64-bit words so that they can be written through a `u64` view.
    let whole_word_bytes = len_in_bits / 64 * 8;
    let mut result =
        MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(whole_word_bytes, false);

    let src_chunks = left.bit_chunks(offset_in_bits, len_in_bits);

    {
        // SAFETY: NOTE(review) - the buffer is only accessed as `u64` words here;
        // confirm the exact requirements of `typed_data_mut`.
        let dst_words = unsafe { result.typed_data_mut::<u64>() };
        for (dst, src) in dst_words.iter_mut().zip(src_chunks.iter()) {
            *dst = op(src);
        }
    }

    // Append the partial trailing word, truncated to the bytes that carry leftover bits.
    let tail_byte_count = ceil(src_chunks.remainder_len(), 8);
    let tail_word = op(src_chunks.remainder_bits());
    // `to_le_bytes` puts the least-significant byte first, so a prefix is what we keep.
    let tail_bytes = tail_word.to_le_bytes();
    result.extend_from_slice(&tail_bytes[..tail_byte_count]);

    result.into()
}
/// Computes the bitwise AND of two bit ranges (AVX-512 build).
///
/// When both offsets and the length are multiples of 8 bits, the work is done on whole
/// bytes: full `AVX512_U8X64_LANES`-byte chunks go through `avx512_bin_and` and the
/// trailing bytes are ANDed one at a time. Otherwise the call falls back to
/// `bitwise_bin_op_helper`.
///
/// On the byte-aligned path only the `len_in_bits / 8` bytes following each offset are
/// read, even when an input buffer is longer than that (for example when it backs a
/// sliced array).
///
/// # Panics
///
/// On the byte-aligned path, panics if a byte offset lies beyond the end of its buffer.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
pub fn buffer_bin_and(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    if left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0
    {
        let len = len_in_bits / 8;
        let left_offset = left_offset_in_bits / 8;
        let right_offset = right_offset_in_bits / 8;

        let mut result = MutableBuffer::new(len).with_bitset(len, false);

        // Clamp each input to at most `len` bytes. `ChunksExact::remainder()` is the tail
        // of the *whole* slice it was built from, so without the clamp an input longer
        // than `offset + len` would feed the wrong bytes (or none) into the scalar tail.
        let left_bytes = &left.as_slice()[left_offset..];
        let left_bytes = &left_bytes[..len.min(left_bytes.len())];
        let right_bytes = &right.as_slice()[right_offset..];
        let right_bytes = &right_bytes[..len.min(right_bytes.len())];

        let mut left_chunks = left_bytes.chunks_exact(AVX512_U8X64_LANES);
        let mut right_chunks = right_bytes.chunks_exact(AVX512_U8X64_LANES);
        let mut result_chunks =
            result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES);

        // Vectorised part: one full chunk per iteration.
        result_chunks
            .borrow_mut()
            .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
            .for_each(|(res, (left, right))| {
                // SAFETY: all three slices come from `chunks_exact(AVX512_U8X64_LANES)`
                // and hold exactly that many bytes. NOTE(review): confirm this is the
                // full contract of `avx512_bin_and`.
                unsafe { avx512_bin_and(left, right, res) };
            });

        // Scalar part: the bytes that did not fill a whole chunk.
        result_chunks
            .into_remainder()
            .iter_mut()
            .zip(
                left_chunks
                    .remainder()
                    .iter()
                    .zip(right_chunks.remainder().iter()),
            )
            .for_each(|(res, (left, right))| {
                *res = *left & *right;
            });

        result.into()
    } else {
        bitwise_bin_op_helper(
            left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |a, b| a & b,
        )
    }
}
/// Computes the bitwise AND of two bit ranges (SIMD build without AVX-512).
///
/// Byte-aligned requests (both offsets and the length are multiples of 8 bits) are
/// forwarded to `bitwise_bin_op_simd_helper` with the offsets and length converted to
/// bytes; everything else goes through `bitwise_bin_op_helper`.
#[cfg(all(feature = "simd", not(feature = "avx512")))]
pub fn buffer_bin_and(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    let byte_aligned = left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0;

    if !byte_aligned {
        // Unaligned ranges are handled at the bit level.
        return bitwise_bin_op_helper(
            left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |lhs, rhs| lhs & rhs,
        );
    }

    bitwise_bin_op_simd_helper(
        left,
        left_offset_in_bits / 8,
        right,
        right_offset_in_bits / 8,
        len_in_bits / 8,
        |lhs, rhs| lhs & rhs,
        |lhs, rhs| lhs & rhs,
    )
}
/// Computes the bitwise AND of two bit ranges (build without SIMD or AVX-512).
///
/// Delegates directly to `bitwise_bin_op_helper` with a 64-bit AND.
#[cfg(all(not(any(feature = "simd", feature = "avx512"))))]
pub fn buffer_bin_and(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    let and_words = |lhs: u64, rhs: u64| lhs & rhs;
    bitwise_bin_op_helper(
        left,
        left_offset_in_bits,
        right,
        right_offset_in_bits,
        len_in_bits,
        and_words,
    )
}
/// Computes the bitwise OR of two bit ranges (AVX-512 build).
///
/// When both offsets and the length are multiples of 8 bits, the work is done on whole
/// bytes: full `AVX512_U8X64_LANES`-byte chunks go through `avx512_bin_or` and the
/// trailing bytes are ORed one at a time. Otherwise the call falls back to
/// `bitwise_bin_op_helper`.
///
/// On the byte-aligned path only the `len_in_bits / 8` bytes following each offset are
/// read, even when an input buffer is longer than that (for example when it backs a
/// sliced array).
///
/// # Panics
///
/// On the byte-aligned path, panics if a byte offset lies beyond the end of its buffer.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
pub fn buffer_bin_or(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    if left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0
    {
        let len = len_in_bits / 8;
        let left_offset = left_offset_in_bits / 8;
        let right_offset = right_offset_in_bits / 8;

        let mut result = MutableBuffer::new(len).with_bitset(len, false);

        // Clamp each input to at most `len` bytes. `ChunksExact::remainder()` is the tail
        // of the *whole* slice it was built from, so without the clamp an input longer
        // than `offset + len` would feed the wrong bytes (or none) into the scalar tail.
        let left_bytes = &left.as_slice()[left_offset..];
        let left_bytes = &left_bytes[..len.min(left_bytes.len())];
        let right_bytes = &right.as_slice()[right_offset..];
        let right_bytes = &right_bytes[..len.min(right_bytes.len())];

        let mut left_chunks = left_bytes.chunks_exact(AVX512_U8X64_LANES);
        let mut right_chunks = right_bytes.chunks_exact(AVX512_U8X64_LANES);
        let mut result_chunks =
            result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES);

        // Vectorised part: one full chunk per iteration.
        result_chunks
            .borrow_mut()
            .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
            .for_each(|(res, (left, right))| {
                // SAFETY: all three slices come from `chunks_exact(AVX512_U8X64_LANES)`
                // and hold exactly that many bytes. NOTE(review): confirm this is the
                // full contract of `avx512_bin_or`.
                unsafe { avx512_bin_or(left, right, res) };
            });

        // Scalar part: the bytes that did not fill a whole chunk.
        result_chunks
            .into_remainder()
            .iter_mut()
            .zip(
                left_chunks
                    .remainder()
                    .iter()
                    .zip(right_chunks.remainder().iter()),
            )
            .for_each(|(res, (left, right))| {
                *res = *left | *right;
            });

        result.into()
    } else {
        bitwise_bin_op_helper(
            left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |a, b| a | b,
        )
    }
}
/// Computes the bitwise OR of two bit ranges (SIMD build without AVX-512).
///
/// Byte-aligned requests (both offsets and the length are multiples of 8 bits) are
/// forwarded to `bitwise_bin_op_simd_helper` with the offsets and length converted to
/// bytes; everything else goes through `bitwise_bin_op_helper`.
#[cfg(all(feature = "simd", not(feature = "avx512")))]
pub fn buffer_bin_or(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    let byte_aligned = left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0;

    if !byte_aligned {
        // Unaligned ranges are handled at the bit level.
        return bitwise_bin_op_helper(
            left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |lhs, rhs| lhs | rhs,
        );
    }

    bitwise_bin_op_simd_helper(
        left,
        left_offset_in_bits / 8,
        right,
        right_offset_in_bits / 8,
        len_in_bits / 8,
        |lhs, rhs| lhs | rhs,
        |lhs, rhs| lhs | rhs,
    )
}
/// Computes the bitwise OR of two bit ranges (build without SIMD or AVX-512).
///
/// Delegates directly to `bitwise_bin_op_helper` with a 64-bit OR.
#[cfg(all(not(any(feature = "simd", feature = "avx512"))))]
pub fn buffer_bin_or(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    let or_words = |lhs: u64, rhs: u64| lhs | rhs;
    bitwise_bin_op_helper(
        left,
        left_offset_in_bits,
        right,
        right_offset_in_bits,
        len_in_bits,
        or_words,
    )
}
/// Computes the bitwise NOT of a bit range.
///
/// With the `simd` feature enabled, byte-aligned requests (offset and length both
/// multiples of 8 bits) are forwarded to `bitwise_unary_op_simd_helper` with the offset
/// and length converted to bytes. Every other request goes through
/// `bitwise_unary_op_helper`.
pub fn buffer_unary_not(
    left: &Buffer,
    offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    // SIMD fast path; compiled out entirely when the feature is disabled.
    #[cfg(feature = "simd")]
    {
        let byte_aligned = offset_in_bits % 8 == 0 && len_in_bits % 8 == 0;
        if byte_aligned {
            return bitwise_unary_op_simd_helper(
                left,
                offset_in_bits / 8,
                len_in_bits / 8,
                |lanes| !lanes,
                |byte| !byte,
            );
        }
    }

    // Bit-level path for unaligned ranges and for builds without SIMD.
    bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |word| !word)
}