use seq_macro::seq;
use super::repeat;
use super::repeat::repeat;
import_simd!(as simd);
/// Growable buffer of scalars stored as a sequence of `N`-lane SIMD vectors.
///
/// `inner` holds the SIMD vectors and `len` records how many scalar elements
/// are logically present; the flat scalar view returned by `as_ref` is
/// truncated to `len`.
#[derive(Clone, Debug, Default)]
pub struct SimdVec<T, const N: usize>
where
T: simd::SimdElement,
simd::LaneCount<N>: simd::SupportedLaneCount,
{
// Backing storage, one entry per SIMD vector.
inner: Vec<simd::Simd<T, N>>,
// Number of scalar elements considered valid (may be less than
// `inner.len() * N` when the last vector is only partially used).
len: usize,
}
// Iterator type returned by `SimdVec::iter_simd`: a plain slice iterator over
// the SIMD vectors.
type IterSimd<'a, T, const N: usize> = std::slice::Iter<'a, simd::Simd<T, N>>;
impl<T, const N: usize> SimdVec<T, N>
where
    T: simd::SimdElement + Default,
    simd::LaneCount<N>: simd::SupportedLaneCount,
{
    /// Creates an empty vector: no SIMD words and a scalar length of zero.
    pub fn new() -> Self {
        Self {
            len: 0,
            inner: Vec::new(),
        }
    }

    /// Builds a vector by packing `data` into SIMD words.
    ///
    /// Equivalent to `new()` followed by `reset_from_slice(data)`.
    #[inline]
    pub fn from_slice(data: &[T]) -> Self {
        let mut packed = Self::new();
        packed.reset_from_slice(data);
        packed
    }

    /// Number of scalar elements recorded for this vector.
    #[inline]
    #[allow(dead_code)]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Number of SIMD words currently stored.
    #[inline]
    pub fn simd_len(&self) -> usize {
        self.inner.len()
    }

    /// Replaces the contents with `data`, packed via `pack_into_simd_vec`,
    /// and records `data.len()` as the scalar length.
    #[inline]
    pub fn reset_from_slice(&mut self, data: &[T]) {
        self.len = data.len();
        pack_into_simd_vec(data, &mut self.inner);
    }

    /// Iterates over the stored SIMD words.
    #[inline]
    pub fn iter_simd(&self) -> IterSimd<'_, T, N> {
        self.as_ref_simd().iter()
    }

    /// Resizes the storage to hold `new_len` scalars (`ceil(new_len / N)`
    /// SIMD words), filling any newly added words with `value`.
    #[inline]
    #[allow(dead_code)]
    pub fn resize(&mut self, new_len: usize, value: simd::Simd<T, N>) {
        let words = new_len.div_ceil(N);
        self.inner.resize(words, value);
        self.len = new_len;
    }

    /// Replaces the contents with SIMD words drawn from `iter`.
    ///
    /// At most `ceil(new_len / N)` words are consumed. The scalar length is
    /// set to `new_len` regardless of how many words `iter` actually yields,
    /// so a short iterator leaves `len` larger than the stored data.
    #[inline]
    pub fn reset_from_iter_simd<I>(&mut self, new_len: usize, iter: I)
    where
        I: Iterator<Item = simd::Simd<T, N>>,
    {
        let words = new_len.div_ceil(N);
        self.inner.clear();
        self.inner.reserve(words);
        self.inner.extend(iter.take(words));
        self.len = new_len;
    }

    /// Flat scalar view of the first `len` elements.
    ///
    /// # Panics
    ///
    /// Panics if `len` exceeds the number of scalars actually stored
    /// (`simd_len() * N`).
    #[inline]
    pub fn as_ref(&self) -> &[T] {
        let flat = transmute_and_flatten_simd(&self.inner);
        &flat[0..self.len]
    }

    /// Borrows the stored SIMD words as a slice.
    #[inline]
    pub fn as_ref_simd(&self) -> &[simd::Simd<T, N>] {
        self.inner.as_slice()
    }

    /// Mutably borrows the stored SIMD words as a slice.
    #[inline]
    #[allow(dead_code)]
    pub fn as_mut_simd(&mut self) -> &mut [simd::Simd<T, N>] {
        self.inner.as_mut_slice()
    }
}
/// Deinterleaves `interleaved` into per-channel planes for any channel count.
///
/// Sample `t` of channel `ch` is written to `dest[channel_stride * ch + t]`.
/// `dest.len() / channels` samples are written per channel; positions beyond
/// the `interleaved.len() / channels` available input frames are set to zero.
///
/// # Panics
///
/// Panics if `channels` is zero, or if a computed destination index is out of
/// bounds for `dest`.
#[inline]
#[allow(dead_code)]
fn deinterleave_gen(interleaved: &[i32], channels: usize, channel_stride: usize, dest: &mut [i32]) {
    let frames_out = dest.len() / channels;
    let frames_in = interleaved.len() / channels;
    for t in 0..frames_out {
        // `None` once the input is exhausted; those output samples become zero.
        let frame = (t < frames_in).then(|| &interleaved[channels * t..channels * (t + 1)]);
        for ch in 0..channels {
            dest[channel_stride * ch + t] = frame.map_or(0i32, |f| f[ch]);
        }
    }
}
// Generates `deinterleave_ch2` through `deinterleave_ch8`, one specialization
// per channel count `N`. Each writes sample `t` of channel `ch` to
// `dest[channel_stride * ch + t]` for `t` in `0..dest.len() / N`, using zero
// once `t` reaches the `interleaved.len() / N` available input frames.
//
// NOTE(review): `repeat!` is defined in `super::repeat` and is not visible
// here. It presumably expands the body once per `offset` in 0..32 (stopping
// when the `while` guard fails) and once per `ch` in 0..N — confirm against
// the macro definition.
seq!(N in 2..=8 {
#[inline]
#[allow(dead_code)]
#[allow(clippy::cognitive_complexity)]
fn deinterleave_ch~N(interleaved: &[i32], channel_stride: usize, dest: &mut [i32]) {
// Samples per channel to produce, and samples per channel available.
let dst_samples = dest.len() / N;
let src_samples = interleaved.len() / N;
// `t0` is the first sample index of the current block of 32.
let mut t0 = 0;
while t0 < dst_samples {
repeat!(offset to 32 ; while (t0 + offset) < dst_samples => {
repeat!(ch to N => {
// Copy from the input while it lasts, otherwise pad with zero.
dest[channel_stride * ch + t0 + offset] = if (t0 + offset) < src_samples {
interleaved[N * (t0 + offset) + ch]
} else {
0i32
};
});
});
t0 += 32;
}
}
});
/// Single-channel "deinterleave": copies the input into `dest`.
///
/// The first `min(dest.len(), interleaved.len())` samples are copied. Any
/// remaining samples in `dest` are set to zero, matching the zero padding
/// that the multi-channel and generic deinterleavers apply when the input is
/// shorter than the destination. `_channel_stride` is unused because there is
/// only one channel plane.
fn deinterleave_ch1(interleaved: &[i32], _channel_stride: usize, dest: &mut [i32]) {
    let n = std::cmp::min(dest.len(), interleaved.len());
    let (filled, padding) = dest.split_at_mut(n);
    filled.copy_from_slice(&interleaved[..n]);
    // Without this, stale data from a previous use of `dest` would survive
    // past the end of a short input.
    padding.fill(0i32);
}
/// Number of stereo frames handled per SIMD iteration in
/// `deinterleave_ch2_simd`; each iteration consumes twice this many `i32`s.
#[cfg(feature = "simd-nightly")]
const DEINTERLEAVE_CH2_SIMD_LANES: usize = 16;
/// Two-channel deinterleave that processes the aligned middle of the input
/// with SIMD.
///
/// Channel 0 is written starting at `dest[0]`, channel 1 starting at
/// `dest[channel_stride]`. After the input is consumed, the rest of each
/// channel plane is filled with zeros.
///
/// Only the first `min(dest.len(), interleaved.len())` input elements are
/// used; a debug assertion requires that count to be even.
#[cfg(feature = "simd-nightly")]
fn deinterleave_ch2_simd(interleaved: &[i32], channel_stride: usize, dest: &mut [i32]) {
let n = std::cmp::min(dest.len(), interleaved.len());
debug_assert!(
n % 2 == 0,
"input length (interleaved.len={}, n={}) must be a multiple of the channel count (2).",
interleaved.len(),
n
);
// Split into an unaligned prefix, SIMD-aligned vectors of 32 elements
// (16 stereo frames each), and an unaligned suffix.
let (head, body, foot) =
slice_as_simd::<i32, { DEINTERLEAVE_CH2_SIMD_LANES * 2 }>(&interleaved[0..n]);
// `ch_offset` becomes 1 when the prefix ends in the middle of a frame, i.e.
// the body then starts with a channel-1 sample. `t` is the index of the
// next frame to complete.
let mut ch_offset = 0;
let mut t = 0;
for chunk in head.chunks(2) {
dest[t] = chunk[0];
if chunk.len() == 2 {
dest[channel_stride + t] = chunk[1];
t += 1;
} else {
// Trailing single element: channel 0 of frame `t` is written, its
// channel-1 partner is the first element of the body (or foot).
ch_offset = 1;
}
}
for v in body {
// Split the 32-element vector into two 16-lane halves and separate the
// even-indexed from the odd-indexed elements.
let first: simd::Simd<i32, DEINTERLEAVE_CH2_SIMD_LANES> =
simd::Simd::from_slice(&v[..DEINTERLEAVE_CH2_SIMD_LANES]);
let last: simd::Simd<i32, DEINTERLEAVE_CH2_SIMD_LANES> =
simd::Simd::from_slice(&v[DEINTERLEAVE_CH2_SIMD_LANES..]);
let (even, odd) = first.deinterleave(last);
if ch_offset == 0 {
// Frame-aligned: even elements are channel 0, odd are channel 1.
dest[t..t + DEINTERLEAVE_CH2_SIMD_LANES].copy_from_slice(even.as_array());
dest[channel_stride + t..channel_stride + t + DEINTERLEAVE_CH2_SIMD_LANES]
.copy_from_slice(odd.as_array());
} else {
// Shifted by one element: even elements are channel 1 of frames
// `t..`, odd elements are channel 0 of frames `t + 1..`.
#[allow(clippy::range_plus_one)]
dest[(t + 1)..(t + DEINTERLEAVE_CH2_SIMD_LANES + 1)].copy_from_slice(odd.as_array());
dest[channel_stride + t..channel_stride + t + DEINTERLEAVE_CH2_SIMD_LANES]
.copy_from_slice(even.as_array());
}
t += DEINTERLEAVE_CH2_SIMD_LANES;
}
if ch_offset == 1 {
// Complete the pending frame: its channel-1 sample is the first element
// of the suffix.
dest[channel_stride + t] = foot[0];
t += 1;
}
// Remaining whole frames in the suffix; a trailing unpaired element is
// ignored.
for chunk in foot[ch_offset..].chunks(2) {
if chunk.len() == 2 {
dest[t] = chunk[0];
dest[channel_stride + t] = chunk[1];
t += 1;
}
}
// Zero the unused tail of both channel planes.
dest[t..channel_stride].fill(0i32);
dest[channel_stride + t..].fill(0i32);
}
pub fn deinterleave(interleaved: &[i32], channels: usize, channel_stride: usize, dest: &mut [i32]) {
#[cfg(feature = "simd-nightly")]
{
if channels == 2 {
return deinterleave_ch2_simd(interleaved, channel_stride, dest);
}
}
seq!(CH in 1..=8 {
#[allow(clippy::unnecessary_semicolon)]
if channels == CH {
return deinterleave_ch~CH(interleaved, channel_stride, dest);
}
});
deinterleave_gen(interleaved, channels, channel_stride, dest);
}
/// Decodes little-endian signed integers of `BPS` bytes each into `dest`.
///
/// Each sample is placed in the most significant `BPS` bytes of a 32-bit
/// word and then shifted right arithmetically, which sign-extends it.
/// Elements of `dest` beyond the decoded samples are left untouched.
///
/// # Panics
///
/// Panics if `bytes.len()` is not a multiple of `BPS`, or if `dest` is too
/// short to hold `bytes.len() / BPS` samples.
fn le_bytes_to_i32s_impl<const BPS: usize>(bytes: &[u8], dest: &mut [i32]) {
    let t_end = bytes.len();
    assert!(t_end % BPS == 0, "t_end={t_end}, BPS={BPS}");
    assert!(dest.len() >= t_end / BPS);
    for (out, sample) in dest.iter_mut().zip(bytes.chunks_exact(BPS)) {
        // Number of low-order zero bytes placed below the sample.
        let pad = 4 - BPS;
        let word: [u8; 4] =
            std::array::from_fn(|i| i.checked_sub(pad).map_or(0u8, |k| sample[k]));
        *out = i32::from_le_bytes(word) >> (pad * 8);
    }
}
/// Number of 16-bit samples produced per SIMD iteration in
/// `le16_bytes_to_i32s`; each iteration consumes twice this many bytes.
#[cfg(feature = "simd-nightly")]
const LE16_BYTES_TO_I32S_LANES: usize = 16;
/// Decodes little-endian signed 16-bit samples from `bytes` into `dest`,
/// using SIMD for the aligned middle part of the input.
///
/// Falls back to `le_bytes_to_i32s_impl::<2>` when the unaligned prefix has
/// an odd number of bytes, because samples would then straddle the SIMD
/// vector boundaries.
///
/// Unlike the fallback, the SIMD path does not check lengths up front; it
/// indexes `dest` (and the prefix/suffix) directly and panics on an
/// out-of-range index.
#[cfg(feature = "simd-nightly")]
fn le16_bytes_to_i32s(bytes: &[u8], dest: &mut [i32]) {
// Unaligned prefix, 32-byte vectors, unaligned suffix.
let (head, body, foot) = slice_as_simd::<u8, { LE16_BYTES_TO_I32S_LANES * 2 }>(bytes);
if head.len() % 2 != 0 {
return le_bytes_to_i32s_impl::<2>(bytes, dest);
}
// `off` is the next output index.
let mut off = 0;
let mut n = 0;
while n < head.len() {
// High byte is sign-extended through `i8`; low byte is zero-extended.
dest[off] = (i32::from(head[n + 1] as i8) << 8) | i32::from(head[n]);
n += 2;
off += 1;
}
for v in body {
// Separate even-indexed (low) bytes from odd-indexed (high) bytes.
let first: simd::Simd<u8, LE16_BYTES_TO_I32S_LANES> =
simd::Simd::from_slice(&v[..LE16_BYTES_TO_I32S_LANES]);
let last: simd::Simd<u8, LE16_BYTES_TO_I32S_LANES> =
simd::Simd::from_slice(&v[LE16_BYTES_TO_I32S_LANES..]);
let (evens, odds) = first.deinterleave(last);
// Recombine as 16-bit words, reinterpret as signed, then widen to i32.
let evens = evens.cast::<u16>();
let odds = odds.cast::<u16>() << simd::Simd::splat(8u16);
let ints = (evens | odds).cast::<i16>();
dest[off..off + LE16_BYTES_TO_I32S_LANES].copy_from_slice(ints.cast().as_array());
off += LE16_BYTES_TO_I32S_LANES;
}
// With an even-length prefix and a 32-byte body, an even-length input
// leaves an even-length suffix.
debug_assert!(foot.len() % 2 == 0);
let mut n = 0;
while n < foot.len() {
dest[off] = (i32::from(foot[n + 1] as i8) << 8) | i32::from(foot[n]);
n += 2;
off += 1;
}
}
/// Decodes little-endian signed samples of `bytes_per_sample` bytes each from
/// `bytes` into `dest`.
///
/// Sample widths of 1 to 4 bytes are supported. For 2-byte samples the SIMD
/// decoder is used when the `simd-nightly` feature is enabled.
///
/// # Panics
///
/// Panics if `bytes_per_sample` is 0 or greater than 4. The underlying
/// decoders also panic on inputs they cannot handle, e.g. when `bytes.len()`
/// is not a multiple of the sample width.
pub fn le_bytes_to_i32s(bytes: &[u8], dest: &mut [i32], bytes_per_sample: usize) {
    match bytes_per_sample {
        1 => le_bytes_to_i32s_impl::<1>(bytes, dest),
        2 => {
            #[cfg(feature = "simd-nightly")]
            {
                le16_bytes_to_i32s(bytes, dest);
            }
            #[cfg(not(feature = "simd-nightly"))]
            {
                le_bytes_to_i32s_impl::<2>(bytes, dest);
            }
        }
        3 => le_bytes_to_i32s_impl::<3>(bytes, dest),
        4 => le_bytes_to_i32s_impl::<4>(bytes, dest),
        _ => panic!("bytes_per_samples > 4 or bytes_per_samples == 0"),
    }
}
/// Serializes each integer in `ints` as its `bytes_per_sample` least
/// significant bytes, little-endian, packed back to back into `dest`.
///
/// Bytes of `dest` past `ints.len() * bytes_per_sample` are left untouched.
///
/// # Panics
///
/// Panics if `dest` is too short, or if `bytes_per_sample` exceeds 4 while
/// `ints` is non-empty.
#[allow(dead_code)]
pub fn i32s_to_le_bytes(ints: &[i32], dest: &mut [u8], bytes_per_sample: usize) {
    for (index, sample) in ints.iter().enumerate() {
        let le = sample.to_le_bytes();
        let base = index * bytes_per_sample;
        // Indexed on purpose: out-of-range widths or a short `dest` must
        // panic at the same byte as a plain byte-by-byte copy would.
        #[allow(clippy::needless_range_loop)]
        for k in 0..bytes_per_sample {
            dest[base + k] = le[k];
        }
    }
}
/// Returns `true` when no element of `samples` compares unequal (`!=`) to the
/// first one.
///
/// Empty and single-element slices are therefore constant. For types whose
/// values can be unequal to themselves (e.g. a floating-point NaN), any such
/// value after the first position makes the slice non-constant.
pub fn is_constant<T: PartialEq>(samples: &[T]) -> bool {
    match samples.split_first() {
        None => true,
        Some((first, rest)) => !rest.iter().any(|other| first != other),
    }
}
/// Packs the scalars of `src` into `dest` as `LANES`-wide SIMD vectors.
///
/// `dest` is cleared and resized to `ceil(src.len() / LANES)` vectors. Lanes
/// of the last vector that lie past the end of `src` hold `T::default()`.
#[inline]
pub fn pack_into_simd_vec<T, const LANES: usize>(src: &[T], dest: &mut Vec<simd::Simd<T, LANES>>)
where
    T: simd::SimdElement + Default,
    simd::LaneCount<LANES>: simd::SupportedLaneCount,
{
    let scalar_count = src.len();
    let vector_count = scalar_count.div_ceil(LANES);
    // Start from all-default vectors so the padding lanes are well defined.
    dest.clear();
    dest.resize(vector_count, simd::Simd::<T, LANES>::default());
    let flat = transmute_and_flatten_simd_mut(dest);
    flat[0..scalar_count].copy_from_slice(src);
}
/// Unpacks SIMD vectors into a flat `Vec` of scalars.
///
/// `dest` is resized to exactly `src.len() * LANES` elements and every lane
/// of every vector is copied out in order.
#[allow(dead_code)]
pub fn unpack_simds<T, const LANES: usize>(src: &[simd::Simd<T, LANES>], dest: &mut Vec<T>)
where
    T: simd::SimdElement + Default,
    simd::LaneCount<LANES>: simd::SupportedLaneCount,
{
    dest.resize(src.len() * LANES, T::default());
    for (index, v) in src.iter().enumerate() {
        let lanes = <[T; LANES]>::from(*v);
        let start = index * LANES;
        dest[start..start + LANES].copy_from_slice(&lanes);
    }
}
/// Splits `data` into `(head, body, foot)`: scalars before the SIMD part, the
/// SIMD vectors themselves, and scalars after.
///
/// With the `simd-nightly` feature this delegates to the slice method
/// `as_simd`. Without it, the whole input is returned as `head` and both
/// `body` and `foot` are empty, so callers process everything on their scalar
/// path.
fn slice_as_simd<T, const N: usize>(data: &[T]) -> (&[T], &[simd::Simd<T, N>], &[T])
where
T: simd::SimdElement,
simd::LaneCount<N>: simd::SupportedLaneCount,
{
#[cfg(feature = "simd-nightly")]
{
data.as_simd()
}
#[cfg(not(feature = "simd-nightly"))]
{
(data, &[], &[])
}
}
/// Mutable counterpart of `slice_as_simd`: splits `data` into
/// `(head, body, foot)` with a SIMD `body`.
///
/// With the `simd-nightly` feature this delegates to the slice method
/// `as_simd_mut`. Without it, the whole input is returned as `head` and both
/// `body` and `foot` are empty.
fn slice_as_simd_mut<T, const N: usize>(
data: &mut [T],
) -> (&mut [T], &mut [simd::Simd<T, N>], &mut [T])
where
T: simd::SimdElement,
simd::LaneCount<N>: simd::SupportedLaneCount,
{
#[cfg(feature = "simd-nightly")]
{
data.as_simd_mut()
}
#[cfg(not(feature = "simd-nightly"))]
{
(data, &mut [], &mut [])
}
}
/// Maps every element of `data` and reduces the results to a single value,
/// using SIMD for the part of `data` that `slice_as_simd` exposes as vectors.
///
/// * `scalar_fn` / `vector_fn` map one element / one SIMD vector.
/// * `scalar_reduce_fn` / `vector_reduce_fn` combine a mapped value with the
///   running scalar / vector accumulator (mapped value first).
/// * `vector_to_scalar_fn` collapses the vector accumulator to a scalar.
/// * `init` seeds the scalar accumulator and every lane of the vector
///   accumulator, so it is combined into the result more than once; it should
///   be an identity of the reduction.
///
/// The scalar prefix is processed first, then the SIMD vectors, then the
/// scalar suffix; finally both accumulators are combined.
fn simd_map_and_reduce<U, const N: usize, T, F, G, Q, R, S>(
    data: &[T],
    scalar_fn: F,
    vector_fn: G,
    scalar_reduce_fn: Q,
    vector_reduce_fn: R,
    vector_to_scalar_fn: S,
    init: U,
) -> U
where
    T: simd::SimdElement,
    U: simd::SimdElement,
    F: Fn(T) -> U,
    G: Fn(simd::Simd<T, N>) -> simd::Simd<U, N>,
    Q: Fn(U, U) -> U,
    R: Fn(simd::Simd<U, N>, simd::Simd<U, N>) -> simd::Simd<U, N>,
    S: Fn(simd::Simd<U, N>) -> U,
    simd::LaneCount<N>: simd::SupportedLaneCount,
{
    let (head, body, foot) = slice_as_simd::<T, N>(data);
    let scalar_acc = head
        .iter()
        .fold(init, |acc, x| scalar_reduce_fn(scalar_fn(*x), acc));
    let vector_acc = body
        .iter()
        .fold(simd::Simd::<U, N>::splat(init), |acc_v, v| {
            vector_reduce_fn(vector_fn(*v), acc_v)
        });
    let scalar_acc = foot
        .iter()
        .fold(scalar_acc, |acc, x| scalar_reduce_fn(scalar_fn(*x), acc));
    scalar_reduce_fn(scalar_acc, vector_to_scalar_fn(vector_acc))
}
/// Sums the absolute values of `data` as `f32`, using `N`-lane SIMD where
/// `slice_as_simd` provides vectors.
///
/// The absolute value is taken in `i32` before converting to `f32`, with
/// wrapping semantics on both the scalar and the vector path. `i32::MIN` has
/// no positive counterpart in `i32`, so it contributes `i32::MIN as f32`
/// rather than its magnitude; the function never panics on it.
pub fn find_sum_abs_f32<const N: usize>(data: &[i32]) -> f32
where
    simd::LaneCount<N>: simd::SupportedLaneCount,
{
    simd_map_and_reduce(
        data,
        // `wrapping_abs` instead of `abs`: plain `abs` overflows on
        // `i32::MIN` and panics when overflow checks are enabled, which made
        // the outcome depend on the build profile and on whether the value
        // fell in the scalar or the vector part of the slice.
        |x| x.wrapping_abs() as f32,
        |v| v.abs().cast(),
        std::ops::Add::add,
        std::ops::Add::add,
        SimdFloat::reduce_sum,
        0.0,
    )
}
/// Returns the largest absolute value in `data` as a `u32`, or 0 for an
/// empty slice.
///
/// Scalars are mapped with `i32::unsigned_abs`; SIMD vectors are mapped with
/// `abs()` followed by a cast. The unit test in this file checks that an
/// input containing `i32::MIN` yields `i32::MIN.unsigned_abs()`.
pub fn find_max_abs<const N: usize>(data: &[i32]) -> u32
where
simd::LaneCount<N>: simd::SupportedLaneCount,
{
simd_map_and_reduce(
data,
i32::unsigned_abs,
|v| v.abs().cast(),
std::cmp::max,
SimdOrd::simd_max,
SimdUint::reduce_max,
// 0 is the identity for `max` over unsigned values.
0,
)
}
/// Returns `(min, max)` over the elements of `data` together with `init`.
///
/// `init` seeds both accumulators, so the returned minimum is never greater
/// than `init` and the returned maximum is never less than it. For an empty
/// slice the result is `(init, init)`.
pub fn find_min_and_max<const N: usize>(data: &[i32], init: i32) -> (i32, i32)
where
    simd::LaneCount<N>: simd::SupportedLaneCount,
{
    let (head, body, foot) = slice_as_simd::<i32, N>(data);

    // Scalar prefix and suffix share one pair of accumulators.
    let (mut lowest, mut highest) = (init, init);
    for &x in head.iter().chain(foot) {
        highest = std::cmp::max(x, highest);
        lowest = std::cmp::min(x, lowest);
    }

    // Lane-wise accumulators for the SIMD part.
    let mut highest_v = simd::Simd::<i32, N>::splat(init);
    let mut lowest_v = simd::Simd::<i32, N>::splat(init);
    for v in body {
        highest_v = v.simd_max(highest_v);
        lowest_v = v.simd_min(lowest_v);
    }

    let highest = std::cmp::max(highest, highest_v.reduce_max());
    let lowest = std::cmp::min(lowest, lowest_v.reduce_min());
    (lowest, highest)
}
/// Returns the largest value in `data`, or 0 for an empty slice.
///
/// Uses `simd_map_and_reduce` with identity mapping functions, `max` as the
/// scalar reduction and lane-wise `simd_max` as the vector reduction.
pub fn find_max<const N: usize>(data: &[u32]) -> u32
where
simd::LaneCount<N>: simd::SupportedLaneCount,
{
simd_map_and_reduce(
data,
// Identity maps: the values are reduced as they are.
#[inline(always)]
|x| x,
#[inline(always)]
|v| v,
std::cmp::max,
SimdOrd::simd_max,
SimdUint::reduce_max,
// 0 is the identity for `max` over unsigned values.
0,
)
}
/// Sums the elements of `data`, using `wrapping_add` on the scalar path and
/// SIMD `+` / `reduce_sum` on the vector path.
///
/// Returns `T::zero()` for an empty slice.
///
/// NOTE(review): the `repeat::Count<N>: repeat::Repeat` bound is not used by
/// anything visible in this function body — presumably kept for callers or
/// for an earlier implementation; confirm before removing.
#[inline]
pub fn wrapping_sum<T, const N: usize>(data: &[T]) -> T
where
T: simd::SimdElement + num_traits::WrappingAdd + num_traits::Zero,
simd::Simd<T, N>: SimdUint<Scalar = T> + std::ops::Add<Output = simd::Simd<T, N>>,
repeat::Count<N>: repeat::Repeat,
simd::LaneCount<N>: simd::SupportedLaneCount,
{
simd_map_and_reduce::<T, N, _, _, _, _, _, _>(
data,
// Identity maps: the values are summed as they are.
#[inline(always)]
|x| x,
#[inline(always)]
|v| v,
// Scalar reduction wraps explicitly on overflow.
#[inline(always)]
|x, y| x.wrapping_add(&y),
#[inline(always)]
|v, w| v + w,
SimdUint::reduce_sum,
T::zero(),
)
}
/// Updates every element of `dest` in place from the element of `src` at the
/// same index, using SIMD for the part of `dest` that `slice_as_simd_mut`
/// exposes as vectors.
///
/// `scalar_fn(&mut d, s)` is called for elements in the scalar prefix and
/// suffix of `dest`; `vector_fn(&mut dv, sv)` is called for each SIMD vector
/// of `dest`, with `sv` loaded from the matching `N` elements of `src`. Only
/// `dest` drives the split, so `src` needs no particular alignment.
///
/// # Panics
///
/// Panics if `src` is shorter than `dest`.
#[inline]
pub fn unaligned_map_and_update<T, const N: usize, U, F, G>(
    src: &[U],
    dest: &mut [T],
    mut scalar_fn: F,
    mut vector_fn: G,
) where
    T: simd::SimdElement,
    U: simd::SimdElement,
    simd::LaneCount<N>: simd::SupportedLaneCount,
    F: FnMut(&mut T, U),
    G: FnMut(&mut simd::Simd<T, N>, simd::Simd<U, N>),
{
    let (head, body, foot) = slice_as_simd_mut::<T, N>(dest);
    // Offsets into `src` at which the SIMD part and the suffix begin.
    let body_start = head.len();
    let foot_start = body_start + body.len() * N;

    for (k, p) in head.iter_mut().enumerate() {
        scalar_fn(p, src[k]);
    }
    for (k, pv) in body.iter_mut().enumerate() {
        let start = body_start + k * N;
        let src_v = simd::Simd::<U, N>::from_slice(&src[start..start + N]);
        vector_fn(pv, src_v);
    }
    for (k, p) in foot.iter_mut().enumerate() {
        scalar_fn(p, src[foot_start + k]);
    }
}
/// Reinterprets a slice of `N`-lane SIMD vectors as a flat slice of
/// `simds.len() * N` scalars without copying.
pub fn transmute_and_flatten_simd<T, const N: usize>(simds: &[simd::Simd<T, N>]) -> &[T]
where
T: simd::SimdElement,
simd::LaneCount<N>: simd::SupportedLaneCount,
{
let newlen = simds.len() * N;
// SAFETY (NOTE(review)): relies on `simd::Simd<T, N>` being laid out as `N`
// contiguous `T` values with no padding, so the input covers exactly
// `newlen` scalars. That holds for `std::simd::Simd`; confirm it for
// whatever type `import_simd!` selects. The returned slice borrows `simds`,
// so it cannot outlive the data.
unsafe { std::slice::from_raw_parts(simds.as_ptr().cast(), newlen) }
}
/// Reinterprets a mutable slice of `N`-lane SIMD vectors as a flat mutable
/// slice of `simds.len() * N` scalars without copying.
pub fn transmute_and_flatten_simd_mut<T, const N: usize>(simds: &mut [simd::Simd<T, N>]) -> &mut [T]
where
T: simd::SimdElement,
simd::LaneCount<N>: simd::SupportedLaneCount,
{
let newlen = simds.len() * N;
// SAFETY (NOTE(review)): relies on `simd::Simd<T, N>` being laid out as `N`
// contiguous `T` values with no padding, so the input covers exactly
// `newlen` scalars. That holds for `std::simd::Simd`; confirm it for
// whatever type `import_simd!` selects. The returned slice mutably borrows
// `simds`, so no aliasing access exists while it is alive.
unsafe { std::slice::from_raw_parts_mut(simds.as_mut_ptr().cast(), newlen) }
}
// Unit tests for the helpers in this file. Tests that exercise SIMD-only
// functions are compiled only with the `simd-nightly` feature.
#[cfg(test)]
mod tests {
use super::*;
#[cfg(feature = "simd-nightly")]
use rand::distributions::Distribution;
#[cfg(feature = "simd-nightly")]
use rand::distributions::Uniform;
// Checks the public dispatcher for 2 and 4 channels and the generic routine
// for 4 channels, including zero padding of a pre-filled destination.
#[test]
fn do_deinterleave() {
let interleaved = [0, 0, -1, -2, 1, 2, -3, 6];
let mut dest = vec![0i32; interleaved.len()];
deinterleave(&interleaved, 2, 4, &mut dest);
assert_eq!(&dest, &[0, -1, 1, -3, 0, -2, 2, 6]);
let interleaved = [0, 0, -1, -2, 1, 2, -3, 6];
let mut dest = vec![-123i32; interleaved.len() + 4];
deinterleave(&interleaved, 4, 3, &mut dest);
assert_eq!(&dest, &[0, 1, 0, 0, 2, 0, -1, -3, 0, -2, 6, 0]);
let mut dest = vec![-123i32; interleaved.len() + 4];
deinterleave_gen(&interleaved, 4, 3, &mut dest);
assert_eq!(&dest, &[0, 1, 0, 0, 2, 0, -1, -3, 0, -2, 6, 0]);
}
// Calls the generated two-channel routine directly with an input shorter
// than the destination and checks both channel planes.
#[test]
#[allow(clippy::needless_range_loop)]
fn deinterleave_ch2_direct_call() {
let block_size: usize = 4096;
let src_len: usize = 123;
let mut src = Vec::new();
src.extend(0i32..(src_len as i32 * 2));
let mut dest = vec![0i32; block_size * 2];
deinterleave_ch2(&src, block_size, &mut dest);
// Channel 0 holds the even input values.
let mut expected = 0i32;
for t in 0..src_len {
assert_eq!(dest[t], expected);
expected += 2;
}
// Channel 1 holds the odd input values.
let mut expected = 1i32;
for t in block_size..(block_size + src_len) {
assert_eq!(dest[t], expected);
expected += 2;
}
}
// 24-bit little-endian decoding, including a negative sample.
#[test]
fn convert_le_bytes_to_ints() {
let bytes = [
0x56, 0x34, 0x12, 0x9B, 0x57, 0x13, 0xFF, 0xFF, 0xFF, 0xAC, 0x68, 0x24,
];
let mut dest = [0i32; 4];
le_bytes_to_i32s(&bytes, &mut dest, 3);
assert_eq!(dest, [0x12_3456, 0x13_579B, -1, 0x24_68AC]);
}
// 8-bit decoding: bytes are interpreted as signed values.
#[test]
fn convert_bytes_to_i8s() {
let bytes = [0x56, 0x34, 0x12, 0x9B, 0x80, 0x13, 0xFF, 0x68];
let mut dest = [0i32; 8];
le_bytes_to_i32s(&bytes, &mut dest, 1);
assert_eq!(dest, [0x56, 0x34, 0x12, -0x65, -0x80, 0x13, -0x01, 0x68]);
}
// Compares the SIMD 16-bit decoder against the scalar reference on random
// bytes, for the full buffer and for a sub-slice starting one byte in.
#[cfg(feature = "simd-nightly")]
#[test]
fn convert_le16_bytes_to_ints() {
let mut rng = rand::thread_rng();
let samples = 64;
let mut bytes = vec![];
for _t in 0..(samples * 2) {
let b = Uniform::from(0..=u8::MAX).sample(&mut rng);
bytes.push(b);
}
let mut reference = vec![0i32; samples];
let mut test = vec![0i32; samples];
le_bytes_to_i32s_impl::<2>(&bytes, &mut reference);
le16_bytes_to_i32s(&bytes, &mut test);
assert_eq!(test, reference);
let mut reference = vec![0i32; samples];
let mut test = vec![0i32; samples];
le_bytes_to_i32s(&bytes[1..samples * 2 - 1], &mut reference, 2);
le16_bytes_to_i32s(&bytes[1..samples * 2 - 1], &mut test);
assert_eq!(test, reference);
}
// Compares the SIMD stereo deinterleaver against the generated scalar one on
// random data, for the full buffer and for a sub-slice starting one element
// in.
#[cfg(feature = "simd-nightly")]
#[test]
fn deinterleave_ch2_simd_version() {
let mut rng = rand::thread_rng();
let samples = 8192;
let mut interleaved = vec![];
for _t in 0..(samples * 2) {
interleaved
.push(Uniform::from(i32::from(i16::MIN)..=i32::from(i16::MAX)).sample(&mut rng));
}
let mut test = vec![0i32; samples];
let mut reference = vec![0i32; samples];
deinterleave_ch2(&interleaved, samples / 2, &mut reference);
deinterleave_ch2_simd(&interleaved, samples / 2, &mut test);
assert_eq!(test, reference);
let mut test = vec![0i32; samples - 2];
let mut reference = vec![0i32; samples - 2];
deinterleave_ch2(
&interleaved[1..samples - 1],
samples / 2 - 1,
&mut reference,
);
deinterleave_ch2_simd(&interleaved[1..samples - 1], samples / 2 - 1, &mut test);
assert_eq!(test, reference);
}
// Constant signals are detected; a trailing NaN makes a float signal
// non-constant.
#[test]
fn constant_detector() {
let signal = vec![5; 64];
assert!(super::is_constant(&signal));
let signal = vec![-3; 192];
assert!(super::is_constant(&signal));
let mut signal = vec![8.2f32; 192];
signal[191] = f32::NAN;
assert!(!super::is_constant(&signal));
}
// Packing pads a partial final vector with zeros and handles exact multiples
// and empty input.
#[test]
fn packing_into_simd_vec() {
let vals: Vec<i16> = (0i16..7i16).collect();
let mut dest: Vec<simd::i16x4> = vec![];
pack_into_simd_vec(&vals, &mut dest);
assert_eq!(
dest,
[
simd::i16x4::from_array([0, 1, 2, 3]),
simd::i16x4::from_array([4, 5, 6, 0])
]
);
let vals: Vec<i16> = (0i16..12i16).collect();
let mut dest: Vec<simd::i16x4> = vec![];
pack_into_simd_vec(&vals, &mut dest);
assert_eq!(
dest,
[
simd::i16x4::from_array([0, 1, 2, 3]),
simd::i16x4::from_array([4, 5, 6, 7]),
simd::i16x4::from_array([8, 9, 10, 11])
]
);
let vals = [];
let mut dest: Vec<simd::i16x4> = vec![];
pack_into_simd_vec(&vals, &mut dest);
assert_eq!(dest, []);
}
// Unpacking yields every lane in order and truncates a non-empty destination
// when the source is empty.
#[test]
fn unpacking_simd_vectors() {
let vs = [
simd::i16x4::from_array([0, 1, 2, 3]),
simd::i16x4::from_array([4, 5, 6, 0]),
];
let mut dest = vec![];
unpack_simds(&vs, &mut dest);
assert_eq!(dest, [0, 1, 2, 3, 4, 5, 6, 0]);
let vs: [simd::i16x4; 0] = [];
let mut dest = vec![1, 2, 3];
unpack_simds(&vs, &mut dest);
assert_eq!(dest, []);
}
// The flat view exposes lanes in order for several element types and lane
// counts.
#[test]
fn simd_ref_can_be_flattened() {
let vs = [
simd::i16x4::from_array([0, 1, 2, 3]),
simd::i16x4::from_array([4, 5, 6, 0]),
];
assert_eq!(transmute_and_flatten_simd(&vs), &[0, 1, 2, 3, 4, 5, 6, 0]);
let vs = [
simd::i8x1::from_array([-1]),
simd::i8x1::from_array([-2]),
simd::i8x1::from_array([-3]),
];
assert_eq!(transmute_and_flatten_simd(&vs), &[-1, -2, -3]);
let vs = [
simd::u64x64::from_array(std::array::from_fn(|d| d as u64)),
simd::u64x64::from_array(std::array::from_fn(|d| d as u64 + 1)),
simd::u64x64::from_array(std::array::from_fn(|d| d as u64 + 2)),
];
let flat_view = transmute_and_flatten_simd(&vs);
for (i, v) in flat_view.iter().enumerate() {
// Vector index plus lane index reproduces the value stored above.
let offset = i as u64 / 64;
assert_eq!(*v, offset + i as u64 % 64);
}
}
// The maximum absolute value of an input containing `i32::MIN` is its
// unsigned magnitude.
#[test]
fn find_max_abs_works() {
let vs = [
0,
0,
0,
i32::MIN,
i32::MAX,
1,
2,
3,
4,
5,
i32::MIN,
i32::MAX,
0,
0,
0,
];
assert_eq!(find_max_abs::<4>(&vs), i32::MIN.unsigned_abs());
}
// `init` takes part in the result: it bounds the minimum from above and the
// maximum from below.
#[test]
fn find_min_and_max_works() {
let vs = [
123,
234,
345,
234,
i32::MAX,
456,
3,
3,
4,
5,
i32::MIN,
i32::MAX,
2,
-2,
2,
];
assert_eq!(find_min_and_max::<4>(&vs, 0i32), (i32::MIN, i32::MAX));
let vs = [123, 234, 345, 234, i32::MAX, 456, 3, 3, 4, i32::MAX, 2, 2];
assert_eq!(find_min_and_max::<4>(&vs, 0i32), (0i32, i32::MAX));
let vs = [123, 234, 345, 234, i32::MIN, 3, 3, 4, i32::MIN, 2, 2];
assert_eq!(find_min_and_max::<4>(&vs, 456i32), (i32::MIN, 456));
}
}
// Micro-benchmarks using the `test` crate's `Bencher`; compiled only for
// tests with the `simd-nightly` feature.
#[cfg(all(test, feature = "simd-nightly"))]
mod bench {
use super::*;
extern crate test;
use test::bench::Bencher;
use test::black_box;
// Packs 4096 `i32`s into 16-lane vectors.
#[bench]
fn simd_packing(b: &mut Bencher) {
let mut src = Vec::new();
src.extend(0i32..4096i32);
let mut dest: Vec<simd::Simd<i32, 16>> = Vec::new();
b.iter(|| pack_into_simd_vec(black_box(&src), black_box(&mut dest)));
}
// Generated scalar stereo deinterleaver on 4096 frames.
#[bench]
fn deinterleave_ch2_direct_call(b: &mut Bencher) {
let mut src = Vec::new();
src.extend(0i32..(4096i32 * 2));
let mut dest = vec![0i32; 4096 * 2];
b.iter(|| deinterleave_ch2(black_box(&src), black_box(4096), black_box(&mut dest)));
}
// SIMD stereo deinterleaver on the same input, for comparison.
#[bench]
fn deinterleave_ch2_simd_version(b: &mut Bencher) {
let mut src = Vec::new();
src.extend(0i32..(4096i32 * 2));
let mut dest = vec![0i32; 4096 * 2];
b.iter(|| deinterleave_ch2_simd(black_box(&src), black_box(4096), black_box(&mut dest)));
}
// 16-bit decoding of a 16 KiB buffer passed whole.
#[bench]
fn le16_to_i32s(b: &mut Bencher) {
let mut src = Vec::new();
for _n in 0..64 {
src.extend(0u8..=255u8);
}
let mut dest = vec![0i32; 4096 * 2];
b.iter(|| le_bytes_to_i32s(black_box(&src), black_box(&mut dest), 2));
}
// 16-bit decoding of the same buffer with one byte trimmed from each end,
// so the input starts one byte later than in the benchmark above.
#[bench]
fn le16_to_i32s_nonaligned(b: &mut Bencher) {
let mut src = Vec::new();
for _n in 0..64 {
src.extend(0u8..=255u8);
}
let mut dest = vec![0i32; 4096 * 2];
let src_off = &src[1..src.len() - 1];
b.iter(|| le_bytes_to_i32s(black_box(src_off), black_box(&mut dest), 2));
}
}