#![forbid(unsafe_code)]
use crate::color_group::{ColorGroup, ld_g, st_g};
use crate::filter_weights::FilterWeights;
use crate::saturate_narrow::SaturateNarrow;
use crate::support::ROUNDING_CONST;
use num_traits::{AsPrimitive, WrappingAdd, WrappingMul};
use std::ops::{Add, AddAssign, Mul};
/// Convolves one row of interleaved `CN`-channel pixels with per-pixel
/// `i16` weights, accumulating in `J` and storing the narrowed result as `T`.
///
/// `dst` is walked as whole `CN`-element pixels (a trailing partial pixel is
/// ignored). Each output pixel is paired with one entry of
/// `filter_weights.bounds` and one `filter_weights.aligned_size`-long run of
/// `filter_weights.weights`; iteration ends as soon as any of the three
/// sequences is exhausted.
///
/// For every output pixel the accumulator starts at `ROUNDING_CONST`, then
/// gains `weight * source_pixel` for the first `bounds.size` weights applied
/// to the `bounds.size` source pixels beginning at pixel index `bounds.start`.
/// The sum is finished with `saturate_narrow(bit_depth)` and written to `dst`.
///
/// # Panics
///
/// Panics if `src` does not contain the window described by a bounds entry,
/// if `bounds.size` exceeds `filter_weights.aligned_size`, or if
/// `filter_weights.aligned_size` or `CN` is zero.
pub(crate) fn convolve_row_handler_fixed_point<
    T: Copy + 'static + AsPrimitive<J> + Default,
    J: Copy
        + 'static
        + AsPrimitive<T>
        + WrappingMul<Output = J>
        + AddAssign
        + SaturateNarrow<T>
        + Default
        + Add<J, Output = J>
        + WrappingAdd<Output = J>,
    const CN: usize,
>(
    src: &[T],
    dst: &mut [T],
    filter_weights: &FilterWeights<i16>,
    bit_depth: u32,
) where
    i32: AsPrimitive<J>,
    i16: AsPrimitive<J>,
{
    let (dst_pixels, _) = dst.as_chunks_mut::<CN>();
    // One fixed-width run of weights per output pixel.
    let kernel_rows = filter_weights
        .weights
        .chunks_exact(filter_weights.aligned_size);
    let taps = filter_weights.bounds.iter().zip(kernel_rows);

    for (out_px, (&span, kernel)) in dst_pixels.iter_mut().zip(taps) {
        // Element offset of the first contributing source pixel.
        let first = span.start * CN;
        let window = &src[first..first + span.size * CN];
        let (window_pixels, _) = window.as_chunks::<CN>();

        // Only the leading `span.size` weights of the run are applied.
        let acc = kernel[..span.size].iter().zip(window_pixels.iter()).fold(
            ColorGroup::<CN, J>::dup(ROUNDING_CONST.as_()),
            |acc, (&tap, in_px)| {
                let coeff: J = tap.as_();
                let loaded = ld_g!(in_px, CN, J);
                acc.trunc_add(&loaded.trunc_mul(coeff))
            },
        );

        let packed = acc.saturate_narrow(bit_depth);
        st_g!(packed, out_px, CN);
    }
}
/// Four-row variant of the fixed-point row convolution: applies the same
/// per-pixel weights to four consecutive rows in a single pass.
///
/// Source row `r` (0..=3) starts at element offset `r * src_stride` in `src`.
/// The first three destination rows are the consecutive `dst_stride`-long
/// slices of `dst`; the fourth is everything that remains after them. Each
/// row is walked as whole `CN`-element pixels.
///
/// Output pixels of the four rows are paired with one entry of
/// `filter_weights.bounds` and one `filter_weights.aligned_size`-long run of
/// `filter_weights.weights`; iteration ends when the shortest of these
/// sequences is exhausted. For every output column, each row's accumulator
/// starts at `ROUNDING_CONST`, gains `weight * source_pixel` for the first
/// `bounds.size` weights over the `bounds.size` source pixels beginning at
/// pixel index `bounds.start` of that row, and is then finished with
/// `saturate_narrow(bit_depth)` and stored.
///
/// # Panics
///
/// Panics if `dst` holds fewer than `3 * dst_stride` elements, if `src` does
/// not contain the window described by a bounds entry in any of the four
/// rows, if `bounds.size` exceeds `filter_weights.aligned_size`, or if
/// `filter_weights.aligned_size` or `CN` is zero.
pub(crate) fn convolve_row_handler_fixed_point_4<
    T: Copy + 'static + AsPrimitive<J> + Default,
    J: Copy
        + 'static
        + AsPrimitive<T>
        + Mul<Output = J>
        + AddAssign
        + SaturateNarrow<T>
        + Default
        + Add<J, Output = J>
        + WrappingMul<Output = J>
        + WrappingAdd<Output = J>,
    const CN: usize,
>(
    src: &[T],
    src_stride: usize,
    dst: &mut [T],
    dst_stride: usize,
    filter_weights: &FilterWeights<i16>,
    bit_depth: u32,
) where
    i32: AsPrimitive<J>,
    i16: AsPrimitive<J>,
{
    // Carve the destination into four disjoint mutable rows.
    let (row0, tail) = dst.split_at_mut(dst_stride);
    let (row1, tail) = tail.split_at_mut(dst_stride);
    let (row2, row3) = tail.split_at_mut(dst_stride);

    let (out0, _) = row0.as_chunks_mut::<CN>();
    let (out1, _) = row1.as_chunks_mut::<CN>();
    let (out2, _) = row2.as_chunks_mut::<CN>();
    let (out3, _) = row3.as_chunks_mut::<CN>();

    let outputs = out0
        .iter_mut()
        .zip(out1.iter_mut())
        .zip(out2.iter_mut())
        .zip(out3.iter_mut());
    // One fixed-width run of weights per output column.
    let kernel_rows = filter_weights
        .weights
        .chunks_exact(filter_weights.aligned_size);
    let taps = filter_weights.bounds.iter().zip(kernel_rows);

    for ((((px0, px1), px2), px3), (&span, kernel)) in outputs.zip(taps) {
        // Element offset of the first contributing pixel within a row.
        let first = span.start * CN;
        let width = span.size * CN;

        let in0 = &src[first..first + width];
        let base1 = first + src_stride;
        let in1 = &src[base1..base1 + width];
        let base2 = first + src_stride * 2;
        let in2 = &src[base2..base2 + width];
        let base3 = first + src_stride * 3;
        let in3 = &src[base3..base3 + width];

        let (pixels0, _) = in0.as_chunks::<CN>();
        let (pixels1, _) = in1.as_chunks::<CN>();
        let (pixels2, _) = in2.as_chunks::<CN>();
        let (pixels3, _) = in3.as_chunks::<CN>();

        let columns = pixels0
            .iter()
            .zip(pixels1.iter())
            .zip(pixels2.iter())
            .zip(pixels3.iter());

        // Each weight is converted once and shared by all four rows.
        let (acc0, acc1, acc2, acc3) = kernel[..span.size].iter().zip(columns).fold(
            (
                ColorGroup::<CN, J>::dup(ROUNDING_CONST.as_()),
                ColorGroup::<CN, J>::dup(ROUNDING_CONST.as_()),
                ColorGroup::<CN, J>::dup(ROUNDING_CONST.as_()),
                ColorGroup::<CN, J>::dup(ROUNDING_CONST.as_()),
            ),
            |(a0, a1, a2, a3), (&tap, (((p0, p1), p2), p3))| {
                let coeff: J = tap.as_();
                let v0 = ld_g!(p0, CN, J);
                let v1 = ld_g!(p1, CN, J);
                let v2 = ld_g!(p2, CN, J);
                let v3 = ld_g!(p3, CN, J);
                (
                    a0.trunc_add(&v0.trunc_mul(coeff)),
                    a1.trunc_add(&v1.trunc_mul(coeff)),
                    a2.trunc_add(&v2.trunc_mul(coeff)),
                    a3.trunc_add(&v3.trunc_mul(coeff)),
                )
            },
        );

        let packed0 = acc0.saturate_narrow(bit_depth);
        let packed1 = acc1.saturate_narrow(bit_depth);
        let packed2 = acc2.saturate_narrow(bit_depth);
        let packed3 = acc3.saturate_narrow(bit_depth);

        st_g!(packed0, px0, CN);
        st_g!(packed1, px1, CN);
        st_g!(packed2, px2, CN);
        st_g!(packed3, px3, CN);
    }
}