use crate::filter1d::arena::Arena;
use crate::img_size::ImageSize;
use crate::mlaf::mlaf;
use crate::primitives::PrimitiveCast;
use crate::to_storage::ToStorage;
use num_traits::MulAdd;
use std::ops::{Add, Mul};
/// Applies a 1-D filter to one row of samples.
///
/// For every output index `x` in `0..image_size.width * N` this computes the sum
/// over `i` of `arena_src[x + i * N] * scanned_kernel[i]`, accumulating in `F`
/// (each source sample is converted with `PrimitiveCast::cast_`), and stores the
/// result in `dst[x]` after converting back to `T` with `ToStorage::to_`.
///
/// `N` is the element stride between consecutive kernel taps (presumably the
/// number of interleaved channels per pixel; confirm against the callers).
/// The leading `Arena` argument is not used by this implementation.
///
/// # Panics
///
/// When `image_size.width * N` is non-zero, panics if `scanned_kernel` is empty,
/// if `arena_src` holds fewer than
/// `image_size.width * N + (scanned_kernel.len() - 1) * N` elements, or if `dst`
/// holds fewer than `image_size.width * N` elements. These checks run once per
/// call and are what make the unchecked indexing below sound.
pub(crate) fn filter_row<T, F, const N: usize>(
    _: Arena,
    arena_src: &[T],
    dst: &mut [T],
    image_size: ImageSize,
    scanned_kernel: &[F],
) where
    T: Copy + PrimitiveCast<F>,
    F: ToStorage<T> + Mul<Output = F> + MulAdd<F, Output = F> + Add<F, Output = F>,
{
    let max_width = image_size.width * N;
    if max_width == 0 {
        // Empty row: there is nothing to read or write.
        return;
    }

    let length = scanned_kernel.len();
    assert!(
        length > 0,
        "filter_row: kernel must contain at least one coefficient"
    );
    // Highest source index touched is `(max_width - 1) + (length - 1) * N`.
    let required_src = max_width + (length - 1) * N;
    assert!(
        arena_src.len() >= required_src,
        "filter_row: source row holds {} elements but {} are required",
        arena_src.len(),
        required_src
    );
    assert!(
        dst.len() >= max_width,
        "filter_row: destination row holds {} elements but {} are required",
        dst.len(),
        max_width
    );

    // SAFETY: the assertions above guarantee that
    //  * `scanned_kernel` has `length >= 1` elements, so indices `0..length` are valid;
    //  * every source index used is at most `(max_width - 1) + (length - 1) * N`,
    //    which is below `required_src <= arena_src.len()`;
    //  * every destination index used is below `max_width <= dst.len()`.
    unsafe {
        // The first tap initialises the accumulators, so it is loaded once up front.
        let first_coeff = *scanned_kernel.get_unchecked(0);

        // Main loop: four adjacent outputs per iteration.
        let mut cx = 0usize;
        while cx + 4 <= max_width {
            let shifted_src = arena_src.get_unchecked(cx..);

            let mut k0 = (*shifted_src.get_unchecked(0)).cast_().mul(first_coeff);
            let mut k1 = (*shifted_src.get_unchecked(1)).cast_().mul(first_coeff);
            let mut k2 = (*shifted_src.get_unchecked(2)).cast_().mul(first_coeff);
            let mut k3 = (*shifted_src.get_unchecked(3)).cast_().mul(first_coeff);

            for i in 1..length {
                let coeff = *scanned_kernel.get_unchecked(i);
                let tap = i * N;
                k0 = mlaf(k0, (*shifted_src.get_unchecked(tap)).cast_(), coeff);
                k1 = mlaf(k1, (*shifted_src.get_unchecked(tap + 1)).cast_(), coeff);
                k2 = mlaf(k2, (*shifted_src.get_unchecked(tap + 2)).cast_(), coeff);
                k3 = mlaf(k3, (*shifted_src.get_unchecked(tap + 3)).cast_(), coeff);
            }

            *dst.get_unchecked_mut(cx) = k0.to_();
            *dst.get_unchecked_mut(cx + 1) = k1.to_();
            *dst.get_unchecked_mut(cx + 2) = k2.to_();
            *dst.get_unchecked_mut(cx + 3) = k3.to_();
            cx += 4;
        }

        // Tail: the remaining (fewer than four) outputs, one at a time.
        for x in cx..max_width {
            let shifted_src = arena_src.get_unchecked(x..);
            let mut k0 = (*shifted_src.get_unchecked(0)).cast_().mul(first_coeff);
            for i in 1..length {
                let coeff = *scanned_kernel.get_unchecked(i);
                k0 = mlaf(k0, (*shifted_src.get_unchecked(i * N)).cast_(), coeff);
            }
            *dst.get_unchecked_mut(x) = k0.to_();
        }
    }
}