use crate::ImageSize;
use crate::filter1d::Arena;
use crate::filter2d::scan_point_2d::ScanPoint2d;
use crate::mlaf::mlaf;
use crate::to_storage::ToStorage;
use num_traits::{AsPrimitive, MulAdd};
use std::ops::{Add, Mul};
/// Convolves a single output row `y` of an image with a sparse 2D kernel.
///
/// For every kernel tap in `prepared_kernel` a tail slice of `arena_source` is
/// taken, starting at the tap's `(x, y)` offset shifted by the arena padding
/// (`arena.pad_w`, `arena.pad_h`) and by the current row `y`. Each output
/// element `dst[i]` is then the weighted accumulation over all taps of the
/// `i`-th element of the corresponding slice, converted back to `T` via `to_()`.
///
/// * `arena` - layout of the source buffer; `width`, `components`, `pad_w`
///   and `pad_h` are read from it.
/// * `arena_source` - source samples addressed with a row stride of
///   `arena.width * arena.components`.
/// * `dst` - destination row; the first `image_size.width * arena.components`
///   elements are written.
/// * `image_size` - only `width` is read.
/// * `prepared_kernel` - kernel taps (offset plus weight). If it is empty the
///   function returns without touching `dst`.
/// * `y` - row index added to every tap's vertical offset.
///
/// # Safety contract (not checked at runtime)
///
/// All accesses are unchecked. The caller must guarantee that for every tap the
/// shifted start index plus `image_size.width * arena.components` stays within
/// `arena_source`, that the shifted coordinates are non-negative, and that
/// `dst` holds at least `image_size.width * arena.components` elements.
pub(crate) fn convolve_segment_2d<T, F>(
    arena: Arena,
    arena_source: &[T],
    dst: &mut [T],
    image_size: ImageSize,
    prepared_kernel: &[ScanPoint2d<F>],
    y: usize,
) where
    T: Copy + AsPrimitive<F>,
    F: ToStorage<T> + Mul<Output = F> + MulAdd<F, Output = F> + Add<Output = F>,
{
    // Reading tap 0 unchecked below would be undefined behaviour for an empty
    // kernel, so bail out before entering the unsafe region.
    if prepared_kernel.is_empty() {
        return;
    }

    // SAFETY: kernel tap 0 exists (checked above); every other unchecked access
    // relies on the caller contract described in the function documentation.
    unsafe {
        let width = image_size.width;
        let dx = arena.pad_w as i64;
        let dy = arena.pad_h as i64;
        let arena_stride = arena.width * arena.components;

        // One tail slice of the source per kernel tap, so the inner loops can
        // index every tap with the same column offset.
        let offsets = prepared_kernel
            .iter()
            .map(|point| {
                let row = (point.y + dy + y as i64) as usize;
                let col = (point.x + dx) as usize;
                arena_source.get_unchecked((row * arena_stride + col * arena.components)..)
            })
            .collect::<Vec<_>>();

        let length = prepared_kernel.len();
        let total_width = width * arena.components;

        let k_weight = prepared_kernel.get_unchecked(0).weight;
        let off0 = *offsets.get_unchecked(0);

        let mut cx = 0usize;

        // Main loop: four output elements per iteration. The first tap seeds the
        // accumulators with a plain multiply; the remaining taps are folded in
        // through `mlaf` (presumably a multiply-add helper - defined elsewhere).
        while cx + 4 < total_width {
            let mut k0 = off0.get_unchecked(cx).as_().mul(k_weight);
            let mut k1 = off0.get_unchecked(cx + 1).as_().mul(k_weight);
            let mut k2 = off0.get_unchecked(cx + 2).as_().mul(k_weight);
            let mut k3 = off0.get_unchecked(cx + 3).as_().mul(k_weight);

            for i in 1..length {
                let weight = prepared_kernel.get_unchecked(i).weight;
                let src = *offsets.get_unchecked(i);
                k0 = mlaf(k0, src.get_unchecked(cx).as_(), weight);
                k1 = mlaf(k1, src.get_unchecked(cx + 1).as_(), weight);
                k2 = mlaf(k2, src.get_unchecked(cx + 2).as_(), weight);
                k3 = mlaf(k3, src.get_unchecked(cx + 3).as_(), weight);
            }

            *dst.get_unchecked_mut(cx) = k0.to_();
            *dst.get_unchecked_mut(cx + 1) = k1.to_();
            *dst.get_unchecked_mut(cx + 2) = k2.to_();
            *dst.get_unchecked_mut(cx + 3) = k3.to_();
            cx += 4;
        }

        // Scalar tail for the elements the unrolled loop did not cover.
        for x in cx..total_width {
            let mut k0 = off0.get_unchecked(x).as_().mul(k_weight);
            for i in 1..length {
                let weight = prepared_kernel.get_unchecked(i).weight;
                k0 = mlaf(k0, offsets.get_unchecked(i).get_unchecked(x).as_(), weight);
            }
            // Store at the element actually computed (`x`); `cx` is fixed here
            // and using it would leave the rest of the tail unwritten.
            *dst.get_unchecked_mut(x) = k0.to_();
        }
    }
}