use crate::primitives::PrimitiveCast;
use crate::stackblur::sliding_window::SlidingWindow;
use crate::stackblur::stack_blur_pass::StackBlurWorkingPass;
use crate::unsafe_slice::UnsafeSlice;
use crate::util::ScratchBuffer;
use std::marker::PhantomData;
use std::ops::{AddAssign, Mul, Sub, SubAssign};
/// Horizontal (row-wise) pass of the stack blur.
///
/// The struct carries no runtime data; it only pins the generic parameters
/// used by the pass implementation:
///
/// * `T`  - element type of the pixel buffer handed to the pass.
/// * `J`  - element type of the sliding-window accumulators.
/// * `F`  - type the accumulated sum is cast to before it is multiplied by
///   the normalisation factor.
/// * `CN` - number of buffer elements the pass advances per pixel.
pub(crate) struct HorizontalStackBlurPass<T, J, F, const CN: usize> {
// Zero-sized markers: they exist solely so that `T`, `J` and `F` count as
// "used" by the struct definition.
_phantom_t: PhantomData<T>,
_phantom_j: PhantomData<J>,
_phantom_f: PhantomData<F>,
}
impl<T, J, F, const CN: usize> Default for HorizontalStackBlurPass<T, J, F, CN> {
fn default() -> Self {
HorizontalStackBlurPass::<T, J, F, CN> {
_phantom_t: Default::default(),
_phantom_j: Default::default(),
_phantom_f: Default::default(),
}
}
}
impl<T, J, F, const CN: usize> HorizontalStackBlurPass<T, J, F, CN>
where
    J: Copy
        + 'static
        + AddAssign<J>
        + Mul<Output = J>
        + Sub<Output = J>
        + PrimitiveCast<F>
        + SubAssign
        + PrimitiveCast<T>
        + Default,
    T: Copy + PrimitiveCast<J> + Default,
    i32: PrimitiveCast<J>,
    u32: PrimitiveCast<J>,
    F: PrimitiveCast<T> + PrimitiveCast<J> + 'static + Copy + Mul<Output = F> + Default,
    usize: PrimitiveCast<J>,
    f32: PrimitiveCast<F>,
{
    /// Runs the horizontal stack-blur pass over the rows owned by `thread`.
    ///
    /// Rows `thread * height / total_threads .. (thread + 1) * height / total_threads`
    /// are processed one after another. For each row a window of
    /// `2 * radius + 1` pixels with triangular weights `1, 2, .., radius + 1, .., 2, 1`
    /// is slid from left to right; the weights add up to `(radius + 1)^2`,
    /// which is why every weighted sum is multiplied by `1 / (radius + 1)^2`
    /// before being written back. Pixels outside the row are replaced by the
    /// nearest edge pixel (first pixel on the left, last pixel on the right).
    ///
    /// Results are written back into `pixels` through `to_store`, at the same
    /// offsets the row was read from.
    ///
    /// # Parameters
    /// * `pixels` - buffer that is both read and written.
    /// * `stride` - offset between the starts of two consecutive rows, in the
    ///   units `SlidingWindow::from_store` indexes with (presumably elements
    ///   of `T` - TODO confirm against `UnsafeSlice`).
    /// * `width` - number of pixels per row; a zero width is a no-op.
    /// * `height` - total number of rows in the image.
    /// * `radius` - blur radius; `0` leaves each pixel equal to its own
    ///   value after the cast round-trip.
    /// * `thread` / `total_threads` - index of this worker and number of
    ///   workers the rows are split between.
    ///
    /// # Panics
    /// Panics if `total_threads` is zero (division by zero while splitting
    /// the rows).
    fn pass_impl(
        &self,
        pixels: &UnsafeSlice<T>,
        stride: u32,
        width: u32,
        height: u32,
        radius: u32,
        thread: usize,
        total_threads: usize,
    ) {
        // Nothing to blur, and `width - 1` below would underflow: a panic in
        // debug builds, and in release builds a wrapped `wm` that lets the
        // priming loop walk past the end of the row.
        if width == 0 {
            return;
        }

        // Number of slots in the circular stack: the full window width.
        let div = (radius * 2) + 1;
        let mut stack_buffer = ScratchBuffer::<SlidingWindow<CN, J>, 1024>::new(div as usize);
        let stacks0 = stack_buffer.as_mut_slice();

        // Normalisation factor: reciprocal of the sum of the triangular weights.
        let rad_p_1 = radius as f32 + 1.;
        let scale_filter_value = (1. / (rad_p_1 * rad_p_1)).cast_();

        // Index of the last pixel in a row.
        let wm = width - 1;

        // Contiguous band of rows handled by this worker.
        let min_y = thread * height as usize / total_threads;
        let max_y = (thread + 1) * height as usize / total_threads;

        for y in min_y..max_y {
            let row_start = stride as usize * y;

            // `sum`     - weighted sum of the whole window,
            // `sum_in`  - plain sum of the incoming (right) half,
            // `sum_out` - plain sum of the outgoing (left) half incl. centre.
            let mut sum = SlidingWindow::default();
            let mut sum_in = SlidingWindow::default();
            let mut sum_out = SlidingWindow::default();

            // Prime the left half (and centre) with the first pixel of the
            // row, i.e. replicate the left edge.
            let mut src_ptr = row_start;
            let src = SlidingWindow::from_store(pixels, src_ptr);
            for i in 0..=radius {
                // SAFETY: `i <= radius < div`; relies on the scratch buffer
                // exposing at least `div` slots, as requested from
                // `ScratchBuffer::new` above.
                unsafe { *stacks0.get_unchecked_mut(i as usize) = src };
                let fi = (i + 1).cast_();
                sum += src * fi;
                sum_out += src;
            }

            // Prime the right half with the following pixels, clamping at the
            // last pixel of the row.
            for i in 1..=radius {
                if i <= wm {
                    src_ptr += CN;
                }
                let src = SlidingWindow::from_store(pixels, src_ptr);
                // SAFETY: `i + radius <= 2 * radius < div`; same buffer-size
                // assumption as above.
                unsafe { *stacks0.get_unchecked_mut((i + radius) as usize) = src };
                let re = (radius + 1 - i).cast_();
                sum += src * re;
                sum_in += src;
            }

            // `sp` is the stack slot of the window centre, `xp` the column of
            // the right-most pixel currently inside the window.
            let mut sp = radius;
            let mut xp = radius.min(wm);

            src_ptr = CN * xp as usize + row_start;
            let mut dst_ptr = row_start;

            for _ in 0..width {
                // Emit the normalised value for the current centre pixel.
                let sum_intermediate: SlidingWindow<CN, F> = sum.cast();
                let finalized: SlidingWindow<CN, J> =
                    (sum_intermediate * scale_filter_value).cast();
                finalized.to_store(pixels, dst_ptr);
                dst_ptr += CN;

                // Shift the window one pixel to the right: every outgoing
                // pixel loses one unit of weight.
                sum -= sum_out;

                // Slot holding the left-most pixel of the window; it is the
                // one that leaves and gets overwritten by the new pixel.
                let mut stack_start = sp + div - radius;
                if stack_start >= div {
                    stack_start -= div;
                }
                // SAFETY: `stack_start < div` after the wrap above; same
                // buffer-size assumption as above.
                let stack = unsafe { &mut *stacks0.get_unchecked_mut(stack_start as usize) };
                sum_out -= *stack;

                // Fetch the pixel entering on the right, replicating the last
                // pixel once the row end is reached.
                if xp < wm {
                    src_ptr += CN;
                    xp += 1;
                }
                let src = SlidingWindow::from_store(pixels, src_ptr);
                *stack = src;
                sum_in += src;
                sum += sum_in;

                // Advance the centre slot with wrap-around.
                sp += 1;
                if sp >= div {
                    sp = 0;
                }
                // SAFETY: `sp < div` after the wrap above; same buffer-size
                // assumption as above.
                let stack = unsafe { &mut *stacks0.get_unchecked_mut(sp as usize) };
                // The new centre moves from the incoming to the outgoing half.
                sum_out += *stack;
                sum_in -= *stack;
            }
        }
    }
}
impl<T, J, F, const CN: usize> StackBlurWorkingPass<T, CN> for HorizontalStackBlurPass<T, J, F, CN>
where
    J: Copy
        + 'static
        + AddAssign<J>
        + Mul<Output = J>
        + Sub<Output = J>
        + PrimitiveCast<F>
        + SubAssign
        + PrimitiveCast<T>
        + Default,
    T: Copy + PrimitiveCast<J> + Default,
    i32: PrimitiveCast<J>,
    u32: PrimitiveCast<J>,
    F: PrimitiveCast<T> + PrimitiveCast<J> + 'static + Copy + Mul<Output = F> + Default,
    usize: PrimitiveCast<J>,
    f32: PrimitiveCast<F> + PrimitiveCast<T>,
{
    /// Trait entry point for the horizontal pass.
    ///
    /// Forwards every argument unchanged to the inherent `pass_impl`, which
    /// contains the actual row-blurring logic.
    fn pass(
        &self,
        pixels: &UnsafeSlice<T>,
        stride: u32,
        width: u32,
        height: u32,
        radius: u32,
        thread: usize,
        total_threads: usize,
    ) {
        Self::pass_impl(
            self,
            pixels,
            stride,
            width,
            height,
            radius,
            thread,
            total_threads,
        );
    }
}