#[cfg(all(target_arch = "aarch64", feature = "neon"))]
use crate::stackblur::neon::{HorizontalNeonStackBlurPass, VerticalNeonStackBlurPass};
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
use crate::stackblur::sse::{HorizontalSseStackBlurPass, VerticalSseStackBlurPass};
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
use crate::stackblur::wasm::{HorizontalWasmStackBlurPass, VerticalWasmStackBlurPass};
use crate::stackblur::*;
use crate::unsafe_slice::UnsafeSlice;
use crate::{AnisotropicRadius, BlurError, BlurImageMut, FastBlurChannels, ThreadingPolicy};
/// Runs the horizontal stack blur pass of an 8-bit image for a single worker.
///
/// The function dispatches on `channels` to a const-generic inner routine (1, 3 or 4
/// components per pixel). That routine picks a pass implementation for the current
/// target and invokes it exactly once with the arguments given here.
///
/// Implementation selection, in order of preference:
/// - `x86` / `x86_64`: AVX2 (only on `x86_64` with the `avx` feature, detected at runtime),
///   then SSE4.1 (`sse` feature, detected at runtime), otherwise the portable pass.
/// - `aarch64`: SVE2 (`sve` feature, detected at runtime), then NEON with RDM
///   (`neon` + `rdm` features, detected at runtime), then plain NEON (`neon` feature),
///   otherwise the portable pass.
/// - `wasm32` built with `simd128`: the wasm pass.
/// - Every other target: the portable pass.
///
/// `slice`, `stride`, `width`, `height` and `radius` are forwarded unchanged to the
/// selected pass. `thread` and `thread_count` are forwarded as well; presumably they tell
/// the pass which share of the image this worker owns (NOTE(review): confirm against the
/// `StackBlurWorkingPass` implementations).
// The argument list mirrors `StackBlurWorkingPass::pass` plus the channel layout, so it
// intentionally exceeds clippy's limit; the vertical worker carries the same allowance.
#[allow(clippy::too_many_arguments)]
fn stack_blur_worker_horizontal(
    slice: &UnsafeSlice<u8>,
    stride: u32,
    width: u32,
    height: u32,
    radius: u32,
    channels: FastBlurChannels,
    thread: usize,
    thread_count: usize,
) {
    /// Selects the horizontal pass for `N` components per pixel and runs it once.
    fn pass<const N: usize>(
        slice: &UnsafeSlice<u8>,
        stride: u32,
        width: u32,
        height: u32,
        radius: u32,
        thread: usize,
        thread_count: usize,
    ) {
        // x86 family: the implementation is chosen at runtime, hence the boxed trait object.
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        fn select_blur_pass<const N: usize>() -> Box<dyn StackBlurWorkingPass<u8, N>> {
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                if std::arch::is_x86_feature_detected!("avx2") {
                    use crate::stackblur::avx::HorizontalAvxStackBlurPass;
                    return Box::new(HorizontalAvxStackBlurPass::<N>::default());
                }
            }
            #[cfg(feature = "sse")]
            if std::arch::is_x86_feature_detected!("sse4.1") {
                Box::new(HorizontalSseStackBlurPass::<N>::default())
            } else {
                Box::new(HorizontalStackBlurPass::<u8, i32, f32, N>::default())
            }
            #[cfg(not(feature = "sse"))]
            Box::new(HorizontalStackBlurPass::<u8, i32, f32, N>::default())
        }
        // aarch64: SVE2 and RDM are probed at runtime; NEON itself is only feature-gated.
        #[cfg(target_arch = "aarch64")]
        fn select_blur_pass<const N: usize>() -> Box<dyn StackBlurWorkingPass<u8, N>> {
            #[cfg(feature = "sve")]
            {
                if std::arch::is_aarch64_feature_detected!("sve2") {
                    use crate::stackblur::sve::HorizontalSveStackBlurPassQ0_31;
                    return Box::new(HorizontalSveStackBlurPassQ0_31::<N>::default());
                }
            }
            #[cfg(feature = "neon")]
            {
                #[cfg(feature = "rdm")]
                if std::arch::is_aarch64_feature_detected!("rdm") {
                    use crate::stackblur::neon::HorizontalNeonStackBlurPassQ0_31;
                    return Box::new(HorizontalNeonStackBlurPassQ0_31::<N>::default());
                }
                Box::new(HorizontalNeonStackBlurPass::<N>::default())
            }
            #[cfg(not(feature = "neon"))]
            {
                Box::new(HorizontalStackBlurPass::<u8, i32, f32, N>::default())
            }
        }
        // wasm32 with simd128: the choice is fixed at compile time, so no boxing is needed.
        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
        fn select_blur_pass<const N: usize>() -> impl StackBlurWorkingPass<u8, N> {
            HorizontalWasmStackBlurPass::<N>::default()
        }
        // Any other target falls back to the portable implementation.
        #[cfg(not(any(
            target_arch = "aarch64",
            all(target_arch = "wasm32", target_feature = "simd128"),
            target_arch = "x86_64",
            target_arch = "x86"
        )))]
        fn select_blur_pass<const N: usize>() -> impl StackBlurWorkingPass<u8, N> {
            HorizontalStackBlurPass::<u8, i32, f32, N>::default()
        }
        let executor = select_blur_pass::<N>();
        executor.pass(slice, stride, width, height, radius, thread, thread_count);
    }
    // The component count must be a compile-time constant, so each layout gets its own
    // monomorphized instantiation.
    match channels {
        FastBlurChannels::Plane => {
            pass::<1>(slice, stride, width, height, radius, thread, thread_count);
        }
        FastBlurChannels::Channels3 => {
            pass::<3>(slice, stride, width, height, radius, thread, thread_count);
        }
        FastBlurChannels::Channels4 => {
            pass::<4>(slice, stride, width, height, radius, thread, thread_count);
        }
    }
}
/// Runs the vertical stack blur pass of an 8-bit image for a single worker.
///
/// `channels` picks the const-generic instantiation (1, 3 or 4 components per pixel);
/// that instantiation builds the best pass available for the current target and calls
/// it once, forwarding every other argument untouched.
///
/// Preference order per target:
/// - `x86` / `x86_64`: AVX2 (`x86_64` + `avx` feature, runtime-detected), then SSE4.1
///   (`sse` feature, runtime-detected), then the portable pass.
/// - `aarch64` with the `neon` feature: NEON with RDM (`rdm` feature, runtime-detected),
///   then plain NEON.
/// - `wasm32` built with `simd128`: the wasm pass.
/// - Anything else (including `aarch64` without `neon`): the portable pass.
#[allow(clippy::too_many_arguments)]
fn stack_blur_worker_vertical(
    slice: &UnsafeSlice<u8>,
    stride: u32,
    width: u32,
    height: u32,
    radius: u32,
    channels: FastBlurChannels,
    thread: usize,
    thread_count: usize,
) {
    /// Builds the vertical pass for `CN` components per pixel and executes it.
    fn run_with<const CN: usize>(
        slice: &UnsafeSlice<u8>,
        stride: u32,
        width: u32,
        height: u32,
        radius: u32,
        thread: usize,
        thread_count: usize,
    ) {
        // Runtime-dispatched on the x86 family, so the result is a boxed trait object.
        #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
        fn make_pass<const CN: usize>() -> Box<dyn StackBlurWorkingPass<u8, CN>> {
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                if std::arch::is_x86_feature_detected!("avx2") {
                    use crate::stackblur::avx::VerticalAvxStackBlurPass;
                    return Box::new(VerticalAvxStackBlurPass::<CN>::default());
                }
            }
            #[cfg(feature = "sse")]
            {
                if std::arch::is_x86_feature_detected!("sse4.1") {
                    return Box::new(VerticalSseStackBlurPass::<CN>::default());
                }
            }
            // Reached when no SIMD variant is compiled in or none is supported by the CPU.
            Box::new(VerticalStackBlurPass::<u8, i32, f32, CN>::default())
        }

        // NEON builds: prefer the RDM variant when the CPU reports it.
        #[cfg(all(target_arch = "aarch64", feature = "neon"))]
        fn make_pass<const CN: usize>() -> Box<dyn StackBlurWorkingPass<u8, CN>> {
            #[cfg(feature = "rdm")]
            {
                if std::arch::is_aarch64_feature_detected!("rdm") {
                    use crate::stackblur::neon::VerticalNeonStackBlurPassQ0_31;
                    return Box::new(VerticalNeonStackBlurPassQ0_31::<CN>::default());
                }
            }
            Box::new(VerticalNeonStackBlurPass::<CN>::default())
        }

        // wasm32 + simd128: statically known implementation.
        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
        fn make_pass<const CN: usize>() -> impl StackBlurWorkingPass<u8, CN> {
            VerticalWasmStackBlurPass::<CN>::default()
        }

        // Portable fallback for every remaining configuration.
        #[cfg(not(any(
            all(target_arch = "aarch64", feature = "neon"),
            all(target_arch = "wasm32", target_feature = "simd128"),
            target_arch = "x86_64",
            target_arch = "x86"
        )))]
        fn make_pass<const CN: usize>() -> impl StackBlurWorkingPass<u8, CN> {
            VerticalStackBlurPass::<u8, i32, f32, CN>::default()
        }

        make_pass::<CN>().pass(slice, stride, width, height, radius, thread, thread_count);
    }

    // One monomorphized instantiation per supported channel layout.
    match channels {
        FastBlurChannels::Plane => {
            run_with::<1>(slice, stride, width, height, radius, thread, thread_count)
        }
        FastBlurChannels::Channels3 => {
            run_with::<3>(slice, stride, width, height, radius, thread, thread_count)
        }
        FastBlurChannels::Channels4 => {
            run_with::<4>(slice, stride, width, height, radius, thread, thread_count)
        }
    }
}
/// Applies a stack blur to an 8-bit image in place.
///
/// The image layout is validated first, then `radius` is passed through
/// `clamp(1, 1449)`. A horizontal pass using `radius.x_axis` runs over the whole image,
/// followed by a vertical pass using `radius.y_axis`.
///
/// When `threading_policy` yields exactly one thread, both passes run on the calling
/// thread. Otherwise a `novtb::ThreadPool` of that size is created and each pass is
/// issued through its own `parallel_for` call, with the horizontal one issued first.
///
/// # Errors
///
/// Returns the error produced by `image.check_layout(None)` if the layout check fails.
pub fn stack_blur(
    image: &mut BlurImageMut<u8>,
    radius: AnisotropicRadius,
    threading_policy: ThreadingPolicy,
) -> Result<(), BlurError> {
    image.check_layout(None)?;

    let radius = radius.clamp(1, 1449);
    let (radius_x, radius_y) = (radius.x_axis, radius.y_axis);

    let (width, height) = (image.width, image.height);
    let thread_count = threading_policy.thread_count(width, height) as u32;
    let stride = image.row_stride();
    let channels = image.channels;

    // Both execution paths operate on the same shared view of the pixel buffer.
    let slice = UnsafeSlice::new(image.data.borrow_mut());

    if thread_count == 1 {
        // Single worker: no pool, run both passes inline.
        stack_blur_worker_horizontal(&slice, stride, width, height, radius_x, channels, 0, 1);
        stack_blur_worker_vertical(&slice, stride, width, height, radius_y, channels, 0, 1);
    } else {
        let workers = thread_count as usize;
        let pool = novtb::ThreadPool::new(workers);

        // The vertical pass is only issued after the horizontal `parallel_for` returns.
        pool.parallel_for(|thread_id| {
            stack_blur_worker_horizontal(
                &slice, stride, width, height, radius_x, channels, thread_id, workers,
            );
        });
        pool.parallel_for(|thread_id| {
            stack_blur_worker_vertical(
                &slice, stride, width, height, radius_y, channels, thread_id, workers,
            );
        });
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Blurring a uniformly filled RGB image with radius 5 must leave every sample
    /// within 3 levels of the original fill value.
    #[test]
    fn test_stack_blur_u8_q_k5() {
        const SIDE: usize = 148;
        const FILL: u8 = 126;

        let mut dst = vec![FILL; SIDE * SIDE * 3];
        let mut dst_image = BlurImageMut::borrow(
            &mut dst,
            SIDE as u32,
            SIDE as u32,
            FastBlurChannels::Channels3,
        );

        stack_blur(
            &mut dst_image,
            AnisotropicRadius::new(5),
            ThreadingPolicy::Single,
        )
        .unwrap();

        dst.iter().enumerate().for_each(|(i, &cn)| {
            let diff = (cn as i32 - FILL as i32).abs();
            assert!(
                diff <= 3,
                "Diff expected to be less than 3 but it was {diff} at {i}"
            );
        });
    }
}