use crate::gaussian::gaussian_kernel_1d;
use crate::unsafe_slice::UnsafeSlice;
use crate::{BlurError, BlurImage, BlurImageMut, FastBlurChannels, ThreadingPolicy};
use num_traits::AsPrimitive;
use num_traits::real::Real;
use rayon::ThreadPool;
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
use rayon::prelude::{IntoParallelIterator, ParallelSlice, ParallelSliceMut};
use std::fmt::{Debug, Display};
use std::ops::{Add, AddAssign, Div, Index, Mul, MulAssign, Sub};
/// Two-component value used as one cell of the bilateral grid.
///
/// The filter in this file accumulates pixel intensity in `x` and the number of
/// contributing samples in `y`, and later divides `x` by `y`.
#[derive(Debug, Clone, Copy)]
struct Vector<T> {
    pub x: T,
    pub y: T,
}

/// `vector * scalar`: scales both components by `rhs`.
impl<T> Mul<T> for Vector<T>
where
    T: Mul<Output = T> + Copy,
{
    type Output = Vector<T>;

    #[inline]
    fn mul(self, rhs: T) -> Self::Output {
        let Vector { x, y } = self;
        Vector {
            x: x * rhs,
            y: y * rhs,
        }
    }
}

impl<T> Vector<T>
where
    T: AsReal + Copy,
{
    /// Converts both components to their `f32` working representation.
    #[inline]
    fn to_real(self) -> Vector<f32> {
        Vector {
            x: self.x.as_real(),
            y: self.y.as_real(),
        }
    }

    /// Converts an `f32` vector back to the storage type, component by component.
    #[inline]
    fn from_real(real: &Vector<f32>) -> Vector<T> {
        let Vector { x, y } = *real;
        Vector {
            x: T::from_real(x),
            y: T::from_real(y),
        }
    }
}

/// Component-wise product of two vectors.
impl<T> Mul<Vector<T>> for Vector<T>
where
    T: Mul<T, Output = T> + Copy,
{
    type Output = Vector<T>;

    #[inline]
    fn mul(self, rhs: Vector<T>) -> Self::Output {
        Vector {
            x: self.x * rhs.x,
            y: self.y * rhs.y,
        }
    }
}

/// Component-wise sum of two vectors.
impl<T> Add<Vector<T>> for Vector<T>
where
    T: Add<T, Output = T> + Copy,
{
    type Output = Vector<T>;

    #[inline]
    fn add(self, rhs: Vector<T>) -> Self::Output {
        Vector {
            x: self.x + rhs.x,
            y: self.y + rhs.y,
        }
    }
}

/// Component-wise quotient of two vectors.
impl<T> Div<Vector<T>> for Vector<T>
where
    T: Div<T, Output = T> + Copy,
{
    type Output = Vector<T>;

    #[inline]
    fn div(self, rhs: Vector<T>) -> Self::Output {
        Vector {
            x: self.x / rhs.x,
            y: self.y / rhs.y,
        }
    }
}

/// `scalar * vector` for `f32`, so weights can be written on the left-hand side.
impl Mul<Vector<f32>> for f32 {
    type Output = Vector<f32>;

    #[inline]
    fn mul(self, rhs: Vector<f32>) -> Self::Output {
        let Vector { x, y } = rhs;
        Vector {
            x: self * x,
            y: self * y,
        }
    }
}

/// In-place component-wise accumulation.
impl<T> AddAssign<Vector<T>> for Vector<T>
where
    T: Add<T, Output = T> + Copy,
{
    #[inline]
    fn add_assign(&mut self, rhs: Vector<T>) {
        let Vector { x, y } = rhs;
        self.x = self.x + x;
        self.y = self.y + y;
    }
}

impl<T> Vector<T>
where
    T: Copy,
{
    /// Builds a vector from its two components.
    #[inline]
    pub fn new(x: T, y: T) -> Vector<T> {
        Self { x, y }
    }
}

/// The default vector has both components set to `T::default()`.
impl<T> Default for Vector<T>
where
    T: Default + Clone,
{
    #[inline]
    fn default() -> Self {
        Self {
            x: Default::default(),
            y: Default::default(),
        }
    }
}

/// Conversion between a storage sample type and the `f32` working domain.
trait AsReal {
    /// Returns the sample as an `f32`.
    fn as_real(&self) -> f32;
    /// Builds a sample from an `f32` working value.
    fn from_real(real: f32) -> Self;
}

/// `f32` is already in the working domain: both directions are the identity.
impl AsReal for f32 {
    #[inline]
    fn as_real(&self) -> f32 {
        *self
    }

    #[inline]
    fn from_real(real: f32) -> Self {
        real
    }
}

/// `u8` samples are mapped to and from the working domain with a factor of 255.
impl AsReal for u8 {
    #[inline]
    fn as_real(&self) -> f32 {
        f32::from(*self) * (1. / 255.)
    }

    // `min`/`max` is kept instead of `clamp` on purpose: the two treat NaN differently.
    #[inline]
    #[allow(clippy::manual_clamp)]
    fn from_real(real: f32) -> Self {
        let scaled = real * 255.;
        // The cast truncates toward zero; no rounding is applied here.
        scaled.min(255f32).max(0.) as u8
    }
}
/// Dense 3-D grid of [`Vector`] cells laid over a flat, externally owned slice.
///
/// A cell `(x, y, z)` lives at flat offset `(x * y_dim + y) * z_dim + z`, so `z` is the
/// fastest-varying axis. Storage goes through [`UnsafeSlice`], which is what allows
/// [`Array3D::set`] to write through a shared reference.
#[derive(Debug, Clone)]
struct Array3D<'a, T> {
    x_dim: usize,
    y_dim: usize,
    z_dim: usize,
    store: UnsafeSlice<'a, Vector<T>>,
}

impl<'a, T> Array3D<'a, T>
where
    T: Default + Clone,
{
    /// Wraps `slice` as a `width` x `height` x `z` grid.
    ///
    /// The constructor does not check the slice length; callers in this file allocate
    /// exactly `width * height * z` cells.
    pub fn new(
        slice: &'a mut [Vector<T>],
        width: usize,
        height: usize,
        z: usize,
    ) -> Array3D<'a, T> {
        Array3D {
            x_dim: width,
            y_dim: height,
            z_dim: z,
            store: UnsafeSlice::new(slice),
        }
    }
}

/// Flat-offset access to the backing store.
impl<T> Index<usize> for Array3D<'_, T> {
    type Output = Vector<T>;

    #[inline]
    fn index(&self, index: usize) -> &Self::Output {
        self.store.get(index)
    }
}

impl<T> Array3D<'_, T>
where
    T: Clone,
{
    /// Returns a reference to the cell at `(x, y, z)`.
    #[inline]
    pub fn find(&self, x: usize, y: usize, z: usize) -> &Vector<T> {
        self.store.get((x * self.y_dim + y) * self.z_dim + z)
    }
}

impl<T> Array3D<'_, T>
where
    T: Clone + Copy,
{
    /// Returns a copy of the cell at `(x, y, z)`.
    #[inline]
    pub fn get(&self, x: usize, y: usize, z: usize) -> Vector<T> {
        *self.store.get((x * self.y_dim + y) * self.z_dim + z)
    }

    /// Overwrites the cell at `(x, y, z)` with `val` through a shared reference.
    ///
    /// NOTE(review): this relies on `UnsafeSlice::write`; presumably callers must make sure
    /// no two threads touch the same cell at the same time - confirm against its contract.
    #[inline]
    pub fn set(&self, x: usize, y: usize, z: usize, val: Vector<T>) {
        unsafe {
            self.store.write((x * self.y_dim + y) * self.z_dim + z, val);
        }
    }
}

impl<T> Array3D<'_, T>
where
    T: AsReal + Clone + Copy,
{
    /// Samples the grid at the fractional position `(x, y, z)` by blending the eight
    /// surrounding cells.
    ///
    /// Each coordinate is clamped to `[0, dim - 1]`, then split into the cell below it, the
    /// cell above it (itself clamped to the last cell) and the fractional distance from the
    /// lower cell. The fraction is measured from the *floored* coordinate, so in-range
    /// positions between two cells blend both of them instead of collapsing onto the lower
    /// cell.
    fn trilinear_interpolation(&self, x: f32, y: f32, z: f32) -> Vector<T> {
        /// Returns `(lower cell, upper cell, weight of the upper cell)` for one axis.
        #[inline]
        fn split_axis(coord: f32, dim: usize) -> (usize, usize, f32) {
            let last = (dim - 1) as f32;
            let clamped = f32::clamp(coord, 0., last);
            let lower = clamped.floor();
            let upper = f32::clamp(lower + 1., 0., last);
            (lower as usize, upper as usize, clamped - lower)
        }

        let (x0, x1, ax) = split_axis(x, self.x_dim);
        let (y0, y1, ay) = split_axis(y, self.y_dim);
        let (z0, z1, az) = split_axis(z, self.z_dim);

        // Complementary weights of the lower cell on each axis.
        let (bx, by, bz) = (1.0f32 - ax, 1.0f32 - ay, 1.0f32 - az);

        let corner = |xi: usize, yi: usize, zi: usize, weight: f32| -> Vector<f32> {
            self.get(xi, yi, zi).to_real() * weight
        };

        let result = corner(x0, y0, z0, bx * by * bz)
            + corner(x1, y0, z0, ax * by * bz)
            + corner(x0, y1, z0, bx * ay * bz)
            + corner(x1, y1, z0, ax * ay * bz)
            + corner(x0, y0, z1, bx * by * az)
            + corner(x1, y0, z1, ax * by * az)
            + corner(x0, y1, z1, bx * ay * az)
            + corner(x1, y1, z1, ax * ay * az);

        Vector::<T>::from_real(&result)
    }
}
#[allow(clippy::manual_clamp)]
fn fast_bilateral_filter_impl<
T: Copy
+ Default
+ Clone
+ AsReal
+ Real
+ Add<T>
+ Sub<T>
+ Mul<T>
+ Div<T>
+ AsPrimitive<f32>
+ MulAssign<T>
+ AddAssign<T>
+ Display
+ Debug
+ Send
+ Sync,
>(
img: &BlurImage<T>,
dst: &mut [T],
spatial_sigma: f32,
range_sigma: f32,
) where
f32: AsPrimitive<T>,
{
let width = img.width;
let height = img.height;
let mut base_max = T::min_value();
let mut base_min = T::max_value();
for item in img.data.as_ref().iter() {
base_min = item.min(base_min);
base_max = item.max(base_max);
}
let base_delta = base_max - base_min;
let padding_xy = 2.;
let padding_z = 2.;
let spatial_sigma_scale = 1. / spatial_sigma;
let range_sigma_scale = 1. / range_sigma;
let small_width = (((width - 1) as f32 * spatial_sigma_scale) + 1. + 2. * padding_xy) as usize;
let small_height =
(((height - 1) as f32 * spatial_sigma_scale) + 1. + 2. * padding_xy) as usize;
let small_depth = ((base_delta.as_() * range_sigma_scale) + 1. + 2. * padding_z) as usize;
let mut target = vec![Vector::<T>::default(); small_height * small_depth * small_width];
let mut data = Array3D::<T>::new(&mut target, small_width, small_height, small_depth);
let stride = img.row_stride() as usize;
let img = img.data.as_ref();
for x in 0..width as usize {
let small_x = ((x as f32) * spatial_sigma_scale + 0.5f32) + padding_xy;
for y in 0..height as usize {
let pixel = unsafe { *img.get_unchecked(y * stride + x) };
let z = pixel - base_min;
let small_y = ((y as f32) * spatial_sigma_scale + 0.5f32) + padding_xy;
let small_z = ((z.as_()) * range_sigma_scale + 0.5f32) + padding_z;
let mut d = *data.find(small_x as usize, small_y as usize, small_z as usize);
d.x += pixel;
d.y += 1.0f32.as_();
data.set(small_x as usize, small_y as usize, small_z as usize, d)
}
}
let mut target2 = vec![Vector::<T>::default(); small_height * small_depth * small_width];
let mut buffer = Array3D::<T>::new(&mut target2, small_width, small_height, small_depth);
let preferred_sigma = 1.0f32;
let grid_kernel_size = 7u32; let gaussian_kernel = gaussian_kernel_1d(grid_kernel_size, preferred_sigma);
let half_kernel = gaussian_kernel.len() / 2;
(0..small_depth).into_par_iter().for_each(|z| {
for y in 0..small_height {
for x in 0..half_kernel.min(small_width) {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) }.as_();
let start_p = data.get(
(x + i).saturating_sub(half_kernel).min(small_width - 1),
y,
z,
);
let rolled = half_kernel - i - 1;
let end_p = data.get((x + rolled).min(small_width - 1), y, z);
sum += (start_p + end_p) * weight;
}
buffer.set(x, y, z, sum);
}
for x in half_kernel..small_width.saturating_sub(half_kernel) {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) }.as_();
let start_px = data.get((x + i).sub(half_kernel), y, z);
let rolled = half_kernel - i - 1;
let end_px = data.get(x + rolled, y, z);
sum += (start_px + end_px) * weight;
}
buffer.set(x, y, z, sum);
}
for x in small_width.saturating_sub(half_kernel)..small_width {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) }.as_();
let start_p = data.get(
(x + i).saturating_sub(half_kernel).min(small_width - 1),
y,
z,
);
let rolled = half_kernel - i - 1;
let end_p = data.get((x + rolled).min(small_width - 1), y, z);
sum += (start_p + end_p) * weight;
}
buffer.set(x, y, z, sum);
}
}
});
std::mem::swap(&mut buffer, &mut data);
(0..small_height).into_par_iter().for_each(|y| {
for x in 0..small_width {
for z in 0..half_kernel.min(small_depth) {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) };
let rolled = half_kernel - i - 1;
let start_px = data.get(
x,
y,
(z + i).saturating_sub(half_kernel).min(small_depth - 1),
);
let end_px = data.get(x, y, (z + rolled).min(small_depth - 1));
sum += (start_px + end_px) * weight.as_();
}
buffer.set(x, y, z, sum);
}
for z in half_kernel..small_depth.saturating_sub(half_kernel) {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) };
let start_px = data.get(x, y, (z + i).sub(half_kernel));
let rolled = half_kernel - i - 1;
let end_px = data.get(x, y, z + rolled);
sum += (start_px + end_px) * weight.as_();
}
buffer.set(x, y, z, sum);
}
for z in small_depth.saturating_sub(half_kernel)..small_depth {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) };
let rolled = half_kernel - i - 1;
let start_px = data.get(
x,
y,
(z + i).saturating_sub(half_kernel).min(small_depth - 1),
);
let end_px = data.get(x, y, (z + rolled).min(small_depth - 1));
sum += (start_px + end_px) * weight.as_();
}
buffer.set(x, y, z, sum);
}
}
});
std::mem::swap(&mut buffer, &mut data);
(0..small_depth).into_par_iter().for_each(|z| {
for x in 0..small_width {
for y in 0..half_kernel.min(small_height) {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) };
let start_px = data.get(
x,
(y + i).saturating_sub(half_kernel).min(small_height - 1),
z,
);
let rolled = half_kernel - i - 1;
let end_px = data.get(x, (y + rolled).min(small_height - 1), z);
sum += (start_px + end_px) * weight.as_();
}
buffer.set(x, y, z, sum);
}
for y in half_kernel..small_height.saturating_sub(half_kernel) {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) };
let start_px = data.get(x, (y + i).sub(half_kernel), z);
let rolled = half_kernel - i - 1;
let end_px = data.get(x, y + rolled, z);
sum += (start_px + end_px) * weight.as_();
}
buffer.set(x, y, z, sum);
}
for y in small_height.saturating_sub(half_kernel)..small_height {
let mut sum = data
.get(x, y, z)
.mul(unsafe { *gaussian_kernel.get_unchecked(half_kernel) }.as_());
for i in 0..half_kernel {
let weight = unsafe { *gaussian_kernel.get_unchecked(i) };
let start_px = data.get(
x,
(y + i).saturating_sub(half_kernel).min(small_height - 1),
z,
);
let rolled = half_kernel - i - 1;
let end_px = data.get(x, (y + rolled).min(small_height - 1), z);
sum += (start_px + end_px) * weight.as_();
}
buffer.set(x, y, z, sum);
}
}
});
std::mem::swap(&mut buffer, &mut data);
dst.par_chunks_exact_mut(width as usize)
.zip(img.par_chunks_exact(width as usize))
.enumerate()
.for_each(|(y, (row, src_row))| {
for (x, (t, s)) in row.iter_mut().zip(src_row.iter()).enumerate() {
let z = (*s - base_min).as_();
let d = data.trilinear_interpolation(
(x as f32) * spatial_sigma_scale + padding_xy,
(y as f32) * spatial_sigma_scale + padding_xy,
z * range_sigma_scale + padding_z,
);
*t = if d.y != 0f32.as_() {
d.x / d.y
} else {
0f32.as_()
};
}
})
}
/// Conversion between a storage sample type `T` and the `f32` working representation
/// used by the bilateral filter.
pub trait BilinearWorkingItem<T> {
    /// Returns this sample as an `f32` working value.
    fn to_bilinear_f32(&self) -> f32;
    /// Builds a storage sample from an `f32` working value.
    fn from_bilinear_f32(value: f32) -> T;
}

/// `u8` samples: working values are the sample scaled by `1 / 255`.
impl BilinearWorkingItem<u8> for u8 {
    // `min`/`max` is kept instead of `clamp` on purpose: the two treat NaN differently.
    #[inline]
    #[allow(clippy::manual_clamp)]
    fn from_bilinear_f32(value: f32) -> u8 {
        let scaled = value * 255.;
        let bounded = scaled.min(255.).max(0.);
        bounded.round() as u8
    }

    #[inline]
    fn to_bilinear_f32(&self) -> f32 {
        f32::from(*self) * (1. / 255.)
    }
}

/// `u16` samples: working values are the sample scaled by `1 / u16::MAX`.
impl BilinearWorkingItem<u16> for u16 {
    #[inline]
    fn from_bilinear_f32(value: f32) -> u16 {
        let full_scale = f32::from(u16::MAX);
        // Round first, then bound to the representable range.
        let rounded = (value * full_scale).round();
        rounded.min(full_scale).max(0.) as u16
    }

    #[inline]
    fn to_bilinear_f32(&self) -> f32 {
        f32::from(*self) * (1. / f32::from(u16::MAX))
    }
}

/// `f32` samples are already working values: both directions are the identity.
impl BilinearWorkingItem<f32> for f32 {
    #[inline]
    fn from_bilinear_f32(value: f32) -> f32 {
        value
    }

    #[inline]
    fn to_bilinear_f32(&self) -> f32 {
        *self
    }
}
/// Runs the bilateral filter on a single-channel image inside `pool`.
///
/// The source is converted to a tightly packed `f32` plane, filtered with
/// `fast_bilateral_filter_impl`, and converted back into `dst` row by row using the
/// destination's own row stride.
///
/// # Errors
/// Propagates any error from the layout checks and the size comparison of `img` and `dst`.
///
/// # Panics
/// Panics when `spatial_sigma` or `range_sigma` is less than or equal to zero.
fn fast_bilateral_filter_plane_impl<
    V: Copy + Default + 'static + BilinearWorkingItem<V> + Debug + Send + Sync,
>(
    img: &BlurImage<V>,
    dst: &mut BlurImageMut<V>,
    spatial_sigma: f32,
    range_sigma: f32,
    pool: &ThreadPool,
) -> Result<(), BlurError> {
    pool.install(|| {
        img.check_layout()?;
        dst.check_layout(Some(img))?;
        img.size_matches_mut(dst)?;
        assert!(
            !(spatial_sigma <= 0. || range_sigma <= 0.0),
            "Spatial sigma and range sigma must be more than 0"
        );

        let (width, height) = (img.width, img.height);
        let row_len = width as usize;
        let src_stride = img.row_stride() as usize;

        // Source samples -> packed f32 plane.
        let mut plane = vec![0f32; row_len * height as usize];
        plane
            .chunks_exact_mut(row_len)
            .zip(img.data.chunks_exact(src_stride))
            .for_each(|(plane_row, src_row)| {
                for (out, sample) in plane_row.iter_mut().zip(src_row.iter()) {
                    *out = sample.to_bilinear_f32();
                }
            });

        let source = BlurImage::borrow(&plane, width, height, FastBlurChannels::Plane);
        let mut filtered = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        fast_bilateral_filter_impl(
            &source,
            filtered.data.borrow_mut(),
            spatial_sigma,
            range_sigma,
        );

        // Filtered f32 plane -> destination samples.
        let dst_stride = dst.row_stride() as usize;
        let filtered_stride = filtered.row_stride() as usize;
        dst.data
            .borrow_mut()
            .chunks_exact_mut(dst_stride)
            .zip(filtered.data.borrow().chunks_exact(filtered_stride))
            .for_each(|(dst_row, filtered_row)| {
                for (out, &value) in dst_row.iter_mut().zip(filtered_row.iter()) {
                    *out = V::from_bilinear_f32(value);
                }
            });
        Ok(())
    })
}
/// Runs the bilateral filter on an interleaved two-channel image.
///
/// Builds its own rayon pool sized by `threading_policy`, splits the two channels into
/// separate `f32` planes, filters both planes as concurrent tasks on that pool, and
/// interleaves the results back into `dst`.
///
/// NOTE(review): unlike the other layouts this path does not call `size_matches_mut`;
/// presumably `check_layout_channels(2, Some(img))` covers the size comparison - confirm.
///
/// # Errors
/// Propagates any error from the two-channel layout checks.
///
/// # Panics
/// Panics when either sigma is less than or equal to zero, or when the thread pool cannot
/// be built.
#[cfg(feature = "image")]
pub(crate) fn fast_bilateral_filter_gray_alpha_impl<
    V: Copy + Default + 'static + BilinearWorkingItem<V> + Send + Sync + Debug,
>(
    img: &BlurImage<V>,
    dst: &mut BlurImageMut<V>,
    spatial_sigma: f32,
    range_sigma: f32,
    threading_policy: ThreadingPolicy,
) -> Result<(), BlurError> {
    img.check_layout_channels(2)?;
    dst.check_layout_channels(2, Some(img))?;
    assert!(
        !(spatial_sigma <= 0. || range_sigma <= 0.0),
        "Spatial sigma and range sigma must be more than 0"
    );

    let workers = threading_policy.thread_count(img.width, img.height) as u32;
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(workers as usize)
        .build()
        .unwrap();

    pool.install(|| {
        let (width, height) = (img.width, img.height);
        let row_len = width as usize;

        // De-interleave the source into one f32 plane per channel.
        let mut plane0 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut plane1 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        plane0
            .data
            .borrow_mut()
            .chunks_exact_mut(row_len)
            .zip(plane1.data.borrow_mut().chunks_exact_mut(row_len))
            .zip(img.data.chunks_exact(img.row_stride() as usize))
            .for_each(|((row0, row1), src_row)| {
                let pixels = src_row.as_chunks::<2>().0;
                for ((c0, c1), px) in row0.iter_mut().zip(row1.iter_mut()).zip(pixels.iter()) {
                    *c0 = px[0].to_bilinear_f32();
                    *c1 = px[1].to_bilinear_f32();
                }
            });

        let mut out0 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut out1 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let view0 = plane0.to_immutable_ref();
        let view1 = plane1.to_immutable_ref();

        // One task per channel; the scope returns once both have finished.
        pool.scope(|scope| {
            scope.spawn(|_| {
                fast_bilateral_filter_impl(
                    &view0,
                    out0.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
            scope.spawn(|_| {
                fast_bilateral_filter_impl(
                    &view1,
                    out1.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
        });

        // Re-interleave the filtered planes into the destination.
        let dst_stride = dst.row_stride() as usize;
        let rows = dst
            .data
            .borrow_mut()
            .chunks_exact_mut(dst_stride)
            .zip(out0.data.borrow().chunks_exact(row_len))
            .zip(out1.data.borrow().chunks_exact(row_len));
        for ((dst_row, row0), row1) in rows {
            let pixels = dst_row.as_chunks_mut::<2>().0;
            for ((px, v0), v1) in pixels.iter_mut().zip(row0.iter()).zip(row1.iter()) {
                px[0] = V::from_bilinear_f32(*v0);
                px[1] = V::from_bilinear_f32(*v1);
            }
        }
        Ok(())
    })
}
/// Runs the bilateral filter on an interleaved three-channel image inside `pool`.
///
/// The three channels are split into separate tightly packed `f32` planes, each plane is
/// filtered by `fast_bilateral_filter_impl` as its own task on `pool`, and the results are
/// interleaved back into `dst` using the destination's row stride.
///
/// # Errors
/// Propagates any error from the layout checks and the size comparison of `img` and `dst`.
/// The destination layout is validated against the source, matching the plane and
/// four-channel paths.
///
/// # Panics
/// Panics when `spatial_sigma` or `range_sigma` is less than or equal to zero.
fn fast_bilateral_filter_rgb_impl<
    V: Copy + Default + 'static + BilinearWorkingItem<V> + Send + Sync + Debug,
>(
    img: &BlurImage<V>,
    dst: &mut BlurImageMut<V>,
    spatial_sigma: f32,
    range_sigma: f32,
    pool: &ThreadPool,
) -> Result<(), BlurError> {
    pool.install(|| {
        img.check_layout()?;
        // Check the destination against the source rather than in isolation.
        dst.check_layout(Some(img))?;
        img.size_matches_mut(dst)?;
        let width = img.width;
        let height = img.height;
        assert!(
            !(spatial_sigma <= 0. || range_sigma <= 0.0),
            "Spatial sigma and range sigma must be more than 0"
        );

        // De-interleave the source into one f32 plane per channel.
        let mut in_image0 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut in_image1 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut in_image2 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        for (((dst0, dst1), dst2), src) in in_image0
            .data
            .borrow_mut()
            .chunks_exact_mut(width as usize)
            .zip(in_image1.data.borrow_mut().chunks_exact_mut(width as usize))
            .zip(in_image2.data.borrow_mut().chunks_exact_mut(width as usize))
            .zip(img.data.chunks_exact(img.row_stride() as usize))
        {
            for (((r, g), b), src) in dst0
                .iter_mut()
                .zip(dst1.iter_mut())
                .zip(dst2.iter_mut())
                .zip(src.as_chunks::<3>().0.iter())
            {
                *r = src[0].to_bilinear_f32();
                *g = src[1].to_bilinear_f32();
                *b = src[2].to_bilinear_f32();
            }
        }

        let mut working_dst0 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut working_dst1 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut working_dst2 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let ref0 = in_image0.to_immutable_ref();
        let ref1 = in_image1.to_immutable_ref();
        let ref2 = in_image2.to_immutable_ref();

        // One task per channel; the scope returns once all three have finished.
        pool.scope(|s| {
            s.spawn(|_| {
                fast_bilateral_filter_impl(
                    &ref0,
                    working_dst0.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
            s.spawn(|_| {
                fast_bilateral_filter_impl(
                    &ref1,
                    working_dst1.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
            s.spawn(|_| {
                fast_bilateral_filter_impl(
                    &ref2,
                    working_dst2.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
        });

        // Re-interleave the filtered planes into the destination.
        let dst_stride = dst.row_stride() as usize;
        dst.data
            .borrow_mut()
            .chunks_exact_mut(dst_stride)
            .zip(working_dst0.data.borrow().chunks_exact(width as usize))
            .zip(working_dst1.data.borrow().chunks_exact(width as usize))
            .zip(working_dst2.data.borrow().chunks_exact(width as usize))
            .for_each(|(((dst, src0), src1), src2)| {
                for (((dst, src0), src1), src2) in dst
                    .as_chunks_mut::<3>()
                    .0
                    .iter_mut()
                    .zip(src0.iter())
                    .zip(src1.iter())
                    .zip(src2.iter())
                {
                    dst[0] = V::from_bilinear_f32(*src0);
                    dst[1] = V::from_bilinear_f32(*src1);
                    dst[2] = V::from_bilinear_f32(*src2);
                }
            });
        Ok(())
    })
}
/// Runs the bilateral filter on an interleaved four-channel image inside `pool`.
///
/// All four channels are treated alike: each is split into its own tightly packed `f32`
/// plane, filtered by `fast_bilateral_filter_impl` as a separate task on `pool`, and
/// written back interleaved into `dst` using the destination's row stride.
///
/// # Errors
/// Propagates any error from the layout checks and the size comparison of `img` and `dst`.
///
/// # Panics
/// Panics when `spatial_sigma` or `range_sigma` is less than or equal to zero.
fn fast_bilateral_filter_rgba_impl<
    V: Copy + Default + 'static + BilinearWorkingItem<V> + Sync + Send + Debug,
>(
    img: &BlurImage<V>,
    dst: &mut BlurImageMut<V>,
    spatial_sigma: f32,
    range_sigma: f32,
    pool: &ThreadPool,
) -> Result<(), BlurError> {
    pool.install(|| {
        img.check_layout()?;
        dst.check_layout(Some(img))?;
        img.size_matches_mut(dst)?;
        assert!(
            !(spatial_sigma <= 0. || range_sigma <= 0.0),
            "Spatial sigma and range sigma must be more than 0"
        );

        let (width, height) = (img.width, img.height);
        let row_len = width as usize;

        // De-interleave the source into one f32 plane per channel.
        let mut plane0 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut plane1 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut plane2 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut plane3 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        plane0
            .data
            .borrow_mut()
            .chunks_exact_mut(row_len)
            .zip(plane1.data.borrow_mut().chunks_exact_mut(row_len))
            .zip(plane2.data.borrow_mut().chunks_exact_mut(row_len))
            .zip(plane3.data.borrow_mut().chunks_exact_mut(row_len))
            .zip(img.data.chunks_exact(img.row_stride() as usize))
            .for_each(|((((row0, row1), row2), row3), src_row)| {
                let pixels = src_row.as_chunks::<4>().0;
                let channels = row0
                    .iter_mut()
                    .zip(row1.iter_mut())
                    .zip(row2.iter_mut())
                    .zip(row3.iter_mut());
                for ((((c0, c1), c2), c3), px) in channels.zip(pixels.iter()) {
                    *c0 = px[0].to_bilinear_f32();
                    *c1 = px[1].to_bilinear_f32();
                    *c2 = px[2].to_bilinear_f32();
                    *c3 = px[3].to_bilinear_f32();
                }
            });

        let mut out0 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut out1 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut out2 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let mut out3 = BlurImageMut::alloc(width, height, FastBlurChannels::Plane);
        let view0 = plane0.to_immutable_ref();
        let view1 = plane1.to_immutable_ref();
        let view2 = plane2.to_immutable_ref();
        let view3 = plane3.to_immutable_ref();

        // One task per channel; the scope returns once all four have finished.
        pool.scope(|scope| {
            scope.spawn(|_| {
                fast_bilateral_filter_impl(
                    &view0,
                    out0.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
            scope.spawn(|_| {
                fast_bilateral_filter_impl(
                    &view1,
                    out1.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
            scope.spawn(|_| {
                fast_bilateral_filter_impl(
                    &view2,
                    out2.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
            scope.spawn(|_| {
                fast_bilateral_filter_impl(
                    &view3,
                    out3.data.borrow_mut(),
                    spatial_sigma,
                    range_sigma,
                );
            });
        });

        // Re-interleave the filtered planes into the destination.
        let dst_stride = dst.row_stride() as usize;
        let rows = dst
            .data
            .borrow_mut()
            .chunks_exact_mut(dst_stride)
            .zip(out0.data.borrow().chunks_exact(row_len))
            .zip(out1.data.borrow().chunks_exact(row_len))
            .zip(out2.data.borrow().chunks_exact(row_len))
            .zip(out3.data.borrow().chunks_exact(row_len));
        for ((((dst_row, row0), row1), row2), row3) in rows {
            let pixels = dst_row.as_chunks_mut::<4>().0;
            let samples = pixels
                .iter_mut()
                .zip(row0.iter())
                .zip(row1.iter())
                .zip(row2.iter())
                .zip(row3.iter());
            for ((((px, v0), v1), v2), v3) in samples {
                px[0] = V::from_bilinear_f32(*v0);
                px[1] = V::from_bilinear_f32(*v1);
                px[2] = V::from_bilinear_f32(*v2);
                px[3] = V::from_bilinear_f32(*v3);
            }
        }
        Ok(())
    })
}
/// Applies the fast bilateral filter to an 8-bit image.
///
/// Validates both images, builds a rayon pool sized by `threading_policy`, and dispatches
/// on the source's channel layout to the plane, three-channel or four-channel
/// implementation.
///
/// * `src` - source image.
/// * `dst` - destination image; must pass the layout and size checks against `src`.
/// * `spatial_sigma`, `range_sigma` - forwarded unchanged to the implementation.
/// * `threading_policy` - decides how many worker threads the pool gets.
///
/// # Errors
/// Returns the error of the first failing layout or size check.
///
/// # Panics
/// Panics when the thread pool cannot be built, or (inside the implementation) when either
/// sigma is less than or equal to zero.
pub fn fast_bilateral_filter(
    src: &BlurImage<u8>,
    dst: &mut BlurImageMut<u8>,
    spatial_sigma: f32,
    range_sigma: f32,
    threading_policy: ThreadingPolicy,
) -> Result<(), BlurError> {
    src.check_layout()?;
    dst.check_layout(Some(src))?;
    src.size_matches_mut(dst)?;

    let workers = threading_policy.thread_count(src.width, src.height) as u32 as usize;
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(workers)
        .build()
        .unwrap();

    match src.channels {
        FastBlurChannels::Plane => {
            fast_bilateral_filter_plane_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
        FastBlurChannels::Channels3 => {
            fast_bilateral_filter_rgb_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
        FastBlurChannels::Channels4 => {
            fast_bilateral_filter_rgba_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
    }
}
/// Applies the fast bilateral filter to a 16-bit image.
///
/// Validates both images, builds a rayon pool sized by `threading_policy`, and dispatches
/// on the source's channel layout to the plane, three-channel or four-channel
/// implementation.
///
/// * `src` - source image.
/// * `dst` - destination image; must pass the layout and size checks against `src`.
/// * `spatial_sigma`, `range_sigma` - forwarded unchanged to the implementation.
/// * `threading_policy` - decides how many worker threads the pool gets.
///
/// # Errors
/// Returns the error of the first failing layout or size check.
///
/// # Panics
/// Panics when the thread pool cannot be built, or (inside the implementation) when either
/// sigma is less than or equal to zero.
pub fn fast_bilateral_filter_u16(
    src: &BlurImage<u16>,
    dst: &mut BlurImageMut<u16>,
    spatial_sigma: f32,
    range_sigma: f32,
    threading_policy: ThreadingPolicy,
) -> Result<(), BlurError> {
    src.check_layout()?;
    dst.check_layout(Some(src))?;
    src.size_matches_mut(dst)?;

    let workers = threading_policy.thread_count(src.width, src.height) as u32 as usize;
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(workers)
        .build()
        .unwrap();

    match src.channels {
        FastBlurChannels::Plane => {
            fast_bilateral_filter_plane_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
        FastBlurChannels::Channels3 => {
            fast_bilateral_filter_rgb_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
        FastBlurChannels::Channels4 => {
            fast_bilateral_filter_rgba_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
    }
}
/// Applies the fast bilateral filter to an `f32` image.
///
/// Validates both images, builds a rayon pool sized by `threading_policy`, and dispatches
/// on the source's channel layout to the plane, three-channel or four-channel
/// implementation.
///
/// * `src` - source image.
/// * `dst` - destination image; must pass the layout and size checks against `src`.
/// * `spatial_sigma`, `range_sigma` - forwarded unchanged to the implementation.
/// * `threading_policy` - decides how many worker threads the pool gets.
///
/// # Errors
/// Returns the error of the first failing layout or size check.
///
/// # Panics
/// Panics when the thread pool cannot be built, or (inside the implementation) when either
/// sigma is less than or equal to zero.
pub fn fast_bilateral_filter_f32(
    src: &BlurImage<f32>,
    dst: &mut BlurImageMut<f32>,
    spatial_sigma: f32,
    range_sigma: f32,
    threading_policy: ThreadingPolicy,
) -> Result<(), BlurError> {
    src.check_layout()?;
    dst.check_layout(Some(src))?;
    src.size_matches_mut(dst)?;

    let workers = threading_policy.thread_count(src.width, src.height) as u32 as usize;
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(workers)
        .build()
        .unwrap();

    match src.channels {
        FastBlurChannels::Plane => {
            fast_bilateral_filter_plane_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
        FastBlurChannels::Channels3 => {
            fast_bilateral_filter_rgb_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
        FastBlurChannels::Channels4 => {
            fast_bilateral_filter_rgba_impl(src, dst, spatial_sigma, range_sigma, &pool)
        }
    }
}