use crate::edge_mode::{clamp_edge, reflect_index, reflect_index_101};
use crate::filter1d::arena_roi::copy_roi;
use crate::filter1d::filter_element::KernelShape;
use crate::img_size::ImageSize;
use crate::primitives::PrimitiveCast;
use crate::util::check_slice_size;
use crate::{BlurError, BlurImage, EdgeMode, EdgeMode2D, Scalar};
use num_traits::AsPrimitive;
use std::fmt::Debug;
/// Geometry of a padded ("arena") image buffer.
///
/// As built by `make_arena` in this file, `width`/`height` include the
/// padding, while `pad_w`/`pad_h` are the left and top padding respectively.
#[derive(Copy, Clone)]
pub struct Arena {
    /// Arena width in pixels.
    pub width: usize,
    /// Arena height in pixels.
    #[allow(dead_code)]
    pub height: usize,
    /// Horizontal padding in pixels.
    pub pad_w: usize,
    /// Vertical padding in pixels.
    pub pad_h: usize,
    /// Number of interleaved channels per pixel.
    pub components: usize,
}

impl Arena {
    /// Bundles the given arena dimensions; no validation is performed.
    pub fn new(
        arena_width: usize,
        arena_height: usize,
        arena_pad_w: usize,
        arena_pad_h: usize,
        components: usize,
    ) -> Self {
        Self {
            components,
            pad_h: arena_pad_h,
            pad_w: arena_pad_w,
            height: arena_height,
            width: arena_width,
        }
    }
}
/// Per-side padding, in pixels, to add around an image when building an arena.
#[derive(Copy, Clone)]
pub(crate) struct ArenaPads {
    pub pad_left: usize,
    pub pad_top: usize,
    pub pad_right: usize,
    pub pad_bottom: usize,
}

impl ArenaPads {
    /// Uses the same padding `v` on all four sides.
    pub(crate) fn constant(v: usize) -> ArenaPads {
        Self {
            pad_left: v,
            pad_top: v,
            pad_right: v,
            pad_bottom: v,
        }
    }

    /// Builds the pads from explicit per-side values.
    pub(crate) fn new(
        pad_left: usize,
        pad_top: usize,
        pad_right: usize,
        pad_bottom: usize,
    ) -> ArenaPads {
        Self {
            pad_left,
            pad_top,
            pad_right,
            pad_bottom,
        }
    }

    /// Pads each side by half the kernel extent along that axis
    /// (integer division, so a kernel of width 5 pads by 2 on the left and right).
    pub(crate) fn from_kernel_shape(kernel_shape: KernelShape) -> ArenaPads {
        let horizontal = kernel_shape.width / 2;
        let vertical = kernel_shape.height / 2;
        Self {
            pad_left: horizontal,
            pad_top: vertical,
            pad_right: horizontal,
            pad_bottom: vertical,
        }
    }
}
/// Builds a padded copy ("arena") of `image` and returns it together with its geometry.
///
/// This is a dispatcher: when the crate is built with the `avx` / `sse` features on
/// x86/x86_64 and the running CPU reports AVX2 / SSE4.1, the call is routed to a variant
/// compiled with that target feature enabled; otherwise the portable
/// `make_arena_exec` is called directly. All variants run the same `make_arena_exec` body.
///
/// * `image` - source pixels with `CN` interleaved channels per pixel.
/// * `image_stride` - number of elements between the starts of consecutive source rows.
/// * `image_size` - source width and height in pixels.
/// * `pads` - padding to add on each side, in pixels.
/// * `edge_modes` - border handling, chosen independently for each axis.
/// * `scalar` - fill value for `EdgeMode::Constant` borders; channel `c` reads `scalar[c]`.
///
/// Returns the padded buffer and an [`Arena`] describing it.
///
/// # Errors
///
/// Propagates whatever `check_slice_size` (called by `make_arena_exec`) reports.
pub(crate) fn make_arena<T, const CN: usize>(
image: &[T],
image_stride: usize,
image_size: ImageSize,
pads: ArenaPads,
edge_modes: EdgeMode2D,
scalar: Scalar,
) -> Result<(Vec<T>, Arena), BlurError>
where
T: Default + Copy + Send + Sync + 'static,
f64: AsPrimitive<T>,
{
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
{
if std::arch::is_x86_feature_detected!("avx2") {
// SAFETY: AVX2 availability was confirmed by the runtime check just above.
return unsafe {
make_arena_avx2::<T, CN>(image, image_stride, image_size, pads, edge_modes, scalar)
};
}
}
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
// SAFETY: SSE4.1 availability was confirmed by the runtime check just above.
return unsafe {
make_arena_sse4_1::<T, CN>(
image,
image_stride,
image_size,
pads,
edge_modes,
scalar,
)
};
}
}
// Portable fallback: no matching feature compiled in, or the CPU lacks it.
make_arena_exec::<T, CN>(image, image_stride, image_size, pads, edge_modes, scalar)
}
/// AVX2-enabled instantiation of `make_arena_exec`.
///
/// The body is just the `#[inline(always)]` `make_arena_exec`, so it is compiled here
/// with the `avx2` target feature enabled. Arguments, return value and errors are those
/// of `make_arena_exec`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 (`make_arena` checks this with
/// `is_x86_feature_detected!` before calling).
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn make_arena_avx2<T, const CN: usize>(
image: &[T],
image_stride: usize,
image_size: ImageSize,
pads: ArenaPads,
edge_modes: EdgeMode2D,
scalar: Scalar,
) -> Result<(Vec<T>, Arena), BlurError>
where
T: Default + Copy + Send + Sync + 'static,
f64: AsPrimitive<T>,
{
make_arena_exec::<T, CN>(image, image_stride, image_size, pads, edge_modes, scalar)
}
/// SSE4.1-enabled instantiation of `make_arena_exec`.
///
/// The body is just the `#[inline(always)]` `make_arena_exec`, so it is compiled here
/// with the `sse4.1` target feature enabled. Arguments, return value and errors are those
/// of `make_arena_exec`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.1 (`make_arena` checks this with
/// `is_x86_feature_detected!` before calling).
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn make_arena_sse4_1<T, const CN: usize>(
image: &[T],
image_stride: usize,
image_size: ImageSize,
pads: ArenaPads,
edge_modes: EdgeMode2D,
scalar: Scalar,
) -> Result<(Vec<T>, Arena), BlurError>
where
T: Default + Copy + Send + Sync + 'static,
f64: AsPrimitive<T>,
{
make_arena_exec::<T, CN>(image, image_stride, image_size, pads, edge_modes, scalar)
}
/// Portable implementation behind `make_arena`.
///
/// Allocates a buffer of `(width + left + right) x (height + top + bottom)` pixels with
/// `CN` channels each, copies the source image into its interior with `copy_roi`, and
/// then fills the four border strips (top, left, bottom, right).
///
/// Each border pixel is resolved per axis: an axis whose mode is `EdgeMode::Constant`
/// only yields a source coordinate when the pixel lies inside the image along that axis,
/// while any other mode maps the coordinate back into the image through `clamp_edge!`.
/// If both axes yield a source coordinate the pixel is copied from the image; otherwise
/// it is filled with `scalar[c]` for each channel `c`.
///
/// Returns the padded buffer and an [`Arena`] whose `pad_w` / `pad_h` are the left / top
/// padding.
///
/// # Errors
///
/// Propagates the error from `check_slice_size` when `image` does not match the given
/// stride and size.
#[inline(always)]
fn make_arena_exec<T, const CN: usize>(
    image: &[T],
    image_stride: usize,
    image_size: ImageSize,
    pads: ArenaPads,
    edge_modes: EdgeMode2D,
    scalar: Scalar,
) -> Result<(Vec<T>, Arena), BlurError>
where
    T: Default + Copy + Send + Sync + 'static,
    f64: AsPrimitive<T>,
{
    check_slice_size(image, image_stride, image_size.width, image_size.height, CN)?;

    let (width, height) = (image_size.width, image_size.height);
    let (pad_w, pad_h) = (pads.pad_left, pads.pad_top);
    let new_height = height + pads.pad_top + pads.pad_bottom;
    let new_width = width + pads.pad_left + pads.pad_right;
    let old_stride = image_stride;
    let new_stride = new_width * CN;

    let mut padded_image = vec![T::default(); new_height * new_width * CN];

    // Interior: the untouched source image, shifted by the top/left padding.
    let offset = pad_h * new_stride + pad_w * CN;
    copy_roi(
        &mut padded_image[offset..],
        image,
        new_stride,
        old_stride,
        width * CN,
    );

    // (rows, columns) of the four border strips, in arena coordinates:
    // top, left, bottom, right. The top and bottom strips span the full arena width.
    let body_rows = pad_h..(new_height - pads.pad_bottom);
    let filling_ranges = [
        (0..pad_h, 0..new_width),
        (body_rows.clone(), 0..pad_w),
        ((height + pad_h)..new_height, 0..new_width),
        (body_rows, (width + pad_w)..new_width),
    ];

    let vertical_constant = edge_modes.vertical == EdgeMode::Constant;
    let horizontal_constant = edge_modes.horizontal == EdgeMode::Constant;

    // For a constant-mode axis: the coordinate itself when inside `0..len`, else nothing.
    let inside = |v: i64, len: usize| -> Option<usize> {
        if v < 0 || v >= len as i64 {
            None
        } else {
            Some(v as usize)
        }
    };

    for (rows, cols) in filling_ranges.iter() {
        let dst_rows = padded_image
            .chunks_exact_mut(new_stride)
            .skip(rows.start);
        for (i, dst_row) in rows.clone().zip(dst_rows) {
            let rel_y = i as i64 - pad_h as i64;
            let dst_pixels = dst_row
                .as_chunks_mut::<CN>()
                .0
                .iter_mut()
                .skip(cols.start);
            for (j, dst) in cols.clone().zip(dst_pixels) {
                let rel_x = j as i64 - pad_w as i64;

                let src_y = if vertical_constant {
                    inside(rel_y, height)
                } else {
                    Some(clamp_edge!(edge_modes.vertical, rel_y, 0, height as i64))
                };
                let src_x = if horizontal_constant {
                    inside(rel_x, width)
                } else {
                    Some(clamp_edge!(
                        edge_modes.horizontal,
                        rel_x,
                        0,
                        width as i64
                    ))
                };

                match (src_y, src_x) {
                    (Some(y), Some(x)) => {
                        let v_src = y * old_stride + x * CN;
                        dst.copy_from_slice(&image[v_src..(v_src + CN)]);
                    }
                    _ => {
                        for (c, channel) in dst.iter_mut().enumerate() {
                            *channel = scalar[c].as_();
                        }
                    }
                }
            }
        }
    }

    Ok((
        padded_image,
        Arena::new(new_width, new_height, pad_w, pad_h, CN),
    ))
}
/// Allocates and fills a horizontally padded copy of image row `source_y`.
///
/// The row is padded by `kernel_size.width / 2` pixels on each side; the padding content
/// is produced by `write_arena_row` according to `border_mode` (`scalar` supplies the
/// fill value for `EdgeMode::Constant`).
///
/// Returns the padded row and its width in pixels (image width plus both pads).
///
/// # Errors
///
/// Propagates failures from `image.check_layout()` and from `write_arena_row`.
pub fn make_arena_row<T, const CN: usize>(
    image: &BlurImage<T>,
    source_y: usize,
    kernel_size: KernelShape,
    border_mode: EdgeMode,
    scalar: Scalar,
) -> Result<(Vec<T>, usize), BlurError>
where
    T: Default + Copy + Send + Sync + 'static + Debug,
    f64: PrimitiveCast<T>,
{
    image.check_layout()?;

    let half_kernel = kernel_size.width / 2;
    let padded_pixels = image.size().width + half_kernel * 2;

    let mut padded_row = vec![T::default(); padded_pixels * CN];
    write_arena_row::<T, CN>(
        padded_row.as_mut_slice(),
        image,
        source_y,
        kernel_size,
        border_mode,
        scalar,
    )?;

    Ok((padded_row, padded_pixels))
}
/// Writes a horizontally padded copy of image row `source_y` into `row`.
///
/// With `pad_w = kernel_size.width / 2`, the first `(width + 2 * pad_w) * CN` elements of
/// `row` are laid out as `[left pad | source row | right pad]`. Arena pixel `j` holds the
/// source pixel at x-coordinate `j - pad_w`, mapped back into the image according to
/// `border_mode`; for `EdgeMode::Constant` the padding is filled with `scalar[c]` per
/// channel `c` instead. Both pads are filled in ascending x order, which is the same
/// convention `make_arena` uses for its left and right borders.
///
/// Elements of `row` beyond the arena width are left untouched.
///
/// # Errors
///
/// * Propagates the error from `image.check_layout()`.
/// * Returns `BlurError::ImagesMustMatch` when `row` is shorter than the arena width.
///
/// # Panics
///
/// Panics if `source_y` addresses a row outside the image data.
pub(crate) fn write_arena_row<T, const CN: usize>(
    row: &mut [T],
    image: &BlurImage<T>,
    source_y: usize,
    kernel_size: KernelShape,
    border_mode: EdgeMode,
    scalar: Scalar,
) -> Result<(), BlurError>
where
    T: Default + Copy + Send + Sync + 'static + Debug,
    f64: PrimitiveCast<T>,
{
    image.check_layout()?;
    let pad_w = kernel_size.width / 2;
    let image_size = image.size();
    let width = image_size.width;
    let arena_width = width * CN + pad_w * 2 * CN;
    if row.len() < arena_width {
        return Err(BlurError::ImagesMustMatch);
    }
    let source_offset = source_y * image.row_stride() as usize;
    let source_row = &image.data.as_ref()[source_offset..(source_offset + width * CN)];

    // Carve the arena into its three regions so each pad is addressed from its own
    // start, independent of how long `row` is overall.
    let (left_pad, rest) = row[..arena_width].split_at_mut(pad_w * CN);
    let (body, right_pad) = rest.split_at_mut(width * CN);
    body.copy_from_slice(source_row);

    // Maps a (possibly out-of-range) source x-coordinate to the source pixel that
    // should be shown there, or `None` when the constant fill applies.
    let source_index = |x: isize| -> Option<usize> {
        match border_mode {
            EdgeMode::Clamp => Some(x.clamp(0, width as isize - 1) as usize),
            EdgeMode::Wrap => Some((x as i64).rem_euclid(width as i64) as usize),
            EdgeMode::Reflect => Some(reflect_index(x, width as isize)),
            EdgeMode::Reflect101 => Some(reflect_index_101(x, width as isize)),
            EdgeMode::Constant => None,
        }
    };

    // Fills `pad` pixel by pixel; its first pixel corresponds to source x `first_x`
    // and the coordinate grows by one per pixel.
    let fill_pad = |pad: &mut [T], first_x: isize| {
        for (k, dst) in pad.chunks_exact_mut(CN).enumerate() {
            match source_index(first_x + k as isize) {
                Some(src_x) => {
                    let old_px = src_x * CN;
                    dst.copy_from_slice(&source_row[old_px..(old_px + CN)]);
                }
                None => {
                    for (c, channel) in dst.iter_mut().enumerate() {
                        *channel = scalar[c].cast_();
                    }
                }
            }
        }
    };

    fill_pad(left_pad, -(pad_w as isize));
    fill_pad(right_pad, width as isize);
    Ok(())
}
/// Vertical padding rows for an image, stored separately from the image itself.
///
/// As produced by `make_arena_columns` in this file, each buffer holds
/// `kernel_height / 2` rows of `width * CN` elements.
#[derive(Clone)]
pub struct ArenaColumns<T: Copy> {
    /// Rows that logically precede the first image row.
    pub top_pad: Vec<T>,
    /// Rows that logically follow the last image row.
    pub bottom_pad: Vec<T>,
}

impl<T: Copy> ArenaColumns<T> {
    /// Takes ownership of the two padding buffers as they are.
    pub fn new(top_pad: Vec<T>, bottom_pad: Vec<T>) -> Self {
        Self {
            bottom_pad,
            top_pad,
        }
    }
}
/// Builds the top and bottom padding rows for `image`.
///
/// This is a dispatcher: when the crate is built with the `avx` / `sse` features on
/// x86/x86_64 and the running CPU reports AVX2 / SSE4.1, the call is routed to a variant
/// compiled with that target feature enabled; otherwise the portable
/// `make_arena_columns_exec` is called directly. All variants run the same
/// `make_arena_columns_exec` body.
///
/// * `image` - source pixels with `CN` interleaved channels, rows stored back to back
///   (the implementation uses `image_size.width * CN` as the row stride).
/// * `image_size` - source width and height in pixels.
/// * `kernel_size` - only its height is used: each pad has `kernel_size.height / 2` rows.
/// * `border_mode` - how rows outside the image are derived.
/// * `scalar_projection` - per-channel fill value used for `EdgeMode::Constant`.
///
/// # Errors
///
/// Propagates whatever `check_slice_size` (called by `make_arena_columns_exec`) reports.
pub(crate) fn make_arena_columns<T, const CN: usize>(
image: &[T],
image_size: ImageSize,
kernel_size: KernelShape,
border_mode: EdgeMode,
scalar_projection: [T; CN],
) -> Result<ArenaColumns<T>, BlurError>
where
T: Default + Copy + Send + Sync + 'static,
{
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
{
if std::arch::is_x86_feature_detected!("avx2") {
// SAFETY: AVX2 availability was confirmed by the runtime check just above.
return unsafe {
mac_avx2::<T, CN>(
image,
image_size,
kernel_size,
border_mode,
scalar_projection,
)
};
}
}
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
// SAFETY: SSE4.1 availability was confirmed by the runtime check just above.
return unsafe {
mac_sse_4_1::<T, CN>(
image,
image_size,
kernel_size,
border_mode,
scalar_projection,
)
};
}
}
// Portable fallback: no matching feature compiled in, or the CPU lacks it.
make_arena_columns_exec::<T, CN>(
image,
image_size,
kernel_size,
border_mode,
scalar_projection,
)
}
/// AVX2-enabled instantiation of `make_arena_columns_exec`.
///
/// The body is just the `#[inline(always)]` `make_arena_columns_exec`, so it is compiled
/// here with the `avx2` target feature enabled. Arguments, return value and errors are
/// those of `make_arena_columns_exec`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 (`make_arena_columns` checks this
/// with `is_x86_feature_detected!` before calling).
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn mac_avx2<T, const CN: usize>(
image: &[T],
image_size: ImageSize,
kernel_size: KernelShape,
border_mode: EdgeMode,
scalar_projection: [T; CN],
) -> Result<ArenaColumns<T>, BlurError>
where
T: Default + Copy + Send + Sync + 'static,
{
make_arena_columns_exec::<T, CN>(
image,
image_size,
kernel_size,
border_mode,
scalar_projection,
)
}
/// SSE4.1-enabled instantiation of `make_arena_columns_exec`.
///
/// The body is just the `#[inline(always)]` `make_arena_columns_exec`, so it is compiled
/// here with the `sse4.1` target feature enabled. Arguments, return value and errors are
/// those of `make_arena_columns_exec`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.1 (`make_arena_columns` checks
/// this with `is_x86_feature_detected!` before calling).
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn mac_sse_4_1<T, const CN: usize>(
image: &[T],
image_size: ImageSize,
kernel_size: KernelShape,
border_mode: EdgeMode,
scalar_projection: [T; CN],
) -> Result<ArenaColumns<T>, BlurError>
where
T: Default + Copy + Send + Sync + 'static,
{
make_arena_columns_exec::<T, CN>(
image,
image_size,
kernel_size,
border_mode,
scalar_projection,
)
}
/// Portable implementation behind `make_arena_columns`.
///
/// Produces `kernel_size.height / 2` padding rows above and below the image. Row `k` of
/// the top pad stands for source row `k - pad_h`, row `k` of the bottom pad for source
/// row `height + k`; each such out-of-range row is mapped back into the image according
/// to `border_mode` and copied whole. For `EdgeMode::Constant` every pixel of the pad is
/// set to `scalar_projection` instead.
///
/// `image` is read with a row stride of `image_size.width * CN`.
///
/// # Errors
///
/// Propagates the error from `check_slice_size` when `image` does not match
/// `image_size`.
#[inline(always)]
fn make_arena_columns_exec<T, const CN: usize>(
    image: &[T],
    image_size: ImageSize,
    kernel_size: KernelShape,
    border_mode: EdgeMode,
    scalar_projection: [T; CN],
) -> Result<ArenaColumns<T>, BlurError>
where
    T: Default + Copy + Send + Sync + 'static,
{
    check_slice_size(
        image,
        image_size.width * CN,
        image_size.width,
        image_size.height,
        CN,
    )?;

    let rows = image_size.height;
    let row_len = image_size.width * CN;
    let pad_h = kernel_size.height / 2;

    let mut top_pad = vec![T::default(); pad_h * image_size.width * CN];
    let mut bottom_pad = vec![T::default(); pad_h * image_size.width * CN];

    // Maps an out-of-range source row to the image row that should appear there,
    // or `None` when the constant fill applies.
    let resolve_row = |y: isize| -> Option<usize> {
        match border_mode {
            EdgeMode::Clamp => Some(y.clamp(0, rows as isize - 1) as usize),
            EdgeMode::Wrap => Some((y as i64).rem_euclid(rows as i64) as usize),
            EdgeMode::Reflect => Some(reflect_index(y, rows as isize)),
            EdgeMode::Reflect101 => Some(reflect_index_101(y, rows as isize)),
            EdgeMode::Constant => None,
        }
    };

    // Fills one pad row by row; its first row corresponds to source row `first_y`.
    let fill = |pad: &mut [T], first_y: isize| {
        for (k, dst_row) in pad.chunks_exact_mut(row_len).enumerate() {
            match resolve_row(first_y + k as isize) {
                Some(y) => {
                    let v_src = y * row_len;
                    dst_row.copy_from_slice(&image[v_src..(v_src + row_len)]);
                }
                None => {
                    for pixel in dst_row.chunks_exact_mut(CN) {
                        pixel.copy_from_slice(&scalar_projection);
                    }
                }
            }
        }
    };

    fill(top_pad.as_mut_slice(), -(pad_h as isize));
    fill(bottom_pad.as_mut_slice(), rows as isize);

    Ok(ArenaColumns::new(top_pad, bottom_pad))
}