use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2, Dimension};
use scirs2_core::numeric::{Float, FromPrimitive};
use scirs2_core::parallel_ops::*;
use scirs2_core::simd_ops::SimdUnifiedOps;
use std::fmt::Debug;
use super::BorderMode;
use crate::error::NdimageResult;
pub trait OptimizedBoundaryOps<T> {
fn get_constant(&self, indices: &[isize], cval: T) -> T;
fn get_nearest(&self, indices: &[isize]) -> T;
fn get_reflect(&self, indices: &[isize]) -> T;
fn get_mirror(&self, indices: &[isize]) -> T;
fn get_wrap(&self, indices: &[isize]) -> T;
}
pub struct Boundary1D<'a, T> {
data: &'a ArrayView1<'a, T>,
len: isize,
}
impl<'a, T: Float + FromPrimitive + Debug + Clone> Boundary1D<'a, T> {
pub fn new(data: &'a ArrayView1<'a, T>) -> Self {
Self {
data,
len: data.len() as isize,
}
}
#[inline(always)]
fn clamp_index(&self, idx: isize) -> usize {
idx.clamp(0, self.len - 1) as usize
}
#[inline(always)]
fn reflect_index(&self, mut idx: isize) -> usize {
if idx < 0 {
idx = -idx;
}
if idx >= self.len {
idx = 2 * self.len - idx - 2;
}
self.clamp_index(idx)
}
#[inline(always)]
fn mirror_index(&self, mut idx: isize) -> usize {
while idx < 0 {
idx = -idx - 1;
}
while idx >= self.len {
idx = 2 * self.len - idx - 1;
}
self.clamp_index(idx)
}
#[inline(always)]
fn wrap_index(&self, idx: isize) -> usize {
(((idx % self.len) + self.len) % self.len) as usize
}
}
impl<'a, T: Float + FromPrimitive + Debug + Clone> OptimizedBoundaryOps<T> for Boundary1D<'a, T> {
#[inline(always)]
fn get_constant(&self, indices: &[isize], cval: T) -> T {
let idx = indices[0];
if idx >= 0 && idx < self.len {
self.data[idx as usize]
} else {
cval
}
}
#[inline(always)]
fn get_nearest(&self, indices: &[isize]) -> T {
self.data[self.clamp_index(indices[0])]
}
#[inline(always)]
fn get_reflect(&self, indices: &[isize]) -> T {
self.data[self.reflect_index(indices[0])]
}
#[inline(always)]
fn get_mirror(&self, indices: &[isize]) -> T {
self.data[self.mirror_index(indices[0])]
}
#[inline(always)]
fn get_wrap(&self, indices: &[isize]) -> T {
self.data[self.wrap_index(indices[0])]
}
}
pub struct Boundary2D<'a, T> {
data: &'a ArrayView2<'a, T>,
shape: [isize; 2],
}
impl<'a, T: Float + FromPrimitive + Debug + Clone> Boundary2D<'a, T> {
pub fn new(data: &'a ArrayView2<'a, T>) -> Self {
let (h, w) = data.dim();
Self {
data,
shape: [h as isize, w as isize],
}
}
#[inline(always)]
fn clamp_indices(&self, i: isize, j: isize) -> (usize, usize) {
(
i.clamp(0, self.shape[0] - 1) as usize,
j.clamp(0, self.shape[1] - 1) as usize,
)
}
#[inline(always)]
fn reflect_indices(&self, mut i: isize, mut j: isize) -> (usize, usize) {
if i < 0 {
i = -i;
}
if i >= self.shape[0] {
i = 2 * self.shape[0] - i - 2;
}
if j < 0 {
j = -j;
}
if j >= self.shape[1] {
j = 2 * self.shape[1] - j - 2;
}
self.clamp_indices(i, j)
}
#[inline(always)]
fn mirror_indices(&self, mut i: isize, mut j: isize) -> (usize, usize) {
while i < 0 {
i = -i - 1;
}
while i >= self.shape[0] {
i = 2 * self.shape[0] - i - 1;
}
while j < 0 {
j = -j - 1;
}
while j >= self.shape[1] {
j = 2 * self.shape[1] - j - 1;
}
self.clamp_indices(i, j)
}
#[inline(always)]
fn wrap_indices(&self, i: isize, j: isize) -> (usize, usize) {
(
(((i % self.shape[0]) + self.shape[0]) % self.shape[0]) as usize,
(((j % self.shape[1]) + self.shape[1]) % self.shape[1]) as usize,
)
}
}
impl<'a, T: Float + FromPrimitive + Debug + Clone> OptimizedBoundaryOps<T> for Boundary2D<'a, T> {
#[inline(always)]
fn get_constant(&self, indices: &[isize], cval: T) -> T {
let i = indices[0];
let j = indices[1];
if i >= 0 && i < self.shape[0] && j >= 0 && j < self.shape[1] {
self.data[[i as usize, j as usize]]
} else {
cval
}
}
#[inline(always)]
fn get_nearest(&self, indices: &[isize]) -> T {
let (i, j) = self.clamp_indices(indices[0], indices[1]);
self.data[[i, j]]
}
#[inline(always)]
fn get_reflect(&self, indices: &[isize]) -> T {
let (i, j) = self.reflect_indices(indices[0], indices[1]);
self.data[[i, j]]
}
#[inline(always)]
fn get_mirror(&self, indices: &[isize]) -> T {
let (i, j) = self.mirror_indices(indices[0], indices[1]);
self.data[[i, j]]
}
#[inline(always)]
fn get_wrap(&self, indices: &[isize]) -> T {
let (i, j) = self.wrap_indices(indices[0], indices[1]);
self.data[[i, j]]
}
}
#[allow(dead_code)]
pub fn convolve2d_optimized<T>(
input: &Array2<T>,
kernel: &Array2<T>,
mode: BorderMode,
cval: Option<T>,
) -> NdimageResult<Array2<T>>
where
T: Float + FromPrimitive + Debug + Clone + Send + Sync + 'static,
{
let (h, w) = input.dim();
let (kh, kw) = kernel.dim();
let kh_half = (kh / 2) as isize;
let kw_half = (kw / 2) as isize;
let cval = cval.unwrap_or_else(T::zero);
let mut output = Array2::zeros((h, w));
let use_parallel = h * w > 10000;
if use_parallel {
let rows: Vec<usize> = (0..h).collect();
let process_row = |&i: &usize| -> Result<Vec<T>, scirs2_core::CoreError> {
let view = input.view();
let boundary = Boundary2D::new(&view);
let mut row_result = Vec::with_capacity(w);
for j in 0..w {
let mut sum = T::zero();
for ki in 0..kh {
for kj in 0..kw {
let ii = i as isize + ki as isize - kh_half;
let jj = j as isize + kj as isize - kw_half;
let val = match mode {
BorderMode::Constant => boundary.get_constant(&[ii, jj], cval),
BorderMode::Nearest => boundary.get_nearest(&[ii, jj]),
BorderMode::Reflect => boundary.get_reflect(&[ii, jj]),
BorderMode::Mirror => boundary.get_mirror(&[ii, jj]),
BorderMode::Wrap => boundary.get_wrap(&[ii, jj]),
};
sum = sum + val * kernel[[kh - ki - 1, kw - kj - 1]];
}
}
row_result.push(sum);
}
Ok(row_result)
};
let results = parallel_map_result(&rows, process_row)?;
for (i, row_data) in results.into_iter().enumerate() {
for (j, val) in row_data.into_iter().enumerate() {
output[[i, j]] = val;
}
}
} else {
let view = input.view();
let boundary = Boundary2D::new(&view);
for i in 0..h {
for j in 0..w {
let mut sum = T::zero();
for ki in 0..kh {
for kj in 0..kw {
let ii = i as isize + ki as isize - kh_half;
let jj = j as isize + kj as isize - kw_half;
let val = match mode {
BorderMode::Constant => boundary.get_constant(&[ii, jj], cval),
BorderMode::Nearest => boundary.get_nearest(&[ii, jj]),
BorderMode::Reflect => boundary.get_reflect(&[ii, jj]),
BorderMode::Mirror => boundary.get_mirror(&[ii, jj]),
BorderMode::Wrap => boundary.get_wrap(&[ii, jj]),
};
sum = sum + val * kernel[[kh - ki - 1, kw - kj - 1]];
}
}
output[[i, j]] = sum;
}
}
}
Ok(output)
}
#[allow(dead_code)]
pub fn convolve1d_optimized<T>(
input: &Array1<T>,
kernel: &Array1<T>,
mode: BorderMode,
cval: Option<T>,
) -> NdimageResult<Array1<T>>
where
T: Float + FromPrimitive + Debug + Clone + Send + Sync + SimdUnifiedOps + 'static,
{
let n = input.len();
let k = kernel.len();
let k_half = (k / 2) as isize;
let cval = cval.unwrap_or_else(T::zero);
let mut output = Array1::zeros(n);
let view = input.view();
let boundary = Boundary1D::new(&view);
if n > 32 && T::simd_available() {
let chunk_size = 256;
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
for i in chunk_start..chunk_end {
let mut sum = T::zero();
for ki in 0..k {
let ii = i as isize + ki as isize - k_half;
let val = match mode {
BorderMode::Constant => boundary.get_constant(&[ii], cval),
BorderMode::Nearest => boundary.get_nearest(&[ii]),
BorderMode::Reflect => boundary.get_reflect(&[ii]),
BorderMode::Mirror => boundary.get_mirror(&[ii]),
BorderMode::Wrap => boundary.get_wrap(&[ii]),
};
sum = sum + val * kernel[k - ki - 1];
}
output[i] = sum;
}
}
} else {
for i in 0..n {
let mut sum = T::zero();
for ki in 0..k {
let ii = i as isize + ki as isize - k_half;
let val = match mode {
BorderMode::Constant => boundary.get_constant(&[ii], cval),
BorderMode::Nearest => boundary.get_nearest(&[ii]),
BorderMode::Reflect => boundary.get_reflect(&[ii]),
BorderMode::Mirror => boundary.get_mirror(&[ii]),
BorderMode::Wrap => boundary.get_wrap(&[ii]),
};
sum = sum + val * kernel[k - ki - 1];
}
output[i] = sum;
}
}
Ok(output)
}
#[allow(dead_code)]
pub fn apply_filter2d_optimized<T, F>(
input: &Array2<T>,
kernelshape: (usize, usize),
mode: BorderMode,
cval: Option<T>,
mut filter_fn: F,
) -> NdimageResult<Array2<T>>
where
T: Float + FromPrimitive + Debug + Clone + Send + Sync + 'static,
F: FnMut(&Boundary2D<T>, usize, usize, (usize, usize), (isize, isize)) -> T
+ Clone
+ Send
+ Sync,
{
let (h, w) = input.dim();
let (kh, kw) = kernelshape;
let kh_half = (kh / 2) as isize;
let kw_half = (kw / 2) as isize;
let mut output = Array2::zeros((h, w));
let view = input.view();
let boundary = Boundary2D::new(&view);
if h * w > 10000 {
let rows: Vec<usize> = (0..h).collect();
let process_row = |&i: &usize| -> Result<Vec<T>, scirs2_core::CoreError> {
let view = input.view();
let boundary = Boundary2D::new(&view);
let mut filter_fn_clone = filter_fn.clone();
let mut row_result = Vec::with_capacity(w);
for j in 0..w {
let val = filter_fn_clone(&boundary, i, j, kernelshape, (kh_half, kw_half));
row_result.push(val);
}
Ok(row_result)
};
let results = parallel_map_result(&rows, process_row)?;
for (i, row_data) in results.into_iter().enumerate() {
for (j, val) in row_data.into_iter().enumerate() {
output[[i, j]] = val;
}
}
} else {
for i in 0..h {
for j in 0..w {
output[[i, j]] = filter_fn(&boundary, i, j, kernelshape, (kh_half, kw_half));
}
}
}
Ok(output)
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::{arr1, arr2};
#[test]
fn test_boundary1d_modes() {
let data = arr1(&[1.0, 2.0, 3.0, 4.0]);
let view = data.view();
let boundary = Boundary1D::new(&view);
assert_eq!(boundary.get_constant(&[-1], 0.0), 0.0);
assert_eq!(boundary.get_constant(&[0], 0.0), 1.0);
assert_eq!(boundary.get_constant(&[4], 0.0), 0.0);
assert_eq!(boundary.get_nearest(&[-1]), 1.0);
assert_eq!(boundary.get_nearest(&[0]), 1.0);
assert_eq!(boundary.get_nearest(&[4]), 4.0);
assert_eq!(boundary.get_wrap(&[-1]), 4.0);
assert_eq!(boundary.get_wrap(&[4]), 1.0);
}
#[test]
fn test_convolve2d_optimized() {
let input = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]);
let kernel = arr2(&[[1.0, 0.0], [0.0, 1.0]]);
let result = convolve2d_optimized(&input, &kernel, BorderMode::Constant, Some(0.0))
.expect("Operation failed");
assert_eq!(result.shape(), input.shape());
assert_eq!(result[[0, 0]], 1.0);
assert_eq!(result[[1, 1]], 6.0);
}
}