#![allow(unsafe_code)]
use crate::error::{AlgorithmError, Result};
const GAUSSIAN_3X3: [f32; 9] = [
1.0 / 16.0,
2.0 / 16.0,
1.0 / 16.0,
2.0 / 16.0,
4.0 / 16.0,
2.0 / 16.0,
1.0 / 16.0,
2.0 / 16.0,
1.0 / 16.0,
];
const GAUSSIAN_3X3_ROW: [f32; 3] = [0.25, 0.5, 0.25];
const GAUSSIAN_3X3_COL: [f32; 3] = [0.25, 0.5, 0.25];
const SOBEL_X_3X3: [i16; 9] = [-1, 0, 1, -2, 0, 2, -1, 0, 1];
const SOBEL_Y_3X3: [i16; 9] = [-1, -2, -1, 0, 0, 0, 1, 2, 1];
const LAPLACIAN_3X3: [i16; 9] = [0, -1, 0, -1, 4, -1, 0, -1, 0];
#[cfg(target_arch = "aarch64")]
mod neon_impl {
use std::arch::aarch64::*;
#[target_feature(enable = "neon")]
pub(crate) unsafe fn separable_row_f32(
input: &[f32],
output: &mut [f32],
width: usize,
height: usize,
kernel: &[f32; 3],
) {
unsafe {
let k0 = vdupq_n_f32(kernel[0]);
let k1 = vdupq_n_f32(kernel[1]);
let k2 = vdupq_n_f32(kernel[2]);
for y in 0..height {
let row_off = y * width;
let interior_end = if width > 5 { width - 1 } else { 1 };
let chunks = (interior_end - 1) / 4;
for c in 0..chunks {
let x = 1 + c * 4;
if x + 3 < width - 1 {
let left = vld1q_f32(input.as_ptr().add(row_off + x - 1));
let center = vld1q_f32(input.as_ptr().add(row_off + x));
let right = vld1q_f32(input.as_ptr().add(row_off + x + 1));
let result =
vfmaq_f32(vfmaq_f32(vmulq_f32(left, k0), center, k1), right, k2);
vst1q_f32(output.as_mut_ptr().add(row_off + x), result);
}
}
let processed = 1 + chunks * 4;
for x in processed..interior_end {
if x > 0 && x < width - 1 {
output[row_off + x] = input[row_off + x - 1] * kernel[0]
+ input[row_off + x] * kernel[1]
+ input[row_off + x + 1] * kernel[2];
}
}
output[row_off] = input[row_off];
if width > 1 {
output[row_off + width - 1] = input[row_off + width - 1];
}
}
}
}
#[target_feature(enable = "neon")]
pub(crate) unsafe fn separable_col_f32(
input: &[f32],
output: &mut [f32],
width: usize,
height: usize,
kernel: &[f32; 3],
) {
unsafe {
let k0 = vdupq_n_f32(kernel[0]);
let k1 = vdupq_n_f32(kernel[1]);
let k2 = vdupq_n_f32(kernel[2]);
for y in 1..(height - 1) {
let row_above = (y - 1) * width;
let row_center = y * width;
let row_below = (y + 1) * width;
let chunks = width / 4;
for c in 0..chunks {
let x = c * 4;
let above = vld1q_f32(input.as_ptr().add(row_above + x));
let center = vld1q_f32(input.as_ptr().add(row_center + x));
let below = vld1q_f32(input.as_ptr().add(row_below + x));
let result = vfmaq_f32(vfmaq_f32(vmulq_f32(above, k0), center, k1), below, k2);
vst1q_f32(output.as_mut_ptr().add(row_center + x), result);
}
let rem_start = chunks * 4;
for x in rem_start..width {
output[row_center + x] = input[row_above + x] * kernel[0]
+ input[row_center + x] * kernel[1]
+ input[row_below + x] * kernel[2];
}
}
for x in 0..width {
output[x] = input[x];
output[(height - 1) * width + x] = input[(height - 1) * width + x];
}
}
}
#[target_feature(enable = "neon")]
pub(crate) unsafe fn sobel_magnitude(gx: &[i16], gy: &[i16], mag: &mut [u8]) {
unsafe {
let len = gx.len();
let chunks = len / 4;
for c in 0..chunks {
let off = c * 4;
let gx_i16 = vld1_s16(gx.as_ptr().add(off));
let gy_i16 = vld1_s16(gy.as_ptr().add(off));
let gx_i32 = vmovl_s16(gx_i16);
let gy_i32 = vmovl_s16(gy_i16);
let gx_f32 = vcvtq_f32_s32(gx_i32);
let gy_f32 = vcvtq_f32_s32(gy_i32);
let sum_sq = vfmaq_f32(vmulq_f32(gx_f32, gx_f32), gy_f32, gy_f32);
let magnitude = vsqrtq_f32(sum_sq);
let clamped = vminq_f32(vmaxq_f32(magnitude, vdupq_n_f32(0.0)), vdupq_n_f32(255.0));
let as_u32 = vcvtq_u32_f32(clamped);
mag[off] = vgetq_lane_u32(as_u32, 0) as u8;
mag[off + 1] = vgetq_lane_u32(as_u32, 1) as u8;
mag[off + 2] = vgetq_lane_u32(as_u32, 2) as u8;
mag[off + 3] = vgetq_lane_u32(as_u32, 3) as u8;
}
let rem = chunks * 4;
for i in rem..len {
let gx_f = f32::from(gx[i]);
let gy_f = f32::from(gy[i]);
let m = (gx_f * gx_f + gy_f * gy_f).sqrt();
mag[i] = m.clamp(0.0, 255.0) as u8;
}
}
}
}
pub fn gaussian_blur_3x3(
input: &[u8],
output: &mut [u8],
width: usize,
height: usize,
) -> Result<()> {
validate_buffer_size(input, output, width, height)?;
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!("Image too small for 3x3 filter: {}x{}", width, height),
});
}
#[cfg(target_arch = "aarch64")]
{
let size = width * height;
let mut f32_input: Vec<f32> = Vec::with_capacity(size);
for &v in input.iter() {
f32_input.push(f32::from(v));
}
let mut temp = vec![0.0_f32; size];
let mut f32_output = vec![0.0_f32; size];
unsafe {
neon_impl::separable_row_f32(&f32_input, &mut temp, width, height, &GAUSSIAN_3X3_ROW);
neon_impl::separable_col_f32(&temp, &mut f32_output, width, height, &GAUSSIAN_3X3_COL);
}
for i in 0..size {
output[i] = f32_output[i].clamp(0.0, 255.0) as u8;
}
Ok(())
}
#[cfg(not(target_arch = "aarch64"))]
{
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let mut sum = 0.0_f32;
for ky in 0..3 {
for kx in 0..3 {
let px = x + kx - 1;
let py = y + ky - 1;
let idx = py * width + px;
let kernel_idx = ky * 3 + kx;
sum += f32::from(input[idx]) * GAUSSIAN_3X3[kernel_idx];
}
}
let out_idx = y * width + x;
output[out_idx] = sum.clamp(0.0, 255.0) as u8;
}
}
copy_borders(input, output, width, height);
Ok(())
}
}
pub fn sobel_x_3x3(input: &[u8], output: &mut [i16], width: usize, height: usize) -> Result<()> {
if input.len() != width * height || output.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffers",
message: format!(
"Buffer size mismatch: input={}, output={}, expected={}",
input.len(),
output.len(),
width * height
),
});
}
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!("Image too small for 3x3 filter: {}x{}", width, height),
});
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let mut sum = 0_i32;
for ky in 0..3 {
for kx in 0..3 {
let px = x + kx - 1;
let py = y + ky - 1;
let idx = py * width + px;
let kernel_idx = ky * 3 + kx;
sum += i32::from(input[idx]) * i32::from(SOBEL_X_3X3[kernel_idx]);
}
}
let out_idx = y * width + x;
output[out_idx] = sum.clamp(-32768, 32767) as i16;
}
}
zero_borders_i16(output, width, height);
Ok(())
}
pub fn sobel_y_3x3(input: &[u8], output: &mut [i16], width: usize, height: usize) -> Result<()> {
if input.len() != width * height || output.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffers",
message: format!(
"Buffer size mismatch: input={}, output={}, expected={}",
input.len(),
output.len(),
width * height
),
});
}
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!("Image too small for 3x3 filter: {}x{}", width, height),
});
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let mut sum = 0_i32;
for ky in 0..3 {
for kx in 0..3 {
let px = x + kx - 1;
let py = y + ky - 1;
let idx = py * width + px;
let kernel_idx = ky * 3 + kx;
sum += i32::from(input[idx]) * i32::from(SOBEL_Y_3X3[kernel_idx]);
}
}
let out_idx = y * width + x;
output[out_idx] = sum.clamp(-32768, 32767) as i16;
}
}
zero_borders_i16(output, width, height);
Ok(())
}
pub fn sobel_magnitude(gradient_x: &[i16], gradient_y: &[i16], magnitude: &mut [u8]) -> Result<()> {
if gradient_x.len() != gradient_y.len() || gradient_x.len() != magnitude.len() {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffers",
message: format!(
"Buffer size mismatch: gx={}, gy={}, mag={}",
gradient_x.len(),
gradient_y.len(),
magnitude.len()
),
});
}
#[cfg(target_arch = "aarch64")]
{
unsafe {
neon_impl::sobel_magnitude(gradient_x, gradient_y, magnitude);
}
Ok(())
}
#[cfg(not(target_arch = "aarch64"))]
{
for i in 0..gradient_x.len() {
let gx = f32::from(gradient_x[i]);
let gy = f32::from(gradient_y[i]);
let mag = (gx * gx + gy * gy).sqrt();
magnitude[i] = mag.clamp(0.0, 255.0) as u8;
}
Ok(())
}
}
pub fn laplacian_3x3(input: &[u8], output: &mut [i16], width: usize, height: usize) -> Result<()> {
if input.len() != width * height || output.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffers",
message: format!(
"Buffer size mismatch: input={}, output={}, expected={}",
input.len(),
output.len(),
width * height
),
});
}
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!("Image too small for 3x3 filter: {}x{}", width, height),
});
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let top = i32::from(input[(y - 1) * width + x]);
let left = i32::from(input[y * width + x - 1]);
let center = i32::from(input[idx]);
let right = i32::from(input[y * width + x + 1]);
let bottom = i32::from(input[(y + 1) * width + x]);
let sum = 4 * center - top - left - right - bottom;
output[idx] = sum.clamp(-32768, 32767) as i16;
}
}
zero_borders_i16(output, width, height);
Ok(())
}
pub fn box_filter_3x3(input: &[u8], output: &mut [u8], width: usize, height: usize) -> Result<()> {
validate_buffer_size(input, output, width, height)?;
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!("Image too small for 3x3 filter: {}x{}", width, height),
});
}
const KERNEL_SIZE: f32 = 9.0;
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let mut sum = 0_u32;
for ky in 0..3 {
for kx in 0..3 {
let px = x + kx - 1;
let py = y + ky - 1;
let idx = py * width + px;
sum += u32::from(input[idx]);
}
}
let out_idx = y * width + x;
output[out_idx] = ((sum as f32) / KERNEL_SIZE) as u8;
}
}
copy_borders(input, output, width, height);
Ok(())
}
pub fn sharpen_3x3(input: &[u8], output: &mut [u8], width: usize, height: usize) -> Result<()> {
validate_buffer_size(input, output, width, height)?;
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!("Image too small for 3x3 filter: {}x{}", width, height),
});
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let top = i32::from(input[(y - 1) * width + x]);
let left = i32::from(input[y * width + x - 1]);
let center = i32::from(input[idx]);
let right = i32::from(input[y * width + x + 1]);
let bottom = i32::from(input[(y + 1) * width + x]);
let sum = 5 * center - top - left - right - bottom;
output[idx] = sum.clamp(0, 255) as u8;
}
}
copy_borders(input, output, width, height);
Ok(())
}
pub fn convolve_3x3(
input: &[u8],
output: &mut [f32],
kernel: &[f32; 9],
width: usize,
height: usize,
normalize: bool,
) -> Result<()> {
if input.len() != width * height || output.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffers",
message: format!(
"Buffer size mismatch: input={}, output={}, expected={}",
input.len(),
output.len(),
width * height
),
});
}
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!("Image too small for 3x3 filter: {}x{}", width, height),
});
}
let mut normalized_kernel = *kernel;
if normalize {
let sum: f32 = kernel.iter().sum();
if sum.abs() > 1e-6 {
for k in &mut normalized_kernel {
*k /= sum;
}
}
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let mut sum = 0.0_f32;
for ky in 0..3 {
for kx in 0..3 {
let px = x + kx - 1;
let py = y + ky - 1;
let idx = py * width + px;
let kernel_idx = ky * 3 + kx;
sum += f32::from(input[idx]) * normalized_kernel[kernel_idx];
}
}
let out_idx = y * width + x;
output[out_idx] = sum;
}
}
for y in 0..height {
for x in 0..width {
if y == 0 || y == height - 1 || x == 0 || x == width - 1 {
output[y * width + x] = 0.0;
}
}
}
Ok(())
}
pub fn separable_convolve_f32(
input: &[f32],
output: &mut [f32],
width: usize,
height: usize,
row_kernel: &[f32],
col_kernel: &[f32],
) -> Result<()> {
let size = width * height;
if input.len() != size || output.len() != size {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffers",
message: format!(
"Buffer size mismatch: input={}, output={}, expected={}",
input.len(),
output.len(),
size
),
});
}
if row_kernel.is_empty() || col_kernel.is_empty() {
return Err(AlgorithmError::InvalidParameter {
parameter: "kernel",
message: "Kernel must not be empty".to_string(),
});
}
let rk_half = row_kernel.len() / 2;
let ck_half = col_kernel.len() / 2;
if width < row_kernel.len() || height < col_kernel.len() {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: format!(
"Image {}x{} too small for kernel {}x{}",
width,
height,
row_kernel.len(),
col_kernel.len()
),
});
}
let mut temp = vec![0.0_f32; size];
for y in 0..height {
let row_off = y * width;
for x in rk_half..(width - rk_half) {
let mut sum = 0.0_f32;
for (ki, &kv) in row_kernel.iter().enumerate() {
let xi = x + ki - rk_half;
sum += input[row_off + xi] * kv;
}
temp[row_off + x] = sum;
}
for x in 0..rk_half {
temp[row_off + x] = input[row_off + x];
}
for x in (width - rk_half)..width {
temp[row_off + x] = input[row_off + x];
}
}
for y in ck_half..(height - ck_half) {
let row_off = y * width;
for x in 0..width {
let mut sum = 0.0_f32;
for (ki, &kv) in col_kernel.iter().enumerate() {
let yi = y + ki - ck_half;
sum += temp[yi * width + x] * kv;
}
output[row_off + x] = sum;
}
}
for y in 0..ck_half {
for x in 0..width {
output[y * width + x] = temp[y * width + x];
}
}
for y in (height - ck_half)..height {
for x in 0..width {
output[y * width + x] = temp[y * width + x];
}
}
Ok(())
}
fn validate_buffer_size(input: &[u8], output: &[u8], width: usize, height: usize) -> Result<()> {
let expected_size = width * height;
if input.len() != expected_size || output.len() != expected_size {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffers",
message: format!(
"Buffer size mismatch: input={}, output={}, expected={}",
input.len(),
output.len(),
expected_size
),
});
}
Ok(())
}
fn copy_borders(input: &[u8], output: &mut [u8], width: usize, height: usize) {
for x in 0..width {
output[x] = input[x];
output[(height - 1) * width + x] = input[(height - 1) * width + x];
}
for y in 0..height {
output[y * width] = input[y * width];
output[y * width + width - 1] = input[y * width + width - 1];
}
}
fn zero_borders_i16(output: &mut [i16], width: usize, height: usize) {
for x in 0..width {
output[x] = 0;
output[(height - 1) * width + x] = 0;
}
for y in 0..height {
output[y * width] = 0;
output[y * width + width - 1] = 0;
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_gaussian_blur_uniform() {
let width = 10;
let height = 10;
let input = vec![128u8; width * height];
let mut output = vec![0u8; width * height];
gaussian_blur_3x3(&input, &mut output, width, height).expect("gaussian_blur_3x3 failed");
for y in 1..(height - 1) {
for x in 1..(width - 1) {
assert_eq!(output[y * width + x], 128);
}
}
}
#[test]
fn test_sobel_x_vertical_edge() {
let width = 5;
let height = 5;
let mut input = vec![0u8; width * height];
for y in 0..height {
for x in 0..width {
input[y * width + x] = if x < 2 { 0 } else { 255 };
}
}
let mut output = vec![0i16; width * height];
sobel_x_3x3(&input, &mut output, width, height).expect("sobel_x_3x3 failed");
let center = output[2 * width + 2];
assert!(center.abs() > 100);
}
#[test]
fn test_sobel_magnitude() {
let gx = vec![100i16, 200, 300];
let gy = vec![100i16, 0, 400];
let mut mag = vec![0u8; 3];
sobel_magnitude(&gx, &gy, &mut mag).expect("sobel_magnitude failed");
assert!(mag[0] > 100);
assert_eq!(mag[1], 200);
assert_eq!(mag[2], 255); }
#[test]
fn test_laplacian() {
let width = 5;
let height = 5;
let input = vec![100u8; width * height];
let mut output = vec![0i16; width * height];
laplacian_3x3(&input, &mut output, width, height).expect("laplacian_3x3 failed");
for y in 1..(height - 1) {
for x in 1..(width - 1) {
assert_eq!(output[y * width + x], 0);
}
}
}
#[test]
fn test_box_filter() {
let width = 5;
let height = 5;
let input = vec![100u8; width * height];
let mut output = vec![0u8; width * height];
box_filter_3x3(&input, &mut output, width, height).expect("box_filter_3x3 failed");
for y in 1..(height - 1) {
for x in 1..(width - 1) {
assert_eq!(output[y * width + x], 100);
}
}
}
#[test]
fn test_sharpen() {
let width = 5;
let height = 5;
let input = vec![100u8; width * height];
let mut output = vec![0u8; width * height];
sharpen_3x3(&input, &mut output, width, height).expect("sharpen_3x3 failed");
for y in 1..(height - 1) {
for x in 1..(width - 1) {
assert_eq!(output[y * width + x], 100);
}
}
}
#[test]
fn test_custom_convolution() {
let width = 5;
let height = 5;
let input = vec![100u8; width * height];
let mut output = vec![0.0f32; width * height];
let kernel = [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0];
convolve_3x3(&input, &mut output, &kernel, width, height, false)
.expect("convolve_3x3 failed");
for y in 1..(height - 1) {
for x in 1..(width - 1) {
assert!((output[y * width + x] - 100.0).abs() < 1e-6);
}
}
}
#[test]
fn test_separable_convolution() {
let width = 10;
let height = 10;
let input = vec![1.0_f32; width * height];
let mut output = vec![0.0_f32; width * height];
let kernel = [1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0];
separable_convolve_f32(&input, &mut output, width, height, &kernel, &kernel)
.expect("separable_convolve_f32 failed");
for y in 2..(height - 2) {
for x in 2..(width - 2) {
assert!((output[y * width + x] - 1.0).abs() < 1e-5);
}
}
}
#[test]
fn test_dimensions_too_small() {
let width = 2;
let height = 2;
let input = vec![0u8; width * height];
let mut output = vec![0u8; width * height];
let result = gaussian_blur_3x3(&input, &mut output, width, height);
assert!(result.is_err());
}
#[test]
fn test_buffer_size_mismatch() {
let width = 10;
let height = 10;
let input = vec![0u8; width * height];
let mut output = vec![0u8; 50];
let result = gaussian_blur_3x3(&input, &mut output, width, height);
assert!(result.is_err());
}
}