/// A kernel that computes Sobel gradient magnitude and a quantized gradient
/// direction for a horizontal span of pixels on one image row.
///
/// Implementations must be `Send + Sync`; `detect` hands them out as
/// `Box<dyn SobelKernel>`.
pub trait SobelKernel: Send + Sync {
/// Processes columns `x_start..x_end` of row `y`.
///
/// The description below is taken from `ScalarSobelKernel` in this file;
/// other implementations are presumably equivalent (NOTE(review): confirm).
///
/// * `src` - row-major input samples, `width` elements per row. The scalar
///   kernel reads the 3x3 neighbourhood (minus the centre) of every
///   processed pixel, so rows `y - 1` and `y + 1` and columns `x - 1` and
///   `x + 1` must exist.
/// * `mag_out` - receives the gradient magnitude at index `y * width + x`.
/// * `dir_out` - receives a direction code in `0..=3` at the same index.
/// * `width` - number of elements per row in all three buffers.
/// * `x_start`, `x_end` - half-open column range to process.
/// * `y` - row to process.
fn process_row_slice(
&self,
src: &[f32],
mag_out: &mut [f32],
dir_out: &mut [u8],
width: usize,
x_start: usize,
x_end: usize,
y: usize,
);
}
/// x86_64-only module; `detect` takes `Avx2SobelKernel` from here.
#[cfg(target_arch = "x86_64")]
pub mod avx2;
/// AArch64-only module; `detect` takes `NeonSobelKernel` from here.
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
/// Picks the Sobel kernel implementation for the current machine.
///
/// * On x86_64, returns the AVX2 kernel when both `avx2` and `fma` are
///   reported by runtime feature detection.
/// * On AArch64, always returns the NEON kernel.
/// * Otherwise returns [`ScalarSobelKernel`].
///
/// Every decision is reported through the `log` facade.
pub fn detect() -> Box<dyn SobelKernel> {
    #[cfg(target_arch = "x86_64")]
    {
        let avx2_fma_ready = is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma");
        if avx2_fma_ready {
            log::info!("[kernel::detect] selected: AVX2+FMA");
            return Box::new(avx2::Avx2SobelKernel);
        }
        // NOTE(review): execution continues into the generic fallback below,
        // so this path emits two warnings in a row.
        log::warn!("[kernel::detect] AVX2/FMA not available, falling back to scalar");
    }

    #[cfg(target_arch = "aarch64")]
    {
        log::info!("[kernel::detect] selected: AArch64 NEON");
        return Box::new(aarch64::NeonSobelKernel);
    }

    // On aarch64 the unconditional return above makes this tail unreachable;
    // the lint is silenced so the same source compiles cleanly on every target.
    #[allow(unreachable_code)]
    {
        log::warn!("[kernel::detect] no SIMD available, using scalar fallback");
        Box::new(ScalarSobelKernel)
    }
}
/// Portable scalar Sobel kernel, used when no SIMD implementation is selected.
pub(crate) struct ScalarSobelKernel;

impl SobelKernel for ScalarSobelKernel {
    /// Computes Sobel gradient magnitude and a quantized direction for columns
    /// `x_start..x_end` of row `y`.
    ///
    /// `src`, `mag_out` and `dir_out` are row-major with `width` elements per
    /// row; results are written at index `y * width + x`. Elements outside the
    /// requested span are left untouched. An empty span is a no-op.
    ///
    /// Direction codes, with `gx`/`gy` the horizontal/vertical responses:
    /// * `0` - `|gy| <= |gx| * tan(22.5°)` (gradient mostly along x)
    /// * `2` - `|gy| >= |gx| * tan(67.5°)` (gradient mostly along y)
    /// * `1` - diagonal, `gx` and `gy` have the same sign
    /// * `3` - diagonal, `gx` and `gy` have opposite signs
    ///
    /// # Panics
    ///
    /// For a non-empty span, panics if the 3x3 neighbourhood of any requested
    /// pixel leaves its row or the image (`x_start == 0`, `x_end >= width`,
    /// `y == 0`, or row `y + 1` missing), or if an output buffer is too short.
    /// Debug builds additionally check these preconditions for empty spans.
    fn process_row_slice(
        &self,
        src: &[f32],
        mag_out: &mut [f32],
        dir_out: &mut [u8],
        width: usize,
        x_start: usize,
        x_end: usize,
        y: usize,
    ) {
        // Sector boundaries of the direction quantizer: sqrt(2) - 1 and
        // sqrt(2) + 1, i.e. tan(22.5°) and tan(67.5°).
        const TAN_22_5_DEG: f32 = 0.414_213_56;
        const TAN_67_5_DEG: f32 = 2.414_213_56;

        // `x_end < width` (rather than `x_end <= width - 1`) cannot underflow
        // when `width == 0`, and fails before the division below is reached.
        debug_assert!(
            x_start >= 1 && x_end < width,
            "column span must leave a one-pixel border on both sides"
        );
        // Row `y + 1` is read, so the last row is not a valid target.
        debug_assert!(
            y >= 1 && y + 1 < src.len() / width,
            "row must have a neighbour above and below"
        );

        if x_start >= x_end {
            return;
        }

        let row = y * width;
        // Each input row is first cut to exactly `width` elements, then to the
        // columns the span needs (one extra on each side). A span that touches
        // the border therefore panics instead of reading a wrapped pixel from
        // the neighbouring row.
        let cols = x_start - 1..x_end + 1;
        let above = &src[row - width..row][cols.clone()];
        let here = &src[row..row + width][cols.clone()];
        let below = &src[row + width..row + 2 * width][cols];
        let mags = &mut mag_out[row + x_start..row + x_end];
        let dirs = &mut dir_out[row + x_start..row + x_end];

        // One 3-wide window per output pixel, taken in lockstep from the
        // three input rows.
        let neighbourhoods = above.windows(3).zip(here.windows(3)).zip(below.windows(3));
        let outputs = mags.iter_mut().zip(dirs.iter_mut());

        for (((top, mid), bot), (mag, dir)) in neighbourhoods.zip(outputs) {
            let (tl, tm, tr) = (top[0], top[1], top[2]);
            let (ml, mr) = (mid[0], mid[2]);
            let (bl, bm, br) = (bot[0], bot[1], bot[2]);

            let gx = -tl + tr - 2.0 * ml + 2.0 * mr - bl + br;
            let gy = -tl - 2.0 * tm - tr + bl + 2.0 * bm + br;
            *mag = (gx * gx + gy * gy).sqrt();

            let ax = gx.abs();
            let ay = gy.abs();
            *dir = if ay <= ax * TAN_22_5_DEG {
                0u8
            } else if ay >= ax * TAN_67_5_DEG {
                2u8
            } else if (gx >= 0.0) == (gy >= 0.0) {
                1u8
            } else {
                3u8
            };
        }
    }
}