// fast-canny 0.1.0
//
// Industrial-grade Zero-Allocation SIMD Canny Edge Detector
// Documentation
/// Hardware-independent interface for the fused Sobel operator.
///
/// # Safety
/// The following must hold for every call.
/// NOTE(review): the original note assigns these guarantees to the
/// implementor, but they read as preconditions on the arguments (i.e. caller
/// obligations) — confirm.
/// - `src` refers to a valid `width × height` array of `f32`
/// - `x_start >= 1`, `x_end <= width - 1`, `y >= 1` and `y <= height - 2`
/// - `mag_out` and `dir_out` refer to writable buffers of the same size
pub trait SobelKernel: Send + Sync {
    /// Processes the pixels in the range `[x_start, x_end)` of row `y` of the image.
    ///
    /// Takes slices instead of raw pointers to reduce the `unsafe` burden on callers.
    fn process_row_slice(
        &self,
        src:     &[f32],
        mag_out: &mut [f32],
        dir_out: &mut [u8],
        width:   usize,
        x_start: usize,
        x_end:   usize,
        y:       usize,
    );
}

/// Kernel module compiled only for `x86_64` targets.
#[cfg(target_arch = "x86_64")]
pub mod avx2;

/// Kernel module compiled only for `aarch64` targets.
#[cfg(target_arch = "aarch64")]
pub mod aarch64;

/// Picks the [`SobelKernel`] implementation to use on the current platform.
///
/// - `aarch64`: always `aarch64::NeonSobelKernel`.
/// - `x86_64`: `avx2::Avx2SobelKernel` when both the `avx2` and `fma` CPU
///   features are detected at run time; otherwise a warning is logged and the
///   scalar kernel is used.
/// - any other target: the scalar kernel.
///
/// Every decision is reported through the `log` macros. On the x86_64
/// fallback path two warnings are emitted, one specific and one generic.
pub fn detect() -> Box<dyn SobelKernel> {
    // No run-time probing on AArch64: this block is the whole function there.
    #[cfg(target_arch = "aarch64")]
    {
        log::info!("[kernel::detect] selected: AArch64 NEON");
        Box::new(aarch64::NeonSobelKernel)
    }

    // Every other target ends in the scalar kernel unless x86_64 finds AVX2+FMA.
    #[cfg(not(target_arch = "aarch64"))]
    {
        #[cfg(target_arch = "x86_64")]
        {
            let has_avx2_fma =
                is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma");
            if !has_avx2_fma {
                log::warn!("[kernel::detect] AVX2/FMA not available, falling back to scalar");
            } else {
                log::info!("[kernel::detect] selected: AVX2+FMA");
                return Box::new(avx2::Avx2SobelKernel);
            }
        }

        log::warn!("[kernel::detect] no SIMD available, using scalar fallback");
        Box::new(ScalarSobelKernel)
    }
}

// ── Scalar fallback implementation ────────────────────────────────
/// Plain-Rust [`SobelKernel`] that uses no SIMD intrinsics.
pub(crate) struct ScalarSobelKernel;

impl SobelKernel for ScalarSobelKernel {
    /// Computes the Sobel gradient for the pixels `[x_start, x_end)` of row `y`.
    ///
    /// `src` is read as a row-major image with `width` pixels per row. For each
    /// processed pixel at index `y * width + x`:
    ///
    /// - `mag_out[idx]` receives `sqrt(gx² + gy²)`;
    /// - `dir_out[idx]` receives the gradient direction quantised into four
    ///   sectors:
    ///   - `0` — `|gy| <= |gx| · tan(22.5°)` (also used when both are zero)
    ///   - `2` — `|gy| >= |gx| · tan(67.5°)`
    ///   - `1` — in between, with `gx >= 0` and `gy >= 0` agreeing
    ///   - `3` — in between, with `gx >= 0` and `gy >= 0` disagreeing
    ///
    /// Elements outside the processed range are left untouched.
    ///
    /// # Panics
    /// Indexing is bounds-checked, so violating the preconditions
    /// (`1 <= x_start`, `x_end <= width - 1`, `1 <= y <= height - 2`, output
    /// buffers covering the written indices) panics rather than reading or
    /// writing out of bounds. Debug builds check the coordinate preconditions
    /// up front.
    fn process_row_slice(
        &self,
        src:     &[f32],
        mag_out: &mut [f32],
        dir_out: &mut [u8],
        width:   usize,
        x_start: usize,
        x_end:   usize,
        y:       usize,
    ) {
        // tan(22.5°) and tan(67.5°): the boundaries between direction sectors.
        const TAN_22_5: f32 = 0.414_213_56;
        const TAN_67_5: f32 = 2.414_213_56;

        // Written without `width - 1` or `/ width` so that `width == 0` fails
        // the assertion instead of underflowing or dividing by zero.
        debug_assert!(
            x_start >= 1 && x_end < width,
            "column range must stay inside the 1-pixel border"
        );
        // The 3×3 window reads row `y + 1`, so that row must lie fully in `src`.
        debug_assert!(
            y >= 1 && (y + 2) * width <= src.len(),
            "row must have a complete neighbour row above and below"
        );

        let row = y * width;
        for x in x_start..x_end {
            let idx = row + x;
            let up = idx - width;
            let down = idx + width;

            // 3×3 neighbourhood; the centre pixel has zero weight in both kernels.
            let (tl, tm, tr) = (src[up - 1], src[up], src[up + 1]);
            let (ml, mr) = (src[idx - 1], src[idx + 1]);
            let (bl, bm, br) = (src[down - 1], src[down], src[down + 1]);

            let gx = -tl + tr - 2.0 * ml + 2.0 * mr - bl + br;
            let gy = -tl - 2.0 * tm - tr + bl + 2.0 * bm + br;

            mag_out[idx] = (gx * gx + gy * gy).sqrt();

            let ax = gx.abs();
            let ay = gy.abs();
            dir_out[idx] = if ay <= ax * TAN_22_5 {
                0
            } else if ay >= ax * TAN_67_5 {
                2
            } else if (gx >= 0.0) == (gy >= 0.0) {
                1
            } else {
                3
            };
        }
    }
}