pub fn convolve_2d( image: &[f32], width: usize, height: usize, kernel: &[f32], kernel_size: usize, ) -> Vec<f32>
Performs a 2D convolution of `image` with `kernel`, with SIMD optimization, and returns the result as a `Vec<f32>`.