pub fn simd_kernel_diagonal<K: SparseKernel>( kernel: &K, x: &Array2<f64>, ) -> Array1<f64>
SIMD-accelerated kernel diagonal computation