pub fn quickselect_f32_simd(arr: &mut [f32], k: usize) -> f32
SIMD-optimized quickselect that returns the k-th smallest element of `arr`.