pub fn relu6_simd(input: &[f32], output: &mut [f32])
SIMD-accelerated ReLU6 activation (clamps each value to the range [0, 6]) with automatic architecture dispatch.