pub fn scale_block(out: &mut [f32], src: &[f32], scale: f32)
Computes `out[i] = src[i] * scale` for each index `i`. Vectorized when `wide-backend` is enabled.
out[i] = src[i] * scale
wide-backend