pub fn std_py(data: &Bound<'_, PyArray1<f64>>, ddof: usize) -> PyResult<f64>
Calculates the standard deviation of `data` using an optimized two-pass algorithm with multiple accumulators.