pub fn transpose_blocked( matrix: &Array2<f64>, tile_size: usize, ) -> Result<Array2<f64>, SklearsError>
Transposes a matrix using cache blocking.