rstsr_native_impl/cpu_serial/
transpose.rs

1//! Naive implementation of matrix transpose
2
3use crate::prelude_dev::*;
4
/// Edge length, in elements, of the square tiles visited by the order-change
/// loops in this file: it is both the `step_by` width and the upper bound on
/// how many rows/columns a single tile covers.
// NOTE(review): 64 is presumably tuned for cache locality — confirm with benchmarks.
5const BLOCK_SIZE: usize = 64;
6
7/// Change order (row/col-major) a matrix out-place using a naive algorithm.
8///
9/// Transpose from `a` (row-major) to `c` (col-major).
10/// If shape or stride is not compatible, an error will be returned.
11pub fn orderchange_out_r2c_ix2_cpu_serial<T>(c: &mut [T], lc: &Layout<Ix2>, a: &[T], la: &Layout<Ix2>) -> Result<()>
12where
13    T: Clone,
14{
15    // shape check
16    let sc = lc.shape();
17    let sa = la.shape();
18    rstsr_assert_eq!(sc[0], sa[0], InvalidLayout, "This function requires shape identity")?;
19    rstsr_assert_eq!(sc[1], sa[1], InvalidLayout, "This function requires shape identity")?;
20    let [nrow, ncol] = *sa;
21
22    // stride check
23    rstsr_assert_eq!(lc.stride()[0], 1, InvalidLayout, "This function requires col-major output")?;
24    rstsr_assert_eq!(la.stride()[1], 1, InvalidLayout, "This function requires row-major input")?;
25
26    let offset_a = la.offset() as isize;
27    let offset_c = lc.offset() as isize;
28    let lda = la.stride()[0];
29    let ldc = lc.stride()[1];
30
31    (0..ncol).step_by(BLOCK_SIZE).for_each(|j_start| {
32        let j_end = (j_start + BLOCK_SIZE).min(ncol);
33        let (j_start, j_end) = (j_start as isize, j_end as isize);
34        (0..nrow).step_by(BLOCK_SIZE).for_each(|i_start| {
35            let i_end = (i_start + BLOCK_SIZE).min(nrow);
36            let (i_start, i_end) = (i_start as isize, i_end as isize);
37            for j in j_start..j_end {
38                for i in i_start..i_end {
39                    let src_idx = (offset_a + i * lda + j) as usize;
40                    let dst_idx = (offset_c + j * ldc + i) as usize;
41                    c[dst_idx] = a[src_idx].clone();
42                }
43            }
44        });
45    });
46
47    Ok(())
48}
49
50/// Change order (row/col-major) a matrix out-place using a naive algorithm.
51///
52/// Transpose from `a` (col-major) to `c` (row-major).
53/// If shape or stride is not compatible, an error will be returned.
54pub fn orderchange_out_c2r_ix2_cpu_serial<T>(c: &mut [T], lc: &Layout<Ix2>, a: &[T], la: &Layout<Ix2>) -> Result<()>
55where
56    T: Clone,
57{
58    let lc = lc.reverse_axes();
59    let la = la.reverse_axes();
60    orderchange_out_r2c_ix2_cpu_serial(c, &lc, a, &la)
61}