blasoxide/
l2d.rs

1pub unsafe fn dgemv(
2    trans: bool,
3    m: usize,
4    n: usize,
5    alpha: f64,
6    a: *const f64,
7    lda: usize,
8    x: *const f64,
9    incx: usize,
10    beta: f64,
11    y: *mut f64,
12    incy: usize,
13) {
14    const MC: usize = 2048;
15
16    if trans {
17        crate::dscal(m, beta, y, incy);
18
19        let m_b: usize = MC / incx;
20
21        for i in (0..m).step_by(m_b) {
22            let ib = std::cmp::min(m - i, m_b);
23            inner_kernel_trans(ib, n, alpha, a.add(i), lda, x.add(i * incx), incx, y, incy);
24        }
25        return;
26    }
27
28    let m_b = MC / incy;
29
30    for i in (0..m).step_by(m_b) {
31        let ib = std::cmp::min(m - i, m_b);
32        inner_kernel(
33            ib,
34            n,
35            alpha,
36            a.add(i),
37            lda,
38            x,
39            incx,
40            beta,
41            y.add(i * incy),
42            incy,
43        );
44    }
45
46    unsafe fn inner_kernel(
47        m: usize,
48        n: usize,
49        alpha: f64,
50        a: *const f64,
51        lda: usize,
52        x: *const f64,
53        incx: usize,
54        beta: f64,
55        y: *mut f64,
56        incy: usize,
57    ) {
58        crate::dscal(m, beta, y, incy);
59        for j in 0..n {
60            crate::daxpy(m, alpha * *x.add(j * incx), a.add(j * lda), 1, y, incy);
61        }
62    }
63
64    unsafe fn inner_kernel_trans(
65        m: usize,
66        n: usize,
67        alpha: f64,
68        a: *const f64,
69        lda: usize,
70        x: *const f64,
71        incx: usize,
72        y: *mut f64,
73        incy: usize,
74    ) {
75        for j in 0..n {
76            *y.add(j * incy) += alpha * crate::ddot(m, a.add(j * lda), 1, x, incx);
77        }
78    }
79}
80
81pub unsafe fn dtrmv(
82    upper: bool,
83    trans: bool,
84    diag: bool,
85    n: usize,
86    a: *const f64,
87    lda: usize,
88    x: *mut f64,
89    incx: usize,
90) {
91    if trans {
92        if upper {
93            if diag {
94                for j in (1..n).rev() {
95                    *x.add(j * incx) += crate::ddot(j, a.add(j * lda), 1, x, incx);
96                }
97            } else {
98                for j in (0..n).rev() {
99                    *x.add(j * incx) = crate::ddot(j + 1, a.add(j * lda), 1, x, incx);
100                }
101            }
102        } else if diag {
103            for j in 0..n - 1 {
104                *x.add(j * incx) += crate::ddot(
105                    n - (j + 1),
106                    a.add((j + 1) + j * lda),
107                    1,
108                    x.add((j + 1) * incx),
109                    incx,
110                );
111            }
112        } else {
113            for j in 0..n {
114                *x.add(j * incx) = crate::ddot(n - j, a.add(j + j * lda), 1, x.add(j * incx), incx);
115            }
116        }
117    } else if upper {
118        if diag {
119            for j in 1..n {
120                crate::daxpy(j, *x.add(j * incx), a.add(j * lda), 1, x, incx);
121            }
122        } else {
123            for j in 0..n {
124                let scal = *x.add(j * incx);
125                *x.add(j * incx) = 0.;
126                crate::daxpy(j + 1, scal, a.add(j * lda), 1, x, incx);
127            }
128        }
129    } else if diag {
130        for j in (0..n - 1).rev() {
131            crate::daxpy(
132                n - (j + 1),
133                *x.add(j * incx),
134                a.add((j + 1) + j * lda),
135                1,
136                x.add((j + 1) * incx),
137                incx,
138            );
139        }
140    } else {
141        for j in (0..n).rev() {
142            let scal = *x.add(j * incx);
143            *x.add(j * incx) = 0.;
144            crate::daxpy(n - j, scal, a.add(j + j * lda), 1, x.add(j * incx), incx);
145        }
146    }
147}
148
149pub unsafe fn dsymv(
150    upper: bool,
151    n: usize,
152    alpha: f64,
153    a: *const f64,
154    lda: usize,
155    x: *const f64,
156    incx: usize,
157    beta: f64,
158    y: *mut f64,
159    incy: usize,
160) {
161    crate::dscal(n, beta, y, incy);
162    if upper {
163        for j in 0..n {
164            crate::daxpy(j + 1, alpha * *x.add(j * incx), a.add(j * lda), 1, y, incy);
165            *y.add(j * incy) += alpha * crate::ddot(j, a.add(j * lda), 1, x, incx);
166        }
167    } else {
168        for j in 0..n {
169            crate::daxpy(
170                n - j,
171                alpha * *x.add(j * incx),
172                a.add(j + j * lda),
173                1,
174                y.add(j * incy),
175                incy,
176            );
177            *y.add(j * incy) += alpha
178                * crate::ddot(
179                    n - (j + 1),
180                    a.add((j + 1) + j * lda),
181                    1,
182                    x.add((j + 1) * incx),
183                    incx,
184                );
185        }
186    }
187}