1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
crate::ix!();
impl crate::LipolPs {
/**
| # Safety
|
| need to make sure we can access nquads blocks
| safely from src and dst
*/
pub unsafe fn mac_block_to<NQ: TryInto<usize>>(
&mut self,
src: *mut f32,
dst: *mut f32,
nquads: NQ)
where <NQ as TryInto<usize>>::Error: Debug
{
let nquads: usize = nquads.try_into().unwrap();
lipol_ps_sse_block!(self, nquads, 2;
|i:usize, ref mut y1, ref mut y2, ref mut dy| {
let dst = dst as *mut __m128;
let src = src as *mut __m128;
*dst.add(i) =
_mm_add_ps(
*dst.add(i),
_mm_mul_ps(*src.add(i), *y1)
);
*y1 = _mm_add_ps(*y1, *dy);
*dst.add(i + 1) =
_mm_add_ps(
*dst.add(i + 1),
_mm_mul_ps(*src.add(i + 1), *y2)
);
*y2 = _mm_add_ps(*y2, *dy);
});
}
/**
| # Safety
|
| need to make sure we can access nquads blocks
| safely from src1, src2, dst1, and dst2
*/
pub unsafe fn mac_2_blocks_to<NQ: TryInto<usize>>(
&mut self,
src1: *mut f32,
src2: *mut f32,
dst1: *mut f32,
dst2: *mut f32,
nquads: NQ)
where <NQ as TryInto<usize>>::Error: Debug
{
let nquads: usize = nquads.try_into().unwrap();
lipol_ps_sse_block!(self, nquads, 2;
|i:usize, ref mut y1, ref mut y2, ref mut dy| {
let dst1 = dst1 as *mut __m128;
let dst2 = dst2 as *mut __m128;
let src1 = src1 as *mut __m128;
let src2 = src2 as *mut __m128;
*dst1.add(i) =
_mm_add_ps(
*dst1.add(i),
_mm_mul_ps(*src1.add(i), *y1)
);
*dst2.add(i) =
_mm_add_ps(
*dst2.add(i),
_mm_mul_ps(*src2.add(i), *y1)
);
*y1 = _mm_add_ps(*y1, *dy);
*dst1.add(i + 1) =
_mm_add_ps(
*dst1.add(i + 1),
_mm_mul_ps(*src1.add(i + 1), *y2)
);
*dst2.add(i + 1) =
_mm_add_ps(
*dst2.add(i + 1),
_mm_mul_ps(*src2.add(i + 1), *y2)
);
*y2 = _mm_add_ps(*y2, *dy);
});
}
}