1use crate::simd::traits::SimdOpsExt;
7
/// Block-transform front end that runs its vector arithmetic through a SIMD backend `S`.
///
/// The struct itself places no bound on `S`; the transform methods are provided
/// for backends that implement `SimdOpsExt`.
pub struct TransformSimd<S> {
    /// Backend used for the butterfly, add/sub and transpose primitives.
    simd: S,
}
12
13impl<S: SimdOpsExt> TransformSimd<S> {
14 #[inline]
16 pub const fn new(simd: S) -> Self {
17 Self { simd }
18 }
19
20 pub fn forward_dct_4x4(&self, input: &[i16; 16], output: &mut [i16; 16]) {
25 use crate::simd::types::I16x8;
26
27 let mut rows = [I16x8::zero(); 4];
29 for i in 0..4 {
30 for j in 0..4 {
31 rows[i][j] = input[i * 4 + j];
32 }
33 }
34
35 self.dct_4_1d(&mut rows);
37
38 let rows = self.simd.transpose_4x4_i16(&rows);
40
41 let mut cols = rows;
43 self.dct_4_1d(&mut cols);
44
45 let result = self.simd.transpose_4x4_i16(&cols);
47 for i in 0..4 {
48 for j in 0..4 {
49 output[i * 4 + j] = result[i][j];
50 }
51 }
52 }
53
54 pub fn inverse_dct_4x4(&self, input: &[i16; 16], output: &mut [i16; 16]) {
56 use crate::simd::types::I16x8;
57
58 let mut rows = [I16x8::zero(); 4];
59 for i in 0..4 {
60 for j in 0..4 {
61 rows[i][j] = input[i * 4 + j];
62 }
63 }
64
65 self.idct_4_1d(&mut rows);
67
68 let rows = self.simd.transpose_4x4_i16(&rows);
70
71 let mut cols = rows;
73 self.idct_4_1d(&mut cols);
74
75 let result = self.simd.transpose_4x4_i16(&cols);
77 for i in 0..4 {
78 for j in 0..4 {
79 output[i * 4 + j] = result[i][j];
80 }
81 }
82 }
83
84 pub fn forward_dct_8x8(&self, input: &[i16; 64], output: &mut [i16; 64]) {
86 use crate::simd::types::I16x8;
87
88 let mut rows = [I16x8::zero(); 8];
90 for i in 0..8 {
91 for j in 0..8 {
92 rows[i][j] = input[i * 8 + j];
93 }
94 }
95
96 self.dct_8_1d(&mut rows);
98
99 let rows = self.simd.transpose_8x8_i16(&rows);
101
102 let mut cols = rows;
104 self.dct_8_1d(&mut cols);
105
106 let result = self.simd.transpose_8x8_i16(&cols);
108 for i in 0..8 {
109 for j in 0..8 {
110 output[i * 8 + j] = result[i][j];
111 }
112 }
113 }
114
115 pub fn inverse_dct_8x8(&self, input: &[i16; 64], output: &mut [i16; 64]) {
117 use crate::simd::types::I16x8;
118
119 let mut rows = [I16x8::zero(); 8];
120 for i in 0..8 {
121 for j in 0..8 {
122 rows[i][j] = input[i * 8 + j];
123 }
124 }
125
126 self.idct_8_1d(&mut rows);
128
129 let rows = self.simd.transpose_8x8_i16(&rows);
131
132 let mut cols = rows;
134 self.idct_8_1d(&mut cols);
135
136 let result = self.simd.transpose_8x8_i16(&cols);
138 for i in 0..8 {
139 for j in 0..8 {
140 output[i * 8 + j] = result[i][j];
141 }
142 }
143 }
144
145 pub fn forward_adst_4x4(&self, input: &[i16; 16], output: &mut [i16; 16]) {
149 use crate::simd::types::I16x8;
150
151 let mut rows = [I16x8::zero(); 4];
152 for i in 0..4 {
153 for j in 0..4 {
154 rows[i][j] = input[i * 4 + j];
155 }
156 }
157
158 self.adst_4_1d(&mut rows);
160
161 let rows = self.simd.transpose_4x4_i16(&rows);
163
164 let mut cols = rows;
166 self.adst_4_1d(&mut cols);
167
168 let result = self.simd.transpose_4x4_i16(&cols);
170 for i in 0..4 {
171 for j in 0..4 {
172 output[i * 4 + j] = result[i][j];
173 }
174 }
175 }
176
177 pub fn inverse_adst_4x4(&self, input: &[i16; 16], output: &mut [i16; 16]) {
179 use crate::simd::types::I16x8;
180
181 let mut rows = [I16x8::zero(); 4];
182 for i in 0..4 {
183 for j in 0..4 {
184 rows[i][j] = input[i * 4 + j];
185 }
186 }
187
188 self.iadst_4_1d(&mut rows);
190
191 let rows = self.simd.transpose_4x4_i16(&rows);
193
194 let mut cols = rows;
196 self.iadst_4_1d(&mut cols);
197
198 let result = self.simd.transpose_4x4_i16(&cols);
200 for i in 0..4 {
201 for j in 0..4 {
202 output[i * 4 + j] = result[i][j];
203 }
204 }
205 }
206
207 pub fn identity_4x4(&self, input: &[i16; 16], output: &mut [i16; 16]) {
209 for i in 0..16 {
212 let scaled = i32::from(input[i]) * 181 + 128;
213 output[i] = (scaled >> 8) as i16;
214 }
215 }
216
217 fn dct_4_1d(&self, rows: &mut [crate::simd::types::I16x8; 4]) {
223 let (s0, s3) = self.simd.butterfly_i16x8(rows[0], rows[3]);
226 let (s1, s2) = self.simd.butterfly_i16x8(rows[1], rows[2]);
227
228 let (x0, x1) = self.simd.butterfly_i16x8(s0, s1);
230 let (x3, x2) = self.simd.butterfly_i16x8(s3, s2);
231
232 rows[0] = x0;
233 rows[1] = x2;
234 rows[2] = x1;
235 rows[3] = x3;
236 }
237
238 fn idct_4_1d(&self, rows: &mut [crate::simd::types::I16x8; 4]) {
240 let t0 = rows[0];
242 let t1 = rows[2];
243 let t2 = rows[1];
244 let t3 = rows[3];
245
246 let (s0, s1) = self.simd.butterfly_i16x8(t0, t2);
248 let (s3, s2) = self.simd.butterfly_i16x8(t3, t1);
249
250 let (x0, x3) = self.simd.butterfly_i16x8(s0, s3);
252 let (x1, x2) = self.simd.butterfly_i16x8(s1, s2);
253
254 rows[0] = x0;
255 rows[1] = x1;
256 rows[2] = x2;
257 rows[3] = x3;
258 }
259
260 fn dct_8_1d(&self, rows: &mut [crate::simd::types::I16x8; 8]) {
262 let (s0, s7) = self.simd.butterfly_i16x8(rows[0], rows[7]);
265 let (s1, s6) = self.simd.butterfly_i16x8(rows[1], rows[6]);
266 let (s2, s5) = self.simd.butterfly_i16x8(rows[2], rows[5]);
267 let (s3, s4) = self.simd.butterfly_i16x8(rows[3], rows[4]);
268
269 let (t0, t3) = self.simd.butterfly_i16x8(s0, s3);
271 let (t1, t2) = self.simd.butterfly_i16x8(s1, s2);
272 let (t4, t7) = self.simd.butterfly_i16x8(s4, s7);
273 let (t5, t6) = self.simd.butterfly_i16x8(s5, s6);
274
275 let (u0, u1) = self.simd.butterfly_i16x8(t0, t1);
277 let (u2, u3) = self.simd.butterfly_i16x8(t2, t3);
278 let (u4, u5) = self.simd.butterfly_i16x8(t4, t5);
279 let (u6, u7) = self.simd.butterfly_i16x8(t6, t7);
280
281 rows[0] = u0;
282 rows[1] = u4;
283 rows[2] = u2;
284 rows[3] = u6;
285 rows[4] = u1;
286 rows[5] = u5;
287 rows[6] = u3;
288 rows[7] = u7;
289 }
290
291 fn idct_8_1d(&self, rows: &mut [crate::simd::types::I16x8; 8]) {
293 let t0 = rows[0];
295 let t4 = rows[1];
296 let t2 = rows[2];
297 let t6 = rows[3];
298 let t1 = rows[4];
299 let t5 = rows[5];
300 let t3 = rows[6];
301 let t7 = rows[7];
302
303 let (s0, s1) = self.simd.butterfly_i16x8(t0, t1);
305 let (s2, s3) = self.simd.butterfly_i16x8(t2, t3);
306 let (s4, s5) = self.simd.butterfly_i16x8(t4, t5);
307 let (s6, s7) = self.simd.butterfly_i16x8(t6, t7);
308
309 let (u0, u3) = self.simd.butterfly_i16x8(s0, s3);
311 let (u1, u2) = self.simd.butterfly_i16x8(s1, s2);
312 let (u4, u7) = self.simd.butterfly_i16x8(s4, s7);
313 let (u5, u6) = self.simd.butterfly_i16x8(s5, s6);
314
315 let (x0, x7) = self.simd.butterfly_i16x8(u0, u7);
317 let (x1, x6) = self.simd.butterfly_i16x8(u1, u6);
318 let (x2, x5) = self.simd.butterfly_i16x8(u2, u5);
319 let (x3, x4) = self.simd.butterfly_i16x8(u3, u4);
320
321 rows[0] = x0;
322 rows[1] = x1;
323 rows[2] = x2;
324 rows[3] = x3;
325 rows[4] = x4;
326 rows[5] = x5;
327 rows[6] = x6;
328 rows[7] = x7;
329 }
330
331 fn adst_4_1d(&self, rows: &mut [crate::simd::types::I16x8; 4]) {
333 let s0 = rows[0];
336 let s1 = rows[1];
337 let s2 = rows[2];
338 let s3 = rows[3];
339
340 let t0 = self.simd.add_i16x8(s0, s3);
342 let t1 = self.simd.add_i16x8(s1, s2);
343 let t2 = self.simd.sub_i16x8(s1, s2);
344 let t3 = self.simd.sub_i16x8(s0, s3);
345
346 rows[0] = t0;
347 rows[1] = t2;
348 rows[2] = t1;
349 rows[3] = t3;
350 }
351
352 fn iadst_4_1d(&self, rows: &mut [crate::simd::types::I16x8; 4]) {
354 let t0 = rows[0];
356 let t2 = rows[1];
357 let t1 = rows[2];
358 let t3 = rows[3];
359
360 let s0 = self.simd.add_i16x8(t0, t3);
361 let s1 = self.simd.add_i16x8(t1, t2);
362 let s2 = self.simd.sub_i16x8(t1, t2);
363 let s3 = self.simd.sub_i16x8(t0, t3);
364
365 rows[0] = s0;
366 rows[1] = s1;
367 rows[2] = s2;
368 rows[3] = s3;
369 }
370}