// oximedia_codec/simd/scalar/fallback.rs
use crate::simd::traits::{SimdOps, SimdOpsExt};
use crate::simd::types::{I16x8, I32x4, U8x16};

/// Portable scalar backend for the SIMD operation traits.
///
/// Every operation is computed lane by lane with plain integer
/// arithmetic, so this backend works on any target and serves as the
/// reference implementation the accelerated backends must match.
#[derive(Clone, Copy, Debug)]
pub struct ScalarFallback;

impl ScalarFallback {
    /// Creates a new scalar backend. The type is zero-sized, so this
    /// performs no work at runtime.
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        ScalarFallback
    }
}
21
22impl Default for ScalarFallback {
23 fn default() -> Self {
24 Self::new()
25 }
26}
27
28impl SimdOps for ScalarFallback {
29 #[inline]
30 fn name(&self) -> &'static str {
31 "scalar"
32 }
33
34 #[inline]
35 fn is_available(&self) -> bool {
36 true }
38
39 #[inline]
40 fn add_i16x8(&self, a: I16x8, b: I16x8) -> I16x8 {
41 let mut result = I16x8::zero();
42 for i in 0..8 {
43 result[i] = a[i].wrapping_add(b[i]);
44 }
45 result
46 }
47
48 #[inline]
49 fn sub_i16x8(&self, a: I16x8, b: I16x8) -> I16x8 {
50 let mut result = I16x8::zero();
51 for i in 0..8 {
52 result[i] = a[i].wrapping_sub(b[i]);
53 }
54 result
55 }
56
57 #[inline]
58 fn mul_i16x8(&self, a: I16x8, b: I16x8) -> I16x8 {
59 let mut result = I16x8::zero();
60 for i in 0..8 {
61 result[i] = a[i].wrapping_mul(b[i]);
62 }
63 result
64 }
65
66 #[inline]
67 fn add_i32x4(&self, a: I32x4, b: I32x4) -> I32x4 {
68 let mut result = I32x4::zero();
69 for i in 0..4 {
70 result[i] = a[i].wrapping_add(b[i]);
71 }
72 result
73 }
74
75 #[inline]
76 fn sub_i32x4(&self, a: I32x4, b: I32x4) -> I32x4 {
77 let mut result = I32x4::zero();
78 for i in 0..4 {
79 result[i] = a[i].wrapping_sub(b[i]);
80 }
81 result
82 }
83
84 #[inline]
85 fn min_i16x8(&self, a: I16x8, b: I16x8) -> I16x8 {
86 let mut result = I16x8::zero();
87 for i in 0..8 {
88 result[i] = a[i].min(b[i]);
89 }
90 result
91 }
92
93 #[inline]
94 fn max_i16x8(&self, a: I16x8, b: I16x8) -> I16x8 {
95 let mut result = I16x8::zero();
96 for i in 0..8 {
97 result[i] = a[i].max(b[i]);
98 }
99 result
100 }
101
102 #[inline]
103 fn clamp_i16x8(&self, v: I16x8, min: i16, max: i16) -> I16x8 {
104 let mut result = I16x8::zero();
105 for i in 0..8 {
106 result[i] = v[i].clamp(min, max);
107 }
108 result
109 }
110
111 #[inline]
112 fn min_u8x16(&self, a: U8x16, b: U8x16) -> U8x16 {
113 let mut result = U8x16::zero();
114 for i in 0..16 {
115 result[i] = a[i].min(b[i]);
116 }
117 result
118 }
119
120 #[inline]
121 fn max_u8x16(&self, a: U8x16, b: U8x16) -> U8x16 {
122 let mut result = U8x16::zero();
123 for i in 0..16 {
124 result[i] = a[i].max(b[i]);
125 }
126 result
127 }
128
129 #[inline]
130 fn clamp_u8x16(&self, v: U8x16, min: u8, max: u8) -> U8x16 {
131 let mut result = U8x16::zero();
132 for i in 0..16 {
133 result[i] = v[i].clamp(min, max);
134 }
135 result
136 }
137
138 #[inline]
139 fn horizontal_sum_i16x8(&self, v: I16x8) -> i32 {
140 v.iter().map(|&x| i32::from(x)).sum()
141 }
142
143 #[inline]
144 fn horizontal_sum_i32x4(&self, v: I32x4) -> i32 {
145 v.iter().sum()
146 }
147
148 #[inline]
149 fn sad_u8x16(&self, a: U8x16, b: U8x16) -> u32 {
150 a.iter()
151 .zip(b.iter())
152 .map(|(&x, &y): (&u8, &u8)| u32::from(x.abs_diff(y)))
153 .sum()
154 }
155
156 #[inline]
157 fn sad_8(&self, a: &[u8], b: &[u8]) -> u32 {
158 assert!(a.len() >= 8 && b.len() >= 8);
159 a[..8]
160 .iter()
161 .zip(b[..8].iter())
162 .map(|(&x, &y)| u32::from(x.abs_diff(y)))
163 .sum()
164 }
165
166 #[inline]
167 fn sad_16(&self, a: &[u8], b: &[u8]) -> u32 {
168 assert!(a.len() >= 16 && b.len() >= 16);
169 a[..16]
170 .iter()
171 .zip(b[..16].iter())
172 .map(|(&x, &y)| u32::from(x.abs_diff(y)))
173 .sum()
174 }
175
176 #[inline]
177 fn widen_low_u8_to_i16(&self, v: U8x16) -> I16x8 {
178 let mut result = I16x8::zero();
179 for i in 0..8 {
180 result[i] = i16::from(v[i]);
181 }
182 result
183 }
184
185 #[inline]
186 fn widen_high_u8_to_i16(&self, v: U8x16) -> I16x8 {
187 let mut result = I16x8::zero();
188 for i in 0..8 {
189 result[i] = i16::from(v[i + 8]);
190 }
191 result
192 }
193
194 #[inline]
195 fn narrow_i32x4_to_i16x8(&self, low: I32x4, high: I32x4) -> I16x8 {
196 let mut result = I16x8::zero();
197 for i in 0..4 {
198 result[i] = low[i].clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16;
199 result[i + 4] = high[i].clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16;
200 }
201 result
202 }
203
204 #[inline]
205 fn madd_i16x8(&self, a: I16x8, b: I16x8, c: I16x8) -> I16x8 {
206 let mut result = I16x8::zero();
207 for i in 0..8 {
208 result[i] = a[i].wrapping_mul(b[i]).wrapping_add(c[i]);
209 }
210 result
211 }
212
213 #[inline]
214 fn pmaddwd(&self, a: I16x8, b: I16x8) -> I32x4 {
215 let mut result = I32x4::zero();
216 for i in 0..4 {
217 result[i] = i32::from(a[i * 2]) * i32::from(b[i * 2])
218 + i32::from(a[i * 2 + 1]) * i32::from(b[i * 2 + 1]);
219 }
220 result
221 }
222
223 #[inline]
224 fn shr_i16x8(&self, v: I16x8, shift: u32) -> I16x8 {
225 let mut result = I16x8::zero();
226 for i in 0..8 {
227 result[i] = v[i] >> shift;
228 }
229 result
230 }
231
232 #[inline]
233 fn shl_i16x8(&self, v: I16x8, shift: u32) -> I16x8 {
234 let mut result = I16x8::zero();
235 for i in 0..8 {
236 result[i] = v[i] << shift;
237 }
238 result
239 }
240
241 #[inline]
242 fn shr_i32x4(&self, v: I32x4, shift: u32) -> I32x4 {
243 let mut result = I32x4::zero();
244 for i in 0..4 {
245 result[i] = v[i] >> shift;
246 }
247 result
248 }
249
250 #[inline]
251 fn shl_i32x4(&self, v: I32x4, shift: u32) -> I32x4 {
252 let mut result = I32x4::zero();
253 for i in 0..4 {
254 result[i] = v[i] << shift;
255 }
256 result
257 }
258
259 #[inline]
260 fn avg_u8x16(&self, a: U8x16, b: U8x16) -> U8x16 {
261 let mut result = U8x16::zero();
262 for i in 0..16 {
263 result[i] = ((u16::from(a[i]) + u16::from(b[i]) + 1) / 2) as u8;
264 }
265 result
266 }
267}
268
269impl SimdOpsExt for ScalarFallback {
270 #[inline]
271 fn load4_u8_to_i16x8(&self, src: &[u8]) -> I16x8 {
272 assert!(src.len() >= 4);
273 let mut result = I16x8::zero();
274 for i in 0..4 {
275 result[i] = i16::from(src[i]);
276 }
277 result
278 }
279
280 #[inline]
281 fn load8_u8_to_i16x8(&self, src: &[u8]) -> I16x8 {
282 assert!(src.len() >= 8);
283 let mut result = I16x8::zero();
284 for i in 0..8 {
285 result[i] = i16::from(src[i]);
286 }
287 result
288 }
289
290 #[inline]
291 fn store4_i16x8_as_u8(&self, v: I16x8, dst: &mut [u8]) {
292 assert!(dst.len() >= 4);
293 for i in 0..4 {
294 dst[i] = v[i].clamp(0, 255) as u8;
295 }
296 }
297
298 #[inline]
299 fn store8_i16x8_as_u8(&self, v: I16x8, dst: &mut [u8]) {
300 assert!(dst.len() >= 8);
301 for i in 0..8 {
302 dst[i] = v[i].clamp(0, 255) as u8;
303 }
304 }
305
306 #[inline]
307 fn transpose_4x4_i16(&self, rows: &[I16x8; 4]) -> [I16x8; 4] {
308 let mut out = [I16x8::zero(); 4];
309 for i in 0..4 {
310 for j in 0..4 {
311 out[i][j] = rows[j][i];
312 }
313 }
314 out
315 }
316
317 #[inline]
318 fn transpose_8x8_i16(&self, rows: &[I16x8; 8]) -> [I16x8; 8] {
319 let mut out = [I16x8::zero(); 8];
320 for i in 0..8 {
321 for j in 0..8 {
322 out[i][j] = rows[j][i];
323 }
324 }
325 out
326 }
327
328 #[inline]
329 fn butterfly_i16x8(&self, a: I16x8, b: I16x8) -> (I16x8, I16x8) {
330 let sum = self.add_i16x8(a, b);
331 let diff = self.sub_i16x8(a, b);
332 (sum, diff)
333 }
334
335 #[inline]
336 fn butterfly_i32x4(&self, a: I32x4, b: I32x4) -> (I32x4, I32x4) {
337 let sum = self.add_i32x4(a, b);
338 let diff = self.sub_i32x4(a, b);
339 (sum, diff)
340 }
341}