1#![forbid(unsafe_code)]
11
12use super::scalar::ScalarFallback;
13use super::traits::{SimdOps, SimdOpsExt};
14use super::types::{I16x8, U8x16};
15
16pub struct BlendOps<S: SimdOps> {
18 simd: S,
19}
20
21impl<S: SimdOps + Default> Default for BlendOps<S> {
22 fn default() -> Self {
23 Self::new(S::default())
24 }
25}
26
27impl<S: SimdOps> BlendOps<S> {
28 #[inline]
30 #[must_use]
31 pub const fn new(simd: S) -> Self {
32 Self { simd }
33 }
34
35 #[inline]
37 #[must_use]
38 pub const fn simd(&self) -> &S {
39 &self.simd
40 }
41
42 #[inline]
51 #[allow(clippy::cast_sign_loss)]
52 pub fn lerp_u8(&self, a: u8, b: u8, weight: u8) -> u8 {
53 let a32 = i32::from(a);
54 let b32 = i32::from(b);
55 let w32 = i32::from(weight);
56 let result = a32 + ((b32 - a32) * w32 + 128) / 256;
57 result.clamp(0, 255) as u8
59 }
60
61 #[inline]
65 pub fn lerp_i16x8(&self, a: I16x8, b: I16x8, weight: i16) -> I16x8 {
66 let diff = self.simd.sub_i16x8(b, a);
67 let weight_vec = I16x8::splat(weight);
68 let scaled = self.simd.mul_i16x8(diff, weight_vec);
69 let shifted = self.simd.shr_i16x8(scaled, 8);
70 self.simd.add_i16x8(a, shifted)
71 }
72
73 #[inline]
77 #[allow(clippy::needless_range_loop, clippy::cast_possible_truncation)]
78 pub fn weighted_avg_u8x16(&self, a: U8x16, b: U8x16, weight: u8) -> U8x16 {
79 let mut result = [0u8; 16];
80 let w = u16::from(weight);
81 let inv_w = 256 - w;
82
83 for i in 0..16 {
84 let val = (u16::from(a.0[i]) * inv_w + u16::from(b.0[i]) * w + 128) / 256;
86 result[i] = val as u8;
87 }
88
89 U8x16(result)
90 }
91
92 #[inline]
106 #[allow(dead_code)]
107 pub fn bilinear_blend_u8(
108 &self,
109 tl: u8,
110 tr: u8,
111 bl: u8,
112 br: u8,
113 hweight: u8,
114 vweight: u8,
115 ) -> u8 {
116 let top = self.lerp_u8(tl, tr, hweight);
118 let bottom = self.lerp_u8(bl, br, hweight);
119
120 self.lerp_u8(top, bottom, vweight)
122 }
123
124 #[inline]
128 #[allow(dead_code, clippy::too_many_arguments)]
129 pub fn bilinear_blend_row_8(
130 &self,
131 tl: &[u8],
132 tr: &[u8],
133 bl: &[u8],
134 br: &[u8],
135 hweight: u8,
136 vweight: u8,
137 dst: &mut [u8],
138 ) {
139 let len = 8
140 .min(tl.len())
141 .min(tr.len())
142 .min(bl.len())
143 .min(br.len())
144 .min(dst.len());
145 for i in 0..len {
146 dst[i] = self.bilinear_blend_u8(tl[i], tr[i], bl[i], br[i], hweight, vweight);
147 }
148 }
149}
150
151impl<S: SimdOps + SimdOpsExt> BlendOps<S> {
152 #[allow(dead_code, clippy::similar_names, clippy::too_many_arguments)]
154 pub fn bilinear_blend_row_8_simd(
155 &self,
156 tl: &[u8],
157 tr: &[u8],
158 bl: &[u8],
159 br: &[u8],
160 hweight: u8,
161 vweight: u8,
162 dst: &mut [u8],
163 ) {
164 let tl_v = self.simd.load8_u8_to_i16x8(tl);
166 let tr_v = self.simd.load8_u8_to_i16x8(tr);
167 let bl_v = self.simd.load8_u8_to_i16x8(bl);
168 let br_v = self.simd.load8_u8_to_i16x8(br);
169
170 let top = self.lerp_i16x8(tl_v, tr_v, i16::from(hweight));
172 let bottom = self.lerp_i16x8(bl_v, br_v, i16::from(hweight));
173
174 let result = self.lerp_i16x8(top, bottom, i16::from(vweight));
176
177 self.simd.store8_i16x8_as_u8(result, dst);
179 }
180}
181
182#[inline]
184#[must_use]
185pub fn blend_ops() -> BlendOps<ScalarFallback> {
186 BlendOps::new(ScalarFallback::new())
187}
188
/// Six-tap filter coefficients applied by [`apply_half_pel_h`] and
/// [`apply_half_pel_v`].
///
/// The taps sum to 32, which is why those functions normalise with
/// `(sum + 16) >> 5`.
#[allow(dead_code)]
pub const HALF_PEL_FILTER: [i16; 6] = [1, -5, 20, 20, -5, 1];
194
/// Six-tap coefficient table named for quarter-pel interpolation.
///
/// The taps sum to 64. No function visible in this part of the file reads it.
/// NOTE(review): the asymmetric `52, 20` centre taps presumably weight the
/// result towards one of the two centre samples — confirm against the codec
/// specification before relying on it.
#[allow(dead_code)]
pub const QUARTER_PEL_FILTER: [i16; 6] = [1, -5, 52, 20, -5, 1];
198
199#[allow(dead_code, clippy::cast_sign_loss)]
201pub fn apply_half_pel_h(src: &[u8], dst: &mut [u8], width: usize) {
202 if width < 6 || src.len() < width + 5 {
203 return;
204 }
205
206 for x in 0..width {
207 let mut sum: i32 = 0;
208 for (k, &tap) in HALF_PEL_FILTER.iter().enumerate() {
209 sum += i32::from(src[x + k]) * i32::from(tap);
210 }
211 let result = (sum + 16) >> 5;
213 dst[x] = result.clamp(0, 255) as u8;
214 }
215}
216
217#[allow(dead_code, clippy::cast_sign_loss)]
219pub fn apply_half_pel_v(src: &[&[u8]], dst: &mut [u8], width: usize) {
220 if src.len() < 6 {
221 return;
222 }
223
224 for x in 0..width.min(dst.len()) {
225 let mut sum: i32 = 0;
226 for (k, &tap) in HALF_PEL_FILTER.iter().enumerate() {
227 if x < src[k].len() {
228 sum += i32::from(src[k][x]) * i32::from(tap);
229 }
230 }
231 let result = (sum + 16) >> 5;
233 dst[x] = result.clamp(0, 255) as u8;
234 }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Weight 0 returns the first operand; mid and maximum weights land
    /// within the accepted tolerance of the expected value.
    #[test]
    fn test_lerp_u8() {
        let ops = blend_ops();

        assert_eq!(ops.lerp_u8(100, 200, 0), 100);

        let halfway = ops.lerp_u8(0, 200, 128);
        assert!((99..=101).contains(&halfway));

        let nearly_full = ops.lerp_u8(0, 200, 255);
        assert!((198..=200).contains(&nearly_full));
    }

    /// Checks the 16-lane weighted average at weights 128, 0 and 255.
    #[test]
    fn test_weighted_avg_u8x16() {
        let ops = blend_ops();

        let a = U8x16::splat(100);
        let b = U8x16::splat(200);

        let half = ops.weighted_avg_u8x16(a, b, 128);
        assert!(half.0.iter().all(|v| (149..=151).contains(v)));

        let only_a = ops.weighted_avg_u8x16(a, b, 0);
        assert_eq!(only_a.0, [100; 16]);

        let mostly_b = ops.weighted_avg_u8x16(a, b, 255);
        assert!(mostly_b.0.iter().all(|v| (199..=200).contains(v)));
    }

    /// Uniform input stays uniform, and each corner can be selected with the
    /// matching extreme weights.
    #[test]
    fn test_bilinear_blend() {
        let ops = blend_ops();

        let uniform = ops.bilinear_blend_u8(100, 100, 100, 100, 128, 128);
        assert_eq!(uniform, 100);

        let top_left = ops.bilinear_blend_u8(100, 0, 0, 0, 0, 0);
        assert_eq!(top_left, 100);

        let top_right = ops.bilinear_blend_u8(0, 100, 0, 0, 255, 0);
        assert!(top_right >= 99);

        let bottom_left = ops.bilinear_blend_u8(0, 0, 100, 0, 0, 255);
        assert!(bottom_left >= 99);
    }

    /// Lane 0 of a half-weight vector lerp between 0 and 100 is about 50.
    #[test]
    fn test_lerp_i16x8() {
        let ops = blend_ops();

        let a = I16x8::from_array([0, 10, 20, 30, 40, 50, 60, 70]);
        let b = I16x8::from_array([100, 110, 120, 130, 140, 150, 160, 170]);

        let blended = ops.lerp_i16x8(a, b, 128);
        assert!((49..=51).contains(&blended.0[0]));
    }

    /// A row blend over uniform inputs yields the same uniform value.
    #[test]
    fn test_bilinear_row() {
        let ops = blend_ops();

        let tl = [100u8; 8];
        let tr = [100u8; 8];
        let bl = [100u8; 8];
        let br = [100u8; 8];
        let mut out = [0u8; 8];

        ops.bilinear_blend_row_8(&tl, &tr, &bl, &br, 128, 128, &mut out);

        assert!(out.iter().all(|&v| v == 100));
    }

    /// The half-pel taps must sum to 32 to match the `>> 5` normalisation.
    #[test]
    fn test_half_pel_filter() {
        assert_eq!(HALF_PEL_FILTER.iter().sum::<i16>(), 32);
    }

    /// Filtering a flat row leaves it (approximately) flat.
    #[test]
    fn test_apply_half_pel_h() {
        let flat = [128u8; 16];
        let mut out = [0u8; 10];

        apply_half_pel_h(&flat, &mut out, 10);

        assert!(out.iter().all(|v| (127..=129).contains(v)));
    }
}