1#![forbid(unsafe_code)]
11#![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
13#![allow(clippy::needless_range_loop)]
15
16use super::scalar::ScalarFallback;
17use super::traits::{SimdOps, SimdOpsExt};
18use super::types::I16x8;
19
20pub struct FilterOps<S: SimdOps> {
22 simd: S,
23}
24
25impl<S: SimdOps + Default> Default for FilterOps<S> {
26 fn default() -> Self {
27 Self::new(S::default())
28 }
29}
30
31impl<S: SimdOps> FilterOps<S> {
32 #[inline]
34 #[must_use]
35 pub const fn new(simd: S) -> Self {
36 Self { simd }
37 }
38
39 #[inline]
41 #[must_use]
42 pub const fn simd(&self) -> &S {
43 &self.simd
44 }
45
46 #[allow(dead_code)]
50 pub fn filter_h_2tap(&self, src: &[u8], dst: &mut [u8], width: usize) {
51 if src.len() < width + 1 || dst.len() < width {
52 return;
53 }
54
55 for x in 0..width {
56 let a = u16::from(src[x]);
58 let b = u16::from(src[x + 1]);
59 dst[x] = ((a + b + 1) >> 1) as u8;
60 }
61 }
62
63 #[allow(dead_code)]
67 pub fn filter_h_4tap(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 4], width: usize) {
68 if src.len() < width + 3 || dst.len() < width {
69 return;
70 }
71
72 for x in 0..width {
73 let mut sum = 0i32;
74 for k in 0..4 {
75 sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
76 }
77 let result = (sum + 64) >> 7;
79 dst[x] = result.clamp(0, 255) as u8;
80 }
81 }
82
83 #[allow(dead_code)]
85 pub fn filter_h_6tap(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 6], width: usize) {
86 if src.len() < width + 5 || dst.len() < width {
87 return;
88 }
89
90 for x in 0..width {
91 let mut sum = 0i32;
92 for k in 0..6 {
93 sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
94 }
95 let result = (sum + 64) >> 7;
96 dst[x] = result.clamp(0, 255) as u8;
97 }
98 }
99
100 #[allow(dead_code)]
104 pub fn filter_h_8tap(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 8], width: usize) {
105 if src.len() < width + 7 || dst.len() < width {
106 return;
107 }
108
109 for x in 0..width {
110 let mut sum = 0i32;
111 for k in 0..8 {
112 sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
113 }
114 let result = (sum + 64) >> 7;
115 dst[x] = result.clamp(0, 255) as u8;
116 }
117 }
118
119 #[allow(dead_code)]
123 pub fn filter_v_8tap(&self, rows: &[&[u8]; 8], col: usize, coeffs: &[i16; 8]) -> u8 {
124 let mut sum = 0i32;
125 for k in 0..8 {
126 if col < rows[k].len() {
127 sum += i32::from(rows[k][col]) * i32::from(coeffs[k]);
128 }
129 }
130 let result = (sum + 64) >> 7;
131 result.clamp(0, 255) as u8
132 }
133
134 #[allow(dead_code)]
136 pub fn filter_v_row_8tap(
137 &self,
138 rows: &[&[u8]; 8],
139 dst: &mut [u8],
140 coeffs: &[i16; 8],
141 width: usize,
142 ) {
143 let width = width.min(dst.len());
144 for x in 0..width {
145 dst[x] = self.filter_v_8tap(rows, x, coeffs);
146 }
147 }
148}
149
150impl<S: SimdOps + SimdOpsExt> FilterOps<S> {
151 #[allow(dead_code)]
153 pub fn filter_h_8tap_simd(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 8], width: usize) {
154 if src.len() < width + 7 || dst.len() < width {
155 return;
156 }
157
158 let coeff_vec = I16x8::from_array(*coeffs);
159 let mut x = 0;
160
161 while x + 8 <= width {
163 let mut results = [0i16; 8];
164
165 for i in 0..8 {
166 let src_slice = &src[x + i..];
167 let samples = self.simd.load8_u8_to_i16x8(src_slice);
168 let prod = self.simd.pmaddwd(samples, coeff_vec);
169 let sum = self.simd.horizontal_sum_i32x4(prod);
170 results[i] = ((sum + 64) >> 7).clamp(0, 255) as i16;
171 }
172
173 let result_vec = I16x8::from_array(results);
174 self.simd.store8_i16x8_as_u8(result_vec, &mut dst[x..]);
175 x += 8;
176 }
177
178 while x < width {
180 let mut sum = 0i32;
181 for k in 0..8 {
182 sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
183 }
184 dst[x] = ((sum + 64) >> 7).clamp(0, 255) as u8;
185 x += 1;
186 }
187 }
188}
189
/// Tuning parameters from which the loop-filter thresholds are derived.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy)]
pub struct LoopFilterParams {
    /// Filter level; `calculate_thresholds` derives all three thresholds
    /// from it.
    pub level: u8,
    /// Sharpness; a non-zero value lowers the interior threshold computed by
    /// `calculate_thresholds`.
    pub sharpness: u8,
    /// Edge strength. Not read by `calculate_thresholds`.
    pub edge_strength: u8,
}
205
206impl Default for LoopFilterParams {
207 fn default() -> Self {
208 Self {
209 level: 32,
210 sharpness: 0,
211 edge_strength: 0,
212 }
213 }
214}
215
216#[allow(dead_code)]
218#[must_use]
219pub fn calculate_thresholds(params: &LoopFilterParams) -> (u8, u8, u8) {
220 let level = params.level;
221 let sharpness = params.sharpness;
222
223 let e = if level == 0 {
225 0
226 } else {
227 (u16::from(level) * 2 + 1).min(255) as u8
228 };
229
230 let i = if sharpness == 0 {
232 level
233 } else if sharpness <= 4 {
234 level.saturating_sub(sharpness * 2)
235 } else {
236 level.saturating_sub(8)
237 };
238
239 let hev = if level <= 15 {
241 0
242 } else if level <= 40 {
243 1
244 } else {
245 2
246 };
247
248 (e, i, hev)
249}
250
/// 4-pixel loop filter across an edge lying between `p0` and `q0`.
///
/// The pixels are left untouched when `|p0 - q0|` exceeds `e_threshold` or
/// when the larger of `|p1 - p0|` and `|q1 - q0|` exceeds `i_threshold`.
/// Otherwise `p0` and `q0` are moved toward each other by a correction
/// computed from all four pixels. `p1` and `q1` are only read.
#[allow(dead_code)]
pub fn loop_filter_4(
    p1: &mut u8,
    p0: &mut u8,
    q0: &mut u8,
    q1: &mut u8,
    e_threshold: u8,
    i_threshold: u8,
) {
    let (outer_p, inner_p, inner_q, outer_q) = (*p1, *p0, *q0, *q1);

    let across_edge = inner_p.abs_diff(inner_q);
    let within_side = outer_p.abs_diff(inner_p).max(outer_q.abs_diff(inner_q));
    if across_edge > e_threshold || within_side > i_threshold {
        return;
    }

    let step = i32::from(inner_q) - i32::from(inner_p);
    let tilt = i32::from(outer_p) - i32::from(outer_q);
    // Rounded (4 * step + tilt) / 8, limited to the signed 8-bit range.
    let correction = ((4 * step + tilt + 4) >> 3).clamp(-128, 127);

    *p0 = (i32::from(inner_p) + correction).clamp(0, 255) as u8;
    *q0 = (i32::from(inner_q) - correction).clamp(0, 255) as u8;
}
286
287#[allow(dead_code, clippy::too_many_arguments)]
291pub fn loop_filter_8(
292 p3: &mut u8,
293 p2: &mut u8,
294 p1: &mut u8,
295 p0: &mut u8,
296 q0: &mut u8,
297 q1: &mut u8,
298 q2: &mut u8,
299 q3: &mut u8,
300 threshold: u8,
301) {
302 let p = [*p3, *p2, *p1, *p0];
303 let q = [*q0, *q1, *q2, *q3];
304
305 let is_flat = (0..4).all(|i| {
307 let diff_p = (i16::from(p[i]) - i16::from(p[3])).abs();
308 let diff_q = (i16::from(q[i]) - i16::from(q[0])).abs();
309 diff_p <= i16::from(threshold) && diff_q <= i16::from(threshold)
310 });
311
312 if !is_flat {
313 loop_filter_4(p1, p0, q0, q1, threshold, threshold);
315 return;
316 }
317
318 let sum: i32 = p.iter().chain(q.iter()).map(|&v| i32::from(v)).sum();
320 let avg = ((sum + 4) >> 3).clamp(0, 255) as u8;
321
322 *p0 = blend_to_avg(*p0, avg);
324 *q0 = blend_to_avg(*q0, avg);
325 *p1 = blend_to_avg(*p1, avg);
326 *q1 = blend_to_avg(*q1, avg);
327 *p2 = blend_to_avg(*p2, avg);
328 *q2 = blend_to_avg(*q2, avg);
329 *p3 = blend_to_avg(*p3, avg);
330 *q3 = blend_to_avg(*q3, avg);
331}
332
/// Moves `val` halfway toward `avg`: returns their mean, rounded up.
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn blend_to_avg(val: u8, avg: u8) -> u8 {
    // Widen first so the sum (at most 511) cannot overflow.
    let total = u16::from(val) + u16::from(avg);
    ((total + 1) / 2) as u8
}
340
/// Two-tap bilinear weights.
///
/// Row `k` is `[128 - 16 * k, 16 * k]`, so every row sums to 128.
/// NOTE(review): presumably the row index is the sub-pixel offset in
/// eighths of a pixel; confirm against the callers.
#[allow(dead_code)]
pub const BILINEAR_COEFFS: [[i16; 2]; 8] = [
    [128, 0],
    [112, 16],
    [96, 32],
    [80, 48],
    [64, 64],
    [48, 80],
    [32, 96],
    [16, 112],
];
357
/// Six-tap sub-pixel interpolation weights.
///
/// Every row sums to 128. Row 0 passes the third sample through unchanged,
/// and row `8 - k` is row `k` reversed.
/// NOTE(review): presumably the row index is the sub-pixel offset in
/// eighths of a pixel; confirm against the callers.
#[allow(dead_code)]
pub const SUBPEL_6TAP_COEFFS: [[i16; 6]; 8] = [
    [0, 0, 128, 0, 0, 0],
    [1, -5, 126, 8, -2, 0],
    [1, -11, 114, 28, -7, 3],
    [2, -14, 98, 48, -12, 6],
    [2, -16, 78, 78, -16, 2],
    [6, -12, 48, 98, -14, 2],
    [3, -7, 28, 114, -11, 1],
    [0, -2, 8, 126, -5, 1],
];
370
/// Eight-tap "regular" sub-pixel interpolation weights.
///
/// Every row sums to 128. Row 0 passes the fourth sample through unchanged,
/// and row `16 - k` is row `k` reversed.
/// NOTE(review): presumably the row index is the sub-pixel offset in
/// sixteenths of a pixel; confirm against the callers.
#[allow(dead_code)]
pub const SUBPEL_8TAP_REGULAR: [[i16; 8]; 16] = [
    // Rows 0..=7
    [0, 0, 0, 128, 0, 0, 0, 0],
    [0, 2, -6, 126, 8, -2, 0, 0],
    [0, 2, -10, 122, 18, -4, 0, 0],
    [0, 2, -12, 116, 28, -8, 2, 0],
    [0, 2, -14, 110, 38, -10, 2, 0],
    [0, 2, -14, 102, 48, -12, 2, 0],
    [0, 2, -16, 94, 58, -12, 2, 0],
    [0, 2, -14, 84, 66, -12, 2, 0],
    // Row 8: the symmetric midpoint
    [0, 2, -14, 76, 76, -14, 2, 0],
    // Rows 9..=15
    [0, 2, -12, 66, 84, -14, 2, 0],
    [0, 2, -12, 58, 94, -16, 2, 0],
    [0, 2, -12, 48, 102, -14, 2, 0],
    [0, 2, -10, 38, 110, -14, 2, 0],
    [0, 2, -8, 28, 116, -12, 2, 0],
    [0, 0, -4, 18, 122, -10, 2, 0],
    [0, 0, -2, 8, 126, -6, 2, 0],
];
391
392#[inline]
394#[must_use]
395pub fn filter_ops() -> FilterOps<ScalarFallback> {
396 FilterOps::new(ScalarFallback::new())
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// The 2-tap filter must produce the rounded-up average of neighbours.
    #[test]
    fn test_filter_h_2tap() {
        let ops = filter_ops();

        let src = [100u8, 200, 100, 200, 100, 200, 100, 200];
        let mut dst = [0u8; 7];

        ops.filter_h_2tap(&src, &mut dst, 7);

        for (i, &v) in dst.iter().enumerate() {
            let expected = ((u16::from(src[i]) + u16::from(src[i + 1]) + 1) >> 1) as u8;
            assert_eq!(v, expected);
        }
    }

    /// Equal weights summing to 128 over a constant signal keep the signal.
    #[test]
    fn test_filter_h_4tap() {
        let ops = filter_ops();

        let coeffs = [32i16, 32, 32, 32];
        let src = [100u8; 16];
        let mut dst = [0u8; 12];

        ops.filter_h_4tap(&src, &mut dst, &coeffs, 12);

        for &v in &dst {
            assert!((99..=101).contains(&v));
        }
    }

    /// A single tap of 128 at index 3 copies `src[x + 3]` to `dst[x]`.
    #[test]
    fn test_filter_h_8tap() {
        let ops = filter_ops();

        let coeffs = [0i16, 0, 0, 128, 0, 0, 0, 0];
        let src = [50u8, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160];
        let mut dst = [0u8; 4];

        ops.filter_h_8tap(&src, &mut dst, &coeffs, 4);

        assert_eq!(dst[0], 80);
        assert_eq!(dst[1], 90);
        assert_eq!(dst[2], 100);
        assert_eq!(dst[3], 110);
    }

    /// With permissive thresholds the step across the edge shrinks.
    #[test]
    fn test_loop_filter_4() {
        let mut p1 = 100u8;
        let mut p0 = 110u8;
        let mut q0 = 150u8;
        let mut q1 = 160u8;

        loop_filter_4(&mut p1, &mut p0, &mut q0, &mut q1, 50, 30);

        let diff_after = (i16::from(p0) - i16::from(q0)).abs();
        assert!(diff_after < 40);
    }

    /// With strict thresholds the pixels are left untouched.
    #[test]
    fn test_loop_filter_4_no_filter() {
        let mut p1 = 100u8;
        let mut p0 = 110u8;
        let mut q0 = 150u8;
        let mut q1 = 160u8;

        loop_filter_4(&mut p1, &mut p0, &mut q0, &mut q1, 5, 5);

        assert_eq!(p0, 110);
        assert_eq!(q0, 150);
    }

    #[test]
    fn test_calculate_thresholds() {
        let params = LoopFilterParams {
            level: 32,
            sharpness: 0,
            edge_strength: 0,
        };

        let (e, i, hev) = calculate_thresholds(&params);

        assert!(e > 0);
        assert_eq!(i, 32);
        assert_eq!(hev, 1);
    }

    #[test]
    fn test_calculate_thresholds_zero_level() {
        let params = LoopFilterParams {
            level: 0,
            sharpness: 0,
            edge_strength: 0,
        };

        let (e, i, hev) = calculate_thresholds(&params);

        assert_eq!(e, 0);
        assert_eq!(i, 0);
        assert_eq!(hev, 0);
    }

    #[test]
    fn test_bilinear_coeffs_sum() {
        for coeffs in BILINEAR_COEFFS {
            assert_eq!(coeffs[0] + coeffs[1], 128);
        }
    }

    #[test]
    fn test_subpel_coeffs_sum() {
        for coeffs in SUBPEL_6TAP_COEFFS {
            let sum: i16 = coeffs.iter().sum();
            assert_eq!(sum, 128, "Sum mismatch: {}", sum);
        }

        for coeffs in SUBPEL_8TAP_REGULAR {
            let sum: i16 = coeffs.iter().sum();
            assert_eq!(sum, 128, "Sum mismatch: {}", sum);
        }
    }

    /// A gentle ramp is flat under threshold 10, so `p0` lands near the mean.
    #[test]
    fn test_loop_filter_8_flat() {
        let mut p3 = 100u8;
        let mut p2 = 101u8;
        let mut p1 = 102u8;
        let mut p0 = 103u8;
        let mut q0 = 104u8;
        let mut q1 = 105u8;
        let mut q2 = 106u8;
        let mut q3 = 107u8;

        loop_filter_8(
            &mut p3, &mut p2, &mut p1, &mut p0, &mut q0, &mut q1, &mut q2, &mut q3, 10,
        );

        let avg: i16 = (100 + 101 + 102 + 103 + 104 + 105 + 106 + 107) / 8;
        assert!((i16::from(p0) - avg).abs() < 5);
    }

    #[test]
    fn test_filter_v_8tap() {
        let ops = filter_ops();

        let row = [128u8; 16];
        let rows: [&[u8]; 8] = [&row, &row, &row, &row, &row, &row, &row, &row];

        let coeffs = [0i16, 0, 0, 128, 0, 0, 0, 0];

        let result = ops.filter_v_8tap(&rows, 0, &coeffs);
        assert_eq!(result, 128);
    }
}