oximedia_codec/simd/
filter.rs

1//! Filter operations for video codec implementations.
2//!
3//! This module provides filtering primitives used in:
4//! - Scaling (horizontal and vertical resampling)
5//! - Loop filtering (deblocking)
6//! - In-loop restoration filters
7//!
8//! All operations are designed to map efficiently to SIMD instructions.
9
10#![forbid(unsafe_code)]
11// Allow truncation and sign loss casts for filter operations (values are clamped)
12#![allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
13// Allow loop indexing for filter operations
14#![allow(clippy::needless_range_loop)]
15
16use super::scalar::ScalarFallback;
17use super::traits::{SimdOps, SimdOpsExt};
18use super::types::I16x8;
19
20/// Filter operations using SIMD.
21pub struct FilterOps<S: SimdOps> {
22    simd: S,
23}
24
25impl<S: SimdOps + Default> Default for FilterOps<S> {
26    fn default() -> Self {
27        Self::new(S::default())
28    }
29}
30
31impl<S: SimdOps> FilterOps<S> {
32    /// Create a new filter operations instance.
33    #[inline]
34    #[must_use]
35    pub const fn new(simd: S) -> Self {
36        Self { simd }
37    }
38
39    /// Get the underlying SIMD implementation.
40    #[inline]
41    #[must_use]
42    pub const fn simd(&self) -> &S {
43        &self.simd
44    }
45
46    /// Apply a horizontal 2-tap filter (bilinear).
47    ///
48    /// Used for simple 2x scaling or half-pixel interpolation.
49    #[allow(dead_code)]
50    pub fn filter_h_2tap(&self, src: &[u8], dst: &mut [u8], width: usize) {
51        if src.len() < width + 1 || dst.len() < width {
52            return;
53        }
54
55        for x in 0..width {
56            // Simple average of two adjacent pixels
57            let a = u16::from(src[x]);
58            let b = u16::from(src[x + 1]);
59            dst[x] = ((a + b + 1) >> 1) as u8;
60        }
61    }
62
63    /// Apply a horizontal 4-tap filter.
64    ///
65    /// Common filter coefficients for sub-pixel interpolation.
66    #[allow(dead_code)]
67    pub fn filter_h_4tap(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 4], width: usize) {
68        if src.len() < width + 3 || dst.len() < width {
69            return;
70        }
71
72        for x in 0..width {
73            let mut sum = 0i32;
74            for k in 0..4 {
75                sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
76            }
77            // Round and clip
78            let result = (sum + 64) >> 7;
79            dst[x] = result.clamp(0, 255) as u8;
80        }
81    }
82
83    /// Apply a horizontal 6-tap filter.
84    #[allow(dead_code)]
85    pub fn filter_h_6tap(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 6], width: usize) {
86        if src.len() < width + 5 || dst.len() < width {
87            return;
88        }
89
90        for x in 0..width {
91            let mut sum = 0i32;
92            for k in 0..6 {
93                sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
94            }
95            let result = (sum + 64) >> 7;
96            dst[x] = result.clamp(0, 255) as u8;
97        }
98    }
99
100    /// Apply a horizontal 8-tap filter.
101    ///
102    /// Used in AV1 for high-quality scaling.
103    #[allow(dead_code)]
104    pub fn filter_h_8tap(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 8], width: usize) {
105        if src.len() < width + 7 || dst.len() < width {
106            return;
107        }
108
109        for x in 0..width {
110            let mut sum = 0i32;
111            for k in 0..8 {
112                sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
113            }
114            let result = (sum + 64) >> 7;
115            dst[x] = result.clamp(0, 255) as u8;
116        }
117    }
118
119    /// Apply a vertical filter to a column of pixels.
120    ///
121    /// Takes pointers to multiple rows and produces one output pixel.
122    #[allow(dead_code)]
123    pub fn filter_v_8tap(&self, rows: &[&[u8]; 8], col: usize, coeffs: &[i16; 8]) -> u8 {
124        let mut sum = 0i32;
125        for k in 0..8 {
126            if col < rows[k].len() {
127                sum += i32::from(rows[k][col]) * i32::from(coeffs[k]);
128            }
129        }
130        let result = (sum + 64) >> 7;
131        result.clamp(0, 255) as u8
132    }
133
134    /// Apply vertical filter to a row of pixels.
135    #[allow(dead_code)]
136    pub fn filter_v_row_8tap(
137        &self,
138        rows: &[&[u8]; 8],
139        dst: &mut [u8],
140        coeffs: &[i16; 8],
141        width: usize,
142    ) {
143        let width = width.min(dst.len());
144        for x in 0..width {
145            dst[x] = self.filter_v_8tap(rows, x, coeffs);
146        }
147    }
148}
149
150impl<S: SimdOps + SimdOpsExt> FilterOps<S> {
151    /// SIMD-accelerated horizontal 8-tap filter.
152    #[allow(dead_code)]
153    pub fn filter_h_8tap_simd(&self, src: &[u8], dst: &mut [u8], coeffs: &[i16; 8], width: usize) {
154        if src.len() < width + 7 || dst.len() < width {
155            return;
156        }
157
158        let coeff_vec = I16x8::from_array(*coeffs);
159        let mut x = 0;
160
161        // Process 8 pixels at a time
162        while x + 8 <= width {
163            let mut results = [0i16; 8];
164
165            for i in 0..8 {
166                let src_slice = &src[x + i..];
167                let samples = self.simd.load8_u8_to_i16x8(src_slice);
168                let prod = self.simd.pmaddwd(samples, coeff_vec);
169                let sum = self.simd.horizontal_sum_i32x4(prod);
170                results[i] = ((sum + 64) >> 7).clamp(0, 255) as i16;
171            }
172
173            let result_vec = I16x8::from_array(results);
174            self.simd.store8_i16x8_as_u8(result_vec, &mut dst[x..]);
175            x += 8;
176        }
177
178        // Handle remaining pixels
179        while x < width {
180            let mut sum = 0i32;
181            for k in 0..8 {
182                sum += i32::from(src[x + k]) * i32::from(coeffs[k]);
183            }
184            dst[x] = ((sum + 64) >> 7).clamp(0, 255) as u8;
185            x += 1;
186        }
187    }
188}
189
190// ============================================================================
191// Loop Filter Operations
192// ============================================================================
193
/// Strength parameters for the deblocking (loop) filter.
#[derive(Clone, Copy, Debug)]
#[allow(dead_code)]
pub struct LoopFilterParams {
    /// Overall filter level, expected in the range 0-63.
    pub level: u8,
    /// Sharpness setting; higher values lower the interior threshold.
    pub sharpness: u8,
    /// Strength of the block edge, expected in the range 0-3.
    pub edge_strength: u8,
}
205
206impl Default for LoopFilterParams {
207    fn default() -> Self {
208        Self {
209            level: 32,
210            sharpness: 0,
211            edge_strength: 0,
212        }
213    }
214}
215
216/// Calculate loop filter thresholds from parameters.
217#[allow(dead_code)]
218#[must_use]
219pub fn calculate_thresholds(params: &LoopFilterParams) -> (u8, u8, u8) {
220    let level = params.level;
221    let sharpness = params.sharpness;
222
223    // E (edge) threshold
224    let e = if level == 0 {
225        0
226    } else {
227        (u16::from(level) * 2 + 1).min(255) as u8
228    };
229
230    // I (interior) threshold
231    let i = if sharpness == 0 {
232        level
233    } else if sharpness <= 4 {
234        level.saturating_sub(sharpness * 2)
235    } else {
236        level.saturating_sub(8)
237    };
238
239    // Hev (high edge variance) threshold
240    let hev = if level <= 15 {
241        0
242    } else if level <= 40 {
243        1
244    } else {
245        2
246    };
247
248    (e, i, hev)
249}
250
/// Simple 4-tap deblocking filter across one block edge.
///
/// `p1, p0` lie on one side of the edge and `q0, q1` on the other. Nothing is
/// changed when the step across the edge (`|p0 - q0|`) exceeds `e_threshold`,
/// or when the larger of the two within-side steps (`|p1 - p0|`, `|q1 - q0|`)
/// exceeds `i_threshold`. Otherwise `p0` and `q0` are moved toward each other
/// by a correction derived from all four samples; `p1` and `q1` are only read.
#[allow(dead_code)]
pub fn loop_filter_4(
    p1: &mut u8,
    p0: &mut u8,
    q0: &mut u8,
    q1: &mut u8,
    e_threshold: u8,
    i_threshold: u8,
) {
    // Widen to i16 so differences and the weighted sum cannot overflow.
    let [outer_p, inner_p, inner_q, outer_q] = [*p1, *p0, *q0, *q1].map(i16::from);

    let across_edge = (inner_p - inner_q).abs();
    let within_side = (outer_p - inner_p)
        .abs()
        .max((outer_q - inner_q).abs());

    let skip = across_edge > i16::from(e_threshold) || within_side > i16::from(i_threshold);
    if skip {
        return;
    }

    // Rounded correction, limited to the signed 8-bit range.
    let correction = (((inner_q - inner_p) * 4 + (outer_p - outer_q) + 4) >> 3).clamp(-128, 127);

    *p0 = (inner_p + correction).clamp(0, 255) as u8;
    *q0 = (inner_q - correction).clamp(0, 255) as u8;
}
286
287/// Strong 8-tap deblocking filter.
288///
289/// Applied at strong edges with flat regions.
290#[allow(dead_code, clippy::too_many_arguments)]
291pub fn loop_filter_8(
292    p3: &mut u8,
293    p2: &mut u8,
294    p1: &mut u8,
295    p0: &mut u8,
296    q0: &mut u8,
297    q1: &mut u8,
298    q2: &mut u8,
299    q3: &mut u8,
300    threshold: u8,
301) {
302    let p = [*p3, *p2, *p1, *p0];
303    let q = [*q0, *q1, *q2, *q3];
304
305    // Check flatness
306    let is_flat = (0..4).all(|i| {
307        let diff_p = (i16::from(p[i]) - i16::from(p[3])).abs();
308        let diff_q = (i16::from(q[i]) - i16::from(q[0])).abs();
309        diff_p <= i16::from(threshold) && diff_q <= i16::from(threshold)
310    });
311
312    if !is_flat {
313        // Fall back to simple filter
314        loop_filter_4(p1, p0, q0, q1, threshold, threshold);
315        return;
316    }
317
318    // Strong filtering: average all 8 pixels
319    let sum: i32 = p.iter().chain(q.iter()).map(|&v| i32::from(v)).sum();
320    let avg = ((sum + 4) >> 3).clamp(0, 255) as u8;
321
322    // Blend toward average
323    *p0 = blend_to_avg(*p0, avg);
324    *q0 = blend_to_avg(*q0, avg);
325    *p1 = blend_to_avg(*p1, avg);
326    *q1 = blend_to_avg(*q1, avg);
327    *p2 = blend_to_avg(*p2, avg);
328    *q2 = blend_to_avg(*q2, avg);
329    *p3 = blend_to_avg(*p3, avg);
330    *q3 = blend_to_avg(*q3, avg);
331}
332
/// Move `val` halfway toward `avg`, rounding up on ties.
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn blend_to_avg(val: u8, avg: u8) -> u8 {
    // Two u8 values plus one is at most 511, so the halved sum always fits in u8.
    let rounded_sum = u16::from(val) + u16::from(avg) + 1;
    (rounded_sum / 2) as u8
}
340
341// ============================================================================
342// Standard Filter Coefficients
343// ============================================================================
344
/// Bilinear interpolation coefficients (2-tap), one pair per 1/8-pixel phase.
///
/// Phase `n` weights the two neighbouring pixels by `128 - 16 * n` and
/// `16 * n`, so every pair sums to 128 (7-bit fixed point).
#[allow(dead_code)]
pub const BILINEAR_COEFFS: [[i16; 2]; 8] = {
    let mut table = [[0i16; 2]; 8];
    let mut second_weight: i16 = 0;
    let mut phase = 0;
    while phase < 8 {
        table[phase] = [128 - second_weight, second_weight];
        second_weight += 16;
        phase += 1;
    }
    table
};
357
/// 6-tap sub-pixel interpolation coefficients, one row per 1/8-pixel phase.
///
/// Every row sums to 128 (7-bit fixed point), and row `n` is row `8 - n`
/// reversed.
#[allow(dead_code)]
pub const SUBPEL_6TAP_COEFFS: [[i16; 6]; 8] = [
    // Phase 0: integer position, all weight on tap 2.
    [0, 0, 128, 0, 0, 0],
    // Phase 1/8
    [1, -5, 126, 8, -2, 0],
    // Phase 2/8
    [1, -11, 114, 28, -7, 3],
    // Phase 3/8
    [2, -14, 98, 48, -12, 6],
    // Phase 4/8: half-pixel position, symmetric kernel.
    [2, -16, 78, 78, -16, 2],
    // Phase 5/8
    [6, -12, 48, 98, -14, 2],
    // Phase 6/8
    [3, -7, 28, 114, -11, 1],
    // Phase 7/8
    [0, -2, 8, 126, -5, 1],
];
370
/// 8-tap high-quality interpolation coefficients (AV1 regular filter), one row
/// per 1/16-pixel phase.
///
/// Every row sums to 128 (7-bit fixed point), and row `n` is row `16 - n`
/// reversed.
#[allow(dead_code)]
pub const SUBPEL_8TAP_REGULAR: [[i16; 8]; 16] = [
    // Phase 0: integer position, all weight on tap 3.
    [0, 0, 0, 128, 0, 0, 0, 0],
    // Phases 1/16 to 7/16
    [0, 2, -6, 126, 8, -2, 0, 0],
    [0, 2, -10, 122, 18, -4, 0, 0],
    [0, 2, -12, 116, 28, -8, 2, 0],
    [0, 2, -14, 110, 38, -10, 2, 0],
    [0, 2, -14, 102, 48, -12, 2, 0],
    [0, 2, -16, 94, 58, -12, 2, 0],
    [0, 2, -14, 84, 66, -12, 2, 0],
    // Phase 8/16: half-pixel position, symmetric kernel.
    [0, 2, -14, 76, 76, -14, 2, 0],
    // Phases 9/16 to 15/16
    [0, 2, -12, 66, 84, -14, 2, 0],
    [0, 2, -12, 58, 94, -16, 2, 0],
    [0, 2, -12, 48, 102, -14, 2, 0],
    [0, 2, -10, 38, 110, -14, 2, 0],
    [0, 2, -8, 28, 116, -12, 2, 0],
    [0, 0, -4, 18, 122, -10, 2, 0],
    [0, 0, -2, 8, 126, -6, 2, 0],
];
391
392/// Create a filter operations instance with scalar fallback.
393#[inline]
394#[must_use]
395pub fn filter_ops() -> FilterOps<ScalarFallback> {
396    FilterOps::new(ScalarFallback::new())
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// 8-tap kernel with all weight on tap 3, i.e. `dst[x] == src[x + 3]`.
    const IDENTITY_8TAP: [i16; 8] = [0, 0, 0, 128, 0, 0, 0, 0];

    /// Run `loop_filter_4` on a `(p1, p0, q0, q1)` tuple and return the result.
    fn run_filter_4(px: (u8, u8, u8, u8), e: u8, i: u8) -> (u8, u8, u8, u8) {
        let (mut p1, mut p0, mut q0, mut q1) = px;
        loop_filter_4(&mut p1, &mut p0, &mut q0, &mut q1, e, i);
        (p1, p0, q0, q1)
    }

    /// Run `loop_filter_8` on `[p3, p2, p1, p0, q0, q1, q2, q3]` and return the result.
    fn run_filter_8(mut px: [u8; 8], threshold: u8) -> [u8; 8] {
        {
            let [p3, p2, p1, p0, q0, q1, q2, q3] = &mut px;
            loop_filter_8(p3, p2, p1, p0, q0, q1, q2, q3, threshold);
        }
        px
    }

    /// Shorthand for `calculate_thresholds` with an edge strength of 0.
    fn thresholds(level: u8, sharpness: u8) -> (u8, u8, u8) {
        calculate_thresholds(&LoopFilterParams {
            level,
            sharpness,
            edge_strength: 0,
        })
    }

    #[test]
    fn test_filter_h_2tap() {
        let ops = filter_ops();

        let src = [100u8, 200, 100, 200, 100, 200, 100, 200];
        let mut dst = [0u8; 7];
        ops.filter_h_2tap(&src, &mut dst, 7);

        // Every adjacent pair is (100, 200) in some order: (100 + 200 + 1) >> 1.
        assert_eq!(dst, [150u8; 7]);
    }

    #[test]
    fn test_filter_h_2tap_rounds_up() {
        let ops = filter_ops();

        let src = [0u8, 1, 255, 255];
        let mut dst = [0u8; 3];
        ops.filter_h_2tap(&src, &mut dst, 3);

        assert_eq!(dst, [1, 128, 255]);
    }

    #[test]
    fn test_filter_h_4tap() {
        let ops = filter_ops();

        // Flat averaging kernel: the weights sum to 128.
        let coeffs = [32i16, 32, 32, 32];
        let src = [100u8; 16];
        let mut dst = [0u8; 12];
        ops.filter_h_4tap(&src, &mut dst, &coeffs, 12);

        // Integer arithmetic is exact here: (100 * 128 + 64) >> 7 == 100.
        assert_eq!(dst, [100u8; 12]);
    }

    #[test]
    fn test_filter_h_4tap_clips_to_pixel_range() {
        let ops = filter_ops();

        let src = [255u8; 4];
        let mut dst = [7u8; 1];

        ops.filter_h_4tap(&src, &mut dst, &[128, 128, 0, 0], 1);
        assert_eq!(dst, [255]);

        ops.filter_h_4tap(&src, &mut dst, &[-128, 0, 0, 0], 1);
        assert_eq!(dst, [0]);
    }

    #[test]
    fn test_filter_h_6tap() {
        let ops = filter_ops();

        let src = [10u8, 20, 30, 40, 50, 60];
        let mut dst = [0u8; 1];

        // Integer phase: all weight on tap 2.
        ops.filter_h_6tap(&src, &mut dst, &SUBPEL_6TAP_COEFFS[0], 1);
        assert_eq!(dst, [30]);

        // Half-pixel phase on a linear ramp: (4480 + 64) >> 7 == 35.
        ops.filter_h_6tap(&src, &mut dst, &SUBPEL_6TAP_COEFFS[4], 1);
        assert_eq!(dst, [35]);
    }

    #[test]
    fn test_filter_h_8tap() {
        let ops = filter_ops();

        let src = [50u8, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160];
        let mut dst = [0u8; 4];
        ops.filter_h_8tap(&src, &mut dst, &IDENTITY_8TAP, 4);

        // Output equals the input shifted by the kernel centre (tap 3).
        assert_eq!(dst, [80, 90, 100, 110]);
    }

    #[test]
    fn test_filter_h_ignores_undersized_buffers() {
        let ops = filter_ops();

        // 2-tap needs width + 1 source samples.
        let src = [9u8; 8];
        let mut dst = [0xAAu8; 8];
        ops.filter_h_2tap(&src, &mut dst, 8);
        assert_eq!(dst, [0xAAu8; 8]);

        // 8-tap needs width + 7 source samples.
        let src = [9u8; 10];
        ops.filter_h_8tap(&src, &mut dst, &IDENTITY_8TAP, 4);
        assert_eq!(dst, [0xAAu8; 8]);

        // Destination shorter than the requested width.
        let src = [9u8; 16];
        let mut tiny = [0xAAu8; 2];
        ops.filter_h_8tap(&src, &mut tiny, &IDENTITY_8TAP, 3);
        assert_eq!(tiny, [0xAAu8; 2]);
    }

    #[test]
    fn test_filter_v_8tap() {
        let ops = filter_ops();

        // Eight identical rows of constant value.
        let row = [128u8; 16];
        let rows: [&[u8]; 8] = [&row[..]; 8];

        assert_eq!(ops.filter_v_8tap(&rows, 0, &IDENTITY_8TAP), 128);
    }

    #[test]
    fn test_filter_v_8tap_skips_short_rows() {
        let ops = filter_ops();

        let full = [128u8; 4];
        let empty: &[u8] = &[];
        let rows: [&[u8]; 8] = [&full, &full, &full, empty, &full, &full, &full, &full];

        // The only weighted row has no sample at this column.
        assert_eq!(ops.filter_v_8tap(&rows, 0, &IDENTITY_8TAP), 0);
    }

    #[test]
    fn test_filter_v_row_8tap() {
        let ops = filter_ops();

        let lo = [10u8; 4];
        let hi = [200u8; 4];
        let rows: [&[u8]; 8] = [&lo, &lo, &lo, &hi, &lo, &lo, &lo, &lo];

        // A width larger than the destination is clamped to its length.
        let mut dst = [0u8; 4];
        ops.filter_v_row_8tap(&rows, &mut dst, &IDENTITY_8TAP, 16);
        assert_eq!(dst, [200u8; 4]);

        // A smaller width leaves the tail untouched.
        let mut partial = [1u8; 4];
        ops.filter_v_row_8tap(&rows, &mut partial, &IDENTITY_8TAP, 2);
        assert_eq!(partial, [200, 200, 1, 1]);
    }

    #[test]
    fn test_loop_filter_4() {
        // delta = ((150 - 110) * 4 + (100 - 160) + 4) >> 3 == 13.
        assert_eq!(
            run_filter_4((100, 110, 150, 160), 50, 30),
            (100, 123, 137, 160)
        );
        // Mirrored input yields the mirrored result.
        assert_eq!(
            run_filter_4((160, 150, 110, 100), 50, 30),
            (160, 138, 122, 100)
        );
    }

    #[test]
    fn test_loop_filter_4_no_filter() {
        let input = (100, 110, 150, 160);

        // Edge step (40) above the edge threshold.
        assert_eq!(run_filter_4(input, 5, 5), input);
        // Interior step (10) above the interior threshold.
        assert_eq!(run_filter_4(input, 50, 5), input);
    }

    #[test]
    fn test_loop_filter_4_saturates_delta() {
        // Raw delta is 159 and must be limited to 127.
        assert_eq!(
            run_filter_4((255, 0, 255, 0), 255, 255),
            (255, 127, 128, 0)
        );
    }

    #[test]
    fn test_calculate_thresholds() {
        // Level 32, no sharpening: e = 2 * 32 + 1, i = level, hev in the middle band.
        assert_eq!(thresholds(32, 0), (65, 32, 1));
    }

    #[test]
    fn test_calculate_thresholds_zero_level() {
        assert_eq!(thresholds(0, 0), (0, 0, 0));
    }

    #[test]
    fn test_calculate_thresholds_sharpness_and_bands() {
        // Sharpness 1-4 subtracts twice the sharpness, higher values subtract 8.
        assert_eq!(thresholds(63, 3), (127, 57, 2));
        assert_eq!(thresholds(10, 7), (21, 2, 0));
        // Subtraction saturates at zero.
        assert_eq!(thresholds(4, 4), (9, 0, 0));
        // The edge threshold is capped at 255.
        assert_eq!(thresholds(200, 0).0, 255);

        // High-edge-variance band boundaries.
        assert_eq!(thresholds(15, 0).2, 0);
        assert_eq!(thresholds(16, 0).2, 1);
        assert_eq!(thresholds(40, 0).2, 1);
        assert_eq!(thresholds(41, 0).2, 2);
    }

    #[test]
    fn test_bilinear_coeffs_sum() {
        // Each pair should sum to 128.
        for coeffs in BILINEAR_COEFFS {
            assert_eq!(coeffs[0] + coeffs[1], 128);
        }
    }

    #[test]
    fn test_subpel_coeffs_sum() {
        // 6-tap coefficients should sum to 128.
        for coeffs in SUBPEL_6TAP_COEFFS {
            let sum: i16 = coeffs.iter().sum();
            assert_eq!(sum, 128, "Sum mismatch: {}", sum);
        }

        // 8-tap coefficients should sum to 128.
        for coeffs in SUBPEL_8TAP_REGULAR {
            let sum: i16 = coeffs.iter().sum();
            assert_eq!(sum, 128, "Sum mismatch: {}", sum);
        }
    }

    #[test]
    fn test_subpel_coeffs_are_mirrored() {
        // Phase n and its complementary phase use reversed kernels.
        for phase in 1..8 {
            let mut mirrored = SUBPEL_6TAP_COEFFS[8 - phase];
            mirrored.reverse();
            assert_eq!(SUBPEL_6TAP_COEFFS[phase], mirrored, "6-tap phase {}", phase);
        }

        for phase in 1..16 {
            let mut mirrored = SUBPEL_8TAP_REGULAR[16 - phase];
            mirrored.reverse();
            assert_eq!(SUBPEL_8TAP_REGULAR[phase], mirrored, "8-tap phase {}", phase);
        }
    }

    #[test]
    fn test_loop_filter_8_flat() {
        // Flat ramp: rounded mean is (828 + 4) >> 3 == 104 and every sample is
        // blended halfway toward it.
        assert_eq!(
            run_filter_8([100, 101, 102, 103, 104, 105, 106, 107], 10),
            [102, 103, 103, 104, 104, 105, 105, 106]
        );
    }

    #[test]
    fn test_loop_filter_8_falls_back_when_not_flat() {
        // p3 differs from p0 by 110 > 50, so only the 4-tap filter runs.
        assert_eq!(
            run_filter_8([0, 100, 100, 110, 150, 160, 160, 160], 50),
            [0, 100, 100, 123, 137, 160, 160, 160]
        );
    }
}
oximedia_codec/simd/filter.rs

oximedia_codec/simd/
filter.rs