use crate::simd::traits::SimdOps;
use crate::simd::types::{I16x8, U8x16};
/// Sample type that the CDEF routines can operate on.
///
/// Implemented in this file for `u8` and `u16`.
pub trait CdefPixel: Copy + Default + PartialOrd + Sized {
    /// Widens the sample to `i32` for intermediate arithmetic.
    fn as_i32(self) -> i32;

    /// Converts `v` back into a sample, clamping it to `0..=max_val`.
    ///
    /// `max_val` is itself limited to the range `Self` can represent, so the
    /// conversion never truncates and never panics on a negative `max_val`.
    fn from_clamped(v: i32, max_val: i32) -> Self;

    /// Returns the largest sample value for `bit_depth`.
    fn max_value(bit_depth: u8) -> i32;
}

impl CdefPixel for u8 {
    #[inline]
    fn as_i32(self) -> i32 {
        i32::from(self)
    }

    #[inline]
    fn from_clamped(v: i32, max_val: i32) -> Self {
        // Cap the limit at what a `u8` can hold; a plain `as u8` on a larger
        // value would silently wrap (e.g. 300 -> 44).
        let upper = max_val.clamp(0, i32::from(u8::MAX));
        v.clamp(0, upper) as u8
    }

    /// Always 255: `u8` samples ignore the requested bit depth.
    #[inline]
    fn max_value(_bit_depth: u8) -> i32 {
        255
    }
}

impl CdefPixel for u16 {
    #[inline]
    fn as_i32(self) -> i32 {
        i32::from(self)
    }

    #[inline]
    fn from_clamped(v: i32, max_val: i32) -> Self {
        // Cap the limit at what a `u16` can hold so the cast is lossless.
        let upper = max_val.clamp(0, i32::from(u16::MAX));
        v.clamp(0, upper) as u16
    }

    /// `2^bit_depth - 1`, with `bit_depth` capped at 16.
    #[inline]
    fn max_value(bit_depth: u8) -> i32 {
        // Shift in `i32`: `1u16 << 16` overflows, so the 16-bit case needs the
        // wider type.
        (1i32 << bit_depth.min(16)) - 1
    }
}
/// Maps a direction index to its unit step `(dx, dy)`.
///
/// Only the low three bits of `direction` are used, so indices wrap around
/// every eight values. Directions `d` and `d + 4` are exact opposites.
#[inline]
fn cdef_direction_offset(direction: u8) -> (i32, i32) {
    const STEPS: [(i32, i32); 8] = [
        (1, 0),
        (1, 1),
        (0, 1),
        (-1, 1),
        (-1, 0),
        (-1, -1),
        (0, -1),
        (1, -1),
    ];
    STEPS[usize::from(direction & 7)]
}
#[allow(clippy::too_many_arguments)]
#[inline]
fn cdef_tap_weight_u16(
src: &[u16],
stride: usize,
x: usize,
y: usize,
ox: i32,
oy: i32,
pixel: u16,
strength: u16,
damping: u8,
) -> (i32, i32) {
let tx = x as i32 + ox;
let ty = y as i32 + oy;
if tx < 0 || ty < 0 {
return (0, 0);
}
let offset = ty as usize * stride + tx as usize;
if offset >= src.len() {
return (0, 0);
}
let tap = src[offset];
let diff = i32::from(tap) - i32::from(pixel);
let abs_diff = diff.unsigned_abs() as i32;
let threshold = 1i32 << damping;
if abs_diff >= threshold {
return (0, 0);
}
let weight = i32::from(strength) * (threshold - abs_diff) / threshold;
(diff * weight, weight)
}
/// Filters one `u16` sample.
///
/// Four primary taps lie along `direction` (one and two steps each way) and
/// four secondary taps lie along the perpendicular `(-dy, dx)`. The weighted
/// differences from `cdef_tap_weight_u16` are summed, rounded with
/// `(sum + 8) >> 4`, added to `pixel`, and the result is clamped to
/// `0..=2^bit_depth - 1` (`bit_depth` capped at 16).
///
/// Returns `pixel` unchanged when both strengths are zero.
#[allow(clippy::too_many_arguments)]
#[inline]
fn cdef_filter_pixel_u16(
    src: &[u16],
    stride: usize,
    x: usize,
    y: usize,
    pixel: u16,
    pri_strength: u16,
    sec_strength: u16,
    direction: u8,
    damping: u8,
    bit_depth: u8,
) -> u16 {
    if pri_strength == 0 && sec_strength == 0 {
        return pixel;
    }

    let (dx, dy) = cdef_direction_offset(direction);
    let (sdx, sdy) = (-dy, dx);
    let taps = [
        (dx, dy, pri_strength),
        (-dx, -dy, pri_strength),
        (dx * 2, dy * 2, pri_strength),
        (-dx * 2, -dy * 2, pri_strength),
        (sdx, sdy, sec_strength),
        (-sdx, -sdy, sec_strength),
        (sdx * 2, sdy * 2, sec_strength),
        (-sdx * 2, -sdy * 2, sec_strength),
    ];

    // Accumulate in `i64`: eight `i32` contributions cannot overflow it.
    let correction: i64 = taps
        .iter()
        .map(|&(ox, oy, strength)| {
            let (weighted_diff, _) =
                cdef_tap_weight_u16(src, stride, x, y, ox, oy, pixel, strength, damping);
            i64::from(weighted_diff)
        })
        .sum();

    let adjustment = (correction + 8) >> 4;
    // Shift in a wide type: `1u16 << 16` would overflow for 16-bit content.
    let max_val = (1i64 << bit_depth.min(16)) - 1;
    // The clamp bounds the value to `0..=65535`, so the cast is lossless.
    (i64::from(pixel) + adjustment).clamp(0, max_val) as u16
}
/// Applies the CDEF filter in place to a `width` × `height` region of `frame`.
///
/// `stride` is the distance, in samples, between the starts of consecutive
/// rows. Every output sample is computed from a snapshot of the unfiltered
/// frame, so already-filtered neighbours never feed back into later samples.
/// Positions whose offset falls outside `frame` are skipped.
///
/// When both strengths are zero the filter is the identity and the function
/// returns immediately without copying the frame.
#[allow(clippy::too_many_arguments)]
pub fn cdef_filter_u16(
    frame: &mut [u16],
    width: usize,
    height: usize,
    stride: usize,
    pri_strength: u16,
    sec_strength: u16,
    direction: u8,
    damping: u8,
    bit_depth: u8,
) {
    // With zero strengths every sample maps to itself; skip the snapshot and
    // the per-pixel walk entirely.
    if pri_strength == 0 && sec_strength == 0 {
        return;
    }

    let src: Vec<u16> = frame.to_vec();
    for y in 0..height {
        for x in 0..width {
            let idx = y * stride + x;
            if let Some(&pixel) = src.get(idx) {
                frame[idx] = cdef_filter_pixel_u16(
                    &src,
                    stride,
                    x,
                    y,
                    pixel,
                    pri_strength,
                    sec_strength,
                    direction,
                    damping,
                    bit_depth,
                );
            }
        }
    }
}
/// Applies the CDEF filter in place to the 8×8 block whose top-left corner is
/// `(block_x, block_y)`, clipped to `frame_width` × `frame_height`.
///
/// `stride` is the distance, in samples, between the starts of consecutive
/// rows. Output samples are computed from unfiltered input. Only the rows the
/// filter taps can reach are snapshotted, rather than the whole frame, so the
/// cost of one call does not grow with the frame size.
///
/// Positions whose offset falls outside `frame` are skipped, and the call is a
/// no-op when both strengths are zero or the block lies outside the frame.
#[allow(clippy::too_many_arguments)]
pub fn cdef_filter_block_u16(
    frame: &mut [u16],
    block_x: usize,
    block_y: usize,
    frame_width: usize,
    frame_height: usize,
    stride: usize,
    pri_strength: u16,
    sec_strength: u16,
    direction: u8,
    damping: u8,
    bit_depth: u8,
) {
    // Taps sit at most two direction steps (each -1, 0 or 1 per axis) away
    // from the sample being filtered.
    const TAP_REACH: usize = 2;

    // Zero strengths leave every sample untouched.
    if pri_strength == 0 && sec_strength == 0 {
        return;
    }
    let end_x = (block_x + 8).min(frame_width);
    let end_y = (block_y + 8).min(frame_height);
    if block_x >= end_x || block_y >= end_y {
        return;
    }

    // Snapshot only the span of the frame that any tap of any block sample
    // can address: from the first reachable row up to the furthest reachable
    // offset (or the end of the buffer, whichever comes first).
    let first_row = block_y.saturating_sub(TAP_REACH);
    let window_start = first_row.saturating_mul(stride);
    if window_start >= frame.len() {
        // Every block sample lies past the end of the buffer.
        return;
    }
    let last_offset = (end_y + TAP_REACH - 1)
        .saturating_mul(stride)
        .saturating_add(end_x + TAP_REACH - 1);
    let window_end = frame.len().min(last_offset.saturating_add(1));
    let src: Vec<u16> = frame[window_start..window_end].to_vec();

    for y in block_y..end_y {
        for x in block_x..end_x {
            let idx = y * stride + x;
            if idx >= frame.len() {
                continue;
            }
            // Rows are re-based to the window. Columns are unchanged because
            // the window starts on a row boundary.
            frame[idx] = cdef_filter_pixel_u16(
                &src,
                stride,
                x,
                y - first_row,
                src[idx - window_start],
                pri_strength,
                sec_strength,
                direction,
                damping,
                bit_depth,
            );
        }
    }
}
/// Picks the direction (0–7) along which a block varies the least.
///
/// For each direction, every interior sample of the `block_size` ×
/// `block_size` block (rows and columns `1..block_size - 1`) is compared with
/// the sample one step along that direction, and the squared differences are
/// averaged. The direction with the smallest average wins and ties go to the
/// lowest index. A direction with no usable sample pair scores `u64::MAX`, so
/// the result is 0 when nothing can be measured.
pub fn cdef_find_direction_u16(frame: &[u16], stride: usize, block_size: usize) -> u8 {
    let interior = 1..block_size.saturating_sub(1);

    // Mean squared difference along one direction.
    let score = |dir: u8| -> u64 {
        let (step_x, step_y) = cdef_direction_offset(dir);
        let mut sum_sq = 0u64;
        let mut samples = 0u64;
        for row in interior.clone() {
            for col in interior.clone() {
                let centre = match frame.get(row * stride + col) {
                    Some(&value) => value,
                    None => continue,
                };
                let nx = col as i32 + step_x;
                let ny = row as i32 + step_y;
                if nx < 0 || ny < 0 {
                    continue;
                }
                if let Some(&neighbour) = frame.get(ny as usize * stride + nx as usize) {
                    let delta = u64::from(centre.abs_diff(neighbour));
                    sum_sq = sum_sq.saturating_add(delta * delta);
                    samples += 1;
                }
            }
        }
        if samples == 0 {
            u64::MAX
        } else {
            sum_sq / samples
        }
    };

    // `min_by_key` keeps the first of several equal minima, matching a scan
    // that only replaces the best on a strictly smaller score.
    (0..8u8).min_by_key(|&dir| score(dir)).unwrap_or(0)
}
/// CDEF filter for 8-bit samples, parameterised over a SIMD backend `S`.
pub struct CdefSimd<S> {
    /// Backend used by the vectorised row helper.
    simd: S,
}

impl<S: SimdOps> CdefSimd<S> {
    /// Creates a filter that uses `simd` as its backend.
    #[inline]
    pub const fn new(simd: S) -> Self {
        Self { simd }
    }

    /// Filters the 8×8 block at the start of `src` into `dst`.
    ///
    /// `src_stride` / `dst_stride` are the row pitches of the two buffers.
    /// Positions that fall outside either slice are skipped.
    #[allow(clippy::too_many_arguments)]
    pub fn filter_block_8x8(
        &self,
        src: &[u8],
        dst: &mut [u8],
        src_stride: usize,
        dst_stride: usize,
        pri_strength: u8,
        sec_strength: u8,
        direction: u8,
        damping: u8,
    ) {
        self.filter_block(
            8,
            src,
            dst,
            src_stride,
            dst_stride,
            pri_strength,
            sec_strength,
            direction,
            damping,
        );
    }

    /// Filters the 4×4 block at the start of `src` into `dst`.
    ///
    /// `src_stride` / `dst_stride` are the row pitches of the two buffers.
    /// Positions that fall outside either slice are skipped.
    #[allow(clippy::too_many_arguments)]
    pub fn filter_block_4x4(
        &self,
        src: &[u8],
        dst: &mut [u8],
        src_stride: usize,
        dst_stride: usize,
        pri_strength: u8,
        sec_strength: u8,
        direction: u8,
        damping: u8,
    ) {
        self.filter_block(
            4,
            src,
            dst,
            src_stride,
            dst_stride,
            pri_strength,
            sec_strength,
            direction,
            damping,
        );
    }

    /// Returns the direction (0–7) with the smallest mean squared difference
    /// between interior samples and their neighbour one step along that
    /// direction. Ties go to the lowest index.
    pub fn find_direction(&self, src: &[u8], stride: usize, block_size: usize) -> u8 {
        let mut best_direction = 0u8;
        let mut best_variance = u32::MAX;
        for dir in 0..8u8 {
            let variance = self.calculate_directional_variance(src, stride, block_size, dir);
            if variance < best_variance {
                best_variance = variance;
                best_direction = dir;
            }
        }
        best_direction
    }

    /// Shared implementation of the square block filters.
    #[allow(clippy::too_many_arguments)]
    fn filter_block(
        &self,
        size: usize,
        src: &[u8],
        dst: &mut [u8],
        src_stride: usize,
        dst_stride: usize,
        pri_strength: u8,
        sec_strength: u8,
        direction: u8,
        damping: u8,
    ) {
        for y in 0..size {
            for x in 0..size {
                let src_offset = y * src_stride + x;
                let dst_offset = y * dst_stride + x;
                if let (Some(&pixel), Some(out)) = (src.get(src_offset), dst.get_mut(dst_offset)) {
                    *out = self.filter_pixel(
                        src,
                        src_stride,
                        x,
                        y,
                        pixel,
                        pri_strength,
                        sec_strength,
                        direction,
                        damping,
                    );
                }
            }
        }
    }

    /// Filters one sample as a rounded weighted mean of itself (fixed weight
    /// 128) and up to eight taps: four along `direction` and four along the
    /// perpendicular `(-dy, dx)`, at one and two steps each way.
    ///
    /// Returns `pixel` unchanged when both strengths are zero.
    #[allow(clippy::too_many_arguments)]
    fn filter_pixel(
        &self,
        src: &[u8],
        stride: usize,
        x: usize,
        y: usize,
        pixel: u8,
        pri_strength: u8,
        sec_strength: u8,
        direction: u8,
        damping: u8,
    ) -> u8 {
        if pri_strength == 0 && sec_strength == 0 {
            return pixel;
        }

        let (dx, dy) = self.get_direction_offset(direction);
        let (sdx, sdy) = (-dy, dx);
        let taps = [
            (dx, dy, pri_strength),
            (-dx, -dy, pri_strength),
            (dx * 2, dy * 2, pri_strength),
            (-dx * 2, -dy * 2, pri_strength),
            (sdx, sdy, sec_strength),
            (-sdx, -sdy, sec_strength),
            (sdx * 2, sdy * 2, sec_strength),
            (-sdx * 2, -sdy * 2, sec_strength),
        ];

        let centre = i32::from(pixel);
        let mut sum = centre << 7;
        let mut total_weight = 128i32;
        for &(ox, oy, strength) in &taps {
            let (weighted_diff, weight) =
                self.calculate_weight(src, stride, x, y, ox, oy, pixel, strength, damping);
            // The sum holds weighted sample values, so each tap must add
            // `weight * tap = weight * centre + weight * (tap - centre)`.
            // Adding only the weighted difference while still dividing by the
            // full weight would darken flat areas.
            sum += weight * centre + weighted_diff;
            total_weight += weight;
        }

        let result = (sum + total_weight / 2) / total_weight;
        result.clamp(0, 255) as u8
    }

    /// Computes `(weighted_diff, weight)` for the tap at `(x + ox, y + oy)`.
    ///
    /// With `diff = tap - pixel`, the weight is
    /// `strength * (2^damping - |diff|) / 2^damping` and
    /// `weighted_diff = diff * weight`. `(0, 0)` is returned when the tap is
    /// outside the buffer (negative coordinate, column at or beyond `stride`,
    /// or offset past the end of `src`) or when `|diff| >= 2^damping`.
    #[allow(clippy::too_many_arguments)]
    fn calculate_weight(
        &self,
        src: &[u8],
        stride: usize,
        x: usize,
        y: usize,
        ox: i32,
        oy: i32,
        pixel: u8,
        strength: u8,
        damping: u8,
    ) -> (i32, i32) {
        // `strength * |diff|` is below 2^16 for `u8` inputs, so every shift of
        // 16 or more yields the same weight. Capping here avoids overflowing
        // the shift and the multiplication.
        const MAX_DAMPING_SHIFT: u8 = 16;

        let tx = x as i32 + ox;
        let ty = y as i32 + oy;
        if tx < 0 || ty < 0 {
            return (0, 0);
        }
        let (col, row) = (tx as usize, ty as usize);
        // A column at or beyond the stride belongs to a different row.
        if col >= stride {
            return (0, 0);
        }
        let tap_pixel = match row
            .checked_mul(stride)
            .and_then(|base| base.checked_add(col))
            .and_then(|offset| src.get(offset))
        {
            Some(&value) => value,
            None => return (0, 0),
        };

        let diff = i32::from(tap_pixel) - i32::from(pixel);
        let abs_diff = diff.abs();
        let threshold = 1i32 << damping.min(MAX_DAMPING_SHIFT);
        if abs_diff >= threshold {
            return (0, 0);
        }
        let weight = i32::from(strength) * (threshold - abs_diff) / threshold;
        (diff * weight, weight)
    }

    /// Maps a direction index (taken modulo 8) to its unit step `(dx, dy)`.
    fn get_direction_offset(&self, direction: u8) -> (i32, i32) {
        match direction % 8 {
            0 => (1, 0),
            1 => (1, 1),
            2 => (0, 1),
            3 => (-1, 1),
            4 => (-1, 0),
            5 => (-1, -1),
            6 => (0, -1),
            7 => (1, -1),
            _ => (1, 0),
        }
    }

    /// Mean squared difference between each interior sample (rows and columns
    /// `1..block_size - 1`) and its neighbour one step along `direction`.
    ///
    /// Returns `u32::MAX` when no sample pair could be measured.
    fn calculate_directional_variance(
        &self,
        src: &[u8],
        stride: usize,
        block_size: usize,
        direction: u8,
    ) -> u32 {
        let (dx, dy) = self.get_direction_offset(direction);
        let mut variance = 0u32;
        let mut count = 0u32;
        for y in 1..block_size.saturating_sub(1) {
            for x in 1..block_size.saturating_sub(1) {
                let pixel = match src.get(y * stride + x) {
                    Some(&value) => value,
                    None => continue,
                };
                let tx = x as i32 + dx;
                let ty = y as i32 + dy;
                if tx < 0 || ty < 0 {
                    continue;
                }
                if let Some(&tap_pixel) = src.get(ty as usize * stride + tx as usize) {
                    let diff = u32::from(pixel.abs_diff(tap_pixel));
                    // Saturate rather than overflow on very large blocks.
                    variance = variance.saturating_add(diff * diff);
                    count += 1;
                }
            }
        }
        variance.checked_div(count).unwrap_or(u32::MAX)
    }

    /// Adds `pri_strength + sec_strength` to the first `width` samples of
    /// `src`, eight at a time through the SIMD backend, clamping to `0..=255`.
    ///
    /// Chunks that do not fit in both slices are skipped. Samples after the
    /// last whole chunk are copied unchanged. Currently unused.
    #[allow(dead_code)]
    fn filter_row_simd(
        &self,
        src: &[u8],
        dst: &mut [u8],
        width: usize,
        pri_strength: u8,
        sec_strength: u8,
    ) {
        // Widen before adding: the sum of two `u8` strengths can exceed 255.
        let combined_strength = i16::from(pri_strength) + i16::from(sec_strength);

        let chunks = width / 8;
        for i in 0..chunks {
            let offset = i * 8;
            if offset + 8 > src.len() || offset + 8 > dst.len() {
                continue;
            }
            let mut pixels = U8x16::zero();
            for j in 0..8 {
                pixels[j] = src[offset + j];
            }
            let pixels_i16 = self.simd.widen_low_u8_to_i16(pixels);
            let strength_vec = I16x8::from_array([combined_strength; 8]);
            let filtered = self.simd.add_i16x8(pixels_i16, strength_vec);
            for j in 0..8 {
                dst[offset + j] = filtered[j].clamp(0, 255) as u8;
            }
        }

        let tail_start = chunks * 8;
        let tail_end = width.min(src.len()).min(dst.len());
        if tail_start < tail_end {
            dst[tail_start..tail_end].copy_from_slice(&src[tail_start..tail_end]);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `len` samples `0, step, 2 * step, …`, each capped at `cap`.
    fn ramp(len: u16, step: u16, cap: u16) -> Vec<u16> {
        (0..len).map(|i| (i * step).min(cap)).collect()
    }

    // --- CdefPixel ---------------------------------------------------------

    #[test]
    fn test_cdef_pixel_u8_max_value() {
        // 8-bit samples report 255 whatever depth is requested.
        for &depth in &[8u8, 10] {
            assert_eq!(<u8 as CdefPixel>::max_value(depth), 255);
        }
    }

    #[test]
    fn test_cdef_pixel_u16_max_value() {
        for &(depth, expected) in &[(8u8, 255), (10, 1023), (12, 4095)] {
            assert_eq!(<u16 as CdefPixel>::max_value(depth), expected);
        }
    }

    #[test]
    fn test_cdef_pixel_u16_as_i32() {
        for &(sample, expected) in &[(1023u16, 1023), (4095u16, 4095)] {
            assert_eq!(<u16 as CdefPixel>::as_i32(sample), expected);
        }
    }

    #[test]
    fn test_cdef_pixel_u16_from_clamped() {
        // In range, above the limit, below zero.
        for &(value, expected) in &[(512, 512u16), (2000, 1023u16), (-5, 0u16)] {
            assert_eq!(<u16 as CdefPixel>::from_clamped(value, 1023), expected);
        }
    }

    // --- whole-frame filter ------------------------------------------------

    #[test]
    fn test_cdef_filter_u16_zero_strength_is_noop() {
        let original = ramp(64, 16, u16::MAX);
        let mut frame = original.clone();
        cdef_filter_u16(&mut frame, 8, 8, 8, 0, 0, 0, 4, 10);
        assert_eq!(frame, original, "zero-strength CDEF must be a no-op");
    }

    #[test]
    fn test_cdef_filter_u16_output_in_range_10bit() {
        let mut frame = ramp(256, 4, 1023);
        cdef_filter_u16(&mut frame, 16, 16, 16, 4, 2, 2, 3, 10);
        for px in frame.iter().copied() {
            assert!(px <= 1023, "10-bit CDEF produced out-of-range value: {px}");
        }
    }

    #[test]
    fn test_cdef_filter_u16_output_in_range_12bit() {
        let mut frame = ramp(256, 16, 4095);
        cdef_filter_u16(&mut frame, 16, 16, 16, 8, 4, 5, 4, 12);
        for px in frame.iter().copied() {
            assert!(px <= 4095, "12-bit CDEF produced out-of-range value: {px}");
        }
    }

    #[test]
    fn test_cdef_filter_u16_uniform_plane_unchanged() {
        let mut frame = vec![512u16; 8 * 8];
        cdef_filter_u16(&mut frame, 8, 8, 8, 8, 4, 0, 5, 10);
        for px in frame.iter().copied() {
            assert_eq!(px, 512, "uniform plane should be unchanged by CDEF");
        }
    }

    // --- single-block filter -----------------------------------------------

    #[test]
    fn test_cdef_filter_block_u16_in_range() {
        const SIDE: usize = 16;
        let mut frame = ramp(256, 4, 1023);
        cdef_filter_block_u16(&mut frame, 0, 0, SIDE, SIDE, SIDE, 4, 2, 0, 4, 10);
        for px in frame.iter().copied() {
            assert!(
                px <= 1023,
                "block CDEF produced out-of-range 10-bit value: {px}"
            );
        }
    }

    #[test]
    fn test_cdef_filter_block_u16_partial_frame() {
        // A block starting at (6, 6) in a 10×10 frame is clipped to 4×4.
        const SIDE: usize = 10;
        let mut frame = vec![500u16; SIDE * SIDE];
        cdef_filter_block_u16(&mut frame, 6, 6, SIDE, SIDE, SIDE, 2, 1, 1, 3, 10);
        for px in frame.iter().copied() {
            assert!(px <= 1023);
        }
    }

    // --- direction search --------------------------------------------------

    #[test]
    fn test_cdef_find_direction_u16_returns_valid_direction() {
        let frame = ramp(64, 16, u16::MAX);
        let dir = cdef_find_direction_u16(&frame, 8, 8);
        assert!(dir < 8, "direction must be in range 0–7, got {dir}");
    }

    #[test]
    fn test_cdef_find_direction_u16_uniform_block() {
        let frame = vec![512u16; 64];
        assert!(cdef_find_direction_u16(&frame, 8, 8) < 8);
    }

    #[test]
    fn test_cdef_direction_offset_all_directions() {
        let offsets: Vec<(i32, i32)> = (0..8u8).map(|d| cdef_direction_offset(d)).collect();
        assert_eq!(offsets.len(), 8);

        for &(dx, dy) in &offsets {
            assert!(dx != 0 || dy != 0, "direction offset must be non-zero");
        }

        // Each direction in the first half is mirrored four slots later.
        let (first_half, second_half) = offsets.split_at(4);
        for (d, (&(dx, dy), &(odx, ody))) in first_half.iter().zip(second_half).enumerate() {
            assert_eq!(
                (-dx, -dy),
                (odx, ody),
                "directions {d} and {} must be opposite",
                d + 4
            );
        }
    }
}