use crate::simd::traits::SimdOps;
use crate::simd::types::I16x8;
pub struct Vp9InterpolateSimd<S> {
simd: S,
}
impl<S: SimdOps> Vp9InterpolateSimd<S> {
#[inline]
pub const fn new(simd: S) -> Self {
Self { simd }
}
#[allow(clippy::too_many_arguments)]
pub fn filter_h_8tap(
&self,
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
filter_idx: usize,
width: usize,
height: usize,
) {
let filter = &VP9_FILTERS[filter_idx % VP9_FILTERS.len()];
for y in 0..height {
for x in 0..width {
let src_offset = y * src_stride + x.saturating_sub(3);
let dst_offset = y * dst_stride + x;
if dst_offset >= dst.len() || src_offset + 8 > src.len() {
continue;
}
let result = self.convolve_8tap_h(&src[src_offset..], filter);
dst[dst_offset] = result;
}
}
}
#[allow(clippy::too_many_arguments)]
pub fn filter_v_8tap(
&self,
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
filter_idx: usize,
width: usize,
height: usize,
) {
let filter = &VP9_FILTERS[filter_idx % VP9_FILTERS.len()];
for y in 0..height {
for x in 0..width {
let dst_offset = y * dst_stride + x;
if dst_offset >= dst.len() {
continue;
}
let result = self.convolve_8tap_v(src, src_stride, x, y.saturating_sub(3), filter);
dst[dst_offset] = result;
}
}
}
#[allow(clippy::too_many_arguments)]
pub fn filter_2d_8tap(
&self,
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
h_filter_idx: usize,
v_filter_idx: usize,
width: usize,
height: usize,
) {
let temp_height = height + 7;
let mut temp = vec![0u8; temp_height * width];
self.filter_h_8tap(
src,
src_stride,
&mut temp,
width,
h_filter_idx,
width,
temp_height,
);
self.filter_v_8tap(&temp, width, dst, dst_stride, v_filter_idx, width, height);
}
pub fn bilinear_h(
&self,
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
fraction: u8,
width: usize,
height: usize,
) {
let inv_fraction = 8 - fraction;
for y in 0..height {
for x in 0..width {
let src_offset = y * src_stride + x;
let dst_offset = y * dst_stride + x;
if src_offset + 1 >= src.len() || dst_offset >= dst.len() {
continue;
}
let p0 = u16::from(src[src_offset]);
let p1 = u16::from(src[src_offset + 1]);
let result = (p0 * u16::from(inv_fraction) + p1 * u16::from(fraction) + 4) >> 3;
dst[dst_offset] = result as u8;
}
}
}
pub fn bilinear_v(
&self,
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
fraction: u8,
width: usize,
height: usize,
) {
let inv_fraction = 8 - fraction;
for y in 0..height {
for x in 0..width {
let src_offset = y * src_stride + x;
let dst_offset = y * dst_stride + x;
if src_offset + src_stride >= src.len() || dst_offset >= dst.len() {
continue;
}
let p0 = u16::from(src[src_offset]);
let p1 = u16::from(src[src_offset + src_stride]);
let result = (p0 * u16::from(inv_fraction) + p1 * u16::from(fraction) + 4) >> 3;
dst[dst_offset] = result as u8;
}
}
}
fn convolve_8tap_h(&self, src: &[u8], filter: &[i16; 8]) -> u8 {
if src.len() < 8 {
return 0;
}
let mut pixels = I16x8::zero();
for i in 0..8 {
pixels[i] = i16::from(src[i]);
}
let coeffs = I16x8::from_array(*filter);
let products = self.simd.mul_i16x8(pixels, coeffs);
let sum = self.simd.horizontal_sum_i16x8(products);
let result = (sum + 64) >> 7;
result.clamp(0, 255) as u8
}
fn convolve_8tap_v(
&self,
src: &[u8],
stride: usize,
x: usize,
y: usize,
filter: &[i16; 8],
) -> u8 {
let mut pixels = I16x8::zero();
for i in 0..8 {
let offset = (y + i) * stride + x;
if offset < src.len() {
pixels[i] = i16::from(src[offset]);
}
}
let coeffs = I16x8::from_array(*filter);
let products = self.simd.mul_i16x8(pixels, coeffs);
let sum = self.simd.horizontal_sum_i16x8(products);
let result = (sum + 64) >> 7;
result.clamp(0, 255) as u8
}
#[allow(dead_code)]
fn convolve_8tap_h_4(&self, src: &[u8], stride: usize, filter: &[i16; 8]) -> [u8; 4] {
let mut result = [0u8; 4];
for i in 0..4 {
let offset = i * stride;
if offset + 8 <= src.len() {
result[i] = self.convolve_8tap_h(&src[offset..], filter);
}
}
result
}
#[allow(dead_code)]
fn convolve_8tap_h_8_simd(&self, src: &[u8], filter: &[i16; 8]) -> [u8; 8] {
let mut result = [0u8; 8];
for i in 0..8 {
if i + 8 <= src.len() {
result[i] = self.convolve_8tap_h(&src[i..], filter);
}
}
result
}
}
/// Table of sixteen 8-tap kernels, selected by index.
///
/// Row 0 passes the fourth tap through unchanged (`128 >> 7 == 1`). Rows 1, 8
/// and 12 are the starting rows that `get_filter_index` uses for filter types
/// 0, 1 and 2 respectively.
///
/// NOTE(review): several rows do not sum to 128 (for example row 3 sums to 126
/// and row 8 to 129), so a `(sum + 64) >> 7` rounding does not reproduce flat
/// input exactly for those rows — verify the coefficients against the
/// reference tables.
#[rustfmt::skip]
pub const VP9_FILTERS: [[i16; 8]; 16] = [
    // Rows 0..=7
    [0,  0,   0, 128,  0,   0, 0, 0],
    [0,  1,  -5, 126,  8,  -3, 1, 0],
    [0,  2, -10, 123, 18,  -6, 1, 0],
    [0,  3, -15, 116, 30, -10, 2, 0],
    [0,  3, -18, 106, 44, -14, 3, 0],
    [0,  4, -20,  94, 58, -17, 3, 0],
    [0,  4, -21,  80, 74, -21, 4, 0],
    [0,  3, -17,  58, 94, -20, 4, 0],
    // Rows 8..=11
    [0, -3,  -1, 128,  8,  -3, 0, 0],
    [0, -2,   2, 126,  8,  -2, 0, 0],
    [0, -2,   6, 120, 12,  -4, 0, 0],
    [0, -1,   8, 112, 18,  -6, 1, 0],
    // Rows 12..=15
    [0,  1,  -3, 127,  4,  -1, 0, 0],
    [0,  2,  -7, 123, 12,  -4, 2, 0],
    [0,  3, -11, 114, 26,  -8, 4, 0],
    [0,  4, -16, 102, 44, -13, 5, 0],
];
/// Maps a sub-pixel position and filter type to a kernel index.
///
/// A `subpel` of 0 always yields index 0. Otherwise the index is a starting
/// row chosen by `filter_type` (0 -> 1, 1 -> 8, 2 -> 12, anything else -> 1)
/// plus `subpel - 1`, with that addend capped at 7.
///
/// NOTE(review): for `filter_type == 2` and `subpel >= 5` the result is 16 or
/// more, which is past the last row of the 16-entry `VP9_FILTERS` table; the
/// filter methods reduce the index modulo the table length. Confirm this
/// wrap-around is intended.
#[inline]
pub fn get_filter_index(subpel: u8, filter_type: u8) -> usize {
    match subpel {
        0 => 0,
        position => {
            let first_row: usize = match filter_type {
                1 => 8,
                2 => 12,
                // Type 0 and every unrecognised type share the same starting row.
                _ => 1,
            };
            first_row + usize::from(position - 1).min(7)
        }
    }
}