#![forbid(unsafe_code)]
use super::scalar::ScalarFallback;
use super::traits::{SimdOps, SimdOpsExt};
use super::types::{I16x8, U8x16};
/// Blending helpers parameterised over a SIMD backend `S`.
///
/// The struct only stores the backend; the vector-flavoured methods forward
/// their lane arithmetic to it.
pub struct BlendOps<S>
where
    S: SimdOps,
{
    /// Backend that executes the vector operations.
    simd: S,
}
impl<S> Default for BlendOps<S>
where
    S: SimdOps + Default,
{
    /// Builds a `BlendOps` around the backend's own default value.
    fn default() -> Self {
        let backend = S::default();
        Self::new(backend)
    }
}
impl<S: SimdOps> BlendOps<S> {
    /// Wraps `simd` so the blending helpers can route vector work through it.
    #[inline]
    #[must_use]
    pub const fn new(simd: S) -> Self {
        Self { simd }
    }

    /// Returns a shared reference to the wrapped backend.
    #[inline]
    #[must_use]
    pub const fn simd(&self) -> &S {
        &self.simd
    }

    /// Interpolates between `a` and `b` with `weight` expressed in 1/256 units.
    ///
    /// Computes `(a * (256 - weight) + b * weight + 128) >> 8`, i.e. the blend
    /// rounded to nearest with ties rounded up. `weight == 0` returns `a`; the
    /// largest weight, 255, weights `b` by 255/256.
    ///
    /// The arithmetic is done on unsigned values so the rounding is the same
    /// whether `b` is above or below `a`. The signed form
    /// `a + ((b - a) * weight + 128) / 256` truncates toward zero when `b < a`
    /// and can be off by one (for example 200 -> 0 at weight 128 must be 100).
    #[inline]
    // The blend is at most (255 * 256 + 128) >> 8 == 255, so the cast is lossless.
    #[allow(clippy::cast_possible_truncation)]
    pub fn lerp_u8(&self, a: u8, b: u8, weight: u8) -> u8 {
        let w = u32::from(weight);
        let blended = (u32::from(a) * (256 - w) + u32::from(b) * w + 128) >> 8;
        blended as u8
    }

    /// Lane-wise `a + (((b - a) * weight) >> 8)` evaluated by the backend.
    ///
    /// The steps are delegated, in order, to `sub_i16x8`, `mul_i16x8`,
    /// `shr_i16x8` (shift by 8) and `add_i16x8`.
    ///
    /// NOTE(review): unlike [`Self::lerp_u8`], no `+ 128` rounding term is
    /// added before the shift, so results may differ by one from the scalar
    /// path.
    ///
    /// NOTE(review): if `mul_i16x8` is a plain 16-bit multiply, the product
    /// `(b - a) * weight` does not fit in an `i16` once `|b - a| * weight`
    /// exceeds 32767 — confirm the backend's overflow behaviour.
    #[inline]
    pub fn lerp_i16x8(&self, a: I16x8, b: I16x8, weight: i16) -> I16x8 {
        let diff = self.simd.sub_i16x8(b, a);
        let weight_vec = I16x8::splat(weight);
        let scaled = self.simd.mul_i16x8(diff, weight_vec);
        let shifted = self.simd.shr_i16x8(scaled, 8);
        self.simd.add_i16x8(a, shifted)
    }

    /// Blends the sixteen lanes of `a` and `b` with a single `weight`.
    ///
    /// Every lane is computed with [`Self::lerp_u8`], so the vector and scalar
    /// results always agree: `(a * (256 - weight) + b * weight + 128) >> 8`.
    #[inline]
    pub fn weighted_avg_u8x16(&self, a: U8x16, b: U8x16, weight: u8) -> U8x16 {
        let mut blended = [0u8; 16];
        for (out, (&from, &to)) in blended.iter_mut().zip(a.0.iter().zip(b.0.iter())) {
            *out = self.lerp_u8(from, to, weight);
        }
        U8x16(blended)
    }

    /// Bilinearly blends four neighbouring samples.
    ///
    /// `hweight` interpolates `tl -> tr` and `bl -> br`; `vweight` then
    /// interpolates between those two results. Each step is a
    /// [`Self::lerp_u8`], so both weights are in 1/256 units.
    #[inline]
    #[allow(dead_code)]
    pub fn bilinear_blend_u8(
        &self,
        tl: u8,
        tr: u8,
        bl: u8,
        br: u8,
        hweight: u8,
        vweight: u8,
    ) -> u8 {
        let top = self.lerp_u8(tl, tr, hweight);
        let bottom = self.lerp_u8(bl, br, hweight);
        self.lerp_u8(top, bottom, vweight)
    }

    /// Applies [`Self::bilinear_blend_u8`] to up to eight samples.
    ///
    /// The number of samples written is the smallest of 8 and the lengths of
    /// `tl`, `tr`, `bl`, `br` and `dst`; any remaining elements of `dst` are
    /// left untouched.
    #[inline]
    #[allow(dead_code, clippy::too_many_arguments)]
    pub fn bilinear_blend_row_8(
        &self,
        tl: &[u8],
        tr: &[u8],
        bl: &[u8],
        br: &[u8],
        hweight: u8,
        vweight: u8,
        dst: &mut [u8],
    ) {
        // `zip` stops at the shortest input, which yields the same count as
        // taking the minimum of all five lengths.
        let upper_row = tl.iter().zip(tr);
        let lower_row = bl.iter().zip(br);
        let samples = dst.iter_mut().zip(upper_row.zip(lower_row)).take(8);
        for (out, ((&top_left, &top_right), (&bottom_left, &bottom_right))) in samples {
            *out = self.bilinear_blend_u8(
                top_left,
                top_right,
                bottom_left,
                bottom_right,
                hweight,
                vweight,
            );
        }
    }
}
impl<S: SimdOps + SimdOpsExt> BlendOps<S> {
    /// Bilinear blend of one eight-sample row, evaluated through the backend.
    ///
    /// Each input row is widened with `load8_u8_to_i16x8`, the top and bottom
    /// pairs are interpolated horizontally with `lerp_i16x8` using `hweight`,
    /// the two results are interpolated vertically using `vweight`, and the
    /// outcome is written to `dst` with `store8_i16x8_as_u8`.
    ///
    /// NOTE(review): no slice lengths are checked here; what happens when a
    /// slice holds fewer than eight samples depends on the backend's load and
    /// store helpers — confirm before calling with short rows.
    #[allow(dead_code, clippy::similar_names, clippy::too_many_arguments)]
    pub fn bilinear_blend_row_8_simd(
        &self,
        tl: &[u8],
        tr: &[u8],
        bl: &[u8],
        br: &[u8],
        hweight: u8,
        vweight: u8,
        dst: &mut [u8],
    ) {
        let widen = |row: &[u8]| self.simd.load8_u8_to_i16x8(row);
        let (tl_v, tr_v) = (widen(tl), widen(tr));
        let (bl_v, br_v) = (widen(bl), widen(br));

        let horizontal = i16::from(hweight);
        let vertical = i16::from(vweight);

        let top = self.lerp_i16x8(tl_v, tr_v, horizontal);
        let bottom = self.lerp_i16x8(bl_v, br_v, horizontal);
        let blended = self.lerp_i16x8(top, bottom, vertical);

        self.simd.store8_i16x8_as_u8(blended, dst);
    }
}
/// Creates a [`BlendOps`] that runs on a freshly constructed [`ScalarFallback`].
#[inline]
#[must_use]
pub fn blend_ops() -> BlendOps<ScalarFallback> {
    let backend = ScalarFallback::new();
    BlendOps::new(backend)
}
/// Symmetric six-tap filter coefficients used by `apply_half_pel_h` and
/// `apply_half_pel_v`.
///
/// The taps sum to 32, matching the `(sum + 16) >> 5` rounding and
/// normalisation those functions apply.
#[allow(dead_code)]
pub const HALF_PEL_FILTER: [i16; 6] = [1, -5, 20, 20, -5, 1];
/// Asymmetric six-tap filter coefficients; the taps sum to 64.
///
/// NOTE(review): presumably intended for quarter-sample interpolation with a
/// `>> 6` normalisation, but none of the functions shown alongside it use
/// this table — confirm against the callers.
#[allow(dead_code)]
pub const QUARTER_PEL_FILTER: [i16; 6] = [1, -5, 52, 20, -5, 1];
#[allow(dead_code, clippy::cast_sign_loss)]
pub fn apply_half_pel_h(src: &[u8], dst: &mut [u8], width: usize) {
if width < 6 || src.len() < width + 5 {
return;
}
for x in 0..width {
let mut sum: i32 = 0;
for (k, &tap) in HALF_PEL_FILTER.iter().enumerate() {
sum += i32::from(src[x + k]) * i32::from(tap);
}
let result = (sum + 16) >> 5;
dst[x] = result.clamp(0, 255) as u8;
}
}
/// Applies the six-tap [`HALF_PEL_FILTER`] vertically across six rows.
///
/// `src[k]` supplies the samples for tap `k`. For every column `x` below
/// `min(width, dst.len())` the weighted sum of the six rows is rounded with
/// `(sum + 16) >> 5`, clamped to `0..=255` and stored in `dst[x]`.
///
/// The call is a silent no-op when fewer than six rows are given. A row that
/// is too short to contain column `x` contributes nothing to that column's
/// sum.
#[allow(dead_code, clippy::cast_sign_loss)]
pub fn apply_half_pel_v(src: &[&[u8]], dst: &mut [u8], width: usize) {
    if src.len() < 6 {
        return;
    }

    // `take(width)` over `dst` visits exactly `min(width, dst.len())` columns.
    for (column, out) in dst.iter_mut().enumerate().take(width) {
        let acc: i32 = HALF_PEL_FILTER
            .iter()
            .zip(src.iter())
            .filter_map(|(&tap, row)| {
                // Rows that do not reach this column are skipped entirely.
                row.get(column)
                    .map(|&sample| i32::from(sample) * i32::from(tap))
            })
            .sum();
        let rounded = (acc + 16) >> 5;
        *out = rounded.clamp(0, 255) as u8;
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scalar interpolation returns the start value at weight 0 and lands
    /// within one step of the expected value at the midpoint and at 255.
    #[test]
    fn test_lerp_u8() {
        let ops = blend_ops();

        assert_eq!(ops.lerp_u8(100, 200, 0), 100);

        let halfway = ops.lerp_u8(0, 200, 128);
        assert!((99..=101).contains(&halfway));

        let near_end = ops.lerp_u8(0, 200, 255);
        assert!((198..=200).contains(&near_end));
    }

    /// The sixteen-lane average behaves like the scalar blend in every lane.
    #[test]
    fn test_weighted_avg_u8x16() {
        let ops = blend_ops();
        let low = U8x16::splat(100);
        let high = U8x16::splat(200);

        let halfway = ops.weighted_avg_u8x16(low, high, 128);
        assert!(halfway.0.iter().all(|lane| (149..=151).contains(lane)));

        let all_low = ops.weighted_avg_u8x16(low, high, 0);
        assert_eq!(all_low.0, [100; 16]);

        let near_high = ops.weighted_avg_u8x16(low, high, 255);
        assert!(near_high.0.iter().all(|lane| (199..=200).contains(lane)));
    }

    /// Bilinear blending of a flat patch is a no-op, and extreme weights pick
    /// out the matching corner.
    #[test]
    fn test_bilinear_blend() {
        let ops = blend_ops();

        assert_eq!(ops.bilinear_blend_u8(100, 100, 100, 100, 128, 128), 100);
        assert_eq!(ops.bilinear_blend_u8(100, 0, 0, 0, 0, 0), 100);

        let top_right = ops.bilinear_blend_u8(0, 100, 0, 0, 255, 0);
        assert!(top_right >= 99);

        let bottom_left = ops.bilinear_blend_u8(0, 0, 100, 0, 0, 255);
        assert!(bottom_left >= 99);
    }

    /// The vector interpolation reaches roughly the midpoint in lane 0.
    #[test]
    fn test_lerp_i16x8() {
        let ops = blend_ops();
        let start = I16x8::from_array([0, 10, 20, 30, 40, 50, 60, 70]);
        let end = I16x8::from_array([100, 110, 120, 130, 140, 150, 160, 170]);

        let blended = ops.lerp_i16x8(start, end, 128);
        assert!((49..=51).contains(&blended.0[0]));
    }

    /// A flat eight-sample row survives the scalar row blend unchanged.
    #[test]
    fn test_bilinear_row() {
        let ops = blend_ops();
        let flat = [100u8; 8];
        let mut out = [0u8; 8];

        ops.bilinear_blend_row_8(&flat, &flat, &flat, &flat, 128, 128, &mut out);

        assert!(out.iter().all(|&sample| sample == 100));
    }

    /// The half-pel taps add up to the filter's normalisation factor.
    #[test]
    fn test_half_pel_filter() {
        assert_eq!(HALF_PEL_FILTER.iter().sum::<i16>(), 32);
    }

    /// Filtering a flat row horizontally leaves it (almost) unchanged.
    #[test]
    fn test_apply_half_pel_h() {
        let flat = [128u8; 16];
        let mut out = [0u8; 10];

        apply_half_pel_h(&flat, &mut out, 10);

        assert!(out.iter().all(|sample| (127..=129).contains(sample)));
    }
}