#[cfg(test)]
mod tests;
use std::sync::Arc;
use cfg_if::cfg_if;
use v_frame::{frame::Frame, pixel::Pixel, plane::Plane};
use super::intra::BLOCK_TO_PLANE_SHIFT;
/// Side length of one importance block, in plane samples.
///
/// Derived as `1 << (IMPORTANCE_BLOCK_TO_BLOCK_SHIFT + BLOCK_TO_PLANE_SHIFT)`.
/// The block-sum helpers in this module are named and written for an 8x8
/// block, so this is expected to evaluate to 8 — NOTE(review): that depends on
/// `BLOCK_TO_PLANE_SHIFT`, which is defined in `super::intra`; confirm there.
pub const IMPORTANCE_BLOCK_SIZE: usize =
1 << (IMPORTANCE_BLOCK_TO_BLOCK_SHIFT + BLOCK_TO_PLANE_SHIFT);
/// Shift that is added to `BLOCK_TO_PLANE_SHIFT` to obtain the log2 of
/// [`IMPORTANCE_BLOCK_SIZE`] (presumably the log2 ratio between an importance
/// block and a regular block — inferred from the name).
pub const IMPORTANCE_BLOCK_TO_BLOCK_SHIFT: usize = 1;
/// Number of motion-vector units per pixel (presumably 1/8-pel precision —
/// inferred from the name; not used elsewhere in this part of the file).
pub const IMP_BLOCK_MV_UNITS_PER_PIXEL: i64 = 8;
/// Side length of one importance block expressed in motion-vector units, i.e.
/// [`IMPORTANCE_BLOCK_SIZE`] multiplied by [`IMP_BLOCK_MV_UNITS_PER_PIXEL`].
pub const IMP_BLOCK_SIZE_IN_MV_UNITS: i64 =
IMPORTANCE_BLOCK_SIZE as i64 * IMP_BLOCK_MV_UNITS_PER_PIXEL;
/// Estimates how much the Y plane of `frame` differs from that of `ref_frame`.
///
/// The Y plane of `frame` is divided into whole importance blocks of
/// `IMPORTANCE_BLOCK_SIZE` x `IMPORTANCE_BLOCK_SIZE` samples (partial blocks
/// at the right and bottom edges are ignored). For every block the rounded
/// mean sample value is computed in both frames, and the absolute difference
/// of the two means is accumulated.
///
/// # Returns
///
/// The accumulated difference divided by the number of blocks. If the plane
/// is smaller than one importance block in either dimension there are no
/// blocks to compare and `0.0` is returned (rather than the `NaN` that a
/// `0 / 0` division would produce).
///
/// # Preconditions
///
/// The Y plane of `ref_frame` must contain at least as many whole importance
/// blocks in each dimension as the Y plane of `frame`; this is checked in
/// debug builds only.
pub(crate) fn estimate_importance_block_difference<T: Pixel>(
    frame: &Arc<Frame<T>>,
    ref_frame: &Arc<Frame<T>>,
) -> f64 {
    // Number of samples in one importance block.
    const COUNT: i64 = (IMPORTANCE_BLOCK_SIZE * IMPORTANCE_BLOCK_SIZE) as i64;

    let plane_org = &frame.y_plane;
    let plane_ref = &ref_frame.y_plane;
    let h_in_imp_b = plane_org.height().get() / IMPORTANCE_BLOCK_SIZE;
    let w_in_imp_b = plane_org.width().get() / IMPORTANCE_BLOCK_SIZE;

    let block_count = w_in_imp_b * h_in_imp_b;
    if block_count == 0 {
        // Nothing to compare; avoid dividing zero by zero below.
        return 0.0;
    }

    // The block sums index both planes with the same block coordinates, so the
    // reference plane has to cover the whole block grid of the source plane.
    debug_assert!(
        plane_ref.height().get() / IMPORTANCE_BLOCK_SIZE >= h_in_imp_b
            && plane_ref.width().get() / IMPORTANCE_BLOCK_SIZE >= w_in_imp_b,
        "reference plane is smaller than the source plane"
    );

    // Mean of a block given the sum of its samples, rounded to nearest.
    let rounded_mean = |sum: i64| (sum + COUNT / 2) / COUNT;

    let mut imp_block_costs: u64 = 0;
    for y in 0..h_in_imp_b {
        for x in 0..w_in_imp_b {
            let mean_org = rounded_mean(sum_8x8_block(plane_org, x, y));
            let mean_ref = rounded_mean(sum_8x8_block(plane_ref, x, y));
            imp_block_costs += (mean_org - mean_ref).unsigned_abs();
        }
    }

    imp_block_costs as f64 / block_count as f64
}
/// Returns the sum of all samples in the importance block at block
/// coordinates (`x`, `y`) of `plane`.
///
/// Dispatches at compile time: the SSE2 implementation is used when the
/// `asm_x86_64` cfg is set, otherwise the portable Rust implementation.
fn sum_8x8_block<T: Pixel>(plane: &Plane<T>, x: usize, y: usize) -> i64 {
    cfg_if! {
        if #[cfg(not(asm_x86_64))] {
            sum_8x8_block_rust(plane, x, y)
        } else {
            // SAFETY: assumes `asm_x86_64` is only set for x86_64 targets,
            // where SSE2 is part of the baseline instruction set. The caller
            // is responsible for passing block coordinates that lie inside
            // the plane.
            unsafe { sum_8x8_block_sse2(plane, x, y) }
        }
    }
}
/// Portable implementation of the importance-block sample sum.
///
/// `x` and `y` are block coordinates; they are scaled by
/// `IMPORTANCE_BLOCK_SIZE` to obtain the sample position of the block's
/// top-left corner, relative to the plane's data origin. Rows are addressed
/// using the plane's stride.
///
/// Samples are widened to `i64` before being added, so the sum cannot
/// overflow regardless of the sample bit depth.
///
/// # Panics
///
/// Panics if a sample cannot be represented as `u16`. Out-of-range block
/// coordinates are handled by `get_dbg` (NOTE(review): presumably a
/// debug-checked slice access — confirm in `crate::data`).
#[cfg(any(not(asm_x86_64), test))]
fn sum_8x8_block_rust<T: Pixel>(plane: &Plane<T>, x: usize, y: usize) -> i64 {
    use crate::data::get_dbg;

    let x = x * IMPORTANCE_BLOCK_SIZE;
    let y = y * IMPORTANCE_BLOCK_SIZE;
    let data = get_dbg(plane.data(), plane.data_origin()..);
    let stride = plane.geometry().stride.get();

    // Use the same constant for the row count as for the row width so the
    // block stays square if the block size is ever changed.
    (y..y + IMPORTANCE_BLOCK_SIZE)
        .map(|row_idx| {
            let row = get_dbg(
                get_dbg(data, (x + row_idx * stride)..),
                ..IMPORTANCE_BLOCK_SIZE,
            );
            row.iter()
                .map(|pixel| i64::from(pixel.to_u16().expect("value should fit in u16")))
                .sum::<i64>()
        })
        .sum::<i64>()
}
/// SSE2 implementation of the importance-block sample sum.
///
/// `x` and `y` are block coordinates; they are scaled by
/// `IMPORTANCE_BLOCK_SIZE` to obtain the sample position of the block's
/// top-left corner, relative to the plane's data origin.
///
/// Each row is read with a single load of 8 samples, so this code assumes
/// `IMPORTANCE_BLOCK_SIZE == 8`.
///
/// # Safety
///
/// * The CPU must support SSE2.
/// * The block must lie entirely inside the plane's backing buffer: for every
///   `row` in `0..IMPORTANCE_BLOCK_SIZE`, the 8 samples starting at
///   `data_origin + x * IMPORTANCE_BLOCK_SIZE
///   + (y * IMPORTANCE_BLOCK_SIZE + row) * stride` must be readable.
///
/// # Panics
///
/// Panics if `T` is neither 1 nor 2 bytes wide.
#[cfg(asm_x86_64)]
#[target_feature(enable = "sse2")]
unsafe fn sum_8x8_block_sse2<T: Pixel>(plane: &Plane<T>, x: usize, y: usize) -> i64 {
    use std::arch::x86_64::*;

    let bx = x * IMPORTANCE_BLOCK_SIZE;
    let by = y * IMPORTANCE_BLOCK_SIZE;
    let stride = plane.geometry().stride.get();
    let origin = plane.data_origin();

    // SAFETY: the caller guarantees (see `# Safety`) that every row of the
    // block is inside the plane's buffer, so all pointer offsets and the
    // unaligned 8-sample loads below stay in bounds. The pointer casts assume
    // that a 1-byte `T` is `u8` and a 2-byte `T` is `u16`.
    unsafe {
        match std::mem::size_of::<T>() {
            1 => {
                let base_ptr = plane.data().as_ptr().add(origin).cast::<u8>();
                let zero = _mm_setzero_si128();
                let mut acc = _mm_setzero_si128();
                for row in 0..IMPORTANCE_BLOCK_SIZE {
                    let row_ptr = base_ptr.add(bx + (by + row) * stride);
                    // Load 8 bytes into the low half; the high half is zeroed.
                    let pixels = _mm_loadl_epi64(row_ptr.cast());
                    // SAD against zero is the horizontal sum of the 8 bytes,
                    // delivered in the low 64-bit lane.
                    acc = _mm_add_epi64(acc, _mm_sad_epu8(pixels, zero));
                }
                _mm_cvtsi128_si64(acc)
            }
            2 => {
                let base_ptr = plane.data().as_ptr().add(origin).cast::<u16>();
                let zero = _mm_setzero_si128();
                let mut acc = _mm_setzero_si128();
                for row in 0..IMPORTANCE_BLOCK_SIZE {
                    let row_ptr = base_ptr.add(bx + (by + row) * stride);
                    let pixels = _mm_loadu_si128(row_ptr.cast());
                    // Zero-extend the eight u16 samples to u32 before adding.
                    // Accumulating in 16-bit lanes would overflow for samples
                    // wider than 12 bits; 64 * 65535 fits easily in 32 bits.
                    let lo = _mm_unpacklo_epi16(pixels, zero);
                    let hi = _mm_unpackhi_epi16(pixels, zero);
                    acc = _mm_add_epi32(acc, _mm_add_epi32(lo, hi));
                }
                // Horizontal sum of the four 32-bit lanes into lane 0.
                let hi64 = _mm_srli_si128::<8>(acc);
                let sum2 = _mm_add_epi32(acc, hi64);
                let hi32 = _mm_srli_si128::<4>(sum2);
                let total = _mm_add_epi32(sum2, hi32);
                i64::from(_mm_cvtsi128_si32(total))
            }
            _ => unreachable!(),
        }
    }
}