#![cfg_attr(not(feature = "std"), allow(dead_code))]
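//! Intra prediction and residual reconstruction for VP8: bordered prediction
//! workspaces, the 16x16/8x8 and 4x4 prediction modes, and SIMD-dispatched
//! add-residue kernels.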
use archmage::prelude::*;
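// Luma prediction workspace: 17 rows with a stride of 32. Row 0 holds the
// border pixels from the macroblock above (plus above-right samples), column 0
// the border pixels from the macroblock to the left, and index 0 the top-left
// corner; the 16x16 block itself starts at `LUMA_STRIDE + 1`.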
pub(crate) const LUMA_STRIDE: usize = 32;
pub(crate) const LUMA_BLOCK_SIZE: usize = LUMA_STRIDE * (1 + 16);
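/// Builds a fresh luma prediction workspace for the macroblock at
/// (`mbx`, `mby`), filling the border row and column from the `top` and `left`
/// reconstruction buffers, or with the VP8 defaults 127 (above) and 129 (left)
/// at the frame edges.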
pub(crate) fn create_border_luma(
mbx: usize,
mby: usize,
mbw: usize,
top: &[u8],
left: &[u8],
) -> [u8; LUMA_BLOCK_SIZE] {
let stride = LUMA_STRIDE;
let mut ws = [0u8; LUMA_BLOCK_SIZE];
{
let above = &mut ws[1..stride];
if mby == 0 {
for above in above.iter_mut() {
*above = 127;
}
} else {
for (above, &top) in above[..16].iter_mut().zip(&top[mbx * 16..]) {
*above = top;
}
if mbx == mbw - 1 {
for above in &mut above[16..] {
*above = top[mbx * 16 + 15];
}
} else {
for (above, &top) in above[16..].iter_mut().zip(&top[mbx * 16 + 16..]) {
*above = top;
}
}
}
}
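// Copy the four above-right border pixels down to rows 4, 8 and 12 so the
// rightmost 4x4 sub-blocks of the lower rows also see above-right samples.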
for i in 17usize..21 {
ws[4 * stride + i] = ws[i];
ws[8 * stride + i] = ws[i];
ws[12 * stride + i] = ws[i];
}
if mbx == 0 {
for i in 0usize..16 {
ws[(i + 1) * stride] = 129;
}
} else {
for (i, &l) in (0usize..16).zip(&left[1..]) {
ws[(i + 1) * stride] = l;
}
}
ws[0] = if mby == 0 {
127
} else if mbx == 0 {
129
} else {
left[0]
};
ws
}
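/// Refreshes the border row and column of an existing luma workspace in place;
/// the fill rules match `create_border_luma`.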
#[inline(always)]
pub(crate) fn update_border_luma(
ws: &mut [u8; LUMA_BLOCK_SIZE],
mbx: usize,
mby: usize,
mbw: usize,
top: &[u8],
left: &[u8],
) {
let stride = LUMA_STRIDE;
if mby == 0 {
ws[1..stride].fill(127);
} else {
let top_mb = &top[mbx * 16..];
ws[1..][..16].copy_from_slice(&top_mb[..16]);
if mbx == mbw - 1 {
let last = top_mb[15];
ws[17..stride].fill(last);
} else {
let tr_len = stride - 17;
let copy_len = tr_len.min(top_mb.len() - 16);
ws[17..][..copy_len].copy_from_slice(&top_mb[16..][..copy_len]);
}
}
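// Copy the four above-right border pixels down to rows 4, 8 and 12 so the
// rightmost 4x4 sub-blocks of the lower rows also see above-right samples.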
for i in 17usize..21 {
ws[4 * stride + i] = ws[i];
ws[8 * stride + i] = ws[i];
ws[12 * stride + i] = ws[i];
}
if mbx == 0 {
for i in 0usize..16 {
ws[(i + 1) * stride] = 129;
}
} else {
for (i, &l) in (0usize..16).zip(&left[1..]) {
ws[(i + 1) * stride] = l;
}
}
ws[0] = if mby == 0 {
127
} else if mbx == 0 {
129
} else {
left[0]
};
}
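/// Refreshes the border row and column of an existing chroma workspace in
/// place; see `create_border_chroma`.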
#[inline(always)]
pub(crate) fn update_border_chroma(
cb: &mut [u8; CHROMA_BLOCK_SIZE],
mbx: usize,
mby: usize,
top: &[u8],
left: &[u8],
) {
let stride = CHROMA_STRIDE;
if mby == 0 {
cb[1..stride].fill(127);
} else {
cb[1..][..8].copy_from_slice(&top[mbx * 8..mbx * 8 + 8]);
}
if mbx == 0 {
for y in 0usize..8 {
cb[(y + 1) * stride] = 129;
}
} else {
for (y, &l) in (0usize..8).zip(&left[1..]) {
cb[(y + 1) * stride] = l;
}
}
cb[0] = if mby == 0 {
127
} else if mbx == 0 {
129
} else {
left[0]
};
}
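// Chroma prediction workspace: one border row plus eight pixel rows, stride 32.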
pub(crate) const CHROMA_STRIDE: usize = 32;
pub(crate) const CHROMA_BLOCK_SIZE: usize = CHROMA_STRIDE * (8 + 1);
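// Coefficient storage for one macroblock: 24 blocks of 16 coefficients each
// (16 luma plus 4 + 4 chroma 4x4 sub-blocks).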
pub(crate) const MB_COEFF_SIZE: usize = 24 * 16;
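/// Borrows the `idx`-th 4x4 coefficient block as a fixed-size array.
///
/// A minimal usage sketch:
///
/// ```ignore
/// let mut coeffs = [0i32; MB_COEFF_SIZE];
/// coeff_block(&mut coeffs, 0)[0] = 4; // DC coefficient of the first sub-block
/// ```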
#[inline(always)]
pub(crate) fn coeff_block(blocks: &mut [i32; MB_COEFF_SIZE], idx: usize) -> &mut [i32; 16] {
let start = idx * 16;
(&mut blocks[start..start + 16]).try_into().unwrap()
}
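/// Builds a fresh chroma prediction workspace for the macroblock at
/// (`mbx`, `mby`); the 8x8 analogue of `create_border_luma`.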
pub(crate) fn create_border_chroma(
mbx: usize,
mby: usize,
top: &[u8],
left: &[u8],
) -> [u8; CHROMA_BLOCK_SIZE] {
let stride: usize = CHROMA_STRIDE;
let mut chroma_block = [0u8; CHROMA_BLOCK_SIZE];
{
let above = &mut chroma_block[1..stride];
if mby == 0 {
for above in above.iter_mut() {
*above = 127;
}
} else {
for (above, &top) in above.iter_mut().zip(&top[mbx * 8..]) {
*above = top;
}
}
}
if mbx == 0 {
for y in 0usize..8 {
chroma_block[(y + 1) * stride] = 129;
}
} else {
for (y, &l) in (0usize..8).zip(&left[1..]) {
chroma_block[(y + 1) * stride] = l;
}
}
chroma_block[0] = if mby == 0 {
127
} else if mbx == 0 {
129
} else {
left[0]
};
chroma_block
}
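/// Adds a 4x4 residual block to the prediction at block offset (`y0`, `x0`),
/// clamping each sum to 0..=255. `incant!` selects the best available kernel
/// at runtime.
///
/// A minimal usage sketch (buffer contents are illustrative):
///
/// ```ignore
/// let mut ws = [128u8; LUMA_BLOCK_SIZE];
/// let residual = [8i32; 16];
/// add_residue(&mut ws, &residual, 1, 1, LUMA_STRIDE);
/// assert_eq!(ws[LUMA_STRIDE + 1], 136);
/// ```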
#[allow(clippy::manual_clamp)]
#[inline(always)]
pub(crate) fn add_residue<const N: usize>(
pblock: &mut [u8; N],
rblock: &[i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
incant!(
add_residue_dispatch(pblock.as_mut_slice(), rblock, y0, x0, stride),
[v3, neon, scalar]
);
}
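// `incant!` resolves the call above to the `_v3`, `_neon`, or `_scalar`
// suffixed dispatcher below, passing the matching capability token.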
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn add_residue_dispatch_v3(
token: X64V3Token,
pblock: &mut [u8],
rblock: &[i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
add_residue_sse2(token, pblock, rblock, y0, x0, stride);
}
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn add_residue_dispatch_neon(
token: NeonToken,
pblock: &mut [u8],
rblock: &[i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
crate::common::transform::add_residue_neon(token, pblock, rblock, y0, x0, stride);
}
#[inline(always)]
fn add_residue_dispatch_scalar(
_token: ScalarToken,
pblock: &mut [u8],
rblock: &[i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
let mut pos = y0 * stride + x0;
for row in rblock.chunks(4) {
for (p, &a) in pblock[pos..][..4].iter_mut().zip(row.iter()) {
*p = (a + i32::from(*p)).clamp(0, 255) as u8;
}
pos += stride;
}
}
#[archmage::arcane]
#[inline(always)]
fn add_residue_sse2(
_token: archmage::X64V3Token,
pblock: &mut [u8],
rblock: &[i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
use archmage::intrinsics::x86_64 as simd_mem;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
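// Per row: widen four prediction bytes u8 -> i32, add the residuals, then
// narrow back with saturating packs; the final unsigned pack performs the
// 0..=255 clamp.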
let zero = _mm_setzero_si128();
let mut pos = y0 * stride + x0;
for row_idx in 0..4 {
let r = simd_mem::_mm_loadu_si128(
<&[i32; 4]>::try_from(&rblock[row_idx * 4..row_idx * 4 + 4]).unwrap(),
);
let p_arr: [u8; 4] = pblock[pos..pos + 4].try_into().unwrap();
let p_bytes = i32::from_ne_bytes(p_arr);
let p4 = _mm_cvtsi32_si128(p_bytes);
let p_words = _mm_unpacklo_epi8(p4, zero);
let p = _mm_unpacklo_epi16(p_words, zero);
let sum = _mm_add_epi32(p, r);
let packed_16 = _mm_packs_epi32(sum, sum);
let packed_8 = _mm_packus_epi16(packed_16, packed_16);
let result = _mm_cvtsi128_si32(packed_8) as u32;
pblock[pos..pos + 4].copy_from_slice(&result.to_ne_bytes());
pos += stride;
}
}
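/// Like `add_residue`, but also zeroes `rblock` so the coefficient buffer can
/// be reused for the next sub-block.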
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn add_residue_and_clear<const N: usize>(
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
incant!(
add_residue_and_clear_dispatch(pblock.as_mut_slice(), rblock, y0, x0, stride),
[v3, scalar]
);
}
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[inline(always)]
fn add_residue_and_clear_dispatch_v3(
token: X64V3Token,
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
add_residue_and_clear_sse2(token, pblock, rblock, y0, x0, stride);
}
#[allow(dead_code)]
#[inline(always)]
fn add_residue_and_clear_dispatch_scalar(
_token: ScalarToken,
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
let mut pos = y0 * stride + x0;
for row in rblock.chunks_mut(4) {
for (p, coeff) in pblock[pos..][..4].iter_mut().zip(row.iter_mut()) {
*p = (*coeff + i32::from(*p)).clamp(0, 255) as u8;
*coeff = 0;
}
pos += stride;
}
}
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn add_residue_and_clear_with_token<const N: usize>(
token: archmage::X64V3Token,
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
add_residue_and_clear_sse2(token, pblock.as_mut_slice(), rblock, y0, x0, stride);
}
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn add_residue_and_clear_with_token<const N: usize>(
_token: (),
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
add_residue_and_clear_scalar(pblock, rblock, y0, x0, stride);
}
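// The SIMD capability token for the current target, if feature detection
// succeeded; `None` means no SIMD token is available.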
#[cfg(target_arch = "x86_64")]
pub(crate) type SimdTokenType = Option<archmage::X64V3Token>;
#[cfg(target_arch = "aarch64")]
pub(crate) type SimdTokenType = Option<archmage::NeonToken>;
#[cfg(target_arch = "wasm32")]
pub(crate) type SimdTokenType = Option<archmage::Wasm128Token>;
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "wasm32"
)))]
#[allow(dead_code)]
pub(crate) type SimdTokenType = Option<()>;
#[allow(dead_code)]
#[inline(always)]
fn add_residue_and_clear_scalar<const N: usize>(
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
let mut pos = y0 * stride + x0;
for row in rblock.chunks_mut(4) {
for (p, coeff) in pblock[pos..][..4].iter_mut().zip(row.iter_mut()) {
*p = (*coeff + i32::from(*p)).clamp(0, 255) as u8;
*coeff = 0;
}
pos += stride;
}
}
#[archmage::arcane]
#[inline(always)]
fn add_residue_and_clear_sse2(
_token: archmage::X64V3Token,
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
use archmage::intrinsics::x86_64 as simd_mem;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
let start = y0 * stride + x0;
let end = start + 3 * stride + 4;
assert!(end <= pblock.len(), "pblock too small for 4x4 block");
let zero = _mm_setzero_si128();
let mut pos = start;
for row_idx in 0..4 {
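// Load the residual row, then clear it in place; the add/clamp sequence
// below mirrors `add_residue_sse2`.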
let rslice = &mut rblock[row_idx * 4..row_idx * 4 + 4];
let r = simd_mem::_mm_loadu_si128(<&[i32; 4]>::try_from(&*rslice).unwrap());
simd_mem::_mm_storeu_si128(<&mut [i32; 4]>::try_from(rslice).unwrap(), zero);
let p_arr: [u8; 4] = pblock[pos..pos + 4].try_into().unwrap();
let p_bytes = i32::from_ne_bytes(p_arr);
let p4 = _mm_cvtsi32_si128(p_bytes);
let p_words = _mm_unpacklo_epi8(p4, zero);
let p = _mm_unpacklo_epi16(p_words, zero);
let sum = _mm_add_epi32(p, r);
let packed_16 = _mm_packs_epi32(sum, sum);
let packed_8 = _mm_packus_epi16(packed_16, packed_16);
let result = _mm_cvtsi128_si32(packed_8) as u32;
pblock[pos..pos + 4].copy_from_slice(&result.to_ne_bytes());
pos += stride;
}
}
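/// 1-2-1 weighted average with rounding: `(left + 2 * this + right + 2) >> 2`;
/// e.g. `avg3(2, 4, 6) == 4`.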
fn avg3(left: u8, this: u8, right: u8) -> u8 {
let avg = (u16::from(left) + 2 * u16::from(this) + u16::from(right) + 2) >> 2;
avg as u8
}
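/// Two-pixel average, rounding up: `(this + right + 1) >> 1`;
/// e.g. `avg2(2, 1) == 2`.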
fn avg2(this: u8, right: u8) -> u8 {
let avg = (u16::from(this) + u16::from(right) + 1) >> 1;
avg as u8
}
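/// V_PRED: copies the border row above the block into all `size` rows below
/// it. The source row is taken from the start of the split-off `above` region,
/// so this assumes the caller passes `y0 == 1` (the block sits directly under
/// the border row).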
pub(crate) fn predict_vpred<const N: usize>(
a: &mut [u8; N],
size: usize,
x0: usize,
y0: usize,
stride: usize,
) {
assert!((y0 + size - 1) * stride + x0 + size <= N);
assert!(y0 >= 1);
let (above, curr) = a.as_mut_slice().split_at_mut(stride * y0);
let above_slice = &above[x0..][..size];
for curr_chunk in curr.chunks_exact_mut(stride).take(size) {
curr_chunk[x0..][..size].copy_from_slice(above_slice);
}
}
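/// H_PRED: fills each row of the block with the border pixel directly to its
/// left.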
pub(crate) fn predict_hpred<const N: usize>(
a: &mut [u8; N],
size: usize,
x0: usize,
y0: usize,
stride: usize,
) {
assert!((y0 + size - 1) * stride + x0 + size <= N);
assert!(x0 >= 1);
for chunk in a
.as_mut_slice()
.chunks_exact_mut(stride)
.skip(y0)
.take(size)
{
let left = chunk[x0 - 1];
chunk[x0..][..size].fill(left);
}
}
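/// DC_PRED on a bordered workspace: fills the `size`x`size` block with the
/// rounded average of the available border pixels, or 128 when neither the
/// above nor the left border exists.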
#[inline(always)]
pub(crate) fn predict_dcpred<const N: usize>(
a: &mut [u8; N],
size: usize,
stride: usize,
above: bool,
left: bool,
) {
assert!(stride * (size + 1) <= N);
let mut sum = 0u32;
let mut shf = if size == 8 { 2u32 } else { 3u32 };
if left {
for y in 0usize..size {
sum += u32::from(a[(y + 1) * stride]);
}
shf += 1;
}
if above {
for x in 0usize..size {
sum += u32::from(a[1 + x]);
}
shf += 1;
}
let dcval = if !left && !above {
128u8
} else {
((sum + (1 << (shf - 1))) >> shf) as u8
};
for y in 0usize..size {
a[1 + stride * (y + 1)..][..size].fill(dcval);
}
}
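/// Freestanding 16x16 DC prediction over a plain buffer; border availability
/// is inferred from the coordinates (`y0 > 0` means an above row exists,
/// `x0 > 0` a left column).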
#[inline]
#[allow(dead_code)]
pub(crate) fn predict_dc_16x16<const N: usize>(
a: &mut [u8; N],
stride: usize,
x0: usize,
y0: usize,
) {
let above = y0 > 0;
let left = x0 > 0;
let mut sum = 0u32;
let mut shf = 3;
if left {
for y in 0..16 {
sum += u32::from(a[(y0 + y) * stride + (x0 - 1)]);
}
shf += 1;
}
if above {
for x in 0..16 {
sum += u32::from(a[(y0 - 1) * stride + x0 + x]);
}
shf += 1;
}
let dcval = if !left && !above {
128u8
} else {
((sum + (1 << (shf - 1))) >> shf) as u8
};
for y in 0..16 {
for x in 0..16 {
a[(y0 + y) * stride + x0 + x] = dcval;
}
}
}
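/// Freestanding 8x8 DC prediction; see `predict_dc_16x16`.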
#[inline]
#[allow(dead_code)]
pub(crate) fn predict_dc_8x8<const N: usize>(a: &mut [u8; N], stride: usize, x0: usize, y0: usize) {
let above = y0 > 0;
let left = x0 > 0;
let mut sum = 0u32;
let mut shf = 2;
if left {
for y in 0..8 {
sum += u32::from(a[(y0 + y) * stride + (x0 - 1)]);
}
shf += 1;
}
if above {
for x in 0..8 {
sum += u32::from(a[(y0 - 1) * stride + x0 + x]);
}
shf += 1;
}
let dcval = if !left && !above {
128u8
} else {
((sum + (1 << (shf - 1))) >> shf) as u8
};
for y in 0..8 {
for x in 0..8 {
a[(y0 + y) * stride + x0 + x] = dcval;
}
}
}
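/// TM_PRED (TrueMotion): each predicted pixel is `left + above - top_left`,
/// clamped to the 0..=255 range.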
#[allow(clippy::manual_clamp)]
#[inline(always)]
pub(crate) fn predict_tmpred<const N: usize>(
a: &mut [u8; N],
size: usize,
x0: usize,
y0: usize,
stride: usize,
) {
assert!((y0 + size - 1) * stride + x0 + size <= N);
assert!(y0 >= 1 && x0 >= 1);
let (above, x_block) = a.as_mut_slice().split_at_mut(y0 * stride + (x0 - 1));
let p = i32::from(above[(y0 - 1) * stride + x0 - 1]);
let above_slice = &above[(y0 - 1) * stride + x0..];
for y in 0usize..size {
let left_minus_p = i32::from(x_block[y * stride]) - p;
x_block[y * stride + 1..][..size]
.iter_mut()
.zip(above_slice)
.for_each(|(cur, &abv)| *cur = (left_minus_p + i32::from(abv)).max(0).min(255) as u8);
}
}
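/// B_DC_PRED: fills the 4x4 sub-block with the rounded average of its four
/// above and four left border pixels.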
pub(crate) fn predict_bdcpred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
assert!(x0 >= 1);
assert!(y0 >= 1);
let mut v = 4;
a[(y0 - 1) * stride + x0..][..4]
.iter()
.for_each(|&a| v += u32::from(a));
for i in 0usize..4 {
v += u32::from(a[(y0 + i) * stride + x0 - 1]);
}
v >>= 3;
for chunk in a.as_mut_slice().chunks_exact_mut(stride).skip(y0).take(4) {
for ch in &mut chunk[x0..][..4] {
*ch = v as u8;
}
}
}
fn topleft_pixel(a: &[u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) -> u8 {
a[(y0 - 1) * stride + x0 - 1]
}
#[inline(always)]
fn top_pixels(
a: &[u8; LUMA_BLOCK_SIZE],
x0: usize,
y0: usize,
stride: usize,
) -> (u8, u8, u8, u8, u8, u8, u8, u8) {
let pos = (y0 - 1) * stride + x0;
assert!(pos + 7 < LUMA_BLOCK_SIZE);
let a0 = a[pos];
let a1 = a[pos + 1];
let a2 = a[pos + 2];
let a3 = a[pos + 3];
let a4 = a[pos + 4];
let a5 = a[pos + 5];
let a6 = a[pos + 6];
let a7 = a[pos + 7];
(a0, a1, a2, a3, a4, a5, a6, a7)
}
#[inline(always)]
fn left_pixels(a: &[u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) -> (u8, u8, u8, u8) {
assert!((y0 + 3) * stride + x0 > 0);
assert!((y0 + 3) * stride + x0 - 1 < LUMA_BLOCK_SIZE);
let l0 = a[y0 * stride + x0 - 1];
let l1 = a[(y0 + 1) * stride + x0 - 1];
let l2 = a[(y0 + 2) * stride + x0 - 1];
let l3 = a[(y0 + 3) * stride + x0 - 1];
(l0, l1, l2, l3)
}
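/// Gathers the nine edge pixels around a 4x4 sub-block: the left column from
/// bottom to top (`e0..e3`), the top-left corner (`e4`), then the above row
/// (`e5..e8`).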
#[inline(always)]
fn edge_pixels(
a: &[u8; LUMA_BLOCK_SIZE],
x0: usize,
y0: usize,
stride: usize,
) -> (u8, u8, u8, u8, u8, u8, u8, u8, u8) {
let pos = (y0 - 1) * stride + x0 - 1;
assert!(pos + 4 * stride < LUMA_BLOCK_SIZE);
let e0 = a[pos + 4 * stride];
let e1 = a[pos + 3 * stride];
let e2 = a[pos + 2 * stride];
let e3 = a[pos + stride];
let e4 = a[pos];
let e5 = a[pos + 1];
let e6 = a[pos + 2];
let e7 = a[pos + 3];
let e8 = a[pos + 4];
(e0, e1, e2, e3, e4, e5, e6, e7, e8)
}
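/// B_VE_PRED: vertical prediction from the above edge smoothed with the 1-2-1
/// filter.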
pub(crate) fn predict_bvepred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let p = topleft_pixel(a, x0, y0, stride);
let (a0, a1, a2, a3, a4, ..) = top_pixels(a, x0, y0, stride);
let avg_1 = avg3(p, a0, a1);
let avg_2 = avg3(a0, a1, a2);
let avg_3 = avg3(a1, a2, a3);
let avg_4 = avg3(a2, a3, a4);
let avg = [avg_1, avg_2, avg_3, avg_4];
let mut pos = y0 * stride + x0;
for _ in 0..4 {
a[pos..=pos + 3].copy_from_slice(&avg);
pos += stride;
}
}
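/// B_HE_PRED: horizontal prediction; each row is filled with a 1-2-1 filtered
/// left pixel.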
pub(crate) fn predict_bhepred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let p = topleft_pixel(a, x0, y0, stride);
let (l0, l1, l2, l3) = left_pixels(a, x0, y0, stride);
let avgs = [
avg3(p, l0, l1),
avg3(l0, l1, l2),
avg3(l1, l2, l3),
avg3(l2, l3, l3),
];
let mut pos = y0 * stride + x0;
for avg in avgs {
for a_p in &mut a[pos..=pos + 3] {
*a_p = avg;
}
pos += stride;
}
}
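/// B_LD_PRED: down-left diagonal prediction from the above and above-right
/// pixels.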
pub(crate) fn predict_bldpred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let (a0, a1, a2, a3, a4, a5, a6, a7) = top_pixels(a, x0, y0, stride);
let avgs = [
avg3(a0, a1, a2),
avg3(a1, a2, a3),
avg3(a2, a3, a4),
avg3(a3, a4, a5),
avg3(a4, a5, a6),
avg3(a5, a6, a7),
avg3(a6, a7, a7),
];
let mut pos = y0 * stride + x0;
for i in 0..4 {
a[pos..=pos + 3].copy_from_slice(&avgs[i..=i + 3]);
pos += stride;
}
}
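/// B_RD_PRED: down-right diagonal prediction from the full edge (left column,
/// corner, above row).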
pub(crate) fn predict_brdpred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let (e0, e1, e2, e3, e4, e5, e6, e7, e8) = edge_pixels(a, x0, y0, stride);
let avgs = [
avg3(e0, e1, e2),
avg3(e1, e2, e3),
avg3(e2, e3, e4),
avg3(e3, e4, e5),
avg3(e4, e5, e6),
avg3(e5, e6, e7),
avg3(e6, e7, e8),
];
let mut pos = y0 * stride + x0;
for i in 0..4 {
a[pos..=pos + 3].copy_from_slice(&avgs[3 - i..7 - i]);
pos += stride;
}
}
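/// B_VR_PRED: vertical-right diagonal prediction.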
pub(crate) fn predict_bvrpred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let (_, e1, e2, e3, e4, e5, e6, e7, e8) = edge_pixels(a, x0, y0, stride);
a[(y0 + 3) * stride + x0] = avg3(e1, e2, e3);
a[(y0 + 2) * stride + x0] = avg3(e2, e3, e4);
a[(y0 + 3) * stride + x0 + 1] = avg3(e3, e4, e5);
a[(y0 + 1) * stride + x0] = avg3(e3, e4, e5);
a[(y0 + 2) * stride + x0 + 1] = avg2(e4, e5);
a[y0 * stride + x0] = avg2(e4, e5);
a[(y0 + 3) * stride + x0 + 2] = avg3(e4, e5, e6);
a[(y0 + 1) * stride + x0 + 1] = avg3(e4, e5, e6);
a[(y0 + 2) * stride + x0 + 2] = avg2(e5, e6);
a[y0 * stride + x0 + 1] = avg2(e5, e6);
a[(y0 + 3) * stride + x0 + 3] = avg3(e5, e6, e7);
a[(y0 + 1) * stride + x0 + 2] = avg3(e5, e6, e7);
a[(y0 + 2) * stride + x0 + 3] = avg2(e6, e7);
a[y0 * stride + x0 + 2] = avg2(e6, e7);
a[(y0 + 1) * stride + x0 + 3] = avg3(e6, e7, e8);
a[y0 * stride + x0 + 3] = avg2(e7, e8);
}
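/// B_VL_PRED: vertical-left diagonal prediction from the above and above-right
/// pixels.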
pub(crate) fn predict_bvlpred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let (a0, a1, a2, a3, a4, a5, a6, a7) = top_pixels(a, x0, y0, stride);
a[y0 * stride + x0] = avg2(a0, a1);
a[(y0 + 1) * stride + x0] = avg3(a0, a1, a2);
a[(y0 + 2) * stride + x0] = avg2(a1, a2);
a[y0 * stride + x0 + 1] = avg2(a1, a2);
a[(y0 + 1) * stride + x0 + 1] = avg3(a1, a2, a3);
a[(y0 + 3) * stride + x0] = avg3(a1, a2, a3);
a[(y0 + 2) * stride + x0 + 1] = avg2(a2, a3);
a[y0 * stride + x0 + 2] = avg2(a2, a3);
a[(y0 + 3) * stride + x0 + 1] = avg3(a2, a3, a4);
a[(y0 + 1) * stride + x0 + 2] = avg3(a2, a3, a4);
a[(y0 + 2) * stride + x0 + 2] = avg2(a3, a4);
a[y0 * stride + x0 + 3] = avg2(a3, a4);
a[(y0 + 3) * stride + x0 + 2] = avg3(a3, a4, a5);
a[(y0 + 1) * stride + x0 + 3] = avg3(a3, a4, a5);
a[(y0 + 2) * stride + x0 + 3] = avg3(a4, a5, a6);
a[(y0 + 3) * stride + x0 + 3] = avg3(a5, a6, a7);
}
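/// B_HD_PRED: horizontal-down diagonal prediction.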
pub(crate) fn predict_bhdpred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let (e0, e1, e2, e3, e4, e5, e6, e7, _) = edge_pixels(a, x0, y0, stride);
a[(y0 + 3) * stride + x0] = avg2(e0, e1);
a[(y0 + 3) * stride + x0 + 1] = avg3(e0, e1, e2);
a[(y0 + 2) * stride + x0] = avg2(e1, e2);
a[(y0 + 3) * stride + x0 + 2] = avg2(e1, e2);
a[(y0 + 2) * stride + x0 + 1] = avg3(e1, e2, e3);
a[(y0 + 3) * stride + x0 + 3] = avg3(e1, e2, e3);
a[(y0 + 2) * stride + x0 + 2] = avg2(e2, e3);
a[(y0 + 1) * stride + x0] = avg2(e2, e3);
a[(y0 + 2) * stride + x0 + 3] = avg3(e2, e3, e4);
a[(y0 + 1) * stride + x0 + 1] = avg3(e2, e3, e4);
a[(y0 + 1) * stride + x0 + 2] = avg2(e3, e4);
a[y0 * stride + x0] = avg2(e3, e4);
a[(y0 + 1) * stride + x0 + 3] = avg3(e3, e4, e5);
a[y0 * stride + x0 + 1] = avg3(e3, e4, e5);
a[y0 * stride + x0 + 2] = avg3(e4, e5, e6);
a[y0 * stride + x0 + 3] = avg3(e5, e6, e7);
}
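/// B_HU_PRED: horizontal-up prediction; the lower-right pixels saturate to the
/// bottom-left edge pixel.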
pub(crate) fn predict_bhupred(a: &mut [u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) {
assert!((y0 + 3) * stride + x0 + 3 < LUMA_BLOCK_SIZE);
let (l0, l1, l2, l3) = left_pixels(a, x0, y0, stride);
a[y0 * stride + x0] = avg2(l0, l1);
a[y0 * stride + x0 + 1] = avg3(l0, l1, l2);
a[y0 * stride + x0 + 2] = avg2(l1, l2);
a[(y0 + 1) * stride + x0] = avg2(l1, l2);
a[y0 * stride + x0 + 3] = avg3(l1, l2, l3);
a[(y0 + 1) * stride + x0 + 1] = avg3(l1, l2, l3);
a[(y0 + 1) * stride + x0 + 2] = avg2(l2, l3);
a[(y0 + 2) * stride + x0] = avg2(l2, l3);
a[(y0 + 1) * stride + x0 + 3] = avg3(l2, l3, l3);
a[(y0 + 2) * stride + x0 + 1] = avg3(l2, l3, l3);
a[(y0 + 2) * stride + x0 + 2] = l3;
a[(y0 + 2) * stride + x0 + 3] = l3;
a[(y0 + 3) * stride + x0] = l3;
a[(y0 + 3) * stride + x0 + 1] = l3;
a[(y0 + 3) * stride + x0 + 2] = l3;
a[(y0 + 3) * stride + x0 + 3] = l3;
}
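/// All ten 4x4 intra predictions for one sub-block position, computed in a
/// single pass over the shared edge pixels. `data[mode]` holds the 16
/// predicted pixels in raster order, indexed by the VP8 B-mode numbering:
/// 0 DC, 1 TM, 2 VE, 3 HE, 4 LD, 5 RD, 6 VR, 7 VL, 8 HD, 9 HU.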
#[derive(Clone, Copy)]
pub(crate) struct I4Predictions {
pub data: [[u8; 16]; 10],
}
impl I4Predictions {
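/// Computes all ten predictions for the 4x4 sub-block whose top-left pixel
/// is at (`x0`, `y0`) in the bordered workspace `src`.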
#[inline]
pub fn compute(src: &[u8; LUMA_BLOCK_SIZE], x0: usize, y0: usize, stride: usize) -> Self {
let mut data = [[0u8; 16]; 10];
let p = src[(y0 - 1) * stride + x0 - 1];
let top_pos = (y0 - 1) * stride + x0;
let a0 = src[top_pos];
let a1 = src[top_pos + 1];
let a2 = src[top_pos + 2];
let a3 = src[top_pos + 3];
let a4 = src[top_pos + 4];
let a5 = src[top_pos + 5];
let a6 = src[top_pos + 6];
let a7 = src[top_pos + 7];
let l0 = src[y0 * stride + x0 - 1];
let l1 = src[(y0 + 1) * stride + x0 - 1];
let l2 = src[(y0 + 2) * stride + x0 - 1];
let l3 = src[(y0 + 3) * stride + x0 - 1];
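// The nine edge pixels in `edge_pixels` order: left column bottom-to-top,
// the top-left corner, then the above row.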
let e0 = l3;
let e1 = l2;
let e2 = l1;
let e3 = l0;
let e4 = p;
let e5 = a0;
let e6 = a1;
let e7 = a2;
let e8 = a3;
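// Mode 0: B_DC_PRED — rounded average of the above and left pixels.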
{
let mut v = 4u32;
v += u32::from(a0) + u32::from(a1) + u32::from(a2) + u32::from(a3);
v += u32::from(l0) + u32::from(l1) + u32::from(l2) + u32::from(l3);
let dc = (v >> 3) as u8;
data[0] = [dc; 16];
}
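// Mode 1: B_TM_PRED (TrueMotion) — left + above - top-left, clamped.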
{
let p_i32 = i32::from(p);
let l0_p = i32::from(l0) - p_i32;
let l1_p = i32::from(l1) - p_i32;
let l2_p = i32::from(l2) - p_i32;
let l3_p = i32::from(l3) - p_i32;
let a0_i = i32::from(a0);
let a1_i = i32::from(a1);
let a2_i = i32::from(a2);
let a3_i = i32::from(a3);
data[1][0] = (l0_p + a0_i).clamp(0, 255) as u8;
data[1][1] = (l0_p + a1_i).clamp(0, 255) as u8;
data[1][2] = (l0_p + a2_i).clamp(0, 255) as u8;
data[1][3] = (l0_p + a3_i).clamp(0, 255) as u8;
data[1][4] = (l1_p + a0_i).clamp(0, 255) as u8;
data[1][5] = (l1_p + a1_i).clamp(0, 255) as u8;
data[1][6] = (l1_p + a2_i).clamp(0, 255) as u8;
data[1][7] = (l1_p + a3_i).clamp(0, 255) as u8;
data[1][8] = (l2_p + a0_i).clamp(0, 255) as u8;
data[1][9] = (l2_p + a1_i).clamp(0, 255) as u8;
data[1][10] = (l2_p + a2_i).clamp(0, 255) as u8;
data[1][11] = (l2_p + a3_i).clamp(0, 255) as u8;
data[1][12] = (l3_p + a0_i).clamp(0, 255) as u8;
data[1][13] = (l3_p + a1_i).clamp(0, 255) as u8;
data[1][14] = (l3_p + a2_i).clamp(0, 255) as u8;
data[1][15] = (l3_p + a3_i).clamp(0, 255) as u8;
}
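// Mode 2: B_VE_PRED.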
{
let avg = [
avg3(p, a0, a1),
avg3(a0, a1, a2),
avg3(a1, a2, a3),
avg3(a2, a3, a4),
];
for y in 0..4 {
data[2][y * 4..y * 4 + 4].copy_from_slice(&avg);
}
}
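// Mode 3: B_HE_PRED.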
{
let avgs = [
avg3(p, l0, l1),
avg3(l0, l1, l2),
avg3(l1, l2, l3),
avg3(l2, l3, l3),
];
for y in 0..4 {
data[3][y * 4..y * 4 + 4].fill(avgs[y]);
}
}
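// Mode 4: B_LD_PRED.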
{
let avgs = [
avg3(a0, a1, a2),
avg3(a1, a2, a3),
avg3(a2, a3, a4),
avg3(a3, a4, a5),
avg3(a4, a5, a6),
avg3(a5, a6, a7),
avg3(a6, a7, a7),
];
for y in 0..4 {
data[4][y * 4..y * 4 + 4].copy_from_slice(&avgs[y..y + 4]);
}
}
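// Mode 5: B_RD_PRED.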
{
let avgs = [
avg3(e0, e1, e2),
avg3(e1, e2, e3),
avg3(e2, e3, e4),
avg3(e3, e4, e5),
avg3(e4, e5, e6),
avg3(e5, e6, e7),
avg3(e6, e7, e8),
];
for y in 0..4 {
data[5][y * 4..y * 4 + 4].copy_from_slice(&avgs[3 - y..7 - y]);
}
}
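// Mode 6: B_VR_PRED.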
{
data[6][12] = avg3(e1, e2, e3);
data[6][8] = avg3(e2, e3, e4);
data[6][13] = avg3(e3, e4, e5);
data[6][4] = avg3(e3, e4, e5);
data[6][9] = avg2(e4, e5);
data[6][0] = avg2(e4, e5);
data[6][14] = avg3(e4, e5, e6);
data[6][5] = avg3(e4, e5, e6);
data[6][10] = avg2(e5, e6);
data[6][1] = avg2(e5, e6);
data[6][15] = avg3(e5, e6, e7);
data[6][6] = avg3(e5, e6, e7);
data[6][11] = avg2(e6, e7);
data[6][2] = avg2(e6, e7);
data[6][7] = avg3(e6, e7, e8);
data[6][3] = avg2(e7, e8);
}
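// Mode 7: B_VL_PRED.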
{
data[7][0] = avg2(a0, a1);
data[7][4] = avg3(a0, a1, a2);
data[7][8] = avg2(a1, a2);
data[7][1] = avg2(a1, a2);
data[7][5] = avg3(a1, a2, a3);
data[7][12] = avg3(a1, a2, a3);
data[7][9] = avg2(a2, a3);
data[7][2] = avg2(a2, a3);
data[7][13] = avg3(a2, a3, a4);
data[7][6] = avg3(a2, a3, a4);
data[7][10] = avg2(a3, a4);
data[7][3] = avg2(a3, a4);
data[7][14] = avg3(a3, a4, a5);
data[7][7] = avg3(a3, a4, a5);
data[7][11] = avg3(a4, a5, a6);
data[7][15] = avg3(a5, a6, a7);
}
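// Mode 8: B_HD_PRED.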
{
data[8][12] = avg2(e0, e1);
data[8][13] = avg3(e0, e1, e2);
data[8][8] = avg2(e1, e2);
data[8][14] = avg2(e1, e2);
data[8][9] = avg3(e1, e2, e3);
data[8][15] = avg3(e1, e2, e3);
data[8][10] = avg2(e2, e3);
data[8][4] = avg2(e2, e3);
data[8][11] = avg3(e2, e3, e4);
data[8][5] = avg3(e2, e3, e4);
data[8][6] = avg2(e3, e4);
data[8][0] = avg2(e3, e4);
data[8][7] = avg3(e3, e4, e5);
data[8][1] = avg3(e3, e4, e5);
data[8][2] = avg3(e4, e5, e6);
data[8][3] = avg3(e5, e6, e7);
}
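// Mode 9: B_HU_PRED.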
{
data[9][0] = avg2(l0, l1);
data[9][1] = avg3(l0, l1, l2);
data[9][2] = avg2(l1, l2);
data[9][4] = avg2(l1, l2);
data[9][3] = avg3(l1, l2, l3);
data[9][5] = avg3(l1, l2, l3);
data[9][6] = avg2(l2, l3);
data[9][8] = avg2(l2, l3);
data[9][7] = avg3(l2, l3, l3);
data[9][9] = avg3(l2, l3, l3);
data[9][10] = l3;
data[9][11] = l3;
data[9][12] = l3;
data[9][13] = l3;
data[9][14] = l3;
data[9][15] = l3;
}
I4Predictions { data }
}
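/// Returns the 16 predicted pixels for the given B-mode index.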
#[inline]
pub fn get(&self, mode_idx: usize) -> &[u8; 16] {
&self.data[mode_idx]
}
}
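// Fused IDCT + add-residue + clear, one definition per target architecture.
// Every variant detects the DC-only case (all AC coefficients zero) up front
// so a cheaper transform can be used.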
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn idct_add_residue_and_clear_with_token<const N: usize>(
token: archmage::X64V3Token,
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
let dc_only = rblock[1..].iter().all(|&c| c == 0);
use crate::common::transform;
transform::idct_add_residue_inplace_with_token(
token,
rblock,
pblock.as_mut_slice(),
y0,
x0,
stride,
dc_only,
);
}
#[cfg(target_arch = "aarch64")]
#[inline(always)]
pub(crate) fn idct_add_residue_and_clear_with_token<const N: usize>(
token: archmage::NeonToken,
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
let dc_only = rblock[1..].iter().all(|&c| c == 0);
crate::common::transform::idct_add_residue_inplace_neon(
token,
rblock,
pblock.as_mut_slice(),
y0,
x0,
stride,
dc_only,
);
}
#[cfg(target_arch = "wasm32")]
#[inline(always)]
pub(crate) fn idct_add_residue_and_clear_with_token<const N: usize>(
token: archmage::Wasm128Token,
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
use crate::common::transform;
let dc_only = rblock[1..].iter().all(|&c| c == 0);
if dc_only {
transform::idct4x4_dc(rblock);
} else {
crate::common::transform::idct4x4_wasm(token, rblock);
}
add_residue_and_clear_scalar(pblock, rblock, y0, x0, stride);
}
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "x86",
target_arch = "aarch64",
target_arch = "wasm32"
)))]
#[inline(always)]
pub(crate) fn idct_add_residue_and_clear_with_token<const N: usize>(
_token: (),
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
use crate::common::transform;
let dc_only = rblock[1..].iter().all(|&c| c == 0);
if dc_only {
transform::idct4x4_dc(rblock);
} else {
transform::idct4x4(rblock);
}
add_residue_and_clear_scalar(pblock, rblock, y0, x0, stride);
}
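/// Applies the inverse 4x4 DCT to `rblock`, adds the result to the prediction
/// at (`y0`, `x0`) with clamping, and zeroes the coefficients, dispatching to
/// the best available SIMD path via `incant!`.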
#[inline(always)]
pub(crate) fn idct_add_residue_and_clear<const N: usize>(
pblock: &mut [u8; N],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
incant!(
idct_add_residue_and_clear_dispatch(pblock.as_mut_slice(), rblock, y0, x0, stride),
[v3, neon, wasm128, scalar]
);
}
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn idct_add_residue_and_clear_dispatch_v3(
token: X64V3Token,
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
use crate::common::transform;
let dc_only = rblock[1..].iter().all(|&c| c == 0);
transform::idct_add_residue_inplace_with_token(token, rblock, pblock, y0, x0, stride, dc_only);
}
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn idct_add_residue_and_clear_dispatch_neon(
token: NeonToken,
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
let dc_only = rblock[1..].iter().all(|&c| c == 0);
crate::common::transform::idct_add_residue_inplace_neon(
token, rblock, pblock, y0, x0, stride, dc_only,
);
}
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn idct_add_residue_and_clear_dispatch_wasm128(
_token: Wasm128Token,
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
use crate::common::transform;
let dc_only = rblock[1..].iter().all(|&c| c == 0);
if dc_only {
transform::idct4x4_dc(rblock);
} else {
crate::common::transform::idct4x4_wasm(_token, rblock);
}
add_residue_and_clear_scalar_inner(pblock, rblock, y0, x0, stride);
}
#[inline(always)]
fn idct_add_residue_and_clear_dispatch_scalar(
_token: ScalarToken,
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
use crate::common::transform;
let dc_only = rblock[1..].iter().all(|&c| c == 0);
if dc_only {
transform::idct4x4_dc(rblock);
} else {
transform::idct4x4(rblock);
}
add_residue_and_clear_scalar_inner(pblock, rblock, y0, x0, stride);
}
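/// Shared scalar tail for the wasm128 and scalar IDCT dispatchers: adds each
/// residual to the prediction with a 0..=255 clamp and clears it.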
#[inline(always)]
fn add_residue_and_clear_scalar_inner(
pblock: &mut [u8],
rblock: &mut [i32; 16],
y0: usize,
x0: usize,
stride: usize,
) {
let mut pos = y0 * stride + x0;
for row in rblock.chunks_mut(4) {
for (p, coeff) in pblock[pos..][..4].iter_mut().zip(row.iter_mut()) {
*p = (*coeff + i32::from(*p)).clamp(0, 255) as u8;
*coeff = 0;
}
pos += stride;
}
}
#[cfg(all(test, feature = "_benchmarks"))]
mod benches {
use super::*;
use test::{Bencher, black_box};
fn make_luma_ws() -> [u8; LUMA_BLOCK_SIZE] {
let mut ws = [0u8; LUMA_BLOCK_SIZE];
for (i, b) in ws.iter_mut().enumerate() {
*b = (i % 256) as u8;
}
ws
}
#[bench]
fn bench_predict_bvepred(b: &mut Bencher) {
let mut ws = make_luma_ws();
b.iter(|| {
predict_bvepred(black_box(&mut ws), 5, 5, LUMA_STRIDE);
});
}
#[bench]
fn bench_predict_bldpred(b: &mut Bencher) {
let mut ws = make_luma_ws();
b.iter(|| {
predict_bldpred(black_box(&mut ws), 5, 5, LUMA_STRIDE);
black_box(());
});
}
#[bench]
fn bench_predict_brdpred(b: &mut Bencher) {
let mut ws = make_luma_ws();
b.iter(|| {
predict_brdpred(black_box(&mut ws), 5, 5, LUMA_STRIDE);
black_box(());
});
}
#[bench]
fn bench_predict_bhepred(b: &mut Bencher) {
let mut ws = make_luma_ws();
b.iter(|| {
predict_bhepred(black_box(&mut ws), 5, 5, LUMA_STRIDE);
black_box(());
});
}
#[bench]
fn bench_top_pixels(b: &mut Bencher) {
let ws = make_luma_ws();
b.iter(|| {
black_box(top_pixels(black_box(&ws), 5, 5, LUMA_STRIDE));
});
}
#[bench]
fn bench_edge_pixels(b: &mut Bencher) {
let ws = make_luma_ws();
b.iter(|| {
black_box(edge_pixels(black_box(&ws), 5, 5, LUMA_STRIDE));
});
}
}
#[cfg(test)]
#[allow(clippy::erasing_op, clippy::identity_op)]
mod tests {
use super::*;
#[test]
fn test_avg2() {
for i in 0u8..=255 {
for j in 0u8..=255 {
let ceil_avg = (f32::from(i) + f32::from(j)) / 2.0;
let ceil_avg = ceil_avg.ceil() as u8;
assert_eq!(
ceil_avg,
avg2(i, j),
"avg2({}, {}), expected {}, got {}.",
i,
j,
ceil_avg,
avg2(i, j)
);
}
}
}
#[test]
fn test_avg2_specific() {
assert_eq!(
255,
avg2(255, 255),
"avg2(255, 255), expected 255, got {}.",
avg2(255, 255)
);
assert_eq!(1, avg2(1, 1), "avg2(1, 1), expected 1, got {}.", avg2(1, 1));
assert_eq!(2, avg2(2, 1), "avg2(2, 1), expected 2, got {}.", avg2(2, 1));
}
#[test]
fn test_avg3() {
for i in 0u8..=255 {
for j in 0u8..=255 {
for k in 0u8..=255 {
let floor_avg = (f32::from(i) + 2.0 * f32::from(j) + f32::from(k) + 2.0) / 4.0;
let floor_avg = floor_avg.floor() as u8;
assert_eq!(
floor_avg,
avg3(i, j, k),
"avg3({}, {}, {}), expected {}, got {}.",
i,
j,
k,
floor_avg,
avg3(i, j, k)
);
}
}
}
}
#[test]
fn test_edge_pixels() {
let mut im = [0u8; LUMA_BLOCK_SIZE];
let stride = LUMA_STRIDE;
im[0 * stride + 0] = 5;
im[0 * stride + 1] = 6;
im[0 * stride + 2] = 7;
im[0 * stride + 3] = 8;
im[0 * stride + 4] = 9;
im[1 * stride + 0] = 4;
im[2 * stride + 0] = 3;
im[3 * stride + 0] = 2;
im[4 * stride + 0] = 1;
let (e0, e1, e2, e3, e4, e5, e6, e7, e8) = edge_pixels(&im, 1, 1, stride);
assert_eq!(e0, 1);
assert_eq!(e1, 2);
assert_eq!(e2, 3);
assert_eq!(e3, 4);
assert_eq!(e4, 5);
assert_eq!(e5, 6);
assert_eq!(e6, 7);
assert_eq!(e7, 8);
assert_eq!(e8, 9);
}
#[test]
fn test_top_pixels() {
let mut im = [0u8; LUMA_BLOCK_SIZE];
let stride = LUMA_STRIDE;
im[0 * stride + 1] = 1;
im[0 * stride + 2] = 2;
im[0 * stride + 3] = 3;
im[0 * stride + 4] = 4;
im[0 * stride + 5] = 5;
im[0 * stride + 6] = 6;
im[0 * stride + 7] = 7;
im[0 * stride + 8] = 8;
let (e0, e1, e2, e3, e4, e5, e6, e7) = top_pixels(&im, 1, 1, stride);
assert_eq!(e0, 1);
assert_eq!(e1, 2);
assert_eq!(e2, 3);
assert_eq!(e3, 4);
assert_eq!(e4, 5);
assert_eq!(e5, 6);
assert_eq!(e6, 7);
assert_eq!(e7, 8);
}
#[test]
fn test_add_residue() {
let mut pblock = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let rblock = [
-1, -2, -3, -4, 250, 249, 248, 250, -10, -18, -192, -17, -3, 15, 18, 9,
];
let expected: [u8; 16] = [0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 10, 29, 33, 25];
add_residue(&mut pblock, &rblock, 0, 0, 4);
for (&e, &i) in expected.iter().zip(&pblock) {
assert_eq!(e, i);
}
}
#[test]
fn test_predict_bhepred() {
let stride = LUMA_STRIDE;
let mut im = [0u8; LUMA_BLOCK_SIZE];
im[0 * stride + 0] = 5;
im[1 * stride + 0] = 4;
im[2 * stride + 0] = 3;
im[3 * stride + 0] = 2;
im[4 * stride + 0] = 1;
predict_bhepred(&mut im, 1, 1, stride);
assert_eq!(im[1 * stride + 1], 4);
assert_eq!(im[1 * stride + 2], 4);
assert_eq!(im[1 * stride + 3], 4);
assert_eq!(im[1 * stride + 4], 4);
assert_eq!(im[2 * stride + 1], 3);
assert_eq!(im[3 * stride + 1], 2);
assert_eq!(im[4 * stride + 1], 1);
}
#[test]
fn test_predict_brdpred() {
let stride = LUMA_STRIDE;
let mut im = [0u8; LUMA_BLOCK_SIZE];
im[0 * stride + 0] = 5;
im[0 * stride + 1] = 6;
im[0 * stride + 2] = 7;
im[0 * stride + 3] = 8;
im[0 * stride + 4] = 9;
im[1 * stride + 0] = 4;
im[2 * stride + 0] = 3;
im[3 * stride + 0] = 2;
im[4 * stride + 0] = 1;
predict_brdpred(&mut im, 1, 1, stride);
assert_eq!(im[1 * stride + 1], 5);
assert_eq!(im[1 * stride + 2], 6);
assert_eq!(im[1 * stride + 3], 7);
assert_eq!(im[1 * stride + 4], 8);
assert_eq!(im[2 * stride + 1], 4);
assert_eq!(im[2 * stride + 2], 5);
assert_eq!(im[2 * stride + 3], 6);
assert_eq!(im[2 * stride + 4], 7);
assert_eq!(im[3 * stride + 1], 3);
assert_eq!(im[3 * stride + 2], 4);
assert_eq!(im[3 * stride + 3], 5);
assert_eq!(im[3 * stride + 4], 6);
assert_eq!(im[4 * stride + 1], 2);
assert_eq!(im[4 * stride + 2], 3);
assert_eq!(im[4 * stride + 3], 4);
assert_eq!(im[4 * stride + 4], 5);
}
#[test]
fn test_predict_bldpred() {
let stride = LUMA_STRIDE;
let mut im = [0u8; LUMA_BLOCK_SIZE];
for i in 0..8 {
im[0 * stride + 1 + i] = (i + 1) as u8;
}
predict_bldpred(&mut im, 1, 1, stride);
assert_eq!(im[1 * stride + 1], 2);
assert_eq!(im[1 * stride + 2], 3);
assert_eq!(im[1 * stride + 3], 4);
assert_eq!(im[1 * stride + 4], 5);
assert_eq!(im[2 * stride + 1], 3);
assert_eq!(im[2 * stride + 2], 4);
assert_eq!(im[2 * stride + 3], 5);
assert_eq!(im[2 * stride + 4], 6);
assert_eq!(im[3 * stride + 1], 4);
assert_eq!(im[3 * stride + 2], 5);
assert_eq!(im[3 * stride + 3], 6);
assert_eq!(im[3 * stride + 4], 7);
assert_eq!(im[4 * stride + 1], 5);
assert_eq!(im[4 * stride + 2], 6);
assert_eq!(im[4 * stride + 3], 7);
assert_eq!(im[4 * stride + 4], 8);
}
#[test]
fn test_predict_bvepred() {
let stride = LUMA_STRIDE;
let mut im = [0u8; LUMA_BLOCK_SIZE];
im[0 * stride + 0] = 1;
for i in 0..8 {
im[0 * stride + 1 + i] = (i + 2) as u8;
}
predict_bvepred(&mut im, 1, 1, stride);
for row in 1..=4 {
assert_eq!(im[row * stride + 1], 2, "row {row} col 1");
assert_eq!(im[row * stride + 2], 3, "row {row} col 2");
assert_eq!(im[row * stride + 3], 4, "row {row} col 3");
assert_eq!(im[row * stride + 4], 5, "row {row} col 4");
}
}
}